From ba2fe3e3c43192e298b60969dc766e3d8389da87 Mon Sep 17 00:00:00 2001
From: Marco Slot
Date: Fri, 9 Sep 2022 15:44:30 +0200
Subject: [PATCH 01/17] Remove do_repair option from citus_copy_shard_placement (#6299)

Co-authored-by: Marco Slot
---
 .../commands/create_distributed_table.c | 2 +-
 .../distributed/operations/shard_rebalancer.c | 6 +-
 .../distributed/operations/shard_split.c | 6 +-
 .../{repair_shards.c => shard_transfer.c} | 486 +++++------------
 .../replication/multi_logical_replication.c | 2 +-
 src/backend/distributed/shared_library_init.c | 2 +-
 .../distributed/sql/citus--11.0-4--11.1-1.sql | 1 +
 .../sql/downgrades/citus--11.1-1--11.0-4.sql | 3 +-
 .../citus_copy_shard_placement/11.1-1.sql | 19 +
 .../citus_copy_shard_placement/latest.sql | 3 +-
 .../distributed/utils/reference_table_utils.c | 16 +-
 .../distributed/coordinator_protocol.h | 5 +-
 .../{repair_shards.h => shard_transfer.h} | 0
 ...ent.out => citus_copy_shard_placement.out} | 26 +-
 .../regress/expected/citus_local_tables.out | 4 +-
 .../expected/citus_local_tables_ent.out | 10 +-
 .../expected/ignoring_orphaned_shards.out | 27 -
 ...ation_copy_placement_vs_copy_placement.out | 40 +-
 ...olation_copy_placement_vs_modification.out | 100 ++--
 ..._create_distributed_table_concurrently.out | 32 +-
 .../expected/isolation_dml_vs_repair.out | 236 ---------
 .../multi_colocated_shard_rebalance.out | 20 +-
 .../multi_colocated_shard_transfer.out | 231 ---------
 src/test/regress/expected/multi_extension.out | 106 ++--
 src/test/regress/expected/multi_move_mx.out | 6 +-
 .../regress/expected/multi_prepare_sql.out | 12 +-
 .../expected/multi_reference_table.out | 17 -
 .../regress/expected/multi_repair_shards.out | 94 ----
 .../multi_replicate_reference_table.out | 10 +-
 .../regress/expected/multi_schema_support.out | 56 +-
 .../expected/multi_tenant_isolation.out | 1 -
 .../multi_tenant_isolation_nonblocking.out | 1 -
 .../multi_unsupported_worker_operations.out | 4 +-
 .../expected/replicated_partitioned_table.out | 32 +-
 src/test/regress/expected/tableam.out | 9 +-
 .../expected/upgrade_list_citus_objects.out | 2 +-
 src/test/regress/isolation_schedule | 1 -
 src/test/regress/multi_1_schedule | 6 +-
 ...tion_copy_placement_vs_copy_placement.spec | 12 +-
 ...lation_copy_placement_vs_modification.spec | 26 +-
 ...create_distributed_table_concurrently.spec | 2 +-
 .../regress/spec/isolation_dml_vs_repair.spec | 104 ----
 ...ent.sql => citus_copy_shard_placement.sql} | 24 +-
 src/test/regress/sql/citus_local_tables.sql | 4 +-
 .../regress/sql/citus_local_tables_ent.sql | 8 +-
 .../regress/sql/ignoring_orphaned_shards.sql | 8 -
 .../sql/multi_colocated_shard_rebalance.sql | 14 +-
 .../sql/multi_colocated_shard_transfer.sql | 114 ----
 src/test/regress/sql/multi_move_mx.sql | 6 +-
 src/test/regress/sql/multi_prepare_sql.sql | 10 +-
 .../regress/sql/multi_reference_table.sql | 9 -
 src/test/regress/sql/multi_repair_shards.sql | 82 ---
 .../sql/multi_replicate_reference_table.sql | 8 +-
 src/test/regress/sql/multi_schema_support.sql | 27 +-
 .../multi_unsupported_worker_operations.sql | 4 +-
 .../sql/replicated_partitioned_table.sql | 30 +-
 src/test/regress/sql/tableam.sql | 7 +-
 57 files changed, 481 insertions(+), 1652 deletions(-)
 rename src/backend/distributed/operations/{repair_shards.c => shard_transfer.c} (86%)
 create mode 100644 src/backend/distributed/sql/udfs/citus_copy_shard_placement/11.1-1.sql
 rename src/include/distributed/{repair_shards.h => shard_transfer.h} (100%)
 rename src/test/regress/expected/{master_copy_shard_placement.out =>
citus_copy_shard_placement.out} (88%) delete mode 100644 src/test/regress/expected/isolation_dml_vs_repair.out delete mode 100644 src/test/regress/expected/multi_colocated_shard_transfer.out delete mode 100644 src/test/regress/expected/multi_repair_shards.out delete mode 100644 src/test/regress/spec/isolation_dml_vs_repair.spec rename src/test/regress/sql/{master_copy_shard_placement.sql => citus_copy_shard_placement.sql} (85%) delete mode 100644 src/test/regress/sql/multi_colocated_shard_transfer.sql delete mode 100644 src/test/regress/sql/multi_repair_shards.sql diff --git a/src/backend/distributed/commands/create_distributed_table.c b/src/backend/distributed/commands/create_distributed_table.c index a3ff6c4cb..95a5062ce 100644 --- a/src/backend/distributed/commands/create_distributed_table.c +++ b/src/backend/distributed/commands/create_distributed_table.c @@ -60,10 +60,10 @@ #include "distributed/reference_table_utils.h" #include "distributed/relation_access_tracking.h" #include "distributed/remote_commands.h" -#include "distributed/repair_shards.h" #include "distributed/resource_lock.h" #include "distributed/shard_rebalancer.h" #include "distributed/shard_split.h" +#include "distributed/shard_transfer.h" #include "distributed/shared_library_init.h" #include "distributed/shard_rebalancer.h" #include "distributed/worker_protocol.h" diff --git a/src/backend/distributed/operations/shard_rebalancer.c b/src/backend/distributed/operations/shard_rebalancer.c index c6079eb83..0e9b8db1d 100644 --- a/src/backend/distributed/operations/shard_rebalancer.c +++ b/src/backend/distributed/operations/shard_rebalancer.c @@ -43,10 +43,10 @@ #include "distributed/pg_dist_rebalance_strategy.h" #include "distributed/reference_table_utils.h" #include "distributed/remote_commands.h" -#include "distributed/repair_shards.h" #include "distributed/resource_lock.h" #include "distributed/shard_rebalancer.h" #include "distributed/shard_cleaner.h" +#include "distributed/shard_transfer.h" #include "distributed/tuplestore.h" #include "distributed/utils/array_type.h" #include "distributed/worker_protocol.h" @@ -1621,7 +1621,6 @@ UpdateShardPlacement(PlacementUpdateEvent *placementUpdateEvent, uint64 shardId = placementUpdateEvent->shardId; WorkerNode *sourceNode = placementUpdateEvent->sourceNode; WorkerNode *targetNode = placementUpdateEvent->targetNode; - const char *doRepair = "false"; Datum shardTranferModeLabelDatum = DirectFunctionCall1(enum_out, shardReplicationModeOid); @@ -1665,13 +1664,12 @@ UpdateShardPlacement(PlacementUpdateEvent *placementUpdateEvent, else if (updateType == PLACEMENT_UPDATE_COPY) { appendStringInfo(placementUpdateCommand, - "SELECT citus_copy_shard_placement(%ld,%s,%u,%s,%u,%s,%s)", + "SELECT citus_copy_shard_placement(%ld,%s,%u,%s,%u,%s)", shardId, quote_literal_cstr(sourceNode->workerName), sourceNode->workerPort, quote_literal_cstr(targetNode->workerName), targetNode->workerPort, - doRepair, quote_literal_cstr(shardTranferModeLabel)); } else diff --git a/src/backend/distributed/operations/shard_split.c b/src/backend/distributed/operations/shard_split.c index 4656e798f..2f1f29868 100644 --- a/src/backend/distributed/operations/shard_split.c +++ b/src/backend/distributed/operations/shard_split.c @@ -29,7 +29,7 @@ #include "distributed/remote_commands.h" #include "distributed/shard_split.h" #include "distributed/reference_table_utils.h" -#include "distributed/repair_shards.h" +#include "distributed/shard_transfer.h" #include "distributed/resource_lock.h" #include 
"distributed/multi_partitioning_utils.h" #include "distributed/worker_manager.h" @@ -233,9 +233,7 @@ ErrorIfCannotSplitShard(SplitOperation splitOperation, ShardInterval *sourceShar "for the shard %lu", SplitOperationName[splitOperation], SplitTargetName[splitOperation], - relationName, shardId), - errhint("Use master_copy_shard_placement UDF to " - "repair the inactive shard placement."))); + relationName, shardId))); } } } diff --git a/src/backend/distributed/operations/repair_shards.c b/src/backend/distributed/operations/shard_transfer.c similarity index 86% rename from src/backend/distributed/operations/repair_shards.c rename to src/backend/distributed/operations/shard_transfer.c index f42925ff8..468706f0e 100644 --- a/src/backend/distributed/operations/repair_shards.c +++ b/src/backend/distributed/operations/shard_transfer.c @@ -1,9 +1,8 @@ /*------------------------------------------------------------------------- * - * repair_shards.c + * shard_transfer.c * - * This file contains functions to repair unhealthy shard placements using data - * from healthy ones. + * This file contains functions to transfer shards between nodes. * * Copyright (c) Citus Data, Inc. * @@ -31,7 +30,6 @@ #include "distributed/listutils.h" #include "distributed/shard_cleaner.h" #include "distributed/coordinator_protocol.h" -#include "distributed/repair_shards.h" #include "distributed/metadata_cache.h" #include "distributed/metadata_sync.h" #include "distributed/multi_join_order.h" @@ -43,6 +41,7 @@ #include "distributed/resource_lock.h" #include "distributed/shard_rebalancer.h" #include "distributed/shard_split.h" +#include "distributed/shard_transfer.h" #include "distributed/worker_manager.h" #include "distributed/worker_protocol.h" #include "distributed/worker_transaction.h" @@ -76,9 +75,6 @@ static bool CanUseLogicalReplication(Oid relationId, char shardReplicationMode); static void ErrorIfTableCannotBeReplicated(Oid relationId); static void ErrorIfTargetNodeIsNotSafeToCopyTo(const char *targetNodeName, int targetNodePort); -static void RepairShardPlacement(int64 shardId, const char *sourceNodeName, - int32 sourceNodePort, const char *targetNodeName, - int32 targetNodePort); static void ReplicateColocatedShardPlacement(int64 shardId, char *sourceNodeName, int32 sourceNodePort, char *targetNodeName, int32 targetNodePort, @@ -95,12 +91,6 @@ static void CopyShardTablesViaLogicalReplication(List *shardIntervalList, static void CopyShardTablesViaBlockWrites(List *shardIntervalList, char *sourceNodeName, int32 sourceNodePort, char *targetNodeName, int32 targetNodePort); -static List * CopyPartitionShardsCommandList(ShardInterval *shardInterval, - const char *sourceNodeName, - int32 sourceNodePort); -static void EnsureShardCanBeRepaired(int64 shardId, const char *sourceNodeName, - int32 sourceNodePort, const char *targetNodeName, - int32 targetNodePort); static void EnsureShardCanBeCopied(int64 shardId, const char *sourceNodeName, int32 sourceNodePort, const char *targetNodeName, int32 targetNodePort); @@ -150,14 +140,8 @@ bool CheckAvailableSpaceBeforeMove = true; /* - * citus_copy_shard_placement implements a user-facing UDF to repair data from - * a healthy (source) node to an inactive (target) node. To accomplish this it - * entirely recreates the table structure before copying all data. During this - * time all modifications are paused to the shard. After successful repair, the - * inactive placement is marked healthy and modifications may continue. 
If the - * repair fails at any point, this function throws an error, leaving the node - * in an unhealthy state. Please note that citus_copy_shard_placement copies - * given shard along with its co-located shards. + * citus_copy_shard_placement implements a user-facing UDF to copy a placement + * from a source node to a target node, including all co-located placements. */ Datum citus_copy_shard_placement(PG_FUNCTION_ARGS) @@ -165,6 +149,35 @@ citus_copy_shard_placement(PG_FUNCTION_ARGS) CheckCitusVersion(ERROR); EnsureCoordinator(); + int64 shardId = PG_GETARG_INT64(0); + text *sourceNodeNameText = PG_GETARG_TEXT_P(1); + int32 sourceNodePort = PG_GETARG_INT32(2); + text *targetNodeNameText = PG_GETARG_TEXT_P(3); + int32 targetNodePort = PG_GETARG_INT32(4); + Oid shardReplicationModeOid = PG_GETARG_OID(5); + + char *sourceNodeName = text_to_cstring(sourceNodeNameText); + char *targetNodeName = text_to_cstring(targetNodeNameText); + + char shardReplicationMode = LookupShardTransferMode(shardReplicationModeOid); + + ReplicateColocatedShardPlacement(shardId, sourceNodeName, sourceNodePort, + targetNodeName, targetNodePort, + shardReplicationMode); + + PG_RETURN_VOID(); +} + + +/* + * master_copy_shard_placement is a wrapper function for old UDF name. + */ +Datum +master_copy_shard_placement(PG_FUNCTION_ARGS) +{ + CheckCitusVersion(ERROR); + EnsureCoordinator(); + int64 shardId = PG_GETARG_INT64(0); text *sourceNodeNameText = PG_GETARG_TEXT_P(1); int32 sourceNodePort = PG_GETARG_INT32(2); @@ -177,137 +190,21 @@ citus_copy_shard_placement(PG_FUNCTION_ARGS) char *targetNodeName = text_to_cstring(targetNodeNameText); char shardReplicationMode = LookupShardTransferMode(shardReplicationModeOid); - if (doRepair && shardReplicationMode == TRANSFER_MODE_FORCE_LOGICAL) - { - ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("logical replication cannot be used for repairs"))); - } - - ShardInterval *shardInterval = LoadShardInterval(shardId); - ErrorIfTableCannotBeReplicated(shardInterval->relationId); - ErrorIfTargetNodeIsNotSafeToCopyTo(targetNodeName, targetNodePort); - - AcquirePlacementColocationLock(shardInterval->relationId, ExclusiveLock, - doRepair ? "repair" : "copy"); if (doRepair) { - RepairShardPlacement(shardId, sourceNodeName, sourceNodePort, targetNodeName, - targetNodePort); - } - else - { - ReplicateColocatedShardPlacement(shardId, sourceNodeName, sourceNodePort, - targetNodeName, targetNodePort, - shardReplicationMode); + ereport(WARNING, (errmsg("do_repair argument is deprecated"))); } + ReplicateColocatedShardPlacement(shardId, sourceNodeName, sourceNodePort, + targetNodeName, targetNodePort, + shardReplicationMode); + + PG_RETURN_VOID(); } -/* - * master_copy_shard_placement is a wrapper function for old UDF name. - */ -Datum -master_copy_shard_placement(PG_FUNCTION_ARGS) -{ - return citus_copy_shard_placement(fcinfo); -} - - -/* - * ShardListSizeInBytes returns the size in bytes of a set of shard tables. 
- */ -uint64 -ShardListSizeInBytes(List *shardList, char *workerNodeName, uint32 - workerNodePort) -{ - uint32 connectionFlag = 0; - - /* we skip child tables of a partitioned table if this boolean variable is true */ - bool optimizePartitionCalculations = true; - StringInfo tableSizeQuery = GenerateSizeQueryOnMultiplePlacements(shardList, - TOTAL_RELATION_SIZE, - optimizePartitionCalculations); - - MultiConnection *connection = GetNodeConnection(connectionFlag, workerNodeName, - workerNodePort); - PGresult *result = NULL; - int queryResult = ExecuteOptionalRemoteCommand(connection, tableSizeQuery->data, - &result); - - if (queryResult != RESPONSE_OKAY) - { - ereport(ERROR, (errcode(ERRCODE_CONNECTION_FAILURE), - errmsg("cannot get the size because of a connection error"))); - } - - List *sizeList = ReadFirstColumnAsText(result); - if (list_length(sizeList) != 1) - { - ereport(ERROR, (errmsg( - "received wrong number of rows from worker, expected 1 received %d", - list_length(sizeList)))); - } - - StringInfo totalSizeStringInfo = (StringInfo) linitial(sizeList); - char *totalSizeString = totalSizeStringInfo->data; - uint64 totalSize = SafeStringToUint64(totalSizeString); - - PQclear(result); - ForgetResults(connection); - - return totalSize; -} - - -/* - * CheckSpaceConstraints checks there is enough space to place the colocation - * on the node that the connection is connected to. - */ -static void -CheckSpaceConstraints(MultiConnection *connection, uint64 colocationSizeInBytes) -{ - uint64 diskAvailableInBytes = 0; - uint64 diskSizeInBytes = 0; - bool success = - GetNodeDiskSpaceStatsForConnection(connection, &diskAvailableInBytes, - &diskSizeInBytes); - if (!success) - { - ereport(ERROR, (errmsg("Could not fetch disk stats for node: %s-%d", - connection->hostname, connection->port))); - } - - uint64 diskAvailableInBytesAfterShardMove = 0; - if (diskAvailableInBytes < colocationSizeInBytes) - { - /* - * even though the space will be less than "0", we set it to 0 for convenience. - */ - diskAvailableInBytes = 0; - } - else - { - diskAvailableInBytesAfterShardMove = diskAvailableInBytes - colocationSizeInBytes; - } - uint64 desiredNewDiskAvailableInBytes = diskSizeInBytes * - (DesiredPercentFreeAfterMove / 100); - if (diskAvailableInBytesAfterShardMove < desiredNewDiskAvailableInBytes) - { - ereport(ERROR, (errmsg("not enough empty space on node if the shard is moved, " - "actual available space after move will be %ld bytes, " - "desired available space after move is %ld bytes," - "estimated size increase on node after move is %ld bytes.", - diskAvailableInBytesAfterShardMove, - desiredNewDiskAvailableInBytes, colocationSizeInBytes), - errhint( - "consider lowering citus.desired_percent_disk_available_after_move."))); - } -} - - /* * citus_move_shard_placement moves given shard (and its co-located shards) from one * node to the other node. To accomplish this it entirely recreates the table structure @@ -370,8 +267,8 @@ citus_move_shard_placement(PG_FUNCTION_ARGS) { char *relationName = get_rel_name(colocatedTableId); ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot repair shard"), - errdetail("Table %s is a foreign table. Repairing " + errmsg("cannot move shard"), + errdetail("Table %s is a foreign table. Moving " "shards backed by foreign tables is " "not supported.", relationName))); } @@ -518,6 +415,98 @@ EnsureEnoughDiskSpaceForShardMove(List *colocatedShardList, } +/* + * ShardListSizeInBytes returns the size in bytes of a set of shard tables. 
+ */ +uint64 +ShardListSizeInBytes(List *shardList, char *workerNodeName, uint32 + workerNodePort) +{ + uint32 connectionFlag = 0; + + /* we skip child tables of a partitioned table if this boolean variable is true */ + bool optimizePartitionCalculations = true; + StringInfo tableSizeQuery = GenerateSizeQueryOnMultiplePlacements(shardList, + TOTAL_RELATION_SIZE, + optimizePartitionCalculations); + + MultiConnection *connection = GetNodeConnection(connectionFlag, workerNodeName, + workerNodePort); + PGresult *result = NULL; + int queryResult = ExecuteOptionalRemoteCommand(connection, tableSizeQuery->data, + &result); + + if (queryResult != RESPONSE_OKAY) + { + ereport(ERROR, (errcode(ERRCODE_CONNECTION_FAILURE), + errmsg("cannot get the size because of a connection error"))); + } + + List *sizeList = ReadFirstColumnAsText(result); + if (list_length(sizeList) != 1) + { + ereport(ERROR, (errmsg( + "received wrong number of rows from worker, expected 1 received %d", + list_length(sizeList)))); + } + + StringInfo totalSizeStringInfo = (StringInfo) linitial(sizeList); + char *totalSizeString = totalSizeStringInfo->data; + uint64 totalSize = SafeStringToUint64(totalSizeString); + + PQclear(result); + ForgetResults(connection); + + return totalSize; +} + + +/* + * CheckSpaceConstraints checks there is enough space to place the colocation + * on the node that the connection is connected to. + */ +static void +CheckSpaceConstraints(MultiConnection *connection, uint64 colocationSizeInBytes) +{ + uint64 diskAvailableInBytes = 0; + uint64 diskSizeInBytes = 0; + bool success = + GetNodeDiskSpaceStatsForConnection(connection, &diskAvailableInBytes, + &diskSizeInBytes); + if (!success) + { + ereport(ERROR, (errmsg("Could not fetch disk stats for node: %s-%d", + connection->hostname, connection->port))); + } + + uint64 diskAvailableInBytesAfterShardMove = 0; + if (diskAvailableInBytes < colocationSizeInBytes) + { + /* + * even though the space will be less than "0", we set it to 0 for convenience. + */ + diskAvailableInBytes = 0; + } + else + { + diskAvailableInBytesAfterShardMove = diskAvailableInBytes - colocationSizeInBytes; + } + uint64 desiredNewDiskAvailableInBytes = diskSizeInBytes * + (DesiredPercentFreeAfterMove / 100); + if (diskAvailableInBytesAfterShardMove < desiredNewDiskAvailableInBytes) + { + ereport(ERROR, (errmsg("not enough empty space on node if the shard is moved, " + "actual available space after move will be %ld bytes, " + "desired available space after move is %ld bytes," + "estimated size increase on node after move is %ld bytes.", + diskAvailableInBytesAfterShardMove, + desiredNewDiskAvailableInBytes, colocationSizeInBytes), + errhint( + "consider lowering citus.desired_percent_disk_available_after_move."))); + } +} + + /* * ErrorIfTargetNodeIsNotSafeToMove throws error if the target node is not * eligible for moving shards. @@ -885,122 +874,6 @@ LookupShardTransferMode(Oid shardReplicationModeOid) } -/* - * RepairShardPlacement repairs given shard from a source node to target node. - * This function is not co-location aware. It only repairs given shard. 
- */ -static void -RepairShardPlacement(int64 shardId, const char *sourceNodeName, int32 sourceNodePort, - const char *targetNodeName, int32 targetNodePort) -{ - ShardInterval *shardInterval = LoadShardInterval(shardId); - Oid distributedTableId = shardInterval->relationId; - - char *tableOwner = TableOwner(shardInterval->relationId); - - /* prevent table from being dropped */ - LockRelationOid(distributedTableId, AccessShareLock); - - EnsureTableOwner(distributedTableId); - - if (IsForeignTable(distributedTableId)) - { - char *relationName = get_rel_name(distributedTableId); - ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot repair shard"), - errdetail("Table %s is a foreign table. Repairing " - "shards backed by foreign tables is " - "not supported.", relationName))); - } - - /* - * Let's not allow repairing partitions to prevent any edge cases. - * We're already not allowing any kind of modifications on the partitions - * so their placements are not likely to be marked as INVALID. The only - * possible case to mark placement of a partition as invalid is - * "ALTER TABLE parent_table DETACH PARTITION partition_table". But, - * given that the table would become a regular distributed table if the - * command succeeds, we're OK since the regular distributed tables can - * be repaired later on. - */ - EnsurePartitionTableNotReplicated(distributedTableId); - - /* - * We take a lock on the referenced table if there is a foreign constraint - * during the copy procedure. If we do not block DMLs on the referenced - * table, we cannot avoid the inconsistency between the two copies of the - * data. Currently, we do not support replication factor > 1 on the tables - * with foreign constraints, so this command will fail for this case anyway. - * However, it is taken as a precaution in case we support it one day. - */ - LockReferencedReferenceShardDistributionMetadata(shardId, ExclusiveLock); - - /* - * We plan to move the placement to the healthy state, so we need to grab a shard - * metadata lock (in exclusive mode). - */ - LockShardDistributionMetadata(shardId, ExclusiveLock); - - /* - * For shard repair, there should be healthy placement in source node and unhealthy - * placement in the target node. - */ - EnsureShardCanBeRepaired(shardId, sourceNodeName, sourceNodePort, targetNodeName, - targetNodePort); - - /* - * If the shard belongs to a partitioned table, we need to load the data after - * creating the partitions and the partitioning hierarcy. - */ - bool partitionedTable = PartitionedTableNoLock(distributedTableId); - bool includeData = !partitionedTable; - - /* we generate necessary commands to recreate the shard in target node */ - List *ddlCommandList = - CopyShardCommandList(shardInterval, sourceNodeName, sourceNodePort, includeData); - - List *foreignConstraintCommandList = CopyShardForeignConstraintCommandList( - shardInterval); - ddlCommandList = list_concat(ddlCommandList, foreignConstraintCommandList); - - /* - * CopyShardCommandList() drops the table which cascades to partitions if the - * table is a partitioned table. This means that we need to create both parent - * table and its partitions. - * - * We also skipped copying the data, so include it here. 
- */ - if (partitionedTable) - { - char *shardName = ConstructQualifiedShardName(shardInterval); - StringInfo copyShardDataCommand = makeStringInfo(); - - List *partitionCommandList = - CopyPartitionShardsCommandList(shardInterval, sourceNodeName, sourceNodePort); - ddlCommandList = list_concat(ddlCommandList, partitionCommandList); - - /* finally copy the data as well */ - appendStringInfo(copyShardDataCommand, WORKER_APPEND_TABLE_TO_SHARD, - quote_literal_cstr(shardName), /* table to append */ - quote_literal_cstr(shardName), /* remote table name */ - quote_literal_cstr(sourceNodeName), /* remote host */ - sourceNodePort); /* remote port */ - ddlCommandList = lappend(ddlCommandList, copyShardDataCommand->data); - } - - EnsureNoModificationsHaveBeenDone(); - SendCommandListToWorkerOutsideTransaction(targetNodeName, targetNodePort, tableOwner, - ddlCommandList); - - /* after successful repair, we update shard state as healthy*/ - List *placementList = ShardPlacementListWithoutOrphanedPlacements(shardId); - ShardPlacement *placement = SearchShardPlacementInListOrError(placementList, - targetNodeName, - targetNodePort); - UpdateShardPlacementState(placement->placementId, SHARD_STATE_ACTIVE); -} - - /* * ReplicateColocatedShardPlacement replicates the given shard and its * colocated shards from a source node to target node. @@ -1013,6 +886,12 @@ ReplicateColocatedShardPlacement(int64 shardId, char *sourceNodeName, ShardInterval *shardInterval = LoadShardInterval(shardId); Oid distributedTableId = shardInterval->relationId; + ErrorIfTableCannotBeReplicated(shardInterval->relationId); + ErrorIfTargetNodeIsNotSafeToCopyTo(targetNodeName, targetNodePort); + EnsureNoModificationsHaveBeenDone(); + + AcquirePlacementColocationLock(shardInterval->relationId, ExclusiveLock, "copy"); + List *colocatedTableList = ColocatedTableList(distributedTableId); List *colocatedShardList = ColocatedShardIntervalList(shardInterval); @@ -1439,88 +1318,6 @@ CreateShardCopyCommand(ShardInterval *shard, } -/* - * CopyPartitionShardsCommandList gets a shardInterval which is a shard that - * belongs to partitioned table (this is asserted). - * - * The function returns a list of commands which re-creates all the partitions - * of the input shardInterval. - */ -static List * -CopyPartitionShardsCommandList(ShardInterval *shardInterval, const char *sourceNodeName, - int32 sourceNodePort) -{ - Oid distributedTableId = shardInterval->relationId; - List *ddlCommandList = NIL; - - Assert(PartitionedTableNoLock(distributedTableId)); - - List *partitionList = PartitionList(distributedTableId); - Oid partitionOid = InvalidOid; - foreach_oid(partitionOid, partitionList) - { - uint64 partitionShardId = - ColocatedShardIdInRelation(partitionOid, shardInterval->shardIndex); - ShardInterval *partitionShardInterval = LoadShardInterval(partitionShardId); - bool includeData = false; - - List *copyCommandList = - CopyShardCommandList(partitionShardInterval, sourceNodeName, sourceNodePort, - includeData); - ddlCommandList = list_concat(ddlCommandList, copyCommandList); - - char *attachPartitionCommand = - GenerateAttachShardPartitionCommand(partitionShardInterval); - ddlCommandList = lappend(ddlCommandList, attachPartitionCommand); - } - - return ddlCommandList; -} - - -/* - * EnsureShardCanBeRepaired checks if the given shard has a healthy placement in the source - * node and inactive node on the target node. 
- */ -static void -EnsureShardCanBeRepaired(int64 shardId, const char *sourceNodeName, int32 sourceNodePort, - const char *targetNodeName, int32 targetNodePort) -{ - List *shardPlacementList = - ShardPlacementListIncludingOrphanedPlacements(shardId); - - ShardPlacement *sourcePlacement = SearchShardPlacementInListOrError( - shardPlacementList, - sourceNodeName, - sourceNodePort); - if (sourcePlacement->shardState != SHARD_STATE_ACTIVE) - { - ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("source placement must be in active state"))); - } - - ShardPlacement *targetPlacement = SearchShardPlacementInListOrError( - shardPlacementList, - targetNodeName, - targetNodePort); - - /* - * shardStateInactive is a legacy state for a placement. As of Citus 11, - * we never mark any placement as INACTIVE. - * - * Still, we prefer to keep this function/code here, as users may need - * to recover placements that are marked as inactive pre Citus 11. - * - */ - int shardStateInactive = 3; - if (targetPlacement->shardState != shardStateInactive) - { - ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("target placement must be in inactive state"))); - } -} - - /* * EnsureShardCanBeCopied checks if the given shard has a healthy placement in the source * node and no placements in the target node. @@ -1544,6 +1341,7 @@ EnsureShardCanBeCopied(int64 shardId, const char *sourceNodeName, int32 sourceNo ShardPlacement *targetPlacement = SearchShardPlacementInList(shardPlacementList, targetNodeName, targetNodePort); + if (targetPlacement != NULL) { if (targetPlacement->shardState == SHARD_STATE_TO_DELETE) @@ -1901,7 +1699,7 @@ RecreateTableDDLCommandList(Oid relationId) else { ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("repair target is not a regular, foreign or partitioned " + errmsg("target is not a regular, foreign or partitioned " "table"))); } diff --git a/src/backend/distributed/replication/multi_logical_replication.c b/src/backend/distributed/replication/multi_logical_replication.c index 54efd08db..3f1c429f0 100644 --- a/src/backend/distributed/replication/multi_logical_replication.c +++ b/src/backend/distributed/replication/multi_logical_replication.c @@ -44,9 +44,9 @@ #include "distributed/priority.h" #include "distributed/distributed_planner.h" #include "distributed/remote_commands.h" -#include "distributed/repair_shards.h" #include "distributed/resource_lock.h" #include "distributed/shard_rebalancer.h" +#include "distributed/shard_transfer.h" #include "distributed/version_compat.h" #include "nodes/bitmapset.h" #include "parser/scansup.h" diff --git a/src/backend/distributed/shared_library_init.c b/src/backend/distributed/shared_library_init.c index 0f5f1501f..f03e7ab1d 100644 --- a/src/backend/distributed/shared_library_init.c +++ b/src/backend/distributed/shared_library_init.c @@ -73,9 +73,9 @@ #include "distributed/recursive_planning.h" #include "distributed/reference_table_utils.h" #include "distributed/relation_access_tracking.h" -#include "distributed/repair_shards.h" #include "distributed/run_from_same_connection.h" #include "distributed/shard_cleaner.h" +#include "distributed/shard_transfer.h" #include "distributed/shared_connection_stats.h" #include "distributed/shardsplit_shared_memory.h" #include "distributed/query_pushdown_planning.h" diff --git a/src/backend/distributed/sql/citus--11.0-4--11.1-1.sql b/src/backend/distributed/sql/citus--11.0-4--11.1-1.sql index a5433c7d4..89d96eda4 100644 --- 
a/src/backend/distributed/sql/citus--11.0-4--11.1-1.sql +++ b/src/backend/distributed/sql/citus--11.0-4--11.1-1.sql @@ -1,6 +1,7 @@ #include "udfs/citus_locks/11.1-1.sql" #include "udfs/create_distributed_table_concurrently/11.1-1.sql" #include "udfs/citus_internal_delete_partition_metadata/11.1-1.sql" +#include "udfs/citus_copy_shard_placement/11.1-1.sql" DROP FUNCTION pg_catalog.worker_create_schema(bigint,text); DROP FUNCTION pg_catalog.worker_cleanup_job_schema_cache(); diff --git a/src/backend/distributed/sql/downgrades/citus--11.1-1--11.0-4.sql b/src/backend/distributed/sql/downgrades/citus--11.1-1--11.0-4.sql index 2318b98f1..83c91b06d 100644 --- a/src/backend/distributed/sql/downgrades/citus--11.1-1--11.0-4.sql +++ b/src/backend/distributed/sql/downgrades/citus--11.1-1--11.0-4.sql @@ -105,7 +105,6 @@ DROP TABLE pg_catalog.pg_dist_cleanup; DROP SEQUENCE pg_catalog.pg_dist_operationid_seq; DROP SEQUENCE pg_catalog.pg_dist_cleanup_recordid_seq; DROP PROCEDURE pg_catalog.citus_cleanup_orphaned_resources(); - DROP FUNCTION pg_catalog.citus_job_cancel(bigint); DROP FUNCTION pg_catalog.citus_job_wait(bigint, pg_catalog.citus_job_status); DROP TABLE pg_catalog.pg_dist_background_task_depend; @@ -113,3 +112,5 @@ DROP TABLE pg_catalog.pg_dist_background_task; DROP TYPE pg_catalog.citus_task_status; DROP TABLE pg_catalog.pg_dist_background_job; DROP TYPE pg_catalog.citus_job_status; +DROP FUNCTION pg_catalog.citus_copy_shard_placement; +#include "../udfs/citus_copy_shard_placement/10.0-1.sql" diff --git a/src/backend/distributed/sql/udfs/citus_copy_shard_placement/11.1-1.sql b/src/backend/distributed/sql/udfs/citus_copy_shard_placement/11.1-1.sql new file mode 100644 index 000000000..3a8b7d53f --- /dev/null +++ b/src/backend/distributed/sql/udfs/citus_copy_shard_placement/11.1-1.sql @@ -0,0 +1,19 @@ +DROP FUNCTION pg_catalog.citus_copy_shard_placement; +CREATE FUNCTION pg_catalog.citus_copy_shard_placement( + shard_id bigint, + source_node_name text, + source_node_port integer, + target_node_name text, + target_node_port integer, + transfer_mode citus.shard_transfer_mode default 'auto') +RETURNS void +LANGUAGE C STRICT +AS 'MODULE_PATHNAME', $$citus_copy_shard_placement$$; + +COMMENT ON FUNCTION pg_catalog.citus_copy_shard_placement(shard_id bigint, + source_node_name text, + source_node_port integer, + target_node_name text, + target_node_port integer, + shard_transfer_mode citus.shard_transfer_mode) +IS 'copy a shard from the source node to the destination node'; diff --git a/src/backend/distributed/sql/udfs/citus_copy_shard_placement/latest.sql b/src/backend/distributed/sql/udfs/citus_copy_shard_placement/latest.sql index d501cc1e7..3a8b7d53f 100644 --- a/src/backend/distributed/sql/udfs/citus_copy_shard_placement/latest.sql +++ b/src/backend/distributed/sql/udfs/citus_copy_shard_placement/latest.sql @@ -1,10 +1,10 @@ +DROP FUNCTION pg_catalog.citus_copy_shard_placement; CREATE FUNCTION pg_catalog.citus_copy_shard_placement( shard_id bigint, source_node_name text, source_node_port integer, target_node_name text, target_node_port integer, - do_repair bool DEFAULT true, transfer_mode citus.shard_transfer_mode default 'auto') RETURNS void LANGUAGE C STRICT @@ -15,6 +15,5 @@ COMMENT ON FUNCTION pg_catalog.citus_copy_shard_placement(shard_id bigint, source_node_port integer, target_node_name text, target_node_port integer, - do_repair bool, shard_transfer_mode citus.shard_transfer_mode) IS 'copy a shard from the source node to the destination node'; diff --git 
a/src/backend/distributed/utils/reference_table_utils.c b/src/backend/distributed/utils/reference_table_utils.c index f4b2f4be3..8b37674d0 100644 --- a/src/backend/distributed/utils/reference_table_utils.c +++ b/src/backend/distributed/utils/reference_table_utils.c @@ -83,7 +83,7 @@ replicate_reference_tables(PG_FUNCTION_ARGS) /* * EnsureReferenceTablesExistOnAllNodes ensures that a shard placement for every * reference table exists on all nodes. If a node does not have a set of shard - * placements, then master_copy_shard_placement is called in a subtransaction + * placements, then citus_copy_shard_placement is called in a subtransaction * to pull the data to the new node. */ void @@ -96,7 +96,7 @@ EnsureReferenceTablesExistOnAllNodes(void) /* * EnsureReferenceTablesExistOnAllNodesExtended ensures that a shard placement for every * reference table exists on all nodes. If a node does not have a set of shard placements, - * then master_copy_shard_placement is called in a subtransaction to pull the data to the + * then citus_copy_shard_placement is called in a subtransaction to pull the data to the * new node. * * The transferMode is passed on to the implementation of the copy to control the locks @@ -193,7 +193,7 @@ EnsureReferenceTablesExistOnAllNodesExtended(char transferMode) } /* - * master_copy_shard_placement triggers metadata sync-up, which tries to + * citus_copy_shard_placement triggers metadata sync-up, which tries to * acquire a ShareLock on pg_dist_node. We do master_copy_shad_placement * in a separate connection. If we have modified pg_dist_node in the * current backend, this will cause a deadlock. @@ -207,7 +207,7 @@ EnsureReferenceTablesExistOnAllNodesExtended(char transferMode) /* * Modifications to reference tables in current transaction are not visible - * to master_copy_shard_placement, since it is done in a separate backend. + * to citus_copy_shard_placement, since it is done in a separate backend. */ if (AnyRelationsModifiedInTransaction(referenceTableIdList)) { @@ -235,7 +235,7 @@ EnsureReferenceTablesExistOnAllNodesExtended(char transferMode) newWorkerNode->workerPort))); /* - * Call master_copy_shard_placement using citus extension owner. Current + * Call citus_copy_shard_placement using citus extension owner. Current * user might not have permissions to do the copy. */ const char *userName = CitusExtensionOwnerName(); @@ -348,7 +348,7 @@ WorkersWithoutReferenceTablePlacement(uint64 shardId, LOCKMODE lockMode) /* - * CopyShardPlacementToWorkerNodeQuery returns the master_copy_shard_placement + * CopyShardPlacementToWorkerNodeQuery returns the citus_copy_shard_placement * command to copy the given shard placement to given node. 
*/ static StringInfo @@ -364,8 +364,8 @@ CopyShardPlacementToWorkerNodeQuery(ShardPlacement *sourceShardPlacement, "auto"; appendStringInfo(queryString, - "SELECT master_copy_shard_placement(" - UINT64_FORMAT ", %s, %d, %s, %d, do_repair := false, " + "SELECT citus_copy_shard_placement(" + UINT64_FORMAT ", %s, %d, %s, %d, " "transfer_mode := %s)", sourceShardPlacement->shardId, quote_literal_cstr(sourceShardPlacement->nodeName), diff --git a/src/include/distributed/coordinator_protocol.h b/src/include/distributed/coordinator_protocol.h index 73709a864..4471553e8 100644 --- a/src/include/distributed/coordinator_protocol.h +++ b/src/include/distributed/coordinator_protocol.h @@ -47,7 +47,7 @@ #define CANDIDATE_NODE_FIELDS 2 #define WORKER_NODE_FIELDS 2 -/* transfer mode for master_copy_shard_placement */ +/* transfer mode for citus_copy_shard_placement */ #define TRANSFER_MODE_AUTOMATIC 'a' #define TRANSFER_MODE_FORCE_LOGICAL 'l' #define TRANSFER_MODE_BLOCK_WRITES 'b' @@ -286,9 +286,6 @@ extern Datum isolate_tenant_to_new_shard(PG_FUNCTION_ARGS); /* function declarations for shard split functionality */ extern Datum citus_split_shard_by_split_points(PG_FUNCTION_ARGS); -/* function declarations for shard repair functionality */ -extern Datum master_copy_shard_placement(PG_FUNCTION_ARGS); - /* function declarations for shard copy functinality */ extern List * CopyShardCommandList(ShardInterval *shardInterval, const char *sourceNodeName, diff --git a/src/include/distributed/repair_shards.h b/src/include/distributed/shard_transfer.h similarity index 100% rename from src/include/distributed/repair_shards.h rename to src/include/distributed/shard_transfer.h diff --git a/src/test/regress/expected/master_copy_shard_placement.out b/src/test/regress/expected/citus_copy_shard_placement.out similarity index 88% rename from src/test/regress/expected/master_copy_shard_placement.out rename to src/test/regress/expected/citus_copy_shard_placement.out index 6d7fd4a69..61b935276 100644 --- a/src/test/regress/expected/master_copy_shard_placement.out +++ b/src/test/regress/expected/citus_copy_shard_placement.out @@ -1,4 +1,4 @@ --- Tests for master_copy_shard_placement, which can be used for adding replicas in statement-based replication +-- Tests for citus_copy_shard_placement, which can be used for adding replicas in statement-based replication CREATE SCHEMA mcsp; SET search_path TO mcsp; SET citus.next_shard_id TO 8139000; @@ -35,7 +35,7 @@ SELECT create_distributed_table('history','key'); (1 row) --- Mark tables as non-mx tables, in order to be able to test master_copy_shard_placement +-- Mark tables as non-mx tables, in order to be able to test citus_copy_shard_placement UPDATE pg_dist_partition SET repmodel='c' WHERE logicalrelid IN ('data'::regclass, 'history'::regclass); INSERT INTO data VALUES ('key-1', 'value-1'); @@ -43,48 +43,43 @@ INSERT INTO data VALUES ('key-2', 'value-2'); INSERT INTO history VALUES ('key-1', '2020-02-01', 'old'); INSERT INTO history VALUES ('key-1', '2019-10-01', 'older'); -- verify we error out if no healthy placement exists at source -SELECT master_copy_shard_placement( +SELECT citus_copy_shard_placement( get_shard_id_for_distribution_column('data', 'key-1'), 'localhost', :worker_1_port, 'localhost', :worker_2_port, - do_repair := false, transfer_mode := 'block_writes'); ERROR: could not find placement matching "localhost:xxxxx" HINT: Confirm the placement still exists and try again. 
-- verify we error out if source and destination are the same -SELECT master_copy_shard_placement( +SELECT citus_copy_shard_placement( get_shard_id_for_distribution_column('data', 'key-1'), 'localhost', :worker_2_port, 'localhost', :worker_2_port, - do_repair := false, transfer_mode := 'block_writes'); ERROR: shard xxxxx already exists in the target node -- verify we error out if target already contains a healthy placement -SELECT master_copy_shard_placement( +SELECT citus_copy_shard_placement( (SELECT shardid FROM pg_dist_shard WHERE logicalrelid='ref_table'::regclass::oid), 'localhost', :worker_1_port, 'localhost', :worker_2_port, - do_repair := false, transfer_mode := 'block_writes'); ERROR: shard xxxxx already exists in the target node -- verify we error out if table has foreign key constraints INSERT INTO ref_table SELECT 1, value FROM data; ALTER TABLE data ADD CONSTRAINT distfk FOREIGN KEY (value) REFERENCES ref_table (b) MATCH FULL; -SELECT master_copy_shard_placement( +SELECT citus_copy_shard_placement( get_shard_id_for_distribution_column('data', 'key-1'), 'localhost', :worker_2_port, - 'localhost', :worker_1_port, - do_repair := false); + 'localhost', :worker_1_port); ERROR: cannot replicate shards with foreign keys ALTER TABLE data DROP CONSTRAINT distfk; -- replicate shard that contains key-1 -SELECT master_copy_shard_placement( +SELECT citus_copy_shard_placement( get_shard_id_for_distribution_column('data', 'key-1'), 'localhost', :worker_2_port, 'localhost', :worker_1_port, - do_repair := false, transfer_mode := 'block_writes'); - master_copy_shard_placement + citus_copy_shard_placement --------------------------------------------------------------------- (1 row) @@ -127,11 +122,10 @@ SELECT create_distributed_table('mx_table', 'a'); (1 row) -SELECT master_copy_shard_placement( +SELECT citus_copy_shard_placement( get_shard_id_for_distribution_column('mx_table', '1'), 'localhost', :worker_1_port, 'localhost', :worker_2_port, - do_repair := false, transfer_mode := 'block_writes'); ERROR: Table 'mx_table' is streaming replicated. Shards of streaming replicated tables cannot be copied SET client_min_messages TO ERROR; diff --git a/src/test/regress/expected/citus_local_tables.out b/src/test/regress/expected/citus_local_tables.out index da9ba8e1d..f2177948c 100644 --- a/src/test/regress/expected/citus_local_tables.out +++ b/src/test/regress/expected/citus_local_tables.out @@ -302,8 +302,8 @@ SELECT get_shard_id_for_distribution_column('citus_local_table_1'); 1504014 (1 row) --- master_copy_shard_placement is not supported -SELECT master_copy_shard_placement(shardid, 'localhost', :master_port, 'localhost', :worker_1_port, true) +-- citus_copy_shard_placement is not supported +SELECT citus_copy_shard_placement(shardid, 'localhost', :master_port, 'localhost', :worker_1_port) FROM (SELECT shardid FROM pg_dist_shard WHERE logicalrelid='citus_local_table_1'::regclass) as shardid; ERROR: Table 'citus_local_table_1' is a local table. 
Replicating shard of a local table added to metadata currently is not supported -- undistribute_table is supported diff --git a/src/test/regress/expected/citus_local_tables_ent.out b/src/test/regress/expected/citus_local_tables_ent.out index 21928eefd..99e18109f 100644 --- a/src/test/regress/expected/citus_local_tables_ent.out +++ b/src/test/regress/expected/citus_local_tables_ent.out @@ -24,12 +24,12 @@ SELECT citus_add_local_table_to_metadata('citus_local_table'); -- isolate_tenant_to_new_shard is not supported SELECT isolate_tenant_to_new_shard('citus_local_table', 100, shard_transfer_mode => 'block_writes'); ERROR: cannot isolate tenant because tenant isolation is only support for hash distributed tables --- master_copy_shard_placement is not supported -SELECT master_copy_shard_placement(shardid, 'localhost', :master_port, 'localhost', :worker_1_port, false) +-- citus_copy_shard_placement is not supported +SELECT citus_copy_shard_placement(shardid, 'localhost', :master_port, 'localhost', :worker_1_port, false) FROM (SELECT shardid FROM pg_dist_shard WHERE logicalrelid='citus_local_table'::regclass) as shardid; -ERROR: Table 'citus_local_table' is a local table. Replicating shard of a local table added to metadata currently is not supported --- master_move_shard_placement is not supported -SELECT master_move_shard_placement(shardid, 'localhost', :master_port, 'localhost', :worker_1_port) +ERROR: function citus_copy_shard_placement(bigint, unknown, integer, unknown, integer, boolean) does not exist at character 8 +-- citus_move_shard_placement is not supported +SELECT citus_move_shard_placement(shardid, 'localhost', :master_port, 'localhost', :worker_1_port) FROM (SELECT shardid FROM pg_dist_shard WHERE logicalrelid='citus_local_table'::regclass) as shardid; ERROR: table citus_local_tables_ent.citus_local_table is a local table, moving shard of a local table added to metadata is currently not supported -- replicate_table_shards is not suported diff --git a/src/test/regress/expected/ignoring_orphaned_shards.out b/src/test/regress/expected/ignoring_orphaned_shards.out index 2c12a5ae4..556dcb304 100644 --- a/src/test/regress/expected/ignoring_orphaned_shards.out +++ b/src/test/regress/expected/ignoring_orphaned_shards.out @@ -213,36 +213,9 @@ SELECT logicalrelid FROM pg_dist_partition WHERE colocationid = 92448300 ORDER B rep2 (2 rows) -UPDATE pg_dist_placement SET shardstate = 3 WHERE shardid = 92448300 AND groupid = 0; -SELECT shardid, shardstate, nodeport FROM pg_dist_shard_placement WHERE shardid = 92448300 ORDER BY placementid; - shardid | shardstate | nodeport ---------------------------------------------------------------------- - 92448300 | 4 | 57637 - 92448300 | 1 | 57638 - 92448300 | 3 | 57636 -(3 rows) - -- cannot copy from an orphaned shard SELECT * FROM citus_copy_shard_placement(92448300, 'localhost', :worker_1_port, 'localhost', :master_port); ERROR: source placement must be in active state --- cannot copy to an orphaned shard -SELECT * FROM citus_copy_shard_placement(92448300, 'localhost', :worker_2_port, 'localhost', :worker_1_port); -ERROR: target placement must be in inactive state --- can still copy to an inactive shard -SELECT * FROM citus_copy_shard_placement(92448300, 'localhost', :worker_2_port, 'localhost', :master_port); - citus_copy_shard_placement ---------------------------------------------------------------------- - -(1 row) - -SELECT shardid, shardstate, nodeport FROM pg_dist_shard_placement WHERE shardid = 92448300 ORDER BY placementid; - shardid | 
shardstate | nodeport ---------------------------------------------------------------------- - 92448300 | 4 | 57637 - 92448300 | 1 | 57638 - 92448300 | 1 | 57636 -(3 rows) - -- Make sure we don't send a query to the orphaned shard BEGIN; SET LOCAL citus.log_remote_commands TO ON; diff --git a/src/test/regress/expected/isolation_copy_placement_vs_copy_placement.out b/src/test/regress/expected/isolation_copy_placement_vs_copy_placement.out index e2cddade7..ad25dc539 100644 --- a/src/test/regress/expected/isolation_copy_placement_vs_copy_placement.out +++ b/src/test/regress/expected/isolation_copy_placement_vs_copy_placement.out @@ -1,53 +1,53 @@ Parsed test spec with 2 sessions -starting permutation: s1-load-cache s2-load-cache s2-set-placement-inactive s2-begin s2-repair-placement s1-repair-placement s2-commit +starting permutation: s1-load-cache s2-load-cache s2-delete-inactive s2-begin s2-repair-placement s1-repair-placement s2-commit step s1-load-cache: - COPY test_hash_table FROM PROGRAM 'echo 1,1 && echo 2,2 && echo 3,3 && echo 4,4 && echo 5,5' WITH CSV; + COPY test_hash_table FROM PROGRAM 'echo 1,1 && echo 2,2 && echo 3,3 && echo 4,4 && echo 5,5' WITH CSV; step s2-load-cache: - COPY test_hash_table FROM PROGRAM 'echo 1,1 && echo 2,2 && echo 3,3 && echo 4,4 && echo 5,5' WITH CSV; + COPY test_hash_table FROM PROGRAM 'echo 1,1 && echo 2,2 && echo 3,3 && echo 4,4 && echo 5,5' WITH CSV; -step s2-set-placement-inactive: - UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid IN (SELECT * FROM selected_shard_for_test_table) AND nodeport = 57638; +step s2-delete-inactive: + DELETE FROM pg_dist_shard_placement WHERE shardid IN (SELECT * FROM selected_shard_for_test_table) AND nodeport = 57638; step s2-begin: - BEGIN; + BEGIN; step s2-repair-placement: - SELECT master_copy_shard_placement((SELECT * FROM selected_shard_for_test_table), 'localhost', 57637, 'localhost', 57638); + SELECT citus_copy_shard_placement((SELECT * FROM selected_shard_for_test_table), 'localhost', 57637, 'localhost', 57638, transfer_mode := 'block_writes'); -master_copy_shard_placement +citus_copy_shard_placement --------------------------------------------------------------------- (1 row) step s1-repair-placement: - SELECT master_copy_shard_placement((SELECT * FROM selected_shard_for_test_table), 'localhost', 57637, 'localhost', 57638); + SELECT citus_copy_shard_placement((SELECT * FROM selected_shard_for_test_table), 'localhost', 57637, 'localhost', 57638); -ERROR: could not acquire the lock required to repair public.test_hash_table +ERROR: could not acquire the lock required to copy public.test_hash_table step s2-commit: - COMMIT; + COMMIT; -starting permutation: s2-set-placement-inactive s2-begin s2-repair-placement s1-repair-placement s2-commit -step s2-set-placement-inactive: - UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid IN (SELECT * FROM selected_shard_for_test_table) AND nodeport = 57638; +starting permutation: s2-delete-inactive s2-begin s2-repair-placement s1-repair-placement s2-commit +step s2-delete-inactive: + DELETE FROM pg_dist_shard_placement WHERE shardid IN (SELECT * FROM selected_shard_for_test_table) AND nodeport = 57638; step s2-begin: - BEGIN; + BEGIN; step s2-repair-placement: - SELECT master_copy_shard_placement((SELECT * FROM selected_shard_for_test_table), 'localhost', 57637, 'localhost', 57638); + SELECT citus_copy_shard_placement((SELECT * FROM selected_shard_for_test_table), 'localhost', 57637, 'localhost', 57638, transfer_mode := 'block_writes'); 
-master_copy_shard_placement +citus_copy_shard_placement --------------------------------------------------------------------- (1 row) step s1-repair-placement: - SELECT master_copy_shard_placement((SELECT * FROM selected_shard_for_test_table), 'localhost', 57637, 'localhost', 57638); + SELECT citus_copy_shard_placement((SELECT * FROM selected_shard_for_test_table), 'localhost', 57637, 'localhost', 57638); -ERROR: could not acquire the lock required to repair public.test_hash_table +ERROR: could not acquire the lock required to copy public.test_hash_table step s2-commit: - COMMIT; + COMMIT; diff --git a/src/test/regress/expected/isolation_copy_placement_vs_modification.out b/src/test/regress/expected/isolation_copy_placement_vs_modification.out index ae210c7f4..f0af94855 100644 --- a/src/test/regress/expected/isolation_copy_placement_vs_modification.out +++ b/src/test/regress/expected/isolation_copy_placement_vs_modification.out @@ -1,6 +1,6 @@ Parsed test spec with 2 sessions -starting permutation: s1-load-cache s1-insert s1-begin s1-select s2-set-placement-inactive s2-begin s2-repair-placement s1-update s2-commit s1-commit s2-print-content +starting permutation: s1-load-cache s1-insert s1-begin s1-select s2-delete-inactive s2-begin s2-repair-placement s1-update s2-commit s1-commit s2-print-content step s1-load-cache: TRUNCATE test_repair_placement_vs_modification; @@ -19,16 +19,16 @@ count 1 (1 row) -step s2-set-placement-inactive: - UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638; +step s2-delete-inactive: + DELETE FROM pg_dist_shard_placement WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638; step s2-begin: BEGIN; step s2-repair-placement: - SELECT master_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); + SELECT citus_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, transfer_mode := 'block_writes'); -master_copy_shard_placement +citus_copy_shard_placement --------------------------------------------------------------------- (1 row) @@ -60,7 +60,7 @@ nodeport|success|result (2 rows) -starting permutation: s1-load-cache s1-insert s1-begin s1-select s2-set-placement-inactive s2-begin s2-repair-placement s1-delete s2-commit s1-commit s2-print-content +starting permutation: s1-load-cache s1-insert s1-begin s1-select s2-delete-inactive s2-begin s2-repair-placement s1-delete s2-commit s1-commit s2-print-content step s1-load-cache: TRUNCATE test_repair_placement_vs_modification; @@ -79,16 +79,16 @@ count 1 (1 row) -step s2-set-placement-inactive: - UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638; +step s2-delete-inactive: + DELETE FROM pg_dist_shard_placement WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638; step s2-begin: BEGIN; step s2-repair-placement: - SELECT master_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); + SELECT citus_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, transfer_mode := 'block_writes'); -master_copy_shard_placement +citus_copy_shard_placement --------------------------------------------------------------------- (1 row) @@ -120,7 +120,7 @@ nodeport|success|result (2 rows) -starting permutation: s1-load-cache s1-begin s1-select s2-set-placement-inactive s2-begin s2-repair-placement s1-insert s2-commit s1-commit 
s2-print-content +starting permutation: s1-load-cache s1-begin s1-select s2-delete-inactive s2-begin s2-repair-placement s1-insert s2-commit s1-commit s2-print-content step s1-load-cache: TRUNCATE test_repair_placement_vs_modification; @@ -136,16 +136,16 @@ count 0 (1 row) -step s2-set-placement-inactive: - UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638; +step s2-delete-inactive: + DELETE FROM pg_dist_shard_placement WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638; step s2-begin: BEGIN; step s2-repair-placement: - SELECT master_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); + SELECT citus_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, transfer_mode := 'block_writes'); -master_copy_shard_placement +citus_copy_shard_placement --------------------------------------------------------------------- (1 row) @@ -177,7 +177,7 @@ nodeport|success|result (2 rows) -starting permutation: s1-load-cache s1-begin s1-select s2-set-placement-inactive s2-begin s2-repair-placement s1-copy s2-commit s1-commit s2-print-content +starting permutation: s1-load-cache s1-begin s1-select s2-delete-inactive s2-begin s2-repair-placement s1-copy s2-commit s1-commit s2-print-content step s1-load-cache: TRUNCATE test_repair_placement_vs_modification; @@ -193,16 +193,16 @@ count 0 (1 row) -step s2-set-placement-inactive: - UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638; +step s2-delete-inactive: + DELETE FROM pg_dist_shard_placement WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638; step s2-begin: BEGIN; step s2-repair-placement: - SELECT master_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); + SELECT citus_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, transfer_mode := 'block_writes'); -master_copy_shard_placement +citus_copy_shard_placement --------------------------------------------------------------------- (1 row) @@ -234,7 +234,7 @@ nodeport|success|result (2 rows) -starting permutation: s1-load-cache s1-begin s1-select s2-set-placement-inactive s2-begin s2-repair-placement s1-ddl s2-commit s1-commit s2-print-index-count +starting permutation: s1-load-cache s1-begin s1-select s2-delete-inactive s2-begin s2-repair-placement s1-ddl s2-commit s1-commit s2-print-index-count step s1-load-cache: TRUNCATE test_repair_placement_vs_modification; @@ -250,16 +250,16 @@ count 0 (1 row) -step s2-set-placement-inactive: - UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638; +step s2-delete-inactive: + DELETE FROM pg_dist_shard_placement WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638; step s2-begin: BEGIN; step s2-repair-placement: - SELECT master_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); + SELECT citus_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, transfer_mode := 'block_writes'); -master_copy_shard_placement +citus_copy_shard_placement --------------------------------------------------------------------- (1 row) @@ -291,7 +291,7 @@ nodeport|success|result (4 rows) -starting permutation: s1-insert s1-begin s1-select s2-set-placement-inactive s2-begin s2-repair-placement s1-update 
s2-commit s1-commit s2-print-content +starting permutation: s1-insert s1-begin s1-select s2-delete-inactive s2-begin s2-repair-placement s1-update s2-commit s1-commit s2-print-content step s1-insert: INSERT INTO test_repair_placement_vs_modification VALUES (5, 10); @@ -307,16 +307,16 @@ count 1 (1 row) -step s2-set-placement-inactive: - UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638; +step s2-delete-inactive: + DELETE FROM pg_dist_shard_placement WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638; step s2-begin: BEGIN; step s2-repair-placement: - SELECT master_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); + SELECT citus_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, transfer_mode := 'block_writes'); -master_copy_shard_placement +citus_copy_shard_placement --------------------------------------------------------------------- (1 row) @@ -348,7 +348,7 @@ nodeport|success|result (2 rows) -starting permutation: s1-insert s1-begin s1-select s2-set-placement-inactive s2-begin s2-repair-placement s1-delete s2-commit s1-commit s2-print-content +starting permutation: s1-insert s1-begin s1-select s2-delete-inactive s2-begin s2-repair-placement s1-delete s2-commit s1-commit s2-print-content step s1-insert: INSERT INTO test_repair_placement_vs_modification VALUES (5, 10); @@ -364,16 +364,16 @@ count 1 (1 row) -step s2-set-placement-inactive: - UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638; +step s2-delete-inactive: + DELETE FROM pg_dist_shard_placement WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638; step s2-begin: BEGIN; step s2-repair-placement: - SELECT master_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); + SELECT citus_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, transfer_mode := 'block_writes'); -master_copy_shard_placement +citus_copy_shard_placement --------------------------------------------------------------------- (1 row) @@ -405,7 +405,7 @@ nodeport|success|result (2 rows) -starting permutation: s1-begin s1-select s2-set-placement-inactive s2-begin s2-repair-placement s1-insert s2-commit s1-commit s2-print-content +starting permutation: s1-begin s1-select s2-delete-inactive s2-begin s2-repair-placement s1-insert s2-commit s1-commit s2-print-content step s1-begin: BEGIN; SET LOCAL citus.select_opens_transaction_block TO off; @@ -418,16 +418,16 @@ count 0 (1 row) -step s2-set-placement-inactive: - UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638; +step s2-delete-inactive: + DELETE FROM pg_dist_shard_placement WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638; step s2-begin: BEGIN; step s2-repair-placement: - SELECT master_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); + SELECT citus_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, transfer_mode := 'block_writes'); -master_copy_shard_placement +citus_copy_shard_placement --------------------------------------------------------------------- (1 row) @@ -459,7 +459,7 @@ nodeport|success|result (2 rows) -starting permutation: s1-begin s1-select s2-set-placement-inactive s2-begin s2-repair-placement 
s1-copy s2-commit s1-commit s2-print-content +starting permutation: s1-begin s1-select s2-delete-inactive s2-begin s2-repair-placement s1-copy s2-commit s1-commit s2-print-content step s1-begin: BEGIN; SET LOCAL citus.select_opens_transaction_block TO off; @@ -472,16 +472,16 @@ count 0 (1 row) -step s2-set-placement-inactive: - UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638; +step s2-delete-inactive: + DELETE FROM pg_dist_shard_placement WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638; step s2-begin: BEGIN; step s2-repair-placement: - SELECT master_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); + SELECT citus_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, transfer_mode := 'block_writes'); -master_copy_shard_placement +citus_copy_shard_placement --------------------------------------------------------------------- (1 row) @@ -513,7 +513,7 @@ nodeport|success|result (2 rows) -starting permutation: s1-begin s1-select s2-set-placement-inactive s2-begin s2-repair-placement s1-ddl s2-commit s1-commit s2-print-index-count +starting permutation: s1-begin s1-select s2-delete-inactive s2-begin s2-repair-placement s1-ddl s2-commit s1-commit s2-print-index-count step s1-begin: BEGIN; SET LOCAL citus.select_opens_transaction_block TO off; @@ -526,16 +526,16 @@ count 0 (1 row) -step s2-set-placement-inactive: - UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638; +step s2-delete-inactive: + DELETE FROM pg_dist_shard_placement WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638; step s2-begin: BEGIN; step s2-repair-placement: - SELECT master_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); + SELECT citus_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, transfer_mode := 'block_writes'); -master_copy_shard_placement +citus_copy_shard_placement --------------------------------------------------------------------- (1 row) diff --git a/src/test/regress/expected/isolation_create_distributed_table_concurrently.out b/src/test/regress/expected/isolation_create_distributed_table_concurrently.out index bf092851e..d15e119c0 100644 --- a/src/test/regress/expected/isolation_create_distributed_table_concurrently.out +++ b/src/test/regress/expected/isolation_create_distributed_table_concurrently.out @@ -585,12 +585,12 @@ create_distributed_table_concurrently (1 row) step s4-print-colocations: - SELECT * FROM pg_dist_colocation ORDER BY colocationid; + SELECT shardcount, replicationfactor, distributioncolumntype, distributioncolumncollation FROM pg_dist_colocation ORDER BY colocationid; -colocationid|shardcount|replicationfactor|distributioncolumntype|distributioncolumncollation +shardcount|replicationfactor|distributioncolumntype|distributioncolumncollation --------------------------------------------------------------------- - 123173| 4| 1| 21| 0 - 123174| 4| 1| 23| 0 + 4| 1| 21| 0 + 4| 1| 23| 0 (2 rows) citus_remove_node @@ -651,12 +651,12 @@ create_distributed_table_concurrently (1 row) step s4-print-colocations: - SELECT * FROM pg_dist_colocation ORDER BY colocationid; + SELECT shardcount, replicationfactor, distributioncolumntype, distributioncolumncollation FROM pg_dist_colocation ORDER BY colocationid; 
-colocationid|shardcount|replicationfactor|distributioncolumntype|distributioncolumncollation +shardcount|replicationfactor|distributioncolumntype|distributioncolumncollation --------------------------------------------------------------------- - 123175| 4| 1| 23| 0 - 123176| 4| 1| 21| 0 + 4| 1| 23| 0 + 4| 1| 21| 0 (2 rows) citus_remove_node @@ -700,12 +700,12 @@ create_distributed_table_concurrently (1 row) step s4-print-colocations: - SELECT * FROM pg_dist_colocation ORDER BY colocationid; + SELECT shardcount, replicationfactor, distributioncolumntype, distributioncolumncollation FROM pg_dist_colocation ORDER BY colocationid; -colocationid|shardcount|replicationfactor|distributioncolumntype|distributioncolumncollation +shardcount|replicationfactor|distributioncolumntype|distributioncolumncollation --------------------------------------------------------------------- - 123177| 4| 1| 21| 0 - 123178| 4| 1| 23| 0 + 4| 1| 21| 0 + 4| 1| 23| 0 (2 rows) citus_remove_node @@ -748,12 +748,12 @@ create_distributed_table_concurrently (1 row) step s4-print-colocations: - SELECT * FROM pg_dist_colocation ORDER BY colocationid; + SELECT shardcount, replicationfactor, distributioncolumntype, distributioncolumncollation FROM pg_dist_colocation ORDER BY colocationid; -colocationid|shardcount|replicationfactor|distributioncolumntype|distributioncolumncollation +shardcount|replicationfactor|distributioncolumntype|distributioncolumncollation --------------------------------------------------------------------- - 123179| 4| 1| 21| 0 - 123180| 4| 1| 23| 0 + 4| 1| 21| 0 + 4| 1| 23| 0 (2 rows) citus_remove_node diff --git a/src/test/regress/expected/isolation_dml_vs_repair.out b/src/test/regress/expected/isolation_dml_vs_repair.out deleted file mode 100644 index 1ea19f6a5..000000000 --- a/src/test/regress/expected/isolation_dml_vs_repair.out +++ /dev/null @@ -1,236 +0,0 @@ -Parsed test spec with 2 sessions - -starting permutation: s2-invalidate-57637 s1-begin s1-insertone s2-repair s1-commit -create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -step s2-invalidate-57637: - UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass) AND nodeport = 57637; - -step s1-begin: - BEGIN; - -step s1-insertone: - INSERT INTO test_dml_vs_repair VALUES(1, 1); - -step s2-repair: - SELECT master_copy_shard_placement((SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass), 'localhost', 57638, 'localhost', 57637); - -step s1-commit: - COMMIT; - -step s2-repair: <... 
completed> -master_copy_shard_placement ---------------------------------------------------------------------- - -(1 row) - - -starting permutation: s1-insertone s2-invalidate-57637 s1-begin s1-insertall s2-repair s1-commit -create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -step s1-insertone: - INSERT INTO test_dml_vs_repair VALUES(1, 1); - -step s2-invalidate-57637: - UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass) AND nodeport = 57637; - -step s1-begin: - BEGIN; - -step s1-insertall: - INSERT INTO test_dml_vs_repair SELECT test_id, data+1 FROM test_dml_vs_repair; - -step s2-repair: - SELECT master_copy_shard_placement((SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass), 'localhost', 57638, 'localhost', 57637); - -step s1-commit: - COMMIT; - -step s2-repair: <... completed> -master_copy_shard_placement ---------------------------------------------------------------------- - -(1 row) - - -starting permutation: s2-invalidate-57637 s2-begin s2-repair s1-insertone s2-commit s2-invalidate-57638 s1-display s2-invalidate-57637 s2-revalidate-57638 s1-display -create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -step s2-invalidate-57637: - UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass) AND nodeport = 57637; - -step s2-begin: - BEGIN; - -step s2-repair: - SELECT master_copy_shard_placement((SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass), 'localhost', 57638, 'localhost', 57637); - -master_copy_shard_placement ---------------------------------------------------------------------- - -(1 row) - -step s1-insertone: - INSERT INTO test_dml_vs_repair VALUES(1, 1); - -step s2-commit: - COMMIT; - -step s1-insertone: <... 
completed> -step s2-invalidate-57638: - UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass) AND nodeport = 57638; - -step s1-display: - SELECT * FROM test_dml_vs_repair WHERE test_id = 1 ORDER BY test_id; - -test_id|data ---------------------------------------------------------------------- - 1| 1 -(1 row) - -step s2-invalidate-57637: - UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass) AND nodeport = 57637; - -step s2-revalidate-57638: - UPDATE pg_dist_shard_placement SET shardstate = '1' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass) AND nodeport = 57638; - -step s1-display: - SELECT * FROM test_dml_vs_repair WHERE test_id = 1 ORDER BY test_id; - -test_id|data ---------------------------------------------------------------------- - 1| 1 -(1 row) - - -starting permutation: s2-invalidate-57637 s1-prepared-insertone s2-begin s2-repair s1-prepared-insertone s2-commit s2-invalidate-57638 s1-display s2-invalidate-57637 s2-revalidate-57638 s1-display -create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -step s2-invalidate-57637: - UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass) AND nodeport = 57637; - -step s1-prepared-insertone: - EXECUTE insertone; - -step s2-begin: - BEGIN; - -step s2-repair: - SELECT master_copy_shard_placement((SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass), 'localhost', 57638, 'localhost', 57637); - -master_copy_shard_placement ---------------------------------------------------------------------- - -(1 row) - -step s1-prepared-insertone: - EXECUTE insertone; - -step s2-commit: - COMMIT; - -step s1-prepared-insertone: <... 
completed> -step s2-invalidate-57638: - UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass) AND nodeport = 57638; - -step s1-display: - SELECT * FROM test_dml_vs_repair WHERE test_id = 1 ORDER BY test_id; - -test_id|data ---------------------------------------------------------------------- - 1| 1 - 1| 1 -(2 rows) - -step s2-invalidate-57637: - UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass) AND nodeport = 57637; - -step s2-revalidate-57638: - UPDATE pg_dist_shard_placement SET shardstate = '1' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass) AND nodeport = 57638; - -step s1-display: - SELECT * FROM test_dml_vs_repair WHERE test_id = 1 ORDER BY test_id; - -test_id|data ---------------------------------------------------------------------- - 1| 1 - 1| 1 -(2 rows) - - -starting permutation: s2-invalidate-57637 s1-insertone s1-prepared-insertall s2-begin s2-repair s1-prepared-insertall s2-commit s2-invalidate-57638 s1-display s2-invalidate-57637 s2-revalidate-57638 s1-display -create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -step s2-invalidate-57637: - UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass) AND nodeport = 57637; - -step s1-insertone: - INSERT INTO test_dml_vs_repair VALUES(1, 1); - -step s1-prepared-insertall: - EXECUTE insertall; - -step s2-begin: - BEGIN; - -step s2-repair: - SELECT master_copy_shard_placement((SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass), 'localhost', 57638, 'localhost', 57637); - -master_copy_shard_placement ---------------------------------------------------------------------- - -(1 row) - -step s1-prepared-insertall: - EXECUTE insertall; - -step s2-commit: - COMMIT; - -step s1-prepared-insertall: <... 
completed> -step s2-invalidate-57638: - UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass) AND nodeport = 57638; - -step s1-display: - SELECT * FROM test_dml_vs_repair WHERE test_id = 1 ORDER BY test_id; - -test_id|data ---------------------------------------------------------------------- - 1| 1 - 1| 2 - 1| 2 - 1| 3 -(4 rows) - -step s2-invalidate-57637: - UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass) AND nodeport = 57637; - -step s2-revalidate-57638: - UPDATE pg_dist_shard_placement SET shardstate = '1' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass) AND nodeport = 57638; - -step s1-display: - SELECT * FROM test_dml_vs_repair WHERE test_id = 1 ORDER BY test_id; - -test_id|data ---------------------------------------------------------------------- - 1| 1 - 1| 2 - 1| 2 - 1| 3 -(4 rows) - diff --git a/src/test/regress/expected/multi_colocated_shard_rebalance.out b/src/test/regress/expected/multi_colocated_shard_rebalance.out index 1a5c3b71b..bb3e68bd4 100644 --- a/src/test/regress/expected/multi_colocated_shard_rebalance.out +++ b/src/test/regress/expected/multi_colocated_shard_rebalance.out @@ -59,7 +59,7 @@ SELECT master_create_empty_shard('table6_append'); 13000021 (1 row) --- Mark tables as non-mx tables, in order to be able to test master_copy_shard_placement +-- Mark tables as non-mx tables, in order to be able to test citus_copy_shard_placement UPDATE pg_dist_partition SET repmodel='c' WHERE logicalrelid IN ('table1_group1'::regclass, 'table2_group1'::regclass, 'table5_groupX'::regclass); -- test copy @@ -90,13 +90,13 @@ ORDER BY s.shardid, sp.nodeport; (12 rows) -- try to copy colocated shards without a replica identity -SELECT master_copy_shard_placement(13000000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, false); +SELECT citus_copy_shard_placement(13000000, 'localhost', :worker_1_port, 'localhost', :worker_2_port); ERROR: cannot use logical replication to transfer shards of the relation table2_group1 since it doesn't have a REPLICA IDENTITY or PRIMARY KEY DETAIL: UPDATE and DELETE commands on the shard will error out during logical replication unless there is a REPLICA IDENTITY or PRIMARY KEY. HINT: If you wish to continue without a replica identity set the shard_transfer_mode to 'force_logical' or 'block_writes'. 
-- copy colocated shards -SELECT master_copy_shard_placement(13000000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, false, 'force_logical'); - master_copy_shard_placement +SELECT citus_copy_shard_placement(13000000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'force_logical'); + citus_copy_shard_placement --------------------------------------------------------------------- (1 row) @@ -144,7 +144,7 @@ SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.table2_ \c - - - :master_port -- copy colocated shards again to see error message -SELECT master_copy_shard_placement(13000000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, false, 'force_logical'); +SELECT citus_copy_shard_placement(13000000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'force_logical'); ERROR: shard xxxxx already exists in the target node -- test copying NOT colocated shard -- status before shard copy @@ -169,8 +169,8 @@ ORDER BY s.shardid, sp.nodeport; (8 rows) -- copy NOT colocated shard -SELECT master_copy_shard_placement(13000012, 'localhost', :worker_1_port, 'localhost', :worker_2_port, false, 'force_logical'); - master_copy_shard_placement +SELECT citus_copy_shard_placement(13000012, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'force_logical'); + citus_copy_shard_placement --------------------------------------------------------------------- (1 row) @@ -214,8 +214,8 @@ ORDER BY s.shardid, sp.nodeport; (2 rows) -- copy shard in append distributed table -SELECT master_copy_shard_placement(13000020, 'localhost', :worker_2_port, 'localhost', :worker_1_port, false, 'force_logical'); - master_copy_shard_placement +SELECT citus_copy_shard_placement(13000020, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'force_logical'); + citus_copy_shard_placement --------------------------------------------------------------------- (1 row) @@ -524,7 +524,7 @@ SELECT "Constraint", "Definition" FROM table_fkeys \c - - - :master_port -- test shard copy with foreign constraints -- we expect it to error out because we do not support foreign constraints with replication factor > 1 -SELECT master_copy_shard_placement(13000022, 'localhost', :worker_2_port, 'localhost', :worker_1_port, false); +SELECT citus_copy_shard_placement(13000022, 'localhost', :worker_2_port, 'localhost', :worker_1_port); ERROR: cannot replicate shards with foreign keys -- lets also test that master_move_shard_placement doesn't break serials CREATE TABLE serial_move_test (key int, other_val serial); diff --git a/src/test/regress/expected/multi_colocated_shard_transfer.out b/src/test/regress/expected/multi_colocated_shard_transfer.out deleted file mode 100644 index cd15be0c3..000000000 --- a/src/test/regress/expected/multi_colocated_shard_transfer.out +++ /dev/null @@ -1,231 +0,0 @@ --- --- MULTI_COLOCATED_SHARD_TRANSFER --- --- These tables are created in multi_colocation_utils test --- test repair --- manually set shardstate as inactive -UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE nodeport = :worker_2_port AND (shardid = 1300000 OR shardid = 1300004); -UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE nodeport = :worker_2_port AND shardid = 1300016; -UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE nodeport = :worker_2_port AND shardid = 1300020; --- test repairing colocated shards --- status before shard repair -SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport, p.colocationid, sp.shardstate -FROM - pg_dist_partition p, pg_dist_shard s, 
pg_dist_shard_placement sp -WHERE - p.logicalrelid = s.logicalrelid AND - s.shardid = sp.shardid AND - colocationid = (SELECT colocationid FROM pg_dist_partition WHERE logicalrelid = 'table1_group1'::regclass) -ORDER BY s.shardid, sp.nodeport; - shardid | logicalrelid | nodeport | colocationid | shardstate ---------------------------------------------------------------------- - 1300000 | table1_group1 | 57637 | 1000 | 1 - 1300000 | table1_group1 | 57638 | 1000 | 3 - 1300001 | table1_group1 | 57637 | 1000 | 1 - 1300001 | table1_group1 | 57638 | 1000 | 1 - 1300002 | table1_group1 | 57637 | 1000 | 1 - 1300002 | table1_group1 | 57638 | 1000 | 1 - 1300003 | table1_group1 | 57637 | 1000 | 1 - 1300003 | table1_group1 | 57638 | 1000 | 1 - 1300004 | table2_group1 | 57637 | 1000 | 1 - 1300004 | table2_group1 | 57638 | 1000 | 3 - 1300005 | table2_group1 | 57637 | 1000 | 1 - 1300005 | table2_group1 | 57638 | 1000 | 1 - 1300006 | table2_group1 | 57637 | 1000 | 1 - 1300006 | table2_group1 | 57638 | 1000 | 1 - 1300007 | table2_group1 | 57637 | 1000 | 1 - 1300007 | table2_group1 | 57638 | 1000 | 1 -(16 rows) - --- repair colocated shards -SELECT master_copy_shard_placement(1300000, 'localhost', :worker_1_port, 'localhost', :worker_2_port); - master_copy_shard_placement ---------------------------------------------------------------------- - -(1 row) - --- status after shard repair -SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport, p.colocationid, sp.shardstate -FROM - pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp -WHERE - p.logicalrelid = s.logicalrelid AND - s.shardid = sp.shardid AND - colocationid = (SELECT colocationid FROM pg_dist_partition WHERE logicalrelid = 'table1_group1'::regclass) -ORDER BY s.shardid, sp.nodeport; - shardid | logicalrelid | nodeport | colocationid | shardstate ---------------------------------------------------------------------- - 1300000 | table1_group1 | 57637 | 1000 | 1 - 1300000 | table1_group1 | 57638 | 1000 | 1 - 1300001 | table1_group1 | 57637 | 1000 | 1 - 1300001 | table1_group1 | 57638 | 1000 | 1 - 1300002 | table1_group1 | 57637 | 1000 | 1 - 1300002 | table1_group1 | 57638 | 1000 | 1 - 1300003 | table1_group1 | 57637 | 1000 | 1 - 1300003 | table1_group1 | 57638 | 1000 | 1 - 1300004 | table2_group1 | 57637 | 1000 | 1 - 1300004 | table2_group1 | 57638 | 1000 | 3 - 1300005 | table2_group1 | 57637 | 1000 | 1 - 1300005 | table2_group1 | 57638 | 1000 | 1 - 1300006 | table2_group1 | 57637 | 1000 | 1 - 1300006 | table2_group1 | 57638 | 1000 | 1 - 1300007 | table2_group1 | 57637 | 1000 | 1 - 1300007 | table2_group1 | 57638 | 1000 | 1 -(16 rows) - --- test repairing NOT colocated shard --- status before shard repair -SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport, p.colocationid, sp.shardstate -FROM - pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp -WHERE - p.logicalrelid = s.logicalrelid AND - s.shardid = sp.shardid AND - p.logicalrelid = 'table5_groupX'::regclass -ORDER BY s.shardid, sp.nodeport; - shardid | logicalrelid | nodeport | colocationid | shardstate ---------------------------------------------------------------------- - 1300016 | table5_groupx | 57637 | 0 | 1 - 1300016 | table5_groupx | 57638 | 0 | 3 - 1300017 | table5_groupx | 57637 | 0 | 1 - 1300017 | table5_groupx | 57638 | 0 | 1 - 1300018 | table5_groupx | 57637 | 0 | 1 - 1300018 | table5_groupx | 57638 | 0 | 1 - 1300019 | table5_groupx | 57637 | 0 | 1 - 1300019 | table5_groupx | 57638 | 0 | 1 -(8 rows) - --- repair NOT colocated shard -SELECT 
master_copy_shard_placement(1300016, 'localhost', :worker_1_port, 'localhost', :worker_2_port); - master_copy_shard_placement ---------------------------------------------------------------------- - -(1 row) - --- status after shard repair -SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport, p.colocationid, sp.shardstate -FROM - pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp -WHERE - p.logicalrelid = s.logicalrelid AND - s.shardid = sp.shardid AND - p.logicalrelid = 'table5_groupX'::regclass -ORDER BY s.shardid, sp.nodeport; - shardid | logicalrelid | nodeport | colocationid | shardstate ---------------------------------------------------------------------- - 1300016 | table5_groupx | 57637 | 0 | 1 - 1300016 | table5_groupx | 57638 | 0 | 1 - 1300017 | table5_groupx | 57637 | 0 | 1 - 1300017 | table5_groupx | 57638 | 0 | 1 - 1300018 | table5_groupx | 57637 | 0 | 1 - 1300018 | table5_groupx | 57638 | 0 | 1 - 1300019 | table5_groupx | 57637 | 0 | 1 - 1300019 | table5_groupx | 57638 | 0 | 1 -(8 rows) - --- test repairing shard in append distributed table --- status before shard repair -SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport, p.colocationid, sp.shardstate -FROM - pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp -WHERE - p.logicalrelid = s.logicalrelid AND - s.shardid = sp.shardid AND - p.logicalrelid = 'table6_append'::regclass -ORDER BY s.shardid, sp.nodeport; - shardid | logicalrelid | nodeport | colocationid | shardstate ---------------------------------------------------------------------- - 1300020 | table6_append | 57637 | 0 | 1 - 1300020 | table6_append | 57638 | 0 | 3 - 1300021 | table6_append | 57637 | 0 | 1 - 1300021 | table6_append | 57638 | 0 | 1 -(4 rows) - --- repair shard in append distributed table -SELECT master_copy_shard_placement(1300020, 'localhost', :worker_1_port, 'localhost', :worker_2_port); - master_copy_shard_placement ---------------------------------------------------------------------- - -(1 row) - --- status after shard repair -SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport, p.colocationid, sp.shardstate -FROM - pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp -WHERE - p.logicalrelid = s.logicalrelid AND - s.shardid = sp.shardid AND - p.logicalrelid = 'table6_append'::regclass -ORDER BY s.shardid, sp.nodeport; - shardid | logicalrelid | nodeport | colocationid | shardstate ---------------------------------------------------------------------- - 1300020 | table6_append | 57637 | 0 | 1 - 1300020 | table6_append | 57638 | 0 | 1 - 1300021 | table6_append | 57637 | 0 | 1 - 1300021 | table6_append | 57638 | 0 | 1 -(4 rows) - --- test repair while all placements of one shard in colocation group is unhealthy --- manually set shardstate as inactive -UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid = 1300000; --- status before shard repair -SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport, p.colocationid, sp.shardstate -FROM - pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp -WHERE - p.logicalrelid = s.logicalrelid AND - s.shardid = sp.shardid AND - colocationid = (SELECT colocationid FROM pg_dist_partition WHERE logicalrelid = 'table1_group1'::regclass) -ORDER BY s.shardid, sp.nodeport; - shardid | logicalrelid | nodeport | colocationid | shardstate ---------------------------------------------------------------------- - 1300000 | table1_group1 | 57637 | 1000 | 3 - 1300000 | table1_group1 | 57638 | 1000 | 3 - 1300001 | table1_group1 | 57637 | 1000 | 1 - 
1300001 | table1_group1 | 57638 | 1000 | 1 - 1300002 | table1_group1 | 57637 | 1000 | 1 - 1300002 | table1_group1 | 57638 | 1000 | 1 - 1300003 | table1_group1 | 57637 | 1000 | 1 - 1300003 | table1_group1 | 57638 | 1000 | 1 - 1300004 | table2_group1 | 57637 | 1000 | 1 - 1300004 | table2_group1 | 57638 | 1000 | 3 - 1300005 | table2_group1 | 57637 | 1000 | 1 - 1300005 | table2_group1 | 57638 | 1000 | 1 - 1300006 | table2_group1 | 57637 | 1000 | 1 - 1300006 | table2_group1 | 57638 | 1000 | 1 - 1300007 | table2_group1 | 57637 | 1000 | 1 - 1300007 | table2_group1 | 57638 | 1000 | 1 -(16 rows) - --- repair while all placements of one shard in colocation group is unhealthy -SELECT master_copy_shard_placement(1300000, 'localhost', :worker_1_port, 'localhost', :worker_2_port); -ERROR: source placement must be in active state --- status after shard repair -SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport, p.colocationid, sp.shardstate -FROM - pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp -WHERE - p.logicalrelid = s.logicalrelid AND - s.shardid = sp.shardid AND - colocationid = (SELECT colocationid FROM pg_dist_partition WHERE logicalrelid = 'table1_group1'::regclass) -ORDER BY s.shardid, sp.nodeport; - shardid | logicalrelid | nodeport | colocationid | shardstate ---------------------------------------------------------------------- - 1300000 | table1_group1 | 57637 | 1000 | 3 - 1300000 | table1_group1 | 57638 | 1000 | 3 - 1300001 | table1_group1 | 57637 | 1000 | 1 - 1300001 | table1_group1 | 57638 | 1000 | 1 - 1300002 | table1_group1 | 57637 | 1000 | 1 - 1300002 | table1_group1 | 57638 | 1000 | 1 - 1300003 | table1_group1 | 57637 | 1000 | 1 - 1300003 | table1_group1 | 57638 | 1000 | 1 - 1300004 | table2_group1 | 57637 | 1000 | 1 - 1300004 | table2_group1 | 57638 | 1000 | 3 - 1300005 | table2_group1 | 57637 | 1000 | 1 - 1300005 | table2_group1 | 57638 | 1000 | 1 - 1300006 | table2_group1 | 57637 | 1000 | 1 - 1300006 | table2_group1 | 57638 | 1000 | 1 - 1300007 | table2_group1 | 57637 | 1000 | 1 - 1300007 | table2_group1 | 57638 | 1000 | 1 -(16 rows) - diff --git a/src/test/regress/expected/multi_extension.out b/src/test/regress/expected/multi_extension.out index 34d98b7d8..f76712647 100644 --- a/src/test/regress/expected/multi_extension.out +++ b/src/test/regress/expected/multi_extension.out @@ -1111,59 +1111,61 @@ ERROR: extension "citus" already exists -- Snapshot of state at 11.1-1 ALTER EXTENSION citus UPDATE TO '11.1-1'; SELECT * FROM multi_extension.print_extension_changes(); - previous_object | current_object + previous_object | current_object --------------------------------------------------------------------- - access method columnar | - function alter_columnar_table_reset(regclass,boolean,boolean,boolean,boolean) void | - function alter_columnar_table_set(regclass,integer,integer,name,integer) void | - function citus_internal.columnar_ensure_am_depends_catalog() void | - function citus_internal.downgrade_columnar_storage(regclass) void | - function citus_internal.upgrade_columnar_storage(regclass) void | - function columnar.columnar_handler(internal) table_am_handler | - function isolate_tenant_to_new_shard(regclass,"any",text) bigint | - function replicate_reference_tables() void | - function worker_cleanup_job_schema_cache() void | - function worker_create_schema(bigint,text) void | - function worker_fetch_foreign_file(text,text,bigint,text[],integer[]) void | - function worker_fetch_partition_file(bigint,integer,integer,integer,text,integer) void | - function 
worker_hash_partition_table(bigint,integer,text,text,oid,anyarray) void | - function worker_merge_files_into_table(bigint,integer,text[],text[]) void | - function worker_range_partition_table(bigint,integer,text,text,oid,anyarray) void | - function worker_repartition_cleanup(bigint) void | - schema columnar | - sequence columnar.storageid_seq | - table columnar.chunk | - table columnar.chunk_group | - table columnar.options | - table columnar.stripe | - | function citus_cleanup_orphaned_resources() - | function citus_internal_delete_partition_metadata(regclass) void - | function citus_job_cancel(bigint) void - | function citus_job_wait(bigint,citus_job_status) void - | function citus_locks() SETOF record - | function citus_split_shard_by_split_points(bigint,text[],integer[],citus.shard_transfer_mode) void - | function create_distributed_table_concurrently(regclass,text,citus.distribution_type,text,integer) void - | function isolate_tenant_to_new_shard(regclass,"any",text,citus.shard_transfer_mode) bigint - | function replicate_reference_tables(citus.shard_transfer_mode) void - | function worker_copy_table_to_node(regclass,integer) void - | function worker_split_copy(bigint,text,split_copy_info[]) void - | function worker_split_shard_release_dsm() void - | function worker_split_shard_replication_setup(split_shard_info[]) SETOF replication_slot_info - | sequence pg_dist_background_job_job_id_seq - | sequence pg_dist_background_task_task_id_seq - | sequence pg_dist_cleanup_recordid_seq - | sequence pg_dist_operationid_seq - | table pg_dist_background_job - | table pg_dist_background_task - | table pg_dist_background_task_depend - | table pg_dist_cleanup - | type citus_job_status - | type citus_task_status - | type replication_slot_info - | type split_copy_info - | type split_shard_info - | view citus_locks -(50 rows) + access method columnar | + function alter_columnar_table_reset(regclass,boolean,boolean,boolean,boolean) void | + function alter_columnar_table_set(regclass,integer,integer,name,integer) void | + function citus_copy_shard_placement(bigint,text,integer,text,integer,boolean,citus.shard_transfer_mode) void | + function citus_internal.columnar_ensure_am_depends_catalog() void | + function citus_internal.downgrade_columnar_storage(regclass) void | + function citus_internal.upgrade_columnar_storage(regclass) void | + function columnar.columnar_handler(internal) table_am_handler | + function isolate_tenant_to_new_shard(regclass,"any",text) bigint | + function replicate_reference_tables() void | + function worker_cleanup_job_schema_cache() void | + function worker_create_schema(bigint,text) void | + function worker_fetch_foreign_file(text,text,bigint,text[],integer[]) void | + function worker_fetch_partition_file(bigint,integer,integer,integer,text,integer) void | + function worker_hash_partition_table(bigint,integer,text,text,oid,anyarray) void | + function worker_merge_files_into_table(bigint,integer,text[],text[]) void | + function worker_range_partition_table(bigint,integer,text,text,oid,anyarray) void | + function worker_repartition_cleanup(bigint) void | + schema columnar | + sequence columnar.storageid_seq | + table columnar.chunk | + table columnar.chunk_group | + table columnar.options | + table columnar.stripe | + | function citus_cleanup_orphaned_resources() + | function citus_copy_shard_placement(bigint,text,integer,text,integer,citus.shard_transfer_mode) void + | function citus_internal_delete_partition_metadata(regclass) void + | function citus_job_cancel(bigint) void + | 
function citus_job_wait(bigint,citus_job_status) void + | function citus_locks() SETOF record + | function citus_split_shard_by_split_points(bigint,text[],integer[],citus.shard_transfer_mode) void + | function create_distributed_table_concurrently(regclass,text,citus.distribution_type,text,integer) void + | function isolate_tenant_to_new_shard(regclass,"any",text,citus.shard_transfer_mode) bigint + | function replicate_reference_tables(citus.shard_transfer_mode) void + | function worker_copy_table_to_node(regclass,integer) void + | function worker_split_copy(bigint,text,split_copy_info[]) void + | function worker_split_shard_release_dsm() void + | function worker_split_shard_replication_setup(split_shard_info[]) SETOF replication_slot_info + | sequence pg_dist_background_job_job_id_seq + | sequence pg_dist_background_task_task_id_seq + | sequence pg_dist_cleanup_recordid_seq + | sequence pg_dist_operationid_seq + | table pg_dist_background_job + | table pg_dist_background_task + | table pg_dist_background_task_depend + | table pg_dist_cleanup + | type citus_job_status + | type citus_task_status + | type replication_slot_info + | type split_copy_info + | type split_shard_info + | view citus_locks +(52 rows) DROP TABLE multi_extension.prev_objects, multi_extension.extension_diff; -- show running version diff --git a/src/test/regress/expected/multi_move_mx.out b/src/test/regress/expected/multi_move_mx.out index 9b07a9f5f..eaf2273de 100644 --- a/src/test/regress/expected/multi_move_mx.out +++ b/src/test/regress/expected/multi_move_mx.out @@ -100,9 +100,9 @@ ORDER BY (12 rows) \c - - - :master_port --- Check that master_copy_shard_placement cannot be run with MX tables +-- Check that citus_copy_shard_placement cannot be run with MX tables SELECT - master_copy_shard_placement(shardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port, false, 'force_logical') + citus_copy_shard_placement(shardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'force_logical') FROM pg_dist_shard NATURAL JOIN pg_dist_shard_placement WHERE @@ -229,7 +229,7 @@ ORDER BY -- Check that the UDFs cannot be called from the workers SELECT - master_copy_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port, false, 'force_logical') + citus_copy_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'force_logical') FROM pg_dist_shard NATURAL JOIN pg_dist_shard_placement WHERE diff --git a/src/test/regress/expected/multi_prepare_sql.out b/src/test/regress/expected/multi_prepare_sql.out index e59b9e8f2..c21982aee 100644 --- a/src/test/regress/expected/multi_prepare_sql.out +++ b/src/test/regress/expected/multi_prepare_sql.out @@ -1128,7 +1128,7 @@ CREATE OR REPLACE FUNCTION immutable_bleat(text) RETURNS int LANGUAGE plpgsql IM CREATE TABLE test_table (test_id integer NOT NULL, data text); SET citus.shard_count TO 2; SET citus.shard_replication_factor TO 2; -SELECT create_distributed_table('test_table', 'test_id', 'hash'); +SELECT create_distributed_table('test_table', 'test_id', 'hash', colocate_with := 'none'); create_distributed_table --------------------------------------------------------------------- @@ -1157,7 +1157,7 @@ EXECUTE countsome; -- no replanning (0 rows) -- invalidate half of the placements using SQL, should invalidate via trigger -UPDATE pg_dist_shard_placement SET shardstate = '3' +DELETE FROM pg_dist_shard_placement WHERE shardid IN ( SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 
:worker_1_port; @@ -1172,13 +1172,13 @@ EXECUTE countsome; -- no replanning --------------------------------------------------------------------- (0 rows) --- repair shards, should invalidate via master_metadata_utility.c -SELECT master_copy_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port) +-- copy shards, should invalidate via master_metadata_utility.c +SELECT citus_copy_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port, transfer_mode := 'block_writes') FROM pg_dist_shard_placement WHERE shardid IN ( SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) - AND nodeport = :worker_1_port; - master_copy_shard_placement + AND nodeport = :worker_2_port; + citus_copy_shard_placement --------------------------------------------------------------------- diff --git a/src/test/regress/expected/multi_reference_table.out b/src/test/regress/expected/multi_reference_table.out index 114d2c0fc..9d839a563 100644 --- a/src/test/regress/expected/multi_reference_table.out +++ b/src/test/regress/expected/multi_reference_table.out @@ -1421,23 +1421,6 @@ SELECT master_get_table_ddl_events('reference_schema.reference_table_ddl'); ALTER TABLE reference_schema.reference_table_ddl OWNER TO postgres (2 rows) --- in reality, we wouldn't need to repair any reference table shard placements --- however, the test could be relevant for other purposes -SELECT placementid AS a_placement_id FROM pg_dist_shard_placement WHERE shardid = :a_shard_id AND nodeport = :worker_1_port \gset -SELECT placementid AS b_placement_id FROM pg_dist_shard_placement WHERE shardid = :a_shard_id AND nodeport = :worker_2_port \gset -UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE placementid = :a_placement_id; -SELECT master_copy_shard_placement(:a_shard_id, 'localhost', :worker_2_port, 'localhost', :worker_1_port); - master_copy_shard_placement ---------------------------------------------------------------------- - -(1 row) - -SELECT shardid, shardstate FROM pg_dist_shard_placement WHERE placementid = :a_placement_id; - shardid | shardstate ---------------------------------------------------------------------- - 1250019 | 1 -(1 row) - -- some queries that are captured in functions CREATE OR REPLACE FUNCTION select_count_all() RETURNS bigint AS ' SELECT diff --git a/src/test/regress/expected/multi_repair_shards.out b/src/test/regress/expected/multi_repair_shards.out deleted file mode 100644 index c7fc05080..000000000 --- a/src/test/regress/expected/multi_repair_shards.out +++ /dev/null @@ -1,94 +0,0 @@ -SET citus.next_shard_id TO 820000; -SELECT groupid AS worker_2_group FROM pg_dist_node WHERE nodeport=:worker_2_port \gset -SELECT groupid AS worker_1_group FROM pg_dist_node WHERE nodeport=:worker_1_port \gset --- =================================================================== --- test shard repair functionality --- =================================================================== --- create a table and create its distribution metadata -CREATE TABLE customer_engagements ( id integer, created_at date, event_data text ); --- add some indexes -CREATE INDEX ON customer_engagements (id); -CREATE INDEX ON customer_engagements (created_at); -CREATE INDEX ON customer_engagements (event_data); --- distribute the table --- create a single shard on the first worker -SET citus.shard_count TO 1; -SET citus.shard_replication_factor TO 2; -SELECT create_distributed_table('customer_engagements', 'id', 'hash'); - create_distributed_table 
---------------------------------------------------------------------- - -(1 row) - --- ingest some data for the tests -INSERT INTO customer_engagements VALUES (1, '01-01-2015', 'first event'); -INSERT INTO customer_engagements VALUES (2, '02-01-2015', 'second event'); -INSERT INTO customer_engagements VALUES (1, '03-01-2015', 'third event'); --- the following queries does the following: --- (i) create a new shard --- (ii) mark the second shard placements as unhealthy --- (iii) do basic checks i.e., only allow copy from healthy placement to unhealthy ones --- (iv) do a successful master_copy_shard_placement from the first placement to the second --- (v) mark the first placement as unhealthy and execute a query that is routed to the second placement --- get the newshardid -SELECT shardid as newshardid FROM pg_dist_shard WHERE logicalrelid = 'customer_engagements'::regclass -\gset --- now, update the second placement as unhealthy -UPDATE pg_dist_placement SET shardstate = 3 WHERE shardid = :newshardid - AND groupid = :worker_2_group; --- cannot repair a shard after a modification (transaction still open during repair) -BEGIN; -ALTER TABLE customer_engagements ADD COLUMN value float; -SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port); -ERROR: cannot open new connections after the first modification command within a transaction -ROLLBACK; -BEGIN; -INSERT INTO customer_engagements VALUES (4, '04-01-2015', 'fourth event'); -SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port); -ERROR: cannot open new connections after the first modification command within a transaction -ROLLBACK; --- modifications after reparing a shard are fine (will use new metadata) -BEGIN; -SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port); - master_copy_shard_placement ---------------------------------------------------------------------- - -(1 row) - -ALTER TABLE customer_engagements ADD COLUMN value float; -ROLLBACK; -BEGIN; -SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port); - master_copy_shard_placement ---------------------------------------------------------------------- - -(1 row) - -INSERT INTO customer_engagements VALUES (4, '04-01-2015', 'fourth event'); -ROLLBACK; --- deactivate placement -UPDATE pg_dist_placement SET shardstate = 1 WHERE groupid = :worker_2_group and shardid = :newshardid; -SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port); -ERROR: target placement must be in inactive state -UPDATE pg_dist_placement SET shardstate = 3 WHERE groupid = :worker_2_group and shardid = :newshardid; --- also try to copy from an inactive placement -SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port); -ERROR: source placement must be in active state --- "copy" this shard from the first placement to the second one -SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port); - master_copy_shard_placement ---------------------------------------------------------------------- - -(1 row) - --- now, update first placement as unhealthy (and raise a notice) so that queries are not routed to there -UPDATE pg_dist_placement SET shardstate = 3 WHERE shardid = :newshardid AND groupid = :worker_1_group; --- get the data from the second placement -SELECT * FROM 
customer_engagements; - id | created_at | event_data ---------------------------------------------------------------------- - 1 | 01-01-2015 | first event - 2 | 02-01-2015 | second event - 1 | 03-01-2015 | third event -(3 rows) - diff --git a/src/test/regress/expected/multi_replicate_reference_table.out b/src/test/regress/expected/multi_replicate_reference_table.out index afec8052b..2a28208af 100644 --- a/src/test/regress/expected/multi_replicate_reference_table.out +++ b/src/test/regress/expected/multi_replicate_reference_table.out @@ -787,7 +787,6 @@ SELECT citus_copy_shard_placement( (SELECT shardid FROM pg_dist_shard WHERE logicalrelid='ref_table'::regclass::oid), 'localhost', :worker_1_port, 'localhost', :worker_2_port, - do_repair := false, transfer_mode := 'block_writes'); ERROR: Copying shards to a non-existing node is not supported HINT: Add the target node via SELECT citus_add_node('localhost', 57638); @@ -802,7 +801,6 @@ SELECT citus_copy_shard_placement( (SELECT shardid FROM pg_dist_shard WHERE logicalrelid='ref_table'::regclass::oid), 'localhost', :worker_1_port, 'localhost', :worker_2_port, - do_repair := false, transfer_mode := 'block_writes'); ERROR: Copying shards to a secondary (e.g., replica) node is not supported SELECT citus_remove_node('localhost', :worker_2_port); @@ -822,7 +820,6 @@ SELECT citus_copy_shard_placement( (SELECT shardid FROM pg_dist_shard WHERE logicalrelid='ref_table'::regclass::oid), 'localhost', :worker_1_port, 'localhost', :worker_2_port, - do_repair := false, transfer_mode := 'block_writes'); ERROR: Copying shards to a non-active node is not supported HINT: Activate the target node via SELECT citus_activate_node('localhost', 57638); @@ -1005,7 +1002,7 @@ SELECT min(result) = max(result) AS consistent FROM run_command_on_placements('r t (1 row) --- test that metadata is synced when master_copy_shard_placement replicates +-- test that metadata is synced when citus_copy_shard_placement replicates -- reference table shards SET citus.replicate_reference_tables_on_activate TO off; SELECT 1 FROM master_remove_node('localhost', :worker_2_port); @@ -1021,13 +1018,12 @@ SELECT 1 FROM master_add_node('localhost', :worker_2_port); (1 row) SET citus.shard_replication_factor TO 1; -SELECT master_copy_shard_placement( +SELECT citus_copy_shard_placement( :ref_table_shard, 'localhost', :worker_1_port, 'localhost', :worker_2_port, - do_repair := false, transfer_mode := 'block_writes'); - master_copy_shard_placement + citus_copy_shard_placement --------------------------------------------------------------------- (1 row) diff --git a/src/test/regress/expected/multi_schema_support.out b/src/test/regress/expected/multi_schema_support.out index cc52183a7..66185d9b7 100644 --- a/src/test/regress/expected/multi_schema_support.out +++ b/src/test/regress/expected/multi_schema_support.out @@ -19,8 +19,8 @@ CREATE TABLE test_schema_support.nation_append( n_regionkey integer not null, n_comment varchar(152) ); -SELECT master_create_distributed_table('test_schema_support.nation_append', 'n_nationkey', 'append'); - master_create_distributed_table +SELECT create_distributed_table('test_schema_support.nation_append', 'n_nationkey', 'append'); + create_distributed_table --------------------------------------------------------------------- (1 row) @@ -41,8 +41,8 @@ CREATE TABLE test_schema_support."nation._'append" ( n_name char(25) not null, n_regionkey integer not null, n_comment varchar(152)); -SELECT master_create_distributed_table('test_schema_support."nation._''append"', 
'n_nationkey', 'append'); - master_create_distributed_table +SELECT create_distributed_table('test_schema_support."nation._''append"', 'n_nationkey', 'append'); + create_distributed_table --------------------------------------------------------------------- (1 row) @@ -351,6 +351,7 @@ SET search_path TO public; SELECT quote_ident(current_setting('lc_collate')) as current_locale \gset CREATE COLLATION test_schema_support.english (LOCALE = :current_locale); \c - - - :master_port +SET citus.shard_replication_factor TO 2; CREATE TABLE test_schema_support.nation_hash_collation( n_nationkey integer not null, n_name char(25) not null COLLATE test_schema_support.english, @@ -364,14 +365,8 @@ SELECT master_get_table_ddl_events('test_schema_support.nation_hash_collation') CREATE TABLE test_schema_support.nation_hash_collation (n_nationkey integer NOT NULL, n_name character(25) NOT NULL COLLATE test_schema_support.english, n_regionkey integer NOT NULL, n_comment character varying(152)) (2 rows) -SELECT master_create_distributed_table('test_schema_support.nation_hash_collation', 'n_nationkey', 'hash'); - master_create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -SELECT master_create_worker_shards('test_schema_support.nation_hash_collation', 4, 2); - master_create_worker_shards +SELECT create_distributed_table('test_schema_support.nation_hash_collation', 'n_nationkey', 'hash', shard_count := 4, colocate_with := 'none'); + create_distributed_table --------------------------------------------------------------------- (1 row) @@ -407,14 +402,9 @@ CREATE TABLE nation_hash_collation_search_path( n_regionkey integer not null, n_comment varchar(152) ); -SELECT master_create_distributed_table('nation_hash_collation_search_path', 'n_nationkey', 'hash'); - master_create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -SELECT master_create_worker_shards('nation_hash_collation_search_path', 4, 2); - master_create_worker_shards +SET citus.shard_replication_factor TO 2; +SELECT create_distributed_table('nation_hash_collation_search_path', 'n_nationkey', 'hash', shard_count := 4, colocate_with := 'none'); + create_distributed_table --------------------------------------------------------------------- (1 row) @@ -452,14 +442,8 @@ CREATE TABLE test_schema_support.nation_hash_composite_types( n_comment varchar(152), test_col test_schema_support.new_composite_type ); -SELECT master_create_distributed_table('test_schema_support.nation_hash_composite_types', 'n_nationkey', 'hash'); - master_create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -SELECT master_create_worker_shards('test_schema_support.nation_hash_composite_types', 4, 2); - master_create_worker_shards +SELECT create_distributed_table('test_schema_support.nation_hash_composite_types', 'n_nationkey', 'hash', shard_count := 4, colocate_with := 'none'); + create_distributed_table --------------------------------------------------------------------- (1 row) @@ -638,12 +622,12 @@ DROP INDEX index1; \c - - - :worker_1_port \d test_schema_support.index1_1190003 \c - - - :master_port --- test master_copy_shard_placement with schemas +-- test citus_copy_shard_placement with schemas SET search_path TO public; --- mark shard as inactive -UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid = 1190000 and nodeport = :worker_1_port; -SELECT master_copy_shard_placement(1190000, 'localhost', 
:worker_2_port, 'localhost', :worker_1_port); - master_copy_shard_placement +-- delete placements +DELETE FROM pg_dist_shard_placement WHERE shardid = 1190000 and nodeport = :worker_1_port; +SELECT citus_copy_shard_placement(1190000, 'localhost', :worker_2_port, 'localhost', :worker_1_port, transfer_mode := 'block_writes'); + citus_copy_shard_placement --------------------------------------------------------------------- (1 row) @@ -659,9 +643,9 @@ SELECT shardstate, nodename, nodeport FROM pg_dist_shard_placement WHERE shardid --test with search_path is set SET search_path TO test_schema_support; -- mark shard as inactive -UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid = 1190000 and nodeport = :worker_1_port; -SELECT master_copy_shard_placement(1190000, 'localhost', :worker_2_port, 'localhost', :worker_1_port); - master_copy_shard_placement +DELETE FROM pg_dist_shard_placement WHERE shardid = 1190000 and nodeport = :worker_1_port; +SELECT citus_copy_shard_placement(1190000, 'localhost', :worker_2_port, 'localhost', :worker_1_port, transfer_mode := 'block_writes'); + citus_copy_shard_placement --------------------------------------------------------------------- (1 row) diff --git a/src/test/regress/expected/multi_tenant_isolation.out b/src/test/regress/expected/multi_tenant_isolation.out index 9c196b184..3f6537e79 100644 --- a/src/test/regress/expected/multi_tenant_isolation.out +++ b/src/test/regress/expected/multi_tenant_isolation.out @@ -667,7 +667,6 @@ SET search_path to "Tenant Isolation"; UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE nodeport = :worker_1_port; SELECT isolate_tenant_to_new_shard('lineitem_date', '1997-08-08', shard_transfer_mode => 'block_writes'); ERROR: cannot isolate tenant because relation "lineitem_date" has an inactive shard placement for the shard xxxxx -HINT: Use master_copy_shard_placement UDF to repair the inactive shard placement. UPDATE pg_dist_shard_placement SET shardstate = 1 WHERE nodeport = :worker_1_port; \c - mx_isolation_role_ent - :master_port SET search_path to "Tenant Isolation"; diff --git a/src/test/regress/expected/multi_tenant_isolation_nonblocking.out b/src/test/regress/expected/multi_tenant_isolation_nonblocking.out index 4dc4a6809..5f3b36086 100644 --- a/src/test/regress/expected/multi_tenant_isolation_nonblocking.out +++ b/src/test/regress/expected/multi_tenant_isolation_nonblocking.out @@ -667,7 +667,6 @@ SET search_path to "Tenant Isolation"; UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE nodeport = :worker_1_port; SELECT isolate_tenant_to_new_shard('lineitem_date', '1997-08-08', shard_transfer_mode => 'force_logical'); ERROR: cannot isolate tenant because relation "lineitem_date" has an inactive shard placement for the shard xxxxx -HINT: Use master_copy_shard_placement UDF to repair the inactive shard placement. 
UPDATE pg_dist_shard_placement SET shardstate = 1 WHERE nodeport = :worker_1_port; \c - mx_isolation_role_ent - :master_port SET search_path to "Tenant Isolation"; diff --git a/src/test/regress/expected/multi_unsupported_worker_operations.out b/src/test/regress/expected/multi_unsupported_worker_operations.out index d54e5e84b..a8e3cffc7 100644 --- a/src/test/regress/expected/multi_unsupported_worker_operations.out +++ b/src/test/regress/expected/multi_unsupported_worker_operations.out @@ -302,7 +302,7 @@ SELECT count(*) FROM mx_table; 5 (1 row) --- master_copy_shard_placement +-- citus_copy_shard_placement SELECT logicalrelid, shardid AS testshardid, nodename, nodeport FROM pg_dist_shard NATURAL JOIN pg_dist_shard_placement WHERE logicalrelid = 'mx_table'::regclass AND nodeport=:worker_1_port @@ -311,7 +311,7 @@ LIMIT 1 \gset SELECT groupid AS worker_2_group FROM pg_dist_node WHERE nodeport = :worker_2_port \gset INSERT INTO pg_dist_placement (groupid, shardid, shardstate, shardlength) VALUES (:worker_2_group, :testshardid, 3, 0); -SELECT master_copy_shard_placement(:testshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port); +SELECT citus_copy_shard_placement(:testshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port); ERROR: operation is not allowed on this node HINT: Connect to the coordinator and run it again. SELECT shardid, nodename, nodeport, shardstate diff --git a/src/test/regress/expected/replicated_partitioned_table.out b/src/test/regress/expected/replicated_partitioned_table.out index f5adc40ca..6121942ce 100644 --- a/src/test/regress/expected/replicated_partitioned_table.out +++ b/src/test/regress/expected/replicated_partitioned_table.out @@ -203,7 +203,7 @@ SELECT create_distributed_table('collections_agg', 'key'); INSERT INTO collections_agg SELECT key, sum(key) FROM collections_1 GROUP BY key; -- coordinator roll-up INSERT INTO collections_agg SELECT collection_id, sum(key) FROM collections_1 GROUP BY collection_id; --- now make sure that repair functionality works fine +-- now make sure that copy functionality works fine -- create a table and create its distribution metadata CREATE TABLE customer_engagements (id integer, event_id int) PARTITION BY LIST ( event_id ); CREATE TABLE customer_engagements_1 @@ -220,7 +220,7 @@ CREATE INDEX ON customer_engagements (id, event_id); -- create a single shard on the first worker SET citus.shard_count TO 1; SET citus.shard_replication_factor TO 2; -SELECT create_distributed_table('customer_engagements', 'id', 'hash'); +SELECT create_distributed_table('customer_engagements', 'id', 'hash', colocate_with := 'none'); create_distributed_table --------------------------------------------------------------------- @@ -231,30 +231,22 @@ INSERT INTO customer_engagements VALUES (1, 1); INSERT INTO customer_engagements VALUES (2, 1); INSERT INTO customer_engagements VALUES (1, 2); INSERT INTO customer_engagements VALUES (2, 2); --- the following queries does the following: --- (i) create a new shard --- (ii) mark the second shard placements as unhealthy --- (iii) do basic checks i.e., only allow copy from healthy placement to unhealthy ones --- (iv) do a successful master_copy_shard_placement from the first placement to the second --- (v) mark the first placement as unhealthy and execute a query that is routed to the second placement -SELECT groupid AS worker_2_group FROM pg_dist_node WHERE nodeport=:worker_2_port \gset -SELECT groupid AS worker_1_group FROM pg_dist_node WHERE nodeport=:worker_1_port \gset -- get the 
newshardid SELECT shardid as newshardid FROM pg_dist_shard WHERE logicalrelid = 'customer_engagements'::regclass \gset --- now, update the second placement as unhealthy -UPDATE pg_dist_placement SET shardstate = 3 WHERE shardid = :newshardid - AND groupid = :worker_2_group; --- cannot repair a shard after a modification (transaction still open during repair) +-- delete all the placements on the second node belonging to partitioning hierarchy +DELETE FROM pg_dist_shard_placement p USING pg_dist_shard s +WHERE s.shardid = p.shardid AND nodeport = :worker_2_port AND logicalrelid::text LIKE 'customer_engagements%'; +-- cannot copy a shard after a modification (transaction still open during copy) BEGIN; INSERT INTO customer_engagements VALUES (1, 1); -SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port); +SELECT citus_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port, transfer_mode := 'block_writes'); ERROR: cannot open new connections after the first modification command within a transaction ROLLBACK; --- modifications after reparing a shard are fine (will use new metadata) +-- modifications after copying a shard are fine (will use new metadata) BEGIN; -SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port); - master_copy_shard_placement +SELECT citus_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port, transfer_mode := 'block_writes'); + citus_copy_shard_placement --------------------------------------------------------------------- (1 row) @@ -271,8 +263,8 @@ SELECT * FROM customer_engagements ORDER BY 1,2,3; ROLLBACK; BEGIN; -SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port); - master_copy_shard_placement +SELECT citus_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port, transfer_mode := 'block_writes'); + citus_copy_shard_placement --------------------------------------------------------------------- (1 row) diff --git a/src/test/regress/expected/tableam.out b/src/test/regress/expected/tableam.out index 242cb5310..36f3729dd 100644 --- a/src/test/regress/expected/tableam.out +++ b/src/test/regress/expected/tableam.out @@ -164,7 +164,7 @@ SELECT * FROM master_get_table_ddl_events('test_range_dist'); (2 rows) -- --- Test master_copy_shard_placement with a fake_am table +-- Test copy_copy_shard_placement with a fake_am table -- select a.shardid, a.nodeport FROM pg_dist_shard b, pg_dist_shard_placement a @@ -178,15 +178,14 @@ ORDER BY a.shardid, nodeport; 60003 | 57638 (4 rows) --- Change repmodel to allow master_copy_shard_placement +-- Change repmodel to allow copy_copy_shard_placement UPDATE pg_dist_partition SET repmodel='c' WHERE logicalrelid = 'test_hash_dist'::regclass; -SELECT master_copy_shard_placement( +SELECT citus_copy_shard_placement( get_shard_id_for_distribution_column('test_hash_dist', '1'), 'localhost', :worker_1_port, 'localhost', :worker_2_port, - do_repair := false, transfer_mode := 'block_writes'); - master_copy_shard_placement + citus_copy_shard_placement --------------------------------------------------------------------- (1 row) diff --git a/src/test/regress/expected/upgrade_list_citus_objects.out b/src/test/regress/expected/upgrade_list_citus_objects.out index 2ddde1310..bb04fcfb9 100644 --- a/src/test/regress/expected/upgrade_list_citus_objects.out +++ 
b/src/test/regress/expected/upgrade_list_citus_objects.out @@ -35,7 +35,7 @@ ORDER BY 1; function citus_cleanup_orphaned_shards() function citus_conninfo_cache_invalidate() function citus_coordinator_nodeid() - function citus_copy_shard_placement(bigint,text,integer,text,integer,boolean,citus.shard_transfer_mode) + function citus_copy_shard_placement(bigint,text,integer,text,integer,citus.shard_transfer_mode) function citus_create_restore_point(text) function citus_disable_node(text,integer,boolean) function citus_dist_local_group_cache_invalidate() diff --git a/src/test/regress/isolation_schedule b/src/test/regress/isolation_schedule index 853da116e..90d1463ad 100644 --- a/src/test/regress/isolation_schedule +++ b/src/test/regress/isolation_schedule @@ -26,7 +26,6 @@ test: isolation_citus_dist_activity test: isolation_remove_coordinator test: isolation_insert_select_repartition -test: isolation_dml_vs_repair test: isolation_copy_placement_vs_copy_placement test: isolation_concurrent_dml diff --git a/src/test/regress/multi_1_schedule b/src/test/regress/multi_1_schedule index 9b248b5cd..c0bc4f1da 100644 --- a/src/test/regress/multi_1_schedule +++ b/src/test/regress/multi_1_schedule @@ -180,7 +180,7 @@ test: multi_modifications test: multi_distribution_metadata test: multi_prune_shard_list test: multi_upsert multi_simple_queries multi_data_types -test: master_copy_shard_placement +test: citus_copy_shard_placement # multi_utilities cannot be run in parallel with other tests because it checks # global locks test: multi_utilities @@ -188,7 +188,7 @@ test: foreign_key_to_reference_table validate_constraint test: multi_repartition_udt multi_repartitioned_subquery_udf multi_subtransactions test: multi_modifying_xacts -test: multi_generate_ddl_commands multi_repair_shards +test: multi_generate_ddl_commands test: multi_create_shards test: multi_transaction_recovery @@ -253,10 +253,8 @@ test: multi_truncate # ---------- # multi_colocation_utils tests utility functions written for co-location feature & internal API -# multi_colocated_shard_transfer tests master_copy_shard_placement with colocated tables. 
# ---------- test: multi_colocation_utils -test: multi_colocated_shard_transfer # ---------- # node_conninfo_reload tests that node_conninfo changes take effect diff --git a/src/test/regress/spec/isolation_copy_placement_vs_copy_placement.spec b/src/test/regress/spec/isolation_copy_placement_vs_copy_placement.spec index 18e94653e..258463293 100644 --- a/src/test/regress/spec/isolation_copy_placement_vs_copy_placement.spec +++ b/src/test/regress/spec/isolation_copy_placement_vs_copy_placement.spec @@ -28,7 +28,7 @@ step "s1-load-cache" step "s1-repair-placement" { - SELECT master_copy_shard_placement((SELECT * FROM selected_shard_for_test_table), 'localhost', 57637, 'localhost', 57638); + SELECT citus_copy_shard_placement((SELECT * FROM selected_shard_for_test_table), 'localhost', 57637, 'localhost', 57638); } session "s2" @@ -38,14 +38,14 @@ step "s2-begin" BEGIN; } -step "s2-set-placement-inactive" +step "s2-delete-inactive" { - UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid IN (SELECT * FROM selected_shard_for_test_table) AND nodeport = 57638; + DELETE FROM pg_dist_shard_placement WHERE shardid IN (SELECT * FROM selected_shard_for_test_table) AND nodeport = 57638; } step "s2-repair-placement" { - SELECT master_copy_shard_placement((SELECT * FROM selected_shard_for_test_table), 'localhost', 57637, 'localhost', 57638); + SELECT citus_copy_shard_placement((SELECT * FROM selected_shard_for_test_table), 'localhost', 57637, 'localhost', 57638, transfer_mode := 'block_writes'); } // since test_hash_table has rep > 1 simple select query doesn't hit all placements @@ -65,7 +65,7 @@ step "s2-commit" // note that "s1-repair-placement" errors out but that is expected // given that "s2-repair-placement" succeeds and the placement is // already repaired -permutation "s1-load-cache" "s2-load-cache" "s2-set-placement-inactive" "s2-begin" "s2-repair-placement" "s1-repair-placement" "s2-commit" +permutation "s1-load-cache" "s2-load-cache" "s2-delete-inactive" "s2-begin" "s2-repair-placement" "s1-repair-placement" "s2-commit" // the same test without the load caches -permutation "s2-set-placement-inactive" "s2-begin" "s2-repair-placement" "s1-repair-placement" "s2-commit" +permutation "s2-delete-inactive" "s2-begin" "s2-repair-placement" "s1-repair-placement" "s2-commit" diff --git a/src/test/regress/spec/isolation_copy_placement_vs_modification.spec b/src/test/regress/spec/isolation_copy_placement_vs_modification.spec index 28c65756e..4496a4fa6 100644 --- a/src/test/regress/spec/isolation_copy_placement_vs_modification.spec +++ b/src/test/regress/spec/isolation_copy_placement_vs_modification.spec @@ -73,14 +73,14 @@ step "s2-begin" BEGIN; } -step "s2-set-placement-inactive" +step "s2-delete-inactive" { - UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638; + DELETE FROM pg_dist_shard_placement WHERE shardid IN (SELECT * FROM selected_shard) AND nodeport = 57638; } step "s2-repair-placement" { - SELECT master_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); + SELECT citus_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, transfer_mode := 'block_writes'); } step "s2-commit" @@ -113,16 +113,16 @@ step "s2-print-index-count" // repair a placement while concurrently performing an update/delete/insert/copy // note that at some points we use "s1-select" just after "s1-begin" given that BEGIN // may invalidate cache at certain cases 
-permutation "s1-load-cache" "s1-insert" "s1-begin" "s1-select" "s2-set-placement-inactive" "s2-begin" "s2-repair-placement" "s1-update" "s2-commit" "s1-commit" "s2-print-content" -permutation "s1-load-cache" "s1-insert" "s1-begin" "s1-select" "s2-set-placement-inactive" "s2-begin" "s2-repair-placement" "s1-delete" "s2-commit" "s1-commit" "s2-print-content" -permutation "s1-load-cache" "s1-begin" "s1-select" "s2-set-placement-inactive" "s2-begin" "s2-repair-placement" "s1-insert" "s2-commit" "s1-commit" "s2-print-content" -permutation "s1-load-cache" "s1-begin" "s1-select" "s2-set-placement-inactive" "s2-begin" "s2-repair-placement" "s1-copy" "s2-commit" "s1-commit" "s2-print-content" -permutation "s1-load-cache" "s1-begin" "s1-select" "s2-set-placement-inactive" "s2-begin" "s2-repair-placement" "s1-ddl" "s2-commit" "s1-commit" "s2-print-index-count" +permutation "s1-load-cache" "s1-insert" "s1-begin" "s1-select" "s2-delete-inactive" "s2-begin" "s2-repair-placement" "s1-update" "s2-commit" "s1-commit" "s2-print-content" +permutation "s1-load-cache" "s1-insert" "s1-begin" "s1-select" "s2-delete-inactive" "s2-begin" "s2-repair-placement" "s1-delete" "s2-commit" "s1-commit" "s2-print-content" +permutation "s1-load-cache" "s1-begin" "s1-select" "s2-delete-inactive" "s2-begin" "s2-repair-placement" "s1-insert" "s2-commit" "s1-commit" "s2-print-content" +permutation "s1-load-cache" "s1-begin" "s1-select" "s2-delete-inactive" "s2-begin" "s2-repair-placement" "s1-copy" "s2-commit" "s1-commit" "s2-print-content" +permutation "s1-load-cache" "s1-begin" "s1-select" "s2-delete-inactive" "s2-begin" "s2-repair-placement" "s1-ddl" "s2-commit" "s1-commit" "s2-print-index-count" // the same tests without loading the cache at first -permutation "s1-insert" "s1-begin" "s1-select" "s2-set-placement-inactive" "s2-begin" "s2-repair-placement" "s1-update" "s2-commit" "s1-commit" "s2-print-content" -permutation "s1-insert" "s1-begin" "s1-select" "s2-set-placement-inactive" "s2-begin" "s2-repair-placement" "s1-delete" "s2-commit" "s1-commit" "s2-print-content" -permutation "s1-begin" "s1-select" "s2-set-placement-inactive" "s2-begin" "s2-repair-placement" "s1-insert" "s2-commit" "s1-commit" "s2-print-content" -permutation "s1-begin" "s1-select" "s2-set-placement-inactive" "s2-begin" "s2-repair-placement" "s1-copy" "s2-commit" "s1-commit" "s2-print-content" -permutation "s1-begin" "s1-select" "s2-set-placement-inactive" "s2-begin" "s2-repair-placement" "s1-ddl" "s2-commit" "s1-commit" "s2-print-index-count" +permutation "s1-insert" "s1-begin" "s1-select" "s2-delete-inactive" "s2-begin" "s2-repair-placement" "s1-update" "s2-commit" "s1-commit" "s2-print-content" +permutation "s1-insert" "s1-begin" "s1-select" "s2-delete-inactive" "s2-begin" "s2-repair-placement" "s1-delete" "s2-commit" "s1-commit" "s2-print-content" +permutation "s1-begin" "s1-select" "s2-delete-inactive" "s2-begin" "s2-repair-placement" "s1-insert" "s2-commit" "s1-commit" "s2-print-content" +permutation "s1-begin" "s1-select" "s2-delete-inactive" "s2-begin" "s2-repair-placement" "s1-copy" "s2-commit" "s1-commit" "s2-print-content" +permutation "s1-begin" "s1-select" "s2-delete-inactive" "s2-begin" "s2-repair-placement" "s1-ddl" "s2-commit" "s1-commit" "s2-print-index-count" diff --git a/src/test/regress/spec/isolation_create_distributed_table_concurrently.spec b/src/test/regress/spec/isolation_create_distributed_table_concurrently.spec index 02c31c96b..7bd305a93 100644 --- a/src/test/regress/spec/isolation_create_distributed_table_concurrently.spec 
+++ b/src/test/regress/spec/isolation_create_distributed_table_concurrently.spec @@ -169,7 +169,7 @@ step "s4-print-waiting-advisory-locks" step "s4-print-colocations" { - SELECT * FROM pg_dist_colocation ORDER BY colocationid; + SELECT shardcount, replicationfactor, distributioncolumntype, distributioncolumncollation FROM pg_dist_colocation ORDER BY colocationid; } // show concurrent insert is NOT blocked by create_distributed_table_concurrently diff --git a/src/test/regress/spec/isolation_dml_vs_repair.spec b/src/test/regress/spec/isolation_dml_vs_repair.spec deleted file mode 100644 index ddf3c5b4f..000000000 --- a/src/test/regress/spec/isolation_dml_vs_repair.spec +++ /dev/null @@ -1,104 +0,0 @@ -setup -{ - CREATE TABLE test_dml_vs_repair (test_id integer NOT NULL, data int); - SET citus.shard_replication_factor TO 2; - SELECT create_distributed_table('test_dml_vs_repair', 'test_id', 'hash', shard_count:=1); -} - -teardown -{ - DROP TABLE IF EXISTS test_dml_vs_repair CASCADE; -} - -session "s1" - -setup -{ - DEALLOCATE all; - TRUNCATE test_dml_vs_repair; - PREPARE insertone AS INSERT INTO test_dml_vs_repair VALUES(1, 1); - PREPARE insertall AS INSERT INTO test_dml_vs_repair SELECT test_id, data+1 FROM test_dml_vs_repair; -} - -step "s1-begin" -{ - BEGIN; -} - -step "s1-insertone" -{ - INSERT INTO test_dml_vs_repair VALUES(1, 1); -} - -step "s1-prepared-insertone" -{ - EXECUTE insertone; -} - -step "s1-insertall" -{ - INSERT INTO test_dml_vs_repair SELECT test_id, data+1 FROM test_dml_vs_repair; -} - -step "s1-prepared-insertall" -{ - EXECUTE insertall; -} - -step "s1-display" -{ - SELECT * FROM test_dml_vs_repair WHERE test_id = 1 ORDER BY test_id; -} - -step "s1-commit" -{ - COMMIT; -} - -session "s2" - - -step "s2-begin" -{ - BEGIN; -} - -step "s2-invalidate-57637" -{ - UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass) AND nodeport = 57637; -} - -step "s2-invalidate-57638" -{ - UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass) AND nodeport = 57638; -} - -step "s2-revalidate-57638" -{ - UPDATE pg_dist_shard_placement SET shardstate = '1' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass) AND nodeport = 57638; -} - -step "s2-repair" -{ - SELECT master_copy_shard_placement((SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_dml_vs_repair'::regclass), 'localhost', 57638, 'localhost', 57637); -} - -step "s2-commit" -{ - COMMIT; -} - -// verify that repair is blocked by ongoing modifying simple transaction -permutation "s2-invalidate-57637" "s1-begin" "s1-insertone" "s2-repair" "s1-commit" - -// verify that repair is blocked by ongoing modifying insert...select transaction -permutation "s1-insertone" "s2-invalidate-57637" "s1-begin" "s1-insertall" "s2-repair" "s1-commit" - -// verify that modifications wait for shard repair -permutation "s2-invalidate-57637" "s2-begin" "s2-repair" "s1-insertone" "s2-commit" "s2-invalidate-57638" "s1-display" "s2-invalidate-57637" "s2-revalidate-57638" "s1-display" - -// verify that prepared plain modifications wait for shard repair -permutation "s2-invalidate-57637" "s1-prepared-insertone" "s2-begin" "s2-repair" "s1-prepared-insertone" "s2-commit" "s2-invalidate-57638" "s1-display" "s2-invalidate-57637" "s2-revalidate-57638" "s1-display" - -// verify that prepared INSERT ... 
SELECT waits for shard repair -permutation "s2-invalidate-57637" "s1-insertone" "s1-prepared-insertall" "s2-begin" "s2-repair" "s1-prepared-insertall" "s2-commit" "s2-invalidate-57638" "s1-display" "s2-invalidate-57637" "s2-revalidate-57638" "s1-display" diff --git a/src/test/regress/sql/master_copy_shard_placement.sql b/src/test/regress/sql/citus_copy_shard_placement.sql similarity index 85% rename from src/test/regress/sql/master_copy_shard_placement.sql rename to src/test/regress/sql/citus_copy_shard_placement.sql index 30f36d56d..7861434d3 100644 --- a/src/test/regress/sql/master_copy_shard_placement.sql +++ b/src/test/regress/sql/citus_copy_shard_placement.sql @@ -1,4 +1,4 @@ --- Tests for master_copy_shard_placement, which can be used for adding replicas in statement-based replication +-- Tests for citus_copy_shard_placement, which can be used for adding replicas in statement-based replication CREATE SCHEMA mcsp; SET search_path TO mcsp; SET citus.next_shard_id TO 8139000; @@ -24,7 +24,7 @@ CREATE TABLE history_p1 PARTITION OF history FOR VALUES FROM ('2019-01-01') TO ( CREATE TABLE history_p2 PARTITION OF history FOR VALUES FROM ('2020-01-01') TO ('2021-01-01'); SELECT create_distributed_table('history','key'); --- Mark tables as non-mx tables, in order to be able to test master_copy_shard_placement +-- Mark tables as non-mx tables, in order to be able to test citus_copy_shard_placement UPDATE pg_dist_partition SET repmodel='c' WHERE logicalrelid IN ('data'::regclass, 'history'::regclass); @@ -35,47 +35,42 @@ INSERT INTO history VALUES ('key-1', '2020-02-01', 'old'); INSERT INTO history VALUES ('key-1', '2019-10-01', 'older'); -- verify we error out if no healthy placement exists at source -SELECT master_copy_shard_placement( +SELECT citus_copy_shard_placement( get_shard_id_for_distribution_column('data', 'key-1'), 'localhost', :worker_1_port, 'localhost', :worker_2_port, - do_repair := false, transfer_mode := 'block_writes'); -- verify we error out if source and destination are the same -SELECT master_copy_shard_placement( +SELECT citus_copy_shard_placement( get_shard_id_for_distribution_column('data', 'key-1'), 'localhost', :worker_2_port, 'localhost', :worker_2_port, - do_repair := false, transfer_mode := 'block_writes'); -- verify we error out if target already contains a healthy placement -SELECT master_copy_shard_placement( +SELECT citus_copy_shard_placement( (SELECT shardid FROM pg_dist_shard WHERE logicalrelid='ref_table'::regclass::oid), 'localhost', :worker_1_port, 'localhost', :worker_2_port, - do_repair := false, transfer_mode := 'block_writes'); -- verify we error out if table has foreign key constraints INSERT INTO ref_table SELECT 1, value FROM data; ALTER TABLE data ADD CONSTRAINT distfk FOREIGN KEY (value) REFERENCES ref_table (b) MATCH FULL; -SELECT master_copy_shard_placement( +SELECT citus_copy_shard_placement( get_shard_id_for_distribution_column('data', 'key-1'), 'localhost', :worker_2_port, - 'localhost', :worker_1_port, - do_repair := false); + 'localhost', :worker_1_port); ALTER TABLE data DROP CONSTRAINT distfk; -- replicate shard that contains key-1 -SELECT master_copy_shard_placement( +SELECT citus_copy_shard_placement( get_shard_id_for_distribution_column('data', 'key-1'), 'localhost', :worker_2_port, 'localhost', :worker_1_port, - do_repair := false, transfer_mode := 'block_writes'); -- forcefully mark the old replica as inactive @@ -102,11 +97,10 @@ RESET client_min_messages; CREATE TABLE mx_table(a int); SELECT create_distributed_table('mx_table', 
'a'); -SELECT master_copy_shard_placement( +SELECT citus_copy_shard_placement( get_shard_id_for_distribution_column('mx_table', '1'), 'localhost', :worker_1_port, 'localhost', :worker_2_port, - do_repair := false, transfer_mode := 'block_writes'); SET client_min_messages TO ERROR; diff --git a/src/test/regress/sql/citus_local_tables.sql b/src/test/regress/sql/citus_local_tables.sql index 57f93b076..8505734e5 100644 --- a/src/test/regress/sql/citus_local_tables.sql +++ b/src/test/regress/sql/citus_local_tables.sql @@ -223,8 +223,8 @@ SELECT master_create_empty_shard('citus_local_table_1'); -- get_shard_id_for_distribution_column is supported SELECT get_shard_id_for_distribution_column('citus_local_table_1', 'not_checking_this_arg_for_non_dist_tables'); SELECT get_shard_id_for_distribution_column('citus_local_table_1'); --- master_copy_shard_placement is not supported -SELECT master_copy_shard_placement(shardid, 'localhost', :master_port, 'localhost', :worker_1_port, true) +-- citus_copy_shard_placement is not supported +SELECT citus_copy_shard_placement(shardid, 'localhost', :master_port, 'localhost', :worker_1_port) FROM (SELECT shardid FROM pg_dist_shard WHERE logicalrelid='citus_local_table_1'::regclass) as shardid; -- undistribute_table is supported BEGIN; diff --git a/src/test/regress/sql/citus_local_tables_ent.sql b/src/test/regress/sql/citus_local_tables_ent.sql index c33abe09a..8b76c0a4d 100644 --- a/src/test/regress/sql/citus_local_tables_ent.sql +++ b/src/test/regress/sql/citus_local_tables_ent.sql @@ -19,12 +19,12 @@ SELECT citus_add_local_table_to_metadata('citus_local_table'); -- isolate_tenant_to_new_shard is not supported SELECT isolate_tenant_to_new_shard('citus_local_table', 100, shard_transfer_mode => 'block_writes'); --- master_copy_shard_placement is not supported -SELECT master_copy_shard_placement(shardid, 'localhost', :master_port, 'localhost', :worker_1_port, false) +-- citus_copy_shard_placement is not supported +SELECT citus_copy_shard_placement(shardid, 'localhost', :master_port, 'localhost', :worker_1_port, false) FROM (SELECT shardid FROM pg_dist_shard WHERE logicalrelid='citus_local_table'::regclass) as shardid; --- master_move_shard_placement is not supported -SELECT master_move_shard_placement(shardid, 'localhost', :master_port, 'localhost', :worker_1_port) +-- citus_move_shard_placement is not supported +SELECT citus_move_shard_placement(shardid, 'localhost', :master_port, 'localhost', :worker_1_port) FROM (SELECT shardid FROM pg_dist_shard WHERE logicalrelid='citus_local_table'::regclass) as shardid; -- replicate_table_shards is not suported diff --git a/src/test/regress/sql/ignoring_orphaned_shards.sql b/src/test/regress/sql/ignoring_orphaned_shards.sql index 774d7cd19..fffd43f92 100644 --- a/src/test/regress/sql/ignoring_orphaned_shards.sql +++ b/src/test/regress/sql/ignoring_orphaned_shards.sql @@ -82,16 +82,8 @@ SELECT logicalrelid FROM pg_dist_partition WHERE colocationid = 92448300 ORDER B SELECT update_distributed_table_colocation('rep2', 'rep1'); SELECT logicalrelid FROM pg_dist_partition WHERE colocationid = 92448300 ORDER BY 1; -UPDATE pg_dist_placement SET shardstate = 3 WHERE shardid = 92448300 AND groupid = 0; -SELECT shardid, shardstate, nodeport FROM pg_dist_shard_placement WHERE shardid = 92448300 ORDER BY placementid; - -- cannot copy from an orphaned shard SELECT * FROM citus_copy_shard_placement(92448300, 'localhost', :worker_1_port, 'localhost', :master_port); --- cannot copy to an orphaned shard -SELECT * FROM 
citus_copy_shard_placement(92448300, 'localhost', :worker_2_port, 'localhost', :worker_1_port); --- can still copy to an inactive shard -SELECT * FROM citus_copy_shard_placement(92448300, 'localhost', :worker_2_port, 'localhost', :master_port); -SELECT shardid, shardstate, nodeport FROM pg_dist_shard_placement WHERE shardid = 92448300 ORDER BY placementid; -- Make sure we don't send a query to the orphaned shard BEGIN; diff --git a/src/test/regress/sql/multi_colocated_shard_rebalance.sql b/src/test/regress/sql/multi_colocated_shard_rebalance.sql index 2c43460a2..a8ad39c06 100644 --- a/src/test/regress/sql/multi_colocated_shard_rebalance.sql +++ b/src/test/regress/sql/multi_colocated_shard_rebalance.sql @@ -38,7 +38,7 @@ SELECT master_create_distributed_table('table6_append', 'id', 'append'); SELECT master_create_empty_shard('table6_append'); SELECT master_create_empty_shard('table6_append'); --- Mark tables as non-mx tables, in order to be able to test master_copy_shard_placement +-- Mark tables as non-mx tables, in order to be able to test citus_copy_shard_placement UPDATE pg_dist_partition SET repmodel='c' WHERE logicalrelid IN ('table1_group1'::regclass, 'table2_group1'::regclass, 'table5_groupX'::regclass); @@ -56,10 +56,10 @@ WHERE ORDER BY s.shardid, sp.nodeport; -- try to copy colocated shards without a replica identity -SELECT master_copy_shard_placement(13000000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, false); +SELECT citus_copy_shard_placement(13000000, 'localhost', :worker_1_port, 'localhost', :worker_2_port); -- copy colocated shards -SELECT master_copy_shard_placement(13000000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, false, 'force_logical'); +SELECT citus_copy_shard_placement(13000000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'force_logical'); -- status after shard copy SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport @@ -79,7 +79,7 @@ SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.table2_ -- copy colocated shards again to see error message -SELECT master_copy_shard_placement(13000000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, false, 'force_logical'); +SELECT citus_copy_shard_placement(13000000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'force_logical'); -- test copying NOT colocated shard @@ -94,7 +94,7 @@ WHERE ORDER BY s.shardid, sp.nodeport; -- copy NOT colocated shard -SELECT master_copy_shard_placement(13000012, 'localhost', :worker_1_port, 'localhost', :worker_2_port, false, 'force_logical'); +SELECT citus_copy_shard_placement(13000012, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'force_logical'); -- status after shard copy SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport @@ -119,7 +119,7 @@ WHERE ORDER BY s.shardid, sp.nodeport; -- copy shard in append distributed table -SELECT master_copy_shard_placement(13000020, 'localhost', :worker_2_port, 'localhost', :worker_1_port, false, 'force_logical'); +SELECT citus_copy_shard_placement(13000020, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'force_logical'); -- status after shard copy SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport @@ -281,7 +281,7 @@ SELECT "Constraint", "Definition" FROM table_fkeys -- test shard copy with foreign constraints -- we expect it to error out because we do not support foreign constraints with replication factor > 1 -SELECT master_copy_shard_placement(13000022, 'localhost', :worker_2_port, 'localhost', :worker_1_port, false); +SELECT 
citus_copy_shard_placement(13000022, 'localhost', :worker_2_port, 'localhost', :worker_1_port); -- lets also test that master_move_shard_placement doesn't break serials diff --git a/src/test/regress/sql/multi_colocated_shard_transfer.sql b/src/test/regress/sql/multi_colocated_shard_transfer.sql deleted file mode 100644 index 025324267..000000000 --- a/src/test/regress/sql/multi_colocated_shard_transfer.sql +++ /dev/null @@ -1,114 +0,0 @@ --- --- MULTI_COLOCATED_SHARD_TRANSFER --- - --- These tables are created in multi_colocation_utils test - --- test repair --- manually set shardstate as inactive -UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE nodeport = :worker_2_port AND (shardid = 1300000 OR shardid = 1300004); -UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE nodeport = :worker_2_port AND shardid = 1300016; -UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE nodeport = :worker_2_port AND shardid = 1300020; - - --- test repairing colocated shards --- status before shard repair -SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport, p.colocationid, sp.shardstate -FROM - pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp -WHERE - p.logicalrelid = s.logicalrelid AND - s.shardid = sp.shardid AND - colocationid = (SELECT colocationid FROM pg_dist_partition WHERE logicalrelid = 'table1_group1'::regclass) -ORDER BY s.shardid, sp.nodeport; - --- repair colocated shards -SELECT master_copy_shard_placement(1300000, 'localhost', :worker_1_port, 'localhost', :worker_2_port); - --- status after shard repair -SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport, p.colocationid, sp.shardstate -FROM - pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp -WHERE - p.logicalrelid = s.logicalrelid AND - s.shardid = sp.shardid AND - colocationid = (SELECT colocationid FROM pg_dist_partition WHERE logicalrelid = 'table1_group1'::regclass) -ORDER BY s.shardid, sp.nodeport; - - --- test repairing NOT colocated shard --- status before shard repair -SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport, p.colocationid, sp.shardstate -FROM - pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp -WHERE - p.logicalrelid = s.logicalrelid AND - s.shardid = sp.shardid AND - p.logicalrelid = 'table5_groupX'::regclass -ORDER BY s.shardid, sp.nodeport; - --- repair NOT colocated shard -SELECT master_copy_shard_placement(1300016, 'localhost', :worker_1_port, 'localhost', :worker_2_port); - --- status after shard repair -SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport, p.colocationid, sp.shardstate -FROM - pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp -WHERE - p.logicalrelid = s.logicalrelid AND - s.shardid = sp.shardid AND - p.logicalrelid = 'table5_groupX'::regclass -ORDER BY s.shardid, sp.nodeport; - - --- test repairing shard in append distributed table --- status before shard repair -SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport, p.colocationid, sp.shardstate -FROM - pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp -WHERE - p.logicalrelid = s.logicalrelid AND - s.shardid = sp.shardid AND - p.logicalrelid = 'table6_append'::regclass -ORDER BY s.shardid, sp.nodeport; - --- repair shard in append distributed table -SELECT master_copy_shard_placement(1300020, 'localhost', :worker_1_port, 'localhost', :worker_2_port); - --- status after shard repair -SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport, p.colocationid, sp.shardstate -FROM - pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement 
sp -WHERE - p.logicalrelid = s.logicalrelid AND - s.shardid = sp.shardid AND - p.logicalrelid = 'table6_append'::regclass -ORDER BY s.shardid, sp.nodeport; - - --- test repair while all placements of one shard in colocation group is unhealthy --- manually set shardstate as inactive -UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid = 1300000; - --- status before shard repair -SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport, p.colocationid, sp.shardstate -FROM - pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp -WHERE - p.logicalrelid = s.logicalrelid AND - s.shardid = sp.shardid AND - colocationid = (SELECT colocationid FROM pg_dist_partition WHERE logicalrelid = 'table1_group1'::regclass) -ORDER BY s.shardid, sp.nodeport; - --- repair while all placements of one shard in colocation group is unhealthy -SELECT master_copy_shard_placement(1300000, 'localhost', :worker_1_port, 'localhost', :worker_2_port); - --- status after shard repair -SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport, p.colocationid, sp.shardstate -FROM - pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp -WHERE - p.logicalrelid = s.logicalrelid AND - s.shardid = sp.shardid AND - colocationid = (SELECT colocationid FROM pg_dist_partition WHERE logicalrelid = 'table1_group1'::regclass) -ORDER BY s.shardid, sp.nodeport; diff --git a/src/test/regress/sql/multi_move_mx.sql b/src/test/regress/sql/multi_move_mx.sql index f9f8072d0..d02c1f417 100644 --- a/src/test/regress/sql/multi_move_mx.sql +++ b/src/test/regress/sql/multi_move_mx.sql @@ -54,9 +54,9 @@ ORDER BY logicalrelid, shardid; \c - - - :master_port --- Check that master_copy_shard_placement cannot be run with MX tables +-- Check that citus_copy_shard_placement cannot be run with MX tables SELECT - master_copy_shard_placement(shardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port, false, 'force_logical') + citus_copy_shard_placement(shardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'force_logical') FROM pg_dist_shard NATURAL JOIN pg_dist_shard_placement WHERE @@ -146,7 +146,7 @@ ORDER BY -- Check that the UDFs cannot be called from the workers SELECT - master_copy_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port, false, 'force_logical') + citus_copy_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'force_logical') FROM pg_dist_shard NATURAL JOIN pg_dist_shard_placement WHERE diff --git a/src/test/regress/sql/multi_prepare_sql.sql b/src/test/regress/sql/multi_prepare_sql.sql index de176264f..9819b059e 100644 --- a/src/test/regress/sql/multi_prepare_sql.sql +++ b/src/test/regress/sql/multi_prepare_sql.sql @@ -614,7 +614,7 @@ CREATE OR REPLACE FUNCTION immutable_bleat(text) RETURNS int LANGUAGE plpgsql IM CREATE TABLE test_table (test_id integer NOT NULL, data text); SET citus.shard_count TO 2; SET citus.shard_replication_factor TO 2; -SELECT create_distributed_table('test_table', 'test_id', 'hash'); +SELECT create_distributed_table('test_table', 'test_id', 'hash', colocate_with := 'none'); -- avoid 9.6+ only context messages \set VERBOSITY terse @@ -627,19 +627,19 @@ EXECUTE countsome; -- should indicate planning EXECUTE countsome; -- no replanning -- invalidate half of the placements using SQL, should invalidate via trigger -UPDATE pg_dist_shard_placement SET shardstate = '3' +DELETE FROM pg_dist_shard_placement WHERE shardid IN ( SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport 
= :worker_1_port; EXECUTE countsome; -- should indicate replanning EXECUTE countsome; -- no replanning --- repair shards, should invalidate via master_metadata_utility.c -SELECT master_copy_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port) +-- copy shards, should invalidate via master_metadata_utility.c +SELECT citus_copy_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port, transfer_mode := 'block_writes') FROM pg_dist_shard_placement WHERE shardid IN ( SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) - AND nodeport = :worker_1_port; + AND nodeport = :worker_2_port; EXECUTE countsome; -- should indicate replanning EXECUTE countsome; -- no replanning diff --git a/src/test/regress/sql/multi_reference_table.sql b/src/test/regress/sql/multi_reference_table.sql index d82e57e6b..bc31a137e 100644 --- a/src/test/regress/sql/multi_reference_table.sql +++ b/src/test/regress/sql/multi_reference_table.sql @@ -905,15 +905,6 @@ SELECT master_update_shard_statistics(:a_shard_id); SELECT master_get_table_ddl_events('reference_schema.reference_table_ddl'); --- in reality, we wouldn't need to repair any reference table shard placements --- however, the test could be relevant for other purposes -SELECT placementid AS a_placement_id FROM pg_dist_shard_placement WHERE shardid = :a_shard_id AND nodeport = :worker_1_port \gset -SELECT placementid AS b_placement_id FROM pg_dist_shard_placement WHERE shardid = :a_shard_id AND nodeport = :worker_2_port \gset - -UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE placementid = :a_placement_id; -SELECT master_copy_shard_placement(:a_shard_id, 'localhost', :worker_2_port, 'localhost', :worker_1_port); -SELECT shardid, shardstate FROM pg_dist_shard_placement WHERE placementid = :a_placement_id; - -- some queries that are captured in functions CREATE OR REPLACE FUNCTION select_count_all() RETURNS bigint AS ' SELECT diff --git a/src/test/regress/sql/multi_repair_shards.sql b/src/test/regress/sql/multi_repair_shards.sql deleted file mode 100644 index f910585cb..000000000 --- a/src/test/regress/sql/multi_repair_shards.sql +++ /dev/null @@ -1,82 +0,0 @@ -SET citus.next_shard_id TO 820000; -SELECT groupid AS worker_2_group FROM pg_dist_node WHERE nodeport=:worker_2_port \gset -SELECT groupid AS worker_1_group FROM pg_dist_node WHERE nodeport=:worker_1_port \gset - --- =================================================================== --- test shard repair functionality --- =================================================================== - --- create a table and create its distribution metadata -CREATE TABLE customer_engagements ( id integer, created_at date, event_data text ); - --- add some indexes -CREATE INDEX ON customer_engagements (id); -CREATE INDEX ON customer_engagements (created_at); -CREATE INDEX ON customer_engagements (event_data); - --- distribute the table --- create a single shard on the first worker -SET citus.shard_count TO 1; -SET citus.shard_replication_factor TO 2; -SELECT create_distributed_table('customer_engagements', 'id', 'hash'); - --- ingest some data for the tests -INSERT INTO customer_engagements VALUES (1, '01-01-2015', 'first event'); -INSERT INTO customer_engagements VALUES (2, '02-01-2015', 'second event'); -INSERT INTO customer_engagements VALUES (1, '03-01-2015', 'third event'); - --- the following queries does the following: --- (i) create a new shard --- (ii) mark the second shard placements as unhealthy --- (iii) do basic checks i.e., 
only allow copy from healthy placement to unhealthy ones --- (iv) do a successful master_copy_shard_placement from the first placement to the second --- (v) mark the first placement as unhealthy and execute a query that is routed to the second placement - --- get the newshardid -SELECT shardid as newshardid FROM pg_dist_shard WHERE logicalrelid = 'customer_engagements'::regclass -\gset - --- now, update the second placement as unhealthy -UPDATE pg_dist_placement SET shardstate = 3 WHERE shardid = :newshardid - AND groupid = :worker_2_group; - --- cannot repair a shard after a modification (transaction still open during repair) -BEGIN; -ALTER TABLE customer_engagements ADD COLUMN value float; -SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port); -ROLLBACK; - -BEGIN; -INSERT INTO customer_engagements VALUES (4, '04-01-2015', 'fourth event'); -SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port); -ROLLBACK; - --- modifications after reparing a shard are fine (will use new metadata) -BEGIN; -SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port); -ALTER TABLE customer_engagements ADD COLUMN value float; -ROLLBACK; - -BEGIN; -SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port); -INSERT INTO customer_engagements VALUES (4, '04-01-2015', 'fourth event'); -ROLLBACK; - --- deactivate placement -UPDATE pg_dist_placement SET shardstate = 1 WHERE groupid = :worker_2_group and shardid = :newshardid; - -SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port); - -UPDATE pg_dist_placement SET shardstate = 3 WHERE groupid = :worker_2_group and shardid = :newshardid; - --- also try to copy from an inactive placement -SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port); - --- "copy" this shard from the first placement to the second one -SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port); - --- now, update first placement as unhealthy (and raise a notice) so that queries are not routed to there -UPDATE pg_dist_placement SET shardstate = 3 WHERE shardid = :newshardid AND groupid = :worker_1_group; - --- get the data from the second placement -SELECT * FROM customer_engagements; diff --git a/src/test/regress/sql/multi_replicate_reference_table.sql b/src/test/regress/sql/multi_replicate_reference_table.sql index 172d08f35..9707ef7fe 100644 --- a/src/test/regress/sql/multi_replicate_reference_table.sql +++ b/src/test/regress/sql/multi_replicate_reference_table.sql @@ -515,7 +515,6 @@ SELECT citus_copy_shard_placement( (SELECT shardid FROM pg_dist_shard WHERE logicalrelid='ref_table'::regclass::oid), 'localhost', :worker_1_port, 'localhost', :worker_2_port, - do_repair := false, transfer_mode := 'block_writes'); -- verify direct call to citus_copy_shard_placement errors if target node is secondary @@ -524,7 +523,6 @@ SELECT citus_copy_shard_placement( (SELECT shardid FROM pg_dist_shard WHERE logicalrelid='ref_table'::regclass::oid), 'localhost', :worker_1_port, 'localhost', :worker_2_port, - do_repair := false, transfer_mode := 'block_writes'); SELECT citus_remove_node('localhost', :worker_2_port); @@ -534,7 +532,6 @@ SELECT citus_copy_shard_placement( (SELECT shardid FROM pg_dist_shard WHERE logicalrelid='ref_table'::regclass::oid), 'localhost', 
:worker_1_port, 'localhost', :worker_2_port, - do_repair := false, transfer_mode := 'block_writes'); SELECT 1 FROM master_activate_node('localhost', :worker_2_port); @@ -620,7 +617,7 @@ SELECT count(*) - :ref_table_placements FROM pg_dist_shard_placement WHERE shard SELECT min(result) = max(result) AS consistent FROM run_command_on_placements('ref_table', 'SELECT sum(a) FROM %s'); --- test that metadata is synced when master_copy_shard_placement replicates +-- test that metadata is synced when citus_copy_shard_placement replicates -- reference table shards SET citus.replicate_reference_tables_on_activate TO off; SELECT 1 FROM master_remove_node('localhost', :worker_2_port); @@ -628,11 +625,10 @@ SELECT 1 FROM master_add_node('localhost', :worker_2_port); SET citus.shard_replication_factor TO 1; -SELECT master_copy_shard_placement( +SELECT citus_copy_shard_placement( :ref_table_shard, 'localhost', :worker_1_port, 'localhost', :worker_2_port, - do_repair := false, transfer_mode := 'block_writes'); SELECT result::int - :ref_table_placements diff --git a/src/test/regress/sql/multi_schema_support.sql b/src/test/regress/sql/multi_schema_support.sql index 403a1180b..b2d374494 100644 --- a/src/test/regress/sql/multi_schema_support.sql +++ b/src/test/regress/sql/multi_schema_support.sql @@ -32,7 +32,7 @@ CREATE TABLE test_schema_support.nation_append( n_regionkey integer not null, n_comment varchar(152) ); -SELECT master_create_distributed_table('test_schema_support.nation_append', 'n_nationkey', 'append'); +SELECT create_distributed_table('test_schema_support.nation_append', 'n_nationkey', 'append'); SELECT master_create_empty_shard('test_schema_support.nation_append') as simple_shardid \gset -- append table to shard @@ -55,7 +55,7 @@ CREATE TABLE test_schema_support."nation._'append" ( n_regionkey integer not null, n_comment varchar(152)); -SELECT master_create_distributed_table('test_schema_support."nation._''append"', 'n_nationkey', 'append'); +SELECT create_distributed_table('test_schema_support."nation._''append"', 'n_nationkey', 'append'); SELECT master_create_empty_shard('test_schema_support."nation._''append"') as special_shardid \gset copy test_schema_support."nation._'append" FROM STDIN with (append_to_shard :special_shardid, delimiter '|'); @@ -298,6 +298,7 @@ SELECT quote_ident(current_setting('lc_collate')) as current_locale \gset CREATE COLLATION test_schema_support.english (LOCALE = :current_locale); \c - - - :master_port +SET citus.shard_replication_factor TO 2; CREATE TABLE test_schema_support.nation_hash_collation( n_nationkey integer not null, @@ -306,8 +307,7 @@ CREATE TABLE test_schema_support.nation_hash_collation( n_comment varchar(152) ); SELECT master_get_table_ddl_events('test_schema_support.nation_hash_collation') ORDER BY 1; -SELECT master_create_distributed_table('test_schema_support.nation_hash_collation', 'n_nationkey', 'hash'); -SELECT master_create_worker_shards('test_schema_support.nation_hash_collation', 4, 2); +SELECT create_distributed_table('test_schema_support.nation_hash_collation', 'n_nationkey', 'hash', shard_count := 4, colocate_with := 'none'); \copy test_schema_support.nation_hash_collation FROM STDIN with delimiter '|'; 0|ALGERIA|0|haggle. 
carefully final deposits detect slyly agai @@ -329,8 +329,8 @@ CREATE TABLE nation_hash_collation_search_path( n_regionkey integer not null, n_comment varchar(152) ); -SELECT master_create_distributed_table('nation_hash_collation_search_path', 'n_nationkey', 'hash'); -SELECT master_create_worker_shards('nation_hash_collation_search_path', 4, 2); +SET citus.shard_replication_factor TO 2; +SELECT create_distributed_table('nation_hash_collation_search_path', 'n_nationkey', 'hash', shard_count := 4, colocate_with := 'none'); \copy nation_hash_collation_search_path FROM STDIN with delimiter '|'; 0|ALGERIA|0|haggle. carefully final deposits detect slyly agai @@ -355,8 +355,7 @@ CREATE TABLE test_schema_support.nation_hash_composite_types( n_comment varchar(152), test_col test_schema_support.new_composite_type ); -SELECT master_create_distributed_table('test_schema_support.nation_hash_composite_types', 'n_nationkey', 'hash'); -SELECT master_create_worker_shards('test_schema_support.nation_hash_composite_types', 4, 2); +SELECT create_distributed_table('test_schema_support.nation_hash_composite_types', 'n_nationkey', 'hash', shard_count := 4, colocate_with := 'none'); -- insert some data to verify composite type queries \copy test_schema_support.nation_hash_composite_types FROM STDIN with delimiter '|'; @@ -463,12 +462,12 @@ DROP INDEX index1; \c - - - :master_port --- test master_copy_shard_placement with schemas +-- test citus_copy_shard_placement with schemas SET search_path TO public; --- mark shard as inactive -UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid = 1190000 and nodeport = :worker_1_port; -SELECT master_copy_shard_placement(1190000, 'localhost', :worker_2_port, 'localhost', :worker_1_port); +-- delete placements +DELETE FROM pg_dist_shard_placement WHERE shardid = 1190000 and nodeport = :worker_1_port; +SELECT citus_copy_shard_placement(1190000, 'localhost', :worker_2_port, 'localhost', :worker_1_port, transfer_mode := 'block_writes'); -- verify shardstate SELECT shardstate, nodename, nodeport FROM pg_dist_shard_placement WHERE shardid = 1190000 ORDER BY nodeport; @@ -478,8 +477,8 @@ SELECT shardstate, nodename, nodeport FROM pg_dist_shard_placement WHERE shardid SET search_path TO test_schema_support; -- mark shard as inactive -UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE shardid = 1190000 and nodeport = :worker_1_port; -SELECT master_copy_shard_placement(1190000, 'localhost', :worker_2_port, 'localhost', :worker_1_port); +DELETE FROM pg_dist_shard_placement WHERE shardid = 1190000 and nodeport = :worker_1_port; +SELECT citus_copy_shard_placement(1190000, 'localhost', :worker_2_port, 'localhost', :worker_1_port, transfer_mode := 'block_writes'); -- verify shardstate SELECT shardstate, nodename, nodeport FROM pg_dist_shard_placement WHERE shardid = 1190000 ORDER BY nodeport; diff --git a/src/test/regress/sql/multi_unsupported_worker_operations.sql b/src/test/regress/sql/multi_unsupported_worker_operations.sql index 5fac64e3d..df70ebbe2 100644 --- a/src/test/regress/sql/multi_unsupported_worker_operations.sql +++ b/src/test/regress/sql/multi_unsupported_worker_operations.sql @@ -182,7 +182,7 @@ SELECT master_remove_distributed_table_metadata_from_workers('mx_table'::regclas SELECT master_remove_partition_metadata('mx_table'::regclass, 'public', 'mx_table'); SELECT count(*) FROM mx_table; --- master_copy_shard_placement +-- citus_copy_shard_placement SELECT logicalrelid, shardid AS testshardid, nodename, nodeport FROM pg_dist_shard NATURAL JOIN 
pg_dist_shard_placement WHERE logicalrelid = 'mx_table'::regclass AND nodeport=:worker_1_port @@ -193,7 +193,7 @@ SELECT groupid AS worker_2_group FROM pg_dist_node WHERE nodeport = :worker_2_po INSERT INTO pg_dist_placement (groupid, shardid, shardstate, shardlength) VALUES (:worker_2_group, :testshardid, 3, 0); -SELECT master_copy_shard_placement(:testshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port); +SELECT citus_copy_shard_placement(:testshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port); SELECT shardid, nodename, nodeport, shardstate FROM pg_dist_shard_placement diff --git a/src/test/regress/sql/replicated_partitioned_table.sql b/src/test/regress/sql/replicated_partitioned_table.sql index f0573bb34..82c1ece53 100644 --- a/src/test/regress/sql/replicated_partitioned_table.sql +++ b/src/test/regress/sql/replicated_partitioned_table.sql @@ -152,7 +152,7 @@ INSERT INTO collections_agg SELECT key, sum(key) FROM collections_1 GROUP BY key -- coordinator roll-up INSERT INTO collections_agg SELECT collection_id, sum(key) FROM collections_1 GROUP BY collection_id; --- now make sure that repair functionality works fine +-- now make sure that copy functionality works fine -- create a table and create its distribution metadata CREATE TABLE customer_engagements (id integer, event_id int) PARTITION BY LIST ( event_id ); @@ -173,7 +173,7 @@ CREATE INDEX ON customer_engagements (id, event_id); -- create a single shard on the first worker SET citus.shard_count TO 1; SET citus.shard_replication_factor TO 2; -SELECT create_distributed_table('customer_engagements', 'id', 'hash'); +SELECT create_distributed_table('customer_engagements', 'id', 'hash', colocate_with := 'none'); -- ingest some data for the tests INSERT INTO customer_engagements VALUES (1, 1); @@ -181,39 +181,29 @@ INSERT INTO customer_engagements VALUES (2, 1); INSERT INTO customer_engagements VALUES (1, 2); INSERT INTO customer_engagements VALUES (2, 2); --- the following queries does the following: --- (i) create a new shard --- (ii) mark the second shard placements as unhealthy --- (iii) do basic checks i.e., only allow copy from healthy placement to unhealthy ones --- (iv) do a successful master_copy_shard_placement from the first placement to the second --- (v) mark the first placement as unhealthy and execute a query that is routed to the second placement - -SELECT groupid AS worker_2_group FROM pg_dist_node WHERE nodeport=:worker_2_port \gset -SELECT groupid AS worker_1_group FROM pg_dist_node WHERE nodeport=:worker_1_port \gset - -- get the newshardid SELECT shardid as newshardid FROM pg_dist_shard WHERE logicalrelid = 'customer_engagements'::regclass \gset --- now, update the second placement as unhealthy -UPDATE pg_dist_placement SET shardstate = 3 WHERE shardid = :newshardid - AND groupid = :worker_2_group; +-- delete all the placements on the second node belonging to partitioning hierarchy +DELETE FROM pg_dist_shard_placement p USING pg_dist_shard s +WHERE s.shardid = p.shardid AND nodeport = :worker_2_port AND logicalrelid::text LIKE 'customer_engagements%'; --- cannot repair a shard after a modification (transaction still open during repair) +-- cannot copy a shard after a modification (transaction still open during copy) BEGIN; INSERT INTO customer_engagements VALUES (1, 1); -SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port); +SELECT citus_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 
transfer_mode := 'block_writes'); ROLLBACK; --- modifications after reparing a shard are fine (will use new metadata) +-- modifications after copying a shard are fine (will use new metadata) BEGIN; -SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port); +SELECT citus_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port, transfer_mode := 'block_writes'); ALTER TABLE customer_engagements ADD COLUMN value float DEFAULT 1.0; SELECT * FROM customer_engagements ORDER BY 1,2,3; ROLLBACK; BEGIN; -SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port); +SELECT citus_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port, transfer_mode := 'block_writes'); INSERT INTO customer_engagements VALUES (1, 1); SELECT count(*) FROM customer_engagements; ROLLBACK; diff --git a/src/test/regress/sql/tableam.sql b/src/test/regress/sql/tableam.sql index f0ed5cfca..dd1976f70 100644 --- a/src/test/regress/sql/tableam.sql +++ b/src/test/regress/sql/tableam.sql @@ -81,7 +81,7 @@ COPY test_range_dist FROM PROGRAM 'echo 25, 16 && echo 26, 1 && echo 27, 4 && ec SELECT * FROM master_get_table_ddl_events('test_range_dist'); -- --- Test master_copy_shard_placement with a fake_am table +-- Test copy_copy_shard_placement with a fake_am table -- select a.shardid, a.nodeport @@ -89,14 +89,13 @@ FROM pg_dist_shard b, pg_dist_shard_placement a WHERE a.shardid=b.shardid AND logicalrelid = 'test_hash_dist'::regclass::oid ORDER BY a.shardid, nodeport; --- Change repmodel to allow master_copy_shard_placement +-- Change repmodel to allow copy_copy_shard_placement UPDATE pg_dist_partition SET repmodel='c' WHERE logicalrelid = 'test_hash_dist'::regclass; -SELECT master_copy_shard_placement( +SELECT citus_copy_shard_placement( get_shard_id_for_distribution_column('test_hash_dist', '1'), 'localhost', :worker_1_port, 'localhost', :worker_2_port, - do_repair := false, transfer_mode := 'block_writes'); select a.shardid, a.nodeport From a2d86214b28c1f73b968d39ad4731cbec8f6f71d Mon Sep 17 00:00:00 2001 From: Jelte Fennema Date: Fri, 9 Sep 2022 16:45:38 +0200 Subject: [PATCH 02/17] Share more replication code between moves and splits (#6310) The logical replication catchup part for shard splits and shard moves is very similar. This abstracts most of that similarity away into a single function. This also improves the logic for non blocking shard splits a bit, by using faster foreign key creation. It also parallelizes index creation which shard moves were already doing, but shard splits did not. 
--- .../distributed/operations/shard_split.c | 89 +-- .../replication/multi_logical_replication.c | 563 ++++++++++-------- .../distributed/multi_logical_replication.h | 8 + .../failure_tenant_isolation_nonblocking.out | 27 - .../failure_tenant_isolation_nonblocking.sql | 12 - 5 files changed, 340 insertions(+), 359 deletions(-) diff --git a/src/backend/distributed/operations/shard_split.c b/src/backend/distributed/operations/shard_split.c index 2f1f29868..7386f1555 100644 --- a/src/backend/distributed/operations/shard_split.c +++ b/src/backend/distributed/operations/shard_split.c @@ -132,8 +132,9 @@ static void UpdateDistributionColumnsForShardGroup(List *colocatedShardList, uint32 colocationId); static void InsertSplitChildrenShardMetadata(List *shardGroupSplitIntervalListList, List *workersForPlacementList); -static void CreatePartitioningHierarchy(List *shardGroupSplitIntervalListList, - List *workersForPlacementList); +static void CreatePartitioningHierarchyForBlockingSplit( + List *shardGroupSplitIntervalListList, + List *workersForPlacementList); static void CreateForeignKeyConstraints(List *shardGroupSplitIntervalListList, List *workersForPlacementList); static Task * CreateTaskForDDLCommandList(List *ddlCommandList, WorkerNode *workerNode); @@ -630,8 +631,9 @@ BlockingShardSplit(SplitOperation splitOperation, workersForPlacementList); /* create partitioning hierarchy, if any */ - CreatePartitioningHierarchy(shardGroupSplitIntervalListList, - workersForPlacementList); + CreatePartitioningHierarchyForBlockingSplit( + shardGroupSplitIntervalListList, + workersForPlacementList); /* * Create foreign keys if exists after the metadata changes happening in @@ -1218,8 +1220,8 @@ InsertSplitChildrenShardMetadata(List *shardGroupSplitIntervalListList, * hierarchy between the shardList, if any. */ static void -CreatePartitioningHierarchy(List *shardGroupSplitIntervalListList, - List *workersForPlacementList) +CreatePartitioningHierarchyForBlockingSplit(List *shardGroupSplitIntervalListList, + List *workersForPlacementList) { /* Create partition heirarchy between shards */ List *shardIntervalList = NIL; @@ -1610,51 +1612,18 @@ NonBlockingShardSplit(SplitOperation splitOperation, snapshot, distributionColumnOverrides); /* - * 9) Create replica identities, this needs to be done before enabling - * the subscriptions. + * 9) Logically replicate all the changes and do most of the table DDL, + * like index and foreign key creation. 
*/ - CreateReplicaIdentities(logicalRepTargetList); + CompleteNonBlockingShardTransfer(sourceColocatedShardIntervalList, + sourceConnection, + publicationInfoHash, + logicalRepTargetList, + groupedLogicalRepTargetsHash, + SHARD_SPLIT); /* - * 10) Enable the subscriptions: Start the catchup phase - */ - EnableSubscriptions(logicalRepTargetList); - - /* 11) Wait for subscriptions to be ready */ - WaitForAllSubscriptionsToBecomeReady(groupedLogicalRepTargetsHash); - - /* 12) Wait for subscribers to catchup till source LSN */ - WaitForAllSubscriptionsToCatchUp(sourceConnection, groupedLogicalRepTargetsHash); - - /* 13) Create Auxilary structures */ - CreateAuxiliaryStructuresForShardGroup(shardGroupSplitIntervalListList, - workersForPlacementList, - false /* includeReplicaIdentity*/); - - /* 14) Wait for subscribers to catchup till source LSN */ - WaitForAllSubscriptionsToCatchUp(sourceConnection, groupedLogicalRepTargetsHash); - - /* Used for testing */ - ConflictOnlyWithIsolationTesting(); - - /* 15) Block writes on source shards */ - BlockWritesToShardList(sourceColocatedShardIntervalList); - - /* 16) Wait for subscribers to catchup till source LSN */ - WaitForAllSubscriptionsToCatchUp(sourceConnection, groupedLogicalRepTargetsHash); - - /* 17) Drop Subscribers */ - DropSubscriptions(logicalRepTargetList); - - /* 18) Drop replication slots - */ - DropReplicationSlots(sourceConnection, logicalRepTargetList); - - /* 19) Drop Publications */ - DropPublications(sourceConnection, publicationInfoHash); - - /* - * 20) Delete old shards metadata and either mark the shards as + * 10) Delete old shards metadata and either mark the shards as * to be deferred drop or physically delete them. * Have to do that before creating the new shard metadata, * because there's cross-checks preventing inconsistent metadata @@ -1672,7 +1641,7 @@ NonBlockingShardSplit(SplitOperation splitOperation, DropShardListMetadata(sourceColocatedShardIntervalList); /* - * 21) In case of create_distributed_table_concurrently, which converts + * 11) In case of create_distributed_table_concurrently, which converts * a Citus local table to a distributed table, update the distributed * table metadata now. * @@ -1704,34 +1673,36 @@ NonBlockingShardSplit(SplitOperation splitOperation, targetColocationId); } - /* 22) Insert new shard and placement metdata */ + /* 12) Insert new shard and placement metdata */ InsertSplitChildrenShardMetadata(shardGroupSplitIntervalListList, workersForPlacementList); - CreatePartitioningHierarchy(shardGroupSplitIntervalListList, - workersForPlacementList); + /* 13) create partitioning hierarchy, if any, this needs to be done + * after the metadata is correct, because it fails for some + * uninvestigated reason otherwise. + */ + CreatePartitioningHierarchy(logicalRepTargetList); /* - * 23) Create foreign keys if exists after the metadata changes happening in + * 14) Create foreign keys if exists after the metadata changes happening in * DropShardList() and InsertSplitChildrenShardMetadata() because the foreign * key creation depends on the new metadata. */ - CreateForeignKeyConstraints(shardGroupSplitIntervalListList, - workersForPlacementList); + CreateUncheckedForeignKeyConstraints(logicalRepTargetList); /* - * 24) Release shared memory allocated by worker_split_shard_replication_setup udf + * 15) Release shared memory allocated by worker_split_shard_replication_setup udf * at source node. 
*/ ExecuteSplitShardReleaseSharedMemory(sourceShardToCopyNode); - /* 25) Close source connection */ + /* 16) Close source connection */ CloseConnection(sourceConnection); - /* 26) Close all subscriber connections */ + /* 17) Close all subscriber connections */ CloseGroupedLogicalRepTargetsConnections(groupedLogicalRepTargetsHash); - /* 27) Close connection of template replication slot */ + /* 18) Close connection of template replication slot */ CloseConnection(sourceReplicationConnection); } PG_CATCH(); diff --git a/src/backend/distributed/replication/multi_logical_replication.c b/src/backend/distributed/replication/multi_logical_replication.c index 3f1c429f0..bc1a69a3d 100644 --- a/src/backend/distributed/replication/multi_logical_replication.c +++ b/src/backend/distributed/replication/multi_logical_replication.c @@ -114,31 +114,20 @@ bool PlacementMovedUsingLogicalReplicationInTX = false; static int logicalReplicationProgressReportTimeout = 10 * 1000; -static void CreateForeignKeyConstraints(List *logicalRepTargetList); static List * PrepareReplicationSubscriptionList(List *shardList); static List * GetReplicaIdentityCommandListForShard(Oid relationId, uint64 shardId); static List * GetIndexCommandListForShardBackingReplicaIdentity(Oid relationId, uint64 shardId); -static void CreatePostLogicalReplicationDataLoadObjects(List *shardList, - char *targetNodeName, - int32 targetNodePort); -static void ExecuteCreateIndexCommands(List *shardList, char *targetNodeName, - int targetNodePort); -static void ExecuteCreateConstraintsBackedByIndexCommands(List *shardList, - char *targetNodeName, - int targetNodePort); +static void CreatePostLogicalReplicationDataLoadObjects(List *logicalRepTargetList, + LogicalRepType type); +static void ExecuteCreateIndexCommands(List *logicalRepTargetList); +static void ExecuteCreateConstraintsBackedByIndexCommands(List *logicalRepTargetList); static List * ConvertNonExistingPlacementDDLCommandsToTasks(List *shardCommandList, - uint64 shardId, char *targetNodeName, int targetNodePort); -static void ExecuteClusterOnCommands(List *shardList, char *targetNodeName, - int targetNodePort); -static void ExecuteCreateIndexStatisticsCommands(List *shardList, char *targetNodeName, - int targetNodePort); -static void ExecuteRemainingPostLoadTableCommands(List *shardList, char *targetNodeName, - int targetNodePort); -static void CreatePartitioningHierarchy(List *shardList, char *targetNodeName, - int targetNodePort); +static void ExecuteClusterOnCommands(List *logicalRepTargetList); +static void ExecuteCreateIndexStatisticsCommands(List *logicalRepTargetList); +static void ExecuteRemainingPostLoadTableCommands(List *logicalRepTargetList); static char * escape_param_str(const char *str); static XLogRecPtr GetRemoteLSN(MultiConnection *connection, char *command); static bool RelationSubscriptionsAreReady( @@ -208,10 +197,6 @@ LogicallyReplicateShards(List *shardList, char *sourceNodeName, int sourceNodePo */ ClaimConnectionExclusively(sourceConnection); - - MultiConnection *sourceReplicationConnection = - GetReplicationConnection(sourceNodeName, sourceNodePort); - WorkerNode *sourceNode = FindWorkerNode(sourceNodeName, sourceNodePort); WorkerNode *targetNode = FindWorkerNode(targetNodeName, targetNodePort); @@ -229,6 +214,9 @@ LogicallyReplicateShards(List *shardList, char *sourceNodeName, int sourceNodePo PG_TRY(); { + MultiConnection *sourceReplicationConnection = + GetReplicationConnection(sourceConnection->hostname, sourceConnection->port); + /* set up the publication on the 
source and subscription on the target */ CreatePublications(sourceConnection, publicationInfoHash); char *snapshot = CreateReplicationSlots( @@ -239,7 +227,7 @@ LogicallyReplicateShards(List *shardList, char *sourceNodeName, int sourceNodePo CreateSubscriptions( sourceConnection, - databaseName, + sourceConnection->database, logicalRepTargetList); /* only useful for isolation testing, see the function comment for the details */ @@ -256,77 +244,14 @@ LogicallyReplicateShards(List *shardList, char *sourceNodeName, int sourceNodePo CloseConnection(sourceReplicationConnection); /* - * We have to create the primary key (or any other replica identity) - * before the update/delete operations that are queued will be - * replicated. Because if the replica identity does not exist on the - * target, the replication would fail. - * - * So we it right after the initial data COPY, but before enabling the - * susbcriptions. We do it at this latest possible moment, because its - * much cheaper to build an index at once than to create it - * incrementally. So this way we create the primary key index in one go - * for all data from the initial COPY. + * Start the replication and copy all data */ - CreateReplicaIdentities(logicalRepTargetList); - - /* Start applying the changes from the replication slots to catch up. */ - EnableSubscriptions(logicalRepTargetList); - - /* - * The following check is a leftover from when used subscriptions with - * copy_data=true. It's probably not really necessary anymore, but it - * seemed like a nice check to keep. At least for debugging issues it - * seems nice to report differences between the subscription never - * becoming ready and the subscriber not applying WAL. It's also not - * entirely clear if the catchup check handles the case correctly where - * the subscription is not in the ready state yet, because so far it - * never had to. - */ - WaitForAllSubscriptionsToBecomeReady(groupedLogicalRepTargetsHash); - - /* - * Wait until all the subscriptions are caught up to changes that - * happened after the initial COPY on the shards. - */ - WaitForAllSubscriptionsToCatchUp(sourceConnection, groupedLogicalRepTargetsHash); - - /* - * Now lets create the post-load objects, such as the indexes, constraints - * and partitioning hierarchy. Once they are done, wait until the replication - * catches up again. So we don't block writes too long. - */ - CreatePostLogicalReplicationDataLoadObjects(shardList, targetNodeName, - targetNodePort); - WaitForAllSubscriptionsToCatchUp(sourceConnection, groupedLogicalRepTargetsHash); - - /* - * We're almost done, we'll block the writes to the shards that we're - * replicating and expect all the subscription to catch up quickly - * afterwards. - * - * Notice that although shards in partitioned relation are excluded from - * logical replication, they are still locked against modification, and - * foreign constraints are created on them too. - */ - BlockWritesToShardList(shardList); - - WaitForAllSubscriptionsToCatchUp(sourceConnection, groupedLogicalRepTargetsHash); - - /* - * We're creating the foreign constraints to reference tables after the - * data is already replicated and all the necessary locks are acquired. - * - * We prefer to do it here because the placements of reference tables - * are always valid, and any modification during the shard move would - * cascade to the hash distributed tables' shards if we had created - * the constraints earlier. 
- */ - CreateForeignKeyConstraints(logicalRepTargetList); - - /* we're done, cleanup the publication and subscription */ - DropSubscriptions(logicalRepTargetList); - DropReplicationSlots(sourceConnection, logicalRepTargetList); - DropPublications(sourceConnection, publicationInfoHash); + CompleteNonBlockingShardTransfer(shardList, + sourceConnection, + publicationInfoHash, + logicalRepTargetList, + groupedLogicalRepTargetsHash, + SHARD_MOVE); /* * We use these connections exclusively for subscription management, @@ -405,6 +330,104 @@ CreateGroupedLogicalRepTargetsHash(List *logicalRepTargetList) } + +/* + * CompleteNonBlockingShardTransfer uses logical replication to apply the changes + * made on the source to the target. It also runs all DDL on the target shards + * that need to be run after the data copy. + * + * For shard splits it skips the partition hierarchy and foreign key creation + * though, since those need to happen after the metadata is updated. + */ +void +CompleteNonBlockingShardTransfer(List *shardList, + MultiConnection *sourceConnection, + HTAB *publicationInfoHash, + List *logicalRepTargetList, + HTAB *groupedLogicalRepTargetsHash, + LogicalRepType type) +{ + /* + * We have to create the primary key (or any other replica identity) + * before the update/delete operations that are queued will be + * replicated. Because if the replica identity does not exist on the + * target, the replication would fail. + * + * So we do it right after the initial data COPY, but before enabling the + * subscriptions. We do it at this latest possible moment, because it's + * much cheaper to build an index at once than to create it + * incrementally. So this way we create the primary key index in one go + * for all data from the initial COPY. + */ + CreateReplicaIdentities(logicalRepTargetList); + + /* Start applying the changes from the replication slots to catch up. */ + EnableSubscriptions(logicalRepTargetList); + + /* + * The following check is a leftover from when we used subscriptions with + * copy_data=true. It's probably not really necessary anymore, but it + * seemed like a nice check to keep. At least for debugging issues it + * seems nice to report differences between the subscription never + * becoming ready and the subscriber not applying WAL. It's also not + * entirely clear if the catchup check handles the case correctly where + * the subscription is not in the ready state yet, because so far it + * never had to. + */ + WaitForAllSubscriptionsToBecomeReady(groupedLogicalRepTargetsHash); + + /* + * Wait until all the subscriptions are caught up to changes that + * happened after the initial COPY on the shards. + */ + WaitForAllSubscriptionsToCatchUp(sourceConnection, groupedLogicalRepTargetsHash); + + /* + * Now let's create the post-load objects, such as the indexes, constraints + * and partitioning hierarchy. Once they are done, wait until the replication + * catches up again. So we don't block writes too long. + */ + CreatePostLogicalReplicationDataLoadObjects(logicalRepTargetList, type); + WaitForAllSubscriptionsToCatchUp(sourceConnection, groupedLogicalRepTargetsHash); + + + /* only useful for isolation testing, see the function comment for the details */ + ConflictOnlyWithIsolationTesting(); + + /* + * We're almost done, we'll block the writes to the shards that we're + * replicating and expect all the subscriptions to catch up quickly + * afterwards.
+ * + * Notice that although shards in partitioned relation are excluded from + * logical replication, they are still locked against modification, and + * foreign constraints are created on them too. + */ + BlockWritesToShardList(shardList); + + WaitForAllSubscriptionsToCatchUp(sourceConnection, groupedLogicalRepTargetsHash); + + if (type != SHARD_SPLIT) + { + /* + * We're creating the foreign constraints to reference tables after the + * data is already replicated and all the necessary locks are acquired. + * + * We prefer to do it here because the placements of reference tables + * are always valid, and any modification during the shard move would + * cascade to the hash distributed tables' shards if we had created + * the constraints earlier. The same is true for foreign keys between + * tables owned by different users. + */ + CreateUncheckedForeignKeyConstraints(logicalRepTargetList); + } + + /* we're done, cleanup the publication and subscription */ + DropSubscriptions(logicalRepTargetList); + DropReplicationSlots(sourceConnection, logicalRepTargetList); + DropPublications(sourceConnection, publicationInfoHash); +} + + /* * CreateShardMovePublicationInfoHash creates hashmap of PublicationInfos for a * shard move. Even though we only support moving a shard to a single target @@ -742,8 +765,8 @@ GetReplicaIdentityCommandListForShard(Oid relationId, uint64 shardId) * the objects that can be created after the data is moved with logical replication. */ static void -CreatePostLogicalReplicationDataLoadObjects(List *shardList, char *targetNodeName, - int32 targetNodePort) +CreatePostLogicalReplicationDataLoadObjects(List *logicalRepTargetList, + LogicalRepType type) { /* * We create indexes in 4 steps. @@ -759,20 +782,25 @@ CreatePostLogicalReplicationDataLoadObjects(List *shardList, char *targetNodeNam * table and setting the statistics of indexes, depends on the indexes being * created. That's why the execution is divided into four distinct stages. */ - ExecuteCreateIndexCommands(shardList, targetNodeName, targetNodePort); - ExecuteCreateConstraintsBackedByIndexCommands(shardList, targetNodeName, - targetNodePort); - ExecuteClusterOnCommands(shardList, targetNodeName, targetNodePort); - ExecuteCreateIndexStatisticsCommands(shardList, targetNodeName, targetNodePort); + ExecuteCreateIndexCommands(logicalRepTargetList); + ExecuteCreateConstraintsBackedByIndexCommands(logicalRepTargetList); + ExecuteClusterOnCommands(logicalRepTargetList); + ExecuteCreateIndexStatisticsCommands(logicalRepTargetList); /* * Once the indexes are created, there are few more objects like triggers and table * statistics that should be created after the data move. */ - ExecuteRemainingPostLoadTableCommands(shardList, targetNodeName, targetNodePort); + ExecuteRemainingPostLoadTableCommands(logicalRepTargetList); - /* create partitioning hierarchy, if any */ - CreatePartitioningHierarchy(shardList, targetNodeName, targetNodePort); + /* + * Creating the partitioning hierarchy errors out in shard splits when + */ + if (type != SHARD_SPLIT) + { + /* create partitioning hierarchy, if any */ + CreatePartitioningHierarchy(logicalRepTargetList); + } } @@ -784,27 +812,31 @@ CreatePostLogicalReplicationDataLoadObjects(List *shardList, char *targetNodeNam * commands fail. 
*/ static void -ExecuteCreateIndexCommands(List *shardList, char *targetNodeName, int targetNodePort) +ExecuteCreateIndexCommands(List *logicalRepTargetList) { List *taskList = NIL; - ListCell *shardCell = NULL; - foreach(shardCell, shardList) + LogicalRepTarget *target = NULL; + foreach_ptr(target, logicalRepTargetList) { - ShardInterval *shardInterval = (ShardInterval *) lfirst(shardCell); - Oid relationId = shardInterval->relationId; + ShardInterval *shardInterval = NULL; + foreach_ptr(shardInterval, target->newShards) + { + Oid relationId = shardInterval->relationId; - List *tableCreateIndexCommandList = - GetTableIndexAndConstraintCommandsExcludingReplicaIdentity(relationId, - INCLUDE_CREATE_INDEX_STATEMENTS); + List *tableCreateIndexCommandList = + GetTableIndexAndConstraintCommandsExcludingReplicaIdentity(relationId, + INCLUDE_CREATE_INDEX_STATEMENTS); - List *shardCreateIndexCommandList = - WorkerApplyShardDDLCommandList(tableCreateIndexCommandList, - shardInterval->shardId); - List *taskListForShard = - ConvertNonExistingPlacementDDLCommandsToTasks(shardCreateIndexCommandList, - shardInterval->shardId, - targetNodeName, targetNodePort); - taskList = list_concat(taskList, taskListForShard); + List *shardCreateIndexCommandList = + WorkerApplyShardDDLCommandList(tableCreateIndexCommandList, + shardInterval->shardId); + List *taskListForShard = + ConvertNonExistingPlacementDDLCommandsToTasks( + shardCreateIndexCommandList, + target->superuserConnection->hostname, + target->superuserConnection->port); + taskList = list_concat(taskList, taskListForShard); + } } /* @@ -819,8 +851,7 @@ ExecuteCreateIndexCommands(List *shardList, char *targetNodeName, int targetNode */ ereport(DEBUG1, (errmsg("Creating post logical replication objects " - "(indexes) on node %s:%d", targetNodeName, - targetNodePort))); + "(indexes)"))); ExecuteTaskListOutsideTransaction(ROW_MODIFY_NONE, taskList, MaxAdaptiveExecutorPoolSize, @@ -836,45 +867,47 @@ ExecuteCreateIndexCommands(List *shardList, char *targetNodeName, int targetNode * commands fail. 
*/ static void -ExecuteCreateConstraintsBackedByIndexCommands(List *shardList, char *targetNodeName, - int targetNodePort) +ExecuteCreateConstraintsBackedByIndexCommands(List *logicalRepTargetList) { ereport(DEBUG1, (errmsg("Creating post logical replication objects " - "(constraints backed by indexes) on node %s:%d", - targetNodeName, - targetNodePort))); + "(constraints backed by indexes)"))); MemoryContext localContext = AllocSetContextCreate(CurrentMemoryContext, "CreateConstraintsBackedByIndexContext", ALLOCSET_DEFAULT_SIZES); MemoryContext oldContext = MemoryContextSwitchTo(localContext); - ListCell *shardCell = NULL; - foreach(shardCell, shardList) + LogicalRepTarget *target = NULL; + foreach_ptr(target, logicalRepTargetList) { - ShardInterval *shardInterval = (ShardInterval *) lfirst(shardCell); - Oid relationId = shardInterval->relationId; - - List *tableCreateConstraintCommandList = - GetTableIndexAndConstraintCommandsExcludingReplicaIdentity(relationId, - INCLUDE_CREATE_CONSTRAINT_STATEMENTS); - - if (tableCreateConstraintCommandList == NIL) + ShardInterval *shardInterval = NULL; + foreach_ptr(shardInterval, target->newShards) { - /* no constraints backed by indexes, skip */ + Oid relationId = shardInterval->relationId; + + List *tableCreateConstraintCommandList = + GetTableIndexAndConstraintCommandsExcludingReplicaIdentity(relationId, + INCLUDE_CREATE_CONSTRAINT_STATEMENTS); + + if (tableCreateConstraintCommandList == NIL) + { + /* no constraints backed by indexes, skip */ + MemoryContextReset(localContext); + continue; + } + + List *shardCreateConstraintCommandList = + WorkerApplyShardDDLCommandList(tableCreateConstraintCommandList, + shardInterval->shardId); + + char *tableOwner = TableOwner(shardInterval->relationId); + SendCommandListToWorkerOutsideTransaction( + target->superuserConnection->hostname, + target->superuserConnection->port, + tableOwner, + shardCreateConstraintCommandList); MemoryContextReset(localContext); - continue; } - - List *shardCreateConstraintCommandList = - WorkerApplyShardDDLCommandList(tableCreateConstraintCommandList, - shardInterval->shardId); - - char *tableOwner = TableOwner(shardInterval->relationId); - SendCommandListToWorkerOutsideTransaction(targetNodeName, targetNodePort, - tableOwner, - shardCreateConstraintCommandList); - MemoryContextReset(localContext); } MemoryContextSwitchTo(oldContext); @@ -890,7 +923,6 @@ ExecuteCreateConstraintsBackedByIndexCommands(List *shardList, char *targetNodeN */ static List * ConvertNonExistingPlacementDDLCommandsToTasks(List *shardCommandList, - uint64 shardId, char *targetNodeName, int targetNodePort) { @@ -911,7 +943,6 @@ ConvertNonExistingPlacementDDLCommandsToTasks(List *shardCommandList, SetPlacementNodeMetadata(taskPlacement, workerNode); task->taskPlacementList = list_make1(taskPlacement); - task->anchorShardId = shardId; taskList = lappend(taskList, task); taskId++; @@ -929,34 +960,36 @@ ConvertNonExistingPlacementDDLCommandsToTasks(List *shardCommandList, * is aborted. 
*/ static void -ExecuteClusterOnCommands(List *shardList, char *targetNodeName, int targetNodePort) +ExecuteClusterOnCommands(List *logicalRepTargetList) { List *taskList = NIL; - ListCell *shardCell; - foreach(shardCell, shardList) + LogicalRepTarget *target = NULL; + foreach_ptr(target, logicalRepTargetList) { - ShardInterval *shardInterval = (ShardInterval *) lfirst(shardCell); - Oid relationId = shardInterval->relationId; + ShardInterval *shardInterval = NULL; + foreach_ptr(shardInterval, target->newShards) + { + Oid relationId = shardInterval->relationId; - List *tableAlterTableClusterOnCommandList = - GetTableIndexAndConstraintCommandsExcludingReplicaIdentity(relationId, - INCLUDE_INDEX_CLUSTERED_STATEMENTS); + List *tableAlterTableClusterOnCommandList = + GetTableIndexAndConstraintCommandsExcludingReplicaIdentity(relationId, + INCLUDE_INDEX_CLUSTERED_STATEMENTS); - List *shardAlterTableClusterOnCommandList = - WorkerApplyShardDDLCommandList(tableAlterTableClusterOnCommandList, - shardInterval->shardId); + List *shardAlterTableClusterOnCommandList = + WorkerApplyShardDDLCommandList(tableAlterTableClusterOnCommandList, + shardInterval->shardId); - List *taskListForShard = - ConvertNonExistingPlacementDDLCommandsToTasks( - shardAlterTableClusterOnCommandList, - shardInterval->shardId, - targetNodeName, targetNodePort); - taskList = list_concat(taskList, taskListForShard); + List *taskListForShard = + ConvertNonExistingPlacementDDLCommandsToTasks( + shardAlterTableClusterOnCommandList, + target->superuserConnection->hostname, + target->superuserConnection->port); + taskList = list_concat(taskList, taskListForShard); + } } ereport(DEBUG1, (errmsg("Creating post logical replication objects " - "(CLUSTER ON) on node %s:%d", targetNodeName, - targetNodePort))); + "(CLUSTER ON)"))); ExecuteTaskListOutsideTransaction(ROW_MODIFY_NONE, taskList, MaxAdaptiveExecutorPoolSize, @@ -972,48 +1005,51 @@ ExecuteClusterOnCommands(List *shardList, char *targetNodeName, int targetNodePo * is aborted. 
*/ static void -ExecuteCreateIndexStatisticsCommands(List *shardList, char *targetNodeName, int - targetNodePort) +ExecuteCreateIndexStatisticsCommands(List *logicalRepTargetList) { ereport(DEBUG1, (errmsg("Creating post logical replication objects " - "(index statistics) on node %s:%d", targetNodeName, - targetNodePort))); + "(index statistics)"))); MemoryContext localContext = AllocSetContextCreate(CurrentMemoryContext, "CreateIndexStatisticsContext", ALLOCSET_DEFAULT_SIZES); MemoryContext oldContext = MemoryContextSwitchTo(localContext); - ListCell *shardCell; - foreach(shardCell, shardList) + LogicalRepTarget *target = NULL; + foreach_ptr(target, logicalRepTargetList) { - ShardInterval *shardInterval = (ShardInterval *) lfirst(shardCell); - Oid relationId = shardInterval->relationId; - - List *tableAlterIndexSetStatisticsCommandList = - GetTableIndexAndConstraintCommandsExcludingReplicaIdentity(relationId, - INCLUDE_INDEX_STATISTICS_STATEMENTTS); - List *shardAlterIndexSetStatisticsCommandList = - WorkerApplyShardDDLCommandList(tableAlterIndexSetStatisticsCommandList, - shardInterval->shardId); - - if (shardAlterIndexSetStatisticsCommandList == NIL) + ShardInterval *shardInterval = NULL; + foreach_ptr(shardInterval, target->newShards) { - /* no index statistics exists, skip */ + Oid relationId = shardInterval->relationId; + + List *tableAlterIndexSetStatisticsCommandList = + GetTableIndexAndConstraintCommandsExcludingReplicaIdentity(relationId, + INCLUDE_INDEX_STATISTICS_STATEMENTTS); + List *shardAlterIndexSetStatisticsCommandList = + WorkerApplyShardDDLCommandList(tableAlterIndexSetStatisticsCommandList, + shardInterval->shardId); + + if (shardAlterIndexSetStatisticsCommandList == NIL) + { + /* no index statistics exists, skip */ + MemoryContextReset(localContext); + continue; + } + + /* + * These remaining operations do not require significant resources, so no + * need to create them in parallel. + */ + char *tableOwner = TableOwner(shardInterval->relationId); + SendCommandListToWorkerOutsideTransaction( + target->superuserConnection->hostname, + target->superuserConnection->port, + tableOwner, + shardAlterIndexSetStatisticsCommandList); + MemoryContextReset(localContext); - continue; } - - /* - * These remaining operations do not require significant resources, so no - * need to create them in parallel. - */ - char *tableOwner = TableOwner(shardInterval->relationId); - SendCommandListToWorkerOutsideTransaction(targetNodeName, targetNodePort, - tableOwner, - shardAlterIndexSetStatisticsCommandList); - - MemoryContextReset(localContext); } MemoryContextSwitchTo(oldContext); @@ -1026,52 +1062,55 @@ ExecuteCreateIndexStatisticsCommands(List *shardList, char *targetNodeName, int * in the given target node. 
*/ static void -ExecuteRemainingPostLoadTableCommands(List *shardList, char *targetNodeName, int - targetNodePort) +ExecuteRemainingPostLoadTableCommands(List *logicalRepTargetList) { ereport(DEBUG1, (errmsg("Creating post logical replication objects " - "(triggers and table statistics) on node %s:%d", - targetNodeName, - targetNodePort))); + "(triggers and table statistics)" + ))); MemoryContext localContext = AllocSetContextCreate(CurrentMemoryContext, "CreateTableStatisticsContext", ALLOCSET_DEFAULT_SIZES); MemoryContext oldContext = MemoryContextSwitchTo(localContext); - ListCell *shardCell = NULL; - foreach(shardCell, shardList) + LogicalRepTarget *target = NULL; + foreach_ptr(target, logicalRepTargetList) { - ShardInterval *shardInterval = (ShardInterval *) lfirst(shardCell); - Oid relationId = shardInterval->relationId; - - bool includeIndexes = false; - bool includeReplicaIdentity = false; - - List *tablePostLoadTableCommandList = - GetPostLoadTableCreationCommands(relationId, includeIndexes, - includeReplicaIdentity); - - List *shardPostLoadTableCommandList = - WorkerApplyShardDDLCommandList(tablePostLoadTableCommandList, - shardInterval->shardId); - - if (shardPostLoadTableCommandList == NIL) + ShardInterval *shardInterval = NULL; + foreach_ptr(shardInterval, target->newShards) { - /* no index statistics exists, skip */ - continue; + Oid relationId = shardInterval->relationId; + + bool includeIndexes = false; + bool includeReplicaIdentity = false; + + List *tablePostLoadTableCommandList = + GetPostLoadTableCreationCommands(relationId, includeIndexes, + includeReplicaIdentity); + + List *shardPostLoadTableCommandList = + WorkerApplyShardDDLCommandList(tablePostLoadTableCommandList, + shardInterval->shardId); + + if (shardPostLoadTableCommandList == NIL) + { + /* no index statistics exists, skip */ + continue; + } + + /* + * These remaining operations do not require significant resources, so no + * need to create them in parallel. + */ + char *tableOwner = TableOwner(shardInterval->relationId); + SendCommandListToWorkerOutsideTransaction( + target->superuserConnection->hostname, + target->superuserConnection->port, + tableOwner, + shardPostLoadTableCommandList); + + MemoryContextReset(localContext); } - - /* - * These remaining operations do not require significant resources, so no - * need to create them in parallel. 
- */ - char *tableOwner = TableOwner(shardInterval->relationId); - SendCommandListToWorkerOutsideTransaction(targetNodeName, targetNodePort, - tableOwner, - shardPostLoadTableCommandList); - - MemoryContextReset(localContext); } MemoryContextSwitchTo(oldContext); @@ -1082,40 +1121,42 @@ ExecuteRemainingPostLoadTableCommands(List *shardList, char *targetNodeName, int * CreatePartitioningHierarchy gets a shardList and creates the partitioning * hierarchy between the shardList, if any, */ -static void -CreatePartitioningHierarchy(List *shardList, char *targetNodeName, int targetNodePort) +void +CreatePartitioningHierarchy(List *logicalRepTargetList) { ereport(DEBUG1, (errmsg("Creating post logical replication objects " - "(partitioning hierarchy) on node %s:%d", targetNodeName, - targetNodePort))); + "(partitioning hierarchy)"))); MemoryContext localContext = AllocSetContextCreate(CurrentMemoryContext, "CreatePartitioningHierarchy", ALLOCSET_DEFAULT_SIZES); MemoryContext oldContext = MemoryContextSwitchTo(localContext); - ListCell *shardCell = NULL; - foreach(shardCell, shardList) + LogicalRepTarget *target = NULL; + foreach_ptr(target, logicalRepTargetList) { - ShardInterval *shardInterval = (ShardInterval *) lfirst(shardCell); - - if (PartitionTable(shardInterval->relationId)) + ShardInterval *shardInterval = NULL; + foreach_ptr(shardInterval, target->newShards) { - char *attachPartitionCommand = - GenerateAttachShardPartitionCommand(shardInterval); + if (PartitionTable(shardInterval->relationId)) + { + char *attachPartitionCommand = + GenerateAttachShardPartitionCommand(shardInterval); - char *tableOwner = TableOwner(shardInterval->relationId); + char *tableOwner = TableOwner(shardInterval->relationId); - /* - * Attaching partition may acquire conflicting locks when created in - * parallel, so create them sequentially. Also attaching partition - * is a quick operation, so it is fine to execute sequentially. - */ - SendCommandListToWorkerOutsideTransaction(targetNodeName, targetNodePort, - tableOwner, - list_make1( - attachPartitionCommand)); - MemoryContextReset(localContext); + /* + * Attaching partition may acquire conflicting locks when created in + * parallel, so create them sequentially. Also attaching partition + * is a quick operation, so it is fine to execute sequentially. + */ + SendCommandListToWorkerOutsideTransaction( + target->superuserConnection->hostname, + target->superuserConnection->port, + tableOwner, + list_make1(attachPartitionCommand)); + MemoryContextReset(localContext); + } } } @@ -1124,17 +1165,17 @@ CreatePartitioningHierarchy(List *shardList, char *targetNodeName, int targetNod /* - * CreateForeignKeyConstraints is used to create the foreign constraints - * on the logical replication target without checking that they are actually - * valid. + * CreateUncheckedForeignKeyConstraints is used to create the foreign + * constraints on the logical replication target without checking that they are + * actually valid. * * We skip the validation phase of foreign keys to after a shard * move/copy/split because the validation is pretty costly and given that the * source placements are already valid, the validation in the target nodes is * useless. 
*/ -static void -CreateForeignKeyConstraints(List *logicalRepTargetList) +void +CreateUncheckedForeignKeyConstraints(List *logicalRepTargetList) { MemoryContext localContext = AllocSetContextCreate(CurrentMemoryContext, diff --git a/src/include/distributed/multi_logical_replication.h b/src/include/distributed/multi_logical_replication.h index 168f7b03c..994650568 100644 --- a/src/include/distributed/multi_logical_replication.h +++ b/src/include/distributed/multi_logical_replication.h @@ -177,5 +177,13 @@ extern void RecreateGroupedLogicalRepTargetsConnections( char *user, char *databaseName); extern void CloseGroupedLogicalRepTargetsConnections(HTAB *groupedLogicalRepTargetsHash); +extern void CompleteNonBlockingShardTransfer(List *shardList, + MultiConnection *sourceConnection, + HTAB *publicationInfoHash, + List *logicalRepTargetList, + HTAB *groupedLogicalRepTargetsHash, + LogicalRepType type); +extern void CreateUncheckedForeignKeyConstraints(List *logicalRepTargetList); +extern void CreatePartitioningHierarchy(List *logicalRepTargetList); #endif /* MULTI_LOGICAL_REPLICATION_H_ */ diff --git a/src/test/regress/expected/failure_tenant_isolation_nonblocking.out b/src/test/regress/expected/failure_tenant_isolation_nonblocking.out index 63719c4ec..c166a41af 100644 --- a/src/test/regress/expected/failure_tenant_isolation_nonblocking.out +++ b/src/test/regress/expected/failure_tenant_isolation_nonblocking.out @@ -233,24 +233,6 @@ SELECT citus.mitmproxy('conn.onQuery(query="CREATE SUBSCRIPTION").cancel(' || :p (1 row) -SELECT isolate_tenant_to_new_shard('table_1', 5, 'CASCADE', shard_transfer_mode := 'force_logical'); -ERROR: canceling statement due to user request --- failure on colocated table constraints -SELECT citus.mitmproxy('conn.onQuery(query="ALTER TABLE tenant_isolation.table_2 ADD CONSTRAINT").after(1).kill()'); - mitmproxy ---------------------------------------------------------------------- - -(1 row) - -SELECT isolate_tenant_to_new_shard('table_1', 5, 'CASCADE', shard_transfer_mode := 'force_logical'); -ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open --- cancellation on colocated table constraints -SELECT citus.mitmproxy('conn.onQuery(query="ALTER TABLE tenant_isolation.table_2 ADD CONSTRAINT").after(2).cancel(' || :pid || ')'); - mitmproxy ---------------------------------------------------------------------- - -(1 row) - SELECT isolate_tenant_to_new_shard('table_1', 5, 'CASCADE', shard_transfer_mode := 'force_logical'); ERROR: canceling statement due to user request -- failure on catching up LSN @@ -382,15 +364,6 @@ SELECT citus.mitmproxy('conn.onQuery(query="ADD CONSTRAINT table_2_ref_id_fkey F SELECT isolate_tenant_to_new_shard('table_1', 5, 'CASCADE', shard_transfer_mode := 'force_logical'); ERROR: connection not open CONTEXT: while executing command on localhost:xxxxx --- failure on foreign key creation -SELECT citus.mitmproxy('conn.onQuery(query="ADD CONSTRAINT table_2_ref_id_fkey FOREIGN KEY").after(2).cancel(' || :pid || ')'); - mitmproxy ---------------------------------------------------------------------- - -(1 row) - -SELECT isolate_tenant_to_new_shard('table_1', 5, 'CASCADE', shard_transfer_mode := 'force_logical'); -ERROR: canceling statement due to user request -- failure on shard split transaction SELECT citus.mitmproxy('conn.onQuery(query="BEGIN").kill()'); mitmproxy diff --git a/src/test/regress/sql/failure_tenant_isolation_nonblocking.sql b/src/test/regress/sql/failure_tenant_isolation_nonblocking.sql 
index 60bfff417..2ee928a56 100644 --- a/src/test/regress/sql/failure_tenant_isolation_nonblocking.sql +++ b/src/test/regress/sql/failure_tenant_isolation_nonblocking.sql @@ -117,14 +117,6 @@ SELECT isolate_tenant_to_new_shard('table_1', 5, 'CASCADE', shard_transfer_mode SELECT citus.mitmproxy('conn.onQuery(query="CREATE SUBSCRIPTION").cancel(' || :pid || ')'); SELECT isolate_tenant_to_new_shard('table_1', 5, 'CASCADE', shard_transfer_mode := 'force_logical'); --- failure on colocated table constraints -SELECT citus.mitmproxy('conn.onQuery(query="ALTER TABLE tenant_isolation.table_2 ADD CONSTRAINT").after(1).kill()'); -SELECT isolate_tenant_to_new_shard('table_1', 5, 'CASCADE', shard_transfer_mode := 'force_logical'); - --- cancellation on colocated table constraints -SELECT citus.mitmproxy('conn.onQuery(query="ALTER TABLE tenant_isolation.table_2 ADD CONSTRAINT").after(2).cancel(' || :pid || ')'); -SELECT isolate_tenant_to_new_shard('table_1', 5, 'CASCADE', shard_transfer_mode := 'force_logical'); - -- failure on catching up LSN SELECT citus.mitmproxy('conn.onQuery(query="SELECT pg_current_wal_lsn").kill()'); SELECT isolate_tenant_to_new_shard('table_1', 5, 'CASCADE', shard_transfer_mode := 'force_logical'); @@ -184,10 +176,6 @@ SET citus.defer_drop_after_shard_split TO ON; SELECT citus.mitmproxy('conn.onQuery(query="ADD CONSTRAINT table_2_ref_id_fkey FOREIGN KEY").kill()'); SELECT isolate_tenant_to_new_shard('table_1', 5, 'CASCADE', shard_transfer_mode := 'force_logical'); --- failure on foreign key creation -SELECT citus.mitmproxy('conn.onQuery(query="ADD CONSTRAINT table_2_ref_id_fkey FOREIGN KEY").after(2).cancel(' || :pid || ')'); -SELECT isolate_tenant_to_new_shard('table_1', 5, 'CASCADE', shard_transfer_mode := 'force_logical'); - -- failure on shard split transaction SELECT citus.mitmproxy('conn.onQuery(query="BEGIN").kill()'); SELECT isolate_tenant_to_new_shard('table_1', 5, 'CASCADE', shard_transfer_mode := 'force_logical'); From 2e943a64a00adb548c486878fa6d6950287fc058 Mon Sep 17 00:00:00 2001 From: Marco Slot Date: Fri, 9 Sep 2022 18:21:36 +0200 Subject: [PATCH 03/17] Make shard moves more idempotent (#6313) Co-authored-by: Marco Slot --- .../distributed/operations/shard_transfer.c | 92 +++++++++++++++++++ .../expected/citus_copy_shard_placement.out | 12 ++- .../multi_colocated_shard_rebalance.out | 24 ++++- src/test/regress/expected/multi_move_mx.out | 14 --- .../sql/citus_copy_shard_placement.sql | 2 +- .../sql/multi_colocated_shard_rebalance.sql | 7 +- src/test/regress/sql/multi_move_mx.sql | 16 ---- 7 files changed, 127 insertions(+), 40 deletions(-) diff --git a/src/backend/distributed/operations/shard_transfer.c b/src/backend/distributed/operations/shard_transfer.c index 468706f0e..356e3dd6a 100644 --- a/src/backend/distributed/operations/shard_transfer.c +++ b/src/backend/distributed/operations/shard_transfer.c @@ -75,6 +75,9 @@ static bool CanUseLogicalReplication(Oid relationId, char shardReplicationMode); static void ErrorIfTableCannotBeReplicated(Oid relationId); static void ErrorIfTargetNodeIsNotSafeToCopyTo(const char *targetNodeName, int targetNodePort); +static void ErrorIfSameNode(char *sourceNodeName, int sourceNodePort, + char *targetNodeName, int targetNodePort, + const char *operationName); static void ReplicateColocatedShardPlacement(int64 shardId, char *sourceNodeName, int32 sourceNodePort, char *targetNodeName, int32 targetNodePort, @@ -107,6 +110,8 @@ static void UpdateColocatedShardPlacementMetadataOnWorkers(int64 shardId, int32 sourceNodePort, char 
*targetNodeName, int32 targetNodePort); +static bool IsShardListOnNode(List *colocatedShardList, char *targetNodeName, + uint32 targetPort); static void CheckSpaceConstraints(MultiConnection *connection, uint64 colocationSizeInBytes); static void EnsureEnoughDiskSpaceForShardMove(List *colocatedShardList, @@ -236,6 +241,10 @@ citus_move_shard_placement(PG_FUNCTION_ARGS) ListCell *colocatedTableCell = NULL; ListCell *colocatedShardCell = NULL; + ErrorIfSameNode(sourceNodeName, sourceNodePort, + targetNodeName, targetNodePort, + "move"); + Oid relationId = RelationIdForShard(shardId); ErrorIfMoveUnsupportedTableType(relationId); ErrorIfTargetNodeIsNotSafeToMove(targetNodeName, targetNodePort); @@ -276,6 +285,20 @@ citus_move_shard_placement(PG_FUNCTION_ARGS) /* we sort colocatedShardList so that lock operations will not cause any deadlocks */ colocatedShardList = SortList(colocatedShardList, CompareShardIntervalsById); + + /* + * If there are no active placements on the source and only active placements on + * the target node, we assume the copy to already be done. + */ + if (IsShardListOnNode(colocatedShardList, targetNodeName, targetNodePort) && + !IsShardListOnNode(colocatedShardList, sourceNodeName, sourceNodePort)) + { + ereport(WARNING, (errmsg("shard is already present on node %s:%d", + targetNodeName, targetNodePort), + errdetail("Move may have already completed."))); + PG_RETURN_VOID(); + } + foreach(colocatedShardCell, colocatedShardList) { ShardInterval *colocatedShard = (ShardInterval *) lfirst(colocatedShardCell); @@ -390,6 +413,39 @@ citus_move_shard_placement(PG_FUNCTION_ARGS) } +/* + * IsShardListOnNode determines whether a co-located shard list has + * active placements on a given node. + */ +static bool +IsShardListOnNode(List *colocatedShardList, char *targetNodeName, uint32 targetNodePort) +{ + WorkerNode *workerNode = FindWorkerNode(targetNodeName, targetNodePort); + if (workerNode == NULL) + { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("Moving shards to a non-existing node is not supported"))); + } + + /* + * We exhaustively search all co-located shards + */ + ShardInterval *shardInterval = NULL; + foreach_ptr(shardInterval, colocatedShardList) + { + uint64 shardId = shardInterval->shardId; + List *placementList = ActiveShardPlacementListOnGroup(shardId, + workerNode->groupId); + if (placementList == NIL) + { + return false; + } + } + + return true; +} + + /* * EnsureEnoughDiskSpaceForShardMove checks that there is enough space for * shard moves of the given colocated shard list from source node to target node. @@ -552,6 +608,25 @@ ErrorIfTargetNodeIsNotSafeToMove(const char *targetNodeName, int targetNodePort) } +/* + * ErrorIfSameNode throws an error if the two host:port combinations + * are the same. + */ +static void +ErrorIfSameNode(char *sourceNodeName, int sourceNodePort, + char *targetNodeName, int targetNodePort, + const char *operationName) +{ + if (strncmp(sourceNodeName, targetNodeName, MAX_NODE_LENGTH) == 0 && + sourceNodePort == targetNodePort) + { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot %s shard to the same node", + operationName))); + } +} + + /* * master_move_shard_placement is a wrapper around citus_move_shard_placement. 
*/ @@ -886,6 +961,10 @@ ReplicateColocatedShardPlacement(int64 shardId, char *sourceNodeName, ShardInterval *shardInterval = LoadShardInterval(shardId); Oid distributedTableId = shardInterval->relationId; + ErrorIfSameNode(sourceNodeName, sourceNodePort, + targetNodeName, targetNodePort, + "copy"); + ErrorIfTableCannotBeReplicated(shardInterval->relationId); ErrorIfTargetNodeIsNotSafeToCopyTo(targetNodeName, targetNodePort); EnsureNoModificationsHaveBeenDone(); @@ -904,6 +983,19 @@ ReplicateColocatedShardPlacement(int64 shardId, char *sourceNodeName, */ colocatedShardList = SortList(colocatedShardList, CompareShardIntervalsById); + /* + * If there are active placements on both nodes, we assume the copy to already + * be done. + */ + if (IsShardListOnNode(colocatedShardList, targetNodeName, targetNodePort) && + IsShardListOnNode(colocatedShardList, sourceNodeName, sourceNodePort)) + { + ereport(WARNING, (errmsg("shard is already present on node %s:%d", + targetNodeName, targetNodePort), + errdetail("Copy may have already completed."))); + return; + } + /* * At this point of the shard replication, we don't need to block the writes to * shards when logical replication is used. diff --git a/src/test/regress/expected/citus_copy_shard_placement.out b/src/test/regress/expected/citus_copy_shard_placement.out index 61b935276..d82b36383 100644 --- a/src/test/regress/expected/citus_copy_shard_placement.out +++ b/src/test/regress/expected/citus_copy_shard_placement.out @@ -56,14 +56,20 @@ SELECT citus_copy_shard_placement( 'localhost', :worker_2_port, 'localhost', :worker_2_port, transfer_mode := 'block_writes'); -ERROR: shard xxxxx already exists in the target node --- verify we error out if target already contains a healthy placement +ERROR: cannot copy shard to the same node +-- verify we warn if target already contains a healthy placement SELECT citus_copy_shard_placement( (SELECT shardid FROM pg_dist_shard WHERE logicalrelid='ref_table'::regclass::oid), 'localhost', :worker_1_port, 'localhost', :worker_2_port, transfer_mode := 'block_writes'); -ERROR: shard xxxxx already exists in the target node +WARNING: shard is already present on node localhost:xxxxx +DETAIL: Copy may have already completed. + citus_copy_shard_placement +--------------------------------------------------------------------- + +(1 row) + -- verify we error out if table has foreign key constraints INSERT INTO ref_table SELECT 1, value FROM data; ALTER TABLE data ADD CONSTRAINT distfk FOREIGN KEY (value) REFERENCES ref_table (b) MATCH FULL; diff --git a/src/test/regress/expected/multi_colocated_shard_rebalance.out b/src/test/regress/expected/multi_colocated_shard_rebalance.out index bb3e68bd4..da3bae484 100644 --- a/src/test/regress/expected/multi_colocated_shard_rebalance.out +++ b/src/test/regress/expected/multi_colocated_shard_rebalance.out @@ -143,9 +143,15 @@ SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.table2_ (1 row) \c - - - :master_port --- copy colocated shards again to see error message +-- copy colocated shards again to see warning SELECT citus_copy_shard_placement(13000000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'force_logical'); -ERROR: shard xxxxx already exists in the target node +WARNING: shard is already present on node localhost:xxxxx +DETAIL: Copy may have already completed. 
+ citus_copy_shard_placement +--------------------------------------------------------------------- + +(1 row) + -- test copying NOT colocated shard -- status before shard copy SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport @@ -300,6 +306,15 @@ ORDER BY s.shardid, sp.nodeport; 13000011 | table2_group1 | 57638 (14 rows) +-- moving the shard again is idempotent +SELECT citus_move_shard_placement(13000001, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'force_logical'); +WARNING: shard is already present on node localhost:xxxxx +DETAIL: Move may have already completed. + citus_move_shard_placement +--------------------------------------------------------------------- + +(1 row) + -- also connect worker to verify we successfully moved given shard (and other colocated shards) \c - - - :worker_1_port SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.table1_group1_13000001'::regclass; @@ -412,8 +427,9 @@ ORDER BY s.shardid, sp.nodeport; (3 rows) -- try to move shard from wrong node -SELECT master_move_shard_placement(13000021, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'force_logical'); -ERROR: source placement must be in active state +SELECT master_move_shard_placement(13000021, 'localhost', :master_port, 'localhost', :worker_1_port, 'force_logical'); +ERROR: could not find placement matching "localhost:xxxxx" +HINT: Confirm the placement still exists and try again. -- test shard move with foreign constraints DROP TABLE IF EXISTS table1_group1, table2_group1; SET citus.shard_count TO 6; diff --git a/src/test/regress/expected/multi_move_mx.out b/src/test/regress/expected/multi_move_mx.out index eaf2273de..833c9f7df 100644 --- a/src/test/regress/expected/multi_move_mx.out +++ b/src/test/regress/expected/multi_move_mx.out @@ -138,20 +138,6 @@ SELECT pg_reload_conf(); t (1 row) -\c - - - :master_port -BEGIN; -SELECT - master_move_shard_placement(shardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'force_logical') -FROM - pg_dist_shard NATURAL JOIN pg_dist_shard_placement -WHERE - logicalrelid = 'mx_table_1'::regclass - AND nodeport = :worker_1_port -ORDER BY - shardid -LIMIT 1; -ERROR: source placement must be in active state -ROLLBACK; \c - - - :worker_2_port -- before reseting citus.node_conninfo, check that CREATE SUBSCRIPTION -- with citus_use_authinfo takes into account node_conninfo even when diff --git a/src/test/regress/sql/citus_copy_shard_placement.sql b/src/test/regress/sql/citus_copy_shard_placement.sql index 7861434d3..0e6a42e79 100644 --- a/src/test/regress/sql/citus_copy_shard_placement.sql +++ b/src/test/regress/sql/citus_copy_shard_placement.sql @@ -48,7 +48,7 @@ SELECT citus_copy_shard_placement( 'localhost', :worker_2_port, transfer_mode := 'block_writes'); --- verify we error out if target already contains a healthy placement +-- verify we warn if target already contains a healthy placement SELECT citus_copy_shard_placement( (SELECT shardid FROM pg_dist_shard WHERE logicalrelid='ref_table'::regclass::oid), 'localhost', :worker_1_port, diff --git a/src/test/regress/sql/multi_colocated_shard_rebalance.sql b/src/test/regress/sql/multi_colocated_shard_rebalance.sql index a8ad39c06..2afbd0942 100644 --- a/src/test/regress/sql/multi_colocated_shard_rebalance.sql +++ b/src/test/regress/sql/multi_colocated_shard_rebalance.sql @@ -78,7 +78,7 @@ SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.table2_ \c - - - :master_port --- copy colocated shards again to see error message +-- copy colocated 
shards again to see warning SELECT citus_copy_shard_placement(13000000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'force_logical'); @@ -159,6 +159,9 @@ WHERE AND sp.shardstate != 4 ORDER BY s.shardid, sp.nodeport; +-- moving the shard again is idempotent +SELECT citus_move_shard_placement(13000001, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'force_logical'); + -- also connect worker to verify we successfully moved given shard (and other colocated shards) \c - - - :worker_1_port SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.table1_group1_13000001'::regclass; @@ -222,7 +225,7 @@ ORDER BY s.shardid, sp.nodeport; -- try to move shard from wrong node -SELECT master_move_shard_placement(13000021, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'force_logical'); +SELECT master_move_shard_placement(13000021, 'localhost', :master_port, 'localhost', :worker_1_port, 'force_logical'); -- test shard move with foreign constraints diff --git a/src/test/regress/sql/multi_move_mx.sql b/src/test/regress/sql/multi_move_mx.sql index d02c1f417..166069a6e 100644 --- a/src/test/regress/sql/multi_move_mx.sql +++ b/src/test/regress/sql/multi_move_mx.sql @@ -86,23 +86,7 @@ LIMIT 1; ALTER SYSTEM SET citus.node_conninfo TO 'sslrootcert=/non/existing/certificate.crt sslmode=verify-full'; SELECT pg_reload_conf(); -\c - - - :master_port - -BEGIN; -SELECT - master_move_shard_placement(shardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'force_logical') -FROM - pg_dist_shard NATURAL JOIN pg_dist_shard_placement -WHERE - logicalrelid = 'mx_table_1'::regclass - AND nodeport = :worker_1_port -ORDER BY - shardid -LIMIT 1; -ROLLBACK; - \c - - - :worker_2_port - -- before reseting citus.node_conninfo, check that CREATE SUBSCRIPTION -- with citus_use_authinfo takes into account node_conninfo even when -- one of host, port, or user parameters are not specified. From 36f8c48560a5ebef5a21d0907e8707f63130caf7 Mon Sep 17 00:00:00 2001 From: Onder Kalaci Date: Mon, 12 Sep 2022 12:25:01 +0300 Subject: [PATCH 04/17] Add tests for allowing SET NULL/DEFAULT for subset of columns PG 15 added support for that (d6f96ed94e73052f99a2e545ed17a8b2fdc1fb8a). We also add support, but we already do not support ON DELETE SET NULL/DEFAULT for the distribution column. So, in essence, we add support for reference tables and Citus local tables.
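To illustrate the PG15 column-level action this commit tests, here is a minimal sketch that mirrors the pattern of the regress test below (table and column names are hypothetical, not part of the patch; only the column listed after SET NULL is reset on delete, which is why Citus can only allow it when the distribution column is not in that list):

CREATE TABLE parent (tid int, id int, PRIMARY KEY (tid, id));
CREATE TABLE child (
    tid int,
    parent_id int,
    -- PG15: on delete of the referenced row, only parent_id is set to NULL,
    -- tid keeps its value
    FOREIGN KEY (tid, parent_id) REFERENCES parent ON DELETE SET NULL (parent_id)
);
SELECT create_reference_table('parent');
SELECT create_reference_table('child');

After a DELETE FROM parent, child.parent_id becomes NULL while child.tid is left intact, and the row still satisfies the (MATCH SIMPLE) foreign key.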
--- src/test/regress/expected/pg15.out | 69 +++++++++++++++++++++++++++--- src/test/regress/sql/pg15.sql | 41 ++++++++++++++++++ 2 files changed, 105 insertions(+), 5 deletions(-) diff --git a/src/test/regress/expected/pg15.out b/src/test/regress/expected/pg15.out index 879c10137..03dfd3675 100644 --- a/src/test/regress/expected/pg15.out +++ b/src/test/regress/expected/pg15.out @@ -54,11 +54,11 @@ SELECT result FROM run_command_on_all_nodes(' SELECT result FROM run_command_on_all_nodes(' SELECT colliculocale FROM pg_collation WHERE collname = ''german_phonebook_test''; '); - result + result --------------------------------------------------------------------- - de-u-co-phonebk - de-u-co-phonebk - de-u-co-phonebk +de-u-co-phonebk +de-u-co-phonebk +de-u-co-phonebk (3 rows) -- with non-icu provider, colliculocale will be null, collcollate and collctype will be set @@ -552,7 +552,66 @@ SELECT count(*)=100 FROM copy_test2; t (1 row) +-- allow foreign key columns to have SET NULL/DEFAULT on column basis +-- currently only reference tables can support that +CREATE TABLE PKTABLE (tid int, id int, PRIMARY KEY (tid, id)); +CREATE TABLE FKTABLE ( + tid int, id int, + fk_id_del_set_null int, + fk_id_del_set_default int DEFAULT 0, + FOREIGN KEY (tid, fk_id_del_set_null) REFERENCES PKTABLE ON DELETE SET NULL (fk_id_del_set_null), + FOREIGN KEY (tid, fk_id_del_set_default) REFERENCES PKTABLE ON DELETE SET DEFAULT (fk_id_del_set_default) +); +SELECT create_reference_table('PKTABLE'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +-- ok, Citus could relax this constraint in the future +SELECT create_distributed_table('FKTABLE', 'tid'); +ERROR: cannot create foreign key constraint +DETAIL: SET NULL or SET DEFAULT is not supported in ON DELETE operation when distribution key is included in the foreign key constraint +-- with reference tables it should all work fine +SELECT create_reference_table('FKTABLE'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +-- show that the definition is expected +SELECT pg_get_constraintdef(oid) FROM pg_constraint WHERE conrelid = 'fktable'::regclass::oid ORDER BY oid; + pg_get_constraintdef +--------------------------------------------------------------------- + FOREIGN KEY (tid, fk_id_del_set_null) REFERENCES pktable(tid, id) ON DELETE SET NULL (fk_id_del_set_null) + FOREIGN KEY (tid, fk_id_del_set_default) REFERENCES pktable(tid, id) ON DELETE SET DEFAULT (fk_id_del_set_default) +(2 rows) + +\c - - - :worker_1_port +SET search_path TO pg15; +-- show that the definition is expected on the worker as well +SELECT pg_get_constraintdef(oid) FROM pg_constraint WHERE conrelid = 'fktable'::regclass::oid ORDER BY oid; + pg_get_constraintdef +--------------------------------------------------------------------- + FOREIGN KEY (tid, fk_id_del_set_default) REFERENCES pktable(tid, id) ON DELETE SET DEFAULT (fk_id_del_set_default) + FOREIGN KEY (tid, fk_id_del_set_null) REFERENCES pktable(tid, id) ON DELETE SET NULL (fk_id_del_set_null) +(2 rows) + +-- also, make sure that it works as expected +INSERT INTO PKTABLE VALUES (1, 0), (1, 1), (1, 2); +INSERT INTO FKTABLE VALUES + (1, 1, 1, NULL), + (1, 2, NULL, 2); +DELETE FROM PKTABLE WHERE id = 1 OR id = 2; +SELECT * FROM FKTABLE ORDER BY id; + tid | id | fk_id_del_set_null | fk_id_del_set_default +--------------------------------------------------------------------- + 1 | 1 | | + 1 | 2 | | 0 +(2 rows) + -- Clean up +\c 
- - - :master_port \set VERBOSITY terse DROP SCHEMA pg15 CASCADE; -NOTICE: drop cascades to 13 other objects +NOTICE: drop cascades to 15 other objects diff --git a/src/test/regress/sql/pg15.sql b/src/test/regress/sql/pg15.sql index 11942a4ce..f59a46aa3 100644 --- a/src/test/regress/sql/pg15.sql +++ b/src/test/regress/sql/pg15.sql @@ -298,6 +298,47 @@ ALTER TABLE copy_test2 RENAME COLUMN data_ TO data; COPY copy_test2 FROM :'temp_dir''copy_test.txt' WITH ( HEADER match, FORMAT text); SELECT count(*)=100 FROM copy_test2; + +-- allow foreign key columns to have SET NULL/DEFAULT on column basis +-- currently only reference tables can support that +CREATE TABLE PKTABLE (tid int, id int, PRIMARY KEY (tid, id)); +CREATE TABLE FKTABLE ( + tid int, id int, + fk_id_del_set_null int, + fk_id_del_set_default int DEFAULT 0, + FOREIGN KEY (tid, fk_id_del_set_null) REFERENCES PKTABLE ON DELETE SET NULL (fk_id_del_set_null), + FOREIGN KEY (tid, fk_id_del_set_default) REFERENCES PKTABLE ON DELETE SET DEFAULT (fk_id_del_set_default) +); + +SELECT create_reference_table('PKTABLE'); + +-- ok, Citus could relax this constraint in the future +SELECT create_distributed_table('FKTABLE', 'tid'); + +-- with reference tables it should all work fine +SELECT create_reference_table('FKTABLE'); + +-- show that the definition is expected +SELECT pg_get_constraintdef(oid) FROM pg_constraint WHERE conrelid = 'fktable'::regclass::oid ORDER BY oid; + +\c - - - :worker_1_port + +SET search_path TO pg15; + +-- show that the definition is expected on the worker as well +SELECT pg_get_constraintdef(oid) FROM pg_constraint WHERE conrelid = 'fktable'::regclass::oid ORDER BY oid; + +-- also, make sure that it works as expected +INSERT INTO PKTABLE VALUES (1, 0), (1, 1), (1, 2); +INSERT INTO FKTABLE VALUES + (1, 1, 1, NULL), + (1, 2, NULL, 2); +DELETE FROM PKTABLE WHERE id = 1 OR id = 2; +SELECT * FROM FKTABLE ORDER BY id; + + -- Clean up +\c - - - :master_port + \set VERBOSITY terse DROP SCHEMA pg15 CASCADE; From 53ffbe440a21df3b401c144e1b6400577ec876c0 Mon Sep 17 00:00:00 2001 From: naisila Date: Mon, 12 Sep 2022 11:08:20 +0300 Subject: [PATCH 05/17] Revert SQL/JSON features in ruleutils_15.c Reverting the following commits: https://github.com/citusdata/citus/pull/6085/commits/977ddaae56eb192c78bffae392e5b93ea034546e https://github.com/citusdata/citus/pull/6085/commits/4a5cf06defba3fb6b3048cd0196862b765e5b900 https://github.com/citusdata/citus/pull/6085/commits/9ae19c181f49ffde67669d01dafaa869c09fa739 https://github.com/citusdata/citus/pull/6085/commits/30447117e5ff991796cf0be8e7c8e0f18547e001 https://github.com/citusdata/citus/pull/6085/commits/f9c43f43325a226729b9c2f38bbdf3900936e2d5 https://github.com/citusdata/citus/pull/6085/commits/21dba4ed08baef6750e68b6592a813e4ac40f095 https://github.com/citusdata/citus/pull/6085/commits/262932da3e0cd015db3ec36d2c9ed003a352aee1 We have to manually make changes to this file. 
Follow the relevant PG commit in ruleutils.c & make the exact same changes in ruleutils_15.c Relevant PG commit: 96ef3237bf741c12390003e90a4d7115c0c854b7 --- .../distributed/deparser/ruleutils_15.c | 733 +----------------- 1 file changed, 24 insertions(+), 709 deletions(-) diff --git a/src/backend/distributed/deparser/ruleutils_15.c b/src/backend/distributed/deparser/ruleutils_15.c index 642d27fa1..307ad4c6a 100644 --- a/src/backend/distributed/deparser/ruleutils_15.c +++ b/src/backend/distributed/deparser/ruleutils_15.c @@ -430,12 +430,6 @@ static void get_coercion_expr(Node *arg, deparse_context *context, Node *parentNode); static void get_const_expr(Const *constval, deparse_context *context, int showtype); -static void get_json_constructor(JsonConstructorExpr *ctor, - deparse_context *context, bool showimplicit); -static void get_json_agg_constructor(JsonConstructorExpr *ctor, - deparse_context *context, - const char *funcname, - bool is_json_objectagg); static void get_const_collation(Const *constval, deparse_context *context); static void simple_quote_literal(StringInfo buf, const char *val); static void get_sublink_expr(SubLink *sublink, deparse_context *context); @@ -465,10 +459,6 @@ static char *generate_function_name(Oid funcid, int nargs, List *argnames, Oid *argtypes, bool has_variadic, bool *use_variadic_p, ParseExprKind special_exprkind); -static void get_json_path_spec(Node *path_spec, deparse_context *context, - bool showimplicit); -static void get_json_table_columns(TableFunc *tf, JsonTableParent *node, - deparse_context *context, bool showimplicit); #define only_marker(rte) ((rte)->inh ? "" : "ONLY ") @@ -661,7 +651,6 @@ pg_get_rule_expr(Node *expression) return buffer->data; } - /* * set_rtable_names: select RTE aliases to be used in printing a query * @@ -1981,7 +1970,6 @@ pop_ancestor_plan(deparse_namespace *dpns, deparse_namespace *save_dpns) *dpns = *save_dpns; } - /* ---------- * deparse_shard_query - Parse back a query for execution on a shard * @@ -1998,7 +1986,6 @@ deparse_shard_query(Query *query, Oid distrelid, int64 shardid, 0, WRAP_COLUMN_DEFAULT, 0); } - /* ---------- * get_query_def - Parse back one query parsetree * @@ -2026,7 +2013,6 @@ get_query_def(Query *query, StringInfo buf, List *parentnamespace, prettyFlags, wrapColumn, startIndent); } - /* ---------- * get_query_def_extended - Parse back one query parsetree, optionally * with extension using a shard identifier. 
@@ -2954,8 +2940,7 @@ get_rule_sortgroupclause(Index ref, List *tlist, bool force_colno, bool need_paren = (PRETTY_PAREN(context) || IsA(expr, FuncExpr) || IsA(expr, Aggref) - || IsA(expr, WindowFunc) - || IsA(expr, JsonConstructorExpr)); + || IsA(expr, WindowFunc)); if (need_paren) appendStringInfoChar(context->buf, '('); @@ -3447,7 +3432,6 @@ get_insert_query_def(Query *query, deparse_context *context, } } - /* ---------- * get_update_query_def - Parse back an UPDATE parsetree * ---------- @@ -3528,7 +3512,6 @@ get_update_query_def(Query *query, deparse_context *context, } } - /* ---------- * get_update_query_targetlist_def - Parse back an UPDATE targetlist * ---------- @@ -3680,7 +3663,6 @@ get_update_query_targetlist_def(Query *query, List *targetList, } } - /* ---------- * get_delete_query_def - Parse back a DELETE parsetree * ---------- @@ -3756,7 +3738,6 @@ get_delete_query_def(Query *query, deparse_context *context, } } - /* ---------- * get_utility_query_def - Parse back a UTILITY parsetree * ---------- @@ -4890,7 +4871,6 @@ get_simple_binary_op_name(OpExpr *expr) return NULL; } - /* * isSimpleNode - check if given node is simple (doesn't need parenthesizing) * @@ -4927,8 +4907,6 @@ isSimpleNode(Node *node, Node *parentNode, int prettyFlags) case T_GroupingFunc: case T_WindowFunc: case T_FuncExpr: - case T_JsonConstructorExpr: - case T_JsonExpr: /* function-like: name(..) or name[..] */ return true; @@ -5022,7 +5000,6 @@ isSimpleNode(Node *node, Node *parentNode, int prettyFlags) case T_NullTest: case T_BooleanTest: case T_DistinctExpr: - case T_JsonIsPredicate: switch (nodeTag(parentNode)) { case T_FuncExpr: @@ -5047,7 +5024,6 @@ isSimpleNode(Node *node, Node *parentNode, int prettyFlags) case T_GroupingFunc: /* own parentheses */ case T_WindowFunc: /* own parentheses */ case T_CaseExpr: /* other separators */ - case T_JsonExpr: /* own parentheses */ return true; default: return false; @@ -5104,11 +5080,6 @@ isSimpleNode(Node *node, Node *parentNode, int prettyFlags) return false; } - case T_JsonValueExpr: - /* maybe simple, check args */ - return isSimpleNode((Node *) ((JsonValueExpr *) node)->raw_expr, - node, prettyFlags); - default: break; } @@ -5116,7 +5087,6 @@ isSimpleNode(Node *node, Node *parentNode, int prettyFlags) return false; } - /* * appendContextKeyword - append a keyword to buffer * @@ -5184,7 +5154,6 @@ removeStringInfoSpaces(StringInfo str) str->data[--(str->len)] = '\0'; } - /* * get_rule_expr_paren - deparse expr using get_rule_expr, * embracing the string with parentheses if necessary for prettyPrint. @@ -5214,122 +5183,6 @@ get_rule_expr_paren(Node *node, deparse_context *context, appendStringInfoChar(context->buf, ')'); } -/* - * get_json_path_spec - Parse back a JSON path specification - */ -static void -get_json_path_spec(Node *path_spec, deparse_context *context, bool showimplicit) -{ - if (IsA(path_spec, Const)) - get_const_expr((Const *) path_spec, context, -1); - else - get_rule_expr(path_spec, context, showimplicit); -} - -/* - * get_json_format - Parse back a JsonFormat node - */ -static void -get_json_format(JsonFormat *format, StringInfo buf) -{ - if (format->format_type == JS_FORMAT_DEFAULT) - return; - - appendStringInfoString(buf, - format->format_type == JS_FORMAT_JSONB ? - " FORMAT JSONB" : " FORMAT JSON"); - - if (format->encoding != JS_ENC_DEFAULT) - { - const char *encoding = - format->encoding == JS_ENC_UTF16 ? "UTF16" : - format->encoding == JS_ENC_UTF32 ? 
"UTF32" : "UTF8"; - - appendStringInfo(buf, " ENCODING %s", encoding); - } -} - -/* - * get_json_returning - Parse back a JsonReturning structure - */ -static void -get_json_returning(JsonReturning *returning, StringInfo buf, - bool json_format_by_default) -{ - if (!OidIsValid(returning->typid)) - return; - - appendStringInfo(buf, " RETURNING %s", - format_type_with_typemod(returning->typid, - returning->typmod)); - - if (!json_format_by_default || - returning->format->format_type != - (returning->typid == JSONBOID ? JS_FORMAT_JSONB : JS_FORMAT_JSON)) - get_json_format(returning->format, buf); -} - -static void -get_json_behavior(JsonBehavior *behavior, deparse_context *context, - const char *on) -{ - /* - * The order of array elements must correspond to the order of - * JsonBehaviorType members. - */ - const char *behavior_names[] = - { - " NULL", - " ERROR", - " EMPTY", - " TRUE", - " FALSE", - " UNKNOWN", - " EMPTY ARRAY", - " EMPTY OBJECT", - " DEFAULT " - }; - - if ((int) behavior->btype < 0 || behavior->btype >= lengthof(behavior_names)) - elog(ERROR, "invalid json behavior type: %d", behavior->btype); - - appendStringInfoString(context->buf, behavior_names[behavior->btype]); - - if (behavior->btype == JSON_BEHAVIOR_DEFAULT) - get_rule_expr(behavior->default_expr, context, false); - - appendStringInfo(context->buf, " ON %s", on); -} - -/* - * get_json_expr_options - * - * Parse back common options for JSON_QUERY, JSON_VALUE, JSON_EXISTS and - * JSON_TABLE columns. - */ -static void -get_json_expr_options(JsonExpr *jsexpr, deparse_context *context, - JsonBehaviorType default_behavior) -{ - if (jsexpr->op == JSON_QUERY_OP) - { - if (jsexpr->wrapper == JSW_CONDITIONAL) - appendStringInfo(context->buf, " WITH CONDITIONAL WRAPPER"); - else if (jsexpr->wrapper == JSW_UNCONDITIONAL) - appendStringInfo(context->buf, " WITH UNCONDITIONAL WRAPPER"); - - if (jsexpr->omit_quotes) - appendStringInfo(context->buf, " OMIT QUOTES"); - } - - if (jsexpr->op != JSON_EXISTS_OP && - jsexpr->on_empty->btype != default_behavior) - get_json_behavior(jsexpr->on_empty, context, "EMPTY"); - - if (jsexpr->on_error->btype != default_behavior) - get_json_behavior(jsexpr->on_error, context, "ERROR"); -} - /* ---------- * get_rule_expr - Parse back an expression * @@ -6510,115 +6363,6 @@ get_rule_expr(Node *node, deparse_context *context, } break; - case T_JsonValueExpr: - { - JsonValueExpr *jve = (JsonValueExpr *) node; - - get_rule_expr((Node *) jve->raw_expr, context, false); - get_json_format(jve->format, context->buf); - } - break; - - case T_JsonConstructorExpr: - get_json_constructor((JsonConstructorExpr *) node, context, false); - break; - - case T_JsonIsPredicate: - { - JsonIsPredicate *pred = (JsonIsPredicate *) node; - - if (!PRETTY_PAREN(context)) - appendStringInfoChar(context->buf, '('); - - get_rule_expr_paren(pred->expr, context, true, node); - - appendStringInfoString(context->buf, " IS JSON"); - - /* TODO: handle FORMAT clause */ - - switch (pred->item_type) - { - case JS_TYPE_SCALAR: - appendStringInfoString(context->buf, " SCALAR"); - break; - case JS_TYPE_ARRAY: - appendStringInfoString(context->buf, " ARRAY"); - break; - case JS_TYPE_OBJECT: - appendStringInfoString(context->buf, " OBJECT"); - break; - default: - break; - } - - if (pred->unique_keys) - appendStringInfoString(context->buf, " WITH UNIQUE KEYS"); - - if (!PRETTY_PAREN(context)) - appendStringInfoChar(context->buf, ')'); - } - break; - - case T_JsonExpr: - { - JsonExpr *jexpr = (JsonExpr *) node; - - switch (jexpr->op) - { - case 
JSON_QUERY_OP: - appendStringInfoString(buf, "JSON_QUERY("); - break; - case JSON_VALUE_OP: - appendStringInfoString(buf, "JSON_VALUE("); - break; - case JSON_EXISTS_OP: - appendStringInfoString(buf, "JSON_EXISTS("); - break; - default: - elog(ERROR, "unexpected JsonExpr type: %d", jexpr->op); - break; - } - - get_rule_expr(jexpr->formatted_expr, context, showimplicit); - - appendStringInfoString(buf, ", "); - - get_json_path_spec(jexpr->path_spec, context, showimplicit); - - if (jexpr->passing_values) - { - ListCell *lc1, - *lc2; - bool needcomma = false; - - appendStringInfoString(buf, " PASSING "); - - forboth(lc1, jexpr->passing_names, - lc2, jexpr->passing_values) - { - if (needcomma) - appendStringInfoString(buf, ", "); - needcomma = true; - - get_rule_expr((Node *) lfirst(lc2), context, showimplicit); - appendStringInfo(buf, " AS %s", - ((String *) lfirst_node(String, lc1))->sval); - } - } - - if (jexpr->op != JSON_EXISTS_OP || - jexpr->returning->typid != BOOLOID) - get_json_returning(jexpr->returning, context->buf, - jexpr->op == JSON_QUERY_OP); - - get_json_expr_options(jexpr, context, - jexpr->op == JSON_EXISTS_OP ? - JSON_BEHAVIOR_FALSE : JSON_BEHAVIOR_NULL); - - appendStringInfoString(buf, ")"); - } - break; - case T_List: { char *sep; @@ -6746,7 +6490,6 @@ looks_like_function(Node *node) case T_MinMaxExpr: case T_SQLValueFunction: case T_XmlExpr: - case T_JsonExpr: /* these are all accepted by func_expr_common_subexpr */ return true; default: @@ -6755,7 +6498,6 @@ looks_like_function(Node *node) return false; } - /* * get_oper_expr - Parse back an OpExpr node */ @@ -6894,90 +6636,6 @@ get_func_expr(FuncExpr *expr, deparse_context *context, appendStringInfoChar(buf, ')'); } -static void -get_json_constructor_options(JsonConstructorExpr *ctor, StringInfo buf) -{ - if (ctor->absent_on_null) - { - if (ctor->type == JSCTOR_JSON_OBJECT || - ctor->type == JSCTOR_JSON_OBJECTAGG) - appendStringInfoString(buf, " ABSENT ON NULL"); - } - else - { - if (ctor->type == JSCTOR_JSON_ARRAY || - ctor->type == JSCTOR_JSON_ARRAYAGG) - appendStringInfoString(buf, " NULL ON NULL"); - } - - if (ctor->unique) - appendStringInfoString(buf, " WITH UNIQUE KEYS"); - - if (!((ctor->type == JSCTOR_JSON_PARSE || - ctor->type == JSCTOR_JSON_SCALAR) && - ctor->returning->typid == JSONOID)) - get_json_returning(ctor->returning, buf, true); -} - -static void -get_json_constructor(JsonConstructorExpr *ctor, deparse_context *context, - bool showimplicit) -{ - StringInfo buf = context->buf; - const char *funcname; - int nargs; - ListCell *lc; - - switch (ctor->type) - { - case JSCTOR_JSON_PARSE: - funcname = "JSON"; - break; - case JSCTOR_JSON_SCALAR: - funcname = "JSON_SCALAR"; - break; - case JSCTOR_JSON_SERIALIZE: - funcname = "JSON_SERIALIZE"; - break; - case JSCTOR_JSON_OBJECT: - funcname = "JSON_OBJECT"; - break; - case JSCTOR_JSON_ARRAY: - funcname = "JSON_ARRAY"; - break; - case JSCTOR_JSON_OBJECTAGG: - get_json_agg_constructor(ctor, context, "JSON_OBJECTAGG", true); - return; - case JSCTOR_JSON_ARRAYAGG: - get_json_agg_constructor(ctor, context, "JSON_ARRAYAGG", false); - return; - default: - elog(ERROR, "invalid JsonConstructorExprType %d", ctor->type); - } - - appendStringInfo(buf, "%s(", funcname); - - nargs = 0; - foreach(lc, ctor->args) - { - if (nargs > 0) - { - const char *sep = ctor->type == JSCTOR_JSON_OBJECT && - (nargs % 2) != 0 ? 
" : " : ", "; - - appendStringInfoString(buf, sep); - } - - get_rule_expr((Node *) lfirst(lc), context, true); - - nargs++; - } - - get_json_constructor_options(ctor, buf); - - appendStringInfo(buf, ")"); -} - /* * get_proc_expr - Parse back a CallStmt node */ @@ -7023,17 +6681,16 @@ get_proc_expr(CallStmt *stmt, deparse_context *context, } /* - * get_agg_expr_helper - Parse back an Aggref node + * get_agg_expr - Parse back an Aggref node */ static void -get_agg_expr_helper(Aggref *aggref, deparse_context *context, - Aggref *original_aggref, const char *funcname, - const char *options, bool is_json_objectagg) +get_agg_expr(Aggref *aggref, deparse_context *context, + Aggref *original_aggref) { StringInfo buf = context->buf; Oid argtypes[FUNC_MAX_ARGS]; int nargs; - bool use_variadic = false; + bool use_variadic; /* * For a combining aggregate, we look up and deparse the corresponding @@ -7064,14 +6721,13 @@ get_agg_expr_helper(Aggref *aggref, deparse_context *context, /* Extract the argument types as seen by the parser */ nargs = get_aggregate_argtypes(aggref, argtypes); - if (!funcname) - funcname = generate_function_name(aggref->aggfnoid, nargs, NIL, - argtypes, aggref->aggvariadic, - &use_variadic, - context->special_exprkind); - /* Print the aggregate name, schema-qualified if needed */ - appendStringInfo(buf, "%s(%s", funcname, + appendStringInfo(buf, "%s(%s", + generate_function_name(aggref->aggfnoid, nargs, + NIL, argtypes, + aggref->aggvariadic, + &use_variadic, + context->special_exprkind), (aggref->aggdistinct != NIL) ? "DISTINCT " : ""); if (AGGKIND_IS_ORDERED_SET(aggref->aggkind)) @@ -7107,18 +6763,7 @@ get_agg_expr_helper(Aggref *aggref, deparse_context *context, if (tle->resjunk) continue; if (i++ > 0) - { - if (is_json_objectagg) - { - if (i > 2) - break; /* skip ABSENT ON NULL and WITH UNIQUE - * args */ - - appendStringInfoString(buf, " : "); - } - else - appendStringInfoString(buf, ", "); - } + appendStringInfoString(buf, ", "); if (use_variadic && i == nargs) appendStringInfoString(buf, "VARIADIC "); get_rule_expr(arg, context, true); @@ -7132,9 +6777,6 @@ get_agg_expr_helper(Aggref *aggref, deparse_context *context, } } - if (options) - appendStringInfoString(buf, options); - if (aggref->aggfilter != NULL) { appendStringInfoString(buf, ") FILTER (WHERE "); @@ -7144,16 +6786,6 @@ get_agg_expr_helper(Aggref *aggref, deparse_context *context, appendStringInfoChar(buf, ')'); } -/* - * get_agg_expr - Parse back an Aggref node - */ -static void -get_agg_expr(Aggref *aggref, deparse_context *context, Aggref *original_aggref) -{ - get_agg_expr_helper(aggref, context, original_aggref, NULL, NULL, - false); -} - /* * This is a helper function for get_agg_expr(). 
It's used when we deparse * a combining Aggref; resolve_special_varno locates the corresponding partial @@ -7173,12 +6805,10 @@ get_agg_combine_expr(Node *node, deparse_context *context, void *callback_arg) } /* - * get_windowfunc_expr_helper - Parse back a WindowFunc node + * get_windowfunc_expr - Parse back a WindowFunc node */ static void -get_windowfunc_expr_helper(WindowFunc *wfunc, deparse_context *context, - const char *funcname, const char *options, - bool is_json_objectagg) +get_windowfunc_expr(WindowFunc *wfunc, deparse_context *context) { StringInfo buf = context->buf; Oid argtypes[FUNC_MAX_ARGS]; @@ -7202,30 +6832,17 @@ get_windowfunc_expr_helper(WindowFunc *wfunc, deparse_context *context, nargs++; } - if (!funcname) - funcname = generate_function_name(wfunc->winfnoid, nargs, argnames, - argtypes, false, NULL, - context->special_exprkind); - - appendStringInfo(buf, "%s(", funcname); + appendStringInfo(buf, "%s(", + generate_function_name(wfunc->winfnoid, nargs, + argnames, argtypes, + false, NULL, + context->special_exprkind)); /* winstar can be set only in zero-argument aggregates */ if (wfunc->winstar) appendStringInfoChar(buf, '*'); else - { - if (is_json_objectagg) - { - get_rule_expr((Node *) linitial(wfunc->args), context, false); - appendStringInfoString(buf, " : "); - get_rule_expr((Node *) lsecond(wfunc->args), context, false); - } - else - get_rule_expr((Node *) wfunc->args, context, true); - } - - if (options) - appendStringInfoString(buf, options); + get_rule_expr((Node *) wfunc->args, context, true); if (wfunc->aggfilter != NULL) { @@ -7262,15 +6879,6 @@ get_windowfunc_expr_helper(WindowFunc *wfunc, deparse_context *context, } } -/* - * get_windowfunc_expr - Parse back a WindowFunc node - */ -static void -get_windowfunc_expr(WindowFunc *wfunc, deparse_context *context) -{ - get_windowfunc_expr_helper(wfunc, context, NULL, NULL, false); -} - /* * get_func_sql_syntax - Parse back a SQL-syntax function call * @@ -7511,31 +7119,6 @@ get_func_sql_syntax(FuncExpr *expr, deparse_context *context) return false; } -/* - * get_json_agg_constructor - Parse back an aggregate JsonConstructorExpr node - */ -static void -get_json_agg_constructor(JsonConstructorExpr *ctor, deparse_context *context, - const char *funcname, bool is_json_objectagg) -{ - StringInfoData options; - - initStringInfo(&options); - get_json_constructor_options(ctor, &options); - - if (IsA(ctor->func, Aggref)) - get_agg_expr_helper((Aggref *) ctor->func, context, - (Aggref *) ctor->func, - funcname, options.data, is_json_objectagg); - else if (IsA(ctor->func, WindowFunc)) - get_windowfunc_expr_helper((WindowFunc *) ctor->func, context, - funcname, options.data, - is_json_objectagg); - else - elog(ERROR, "invalid JsonConstructorExpr underlying node type: %d", - nodeTag(ctor->func)); -} - /* ---------- * get_coercion_expr * @@ -7775,7 +7358,6 @@ simple_quote_literal(StringInfo buf, const char *val) appendStringInfoChar(buf, '\''); } - /* ---------- * get_sublink_expr - Parse back a sublink * ---------- @@ -7900,16 +7482,17 @@ get_sublink_expr(SubLink *sublink, deparse_context *context) appendStringInfoChar(buf, ')'); } - /* ---------- - * get_xmltable - Parse back a XMLTABLE function + * get_tablefunc - Parse back a table function * ---------- */ static void -get_xmltable(TableFunc *tf, deparse_context *context, bool showimplicit) +get_tablefunc(TableFunc *tf, deparse_context *context, bool showimplicit) { StringInfo buf = context->buf; + /* XMLTABLE is the only existing implementation. 
*/ + appendStringInfoString(buf, "XMLTABLE("); if (tf->ns_uris != NIL) @@ -8000,271 +7583,6 @@ get_xmltable(TableFunc *tf, deparse_context *context, bool showimplicit) appendStringInfoChar(buf, ')'); } -/* - * get_json_nested_columns - Parse back nested JSON_TABLE columns - */ -static void -get_json_table_nested_columns(TableFunc *tf, Node *node, - deparse_context *context, bool showimplicit, - bool needcomma) -{ - if (IsA(node, JsonTableSibling)) - { - JsonTableSibling *n = (JsonTableSibling *) node; - - get_json_table_nested_columns(tf, n->larg, context, showimplicit, - needcomma); - get_json_table_nested_columns(tf, n->rarg, context, showimplicit, true); - } - else - { - JsonTableParent *n = castNode(JsonTableParent, node); - - if (needcomma) - appendStringInfoChar(context->buf, ','); - - appendStringInfoChar(context->buf, ' '); - appendContextKeyword(context, "NESTED PATH ", 0, 0, 0); - get_const_expr(n->path, context, -1); - appendStringInfo(context->buf, " AS %s", quote_identifier(n->name)); - get_json_table_columns(tf, n, context, showimplicit); - } -} - -/* - * get_json_table_plan - Parse back a JSON_TABLE plan - */ -static void -get_json_table_plan(TableFunc *tf, Node *node, deparse_context *context, - bool parenthesize) -{ - if (parenthesize) - appendStringInfoChar(context->buf, '('); - - if (IsA(node, JsonTableSibling)) - { - JsonTableSibling *n = (JsonTableSibling *) node; - - get_json_table_plan(tf, n->larg, context, - IsA(n->larg, JsonTableSibling) || - castNode(JsonTableParent, n->larg)->child); - - appendStringInfoString(context->buf, n->cross ? " CROSS " : " UNION "); - - get_json_table_plan(tf, n->rarg, context, - IsA(n->rarg, JsonTableSibling) || - castNode(JsonTableParent, n->rarg)->child); - } - else - { - JsonTableParent *n = castNode(JsonTableParent, node); - - appendStringInfoString(context->buf, quote_identifier(n->name)); - - if (n->child) - { - appendStringInfoString(context->buf, - n->outerJoin ? " OUTER " : " INNER "); - get_json_table_plan(tf, n->child, context, - IsA(n->child, JsonTableSibling)); - } - } - - if (parenthesize) - appendStringInfoChar(context->buf, ')'); -} - -/* - * get_json_table_columns - Parse back JSON_TABLE columns - */ -static void -get_json_table_columns(TableFunc *tf, JsonTableParent *node, - deparse_context *context, bool showimplicit) -{ - StringInfo buf = context->buf; - JsonExpr *jexpr = castNode(JsonExpr, tf->docexpr); - ListCell *lc_colname; - ListCell *lc_coltype; - ListCell *lc_coltypmod; - ListCell *lc_colvarexpr; - int colnum = 0; - - appendStringInfoChar(buf, ' '); - appendContextKeyword(context, "COLUMNS (", 0, 0, 0); - - if (PRETTY_INDENT(context)) - context->indentLevel += PRETTYINDENT_VAR; - - forfour(lc_colname, tf->colnames, - lc_coltype, tf->coltypes, - lc_coltypmod, tf->coltypmods, - lc_colvarexpr, tf->colvalexprs) - { - char *colname = strVal(lfirst(lc_colname)); - JsonExpr *colexpr; - Oid typid; - int32 typmod; - bool ordinality; - JsonBehaviorType default_behavior; - - typid = lfirst_oid(lc_coltype); - typmod = lfirst_int(lc_coltypmod); - colexpr = castNode(JsonExpr, lfirst(lc_colvarexpr)); - - if (colnum < node->colMin) - { - colnum++; - continue; - } - - if (colnum > node->colMax) - break; - - if (colnum > node->colMin) - appendStringInfoString(buf, ", "); - - colnum++; - - ordinality = !colexpr; - - appendContextKeyword(context, "", 0, 0, 0); - - appendStringInfo(buf, "%s %s", quote_identifier(colname), - ordinality ? 
"FOR ORDINALITY" : - format_type_with_typemod(typid, typmod)); - if (ordinality) - continue; - - if (colexpr->op == JSON_EXISTS_OP) - { - appendStringInfoString(buf, " EXISTS"); - default_behavior = JSON_BEHAVIOR_FALSE; - } - else - { - if (colexpr->op == JSON_QUERY_OP) - { - char typcategory; - bool typispreferred; - - get_type_category_preferred(typid, &typcategory, &typispreferred); - - if (typcategory == TYPCATEGORY_STRING) - appendStringInfoString(buf, - colexpr->format->format_type == JS_FORMAT_JSONB ? - " FORMAT JSONB" : " FORMAT JSON"); - } - - default_behavior = JSON_BEHAVIOR_NULL; - } - - if (jexpr->on_error->btype == JSON_BEHAVIOR_ERROR) - default_behavior = JSON_BEHAVIOR_ERROR; - - appendStringInfoString(buf, " PATH "); - - get_json_path_spec(colexpr->path_spec, context, showimplicit); - - get_json_expr_options(colexpr, context, default_behavior); - } - - if (node->child) - get_json_table_nested_columns(tf, node->child, context, showimplicit, - node->colMax >= node->colMin); - - if (PRETTY_INDENT(context)) - context->indentLevel -= PRETTYINDENT_VAR; - - appendContextKeyword(context, ")", 0, 0, 0); -} - -/* ---------- - * get_json_table - Parse back a JSON_TABLE function - * ---------- - */ -static void -get_json_table(TableFunc *tf, deparse_context *context, bool showimplicit) -{ - StringInfo buf = context->buf; - JsonExpr *jexpr = castNode(JsonExpr, tf->docexpr); - JsonTableParent *root = castNode(JsonTableParent, tf->plan); - - appendStringInfoString(buf, "JSON_TABLE("); - - if (PRETTY_INDENT(context)) - context->indentLevel += PRETTYINDENT_VAR; - - appendContextKeyword(context, "", 0, 0, 0); - - get_rule_expr(jexpr->formatted_expr, context, showimplicit); - - appendStringInfoString(buf, ", "); - - get_const_expr(root->path, context, -1); - - appendStringInfo(buf, " AS %s", quote_identifier(root->name)); - - if (jexpr->passing_values) - { - ListCell *lc1, - *lc2; - bool needcomma = false; - - appendStringInfoChar(buf, ' '); - appendContextKeyword(context, "PASSING ", 0, 0, 0); - - if (PRETTY_INDENT(context)) - context->indentLevel += PRETTYINDENT_VAR; - - forboth(lc1, jexpr->passing_names, - lc2, jexpr->passing_values) - { - if (needcomma) - appendStringInfoString(buf, ", "); - needcomma = true; - - appendContextKeyword(context, "", 0, 0, 0); - - get_rule_expr((Node *) lfirst(lc2), context, false); - appendStringInfo(buf, " AS %s", - quote_identifier((lfirst_node(String, lc1))->sval) - ); - } - - if (PRETTY_INDENT(context)) - context->indentLevel -= PRETTYINDENT_VAR; - } - - get_json_table_columns(tf, root, context, showimplicit); - - appendStringInfoChar(buf, ' '); - appendContextKeyword(context, "PLAN ", 0, 0, 0); - get_json_table_plan(tf, (Node *) root, context, true); - - if (jexpr->on_error->btype != JSON_BEHAVIOR_EMPTY) - get_json_behavior(jexpr->on_error, context, "ERROR"); - - if (PRETTY_INDENT(context)) - context->indentLevel -= PRETTYINDENT_VAR; - - appendContextKeyword(context, ")", 0, 0, 0); -} - -/* ---------- - * get_tablefunc - Parse back a table function - * ---------- - */ -static void -get_tablefunc(TableFunc *tf, deparse_context *context, bool showimplicit) -{ - /* XMLTABLE and JSON_TABLE are the only existing implementations. 
*/ - - if (tf->functype == TFT_XMLTABLE) - get_xmltable(tf, context, showimplicit); - else if (tf->functype == TFT_JSON_TABLE) - get_json_table(tf, context, showimplicit); -} - /* ---------- * get_from_clause - Parse back a FROM clause * @@ -8866,7 +8184,6 @@ get_tablesample_def(TableSampleClause *tablesample, deparse_context *context) } } - /* * get_opclass_name - fetch name of an index operator class * @@ -9137,7 +8454,6 @@ generate_relation_name(Oid relid, List *namespaces) return result; } - /* * generate_rte_shard_name returns the qualified name of the shard given a * CITUS_RTE_SHARD range table entry. @@ -9156,7 +8472,6 @@ generate_rte_shard_name(RangeTblEntry *rangeTableEntry) return generate_fragment_name(shardSchemaName, shardTableName); } - /* * generate_fragment_name * Compute the name to display for a shard or merged table From 47bea76c6cf108e0fe7445d97f4398bc920ca48d Mon Sep 17 00:00:00 2001 From: naisila Date: Mon, 12 Sep 2022 11:29:07 +0300 Subject: [PATCH 06/17] Revert "Support JSON_TABLE on PG 15 (#6241)" This reverts commit 1f4fe35512dafbba89e6fe89606c5639f10e69b1. --- .../planner/multi_logical_planner.c | 3 +- .../planner/query_pushdown_planning.c | 63 +- .../distributed/query_pushdown_planning.h | 2 +- .../expected/json_table_select_only.out | 1583 ----------------- .../expected/json_table_select_only_0.out | 9 - src/test/regress/expected/pg15_json.out | 488 ----- src/test/regress/expected/pg15_json_0.out | 9 - src/test/regress/json_table_select_only.out | 1572 ---------------- src/test/regress/json_table_select_only_0.out | 9 - src/test/regress/multi_schedule | 2 +- .../regress/sql/json_table_select_only.sql | 330 ---- src/test/regress/sql/pg15_json.sql | 326 ---- 12 files changed, 7 insertions(+), 4389 deletions(-) delete mode 100644 src/test/regress/expected/json_table_select_only.out delete mode 100644 src/test/regress/expected/json_table_select_only_0.out delete mode 100644 src/test/regress/expected/pg15_json.out delete mode 100644 src/test/regress/expected/pg15_json_0.out delete mode 100644 src/test/regress/json_table_select_only.out delete mode 100644 src/test/regress/json_table_select_only_0.out delete mode 100644 src/test/regress/sql/json_table_select_only.sql delete mode 100644 src/test/regress/sql/pg15_json.sql diff --git a/src/backend/distributed/planner/multi_logical_planner.c b/src/backend/distributed/planner/multi_logical_planner.c index 14dfa924f..7e665b567 100644 --- a/src/backend/distributed/planner/multi_logical_planner.c +++ b/src/backend/distributed/planner/multi_logical_planner.c @@ -1154,8 +1154,7 @@ HasComplexRangeTableType(Query *queryTree) if (rangeTableEntry->rtekind != RTE_RELATION && rangeTableEntry->rtekind != RTE_SUBQUERY && rangeTableEntry->rtekind != RTE_FUNCTION && - rangeTableEntry->rtekind != RTE_VALUES && - !IsJsonTableRTE(rangeTableEntry)) + rangeTableEntry->rtekind != RTE_VALUES) { hasComplexRangeTableType = true; } diff --git a/src/backend/distributed/planner/query_pushdown_planning.c b/src/backend/distributed/planner/query_pushdown_planning.c index 964acfff4..5ad7887e9 100644 --- a/src/backend/distributed/planner/query_pushdown_planning.c +++ b/src/backend/distributed/planner/query_pushdown_planning.c @@ -60,8 +60,7 @@ typedef enum RecurringTuplesType RECURRING_TUPLES_FUNCTION, RECURRING_TUPLES_EMPTY_JOIN_TREE, RECURRING_TUPLES_RESULT_FUNCTION, - RECURRING_TUPLES_VALUES, - RECURRING_TUPLES_JSON_TABLE + RECURRING_TUPLES_VALUES } RecurringTuplesType; /* @@ -346,8 +345,7 @@ IsFunctionOrValuesRTE(Node *node) RangeTblEntry 
*rangeTblEntry = (RangeTblEntry *) node; if (rangeTblEntry->rtekind == RTE_FUNCTION || - rangeTblEntry->rtekind == RTE_VALUES || - IsJsonTableRTE(rangeTblEntry)) + rangeTblEntry->rtekind == RTE_VALUES) { return true; } @@ -720,13 +718,6 @@ DeferErrorIfFromClauseRecurs(Query *queryTree) "the FROM clause contains VALUES", NULL, NULL); } - else if (recurType == RECURRING_TUPLES_JSON_TABLE) - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "correlated subqueries are not supported when " - "the FROM clause contains JSON_TABLE", NULL, - NULL); - } /* @@ -954,13 +945,6 @@ DeferredErrorIfUnsupportedRecurringTuplesJoin( "There exist a VALUES clause in the outer " "part of the outer join", NULL); } - else if (recurType == RECURRING_TUPLES_JSON_TABLE) - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "cannot pushdown the subquery", - "There exist a JSON_TABLE clause in the outer " - "part of the outer join", NULL); - } return NULL; } @@ -1251,8 +1235,7 @@ DeferErrorIfUnsupportedTableCombination(Query *queryTree) */ if (rangeTableEntry->rtekind == RTE_RELATION || rangeTableEntry->rtekind == RTE_SUBQUERY || - rangeTableEntry->rtekind == RTE_RESULT || - IsJsonTableRTE(rangeTableEntry)) /* TODO: can we have volatile???*/ + rangeTableEntry->rtekind == RTE_RESULT) { /* accepted */ } @@ -1420,13 +1403,6 @@ DeferErrorIfUnsupportedUnionQuery(Query *subqueryTree) "VALUES is not supported within a " "UNION", NULL); } - else if (recurType == RECURRING_TUPLES_JSON_TABLE) - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "cannot push down this subquery", - "JSON_TABLE is not supported within a " - "UNION", NULL); - } return NULL; } @@ -1526,11 +1502,6 @@ RecurringTypeDescription(RecurringTuplesType recurType) return "a VALUES clause"; } - case RECURRING_TUPLES_JSON_TABLE: - { - return "a JSON_TABLE"; - } - case RECURRING_TUPLES_INVALID: { /* @@ -1727,8 +1698,7 @@ DeferredErrorIfUnsupportedLateralSubquery(PlannerInfo *plannerInfo, * strings anyway. */ if (recurType != RECURRING_TUPLES_VALUES && - recurType != RECURRING_TUPLES_RESULT_FUNCTION && - recurType != RECURRING_TUPLES_JSON_TABLE) + recurType != RECURRING_TUPLES_RESULT_FUNCTION) { recurTypeDescription = psprintf("%s (%s)", recurTypeDescription, recurringRangeTableEntry->eref-> @@ -1805,26 +1775,6 @@ ContainsRecurringRangeTable(List *rangeTable, RecurringTuplesType *recurType) } -/* - * IsJsonTableRTE checks whether the RTE refers to a JSON_TABLE - * table function, which was introduced in PostgreSQL 15. 
- */ -bool -IsJsonTableRTE(RangeTblEntry *rte) -{ -#if PG_VERSION_NUM >= PG_VERSION_15 - if (rte == NULL) - { - return false; - } - return (rte->rtekind == RTE_TABLEFUNC && - rte->tablefunc->functype == TFT_JSON_TABLE); -#endif - - return false; -} - - /* * HasRecurringTuples returns whether any part of the expression will generate * the same set of tuples in every query on shards when executing a distributed @@ -1886,11 +1836,6 @@ HasRecurringTuples(Node *node, RecurringTuplesType *recurType) *recurType = RECURRING_TUPLES_VALUES; return true; } - else if (IsJsonTableRTE(rangeTableEntry)) - { - *recurType = RECURRING_TUPLES_JSON_TABLE; - return true; - } return false; } diff --git a/src/include/distributed/query_pushdown_planning.h b/src/include/distributed/query_pushdown_planning.h index 3c30b7814..061a4a730 100644 --- a/src/include/distributed/query_pushdown_planning.h +++ b/src/include/distributed/query_pushdown_planning.h @@ -46,6 +46,6 @@ extern DeferredErrorMessage * DeferErrorIfCannotPushdownSubquery(Query *subquery bool outerMostQueryHasLimit); extern DeferredErrorMessage * DeferErrorIfUnsupportedUnionQuery(Query *queryTree); -extern bool IsJsonTableRTE(RangeTblEntry *rte); + #endif /* QUERY_PUSHDOWN_PLANNING_H */ diff --git a/src/test/regress/expected/json_table_select_only.out b/src/test/regress/expected/json_table_select_only.out deleted file mode 100644 index 0ce4edc68..000000000 --- a/src/test/regress/expected/json_table_select_only.out +++ /dev/null @@ -1,1583 +0,0 @@ --- --- PG15+ test --- -SHOW server_version \gset -SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 -\gset -\if :server_version_ge_15 -\else -\q -\endif -SET search_path TO "json table"; -CREATE SCHEMA "json table"; -SET search_path TO "json table"; -CREATE TABLE jsonb_table_test (id bigserial, js jsonb); -SELECT create_distributed_table('jsonb_table_test', 'id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - --- insert some data -INSERT INTO jsonb_table_test (js) -VALUES ( - '[ - {"a": 1, "b": [], "c": []}, - {"a": 2, "b": [1, 2, 3], "c": [10, null, 20]}, - {"a": 3, "b": [1, 2], "c": []}, - {"x": "4", "b": [1, 2], "c": 123} - ]' -), -( - '[ - {"a": 1, "d": [], "c": []}, - {"a": 2, "d": [1, 2, 3], "c": [10, null, 20]}, - {"a": 3, "d": [1, 2], "c": []}, - {"x": "4", "d": [1, 2], "c": 123} - ]' -), -( - '[ - {"a": 1, "b": [], "c": []}, - {"a": 2, "b": [100, 200, 300], "c": [10, null, 20]}, - {"a": 3, "b": [1, 2], "c": [null]}, - {"x": "4", "b": [1, 2], "c": 2} - ]' -), -( - '[ - {"y": 1, "b": [], "c": []}, - {"a": 2, "b": [1, 2, 3], "c": [10, null, 20]}, - {"a": 3, "t": [1, 2], "c": []}, - {"x": "4", "b": [1, 200], "c": 96} - ]' -), -( - '[ - {"a": 1, "b": [], "c": []}, - {"a": 2, "b": [1, 2, 3], "c": [10, null, 20]}, - {"a": 3, "b": [1, 2], "c": []}, - {"x": "100", "b": [1, 2], "c": 123} - ]' -), -( - '[ - {"t": 1, "b": [], "c": []}, - {"t": 2, "b": [1, 2, 3], "x": [10, null, 20]}, - {"t": 3, "b": [1, 2], "c": []}, - {"x": "4", "b": [1, 2], "c": 123} - ]' -), -( - '[ - {"a": 1, "b": [], "c": []}, - {"a": 2, "b": [1, 2, 3], "c": [10, null, 20]}, - {"a": 3, "b": [1, 2], "c": []}, - {"U": "4", "b": [1, 2], "c": 123} - ]' -), -( - '[ - {"a": 1, "b": [], "c": []}, - {"a": 2, "b": [1, 2, 3], "c": [10, null, 20]}, - {"a": 3, "b": [1, 2], "c": []}, - {"x": "4", "b": [1, 2], "c": 123} - ]' -), -( - '[ - {"a": 1, "b": [], "c": []}, - {"a": 2, "b": [1, 2, 3], "c": [10, null, 20]}, - {"a": 3, "b": [1, 2], "c": []}, - {"x": "4", "b": 
[1, 2], "c": 123} - ]' -), -( - '[ - {"a": 1, "b": [], "c": []}, - {"a": 2, "b": [1000, 2, 3], "c": [10, null, 20]}, - {"a": 3, "b": [1, 2], "c": []}, - {"x": "4", "T": [1, 2], "c": 123} - ]' -), -( - '[ - {"a": 1, "b": [], "c": []}, - {"a": 2, "b": [1, 2, 3], "c": [10, null, 20]}, - {"a": 3, "b": [1, 2], "c": []}, - {"x": "4", "b": [1, 2], "c": 123} - ]' -), -( - '[ - {"ffa": 1, "b": [], "c": []}, - {"ffb": 2, "b": [1, 2, 3], "c": [10, null, 20]}, - {"fffc": 3, "b": [1, 2], "c": []}, - {"x": "4", "b": [1, 2], "c": 123} - ]' -); --- unspecified plan (outer, union) -select - jt.* -from - jsonb_table_test jtt, - json_table ( - jtt.js,'strict $[*]' as p - columns ( - n for ordinality, - a int path 'lax $.a' default -1 on empty, - nested path 'strict $.b[*]' as pb columns ( b int path '$' ), - nested path 'strict $.c[*]' as pc columns ( c int path '$' ) - ) - ) jt ORDER BY 1,2,3,4; - n | a | b | c ---------------------------------------------------------------------- - 1 | -1 | | - 1 | -1 | | - 1 | -1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 2 | -1 | 1 | - 2 | -1 | 1 | - 2 | -1 | 2 | - 2 | -1 | 2 | - 2 | -1 | 3 | - 2 | -1 | 3 | - 2 | -1 | | 10 - 2 | -1 | | 20 - 2 | -1 | | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 100 | - 2 | 2 | 200 | - 2 | 2 | 300 | - 2 | 2 | 1000 | - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 3 | -1 | 1 | - 3 | -1 | 1 | - 3 | -1 | 2 | - 3 | -1 | 2 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | | - 3 | 3 | | - 3 | 3 | | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 200 | - 4 | -1 | | - 4 | -1 | | -(123 rows) - --- default plan (outer, union) -select - jt.* -from - jsonb_table_test jtt, - json_table ( - jtt.js,'strict $[*]' as p - columns ( - n for ordinality, - a int path 'lax $.a' default -1 on empty, - nested path 'strict $.b[*]' as pb columns ( b int path '$' ), - nested path 'strict $.c[*]' as pc columns ( c int path '$' ) - ) - plan default (outer, union) - ) jt ORDER BY 1,2,3,4; - n | a | b | c ---------------------------------------------------------------------- - 1 | -1 | | - 1 | -1 | | - 1 | -1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 2 | -1 | 1 | - 2 | -1 | 1 | - 2 | -1 | 2 | - 2 | -1 | 2 | - 2 | -1 | 3 | - 2 | -1 | 3 | - 2 | -1 | | 10 - 2 | -1 | | 20 - 2 | -1 | | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 
| 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 100 | - 2 | 2 | 200 | - 2 | 2 | 300 | - 2 | 2 | 1000 | - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 3 | -1 | 1 | - 3 | -1 | 1 | - 3 | -1 | 2 | - 3 | -1 | 2 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | | - 3 | 3 | | - 3 | 3 | | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 200 | - 4 | -1 | | - 4 | -1 | | -(123 rows) - --- specific plan (p outer (pb union pc)) -select - jt.* -from - jsonb_table_test jtt, - json_table ( - jtt.js,'strict $[*]' as p - columns ( - n for ordinality, - a int path 'lax $.a' default -1 on empty, - nested path 'strict $.b[*]' as pb columns ( b int path '$' ), - nested path 'strict $.c[*]' as pc columns ( c int path '$' ) - ) - plan (p outer (pb union pc)) - ) jt ORDER BY 1,2,3,4; - n | a | b | c ---------------------------------------------------------------------- - 1 | -1 | | - 1 | -1 | | - 1 | -1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 2 | -1 | 1 | - 2 | -1 | 1 | - 2 | -1 | 2 | - 2 | -1 | 2 | - 2 | -1 | 3 | - 2 | -1 | 3 | - 2 | -1 | | 10 - 2 | -1 | | 20 - 2 | -1 | | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 100 | - 2 | 2 | 200 | - 2 | 2 | 300 | - 2 | 2 | 1000 | - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 3 | -1 | 1 | - 3 | -1 | 1 | - 3 | -1 | 2 | - 3 | -1 | 2 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | | - 3 | 3 | | - 3 | 3 | | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | 
- 4 | -1 | 2 | - 4 | -1 | 200 | - 4 | -1 | | - 4 | -1 | | -(123 rows) - --- specific plan (p outer (pc union pb)) -select - jt.* -from - jsonb_table_test jtt, - json_table ( - jtt.js,'strict $[*]' as p - columns ( - n for ordinality, - a int path 'lax $.a' default -1 on empty, - nested path 'strict $.b[*]' as pb columns ( b int path '$' ), - nested path 'strict $.c[*]' as pc columns ( c int path '$' ) - ) - plan (p outer (pc union pb)) - ) jt ORDER BY 1,2,3,4; - n | a | c | b ---------------------------------------------------------------------- - 1 | -1 | | - 1 | -1 | | - 1 | -1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 2 | -1 | 10 | - 2 | -1 | 20 | - 2 | -1 | | 1 - 2 | -1 | | 1 - 2 | -1 | | 2 - 2 | -1 | | 2 - 2 | -1 | | 3 - 2 | -1 | | 3 - 2 | -1 | | - 2 | 2 | 10 | - 2 | 2 | 10 | - 2 | 2 | 10 | - 2 | 2 | 10 | - 2 | 2 | 10 | - 2 | 2 | 10 | - 2 | 2 | 10 | - 2 | 2 | 10 | - 2 | 2 | 10 | - 2 | 2 | 10 | - 2 | 2 | 20 | - 2 | 2 | 20 | - 2 | 2 | 20 | - 2 | 2 | 20 | - 2 | 2 | 20 | - 2 | 2 | 20 | - 2 | 2 | 20 | - 2 | 2 | 20 | - 2 | 2 | 20 | - 2 | 2 | 20 | - 2 | 2 | | 1 - 2 | 2 | | 1 - 2 | 2 | | 1 - 2 | 2 | | 1 - 2 | 2 | | 1 - 2 | 2 | | 1 - 2 | 2 | | 1 - 2 | 2 | | 2 - 2 | 2 | | 2 - 2 | 2 | | 2 - 2 | 2 | | 2 - 2 | 2 | | 2 - 2 | 2 | | 2 - 2 | 2 | | 2 - 2 | 2 | | 2 - 2 | 2 | | 3 - 2 | 2 | | 3 - 2 | 2 | | 3 - 2 | 2 | | 3 - 2 | 2 | | 3 - 2 | 2 | | 3 - 2 | 2 | | 3 - 2 | 2 | | 3 - 2 | 2 | | 100 - 2 | 2 | | 200 - 2 | 2 | | 300 - 2 | 2 | | 1000 - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 3 | -1 | | 1 - 3 | -1 | | 1 - 3 | -1 | | 2 - 3 | -1 | | 2 - 3 | 3 | | 1 - 3 | 3 | | 1 - 3 | 3 | | 1 - 3 | 3 | | 1 - 3 | 3 | | 1 - 3 | 3 | | 1 - 3 | 3 | | 1 - 3 | 3 | | 1 - 3 | 3 | | 2 - 3 | 3 | | 2 - 3 | 3 | | 2 - 3 | 3 | | 2 - 3 | 3 | | 2 - 3 | 3 | | 2 - 3 | 3 | | 2 - 3 | 3 | | 2 - 3 | 3 | | - 3 | 3 | | - 3 | 3 | | - 4 | -1 | | 1 - 4 | -1 | | 1 - 4 | -1 | | 1 - 4 | -1 | | 1 - 4 | -1 | | 1 - 4 | -1 | | 1 - 4 | -1 | | 1 - 4 | -1 | | 1 - 4 | -1 | | 1 - 4 | -1 | | 1 - 4 | -1 | | 2 - 4 | -1 | | 2 - 4 | -1 | | 2 - 4 | -1 | | 2 - 4 | -1 | | 2 - 4 | -1 | | 2 - 4 | -1 | | 2 - 4 | -1 | | 2 - 4 | -1 | | 2 - 4 | -1 | | 200 - 4 | -1 | | - 4 | -1 | | -(123 rows) - --- default plan (inner, union) -select - jt.* -from - jsonb_table_test jtt, - json_table ( - jtt.js,'strict $[*]' as p - columns ( - n for ordinality, - a int path 'lax $.a' default -1 on empty, - nested path 'strict $.b[*]' as pb columns ( b int path '$' ), - nested path 'strict $.c[*]' as pc columns ( c int path '$' ) - ) - plan default (inner) - ) jt ORDER BY 1,2,3,4; - n | a | b | c ---------------------------------------------------------------------- - 2 | -1 | 1 | - 2 | -1 | 1 | - 2 | -1 | 2 | - 2 | -1 | 2 | - 2 | -1 | 3 | - 2 | -1 | 3 | - 2 | -1 | | 10 - 2 | -1 | | 20 - 2 | -1 | | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 100 | - 2 | 2 | 200 | - 2 | 2 | 300 | - 2 | 2 | 1000 | - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 
20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 3 | -1 | 1 | - 3 | -1 | 1 | - 3 | -1 | 2 | - 3 | -1 | 2 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 200 | -(107 rows) - --- specific plan (p inner (pb union pc)) -select - jt.* -from - jsonb_table_test jtt, - json_table ( - jtt.js,'strict $[*]' as p - columns ( - n for ordinality, - a int path 'lax $.a' default -1 on empty, - nested path 'strict $.b[*]' as pb columns ( b int path '$' ), - nested path 'strict $.c[*]' as pc columns ( c int path '$' ) - ) - plan (p inner (pb union pc)) - ) jt ORDER BY 1,2,3,4; - n | a | b | c ---------------------------------------------------------------------- - 2 | -1 | 1 | - 2 | -1 | 1 | - 2 | -1 | 2 | - 2 | -1 | 2 | - 2 | -1 | 3 | - 2 | -1 | 3 | - 2 | -1 | | 10 - 2 | -1 | | 20 - 2 | -1 | | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 100 | - 2 | 2 | 200 | - 2 | 2 | 300 | - 2 | 2 | 1000 | - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 3 | -1 | 1 | - 3 | -1 | 1 | - 3 | -1 | 2 | - 3 | -1 | 2 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 200 | -(107 rows) - --- default plan (inner, cross) -select - jt.* -from - jsonb_table_test jtt, - json_table ( - jtt.js,'strict $[*]' as p - columns ( - n for ordinality, - a int path 'lax $.a' default -1 on empty, - nested path 'strict $.b[*]' as pb columns ( b int path '$' ), - nested path 'strict $.c[*]' as pc columns ( c int path '$' ) - ) - plan default (cross, inner) - ) jt ORDER BY 1,2,3,4; - n | a | b | c ---------------------------------------------------------------------- - 2 | -1 | 1 | 10 - 2 | -1 | 1 | 20 - 2 | -1 | 1 | - 2 | -1 | 2 | 10 - 2 | -1 | 2 | 20 - 2 | -1 | 2 | - 2 | -1 | 3 | 10 - 2 | -1 | 3 | 20 - 2 | -1 | 3 | - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 
20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 100 | 10 - 2 | 2 | 100 | 20 - 2 | 2 | 100 | - 2 | 2 | 200 | 10 - 2 | 2 | 200 | 20 - 2 | 2 | 200 | - 2 | 2 | 300 | 10 - 2 | 2 | 300 | 20 - 2 | 2 | 300 | - 2 | 2 | 1000 | 10 - 2 | 2 | 1000 | 20 - 2 | 2 | 1000 | - 3 | 3 | 1 | - 3 | 3 | 2 | -(92 rows) - --- specific plan (p inner (pb cross pc)) -select - jt.* -from - jsonb_table_test jtt, - json_table ( - jtt.js,'strict $[*]' as p - columns ( - n for ordinality, - a int path 'lax $.a' default -1 on empty, - nested path 'strict $.b[*]' as pb columns ( b int path '$' ), - nested path 'strict $.c[*]' as pc columns ( c int path '$' ) - ) - plan (p inner (pb cross pc)) - ) jt ORDER BY 1,2,3,4; - n | a | b | c ---------------------------------------------------------------------- - 2 | -1 | 1 | 10 - 2 | -1 | 1 | 20 - 2 | -1 | 1 | - 2 | -1 | 2 | 10 - 2 | -1 | 2 | 20 - 2 | -1 | 2 | - 2 | -1 | 3 | 10 - 2 | -1 | 3 | 20 - 2 | -1 | 3 | - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 100 | 10 - 2 | 2 | 100 | 20 - 2 | 2 | 100 | - 2 | 2 | 200 | 10 - 2 | 2 | 200 | 20 - 2 | 2 | 200 | - 2 | 2 | 300 | 10 - 2 | 2 | 300 | 20 - 2 | 2 | 300 | - 2 | 2 | 1000 | 10 - 2 | 2 | 1000 | 20 - 2 | 2 | 1000 | - 3 | 3 | 1 | - 3 | 3 | 2 | -(92 rows) - --- default plan (outer, cross) -select - jt.* -from - jsonb_table_test jtt, - json_table ( - jtt.js,'strict $[*]' as p - columns ( - n for ordinality, - a int path 'lax $.a' default -1 on empty, - nested path 'strict $.b[*]' as pb columns ( b int path '$' ), - nested path 'strict $.c[*]' as pc columns ( c int path '$' ) - ) 
- plan default (outer, cross) - ) jt ORDER BY 1,2,3,4; - n | a | b | c ---------------------------------------------------------------------- - 1 | -1 | | - 1 | -1 | | - 1 | -1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 2 | -1 | 1 | 10 - 2 | -1 | 1 | 20 - 2 | -1 | 1 | - 2 | -1 | 2 | 10 - 2 | -1 | 2 | 20 - 2 | -1 | 2 | - 2 | -1 | 3 | 10 - 2 | -1 | 3 | 20 - 2 | -1 | 3 | - 2 | -1 | | - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 100 | 10 - 2 | 2 | 100 | 20 - 2 | 2 | 100 | - 2 | 2 | 200 | 10 - 2 | 2 | 200 | 20 - 2 | 2 | 200 | - 2 | 2 | 300 | 10 - 2 | 2 | 300 | 20 - 2 | 2 | 300 | - 2 | 2 | 1000 | 10 - 2 | 2 | 1000 | 20 - 2 | 2 | 1000 | - 2 | 2 | | - 3 | -1 | | - 3 | -1 | | - 3 | 3 | 1 | - 3 | 3 | 2 | - 3 | 3 | | - 3 | 3 | | - 3 | 3 | | - 3 | 3 | | - 3 | 3 | | - 3 | 3 | | - 3 | 3 | | - 3 | 3 | | - 3 | 3 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | -(129 rows) - --- specific plan (p outer (pb cross pc)) -select - jt.* -from - jsonb_table_test jtt, - json_table ( - jtt.js,'strict $[*]' as p - columns ( - n for ordinality, - a int path 'lax $.a' default -1 on empty, - nested path 'strict $.b[*]' as pb columns ( b int path '$' ), - nested path 'strict $.c[*]' as pc columns ( c int path '$' ) - ) - plan (p outer (pb cross pc)) - ) jt ORDER BY 1,2,3,4; - n | a | b | c ---------------------------------------------------------------------- - 1 | -1 | | - 1 | -1 | | - 1 | -1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 2 | -1 | 1 | 10 - 2 | -1 | 1 | 20 - 2 | -1 | 1 | - 2 | -1 | 2 | 10 - 2 | -1 | 2 | 20 - 2 | -1 | 2 | - 2 | -1 | 3 | 10 - 2 | -1 | 3 | 20 - 2 | -1 | 3 | - 2 | -1 | | - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 
| 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 100 | 10 - 2 | 2 | 100 | 20 - 2 | 2 | 100 | - 2 | 2 | 200 | 10 - 2 | 2 | 200 | 20 - 2 | 2 | 200 | - 2 | 2 | 300 | 10 - 2 | 2 | 300 | 20 - 2 | 2 | 300 | - 2 | 2 | 1000 | 10 - 2 | 2 | 1000 | 20 - 2 | 2 | 1000 | - 2 | 2 | | - 3 | -1 | | - 3 | -1 | | - 3 | 3 | 1 | - 3 | 3 | 2 | - 3 | 3 | | - 3 | 3 | | - 3 | 3 | | - 3 | 3 | | - 3 | 3 | | - 3 | 3 | | - 3 | 3 | | - 3 | 3 | | - 3 | 3 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | -(129 rows) - -select - jt.*, b1 + 100 as b -from - json_table (jsonb - '[ - {"a": 1, "b": [[1, 10], [2], [3, 30, 300]], "c": [1, null, 2]}, - {"a": 2, "b": [10, 20], "c": [1, null, 2]}, - {"x": "3", "b": [11, 22, 33, 44]} - ]', - '$[*]' as p - columns ( - n for ordinality, - a int path 'lax $.a' default -1 on error, - nested path 'strict $.b[*]' as pb columns ( - b text format json path '$', - nested path 'strict $[*]' as pb1 columns ( - b1 int path '$' - ) - ), - nested path 'strict $.c[*]' as pc columns ( - c text format json path '$', - nested path 'strict $[*]' as pc1 columns ( - c1 int path '$' - ) - ) - ) - --plan default(outer, cross) - plan(p outer ((pb inner pb1) cross (pc outer pc1))) - ) jt ORDER BY 1,2,3,4,5; - n | a | b | b1 | c | c1 | b ---------------------------------------------------------------------- - 1 | 1 | [1, 10] | 1 | 1 | | 101 - 1 | 1 | [1, 10] | 1 | 2 | | 101 - 1 | 1 | [1, 10] | 1 | null | | 101 - 1 | 1 | [1, 10] | 10 | 1 | | 110 - 1 | 1 | [1, 10] | 10 | 2 | | 110 - 1 | 1 | [1, 10] | 10 | null | | 110 - 1 | 1 | [2] | 2 | 1 | | 102 - 1 | 1 | [2] | 2 | 2 | | 102 - 1 | 1 | [2] | 2 | null | | 102 - 1 | 1 | [3, 30, 300] | 3 | 1 | | 103 - 1 | 1 | [3, 30, 300] | 3 | 2 | | 103 - 1 | 1 | [3, 30, 300] | 3 | null | | 103 - 1 | 1 | [3, 30, 300] | 30 | 1 | | 130 - 1 | 1 | [3, 30, 300] | 30 | 2 | | 130 - 1 | 1 | [3, 30, 300] | 30 | null | | 130 - 1 | 1 | [3, 30, 300] | 300 | 1 | | 400 - 1 | 1 | [3, 30, 300] | 300 | 2 | | 400 - 1 | 1 | [3, 30, 300] | 300 | null | | 400 - 2 | 2 | | | | | - 3 | | | | | | -(20 rows) - --- Should succeed (JSON arguments are passed to root and nested paths) -SELECT * -FROM - generate_series(1, 4) x, - generate_series(1, 3) y, - JSON_TABLE(jsonb - '[[1,2,3],[2,3,4,5],[3,4,5,6]]', - 'strict $[*] ? (@[*] < $x)' - PASSING x AS x, y AS y - COLUMNS ( - y text FORMAT JSON PATH '$', - NESTED PATH 'strict $[*] ? 
(@ >= $y)' - COLUMNS ( - z int PATH '$' - ) - ) - ) jt ORDER BY 4,1,2,3; - x | y | y | z ---------------------------------------------------------------------- - 2 | 1 | [1, 2, 3] | 1 - 3 | 1 | [1, 2, 3] | 1 - 4 | 1 | [1, 2, 3] | 1 - 2 | 1 | [1, 2, 3] | 2 - 2 | 2 | [1, 2, 3] | 2 - 3 | 1 | [1, 2, 3] | 2 - 3 | 1 | [2, 3, 4, 5] | 2 - 3 | 2 | [1, 2, 3] | 2 - 3 | 2 | [2, 3, 4, 5] | 2 - 4 | 1 | [1, 2, 3] | 2 - 4 | 1 | [2, 3, 4, 5] | 2 - 4 | 2 | [1, 2, 3] | 2 - 4 | 2 | [2, 3, 4, 5] | 2 - 2 | 1 | [1, 2, 3] | 3 - 2 | 2 | [1, 2, 3] | 3 - 2 | 3 | [1, 2, 3] | 3 - 3 | 1 | [1, 2, 3] | 3 - 3 | 1 | [2, 3, 4, 5] | 3 - 3 | 2 | [1, 2, 3] | 3 - 3 | 2 | [2, 3, 4, 5] | 3 - 3 | 3 | [1, 2, 3] | 3 - 3 | 3 | [2, 3, 4, 5] | 3 - 4 | 1 | [1, 2, 3] | 3 - 4 | 1 | [2, 3, 4, 5] | 3 - 4 | 1 | [3, 4, 5, 6] | 3 - 4 | 2 | [1, 2, 3] | 3 - 4 | 2 | [2, 3, 4, 5] | 3 - 4 | 2 | [3, 4, 5, 6] | 3 - 4 | 3 | [1, 2, 3] | 3 - 4 | 3 | [2, 3, 4, 5] | 3 - 4 | 3 | [3, 4, 5, 6] | 3 - 3 | 1 | [2, 3, 4, 5] | 4 - 3 | 2 | [2, 3, 4, 5] | 4 - 3 | 3 | [2, 3, 4, 5] | 4 - 4 | 1 | [2, 3, 4, 5] | 4 - 4 | 1 | [3, 4, 5, 6] | 4 - 4 | 2 | [2, 3, 4, 5] | 4 - 4 | 2 | [3, 4, 5, 6] | 4 - 4 | 3 | [2, 3, 4, 5] | 4 - 4 | 3 | [3, 4, 5, 6] | 4 - 3 | 1 | [2, 3, 4, 5] | 5 - 3 | 2 | [2, 3, 4, 5] | 5 - 3 | 3 | [2, 3, 4, 5] | 5 - 4 | 1 | [2, 3, 4, 5] | 5 - 4 | 1 | [3, 4, 5, 6] | 5 - 4 | 2 | [2, 3, 4, 5] | 5 - 4 | 2 | [3, 4, 5, 6] | 5 - 4 | 3 | [2, 3, 4, 5] | 5 - 4 | 3 | [3, 4, 5, 6] | 5 - 4 | 1 | [3, 4, 5, 6] | 6 - 4 | 2 | [3, 4, 5, 6] | 6 - 4 | 3 | [3, 4, 5, 6] | 6 -(52 rows) - -SET client_min_messages TO ERROR; -DROP SCHEMA "json table" CASCADE; diff --git a/src/test/regress/expected/json_table_select_only_0.out b/src/test/regress/expected/json_table_select_only_0.out deleted file mode 100644 index c04e76814..000000000 --- a/src/test/regress/expected/json_table_select_only_0.out +++ /dev/null @@ -1,9 +0,0 @@ --- --- PG15+ test --- -SHOW server_version \gset -SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 -\gset -\if :server_version_ge_15 -\else -\q diff --git a/src/test/regress/expected/pg15_json.out b/src/test/regress/expected/pg15_json.out deleted file mode 100644 index be263337a..000000000 --- a/src/test/regress/expected/pg15_json.out +++ /dev/null @@ -1,488 +0,0 @@ --- --- PG15+ test --- -SHOW server_version \gset -SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 -\gset -\if :server_version_ge_15 -\else -\q -\endif -CREATE SCHEMA pg15_json; -SET search_path TO pg15_json; -SET citus.next_shard_id TO 1687000; -CREATE TABLE test_table(id bigserial, value text); -SELECT create_distributed_table('test_table', 'id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -INSERT INTO test_table (value) SELECT i::text FROM generate_series(0,100)i; -CREATE TABLE my_films(id bigserial, js jsonb); -SELECT create_distributed_table('my_films', 'id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -INSERT INTO my_films(js) VALUES ( -'{ "favorites" : [ - { "kind" : "comedy", "films" : [ { "title" : "Bananas", "director" : "Woody Allen"}, - { "title" : "The Dinner Game", "director" : "Francis Veber" } ] }, - { "kind" : "horror", "films" : [{ "title" : "Psycho", "director" : "Alfred Hitchcock" } ] }, - { "kind" : "thriller", "films" : [{ "title" : "Vertigo", "director" : "Alfred Hitchcock" } ] }, - { "kind" : "drama", "films" : [{ "title" : "Yojimbo", "director" : "Akira Kurosawa" } ] } - ] }'); -INSERT 
INTO my_films(js) VALUES ( -'{ "favorites" : [ - { "kind" : "comedy", "films" : [ { "title" : "Bananas2", "director" : "Woody Allen"}, - { "title" : "The Dinner Game2", "director" : "Francis Veber" } ] }, - { "kind" : "horror", "films" : [{ "title" : "Psycho2", "director" : "Alfred Hitchcock" } ] }, - { "kind" : "thriller", "films" : [{ "title" : "Vertigo2", "director" : "Alfred Hitchcock" } ] }, - { "kind" : "drama", "films" : [{ "title" : "Yojimbo2", "director" : "Akira Kurosawa" } ] } - ] }'); --- a router query -SELECT jt.* FROM - my_films, - JSON_TABLE ( js, '$.favorites[*]' COLUMNS ( - id FOR ORDINALITY, - kind text PATH '$.kind', - NESTED PATH '$.films[*]' COLUMNS ( - title text PATH '$.title', - director text PATH '$.director'))) AS jt - WHERE my_films.id = 1 - ORDER BY 1,2,3,4; - id | kind | title | director ---------------------------------------------------------------------- - 1 | comedy | Bananas | Woody Allen - 1 | comedy | The Dinner Game | Francis Veber - 2 | horror | Psycho | Alfred Hitchcock - 3 | thriller | Vertigo | Alfred Hitchcock - 4 | drama | Yojimbo | Akira Kurosawa -(5 rows) - --- router query with an explicit LATEREL SUBQUERY -SELECT sub.* -FROM my_films, - lateral(SELECT * FROM JSON_TABLE (js, '$.favorites[*]' COLUMNS (id FOR ORDINALITY, - kind text PATH '$.kind', - NESTED PATH '$.films[*]' COLUMNS (title text PATH '$.title', director text PATH '$.director'))) AS jt) as sub -WHERE my_films.id = 1 ORDER BY 1,2,3,4; - id | kind | title | director ---------------------------------------------------------------------- - 1 | comedy | Bananas | Woody Allen - 1 | comedy | The Dinner Game | Francis Veber - 2 | horror | Psycho | Alfred Hitchcock - 3 | thriller | Vertigo | Alfred Hitchcock - 4 | drama | Yojimbo | Akira Kurosawa -(5 rows) - --- router query with an explicit LATEREL SUBQUERY and LIMIT -SELECT sub.* -FROM my_films, - lateral(SELECT * FROM JSON_TABLE (js, '$.favorites[*]' COLUMNS (id FOR ORDINALITY, - kind text PATH '$.kind', - NESTED PATH '$.films[*]' COLUMNS (title text PATH '$.title', director text PATH '$.director'))) AS jt ORDER BY id DESC LIMIT 1) as sub -WHERE my_films.id = 1 ORDER BY 1,2,3,4; - id | kind | title | director ---------------------------------------------------------------------- - 4 | drama | Yojimbo | Akira Kurosawa -(1 row) - --- set it DEBUG1 in case the plan changes --- we can see details -SET client_min_messages TO DEBUG1; --- a mult-shard query -SELECT jt.* FROM - my_films, - JSON_TABLE ( js, '$.favorites[*]' COLUMNS ( - id FOR ORDINALITY, - kind text PATH '$.kind', - NESTED PATH '$.films[*]' COLUMNS ( - title text PATH '$.title', - director text PATH '$.director'))) AS jt - ORDER BY 1,2,3,4; - id | kind | title | director ---------------------------------------------------------------------- - 1 | comedy | Bananas | Woody Allen - 1 | comedy | Bananas2 | Woody Allen - 1 | comedy | The Dinner Game | Francis Veber - 1 | comedy | The Dinner Game2 | Francis Veber - 2 | horror | Psycho | Alfred Hitchcock - 2 | horror | Psycho2 | Alfred Hitchcock - 3 | thriller | Vertigo | Alfred Hitchcock - 3 | thriller | Vertigo2 | Alfred Hitchcock - 4 | drama | Yojimbo | Akira Kurosawa - 4 | drama | Yojimbo2 | Akira Kurosawa -(10 rows) - --- recursively plan subqueries that has JSON_TABLE -SELECT count(*) FROM -( - SELECT jt.* FROM - my_films, - JSON_TABLE ( js, '$.favorites[*]' COLUMNS ( - id FOR ORDINALITY, - kind text PATH '$.kind', - NESTED PATH '$.films[*]' COLUMNS ( - title text PATH '$.title', - director text PATH '$.director'))) AS jt - LIMIT 
1) as sub_with_json, test_table -WHERE test_table.id = sub_with_json.id; -DEBUG: push down of limit count: 1 -DEBUG: generating subplan XXX_1 for subquery SELECT jt.id, jt.kind, jt.title, jt.director FROM pg15_json.my_films, LATERAL JSON_TABLE(my_films.js, '$."favorites"[*]' AS json_table_path_1 COLUMNS (id FOR ORDINALITY, kind text PATH '$."kind"', NESTED PATH '$."films"[*]' AS json_table_path_2 COLUMNS (title text PATH '$."title"', director text PATH '$."director"')) PLAN (json_table_path_1 OUTER json_table_path_2)) jt LIMIT 1 -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.id, intermediate_result.kind, intermediate_result.title, intermediate_result.director FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, kind text, title text, director text)) sub_with_json, pg15_json.test_table WHERE (test_table.id OPERATOR(pg_catalog.=) sub_with_json.id) - count ---------------------------------------------------------------------- - 1 -(1 row) - --- multi-shard query with an explicit LATEREL SUBQUERY -SELECT sub.* -FROM my_films JOIN - lateral - (SELECT * - FROM JSON_TABLE (js, '$.favorites[*]' COLUMNS (id FOR ORDINALITY, - kind text PATH '$.kind', NESTED PATH '$.films[*]' - COLUMNS (title text PATH '$.title', director text PATH '$.director'))) AS jt - LIMIT 1000) AS sub ON (true) - ORDER BY 1,2,3,4; - id | kind | title | director ---------------------------------------------------------------------- - 1 | comedy | Bananas | Woody Allen - 1 | comedy | Bananas2 | Woody Allen - 1 | comedy | The Dinner Game | Francis Veber - 1 | comedy | The Dinner Game2 | Francis Veber - 2 | horror | Psycho | Alfred Hitchcock - 2 | horror | Psycho2 | Alfred Hitchcock - 3 | thriller | Vertigo | Alfred Hitchcock - 3 | thriller | Vertigo2 | Alfred Hitchcock - 4 | drama | Yojimbo | Akira Kurosawa - 4 | drama | Yojimbo2 | Akira Kurosawa -(10 rows) - --- JSON_TABLE can be on the inner part of an outer joion -SELECT sub.* -FROM my_films LEFT JOIN - lateral - (SELECT * - FROM JSON_TABLE (js, '$.favorites[*]' COLUMNS (id FOR ORDINALITY, - kind text PATH '$.kind', NESTED PATH '$.films[*]' - COLUMNS (title text PATH '$.title', director text PATH '$.director'))) AS jt - LIMIT 1000) AS sub ON (true) - ORDER BY 1,2,3,4; - id | kind | title | director ---------------------------------------------------------------------- - 1 | comedy | Bananas | Woody Allen - 1 | comedy | Bananas2 | Woody Allen - 1 | comedy | The Dinner Game | Francis Veber - 1 | comedy | The Dinner Game2 | Francis Veber - 2 | horror | Psycho | Alfred Hitchcock - 2 | horror | Psycho2 | Alfred Hitchcock - 3 | thriller | Vertigo | Alfred Hitchcock - 3 | thriller | Vertigo2 | Alfred Hitchcock - 4 | drama | Yojimbo | Akira Kurosawa - 4 | drama | Yojimbo2 | Akira Kurosawa -(10 rows) - --- we can pushdown this correlated subquery in WHERE clause -SELECT count(*) -FROM my_films WHERE - (SELECT count(*) > 0 - FROM JSON_TABLE (js, '$.favorites[*]' COLUMNS (id FOR ORDINALITY, - kind text PATH '$.kind', NESTED PATH '$.films[*]' - COLUMNS (title text PATH '$.title', director text PATH '$.director'))) AS jt - LIMIT 1000); - count ---------------------------------------------------------------------- - 2 -(1 row) - --- we can pushdown this correlated subquery in SELECT clause - SELECT (SELECT count(*) > 0 - FROM JSON_TABLE (js, '$.favorites[*]' COLUMNS (id FOR ORDINALITY, - kind text PATH '$.kind', NESTED PATH '$.films[*]' - COLUMNS (title text PATH 
'$.title', director text PATH '$.director'))) AS jt) -FROM my_films; - ?column? ---------------------------------------------------------------------- - t - t -(2 rows) - --- multi-shard query with an explicit LATEREL SUBQUERY --- along with other tables -SELECT sub.* -FROM my_films JOIN - lateral - (SELECT * - FROM JSON_TABLE (js, '$.favorites[*]' COLUMNS (id FOR ORDINALITY, - kind text PATH '$.kind', NESTED PATH '$.films[*]' - COLUMNS (title text PATH '$.title', director text PATH '$.director'))) AS jt - LIMIT 1000) AS sub ON (true) JOIN test_table ON(my_films.id = test_table.id) - ORDER BY 1,2,3,4; - id | kind | title | director ---------------------------------------------------------------------- - 1 | comedy | Bananas | Woody Allen - 1 | comedy | Bananas2 | Woody Allen - 1 | comedy | The Dinner Game | Francis Veber - 1 | comedy | The Dinner Game2 | Francis Veber - 2 | horror | Psycho | Alfred Hitchcock - 2 | horror | Psycho2 | Alfred Hitchcock - 3 | thriller | Vertigo | Alfred Hitchcock - 3 | thriller | Vertigo2 | Alfred Hitchcock - 4 | drama | Yojimbo | Akira Kurosawa - 4 | drama | Yojimbo2 | Akira Kurosawa -(10 rows) - --- non-colocated join fails -SELECT sub.* -FROM my_films JOIN - lateral - (SELECT * - FROM JSON_TABLE (js, '$.favorites[*]' COLUMNS (id FOR ORDINALITY, - kind text PATH '$.kind', NESTED PATH '$.films[*]' - COLUMNS (title text PATH '$.title', director text PATH '$.director'))) AS jt - LIMIT 1000) AS sub ON (true) JOIN test_table ON(my_films.id != test_table.id) - ORDER BY 1,2,3,4; -ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns --- JSON_TABLE can be in the outer part of the join --- as long as there is a distributed table -SELECT sub.* -FROM my_films JOIN - lateral - (SELECT * - FROM JSON_TABLE (js, '$.favorites[*]' COLUMNS (id FOR ORDINALITY, - kind text PATH '$.kind', NESTED PATH '$.films[*]' - COLUMNS (title text PATH '$.title', director text PATH '$.director'))) AS jt - LIMIT 1000) AS sub ON (true) LEFT JOIN test_table ON(my_films.id = test_table.id) - ORDER BY 1,2,3,4; - id | kind | title | director ---------------------------------------------------------------------- - 1 | comedy | Bananas | Woody Allen - 1 | comedy | Bananas2 | Woody Allen - 1 | comedy | The Dinner Game | Francis Veber - 1 | comedy | The Dinner Game2 | Francis Veber - 2 | horror | Psycho | Alfred Hitchcock - 2 | horror | Psycho2 | Alfred Hitchcock - 3 | thriller | Vertigo | Alfred Hitchcock - 3 | thriller | Vertigo2 | Alfred Hitchcock - 4 | drama | Yojimbo | Akira Kurosawa - 4 | drama | Yojimbo2 | Akira Kurosawa -(10 rows) - --- JSON_TABLE cannot be on the outer side of the join -SELECT * -FROM json_table('[{"a":10,"b":20},{"a":30,"b":40}]'::JSONB, '$[*]' - COLUMNS (id FOR ORDINALITY, column_a int4 PATH '$.a', column_b int4 PATH '$.b', a int4, b int4, c text)) -LEFT JOIN LATERAL - (SELECT * - FROM my_films) AS foo on(foo.id = a); -ERROR: cannot pushdown the subquery -DETAIL: There exist a JSON_TABLE clause in the outer part of the outer join --- JSON_TABLE cannot be on the FROM clause alone -SELECT * -FROM json_table('[{"a":10,"b":20},{"a":30,"b":40}]'::JSONB, '$[*]' - COLUMNS (id FOR ORDINALITY, column_a int4 PATH '$.a', column_b int4 PATH '$.b', a int4, b int4, c text)) as foo -WHERE b > - (SELECT count(*) - FROM my_films WHERE id = foo.a); -ERROR: correlated subqueries are not supported when the FROM clause contains JSON_TABLE --- we can recursively plan json_tables on set operations -(SELECT * -FROM 
json_table('[{"a":10,"b":20},{"a":30,"b":40}]'::JSONB, '$[*]' - COLUMNS (id FOR ORDINALITY)) ORDER BY id ASC LIMIT 1) -UNION -(SELECT * -FROM json_table('[{"a":10,"b":20},{"a":30,"b":40}]'::JSONB, '$[*]' - COLUMNS (id FOR ORDINALITY)) ORDER BY id ASC LIMIT 1) -UNION -(SELECT id FROM test_table ORDER BY id ASC LIMIT 1); -DEBUG: generating subplan XXX_1 for subquery SELECT id FROM JSON_TABLE('[{"a": 10, "b": 20}, {"a": 30, "b": 40}]'::jsonb, '$[*]' AS json_table_path_1 COLUMNS (id FOR ORDINALITY) PLAN (json_table_path_1)) ORDER BY id LIMIT 1 -DEBUG: generating subplan XXX_2 for subquery SELECT id FROM JSON_TABLE('[{"a": 10, "b": 20}, {"a": 30, "b": 40}]'::jsonb, '$[*]' AS json_table_path_1 COLUMNS (id FOR ORDINALITY) PLAN (json_table_path_1)) ORDER BY id LIMIT 1 -DEBUG: push down of limit count: 1 -DEBUG: generating subplan XXX_3 for subquery SELECT id FROM pg15_json.test_table ORDER BY id LIMIT 1 -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer) UNION SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer) UNION SELECT intermediate_result.id FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(id bigint) - id ---------------------------------------------------------------------- - 1 -(1 row) - --- LIMIT in subquery not supported when json_table exists -SELECT * -FROM json_table('[{"a":10,"b":20},{"a":30,"b":40}]'::JSONB, '$[*]' - COLUMNS (id FOR ORDINALITY, column_a int4 PATH '$.a', column_b int4 PATH '$.b', a int4, b int4, c text)) -JOIN LATERAL - (SELECT * - FROM my_films WHERE json_table.id = a LIMIT 1) as foo ON (true); -ERROR: cannot push down this subquery -DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from a JSON_TABLE --- a little more complex query with multiple json_table -SELECT - director1 AS director, title1, kind1, title2, kind2 -FROM - my_films, - JSON_TABLE ( js, '$.favorites' AS favs COLUMNS ( - NESTED PATH '$[*]' AS films1 COLUMNS ( - kind1 text PATH '$.kind', - NESTED PATH '$.films[*]' AS film1 COLUMNS ( - title1 text PATH '$.title', - director1 text PATH '$.director') - ), - NESTED PATH '$[*]' AS films2 COLUMNS ( - kind2 text PATH '$.kind', - NESTED PATH '$.films[*]' AS film2 COLUMNS ( - title2 text PATH '$.title', - director2 text PATH '$.director' - ) - ) - ) - PLAN (favs INNER ((films1 INNER film1) CROSS (films2 INNER film2))) - ) AS jt - WHERE kind1 > kind2 AND director1 = director2 - ORDER BY 1,2,3,4; - director | title1 | kind1 | title2 | kind2 ---------------------------------------------------------------------- - Alfred Hitchcock | Vertigo | thriller | Psycho | horror - Alfred Hitchcock | Vertigo2 | thriller | Psycho2 | horror -(2 rows) - -RESET client_min_messages; --- test some utility functions on the target list & where clause -select jsonb_path_exists(js, '$.favorites') from my_films; - jsonb_path_exists ---------------------------------------------------------------------- - t - t -(2 rows) - -select bool_and(JSON_EXISTS(js, '$.favorites.films.title')) from my_films; - bool_and ---------------------------------------------------------------------- - t -(1 row) - -SELECT count(*) FROM my_films WHERE jsonb_path_exists(js, '$.favorites'); - count ---------------------------------------------------------------------- - 2 -(1 row) - -SELECT count(*) FROM 
my_films WHERE jsonb_path_exists(js, '$.favorites'); - count ---------------------------------------------------------------------- - 2 -(1 row) - -SELECT count(*) FROM my_films WHERE JSON_EXISTS(js, '$.favorites.films.title'); - count ---------------------------------------------------------------------- - 2 -(1 row) - --- check constraint with json_exists -create table user_profiles ( - id bigserial, - addresses jsonb, - anyjson jsonb, - check (json_exists( addresses, '$.main' )) -); -select create_distributed_table('user_profiles', 'id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -insert into user_profiles (addresses) VALUES (JSON_SCALAR('1')); -ERROR: new row for relation "user_profiles_1687008" violates check constraint "user_profiles_addresses_check" -DETAIL: Failing row contains (1, "1", null). -CONTEXT: while executing command on localhost:xxxxx -insert into user_profiles (addresses) VALUES ('{"main":"value"}'); --- we cannot insert because WITH UNIQUE KEYS -insert into user_profiles (addresses) VALUES (JSON ('{"main":"value", "main":"value"}' WITH UNIQUE KEYS)); -ERROR: duplicate JSON object key value --- we can insert with -insert into user_profiles (addresses) VALUES (JSON ('{"main":"value", "main":"value"}' WITHOUT UNIQUE KEYS)) RETURNING *; - id | addresses | anyjson ---------------------------------------------------------------------- - 4 | {"main": "value"} | -(1 row) - -TRUNCATE user_profiles; -INSERT INTO user_profiles (anyjson) VALUES ('12'), ('"abc"'), ('[1,2,3]'), ('{"a":12}'); -select anyjson, anyjson is json array as json_array, anyjson is json object as json_object, anyjson is json scalar as json_scalar, -anyjson is json with UNIQUE keys -from user_profiles WHERE anyjson IS NOT NULL ORDER BY 1; - anyjson | json_array | json_object | json_scalar | ?column? 
---------------------------------------------------------------------- - "abc" | f | f | t | t - 12 | f | f | t | t - [1, 2, 3] | t | f | f | t - {"a": 12} | f | t | f | t -(4 rows) - --- use json_query -SELECT i, - json_query('[{"x": "aaa"},{"x": "bbb"},{"x": "ccc"}]'::JSONB, '$[$i].x' passing id AS i RETURNING text omit quotes) -FROM generate_series(0, 3) i -JOIN my_films ON(id = i) ORDER BY 1; - i | json_query ---------------------------------------------------------------------- - 1 | bbb - 2 | ccc -(2 rows) - --- we can use JSON_TABLE in modification queries as well --- use log level such that we can see trace changes -SET client_min_messages TO DEBUG1; ---the JSON_TABLE subquery is recursively planned -UPDATE test_table SET VALUE = 'XXX' FROM( -SELECT jt.* FROM - my_films, - JSON_TABLE ( js, '$.favorites[*]' COLUMNS ( - id FOR ORDINALITY, - kind text PATH '$.kind', - NESTED PATH '$.films[*]' COLUMNS ( - title text PATH '$.title', - director text PATH '$.director'))) AS jt) as foo WHERE foo.id = test_table.id; -DEBUG: generating subplan XXX_1 for subquery SELECT jt.id, jt.kind, jt.title, jt.director FROM pg15_json.my_films, LATERAL JSON_TABLE(my_films.js, '$."favorites"[*]' AS json_table_path_1 COLUMNS (id FOR ORDINALITY, kind text PATH '$."kind"', NESTED PATH '$."films"[*]' AS json_table_path_2 COLUMNS (title text PATH '$."title"', director text PATH '$."director"')) PLAN (json_table_path_1 OUTER json_table_path_2)) jt -DEBUG: Plan XXX query after replacing subqueries and CTEs: UPDATE pg15_json.test_table SET value = 'XXX'::text FROM (SELECT intermediate_result.id, intermediate_result.kind, intermediate_result.title, intermediate_result.director FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, kind text, title text, director text)) foo WHERE (foo.id OPERATOR(pg_catalog.=) test_table.id) --- Subquery with JSON table can be pushed down because two distributed tables --- in the query are joined on distribution column -UPDATE test_table SET VALUE = 'XXX' FROM ( -SELECT my_films.id, jt.* FROM - my_films, - JSON_TABLE ( js, '$.favorites[*]' COLUMNS ( - kind text PATH '$.kind', - NESTED PATH '$.films[*]' COLUMNS ( - title text PATH '$.title', - director text PATH '$.director'))) AS jt) as foo WHERE foo.id = test_table.id; --- we can pushdown with CTEs as well -WITH json_cte AS -(SELECT my_films.id, jt.* FROM - my_films, - JSON_TABLE ( js, '$.favorites[*]' COLUMNS ( - kind text PATH '$.kind', - NESTED PATH '$.films[*]' COLUMNS ( - title text PATH '$.title', - director text PATH '$.director'))) AS jt) -UPDATE test_table SET VALUE = 'XYZ' FROM json_cte - WHERE json_cte.id = test_table.id; - -- we can recursively with CTEs as well -WITH json_cte AS -(SELECT my_films.id as film_id, jt.* FROM - my_films, - JSON_TABLE ( js, '$.favorites[*]' COLUMNS ( - kind text PATH '$.kind', - NESTED PATH '$.films[*]' COLUMNS ( - id FOR ORDINALITY, - title text PATH '$.title', - director text PATH '$.director'))) AS jt ORDER BY jt.id LIMIT 1) -UPDATE test_table SET VALUE = 'XYZ' FROM json_cte - WHERE json_cte.film_id = test_table.id; -DEBUG: generating subplan XXX_1 for CTE json_cte: SELECT my_films.id AS film_id, jt.kind, jt.id, jt.title, jt.director FROM pg15_json.my_films, LATERAL JSON_TABLE(my_films.js, '$."favorites"[*]' AS json_table_path_1 COLUMNS (kind text PATH '$."kind"', NESTED PATH '$."films"[*]' AS json_table_path_2 COLUMNS (id FOR ORDINALITY, title text PATH '$."title"', director text PATH '$."director"')) PLAN (json_table_path_1 OUTER 
json_table_path_2)) jt ORDER BY jt.id LIMIT 1 -DEBUG: push down of limit count: 1 -DEBUG: Plan XXX query after replacing subqueries and CTEs: UPDATE pg15_json.test_table SET value = 'XYZ'::text FROM (SELECT intermediate_result.film_id, intermediate_result.kind, intermediate_result.id, intermediate_result.title, intermediate_result.director FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(film_id bigint, kind text, id integer, title text, director text)) json_cte WHERE (json_cte.film_id OPERATOR(pg_catalog.=) test_table.id) -SET client_min_messages TO ERROR; -DROP SCHEMA pg15_json CASCADE; diff --git a/src/test/regress/expected/pg15_json_0.out b/src/test/regress/expected/pg15_json_0.out deleted file mode 100644 index c04e76814..000000000 --- a/src/test/regress/expected/pg15_json_0.out +++ /dev/null @@ -1,9 +0,0 @@ --- --- PG15+ test --- -SHOW server_version \gset -SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 -\gset -\if :server_version_ge_15 -\else -\q diff --git a/src/test/regress/json_table_select_only.out b/src/test/regress/json_table_select_only.out deleted file mode 100644 index 61a120202..000000000 --- a/src/test/regress/json_table_select_only.out +++ /dev/null @@ -1,1572 +0,0 @@ --- --- PG15+ test --- -SHOW server_version \gset -SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 -\gset -\if :server_version_ge_15 -\else -\q -\endif -SET search_path TO "json table"; --- insert some data -INSERT INTO jsonb_table_test (js) -VALUES ( - '[ - {"a": 1, "b": [], "c": []}, - {"a": 2, "b": [1, 2, 3], "c": [10, null, 20]}, - {"a": 3, "b": [1, 2], "c": []}, - {"x": "4", "b": [1, 2], "c": 123} - ]' -), -( - '[ - {"a": 1, "d": [], "c": []}, - {"a": 2, "d": [1, 2, 3], "c": [10, null, 20]}, - {"a": 3, "d": [1, 2], "c": []}, - {"x": "4", "d": [1, 2], "c": 123} - ]' -), -( - '[ - {"a": 1, "b": [], "c": []}, - {"a": 2, "b": [100, 200, 300], "c": [10, null, 20]}, - {"a": 3, "b": [1, 2], "c": [null]}, - {"x": "4", "b": [1, 2], "c": 2} - ]' -), -( - '[ - {"y": 1, "b": [], "c": []}, - {"a": 2, "b": [1, 2, 3], "c": [10, null, 20]}, - {"a": 3, "t": [1, 2], "c": []}, - {"x": "4", "b": [1, 200], "c": 96} - ]' -), -( - '[ - {"a": 1, "b": [], "c": []}, - {"a": 2, "b": [1, 2, 3], "c": [10, null, 20]}, - {"a": 3, "b": [1, 2], "c": []}, - {"x": "100", "b": [1, 2], "c": 123} - ]' -), -( - '[ - {"t": 1, "b": [], "c": []}, - {"t": 2, "b": [1, 2, 3], "x": [10, null, 20]}, - {"t": 3, "b": [1, 2], "c": []}, - {"x": "4", "b": [1, 2], "c": 123} - ]' -), -( - '[ - {"a": 1, "b": [], "c": []}, - {"a": 2, "b": [1, 2, 3], "c": [10, null, 20]}, - {"a": 3, "b": [1, 2], "c": []}, - {"U": "4", "b": [1, 2], "c": 123} - ]' -), -( - '[ - {"a": 1, "b": [], "c": []}, - {"a": 2, "b": [1, 2, 3], "c": [10, null, 20]}, - {"a": 3, "b": [1, 2], "c": []}, - {"x": "4", "b": [1, 2], "c": 123} - ]' -), -( - '[ - {"a": 1, "b": [], "c": []}, - {"a": 2, "b": [1, 2, 3], "c": [10, null, 20]}, - {"a": 3, "b": [1, 2], "c": []}, - {"x": "4", "b": [1, 2], "c": 123} - ]' -), -( - '[ - {"a": 1, "b": [], "c": []}, - {"a": 2, "b": [1000, 2, 3], "c": [10, null, 20]}, - {"a": 3, "b": [1, 2], "c": []}, - {"x": "4", "T": [1, 2], "c": 123} - ]' -), -( - '[ - {"a": 1, "b": [], "c": []}, - {"a": 2, "b": [1, 2, 3], "c": [10, null, 20]}, - {"a": 3, "b": [1, 2], "c": []}, - {"x": "4", "b": [1, 2], "c": 123} - ]' -), -( - '[ - {"ffa": 1, "b": [], "c": []}, - {"ffb": 2, "b": [1, 2, 3], "c": [10, null, 20]}, - {"fffc": 3, "b": [1, 2], "c": []}, - {"x": "4", "b": [1, 2], 
"c": 123} - ]' -); --- unspecified plan (outer, union) -select - jt.* -from - jsonb_table_test jtt, - json_table ( - jtt.js,'strict $[*]' as p - columns ( - n for ordinality, - a int path 'lax $.a' default -1 on empty, - nested path 'strict $.b[*]' as pb columns ( b int path '$' ), - nested path 'strict $.c[*]' as pc columns ( c int path '$' ) - ) - ) jt ORDER BY 1,2,3,4; - n | a | b | c ---------------------------------------------------------------------- - 1 | -1 | | - 1 | -1 | | - 1 | -1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 2 | -1 | 1 | - 2 | -1 | 1 | - 2 | -1 | 2 | - 2 | -1 | 2 | - 2 | -1 | 3 | - 2 | -1 | 3 | - 2 | -1 | | 10 - 2 | -1 | | 20 - 2 | -1 | | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 100 | - 2 | 2 | 200 | - 2 | 2 | 300 | - 2 | 2 | 1000 | - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 3 | -1 | 1 | - 3 | -1 | 1 | - 3 | -1 | 2 | - 3 | -1 | 2 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | | - 3 | 3 | | - 3 | 3 | | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 200 | - 4 | -1 | | - 4 | -1 | | -(123 rows) - --- default plan (outer, union) -select - jt.* -from - jsonb_table_test jtt, - json_table ( - jtt.js,'strict $[*]' as p - columns ( - n for ordinality, - a int path 'lax $.a' default -1 on empty, - nested path 'strict $.b[*]' as pb columns ( b int path '$' ), - nested path 'strict $.c[*]' as pc columns ( c int path '$' ) - ) - plan default (outer, union) - ) jt ORDER BY 1,2,3,4; - n | a | b | c ---------------------------------------------------------------------- - 1 | -1 | | - 1 | -1 | | - 1 | -1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 2 | -1 | 1 | - 2 | -1 | 1 | - 2 | -1 | 2 | - 2 | -1 | 2 | - 2 | -1 | 3 | - 2 | -1 | 3 | - 2 | -1 | | 10 - 2 | -1 | | 20 - 2 | -1 | | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 100 | - 2 | 2 | 200 | - 2 | 2 | 300 | - 2 | 2 | 1000 | - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 
| | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 3 | -1 | 1 | - 3 | -1 | 1 | - 3 | -1 | 2 | - 3 | -1 | 2 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | | - 3 | 3 | | - 3 | 3 | | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 200 | - 4 | -1 | | - 4 | -1 | | -(123 rows) - --- specific plan (p outer (pb union pc)) -select - jt.* -from - jsonb_table_test jtt, - json_table ( - jtt.js,'strict $[*]' as p - columns ( - n for ordinality, - a int path 'lax $.a' default -1 on empty, - nested path 'strict $.b[*]' as pb columns ( b int path '$' ), - nested path 'strict $.c[*]' as pc columns ( c int path '$' ) - ) - plan (p outer (pb union pc)) - ) jt ORDER BY 1,2,3,4; - n | a | b | c ---------------------------------------------------------------------- - 1 | -1 | | - 1 | -1 | | - 1 | -1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 2 | -1 | 1 | - 2 | -1 | 1 | - 2 | -1 | 2 | - 2 | -1 | 2 | - 2 | -1 | 3 | - 2 | -1 | 3 | - 2 | -1 | | 10 - 2 | -1 | | 20 - 2 | -1 | | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 100 | - 2 | 2 | 200 | - 2 | 2 | 300 | - 2 | 2 | 1000 | - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 3 | -1 | 1 | - 3 | -1 | 1 | - 3 | -1 | 2 | - 3 | -1 | 2 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | | - 3 | 3 | | - 3 | 3 | | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 200 | - 4 | -1 | | - 4 | -1 | | -(123 rows) - --- specific plan (p outer (pc union pb)) -select - jt.* -from - jsonb_table_test jtt, - json_table ( - jtt.js,'strict $[*]' as p - columns ( - n for ordinality, - a int path 'lax $.a' default -1 on empty, - nested path 'strict $.b[*]' as pb columns ( b int path '$' ), - nested path 'strict $.c[*]' as pc columns ( c int path '$' ) - ) - plan (p outer (pc union pb)) - ) jt ORDER BY 1,2,3,4; - n | a | c | b 
---------------------------------------------------------------------- - 1 | -1 | | - 1 | -1 | | - 1 | -1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 2 | -1 | 10 | - 2 | -1 | 20 | - 2 | -1 | | 1 - 2 | -1 | | 1 - 2 | -1 | | 2 - 2 | -1 | | 2 - 2 | -1 | | 3 - 2 | -1 | | 3 - 2 | -1 | | - 2 | 2 | 10 | - 2 | 2 | 10 | - 2 | 2 | 10 | - 2 | 2 | 10 | - 2 | 2 | 10 | - 2 | 2 | 10 | - 2 | 2 | 10 | - 2 | 2 | 10 | - 2 | 2 | 10 | - 2 | 2 | 10 | - 2 | 2 | 20 | - 2 | 2 | 20 | - 2 | 2 | 20 | - 2 | 2 | 20 | - 2 | 2 | 20 | - 2 | 2 | 20 | - 2 | 2 | 20 | - 2 | 2 | 20 | - 2 | 2 | 20 | - 2 | 2 | 20 | - 2 | 2 | | 1 - 2 | 2 | | 1 - 2 | 2 | | 1 - 2 | 2 | | 1 - 2 | 2 | | 1 - 2 | 2 | | 1 - 2 | 2 | | 1 - 2 | 2 | | 2 - 2 | 2 | | 2 - 2 | 2 | | 2 - 2 | 2 | | 2 - 2 | 2 | | 2 - 2 | 2 | | 2 - 2 | 2 | | 2 - 2 | 2 | | 2 - 2 | 2 | | 3 - 2 | 2 | | 3 - 2 | 2 | | 3 - 2 | 2 | | 3 - 2 | 2 | | 3 - 2 | 2 | | 3 - 2 | 2 | | 3 - 2 | 2 | | 3 - 2 | 2 | | 100 - 2 | 2 | | 200 - 2 | 2 | | 300 - 2 | 2 | | 1000 - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 3 | -1 | | 1 - 3 | -1 | | 1 - 3 | -1 | | 2 - 3 | -1 | | 2 - 3 | 3 | | 1 - 3 | 3 | | 1 - 3 | 3 | | 1 - 3 | 3 | | 1 - 3 | 3 | | 1 - 3 | 3 | | 1 - 3 | 3 | | 1 - 3 | 3 | | 1 - 3 | 3 | | 2 - 3 | 3 | | 2 - 3 | 3 | | 2 - 3 | 3 | | 2 - 3 | 3 | | 2 - 3 | 3 | | 2 - 3 | 3 | | 2 - 3 | 3 | | 2 - 3 | 3 | | - 3 | 3 | | - 3 | 3 | | - 4 | -1 | | 1 - 4 | -1 | | 1 - 4 | -1 | | 1 - 4 | -1 | | 1 - 4 | -1 | | 1 - 4 | -1 | | 1 - 4 | -1 | | 1 - 4 | -1 | | 1 - 4 | -1 | | 1 - 4 | -1 | | 1 - 4 | -1 | | 2 - 4 | -1 | | 2 - 4 | -1 | | 2 - 4 | -1 | | 2 - 4 | -1 | | 2 - 4 | -1 | | 2 - 4 | -1 | | 2 - 4 | -1 | | 2 - 4 | -1 | | 2 - 4 | -1 | | 200 - 4 | -1 | | - 4 | -1 | | -(123 rows) - --- default plan (inner, union) -select - jt.* -from - jsonb_table_test jtt, - json_table ( - jtt.js,'strict $[*]' as p - columns ( - n for ordinality, - a int path 'lax $.a' default -1 on empty, - nested path 'strict $.b[*]' as pb columns ( b int path '$' ), - nested path 'strict $.c[*]' as pc columns ( c int path '$' ) - ) - plan default (inner) - ) jt ORDER BY 1,2,3,4; - n | a | b | c ---------------------------------------------------------------------- - 2 | -1 | 1 | - 2 | -1 | 1 | - 2 | -1 | 2 | - 2 | -1 | 2 | - 2 | -1 | 3 | - 2 | -1 | 3 | - 2 | -1 | | 10 - 2 | -1 | | 20 - 2 | -1 | | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 100 | - 2 | 2 | 200 | - 2 | 2 | 300 | - 2 | 2 | 1000 | - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 3 | -1 | 1 | - 3 | -1 | 1 | - 3 | -1 | 2 | - 3 | -1 | 2 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | | - 4 | -1 | 1 | - 4 | -1 | 1 | - 
4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 200 | -(107 rows) - --- specific plan (p inner (pb union pc)) -select - jt.* -from - jsonb_table_test jtt, - json_table ( - jtt.js,'strict $[*]' as p - columns ( - n for ordinality, - a int path 'lax $.a' default -1 on empty, - nested path 'strict $.b[*]' as pb columns ( b int path '$' ), - nested path 'strict $.c[*]' as pc columns ( c int path '$' ) - ) - plan (p inner (pb union pc)) - ) jt ORDER BY 1,2,3,4; - n | a | b | c ---------------------------------------------------------------------- - 2 | -1 | 1 | - 2 | -1 | 1 | - 2 | -1 | 2 | - 2 | -1 | 2 | - 2 | -1 | 3 | - 2 | -1 | 3 | - 2 | -1 | | 10 - 2 | -1 | | 20 - 2 | -1 | | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 100 | - 2 | 2 | 200 | - 2 | 2 | 300 | - 2 | 2 | 1000 | - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 10 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | 20 - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 2 | 2 | | - 3 | -1 | 1 | - 3 | -1 | 1 | - 3 | -1 | 2 | - 3 | -1 | 2 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 1 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | 2 | - 3 | 3 | | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 1 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 2 | - 4 | -1 | 200 | -(107 rows) - --- default plan (inner, cross) -select - jt.* -from - jsonb_table_test jtt, - json_table ( - jtt.js,'strict $[*]' as p - columns ( - n for ordinality, - a int path 'lax $.a' default -1 on empty, - nested path 'strict $.b[*]' as pb columns ( b int path '$' ), - nested path 'strict $.c[*]' as pc columns ( c int path '$' ) - ) - plan default (cross, inner) - ) jt ORDER BY 1,2,3,4; - n | a | b | c ---------------------------------------------------------------------- - 2 | -1 | 1 | 10 - 2 | -1 | 1 | 20 - 2 | -1 | 1 | - 2 | -1 | 2 | 10 - 2 | -1 | 2 | 20 - 2 | -1 | 2 | - 2 | -1 | 3 | 10 - 2 | -1 | 3 | 20 - 2 | -1 | 3 | - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 
2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 100 | 10 - 2 | 2 | 100 | 20 - 2 | 2 | 100 | - 2 | 2 | 200 | 10 - 2 | 2 | 200 | 20 - 2 | 2 | 200 | - 2 | 2 | 300 | 10 - 2 | 2 | 300 | 20 - 2 | 2 | 300 | - 2 | 2 | 1000 | 10 - 2 | 2 | 1000 | 20 - 2 | 2 | 1000 | - 3 | 3 | 1 | - 3 | 3 | 2 | -(92 rows) - --- specific plan (p inner (pb cross pc)) -select - jt.* -from - jsonb_table_test jtt, - json_table ( - jtt.js,'strict $[*]' as p - columns ( - n for ordinality, - a int path 'lax $.a' default -1 on empty, - nested path 'strict $.b[*]' as pb columns ( b int path '$' ), - nested path 'strict $.c[*]' as pc columns ( c int path '$' ) - ) - plan (p inner (pb cross pc)) - ) jt ORDER BY 1,2,3,4; - n | a | b | c ---------------------------------------------------------------------- - 2 | -1 | 1 | 10 - 2 | -1 | 1 | 20 - 2 | -1 | 1 | - 2 | -1 | 2 | 10 - 2 | -1 | 2 | 20 - 2 | -1 | 2 | - 2 | -1 | 3 | 10 - 2 | -1 | 3 | 20 - 2 | -1 | 3 | - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 100 | 10 - 2 | 2 | 100 | 20 - 2 | 2 | 100 | - 2 | 2 | 200 | 10 - 2 | 2 | 200 | 20 - 2 | 2 | 200 | - 2 | 2 | 300 | 10 - 2 | 2 | 300 | 20 - 2 | 2 | 300 | - 2 | 2 | 1000 | 10 - 2 | 2 | 1000 | 20 - 2 | 2 | 1000 | - 3 | 3 | 1 | - 3 | 3 | 2 | -(92 rows) - --- default plan (outer, cross) -select - jt.* -from - jsonb_table_test jtt, - json_table ( - jtt.js,'strict $[*]' as p - columns ( - n for ordinality, - a int path 'lax $.a' default -1 on empty, - nested path 'strict $.b[*]' as pb columns ( b int path '$' ), - nested path 'strict $.c[*]' as pc columns ( c int path '$' ) - ) - plan default (outer, cross) - ) jt ORDER BY 1,2,3,4; - n | a | b | c ---------------------------------------------------------------------- - 1 | -1 | | - 1 | -1 | | - 1 | -1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 2 | -1 | 1 | 10 - 2 | -1 | 1 | 20 - 2 | -1 | 1 | - 2 | -1 | 2 | 10 - 2 | -1 | 2 | 20 - 2 | -1 | 2 | - 2 | -1 | 3 | 10 - 2 | -1 | 3 | 20 - 2 | -1 | 3 | - 2 | -1 | | - 2 | 2 | 1 | 10 - 2 | 2 
| 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 100 | 10 - 2 | 2 | 100 | 20 - 2 | 2 | 100 | - 2 | 2 | 200 | 10 - 2 | 2 | 200 | 20 - 2 | 2 | 200 | - 2 | 2 | 300 | 10 - 2 | 2 | 300 | 20 - 2 | 2 | 300 | - 2 | 2 | 1000 | 10 - 2 | 2 | 1000 | 20 - 2 | 2 | 1000 | - 2 | 2 | | - 3 | -1 | | - 3 | -1 | | - 3 | 3 | 1 | - 3 | 3 | 2 | - 3 | 3 | | - 3 | 3 | | - 3 | 3 | | - 3 | 3 | | - 3 | 3 | | - 3 | 3 | | - 3 | 3 | | - 3 | 3 | | - 3 | 3 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | -(129 rows) - --- specific plan (p outer (pb cross pc)) -select - jt.* -from - jsonb_table_test jtt, - json_table ( - jtt.js,'strict $[*]' as p - columns ( - n for ordinality, - a int path 'lax $.a' default -1 on empty, - nested path 'strict $.b[*]' as pb columns ( b int path '$' ), - nested path 'strict $.c[*]' as pc columns ( c int path '$' ) - ) - plan (p outer (pb cross pc)) - ) jt ORDER BY 1,2,3,4; - n | a | b | c ---------------------------------------------------------------------- - 1 | -1 | | - 1 | -1 | | - 1 | -1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 1 | 1 | | - 2 | -1 | 1 | 10 - 2 | -1 | 1 | 20 - 2 | -1 | 1 | - 2 | -1 | 2 | 10 - 2 | -1 | 2 | 20 - 2 | -1 | 2 | - 2 | -1 | 3 | 10 - 2 | -1 | 3 | 20 - 2 | -1 | 3 | - 2 | -1 | | - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 10 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | 20 - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 1 | - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 10 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | 20 - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 2 | - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 10 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | 20 - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 3 | - 2 | 2 | 100 | 
10 - 2 | 2 | 100 | 20 - 2 | 2 | 100 | - 2 | 2 | 200 | 10 - 2 | 2 | 200 | 20 - 2 | 2 | 200 | - 2 | 2 | 300 | 10 - 2 | 2 | 300 | 20 - 2 | 2 | 300 | - 2 | 2 | 1000 | 10 - 2 | 2 | 1000 | 20 - 2 | 2 | 1000 | - 2 | 2 | | - 3 | -1 | | - 3 | -1 | | - 3 | 3 | 1 | - 3 | 3 | 2 | - 3 | 3 | | - 3 | 3 | | - 3 | 3 | | - 3 | 3 | | - 3 | 3 | | - 3 | 3 | | - 3 | 3 | | - 3 | 3 | | - 3 | 3 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | - 4 | -1 | | -(129 rows) - -select - jt.*, b1 + 100 as b -from - json_table (jsonb - '[ - {"a": 1, "b": [[1, 10], [2], [3, 30, 300]], "c": [1, null, 2]}, - {"a": 2, "b": [10, 20], "c": [1, null, 2]}, - {"x": "3", "b": [11, 22, 33, 44]} - ]', - '$[*]' as p - columns ( - n for ordinality, - a int path 'lax $.a' default -1 on error, - nested path 'strict $.b[*]' as pb columns ( - b text format json path '$', - nested path 'strict $[*]' as pb1 columns ( - b1 int path '$' - ) - ), - nested path 'strict $.c[*]' as pc columns ( - c text format json path '$', - nested path 'strict $[*]' as pc1 columns ( - c1 int path '$' - ) - ) - ) - --plan default(outer, cross) - plan(p outer ((pb inner pb1) cross (pc outer pc1))) - ) jt ORDER BY 1,2,3,4,5; - n | a | b | b1 | c | c1 | b ---------------------------------------------------------------------- - 1 | 1 | [1, 10] | 1 | 1 | | 101 - 1 | 1 | [1, 10] | 1 | 2 | | 101 - 1 | 1 | [1, 10] | 1 | null | | 101 - 1 | 1 | [1, 10] | 10 | 1 | | 110 - 1 | 1 | [1, 10] | 10 | 2 | | 110 - 1 | 1 | [1, 10] | 10 | null | | 110 - 1 | 1 | [2] | 2 | 1 | | 102 - 1 | 1 | [2] | 2 | 2 | | 102 - 1 | 1 | [2] | 2 | null | | 102 - 1 | 1 | [3, 30, 300] | 3 | 1 | | 103 - 1 | 1 | [3, 30, 300] | 3 | 2 | | 103 - 1 | 1 | [3, 30, 300] | 3 | null | | 103 - 1 | 1 | [3, 30, 300] | 30 | 1 | | 130 - 1 | 1 | [3, 30, 300] | 30 | 2 | | 130 - 1 | 1 | [3, 30, 300] | 30 | null | | 130 - 1 | 1 | [3, 30, 300] | 300 | 1 | | 400 - 1 | 1 | [3, 30, 300] | 300 | 2 | | 400 - 1 | 1 | [3, 30, 300] | 300 | null | | 400 - 2 | 2 | | | | | - 3 | | | | | | -(20 rows) - --- Should succeed (JSON arguments are passed to root and nested paths) -SELECT * -FROM - generate_series(1, 4) x, - generate_series(1, 3) y, - JSON_TABLE(jsonb - '[[1,2,3],[2,3,4,5],[3,4,5,6]]', - 'strict $[*] ? (@[*] < $x)' - PASSING x AS x, y AS y - COLUMNS ( - y text FORMAT JSON PATH '$', - NESTED PATH 'strict $[*] ? 
(@ >= $y)' - COLUMNS ( - z int PATH '$' - ) - ) - ) jt ORDER BY 4,1,2,3; - x | y | y | z ---------------------------------------------------------------------- - 2 | 1 | [1, 2, 3] | 1 - 3 | 1 | [1, 2, 3] | 1 - 4 | 1 | [1, 2, 3] | 1 - 2 | 1 | [1, 2, 3] | 2 - 2 | 2 | [1, 2, 3] | 2 - 3 | 1 | [1, 2, 3] | 2 - 3 | 1 | [2, 3, 4, 5] | 2 - 3 | 2 | [1, 2, 3] | 2 - 3 | 2 | [2, 3, 4, 5] | 2 - 4 | 1 | [1, 2, 3] | 2 - 4 | 1 | [2, 3, 4, 5] | 2 - 4 | 2 | [1, 2, 3] | 2 - 4 | 2 | [2, 3, 4, 5] | 2 - 2 | 1 | [1, 2, 3] | 3 - 2 | 2 | [1, 2, 3] | 3 - 2 | 3 | [1, 2, 3] | 3 - 3 | 1 | [1, 2, 3] | 3 - 3 | 1 | [2, 3, 4, 5] | 3 - 3 | 2 | [1, 2, 3] | 3 - 3 | 2 | [2, 3, 4, 5] | 3 - 3 | 3 | [1, 2, 3] | 3 - 3 | 3 | [2, 3, 4, 5] | 3 - 4 | 1 | [1, 2, 3] | 3 - 4 | 1 | [2, 3, 4, 5] | 3 - 4 | 1 | [3, 4, 5, 6] | 3 - 4 | 2 | [1, 2, 3] | 3 - 4 | 2 | [2, 3, 4, 5] | 3 - 4 | 2 | [3, 4, 5, 6] | 3 - 4 | 3 | [1, 2, 3] | 3 - 4 | 3 | [2, 3, 4, 5] | 3 - 4 | 3 | [3, 4, 5, 6] | 3 - 3 | 1 | [2, 3, 4, 5] | 4 - 3 | 2 | [2, 3, 4, 5] | 4 - 3 | 3 | [2, 3, 4, 5] | 4 - 4 | 1 | [2, 3, 4, 5] | 4 - 4 | 1 | [3, 4, 5, 6] | 4 - 4 | 2 | [2, 3, 4, 5] | 4 - 4 | 2 | [3, 4, 5, 6] | 4 - 4 | 3 | [2, 3, 4, 5] | 4 - 4 | 3 | [3, 4, 5, 6] | 4 - 3 | 1 | [2, 3, 4, 5] | 5 - 3 | 2 | [2, 3, 4, 5] | 5 - 3 | 3 | [2, 3, 4, 5] | 5 - 4 | 1 | [2, 3, 4, 5] | 5 - 4 | 1 | [3, 4, 5, 6] | 5 - 4 | 2 | [2, 3, 4, 5] | 5 - 4 | 2 | [3, 4, 5, 6] | 5 - 4 | 3 | [2, 3, 4, 5] | 5 - 4 | 3 | [3, 4, 5, 6] | 5 - 4 | 1 | [3, 4, 5, 6] | 6 - 4 | 2 | [3, 4, 5, 6] | 6 - 4 | 3 | [3, 4, 5, 6] | 6 -(52 rows) - diff --git a/src/test/regress/json_table_select_only_0.out b/src/test/regress/json_table_select_only_0.out deleted file mode 100644 index c04e76814..000000000 --- a/src/test/regress/json_table_select_only_0.out +++ /dev/null @@ -1,9 +0,0 @@ --- --- PG15+ test --- -SHOW server_version \gset -SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 -\gset -\if :server_version_ge_15 -\else -\q diff --git a/src/test/regress/multi_schedule b/src/test/regress/multi_schedule index c7b804f85..c8ad97d5a 100644 --- a/src/test/regress/multi_schedule +++ b/src/test/regress/multi_schedule @@ -55,7 +55,7 @@ test: subquery_in_targetlist subquery_in_where subquery_complex_target_list subq test: subquery_prepared_statements test: non_colocated_leaf_subquery_joins non_colocated_subquery_joins test: cte_inline recursive_view_local_table values sequences_with_different_types -test: pg13 pg12 pg15_json json_table_select_only +test: pg13 pg12 # run pg14 sequentially as it syncs metadata test: pg14 test: pg15 diff --git a/src/test/regress/sql/json_table_select_only.sql b/src/test/regress/sql/json_table_select_only.sql deleted file mode 100644 index 250315a25..000000000 --- a/src/test/regress/sql/json_table_select_only.sql +++ /dev/null @@ -1,330 +0,0 @@ --- --- PG15+ test --- -SHOW server_version \gset -SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 -\gset -\if :server_version_ge_15 -\else -\q -\endif - -SET search_path TO "json table"; - -CREATE SCHEMA "json table"; -SET search_path TO "json table"; -CREATE TABLE jsonb_table_test (id bigserial, js jsonb); -SELECT create_distributed_table('jsonb_table_test', 'id'); - --- insert some data -INSERT INTO jsonb_table_test (js) -VALUES ( - '[ - {"a": 1, "b": [], "c": []}, - {"a": 2, "b": [1, 2, 3], "c": [10, null, 20]}, - {"a": 3, "b": [1, 2], "c": []}, - {"x": "4", "b": [1, 2], "c": 123} - ]' -), -( - '[ - {"a": 1, "d": [], "c": []}, - {"a": 2, "d": [1, 2, 3], "c": [10, null, 20]}, - {"a": 3, "d": [1, 2], "c": []}, - {"x": 
"4", "d": [1, 2], "c": 123} - ]' -), -( - '[ - {"a": 1, "b": [], "c": []}, - {"a": 2, "b": [100, 200, 300], "c": [10, null, 20]}, - {"a": 3, "b": [1, 2], "c": [null]}, - {"x": "4", "b": [1, 2], "c": 2} - ]' -), -( - '[ - {"y": 1, "b": [], "c": []}, - {"a": 2, "b": [1, 2, 3], "c": [10, null, 20]}, - {"a": 3, "t": [1, 2], "c": []}, - {"x": "4", "b": [1, 200], "c": 96} - ]' -), -( - '[ - {"a": 1, "b": [], "c": []}, - {"a": 2, "b": [1, 2, 3], "c": [10, null, 20]}, - {"a": 3, "b": [1, 2], "c": []}, - {"x": "100", "b": [1, 2], "c": 123} - ]' -), -( - '[ - {"t": 1, "b": [], "c": []}, - {"t": 2, "b": [1, 2, 3], "x": [10, null, 20]}, - {"t": 3, "b": [1, 2], "c": []}, - {"x": "4", "b": [1, 2], "c": 123} - ]' -), -( - '[ - {"a": 1, "b": [], "c": []}, - {"a": 2, "b": [1, 2, 3], "c": [10, null, 20]}, - {"a": 3, "b": [1, 2], "c": []}, - {"U": "4", "b": [1, 2], "c": 123} - ]' -), -( - '[ - {"a": 1, "b": [], "c": []}, - {"a": 2, "b": [1, 2, 3], "c": [10, null, 20]}, - {"a": 3, "b": [1, 2], "c": []}, - {"x": "4", "b": [1, 2], "c": 123} - ]' -), -( - '[ - {"a": 1, "b": [], "c": []}, - {"a": 2, "b": [1, 2, 3], "c": [10, null, 20]}, - {"a": 3, "b": [1, 2], "c": []}, - {"x": "4", "b": [1, 2], "c": 123} - ]' -), -( - '[ - {"a": 1, "b": [], "c": []}, - {"a": 2, "b": [1000, 2, 3], "c": [10, null, 20]}, - {"a": 3, "b": [1, 2], "c": []}, - {"x": "4", "T": [1, 2], "c": 123} - ]' -), -( - '[ - {"a": 1, "b": [], "c": []}, - {"a": 2, "b": [1, 2, 3], "c": [10, null, 20]}, - {"a": 3, "b": [1, 2], "c": []}, - {"x": "4", "b": [1, 2], "c": 123} - ]' -), -( - '[ - {"ffa": 1, "b": [], "c": []}, - {"ffb": 2, "b": [1, 2, 3], "c": [10, null, 20]}, - {"fffc": 3, "b": [1, 2], "c": []}, - {"x": "4", "b": [1, 2], "c": 123} - ]' -); - --- unspecified plan (outer, union) -select - jt.* -from - jsonb_table_test jtt, - json_table ( - jtt.js,'strict $[*]' as p - columns ( - n for ordinality, - a int path 'lax $.a' default -1 on empty, - nested path 'strict $.b[*]' as pb columns ( b int path '$' ), - nested path 'strict $.c[*]' as pc columns ( c int path '$' ) - ) - ) jt ORDER BY 1,2,3,4; - - - --- default plan (outer, union) -select - jt.* -from - jsonb_table_test jtt, - json_table ( - jtt.js,'strict $[*]' as p - columns ( - n for ordinality, - a int path 'lax $.a' default -1 on empty, - nested path 'strict $.b[*]' as pb columns ( b int path '$' ), - nested path 'strict $.c[*]' as pc columns ( c int path '$' ) - ) - plan default (outer, union) - ) jt ORDER BY 1,2,3,4; - --- specific plan (p outer (pb union pc)) -select - jt.* -from - jsonb_table_test jtt, - json_table ( - jtt.js,'strict $[*]' as p - columns ( - n for ordinality, - a int path 'lax $.a' default -1 on empty, - nested path 'strict $.b[*]' as pb columns ( b int path '$' ), - nested path 'strict $.c[*]' as pc columns ( c int path '$' ) - ) - plan (p outer (pb union pc)) - ) jt ORDER BY 1,2,3,4; - --- specific plan (p outer (pc union pb)) -select - jt.* -from - jsonb_table_test jtt, - json_table ( - jtt.js,'strict $[*]' as p - columns ( - n for ordinality, - a int path 'lax $.a' default -1 on empty, - nested path 'strict $.b[*]' as pb columns ( b int path '$' ), - nested path 'strict $.c[*]' as pc columns ( c int path '$' ) - ) - plan (p outer (pc union pb)) - ) jt ORDER BY 1,2,3,4; - --- default plan (inner, union) -select - jt.* -from - jsonb_table_test jtt, - json_table ( - jtt.js,'strict $[*]' as p - columns ( - n for ordinality, - a int path 'lax $.a' default -1 on empty, - nested path 'strict $.b[*]' as pb columns ( b int path '$' ), - nested path 'strict $.c[*]' as pc 
columns ( c int path '$' ) - ) - plan default (inner) - ) jt ORDER BY 1,2,3,4; - --- specific plan (p inner (pb union pc)) -select - jt.* -from - jsonb_table_test jtt, - json_table ( - jtt.js,'strict $[*]' as p - columns ( - n for ordinality, - a int path 'lax $.a' default -1 on empty, - nested path 'strict $.b[*]' as pb columns ( b int path '$' ), - nested path 'strict $.c[*]' as pc columns ( c int path '$' ) - ) - plan (p inner (pb union pc)) - ) jt ORDER BY 1,2,3,4; - --- default plan (inner, cross) -select - jt.* -from - jsonb_table_test jtt, - json_table ( - jtt.js,'strict $[*]' as p - columns ( - n for ordinality, - a int path 'lax $.a' default -1 on empty, - nested path 'strict $.b[*]' as pb columns ( b int path '$' ), - nested path 'strict $.c[*]' as pc columns ( c int path '$' ) - ) - plan default (cross, inner) - ) jt ORDER BY 1,2,3,4; - --- specific plan (p inner (pb cross pc)) -select - jt.* -from - jsonb_table_test jtt, - json_table ( - jtt.js,'strict $[*]' as p - columns ( - n for ordinality, - a int path 'lax $.a' default -1 on empty, - nested path 'strict $.b[*]' as pb columns ( b int path '$' ), - nested path 'strict $.c[*]' as pc columns ( c int path '$' ) - ) - plan (p inner (pb cross pc)) - ) jt ORDER BY 1,2,3,4; - --- default plan (outer, cross) -select - jt.* -from - jsonb_table_test jtt, - json_table ( - jtt.js,'strict $[*]' as p - columns ( - n for ordinality, - a int path 'lax $.a' default -1 on empty, - nested path 'strict $.b[*]' as pb columns ( b int path '$' ), - nested path 'strict $.c[*]' as pc columns ( c int path '$' ) - ) - plan default (outer, cross) - ) jt ORDER BY 1,2,3,4; - --- specific plan (p outer (pb cross pc)) -select - jt.* -from - jsonb_table_test jtt, - json_table ( - jtt.js,'strict $[*]' as p - columns ( - n for ordinality, - a int path 'lax $.a' default -1 on empty, - nested path 'strict $.b[*]' as pb columns ( b int path '$' ), - nested path 'strict $.c[*]' as pc columns ( c int path '$' ) - ) - plan (p outer (pb cross pc)) - ) jt ORDER BY 1,2,3,4; - - -select - jt.*, b1 + 100 as b -from - json_table (jsonb - '[ - {"a": 1, "b": [[1, 10], [2], [3, 30, 300]], "c": [1, null, 2]}, - {"a": 2, "b": [10, 20], "c": [1, null, 2]}, - {"x": "3", "b": [11, 22, 33, 44]} - ]', - '$[*]' as p - columns ( - n for ordinality, - a int path 'lax $.a' default -1 on error, - nested path 'strict $.b[*]' as pb columns ( - b text format json path '$', - nested path 'strict $[*]' as pb1 columns ( - b1 int path '$' - ) - ), - nested path 'strict $.c[*]' as pc columns ( - c text format json path '$', - nested path 'strict $[*]' as pc1 columns ( - c1 int path '$' - ) - ) - ) - --plan default(outer, cross) - plan(p outer ((pb inner pb1) cross (pc outer pc1))) - ) jt ORDER BY 1,2,3,4,5; - --- Should succeed (JSON arguments are passed to root and nested paths) -SELECT * -FROM - generate_series(1, 4) x, - generate_series(1, 3) y, - JSON_TABLE(jsonb - '[[1,2,3],[2,3,4,5],[3,4,5,6]]', - 'strict $[*] ? (@[*] < $x)' - PASSING x AS x, y AS y - COLUMNS ( - y text FORMAT JSON PATH '$', - NESTED PATH 'strict $[*] ? 
(@ >= $y)' - COLUMNS ( - z int PATH '$' - ) - ) - ) jt ORDER BY 4,1,2,3; - -SET client_min_messages TO ERROR; -DROP SCHEMA "json table" CASCADE; - diff --git a/src/test/regress/sql/pg15_json.sql b/src/test/regress/sql/pg15_json.sql deleted file mode 100644 index 640953ac7..000000000 --- a/src/test/regress/sql/pg15_json.sql +++ /dev/null @@ -1,326 +0,0 @@ --- --- PG15+ test --- -SHOW server_version \gset -SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 -\gset -\if :server_version_ge_15 -\else -\q -\endif - -CREATE SCHEMA pg15_json; -SET search_path TO pg15_json; - -SET citus.next_shard_id TO 1687000; - -CREATE TABLE test_table(id bigserial, value text); -SELECT create_distributed_table('test_table', 'id'); -INSERT INTO test_table (value) SELECT i::text FROM generate_series(0,100)i; - - -CREATE TABLE my_films(id bigserial, js jsonb); -SELECT create_distributed_table('my_films', 'id'); - -INSERT INTO my_films(js) VALUES ( -'{ "favorites" : [ - { "kind" : "comedy", "films" : [ { "title" : "Bananas", "director" : "Woody Allen"}, - { "title" : "The Dinner Game", "director" : "Francis Veber" } ] }, - { "kind" : "horror", "films" : [{ "title" : "Psycho", "director" : "Alfred Hitchcock" } ] }, - { "kind" : "thriller", "films" : [{ "title" : "Vertigo", "director" : "Alfred Hitchcock" } ] }, - { "kind" : "drama", "films" : [{ "title" : "Yojimbo", "director" : "Akira Kurosawa" } ] } - ] }'); - -INSERT INTO my_films(js) VALUES ( -'{ "favorites" : [ - { "kind" : "comedy", "films" : [ { "title" : "Bananas2", "director" : "Woody Allen"}, - { "title" : "The Dinner Game2", "director" : "Francis Veber" } ] }, - { "kind" : "horror", "films" : [{ "title" : "Psycho2", "director" : "Alfred Hitchcock" } ] }, - { "kind" : "thriller", "films" : [{ "title" : "Vertigo2", "director" : "Alfred Hitchcock" } ] }, - { "kind" : "drama", "films" : [{ "title" : "Yojimbo2", "director" : "Akira Kurosawa" } ] } - ] }'); - --- a router query -SELECT jt.* FROM - my_films, - JSON_TABLE ( js, '$.favorites[*]' COLUMNS ( - id FOR ORDINALITY, - kind text PATH '$.kind', - NESTED PATH '$.films[*]' COLUMNS ( - title text PATH '$.title', - director text PATH '$.director'))) AS jt - WHERE my_films.id = 1 - ORDER BY 1,2,3,4; - --- router query with an explicit LATEREL SUBQUERY -SELECT sub.* -FROM my_films, - lateral(SELECT * FROM JSON_TABLE (js, '$.favorites[*]' COLUMNS (id FOR ORDINALITY, - kind text PATH '$.kind', - NESTED PATH '$.films[*]' COLUMNS (title text PATH '$.title', director text PATH '$.director'))) AS jt) as sub -WHERE my_films.id = 1 ORDER BY 1,2,3,4; - --- router query with an explicit LATEREL SUBQUERY and LIMIT -SELECT sub.* -FROM my_films, - lateral(SELECT * FROM JSON_TABLE (js, '$.favorites[*]' COLUMNS (id FOR ORDINALITY, - kind text PATH '$.kind', - NESTED PATH '$.films[*]' COLUMNS (title text PATH '$.title', director text PATH '$.director'))) AS jt ORDER BY id DESC LIMIT 1) as sub -WHERE my_films.id = 1 ORDER BY 1,2,3,4; - --- set it DEBUG1 in case the plan changes --- we can see details -SET client_min_messages TO DEBUG1; - --- a mult-shard query -SELECT jt.* FROM - my_films, - JSON_TABLE ( js, '$.favorites[*]' COLUMNS ( - id FOR ORDINALITY, - kind text PATH '$.kind', - NESTED PATH '$.films[*]' COLUMNS ( - title text PATH '$.title', - director text PATH '$.director'))) AS jt - ORDER BY 1,2,3,4; - --- recursively plan subqueries that has JSON_TABLE -SELECT count(*) FROM -( - SELECT jt.* FROM - my_films, - JSON_TABLE ( js, '$.favorites[*]' COLUMNS ( - id FOR ORDINALITY, - kind text PATH 
'$.kind', - NESTED PATH '$.films[*]' COLUMNS ( - title text PATH '$.title', - director text PATH '$.director'))) AS jt - LIMIT 1) as sub_with_json, test_table -WHERE test_table.id = sub_with_json.id; - - --- multi-shard query with an explicit LATEREL SUBQUERY -SELECT sub.* -FROM my_films JOIN - lateral - (SELECT * - FROM JSON_TABLE (js, '$.favorites[*]' COLUMNS (id FOR ORDINALITY, - kind text PATH '$.kind', NESTED PATH '$.films[*]' - COLUMNS (title text PATH '$.title', director text PATH '$.director'))) AS jt - LIMIT 1000) AS sub ON (true) - ORDER BY 1,2,3,4; - --- JSON_TABLE can be on the inner part of an outer joion -SELECT sub.* -FROM my_films LEFT JOIN - lateral - (SELECT * - FROM JSON_TABLE (js, '$.favorites[*]' COLUMNS (id FOR ORDINALITY, - kind text PATH '$.kind', NESTED PATH '$.films[*]' - COLUMNS (title text PATH '$.title', director text PATH '$.director'))) AS jt - LIMIT 1000) AS sub ON (true) - ORDER BY 1,2,3,4; - --- we can pushdown this correlated subquery in WHERE clause -SELECT count(*) -FROM my_films WHERE - (SELECT count(*) > 0 - FROM JSON_TABLE (js, '$.favorites[*]' COLUMNS (id FOR ORDINALITY, - kind text PATH '$.kind', NESTED PATH '$.films[*]' - COLUMNS (title text PATH '$.title', director text PATH '$.director'))) AS jt - LIMIT 1000); - --- we can pushdown this correlated subquery in SELECT clause - SELECT (SELECT count(*) > 0 - FROM JSON_TABLE (js, '$.favorites[*]' COLUMNS (id FOR ORDINALITY, - kind text PATH '$.kind', NESTED PATH '$.films[*]' - COLUMNS (title text PATH '$.title', director text PATH '$.director'))) AS jt) -FROM my_films; - --- multi-shard query with an explicit LATEREL SUBQUERY --- along with other tables -SELECT sub.* -FROM my_films JOIN - lateral - (SELECT * - FROM JSON_TABLE (js, '$.favorites[*]' COLUMNS (id FOR ORDINALITY, - kind text PATH '$.kind', NESTED PATH '$.films[*]' - COLUMNS (title text PATH '$.title', director text PATH '$.director'))) AS jt - LIMIT 1000) AS sub ON (true) JOIN test_table ON(my_films.id = test_table.id) - ORDER BY 1,2,3,4; - --- non-colocated join fails -SELECT sub.* -FROM my_films JOIN - lateral - (SELECT * - FROM JSON_TABLE (js, '$.favorites[*]' COLUMNS (id FOR ORDINALITY, - kind text PATH '$.kind', NESTED PATH '$.films[*]' - COLUMNS (title text PATH '$.title', director text PATH '$.director'))) AS jt - LIMIT 1000) AS sub ON (true) JOIN test_table ON(my_films.id != test_table.id) - ORDER BY 1,2,3,4; - --- JSON_TABLE can be in the outer part of the join --- as long as there is a distributed table -SELECT sub.* -FROM my_films JOIN - lateral - (SELECT * - FROM JSON_TABLE (js, '$.favorites[*]' COLUMNS (id FOR ORDINALITY, - kind text PATH '$.kind', NESTED PATH '$.films[*]' - COLUMNS (title text PATH '$.title', director text PATH '$.director'))) AS jt - LIMIT 1000) AS sub ON (true) LEFT JOIN test_table ON(my_films.id = test_table.id) - ORDER BY 1,2,3,4; - --- JSON_TABLE cannot be on the outer side of the join -SELECT * -FROM json_table('[{"a":10,"b":20},{"a":30,"b":40}]'::JSONB, '$[*]' - COLUMNS (id FOR ORDINALITY, column_a int4 PATH '$.a', column_b int4 PATH '$.b', a int4, b int4, c text)) -LEFT JOIN LATERAL - (SELECT * - FROM my_films) AS foo on(foo.id = a); - - --- JSON_TABLE cannot be on the FROM clause alone -SELECT * -FROM json_table('[{"a":10,"b":20},{"a":30,"b":40}]'::JSONB, '$[*]' - COLUMNS (id FOR ORDINALITY, column_a int4 PATH '$.a', column_b int4 PATH '$.b', a int4, b int4, c text)) as foo -WHERE b > - (SELECT count(*) - FROM my_films WHERE id = foo.a); - --- we can recursively plan json_tables on set operations 
-(SELECT * -FROM json_table('[{"a":10,"b":20},{"a":30,"b":40}]'::JSONB, '$[*]' - COLUMNS (id FOR ORDINALITY)) ORDER BY id ASC LIMIT 1) -UNION -(SELECT * -FROM json_table('[{"a":10,"b":20},{"a":30,"b":40}]'::JSONB, '$[*]' - COLUMNS (id FOR ORDINALITY)) ORDER BY id ASC LIMIT 1) -UNION -(SELECT id FROM test_table ORDER BY id ASC LIMIT 1); - --- LIMIT in subquery not supported when json_table exists -SELECT * -FROM json_table('[{"a":10,"b":20},{"a":30,"b":40}]'::JSONB, '$[*]' - COLUMNS (id FOR ORDINALITY, column_a int4 PATH '$.a', column_b int4 PATH '$.b', a int4, b int4, c text)) -JOIN LATERAL - (SELECT * - FROM my_films WHERE json_table.id = a LIMIT 1) as foo ON (true); - --- a little more complex query with multiple json_table -SELECT - director1 AS director, title1, kind1, title2, kind2 -FROM - my_films, - JSON_TABLE ( js, '$.favorites' AS favs COLUMNS ( - NESTED PATH '$[*]' AS films1 COLUMNS ( - kind1 text PATH '$.kind', - NESTED PATH '$.films[*]' AS film1 COLUMNS ( - title1 text PATH '$.title', - director1 text PATH '$.director') - ), - NESTED PATH '$[*]' AS films2 COLUMNS ( - kind2 text PATH '$.kind', - NESTED PATH '$.films[*]' AS film2 COLUMNS ( - title2 text PATH '$.title', - director2 text PATH '$.director' - ) - ) - ) - PLAN (favs INNER ((films1 INNER film1) CROSS (films2 INNER film2))) - ) AS jt - WHERE kind1 > kind2 AND director1 = director2 - ORDER BY 1,2,3,4; - -RESET client_min_messages; - --- test some utility functions on the target list & where clause -select jsonb_path_exists(js, '$.favorites') from my_films; -select bool_and(JSON_EXISTS(js, '$.favorites.films.title')) from my_films; -SELECT count(*) FROM my_films WHERE jsonb_path_exists(js, '$.favorites'); -SELECT count(*) FROM my_films WHERE jsonb_path_exists(js, '$.favorites'); -SELECT count(*) FROM my_films WHERE JSON_EXISTS(js, '$.favorites.films.title'); - --- check constraint with json_exists -create table user_profiles ( - id bigserial, - addresses jsonb, - anyjson jsonb, - check (json_exists( addresses, '$.main' )) -); -select create_distributed_table('user_profiles', 'id'); -insert into user_profiles (addresses) VALUES (JSON_SCALAR('1')); -insert into user_profiles (addresses) VALUES ('{"main":"value"}'); - --- we cannot insert because WITH UNIQUE KEYS -insert into user_profiles (addresses) VALUES (JSON ('{"main":"value", "main":"value"}' WITH UNIQUE KEYS)); - --- we can insert with -insert into user_profiles (addresses) VALUES (JSON ('{"main":"value", "main":"value"}' WITHOUT UNIQUE KEYS)) RETURNING *; - -TRUNCATE user_profiles; -INSERT INTO user_profiles (anyjson) VALUES ('12'), ('"abc"'), ('[1,2,3]'), ('{"a":12}'); -select anyjson, anyjson is json array as json_array, anyjson is json object as json_object, anyjson is json scalar as json_scalar, -anyjson is json with UNIQUE keys -from user_profiles WHERE anyjson IS NOT NULL ORDER BY 1; - --- use json_query -SELECT i, - json_query('[{"x": "aaa"},{"x": "bbb"},{"x": "ccc"}]'::JSONB, '$[$i].x' passing id AS i RETURNING text omit quotes) -FROM generate_series(0, 3) i -JOIN my_films ON(id = i) ORDER BY 1; - --- we can use JSON_TABLE in modification queries as well - --- use log level such that we can see trace changes -SET client_min_messages TO DEBUG1; - ---the JSON_TABLE subquery is recursively planned -UPDATE test_table SET VALUE = 'XXX' FROM( -SELECT jt.* FROM - my_films, - JSON_TABLE ( js, '$.favorites[*]' COLUMNS ( - id FOR ORDINALITY, - kind text PATH '$.kind', - NESTED PATH '$.films[*]' COLUMNS ( - title text PATH '$.title', - director text PATH 
'$.director'))) AS jt) as foo WHERE foo.id = test_table.id; - --- Subquery with JSON table can be pushed down because two distributed tables --- in the query are joined on distribution column -UPDATE test_table SET VALUE = 'XXX' FROM ( -SELECT my_films.id, jt.* FROM - my_films, - JSON_TABLE ( js, '$.favorites[*]' COLUMNS ( - kind text PATH '$.kind', - NESTED PATH '$.films[*]' COLUMNS ( - title text PATH '$.title', - director text PATH '$.director'))) AS jt) as foo WHERE foo.id = test_table.id; - --- we can pushdown with CTEs as well -WITH json_cte AS -(SELECT my_films.id, jt.* FROM - my_films, - JSON_TABLE ( js, '$.favorites[*]' COLUMNS ( - kind text PATH '$.kind', - NESTED PATH '$.films[*]' COLUMNS ( - title text PATH '$.title', - director text PATH '$.director'))) AS jt) -UPDATE test_table SET VALUE = 'XYZ' FROM json_cte - WHERE json_cte.id = test_table.id; - - -- we can recursively with CTEs as well -WITH json_cte AS -(SELECT my_films.id as film_id, jt.* FROM - my_films, - JSON_TABLE ( js, '$.favorites[*]' COLUMNS ( - kind text PATH '$.kind', - NESTED PATH '$.films[*]' COLUMNS ( - id FOR ORDINALITY, - title text PATH '$.title', - director text PATH '$.director'))) AS jt ORDER BY jt.id LIMIT 1) -UPDATE test_table SET VALUE = 'XYZ' FROM json_cte - WHERE json_cte.film_id = test_table.id; - -SET client_min_messages TO ERROR; -DROP SCHEMA pg15_json CASCADE; From b84251ac0851a8930f982b73cb54c708420b86f9 Mon Sep 17 00:00:00 2001 From: naisila Date: Mon, 12 Sep 2022 13:09:02 +0300 Subject: [PATCH 07/17] Bump test images to 15beta4 --- .circleci/config.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index bab2abdff..cd3dfabbe 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -6,7 +6,7 @@ orbs: parameters: image_suffix: type: string - default: '-v643b0b7' + default: '-v0eef34d' pg13_version: type: string default: '13.8' @@ -15,10 +15,10 @@ parameters: default: '14.5' pg15_version: type: string - default: '15beta3' + default: '15beta4' upgrade_pg_versions: type: string - default: '13.8-14.5-15beta3' + default: '13.8-14.5-15beta4' style_checker_tools_version: type: string default: '0.8.18' From b036e44aa4fe91c2a030d3949d9ff9b0b09191eb Mon Sep 17 00:00:00 2001 From: Marco Slot Date: Mon, 12 Sep 2022 16:29:57 +0200 Subject: [PATCH 08/17] Fix bug preventing isolate_tenant_to_new_shard with text column (#6320) Co-authored-by: Marco Slot --- .../distributed/operations/isolate_shards.c | 4 +- .../multi_tenant_isolation_nonblocking.out | 132 ++++++++++++------ .../multi_tenant_isolation_nonblocking.sql | 9 ++ 3 files changed, 98 insertions(+), 47 deletions(-) diff --git a/src/backend/distributed/operations/isolate_shards.c b/src/backend/distributed/operations/isolate_shards.c index c0f7739b8..ec89ae402 100644 --- a/src/backend/distributed/operations/isolate_shards.c +++ b/src/backend/distributed/operations/isolate_shards.c @@ -133,7 +133,9 @@ isolate_tenant_to_new_shard(PG_FUNCTION_ARGS) FmgrInfo *hashFunction = cacheEntry->hashFunction; /* get hashed value of the distribution value */ - Datum hashedValueDatum = FunctionCall1(hashFunction, tenantIdDatum); + Datum hashedValueDatum = FunctionCall1Coll(hashFunction, + cacheEntry->partitionColumn->varcollid, + tenantIdDatum); int hashedValue = DatumGetInt32(hashedValueDatum); List *shardSplitPointsList = NIL; diff --git a/src/test/regress/expected/multi_tenant_isolation_nonblocking.out b/src/test/regress/expected/multi_tenant_isolation_nonblocking.out index 
5f3b36086..e8feb8f23 100644 --- a/src/test/regress/expected/multi_tenant_isolation_nonblocking.out +++ b/src/test/regress/expected/multi_tenant_isolation_nonblocking.out @@ -661,6 +661,34 @@ SELECT count(*) FROM lineitem_date WHERE l_shipdate = '1997-08-08'; 1 (1 row) +-- test with text distribution column (because of collations) +SET citus.shard_replication_factor TO 1; +CREATE TABLE text_column (tenant_id text, value jsonb); +INSERT INTO text_column VALUES ('hello','{}'); +SELECT create_distributed_table('text_column','tenant_id'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$"Tenant Isolation".text_column$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT isolate_tenant_to_new_shard('text_column', 'hello', shard_transfer_mode => 'force_logical'); + isolate_tenant_to_new_shard +--------------------------------------------------------------------- + 1230053 +(1 row) + +SELECT * FROM text_column; + tenant_id | value +--------------------------------------------------------------------- + hello | {} +(1 row) + +CALL pg_catalog.citus_cleanup_orphaned_resources(); +NOTICE: cleaned up 1 orphaned resources -- test with invalid shard placements \c - postgres - :master_port SET search_path to "Tenant Isolation"; @@ -751,7 +779,11 @@ SET citus.override_table_visibility TO false; Tenant Isolation | orders_streaming_1230039 | table | mx_isolation_role_ent Tenant Isolation | orders_streaming_1230042 | table | mx_isolation_role_ent Tenant Isolation | orders_streaming_1230043 | table | mx_isolation_role_ent -(14 rows) + Tenant Isolation | text_column | table | mx_isolation_role_ent + Tenant Isolation | text_column_1230052 | table | mx_isolation_role_ent + Tenant Isolation | text_column_1230053 | table | mx_isolation_role_ent + Tenant Isolation | text_column_1230054 | table | mx_isolation_role_ent +(18 rows) \c - mx_isolation_role_ent - :master_port SET search_path to "Tenant Isolation"; @@ -782,7 +814,11 @@ SET citus.override_table_visibility TO false; Tenant Isolation | orders_streaming_1230039 | table | mx_isolation_role_ent Tenant Isolation | orders_streaming_1230042 | table | mx_isolation_role_ent Tenant Isolation | orders_streaming_1230043 | table | mx_isolation_role_ent -(14 rows) + Tenant Isolation | text_column | table | mx_isolation_role_ent + Tenant Isolation | text_column_1230052 | table | mx_isolation_role_ent + Tenant Isolation | text_column_1230053 | table | mx_isolation_role_ent + Tenant Isolation | text_column_1230054 | table | mx_isolation_role_ent +(18 rows) DROP EVENT TRIGGER abort_ddl; -- create a trigger for drops @@ -851,9 +887,9 @@ SET citus.override_table_visibility TO false; Tenant Isolation | lineitem_streaming_1230036 | table | mx_isolation_role_ent Tenant Isolation | lineitem_streaming_1230040 | table | mx_isolation_role_ent Tenant Isolation | lineitem_streaming_1230041 | table | mx_isolation_role_ent - Tenant Isolation | lineitem_streaming_1230056 | table | mx_isolation_role_ent - Tenant Isolation | lineitem_streaming_1230057 | table | mx_isolation_role_ent - Tenant Isolation | lineitem_streaming_1230058 | table | mx_isolation_role_ent + Tenant Isolation | lineitem_streaming_1230061 | table | mx_isolation_role_ent + Tenant Isolation | lineitem_streaming_1230062 | table | mx_isolation_role_ent + 
Tenant Isolation | lineitem_streaming_1230063 | table | mx_isolation_role_ent Tenant Isolation | orders_streaming | table | mx_isolation_role_ent Tenant Isolation | orders_streaming_1230014 | table | mx_isolation_role_ent Tenant Isolation | orders_streaming_1230015 | table | mx_isolation_role_ent @@ -861,10 +897,14 @@ SET citus.override_table_visibility TO false; Tenant Isolation | orders_streaming_1230039 | table | mx_isolation_role_ent Tenant Isolation | orders_streaming_1230042 | table | mx_isolation_role_ent Tenant Isolation | orders_streaming_1230043 | table | mx_isolation_role_ent - Tenant Isolation | orders_streaming_1230059 | table | mx_isolation_role_ent - Tenant Isolation | orders_streaming_1230060 | table | mx_isolation_role_ent - Tenant Isolation | orders_streaming_1230061 | table | mx_isolation_role_ent -(20 rows) + Tenant Isolation | orders_streaming_1230064 | table | mx_isolation_role_ent + Tenant Isolation | orders_streaming_1230065 | table | mx_isolation_role_ent + Tenant Isolation | orders_streaming_1230066 | table | mx_isolation_role_ent + Tenant Isolation | text_column | table | mx_isolation_role_ent + Tenant Isolation | text_column_1230052 | table | mx_isolation_role_ent + Tenant Isolation | text_column_1230053 | table | mx_isolation_role_ent + Tenant Isolation | text_column_1230054 | table | mx_isolation_role_ent +(24 rows) \c - postgres - :worker_1_port DROP EVENT TRIGGER abort_drop; @@ -1090,7 +1130,7 @@ DROP TABLE test_colocated_table_no_rep_identity; SELECT isolate_tenant_to_new_shard('test_colocated_table_2', 1, 'CASCADE', shard_transfer_mode => 'auto'); isolate_tenant_to_new_shard --------------------------------------------------------------------- - 1230103 + 1230108 (1 row) SELECT count(*) FROM test_colocated_table_2; @@ -1112,47 +1152,47 @@ ORDER BY 1, 2; relname | Constraint | Definition --------------------------------------------------------------------- test_colocated_table_1 | test_colocated_table_1_id_fkey | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1(id) - test_colocated_table_1_1230069 | test_colocated_table_1_id_fkey_1230069 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230069(id) - test_colocated_table_1_1230071 | test_colocated_table_1_id_fkey_1230071 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230071(id) - test_colocated_table_1_1230073 | test_colocated_table_1_id_fkey_1230073 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230073(id) - test_colocated_table_1_1230099 | test_colocated_table_1_id_fkey_1230099 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230099(id) - test_colocated_table_1_1230100 | test_colocated_table_1_id_fkey_1230100 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230100(id) - test_colocated_table_1_1230101 | test_colocated_table_1_id_fkey_1230101 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230101(id) + test_colocated_table_1_1230074 | test_colocated_table_1_id_fkey_1230074 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230074(id) + test_colocated_table_1_1230076 | test_colocated_table_1_id_fkey_1230076 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230076(id) + test_colocated_table_1_1230078 | test_colocated_table_1_id_fkey_1230078 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230078(id) + test_colocated_table_1_1230104 | test_colocated_table_1_id_fkey_1230104 | 
FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230104(id) + test_colocated_table_1_1230105 | test_colocated_table_1_id_fkey_1230105 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230105(id) + test_colocated_table_1_1230106 | test_colocated_table_1_id_fkey_1230106 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230106(id) test_colocated_table_2 | test_colocated_table_2_id_fkey | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1(id) test_colocated_table_2 | test_colocated_table_2_value_1_fkey | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey(id) - test_colocated_table_2_1230077 | test_colocated_table_2_id_fkey_1230077 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230069(id) - test_colocated_table_2_1230077 | test_colocated_table_2_value_1_fkey_1230077 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230066(id) - test_colocated_table_2_1230079 | test_colocated_table_2_id_fkey_1230079 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230071(id) - test_colocated_table_2_1230079 | test_colocated_table_2_value_1_fkey_1230079 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230066(id) - test_colocated_table_2_1230081 | test_colocated_table_2_id_fkey_1230081 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230073(id) - test_colocated_table_2_1230081 | test_colocated_table_2_value_1_fkey_1230081 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230066(id) - test_colocated_table_2_1230102 | test_colocated_table_2_id_fkey_1230102 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230099(id) - test_colocated_table_2_1230102 | test_colocated_table_2_value_1_fkey_1230102 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230066(id) - test_colocated_table_2_1230103 | test_colocated_table_2_id_fkey_1230103 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230100(id) - test_colocated_table_2_1230103 | test_colocated_table_2_value_1_fkey_1230103 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230066(id) - test_colocated_table_2_1230104 | test_colocated_table_2_id_fkey_1230104 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230101(id) - test_colocated_table_2_1230104 | test_colocated_table_2_value_1_fkey_1230104 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230066(id) + test_colocated_table_2_1230082 | test_colocated_table_2_id_fkey_1230082 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230074(id) + test_colocated_table_2_1230082 | test_colocated_table_2_value_1_fkey_1230082 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230071(id) + test_colocated_table_2_1230084 | test_colocated_table_2_id_fkey_1230084 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230076(id) + test_colocated_table_2_1230084 | test_colocated_table_2_value_1_fkey_1230084 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230071(id) + test_colocated_table_2_1230086 | test_colocated_table_2_id_fkey_1230086 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230078(id) + test_colocated_table_2_1230086 | test_colocated_table_2_value_1_fkey_1230086 | FOREIGN 
KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230071(id) + test_colocated_table_2_1230107 | test_colocated_table_2_id_fkey_1230107 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230104(id) + test_colocated_table_2_1230107 | test_colocated_table_2_value_1_fkey_1230107 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230071(id) + test_colocated_table_2_1230108 | test_colocated_table_2_id_fkey_1230108 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230105(id) + test_colocated_table_2_1230108 | test_colocated_table_2_value_1_fkey_1230108 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230071(id) + test_colocated_table_2_1230109 | test_colocated_table_2_id_fkey_1230109 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230106(id) + test_colocated_table_2_1230109 | test_colocated_table_2_value_1_fkey_1230109 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230071(id) test_colocated_table_3 | test_colocated_table_3_id_fkey | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1(id) test_colocated_table_3 | test_colocated_table_3_id_fkey1 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_2(id) test_colocated_table_3 | test_colocated_table_3_value_1_fkey | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey(id) - test_colocated_table_3_1230085 | test_colocated_table_3_id_fkey1_1230085 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_2_1230077(id) - test_colocated_table_3_1230085 | test_colocated_table_3_id_fkey_1230085 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230069(id) - test_colocated_table_3_1230085 | test_colocated_table_3_value_1_fkey_1230085 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230066(id) - test_colocated_table_3_1230087 | test_colocated_table_3_id_fkey1_1230087 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_2_1230079(id) - test_colocated_table_3_1230087 | test_colocated_table_3_id_fkey_1230087 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230071(id) - test_colocated_table_3_1230087 | test_colocated_table_3_value_1_fkey_1230087 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230066(id) - test_colocated_table_3_1230089 | test_colocated_table_3_id_fkey1_1230089 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_2_1230081(id) - test_colocated_table_3_1230089 | test_colocated_table_3_id_fkey_1230089 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230073(id) - test_colocated_table_3_1230089 | test_colocated_table_3_value_1_fkey_1230089 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230066(id) - test_colocated_table_3_1230105 | test_colocated_table_3_id_fkey1_1230105 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_2_1230102(id) - test_colocated_table_3_1230105 | test_colocated_table_3_id_fkey_1230105 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230099(id) - test_colocated_table_3_1230105 | test_colocated_table_3_value_1_fkey_1230105 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230066(id) - test_colocated_table_3_1230106 | test_colocated_table_3_id_fkey1_1230106 | FOREIGN KEY (id) REFERENCES "Tenant 
Isolation".test_colocated_table_2_1230103(id) - test_colocated_table_3_1230106 | test_colocated_table_3_id_fkey_1230106 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230100(id) - test_colocated_table_3_1230106 | test_colocated_table_3_value_1_fkey_1230106 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230066(id) - test_colocated_table_3_1230107 | test_colocated_table_3_id_fkey1_1230107 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_2_1230104(id) - test_colocated_table_3_1230107 | test_colocated_table_3_id_fkey_1230107 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230101(id) - test_colocated_table_3_1230107 | test_colocated_table_3_value_1_fkey_1230107 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230066(id) + test_colocated_table_3_1230090 | test_colocated_table_3_id_fkey1_1230090 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_2_1230082(id) + test_colocated_table_3_1230090 | test_colocated_table_3_id_fkey_1230090 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230074(id) + test_colocated_table_3_1230090 | test_colocated_table_3_value_1_fkey_1230090 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230071(id) + test_colocated_table_3_1230092 | test_colocated_table_3_id_fkey1_1230092 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_2_1230084(id) + test_colocated_table_3_1230092 | test_colocated_table_3_id_fkey_1230092 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230076(id) + test_colocated_table_3_1230092 | test_colocated_table_3_value_1_fkey_1230092 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230071(id) + test_colocated_table_3_1230094 | test_colocated_table_3_id_fkey1_1230094 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_2_1230086(id) + test_colocated_table_3_1230094 | test_colocated_table_3_id_fkey_1230094 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230078(id) + test_colocated_table_3_1230094 | test_colocated_table_3_value_1_fkey_1230094 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230071(id) + test_colocated_table_3_1230110 | test_colocated_table_3_id_fkey1_1230110 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_2_1230107(id) + test_colocated_table_3_1230110 | test_colocated_table_3_id_fkey_1230110 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230104(id) + test_colocated_table_3_1230110 | test_colocated_table_3_value_1_fkey_1230110 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230071(id) + test_colocated_table_3_1230111 | test_colocated_table_3_id_fkey1_1230111 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_2_1230108(id) + test_colocated_table_3_1230111 | test_colocated_table_3_id_fkey_1230111 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_1_1230105(id) + test_colocated_table_3_1230111 | test_colocated_table_3_value_1_fkey_1230111 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230071(id) + test_colocated_table_3_1230112 | test_colocated_table_3_id_fkey1_1230112 | FOREIGN KEY (id) REFERENCES "Tenant Isolation".test_colocated_table_2_1230109(id) + test_colocated_table_3_1230112 | test_colocated_table_3_id_fkey_1230112 | FOREIGN KEY (id) 
REFERENCES "Tenant Isolation".test_colocated_table_1_1230106(id) + test_colocated_table_3_1230112 | test_colocated_table_3_value_1_fkey_1230112 | FOREIGN KEY (value_1) REFERENCES "Tenant Isolation".test_reference_table_fkey_1230071(id) (42 rows) \c - mx_isolation_role_ent - :master_port diff --git a/src/test/regress/sql/multi_tenant_isolation_nonblocking.sql b/src/test/regress/sql/multi_tenant_isolation_nonblocking.sql index eca828d04..812f0f853 100644 --- a/src/test/regress/sql/multi_tenant_isolation_nonblocking.sql +++ b/src/test/regress/sql/multi_tenant_isolation_nonblocking.sql @@ -318,6 +318,15 @@ SELECT count(*) FROM lineitem_date WHERE l_shipdate = '1997-07-30'; SELECT count(*) FROM lineitem_date WHERE l_shipdate = '1998-01-15'; SELECT count(*) FROM lineitem_date WHERE l_shipdate = '1997-08-08'; +-- test with text distribution column (because of collations) +SET citus.shard_replication_factor TO 1; +CREATE TABLE text_column (tenant_id text, value jsonb); +INSERT INTO text_column VALUES ('hello','{}'); +SELECT create_distributed_table('text_column','tenant_id'); +SELECT isolate_tenant_to_new_shard('text_column', 'hello', shard_transfer_mode => 'force_logical'); +SELECT * FROM text_column; +CALL pg_catalog.citus_cleanup_orphaned_resources(); + -- test with invalid shard placements \c - postgres - :master_port SET search_path to "Tenant Isolation"; From 48f7d6c27925af3813d787bbe9e9d1458f54413e Mon Sep 17 00:00:00 2001 From: Marco Slot Date: Mon, 12 Sep 2022 16:49:17 +0200 Subject: [PATCH 09/17] Show local managed tables in citus_tables and hide tables owned by extensions (#6321) Co-authored-by: Marco Slot --- .../distributed/sql/citus--11.0-4--11.1-1.sql | 2 + .../sql/downgrades/citus--11.1-1--11.0-4.sql | 3 ++ .../sql/udfs/citus_shards/11.1-1.sql | 45 +++++++++++++++++ .../sql/udfs/citus_shards/latest.sql | 10 ++++ .../sql/udfs/citus_tables/11.1-1.sql | 48 +++++++++++++++++++ .../sql/udfs/citus_tables/latest.sql | 14 +++++- .../alter_table_set_access_method.out | 18 +++---- src/test/regress/expected/multi_mx_ddl.out | 26 ++++++++++ src/test/regress/sql/multi_mx_ddl.sql | 8 ++++ 9 files changed, 164 insertions(+), 10 deletions(-) create mode 100644 src/backend/distributed/sql/udfs/citus_shards/11.1-1.sql create mode 100644 src/backend/distributed/sql/udfs/citus_tables/11.1-1.sql diff --git a/src/backend/distributed/sql/citus--11.0-4--11.1-1.sql b/src/backend/distributed/sql/citus--11.0-4--11.1-1.sql index 89d96eda4..d5e157fdb 100644 --- a/src/backend/distributed/sql/citus--11.0-4--11.1-1.sql +++ b/src/backend/distributed/sql/citus--11.0-4--11.1-1.sql @@ -1,4 +1,6 @@ #include "udfs/citus_locks/11.1-1.sql" +#include "udfs/citus_tables/11.1-1.sql" +#include "udfs/citus_shards/11.1-1.sql" #include "udfs/create_distributed_table_concurrently/11.1-1.sql" #include "udfs/citus_internal_delete_partition_metadata/11.1-1.sql" #include "udfs/citus_copy_shard_placement/11.1-1.sql" diff --git a/src/backend/distributed/sql/downgrades/citus--11.1-1--11.0-4.sql b/src/backend/distributed/sql/downgrades/citus--11.1-1--11.0-4.sql index 83c91b06d..fbfc4eddf 100644 --- a/src/backend/distributed/sql/downgrades/citus--11.1-1--11.0-4.sql +++ b/src/backend/distributed/sql/downgrades/citus--11.1-1--11.0-4.sql @@ -93,6 +93,9 @@ DROP FUNCTION pg_catalog.get_all_active_transactions(OUT datid oid, OUT process_ DROP VIEW pg_catalog.citus_locks; DROP FUNCTION pg_catalog.citus_locks(); +#include "../udfs/citus_tables/10.0-4.sql" +#include "../udfs/citus_shards/10.1-1.sql" + DROP FUNCTION 
pg_catalog.replicate_reference_tables(citus.shard_transfer_mode); #include "../udfs/replicate_reference_tables/9.3-2.sql" diff --git a/src/backend/distributed/sql/udfs/citus_shards/11.1-1.sql b/src/backend/distributed/sql/udfs/citus_shards/11.1-1.sql new file mode 100644 index 000000000..08e039899 --- /dev/null +++ b/src/backend/distributed/sql/udfs/citus_shards/11.1-1.sql @@ -0,0 +1,45 @@ +CREATE OR REPLACE VIEW pg_catalog.citus_shards AS +SELECT + pg_dist_shard.logicalrelid AS table_name, + pg_dist_shard.shardid, + shard_name(pg_dist_shard.logicalrelid, pg_dist_shard.shardid) as shard_name, + CASE WHEN partkey IS NOT NULL THEN 'distributed' WHEN repmodel = 't' THEN 'reference' ELSE 'local' END AS citus_table_type, + colocationid AS colocation_id, + pg_dist_node.nodename, + pg_dist_node.nodeport, + size as shard_size +FROM + pg_dist_shard +JOIN + pg_dist_placement +ON + pg_dist_shard.shardid = pg_dist_placement.shardid +JOIN + pg_dist_node +ON + pg_dist_placement.groupid = pg_dist_node.groupid +JOIN + pg_dist_partition +ON + pg_dist_partition.logicalrelid = pg_dist_shard.logicalrelid +LEFT JOIN + (SELECT (regexp_matches(table_name,'_(\d+)$'))[1]::int as shard_id, max(size) as size from citus_shard_sizes() GROUP BY shard_id) as shard_sizes +ON + pg_dist_shard.shardid = shard_sizes.shard_id +WHERE + pg_dist_placement.shardstate = 1 +AND + -- filter out tables owned by extensions + pg_dist_partition.logicalrelid NOT IN ( + SELECT + objid + FROM + pg_depend + WHERE + classid = 'pg_class'::regclass AND refclassid = 'pg_extension'::regclass AND deptype = 'e' + ) +ORDER BY + pg_dist_shard.logicalrelid::text, shardid +; + +GRANT SELECT ON pg_catalog.citus_shards TO public; diff --git a/src/backend/distributed/sql/udfs/citus_shards/latest.sql b/src/backend/distributed/sql/udfs/citus_shards/latest.sql index ff98638e7..08e039899 100644 --- a/src/backend/distributed/sql/udfs/citus_shards/latest.sql +++ b/src/backend/distributed/sql/udfs/citus_shards/latest.sql @@ -28,6 +28,16 @@ ON pg_dist_shard.shardid = shard_sizes.shard_id WHERE pg_dist_placement.shardstate = 1 +AND + -- filter out tables owned by extensions + pg_dist_partition.logicalrelid NOT IN ( + SELECT + objid + FROM + pg_depend + WHERE + classid = 'pg_class'::regclass AND refclassid = 'pg_extension'::regclass AND deptype = 'e' + ) ORDER BY pg_dist_shard.logicalrelid::text, shardid ; diff --git a/src/backend/distributed/sql/udfs/citus_tables/11.1-1.sql b/src/backend/distributed/sql/udfs/citus_tables/11.1-1.sql new file mode 100644 index 000000000..ee66852a4 --- /dev/null +++ b/src/backend/distributed/sql/udfs/citus_tables/11.1-1.sql @@ -0,0 +1,48 @@ +DO $$ +declare +citus_tables_create_query text; +BEGIN +citus_tables_create_query=$CTCQ$ + CREATE OR REPLACE VIEW %I.citus_tables AS + SELECT + logicalrelid AS table_name, + CASE WHEN partkey IS NOT NULL THEN 'distributed' ELSE + CASE when repmodel = 't' THEN 'reference' ELSE 'local' END + END AS citus_table_type, + coalesce(column_to_column_name(logicalrelid, partkey), '') AS distribution_column, + colocationid AS colocation_id, + pg_size_pretty(citus_total_relation_size(logicalrelid, fail_on_error := false)) AS table_size, + (select count(*) from pg_dist_shard where logicalrelid = p.logicalrelid) AS shard_count, + pg_get_userbyid(relowner) AS table_owner, + amname AS access_method + FROM + pg_dist_partition p + JOIN + pg_class c ON (p.logicalrelid = c.oid) + LEFT JOIN + pg_am a ON (a.oid = c.relam) + WHERE + -- filter out tables owned by extensions + logicalrelid NOT IN ( + SELECT + objid + 
FROM + pg_depend + WHERE + classid = 'pg_class'::regclass AND refclassid = 'pg_extension'::regclass AND deptype = 'e' + ) + ORDER BY + logicalrelid::text; +$CTCQ$; + +IF EXISTS (SELECT 1 FROM pg_namespace WHERE nspname = 'public') THEN + EXECUTE format(citus_tables_create_query, 'public'); + GRANT SELECT ON public.citus_tables TO public; +ELSE + EXECUTE format(citus_tables_create_query, 'citus'); + ALTER VIEW citus.citus_tables SET SCHEMA pg_catalog; + GRANT SELECT ON pg_catalog.citus_tables TO public; +END IF; + +END; +$$; diff --git a/src/backend/distributed/sql/udfs/citus_tables/latest.sql b/src/backend/distributed/sql/udfs/citus_tables/latest.sql index 025a0f3e7..ee66852a4 100644 --- a/src/backend/distributed/sql/udfs/citus_tables/latest.sql +++ b/src/backend/distributed/sql/udfs/citus_tables/latest.sql @@ -6,7 +6,9 @@ citus_tables_create_query=$CTCQ$ CREATE OR REPLACE VIEW %I.citus_tables AS SELECT logicalrelid AS table_name, - CASE WHEN partkey IS NOT NULL THEN 'distributed' ELSE 'reference' END AS citus_table_type, + CASE WHEN partkey IS NOT NULL THEN 'distributed' ELSE + CASE when repmodel = 't' THEN 'reference' ELSE 'local' END + END AS citus_table_type, coalesce(column_to_column_name(logicalrelid, partkey), '') AS distribution_column, colocationid AS colocation_id, pg_size_pretty(citus_total_relation_size(logicalrelid, fail_on_error := false)) AS table_size, @@ -20,7 +22,15 @@ citus_tables_create_query=$CTCQ$ LEFT JOIN pg_am a ON (a.oid = c.relam) WHERE - partkey IS NOT NULL OR repmodel = 't' + -- filter out tables owned by extensions + logicalrelid NOT IN ( + SELECT + objid + FROM + pg_depend + WHERE + classid = 'pg_class'::regclass AND refclassid = 'pg_extension'::regclass AND deptype = 'e' + ) ORDER BY logicalrelid::text; $CTCQ$; diff --git a/src/test/regress/expected/alter_table_set_access_method.out b/src/test/regress/expected/alter_table_set_access_method.out index 81185fe9b..e95c8f686 100644 --- a/src/test/regress/expected/alter_table_set_access_method.out +++ b/src/test/regress/expected/alter_table_set_access_method.out @@ -452,11 +452,12 @@ SELECT citus_add_local_table_to_metadata('table_type_citus_local'); CREATE TABLE table_type_pg_local (a INT); SELECT table_name, citus_table_type, distribution_column, shard_count, access_method FROM public.citus_tables WHERE table_name::text LIKE 'table\_type%' ORDER BY 1; - table_name | citus_table_type | distribution_column | shard_count | access_method + table_name | citus_table_type | distribution_column | shard_count | access_method --------------------------------------------------------------------- - table_type_dist | distributed | a | 4 | heap - table_type_ref | reference | | 1 | heap -(2 rows) + table_type_dist | distributed | a | 4 | heap + table_type_ref | reference | | 1 | heap + table_type_citus_local | local | | 1 | heap +(3 rows) SELECT c.relname, a.amname FROM pg_class c, pg_am a where c.relname SIMILAR TO 'table_type\D*' AND c.relnamespace = 'alter_table_set_access_method'::regnamespace AND c.relam = a.oid; relname | amname @@ -508,11 +509,12 @@ NOTICE: renaming the new table to alter_table_set_access_method.table_type_citu (1 row) SELECT table_name, citus_table_type, distribution_column, shard_count, access_method FROM public.citus_tables WHERE table_name::text LIKE 'table\_type%' ORDER BY 1; - table_name | citus_table_type | distribution_column | shard_count | access_method + table_name | citus_table_type | distribution_column | shard_count | access_method 
--------------------------------------------------------------------- - table_type_dist | distributed | a | 4 | columnar - table_type_ref | reference | | 1 | columnar -(2 rows) + table_type_dist | distributed | a | 4 | columnar + table_type_ref | reference | | 1 | columnar + table_type_citus_local | local | | 1 | columnar +(3 rows) SELECT c.relname, a.amname FROM pg_class c, pg_am a where c.relname SIMILAR TO 'table_type\D*' AND c.relnamespace = 'alter_table_set_access_method'::regnamespace AND c.relam = a.oid; relname | amname diff --git a/src/test/regress/expected/multi_mx_ddl.out b/src/test/regress/expected/multi_mx_ddl.out index e6f3f6e0f..ba5e807a9 100644 --- a/src/test/regress/expected/multi_mx_ddl.out +++ b/src/test/regress/expected/multi_mx_ddl.out @@ -278,6 +278,19 @@ HINT: To remove the local data, run: SELECT truncate_local_data_after_distribut (1 row) +-- table should not show up in citus_tables or citus_shards +SELECT count(*) FROM citus_tables WHERE table_name = 'seg_test'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM citus_shards WHERE table_name = 'seg_test'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + \c - - - :worker_1_port -- should be able to see contents from worker SELECT * FROM seg_test; @@ -286,6 +299,19 @@ SELECT * FROM seg_test; 42 (1 row) +-- table should not show up in citus_tables or citus_shards +SELECT count(*) FROM citus_tables WHERE table_name = 'seg_test'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM citus_shards WHERE table_name = 'seg_test'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + \c - - - :master_port -- test metadata sync in the presence of an extension-owned table SELECT start_metadata_sync_to_node('localhost', :worker_1_port); diff --git a/src/test/regress/sql/multi_mx_ddl.sql b/src/test/regress/sql/multi_mx_ddl.sql index d0452c3d2..ebd9fbe97 100644 --- a/src/test/regress/sql/multi_mx_ddl.sql +++ b/src/test/regress/sql/multi_mx_ddl.sql @@ -155,11 +155,19 @@ SET citus.shard_replication_factor TO 1; SET citus.shard_count TO 4; SELECT create_distributed_table('seg_test', 'x'); +-- table should not show up in citus_tables or citus_shards +SELECT count(*) FROM citus_tables WHERE table_name = 'seg_test'::regclass; +SELECT count(*) FROM citus_shards WHERE table_name = 'seg_test'::regclass; + \c - - - :worker_1_port -- should be able to see contents from worker SELECT * FROM seg_test; +-- table should not show up in citus_tables or citus_shards +SELECT count(*) FROM citus_tables WHERE table_name = 'seg_test'::regclass; +SELECT count(*) FROM citus_shards WHERE table_name = 'seg_test'::regclass; + \c - - - :master_port -- test metadata sync in the presence of an extension-owned table From cda3686d862cfbdfc9ee09edcefc2e39fabe7da9 Mon Sep 17 00:00:00 2001 From: Nils Dijk Date: Mon, 12 Sep 2022 19:46:53 +0200 Subject: [PATCH 10/17] Feature: run rebalancer in the background (#6215) DESCRIPTION: Add a rebalancer that uses background tasks for its execution Based on the background jobs and tasks introduced in #6296, we implement a new rebalancer on top of the primitives of background execution. This allows the user to initiate a rebalance and let Citus execute the long-running steps in the background until completion. 
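As a rough usage sketch (the job id shown is hypothetical; the individual functions are described below):

    SELECT citus_rebalance_start();   -- schedules the moves and returns the background job id
    SELECT * FROM pg_dist_background_task WHERE job_id = 17 ORDER BY task_id ASC;   -- monitor progress of the (hypothetical) job 17
    SELECT citus_rebalance_wait();    -- block until the rebalance job reaches a terminal state
    SELECT citus_rebalance_stop();    -- or cancel the running rebalance instead
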
Users can invoke the new background rebalancer with `SELECT citus_rebalance_start();`. It will output information on its job id and how to track progress. It also returns its job id for automation purposes. If you simply want to wait until the rebalance is done, you can use `SELECT citus_rebalance_wait();`. A running rebalance can be cancelled/stopped with `SELECT citus_rebalance_stop();`. --- .../distributed/metadata/metadata_utility.c | 73 +++++ .../distributed/operations/shard_rebalancer.c | 265 +++++++++++++++++- .../distributed/sql/citus--11.0-4--11.1-1.sql | 3 + .../sql/downgrades/citus--11.1-1--11.0-4.sql | 4 + .../sql/udfs/citus_rebalance_start/11.1-1.sql | 11 + .../sql/udfs/citus_rebalance_start/latest.sql | 11 + .../sql/udfs/citus_rebalance_stop/11.1-1.sql | 7 + .../sql/udfs/citus_rebalance_stop/latest.sql | 7 + .../sql/udfs/citus_rebalance_wait/11.1-1.sql | 7 + .../sql/udfs/citus_rebalance_wait/latest.sql | 7 + .../distributed/utils/background_jobs.c | 25 +- .../distributed/utils/reference_table_utils.c | 57 ++++ src/include/distributed/background_jobs.h | 6 + src/include/distributed/metadata_utility.h | 1 + .../distributed/reference_table_utils.h | 1 + src/test/regress/bin/normalize.sed | 5 + .../regress/expected/background_rebalance.out | 180 ++++++++++++ .../expected/isolation_shard_rebalancer.out | 28 ++ src/test/regress/expected/multi_extension.out | 5 +- .../expected/upgrade_list_citus_objects.out | 5 +- src/test/regress/operations_schedule | 1 + .../spec/isolation_shard_rebalancer.spec | 14 + src/test/regress/sql/background_rebalance.sql | 64 +++++ 23 files changed, 774 insertions(+), 13 deletions(-) create mode 100644 src/backend/distributed/sql/udfs/citus_rebalance_start/11.1-1.sql create mode 100644 src/backend/distributed/sql/udfs/citus_rebalance_start/latest.sql create mode 100644 src/backend/distributed/sql/udfs/citus_rebalance_stop/11.1-1.sql create mode 100644 src/backend/distributed/sql/udfs/citus_rebalance_stop/latest.sql create mode 100644 src/backend/distributed/sql/udfs/citus_rebalance_wait/11.1-1.sql create mode 100644 src/backend/distributed/sql/udfs/citus_rebalance_wait/latest.sql create mode 100644 src/test/regress/expected/background_rebalance.out create mode 100644 src/test/regress/sql/background_rebalance.sql diff --git a/src/backend/distributed/metadata/metadata_utility.c b/src/backend/distributed/metadata/metadata_utility.c index 8c57357c9..b7d20a9b5 100644 --- a/src/backend/distributed/metadata/metadata_utility.c +++ b/src/backend/distributed/metadata/metadata_utility.c @@ -2724,6 +2724,79 @@ GetNextBackgroundTaskTaskId(void) } +/* + * HasNonTerminalJobOfType returns true if there is a job of a given type that is not in + * its terminal state. + * + * Some jobs would want a single instance to be able to run at once. Before submitting a + * new job it could see if there is a job of the same type already executing. + * + * If a job is found, the optional jobIdOut is populated with the jobId. 
+ */ +bool +HasNonTerminalJobOfType(const char *jobType, int64 *jobIdOut) +{ + Relation pgDistBackgroundJob = + table_open(DistBackgroundJobRelationId(), AccessShareLock); + + /* find any job in states listed here */ + BackgroundJobStatus jobStatus[] = { + BACKGROUND_JOB_STATUS_RUNNING, + BACKGROUND_JOB_STATUS_CANCELLING, + BACKGROUND_JOB_STATUS_FAILING, + BACKGROUND_JOB_STATUS_SCHEDULED + }; + + NameData jobTypeName = { 0 }; + namestrcpy(&jobTypeName, jobType); + + bool foundJob = false; + for (int i = 0; !foundJob && i < lengthof(jobStatus); i++) + { + ScanKeyData scanKey[2] = { 0 }; + const bool indexOK = true; + + /* pg_dist_background_job.status == jobStatus[i] */ + ScanKeyInit(&scanKey[0], Anum_pg_dist_background_job_state, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(BackgroundJobStatusOid(jobStatus[i]))); + + /* pg_dist_background_job.job_type == jobType */ + ScanKeyInit(&scanKey[1], Anum_pg_dist_background_job_job_type, + BTEqualStrategyNumber, F_NAMEEQ, + NameGetDatum(&jobTypeName)); + + SysScanDesc scanDescriptor = + systable_beginscan(pgDistBackgroundJob, + InvalidOid, /* TODO use an actual index here */ + indexOK, NULL, lengthof(scanKey), scanKey); + + HeapTuple taskTuple = NULL; + if (HeapTupleIsValid(taskTuple = systable_getnext(scanDescriptor))) + { + foundJob = true; + + if (jobIdOut) + { + Datum values[Natts_pg_dist_background_job] = { 0 }; + bool isnull[Natts_pg_dist_background_job] = { 0 }; + + TupleDesc tupleDesc = RelationGetDescr(pgDistBackgroundJob); + heap_deform_tuple(taskTuple, tupleDesc, values, isnull); + + *jobIdOut = DatumGetInt64(values[Anum_pg_dist_background_job_job_id - 1]); + } + } + + systable_endscan(scanDescriptor); + } + + table_close(pgDistBackgroundJob, NoLock); + + return foundJob; +} + + /* * CreateBackgroundJob is a helper function to insert a new Background Job into Citus' * catalog. 
After inserting the new job's metadataa into the catalog it returns the job_id diff --git a/src/backend/distributed/operations/shard_rebalancer.c b/src/backend/distributed/operations/shard_rebalancer.c index 0e9b8db1d..60962deac 100644 --- a/src/backend/distributed/operations/shard_rebalancer.c +++ b/src/backend/distributed/operations/shard_rebalancer.c @@ -26,6 +26,7 @@ #include "commands/dbcommands.h" #include "commands/sequence.h" #include "distributed/argutils.h" +#include "distributed/background_jobs.h" #include "distributed/citus_safe_lib.h" #include "distributed/citus_ruleutils.h" #include "distributed/colocation_utils.h" @@ -73,6 +74,7 @@ typedef struct RebalanceOptions bool drainOnly; float4 improvementThreshold; Form_pg_dist_rebalance_strategy rebalanceStrategy; + const char *operationName; } RebalanceOptions; @@ -227,6 +229,8 @@ static float4 NodeCapacity(WorkerNode *workerNode, void *context); static ShardCost GetShardCost(uint64 shardId, void *context); static List * NonColocatedDistRelationIdList(void); static void RebalanceTableShards(RebalanceOptions *options, Oid shardReplicationModeOid); +static int64 RebalanceTableShardsBackground(RebalanceOptions *options, Oid + shardReplicationModeOid); static void AcquireRebalanceColocationLock(Oid relationId, const char *operationName); static void ExecutePlacementUpdates(List *placementUpdateList, Oid shardReplicationModeOid, char *noticeOperation); @@ -245,6 +249,8 @@ static uint64 WorkerShardSize(HTAB *workerShardStatistics, static void AddToWorkerShardIdSet(HTAB *shardsByWorker, char *workerName, int workerPort, uint64 shardId); static HTAB * BuildShardSizesHash(ProgressMonitorData *monitor, HTAB *shardStatistics); +static void ErrorOnConcurrentRebalance(RebalanceOptions *); + /* declarations for dynamic loading */ PG_FUNCTION_INFO_V1(rebalance_table_shards); @@ -256,6 +262,9 @@ PG_FUNCTION_INFO_V1(master_drain_node); PG_FUNCTION_INFO_V1(citus_shard_cost_by_disk_size); PG_FUNCTION_INFO_V1(citus_validate_rebalance_strategy_functions); PG_FUNCTION_INFO_V1(pg_dist_rebalance_strategy_enterprise_check); +PG_FUNCTION_INFO_V1(citus_rebalance_start); +PG_FUNCTION_INFO_V1(citus_rebalance_stop); +PG_FUNCTION_INFO_V1(citus_rebalance_wait); bool RunningUnderIsolationTest = false; int MaxRebalancerLoggedIgnoredMoves = 5; @@ -858,6 +867,93 @@ rebalance_table_shards(PG_FUNCTION_ARGS) } +/* + * citus_rebalance_start rebalances the shards across the workers. 
+ * + * SQL signature: + * + * citus_rebalance_start( + * rebalance_strategy name DEFAULT NULL, + * drain_only boolean DEFAULT false, + * shard_transfer_mode citus.shard_transfer_mode default 'auto' + * ) RETURNS VOID + */ +Datum +citus_rebalance_start(PG_FUNCTION_ARGS) +{ + CheckCitusVersion(ERROR); + List *relationIdList = NonColocatedDistRelationIdList(); + Form_pg_dist_rebalance_strategy strategy = + GetRebalanceStrategy(PG_GETARG_NAME_OR_NULL(0)); + + PG_ENSURE_ARGNOTNULL(1, "drain_only"); + bool drainOnly = PG_GETARG_BOOL(1); + + PG_ENSURE_ARGNOTNULL(2, "shard_transfer_mode"); + Oid shardTransferModeOid = PG_GETARG_OID(2); + + RebalanceOptions options = { + .relationIdList = relationIdList, + .threshold = strategy->defaultThreshold, + .maxShardMoves = 10000000, + .excludedShardArray = construct_empty_array(INT4OID), + .drainOnly = drainOnly, + .rebalanceStrategy = strategy, + .improvementThreshold = strategy->improvementThreshold, + }; + int jobId = RebalanceTableShardsBackground(&options, shardTransferModeOid); + + if (jobId == 0) + { + PG_RETURN_NULL(); + } + PG_RETURN_INT64(jobId); +} + + +/* + * citus_rebalance_stop stops any ongoing background rebalance that is executing. + * Raises an error when there is no backgound rebalance ongoing at the moment. + */ +Datum +citus_rebalance_stop(PG_FUNCTION_ARGS) +{ + CheckCitusVersion(ERROR); + + int64 jobId = 0; + if (!HasNonTerminalJobOfType("rebalance", &jobId)) + { + ereport(ERROR, (errmsg("no ongoing rebalance that can be stopped"))); + } + + DirectFunctionCall1(citus_job_cancel, Int64GetDatum(jobId)); + + PG_RETURN_VOID(); +} + + +/* + * citus_rebalance_wait waits till an ongoing background rebalance has finished execution. + * A warning will be displayed if no rebalance is ongoing. + */ +Datum +citus_rebalance_wait(PG_FUNCTION_ARGS) +{ + CheckCitusVersion(ERROR); + + int64 jobId = 0; + if (!HasNonTerminalJobOfType("rebalance", &jobId)) + { + ereport(WARNING, (errmsg("no ongoing rebalance that can be waited on"))); + PG_RETURN_VOID(); + } + + citus_job_wait_internal(jobId, NULL); + + PG_RETURN_VOID(); +} + + /* * GetRebalanceStrategy returns the rebalance strategy from * pg_dist_rebalance_strategy matching the given name. If name is NULL it @@ -1579,17 +1675,14 @@ RebalanceTableShards(RebalanceOptions *options, Oid shardReplicationModeOid) return; } - Oid relationId = InvalidOid; char *operationName = "rebalance"; if (options->drainOnly) { operationName = "move"; } - foreach_oid(relationId, options->relationIdList) - { - AcquireRebalanceColocationLock(relationId, operationName); - } + options->operationName = operationName; + ErrorOnConcurrentRebalance(options); List *placementUpdateList = GetRebalanceSteps(options); @@ -1609,6 +1702,168 @@ RebalanceTableShards(RebalanceOptions *options, Oid shardReplicationModeOid) } +/* + * ErrorOnConcurrentRebalance raises an error with extra information when there is already + * a rebalance running. 
+ */ +static void +ErrorOnConcurrentRebalance(RebalanceOptions *options) +{ + Oid relationId = InvalidOid; + foreach_oid(relationId, options->relationIdList) + { + /* this provides the legacy error when the lock can't be acquired */ + AcquireRebalanceColocationLock(relationId, options->operationName); + } + + int64 jobId = 0; + if (HasNonTerminalJobOfType("rebalance", &jobId)) + { + ereport(ERROR, ( + errmsg("A rebalance is already running as job %ld", jobId), + errdetail("A rebalance was already scheduled as background job"), + errhint("To monitor progress, run: SELECT * FROM " + "pg_dist_background_task WHERE job_id = %ld ORDER BY task_id " + "ASC; or SELECT * FROM get_rebalance_progress();", + jobId))); + } +} + + +/* + * RebalanceTableShardsBackground rebalances the shards for the relations + * inside the relationIdList across the different workers. It does so using our + * background job+task infrastructure. + */ +static int64 +RebalanceTableShardsBackground(RebalanceOptions *options, Oid shardReplicationModeOid) +{ + if (list_length(options->relationIdList) == 0) + { + ereport(NOTICE, (errmsg("No tables to rebalance"))); + return 0; + } + + char *operationName = "rebalance"; + if (options->drainOnly) + { + operationName = "move"; + } + + options->operationName = operationName; + ErrorOnConcurrentRebalance(options); + + const char shardTransferMode = LookupShardTransferMode(shardReplicationModeOid); + List *colocatedTableList = NIL; + Oid relationId = InvalidOid; + foreach_oid(relationId, options->relationIdList) + { + colocatedTableList = list_concat(colocatedTableList, + ColocatedTableList(relationId)); + } + Oid colocatedTableId = InvalidOid; + foreach_oid(colocatedTableId, colocatedTableList) + { + EnsureTableOwner(colocatedTableId); + } + + if (shardTransferMode == TRANSFER_MODE_AUTOMATIC) + { + /* make sure that all tables included in the rebalance have a replica identity*/ + VerifyTablesHaveReplicaIdentity(colocatedTableList); + } + + List *placementUpdateList = GetRebalanceSteps(options); + + if (list_length(placementUpdateList) == 0) + { + ereport(NOTICE, (errmsg("No moves available for rebalancing"))); + return 0; + } + + DropOrphanedShardsInSeparateTransaction(); + + /* find the name of the shard transfer mode to interpolate in the scheduled command */ + Datum shardTranferModeLabelDatum = + DirectFunctionCall1(enum_out, shardReplicationModeOid); + char *shardTranferModeLabel = DatumGetCString(shardTranferModeLabelDatum); + + /* schedule planned moves */ + int64 jobId = CreateBackgroundJob("rebalance", "Rebalance all colocation groups"); + + /* buffer used to construct the sql command for the tasks */ + StringInfoData buf = { 0 }; + initStringInfo(&buf); + + /* + * Currently we only have two tasks that any move can depend on: + * - replicating reference tables + * - the previous move + * + * prevJobIdx tells what slot to write the id of the task into. We only use both slots + * if we are actually replicating reference tables. + */ + int64 prevJobId[2] = { 0 }; + int prevJobIdx = 0; + + List *referenceTableIdList = NIL; + + if (HasNodesWithMissingReferenceTables(&referenceTableIdList)) + { + VerifyTablesHaveReplicaIdentity(referenceTableIdList); + + /* + * Reference tables need to be copied to (newly-added) nodes, this needs to be the + * first task before we can move any other table. 
+ */ + appendStringInfo(&buf, + "SELECT pg_catalog.replicate_reference_tables(%s)", + quote_literal_cstr(shardTranferModeLabel)); + BackgroundTask *task = ScheduleBackgroundTask(jobId, GetUserId(), buf.data, + prevJobIdx, prevJobId); + prevJobId[prevJobIdx] = task->taskid; + prevJobIdx++; + } + + PlacementUpdateEvent *move = NULL; + bool first = true; + int prevMoveIndex = prevJobIdx; + foreach_ptr(move, placementUpdateList) + { + resetStringInfo(&buf); + + appendStringInfo(&buf, + "SELECT pg_catalog.citus_move_shard_placement(%ld,%s,%u,%s,%u,%s)", + move->shardId, + quote_literal_cstr(move->sourceNode->workerName), + move->sourceNode->workerPort, + quote_literal_cstr(move->targetNode->workerName), + move->targetNode->workerPort, + quote_literal_cstr(shardTranferModeLabel)); + + BackgroundTask *task = ScheduleBackgroundTask(jobId, GetUserId(), buf.data, + prevJobIdx, prevJobId); + prevJobId[prevMoveIndex] = task->taskid; + if (first) + { + first = false; + prevJobIdx++; + } + } + + ereport(NOTICE, + (errmsg("Scheduled %d moves as job %ld", + list_length(placementUpdateList), jobId), + errdetail("Rebalance scheduled as background job"), + errhint("To monitor progress, run: " + "SELECT * FROM pg_dist_background_task WHERE job_id = %ld ORDER BY " + "task_id ASC; or SELECT * FROM get_rebalance_progress();", + jobId))); + + return jobId; +} + + /* * UpdateShardPlacement copies or moves a shard placement by calling * the corresponding functions in Citus in a subtransaction. diff --git a/src/backend/distributed/sql/citus--11.0-4--11.1-1.sql b/src/backend/distributed/sql/citus--11.0-4--11.1-1.sql index d5e157fdb..00c28f22c 100644 --- a/src/backend/distributed/sql/citus--11.0-4--11.1-1.sql +++ b/src/backend/distributed/sql/citus--11.0-4--11.1-1.sql @@ -170,3 +170,6 @@ GRANT SELECT ON pg_catalog.pg_dist_background_task_depend TO PUBLIC; #include "udfs/citus_job_wait/11.1-1.sql" #include "udfs/citus_job_cancel/11.1-1.sql" +#include "udfs/citus_rebalance_start/11.1-1.sql" +#include "udfs/citus_rebalance_stop/11.1-1.sql" +#include "udfs/citus_rebalance_wait/11.1-1.sql" diff --git a/src/backend/distributed/sql/downgrades/citus--11.1-1--11.0-4.sql b/src/backend/distributed/sql/downgrades/citus--11.1-1--11.0-4.sql index fbfc4eddf..2a7462e0d 100644 --- a/src/backend/distributed/sql/downgrades/citus--11.1-1--11.0-4.sql +++ b/src/backend/distributed/sql/downgrades/citus--11.1-1--11.0-4.sql @@ -108,6 +108,10 @@ DROP TABLE pg_catalog.pg_dist_cleanup; DROP SEQUENCE pg_catalog.pg_dist_operationid_seq; DROP SEQUENCE pg_catalog.pg_dist_cleanup_recordid_seq; DROP PROCEDURE pg_catalog.citus_cleanup_orphaned_resources(); + +DROP FUNCTION pg_catalog.citus_rebalance_start(name, bool, citus.shard_transfer_mode); +DROP FUNCTION pg_catalog.citus_rebalance_stop(); +DROP FUNCTION pg_catalog.citus_rebalance_wait(); DROP FUNCTION pg_catalog.citus_job_cancel(bigint); DROP FUNCTION pg_catalog.citus_job_wait(bigint, pg_catalog.citus_job_status); DROP TABLE pg_catalog.pg_dist_background_task_depend; diff --git a/src/backend/distributed/sql/udfs/citus_rebalance_start/11.1-1.sql b/src/backend/distributed/sql/udfs/citus_rebalance_start/11.1-1.sql new file mode 100644 index 000000000..cc84d3142 --- /dev/null +++ b/src/backend/distributed/sql/udfs/citus_rebalance_start/11.1-1.sql @@ -0,0 +1,11 @@ +CREATE OR REPLACE FUNCTION pg_catalog.citus_rebalance_start( + rebalance_strategy name DEFAULT NULL, + drain_only boolean DEFAULT false, + shard_transfer_mode citus.shard_transfer_mode default 'auto' + ) + RETURNS bigint + AS 
'MODULE_PATHNAME' + LANGUAGE C VOLATILE; +COMMENT ON FUNCTION pg_catalog.citus_rebalance_start(name, boolean, citus.shard_transfer_mode) + IS 'rebalance the shards in the cluster in the background'; +GRANT EXECUTE ON FUNCTION pg_catalog.citus_rebalance_start(name, boolean, citus.shard_transfer_mode) TO PUBLIC; diff --git a/src/backend/distributed/sql/udfs/citus_rebalance_start/latest.sql b/src/backend/distributed/sql/udfs/citus_rebalance_start/latest.sql new file mode 100644 index 000000000..cc84d3142 --- /dev/null +++ b/src/backend/distributed/sql/udfs/citus_rebalance_start/latest.sql @@ -0,0 +1,11 @@ +CREATE OR REPLACE FUNCTION pg_catalog.citus_rebalance_start( + rebalance_strategy name DEFAULT NULL, + drain_only boolean DEFAULT false, + shard_transfer_mode citus.shard_transfer_mode default 'auto' + ) + RETURNS bigint + AS 'MODULE_PATHNAME' + LANGUAGE C VOLATILE; +COMMENT ON FUNCTION pg_catalog.citus_rebalance_start(name, boolean, citus.shard_transfer_mode) + IS 'rebalance the shards in the cluster in the background'; +GRANT EXECUTE ON FUNCTION pg_catalog.citus_rebalance_start(name, boolean, citus.shard_transfer_mode) TO PUBLIC; diff --git a/src/backend/distributed/sql/udfs/citus_rebalance_stop/11.1-1.sql b/src/backend/distributed/sql/udfs/citus_rebalance_stop/11.1-1.sql new file mode 100644 index 000000000..46ef49996 --- /dev/null +++ b/src/backend/distributed/sql/udfs/citus_rebalance_stop/11.1-1.sql @@ -0,0 +1,7 @@ +CREATE OR REPLACE FUNCTION pg_catalog.citus_rebalance_stop() + RETURNS VOID + AS 'MODULE_PATHNAME' + LANGUAGE C VOLATILE; +COMMENT ON FUNCTION pg_catalog.citus_rebalance_stop() + IS 'stop a rebalance that is running in the background'; +GRANT EXECUTE ON FUNCTION pg_catalog.citus_rebalance_stop() TO PUBLIC; diff --git a/src/backend/distributed/sql/udfs/citus_rebalance_stop/latest.sql b/src/backend/distributed/sql/udfs/citus_rebalance_stop/latest.sql new file mode 100644 index 000000000..46ef49996 --- /dev/null +++ b/src/backend/distributed/sql/udfs/citus_rebalance_stop/latest.sql @@ -0,0 +1,7 @@ +CREATE OR REPLACE FUNCTION pg_catalog.citus_rebalance_stop() + RETURNS VOID + AS 'MODULE_PATHNAME' + LANGUAGE C VOLATILE; +COMMENT ON FUNCTION pg_catalog.citus_rebalance_stop() + IS 'stop a rebalance that is running in the background'; +GRANT EXECUTE ON FUNCTION pg_catalog.citus_rebalance_stop() TO PUBLIC; diff --git a/src/backend/distributed/sql/udfs/citus_rebalance_wait/11.1-1.sql b/src/backend/distributed/sql/udfs/citus_rebalance_wait/11.1-1.sql new file mode 100644 index 000000000..4e78ec621 --- /dev/null +++ b/src/backend/distributed/sql/udfs/citus_rebalance_wait/11.1-1.sql @@ -0,0 +1,7 @@ +CREATE OR REPLACE FUNCTION pg_catalog.citus_rebalance_wait() + RETURNS VOID + AS 'MODULE_PATHNAME' + LANGUAGE C VOLATILE; +COMMENT ON FUNCTION pg_catalog.citus_rebalance_wait() + IS 'wait on a running rebalance in the background'; +GRANT EXECUTE ON FUNCTION pg_catalog.citus_rebalance_wait() TO PUBLIC; diff --git a/src/backend/distributed/sql/udfs/citus_rebalance_wait/latest.sql b/src/backend/distributed/sql/udfs/citus_rebalance_wait/latest.sql new file mode 100644 index 000000000..4e78ec621 --- /dev/null +++ b/src/backend/distributed/sql/udfs/citus_rebalance_wait/latest.sql @@ -0,0 +1,7 @@ +CREATE OR REPLACE FUNCTION pg_catalog.citus_rebalance_wait() + RETURNS VOID + AS 'MODULE_PATHNAME' + LANGUAGE C VOLATILE; +COMMENT ON FUNCTION pg_catalog.citus_rebalance_wait() + IS 'wait on a running rebalance in the background'; +GRANT EXECUTE ON FUNCTION pg_catalog.citus_rebalance_wait() TO PUBLIC; 
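
Taken together, the new UDFs give a workflow along these lines (a sketch only: the job id 17 and the named-argument call are illustrative, and the monitoring queries simply mirror the hint emitted by ErrorOnConcurrentRebalance):

-- schedule a rebalance in the background; returns the job id, or NULL when there is nothing to move
SELECT citus_rebalance_start(shard_transfer_mode => 'block_writes');

-- monitor the scheduled move tasks for that job (job id is illustrative)
SELECT * FROM pg_dist_background_task WHERE job_id = 17 ORDER BY task_id ASC;
SELECT * FROM get_rebalance_progress();

-- block until the job reaches a terminal state, or cancel it
SELECT citus_rebalance_wait();
SELECT citus_rebalance_stop();
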
diff --git a/src/backend/distributed/utils/background_jobs.c b/src/backend/distributed/utils/background_jobs.c index f28bef486..6b7cb024b 100644 --- a/src/backend/distributed/utils/background_jobs.c +++ b/src/backend/distributed/utils/background_jobs.c @@ -157,6 +157,22 @@ citus_job_wait(PG_FUNCTION_ARGS) desiredStatus = BackgroundJobStatusByOid(PG_GETARG_OID(1)); } + citus_job_wait_internal(jobid, hasDesiredStatus ? &desiredStatus : NULL); + + PG_RETURN_VOID(); +} + + +/* + * citus_job_wait_internal imaplements the waiting on a job for reuse in other areas where + * we want to wait on jobs. eg the background rebalancer. + * + * When a desiredStatus is provided it will provide an error when a different state is + * reached and the state cannot ever reach the desired state anymore. + */ +void +citus_job_wait_internal(int64 jobid, BackgroundJobStatus *desiredStatus) +{ /* * Since we are wait polling we will actually allocate memory on every poll. To make * sure we don't put unneeded pressure on the memory we create a context that we clear @@ -177,10 +193,9 @@ citus_job_wait(PG_FUNCTION_ARGS) if (!job) { ereport(ERROR, (errmsg("no job found for job with jobid: %ld", jobid))); - PG_RETURN_VOID(); } - if (hasDesiredStatus && job->state == desiredStatus) + if (desiredStatus && job->state == *desiredStatus) { /* job has reached its desired status, done waiting */ break; @@ -188,7 +203,7 @@ citus_job_wait(PG_FUNCTION_ARGS) if (IsBackgroundJobStatusTerminal(job->state)) { - if (hasDesiredStatus) + if (desiredStatus) { /* * We have reached a terminal state, which is not the desired state we @@ -201,7 +216,7 @@ citus_job_wait(PG_FUNCTION_ARGS) reachedStatusOid); char *reachedStatusName = DatumGetCString(reachedStatusNameDatum); - Oid desiredStatusOid = BackgroundJobStatusOid(desiredStatus); + Oid desiredStatusOid = BackgroundJobStatusOid(*desiredStatus); Datum desiredStatusNameDatum = DirectFunctionCall1(enum_out, desiredStatusOid); char *desiredStatusName = DatumGetCString(desiredStatusNameDatum); @@ -228,8 +243,6 @@ citus_job_wait(PG_FUNCTION_ARGS) MemoryContextSwitchTo(oldContext); MemoryContextDelete(waitContext); - - PG_RETURN_VOID(); } diff --git a/src/backend/distributed/utils/reference_table_utils.c b/src/backend/distributed/utils/reference_table_utils.c index 8b37674d0..a00e7de3b 100644 --- a/src/backend/distributed/utils/reference_table_utils.c +++ b/src/backend/distributed/utils/reference_table_utils.c @@ -293,6 +293,63 @@ EnsureReferenceTablesExistOnAllNodesExtended(char transferMode) } +/* + * HasNodesWithMissingReferenceTables checks if all reference tables are already copied to + * all nodes. When a node doesn't have a copy of the reference tables we call them missing + * and this function will return true. + * + * The caller might be interested in the list of all reference tables after this check and + * this the list of tables is written to *referenceTableList if a non-null pointer is + * passed. + */ +bool +HasNodesWithMissingReferenceTables(List **referenceTableList) +{ + int colocationId = GetReferenceTableColocationId(); + + if (colocationId == INVALID_COLOCATION_ID) + { + /* we have no reference table yet. 
*/ + return false; + } + LockColocationId(colocationId, AccessShareLock); + + List *referenceTableIdList = CitusTableTypeIdList(REFERENCE_TABLE); + if (referenceTableList) + { + *referenceTableList = referenceTableIdList; + } + + if (list_length(referenceTableIdList) <= 0) + { + return false; + } + + Oid referenceTableId = linitial_oid(referenceTableIdList); + List *shardIntervalList = LoadShardIntervalList(referenceTableId); + if (list_length(shardIntervalList) == 0) + { + const char *referenceTableName = get_rel_name(referenceTableId); + + /* check for corrupt metadata */ + ereport(ERROR, (errmsg("reference table \"%s\" does not have a shard", + referenceTableName))); + } + + ShardInterval *shardInterval = (ShardInterval *) linitial(shardIntervalList); + uint64 shardId = shardInterval->shardId; + List *newWorkersList = WorkersWithoutReferenceTablePlacement(shardId, + AccessShareLock); + + if (list_length(newWorkersList) <= 0) + { + return false; + } + + return true; +} + + /* * AnyRelationsModifiedInTransaction returns true if any of the given relations * were modified in the current transaction. diff --git a/src/include/distributed/background_jobs.h b/src/include/distributed/background_jobs.h index e38e57569..d814a2165 100644 --- a/src/include/distributed/background_jobs.h +++ b/src/include/distributed/background_jobs.h @@ -15,9 +15,15 @@ #include "postmaster/bgworker.h" +#include "distributed/metadata_utility.h" + extern BackgroundWorkerHandle * StartCitusBackgroundTaskQueueMonitor(Oid database, Oid extensionOwner); extern void CitusBackgroundTaskQueueMonitorMain(Datum arg); extern void CitusBackgroundTaskExecuter(Datum main_arg); +extern Datum citus_job_cancel(PG_FUNCTION_ARGS); +extern Datum citus_job_wait(PG_FUNCTION_ARGS); +extern void citus_job_wait_internal(int64 jobid, BackgroundJobStatus *desiredStatus); + #endif /*CITUS_BACKGROUND_JOBS_H */ diff --git a/src/include/distributed/metadata_utility.h b/src/include/distributed/metadata_utility.h index 748c9ff81..5376dd858 100644 --- a/src/include/distributed/metadata_utility.h +++ b/src/include/distributed/metadata_utility.h @@ -384,6 +384,7 @@ extern void EnsureSequenceTypeSupported(Oid seqOid, Oid attributeTypeId, Oid extern void AlterSequenceType(Oid seqOid, Oid typeOid); extern void EnsureRelationHasCompatibleSequenceTypes(Oid relationId); extern bool HasRunnableBackgroundTask(void); +extern bool HasNonTerminalJobOfType(const char *jobType, int64 *jobIdOut); extern int64 CreateBackgroundJob(const char *jobType, const char *description); extern BackgroundTask * ScheduleBackgroundTask(int64 jobId, Oid owner, char *command, int dependingTaskCount, diff --git a/src/include/distributed/reference_table_utils.h b/src/include/distributed/reference_table_utils.h index 80b282126..ce2de9d9d 100644 --- a/src/include/distributed/reference_table_utils.h +++ b/src/include/distributed/reference_table_utils.h @@ -20,6 +20,7 @@ extern void EnsureReferenceTablesExistOnAllNodes(void); extern void EnsureReferenceTablesExistOnAllNodesExtended(char transferMode); +extern bool HasNodesWithMissingReferenceTables(List **referenceTableList); extern uint32 CreateReferenceTableColocationId(void); extern uint32 GetReferenceTableColocationId(void); extern void DeleteAllReplicatedTablePlacementsFromNodeGroup(int32 groupId, diff --git a/src/test/regress/bin/normalize.sed b/src/test/regress/bin/normalize.sed index 531029bcd..353707c21 100644 --- a/src/test/regress/bin/normalize.sed +++ b/src/test/regress/bin/normalize.sed @@ -283,3 +283,8 @@ s/^(DETAIL: "[a-z\ 
]+ )pg_temp_[0-9]+(\..*" will be created only locally)$/\1pg # will be replaced with # WARNING: "function func(bigint)" has dependency on unsupported object "schema pg_temp_xxx" s/^(WARNING|ERROR)(: "[a-z\ ]+ .*" has dependency on unsupported object) "schema pg_temp_[0-9]+"$/\1\2 "schema pg_temp_xxx"/g + +# remove jobId's from the messages of the background rebalancer +s/^ERROR: A rebalance is already running as job [0-9]+$/ERROR: A rebalance is already running as job xxx/g +s/^NOTICE: Scheduled ([0-9]+) moves as job [0-9]+$/NOTICE: Scheduled \1 moves as job xxx/g +s/^HINT: (.*) job_id = [0-9]+ (.*)$/HINT: \1 job_id = xxx \2/g diff --git a/src/test/regress/expected/background_rebalance.out b/src/test/regress/expected/background_rebalance.out new file mode 100644 index 000000000..32a5e86b0 --- /dev/null +++ b/src/test/regress/expected/background_rebalance.out @@ -0,0 +1,180 @@ +CREATE SCHEMA background_rebalance; +SET search_path TO background_rebalance; +SET citus.next_shard_id TO 85674000; +SET citus.shard_replication_factor TO 1; +ALTER SYSTEM SET citus.background_task_queue_interval TO '1s'; +SELECT pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +CREATE TABLE t1 (a int PRIMARY KEY); +SELECT create_distributed_table('t1', 'a', shard_count => 4, colocate_with => 'none'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- verify the rebalance works - no-op - when the shards are balanced. Noop is shown by wait complaining there is nothing +-- to wait on. +SELECT 1 FROM citus_rebalance_start(); +NOTICE: No moves available for rebalancing + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT citus_rebalance_wait(); +WARNING: no ongoing rebalance that can be waited on + citus_rebalance_wait +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_move_shard_placement(85674000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode => 'block_writes'); + citus_move_shard_placement +--------------------------------------------------------------------- + +(1 row) + +-- rebalance a table in the background +SELECT 1 FROM citus_rebalance_start(); +NOTICE: Scheduled 1 moves as job xxx +DETAIL: Rebalance scheduled as background job +HINT: To monitor progress, run: SELECT * FROM pg_dist_background_task WHERE job_id = xxx ORDER BY task_id ASC; or SELECT * FROM get_rebalance_progress(); + ?column? 
+--------------------------------------------------------------------- + 1 +(1 row) + +SELECT citus_rebalance_wait(); + citus_rebalance_wait +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_move_shard_placement(85674000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode => 'block_writes'); + citus_move_shard_placement +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE t2 (a int); +SELECT create_distributed_table('t2', 'a' , colocate_with => 't1'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- show that we get an error when a table in the colocation group can't be moved non-blocking +SELECT 1 FROM citus_rebalance_start(); +ERROR: cannot use logical replication to transfer shards of the relation t2 since it doesn't have a REPLICA IDENTITY or PRIMARY KEY +DETAIL: UPDATE and DELETE commands on the shard will error out during logical replication unless there is a REPLICA IDENTITY or PRIMARY KEY. +HINT: If you wish to continue without a replica identity set the shard_transfer_mode to 'force_logical' or 'block_writes'. +SELECT 1 FROM citus_rebalance_start(shard_transfer_mode => 'block_writes'); +NOTICE: Scheduled 1 moves as job xxx +DETAIL: Rebalance scheduled as background job +HINT: To monitor progress, run: SELECT * FROM pg_dist_background_task WHERE job_id = xxx ORDER BY task_id ASC; or SELECT * FROM get_rebalance_progress(); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT citus_rebalance_wait(); + citus_rebalance_wait +--------------------------------------------------------------------- + +(1 row) + +DROP TABLE t2; +SELECT citus_move_shard_placement(85674000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode => 'block_writes'); + citus_move_shard_placement +--------------------------------------------------------------------- + +(1 row) + +-- show we can stop a rebalance, the stop causes the move to not have happened, eg, our move back below fails. +SELECT 1 FROM citus_rebalance_start(); +NOTICE: Scheduled 1 moves as job xxx +DETAIL: Rebalance scheduled as background job +HINT: To monitor progress, run: SELECT * FROM pg_dist_background_task WHERE job_id = xxx ORDER BY task_id ASC; or SELECT * FROM get_rebalance_progress(); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT citus_rebalance_stop(); + citus_rebalance_stop +--------------------------------------------------------------------- + +(1 row) + +-- waiting on this rebalance is racy, as it sometimes sees no rebalance is ongoing while other times it actually sees it ongoing +-- we simply sleep a bit here +SELECT pg_sleep(1); + pg_sleep +--------------------------------------------------------------------- + +(1 row) + +-- failing move due to a stopped rebalance, first clean orphans to make the error stable +SET client_min_messages TO WARNING; +CALL citus_cleanup_orphaned_shards(); +RESET client_min_messages; +SELECT citus_move_shard_placement(85674000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode => 'block_writes'); +WARNING: shard is already present on node localhost:xxxxx +DETAIL: Move may have already completed. 
+ citus_move_shard_placement +--------------------------------------------------------------------- + +(1 row) + +-- show we can't start the rebalancer twice +SELECT 1 FROM citus_rebalance_start(); +NOTICE: Scheduled 1 moves as job xxx +DETAIL: Rebalance scheduled as background job +HINT: To monitor progress, run: SELECT * FROM pg_dist_background_task WHERE job_id = xxx ORDER BY task_id ASC; or SELECT * FROM get_rebalance_progress(); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT 1 FROM citus_rebalance_start(); +ERROR: A rebalance is already running as job xxx +DETAIL: A rebalance was already scheduled as background job +HINT: To monitor progress, run: SELECT * FROM pg_dist_background_task WHERE job_id = xxx ORDER BY task_id ASC; or SELECT * FROM get_rebalance_progress(); +SELECT citus_rebalance_wait(); + citus_rebalance_wait +--------------------------------------------------------------------- + +(1 row) + +-- show that the old rebalancer cannot be started with a background rebalance in progress +SELECT citus_move_shard_placement(85674000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode => 'block_writes'); + citus_move_shard_placement +--------------------------------------------------------------------- + +(1 row) + +SELECT 1 FROM citus_rebalance_start(); +NOTICE: Scheduled 1 moves as job xxx +DETAIL: Rebalance scheduled as background job +HINT: To monitor progress, run: SELECT * FROM pg_dist_background_task WHERE job_id = xxx ORDER BY task_id ASC; or SELECT * FROM get_rebalance_progress(); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT rebalance_table_shards(); +ERROR: A rebalance is already running as job xxx +DETAIL: A rebalance was already scheduled as background job +HINT: To monitor progress, run: SELECT * FROM pg_dist_background_task WHERE job_id = xxx ORDER BY task_id ASC; or SELECT * FROM get_rebalance_progress(); +SELECT citus_rebalance_wait(); + citus_rebalance_wait +--------------------------------------------------------------------- + +(1 row) + +SET client_min_messages TO WARNING; +DROP SCHEMA background_rebalance CASCADE; diff --git a/src/test/regress/expected/isolation_shard_rebalancer.out b/src/test/regress/expected/isolation_shard_rebalancer.out index 1d6779b7c..2385f239a 100644 --- a/src/test/regress/expected/isolation_shard_rebalancer.out +++ b/src/test/regress/expected/isolation_shard_rebalancer.out @@ -575,3 +575,31 @@ master_set_node_property (1 row) + +starting permutation: s1-rebalance-all s2-citus-rebalance-start s1-commit +create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +step s1-rebalance-all: + BEGIN; + select rebalance_table_shards(); + +rebalance_table_shards +--------------------------------------------------------------------- + +(1 row) + +step s2-citus-rebalance-start: + SELECT 1 FROM citus_rebalance_start(); + +ERROR: could not acquire the lock required to rebalance public.colocated1 +step s1-commit: + COMMIT; + +master_set_node_property +--------------------------------------------------------------------- + +(1 row) + diff --git a/src/test/regress/expected/multi_extension.out b/src/test/regress/expected/multi_extension.out index f76712647..5f37f7a32 100644 --- a/src/test/regress/expected/multi_extension.out +++ b/src/test/regress/expected/multi_extension.out @@ -1143,6 +1143,9 @@ SELECT * FROM multi_extension.print_extension_changes(); | function 
citus_job_cancel(bigint) void | function citus_job_wait(bigint,citus_job_status) void | function citus_locks() SETOF record + | function citus_rebalance_start(name,boolean,citus.shard_transfer_mode) bigint + | function citus_rebalance_stop() void + | function citus_rebalance_wait() void | function citus_split_shard_by_split_points(bigint,text[],integer[],citus.shard_transfer_mode) void | function create_distributed_table_concurrently(regclass,text,citus.distribution_type,text,integer) void | function isolate_tenant_to_new_shard(regclass,"any",text,citus.shard_transfer_mode) bigint @@ -1165,7 +1168,7 @@ SELECT * FROM multi_extension.print_extension_changes(); | type split_copy_info | type split_shard_info | view citus_locks -(52 rows) +(55 rows) DROP TABLE multi_extension.prev_objects, multi_extension.extension_diff; -- show running version diff --git a/src/test/regress/expected/upgrade_list_citus_objects.out b/src/test/regress/expected/upgrade_list_citus_objects.out index bb04fcfb9..085a47769 100644 --- a/src/test/regress/expected/upgrade_list_citus_objects.out +++ b/src/test/regress/expected/upgrade_list_citus_objects.out @@ -90,6 +90,9 @@ ORDER BY 1; function citus_pid_for_gpid(bigint) function citus_prepare_pg_upgrade() function citus_query_stats() + function citus_rebalance_start(name,boolean,citus.shard_transfer_mode) + function citus_rebalance_stop() + function citus_rebalance_wait() function citus_relation_size(regclass) function citus_remote_connection_stats() function citus_remove_node(text,integer) @@ -283,5 +286,5 @@ ORDER BY 1; view citus_stat_statements view pg_dist_shard_placement view time_partitions -(275 rows) +(278 rows) diff --git a/src/test/regress/operations_schedule b/src/test/regress/operations_schedule index 3eeb3e8db..96eafc3ea 100644 --- a/src/test/regress/operations_schedule +++ b/src/test/regress/operations_schedule @@ -3,6 +3,7 @@ test: multi_cluster_management test: multi_test_catalog_views test: shard_rebalancer_unit test: shard_rebalancer +test: background_rebalance test: worker_copy_table_to_node test: foreign_key_to_reference_shard_rebalance test: multi_move_mx diff --git a/src/test/regress/spec/isolation_shard_rebalancer.spec b/src/test/regress/spec/isolation_shard_rebalancer.spec index 1aca39ca6..d9d8d99ed 100644 --- a/src/test/regress/spec/isolation_shard_rebalancer.spec +++ b/src/test/regress/spec/isolation_shard_rebalancer.spec @@ -44,6 +44,12 @@ step "s1-replicate-nc" select replicate_table_shards('non_colocated'); } +step "s1-rebalance-all" +{ + BEGIN; + select rebalance_table_shards(); +} + step "s1-commit" { COMMIT; @@ -82,6 +88,11 @@ step "s2-drain" select master_drain_node('localhost', 57638); } +step "s2-citus-rebalance-start" +{ + SELECT 1 FROM citus_rebalance_start(); +} + // disallowed because it's the same table permutation "s1-rebalance-nc" "s2-rebalance-nc" "s1-commit" @@ -112,3 +123,6 @@ permutation "s1-rebalance-c1" "s2-drain" "s1-commit" permutation "s1-replicate-c1" "s2-drain" "s1-commit" permutation "s1-rebalance-nc" "s2-drain" "s1-commit" permutation "s1-replicate-nc" "s2-drain" "s1-commit" + +// disallow the background rebalancer to run when rebalance_table_shard rung +permutation "s1-rebalance-all" "s2-citus-rebalance-start" "s1-commit" diff --git a/src/test/regress/sql/background_rebalance.sql b/src/test/regress/sql/background_rebalance.sql new file mode 100644 index 000000000..9158fc532 --- /dev/null +++ b/src/test/regress/sql/background_rebalance.sql @@ -0,0 +1,64 @@ +CREATE SCHEMA background_rebalance; +SET search_path TO 
background_rebalance; +SET citus.next_shard_id TO 85674000; +SET citus.shard_replication_factor TO 1; + +ALTER SYSTEM SET citus.background_task_queue_interval TO '1s'; +SELECT pg_reload_conf(); + +CREATE TABLE t1 (a int PRIMARY KEY); +SELECT create_distributed_table('t1', 'a', shard_count => 4, colocate_with => 'none'); + +-- verify the rebalance works - no-op - when the shards are balanced. Noop is shown by wait complaining there is nothing +-- to wait on. +SELECT 1 FROM citus_rebalance_start(); +SELECT citus_rebalance_wait(); + +SELECT citus_move_shard_placement(85674000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode => 'block_writes'); + +-- rebalance a table in the background +SELECT 1 FROM citus_rebalance_start(); +SELECT citus_rebalance_wait(); + +SELECT citus_move_shard_placement(85674000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode => 'block_writes'); + +CREATE TABLE t2 (a int); +SELECT create_distributed_table('t2', 'a' , colocate_with => 't1'); + +-- show that we get an error when a table in the colocation group can't be moved non-blocking +SELECT 1 FROM citus_rebalance_start(); +SELECT 1 FROM citus_rebalance_start(shard_transfer_mode => 'block_writes'); +SELECT citus_rebalance_wait(); + +DROP TABLE t2; + +SELECT citus_move_shard_placement(85674000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode => 'block_writes'); + +-- show we can stop a rebalance, the stop causes the move to not have happened, eg, our move back below fails. +SELECT 1 FROM citus_rebalance_start(); +SELECT citus_rebalance_stop(); +-- waiting on this rebalance is racy, as it sometimes sees no rebalance is ongoing while other times it actually sees it ongoing +-- we simply sleep a bit here +SELECT pg_sleep(1); + +-- failing move due to a stopped rebalance, first clean orphans to make the error stable +SET client_min_messages TO WARNING; +CALL citus_cleanup_orphaned_shards(); +RESET client_min_messages; +SELECT citus_move_shard_placement(85674000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode => 'block_writes'); + + +-- show we can't start the rebalancer twice +SELECT 1 FROM citus_rebalance_start(); +SELECT 1 FROM citus_rebalance_start(); +SELECT citus_rebalance_wait(); + +-- show that the old rebalancer cannot be started with a background rebalance in progress +SELECT citus_move_shard_placement(85674000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode => 'block_writes'); +SELECT 1 FROM citus_rebalance_start(); +SELECT rebalance_table_shards(); +SELECT citus_rebalance_wait(); + + +SET client_min_messages TO WARNING; +DROP SCHEMA background_rebalance CASCADE; From b79111527e70fff309f9df3c9067620168bc7605 Mon Sep 17 00:00:00 2001 From: Marco Slot Date: Mon, 12 Sep 2022 21:09:37 +0200 Subject: [PATCH 11/17] Avoid blocking writes in create_distributed_table_concurrently (#6324) Co-authored-by: Marco Slot --- .../commands/create_distributed_table.c | 4 +- .../multi_replicate_reference_table.out | 69 +++++++++++++++++-- .../sql/multi_replicate_reference_table.sql | 24 +++++++ 3 files changed, 90 insertions(+), 7 deletions(-) diff --git a/src/backend/distributed/commands/create_distributed_table.c b/src/backend/distributed/commands/create_distributed_table.c index 95a5062ce..4a1450ece 100644 --- a/src/backend/distributed/commands/create_distributed_table.c +++ b/src/backend/distributed/commands/create_distributed_table.c @@ -564,8 +564,10 @@ 
CreateDistributedTableConcurrently(Oid relationId, char *distributionColumnName, * such that we can create foreign keys and joins work immediately after creation. * We do this after applying all essential checks to error out early in case of * user error. + * + * Use force_logical since this function is meant to not block writes. */ - EnsureReferenceTablesExistOnAllNodes(); + EnsureReferenceTablesExistOnAllNodesExtended(TRANSFER_MODE_FORCE_LOGICAL); /* * At this point, the table is a Citus local table, which means it does diff --git a/src/test/regress/expected/multi_replicate_reference_table.out b/src/test/regress/expected/multi_replicate_reference_table.out index 2a28208af..313d0215a 100644 --- a/src/test/regress/expected/multi_replicate_reference_table.out +++ b/src/test/regress/expected/multi_replicate_reference_table.out @@ -356,6 +356,63 @@ WHERE colocationid IN (1 row) DROP TABLE replicate_reference_table_commit; +-- exercise reference table replication in create_distributed_table_concurrently +SELECT citus_remove_node('localhost', :worker_2_port); + citus_remove_node +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE replicate_reference_table_cdtc(column1 int); +SELECT create_reference_table('replicate_reference_table_cdtc'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_add_node('localhost', :worker_2_port); + citus_add_node +--------------------------------------------------------------------- + 1370004 +(1 row) + +-- required for create_distributed_table_concurrently +SELECT 1 FROM citus_set_coordinator_host('localhost', :master_port); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +SET citus.shard_replication_factor TO 1; +CREATE TABLE distributed_table_cdtc(column1 int primary key); +SELECT create_distributed_table_concurrently('distributed_table_cdtc', 'column1'); + create_distributed_table_concurrently +--------------------------------------------------------------------- + +(1 row) + +RESET citus.shard_replication_factor; +SELECT citus_remove_node('localhost', :master_port); + citus_remove_node +--------------------------------------------------------------------- + +(1 row) + +SELECT + shardid, shardstate, shardlength, nodename, nodeport +FROM + pg_dist_shard_placement_view +WHERE + nodeport = :worker_2_port +ORDER BY shardid, nodeport; + shardid | shardstate | shardlength | nodename | nodeport +--------------------------------------------------------------------- + 1370004 | 1 | 0 | localhost | 57638 + 1370005 | 1 | 0 | localhost | 57638 + 1370007 | 1 | 0 | localhost | 57638 +(3 rows) + +DROP TABLE replicate_reference_table_cdtc, distributed_table_cdtc; -- test adding new node + upgrading another hash distributed table to reference table + creating new reference table in TRANSACTION SELECT master_remove_node('localhost', :worker_2_port); master_remove_node @@ -431,8 +488,8 @@ WHERE ORDER BY shardid, nodeport; shardid | shardstate | shardlength | nodename | nodeport --------------------------------------------------------------------- - 1370004 | 1 | 0 | localhost | 57638 - 1370005 | 1 | 0 | localhost | 57638 + 1370009 | 1 | 0 | localhost | 57638 + 1370010 | 1 | 0 | localhost | 57638 (2 rows) SELECT shardcount, replicationfactor, distributioncolumntype, distributioncolumncollation FROM pg_dist_colocation @@ -739,7 +796,7 @@ WHERE ORDER BY 1,4,5; shardid | shardstate | shardlength | nodename | nodeport 
--------------------------------------------------------------------- - 1370014 | 1 | 0 | localhost | 57637 + 1370019 | 1 | 0 | localhost | 57637 (1 row) -- we should see the two shard placements after activation @@ -764,7 +821,7 @@ WHERE ORDER BY 1,4,5; shardid | shardstate | shardlength | nodename | nodeport --------------------------------------------------------------------- - 1370014 | 1 | 0 | localhost | 57637 + 1370019 | 1 | 0 | localhost | 57637 (1 row) SELECT 1 FROM master_remove_node('localhost', :worker_2_port); @@ -794,7 +851,7 @@ HINT: Add the target node via SELECT citus_add_node('localhost', 57638); SELECT citus_add_secondary_node('localhost', :worker_2_port, 'localhost', :worker_1_port); citus_add_secondary_node --------------------------------------------------------------------- - 1370012 + 1370014 (1 row) SELECT citus_copy_shard_placement( @@ -1162,7 +1219,7 @@ SELECT create_distributed_table('test','x'); SELECT citus_add_node('localhost', :worker_2_port); citus_add_node --------------------------------------------------------------------- - 1370022 + 1370024 (1 row) SELECT diff --git a/src/test/regress/sql/multi_replicate_reference_table.sql b/src/test/regress/sql/multi_replicate_reference_table.sql index 9707ef7fe..e507dfe56 100644 --- a/src/test/regress/sql/multi_replicate_reference_table.sql +++ b/src/test/regress/sql/multi_replicate_reference_table.sql @@ -233,6 +233,30 @@ WHERE colocationid IN DROP TABLE replicate_reference_table_commit; +-- exercise reference table replication in create_distributed_table_concurrently +SELECT citus_remove_node('localhost', :worker_2_port); +CREATE TABLE replicate_reference_table_cdtc(column1 int); +SELECT create_reference_table('replicate_reference_table_cdtc'); +SELECT citus_add_node('localhost', :worker_2_port); + +-- required for create_distributed_table_concurrently +SELECT 1 FROM citus_set_coordinator_host('localhost', :master_port); +SET citus.shard_replication_factor TO 1; + +CREATE TABLE distributed_table_cdtc(column1 int primary key); +SELECT create_distributed_table_concurrently('distributed_table_cdtc', 'column1'); + +RESET citus.shard_replication_factor; +SELECT citus_remove_node('localhost', :master_port); + +SELECT + shardid, shardstate, shardlength, nodename, nodeport +FROM + pg_dist_shard_placement_view +WHERE + nodeport = :worker_2_port +ORDER BY shardid, nodeport; +DROP TABLE replicate_reference_table_cdtc, distributed_table_cdtc; -- test adding new node + upgrading another hash distributed table to reference table + creating new reference table in TRANSACTION SELECT master_remove_node('localhost', :worker_2_port); From 164f2fa0a6403ef1498330d8b17a364b0e8e004b Mon Sep 17 00:00:00 2001 From: Hanefi Onaldi Date: Mon, 12 Sep 2022 23:47:37 +0300 Subject: [PATCH 12/17] PG15: Add support for NULLS NOT DISTINCT (#6308) Relevant PG commit: 94aa7cc5f707712f592885995a28e018c7c80488 --- .../distributed/deparser/citus_ruleutils.c | 7 ++ src/test/regress/expected/pg15.out | 68 ++++++++++++++++++- src/test/regress/sql/pg15.sql | 44 +++++++++++- 3 files changed, 115 insertions(+), 4 deletions(-) diff --git a/src/backend/distributed/deparser/citus_ruleutils.c b/src/backend/distributed/deparser/citus_ruleutils.c index 0a281be4d..dbb8edbe2 100644 --- a/src/backend/distributed/deparser/citus_ruleutils.c +++ b/src/backend/distributed/deparser/citus_ruleutils.c @@ -800,6 +800,13 @@ deparse_shard_index_statement(IndexStmt *origStmt, Oid distrelid, int64 shardid, appendStringInfoString(buffer, ") "); } +#if PG_VERSION_NUM >= 
PG_VERSION_15 + if (indexStmt->nulls_not_distinct) + { + appendStringInfoString(buffer, "NULLS NOT DISTINCT "); + } +#endif /* PG_VERSION_15 */ + if (indexStmt->options != NIL) { appendStringInfoString(buffer, "WITH ("); diff --git a/src/test/regress/expected/pg15.out b/src/test/regress/expected/pg15.out index 03dfd3675..0e9357957 100644 --- a/src/test/regress/expected/pg15.out +++ b/src/test/regress/expected/pg15.out @@ -610,8 +610,72 @@ SELECT * FROM FKTABLE ORDER BY id; 1 | 2 | | 0 (2 rows) --- Clean up \c - - - :master_port +-- test NULL NOT DISTINCT clauses +-- set the next shard id so that the error messages are easier to maintain +SET citus.next_shard_id TO 960050; +CREATE TABLE null_distinct_test(id INT, c1 INT, c2 INT, c3 VARCHAR(10)) ; +SELECT create_distributed_table('null_distinct_test', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE UNIQUE INDEX idx1_null_distinct_test ON null_distinct_test(id, c1) NULLS DISTINCT ; +CREATE UNIQUE INDEX idx2_null_distinct_test ON null_distinct_test(id, c2) NULLS NOT DISTINCT ; +-- populate with some initial data +INSERT INTO null_distinct_test VALUES (1, 1, 1, 'data1') ; +INSERT INTO null_distinct_test VALUES (1, 2, NULL, 'data2') ; +INSERT INTO null_distinct_test VALUES (1, NULL, 3, 'data3') ; +-- should fail as we already have a null value in c2 column +INSERT INTO null_distinct_test VALUES (1, NULL, NULL, 'data4') ; +ERROR: duplicate key value violates unique constraint "idx2_null_distinct_test_960050" +DETAIL: Key (id, c2)=(1, null) already exists. +CONTEXT: while executing command on localhost:xxxxx +INSERT INTO null_distinct_test VALUES (1, NULL, NULL, 'data4') ON CONFLICT DO NOTHING; +INSERT INTO null_distinct_test VALUES (1, NULL, NULL, 'data4') ON CONFLICT (id, c2) DO UPDATE SET c2=100 RETURNING *; + id | c1 | c2 | c3 +--------------------------------------------------------------------- + 1 | 2 | 100 | data2 +(1 row) + +-- should not fail as null values are distinct for c1 column +INSERT INTO null_distinct_test VALUES (1, NULL, 5, 'data5') ; +-- test that unique constraints also work properly +-- since we have multiple (1,NULL) pairs for columns (id,c1) the first will work, second will fail +ALTER TABLE null_distinct_test ADD CONSTRAINT uniq_distinct_c1 UNIQUE NULLS DISTINCT (id,c1); +ALTER TABLE null_distinct_test ADD CONSTRAINT uniq_c1 UNIQUE NULLS NOT DISTINCT (id,c1); +ERROR: could not create unique index "uniq_c1_960050" +DETAIL: Key (id, c1)=(1, null) is duplicated. +CONTEXT: while executing command on localhost:xxxxx +-- show all records in the table for fact checking +SELECT * FROM null_distinct_test ORDER BY c3; + id | c1 | c2 | c3 +--------------------------------------------------------------------- + 1 | 1 | 1 | data1 + 1 | 2 | 100 | data2 + 1 | | 3 | data3 + 1 | | 5 | data5 +(4 rows) + +-- test unique nulls not distinct constraints on a reference table +CREATE TABLE reference_uniq_test ( + x int, y int, + UNIQUE NULLS NOT DISTINCT (x, y) +); +SELECT create_reference_table('reference_uniq_test'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO reference_uniq_test VALUES (1, 1), (1, NULL), (NULL, 1); +-- the following will fail +INSERT INTO reference_uniq_test VALUES (1, NULL); +ERROR: duplicate key value violates unique constraint "reference_uniq_test_x_y_key_960054" +DETAIL: Key (x, y)=(1, null) already exists. 
+CONTEXT: while executing command on localhost:xxxxx +-- Clean up \set VERBOSITY terse +SET client_min_messages TO ERROR; DROP SCHEMA pg15 CASCADE; -NOTICE: drop cascades to 15 other objects diff --git a/src/test/regress/sql/pg15.sql b/src/test/regress/sql/pg15.sql index f59a46aa3..296ef016e 100644 --- a/src/test/regress/sql/pg15.sql +++ b/src/test/regress/sql/pg15.sql @@ -336,9 +336,49 @@ INSERT INTO FKTABLE VALUES DELETE FROM PKTABLE WHERE id = 1 OR id = 2; SELECT * FROM FKTABLE ORDER BY id; - --- Clean up \c - - - :master_port +-- test NULL NOT DISTINCT clauses +-- set the next shard id so that the error messages are easier to maintain +SET citus.next_shard_id TO 960050; +CREATE TABLE null_distinct_test(id INT, c1 INT, c2 INT, c3 VARCHAR(10)) ; +SELECT create_distributed_table('null_distinct_test', 'id'); + +CREATE UNIQUE INDEX idx1_null_distinct_test ON null_distinct_test(id, c1) NULLS DISTINCT ; +CREATE UNIQUE INDEX idx2_null_distinct_test ON null_distinct_test(id, c2) NULLS NOT DISTINCT ; + +-- populate with some initial data +INSERT INTO null_distinct_test VALUES (1, 1, 1, 'data1') ; +INSERT INTO null_distinct_test VALUES (1, 2, NULL, 'data2') ; +INSERT INTO null_distinct_test VALUES (1, NULL, 3, 'data3') ; + +-- should fail as we already have a null value in c2 column +INSERT INTO null_distinct_test VALUES (1, NULL, NULL, 'data4') ; +INSERT INTO null_distinct_test VALUES (1, NULL, NULL, 'data4') ON CONFLICT DO NOTHING; +INSERT INTO null_distinct_test VALUES (1, NULL, NULL, 'data4') ON CONFLICT (id, c2) DO UPDATE SET c2=100 RETURNING *; + +-- should not fail as null values are distinct for c1 column +INSERT INTO null_distinct_test VALUES (1, NULL, 5, 'data5') ; + +-- test that unique constraints also work properly +-- since we have multiple (1,NULL) pairs for columns (id,c1) the first will work, second will fail +ALTER TABLE null_distinct_test ADD CONSTRAINT uniq_distinct_c1 UNIQUE NULLS DISTINCT (id,c1); +ALTER TABLE null_distinct_test ADD CONSTRAINT uniq_c1 UNIQUE NULLS NOT DISTINCT (id,c1); + +-- show all records in the table for fact checking +SELECT * FROM null_distinct_test ORDER BY c3; + +-- test unique nulls not distinct constraints on a reference table +CREATE TABLE reference_uniq_test ( + x int, y int, + UNIQUE NULLS NOT DISTINCT (x, y) +); +SELECT create_reference_table('reference_uniq_test'); +INSERT INTO reference_uniq_test VALUES (1, 1), (1, NULL), (NULL, 1); +-- the following will fail +INSERT INTO reference_uniq_test VALUES (1, NULL); + +-- Clean up \set VERBOSITY terse +SET client_min_messages TO ERROR; DROP SCHEMA pg15 CASCADE; From 5cfcc633082e3c4dc3381ee0d9777aec4a787875 Mon Sep 17 00:00:00 2001 From: Hanefi Onaldi Date: Tue, 13 Sep 2022 00:05:58 +0300 Subject: [PATCH 13/17] Add warning messages for cluster commands on partitioned tables (#6306) PG15 introduces `CLUSTER` commands for partitioned tables. Similar to a `CLUSTER` command with no supplied table names, these commands also can not be run inside transaction blocks and therefore can not be propagated in a distributed transaction block with ease. Therefore we raise warnings. 
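
In short, the resulting behavior looks like the following sketch (table and index names are the ones used in the pg15 regression test added by this patch):

-- partitioned table: not propagated, only a warning on the coordinator
CLUSTER sale USING sale_pk;

-- an individual partition can still be clustered when citus.shard_replication_factor is 1
CLUSTER sale_newyork_repl_factor_1 USING sale_newyork_repl_factor_1_pkey;
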
Relevant PG commit: cfdd03f45e6afc632fbe70519250ec19167d6765 --- src/backend/distributed/commands/cluster.c | 22 ++++++++++++++ src/test/regress/expected/pg15.out | 34 ++++++++++++++++++++++ src/test/regress/sql/pg15.sql | 32 ++++++++++++++++++++ 3 files changed, 88 insertions(+) diff --git a/src/backend/distributed/commands/cluster.c b/src/backend/distributed/commands/cluster.c index c539aa066..4cffbaf51 100644 --- a/src/backend/distributed/commands/cluster.c +++ b/src/backend/distributed/commands/cluster.c @@ -19,6 +19,7 @@ #include "distributed/commands/utility_hook.h" #include "distributed/listutils.h" #include "distributed/metadata_cache.h" +#include "distributed/multi_partitioning_utils.h" static bool IsClusterStmtVerbose_compat(ClusterStmt *clusterStmt); @@ -69,6 +70,27 @@ PreprocessClusterStmt(Node *node, const char *clusterCommand, return NIL; } + /* + * We do not support CLUSTER command on partitioned tables as it can not be run inside + * transaction blocks. PostgreSQL currently does not support CLUSTER command on + * partitioned tables in a transaction block. Although Citus can execute commands + * outside of transaction block -- such as VACUUM -- we cannot do that here because + * CLUSTER command is also not allowed from a function call as well. By default, Citus + * uses `worker_apply_shard_ddl_command()`, where we should avoid it for this case. + */ + if (PartitionedTable(relationId)) + { + if (EnableUnsupportedFeatureMessages) + { + ereport(WARNING, (errmsg("not propagating CLUSTER command for partitioned " + "table to worker nodes"), + errhint("Provide a child partition table names in order to " + "CLUSTER distributed partitioned tables."))); + } + + return NIL; + } + if (IsClusterStmtVerbose_compat(clusterStmt)) { ereport(ERROR, (errmsg("cannot run CLUSTER command"), diff --git a/src/test/regress/expected/pg15.out b/src/test/regress/expected/pg15.out index 0e9357957..3c596bbb1 100644 --- a/src/test/regress/expected/pg15.out +++ b/src/test/regress/expected/pg15.out @@ -611,6 +611,7 @@ SELECT * FROM FKTABLE ORDER BY id; (2 rows) \c - - - :master_port +SET search_path TO pg15; -- test NULL NOT DISTINCT clauses -- set the next shard id so that the error messages are easier to maintain SET citus.next_shard_id TO 960050; @@ -675,7 +676,40 @@ INSERT INTO reference_uniq_test VALUES (1, NULL); ERROR: duplicate key value violates unique constraint "reference_uniq_test_x_y_key_960054" DETAIL: Key (x, y)=(1, null) already exists. CONTEXT: while executing command on localhost:xxxxx +-- +-- PG15 introduces CLUSTER command support for partitioned tables. However, similar to +-- CLUSTER commands with no table name, these queries can not be run inside a transaction +-- block. Therefore, we do not propagate such queries. +-- +-- Should print a warning that it will not be propagated to worker nodes. +CLUSTER sale USING sale_pk; +WARNING: not propagating CLUSTER command for partitioned table to worker nodes +HINT: Provide a child partition table names in order to CLUSTER distributed partitioned tables. +-- verify that we can cluster the partition tables only when replication factor is 1 +CLUSTER sale_newyork USING sale_newyork_pkey; +ERROR: modifications on partitions when replication factor is greater than 1 is not supported +HINT: Run the query on the parent table "sale" instead. 
+-- create a new partitioned table with shard replicaiton factor 1 +SET citus.shard_replication_factor = 1; +CREATE TABLE sale_repl_factor_1 ( LIKE sale ) + PARTITION BY list (state_code); +ALTER TABLE sale_repl_factor_1 ADD CONSTRAINT sale_repl_factor_1_pk PRIMARY KEY (state_code, sale_date); +CREATE TABLE sale_newyork_repl_factor_1 PARTITION OF sale_repl_factor_1 FOR VALUES IN ('NY'); +CREATE TABLE sale_california_repl_factor_1 PARTITION OF sale_repl_factor_1 FOR VALUES IN ('CA'); +SELECT create_distributed_table('sale_repl_factor_1', 'state_code'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- Should print a warning that it will not be propagated to worker nodes. +CLUSTER sale_repl_factor_1 USING sale_repl_factor_1_pk; +WARNING: not propagating CLUSTER command for partitioned table to worker nodes +HINT: Provide a child partition table names in order to CLUSTER distributed partitioned tables. +-- verify that we can still cluster the partition tables now since replication factor is 1 +CLUSTER sale_newyork_repl_factor_1 USING sale_newyork_repl_factor_1_pkey; -- Clean up +RESET citus.shard_replication_factor; \set VERBOSITY terse SET client_min_messages TO ERROR; DROP SCHEMA pg15 CASCADE; diff --git a/src/test/regress/sql/pg15.sql b/src/test/regress/sql/pg15.sql index 296ef016e..9cfa5960d 100644 --- a/src/test/regress/sql/pg15.sql +++ b/src/test/regress/sql/pg15.sql @@ -337,6 +337,7 @@ DELETE FROM PKTABLE WHERE id = 1 OR id = 2; SELECT * FROM FKTABLE ORDER BY id; \c - - - :master_port +SET search_path TO pg15; -- test NULL NOT DISTINCT clauses -- set the next shard id so that the error messages are easier to maintain @@ -378,7 +379,38 @@ INSERT INTO reference_uniq_test VALUES (1, 1), (1, NULL), (NULL, 1); -- the following will fail INSERT INTO reference_uniq_test VALUES (1, NULL); +-- +-- PG15 introduces CLUSTER command support for partitioned tables. However, similar to +-- CLUSTER commands with no table name, these queries can not be run inside a transaction +-- block. Therefore, we do not propagate such queries. +-- + +-- Should print a warning that it will not be propagated to worker nodes. +CLUSTER sale USING sale_pk; + +-- verify that we can cluster the partition tables only when replication factor is 1 +CLUSTER sale_newyork USING sale_newyork_pkey; + +-- create a new partitioned table with shard replicaiton factor 1 +SET citus.shard_replication_factor = 1; +CREATE TABLE sale_repl_factor_1 ( LIKE sale ) + PARTITION BY list (state_code); + +ALTER TABLE sale_repl_factor_1 ADD CONSTRAINT sale_repl_factor_1_pk PRIMARY KEY (state_code, sale_date); + +CREATE TABLE sale_newyork_repl_factor_1 PARTITION OF sale_repl_factor_1 FOR VALUES IN ('NY'); +CREATE TABLE sale_california_repl_factor_1 PARTITION OF sale_repl_factor_1 FOR VALUES IN ('CA'); + +SELECT create_distributed_table('sale_repl_factor_1', 'state_code'); + +-- Should print a warning that it will not be propagated to worker nodes. 
+CLUSTER sale_repl_factor_1 USING sale_repl_factor_1_pk; + +-- verify that we can still cluster the partition tables now since replication factor is 1 +CLUSTER sale_newyork_repl_factor_1 USING sale_newyork_repl_factor_1_pkey; + -- Clean up +RESET citus.shard_replication_factor; \set VERBOSITY terse SET client_min_messages TO ERROR; DROP SCHEMA pg15 CASCADE; From 76ff4ab188f830f75a018988dfb39e16853a2262 Mon Sep 17 00:00:00 2001 From: Naisila Puka <37271756+naisila@users.noreply.github.com> Date: Tue, 13 Sep 2022 10:53:39 +0300 Subject: [PATCH 14/17] Adds support for unlogged distributed sequences (#6292) We can now do the following: - Distribute sequence with logged/unlogged option - ALTER TABLE my_sequence SET LOGGED/UNLOGGED - ALTER SEQUENCE my_sequence SET LOGGED/UNLOGGED Relevant PG commit https://github.com/postgres/postgres/commit/344d62fb9a978a72cf8347f0369b9ee643fd0b31 --- .../commands/distribute_object_ops.c | 46 +++++ src/backend/distributed/commands/sequence.c | 115 ++++++++++++ src/backend/distributed/commands/table.c | 26 ++- .../distributed/deparser/citus_ruleutils.c | 7 +- .../deparser/deparse_sequence_stmts.c | 93 ++++++++++ .../deparser/qualify_sequence_stmt.c | 31 ++++ src/include/distributed/citus_ruleutils.h | 6 - src/include/distributed/commands.h | 11 ++ src/include/distributed/deparser.h | 6 + src/include/pg_version_compat.h | 9 + src/test/regress/expected/pg15.out | 170 ++++++++++++++++++ src/test/regress/sql/pg15.sql | 131 ++++++++++++++ 12 files changed, 641 insertions(+), 10 deletions(-) diff --git a/src/backend/distributed/commands/distribute_object_ops.c b/src/backend/distributed/commands/distribute_object_ops.c index 74a7ce69b..056ba20e2 100644 --- a/src/backend/distributed/commands/distribute_object_ops.c +++ b/src/backend/distributed/commands/distribute_object_ops.c @@ -734,6 +734,17 @@ static DistributeObjectOps Sequence_AlterOwner = { .address = AlterSequenceOwnerStmtObjectAddress, .markDistributed = false, }; +#if (PG_VERSION_NUM >= PG_VERSION_15) +static DistributeObjectOps Sequence_AlterPersistence = { + .deparse = DeparseAlterSequencePersistenceStmt, + .qualify = QualifyAlterSequencePersistenceStmt, + .preprocess = PreprocessAlterSequencePersistenceStmt, + .postprocess = NULL, + .operationType = DIST_OPS_ALTER, + .address = AlterSequencePersistenceStmtObjectAddress, + .markDistributed = false, +}; +#endif static DistributeObjectOps Sequence_Drop = { .deparse = DeparseDropSequenceStmt, .qualify = QualifyDropSequenceStmt, @@ -1463,6 +1474,41 @@ GetDistributeObjectOps(Node *node) case OBJECT_SEQUENCE: { +#if (PG_VERSION_NUM >= PG_VERSION_15) + ListCell *cmdCell = NULL; + foreach(cmdCell, stmt->cmds) + { + AlterTableCmd *cmd = castNode(AlterTableCmd, lfirst(cmdCell)); + switch (cmd->subtype) + { + case AT_ChangeOwner: + { + return &Sequence_AlterOwner; + } + + case AT_SetLogged: + { + return &Sequence_AlterPersistence; + } + + case AT_SetUnLogged: + { + return &Sequence_AlterPersistence; + } + + default: + { + return &NoDistributeOps; + } + } + } +#endif + + /* + * Prior to PG15, the only Alter Table statement + * with Sequence as its object was an + * Alter Owner statement + */ return &Sequence_AlterOwner; } diff --git a/src/backend/distributed/commands/sequence.c b/src/backend/distributed/commands/sequence.c index 9352ae297..20b7666ad 100644 --- a/src/backend/distributed/commands/sequence.c +++ b/src/backend/distributed/commands/sequence.c @@ -712,6 +712,121 @@ PostprocessAlterSequenceOwnerStmt(Node *node, const char *queryString) } +#if (PG_VERSION_NUM 
>= PG_VERSION_15) + +/* + * PreprocessAlterSequencePersistenceStmt is called for change of persistence + * of sequences before the persistence is changed on the local instance. + * + * If the sequence for which the persistence is changed is distributed, we execute + * the change on all the workers to keep the type in sync across the cluster. + */ +List * +PreprocessAlterSequencePersistenceStmt(Node *node, const char *queryString, + ProcessUtilityContext processUtilityContext) +{ + AlterTableStmt *stmt = castNode(AlterTableStmt, node); + Assert(AlterTableStmtObjType_compat(stmt) == OBJECT_SEQUENCE); + + List *sequenceAddresses = GetObjectAddressListFromParseTree((Node *) stmt, false, + false); + + /* the code-path only supports a single object */ + Assert(list_length(sequenceAddresses) == 1); + + if (!ShouldPropagateAnyObject(sequenceAddresses)) + { + return NIL; + } + + EnsureCoordinator(); + QualifyTreeNode((Node *) stmt); + + const char *sql = DeparseTreeNode((Node *) stmt); + + List *commands = list_make3(DISABLE_DDL_PROPAGATION, (void *) sql, + ENABLE_DDL_PROPAGATION); + + return NodeDDLTaskList(NON_COORDINATOR_METADATA_NODES, commands); +} + + +/* + * AlterSequencePersistenceStmtObjectAddress returns the ObjectAddress of the + * sequence that is the subject of the AlterPersistenceStmt. + */ +List * +AlterSequencePersistenceStmtObjectAddress(Node *node, bool missing_ok, bool isPostprocess) +{ + AlterTableStmt *stmt = castNode(AlterTableStmt, node); + Assert(AlterTableStmtObjType_compat(stmt) == OBJECT_SEQUENCE); + + RangeVar *sequence = stmt->relation; + Oid seqOid = RangeVarGetRelid(sequence, NoLock, missing_ok); + ObjectAddress *sequenceAddress = palloc0(sizeof(ObjectAddress)); + ObjectAddressSet(*sequenceAddress, RelationRelationId, seqOid); + + return list_make1(sequenceAddress); +} + + +/* + * PreprocessSequenceAlterTableStmt is called for change of persistence or owner + * of sequences before the persistence/owner is changed on the local instance. + * + * Altering persistence or owner are the only ALTER commands of a sequence + * that may pass through an AlterTableStmt as well + */ +List * +PreprocessSequenceAlterTableStmt(Node *node, const char *queryString, + ProcessUtilityContext processUtilityContext) +{ + AlterTableStmt *stmt = castNode(AlterTableStmt, node); + Assert(AlterTableStmtObjType_compat(stmt) == OBJECT_SEQUENCE); + + ListCell *cmdCell = NULL; + foreach(cmdCell, stmt->cmds) + { + AlterTableCmd *cmd = castNode(AlterTableCmd, lfirst(cmdCell)); + switch (cmd->subtype) + { + case AT_ChangeOwner: + { + return PreprocessAlterSequenceOwnerStmt(node, + queryString, + processUtilityContext); + } + + case AT_SetLogged: + { + return PreprocessAlterSequencePersistenceStmt(node, + queryString, + processUtilityContext); + } + + case AT_SetUnLogged: + { + return PreprocessAlterSequencePersistenceStmt(node, + queryString, + processUtilityContext); + } + + default: + { + /* normally we shouldn't ever reach this */ + ereport(ERROR, (errmsg("unsupported subtype for alter sequence command"), + errdetail("sub command type: %d", + cmd->subtype))); + } + } + } + return NIL; +} + + +#endif + + /* * PreprocessGrantOnSequenceStmt is executed before the statement is applied to the local * postgres instance. 
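For illustration, the propagation path added above for sequence persistence changes roughly produces the following commands. This is a sketch rather than literal wire traffic: the sequence name pg15.seq1 is only an example, and the two SET statements are what DISABLE_DDL_PROPAGATION / ENABLE_DDL_PROPAGATION are assumed to expand to.

    -- on the coordinator, for an already-distributed sequence
    ALTER SEQUENCE pg15.seq1 SET UNLOGGED;

    -- roughly what is sent to each metadata worker node, after qualification and deparse
    SET citus.enable_ddl_propagation TO 'off';
    ALTER SEQUENCE pg15.seq1 SET UNLOGGED;
    SET citus.enable_ddl_propagation TO 'on';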
diff --git a/src/backend/distributed/commands/table.c b/src/backend/distributed/commands/table.c index 5b712dae2..e2a28fab6 100644 --- a/src/backend/distributed/commands/table.c +++ b/src/backend/distributed/commands/table.c @@ -733,20 +733,40 @@ PreprocessAlterTableStmt(Node *node, const char *alterTableCommand, /* * check whether we are dealing with a sequence or view here - * if yes, it must be ALTER TABLE .. OWNER TO .. command - * since this is the only ALTER command of a sequence or view that - * passes through an AlterTableStmt */ char relKind = get_rel_relkind(leftRelationId); if (relKind == RELKIND_SEQUENCE) { AlterTableStmt *stmtCopy = copyObject(alterTableStatement); AlterTableStmtObjType_compat(stmtCopy) = OBJECT_SEQUENCE; +#if (PG_VERSION_NUM >= PG_VERSION_15) + + /* + * it must be ALTER TABLE .. OWNER TO .. + * or ALTER TABLE .. SET LOGGED/UNLOGGED command + * since these are the only ALTER commands of a sequence that + * pass through an AlterTableStmt + */ + return PreprocessSequenceAlterTableStmt((Node *) stmtCopy, alterTableCommand, + processUtilityContext); +#else + + /* + * it must be ALTER TABLE .. OWNER TO .. command + * since this is the only ALTER command of a sequence that + * passes through an AlterTableStmt + */ return PreprocessAlterSequenceOwnerStmt((Node *) stmtCopy, alterTableCommand, processUtilityContext); +#endif } else if (relKind == RELKIND_VIEW) { + /* + * it must be ALTER TABLE .. OWNER TO .. command + * since this is the only ALTER command of a view that + * passes through an AlterTableStmt + */ AlterTableStmt *stmtCopy = copyObject(alterTableStatement); AlterTableStmtObjType_compat(stmtCopy) = OBJECT_VIEW; return PreprocessAlterViewStmt((Node *) stmtCopy, alterTableCommand, diff --git a/src/backend/distributed/deparser/citus_ruleutils.c b/src/backend/distributed/deparser/citus_ruleutils.c index dbb8edbe2..9534d7ac6 100644 --- a/src/backend/distributed/deparser/citus_ruleutils.c +++ b/src/backend/distributed/deparser/citus_ruleutils.c @@ -256,7 +256,12 @@ pg_get_sequencedef_string(Oid sequenceRelationId) char *qualifiedSequenceName = generate_qualified_relation_name(sequenceRelationId); char *typeName = format_type_be(pgSequenceForm->seqtypid); - char *sequenceDef = psprintf(CREATE_SEQUENCE_COMMAND, qualifiedSequenceName, + char *sequenceDef = psprintf(CREATE_SEQUENCE_COMMAND, +#if (PG_VERSION_NUM >= PG_VERSION_15) + get_rel_persistence(sequenceRelationId) == + RELPERSISTENCE_UNLOGGED ? 
"UNLOGGED " : "", +#endif + qualifiedSequenceName, typeName, pgSequenceForm->seqincrement, pgSequenceForm->seqmin, pgSequenceForm->seqmax, pgSequenceForm->seqstart, diff --git a/src/backend/distributed/deparser/deparse_sequence_stmts.c b/src/backend/distributed/deparser/deparse_sequence_stmts.c index 0a615d741..80c4e2dd4 100644 --- a/src/backend/distributed/deparser/deparse_sequence_stmts.c +++ b/src/backend/distributed/deparser/deparse_sequence_stmts.c @@ -27,6 +27,9 @@ static void AppendSequenceNameList(StringInfo buf, List *objects, ObjectType obj static void AppendRenameSequenceStmt(StringInfo buf, RenameStmt *stmt); static void AppendAlterSequenceSchemaStmt(StringInfo buf, AlterObjectSchemaStmt *stmt); static void AppendAlterSequenceOwnerStmt(StringInfo buf, AlterTableStmt *stmt); +#if (PG_VERSION_NUM >= PG_VERSION_15) +static void AppendAlterSequencePersistenceStmt(StringInfo buf, AlterTableStmt *stmt); +#endif static void AppendGrantOnSequenceStmt(StringInfo buf, GrantStmt *stmt); static void AppendGrantOnSequenceSequences(StringInfo buf, GrantStmt *stmt); @@ -258,6 +261,96 @@ AppendAlterSequenceOwnerStmt(StringInfo buf, AlterTableStmt *stmt) } +#if (PG_VERSION_NUM >= PG_VERSION_15) + +/* + * DeparseAlterSequencePersistenceStmt builds and returns a string representing + * the AlterTableStmt consisting of changing the persistence of a sequence + */ +char * +DeparseAlterSequencePersistenceStmt(Node *node) +{ + AlterTableStmt *stmt = castNode(AlterTableStmt, node); + StringInfoData str = { 0 }; + initStringInfo(&str); + + Assert(AlterTableStmtObjType_compat(stmt) == OBJECT_SEQUENCE); + + AppendAlterSequencePersistenceStmt(&str, stmt); + + return str.data; +} + + +/* + * AppendAlterSequencePersistenceStmt appends a string representing the + * AlterTableStmt to a buffer consisting of changing the persistence of a sequence + */ +static void +AppendAlterSequencePersistenceStmt(StringInfo buf, AlterTableStmt *stmt) +{ + Assert(AlterTableStmtObjType_compat(stmt) == OBJECT_SEQUENCE); + + RangeVar *seq = stmt->relation; + char *qualifiedSequenceName = quote_qualified_identifier(seq->schemaname, + seq->relname); + appendStringInfoString(buf, "ALTER SEQUENCE "); + + if (stmt->missing_ok) + { + appendStringInfoString(buf, "IF EXISTS "); + } + + appendStringInfoString(buf, qualifiedSequenceName); + + ListCell *cmdCell = NULL; + foreach(cmdCell, stmt->cmds) + { + if (cmdCell != list_head(stmt->cmds)) + { + /* + * As of PG15, we cannot reach this code because ALTER SEQUENCE + * is only supported for a single sequence. Still, let's be + * defensive for future PG changes + */ + ereport(ERROR, (errmsg("More than one subcommand is not supported " + "for ALTER SEQUENCE"))); + } + + AlterTableCmd *alterTableCmd = castNode(AlterTableCmd, lfirst(cmdCell)); + switch (alterTableCmd->subtype) + { + case AT_SetLogged: + { + appendStringInfoString(buf, " SET LOGGED;"); + break; + } + + case AT_SetUnLogged: + { + appendStringInfoString(buf, " SET UNLOGGED;"); + break; + } + + default: + { + /* + * normally we shouldn't ever reach this + * because we enter this function after making sure this stmt is of the form + * ALTER SEQUENCE .. 
SET LOGGED/UNLOGGED + */ + ereport(ERROR, (errmsg("unsupported subtype for alter sequence command"), + errdetail("sub command type: %d", + alterTableCmd->subtype))); + } + } + } +} + + +#endif + + /* * DeparseGrantOnSequenceStmt builds and returns a string representing the GrantOnSequenceStmt */ diff --git a/src/backend/distributed/deparser/qualify_sequence_stmt.c b/src/backend/distributed/deparser/qualify_sequence_stmt.c index 9f4ef6fe8..cece902a6 100644 --- a/src/backend/distributed/deparser/qualify_sequence_stmt.c +++ b/src/backend/distributed/deparser/qualify_sequence_stmt.c @@ -51,6 +51,37 @@ QualifyAlterSequenceOwnerStmt(Node *node) } +#if (PG_VERSION_NUM >= PG_VERSION_15) + +/* + * QualifyAlterSequencePersistenceStmt transforms a + * ALTER SEQUENCE .. SET LOGGED/UNLOGGED + * statement in place and makes the sequence name fully qualified. + */ +void +QualifyAlterSequencePersistenceStmt(Node *node) +{ + AlterTableStmt *stmt = castNode(AlterTableStmt, node); + Assert(AlterTableStmtObjType_compat(stmt) == OBJECT_SEQUENCE); + + RangeVar *seq = stmt->relation; + + if (seq->schemaname == NULL) + { + Oid seqOid = RangeVarGetRelid(seq, NoLock, stmt->missing_ok); + + if (OidIsValid(seqOid)) + { + Oid schemaOid = get_rel_namespace(seqOid); + seq->schemaname = get_namespace_name(schemaOid); + } + } +} + + +#endif + + /* * QualifyAlterSequenceSchemaStmt transforms a * ALTER SEQUENCE .. SET SCHEMA .. diff --git a/src/include/distributed/citus_ruleutils.h b/src/include/distributed/citus_ruleutils.h index 938a70578..ca06e6d5a 100644 --- a/src/include/distributed/citus_ruleutils.h +++ b/src/include/distributed/citus_ruleutils.h @@ -20,12 +20,6 @@ #include "nodes/parsenodes.h" #include "nodes/pg_list.h" - -#define CREATE_SEQUENCE_COMMAND \ - "CREATE SEQUENCE IF NOT EXISTS %s AS %s INCREMENT BY " INT64_FORMAT \ - " MINVALUE " INT64_FORMAT " MAXVALUE " INT64_FORMAT \ - " START WITH " INT64_FORMAT " CACHE " INT64_FORMAT " %sCYCLE" - /* Function declarations for version independent Citus ruleutils wrapper functions */ extern char * pg_get_extensiondef_string(Oid tableRelationId); extern Oid get_extension_schema(Oid ext_oid); diff --git a/src/include/distributed/commands.h b/src/include/distributed/commands.h index e67669ab2..656feec67 100644 --- a/src/include/distributed/commands.h +++ b/src/include/distributed/commands.h @@ -459,6 +459,13 @@ extern List * PostprocessAlterSequenceSchemaStmt(Node *node, const char *querySt extern List * PreprocessAlterSequenceOwnerStmt(Node *node, const char *queryString, ProcessUtilityContext processUtilityContext); extern List * PostprocessAlterSequenceOwnerStmt(Node *node, const char *queryString); +#if (PG_VERSION_NUM >= PG_VERSION_15) +extern List * PreprocessAlterSequencePersistenceStmt(Node *node, const char *queryString, + ProcessUtilityContext + processUtilityContext); +extern List * PreprocessSequenceAlterTableStmt(Node *node, const char *queryString, + ProcessUtilityContext processUtilityContext); +#endif extern List * PreprocessDropSequenceStmt(Node *node, const char *queryString, ProcessUtilityContext processUtilityContext); extern List * SequenceDropStmtObjectAddress(Node *stmt, bool missing_ok, bool @@ -474,6 +481,10 @@ extern List * AlterSequenceSchemaStmtObjectAddress(Node *node, bool missing_ok, isPostprocess); extern List * AlterSequenceOwnerStmtObjectAddress(Node *node, bool missing_ok, bool isPostprocess); +#if (PG_VERSION_NUM >= PG_VERSION_15) +extern List * AlterSequencePersistenceStmtObjectAddress(Node *node, bool missing_ok, bool + isPostprocess); 
+#endif extern List * RenameSequenceStmtObjectAddress(Node *node, bool missing_ok, bool isPostprocess); extern void ErrorIfUnsupportedSeqStmt(CreateSeqStmt *createSeqStmt); diff --git a/src/include/distributed/deparser.h b/src/include/distributed/deparser.h index 9ac15b6ac..0d0a99e22 100644 --- a/src/include/distributed/deparser.h +++ b/src/include/distributed/deparser.h @@ -226,6 +226,9 @@ extern char * DeparseDropSequenceStmt(Node *node); extern char * DeparseRenameSequenceStmt(Node *node); extern char * DeparseAlterSequenceSchemaStmt(Node *node); extern char * DeparseAlterSequenceOwnerStmt(Node *node); +#if (PG_VERSION_NUM >= PG_VERSION_15) +extern char * DeparseAlterSequencePersistenceStmt(Node *node); +#endif extern char * DeparseGrantOnSequenceStmt(Node *node); /* forward declarations for qualify_sequence_stmt.c */ @@ -233,6 +236,9 @@ extern void QualifyRenameSequenceStmt(Node *node); extern void QualifyDropSequenceStmt(Node *node); extern void QualifyAlterSequenceSchemaStmt(Node *node); extern void QualifyAlterSequenceOwnerStmt(Node *node); +#if (PG_VERSION_NUM >= PG_VERSION_15) +extern void QualifyAlterSequencePersistenceStmt(Node *node); +#endif extern void QualifyGrantOnSequenceStmt(Node *node); #endif /* CITUS_DEPARSER_H */ diff --git a/src/include/pg_version_compat.h b/src/include/pg_version_compat.h index f551085a7..fcb857c41 100644 --- a/src/include/pg_version_compat.h +++ b/src/include/pg_version_compat.h @@ -18,6 +18,10 @@ #define RelationCreateStorage_compat(a, b, c) RelationCreateStorage(a, b, c) #define parse_analyze_varparams_compat(a, b, c, d, e) parse_analyze_varparams(a, b, c, d, \ e) +#define CREATE_SEQUENCE_COMMAND \ + "CREATE %sSEQUENCE IF NOT EXISTS %s AS %s INCREMENT BY " INT64_FORMAT \ + " MINVALUE " INT64_FORMAT " MAXVALUE " INT64_FORMAT \ + " START WITH " INT64_FORMAT " CACHE " INT64_FORMAT " %sCYCLE" #else #include "nodes/value.h" @@ -62,6 +66,11 @@ RelationGetSmgr(Relation rel) } +#define CREATE_SEQUENCE_COMMAND \ + "CREATE SEQUENCE IF NOT EXISTS %s AS %s INCREMENT BY " INT64_FORMAT \ + " MINVALUE " INT64_FORMAT " MAXVALUE " INT64_FORMAT \ + " START WITH " INT64_FORMAT " CACHE " INT64_FORMAT " %sCYCLE" + #endif #if PG_VERSION_NUM >= PG_VERSION_14 diff --git a/src/test/regress/expected/pg15.out b/src/test/regress/expected/pg15.out index 3c596bbb1..e535d198d 100644 --- a/src/test/regress/expected/pg15.out +++ b/src/test/regress/expected/pg15.out @@ -552,6 +552,176 @@ SELECT count(*)=100 FROM copy_test2; t (1 row) +-- +-- In PG15, unlogged sequences are supported +-- we support this for distributed sequences as well +-- +CREATE SEQUENCE seq1; +CREATE UNLOGGED SEQUENCE "pg15"."seq 2"; +-- first, test that sequence persistence is distributed correctly +-- when the sequence is distributed +SELECT relname, + CASE relpersistence + WHEN 'u' THEN 'unlogged' + WHEN 'p' then 'logged' + ELSE 'unknown' + END AS logged_info +FROM pg_class +WHERE relname IN ('seq1', 'seq 2') AND relnamespace='pg15'::regnamespace +ORDER BY relname; + relname | logged_info +--------------------------------------------------------------------- + seq 2 | unlogged + seq1 | logged +(2 rows) + +CREATE TABLE "seq test"(a int, b int default nextval ('seq1'), c int default nextval ('"pg15"."seq 2"')); +SELECT create_distributed_table('"pg15"."seq test"','a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +\c - - - :worker_1_port +SELECT relname, + CASE relpersistence + WHEN 'u' THEN 'unlogged' + WHEN 'p' then 'logged' + ELSE 'unknown' 
+ END AS logged_info +FROM pg_class +WHERE relname IN ('seq1', 'seq 2') AND relnamespace='pg15'::regnamespace +ORDER BY relname; + relname | logged_info +--------------------------------------------------------------------- + seq 2 | unlogged + seq1 | logged +(2 rows) + +\c - - - :master_port +SET search_path TO pg15; +-- now, check that we can change sequence persistence using ALTER SEQUENCE +ALTER SEQUENCE seq1 SET UNLOGGED; +-- use IF EXISTS +ALTER SEQUENCE IF EXISTS "seq 2" SET LOGGED; +-- check non-existent sequence as well +ALTER SEQUENCE seq_non_exists SET LOGGED; +ERROR: relation "seq_non_exists" does not exist +ALTER SEQUENCE IF EXISTS seq_non_exists SET LOGGED; +NOTICE: relation "seq_non_exists" does not exist, skipping +SELECT relname, + CASE relpersistence + WHEN 'u' THEN 'unlogged' + WHEN 'p' then 'logged' + ELSE 'unknown' + END AS logged_info +FROM pg_class +WHERE relname IN ('seq1', 'seq 2') AND relnamespace='pg15'::regnamespace +ORDER BY relname; + relname | logged_info +--------------------------------------------------------------------- + seq 2 | logged + seq1 | unlogged +(2 rows) + +\c - - - :worker_1_port +SELECT relname, + CASE relpersistence + WHEN 'u' THEN 'unlogged' + WHEN 'p' then 'logged' + ELSE 'unknown' + END AS logged_info +FROM pg_class +WHERE relname IN ('seq1', 'seq 2') AND relnamespace='pg15'::regnamespace +ORDER BY relname; + relname | logged_info +--------------------------------------------------------------------- + seq 2 | logged + seq1 | unlogged +(2 rows) + +\c - - - :master_port +SET search_path TO pg15; +-- now, check that we can change sequence persistence using ALTER TABLE +ALTER TABLE seq1 SET LOGGED; +ALTER TABLE "seq 2" SET UNLOGGED; +SELECT relname, + CASE relpersistence + WHEN 'u' THEN 'unlogged' + WHEN 'p' then 'logged' + ELSE 'unknown' + END AS logged_info +FROM pg_class +WHERE relname IN ('seq1', 'seq 2') AND relnamespace='pg15'::regnamespace +ORDER BY relname; + relname | logged_info +--------------------------------------------------------------------- + seq 2 | unlogged + seq1 | logged +(2 rows) + +\c - - - :worker_1_port +SELECT relname, + CASE relpersistence + WHEN 'u' THEN 'unlogged' + WHEN 'p' then 'logged' + ELSE 'unknown' + END AS logged_info +FROM pg_class +WHERE relname IN ('seq1', 'seq 2') AND relnamespace='pg15'::regnamespace +ORDER BY relname; + relname | logged_info +--------------------------------------------------------------------- + seq 2 | unlogged + seq1 | logged +(2 rows) + +\c - - - :master_port +SET search_path TO pg15; +-- An identity/serial sequence now automatically gets and follows the +-- persistence level (logged/unlogged) of its owning table. 
+-- Test this behavior as well +CREATE UNLOGGED TABLE test(a bigserial, b bigserial); +SELECT create_distributed_table('test', 'a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- show that associated sequence is unlogged +SELECT relname, + CASE relpersistence + WHEN 'u' THEN 'unlogged' + WHEN 'p' then 'logged' + ELSE 'unknown' + END AS logged_info +FROM pg_class +WHERE relname IN ('test_a_seq', 'test_b_seq') AND relnamespace='pg15'::regnamespace +ORDER BY relname; + relname | logged_info +--------------------------------------------------------------------- + test_a_seq | unlogged + test_b_seq | unlogged +(2 rows) + +\c - - - :worker_1_port +SELECT relname, + CASE relpersistence + WHEN 'u' THEN 'unlogged' + WHEN 'p' then 'logged' + ELSE 'unknown' + END AS logged_info +FROM pg_class +WHERE relname IN ('test_a_seq', 'test_b_seq') AND relnamespace='pg15'::regnamespace +ORDER BY relname; + relname | logged_info +--------------------------------------------------------------------- + test_a_seq | unlogged + test_b_seq | unlogged +(2 rows) + +\c - - - :master_port +SET search_path TO pg15; -- allow foreign key columns to have SET NULL/DEFAULT on column basis -- currently only reference tables can support that CREATE TABLE PKTABLE (tid int, id int, PRIMARY KEY (tid, id)); diff --git a/src/test/regress/sql/pg15.sql b/src/test/regress/sql/pg15.sql index 9cfa5960d..80b23986d 100644 --- a/src/test/regress/sql/pg15.sql +++ b/src/test/regress/sql/pg15.sql @@ -298,6 +298,137 @@ ALTER TABLE copy_test2 RENAME COLUMN data_ TO data; COPY copy_test2 FROM :'temp_dir''copy_test.txt' WITH ( HEADER match, FORMAT text); SELECT count(*)=100 FROM copy_test2; +-- +-- In PG15, unlogged sequences are supported +-- we support this for distributed sequences as well +-- + +CREATE SEQUENCE seq1; +CREATE UNLOGGED SEQUENCE "pg15"."seq 2"; + +-- first, test that sequence persistence is distributed correctly +-- when the sequence is distributed + +SELECT relname, + CASE relpersistence + WHEN 'u' THEN 'unlogged' + WHEN 'p' then 'logged' + ELSE 'unknown' + END AS logged_info +FROM pg_class +WHERE relname IN ('seq1', 'seq 2') AND relnamespace='pg15'::regnamespace +ORDER BY relname; + +CREATE TABLE "seq test"(a int, b int default nextval ('seq1'), c int default nextval ('"pg15"."seq 2"')); + +SELECT create_distributed_table('"pg15"."seq test"','a'); + +\c - - - :worker_1_port +SELECT relname, + CASE relpersistence + WHEN 'u' THEN 'unlogged' + WHEN 'p' then 'logged' + ELSE 'unknown' + END AS logged_info +FROM pg_class +WHERE relname IN ('seq1', 'seq 2') AND relnamespace='pg15'::regnamespace +ORDER BY relname; + +\c - - - :master_port +SET search_path TO pg15; + +-- now, check that we can change sequence persistence using ALTER SEQUENCE + +ALTER SEQUENCE seq1 SET UNLOGGED; +-- use IF EXISTS +ALTER SEQUENCE IF EXISTS "seq 2" SET LOGGED; +-- check non-existent sequence as well +ALTER SEQUENCE seq_non_exists SET LOGGED; +ALTER SEQUENCE IF EXISTS seq_non_exists SET LOGGED; + +SELECT relname, + CASE relpersistence + WHEN 'u' THEN 'unlogged' + WHEN 'p' then 'logged' + ELSE 'unknown' + END AS logged_info +FROM pg_class +WHERE relname IN ('seq1', 'seq 2') AND relnamespace='pg15'::regnamespace +ORDER BY relname; + +\c - - - :worker_1_port +SELECT relname, + CASE relpersistence + WHEN 'u' THEN 'unlogged' + WHEN 'p' then 'logged' + ELSE 'unknown' + END AS logged_info +FROM pg_class +WHERE relname IN ('seq1', 'seq 2') AND relnamespace='pg15'::regnamespace +ORDER BY relname; +
+\c - - - :master_port +SET search_path TO pg15; + +-- now, check that we can change sequence persistence using ALTER TABLE +ALTER TABLE seq1 SET LOGGED; +ALTER TABLE "seq 2" SET UNLOGGED; + +SELECT relname, + CASE relpersistence + WHEN 'u' THEN 'unlogged' + WHEN 'p' then 'logged' + ELSE 'unknown' + END AS logged_info +FROM pg_class +WHERE relname IN ('seq1', 'seq 2') AND relnamespace='pg15'::regnamespace +ORDER BY relname; + +\c - - - :worker_1_port +SELECT relname, + CASE relpersistence + WHEN 'u' THEN 'unlogged' + WHEN 'p' then 'logged' + ELSE 'unknown' + END AS logged_info +FROM pg_class +WHERE relname IN ('seq1', 'seq 2') AND relnamespace='pg15'::regnamespace +ORDER BY relname; + +\c - - - :master_port +SET search_path TO pg15; + +-- An identity/serial sequence now automatically gets and follows the +-- persistence level (logged/unlogged) of its owning table. +-- Test this behavior as well + +CREATE UNLOGGED TABLE test(a bigserial, b bigserial); +SELECT create_distributed_table('test', 'a'); + +-- show that associated sequence is unlogged +SELECT relname, + CASE relpersistence + WHEN 'u' THEN 'unlogged' + WHEN 'p' then 'logged' + ELSE 'unknown' + END AS logged_info +FROM pg_class +WHERE relname IN ('test_a_seq', 'test_b_seq') AND relnamespace='pg15'::regnamespace +ORDER BY relname; + +\c - - - :worker_1_port +SELECT relname, + CASE relpersistence + WHEN 'u' THEN 'unlogged' + WHEN 'p' then 'logged' + ELSE 'unknown' + END AS logged_info +FROM pg_class +WHERE relname IN ('test_a_seq', 'test_b_seq') AND relnamespace='pg15'::regnamespace +ORDER BY relname; + +\c - - - :master_port +SET search_path TO pg15; -- allow foreign key columns to have SET NULL/DEFAULT on column basis -- currently only reference tables can support that From f13b14062105b08d8e2891ae781f37461c039696 Mon Sep 17 00:00:00 2001 From: Jelte Fennema Date: Tue, 13 Sep 2022 10:59:52 +0200 Subject: [PATCH 15/17] Show citus_copy_shard_placement progress in get_rebalance_progress (#6322) DESCRIPTION: Show citus_copy_shard_placement progress in get_rebalance_progress When rebalancing to a new node that does not have reference tables yet, the rebalancer will first copy the reference tables to that node. Depending on the size of the reference tables, this might take a long time. However, there's no indication of what's happening at this stage of the rebalance. This PR improves this situation by also showing the progress of any citus_copy_shard_placement calls when calling get_rebalance_progress.
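As a usage sketch (the shard id, hosts, and ports below mirror the isolation test later in this patch and are illustrative only): start a copy in one session and poll the progress UDF from another. With this change, the new operation_type column is expected to report 'copy' for these rows, while shard moves report 'move'.

    -- session 1: replicate a shard placement (values are illustrative)
    SELECT citus_copy_shard_placement(1500001, 'localhost', 57637, 'localhost', 57638,
                                      transfer_mode := 'block_writes');

    -- session 2: while the copy is running
    SELECT table_name, shardid, sourceport, targetport, progress, operation_type
    FROM get_rebalance_progress();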
--- .../distributed/operations/shard_rebalancer.c | 12 +- .../distributed/operations/shard_transfer.c | 15 + .../distributed/sql/citus--11.0-4--11.1-1.sql | 1 + .../sql/downgrades/citus--11.1-1--11.0-4.sql | 1 + .../udfs/get_rebalance_progress/11.1-1.sql | 20 ++ .../udfs/get_rebalance_progress/latest.sql | 4 +- src/include/distributed/shard_rebalancer.h | 1 + .../isolation_shard_rebalancer_progress.out | 284 +++++++++++++----- src/test/regress/expected/multi_extension.out | 116 +++---- .../regress/expected/shard_rebalancer.out | 4 +- .../isolation_shard_rebalancer_progress.spec | 22 +- 11 files changed, 341 insertions(+), 139 deletions(-) create mode 100644 src/backend/distributed/sql/udfs/get_rebalance_progress/11.1-1.sql diff --git a/src/backend/distributed/operations/shard_rebalancer.c b/src/backend/distributed/operations/shard_rebalancer.c index 60962deac..beab2be47 100644 --- a/src/backend/distributed/operations/shard_rebalancer.c +++ b/src/backend/distributed/operations/shard_rebalancer.c @@ -269,6 +269,11 @@ PG_FUNCTION_INFO_V1(citus_rebalance_wait); bool RunningUnderIsolationTest = false; int MaxRebalancerLoggedIgnoredMoves = 5; +static const char *PlacementUpdateTypeNames[] = { + [PLACEMENT_UPDATE_INVALID_FIRST] = "unknown", + [PLACEMENT_UPDATE_MOVE] = "move", + [PLACEMENT_UPDATE_COPY] = "copy", +}; #ifdef USE_ASSERT_CHECKING @@ -801,6 +806,7 @@ SetupRebalanceMonitor(List *placementUpdateList, event->shardId = colocatedUpdate->shardId; event->sourcePort = colocatedUpdate->sourceNode->workerPort; event->targetPort = colocatedUpdate->targetNode->workerPort; + event->updateType = colocatedUpdate->updateType; pg_atomic_init_u64(&event->progress, initialProgressState); eventIndex++; @@ -1234,8 +1240,8 @@ get_rebalance_progress(PG_FUNCTION_ARGS) shardSize = shardSizesStat->totalSize; } - Datum values[11]; - bool nulls[11]; + Datum values[12]; + bool nulls[12]; memset(values, 0, sizeof(values)); memset(nulls, 0, sizeof(nulls)); @@ -1251,6 +1257,8 @@ get_rebalance_progress(PG_FUNCTION_ARGS) values[8] = UInt64GetDatum(pg_atomic_read_u64(&step->progress)); values[9] = UInt64GetDatum(sourceSize); values[10] = UInt64GetDatum(targetSize); + values[11] = PointerGetDatum( + cstring_to_text(PlacementUpdateTypeNames[step->updateType])); tuplestore_putvalues(tupstore, tupdesc, values, nulls); } diff --git a/src/backend/distributed/operations/shard_transfer.c b/src/backend/distributed/operations/shard_transfer.c index 356e3dd6a..cdb61731f 100644 --- a/src/backend/distributed/operations/shard_transfer.c +++ b/src/backend/distributed/operations/shard_transfer.c @@ -996,6 +996,20 @@ ReplicateColocatedShardPlacement(int64 shardId, char *sourceNodeName, return; } + WorkerNode *sourceNode = FindWorkerNode(sourceNodeName, sourceNodePort); + WorkerNode *targetNode = FindWorkerNode(targetNodeName, targetNodePort); + + Oid relationId = RelationIdForShard(shardId); + PlacementUpdateEvent *placementUpdateEvent = palloc0( + sizeof(PlacementUpdateEvent)); + placementUpdateEvent->updateType = PLACEMENT_UPDATE_COPY; + placementUpdateEvent->shardId = shardId; + placementUpdateEvent->sourceNode = sourceNode; + placementUpdateEvent->targetNode = targetNode; + SetupRebalanceMonitor(list_make1(placementUpdateEvent), relationId, + REBALANCE_PROGRESS_MOVING); + + /* * At this point of the shard replication, we don't need to block the writes to * shards when logical replication is used. 
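Because the new column text comes from the PlacementUpdateTypeNames mapping added above ('move' and 'copy', with 'unknown' only as the guard entry for the invalid first enum value), a monitoring query can now split an ongoing rebalance by phase. A small sketch, assuming a rebalance or copy is in progress:

    -- summarize in-flight steps by type; 'copy' rows now also cover
    -- citus_copy_shard_placement / reference table replication
    SELECT operation_type, count(*) AS steps, sum(shard_size) AS total_bytes
    FROM get_rebalance_progress()
    GROUP BY operation_type;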
@@ -1064,6 +1078,7 @@ ReplicateColocatedShardPlacement(int64 shardId, char *sourceNodeName, SendCommandToWorkersWithMetadata(placementCommand); } } + FinalizeCurrentProgressMonitor(); } diff --git a/src/backend/distributed/sql/citus--11.0-4--11.1-1.sql b/src/backend/distributed/sql/citus--11.0-4--11.1-1.sql index 00c28f22c..363ddad3c 100644 --- a/src/backend/distributed/sql/citus--11.0-4--11.1-1.sql +++ b/src/backend/distributed/sql/citus--11.0-4--11.1-1.sql @@ -173,3 +173,4 @@ GRANT SELECT ON pg_catalog.pg_dist_background_task_depend TO PUBLIC; #include "udfs/citus_rebalance_start/11.1-1.sql" #include "udfs/citus_rebalance_stop/11.1-1.sql" #include "udfs/citus_rebalance_wait/11.1-1.sql" +#include "udfs/get_rebalance_progress/11.1-1.sql" diff --git a/src/backend/distributed/sql/downgrades/citus--11.1-1--11.0-4.sql b/src/backend/distributed/sql/downgrades/citus--11.1-1--11.0-4.sql index 2a7462e0d..54ebcf4f3 100644 --- a/src/backend/distributed/sql/downgrades/citus--11.1-1--11.0-4.sql +++ b/src/backend/distributed/sql/downgrades/citus--11.1-1--11.0-4.sql @@ -121,3 +121,4 @@ DROP TABLE pg_catalog.pg_dist_background_job; DROP TYPE pg_catalog.citus_job_status; DROP FUNCTION pg_catalog.citus_copy_shard_placement; #include "../udfs/citus_copy_shard_placement/10.0-1.sql" +#include "../udfs/get_rebalance_progress/10.1-1.sql" diff --git a/src/backend/distributed/sql/udfs/get_rebalance_progress/11.1-1.sql b/src/backend/distributed/sql/udfs/get_rebalance_progress/11.1-1.sql new file mode 100644 index 000000000..639f9078b --- /dev/null +++ b/src/backend/distributed/sql/udfs/get_rebalance_progress/11.1-1.sql @@ -0,0 +1,20 @@ +DROP FUNCTION pg_catalog.get_rebalance_progress(); + +CREATE OR REPLACE FUNCTION pg_catalog.get_rebalance_progress() + RETURNS TABLE(sessionid integer, + table_name regclass, + shardid bigint, + shard_size bigint, + sourcename text, + sourceport int, + targetname text, + targetport int, + progress bigint, + source_shard_size bigint, + target_shard_size bigint, + operation_type text + ) + AS 'MODULE_PATHNAME' + LANGUAGE C STRICT; +COMMENT ON FUNCTION pg_catalog.get_rebalance_progress() + IS 'provides progress information about the ongoing rebalance operations'; diff --git a/src/backend/distributed/sql/udfs/get_rebalance_progress/latest.sql b/src/backend/distributed/sql/udfs/get_rebalance_progress/latest.sql index 7df399ac1..639f9078b 100644 --- a/src/backend/distributed/sql/udfs/get_rebalance_progress/latest.sql +++ b/src/backend/distributed/sql/udfs/get_rebalance_progress/latest.sql @@ -11,7 +11,9 @@ CREATE OR REPLACE FUNCTION pg_catalog.get_rebalance_progress() targetport int, progress bigint, source_shard_size bigint, - target_shard_size bigint) + target_shard_size bigint, + operation_type text + ) AS 'MODULE_PATHNAME' LANGUAGE C STRICT; COMMENT ON FUNCTION pg_catalog.get_rebalance_progress() diff --git a/src/include/distributed/shard_rebalancer.h b/src/include/distributed/shard_rebalancer.h index 36c38ffff..3abeb80fb 100644 --- a/src/include/distributed/shard_rebalancer.h +++ b/src/include/distributed/shard_rebalancer.h @@ -106,6 +106,7 @@ typedef struct PlacementUpdateEventProgress int sourcePort; char targetName[255]; int targetPort; + PlacementUpdateType updateType; pg_atomic_uint64 progress; } PlacementUpdateEventProgress; diff --git a/src/test/regress/expected/isolation_shard_rebalancer_progress.out b/src/test/regress/expected/isolation_shard_rebalancer_progress.out index 731f72c14..3ad592a50 100644 --- a/src/test/regress/expected/isolation_shard_rebalancer_progress.out +++ 
b/src/test/regress/expected/isolation_shard_rebalancer_progress.out @@ -28,15 +28,16 @@ step s7-get-progress: targetname, targetport, target_shard_size, - progress + progress, + operation_type FROM get_rebalance_progress(); -table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress +table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type --------------------------------------------------------------------- -colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 0| 1 -colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 0| 1 -colocated1|1500002| 196608|localhost | 57637| 196608|localhost | 57638| 0| 0 -colocated2|1500006| 8192|localhost | 57637| 8192|localhost | 57638| 0| 0 +colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 0| 1|move +colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 0| 1|move +colocated1|1500002| 196608|localhost | 57637| 196608|localhost | 57638| 0| 0|move +colocated2|1500006| 8192|localhost | 57637| 8192|localhost | 57638| 0| 0|move (4 rows) step s2-unlock-1-start: @@ -71,10 +72,11 @@ step s7-get-progress: targetname, targetport, target_shard_size, - progress + progress, + operation_type FROM get_rebalance_progress(); -table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress +table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type --------------------------------------------------------------------- (0 rows) @@ -109,15 +111,16 @@ step s7-get-progress: targetname, targetport, target_shard_size, - progress + progress, + operation_type FROM get_rebalance_progress(); -table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress +table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type --------------------------------------------------------------------- -colocated1|1500001| 73728|localhost | 57637| 0|localhost | 57638| 73728| 2 -colocated2|1500005| 401408|localhost | 57637| 0|localhost | 57638| 401408| 2 -colocated1|1500002| 196608|localhost | 57637| 196608|localhost | 57638| 0| 1 -colocated2|1500006| 8192|localhost | 57637| 8192|localhost | 57638| 0| 1 +colocated1|1500001| 73728|localhost | 57637| 0|localhost | 57638| 73728| 2|move +colocated2|1500005| 401408|localhost | 57637| 0|localhost | 57638| 401408| 2|move +colocated1|1500002| 196608|localhost | 57637| 196608|localhost | 57638| 0| 1|move +colocated2|1500006| 8192|localhost | 57637| 8192|localhost | 57638| 0| 1|move (4 rows) step s3-unlock-2-start: @@ -152,10 +155,11 @@ step s7-get-progress: targetname, targetport, target_shard_size, - progress + progress, + operation_type FROM get_rebalance_progress(); -table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress +table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type --------------------------------------------------------------------- (0 rows) @@ -202,15 +206,16 @@ step s7-get-progress: targetname, targetport, target_shard_size, - progress + progress, + operation_type FROM get_rebalance_progress(); 
-table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress +table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type --------------------------------------------------------------------- -colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 73728| 1 -colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 401408| 1 -colocated1|1500002| 196608|localhost | 57637| 196608|localhost | 57638| 0| 0 -colocated2|1500006| 8192|localhost | 57637| 8192|localhost | 57638| 0| 0 +colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 73728| 1|move +colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 401408| 1|move +colocated1|1500002| 196608|localhost | 57637| 196608|localhost | 57638| 0| 0|move +colocated2|1500006| 8192|localhost | 57637| 8192|localhost | 57638| 0| 0|move (4 rows) step s7-release-lock: @@ -245,10 +250,11 @@ step s7-get-progress: targetname, targetport, target_shard_size, - progress + progress, + operation_type FROM get_rebalance_progress(); -table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress +table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type --------------------------------------------------------------------- (0 rows) @@ -287,15 +293,16 @@ step s7-get-progress: targetname, targetport, target_shard_size, - progress + progress, + operation_type FROM get_rebalance_progress(); -table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress +table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type --------------------------------------------------------------------- -colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 8192| 1 -colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 8192| 1 -colocated1|1500002| 196608|localhost | 57637| 196608|localhost | 57638| 0| 0 -colocated2|1500006| 8192|localhost | 57637| 8192|localhost | 57638| 0| 0 +colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 8192| 1|move +colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 8192| 1|move +colocated1|1500002| 196608|localhost | 57637| 196608|localhost | 57638| 0| 0|move +colocated2|1500006| 8192|localhost | 57637| 8192|localhost | 57638| 0| 0|move (4 rows) step s6-release-advisory-lock: @@ -335,10 +342,11 @@ step s7-get-progress: targetname, targetport, target_shard_size, - progress + progress, + operation_type FROM get_rebalance_progress(); -table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress +table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type --------------------------------------------------------------------- (0 rows) @@ -384,13 +392,14 @@ step s7-get-progress: targetname, targetport, target_shard_size, - progress + progress, + operation_type FROM get_rebalance_progress(); -table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress 
+table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type --------------------------------------------------------------------- -colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 73728| 1 -colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 401408| 1 +colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 73728| 1|move +colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 401408| 1|move (2 rows) step s7-release-lock: @@ -417,10 +426,11 @@ step s7-get-progress: targetname, targetport, target_shard_size, - progress + progress, + operation_type FROM get_rebalance_progress(); -table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress +table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type --------------------------------------------------------------------- (0 rows) @@ -455,13 +465,14 @@ step s7-get-progress: targetname, targetport, target_shard_size, - progress + progress, + operation_type FROM get_rebalance_progress(); -table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress +table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type --------------------------------------------------------------------- -colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 0| 1 -colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 0| 1 +colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 0| 1|move +colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 0| 1|move (2 rows) step s2-unlock-1-start: @@ -488,10 +499,11 @@ step s7-get-progress: targetname, targetport, target_shard_size, - progress + progress, + operation_type FROM get_rebalance_progress(); -table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress +table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type --------------------------------------------------------------------- (0 rows) @@ -537,13 +549,14 @@ step s7-get-progress: targetname, targetport, target_shard_size, - progress + progress, + operation_type FROM get_rebalance_progress(); -table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress +table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type --------------------------------------------------------------------- -colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 73728| 1 -colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 401408| 1 +colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 73728| 1|move +colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 401408| 1|move (2 rows) step s7-release-lock: @@ -570,10 +583,11 @@ step s7-get-progress: targetname, targetport, target_shard_size, - progress + progress, + operation_type FROM get_rebalance_progress(); -table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress 
+table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type --------------------------------------------------------------------- (0 rows) @@ -581,6 +595,57 @@ step enable-deferred-drop: ALTER SYSTEM RESET citus.defer_drop_after_shard_move; +starting permutation: s2-lock-1-start s1-shard-copy-c1-block-writes s7-get-progress s2-unlock-1-start s1-commit +master_set_node_property +--------------------------------------------------------------------- + +(1 row) + +step s2-lock-1-start: + BEGIN; + DELETE FROM colocated1 WHERE test_id = 1; + DELETE FROM separate WHERE test_id = 1; + +step s1-shard-copy-c1-block-writes: + BEGIN; + UPDATE pg_dist_partition SET repmodel = 'c' WHERE logicalrelid IN ('colocated1', 'colocated2'); + SELECT citus_copy_shard_placement(1500001, 'localhost', 57637, 'localhost', 57638, transfer_mode:='block_writes'); + +step s7-get-progress: + set LOCAL client_min_messages=NOTICE; + SELECT + table_name, + shardid, + shard_size, + sourcename, + sourceport, + source_shard_size, + targetname, + targetport, + target_shard_size, + progress, + operation_type + FROM get_rebalance_progress(); + +table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type +--------------------------------------------------------------------- +colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 0| 1|copy +colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 0| 1|copy +(2 rows) + +step s2-unlock-1-start: + ROLLBACK; + +step s1-shard-copy-c1-block-writes: <... completed> +citus_copy_shard_placement +--------------------------------------------------------------------- + +(1 row) + +step s1-commit: + COMMIT; + + starting permutation: s6-acquire-advisory-lock s1-shard-move-c1-online s7-get-progress s6-release-advisory-lock s1-commit s7-get-progress enable-deferred-drop master_set_node_property --------------------------------------------------------------------- @@ -611,13 +676,14 @@ step s7-get-progress: targetname, targetport, target_shard_size, - progress + progress, + operation_type FROM get_rebalance_progress(); -table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress +table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type --------------------------------------------------------------------- -colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 8192| 1 -colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 8192| 1 +colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 8192| 1|move +colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 8192| 1|move (2 rows) step s6-release-advisory-lock: @@ -649,10 +715,11 @@ step s7-get-progress: targetname, targetport, target_shard_size, - progress + progress, + operation_type FROM get_rebalance_progress(); -table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress +table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type --------------------------------------------------------------------- (0 rows) @@ -698,13 +765,14 @@ step s7-get-progress: targetname, targetport, target_shard_size, - progress + progress, + operation_type FROM 
get_rebalance_progress(); -table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress +table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type --------------------------------------------------------------------- -colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 73728| 1 -colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 401408| 1 +colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 73728| 1|move +colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 401408| 1|move (2 rows) step s7-release-lock: @@ -731,10 +799,11 @@ step s7-get-progress: targetname, targetport, target_shard_size, - progress + progress, + operation_type FROM get_rebalance_progress(); -table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress +table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type --------------------------------------------------------------------- (0 rows) @@ -742,6 +811,65 @@ step enable-deferred-drop: ALTER SYSTEM RESET citus.defer_drop_after_shard_move; +starting permutation: s6-acquire-advisory-lock s1-shard-copy-c1-online s7-get-progress s6-release-advisory-lock s1-commit +master_set_node_property +--------------------------------------------------------------------- + +(1 row) + +step s6-acquire-advisory-lock: + SELECT pg_advisory_lock(44000, 55152); + +pg_advisory_lock +--------------------------------------------------------------------- + +(1 row) + +step s1-shard-copy-c1-online: + BEGIN; + UPDATE pg_dist_partition SET repmodel = 'c' WHERE logicalrelid IN ('colocated1', 'colocated2'); + SELECT citus_copy_shard_placement(1500001, 'localhost', 57637, 'localhost', 57638, transfer_mode:='force_logical'); + +step s7-get-progress: + set LOCAL client_min_messages=NOTICE; + SELECT + table_name, + shardid, + shard_size, + sourcename, + sourceport, + source_shard_size, + targetname, + targetport, + target_shard_size, + progress, + operation_type + FROM get_rebalance_progress(); + +table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type +--------------------------------------------------------------------- +colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 8192| 1|copy +colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 8192| 1|copy +(2 rows) + +step s6-release-advisory-lock: + SELECT pg_advisory_unlock(44000, 55152); + +pg_advisory_unlock +--------------------------------------------------------------------- +t +(1 row) + +step s1-shard-copy-c1-online: <... 
completed> +citus_copy_shard_placement +--------------------------------------------------------------------- + +(1 row) + +step s1-commit: + COMMIT; + + starting permutation: s2-lock-1-start s1-shard-move-c1-block-writes s4-shard-move-sep-block-writes s7-get-progress s2-unlock-1-start s1-commit s4-commit s7-get-progress enable-deferred-drop master_set_node_property --------------------------------------------------------------------- @@ -773,14 +901,15 @@ step s7-get-progress: targetname, targetport, target_shard_size, - progress + progress, + operation_type FROM get_rebalance_progress(); -table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress +table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type --------------------------------------------------------------------- -colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 0| 1 -colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 0| 1 -separate |1500009| 122880|localhost | 57637| 122880|localhost | 57638| 0| 1 +colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 0| 1|move +colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 0| 1|move +separate |1500009| 122880|localhost | 57637| 122880|localhost | 57638| 0| 1|move (3 rows) step s2-unlock-1-start: @@ -816,10 +945,11 @@ step s7-get-progress: targetname, targetport, target_shard_size, - progress + progress, + operation_type FROM get_rebalance_progress(); -table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress +table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type --------------------------------------------------------------------- (0 rows) @@ -869,14 +999,15 @@ step s7-get-progress: targetname, targetport, target_shard_size, - progress + progress, + operation_type FROM get_rebalance_progress(); -table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress +table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type --------------------------------------------------------------------- -colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 73728| 1 -colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 401408| 1 -separate |1500009| 122880|localhost | 57637| 122880|localhost | 57638| 147456| 1 +colocated1|1500001| 49152|localhost | 57637| 49152|localhost | 57638| 73728| 1|move +colocated2|1500005| 376832|localhost | 57637| 376832|localhost | 57638| 401408| 1|move +separate |1500009| 122880|localhost | 57637| 122880|localhost | 57638| 147456| 1|move (3 rows) step s7-release-lock: @@ -912,10 +1043,11 @@ step s7-get-progress: targetname, targetport, target_shard_size, - progress + progress, + operation_type FROM get_rebalance_progress(); -table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress +table_name|shardid|shard_size|sourcename|sourceport|source_shard_size|targetname|targetport|target_shard_size|progress|operation_type --------------------------------------------------------------------- (0 rows) diff --git a/src/test/regress/expected/multi_extension.out b/src/test/regress/expected/multi_extension.out index 
5f37f7a32..4ae526935 100644 --- a/src/test/regress/expected/multi_extension.out +++ b/src/test/regress/expected/multi_extension.out @@ -1111,64 +1111,66 @@ ERROR: extension "citus" already exists -- Snapshot of state at 11.1-1 ALTER EXTENSION citus UPDATE TO '11.1-1'; SELECT * FROM multi_extension.print_extension_changes(); - previous_object | current_object + previous_object | current_object --------------------------------------------------------------------- - access method columnar | - function alter_columnar_table_reset(regclass,boolean,boolean,boolean,boolean) void | - function alter_columnar_table_set(regclass,integer,integer,name,integer) void | - function citus_copy_shard_placement(bigint,text,integer,text,integer,boolean,citus.shard_transfer_mode) void | - function citus_internal.columnar_ensure_am_depends_catalog() void | - function citus_internal.downgrade_columnar_storage(regclass) void | - function citus_internal.upgrade_columnar_storage(regclass) void | - function columnar.columnar_handler(internal) table_am_handler | - function isolate_tenant_to_new_shard(regclass,"any",text) bigint | - function replicate_reference_tables() void | - function worker_cleanup_job_schema_cache() void | - function worker_create_schema(bigint,text) void | - function worker_fetch_foreign_file(text,text,bigint,text[],integer[]) void | - function worker_fetch_partition_file(bigint,integer,integer,integer,text,integer) void | - function worker_hash_partition_table(bigint,integer,text,text,oid,anyarray) void | - function worker_merge_files_into_table(bigint,integer,text[],text[]) void | - function worker_range_partition_table(bigint,integer,text,text,oid,anyarray) void | - function worker_repartition_cleanup(bigint) void | - schema columnar | - sequence columnar.storageid_seq | - table columnar.chunk | - table columnar.chunk_group | - table columnar.options | - table columnar.stripe | - | function citus_cleanup_orphaned_resources() - | function citus_copy_shard_placement(bigint,text,integer,text,integer,citus.shard_transfer_mode) void - | function citus_internal_delete_partition_metadata(regclass) void - | function citus_job_cancel(bigint) void - | function citus_job_wait(bigint,citus_job_status) void - | function citus_locks() SETOF record - | function citus_rebalance_start(name,boolean,citus.shard_transfer_mode) bigint - | function citus_rebalance_stop() void - | function citus_rebalance_wait() void - | function citus_split_shard_by_split_points(bigint,text[],integer[],citus.shard_transfer_mode) void - | function create_distributed_table_concurrently(regclass,text,citus.distribution_type,text,integer) void - | function isolate_tenant_to_new_shard(regclass,"any",text,citus.shard_transfer_mode) bigint - | function replicate_reference_tables(citus.shard_transfer_mode) void - | function worker_copy_table_to_node(regclass,integer) void - | function worker_split_copy(bigint,text,split_copy_info[]) void - | function worker_split_shard_release_dsm() void - | function worker_split_shard_replication_setup(split_shard_info[]) SETOF replication_slot_info - | sequence pg_dist_background_job_job_id_seq - | sequence pg_dist_background_task_task_id_seq - | sequence pg_dist_cleanup_recordid_seq - | sequence pg_dist_operationid_seq - | table pg_dist_background_job - | table pg_dist_background_task - | table pg_dist_background_task_depend - | table pg_dist_cleanup - | type citus_job_status - | type citus_task_status - | type replication_slot_info - | type split_copy_info - | type split_shard_info - | view citus_locks 
-(55 rows) + access method columnar | + function alter_columnar_table_reset(regclass,boolean,boolean,boolean,boolean) void | + function alter_columnar_table_set(regclass,integer,integer,name,integer) void | + function citus_copy_shard_placement(bigint,text,integer,text,integer,boolean,citus.shard_transfer_mode) void | + function citus_internal.columnar_ensure_am_depends_catalog() void | + function citus_internal.downgrade_columnar_storage(regclass) void | + function citus_internal.upgrade_columnar_storage(regclass) void | + function columnar.columnar_handler(internal) table_am_handler | + function get_rebalance_progress() TABLE(sessionid integer, table_name regclass, shardid bigint, shard_size bigint, sourcename text, sourceport integer, targetname text, targetport integer, progress bigint, source_shard_size bigint, target_shard_size bigint) | + function isolate_tenant_to_new_shard(regclass,"any",text) bigint | + function replicate_reference_tables() void | + function worker_cleanup_job_schema_cache() void | + function worker_create_schema(bigint,text) void | + function worker_fetch_foreign_file(text,text,bigint,text[],integer[]) void | + function worker_fetch_partition_file(bigint,integer,integer,integer,text,integer) void | + function worker_hash_partition_table(bigint,integer,text,text,oid,anyarray) void | + function worker_merge_files_into_table(bigint,integer,text[],text[]) void | + function worker_range_partition_table(bigint,integer,text,text,oid,anyarray) void | + function worker_repartition_cleanup(bigint) void | + schema columnar | + sequence columnar.storageid_seq | + table columnar.chunk | + table columnar.chunk_group | + table columnar.options | + table columnar.stripe | + | function citus_cleanup_orphaned_resources() + | function citus_copy_shard_placement(bigint,text,integer,text,integer,citus.shard_transfer_mode) void + | function citus_internal_delete_partition_metadata(regclass) void + | function citus_job_cancel(bigint) void + | function citus_job_wait(bigint,citus_job_status) void + | function citus_locks() SETOF record + | function citus_rebalance_start(name,boolean,citus.shard_transfer_mode) bigint + | function citus_rebalance_stop() void + | function citus_rebalance_wait() void + | function citus_split_shard_by_split_points(bigint,text[],integer[],citus.shard_transfer_mode) void + | function create_distributed_table_concurrently(regclass,text,citus.distribution_type,text,integer) void + | function get_rebalance_progress() TABLE(sessionid integer, table_name regclass, shardid bigint, shard_size bigint, sourcename text, sourceport integer, targetname text, targetport integer, progress bigint, source_shard_size bigint, target_shard_size bigint, operation_type text) + | function isolate_tenant_to_new_shard(regclass,"any",text,citus.shard_transfer_mode) bigint + | function replicate_reference_tables(citus.shard_transfer_mode) void + | function worker_copy_table_to_node(regclass,integer) void + | function worker_split_copy(bigint,text,split_copy_info[]) void + | function worker_split_shard_release_dsm() void + | function worker_split_shard_replication_setup(split_shard_info[]) SETOF replication_slot_info + | sequence pg_dist_background_job_job_id_seq + | sequence pg_dist_background_task_task_id_seq + | sequence pg_dist_cleanup_recordid_seq + | sequence pg_dist_operationid_seq + | table pg_dist_background_job + | table pg_dist_background_task + | table pg_dist_background_task_depend + | table pg_dist_cleanup + | type citus_job_status + | type citus_task_status + | type 
replication_slot_info + | type split_copy_info + | type split_shard_info + | view citus_locks +(57 rows) DROP TABLE multi_extension.prev_objects, multi_extension.extension_diff; -- show running version diff --git a/src/test/regress/expected/shard_rebalancer.out b/src/test/regress/expected/shard_rebalancer.out index 4f7fad246..c7af9a94b 100644 --- a/src/test/regress/expected/shard_rebalancer.out +++ b/src/test/regress/expected/shard_rebalancer.out @@ -1044,7 +1044,7 @@ SELECT * FROM get_rebalance_table_shards_plan('colocated_rebalance_test', rebala -- Check that we can call this function SELECT * FROM get_rebalance_progress(); - sessionid | table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport | progress | source_shard_size | target_shard_size + sessionid | table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport | progress | source_shard_size | target_shard_size | operation_type --------------------------------------------------------------------- (0 rows) @@ -1058,7 +1058,7 @@ SELECT * FROM rebalance_table_shards('colocated_rebalance_test', threshold := 0, CALL citus_cleanup_orphaned_shards(); -- Check that we can call this function without a crash SELECT * FROM get_rebalance_progress(); - sessionid | table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport | progress | source_shard_size | target_shard_size + sessionid | table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport | progress | source_shard_size | target_shard_size | operation_type --------------------------------------------------------------------- (0 rows) diff --git a/src/test/regress/spec/isolation_shard_rebalancer_progress.spec b/src/test/regress/spec/isolation_shard_rebalancer_progress.spec index c9bb3b641..572163f7c 100644 --- a/src/test/regress/spec/isolation_shard_rebalancer_progress.spec +++ b/src/test/regress/spec/isolation_shard_rebalancer_progress.spec @@ -58,12 +58,26 @@ step "s1-shard-move-c1-block-writes" SELECT citus_move_shard_placement(1500001, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes'); } +step "s1-shard-copy-c1-block-writes" +{ + BEGIN; + UPDATE pg_dist_partition SET repmodel = 'c' WHERE logicalrelid IN ('colocated1', 'colocated2'); + SELECT citus_copy_shard_placement(1500001, 'localhost', 57637, 'localhost', 57638, transfer_mode:='block_writes'); +} + step "s1-shard-move-c1-online" { BEGIN; SELECT citus_move_shard_placement(1500001, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='force_logical'); } +step "s1-shard-copy-c1-online" +{ + BEGIN; + UPDATE pg_dist_partition SET repmodel = 'c' WHERE logicalrelid IN ('colocated1', 'colocated2'); + SELECT citus_copy_shard_placement(1500001, 'localhost', 57637, 'localhost', 57638, transfer_mode:='force_logical'); +} + step "s1-commit" { COMMIT; @@ -156,7 +170,8 @@ step "s7-get-progress" targetname, targetport, target_shard_size, - progress + progress, + operation_type FROM get_rebalance_progress(); } @@ -188,10 +203,15 @@ permutation "s7-grab-lock" "s1-shard-move-c1-online" "s7-get-progress" "s7-relea permutation "s2-lock-1-start" "s1-shard-move-c1-block-writes" "s7-get-progress" "s2-unlock-1-start" "s1-commit" "s7-get-progress" "enable-deferred-drop" permutation "s7-grab-lock" "s1-shard-move-c1-block-writes" "s7-get-progress" "s7-release-lock" "s1-commit" "s7-get-progress" "enable-deferred-drop" +// blocking shard copy +permutation "s2-lock-1-start" "s1-shard-copy-c1-block-writes" "s7-get-progress" 
"s2-unlock-1-start" "s1-commit" + // online shard move permutation "s6-acquire-advisory-lock" "s1-shard-move-c1-online" "s7-get-progress" "s6-release-advisory-lock" "s1-commit" "s7-get-progress" "enable-deferred-drop" permutation "s7-grab-lock" "s1-shard-move-c1-online" "s7-get-progress" "s7-release-lock" "s1-commit" "s7-get-progress" "enable-deferred-drop" +// online shard copy +permutation "s6-acquire-advisory-lock" "s1-shard-copy-c1-online" "s7-get-progress" "s6-release-advisory-lock" "s1-commit" // parallel blocking shard move permutation "s2-lock-1-start" "s1-shard-move-c1-block-writes" "s4-shard-move-sep-block-writes" "s7-get-progress" "s2-unlock-1-start" "s1-commit" "s4-commit" "s7-get-progress" "enable-deferred-drop" From f34467dcb3bdeef2be15453653b6a61ade9c46ac Mon Sep 17 00:00:00 2001 From: Hanefi Onaldi Date: Tue, 13 Sep 2022 13:48:51 +0300 Subject: [PATCH 16/17] Remove missing declaration warning (#6330) When I built Citus on PG15beta4 locally, I get a warning message. ``` utils/background_jobs.c:902:5: warning: declaration does not declare anything [-Wmissing-declarations] __attribute__((fallthrough)); ^ 1 warning generated. ``` This is a hint to the compiler that we are deliberately falling through in a switch-case block. --- src/backend/distributed/utils/background_jobs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/backend/distributed/utils/background_jobs.c b/src/backend/distributed/utils/background_jobs.c index 6b7cb024b..524bcdb2a 100644 --- a/src/backend/distributed/utils/background_jobs.c +++ b/src/backend/distributed/utils/background_jobs.c @@ -899,9 +899,10 @@ ConsumeTaskWorkerOutput(shm_mq_handle *responseq, StringInfo message, bool *hadE { *hadError = true; } - __attribute__((fallthrough)); } + /* FALLTHROUGH */ + case 'N': /* NoticeResponse */ { ErrorData edata = { 0 }; From da527951ca620d8facd3f18b90cdef0be5007597 Mon Sep 17 00:00:00 2001 From: Nils Dijk Date: Tue, 13 Sep 2022 23:19:31 +0200 Subject: [PATCH 17/17] Fix: rebalance stop non super user (#6334) No need for description, fixing issue introduced with new feature for 11.1 Fixes #6333 Due to Postgres' C api being o-indexed and postgres' attributes being 1-indexed, we were reading the wrong Datum as the Task owner when cancelling. Here we add a test to show the error and fix the off-by-one error. 
--- .../distributed/metadata/metadata_utility.c | 2 +- .../regress/expected/background_rebalance.out | 34 +++++++++++++++++++ src/test/regress/sql/background_rebalance.sql | 18 ++++++++++ 3 files changed, 53 insertions(+), 1 deletion(-) diff --git a/src/backend/distributed/metadata/metadata_utility.c b/src/backend/distributed/metadata/metadata_utility.c index b7d20a9b5..c09ad358c 100644 --- a/src/backend/distributed/metadata/metadata_utility.c +++ b/src/backend/distributed/metadata/metadata_utility.c @@ -4022,7 +4022,7 @@ CancelTasksForJob(int64 jobid) } /* make sure the current user has the rights to cancel this task */ - Oid taskOwner = DatumGetObjectId(values[Anum_pg_dist_background_task_owner]); + Oid taskOwner = DatumGetObjectId(values[Anum_pg_dist_background_task_owner - 1]); if (superuser_arg(taskOwner) && !superuser()) { /* must be a superuser to cancel tasks owned by superuser */ diff --git a/src/test/regress/expected/background_rebalance.out b/src/test/regress/expected/background_rebalance.out index 32a5e86b0..8843654d6 100644 --- a/src/test/regress/expected/background_rebalance.out +++ b/src/test/regress/expected/background_rebalance.out @@ -176,5 +176,39 @@ SELECT citus_rebalance_wait(); (1 row) +DROP TABLE t1; +-- make sure a non-super user can stop rebalancing +CREATE USER non_super_user_rebalance WITH LOGIN; +GRANT ALL ON SCHEMA background_rebalance TO non_super_user_rebalance; +SET ROLE non_super_user_rebalance; +CREATE TABLE non_super_user_t1 (a int PRIMARY KEY); +SELECT create_distributed_table('non_super_user_t1', 'a', shard_count => 4, colocate_with => 'none'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_move_shard_placement(85674008, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode => 'block_writes'); + citus_move_shard_placement +--------------------------------------------------------------------- + +(1 row) + +SELECT 1 FROM citus_rebalance_start(); +NOTICE: Scheduled 1 moves as job xxx +DETAIL: Rebalance scheduled as background job +HINT: To monitor progress, run: SELECT * FROM pg_dist_background_task WHERE job_id = xxx ORDER BY task_id ASC; or SELECT * FROM get_rebalance_progress(); + ?column? 
+--------------------------------------------------------------------- + 1 +(1 row) + +SELECT citus_rebalance_stop(); + citus_rebalance_stop +--------------------------------------------------------------------- + +(1 row) + +RESET ROLE; SET client_min_messages TO WARNING; DROP SCHEMA background_rebalance CASCADE; diff --git a/src/test/regress/sql/background_rebalance.sql b/src/test/regress/sql/background_rebalance.sql index 9158fc532..6528c71b7 100644 --- a/src/test/regress/sql/background_rebalance.sql +++ b/src/test/regress/sql/background_rebalance.sql @@ -59,6 +59,24 @@ SELECT 1 FROM citus_rebalance_start(); SELECT rebalance_table_shards(); SELECT citus_rebalance_wait(); +DROP TABLE t1; + + +-- make sure a non-super user can stop rebalancing +CREATE USER non_super_user_rebalance WITH LOGIN; +GRANT ALL ON SCHEMA background_rebalance TO non_super_user_rebalance; + +SET ROLE non_super_user_rebalance; + +CREATE TABLE non_super_user_t1 (a int PRIMARY KEY); +SELECT create_distributed_table('non_super_user_t1', 'a', shard_count => 4, colocate_with => 'none'); +SELECT citus_move_shard_placement(85674008, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode => 'block_writes'); + +SELECT 1 FROM citus_rebalance_start(); +SELECT citus_rebalance_stop(); + +RESET ROLE; + SET client_min_messages TO WARNING; DROP SCHEMA background_rebalance CASCADE;