diff --git a/src/backend/distributed/commands/dependencies.c b/src/backend/distributed/commands/dependencies.c index 01653c3c8..baa5082d7 100644 --- a/src/backend/distributed/commands/dependencies.c +++ b/src/backend/distributed/commands/dependencies.c @@ -29,16 +29,14 @@ #include "storage/lmgr.h" #include "utils/lsyscache.h" -typedef bool (*AddressPredicate)(const ObjectAddress *); static void EnsureDependenciesCanBeDistributed(const ObjectAddress *relationAddress); static void ErrorIfCircularDependencyExists(const ObjectAddress *objectAddress); static int ObjectAddressComparator(const void *a, const void *b); -static List * FilterObjectAddressListByPredicate(List *objectAddressList, - AddressPredicate predicate); static void EnsureDependenciesExistOnAllNodes(const ObjectAddress *target); static List * GetDependencyCreateDDLCommands(const ObjectAddress *dependency); static bool ShouldPropagateObject(const ObjectAddress *address); +static char * DropTableIfExistsCommand(Oid relationId); /* * EnsureDependenciesExistOnAllNodes finds all the dependencies that we support and makes @@ -325,6 +323,21 @@ GetDistributableDependenciesForObject(const ObjectAddress *target) } +/* + * DropTableIfExistsCommand returns command to drop given table if exists. + */ +static char * +DropTableIfExistsCommand(Oid relationId) +{ + char *qualifiedRelationName = generate_qualified_relation_name(relationId); + StringInfo dropTableCommand = makeStringInfo(); + appendStringInfo(dropTableCommand, "DROP TABLE IF EXISTS %s CASCADE", + qualifiedRelationName); + + return dropTableCommand->data; +} + + /* * GetDependencyCreateDDLCommands returns a list (potentially empty or NIL) of ddl * commands to execute on a worker to create the object. @@ -379,6 +392,10 @@ GetDependencyCreateDDLCommands(const ObjectAddress *dependency) commandList = lappend(commandList, GetTableDDLCommand( tableDDLCommand)); } + + /* we need to drop table, if exists, first to make table creation idempotent */ + commandList = lcons(DropTableIfExistsCommand(relationId), + commandList); } return commandList; @@ -532,68 +549,6 @@ GetAllDependencyCreateDDLCommands(const List *dependencies) } -/* - * ReplicateAllObjectsToNodeCommandList returns commands to replicate all - * previously marked objects to a worker node. The function also sets - * clusterHasDistributedFunction if there are any distributed functions. - */ -List * -ReplicateAllObjectsToNodeCommandList(const char *nodeName, int nodePort) -{ - /* since we are executing ddl commands disable propagation first, primarily for mx */ - List *ddlCommands = list_make1(DISABLE_DDL_PROPAGATION); - - /* - * collect all dependencies in creation order and get their ddl commands - */ - List *dependencies = GetDistributedObjectAddressList(); - - /* - * Depending on changes in the environment, such as the enable_metadata_sync guc - * there might be objects in the distributed object address list that should currently - * not be propagated by citus as they are 'not supported'. - */ - dependencies = FilterObjectAddressListByPredicate(dependencies, - &SupportedDependencyByCitus); - - /* - * When dependency lists are getting longer we see a delay in the creation time on the - * workers. We would like to inform the user. Currently we warn for lists greater than - * 100 items, where 100 is an arbitrarily chosen number. If we find it too high or too - * low we can adjust this based on experience. 
- */ - if (list_length(dependencies) > 100) - { - ereport(NOTICE, (errmsg("Replicating postgres objects to node %s:%d", nodeName, - nodePort), - errdetail("There are %d objects to replicate, depending on your " - "environment this might take a while", - list_length(dependencies)))); - } - - dependencies = OrderObjectAddressListInDependencyOrder(dependencies); - ObjectAddress *dependency = NULL; - foreach_ptr(dependency, dependencies) - { - if (IsAnyObjectAddressOwnedByExtension(list_make1(dependency), NULL)) - { - /* - * we expect extension-owned objects to be created as a result - * of the extension being created. - */ - continue; - } - - ddlCommands = list_concat(ddlCommands, - GetDependencyCreateDDLCommands(dependency)); - } - - ddlCommands = lappend(ddlCommands, ENABLE_DDL_PROPAGATION); - - return ddlCommands; -} - - /* * ShouldPropagate determines if we should be propagating anything */ @@ -749,7 +704,7 @@ ShouldPropagateAnyObject(List *addresses) * FilterObjectAddressListByPredicate takes a list of ObjectAddress *'s and returns a list * only containing the ObjectAddress *'s for which the predicate returned true. */ -static List * +List * FilterObjectAddressListByPredicate(List *objectAddressList, AddressPredicate predicate) { List *result = NIL; diff --git a/src/backend/distributed/connection/connection_management.c b/src/backend/distributed/connection/connection_management.c index 12a5e7b3f..e4aca3ee7 100644 --- a/src/backend/distributed/connection/connection_management.c +++ b/src/backend/distributed/connection/connection_management.c @@ -1202,6 +1202,17 @@ FinishConnectionEstablishment(MultiConnection *connection) } +/* + * ForceConnectionCloseAtTransactionEnd marks connection to be closed at the end of the + * transaction. + */ +void +ForceConnectionCloseAtTransactionEnd(MultiConnection *connection) +{ + connection->forceCloseAtTransactionEnd = true; +} + + /* * ClaimConnectionExclusively signals that this connection is actively being * used. That means it'll not be, again, returned by diff --git a/src/backend/distributed/metadata/metadata_sync.c b/src/backend/distributed/metadata/metadata_sync.c index 494041bea..e3310c5c8 100644 --- a/src/backend/distributed/metadata/metadata_sync.c +++ b/src/backend/distributed/metadata/metadata_sync.c @@ -90,6 +90,7 @@ /* managed via a GUC */ char *EnableManualMetadataChangesForUser = ""; +int MetadataSyncTransMode = METADATA_SYNC_TRANSACTIONAL; static void EnsureObjectMetadataIsSane(int distributionArgumentIndex, @@ -193,8 +194,20 @@ start_metadata_sync_to_node(PG_FUNCTION_ARGS) EnsureCoordinator(); char *nodeNameString = text_to_cstring(nodeName); + WorkerNode *workerNode = ModifiableWorkerNode(nodeNameString, nodePort); - ActivateNode(nodeNameString, nodePort); + /* + * Create MetadataSyncContext which is used throughout nodes' activation. + * It contains activated nodes, bare connections if the mode is nontransactional, + * and a memory context for allocation. 
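+	 * Whether the sync runs transactionally or nontransactionally follows the
+	 * citus.metadata_sync_mode GUC (MetadataSyncTransMode).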
+ */ + bool collectCommands = false; + bool nodesAddedInSameTransaction = false; + MetadataSyncContext *context = CreateMetadataSyncContext(list_make1(workerNode), + collectCommands, + nodesAddedInSameTransaction); + + ActivateNodeList(context); TransactionModifiedNodeMetadata = true; PG_RETURN_VOID(); @@ -214,90 +227,26 @@ start_metadata_sync_to_all_nodes(PG_FUNCTION_ARGS) EnsureSuperUser(); EnsureCoordinator(); - List *workerNodes = ActivePrimaryNonCoordinatorNodeList(RowShareLock); + List *nodeList = ActivePrimaryNonCoordinatorNodeList(RowShareLock); - ActivateNodeList(workerNodes); + /* + * Create MetadataSyncContext which is used throughout nodes' activation. + * It contains activated nodes, bare connections if the mode is nontransactional, + * and a memory context for allocation. + */ + bool collectCommands = false; + bool nodesAddedInSameTransaction = false; + MetadataSyncContext *context = CreateMetadataSyncContext(nodeList, + collectCommands, + nodesAddedInSameTransaction); + + ActivateNodeList(context); TransactionModifiedNodeMetadata = true; PG_RETURN_BOOL(true); } -/* - * SyncNodeMetadataToNode is the internal API for - * start_metadata_sync_to_node(). - */ -void -SyncNodeMetadataToNode(const char *nodeNameString, int32 nodePort) -{ - char *escapedNodeName = quote_literal_cstr(nodeNameString); - - CheckCitusVersion(ERROR); - EnsureCoordinator(); - EnsureModificationsCanRun(); - - EnsureSequentialModeMetadataOperations(); - - LockRelationOid(DistNodeRelationId(), ExclusiveLock); - - WorkerNode *workerNode = FindWorkerNode(nodeNameString, nodePort); - if (workerNode == NULL) - { - ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("you cannot sync metadata to a non-existent node"), - errhint("First, add the node with SELECT citus_add_node" - "(%s,%d)", escapedNodeName, nodePort))); - } - - if (!workerNode->isActive) - { - ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("you cannot sync metadata to an inactive node"), - errhint("First, activate the node with " - "SELECT citus_activate_node(%s,%d)", - escapedNodeName, nodePort))); - } - - if (NodeIsCoordinator(workerNode)) - { - ereport(NOTICE, (errmsg("%s:%d is the coordinator and already contains " - "metadata, skipping syncing the metadata", - nodeNameString, nodePort))); - return; - } - - UseCoordinatedTransaction(); - - /* - * One would normally expect to set hasmetadata first, and then metadata sync. - * However, at this point we do the order reverse. - * We first set metadatasynced, and then hasmetadata; since setting columns for - * nodes with metadatasynced==false could cause errors. - * (See ErrorIfAnyMetadataNodeOutOfSync) - * We can safely do that because we are in a coordinated transaction and the changes - * are only visible to our own transaction. - * If anything goes wrong, we are going to rollback all the changes. - */ - workerNode = SetWorkerColumn(workerNode, Anum_pg_dist_node_metadatasynced, - BoolGetDatum(true)); - workerNode = SetWorkerColumn(workerNode, Anum_pg_dist_node_hasmetadata, BoolGetDatum( - true)); - - if (!NodeIsPrimary(workerNode)) - { - /* - * If this is a secondary node we can't actually sync metadata to it; we assume - * the primary node is receiving metadata. - */ - return; - } - - /* fail if metadata synchronization doesn't succeed */ - bool raiseInterrupts = true; - SyncNodeMetadataSnapshotToNode(workerNode, raiseInterrupts); -} - - /* * SyncCitusTableMetadata syncs citus table metadata to worker nodes with metadata. 
* Our definition of metadata includes the shell table and its inter relations with @@ -612,6 +561,25 @@ ShouldSyncTableMetadataViaCatalog(Oid relationId) } +/* + * FetchRelationIdFromPgPartitionHeapTuple returns relation id from given heap tuple. + */ +Oid +FetchRelationIdFromPgPartitionHeapTuple(HeapTuple heapTuple, TupleDesc tupleDesc) +{ + Assert(heapTuple->t_tableOid == DistPartitionRelationId()); + + bool isNullArray[Natts_pg_dist_partition]; + Datum datumArray[Natts_pg_dist_partition]; + heap_deform_tuple(heapTuple, tupleDesc, datumArray, isNullArray); + + Datum relationIdDatum = datumArray[Anum_pg_dist_partition_logicalrelid - 1]; + Oid relationId = DatumGetObjectId(relationIdDatum); + + return relationId; +} + + /* * ShouldSyncTableMetadataInternal decides whether we should sync the metadata for a table * based on whether it is a hash distributed table, or a citus table with no distribution @@ -715,11 +683,12 @@ DropMetadataSnapshotOnNode(WorkerNode *workerNode) * Detach partitions, break dependencies between sequences and table then * remove shell tables first. */ + bool singleTransaction = true; List *dropMetadataCommandList = DetachPartitionCommandList(); dropMetadataCommandList = lappend(dropMetadataCommandList, BREAK_CITUS_TABLE_SEQUENCE_DEPENDENCY_COMMAND); dropMetadataCommandList = lappend(dropMetadataCommandList, - REMOVE_ALL_SHELL_TABLES_COMMAND); + WorkerDropAllShellTablesCommand(singleTransaction)); dropMetadataCommandList = list_concat(dropMetadataCommandList, NodeMetadataDropCommands()); dropMetadataCommandList = lappend(dropMetadataCommandList, @@ -769,114 +738,6 @@ NodeMetadataCreateCommands(void) } -/* - * DistributedObjectMetadataSyncCommandList returns the necessary commands to create - * pg_dist_object entries on the new node. - */ -List * -DistributedObjectMetadataSyncCommandList(void) -{ - HeapTuple pgDistObjectTup = NULL; - Relation pgDistObjectRel = table_open(DistObjectRelationId(), AccessShareLock); - Relation pgDistObjectIndexRel = index_open(DistObjectPrimaryKeyIndexId(), - AccessShareLock); - TupleDesc pgDistObjectDesc = RelationGetDescr(pgDistObjectRel); - - List *objectAddressList = NIL; - List *distArgumentIndexList = NIL; - List *colocationIdList = NIL; - List *forceDelegationList = NIL; - - /* It is not strictly necessary to read the tuples in order. - * However, it is useful to get consistent behavior, both for regression - * tests and also in production systems. 
- */ - SysScanDesc pgDistObjectScan = systable_beginscan_ordered(pgDistObjectRel, - pgDistObjectIndexRel, NULL, - 0, NULL); - while (HeapTupleIsValid(pgDistObjectTup = systable_getnext_ordered(pgDistObjectScan, - ForwardScanDirection))) - { - Form_pg_dist_object pg_dist_object = (Form_pg_dist_object) GETSTRUCT( - pgDistObjectTup); - - ObjectAddress *address = palloc(sizeof(ObjectAddress)); - - ObjectAddressSubSet(*address, pg_dist_object->classid, pg_dist_object->objid, - pg_dist_object->objsubid); - - bool distributionArgumentIndexIsNull = false; - Datum distributionArgumentIndexDatum = - heap_getattr(pgDistObjectTup, - Anum_pg_dist_object_distribution_argument_index, - pgDistObjectDesc, - &distributionArgumentIndexIsNull); - int32 distributionArgumentIndex = DatumGetInt32(distributionArgumentIndexDatum); - - bool colocationIdIsNull = false; - Datum colocationIdDatum = - heap_getattr(pgDistObjectTup, - Anum_pg_dist_object_colocationid, - pgDistObjectDesc, - &colocationIdIsNull); - int32 colocationId = DatumGetInt32(colocationIdDatum); - - bool forceDelegationIsNull = false; - Datum forceDelegationDatum = - heap_getattr(pgDistObjectTup, - Anum_pg_dist_object_force_delegation, - pgDistObjectDesc, - &forceDelegationIsNull); - bool forceDelegation = DatumGetBool(forceDelegationDatum); - - objectAddressList = lappend(objectAddressList, address); - - if (distributionArgumentIndexIsNull) - { - distArgumentIndexList = lappend_int(distArgumentIndexList, - INVALID_DISTRIBUTION_ARGUMENT_INDEX); - } - else - { - distArgumentIndexList = lappend_int(distArgumentIndexList, - distributionArgumentIndex); - } - - if (colocationIdIsNull) - { - colocationIdList = lappend_int(colocationIdList, - INVALID_COLOCATION_ID); - } - else - { - colocationIdList = lappend_int(colocationIdList, colocationId); - } - - if (forceDelegationIsNull) - { - forceDelegationList = lappend_int(forceDelegationList, NO_FORCE_PUSHDOWN); - } - else - { - forceDelegationList = lappend_int(forceDelegationList, forceDelegation); - } - } - - systable_endscan_ordered(pgDistObjectScan); - index_close(pgDistObjectIndexRel, AccessShareLock); - relation_close(pgDistObjectRel, NoLock); - - char *workerMetadataUpdateCommand = - MarkObjectsDistributedCreateCommand(objectAddressList, - distArgumentIndexList, - colocationIdList, - forceDelegationList); - List *commandList = list_make1(workerMetadataUpdateCommand); - - return commandList; -} - - /* * CitusTableMetadataCreateCommandList returns the set of commands necessary to * create the given distributed table metadata on a worker. @@ -989,6 +850,35 @@ NodeListInsertCommand(List *workerNodeList) } +/* + * NodeListIdempotentInsertCommand generates an idempotent multi-row INSERT command that + * can be executed to insert the nodes that are in workerNodeList to pg_dist_node table. + * It would insert new nodes or replace current nodes with new nodes if nodename-nodeport + * pairs already exist. 
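+ *
+ * For illustration, the generated command has the following shape (values elided):
+ *   INSERT INTO pg_dist_node (...) VALUES (...), ...
+ *   ON CONFLICT ON CONSTRAINT pg_dist_node_nodename_nodeport_key
+ *   DO UPDATE SET nodeid = EXCLUDED.nodeid, ..., shouldhaveshards = EXCLUDED.shouldhaveshards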
+ */ +char * +NodeListIdempotentInsertCommand(List *workerNodeList) +{ + StringInfo nodeInsertIdempotentCommand = makeStringInfo(); + char *nodeInsertStr = NodeListInsertCommand(workerNodeList); + appendStringInfoString(nodeInsertIdempotentCommand, nodeInsertStr); + char *onConflictStr = " ON CONFLICT ON CONSTRAINT pg_dist_node_nodename_nodeport_key " + "DO UPDATE SET nodeid = EXCLUDED.nodeid, " + "groupid = EXCLUDED.groupid, " + "nodename = EXCLUDED.nodename, " + "nodeport = EXCLUDED.nodeport, " + "noderack = EXCLUDED.noderack, " + "hasmetadata = EXCLUDED.hasmetadata, " + "isactive = EXCLUDED.isactive, " + "noderole = EXCLUDED.noderole, " + "nodecluster = EXCLUDED.nodecluster ," + "metadatasynced = EXCLUDED.metadatasynced, " + "shouldhaveshards = EXCLUDED.shouldhaveshards"; + appendStringInfoString(nodeInsertIdempotentCommand, onConflictStr); + return nodeInsertIdempotentCommand->data; +} + + /* * MarkObjectsDistributedCreateCommand generates a command that can be executed to * insert or update the provided objects into pg_dist_object on a worker node. @@ -3390,7 +3280,6 @@ EnsureCoordinatorInitiatedOperation(void) * by the coordinator. */ if (!(IsCitusInternalBackend() || IsRebalancerInternalBackend()) || - !MyBackendIsInDisributedTransaction() || GetLocalGroupId() == COORDINATOR_GROUP_ID) { ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), @@ -4048,47 +3937,493 @@ ColocationGroupDeleteCommand(uint32 colocationId) /* - * ColocationGroupCreateCommandList returns the full list of commands for syncing - * pg_dist_colocation. + * SetMetadataSyncNodesFromNodeList sets list of nodes that needs to be metadata + * synced among given node list into metadataSyncContext. */ -List * -ColocationGroupCreateCommandList(void) +void +SetMetadataSyncNodesFromNodeList(MetadataSyncContext *context, List *nodeList) { - bool hasColocations = false; + /* sync is disabled, then no nodes to sync */ + if (!EnableMetadataSync) + { + return; + } - StringInfo colocationGroupCreateCommand = makeStringInfo(); - appendStringInfo(colocationGroupCreateCommand, - "WITH colocation_group_data (colocationid, shardcount, " - "replicationfactor, distributioncolumntype, " - "distributioncolumncollationname, " - "distributioncolumncollationschema) AS (VALUES "); + List *activatedWorkerNodeList = NIL; - Relation pgDistColocation = table_open(DistColocationRelationId(), AccessShareLock); - Relation colocationIdIndexRel = index_open(DistColocationIndexId(), AccessShareLock); + WorkerNode *node = NULL; + foreach_ptr(node, nodeList) + { + if (NodeIsPrimary(node)) + { + /* warn if we have coordinator in nodelist */ + if (NodeIsCoordinator(node)) + { + ereport(NOTICE, (errmsg("%s:%d is the coordinator and already contains " + "metadata, skipping syncing the metadata", + node->workerName, node->workerPort))); + continue; + } + + activatedWorkerNodeList = lappend(activatedWorkerNodeList, node); + } + } + + context->activatedWorkerNodeList = activatedWorkerNodeList; +} + + +/* + * EstablishAndSetMetadataSyncBareConnections establishes and sets + * connections used throughout nontransactional metadata sync. 
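+ *
+ * Each connection is marked with ForceConnectionCloseAtTransactionEnd so that it
+ * is closed, rather than kept around, once the current local transaction ends.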
+ */ +void +EstablishAndSetMetadataSyncBareConnections(MetadataSyncContext *context) +{ + Assert(MetadataSyncTransMode == METADATA_SYNC_NON_TRANSACTIONAL); + + int connectionFlags = REQUIRE_METADATA_CONNECTION; + + /* establish bare connections to activated worker nodes */ + List *bareConnectionList = NIL; + WorkerNode *node = NULL; + foreach_ptr(node, context->activatedWorkerNodeList) + { + MultiConnection *connection = GetNodeUserDatabaseConnection(connectionFlags, + node->workerName, + node->workerPort, + CurrentUserName(), + NULL); + + Assert(connection != NULL); + ForceConnectionCloseAtTransactionEnd(connection); + bareConnectionList = lappend(bareConnectionList, connection); + } + + context->activatedWorkerBareConnections = bareConnectionList; +} + + +/* + * CreateMetadataSyncContext creates a context which contains worker connections + * and a MemoryContext to be used throughout the metadata sync. + * + * If we collect commands, connections will not be established as caller's intent + * is to collect sync commands. + * + * If the nodes are newly added before activation, we would not try to unset + * metadatasynced in separate transaction during nontransactional metadatasync. + */ +MetadataSyncContext * +CreateMetadataSyncContext(List *nodeList, bool collectCommands, + bool nodesAddedInSameTransaction) +{ + /* should be alive during local transaction during the sync */ + MemoryContext context = AllocSetContextCreate(TopTransactionContext, + "metadata_sync_context", + ALLOCSET_DEFAULT_SIZES); + + MetadataSyncContext *metadataSyncContext = (MetadataSyncContext *) palloc0( + sizeof(MetadataSyncContext)); + + metadataSyncContext->context = context; + metadataSyncContext->transactionMode = MetadataSyncTransMode; + metadataSyncContext->collectCommands = collectCommands; + metadataSyncContext->collectedCommands = NIL; + metadataSyncContext->nodesAddedInSameTransaction = nodesAddedInSameTransaction; + + /* filter the nodes that needs to be activated from given node list */ + SetMetadataSyncNodesFromNodeList(metadataSyncContext, nodeList); /* - * It is not strictly necessary to read the tuples in order. - * However, it is useful to get consistent behavior, both for regression - * tests and also in production systems. + * establish connections only for nontransactional mode to prevent connection + * open-close for each command */ - SysScanDesc scanDescriptor = - systable_beginscan_ordered(pgDistColocation, colocationIdIndexRel, - NULL, 0, NULL); - - HeapTuple colocationTuple = systable_getnext_ordered(scanDescriptor, - ForwardScanDirection); - - while (HeapTupleIsValid(colocationTuple)) + if (!collectCommands && MetadataSyncTransMode == METADATA_SYNC_NON_TRANSACTIONAL) { - if (hasColocations) + EstablishAndSetMetadataSyncBareConnections(metadataSyncContext); + } + + /* use 2PC coordinated transactions if we operate in transactional mode */ + if (MetadataSyncTransMode == METADATA_SYNC_TRANSACTIONAL) + { + Use2PCForCoordinatedTransaction(); + } + + return metadataSyncContext; +} + + +/* + * ResetMetadataSyncMemoryContext resets memory context inside metadataSyncContext, if + * we are not collecting commands. + */ +void +ResetMetadataSyncMemoryContext(MetadataSyncContext *context) +{ + if (!MetadataSyncCollectsCommands(context)) + { + MemoryContextReset(context->context); + } +} + + +/* + * MetadataSyncCollectsCommands returns whether context is used for collecting + * commands instead of sending them to workers. 
+ */ +bool +MetadataSyncCollectsCommands(MetadataSyncContext *context) +{ + return context->collectCommands; +} + + +/* + * SendOrCollectCommandListToActivatedNodes sends the commands to the activated nodes with + * bare connections inside metadatacontext or via coordinated connections. + * Note that when context only collects commands, we add commands into the context + * without sending the commands. + */ +void +SendOrCollectCommandListToActivatedNodes(MetadataSyncContext *context, List *commands) +{ + /* do nothing if no commands */ + if (commands == NIL) + { + return; + } + + /* + * do not send any command to workers if we collect commands. + * Collect commands into metadataSyncContext's collected command + * list. + */ + if (MetadataSyncCollectsCommands(context)) + { + context->collectedCommands = list_concat(context->collectedCommands, commands); + return; + } + + /* send commands to new workers, the current user should be a superuser */ + Assert(superuser()); + + if (context->transactionMode == METADATA_SYNC_TRANSACTIONAL) + { + List *workerNodes = context->activatedWorkerNodeList; + SendMetadataCommandListToWorkerListInCoordinatedTransaction(workerNodes, + CurrentUserName(), + commands); + } + else if (context->transactionMode == METADATA_SYNC_NON_TRANSACTIONAL) + { + List *workerConnections = context->activatedWorkerBareConnections; + SendCommandListToWorkerListWithBareConnections(workerConnections, commands); + } + else + { + pg_unreachable(); + } +} + + +/* + * SendOrCollectCommandListToMetadataNodes sends the commands to the metadata nodes with + * bare connections inside metadatacontext or via coordinated connections. + * Note that when context only collects commands, we add commands into the context + * without sending the commands. + */ +void +SendOrCollectCommandListToMetadataNodes(MetadataSyncContext *context, List *commands) +{ + /* + * do not send any command to workers if we collcet commands. + * Collect commands into metadataSyncContext's collected command + * list. + */ + if (MetadataSyncCollectsCommands(context)) + { + context->collectedCommands = list_concat(context->collectedCommands, commands); + return; + } + + /* send commands to new workers, the current user should be a superuser */ + Assert(superuser()); + + if (context->transactionMode == METADATA_SYNC_TRANSACTIONAL) + { + List *metadataNodes = TargetWorkerSetNodeList(NON_COORDINATOR_METADATA_NODES, + RowShareLock); + SendMetadataCommandListToWorkerListInCoordinatedTransaction(metadataNodes, + CurrentUserName(), + commands); + } + else if (context->transactionMode == METADATA_SYNC_NON_TRANSACTIONAL) + { + SendBareCommandListToMetadataWorkers(commands); + } + else + { + pg_unreachable(); + } +} + + +/* + * SendOrCollectCommandListToSingleNode sends the commands to the specific worker + * indexed by nodeIdx with bare connection inside metadatacontext or via coordinated + * connection. Note that when context only collects commands, we add commands into + * the context without sending the commands. + */ +void +SendOrCollectCommandListToSingleNode(MetadataSyncContext *context, List *commands, + int nodeIdx) +{ + /* + * Do not send any command to workers if we collect commands. + * Collect commands into metadataSyncContext's collected command + * list. 
+ */ + if (MetadataSyncCollectsCommands(context)) + { + context->collectedCommands = list_concat(context->collectedCommands, commands); + return; + } + + /* send commands to new workers, the current user should be a superuser */ + Assert(superuser()); + + if (context->transactionMode == METADATA_SYNC_TRANSACTIONAL) + { + List *workerNodes = context->activatedWorkerNodeList; + Assert(nodeIdx < list_length(workerNodes)); + + WorkerNode *node = list_nth(workerNodes, nodeIdx); + SendMetadataCommandListToWorkerListInCoordinatedTransaction(list_make1(node), + CurrentUserName(), + commands); + } + else if (context->transactionMode == METADATA_SYNC_NON_TRANSACTIONAL) + { + List *workerConnections = context->activatedWorkerBareConnections; + Assert(nodeIdx < list_length(workerConnections)); + + MultiConnection *workerConnection = list_nth(workerConnections, nodeIdx); + List *connectionList = list_make1(workerConnection); + SendCommandListToWorkerListWithBareConnections(connectionList, commands); + } + else + { + pg_unreachable(); + } +} + + +/* + * WorkerDropAllShellTablesCommand returns command required to drop shell tables + * from workers. When singleTransaction is false, we create transaction per shell + * table. Otherwise, we drop all shell tables within single transaction. + */ +char * +WorkerDropAllShellTablesCommand(bool singleTransaction) +{ + char *singleTransactionString = (singleTransaction) ? "true" : "false"; + StringInfo removeAllShellTablesCommand = makeStringInfo(); + appendStringInfo(removeAllShellTablesCommand, WORKER_DROP_ALL_SHELL_TABLES, + singleTransactionString); + return removeAllShellTablesCommand->data; +} + + +/* + * PropagateNodeWideObjectsCommandList is called during node activation to + * propagate any object that should be propagated for every node. These are + * generally not linked to any distributed object but change system wide behaviour. + */ +static List * +PropagateNodeWideObjectsCommandList(void) +{ + /* collect all commands */ + List *ddlCommands = NIL; + + if (EnableAlterRoleSetPropagation) + { + /* + * Get commands for database and postgres wide settings. Since these settings are not + * linked to any role that can be distributed we need to distribute them seperately + */ + List *alterRoleSetCommands = GenerateAlterRoleSetCommandForRole(InvalidOid); + ddlCommands = list_concat(ddlCommands, alterRoleSetCommands); + } + + return ddlCommands; +} + + +/* + * SyncDistributedObjects sync the distributed objects to the nodes in metadataSyncContext + * with transactional or nontransactional mode according to transactionMode inside + * metadataSyncContext. + * + * Transactions should be ordered like below: + * - Nodewide objects (only roles for now), + * - Deletion of sequence and shell tables and metadata entries + * - All dependencies (e.g., types, schemas, sequences) and all shell distributed + * table and their pg_dist_xx metadata entries + * - Inter relation between those shell tables + * + * Note that we do not create the distributed dependencies on the coordinator + * since all the dependencies should be present in the coordinator already. 
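+ *
+ * In nontransactional mode each command list below is sent over the bare
+ * connections kept in metadataSyncContext rather than inside the coordinated
+ * transaction.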
+ */ +void +SyncDistributedObjects(MetadataSyncContext *context) +{ + if (context->activatedWorkerNodeList == NIL) + { + return; + } + + EnsureSequentialModeMetadataOperations(); + + Assert(ShouldPropagate()); + + /* Send systemwide objects, only roles for now */ + SendNodeWideObjectsSyncCommands(context); + + /* + * Break dependencies between sequences-shell tables, then remove shell tables, + * and metadata tables respectively. + * We should delete shell tables before metadata entries as we look inside + * pg_dist_partition to figure out shell tables. + */ + SendShellTableDeletionCommands(context); + SendMetadataDeletionCommands(context); + + /* + * Commands to insert pg_dist_colocation entries. + * Replicating dist objects and their metadata depends on this step. + */ + SendColocationMetadataCommands(context); + + /* + * Replicate all objects of the pg_dist_object to the remote node and + * create metadata entries for Citus tables (pg_dist_shard, pg_dist_shard_placement, + * pg_dist_partition, pg_dist_object). + */ + SendDependencyCreationCommands(context); + SendDistTableMetadataCommands(context); + SendDistObjectCommands(context); + + /* + * After creating each table, handle the inter table relationship between + * those tables. + */ + SendInterTableRelationshipCommands(context); +} + + +/* + * SendNodeWideObjectsSyncCommands sends systemwide objects to workers with + * transactional or nontransactional mode according to transactionMode inside + * metadataSyncContext. + */ +void +SendNodeWideObjectsSyncCommands(MetadataSyncContext *context) +{ + /* propagate node wide objects. It includes only roles for now. */ + List *commandList = PropagateNodeWideObjectsCommandList(); + + if (commandList == NIL) + { + return; + } + + commandList = lcons(DISABLE_DDL_PROPAGATION, commandList); + commandList = lappend(commandList, ENABLE_DDL_PROPAGATION); + SendOrCollectCommandListToActivatedNodes(context, commandList); +} + + +/* + * SendShellTableDeletionCommands sends sequence, and shell table deletion + * commands to workers with transactional or nontransactional mode according to + * transactionMode inside metadataSyncContext. + */ +void +SendShellTableDeletionCommands(MetadataSyncContext *context) +{ + /* break all sequence deps for citus tables and remove all shell tables */ + char *breakSeqDepsCommand = BREAK_CITUS_TABLE_SEQUENCE_DEPENDENCY_COMMAND; + SendOrCollectCommandListToActivatedNodes(context, list_make1(breakSeqDepsCommand)); + + /* remove shell tables */ + bool singleTransaction = (context->transactionMode == METADATA_SYNC_TRANSACTIONAL); + char *dropShellTablesCommand = WorkerDropAllShellTablesCommand(singleTransaction); + SendOrCollectCommandListToActivatedNodes(context, list_make1(dropShellTablesCommand)); +} + + +/* + * SendMetadataDeletionCommands sends metadata entry deletion commands to workers + * with transactional or nontransactional mode according to transactionMode inside + * metadataSyncContext. 
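+ *
+ * It removes pg_dist_partition, pg_dist_shard, pg_dist_placement, pg_dist_object
+ * and pg_dist_colocation entries, in that order.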
+ */ +void +SendMetadataDeletionCommands(MetadataSyncContext *context) +{ + /* remove pg_dist_partition entries */ + SendOrCollectCommandListToActivatedNodes(context, list_make1(DELETE_ALL_PARTITIONS)); + + /* remove pg_dist_shard entries */ + SendOrCollectCommandListToActivatedNodes(context, list_make1(DELETE_ALL_SHARDS)); + + /* remove pg_dist_placement entries */ + SendOrCollectCommandListToActivatedNodes(context, list_make1(DELETE_ALL_PLACEMENTS)); + + /* remove pg_dist_object entries */ + SendOrCollectCommandListToActivatedNodes(context, + list_make1(DELETE_ALL_DISTRIBUTED_OBJECTS)); + + /* remove pg_dist_colocation entries */ + SendOrCollectCommandListToActivatedNodes(context, list_make1(DELETE_ALL_COLOCATION)); +} + + +/* + * SendColocationMetadataCommands sends colocation metadata with transactional or + * nontransactional mode according to transactionMode inside metadataSyncContext. + */ +void +SendColocationMetadataCommands(MetadataSyncContext *context) +{ + ScanKeyData scanKey[1]; + int scanKeyCount = 0; + + Relation relation = table_open(DistColocationRelationId(), AccessShareLock); + SysScanDesc scanDesc = systable_beginscan(relation, InvalidOid, false, NULL, + scanKeyCount, scanKey); + + MemoryContext oldContext = MemoryContextSwitchTo(context->context); + HeapTuple nextTuple = NULL; + while (true) + { + ResetMetadataSyncMemoryContext(context); + + nextTuple = systable_getnext(scanDesc); + if (!HeapTupleIsValid(nextTuple)) { - appendStringInfo(colocationGroupCreateCommand, ", "); + break; } - hasColocations = true; + StringInfo colocationGroupCreateCommand = makeStringInfo(); + appendStringInfo(colocationGroupCreateCommand, + "WITH colocation_group_data (colocationid, shardcount, " + "replicationfactor, distributioncolumntype, " + "distributioncolumncollationname, " + "distributioncolumncollationschema) AS (VALUES "); Form_pg_dist_colocation colocationForm = - (Form_pg_dist_colocation) GETSTRUCT(colocationTuple); + (Form_pg_dist_colocation) GETSTRUCT(nextTuple); appendStringInfo(colocationGroupCreateCommand, "(%d, %d, %d, %s, ", @@ -4106,20 +4441,17 @@ ColocationGroupCreateCommandList(void) { Datum collationIdDatum = ObjectIdGetDatum(distributionColumCollation); HeapTuple collationTuple = SearchSysCache1(COLLOID, collationIdDatum); - if (HeapTupleIsValid(collationTuple)) { Form_pg_collation collationform = (Form_pg_collation) GETSTRUCT(collationTuple); char *collationName = NameStr(collationform->collname); - char *collationSchemaName = get_namespace_name( - collationform->collnamespace); - + char *collationSchemaName = + get_namespace_name(collationform->collnamespace); appendStringInfo(colocationGroupCreateCommand, "%s, %s)", quote_literal_cstr(collationName), quote_literal_cstr(collationSchemaName)); - ReleaseSysCache(collationTuple); } else @@ -4134,26 +4466,290 @@ ColocationGroupCreateCommandList(void) "NULL, NULL)"); } - colocationTuple = systable_getnext_ordered(scanDescriptor, ForwardScanDirection); + appendStringInfo(colocationGroupCreateCommand, + ") SELECT pg_catalog.citus_internal_add_colocation_metadata(" + "colocationid, shardcount, replicationfactor, " + "distributioncolumntype, coalesce(c.oid, 0)) " + "FROM colocation_group_data d LEFT JOIN pg_collation c " + "ON (d.distributioncolumncollationname = c.collname " + "AND d.distributioncolumncollationschema::regnamespace" + " = c.collnamespace)"); + + List *commandList = list_make1(colocationGroupCreateCommand->data); + SendOrCollectCommandListToActivatedNodes(context, commandList); } + 
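+	/* switch back to the caller's memory context before ending the scan */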
MemoryContextSwitchTo(oldContext); - systable_endscan_ordered(scanDescriptor); - index_close(colocationIdIndexRel, AccessShareLock); - table_close(pgDistColocation, AccessShareLock); - - if (!hasColocations) - { - return NIL; - } - - appendStringInfo(colocationGroupCreateCommand, - ") SELECT pg_catalog.citus_internal_add_colocation_metadata(" - "colocationid, shardcount, replicationfactor, " - "distributioncolumntype, coalesce(c.oid, 0)) " - "FROM colocation_group_data d LEFT JOIN pg_collation c " - "ON (d.distributioncolumncollationname = c.collname " - "AND d.distributioncolumncollationschema::regnamespace" - " = c.collnamespace)"); - - return list_make1(colocationGroupCreateCommand->data); + systable_endscan(scanDesc); + table_close(relation, AccessShareLock); +} + + +/* + * SendDependencyCreationCommands sends dependency creation commands to workers + * with transactional or nontransactional mode according to transactionMode + * inside metadataSyncContext. + */ +void +SendDependencyCreationCommands(MetadataSyncContext *context) +{ + /* disable ddl propagation */ + SendOrCollectCommandListToActivatedNodes(context, + list_make1(DISABLE_DDL_PROPAGATION)); + + MemoryContext oldContext = MemoryContextSwitchTo(context->context); + + /* collect all dependencies in creation order and get their ddl commands */ + List *dependencies = GetDistributedObjectAddressList(); + + /* + * Depending on changes in the environment, such as the enable_metadata_sync guc + * there might be objects in the distributed object address list that should currently + * not be propagated by citus as they are 'not supported'. + */ + dependencies = FilterObjectAddressListByPredicate(dependencies, + &SupportedDependencyByCitus); + + dependencies = OrderObjectAddressListInDependencyOrder(dependencies); + + /* + * We need to create a subcontext as we reset the context after each dependency + * creation but we want to preserve all dependency objects at metadataSyncContext. + */ + MemoryContext commandsContext = AllocSetContextCreate(context->context, + "dependency commands context", + ALLOCSET_DEFAULT_SIZES); + MemoryContextSwitchTo(commandsContext); + ObjectAddress *dependency = NULL; + foreach_ptr(dependency, dependencies) + { + if (!MetadataSyncCollectsCommands(context)) + { + MemoryContextReset(commandsContext); + } + + if (IsAnyObjectAddressOwnedByExtension(list_make1(dependency), NULL)) + { + /* + * We expect extension-owned objects to be created as a result + * of the extension being created. + */ + continue; + } + + /* dependency creation commands */ + List *ddlCommands = GetAllDependencyCreateDDLCommands(list_make1(dependency)); + SendOrCollectCommandListToActivatedNodes(context, ddlCommands); + } + MemoryContextSwitchTo(oldContext); + + if (!MetadataSyncCollectsCommands(context)) + { + MemoryContextDelete(commandsContext); + } + ResetMetadataSyncMemoryContext(context); + + /* enable ddl propagation */ + SendOrCollectCommandListToActivatedNodes(context, list_make1(ENABLE_DDL_PROPAGATION)); +} + + +/* + * SendDistTableMetadataCommands sends commands related to pg_dist_shard and, + * pg_dist_shard_placement entries to workers with transactional or nontransactional + * mode according to transactionMode inside metadataSyncContext. 
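+ *
+ * pg_dist_partition entries are created as part of the same per-table command
+ * list; only tables for which ShouldSyncTableMetadata() returns true are synced.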
+ */ +void +SendDistTableMetadataCommands(MetadataSyncContext *context) +{ + ScanKeyData scanKey[1]; + int scanKeyCount = 0; + + Relation relation = table_open(DistPartitionRelationId(), AccessShareLock); + TupleDesc tupleDesc = RelationGetDescr(relation); + + SysScanDesc scanDesc = systable_beginscan(relation, InvalidOid, false, NULL, + scanKeyCount, scanKey); + + MemoryContext oldContext = MemoryContextSwitchTo(context->context); + HeapTuple nextTuple = NULL; + while (true) + { + ResetMetadataSyncMemoryContext(context); + + nextTuple = systable_getnext(scanDesc); + if (!HeapTupleIsValid(nextTuple)) + { + break; + } + + /* + * Create Citus table metadata commands (pg_dist_shard, pg_dist_shard_placement, + * pg_dist_partition). Only Citus tables have shard metadata. + */ + Oid relationId = FetchRelationIdFromPgPartitionHeapTuple(nextTuple, tupleDesc); + if (!ShouldSyncTableMetadata(relationId)) + { + continue; + } + + List *commandList = CitusTableMetadataCreateCommandList(relationId); + SendOrCollectCommandListToActivatedNodes(context, commandList); + } + MemoryContextSwitchTo(oldContext); + + systable_endscan(scanDesc); + table_close(relation, AccessShareLock); +} + + +/* + * SendDistObjectCommands sends commands related to pg_dist_object entries to + * workers with transactional or nontransactional mode according to transactionMode + * inside metadataSyncContext. + */ +void +SendDistObjectCommands(MetadataSyncContext *context) +{ + ScanKeyData scanKey[1]; + int scanKeyCount = 0; + + Relation relation = table_open(DistObjectRelationId(), AccessShareLock); + TupleDesc tupleDesc = RelationGetDescr(relation); + + SysScanDesc scanDesc = systable_beginscan(relation, InvalidOid, false, NULL, + scanKeyCount, scanKey); + + MemoryContext oldContext = MemoryContextSwitchTo(context->context); + HeapTuple nextTuple = NULL; + while (true) + { + ResetMetadataSyncMemoryContext(context); + + nextTuple = systable_getnext(scanDesc); + if (!HeapTupleIsValid(nextTuple)) + { + break; + } + + Form_pg_dist_object pg_dist_object = (Form_pg_dist_object) GETSTRUCT(nextTuple); + + ObjectAddress *address = palloc(sizeof(ObjectAddress)); + + ObjectAddressSubSet(*address, pg_dist_object->classid, pg_dist_object->objid, + pg_dist_object->objsubid); + + bool distributionArgumentIndexIsNull = false; + Datum distributionArgumentIndexDatum = + heap_getattr(nextTuple, + Anum_pg_dist_object_distribution_argument_index, + tupleDesc, + &distributionArgumentIndexIsNull); + int32 distributionArgumentIndex = DatumGetInt32(distributionArgumentIndexDatum); + + bool colocationIdIsNull = false; + Datum colocationIdDatum = + heap_getattr(nextTuple, + Anum_pg_dist_object_colocationid, + tupleDesc, + &colocationIdIsNull); + int32 colocationId = DatumGetInt32(colocationIdDatum); + + bool forceDelegationIsNull = false; + Datum forceDelegationDatum = + heap_getattr(nextTuple, + Anum_pg_dist_object_force_delegation, + tupleDesc, + &forceDelegationIsNull); + bool forceDelegation = DatumGetBool(forceDelegationDatum); + + if (distributionArgumentIndexIsNull) + { + distributionArgumentIndex = INVALID_DISTRIBUTION_ARGUMENT_INDEX; + } + + if (colocationIdIsNull) + { + colocationId = INVALID_COLOCATION_ID; + } + + if (forceDelegationIsNull) + { + forceDelegation = NO_FORCE_PUSHDOWN; + } + + char *workerMetadataUpdateCommand = + MarkObjectsDistributedCreateCommand(list_make1(address), + list_make1_int(distributionArgumentIndex), + list_make1_int(colocationId), + list_make1_int(forceDelegation)); + SendOrCollectCommandListToActivatedNodes(context, 
+ list_make1(workerMetadataUpdateCommand)); + } + MemoryContextSwitchTo(oldContext); + + systable_endscan(scanDesc); + relation_close(relation, NoLock); +} + + +/* + * SendInterTableRelationshipCommands sends inter-table relationship commands + * (e.g. constraints, attach partitions) to workers with transactional or + * nontransactional mode per inter table relationship according to transactionMode + * inside metadataSyncContext. + */ +void +SendInterTableRelationshipCommands(MetadataSyncContext *context) +{ + /* disable ddl propagation */ + SendOrCollectCommandListToActivatedNodes(context, + list_make1(DISABLE_DDL_PROPAGATION)); + + ScanKeyData scanKey[1]; + int scanKeyCount = 0; + + Relation relation = table_open(DistPartitionRelationId(), AccessShareLock); + TupleDesc tupleDesc = RelationGetDescr(relation); + + SysScanDesc scanDesc = systable_beginscan(relation, InvalidOid, false, NULL, + scanKeyCount, scanKey); + + MemoryContext oldContext = MemoryContextSwitchTo(context->context); + HeapTuple nextTuple = NULL; + while (true) + { + ResetMetadataSyncMemoryContext(context); + + nextTuple = systable_getnext(scanDesc); + if (!HeapTupleIsValid(nextTuple)) + { + break; + } + + Oid relationId = FetchRelationIdFromPgPartitionHeapTuple(nextTuple, tupleDesc); + if (!ShouldSyncTableMetadata(relationId)) + { + continue; + } + + /* + * Skip foreign key and partition creation when the Citus table is + * owned by an extension. + */ + if (IsTableOwnedByExtension(relationId)) + { + continue; + } + + List *commandList = InterTableRelationshipOfRelationCommandList(relationId); + SendOrCollectCommandListToActivatedNodes(context, commandList); + } + MemoryContextSwitchTo(oldContext); + + systable_endscan(scanDesc); + table_close(relation, AccessShareLock); + + /* enable ddl propagation */ + SendOrCollectCommandListToActivatedNodes(context, list_make1(ENABLE_DDL_PROPAGATION)); } diff --git a/src/backend/distributed/metadata/node_metadata.c b/src/backend/distributed/metadata/node_metadata.c index 91ffca4fe..2639b79f0 100644 --- a/src/backend/distributed/metadata/node_metadata.c +++ b/src/backend/distributed/metadata/node_metadata.c @@ -48,6 +48,7 @@ #include "distributed/version_compat.h" #include "distributed/worker_manager.h" #include "distributed/worker_transaction.h" +#include "executor/spi.h" #include "lib/stringinfo.h" #include "postmaster/postmaster.h" #include "storage/bufmgr.h" @@ -90,24 +91,21 @@ static void RemoveNodeFromCluster(char *nodeName, int32 nodePort); static void ErrorIfNodeContainsNonRemovablePlacements(WorkerNode *workerNode); static bool PlacementHasActivePlacementOnAnotherGroup(GroupShardPlacement *sourcePlacement); -static int AddNodeMetadata(char *nodeName, int32 nodePort, NodeMetadata - *nodeMetadata, bool *nodeAlreadyExists); -static WorkerNode * SetNodeState(char *nodeName, int32 nodePort, bool isActive); +static int AddNodeMetadata(char *nodeName, int32 nodePort, NodeMetadata *nodeMetadata, + bool *nodeAlreadyExists, bool localOnly); +static int AddNodeMetadataViaMetadataContext(char *nodeName, int32 nodePort, + NodeMetadata *nodeMetadata, + bool *nodeAlreadyExists); static HeapTuple GetNodeTuple(const char *nodeName, int32 nodePort); +static HeapTuple GetNodeByNodeId(int32 nodeId); static int32 GetNextGroupId(void); static int GetNextNodeId(void); static void InsertPlaceholderCoordinatorRecord(void); static void InsertNodeRow(int nodeid, char *nodename, int32 nodeport, NodeMetadata *nodeMetadata); static void DeleteNodeRow(char *nodename, int32 nodeport); -static void 
SyncDistributedObjectsToNodeList(List *workerNodeList); -static void UpdateLocalGroupIdOnNode(WorkerNode *workerNode); -static void SyncPgDistTableMetadataToNodeList(List *nodeList); -static List * InterTableRelationshipCommandList(); static void BlockDistributedQueriesOnMetadataNodes(void); static WorkerNode * TupleToWorkerNode(TupleDesc tupleDescriptor, HeapTuple heapTuple); -static List * PropagateNodeWideObjectsCommandList(); -static WorkerNode * ModifiableWorkerNode(const char *nodeName, int32 nodePort); static bool NodeIsLocal(WorkerNode *worker); static void SetLockTimeoutLocally(int32 lock_cooldown); static void UpdateNodeLocation(int32 nodeId, char *newNodeName, int32 newNodePort); @@ -122,6 +120,19 @@ static void ErrorIfCoordinatorMetadataSetFalse(WorkerNode *workerNode, Datum val static WorkerNode * SetShouldHaveShards(WorkerNode *workerNode, bool shouldHaveShards); static int FindCoordinatorNodeId(void); static WorkerNode * FindNodeAnyClusterByNodeId(uint32 nodeId); +static void ErrorIfAnyNodeNotExist(List *nodeList); +static void UpdateLocalGroupIdsViaMetadataContext(MetadataSyncContext *context); +static void SendDeletionCommandsForReplicatedTablePlacements( + MetadataSyncContext *context); +static void SyncNodeMetadata(MetadataSyncContext *context); +static void SetNodeStateViaMetadataContext(MetadataSyncContext *context, + WorkerNode *workerNode, + Datum value); +static void MarkNodesNotSyncedInLoopBackConnection(MetadataSyncContext *context, + pid_t parentSessionPid); +static void EnsureParentSessionHasExclusiveLockOnPgDistNode(pid_t parentSessionPid); +static void SetNodeMetadata(MetadataSyncContext *context, bool localOnly); +static void EnsureTransactionalMetadataSyncMode(void); /* declarations for dynamic loading */ PG_FUNCTION_INFO_V1(citus_set_coordinator_host); @@ -146,6 +157,7 @@ PG_FUNCTION_INFO_V1(citus_nodename_for_nodeid); PG_FUNCTION_INFO_V1(citus_nodeport_for_nodeid); PG_FUNCTION_INFO_V1(citus_coordinator_nodeid); PG_FUNCTION_INFO_V1(citus_is_coordinator); +PG_FUNCTION_INFO_V1(citus_internal_mark_node_not_synced); /* @@ -188,16 +200,26 @@ citus_set_coordinator_host(PG_FUNCTION_ARGS) Name nodeClusterName = PG_GETARG_NAME(3); nodeMetadata.nodeCluster = NameStr(*nodeClusterName); + /* + * We do not allow metadata operations on secondary nodes in nontransactional + * sync mode. + */ + if (nodeMetadata.nodeRole == SecondaryNodeRoleId()) + { + EnsureTransactionalMetadataSyncMode(); + } + bool isCoordinatorInMetadata = false; WorkerNode *coordinatorNode = PrimaryNodeForGroup(COORDINATOR_GROUP_ID, &isCoordinatorInMetadata); if (!isCoordinatorInMetadata) { bool nodeAlreadyExists = false; + bool localOnly = false; /* add the coordinator to pg_dist_node if it was not already added */ AddNodeMetadata(nodeNameString, nodePort, &nodeMetadata, - &nodeAlreadyExists); + &nodeAlreadyExists, localOnly); /* we just checked */ Assert(!nodeAlreadyExists); @@ -222,6 +244,21 @@ citus_set_coordinator_host(PG_FUNCTION_ARGS) } +/* + * EnsureTransactionalMetadataSyncMode ensures metadata sync mode is transactional. + */ +static void +EnsureTransactionalMetadataSyncMode(void) +{ + if (MetadataSyncTransMode == METADATA_SYNC_NON_TRANSACTIONAL) + { + ereport(ERROR, (errmsg("this operation cannot be completed in nontransactional " + "metadata sync mode"), + errhint("SET citus.metadata_sync_mode to 'transactional'"))); + } +} + + /* * citus_add_node function adds a new node to the cluster and returns its id. It also * replicates all reference tables to the new node. 
@@ -231,6 +268,9 @@ citus_add_node(PG_FUNCTION_ARGS) { CheckCitusVersion(ERROR); + EnsureSuperUser(); + EnsureCoordinator(); + text *nodeName = PG_GETARG_TEXT_P(0); int32 nodePort = PG_GETARG_INT32(1); char *nodeNameString = text_to_cstring(nodeName); @@ -262,38 +302,33 @@ citus_add_node(PG_FUNCTION_ARGS) nodeMetadata.shouldHaveShards = false; } - int nodeId = AddNodeMetadata(nodeNameString, nodePort, &nodeMetadata, - &nodeAlreadyExists); - TransactionModifiedNodeMetadata = true; - /* - * After adding new node, if the node did not already exist, we will activate - * the node. This means we will replicate all reference tables to the new - * node. + * We do not allow metadata operations on secondary nodes in nontransactional + * sync mode. */ - if (!nodeAlreadyExists) + if (nodeMetadata.nodeRole == SecondaryNodeRoleId()) { - WorkerNode *workerNode = FindWorkerNodeAnyCluster(nodeNameString, nodePort); - - /* - * If the worker is not marked as a coordinator, check that - * the node is not trying to add itself - */ - if (workerNode != NULL && - workerNode->groupId != COORDINATOR_GROUP_ID && - workerNode->nodeRole != SecondaryNodeRoleId() && - IsWorkerTheCurrentNode(workerNode)) - { - ereport(ERROR, (errmsg("Node cannot add itself as a worker."), - errhint( - "Add the node as a coordinator by using: " - "SELECT citus_set_coordinator_host('%s', %d);", - nodeNameString, nodePort))); - } - - ActivateNode(nodeNameString, nodePort); + EnsureTransactionalMetadataSyncMode(); } + if (MetadataSyncTransMode == METADATA_SYNC_NON_TRANSACTIONAL && + IsMultiStatementTransaction()) + { + /* + * prevent inside transaction block as we use bare connections which can + * lead deadlock + */ + ereport(ERROR, (errmsg("do not add node in transaction block " + "when the sync mode is nontransactional"), + errhint("add the node after SET citus.metadata_sync_mode " + "TO 'transactional'"))); + } + + int nodeId = AddNodeMetadataViaMetadataContext(nodeNameString, nodePort, + &nodeMetadata, + &nodeAlreadyExists); + TransactionModifiedNodeMetadata = true; + PG_RETURN_INT32(nodeId); } @@ -334,8 +369,18 @@ citus_add_inactive_node(PG_FUNCTION_ARGS) ereport(ERROR, (errmsg("coordinator node cannot be added as inactive node"))); } + /* + * We do not allow metadata operations on secondary nodes in nontransactional + * sync mode. + */ + if (nodeMetadata.nodeRole == SecondaryNodeRoleId()) + { + EnsureTransactionalMetadataSyncMode(); + } + + bool localOnly = false; int nodeId = AddNodeMetadata(nodeNameString, nodePort, &nodeMetadata, - &nodeAlreadyExists); + &nodeAlreadyExists, localOnly); TransactionModifiedNodeMetadata = true; PG_RETURN_INT32(nodeId); @@ -378,8 +423,15 @@ citus_add_secondary_node(PG_FUNCTION_ARGS) nodeMetadata.nodeRole = SecondaryNodeRoleId(); nodeMetadata.isActive = true; + /* + * We do not allow metadata operations on secondary nodes in nontransactional + * sync mode. + */ + EnsureTransactionalMetadataSyncMode(); + + bool localOnly = false; int nodeId = AddNodeMetadata(nodeNameString, nodePort, &nodeMetadata, - &nodeAlreadyExists); + &nodeAlreadyExists, localOnly); TransactionModifiedNodeMetadata = true; PG_RETURN_INT32(nodeId); @@ -457,6 +509,15 @@ citus_disable_node(PG_FUNCTION_ARGS) ErrorIfCoordinatorMetadataSetFalse(workerNode, BoolGetDatum(isActive), "isactive"); + /* + * We do not allow metadata operations on secondary nodes in nontransactional + * sync mode. 
+ */ + if (NodeIsSecondary(workerNode)) + { + EnsureTransactionalMetadataSyncMode(); + } + WorkerNode *firstWorkerNode = GetFirstPrimaryWorkerNode(); bool disablingFirstNode = (firstWorkerNode && firstWorkerNode->nodeId == workerNode->nodeId); @@ -615,6 +676,15 @@ citus_set_node_property(PG_FUNCTION_ARGS) WorkerNode *workerNode = ModifiableWorkerNode(text_to_cstring(nodeNameText), nodePort); + /* + * We do not allow metadata operations on secondary nodes in nontransactional + * sync mode. + */ + if (NodeIsSecondary(workerNode)) + { + EnsureTransactionalMetadataSyncMode(); + } + if (strcmp(text_to_cstring(propertyText), "shouldhaveshards") == 0) { SetShouldHaveShards(workerNode, value); @@ -642,308 +712,11 @@ master_set_node_property(PG_FUNCTION_ARGS) } -/* - * InterTableRelationshipCommandList returns the command list to - * set up the multiple integrations including - * - * (i) Foreign keys - * (ii) Partionining hierarchy - * - * for each citus table. - */ -static List * -InterTableRelationshipCommandList() -{ - List *distributedTableList = CitusTableList(); - List *propagatedTableList = NIL; - List *multipleTableIntegrationCommandList = NIL; - - CitusTableCacheEntry *cacheEntry = NULL; - foreach_ptr(cacheEntry, distributedTableList) - { - /* - * Skip foreign key and partition creation when we shouldn't need to sync - * tablem metadata or the Citus table is owned by an extension. - */ - if (ShouldSyncTableMetadata(cacheEntry->relationId) && - !IsTableOwnedByExtension(cacheEntry->relationId)) - { - propagatedTableList = lappend(propagatedTableList, cacheEntry); - } - } - - foreach_ptr(cacheEntry, propagatedTableList) - { - Oid relationId = cacheEntry->relationId; - - List *commandListForRelation = - InterTableRelationshipOfRelationCommandList(relationId); - - multipleTableIntegrationCommandList = list_concat( - multipleTableIntegrationCommandList, - commandListForRelation); - } - - multipleTableIntegrationCommandList = lcons(DISABLE_DDL_PROPAGATION, - multipleTableIntegrationCommandList); - multipleTableIntegrationCommandList = lappend(multipleTableIntegrationCommandList, - ENABLE_DDL_PROPAGATION); - - return multipleTableIntegrationCommandList; -} - - -/* - * PgDistTableMetadataSyncCommandList returns the command list to sync the pg_dist_* - * (except pg_dist_node) metadata. We call them as table metadata. 
- */ -List * -PgDistTableMetadataSyncCommandList(void) -{ - List *distributedTableList = CitusTableList(); - List *propagatedTableList = NIL; - List *metadataSnapshotCommandList = NIL; - - /* create the list of tables whose metadata will be created */ - CitusTableCacheEntry *cacheEntry = NULL; - foreach_ptr(cacheEntry, distributedTableList) - { - if (ShouldSyncTableMetadata(cacheEntry->relationId)) - { - propagatedTableList = lappend(propagatedTableList, cacheEntry); - } - } - - /* remove all dist table and object related metadata first */ - metadataSnapshotCommandList = lappend(metadataSnapshotCommandList, - DELETE_ALL_PARTITIONS); - metadataSnapshotCommandList = lappend(metadataSnapshotCommandList, DELETE_ALL_SHARDS); - metadataSnapshotCommandList = lappend(metadataSnapshotCommandList, - DELETE_ALL_PLACEMENTS); - metadataSnapshotCommandList = lappend(metadataSnapshotCommandList, - DELETE_ALL_DISTRIBUTED_OBJECTS); - metadataSnapshotCommandList = lappend(metadataSnapshotCommandList, - DELETE_ALL_COLOCATION); - - /* create pg_dist_partition, pg_dist_shard and pg_dist_placement entries */ - foreach_ptr(cacheEntry, propagatedTableList) - { - List *tableMetadataCreateCommandList = - CitusTableMetadataCreateCommandList(cacheEntry->relationId); - - metadataSnapshotCommandList = list_concat(metadataSnapshotCommandList, - tableMetadataCreateCommandList); - } - - /* commands to insert pg_dist_colocation entries */ - List *colocationGroupSyncCommandList = ColocationGroupCreateCommandList(); - metadataSnapshotCommandList = list_concat(metadataSnapshotCommandList, - colocationGroupSyncCommandList); - - List *distributedObjectSyncCommandList = DistributedObjectMetadataSyncCommandList(); - metadataSnapshotCommandList = list_concat(metadataSnapshotCommandList, - distributedObjectSyncCommandList); - - metadataSnapshotCommandList = lcons(DISABLE_DDL_PROPAGATION, - metadataSnapshotCommandList); - metadataSnapshotCommandList = lappend(metadataSnapshotCommandList, - ENABLE_DDL_PROPAGATION); - - return metadataSnapshotCommandList; -} - - -/* - * PropagateNodeWideObjectsCommandList is called during node activation to - * propagate any object that should be propagated for every node. These are - * generally not linked to any distributed object but change system wide behaviour. - */ -static List * -PropagateNodeWideObjectsCommandList() -{ - /* collect all commands */ - List *ddlCommands = NIL; - - if (EnableAlterRoleSetPropagation) - { - /* - * Get commands for database and postgres wide settings. Since these settings are not - * linked to any role that can be distributed we need to distribute them seperately - */ - List *alterRoleSetCommands = GenerateAlterRoleSetCommandForRole(InvalidOid); - ddlCommands = list_concat(ddlCommands, alterRoleSetCommands); - } - - if (list_length(ddlCommands) > 0) - { - /* if there are command wrap them in enable_ddl_propagation off */ - ddlCommands = lcons(DISABLE_DDL_PROPAGATION, ddlCommands); - ddlCommands = lappend(ddlCommands, ENABLE_DDL_PROPAGATION); - } - - return ddlCommands; -} - - -/* - * SyncDistributedObjectsCommandList returns commands to sync object dependencies - * to the given worker node. To be idempotent, it first drops the ones required to be - * dropped. - * - * Object dependencies include: - * - * - All dependencies (e.g., types, schemas, sequences) - * - All shell distributed tables - * - Inter relation between those shell tables - * - Node wide objects - * - * We also update the local group id here, as handling sequence dependencies - * requires it. 
- */ -List * -SyncDistributedObjectsCommandList(WorkerNode *workerNode) -{ - List *commandList = NIL; - - /* - * Propagate node wide objects. It includes only roles for now. - */ - commandList = list_concat(commandList, PropagateNodeWideObjectsCommandList()); - - /* - * Detach partitions, break dependencies between sequences and table then - * remove shell tables first. - */ - commandList = list_concat(commandList, DetachPartitionCommandList()); - commandList = lappend(commandList, BREAK_CITUS_TABLE_SEQUENCE_DEPENDENCY_COMMAND); - commandList = lappend(commandList, REMOVE_ALL_SHELL_TABLES_COMMAND); - - /* - * Replicate all objects of the pg_dist_object to the remote node. - */ - commandList = list_concat(commandList, ReplicateAllObjectsToNodeCommandList( - workerNode->workerName, workerNode->workerPort)); - - /* - * After creating each table, handle the inter table relationship between - * those tables. - */ - commandList = list_concat(commandList, InterTableRelationshipCommandList()); - - return commandList; -} - - -/* - * SyncDistributedObjectsToNodeList sync the distributed objects to the node. It includes - * - All dependencies (e.g., types, schemas, sequences) - * - All shell distributed table - * - Inter relation between those shell tables - * - * Note that we do not create the distributed dependencies on the coordinator - * since all the dependencies should be present in the coordinator already. - */ -static void -SyncDistributedObjectsToNodeList(List *workerNodeList) -{ - List *workerNodesToSync = NIL; - WorkerNode *workerNode = NULL; - foreach_ptr(workerNode, workerNodeList) - { - if (NodeIsCoordinator(workerNode)) - { - /* coordinator has all the objects */ - continue; - } - - if (!NodeIsPrimary(workerNode)) - { - /* secondary nodes gets the objects from their primaries via replication */ - continue; - } - - workerNodesToSync = lappend(workerNodesToSync, workerNode); - } - - if (workerNodesToSync == NIL) - { - return; - } - - EnsureSequentialModeMetadataOperations(); - - Assert(ShouldPropagate()); - - List *commandList = SyncDistributedObjectsCommandList(workerNode); - - /* send commands to new workers, the current user should be a superuser */ - Assert(superuser()); - SendMetadataCommandListToWorkerListInCoordinatedTransaction( - workerNodesToSync, - CurrentUserName(), - commandList); -} - - -/* - * UpdateLocalGroupIdOnNode updates local group id on node. - */ -static void -UpdateLocalGroupIdOnNode(WorkerNode *workerNode) -{ - if (NodeIsPrimary(workerNode) && !NodeIsCoordinator(workerNode)) - { - List *commandList = list_make1(LocalGroupIdUpdateCommand(workerNode->groupId)); - - /* send commands to new workers, the current user should be a superuser */ - Assert(superuser()); - SendMetadataCommandListToWorkerListInCoordinatedTransaction( - list_make1(workerNode), - CurrentUserName(), - commandList); - } -} - - -/* - * SyncPgDistTableMetadataToNodeList syncs the pg_dist_partition, pg_dist_shard - * pg_dist_placement and pg_dist_object metadata entries. 
- * - */ -static void -SyncPgDistTableMetadataToNodeList(List *nodeList) -{ - /* send commands to new workers, the current user should be a superuser */ - Assert(superuser()); - - List *nodesWithMetadata = NIL; - WorkerNode *workerNode = NULL; - foreach_ptr(workerNode, nodeList) - { - if (NodeIsPrimary(workerNode) && !NodeIsCoordinator(workerNode)) - { - nodesWithMetadata = lappend(nodesWithMetadata, workerNode); - } - } - - if (nodesWithMetadata == NIL) - { - return; - } - - List *syncPgDistMetadataCommandList = PgDistTableMetadataSyncCommandList(); - SendMetadataCommandListToWorkerListInCoordinatedTransaction( - nodesWithMetadata, - CurrentUserName(), - syncPgDistMetadataCommandList); -} - - /* * ModifiableWorkerNode gets the requested WorkerNode and also gets locks * required for modifying it. This fails if the node does not exist. */ -static WorkerNode * +WorkerNode * ModifiableWorkerNode(const char *nodeName, int32 nodePort) { CheckCitusVersion(ERROR); @@ -972,10 +745,30 @@ citus_activate_node(PG_FUNCTION_ARGS) text *nodeNameText = PG_GETARG_TEXT_P(0); int32 nodePort = PG_GETARG_INT32(1); - WorkerNode *workerNode = ModifiableWorkerNode(text_to_cstring(nodeNameText), - nodePort); - ActivateNode(workerNode->workerName, workerNode->workerPort); + char *nodeNameString = text_to_cstring(nodeNameText); + WorkerNode *workerNode = ModifiableWorkerNode(nodeNameString, nodePort); + /* + * We do not allow metadata operations on secondary nodes in nontransactional + * sync mode. + */ + if (NodeIsSecondary(workerNode)) + { + EnsureTransactionalMetadataSyncMode(); + } + + /* + * Create MetadataSyncContext which is used throughout nodes' activation. + * It contains activated nodes, bare connections if the mode is nontransactional, + * and a memory context for allocation. + */ + bool collectCommands = false; + bool nodesAddedInSameTransaction = false; + MetadataSyncContext *context = CreateMetadataSyncContext(list_make1(workerNode), + collectCommands, + nodesAddedInSameTransaction); + + ActivateNodeList(context); TransactionModifiedNodeMetadata = true; PG_RETURN_INT32(workerNode->nodeId); @@ -1131,14 +924,145 @@ PrimaryNodeForGroup(int32 groupId, bool *groupContainsNodes) /* - * ActivateNodeList iterates over the nodeList and activates the nodes. - * Some part of the node activation is done parallel across the nodes, - * such as syncing the metadata. However, reference table replication is - * done one by one across nodes. + * MarkNodesNotSyncedInLoopBackConnection unsets metadatasynced flag in separate + * connection to localhost by calling the udf `citus_internal_mark_node_not_synced`. + */ +static void +MarkNodesNotSyncedInLoopBackConnection(MetadataSyncContext *context, + pid_t parentSessionPid) +{ + Assert(context->transactionMode == METADATA_SYNC_NON_TRANSACTIONAL); + Assert(!MetadataSyncCollectsCommands(context)); + + /* + * Set metadatasynced to false for all activated nodes to mark the nodes as not synced + * in case nontransactional metadata sync fails before we activate the nodes inside + * metadataSyncContext. + * We set metadatasynced to false at coordinator to mark the nodes as not synced. But we + * do not set isactive and hasmetadata flags to false as we still want to route queries + * to the nodes if their isactive flag is true and propagate DDL to the nodes if possible. + * + * NOTES: + * 1) We use separate connection to localhost as we would rollback the local + * transaction in case of failure. + * 2) Operator should handle problems at workers if any. 
Wworkers probably fail + * due to improper metadata when a query hits. Or DDL might fail due to desynced + * nodes. (when hasmetadata = true, metadatasynced = false) + * In those cases, proper metadata sync for the workers should be done.) + */ + + /* + * Because we try to unset metadatasynced flag with a separate transaction, + * we could not find the new node if the node is added in the current local + * transaction. But, hopefully, we do not need to unset metadatasynced for + * the new node as local transaction would rollback in case of a failure. + */ + if (context->nodesAddedInSameTransaction) + { + return; + } + + if (context->activatedWorkerNodeList == NIL) + { + return; + } + + int connectionFlag = FORCE_NEW_CONNECTION; + MultiConnection *connection = GetNodeConnection(connectionFlag, LocalHostName, + PostPortNumber); + + List *commandList = NIL; + WorkerNode *workerNode = NULL; + foreach_ptr(workerNode, context->activatedWorkerNodeList) + { + /* + * We need to prevent self deadlock when we access pg_dist_node using separate + * connection to localhost. To achieve this, we check if the caller session's + * pid holds the Exclusive lock on pg_dist_node. After ensuring that (we are + * called from parent session which holds the Exclusive lock), we can safely + * update node metadata by acquiring the relaxed lock. + */ + StringInfo metadatasyncCommand = makeStringInfo(); + appendStringInfo(metadatasyncCommand, CITUS_INTERNAL_MARK_NODE_NOT_SYNCED, + parentSessionPid, workerNode->nodeId); + commandList = lappend(commandList, metadatasyncCommand->data); + } + + SendCommandListToWorkerOutsideTransactionWithConnection(connection, commandList); + CloseConnection(connection); +} + + +/* + * SetNodeMetadata sets isactive, metadatasynced and hasmetadata flags locally + * and, if required, remotely. + */ +static void +SetNodeMetadata(MetadataSyncContext *context, bool localOnly) +{ + /* do not execute local transaction if we collect commands */ + if (!MetadataSyncCollectsCommands(context)) + { + List *updatedActivatedNodeList = NIL; + + WorkerNode *node = NULL; + foreach_ptr(node, context->activatedWorkerNodeList) + { + node = SetWorkerColumnLocalOnly(node, Anum_pg_dist_node_isactive, + BoolGetDatum(true)); + node = SetWorkerColumnLocalOnly(node, Anum_pg_dist_node_metadatasynced, + BoolGetDatum(true)); + node = SetWorkerColumnLocalOnly(node, Anum_pg_dist_node_hasmetadata, + BoolGetDatum(true)); + + updatedActivatedNodeList = lappend(updatedActivatedNodeList, node); + } + + /* reset activated nodes inside metadataSyncContext afer local update */ + SetMetadataSyncNodesFromNodeList(context, updatedActivatedNodeList); + } + + if (!localOnly && EnableMetadataSync) + { + WorkerNode *node = NULL; + foreach_ptr(node, context->activatedWorkerNodeList) + { + SetNodeStateViaMetadataContext(context, node, BoolGetDatum(true)); + } + } +} + + +/* + * ActivateNodeList does some sanity checks and acquire Exclusive lock on pg_dist_node, + * and then activates the nodes inside given metadataSyncContext. + * + * The function operates in 3 different modes according to transactionMode inside + * metadataSyncContext. + * + * 1. MetadataSyncCollectsCommands(context): + * Only collect commands instead of sending them to workers, + * 2. context.transactionMode == METADATA_SYNC_TRANSACTIONAL: + * Send all commands using coordinated transaction, + * 3. context.transactionMode == METADATA_SYNC_NON_TRANSACTIONAL: + * Send all commands using bare (no transaction block) connections. 
*/ void -ActivateNodeList(List *nodeList) +ActivateNodeList(MetadataSyncContext *context) { + if (context->transactionMode == METADATA_SYNC_NON_TRANSACTIONAL && + IsMultiStatementTransaction()) + { + /* + * prevent inside transaction block as we use bare connections which can + * lead deadlock + */ + ereport(ERROR, (errmsg("do not sync metadata in transaction block " + "when the sync mode is nontransactional"), + errhint("resync after SET citus.metadata_sync_mode " + "TO 'transactional'"))); + } + /* * We currently require the object propagation to happen via superuser, * see #5139. While activating a node, we sync both metadata and object @@ -1152,122 +1076,86 @@ ActivateNodeList(List *nodeList) */ EnsureSuperUser(); - /* take an exclusive lock on pg_dist_node to serialize pg_dist_node changes */ + /* + * Take an exclusive lock on pg_dist_node to serialize pg_dist_node + * changes. + */ LockRelationOid(DistNodeRelationId(), ExclusiveLock); + /* + * Error if there is concurrent change to node table before acquiring + * the lock + */ + ErrorIfAnyNodeNotExist(context->activatedWorkerNodeList); - List *nodeToSyncMetadata = NIL; - WorkerNode *node = NULL; - foreach_ptr(node, nodeList) + /* + * we need to unset metadatasynced flag to false at coordinator in separate + * transaction only at nontransactional sync mode and if we do not collect + * commands. + * + * We make sure we set the flag to false at the start of nontransactional + * metadata sync to mark those nodes are not synced in case of a failure in + * the middle of the sync. + */ + if (context->transactionMode == METADATA_SYNC_NON_TRANSACTIONAL && + !MetadataSyncCollectsCommands(context)) { - /* - * First, locally mark the node is active, if everything goes well, - * we are going to sync this information to all the metadata nodes. - */ - WorkerNode *workerNode = - FindWorkerNodeAnyCluster(node->workerName, node->workerPort); - if (workerNode == NULL) - { - ereport(ERROR, (errmsg("node at \"%s:%u\" does not exist", node->workerName, - node->workerPort))); - } - - /* both nodes should be the same */ - Assert(workerNode->nodeId == node->nodeId); - - /* - * Delete existing reference and replicated table placements on the - * given groupId if the group has been disabled earlier (e.g., isActive - * set to false). - * - * Sync the metadata changes to all existing metadata nodes irrespective - * of the current nodes' metadata sync state. We expect all nodes up - * and running when another node is activated. - */ - if (!workerNode->isActive && NodeIsPrimary(workerNode)) - { - bool localOnly = false; - DeleteAllReplicatedTablePlacementsFromNodeGroup(workerNode->groupId, - localOnly); - } - - workerNode = - SetWorkerColumnLocalOnly(workerNode, Anum_pg_dist_node_isactive, - BoolGetDatum(true)); - - /* TODO: Once all tests will be enabled for MX, we can remove sync by default check */ - bool syncMetadata = EnableMetadataSync && NodeIsPrimary(workerNode); - if (syncMetadata) - { - /* - * We are going to sync the metadata anyway in this transaction, so do - * not fail just because the current metadata is not synced. - */ - SetWorkerColumn(workerNode, Anum_pg_dist_node_metadatasynced, - BoolGetDatum(true)); - - /* - * Update local group id first, as object dependency logic requires to have - * updated local group id. - */ - UpdateLocalGroupIdOnNode(workerNode); - - nodeToSyncMetadata = lappend(nodeToSyncMetadata, workerNode); - } + MarkNodesNotSyncedInLoopBackConnection(context, MyProcPid); } /* - * Sync distributed objects first. 
We must sync distributed objects before - * replicating reference tables to the remote node, as reference tables may - * need such objects. + * Delete existing reference and replicated table placements on the + * given groupId if the group has been disabled earlier (e.g., isActive + * set to false). */ - SyncDistributedObjectsToNodeList(nodeToSyncMetadata); + SendDeletionCommandsForReplicatedTablePlacements(context); /* - * Sync node metadata. We must sync node metadata before syncing table - * related pg_dist_xxx metadata. Since table related metadata requires - * to have right pg_dist_node entries. + * SetNodeMetadata sets isactive, metadatasynced and hasmetadata flags + * locally for following reasons: + * + * 1) Set isactive to true locally so that we can find activated nodes amongst + * active workers, + * 2) Do not fail just because the current metadata is not synced. (see + * ErrorIfAnyMetadataNodeOutOfSync), + * 3) To propagate activated nodes nodemetadata correctly. + * + * We are going to sync the metadata anyway in this transaction, set + * isactive, metadatasynced, and hasmetadata to true locally. + * The changes would rollback in case of failure. */ - foreach_ptr(node, nodeToSyncMetadata) - { - SyncNodeMetadataToNode(node->workerName, node->workerPort); - } + bool localOnly = true; + SetNodeMetadata(context, localOnly); /* - * As the last step, sync the table related metadata to the remote node. - * We must handle it as the last step because of limitations shared with - * above comments. + * Update local group ids so that upcoming transactions can see its effect. + * Object dependency logic requires to have updated local group id. */ - SyncPgDistTableMetadataToNodeList(nodeToSyncMetadata); + UpdateLocalGroupIdsViaMetadataContext(context); - foreach_ptr(node, nodeList) - { - bool isActive = true; + /* + * Sync node metadata so that placement insertion does not fail due to + * EnsureShardPlacementMetadataIsSane. + */ + SyncNodeMetadata(context); - /* finally, let all other active metadata nodes to learn about this change */ - SetNodeState(node->workerName, node->workerPort, isActive); - } -} + /* + * Sync all dependencies and distributed objects with their pg_dist_xx tables to + * metadata nodes inside metadataSyncContext. Depends on node metadata. + */ + SyncDistributedObjects(context); - -/* - * ActivateNode activates the node with nodeName and nodePort. Currently, activation - * includes only replicating the reference tables and setting isactive column of the - * given node. - */ -int -ActivateNode(char *nodeName, int nodePort) -{ - bool isActive = true; - - WorkerNode *workerNode = ModifiableWorkerNode(nodeName, nodePort); - ActivateNodeList(list_make1(workerNode)); - - /* finally, let all other active metadata nodes to learn about this change */ - WorkerNode *newWorkerNode = SetNodeState(nodeName, nodePort, isActive); - Assert(newWorkerNode->nodeId == workerNode->nodeId); - - return newWorkerNode->nodeId; + /* + * Let all nodes to be active and synced after all operations succeeded. + * we make sure that the metadata sync is idempotent and safe overall with multiple + * other transactions, if nontransactional mode is used. + * + * We already took Exclusive lock on node metadata, which prevents modification + * on node metadata on coordinator. The step will rollback, in case of a failure, + * to the state where metadatasynced=false. 
+ */ + localOnly = false; + SetNodeMetadata(context, localOnly); } @@ -1328,6 +1216,14 @@ citus_update_node(PG_FUNCTION_ARGS) errmsg("node %u not found", nodeId))); } + /* + * We do not allow metadata operations on secondary nodes in nontransactional + * sync mode. + */ + if (NodeIsSecondary(workerNode)) + { + EnsureTransactionalMetadataSyncMode(); + } /* * If the node is a primary node we block reads and writes. @@ -1672,6 +1568,98 @@ citus_is_coordinator(PG_FUNCTION_ARGS) } +/* + * EnsureParentSessionHasExclusiveLockOnPgDistNode ensures given session id + * holds Exclusive lock on pg_dist_node. + */ +static void +EnsureParentSessionHasExclusiveLockOnPgDistNode(pid_t parentSessionPid) +{ + StringInfo checkIfParentLockCommandStr = makeStringInfo(); + + int spiConnectionResult = SPI_connect(); + if (spiConnectionResult != SPI_OK_CONNECT) + { + ereport(ERROR, (errmsg("could not connect to SPI manager"))); + } + + char *checkIfParentLockCommand = "SELECT pid FROM pg_locks WHERE " + "pid = %d AND database = %d AND relation = %d AND " + "mode = 'ExclusiveLock' AND granted = TRUE"; + appendStringInfo(checkIfParentLockCommandStr, checkIfParentLockCommand, + parentSessionPid, MyDatabaseId, DistNodeRelationId()); + + bool readOnly = true; + int spiQueryResult = SPI_execute(checkIfParentLockCommandStr->data, readOnly, 0); + if (spiQueryResult != SPI_OK_SELECT) + { + ereport(ERROR, (errmsg("execution was not successful \"%s\"", + checkIfParentLockCommandStr->data))); + } + + bool parentHasExclusiveLock = SPI_processed > 0; + + SPI_finish(); + + if (!parentHasExclusiveLock) + { + ereport(ERROR, (errmsg("lock is not held by the caller. Unexpected caller " + "for citus_internal_mark_node_not_synced"))); + } +} + + +/* + * citus_internal_mark_node_not_synced unsets metadatasynced flag in separate connection + * to localhost. Should only be called by `MarkNodesNotSyncedInLoopBackConnection`. + * See it for details. + */ +Datum +citus_internal_mark_node_not_synced(PG_FUNCTION_ARGS) +{ + CheckCitusVersion(ERROR); + + /* only called by superuser */ + EnsureSuperUser(); + + pid_t parentSessionPid = PG_GETARG_INT32(0); + + /* fetch node by id */ + int nodeId = PG_GETARG_INT32(1); + HeapTuple heapTuple = GetNodeByNodeId(nodeId); + + /* ensure that parent session holds Exclusive lock to pg_dist_node */ + EnsureParentSessionHasExclusiveLockOnPgDistNode(parentSessionPid); + + /* + * We made sure parent session holds the ExclusiveLock, so we can unset + * metadatasynced for the node safely with the relaxed lock here. + */ + Relation pgDistNode = table_open(DistNodeRelationId(), AccessShareLock); + TupleDesc tupleDescriptor = RelationGetDescr(pgDistNode); + + Datum values[Natts_pg_dist_node]; + bool isnull[Natts_pg_dist_node]; + bool replace[Natts_pg_dist_node]; + + memset(replace, 0, sizeof(replace)); + values[Anum_pg_dist_node_metadatasynced - 1] = DatumGetBool(false); + isnull[Anum_pg_dist_node_metadatasynced - 1] = false; + replace[Anum_pg_dist_node_metadatasynced - 1] = true; + + heapTuple = heap_modify_tuple(heapTuple, tupleDescriptor, values, isnull, replace); + + CatalogTupleUpdate(pgDistNode, &heapTuple->t_self, heapTuple); + + CitusInvalidateRelcacheByRelid(DistNodeRelationId()); + CommandCounterIncrement(); + + table_close(pgDistNode, NoLock); + + PG_RETURN_VOID(); +} + + /* * FindWorkerNode searches over the worker nodes and returns the workerNode * if it already exists. Else, the function returns NULL. 
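[Editor's note] The SPI check in EnsureParentSessionHasExclusiveLockOnPgDistNode above can be reproduced interactively. Below is a hedged SQL equivalent of the query that citus_internal_mark_node_not_synced runs over the loopback connection; it is an illustration only, not part of the patch, and 12345 stands in for the parent (coordinator) session's pid:

    SELECT pid
    FROM pg_locks
    WHERE pid = 12345                                   -- parent session's pid
      AND database = (SELECT oid FROM pg_database
                      WHERE datname = current_database())
      AND relation = 'pg_catalog.pg_dist_node'::regclass
      AND mode = 'ExclusiveLock'
      AND granted;

A non-empty result corresponds to the "parent holds the ExclusiveLock" condition the C code requires before it downgrades to the relaxed lock and unsets metadatasynced.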
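[Editor's note] For context, a minimal SQL sketch of how the new nontransactional mode is expected to be driven from the coordinator, based on the GUC and the error hint added in this patch (an illustration, not part of the patch; the worker port 9701 is a placeholder):

    -- switch to the nontransactional mode (superuser-only GUC added by this patch)
    SET citus.metadata_sync_mode TO 'nontransactional';

    -- must be run outside a transaction block; otherwise ActivateNodeList()
    -- errors out and hints to switch back to 'transactional'
    SELECT citus_activate_node('localhost', 9701);

    -- revert to the default mode afterwards
    RESET citus.metadata_sync_mode;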
@@ -1874,6 +1862,16 @@ static void RemoveNodeFromCluster(char *nodeName, int32 nodePort) { WorkerNode *workerNode = ModifiableWorkerNode(nodeName, nodePort); + + /* + * We do not allow metadata operations on secondary nodes in nontransactional + * sync mode. + */ + if (NodeIsSecondary(workerNode)) + { + EnsureTransactionalMetadataSyncMode(); + } + if (NodeIsPrimary(workerNode)) { ErrorIfNodeContainsNonRemovablePlacements(workerNode); @@ -2002,12 +2000,11 @@ CountPrimariesWithMetadata(void) * If not, the following procedure is followed while adding a node: If the groupId is not * explicitly given by the user, the function picks the group that the new node should * be in with respect to GroupSize. Then, the new node is inserted into the local - * pg_dist_node as well as the nodes with hasmetadata=true. + * pg_dist_node as well as the nodes with hasmetadata=true if localOnly is false. */ static int -AddNodeMetadata(char *nodeName, int32 nodePort, - NodeMetadata *nodeMetadata, - bool *nodeAlreadyExists) +AddNodeMetadata(char *nodeName, int32 nodePort, NodeMetadata *nodeMetadata, + bool *nodeAlreadyExists, bool localOnly) { EnsureCoordinator(); @@ -2136,7 +2133,7 @@ AddNodeMetadata(char *nodeName, int32 nodePort, workerNode = FindWorkerNodeAnyCluster(nodeName, nodePort); - if (EnableMetadataSync) + if (EnableMetadataSync && !localOnly) { /* send the delete command to all primary nodes with metadata */ char *nodeDeleteCommand = NodeDeleteCommand(workerNode->nodeId); @@ -2157,6 +2154,93 @@ AddNodeMetadata(char *nodeName, int32 nodePort, } +/* + * AddNodeMetadataViaMetadataContext does the same thing as AddNodeMetadata but + * make use of metadata sync context to send commands to workers to support both + * transactional and nontransactional sync modes. + */ +static int +AddNodeMetadataViaMetadataContext(char *nodeName, int32 nodePort, + NodeMetadata *nodeMetadata, bool *nodeAlreadyExists) +{ + bool localOnly = true; + int nodeId = AddNodeMetadata(nodeName, nodePort, nodeMetadata, nodeAlreadyExists, + localOnly); + + /* do nothing as the node already exists */ + if (*nodeAlreadyExists) + { + return nodeId; + } + + /* + * Create metadata sync context that is used throughout node addition + * and activation if necessary. + */ + WorkerNode *node = ModifiableWorkerNode(nodeName, nodePort); + + /* we should always set active flag to true if we call citus_add_node */ + node = SetWorkerColumnLocalOnly(node, Anum_pg_dist_node_isactive, DatumGetBool(true)); + + /* + * After adding new node, if the node did not already exist, we will activate + * the node. 
+ * If the worker is not marked as a coordinator, check that + * the node is not trying to add itself + */ + if (node != NULL && + node->groupId != COORDINATOR_GROUP_ID && + node->nodeRole != SecondaryNodeRoleId() && + IsWorkerTheCurrentNode(node)) + { + ereport(ERROR, (errmsg("Node cannot add itself as a worker."), + errhint( + "Add the node as a coordinator by using: " + "SELECT citus_set_coordinator_host('%s', %d);", + node->workerName, node->workerPort))); + } + + List *nodeList = list_make1(node); + bool collectCommands = false; + bool nodesAddedInSameTransaction = true; + MetadataSyncContext *context = CreateMetadataSyncContext(nodeList, collectCommands, + nodesAddedInSameTransaction); + + if (EnableMetadataSync) + { + /* send the delete command to all primary nodes with metadata */ + char *nodeDeleteCommand = NodeDeleteCommand(node->nodeId); + SendOrCollectCommandListToMetadataNodes(context, list_make1(nodeDeleteCommand)); + + /* finally prepare the insert command and send it to all primary nodes */ + uint32 primariesWithMetadata = CountPrimariesWithMetadata(); + if (primariesWithMetadata != 0) + { + char *nodeInsertCommand = NULL; + if (context->transactionMode == METADATA_SYNC_TRANSACTIONAL) + { + nodeInsertCommand = NodeListInsertCommand(nodeList); + } + else if (context->transactionMode == METADATA_SYNC_NON_TRANSACTIONAL) + { + /* + * We need to ensure node insertion is idempotent in nontransactional + * sync mode. + */ + nodeInsertCommand = NodeListIdempotentInsertCommand(nodeList); + } + Assert(nodeInsertCommand != NULL); + SendOrCollectCommandListToMetadataNodes(context, + list_make1(nodeInsertCommand)); + } + } + + ActivateNodeList(context); + + return nodeId; +} + + /* * SetWorkerColumn function sets the column with the specified index * on the worker in pg_dist_node, by calling SetWorkerColumnLocalOnly. @@ -2181,6 +2265,30 @@ SetWorkerColumn(WorkerNode *workerNode, int columnIndex, Datum value) } +/* + * SetNodeStateViaMetadataContext sets or unsets isactive, metadatasynced, and hasmetadata + * flags via metadataSyncContext. + */ +static void +SetNodeStateViaMetadataContext(MetadataSyncContext *context, WorkerNode *workerNode, + Datum value) +{ + char *isActiveCommand = + GetMetadataSyncCommandToSetNodeColumn(workerNode, Anum_pg_dist_node_isactive, + value); + char *metadatasyncedCommand = + GetMetadataSyncCommandToSetNodeColumn(workerNode, + Anum_pg_dist_node_metadatasynced, value); + char *hasmetadataCommand = + GetMetadataSyncCommandToSetNodeColumn(workerNode, Anum_pg_dist_node_hasmetadata, + value); + List *commandList = list_make3(isActiveCommand, metadatasyncedCommand, + hasmetadataCommand); + + SendOrCollectCommandListToMetadataNodes(context, commandList); +} + + /* * SetWorkerColumnOptional function sets the column with the specified index * on the worker in pg_dist_node, by calling SetWorkerColumnLocalOnly. @@ -2393,20 +2501,6 @@ SetShouldHaveShards(WorkerNode *workerNode, bool shouldHaveShards) } -/* - * SetNodeState function sets the isactive column of the specified worker in - * pg_dist_node to isActive. Also propagates this to other metadata nodes. - * It returns the new worker node after the modification. - */ -static WorkerNode * -SetNodeState(char *nodeName, int nodePort, bool isActive) -{ - WorkerNode *workerNode = FindWorkerNodeAnyCluster(nodeName, nodePort); - return SetWorkerColumn(workerNode, Anum_pg_dist_node_isactive, BoolGetDatum( - isActive)); -} - - /* * GetNodeTuple function returns the heap tuple of given nodeName and nodePort. 
If the * node is not found this function returns NULL. @@ -2443,6 +2537,41 @@ GetNodeTuple(const char *nodeName, int32 nodePort) } +/* + * GetNodeByNodeId returns the heap tuple for given node id by looking up catalog. + */ +static HeapTuple +GetNodeByNodeId(int32 nodeId) +{ + Relation pgDistNode = table_open(DistNodeRelationId(), AccessShareLock); + const int scanKeyCount = 1; + const bool indexOK = false; + + ScanKeyData scanKey[1]; + HeapTuple nodeTuple = NULL; + + ScanKeyInit(&scanKey[0], Anum_pg_dist_node_nodeid, + BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(nodeId)); + SysScanDesc scanDescriptor = systable_beginscan(pgDistNode, InvalidOid, indexOK, + NULL, scanKeyCount, scanKey); + + HeapTuple heapTuple = systable_getnext(scanDescriptor); + if (HeapTupleIsValid(heapTuple)) + { + nodeTuple = heap_copytuple(heapTuple); + } + else + { + ereport(ERROR, (errmsg("could not find valid entry for node id %d", nodeId))); + } + + systable_endscan(scanDescriptor); + table_close(pgDistNode, NoLock); + + return nodeTuple; +} + + /* * GetNextGroupId allocates and returns a unique groupId for the group * to be created. This allocation occurs both in shared memory and in write @@ -2583,9 +2712,11 @@ InsertPlaceholderCoordinatorRecord(void) nodeMetadata.nodeCluster = "default"; bool nodeAlreadyExists = false; + bool localOnly = false; /* as long as there is a single node, localhost should be ok */ - AddNodeMetadata(LocalHostName, PostPortNumber, &nodeMetadata, &nodeAlreadyExists); + AddNodeMetadata(LocalHostName, PostPortNumber, &nodeMetadata, &nodeAlreadyExists, + localOnly); } @@ -2868,3 +2999,119 @@ UnsetMetadataSyncedForAllWorkers(void) return updatedAtLeastOne; } + + +/* + * ErrorIfAnyNodeNotExist errors if any node in given list not found. + */ +static void +ErrorIfAnyNodeNotExist(List *nodeList) +{ + WorkerNode *node = NULL; + foreach_ptr(node, nodeList) + { + /* + * First, locally mark the node is active, if everything goes well, + * we are going to sync this information to all the metadata nodes. + */ + WorkerNode *workerNode = + FindWorkerNodeAnyCluster(node->workerName, node->workerPort); + if (workerNode == NULL) + { + ereport(ERROR, (errmsg("node at \"%s:%u\" does not exist", node->workerName, + node->workerPort))); + } + } +} + + +/* + * UpdateLocalGroupIdsViaMetadataContext updates local group ids for given list + * of nodes with transactional or nontransactional mode according to transactionMode + * inside metadataSyncContext. + */ +static void +UpdateLocalGroupIdsViaMetadataContext(MetadataSyncContext *context) +{ + int activatedPrimaryCount = list_length(context->activatedWorkerNodeList); + int nodeIdx = 0; + for (nodeIdx = 0; nodeIdx < activatedPrimaryCount; nodeIdx++) + { + WorkerNode *node = list_nth(context->activatedWorkerNodeList, nodeIdx); + List *commandList = list_make1(LocalGroupIdUpdateCommand(node->groupId)); + + /* send commands to new workers, the current user should be a superuser */ + Assert(superuser()); + + SendOrCollectCommandListToSingleNode(context, commandList, nodeIdx); + } +} + + +/* + * SendDeletionCommandsForReplicatedTablePlacements sends commands to delete replicated + * placement for the metadata nodes with transactional or nontransactional mode according + * to transactionMode inside metadataSyncContext. 
+ */ +static void +SendDeletionCommandsForReplicatedTablePlacements(MetadataSyncContext *context) +{ + WorkerNode *node = NULL; + foreach_ptr(node, context->activatedWorkerNodeList) + { + if (!node->isActive) + { + bool localOnly = false; + int32 groupId = node->groupId; + DeleteAllReplicatedTablePlacementsFromNodeGroupViaMetadataContext(context, + groupId, + localOnly); + } + } +} + + +/* + * SyncNodeMetadata syncs node metadata with transactional or nontransactional + * mode according to transactionMode inside metadataSyncContext. + */ +static void +SyncNodeMetadata(MetadataSyncContext *context) +{ + CheckCitusVersion(ERROR); + + if (!EnableMetadataSync) + { + return; + } + + /* + * Do not fail when we call this method from activate_node_snapshot + * from workers. + */ + if (!MetadataSyncCollectsCommands(context)) + { + EnsureCoordinator(); + } + + EnsureModificationsCanRun(); + EnsureSequentialModeMetadataOperations(); + + LockRelationOid(DistNodeRelationId(), ExclusiveLock); + + /* generate the queries which drop the node metadata */ + List *dropMetadataCommandList = NodeMetadataDropCommands(); + + /* generate the queries which create the node metadata from scratch */ + List *createMetadataCommandList = NodeMetadataCreateCommands(); + + List *recreateNodeSnapshotCommandList = dropMetadataCommandList; + recreateNodeSnapshotCommandList = list_concat(recreateNodeSnapshotCommandList, + createMetadataCommandList); + + /* + * We should have already added node metadata to metadata workers. Sync node + * metadata just for activated workers. + */ + SendOrCollectCommandListToActivatedNodes(context, recreateNodeSnapshotCommandList); +} diff --git a/src/backend/distributed/shared_library_init.c b/src/backend/distributed/shared_library_init.c index bd9ca679b..23393078b 100644 --- a/src/backend/distributed/shared_library_init.c +++ b/src/backend/distributed/shared_library_init.c @@ -360,6 +360,11 @@ static const struct config_enum_entry cpu_priority_options[] = { { NULL, 0, false} }; +static const struct config_enum_entry metadata_sync_mode_options[] = { + { "transactional", METADATA_SYNC_TRANSACTIONAL, false }, + { "nontransactional", METADATA_SYNC_NON_TRANSACTIONAL, false }, + { NULL, 0, false } +}; /* *INDENT-ON* */ @@ -1880,6 +1885,21 @@ RegisterCitusConfigVariables(void) GUC_UNIT_MS | GUC_NO_SHOW_ALL, NULL, NULL, NULL); + DefineCustomEnumVariable( + "citus.metadata_sync_mode", + gettext_noop("Sets transaction mode for metadata syncs."), + gettext_noop("metadata sync can be run inside a single coordinated " + "transaction or with multiple small transactions in " + "idempotent way. By default we sync metadata in single " + "coordinated transaction. 
When we hit memory problems " + "at workers, we have alternative nontransactional mode " + "where we send each command with separate transaction."), + &MetadataSyncTransMode, + METADATA_SYNC_TRANSACTIONAL, metadata_sync_mode_options, + PGC_SUSET, + GUC_SUPERUSER_ONLY | GUC_NO_SHOW_ALL, + NULL, NULL, NULL); + DefineCustomIntVariable( "citus.metadata_sync_retry_interval", gettext_noop("Sets the interval to retry failed metadata syncs."), diff --git a/src/backend/distributed/sql/citus--11.2-1--11.3-1.sql b/src/backend/distributed/sql/citus--11.2-1--11.3-1.sql index c14904a94..bbaf0ce4d 100644 --- a/src/backend/distributed/sql/citus--11.2-1--11.3-1.sql +++ b/src/backend/distributed/sql/citus--11.2-1--11.3-1.sql @@ -7,3 +7,6 @@ ALTER TABLE pg_catalog.pg_dist_placement REPLICA IDENTITY USING INDEX pg_dist_pl ALTER TABLE pg_catalog.pg_dist_rebalance_strategy REPLICA IDENTITY USING INDEX pg_dist_rebalance_strategy_name_key; ALTER TABLE pg_catalog.pg_dist_shard REPLICA IDENTITY USING INDEX pg_dist_shard_shardid_index; ALTER TABLE pg_catalog.pg_dist_transaction REPLICA IDENTITY USING INDEX pg_dist_transaction_unique_constraint; + +#include "udfs/worker_drop_all_shell_tables/11.3-1.sql" +#include "udfs/citus_internal_mark_node_not_synced/11.3-1.sql" diff --git a/src/backend/distributed/sql/downgrades/citus--11.3-1--11.2-1.sql b/src/backend/distributed/sql/downgrades/citus--11.3-1--11.2-1.sql index c9fe75d1a..322613e5f 100644 --- a/src/backend/distributed/sql/downgrades/citus--11.3-1--11.2-1.sql +++ b/src/backend/distributed/sql/downgrades/citus--11.3-1--11.2-1.sql @@ -17,3 +17,6 @@ ALTER TABLE pg_catalog.pg_dist_placement REPLICA IDENTITY NOTHING; ALTER TABLE pg_catalog.pg_dist_rebalance_strategy REPLICA IDENTITY NOTHING; ALTER TABLE pg_catalog.pg_dist_shard REPLICA IDENTITY NOTHING; ALTER TABLE pg_catalog.pg_dist_transaction REPLICA IDENTITY NOTHING; + +DROP PROCEDURE pg_catalog.worker_drop_all_shell_tables(bool); +DROP FUNCTION pg_catalog.citus_internal_mark_node_not_synced(int, int); diff --git a/src/backend/distributed/sql/udfs/citus_internal_mark_node_not_synced/11.3-1.sql b/src/backend/distributed/sql/udfs/citus_internal_mark_node_not_synced/11.3-1.sql new file mode 100644 index 000000000..0d90c8f1a --- /dev/null +++ b/src/backend/distributed/sql/udfs/citus_internal_mark_node_not_synced/11.3-1.sql @@ -0,0 +1,6 @@ +CREATE OR REPLACE FUNCTION pg_catalog.citus_internal_mark_node_not_synced(parent_pid int, nodeid int) + RETURNS VOID + LANGUAGE C STRICT + AS 'MODULE_PATHNAME', $$citus_internal_mark_node_not_synced$$; +COMMENT ON FUNCTION citus_internal_mark_node_not_synced(int, int) + IS 'marks given node not synced by unsetting metadatasynced column at the start of the nontransactional sync.'; diff --git a/src/backend/distributed/sql/udfs/citus_internal_mark_node_not_synced/latest.sql b/src/backend/distributed/sql/udfs/citus_internal_mark_node_not_synced/latest.sql new file mode 100644 index 000000000..0d90c8f1a --- /dev/null +++ b/src/backend/distributed/sql/udfs/citus_internal_mark_node_not_synced/latest.sql @@ -0,0 +1,6 @@ +CREATE OR REPLACE FUNCTION pg_catalog.citus_internal_mark_node_not_synced(parent_pid int, nodeid int) + RETURNS VOID + LANGUAGE C STRICT + AS 'MODULE_PATHNAME', $$citus_internal_mark_node_not_synced$$; +COMMENT ON FUNCTION citus_internal_mark_node_not_synced(int, int) + IS 'marks given node not synced by unsetting metadatasynced column at the start of the nontransactional sync.'; diff --git a/src/backend/distributed/sql/udfs/worker_drop_all_shell_tables/11.3-1.sql 
b/src/backend/distributed/sql/udfs/worker_drop_all_shell_tables/11.3-1.sql new file mode 100644 index 000000000..55236286c --- /dev/null +++ b/src/backend/distributed/sql/udfs/worker_drop_all_shell_tables/11.3-1.sql @@ -0,0 +1,23 @@ + -- During metadata sync, when we send many ddls over single transaction, worker node can error due +-- to reaching at max allocation block size for invalidation messages. To find a workaround for the problem, +-- we added nontransactional metadata sync mode where we create many transaction while dropping shell tables +-- via https://github.com/citusdata/citus/pull/6728. +CREATE OR REPLACE PROCEDURE pg_catalog.worker_drop_all_shell_tables(singleTransaction bool DEFAULT true) +LANGUAGE plpgsql +AS $$ +DECLARE + table_name text; +BEGIN + -- drop shell tables within single or multiple transactions according to the flag singleTransaction + FOR table_name IN SELECT logicalrelid::regclass::text FROM pg_dist_partition + LOOP + PERFORM pg_catalog.worker_drop_shell_table(table_name); + IF not singleTransaction THEN + COMMIT; + END IF; + END LOOP; +END; +$$; +COMMENT ON PROCEDURE worker_drop_all_shell_tables(singleTransaction bool) + IS 'drop all distributed tables only without the metadata within single transaction or ' + 'multiple transaction specified by the flag singleTransaction'; diff --git a/src/backend/distributed/sql/udfs/worker_drop_all_shell_tables/latest.sql b/src/backend/distributed/sql/udfs/worker_drop_all_shell_tables/latest.sql new file mode 100644 index 000000000..55236286c --- /dev/null +++ b/src/backend/distributed/sql/udfs/worker_drop_all_shell_tables/latest.sql @@ -0,0 +1,23 @@ + -- During metadata sync, when we send many ddls over single transaction, worker node can error due +-- to reaching at max allocation block size for invalidation messages. To find a workaround for the problem, +-- we added nontransactional metadata sync mode where we create many transaction while dropping shell tables +-- via https://github.com/citusdata/citus/pull/6728. 
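-- Editor's note (not part of this file): a usage sketch of the procedure defined
-- below, as it is expected to be invoked on workers during metadata sync
-- (see the WORKER_DROP_ALL_SHELL_TABLES macro added to metadata_sync.h):
--   CALL pg_catalog.worker_drop_all_shell_tables(true);   -- single transaction (default)
--   CALL pg_catalog.worker_drop_all_shell_tables(false);  -- commit after each shell table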
+CREATE OR REPLACE PROCEDURE pg_catalog.worker_drop_all_shell_tables(singleTransaction bool DEFAULT true) +LANGUAGE plpgsql +AS $$ +DECLARE + table_name text; +BEGIN + -- drop shell tables within single or multiple transactions according to the flag singleTransaction + FOR table_name IN SELECT logicalrelid::regclass::text FROM pg_dist_partition + LOOP + PERFORM pg_catalog.worker_drop_shell_table(table_name); + IF not singleTransaction THEN + COMMIT; + END IF; + END LOOP; +END; +$$; +COMMENT ON PROCEDURE worker_drop_all_shell_tables(singleTransaction bool) + IS 'drop all distributed tables only without the metadata within single transaction or ' + 'multiple transaction specified by the flag singleTransaction'; diff --git a/src/backend/distributed/test/metadata_sync.c b/src/backend/distributed/test/metadata_sync.c index b1c8a095c..46d2303d6 100644 --- a/src/backend/distributed/test/metadata_sync.c +++ b/src/backend/distributed/test/metadata_sync.c @@ -49,26 +49,23 @@ activate_node_snapshot(PG_FUNCTION_ARGS) */ WorkerNode *dummyWorkerNode = GetFirstPrimaryWorkerNode(); - List *updateLocalGroupCommand = - list_make1(LocalGroupIdUpdateCommand(dummyWorkerNode->groupId)); - List *syncDistObjCommands = SyncDistributedObjectsCommandList(dummyWorkerNode); - List *dropSnapshotCommands = NodeMetadataDropCommands(); - List *createSnapshotCommands = NodeMetadataCreateCommands(); - List *pgDistTableMetadataSyncCommands = PgDistTableMetadataSyncCommandList(); + /* + * Create MetadataSyncContext which is used throughout nodes' activation. + * As we set collectCommands to true, it would not create connections to workers. + * Instead it would collect and return sync commands to be sent to workers. + */ + bool collectCommands = true; + bool nodesAddedInSameTransaction = false; + MetadataSyncContext *context = CreateMetadataSyncContext(list_make1(dummyWorkerNode), + collectCommands, + nodesAddedInSameTransaction); - List *activateNodeCommandList = NIL; + ActivateNodeList(context); + + List *activateNodeCommandList = context->collectedCommands; int activateNodeCommandIndex = 0; Oid ddlCommandTypeId = TEXTOID; - activateNodeCommandList = list_concat(activateNodeCommandList, - updateLocalGroupCommand); - activateNodeCommandList = list_concat(activateNodeCommandList, syncDistObjCommands); - activateNodeCommandList = list_concat(activateNodeCommandList, dropSnapshotCommands); - activateNodeCommandList = list_concat(activateNodeCommandList, - createSnapshotCommands); - activateNodeCommandList = list_concat(activateNodeCommandList, - pgDistTableMetadataSyncCommands); - int activateNodeCommandCount = list_length(activateNodeCommandList); Datum *activateNodeCommandDatumArray = palloc0(activateNodeCommandCount * sizeof(Datum)); diff --git a/src/backend/distributed/transaction/backend_data.c b/src/backend/distributed/transaction/backend_data.c index 5c554ef06..fc89fde9a 100644 --- a/src/backend/distributed/transaction/backend_data.c +++ b/src/backend/distributed/transaction/backend_data.c @@ -1270,23 +1270,6 @@ MyBackendGotCancelledDueToDeadlock(bool clearState) } -/* - * MyBackendIsInDisributedTransaction returns true if MyBackendData - * is in a distributed transaction. 
- */ -bool -MyBackendIsInDisributedTransaction(void) -{ - /* backend might not have used citus yet and thus not initialized backend data */ - if (!MyBackendData) - { - return false; - } - - return IsInDistributedTransaction(MyBackendData); -} - - /* * ActiveDistributedTransactionNumbers returns a list of pointers to * transaction numbers of distributed transactions that are in progress diff --git a/src/backend/distributed/transaction/worker_transaction.c b/src/backend/distributed/transaction/worker_transaction.c index 486dd7280..b4a497647 100644 --- a/src/backend/distributed/transaction/worker_transaction.c +++ b/src/backend/distributed/transaction/worker_transaction.c @@ -374,6 +374,54 @@ SendCommandListToWorkerOutsideTransactionWithConnection(MultiConnection *workerC } +/* + * SendCommandListToWorkerListWithBareConnections sends the command list + * over the specified bare connections. This function is mainly useful to + * avoid opening an closing connections excessively by allowing reusing + * connections to send multiple separate bare commands. The function + * raises an error if any of the queries fail. + */ +void +SendCommandListToWorkerListWithBareConnections(List *workerConnectionList, + List *commandList) +{ + Assert(!InCoordinatedTransaction()); + Assert(!GetCoordinatedTransactionShouldUse2PC()); + + if (list_length(commandList) == 0 || list_length(workerConnectionList) == 0) + { + /* nothing to do */ + return; + } + + /* + * In order to avoid round-trips per query in queryStringList, + * we join the string and send as a single command. Also, + * if there is only a single command, avoid additional call to + * StringJoin given that some strings can be quite large. + */ + char *stringToSend = (list_length(commandList) == 1) ? + linitial(commandList) : StringJoin(commandList, ';'); + + /* send commands in parallel */ + MultiConnection *connection = NULL; + foreach_ptr(connection, workerConnectionList) + { + int querySent = SendRemoteCommand(connection, stringToSend); + if (querySent == 0) + { + ReportConnectionError(connection, ERROR); + } + } + + bool failOnError = true; + foreach_ptr(connection, workerConnectionList) + { + ClearResults(connection, failOnError); + } +} + + /* * SendCommandListToWorkerInCoordinatedTransaction opens connection to the node * with the given nodeName and nodePort. The commands are sent as part of the @@ -390,6 +438,8 @@ SendMetadataCommandListToWorkerListInCoordinatedTransaction(List *workerNodeList return; } + ErrorIfAnyMetadataNodeOutOfSync(workerNodeList); + UseCoordinatedTransaction(); List *connectionList = NIL; diff --git a/src/backend/distributed/utils/reference_table_utils.c b/src/backend/distributed/utils/reference_table_utils.c index 0b085c67a..687ce02a7 100644 --- a/src/backend/distributed/utils/reference_table_utils.c +++ b/src/backend/distributed/utils/reference_table_utils.c @@ -503,12 +503,11 @@ GetReferenceTableColocationId() /* - * DeleteAllReplicatedTablePlacementsFromNodeGroup function iterates over - * list of reference and replicated hash distributed tables and deletes - * all placements from pg_dist_placement table for given group. + * GetAllReplicatedTableList returns all tables which has replicated placements. + * i.e. 
(all reference tables) + (distributed tables with more than 1 placements) */ -void -DeleteAllReplicatedTablePlacementsFromNodeGroup(int32 groupId, bool localOnly) +List * +GetAllReplicatedTableList(void) { List *referenceTableList = CitusTableTypeIdList(REFERENCE_TABLE); List *replicatedMetadataSyncedDistributedTableList = @@ -517,13 +516,25 @@ DeleteAllReplicatedTablePlacementsFromNodeGroup(int32 groupId, bool localOnly) List *replicatedTableList = list_concat(referenceTableList, replicatedMetadataSyncedDistributedTableList); - /* if there are no reference tables, we do not need to do anything */ + return replicatedTableList; +} + + +/* + * ReplicatedPlacementsForNodeGroup filters all replicated placements for given + * node group id. + */ +List * +ReplicatedPlacementsForNodeGroup(int32 groupId) +{ + List *replicatedTableList = GetAllReplicatedTableList(); + if (list_length(replicatedTableList) == 0) { - return; + return NIL; } - StringInfo deletePlacementCommand = makeStringInfo(); + List *replicatedPlacementsForNodeGroup = NIL; Oid replicatedTableId = InvalidOid; foreach_oid(replicatedTableId, replicatedTableList) { @@ -538,25 +549,104 @@ DeleteAllReplicatedTablePlacementsFromNodeGroup(int32 groupId, bool localOnly) continue; } - GroupShardPlacement *placement = NULL; - foreach_ptr(placement, placements) - { - LockShardDistributionMetadata(placement->shardId, ExclusiveLock); - - DeleteShardPlacementRow(placement->placementId); - - if (!localOnly) - { - resetStringInfo(deletePlacementCommand); - appendStringInfo(deletePlacementCommand, - "DELETE FROM pg_catalog.pg_dist_placement " - "WHERE placementid = " UINT64_FORMAT, - placement->placementId); - - SendCommandToWorkersWithMetadata(deletePlacementCommand->data); - } - } + replicatedPlacementsForNodeGroup = list_concat(replicatedPlacementsForNodeGroup, + placements); } + + return replicatedPlacementsForNodeGroup; +} + + +/* + * DeleteShardPlacementCommand returns a command for deleting given placement from + * metadata. + */ +char * +DeleteShardPlacementCommand(uint64 placementId) +{ + StringInfo deletePlacementCommand = makeStringInfo(); + appendStringInfo(deletePlacementCommand, + "DELETE FROM pg_catalog.pg_dist_placement " + "WHERE placementid = " UINT64_FORMAT, placementId); + return deletePlacementCommand->data; +} + + +/* + * DeleteAllReplicatedTablePlacementsFromNodeGroup function iterates over + * list of reference and replicated hash distributed tables and deletes + * all placements from pg_dist_placement table for given group. + */ +void +DeleteAllReplicatedTablePlacementsFromNodeGroup(int32 groupId, bool localOnly) +{ + List *replicatedPlacementListForGroup = ReplicatedPlacementsForNodeGroup(groupId); + + /* if there are no replicated tables for the group, we do not need to do anything */ + if (list_length(replicatedPlacementListForGroup) == 0) + { + return; + } + + GroupShardPlacement *placement = NULL; + foreach_ptr(placement, replicatedPlacementListForGroup) + { + LockShardDistributionMetadata(placement->shardId, ExclusiveLock); + + if (!localOnly) + { + char *deletePlacementCommand = + DeleteShardPlacementCommand(placement->placementId); + + SendCommandToWorkersWithMetadata(deletePlacementCommand); + } + + DeleteShardPlacementRow(placement->placementId); + } +} + + +/* + * DeleteAllReplicatedTablePlacementsFromNodeGroupViaMetadataContext does the same as + * DeleteAllReplicatedTablePlacementsFromNodeGroup except it uses metadataSyncContext for + * connections. 
+ */ +void +DeleteAllReplicatedTablePlacementsFromNodeGroupViaMetadataContext( + MetadataSyncContext *context, int32 groupId, bool localOnly) +{ + List *replicatedPlacementListForGroup = ReplicatedPlacementsForNodeGroup(groupId); + + /* if there are no replicated tables for the group, we do not need to do anything */ + if (list_length(replicatedPlacementListForGroup) == 0) + { + return; + } + + MemoryContext oldContext = MemoryContextSwitchTo(context->context); + GroupShardPlacement *placement = NULL; + foreach_ptr(placement, replicatedPlacementListForGroup) + { + LockShardDistributionMetadata(placement->shardId, ExclusiveLock); + + if (!localOnly) + { + char *deletePlacementCommand = + DeleteShardPlacementCommand(placement->placementId); + + SendOrCollectCommandListToMetadataNodes(context, + list_make1(deletePlacementCommand)); + } + + /* do not execute local transaction if we collect commands */ + if (!MetadataSyncCollectsCommands(context)) + { + DeleteShardPlacementRow(placement->placementId); + } + + ResetMetadataSyncMemoryContext(context); + } + MemoryContextSwitchTo(oldContext); } diff --git a/src/include/distributed/backend_data.h b/src/include/distributed/backend_data.h index 51bbb0368..1fcd31141 100644 --- a/src/include/distributed/backend_data.h +++ b/src/include/distributed/backend_data.h @@ -66,7 +66,6 @@ extern int ExtractProcessIdFromGlobalPID(uint64 globalPID); extern void GetBackendDataForProc(PGPROC *proc, BackendData *result); extern void CancelTransactionDueToDeadlock(PGPROC *proc); extern bool MyBackendGotCancelledDueToDeadlock(bool clearState); -extern bool MyBackendIsInDisributedTransaction(void); extern List * ActiveDistributedTransactionNumbers(void); extern LocalTransactionId GetMyProcLocalTransactionId(void); extern int GetExternalClientBackendCount(void); diff --git a/src/include/distributed/connection_management.h b/src/include/distributed/connection_management.h index 41882bdf1..278d7ca2d 100644 --- a/src/include/distributed/connection_management.h +++ b/src/include/distributed/connection_management.h @@ -323,6 +323,7 @@ extern void ShutdownConnection(MultiConnection *connection); /* dealing with a connection */ extern void FinishConnectionListEstablishment(List *multiConnectionList); extern void FinishConnectionEstablishment(MultiConnection *connection); +extern void ForceConnectionCloseAtTransactionEnd(MultiConnection *connection); extern void ClaimConnectionExclusively(MultiConnection *connection); extern void UnclaimConnection(MultiConnection *connection); extern void MarkConnectionConnected(MultiConnection *connection); diff --git a/src/include/distributed/metadata/dependency.h b/src/include/distributed/metadata/dependency.h index c5a65319e..2d3759e1f 100644 --- a/src/include/distributed/metadata/dependency.h +++ b/src/include/distributed/metadata/dependency.h @@ -19,6 +19,8 @@ #include "distributed/errormessage.h" #include "nodes/pg_list.h" +typedef bool (*AddressPredicate)(const ObjectAddress *); + extern List * GetUniqueDependenciesList(List *objectAddressesList); extern List * GetDependenciesForObject(const ObjectAddress *target); extern List * GetAllSupportedDependenciesForObject(const ObjectAddress *target); @@ -33,5 +35,7 @@ extern List * GetPgDependTuplesForDependingObjects(Oid targetObjectClassId, Oid targetObjectId); extern List * GetDependingViews(Oid relationId); extern Oid GetDependingView(Form_pg_depend pg_depend); +extern List * FilterObjectAddressListByPredicate(List *objectAddressList, + AddressPredicate predicate); #endif /* 
CITUS_DEPENDENCY_H */ diff --git a/src/include/distributed/metadata_sync.h b/src/include/distributed/metadata_sync.h index 11140beff..d5878ec71 100644 --- a/src/include/distributed/metadata_sync.h +++ b/src/include/distributed/metadata_sync.h @@ -18,9 +18,31 @@ #include "distributed/metadata_cache.h" #include "nodes/pg_list.h" +/* managed via guc.c */ +typedef enum +{ + METADATA_SYNC_TRANSACTIONAL = 0, + METADATA_SYNC_NON_TRANSACTIONAL = 1 +} MetadataSyncTransactionMode; + /* config variables */ extern int MetadataSyncInterval; extern int MetadataSyncRetryInterval; +extern int MetadataSyncTransMode; + +/* + * MetadataSyncContext is used throughout metadata sync. + */ +typedef struct MetadataSyncContext +{ + List *activatedWorkerNodeList; /* activated worker nodes */ + List *activatedWorkerBareConnections; /* bare connections to activated nodes */ + MemoryContext context; /* memory context for all allocations */ + MetadataSyncTransactionMode transactionMode; /* transaction mode for the sync */ + bool collectCommands; /* if we collect commands instead of sending and resetting */ + List *collectedCommands; /* collected commands. (NIL if collectCommands == false) */ + bool nodesAddedInSameTransaction; /* if the nodes are added just before activation */ +} MetadataSyncContext; typedef enum { @@ -52,7 +74,6 @@ extern void citus_internal_add_placement_metadata_internal(int64 shardId, int64 shardLength, int32 groupId, int64 placementId); -extern void SyncNodeMetadataToNode(const char *nodeNameString, int32 nodePort); extern void SyncCitusTableMetadata(Oid relationId); extern void EnsureSequentialModeMetadataOperations(void); extern bool ClusterHasKnownMetadataWorkers(void); @@ -60,10 +81,10 @@ extern char * LocalGroupIdUpdateCommand(int32 groupId); extern bool ShouldSyncUserCommandForObject(ObjectAddress objectAddress); extern bool ShouldSyncTableMetadata(Oid relationId); extern bool ShouldSyncTableMetadataViaCatalog(Oid relationId); +extern Oid FetchRelationIdFromPgPartitionHeapTuple(HeapTuple heapTuple, + TupleDesc tupleDesc); extern bool ShouldSyncSequenceMetadata(Oid relationId); extern List * NodeMetadataCreateCommands(void); -extern List * DistributedObjectMetadataSyncCommandList(void); -extern List * ColocationGroupCreateCommandList(void); extern List * CitusTableMetadataCreateCommandList(Oid relationId); extern List * NodeMetadataDropCommands(void); extern char * MarkObjectsDistributedCreateCommand(List *addresses, @@ -76,6 +97,7 @@ extern char * DistributionDeleteCommand(const char *schemaName, extern char * DistributionDeleteMetadataCommand(Oid relationId); extern char * TableOwnerResetCommand(Oid distributedRelationId); extern char * NodeListInsertCommand(List *workerNodeList); +char * NodeListIdempotentInsertCommand(List *workerNodeList); extern List * ShardListInsertCommand(List *shardIntervalList); extern List * ShardDeleteCommandList(ShardInterval *shardInterval); extern char * NodeDeleteCommand(uint32 nodeId); @@ -116,14 +138,46 @@ extern void SyncNewColocationGroupToNodes(uint32 colocationId, int shardCount, Oid distributionColumnCollation); extern void SyncDeleteColocationGroupToNodes(uint32 colocationId); +extern MetadataSyncContext * CreateMetadataSyncContext(List *nodeList, + bool collectCommands, + bool nodesAddedInSameTransaction); +extern void EstablishAndSetMetadataSyncBareConnections(MetadataSyncContext *context); +extern void SetMetadataSyncNodesFromNodeList(MetadataSyncContext *context, + List *nodeList); +extern void ResetMetadataSyncMemoryContext(MetadataSyncContext 
*context); +extern bool MetadataSyncCollectsCommands(MetadataSyncContext *context); +extern void SendOrCollectCommandListToActivatedNodes(MetadataSyncContext *context, + List *commands); +extern void SendOrCollectCommandListToMetadataNodes(MetadataSyncContext *context, + List *commands); +extern void SendOrCollectCommandListToSingleNode(MetadataSyncContext *context, + List *commands, int nodeIdx); + +extern void ActivateNodeList(MetadataSyncContext *context); + +extern char * WorkerDropAllShellTablesCommand(bool singleTransaction); + +extern void SyncDistributedObjects(MetadataSyncContext *context); +extern void SendNodeWideObjectsSyncCommands(MetadataSyncContext *context); +extern void SendShellTableDeletionCommands(MetadataSyncContext *context); +extern void SendMetadataDeletionCommands(MetadataSyncContext *context); +extern void SendColocationMetadataCommands(MetadataSyncContext *context); +extern void SendDependencyCreationCommands(MetadataSyncContext *context); +extern void SendDistTableMetadataCommands(MetadataSyncContext *context); +extern void SendDistObjectCommands(MetadataSyncContext *context); +extern void SendInterTableRelationshipCommands(MetadataSyncContext *context); + #define DELETE_ALL_NODES "DELETE FROM pg_dist_node" #define DELETE_ALL_PLACEMENTS "DELETE FROM pg_dist_placement" #define DELETE_ALL_SHARDS "DELETE FROM pg_dist_shard" #define DELETE_ALL_DISTRIBUTED_OBJECTS "DELETE FROM pg_catalog.pg_dist_object" #define DELETE_ALL_PARTITIONS "DELETE FROM pg_dist_partition" #define DELETE_ALL_COLOCATION "DELETE FROM pg_catalog.pg_dist_colocation" -#define REMOVE_ALL_SHELL_TABLES_COMMAND \ - "SELECT worker_drop_shell_table(logicalrelid::regclass::text) FROM pg_dist_partition" +#define WORKER_DROP_ALL_SHELL_TABLES \ + "CALL pg_catalog.worker_drop_all_shell_tables(%s)" +#define CITUS_INTERNAL_MARK_NODE_NOT_SYNCED \ + "SELECT citus_internal_mark_node_not_synced(%d, %d)" + #define REMOVE_ALL_CITUS_TABLES_COMMAND \ "SELECT worker_drop_distributed_table(logicalrelid::regclass::text) FROM pg_dist_partition" #define BREAK_CITUS_TABLE_SEQUENCE_DEPENDENCY_COMMAND \ diff --git a/src/include/distributed/metadata_utility.h b/src/include/distributed/metadata_utility.h index acb4ae5da..64d2e3557 100644 --- a/src/include/distributed/metadata_utility.h +++ b/src/include/distributed/metadata_utility.h @@ -338,7 +338,6 @@ extern List * GetAllDependencyCreateDDLCommands(const List *dependencies); extern bool ShouldPropagate(void); extern bool ShouldPropagateCreateInCoordinatedTransction(void); extern bool ShouldPropagateAnyObject(List *addresses); -extern List * ReplicateAllObjectsToNodeCommandList(const char *nodeName, int nodePort); /* Remaining metadata utility functions */ extern Oid TableOwnerOid(Oid relationId); diff --git a/src/include/distributed/reference_table_utils.h b/src/include/distributed/reference_table_utils.h index ce2de9d9d..cf5a6fd02 100644 --- a/src/include/distributed/reference_table_utils.h +++ b/src/include/distributed/reference_table_utils.h @@ -17,14 +17,20 @@ #include "listutils.h" #include "distributed/metadata_cache.h" +#include "distributed/metadata_sync.h" extern void EnsureReferenceTablesExistOnAllNodes(void); extern void EnsureReferenceTablesExistOnAllNodesExtended(char transferMode); extern bool HasNodesWithMissingReferenceTables(List **referenceTableList); extern uint32 CreateReferenceTableColocationId(void); extern uint32 GetReferenceTableColocationId(void); +extern List * GetAllReplicatedTableList(void); +extern List * ReplicatedPlacementsForNodeGroup(int32 
groupId); +extern char * DeleteShardPlacementCommand(uint64 placementId); extern void DeleteAllReplicatedTablePlacementsFromNodeGroup(int32 groupId, bool localOnly); +extern void DeleteAllReplicatedTablePlacementsFromNodeGroupViaMetadataContext( + MetadataSyncContext *context, int32 groupId, bool localOnly); extern int CompareOids(const void *leftElement, const void *rightElement); extern void ReplicateAllReferenceTablesToNode(WorkerNode *workerNode); extern void ErrorIfNotAllNodesHaveReferenceTableReplicas(List *workerNodeList); diff --git a/src/include/distributed/worker_manager.h b/src/include/distributed/worker_manager.h index bb7abf183..5ad7f4962 100644 --- a/src/include/distributed/worker_manager.h +++ b/src/include/distributed/worker_manager.h @@ -62,9 +62,6 @@ extern int MaxWorkerNodesTracked; extern char *WorkerListFileName; extern char *CurrentCluster; -extern void ActivateNodeList(List *nodeList); -extern int ActivateNode(char *nodeName, int nodePort); - /* Function declarations for finding worker nodes to place shards on */ extern WorkerNode * WorkerGetRandomCandidateNode(List *currentNodeList); extern WorkerNode * WorkerGetRoundRobinCandidateNode(List *workerNodeList, @@ -87,6 +84,7 @@ extern WorkerNode * FindWorkerNode(const char *nodeName, int32 nodePort); extern WorkerNode * FindWorkerNodeOrError(const char *nodeName, int32 nodePort); extern WorkerNode * FindWorkerNodeAnyCluster(const char *nodeName, int32 nodePort); extern WorkerNode * FindNodeWithNodeId(int nodeId, bool missingOk); +extern WorkerNode * ModifiableWorkerNode(const char *nodeName, int32 nodePort); extern List * ReadDistNode(bool includeNodesFromOtherClusters); extern void EnsureCoordinator(void); extern void EnsureCoordinatorIsInMetadata(void); @@ -105,8 +103,6 @@ extern WorkerNode * SetWorkerColumnLocalOnly(WorkerNode *workerNode, int columnI Datum value); extern uint32 CountPrimariesWithMetadata(void); extern WorkerNode * GetFirstPrimaryWorkerNode(void); -extern List * SyncDistributedObjectsCommandList(WorkerNode *workerNode); -extern List * PgDistTableMetadataSyncCommandList(void); /* Function declarations for worker node utilities */ extern int CompareWorkerNodes(const void *leftElement, const void *rightElement); diff --git a/src/include/distributed/worker_transaction.h b/src/include/distributed/worker_transaction.h index aa137b76b..be8fe5ed6 100644 --- a/src/include/distributed/worker_transaction.h +++ b/src/include/distributed/worker_transaction.h @@ -82,6 +82,8 @@ extern void SendCommandListToWorkerOutsideTransaction(const char *nodeName, extern void SendCommandListToWorkerOutsideTransactionWithConnection( MultiConnection *workerConnection, List *commandList); +extern void SendCommandListToWorkerListWithBareConnections(List *workerConnections, + List *commandList); extern void SendMetadataCommandListToWorkerListInCoordinatedTransaction( List *workerNodeList, const char * diff --git a/src/test/regress/expected/failure_mx_metadata_sync_multi_trans.out b/src/test/regress/expected/failure_mx_metadata_sync_multi_trans.out new file mode 100644 index 000000000..3a39f3644 --- /dev/null +++ b/src/test/regress/expected/failure_mx_metadata_sync_multi_trans.out @@ -0,0 +1,687 @@ +-- +-- failure_mx_metadata_sync_multi_trans.sql +-- +CREATE SCHEMA IF NOT EXISTS mx_metadata_sync_multi_trans; +SET SEARCH_PATH = mx_metadata_sync_multi_trans; +SET citus.shard_count TO 2; +SET citus.next_shard_id TO 16000000; +SET citus.shard_replication_factor TO 1; +SET citus.metadata_sync_mode TO 'nontransactional'; +SELECT 
pg_backend_pid() as pid \gset +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\set VERBOSITY terse +SET client_min_messages TO ERROR; +-- Create roles +CREATE ROLE foo1; +CREATE ROLE foo2; +-- Create sequence +CREATE SEQUENCE seq; +-- Create colocated distributed tables +CREATE TABLE dist1 (id int PRIMARY KEY default nextval('seq')); +SELECT create_distributed_table('dist1', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO dist1 SELECT i FROM generate_series(1,100) i; +CREATE TABLE dist2 (id int PRIMARY KEY default nextval('seq')); +SELECT create_distributed_table('dist2', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO dist2 SELECT i FROM generate_series(1,100) i; +-- Create a reference table +CREATE TABLE ref (id int UNIQUE); +SELECT create_reference_table('ref'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO ref SELECT i FROM generate_series(1,100) i; +-- Create local tables +CREATE TABLE loc1 (id int PRIMARY KEY); +INSERT INTO loc1 SELECT i FROM generate_series(1,100) i; +CREATE TABLE loc2 (id int REFERENCES loc1(id)); +INSERT INTO loc2 SELECT i FROM generate_series(1,100) i; +SELECT citus_set_coordinator_host('localhost', :master_port); + citus_set_coordinator_host +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_add_local_table_to_metadata('loc1', cascade_via_foreign_keys => true); + citus_add_local_table_to_metadata +--------------------------------------------------------------------- + +(1 row) + +-- Create partitioned distributed table +CREATE TABLE orders ( + id bigint, + order_time timestamp without time zone NOT NULL, + region_id bigint NOT NULL +) +PARTITION BY RANGE (order_time); +SELECT create_time_partitions( + table_name := 'orders', + partition_interval := '1 day', + start_from := '2020-01-01', + end_at := '2020-01-11' +); + create_time_partitions +--------------------------------------------------------------------- + t +(1 row) + +SELECT create_distributed_table('orders', 'region_id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- Initially turn metadata sync to worker2 off because we'll inject errors into start/stop metadata sync operations +SELECT stop_metadata_sync_to_node('localhost', :worker_2_proxy_port); + stop_metadata_sync_to_node +--------------------------------------------------------------------- + +(1 row) + +SELECT isactive, metadatasynced, hasmetadata FROM pg_dist_node WHERE nodeport=:worker_2_proxy_port; + isactive | metadatasynced | hasmetadata +--------------------------------------------------------------------- + t | f | f +(1 row) + +-- Failure to send local group id +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE pg_dist_local_group SET groupid").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE pg_dist_local_group SET groupid").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', 
:worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to drop node metadata +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_dist_node").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_dist_node").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to send node metadata +SELECT citus.mitmproxy('conn.onQuery(query="INSERT INTO pg_dist_node").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="INSERT INTO pg_dist_node").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to drop sequence +SELECT citus.mitmproxy('conn.onQuery(query="SELECT pg_catalog.worker_drop_sequence_dependency").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="SELECT pg_catalog.worker_drop_sequence_dependency").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to drop shell table +SELECT citus.mitmproxy('conn.onQuery(query="CALL pg_catalog.worker_drop_all_shell_tables").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="CALL pg_catalog.worker_drop_all_shell_tables").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to delete all pg_dist_partition metadata +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_dist_partition").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_dist_partition").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT 
citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to delete all pg_dist_shard metadata +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_dist_shard").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_dist_shard").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to delete all pg_dist_placement metadata +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_dist_placement").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_dist_placement").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to delete all pg_dist_object metadata +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_catalog.pg_dist_object").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_catalog.pg_dist_object").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to delete all pg_dist_colocation metadata +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_catalog.pg_dist_colocation").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_catalog.pg_dist_colocation").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to alter or create role +SELECT citus.mitmproxy('conn.onQuery(query="SELECT worker_create_or_alter_role").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="SELECT worker_create_or_alter_role").kill()'); + mitmproxy 
+--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to set database owner +SELECT citus.mitmproxy('conn.onQuery(query="ALTER DATABASE.*OWNER TO").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="ALTER DATABASE.*OWNER TO").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to create schema +SELECT citus.mitmproxy('conn.onQuery(query="CREATE SCHEMA IF NOT EXISTS mx_metadata_sync_multi_trans AUTHORIZATION").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="CREATE SCHEMA IF NOT EXISTS mx_metadata_sync_multi_trans AUTHORIZATION").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to create sequence +SELECT citus.mitmproxy('conn.onQuery(query="SELECT worker_apply_sequence_command").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="SELECT worker_apply_sequence_command").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to create distributed table +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.dist1").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.dist1").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to create reference table +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.ref").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT 
citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.ref").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to create local table +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.loc1").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.loc1").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to create distributed partitioned table +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.orders").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.orders").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to create distributed partition table +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.orders_p2020_01_05").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.orders_p2020_01_05").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to attach partition +SELECT citus.mitmproxy('conn.onQuery(query="ALTER TABLE mx_metadata_sync_multi_trans.orders ATTACH PARTITION mx_metadata_sync_multi_trans.orders_p2020_01_05").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="ALTER TABLE mx_metadata_sync_multi_trans.orders ATTACH PARTITION mx_metadata_sync_multi_trans.orders_p2020_01_05").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to add partition metadata +SELECT 
citus.mitmproxy('conn.onQuery(query="SELECT citus_internal_add_partition_metadata").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="SELECT citus_internal_add_partition_metadata").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to add shard metadata +SELECT citus.mitmproxy('conn.onQuery(query="SELECT citus_internal_add_shard_metadata").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="SELECT citus_internal_add_shard_metadata").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to add placement metadata +SELECT citus.mitmproxy('conn.onQuery(query="SELECT citus_internal_add_placement_metadata").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="SELECT citus_internal_add_placement_metadata").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to add colocation metadata +SELECT citus.mitmproxy('conn.onQuery(query="SELECT pg_catalog.citus_internal_add_colocation_metadata").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="SELECT pg_catalog.citus_internal_add_colocation_metadata").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to add distributed object metadata +SELECT citus.mitmproxy('conn.onQuery(query="SELECT citus_internal_add_object_metadata").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="SELECT citus_internal_add_object_metadata").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection to the 
remote node localhost:xxxxx failed with the following error: connection not open +-- Failure to set isactive to true +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE pg_dist_node SET isactive = TRUE").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE pg_dist_node SET isactive = TRUE").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection not open +-- Failure to set metadatasynced to true +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE pg_dist_node SET metadatasynced = TRUE").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE pg_dist_node SET metadatasynced = TRUE").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection not open +-- Failure to set hasmetadata to true +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE pg_dist_node SET hasmetadata = TRUE").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE pg_dist_node SET hasmetadata = TRUE").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +ERROR: connection not open +-- Show node metadata info on coordinator after failures +SELECT * FROM pg_dist_node ORDER BY nodeport; + nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards +--------------------------------------------------------------------- + 4 | 4 | localhost | 9060 | default | f | t | primary | default | f | t + 6 | 0 | localhost | 57636 | default | t | t | primary | default | t | f + 1 | 1 | localhost | 57637 | default | t | t | primary | default | t | t +(3 rows) + +-- Show that we can still query the node from coordinator +SELECT COUNT(*) FROM dist1; + count +--------------------------------------------------------------------- + 100 +(1 row) + +-- Verify that the value 103 belongs to a shard at the node to which we failed to sync metadata +SELECT 103 AS failed_node_val \gset +SELECT nodeid AS failed_nodeid FROM pg_dist_node WHERE metadatasynced = false \gset +SELECT get_shard_id_for_distribution_column('dist1', :failed_node_val) AS shardid \gset +SELECT groupid = :failed_nodeid FROM pg_dist_placement WHERE shardid = :shardid; + ?column? 
+--------------------------------------------------------------------- + t +(1 row) + +-- Show that we can still insert into a shard at the node from coordinator +INSERT INTO dist1 VALUES (:failed_node_val); +-- Show that we can still update a shard at the node from coordinator +UPDATE dist1 SET id = :failed_node_val WHERE id = :failed_node_val; +-- Show that we can still delete from a shard at the node from coordinator +DELETE FROM dist1 WHERE id = :failed_node_val; +-- Show that DDL would still propagate to the node +SET client_min_messages TO NOTICE; +SET citus.log_remote_commands TO 1; +CREATE SCHEMA dummy; +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); +NOTICE: issuing SET citus.enable_ddl_propagation TO 'off' +NOTICE: issuing CREATE SCHEMA dummy +NOTICE: issuing SET citus.enable_ddl_propagation TO 'on' +NOTICE: issuing SET citus.enable_ddl_propagation TO 'off' +NOTICE: issuing CREATE SCHEMA dummy +NOTICE: issuing SET citus.enable_ddl_propagation TO 'on' +NOTICE: issuing WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['dummy']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; +NOTICE: issuing PREPARE TRANSACTION 'citus_xx_xx_xx_xx' +NOTICE: issuing PREPARE TRANSACTION 'citus_xx_xx_xx_xx' +NOTICE: issuing COMMIT PREPARED 'citus_xx_xx_xx_xx' +NOTICE: issuing COMMIT PREPARED 'citus_xx_xx_xx_xx' +SET citus.log_remote_commands TO 0; +SET client_min_messages TO ERROR; +-- Successfully activate the node after many failures +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + citus_activate_node +--------------------------------------------------------------------- + 4 +(1 row) + +-- Activate the node once more to verify it works again with already synced metadata +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + citus_activate_node +--------------------------------------------------------------------- + 4 +(1 row) + +-- Show node metadata info on worker2 and coordinator after success +\c - - - :worker_2_port +SELECT * FROM pg_dist_node ORDER BY nodeport; + nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards +--------------------------------------------------------------------- + 4 | 4 | localhost | 9060 | default | t | t | primary | default | t | t + 6 | 0 | localhost | 57636 | default | t | t | primary | default | t | f + 1 | 1 | localhost | 57637 | default | t | t | primary | default | t | t +(3 rows) + +\c - - - :master_port +SELECT * FROM pg_dist_node ORDER BY nodeport; + nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards +--------------------------------------------------------------------- + 4 | 4 | localhost | 9060 | default | t | t | primary | default | t | t + 6 | 0 | localhost | 57636 | default | t | t | primary | default | t | f + 1 | 1 | localhost | 57637 | default | t | t | primary | default | t | t +(3 
rows) + +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +RESET citus.metadata_sync_mode; +DROP SCHEMA dummy; +DROP SCHEMA mx_metadata_sync_multi_trans CASCADE; +NOTICE: drop cascades to 10 other objects +DROP ROLE foo1; +DROP ROLE foo2; +SELECT citus_remove_node('localhost', :master_port); + citus_remove_node +--------------------------------------------------------------------- + +(1 row) + diff --git a/src/test/regress/expected/metadata_sync_helpers.out b/src/test/regress/expected/metadata_sync_helpers.out index ae2f9a04b..f745b0fe2 100644 --- a/src/test/regress/expected/metadata_sync_helpers.out +++ b/src/test/regress/expected/metadata_sync_helpers.out @@ -1197,15 +1197,6 @@ BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED; SELECT citus_internal_delete_shard_metadata(shardid) FROM shard_data; ERROR: must be owner of table super_user_table ROLLBACK; --- the user only allowed to delete shards in a distributed transaction -BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED; - SET application_name to 'citus_internal gpid=10000000001'; - \set VERBOSITY terse - WITH shard_data(shardid) - AS (VALUES (1420007)) - SELECT citus_internal_delete_shard_metadata(shardid) FROM shard_data; -ERROR: This is an internal Citus function can only be used in a distributed transaction -ROLLBACK; -- the user cannot delete non-existing shards BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED; SELECT assign_distributed_transaction_id(0, 8, '2021-07-09 15:41:55.542377+02'); diff --git a/src/test/regress/expected/multi_cluster_management.out b/src/test/regress/expected/multi_cluster_management.out index 56235cc91..0bbbc6899 100644 --- a/src/test/regress/expected/multi_cluster_management.out +++ b/src/test/regress/expected/multi_cluster_management.out @@ -2,19 +2,22 @@ SET citus.next_shard_id TO 1220000; ALTER SEQUENCE pg_catalog.pg_dist_colocationid_seq RESTART 1390000; ALTER SEQUENCE pg_catalog.pg_dist_groupid_seq RESTART 1; -- Tests functions related to cluster membership --- add the nodes to the cluster +-- add the first node to the cluster in transactional mode SELECT 1 FROM master_add_node('localhost', :worker_1_port); ?column? --------------------------------------------------------------------- 1 (1 row) +-- add the second node in nontransactional mode +SET citus.metadata_sync_mode TO 'nontransactional'; SELECT 1 FROM master_add_node('localhost', :worker_2_port); ?column? 
--------------------------------------------------------------------- 1 (1 row) +RESET citus.metadata_sync_mode; -- I am coordinator SELECT citus_is_coordinator(); citus_is_coordinator @@ -374,7 +377,7 @@ SELECT master_get_active_worker_nodes(); SELECT * FROM master_add_node('localhost', :worker_2_port); master_add_node --------------------------------------------------------------------- - 7 + 6 (1 row) ALTER SEQUENCE pg_dist_node_nodeid_seq RESTART WITH 7; @@ -445,7 +448,7 @@ SELECT run_command_on_workers('UPDATE pg_dist_placement SET shardstate=1 WHERE g -- when there is no primary we should get a pretty error UPDATE pg_dist_node SET noderole = 'secondary' WHERE nodeport=:worker_2_port; SELECT * FROM cluster_management_test; -ERROR: node group 6 does not have a primary node +ERROR: node group 5 does not have a primary node -- when there is no node at all in the group we should get a different error DELETE FROM pg_dist_node WHERE nodeport=:worker_2_port; SELECT run_command_on_workers('DELETE FROM pg_dist_node WHERE nodeport=' || :'worker_2_port'); @@ -455,13 +458,12 @@ SELECT run_command_on_workers('DELETE FROM pg_dist_node WHERE nodeport=' || :'wo (1 row) SELECT * FROM cluster_management_test; -ERROR: there is a shard placement in node group 6 but there are no nodes in that group +ERROR: there is a shard placement in node group 5 but there are no nodes in that group -- clean-up SELECT * INTO old_placements FROM pg_dist_placement WHERE groupid = :worker_2_group; DELETE FROM pg_dist_placement WHERE groupid = :worker_2_group; SELECT master_add_node('localhost', :worker_2_port) AS new_node \gset WARNING: could not find any shard placements for shardId 1220001 -WARNING: could not find any shard placements for shardId 1220001 WARNING: could not find any shard placements for shardId 1220003 WARNING: could not find any shard placements for shardId 1220005 WARNING: could not find any shard placements for shardId 1220007 @@ -1202,6 +1204,33 @@ SELECT start_metadata_sync_to_all_nodes(); t (1 row) +-- nontransactional sync mode tests +SET citus.metadata_sync_mode TO 'nontransactional'; +-- do not allow nontransactional sync inside transaction block +BEGIN; + SELECT start_metadata_sync_to_all_nodes(); +ERROR: do not sync metadata in transaction block when the sync mode is nontransactional +HINT: resync after SET citus.metadata_sync_mode TO 'transactional' +COMMIT; +SELECT start_metadata_sync_to_all_nodes(); + start_metadata_sync_to_all_nodes +--------------------------------------------------------------------- + t +(1 row) + +-- do not allow nontransactional node addition inside transaction block +BEGIN; + SELECT citus_remove_node('localhost', :worker_1_port); + citus_remove_node +--------------------------------------------------------------------- + +(1 row) + + SELECT citus_add_node('localhost', :worker_1_port); +ERROR: do not add node in transaction block when the sync mode is nontransactional +HINT: add the node after SET citus.metadata_sync_mode TO 'transactional' +COMMIT; +RESET citus.metadata_sync_mode; -- verify that at the end of this file, all primary nodes have metadata synced SELECT bool_and(hasmetadata) AND bool_and(metadatasynced) FROM pg_dist_node WHERE isactive = 't' and noderole = 'primary'; ?column? 
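The multi_cluster_management hunk above exercises the new citus.metadata_sync_mode GUC: the default 'transactional' mode performs metadata sync as one transaction, while 'nontransactional' performs it without an enclosing transaction and is rejected inside an outer transaction block (see the ERROR/HINT lines above). Below is a minimal usage sketch assembled only from statements that already appear in these tests; note that :worker_2_port is a psql variable defined by the regression suite, so outside the test harness substitute the actual worker port.
-- switch to nontransactional metadata sync; must be run outside BEGIN ... COMMIT
SET citus.metadata_sync_mode TO 'nontransactional';
-- resync metadata to all primary nodes
SELECT start_metadata_sync_to_all_nodes();
-- or (re)activate a single worker; the failure tests above rerun this successfully after injected errors
SELECT citus_activate_node('localhost', :worker_2_port);
-- restore the default transactional mode
RESET citus.metadata_sync_mode;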
diff --git a/src/test/regress/expected/multi_extension.out b/src/test/regress/expected/multi_extension.out index 2b9b70dfa..d0ed4f82a 100644 --- a/src/test/regress/expected/multi_extension.out +++ b/src/test/regress/expected/multi_extension.out @@ -1363,10 +1363,12 @@ SELECT * FROM multi_extension.print_extension_changes(); previous_object | current_object --------------------------------------------------------------------- | function citus_internal_is_replication_origin_tracking_active() boolean + | function citus_internal_mark_node_not_synced(integer,integer) void | function citus_internal_start_replication_origin_tracking() void | function citus_internal_stop_replication_origin_tracking() void | function worker_adjust_identity_column_seq_ranges(regclass) void -(4 rows) + | function worker_drop_all_shell_tables(boolean) +(6 rows) DROP TABLE multi_extension.prev_objects, multi_extension.extension_diff; -- show running version diff --git a/src/test/regress/expected/multi_metadata_sync.out b/src/test/regress/expected/multi_metadata_sync.out index a17dc7634..f371e11e7 100644 --- a/src/test/regress/expected/multi_metadata_sync.out +++ b/src/test/regress/expected/multi_metadata_sync.out @@ -69,9 +69,10 @@ ALTER ROLE CURRENT_USER WITH PASSWORD 'dummypassword'; -- Show that, with no MX tables, activate node snapshot contains only the delete commands, -- pg_dist_node entries, pg_dist_object entries and roles. SELECT unnest(activate_node_snapshot()) order by 1; - unnest + unnest --------------------------------------------------------------------- ALTER DATABASE regression OWNER TO postgres; + CALL pg_catalog.worker_drop_all_shell_tables(true) CREATE SCHEMA IF NOT EXISTS public AUTHORIZATION pg_database_owner DELETE FROM pg_catalog.pg_dist_colocation DELETE FROM pg_catalog.pg_dist_object @@ -89,18 +90,20 @@ SELECT unnest(activate_node_snapshot()) order by 1; SELECT alter_role_if_exists('postgres', 'ALTER ROLE postgres SET lc_messages = ''C''') SELECT pg_catalog.worker_drop_sequence_dependency(logicalrelid::regclass::text) FROM pg_dist_partition SELECT worker_create_or_alter_role('postgres', 'CREATE ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''', 'ALTER ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''') - SELECT worker_drop_shell_table(logicalrelid::regclass::text) FROM pg_dist_partition SET ROLE pg_database_owner SET ROLE pg_database_owner SET citus.enable_ddl_propagation TO 'off' SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' UPDATE pg_dist_local_group SET groupid = 1 - WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false), ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; -(29 rows) + UPDATE pg_dist_node SET hasmetadata = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET 
isactive = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET metadatasynced = TRUE WHERE nodeid = 1 + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; +(32 rows) -- Create a test table with constraints and SERIAL and default from user defined sequence CREATE SEQUENCE user_defined_seq; @@ -127,6 +130,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; ALTER SEQUENCE public.user_defined_seq OWNER TO postgres ALTER TABLE public.mx_test_table ADD CONSTRAINT mx_test_table_col_1_key UNIQUE (col_1) ALTER TABLE public.mx_test_table OWNER TO postgres + CALL pg_catalog.worker_drop_all_shell_tables(true) CREATE SCHEMA IF NOT EXISTS public AUTHORIZATION pg_database_owner CREATE TABLE public.mx_test_table (col_1 integer, col_2 text NOT NULL, col_3 bigint DEFAULT nextval('public.mx_test_table_col_3_seq'::regclass) NOT NULL, col_4 bigint DEFAULT nextval('public.user_defined_seq'::regclass)) USING heap DELETE FROM pg_catalog.pg_dist_colocation @@ -135,6 +139,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; DELETE FROM pg_dist_partition DELETE FROM pg_dist_placement DELETE FROM pg_dist_shard + DROP TABLE IF EXISTS public.mx_test_table CASCADE GRANT CREATE ON SCHEMA public TO PUBLIC; GRANT CREATE ON SCHEMA public TO pg_database_owner; GRANT USAGE ON SCHEMA public TO PUBLIC; @@ -150,21 +155,26 @@ SELECT unnest(activate_node_snapshot()) order by 1; SELECT worker_apply_sequence_command ('CREATE SEQUENCE IF NOT EXISTS public.user_defined_seq AS bigint INCREMENT BY 1 MINVALUE 1 MAXVALUE 9223372036854775807 START WITH 1 CACHE 1 NO CYCLE','bigint') SELECT worker_create_or_alter_role('postgres', 'CREATE ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''', 'ALTER ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''') SELECT worker_create_truncate_trigger('public.mx_test_table') - SELECT worker_drop_shell_table(logicalrelid::regclass::text) FROM pg_dist_partition SET ROLE pg_database_owner SET ROLE pg_database_owner SET citus.enable_ddl_propagation TO 'off' SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' UPDATE pg_dist_local_group SET groupid = 1 + UPDATE pg_dist_node SET 
hasmetadata = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET isactive = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET metadatasynced = TRUE WHERE nodeid = 1 WITH colocation_group_data (colocationid, shardcount, replicationfactor, distributioncolumntype, distributioncolumncollationname, distributioncolumncollationschema) AS (VALUES (2, 8, 1, 'integer'::regtype, NULL, NULL)) SELECT pg_catalog.citus_internal_add_colocation_metadata(colocationid, shardcount, replicationfactor, distributioncolumntype, coalesce(c.oid, 0)) FROM colocation_group_data d LEFT JOIN pg_collation c ON (d.distributioncolumncollationname = c.collname AND d.distributioncolumncollationschema::regnamespace = c.collnamespace) - WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false), ('sequence', ARRAY['public', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['public', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false), ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false), ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['public', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false)) SELECT 
citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; WITH placement_data(shardid, shardlength, groupid, placementid) AS (VALUES (1310000, 0, 1, 100000), (1310001, 0, 2, 100001), (1310002, 0, 1, 100002), (1310003, 0, 2, 100003), (1310004, 0, 1, 100004), (1310005, 0, 2, 100005), (1310006, 0, 1, 100006), (1310007, 0, 2, 100007)) SELECT citus_internal_add_placement_metadata(shardid, shardlength, groupid, placementid) FROM placement_data; WITH shard_data(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) AS (VALUES ('public.mx_test_table'::regclass, 1310000, 't'::"char", '-2147483648', '-1610612737'), ('public.mx_test_table'::regclass, 1310001, 't'::"char", '-1610612736', '-1073741825'), ('public.mx_test_table'::regclass, 1310002, 't'::"char", '-1073741824', '-536870913'), ('public.mx_test_table'::regclass, 1310003, 't'::"char", '-536870912', '-1'), ('public.mx_test_table'::regclass, 1310004, 't'::"char", '0', '536870911'), ('public.mx_test_table'::regclass, 1310005, 't'::"char", '536870912', '1073741823'), ('public.mx_test_table'::regclass, 1310006, 't'::"char", '1073741824', '1610612735'), ('public.mx_test_table'::regclass, 1310007, 't'::"char", '1610612736', '2147483647')) SELECT citus_internal_add_shard_metadata(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) FROM shard_data; -(42 rows) +(49 rows) -- Show that CREATE INDEX commands are included in the activate node snapshot CREATE INDEX mx_index ON mx_test_table(col_2); @@ -176,6 +186,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; ALTER SEQUENCE public.user_defined_seq OWNER TO postgres ALTER TABLE public.mx_test_table ADD CONSTRAINT mx_test_table_col_1_key UNIQUE (col_1) ALTER TABLE public.mx_test_table OWNER TO postgres + CALL pg_catalog.worker_drop_all_shell_tables(true) CREATE INDEX mx_index ON public.mx_test_table USING btree (col_2) CREATE SCHEMA IF NOT EXISTS public AUTHORIZATION pg_database_owner CREATE TABLE public.mx_test_table (col_1 integer, col_2 text NOT NULL, col_3 bigint DEFAULT nextval('public.mx_test_table_col_3_seq'::regclass) NOT NULL, col_4 bigint DEFAULT nextval('public.user_defined_seq'::regclass)) USING heap @@ -185,6 +196,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; DELETE FROM pg_dist_partition DELETE FROM pg_dist_placement DELETE FROM pg_dist_shard + DROP TABLE IF EXISTS public.mx_test_table CASCADE GRANT CREATE ON SCHEMA public TO PUBLIC; GRANT CREATE ON SCHEMA public TO pg_database_owner; GRANT USAGE ON SCHEMA public TO PUBLIC; @@ -200,21 +212,26 @@ SELECT unnest(activate_node_snapshot()) order by 1; SELECT worker_apply_sequence_command ('CREATE SEQUENCE IF NOT EXISTS public.user_defined_seq AS bigint INCREMENT BY 1 MINVALUE 1 MAXVALUE 9223372036854775807 START WITH 1 CACHE 1 NO CYCLE','bigint') SELECT worker_create_or_alter_role('postgres', 'CREATE ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''', 'ALTER ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''') SELECT worker_create_truncate_trigger('public.mx_test_table') - SELECT worker_drop_shell_table(logicalrelid::regclass::text) FROM pg_dist_partition SET ROLE pg_database_owner SET ROLE pg_database_owner SET 
citus.enable_ddl_propagation TO 'off' SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' UPDATE pg_dist_local_group SET groupid = 1 + UPDATE pg_dist_node SET hasmetadata = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET isactive = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET metadatasynced = TRUE WHERE nodeid = 1 WITH colocation_group_data (colocationid, shardcount, replicationfactor, distributioncolumntype, distributioncolumncollationname, distributioncolumncollationschema) AS (VALUES (2, 8, 1, 'integer'::regtype, NULL, NULL)) SELECT pg_catalog.citus_internal_add_colocation_metadata(colocationid, shardcount, replicationfactor, distributioncolumntype, coalesce(c.oid, 0)) FROM colocation_group_data d LEFT JOIN pg_collation c ON (d.distributioncolumncollationname = c.collname AND d.distributioncolumncollationschema::regnamespace = c.collnamespace) - WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false), ('sequence', ARRAY['public', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['public', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false), ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false), ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, 
distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['public', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; WITH placement_data(shardid, shardlength, groupid, placementid) AS (VALUES (1310000, 0, 1, 100000), (1310001, 0, 2, 100001), (1310002, 0, 1, 100002), (1310003, 0, 2, 100003), (1310004, 0, 1, 100004), (1310005, 0, 2, 100005), (1310006, 0, 1, 100006), (1310007, 0, 2, 100007)) SELECT citus_internal_add_placement_metadata(shardid, shardlength, groupid, placementid) FROM placement_data; WITH shard_data(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) AS (VALUES ('public.mx_test_table'::regclass, 1310000, 't'::"char", '-2147483648', '-1610612737'), ('public.mx_test_table'::regclass, 1310001, 't'::"char", '-1610612736', '-1073741825'), ('public.mx_test_table'::regclass, 1310002, 't'::"char", '-1073741824', '-536870913'), ('public.mx_test_table'::regclass, 1310003, 't'::"char", '-536870912', '-1'), ('public.mx_test_table'::regclass, 1310004, 't'::"char", '0', '536870911'), ('public.mx_test_table'::regclass, 1310005, 't'::"char", '536870912', '1073741823'), ('public.mx_test_table'::regclass, 1310006, 't'::"char", '1073741824', '1610612735'), ('public.mx_test_table'::regclass, 1310007, 't'::"char", '1610612736', '2147483647')) SELECT citus_internal_add_shard_metadata(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) FROM shard_data; -(43 rows) +(50 rows) -- Show that schema changes are included in the activate node snapshot CREATE SCHEMA mx_testing_schema; @@ -227,6 +244,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; ALTER SEQUENCE public.user_defined_seq OWNER TO postgres ALTER TABLE mx_testing_schema.mx_test_table ADD CONSTRAINT mx_test_table_col_1_key UNIQUE (col_1) ALTER TABLE mx_testing_schema.mx_test_table OWNER TO postgres + CALL pg_catalog.worker_drop_all_shell_tables(true) CREATE INDEX mx_index ON mx_testing_schema.mx_test_table USING btree (col_2) CREATE SCHEMA IF NOT EXISTS mx_testing_schema AUTHORIZATION postgres CREATE SCHEMA IF NOT EXISTS public AUTHORIZATION pg_database_owner @@ -237,6 +255,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; DELETE FROM pg_dist_partition DELETE FROM pg_dist_placement DELETE FROM pg_dist_shard + DROP TABLE IF EXISTS mx_testing_schema.mx_test_table CASCADE GRANT CREATE ON SCHEMA public TO PUBLIC; GRANT CREATE ON SCHEMA public TO pg_database_owner; GRANT USAGE ON SCHEMA public TO PUBLIC; @@ -252,21 +271,27 @@ SELECT unnest(activate_node_snapshot()) order by 1; SELECT worker_apply_sequence_command ('CREATE SEQUENCE IF NOT EXISTS public.user_defined_seq AS bigint INCREMENT BY 1 MINVALUE 1 MAXVALUE 9223372036854775807 START WITH 1 CACHE 1 NO CYCLE','bigint') SELECT worker_create_or_alter_role('postgres', 'CREATE ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''', 'ALTER ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''') SELECT 
worker_create_truncate_trigger('mx_testing_schema.mx_test_table') - SELECT worker_drop_shell_table(logicalrelid::regclass::text) FROM pg_dist_partition SET ROLE pg_database_owner SET ROLE pg_database_owner SET citus.enable_ddl_propagation TO 'off' SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' UPDATE pg_dist_local_group SET groupid = 1 + UPDATE pg_dist_node SET hasmetadata = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET isactive = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET metadatasynced = TRUE WHERE nodeid = 1 WITH colocation_group_data (colocationid, shardcount, replicationfactor, distributioncolumntype, distributioncolumncollationname, distributioncolumncollationschema) AS (VALUES (2, 8, 1, 'integer'::regtype, NULL, NULL)) SELECT pg_catalog.citus_internal_add_colocation_metadata(colocationid, shardcount, replicationfactor, distributioncolumntype, coalesce(c.oid, 0)) FROM colocation_group_data d LEFT JOIN pg_collation c ON (d.distributioncolumncollationname = c.collname AND d.distributioncolumncollationschema::regnamespace = c.collnamespace) - WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false), ('sequence', ARRAY['mx_testing_schema', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['mx_testing_schema', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false), ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false), ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['mx_testing_schema']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['mx_testing_schema']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + 
WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['mx_testing_schema', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['mx_testing_schema', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; WITH placement_data(shardid, shardlength, groupid, placementid) AS (VALUES (1310000, 0, 1, 100000), (1310001, 0, 2, 100001), (1310002, 0, 1, 100002), (1310003, 0, 2, 100003), (1310004, 0, 1, 100004), (1310005, 0, 2, 100005), (1310006, 0, 1, 100006), (1310007, 0, 2, 100007)) SELECT citus_internal_add_placement_metadata(shardid, shardlength, groupid, placementid) FROM placement_data; WITH shard_data(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) AS (VALUES ('mx_testing_schema.mx_test_table'::regclass, 1310000, 't'::"char", '-2147483648', '-1610612737'), ('mx_testing_schema.mx_test_table'::regclass, 1310001, 't'::"char", '-1610612736', '-1073741825'), ('mx_testing_schema.mx_test_table'::regclass, 1310002, 't'::"char", '-1073741824', '-536870913'), ('mx_testing_schema.mx_test_table'::regclass, 1310003, 't'::"char", '-536870912', '-1'), ('mx_testing_schema.mx_test_table'::regclass, 1310004, 't'::"char", '0', '536870911'), ('mx_testing_schema.mx_test_table'::regclass, 1310005, 't'::"char", '536870912', '1073741823'), ('mx_testing_schema.mx_test_table'::regclass, 1310006, 't'::"char", '1073741824', '1610612735'), ('mx_testing_schema.mx_test_table'::regclass, 1310007, 't'::"char", '1610612736', '2147483647')) SELECT citus_internal_add_shard_metadata(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) FROM shard_data; -(44 rows) +(52 rows) -- Show that append distributed tables are not included in the activate node snapshot CREATE TABLE non_mx_test_table (col_1 int, col_2 text); @@ -285,6 +310,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; ALTER SEQUENCE public.user_defined_seq OWNER TO postgres ALTER TABLE mx_testing_schema.mx_test_table ADD CONSTRAINT mx_test_table_col_1_key UNIQUE (col_1) ALTER TABLE mx_testing_schema.mx_test_table OWNER TO postgres + CALL pg_catalog.worker_drop_all_shell_tables(true) CREATE INDEX mx_index ON mx_testing_schema.mx_test_table USING btree (col_2) CREATE SCHEMA IF NOT EXISTS mx_testing_schema AUTHORIZATION postgres CREATE SCHEMA IF NOT EXISTS public AUTHORIZATION pg_database_owner @@ -295,6 +321,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; DELETE FROM pg_dist_partition DELETE FROM pg_dist_placement DELETE FROM pg_dist_shard + DROP TABLE IF EXISTS mx_testing_schema.mx_test_table CASCADE GRANT CREATE ON SCHEMA public TO PUBLIC; GRANT CREATE ON SCHEMA public TO 
pg_database_owner; GRANT USAGE ON SCHEMA public TO PUBLIC; @@ -310,21 +337,27 @@ SELECT unnest(activate_node_snapshot()) order by 1; SELECT worker_apply_sequence_command ('CREATE SEQUENCE IF NOT EXISTS public.user_defined_seq AS bigint INCREMENT BY 1 MINVALUE 1 MAXVALUE 9223372036854775807 START WITH 1 CACHE 1 NO CYCLE','bigint') SELECT worker_create_or_alter_role('postgres', 'CREATE ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''', 'ALTER ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''') SELECT worker_create_truncate_trigger('mx_testing_schema.mx_test_table') - SELECT worker_drop_shell_table(logicalrelid::regclass::text) FROM pg_dist_partition SET ROLE pg_database_owner SET ROLE pg_database_owner SET citus.enable_ddl_propagation TO 'off' SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' UPDATE pg_dist_local_group SET groupid = 1 + UPDATE pg_dist_node SET hasmetadata = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET isactive = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET metadatasynced = TRUE WHERE nodeid = 1 WITH colocation_group_data (colocationid, shardcount, replicationfactor, distributioncolumntype, distributioncolumncollationname, distributioncolumncollationschema) AS (VALUES (2, 8, 1, 'integer'::regtype, NULL, NULL)) SELECT pg_catalog.citus_internal_add_colocation_metadata(colocationid, shardcount, replicationfactor, distributioncolumntype, coalesce(c.oid, 0)) FROM colocation_group_data d LEFT JOIN pg_collation c ON (d.distributioncolumncollationname = c.collname AND d.distributioncolumncollationschema::regnamespace = c.collnamespace) - WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false), ('sequence', ARRAY['mx_testing_schema', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['mx_testing_schema', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false), ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false), ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['mx_testing_schema']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, 
force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['mx_testing_schema']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['mx_testing_schema', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['mx_testing_schema', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; WITH placement_data(shardid, shardlength, groupid, placementid) AS (VALUES (1310000, 0, 1, 100000), (1310001, 0, 2, 100001), (1310002, 0, 1, 100002), (1310003, 0, 2, 100003), (1310004, 0, 1, 100004), (1310005, 0, 2, 100005), (1310006, 0, 1, 100006), (1310007, 0, 2, 100007)) SELECT citus_internal_add_placement_metadata(shardid, shardlength, groupid, placementid) FROM placement_data; WITH shard_data(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) AS (VALUES ('mx_testing_schema.mx_test_table'::regclass, 1310000, 't'::"char", '-2147483648', '-1610612737'), ('mx_testing_schema.mx_test_table'::regclass, 1310001, 't'::"char", '-1610612736', '-1073741825'), ('mx_testing_schema.mx_test_table'::regclass, 1310002, 't'::"char", '-1073741824', '-536870913'), ('mx_testing_schema.mx_test_table'::regclass, 1310003, 't'::"char", '-536870912', '-1'), ('mx_testing_schema.mx_test_table'::regclass, 1310004, 't'::"char", '0', '536870911'), ('mx_testing_schema.mx_test_table'::regclass, 1310005, 't'::"char", '536870912', '1073741823'), ('mx_testing_schema.mx_test_table'::regclass, 1310006, 't'::"char", '1073741824', '1610612735'), ('mx_testing_schema.mx_test_table'::regclass, 1310007, 't'::"char", '1610612736', '2147483647')) SELECT citus_internal_add_shard_metadata(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) FROM shard_data; -(44 rows) +(52 rows) -- Show that range distributed tables are not included in the activate node snapshot UPDATE pg_dist_partition SET partmethod='r' WHERE logicalrelid='non_mx_test_table'::regclass; @@ -336,6 +369,7 @@ SELECT 
unnest(activate_node_snapshot()) order by 1; ALTER SEQUENCE public.user_defined_seq OWNER TO postgres ALTER TABLE mx_testing_schema.mx_test_table ADD CONSTRAINT mx_test_table_col_1_key UNIQUE (col_1) ALTER TABLE mx_testing_schema.mx_test_table OWNER TO postgres + CALL pg_catalog.worker_drop_all_shell_tables(true) CREATE INDEX mx_index ON mx_testing_schema.mx_test_table USING btree (col_2) CREATE SCHEMA IF NOT EXISTS mx_testing_schema AUTHORIZATION postgres CREATE SCHEMA IF NOT EXISTS public AUTHORIZATION pg_database_owner @@ -346,6 +380,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; DELETE FROM pg_dist_partition DELETE FROM pg_dist_placement DELETE FROM pg_dist_shard + DROP TABLE IF EXISTS mx_testing_schema.mx_test_table CASCADE GRANT CREATE ON SCHEMA public TO PUBLIC; GRANT CREATE ON SCHEMA public TO pg_database_owner; GRANT USAGE ON SCHEMA public TO PUBLIC; @@ -361,21 +396,27 @@ SELECT unnest(activate_node_snapshot()) order by 1; SELECT worker_apply_sequence_command ('CREATE SEQUENCE IF NOT EXISTS public.user_defined_seq AS bigint INCREMENT BY 1 MINVALUE 1 MAXVALUE 9223372036854775807 START WITH 1 CACHE 1 NO CYCLE','bigint') SELECT worker_create_or_alter_role('postgres', 'CREATE ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''', 'ALTER ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''') SELECT worker_create_truncate_trigger('mx_testing_schema.mx_test_table') - SELECT worker_drop_shell_table(logicalrelid::regclass::text) FROM pg_dist_partition SET ROLE pg_database_owner SET ROLE pg_database_owner SET citus.enable_ddl_propagation TO 'off' SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' UPDATE pg_dist_local_group SET groupid = 1 + UPDATE pg_dist_node SET hasmetadata = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET isactive = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET metadatasynced = TRUE WHERE nodeid = 1 WITH colocation_group_data (colocationid, shardcount, replicationfactor, distributioncolumntype, distributioncolumncollationname, distributioncolumncollationschema) AS (VALUES (2, 8, 1, 'integer'::regtype, NULL, NULL)) SELECT pg_catalog.citus_internal_add_colocation_metadata(colocationid, shardcount, replicationfactor, distributioncolumntype, coalesce(c.oid, 0)) FROM colocation_group_data d LEFT JOIN pg_collation c ON (d.distributioncolumncollationname = c.collname AND d.distributioncolumncollationschema::regnamespace = c.collnamespace) - WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false), ('sequence', ARRAY['mx_testing_schema', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['mx_testing_schema', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false), ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false), ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['mx_testing_schema']::text[], ARRAY[]::text[], -1, 0, false)) SELECT 
citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['mx_testing_schema']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['mx_testing_schema', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['mx_testing_schema', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; WITH placement_data(shardid, shardlength, groupid, placementid) AS (VALUES (1310000, 0, 1, 100000), (1310001, 0, 2, 100001), (1310002, 0, 1, 100002), (1310003, 0, 2, 100003), (1310004, 0, 1, 100004), (1310005, 0, 2, 100005), (1310006, 0, 1, 100006), (1310007, 0, 2, 100007)) SELECT citus_internal_add_placement_metadata(shardid, shardlength, groupid, placementid) FROM placement_data; WITH shard_data(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) AS (VALUES ('mx_testing_schema.mx_test_table'::regclass, 1310000, 't'::"char", '-2147483648', '-1610612737'), ('mx_testing_schema.mx_test_table'::regclass, 1310001, 't'::"char", '-1610612736', '-1073741825'), ('mx_testing_schema.mx_test_table'::regclass, 1310002, 't'::"char", '-1073741824', '-536870913'), 
('mx_testing_schema.mx_test_table'::regclass, 1310003, 't'::"char", '-536870912', '-1'), ('mx_testing_schema.mx_test_table'::regclass, 1310004, 't'::"char", '0', '536870911'), ('mx_testing_schema.mx_test_table'::regclass, 1310005, 't'::"char", '536870912', '1073741823'), ('mx_testing_schema.mx_test_table'::regclass, 1310006, 't'::"char", '1073741824', '1610612735'), ('mx_testing_schema.mx_test_table'::regclass, 1310007, 't'::"char", '1610612736', '2147483647')) SELECT citus_internal_add_shard_metadata(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) FROM shard_data; -(44 rows) +(52 rows) -- Test start_metadata_sync_to_node and citus_activate_node UDFs -- Ensure that hasmetadata=false for all nodes @@ -1761,6 +1802,7 @@ ALTER TABLE dist_table_1 ADD COLUMN b int; ERROR: localhost:xxxxx is a metadata node, but is out of sync HINT: If the node is up, wait until metadata gets synced to it and try again. SELECT master_add_node('localhost', :master_port, groupid => 0); +NOTICE: localhost:xxxxx is the coordinator and already contains metadata, skipping syncing the metadata ERROR: localhost:xxxxx is a metadata node, but is out of sync HINT: If the node is up, wait until metadata gets synced to it and try again. SELECT citus_disable_node_and_wait('localhost', :worker_1_port); @@ -1836,7 +1878,7 @@ ALTER TABLE test_table ADD COLUMN id2 int DEFAULT nextval('mx_test_sequence_1'); ALTER TABLE test_table ALTER COLUMN id2 DROP DEFAULT; ALTER TABLE test_table ALTER COLUMN id2 SET DEFAULT nextval('mx_test_sequence_1'); SELECT unnest(activate_node_snapshot()) order by 1; - unnest + unnest --------------------------------------------------------------------- ALTER DATABASE regression OWNER TO postgres; ALTER SEQUENCE mx_testing_schema.mx_test_table_col_3_seq OWNER TO postgres @@ -1854,6 +1896,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; ALTER TABLE public.dist_table_1 OWNER TO postgres ALTER TABLE public.mx_ref OWNER TO postgres ALTER TABLE public.test_table OWNER TO postgres + CALL pg_catalog.worker_drop_all_shell_tables(true) CREATE INDEX mx_index ON mx_testing_schema.mx_test_table USING btree (col_2) CREATE INDEX mx_index_1 ON mx_test_schema_1.mx_table_1 USING btree (col1) CREATE INDEX mx_index_2 ON mx_test_schema_2.mx_table_2 USING btree (col2) @@ -1874,6 +1917,12 @@ SELECT unnest(activate_node_snapshot()) order by 1; DELETE FROM pg_dist_partition DELETE FROM pg_dist_placement DELETE FROM pg_dist_shard + DROP TABLE IF EXISTS mx_test_schema_1.mx_table_1 CASCADE + DROP TABLE IF EXISTS mx_test_schema_2.mx_table_2 CASCADE + DROP TABLE IF EXISTS mx_testing_schema.mx_test_table CASCADE + DROP TABLE IF EXISTS public.dist_table_1 CASCADE + DROP TABLE IF EXISTS public.mx_ref CASCADE + DROP TABLE IF EXISTS public.test_table CASCADE GRANT CREATE ON SCHEMA public TO PUBLIC; GRANT CREATE ON SCHEMA public TO pg_database_owner; GRANT USAGE ON SCHEMA public TO PUBLIC; @@ -1901,18 +1950,35 @@ SELECT unnest(activate_node_snapshot()) order by 1; SELECT worker_create_truncate_trigger('public.dist_table_1') SELECT worker_create_truncate_trigger('public.mx_ref') SELECT worker_create_truncate_trigger('public.test_table') - SELECT worker_drop_shell_table(logicalrelid::regclass::text) FROM pg_dist_partition SET ROLE pg_database_owner SET ROLE pg_database_owner SET citus.enable_ddl_propagation TO 'off' SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation 
TO 'on' UPDATE pg_dist_local_group SET groupid = 1 - WITH colocation_group_data (colocationid, shardcount, replicationfactor, distributioncolumntype, distributioncolumncollationname, distributioncolumncollationschema) AS (VALUES (10009, 1, -1, 0, NULL, NULL), (10010, 4, 1, 'integer'::regtype, NULL, NULL)) SELECT pg_catalog.citus_internal_add_colocation_metadata(colocationid, shardcount, replicationfactor, distributioncolumntype, coalesce(c.oid, 0)) FROM colocation_group_data d LEFT JOIN pg_collation c ON (d.distributioncolumncollationname = c.collname AND d.distributioncolumncollationschema::regnamespace = c.collnamespace) - WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false), ('sequence', ARRAY['mx_testing_schema', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['mx_testing_schema', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['mx_test_schema_1', 'mx_table_1']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['mx_test_schema_2', 'mx_table_2']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['public', 'mx_ref']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['public', 'dist_table_1']::text[], ARRAY[]::text[], -1, 0, false), ('sequence', ARRAY['public', 'mx_test_sequence_0']::text[], ARRAY[]::text[], -1, 0, false), ('sequence', ARRAY['public', 'mx_test_sequence_1']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['public', 'test_table']::text[], ARRAY[]::text[], -1, 0, false), ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false), ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['mx_testing_schema']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['mx_testing_schema_2']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['mx_test_schema_1']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['mx_test_schema_2']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + UPDATE pg_dist_node SET hasmetadata = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET isactive = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET metadatasynced = TRUE WHERE nodeid = 1 + WITH colocation_group_data (colocationid, shardcount, replicationfactor, distributioncolumntype, distributioncolumncollationname, distributioncolumncollationschema) AS (VALUES (10009, 1, -1, 0, NULL, NULL)) SELECT pg_catalog.citus_internal_add_colocation_metadata(colocationid, shardcount, replicationfactor, distributioncolumntype, coalesce(c.oid, 0)) FROM colocation_group_data d LEFT JOIN pg_collation c ON (d.distributioncolumncollationname = c.collname AND d.distributioncolumncollationschema::regnamespace = c.collnamespace) + WITH colocation_group_data (colocationid, shardcount, replicationfactor, distributioncolumntype, distributioncolumncollationname, distributioncolumncollationschema) AS (VALUES (10010, 4, 1, 'integer'::regtype, NULL, NULL)) SELECT pg_catalog.citus_internal_add_colocation_metadata(colocationid, shardcount, replicationfactor, distributioncolumntype, coalesce(c.oid, 0)) FROM colocation_group_data d LEFT JOIN pg_collation c ON (d.distributioncolumncollationname = c.collname AND 
d.distributioncolumncollationschema::regnamespace = c.collnamespace) + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['mx_test_schema_1']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['mx_test_schema_2']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['mx_testing_schema']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['mx_testing_schema_2']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['mx_testing_schema', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'mx_test_sequence_0']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', 
ARRAY['public', 'mx_test_sequence_1']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['mx_test_schema_1', 'mx_table_1']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['mx_test_schema_2', 'mx_table_2']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['mx_testing_schema', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['public', 'dist_table_1']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['public', 'mx_ref']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['public', 'test_table']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; WITH placement_data(shardid, shardlength, groupid, placementid) AS (VALUES (1310000, 0, 1, 100000), (1310001, 0, 5, 100001), (1310002, 0, 1, 100002), (1310003, 0, 5, 100003), (1310004, 0, 1, 100004), (1310005, 0, 5, 100005), (1310006, 0, 1, 100006), (1310007, 0, 5, 100007)) SELECT citus_internal_add_placement_metadata(shardid, shardlength, groupid, placementid) FROM placement_data; WITH placement_data(shardid, shardlength, groupid, placementid) AS (VALUES (1310020, 0, 1, 100020), (1310021, 0, 5, 100021), (1310022, 0, 1, 100022), (1310023, 0, 5, 100023), (1310024, 0, 1, 100024)) SELECT citus_internal_add_placement_metadata(shardid, shardlength, groupid, 
placementid) FROM placement_data; WITH placement_data(shardid, shardlength, groupid, placementid) AS (VALUES (1310025, 0, 1, 100025), (1310026, 0, 5, 100026), (1310027, 0, 1, 100027), (1310028, 0, 5, 100028), (1310029, 0, 1, 100029)) SELECT citus_internal_add_placement_metadata(shardid, shardlength, groupid, placementid) FROM placement_data; @@ -1925,7 +1991,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; WITH shard_data(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) AS (VALUES ('public.dist_table_1'::regclass, 1310074, 't'::"char", '-2147483648', '-1073741825'), ('public.dist_table_1'::regclass, 1310075, 't'::"char", '-1073741824', '-1'), ('public.dist_table_1'::regclass, 1310076, 't'::"char", '0', '1073741823'), ('public.dist_table_1'::regclass, 1310077, 't'::"char", '1073741824', '2147483647')) SELECT citus_internal_add_shard_metadata(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) FROM shard_data; WITH shard_data(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) AS (VALUES ('public.mx_ref'::regclass, 1310073, 't'::"char", NULL, NULL)) SELECT citus_internal_add_shard_metadata(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) FROM shard_data; WITH shard_data(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) AS (VALUES ('public.test_table'::regclass, 1310083, 't'::"char", '-2147483648', '-1073741825'), ('public.test_table'::regclass, 1310084, 't'::"char", '-1073741824', '-1'), ('public.test_table'::regclass, 1310085, 't'::"char", '0', '1073741823'), ('public.test_table'::regclass, 1310086, 't'::"char", '1073741824', '2147483647')) SELECT citus_internal_add_shard_metadata(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) FROM shard_data; -(87 rows) +(111 rows) -- shouldn't work since test_table is MX ALTER TABLE test_table ADD COLUMN id3 bigserial; diff --git a/src/test/regress/expected/multi_metadata_sync_0.out b/src/test/regress/expected/multi_metadata_sync_0.out index b7998db1e..5d5aa56dd 100644 --- a/src/test/regress/expected/multi_metadata_sync_0.out +++ b/src/test/regress/expected/multi_metadata_sync_0.out @@ -69,9 +69,10 @@ ALTER ROLE CURRENT_USER WITH PASSWORD 'dummypassword'; -- Show that, with no MX tables, activate node snapshot contains only the delete commands, -- pg_dist_node entries, pg_dist_object entries and roles. 
SELECT unnest(activate_node_snapshot()) order by 1; - unnest + unnest --------------------------------------------------------------------- ALTER DATABASE regression OWNER TO postgres; + CALL pg_catalog.worker_drop_all_shell_tables(true) CREATE SCHEMA IF NOT EXISTS public AUTHORIZATION postgres DELETE FROM pg_catalog.pg_dist_colocation DELETE FROM pg_catalog.pg_dist_object @@ -89,18 +90,20 @@ SELECT unnest(activate_node_snapshot()) order by 1; SELECT alter_role_if_exists('postgres', 'ALTER ROLE postgres SET lc_messages = ''C''') SELECT pg_catalog.worker_drop_sequence_dependency(logicalrelid::regclass::text) FROM pg_dist_partition SELECT worker_create_or_alter_role('postgres', 'CREATE ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''', 'ALTER ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''') - SELECT worker_drop_shell_table(logicalrelid::regclass::text) FROM pg_dist_partition SET ROLE postgres SET ROLE postgres SET citus.enable_ddl_propagation TO 'off' SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' UPDATE pg_dist_local_group SET groupid = 1 - WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false), ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; -(29 rows) + UPDATE pg_dist_node SET hasmetadata = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET isactive = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET metadatasynced = TRUE WHERE nodeid = 1 + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; +(32 rows) -- Create a test table with constraints and SERIAL and default from user defined sequence CREATE SEQUENCE user_defined_seq; @@ -127,6 +130,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; ALTER SEQUENCE public.user_defined_seq OWNER TO postgres ALTER TABLE 
public.mx_test_table ADD CONSTRAINT mx_test_table_col_1_key UNIQUE (col_1) ALTER TABLE public.mx_test_table OWNER TO postgres + CALL pg_catalog.worker_drop_all_shell_tables(true) CREATE SCHEMA IF NOT EXISTS public AUTHORIZATION postgres CREATE TABLE public.mx_test_table (col_1 integer, col_2 text NOT NULL, col_3 bigint DEFAULT nextval('public.mx_test_table_col_3_seq'::regclass) NOT NULL, col_4 bigint DEFAULT nextval('public.user_defined_seq'::regclass)) USING heap DELETE FROM pg_catalog.pg_dist_colocation @@ -135,6 +139,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; DELETE FROM pg_dist_partition DELETE FROM pg_dist_placement DELETE FROM pg_dist_shard + DROP TABLE IF EXISTS public.mx_test_table CASCADE GRANT CREATE ON SCHEMA public TO PUBLIC; GRANT CREATE ON SCHEMA public TO postgres; GRANT USAGE ON SCHEMA public TO PUBLIC; @@ -150,21 +155,26 @@ SELECT unnest(activate_node_snapshot()) order by 1; SELECT worker_apply_sequence_command ('CREATE SEQUENCE IF NOT EXISTS public.user_defined_seq AS bigint INCREMENT BY 1 MINVALUE 1 MAXVALUE 9223372036854775807 START WITH 1 CACHE 1 NO CYCLE','bigint') SELECT worker_create_or_alter_role('postgres', 'CREATE ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''', 'ALTER ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''') SELECT worker_create_truncate_trigger('public.mx_test_table') - SELECT worker_drop_shell_table(logicalrelid::regclass::text) FROM pg_dist_partition SET ROLE postgres SET ROLE postgres SET citus.enable_ddl_propagation TO 'off' SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' UPDATE pg_dist_local_group SET groupid = 1 + UPDATE pg_dist_node SET hasmetadata = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET isactive = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET metadatasynced = TRUE WHERE nodeid = 1 WITH colocation_group_data (colocationid, shardcount, replicationfactor, distributioncolumntype, distributioncolumncollationname, distributioncolumncollationschema) AS (VALUES (2, 8, 1, 'integer'::regtype, NULL, NULL)) SELECT pg_catalog.citus_internal_add_colocation_metadata(colocationid, shardcount, replicationfactor, distributioncolumntype, coalesce(c.oid, 0)) FROM colocation_group_data d LEFT JOIN pg_collation c ON (d.distributioncolumncollationname = c.collname AND d.distributioncolumncollationschema::regnamespace = c.collnamespace) - WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false), ('sequence', ARRAY['public', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['public', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false), ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false), ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH 
distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['public', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; WITH placement_data(shardid, shardlength, groupid, placementid) AS (VALUES (1310000, 0, 1, 100000), (1310001, 0, 2, 100001), (1310002, 0, 1, 100002), (1310003, 0, 2, 100003), (1310004, 0, 1, 100004), (1310005, 0, 2, 100005), (1310006, 0, 1, 100006), (1310007, 0, 2, 100007)) SELECT citus_internal_add_placement_metadata(shardid, shardlength, groupid, placementid) FROM placement_data; WITH shard_data(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) AS (VALUES ('public.mx_test_table'::regclass, 1310000, 't'::"char", '-2147483648', '-1610612737'), ('public.mx_test_table'::regclass, 1310001, 't'::"char", '-1610612736', '-1073741825'), ('public.mx_test_table'::regclass, 1310002, 't'::"char", '-1073741824', '-536870913'), ('public.mx_test_table'::regclass, 1310003, 't'::"char", '-536870912', '-1'), ('public.mx_test_table'::regclass, 1310004, 't'::"char", '0', '536870911'), ('public.mx_test_table'::regclass, 1310005, 't'::"char", '536870912', '1073741823'), ('public.mx_test_table'::regclass, 1310006, 't'::"char", '1073741824', '1610612735'), ('public.mx_test_table'::regclass, 1310007, 't'::"char", '1610612736', '2147483647')) SELECT citus_internal_add_shard_metadata(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) FROM shard_data; -(42 rows) +(49 rows) -- Show that CREATE INDEX commands are 
included in the activate node snapshot CREATE INDEX mx_index ON mx_test_table(col_2); @@ -176,6 +186,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; ALTER SEQUENCE public.user_defined_seq OWNER TO postgres ALTER TABLE public.mx_test_table ADD CONSTRAINT mx_test_table_col_1_key UNIQUE (col_1) ALTER TABLE public.mx_test_table OWNER TO postgres + CALL pg_catalog.worker_drop_all_shell_tables(true) CREATE INDEX mx_index ON public.mx_test_table USING btree (col_2) CREATE SCHEMA IF NOT EXISTS public AUTHORIZATION postgres CREATE TABLE public.mx_test_table (col_1 integer, col_2 text NOT NULL, col_3 bigint DEFAULT nextval('public.mx_test_table_col_3_seq'::regclass) NOT NULL, col_4 bigint DEFAULT nextval('public.user_defined_seq'::regclass)) USING heap @@ -185,6 +196,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; DELETE FROM pg_dist_partition DELETE FROM pg_dist_placement DELETE FROM pg_dist_shard + DROP TABLE IF EXISTS public.mx_test_table CASCADE GRANT CREATE ON SCHEMA public TO PUBLIC; GRANT CREATE ON SCHEMA public TO postgres; GRANT USAGE ON SCHEMA public TO PUBLIC; @@ -200,21 +212,26 @@ SELECT unnest(activate_node_snapshot()) order by 1; SELECT worker_apply_sequence_command ('CREATE SEQUENCE IF NOT EXISTS public.user_defined_seq AS bigint INCREMENT BY 1 MINVALUE 1 MAXVALUE 9223372036854775807 START WITH 1 CACHE 1 NO CYCLE','bigint') SELECT worker_create_or_alter_role('postgres', 'CREATE ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''', 'ALTER ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''') SELECT worker_create_truncate_trigger('public.mx_test_table') - SELECT worker_drop_shell_table(logicalrelid::regclass::text) FROM pg_dist_partition SET ROLE postgres SET ROLE postgres SET citus.enable_ddl_propagation TO 'off' SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' UPDATE pg_dist_local_group SET groupid = 1 + UPDATE pg_dist_node SET hasmetadata = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET isactive = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET metadatasynced = TRUE WHERE nodeid = 1 WITH colocation_group_data (colocationid, shardcount, replicationfactor, distributioncolumntype, distributioncolumncollationname, distributioncolumncollationschema) AS (VALUES (2, 8, 1, 'integer'::regtype, NULL, NULL)) SELECT pg_catalog.citus_internal_add_colocation_metadata(colocationid, shardcount, replicationfactor, distributioncolumntype, coalesce(c.oid, 0)) FROM colocation_group_data d LEFT JOIN pg_collation c ON (d.distributioncolumncollationname = c.collname AND d.distributioncolumncollationschema::regnamespace = c.collnamespace) - WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false), ('sequence', ARRAY['public', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['public', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false), ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false), ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false), 
('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['public', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; WITH placement_data(shardid, shardlength, groupid, placementid) AS (VALUES (1310000, 0, 1, 100000), (1310001, 0, 2, 100001), (1310002, 0, 1, 100002), (1310003, 0, 2, 100003), (1310004, 0, 1, 100004), (1310005, 0, 2, 100005), (1310006, 0, 1, 100006), (1310007, 0, 2, 100007)) SELECT citus_internal_add_placement_metadata(shardid, shardlength, groupid, placementid) FROM placement_data; WITH shard_data(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) AS (VALUES ('public.mx_test_table'::regclass, 1310000, 't'::"char", '-2147483648', '-1610612737'), ('public.mx_test_table'::regclass, 1310001, 't'::"char", '-1610612736', '-1073741825'), ('public.mx_test_table'::regclass, 1310002, 't'::"char", '-1073741824', '-536870913'), ('public.mx_test_table'::regclass, 1310003, 't'::"char", '-536870912', '-1'), ('public.mx_test_table'::regclass, 1310004, 't'::"char", '0', '536870911'), ('public.mx_test_table'::regclass, 1310005, 't'::"char", '536870912', '1073741823'), ('public.mx_test_table'::regclass, 1310006, 't'::"char", '1073741824', '1610612735'), 
('public.mx_test_table'::regclass, 1310007, 't'::"char", '1610612736', '2147483647')) SELECT citus_internal_add_shard_metadata(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) FROM shard_data; -(43 rows) +(50 rows) -- Show that schema changes are included in the activate node snapshot CREATE SCHEMA mx_testing_schema; @@ -227,6 +244,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; ALTER SEQUENCE public.user_defined_seq OWNER TO postgres ALTER TABLE mx_testing_schema.mx_test_table ADD CONSTRAINT mx_test_table_col_1_key UNIQUE (col_1) ALTER TABLE mx_testing_schema.mx_test_table OWNER TO postgres + CALL pg_catalog.worker_drop_all_shell_tables(true) CREATE INDEX mx_index ON mx_testing_schema.mx_test_table USING btree (col_2) CREATE SCHEMA IF NOT EXISTS mx_testing_schema AUTHORIZATION postgres CREATE SCHEMA IF NOT EXISTS public AUTHORIZATION postgres @@ -237,6 +255,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; DELETE FROM pg_dist_partition DELETE FROM pg_dist_placement DELETE FROM pg_dist_shard + DROP TABLE IF EXISTS mx_testing_schema.mx_test_table CASCADE GRANT CREATE ON SCHEMA public TO PUBLIC; GRANT CREATE ON SCHEMA public TO postgres; GRANT USAGE ON SCHEMA public TO PUBLIC; @@ -252,21 +271,27 @@ SELECT unnest(activate_node_snapshot()) order by 1; SELECT worker_apply_sequence_command ('CREATE SEQUENCE IF NOT EXISTS public.user_defined_seq AS bigint INCREMENT BY 1 MINVALUE 1 MAXVALUE 9223372036854775807 START WITH 1 CACHE 1 NO CYCLE','bigint') SELECT worker_create_or_alter_role('postgres', 'CREATE ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''', 'ALTER ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''') SELECT worker_create_truncate_trigger('mx_testing_schema.mx_test_table') - SELECT worker_drop_shell_table(logicalrelid::regclass::text) FROM pg_dist_partition SET ROLE postgres SET ROLE postgres SET citus.enable_ddl_propagation TO 'off' SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' UPDATE pg_dist_local_group SET groupid = 1 + UPDATE pg_dist_node SET hasmetadata = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET isactive = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET metadatasynced = TRUE WHERE nodeid = 1 WITH colocation_group_data (colocationid, shardcount, replicationfactor, distributioncolumntype, distributioncolumncollationname, distributioncolumncollationschema) AS (VALUES (2, 8, 1, 'integer'::regtype, NULL, NULL)) SELECT pg_catalog.citus_internal_add_colocation_metadata(colocationid, shardcount, replicationfactor, distributioncolumntype, coalesce(c.oid, 0)) FROM colocation_group_data d LEFT JOIN pg_collation c ON (d.distributioncolumncollationname = c.collname AND d.distributioncolumncollationschema::regnamespace = c.collnamespace) - WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false), ('sequence', ARRAY['mx_testing_schema', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['mx_testing_schema', 'mx_test_table']::text[], 
ARRAY[]::text[], -1, 0, false), ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false), ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['mx_testing_schema']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['mx_testing_schema']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['mx_testing_schema', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['mx_testing_schema', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; WITH placement_data(shardid, shardlength, groupid, placementid) AS (VALUES (1310000, 0, 1, 100000), (1310001, 0, 2, 100001), (1310002, 0, 1, 100002), (1310003, 0, 2, 100003), (1310004, 0, 1, 100004), (1310005, 0, 2, 100005), (1310006, 0, 1, 100006), (1310007, 0, 2, 100007)) SELECT citus_internal_add_placement_metadata(shardid, shardlength, groupid, placementid) FROM placement_data; WITH shard_data(relationname, shardid, storagetype, shardminvalue, 
shardmaxvalue) AS (VALUES ('mx_testing_schema.mx_test_table'::regclass, 1310000, 't'::"char", '-2147483648', '-1610612737'), ('mx_testing_schema.mx_test_table'::regclass, 1310001, 't'::"char", '-1610612736', '-1073741825'), ('mx_testing_schema.mx_test_table'::regclass, 1310002, 't'::"char", '-1073741824', '-536870913'), ('mx_testing_schema.mx_test_table'::regclass, 1310003, 't'::"char", '-536870912', '-1'), ('mx_testing_schema.mx_test_table'::regclass, 1310004, 't'::"char", '0', '536870911'), ('mx_testing_schema.mx_test_table'::regclass, 1310005, 't'::"char", '536870912', '1073741823'), ('mx_testing_schema.mx_test_table'::regclass, 1310006, 't'::"char", '1073741824', '1610612735'), ('mx_testing_schema.mx_test_table'::regclass, 1310007, 't'::"char", '1610612736', '2147483647')) SELECT citus_internal_add_shard_metadata(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) FROM shard_data; -(44 rows) +(52 rows) -- Show that append distributed tables are not included in the activate node snapshot CREATE TABLE non_mx_test_table (col_1 int, col_2 text); @@ -285,6 +310,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; ALTER SEQUENCE public.user_defined_seq OWNER TO postgres ALTER TABLE mx_testing_schema.mx_test_table ADD CONSTRAINT mx_test_table_col_1_key UNIQUE (col_1) ALTER TABLE mx_testing_schema.mx_test_table OWNER TO postgres + CALL pg_catalog.worker_drop_all_shell_tables(true) CREATE INDEX mx_index ON mx_testing_schema.mx_test_table USING btree (col_2) CREATE SCHEMA IF NOT EXISTS mx_testing_schema AUTHORIZATION postgres CREATE SCHEMA IF NOT EXISTS public AUTHORIZATION postgres @@ -295,6 +321,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; DELETE FROM pg_dist_partition DELETE FROM pg_dist_placement DELETE FROM pg_dist_shard + DROP TABLE IF EXISTS mx_testing_schema.mx_test_table CASCADE GRANT CREATE ON SCHEMA public TO PUBLIC; GRANT CREATE ON SCHEMA public TO postgres; GRANT USAGE ON SCHEMA public TO PUBLIC; @@ -310,21 +337,27 @@ SELECT unnest(activate_node_snapshot()) order by 1; SELECT worker_apply_sequence_command ('CREATE SEQUENCE IF NOT EXISTS public.user_defined_seq AS bigint INCREMENT BY 1 MINVALUE 1 MAXVALUE 9223372036854775807 START WITH 1 CACHE 1 NO CYCLE','bigint') SELECT worker_create_or_alter_role('postgres', 'CREATE ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''', 'ALTER ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''') SELECT worker_create_truncate_trigger('mx_testing_schema.mx_test_table') - SELECT worker_drop_shell_table(logicalrelid::regclass::text) FROM pg_dist_partition SET ROLE postgres SET ROLE postgres SET citus.enable_ddl_propagation TO 'off' SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' UPDATE pg_dist_local_group SET groupid = 1 + UPDATE pg_dist_node SET hasmetadata = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET isactive = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET metadatasynced = TRUE WHERE nodeid = 1 WITH colocation_group_data (colocationid, shardcount, replicationfactor, distributioncolumntype, distributioncolumncollationname, distributioncolumncollationschema) AS (VALUES (2, 8, 1, 
'integer'::regtype, NULL, NULL)) SELECT pg_catalog.citus_internal_add_colocation_metadata(colocationid, shardcount, replicationfactor, distributioncolumntype, coalesce(c.oid, 0)) FROM colocation_group_data d LEFT JOIN pg_collation c ON (d.distributioncolumncollationname = c.collname AND d.distributioncolumncollationschema::regnamespace = c.collnamespace) - WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false), ('sequence', ARRAY['mx_testing_schema', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['mx_testing_schema', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false), ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false), ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['mx_testing_schema']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['mx_testing_schema']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['mx_testing_schema', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, 
force_delegation) AS (VALUES ('table', ARRAY['mx_testing_schema', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; WITH placement_data(shardid, shardlength, groupid, placementid) AS (VALUES (1310000, 0, 1, 100000), (1310001, 0, 2, 100001), (1310002, 0, 1, 100002), (1310003, 0, 2, 100003), (1310004, 0, 1, 100004), (1310005, 0, 2, 100005), (1310006, 0, 1, 100006), (1310007, 0, 2, 100007)) SELECT citus_internal_add_placement_metadata(shardid, shardlength, groupid, placementid) FROM placement_data; WITH shard_data(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) AS (VALUES ('mx_testing_schema.mx_test_table'::regclass, 1310000, 't'::"char", '-2147483648', '-1610612737'), ('mx_testing_schema.mx_test_table'::regclass, 1310001, 't'::"char", '-1610612736', '-1073741825'), ('mx_testing_schema.mx_test_table'::regclass, 1310002, 't'::"char", '-1073741824', '-536870913'), ('mx_testing_schema.mx_test_table'::regclass, 1310003, 't'::"char", '-536870912', '-1'), ('mx_testing_schema.mx_test_table'::regclass, 1310004, 't'::"char", '0', '536870911'), ('mx_testing_schema.mx_test_table'::regclass, 1310005, 't'::"char", '536870912', '1073741823'), ('mx_testing_schema.mx_test_table'::regclass, 1310006, 't'::"char", '1073741824', '1610612735'), ('mx_testing_schema.mx_test_table'::regclass, 1310007, 't'::"char", '1610612736', '2147483647')) SELECT citus_internal_add_shard_metadata(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) FROM shard_data; -(44 rows) +(52 rows) -- Show that range distributed tables are not included in the activate node snapshot UPDATE pg_dist_partition SET partmethod='r' WHERE logicalrelid='non_mx_test_table'::regclass; @@ -336,6 +369,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; ALTER SEQUENCE public.user_defined_seq OWNER TO postgres ALTER TABLE mx_testing_schema.mx_test_table ADD CONSTRAINT mx_test_table_col_1_key UNIQUE (col_1) ALTER TABLE mx_testing_schema.mx_test_table OWNER TO postgres + CALL pg_catalog.worker_drop_all_shell_tables(true) CREATE INDEX mx_index ON mx_testing_schema.mx_test_table USING btree (col_2) CREATE SCHEMA IF NOT EXISTS mx_testing_schema AUTHORIZATION postgres CREATE SCHEMA IF NOT EXISTS public AUTHORIZATION postgres @@ -346,6 +380,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; DELETE FROM pg_dist_partition DELETE FROM pg_dist_placement DELETE FROM pg_dist_shard + DROP TABLE IF EXISTS mx_testing_schema.mx_test_table CASCADE GRANT CREATE ON SCHEMA public TO PUBLIC; GRANT CREATE ON SCHEMA public TO postgres; GRANT USAGE ON SCHEMA public TO PUBLIC; @@ -361,21 +396,27 @@ SELECT unnest(activate_node_snapshot()) order by 1; SELECT worker_apply_sequence_command ('CREATE SEQUENCE IF NOT EXISTS public.user_defined_seq AS bigint INCREMENT BY 1 MINVALUE 1 MAXVALUE 9223372036854775807 START WITH 1 CACHE 1 NO CYCLE','bigint') SELECT worker_create_or_alter_role('postgres', 'CREATE ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''', 'ALTER ROLE postgres SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 0 PASSWORD ''md5c53670dddfc3bb4b5675c7872bc2249a'' VALID UNTIL ''2052-05-05 00:00:00-07''') SELECT worker_create_truncate_trigger('mx_testing_schema.mx_test_table') - SELECT 
worker_drop_shell_table(logicalrelid::regclass::text) FROM pg_dist_partition SET ROLE postgres SET ROLE postgres SET citus.enable_ddl_propagation TO 'off' SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' UPDATE pg_dist_local_group SET groupid = 1 + UPDATE pg_dist_node SET hasmetadata = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET isactive = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET metadatasynced = TRUE WHERE nodeid = 1 WITH colocation_group_data (colocationid, shardcount, replicationfactor, distributioncolumntype, distributioncolumncollationname, distributioncolumncollationschema) AS (VALUES (2, 8, 1, 'integer'::regtype, NULL, NULL)) SELECT pg_catalog.citus_internal_add_colocation_metadata(colocationid, shardcount, replicationfactor, distributioncolumntype, coalesce(c.oid, 0)) FROM colocation_group_data d LEFT JOIN pg_collation c ON (d.distributioncolumncollationname = c.collname AND d.distributioncolumncollationschema::regnamespace = c.collnamespace) - WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false), ('sequence', ARRAY['mx_testing_schema', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['mx_testing_schema', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false), ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false), ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['mx_testing_schema']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['mx_testing_schema']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, 
force_delegation) AS (VALUES ('sequence', ARRAY['mx_testing_schema', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['mx_testing_schema', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; WITH placement_data(shardid, shardlength, groupid, placementid) AS (VALUES (1310000, 0, 1, 100000), (1310001, 0, 2, 100001), (1310002, 0, 1, 100002), (1310003, 0, 2, 100003), (1310004, 0, 1, 100004), (1310005, 0, 2, 100005), (1310006, 0, 1, 100006), (1310007, 0, 2, 100007)) SELECT citus_internal_add_placement_metadata(shardid, shardlength, groupid, placementid) FROM placement_data; WITH shard_data(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) AS (VALUES ('mx_testing_schema.mx_test_table'::regclass, 1310000, 't'::"char", '-2147483648', '-1610612737'), ('mx_testing_schema.mx_test_table'::regclass, 1310001, 't'::"char", '-1610612736', '-1073741825'), ('mx_testing_schema.mx_test_table'::regclass, 1310002, 't'::"char", '-1073741824', '-536870913'), ('mx_testing_schema.mx_test_table'::regclass, 1310003, 't'::"char", '-536870912', '-1'), ('mx_testing_schema.mx_test_table'::regclass, 1310004, 't'::"char", '0', '536870911'), ('mx_testing_schema.mx_test_table'::regclass, 1310005, 't'::"char", '536870912', '1073741823'), ('mx_testing_schema.mx_test_table'::regclass, 1310006, 't'::"char", '1073741824', '1610612735'), ('mx_testing_schema.mx_test_table'::regclass, 1310007, 't'::"char", '1610612736', '2147483647')) SELECT citus_internal_add_shard_metadata(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) FROM shard_data; -(44 rows) +(52 rows) -- Test start_metadata_sync_to_node and citus_activate_node UDFs -- Ensure that hasmetadata=false for all nodes @@ -1761,6 +1802,7 @@ ALTER TABLE dist_table_1 ADD COLUMN b int; ERROR: localhost:xxxxx is a metadata node, but is out of sync HINT: If the node is up, wait until metadata gets synced to it and try again. SELECT master_add_node('localhost', :master_port, groupid => 0); +NOTICE: localhost:xxxxx is the coordinator and already contains metadata, skipping syncing the metadata ERROR: localhost:xxxxx is a metadata node, but is out of sync HINT: If the node is up, wait until metadata gets synced to it and try again. 
SELECT citus_disable_node_and_wait('localhost', :worker_1_port); @@ -1836,7 +1878,7 @@ ALTER TABLE test_table ADD COLUMN id2 int DEFAULT nextval('mx_test_sequence_1'); ALTER TABLE test_table ALTER COLUMN id2 DROP DEFAULT; ALTER TABLE test_table ALTER COLUMN id2 SET DEFAULT nextval('mx_test_sequence_1'); SELECT unnest(activate_node_snapshot()) order by 1; - unnest + unnest --------------------------------------------------------------------- ALTER DATABASE regression OWNER TO postgres; ALTER SEQUENCE mx_testing_schema.mx_test_table_col_3_seq OWNER TO postgres @@ -1854,6 +1896,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; ALTER TABLE public.dist_table_1 OWNER TO postgres ALTER TABLE public.mx_ref OWNER TO postgres ALTER TABLE public.test_table OWNER TO postgres + CALL pg_catalog.worker_drop_all_shell_tables(true) CREATE INDEX mx_index ON mx_testing_schema.mx_test_table USING btree (col_2) CREATE INDEX mx_index_1 ON mx_test_schema_1.mx_table_1 USING btree (col1) CREATE INDEX mx_index_2 ON mx_test_schema_2.mx_table_2 USING btree (col2) @@ -1874,6 +1917,12 @@ SELECT unnest(activate_node_snapshot()) order by 1; DELETE FROM pg_dist_partition DELETE FROM pg_dist_placement DELETE FROM pg_dist_shard + DROP TABLE IF EXISTS mx_test_schema_1.mx_table_1 CASCADE + DROP TABLE IF EXISTS mx_test_schema_2.mx_table_2 CASCADE + DROP TABLE IF EXISTS mx_testing_schema.mx_test_table CASCADE + DROP TABLE IF EXISTS public.dist_table_1 CASCADE + DROP TABLE IF EXISTS public.mx_ref CASCADE + DROP TABLE IF EXISTS public.test_table CASCADE GRANT CREATE ON SCHEMA public TO PUBLIC; GRANT CREATE ON SCHEMA public TO postgres; GRANT USAGE ON SCHEMA public TO PUBLIC; @@ -1901,18 +1950,35 @@ SELECT unnest(activate_node_snapshot()) order by 1; SELECT worker_create_truncate_trigger('public.dist_table_1') SELECT worker_create_truncate_trigger('public.mx_ref') SELECT worker_create_truncate_trigger('public.test_table') - SELECT worker_drop_shell_table(logicalrelid::regclass::text) FROM pg_dist_partition SET ROLE postgres SET ROLE postgres SET citus.enable_ddl_propagation TO 'off' SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'off' - SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' SET citus.enable_ddl_propagation TO 'on' UPDATE pg_dist_local_group SET groupid = 1 - WITH colocation_group_data (colocationid, shardcount, replicationfactor, distributioncolumntype, distributioncolumncollationname, distributioncolumncollationschema) AS (VALUES (10009, 1, -1, 0, NULL, NULL), (10010, 4, 1, 'integer'::regtype, NULL, NULL)) SELECT pg_catalog.citus_internal_add_colocation_metadata(colocationid, shardcount, replicationfactor, distributioncolumntype, coalesce(c.oid, 0)) FROM colocation_group_data d LEFT JOIN pg_collation c ON (d.distributioncolumncollationname = c.collname AND d.distributioncolumncollationschema::regnamespace = c.collnamespace) - WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false), ('sequence', ARRAY['mx_testing_schema', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['mx_testing_schema', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['mx_test_schema_1', 'mx_table_1']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['mx_test_schema_2', 'mx_table_2']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['public', 
'mx_ref']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['public', 'dist_table_1']::text[], ARRAY[]::text[], -1, 0, false), ('sequence', ARRAY['public', 'mx_test_sequence_0']::text[], ARRAY[]::text[], -1, 0, false), ('sequence', ARRAY['public', 'mx_test_sequence_1']::text[], ARRAY[]::text[], -1, 0, false), ('table', ARRAY['public', 'test_table']::text[], ARRAY[]::text[], -1, 0, false), ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false), ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['mx_testing_schema']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['mx_testing_schema_2']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['mx_test_schema_1']::text[], ARRAY[]::text[], -1, 0, false), ('schema', ARRAY['mx_test_schema_2']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + UPDATE pg_dist_node SET hasmetadata = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET isactive = TRUE WHERE nodeid = 1 + UPDATE pg_dist_node SET metadatasynced = TRUE WHERE nodeid = 1 + WITH colocation_group_data (colocationid, shardcount, replicationfactor, distributioncolumntype, distributioncolumncollationname, distributioncolumncollationschema) AS (VALUES (10009, 1, -1, 0, NULL, NULL)) SELECT pg_catalog.citus_internal_add_colocation_metadata(colocationid, shardcount, replicationfactor, distributioncolumntype, coalesce(c.oid, 0)) FROM colocation_group_data d LEFT JOIN pg_collation c ON (d.distributioncolumncollationname = c.collname AND d.distributioncolumncollationschema::regnamespace = c.collnamespace) + WITH colocation_group_data (colocationid, shardcount, replicationfactor, distributioncolumntype, distributioncolumncollationname, distributioncolumncollationschema) AS (VALUES (10010, 4, 1, 'integer'::regtype, NULL, NULL)) SELECT pg_catalog.citus_internal_add_colocation_metadata(colocationid, shardcount, replicationfactor, distributioncolumntype, coalesce(c.oid, 0)) FROM colocation_group_data d LEFT JOIN pg_collation c ON (d.distributioncolumncollationname = c.collname AND d.distributioncolumncollationschema::regnamespace = c.collnamespace) + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('database', ARRAY['regression']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('role', ARRAY['postgres']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['mx_test_schema_1']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, 
force_delegation) AS (VALUES ('schema', ARRAY['mx_test_schema_2']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['mx_testing_schema']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['mx_testing_schema_2']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['public']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['mx_testing_schema', 'mx_test_table_col_3_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'mx_test_sequence_0']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'mx_test_sequence_1']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('sequence', ARRAY['public', 'user_defined_seq']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['mx_test_schema_1', 'mx_table_1']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['mx_test_schema_2', 'mx_table_2']::text[], ARRAY[]::text[], -1, 0, false)) SELECT 
citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['mx_testing_schema', 'mx_test_table']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['public', 'dist_table_1']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['public', 'mx_ref']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; + WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('table', ARRAY['public', 'test_table']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data; WITH placement_data(shardid, shardlength, groupid, placementid) AS (VALUES (1310000, 0, 1, 100000), (1310001, 0, 5, 100001), (1310002, 0, 1, 100002), (1310003, 0, 5, 100003), (1310004, 0, 1, 100004), (1310005, 0, 5, 100005), (1310006, 0, 1, 100006), (1310007, 0, 5, 100007)) SELECT citus_internal_add_placement_metadata(shardid, shardlength, groupid, placementid) FROM placement_data; WITH placement_data(shardid, shardlength, groupid, placementid) AS (VALUES (1310020, 0, 1, 100020), (1310021, 0, 5, 100021), (1310022, 0, 1, 100022), (1310023, 0, 5, 100023), (1310024, 0, 1, 100024)) SELECT citus_internal_add_placement_metadata(shardid, shardlength, groupid, placementid) FROM placement_data; WITH placement_data(shardid, shardlength, groupid, placementid) AS (VALUES (1310025, 0, 1, 100025), (1310026, 0, 5, 100026), (1310027, 0, 1, 100027), (1310028, 0, 5, 100028), (1310029, 0, 1, 100029)) SELECT citus_internal_add_placement_metadata(shardid, shardlength, groupid, placementid) FROM placement_data; @@ -1925,7 +1991,7 @@ SELECT unnest(activate_node_snapshot()) order by 1; WITH shard_data(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) AS (VALUES ('public.dist_table_1'::regclass, 1310074, 't'::"char", '-2147483648', '-1073741825'), ('public.dist_table_1'::regclass, 1310075, 't'::"char", '-1073741824', '-1'), ('public.dist_table_1'::regclass, 1310076, 't'::"char", '0', '1073741823'), ('public.dist_table_1'::regclass, 1310077, 't'::"char", '1073741824', '2147483647')) SELECT citus_internal_add_shard_metadata(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) FROM shard_data; WITH shard_data(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) AS (VALUES ('public.mx_ref'::regclass, 1310073, 't'::"char", NULL, NULL)) SELECT citus_internal_add_shard_metadata(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) FROM 
shard_data; WITH shard_data(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) AS (VALUES ('public.test_table'::regclass, 1310083, 't'::"char", '-2147483648', '-1073741825'), ('public.test_table'::regclass, 1310084, 't'::"char", '-1073741824', '-1'), ('public.test_table'::regclass, 1310085, 't'::"char", '0', '1073741823'), ('public.test_table'::regclass, 1310086, 't'::"char", '1073741824', '2147483647')) SELECT citus_internal_add_shard_metadata(relationname, shardid, storagetype, shardminvalue, shardmaxvalue) FROM shard_data; -(87 rows) +(111 rows) -- shouldn't work since test_table is MX ALTER TABLE test_table ADD COLUMN id3 bigserial; diff --git a/src/test/regress/expected/upgrade_citus_finish_citus_upgrade.out b/src/test/regress/expected/upgrade_citus_finish_citus_upgrade.out index 8c46aae43..bb80d9103 100644 --- a/src/test/regress/expected/upgrade_citus_finish_citus_upgrade.out +++ b/src/test/regress/expected/upgrade_citus_finish_citus_upgrade.out @@ -21,3 +21,12 @@ FROM pg_dist_node_metadata, pg_extension WHERE extname = 'citus'; -- still, do not NOTICE the version as it changes per release SET client_min_messages TO WARNING; CALL citus_finish_citus_upgrade(); +-- we should be able to sync metadata in nontransactional way as well +SET citus.metadata_sync_mode TO 'nontransactional'; +SELECT start_metadata_sync_to_all_nodes(); + start_metadata_sync_to_all_nodes +--------------------------------------------------------------------- + t +(1 row) + +RESET citus.metadata_sync_mode; diff --git a/src/test/regress/expected/upgrade_list_citus_objects.out b/src/test/regress/expected/upgrade_list_citus_objects.out index 857a749a3..a234c4bac 100644 --- a/src/test/regress/expected/upgrade_list_citus_objects.out +++ b/src/test/regress/expected/upgrade_list_citus_objects.out @@ -76,6 +76,7 @@ ORDER BY 1; function citus_internal_global_blocked_processes() function citus_internal_is_replication_origin_tracking_active() function citus_internal_local_blocked_processes() + function citus_internal_mark_node_not_synced(integer,integer) function citus_internal_start_replication_origin_tracking() function citus_internal_stop_replication_origin_tracking() function citus_internal_update_placement_metadata(bigint,integer,integer) @@ -245,6 +246,7 @@ ORDER BY 1; function worker_create_or_replace_object(text) function worker_create_or_replace_object(text[]) function worker_create_truncate_trigger(regclass) + function worker_drop_all_shell_tables(boolean) function worker_drop_distributed_table(text) function worker_drop_sequence_dependency(text) function worker_drop_shell_table(text) @@ -322,5 +324,5 @@ ORDER BY 1; view citus_stat_statements view pg_dist_shard_placement view time_partitions -(314 rows) +(316 rows) diff --git a/src/test/regress/expected/upgrade_post_11_after.out b/src/test/regress/expected/upgrade_post_11_after.out index d7d7c46b0..cf41da8e1 100644 --- a/src/test/regress/expected/upgrade_post_11_after.out +++ b/src/test/regress/expected/upgrade_post_11_after.out @@ -25,11 +25,11 @@ SELECT pg_identify_object_as_address(classid, objid, objsubid) FROM pg_catalog.p (19 rows) -- on all nodes -SELECT run_command_on_workers($$SELECT array_agg(pg_identify_object_as_address(classid, objid, objsubid)) FROM pg_catalog.pg_dist_object WHERE objid IN ('post_11_upgrade'::regnamespace, 'post_11_upgrade.part_table'::regclass, 'post_11_upgrade.sensors'::regclass, 'post_11_upgrade.func_in_transaction_def'::regproc, 'post_11_upgrade.partial_index_test_config'::regconfig, 
'post_11_upgrade.my_type'::regtype, 'post_11_upgrade.view_for_upgrade_test'::regclass, 'post_11_upgrade.view_for_upgrade_test_my_type'::regclass, 'post_11_upgrade.non_dist_upgrade_ref_view_2'::regclass, 'post_11_upgrade.reporting_line'::regclass) ORDER BY 1;$$) ORDER BY 1; +SELECT run_command_on_workers($$SELECT array_agg(worker_object) FROM (SELECT pg_identify_object_as_address(classid, objid, objsubid) worker_object FROM pg_catalog.pg_dist_object WHERE objid IN ('post_11_upgrade'::regnamespace, 'post_11_upgrade.part_table'::regclass, 'post_11_upgrade.sensors'::regclass, 'post_11_upgrade.func_in_transaction_def'::regproc, 'post_11_upgrade.partial_index_test_config'::regconfig, 'post_11_upgrade.my_type'::regtype, 'post_11_upgrade.view_for_upgrade_test'::regclass, 'post_11_upgrade.view_for_upgrade_test_my_type'::regclass, 'post_11_upgrade.non_dist_upgrade_ref_view_2'::regclass, 'post_11_upgrade.reporting_line'::regclass) ORDER BY 1) worker_objects;$$) ORDER BY 1; run_command_on_workers --------------------------------------------------------------------- - (localhost,57636,t,"{""(type,{post_11_upgrade.my_type},{})"",""(function,\\""{post_11_upgrade,func_in_transaction_def}\\"",{})"",""(table,\\""{post_11_upgrade,part_table}\\"",{})"",""(table,\\""{post_11_upgrade,sensors}\\"",{})"",""(view,\\""{post_11_upgrade,view_for_upgrade_test}\\"",{})"",""(view,\\""{post_11_upgrade,view_for_upgrade_test_my_type}\\"",{})"",""(view,\\""{post_11_upgrade,non_dist_upgrade_ref_view_2}\\"",{})"",""(view,\\""{post_11_upgrade,reporting_line}\\"",{})"",""(schema,{post_11_upgrade},{})"",""(\\""text search configuration\\"",\\""{post_11_upgrade,partial_index_test_config}\\"",{})""}") - (localhost,57637,t,"{""(type,{post_11_upgrade.my_type},{})"",""(function,\\""{post_11_upgrade,func_in_transaction_def}\\"",{})"",""(table,\\""{post_11_upgrade,part_table}\\"",{})"",""(table,\\""{post_11_upgrade,sensors}\\"",{})"",""(view,\\""{post_11_upgrade,view_for_upgrade_test}\\"",{})"",""(view,\\""{post_11_upgrade,view_for_upgrade_test_my_type}\\"",{})"",""(view,\\""{post_11_upgrade,non_dist_upgrade_ref_view_2}\\"",{})"",""(view,\\""{post_11_upgrade,reporting_line}\\"",{})"",""(schema,{post_11_upgrade},{})"",""(\\""text search configuration\\"",\\""{post_11_upgrade,partial_index_test_config}\\"",{})""}") + (localhost,57636,t,"{""(function,\\""{post_11_upgrade,func_in_transaction_def}\\"",{})"",""(schema,{post_11_upgrade},{})"",""(table,\\""{post_11_upgrade,part_table}\\"",{})"",""(table,\\""{post_11_upgrade,sensors}\\"",{})"",""(\\""text search configuration\\"",\\""{post_11_upgrade,partial_index_test_config}\\"",{})"",""(type,{post_11_upgrade.my_type},{})"",""(view,\\""{post_11_upgrade,non_dist_upgrade_ref_view_2}\\"",{})"",""(view,\\""{post_11_upgrade,reporting_line}\\"",{})"",""(view,\\""{post_11_upgrade,view_for_upgrade_test}\\"",{})"",""(view,\\""{post_11_upgrade,view_for_upgrade_test_my_type}\\"",{})""}") + (localhost,57637,t,"{""(function,\\""{post_11_upgrade,func_in_transaction_def}\\"",{})"",""(schema,{post_11_upgrade},{})"",""(table,\\""{post_11_upgrade,part_table}\\"",{})"",""(table,\\""{post_11_upgrade,sensors}\\"",{})"",""(\\""text search configuration\\"",\\""{post_11_upgrade,partial_index_test_config}\\"",{})"",""(type,{post_11_upgrade.my_type},{})"",""(view,\\""{post_11_upgrade,non_dist_upgrade_ref_view_2}\\"",{})"",""(view,\\""{post_11_upgrade,reporting_line}\\"",{})"",""(view,\\""{post_11_upgrade,view_for_upgrade_test}\\"",{})"",""(view,\\""{post_11_upgrade,view_for_upgrade_test_my_type}\\"",{})""}") (2 rows) 
-- Create the necessary test utility function diff --git a/src/test/regress/failure_schedule b/src/test/regress/failure_schedule index 816f9d9e2..afc4780bf 100644 --- a/src/test/regress/failure_schedule +++ b/src/test/regress/failure_schedule @@ -32,6 +32,7 @@ test: failure_single_mod test: failure_savepoints test: failure_multi_row_insert test: failure_mx_metadata_sync +test: failure_mx_metadata_sync_multi_trans test: failure_connection_establishment # this test syncs metadata to the workers diff --git a/src/test/regress/sql/failure_mx_metadata_sync_multi_trans.sql b/src/test/regress/sql/failure_mx_metadata_sync_multi_trans.sql new file mode 100644 index 000000000..efd4879bd --- /dev/null +++ b/src/test/regress/sql/failure_mx_metadata_sync_multi_trans.sql @@ -0,0 +1,282 @@ +-- +-- failure_mx_metadata_sync_multi_trans.sql +-- +CREATE SCHEMA IF NOT EXISTS mx_metadata_sync_multi_trans; +SET SEARCH_PATH = mx_metadata_sync_multi_trans; +SET citus.shard_count TO 2; +SET citus.next_shard_id TO 16000000; +SET citus.shard_replication_factor TO 1; +SET citus.metadata_sync_mode TO 'nontransactional'; + +SELECT pg_backend_pid() as pid \gset +SELECT citus.mitmproxy('conn.allow()'); + +\set VERBOSITY terse +SET client_min_messages TO ERROR; + +-- Create roles +CREATE ROLE foo1; +CREATE ROLE foo2; + +-- Create sequence +CREATE SEQUENCE seq; + +-- Create colocated distributed tables +CREATE TABLE dist1 (id int PRIMARY KEY default nextval('seq')); +SELECT create_distributed_table('dist1', 'id'); +INSERT INTO dist1 SELECT i FROM generate_series(1,100) i; + +CREATE TABLE dist2 (id int PRIMARY KEY default nextval('seq')); +SELECT create_distributed_table('dist2', 'id'); +INSERT INTO dist2 SELECT i FROM generate_series(1,100) i; + +-- Create a reference table +CREATE TABLE ref (id int UNIQUE); +SELECT create_reference_table('ref'); +INSERT INTO ref SELECT i FROM generate_series(1,100) i; + +-- Create local tables +CREATE TABLE loc1 (id int PRIMARY KEY); +INSERT INTO loc1 SELECT i FROM generate_series(1,100) i; + +CREATE TABLE loc2 (id int REFERENCES loc1(id)); +INSERT INTO loc2 SELECT i FROM generate_series(1,100) i; + +SELECT citus_set_coordinator_host('localhost', :master_port); +SELECT citus_add_local_table_to_metadata('loc1', cascade_via_foreign_keys => true); + +-- Create partitioned distributed table +CREATE TABLE orders ( + id bigint, + order_time timestamp without time zone NOT NULL, + region_id bigint NOT NULL +) +PARTITION BY RANGE (order_time); + +SELECT create_time_partitions( + table_name := 'orders', + partition_interval := '1 day', + start_from := '2020-01-01', + end_at := '2020-01-11' +); +SELECT create_distributed_table('orders', 'region_id'); + +-- Initially turn metadata sync to worker2 off because we'll inject errors into the start/stop metadata sync operations +SELECT stop_metadata_sync_to_node('localhost', :worker_2_proxy_port); +SELECT isactive, metadatasynced, hasmetadata FROM pg_dist_node WHERE nodeport=:worker_2_proxy_port; + +-- Failure to send local group id +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE pg_dist_local_group SET groupid").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE pg_dist_local_group SET groupid").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to drop node metadata +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_dist_node").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port);
+SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_dist_node").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to send node metadata +SELECT citus.mitmproxy('conn.onQuery(query="INSERT INTO pg_dist_node").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="INSERT INTO pg_dist_node").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to drop sequence +SELECT citus.mitmproxy('conn.onQuery(query="SELECT pg_catalog.worker_drop_sequence_dependency").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="SELECT pg_catalog.worker_drop_sequence_dependency").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to drop shell table +SELECT citus.mitmproxy('conn.onQuery(query="CALL pg_catalog.worker_drop_all_shell_tables").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="CALL pg_catalog.worker_drop_all_shell_tables").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to delete all pg_dist_partition metadata +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_dist_partition").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_dist_partition").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to delete all pg_dist_shard metadata +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_dist_shard").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_dist_shard").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to delete all pg_dist_placement metadata +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_dist_placement").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_dist_placement").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to delete all pg_dist_object metadata +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_catalog.pg_dist_object").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_catalog.pg_dist_object").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to delete all pg_dist_colocation metadata +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_catalog.pg_dist_colocation").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="DELETE FROM pg_catalog.pg_dist_colocation").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to alter or create role +SELECT citus.mitmproxy('conn.onQuery(query="SELECT worker_create_or_alter_role").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="SELECT worker_create_or_alter_role").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to set database owner 
+SELECT citus.mitmproxy('conn.onQuery(query="ALTER DATABASE.*OWNER TO").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="ALTER DATABASE.*OWNER TO").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Filure to create schema +SELECT citus.mitmproxy('conn.onQuery(query="CREATE SCHEMA IF NOT EXISTS mx_metadata_sync_multi_trans AUTHORIZATION").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="CREATE SCHEMA IF NOT EXISTS mx_metadata_sync_multi_trans AUTHORIZATION").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to create sequence +SELECT citus.mitmproxy('conn.onQuery(query="SELECT worker_apply_sequence_command").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="SELECT worker_apply_sequence_command").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to create distributed table +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.dist1").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.dist1").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to create reference table +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.ref").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.ref").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to create local table +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.loc1").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.loc1").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to create distributed partitioned table +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.orders").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.orders").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to create distributed partition table +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.orders_p2020_01_05").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE mx_metadata_sync_multi_trans.orders_p2020_01_05").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to attach partition +SELECT citus.mitmproxy('conn.onQuery(query="ALTER TABLE mx_metadata_sync_multi_trans.orders ATTACH PARTITION mx_metadata_sync_multi_trans.orders_p2020_01_05").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="ALTER TABLE mx_metadata_sync_multi_trans.orders ATTACH PARTITION 
mx_metadata_sync_multi_trans.orders_p2020_01_05").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to add partition metadata +SELECT citus.mitmproxy('conn.onQuery(query="SELECT citus_internal_add_partition_metadata").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="SELECT citus_internal_add_partition_metadata").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to add shard metadata +SELECT citus.mitmproxy('conn.onQuery(query="SELECT citus_internal_add_shard_metadata").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="SELECT citus_internal_add_shard_metadata").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to add placement metadata +SELECT citus.mitmproxy('conn.onQuery(query="SELECT citus_internal_add_placement_metadata").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="SELECT citus_internal_add_placement_metadata").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to add colocation metadata +SELECT citus.mitmproxy('conn.onQuery(query="SELECT pg_catalog.citus_internal_add_colocation_metadata").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="SELECT pg_catalog.citus_internal_add_colocation_metadata").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to add distributed object metadata +SELECT citus.mitmproxy('conn.onQuery(query="SELECT citus_internal_add_object_metadata").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="SELECT citus_internal_add_object_metadata").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to set isactive to true +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE pg_dist_node SET isactive = TRUE").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE pg_dist_node SET isactive = TRUE").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to set metadatasynced to true +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE pg_dist_node SET metadatasynced = TRUE").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE pg_dist_node SET metadatasynced = TRUE").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Failure to set hasmetadata to true +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE pg_dist_node SET hasmetadata = TRUE").cancel(' || :pid || ')'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +SELECT citus.mitmproxy('conn.onQuery(query="UPDATE pg_dist_node SET hasmetadata = TRUE").kill()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Show node metadata info on coordinator after failures +SELECT * FROM pg_dist_node ORDER BY nodeport; + +-- Show that we can still query the node from coordinator +SELECT COUNT(*) FROM dist1; + +-- Verify that the value 103 belongs to a shard at the node to which we failed to sync metadata +SELECT 103 AS 
failed_node_val \gset +SELECT nodeid AS failed_nodeid FROM pg_dist_node WHERE metadatasynced = false \gset +SELECT get_shard_id_for_distribution_column('dist1', :failed_node_val) AS shardid \gset +SELECT groupid = :failed_nodeid FROM pg_dist_placement WHERE shardid = :shardid; + +-- Show that we can still insert into a shard at the node from coordinator +INSERT INTO dist1 VALUES (:failed_node_val); + +-- Show that we can still update a shard at the node from coordinator +UPDATE dist1 SET id = :failed_node_val WHERE id = :failed_node_val; + +-- Show that we can still delete from a shard at the node from coordinator +DELETE FROM dist1 WHERE id = :failed_node_val; + +-- Show that DDL would still propagate to the node +SET client_min_messages TO NOTICE; +SET citus.log_remote_commands TO 1; +CREATE SCHEMA dummy; +SET citus.log_remote_commands TO 0; +SET client_min_messages TO ERROR; + +-- Successfully activate the node after many failures +SELECT citus.mitmproxy('conn.allow()'); +SELECT citus_activate_node('localhost', :worker_2_proxy_port); +-- Activate the node once more to verify it works again with already synced metadata +SELECT citus_activate_node('localhost', :worker_2_proxy_port); + +-- Show node metadata info on worker2 and coordinator after success +\c - - - :worker_2_port +SELECT * FROM pg_dist_node ORDER BY nodeport; +\c - - - :master_port +SELECT * FROM pg_dist_node ORDER BY nodeport; +SELECT citus.mitmproxy('conn.allow()'); + +RESET citus.metadata_sync_mode; +DROP SCHEMA dummy; +DROP SCHEMA mx_metadata_sync_multi_trans CASCADE; +DROP ROLE foo1; +DROP ROLE foo2; +SELECT citus_remove_node('localhost', :master_port); diff --git a/src/test/regress/sql/metadata_sync_helpers.sql b/src/test/regress/sql/metadata_sync_helpers.sql index 856ec0bfb..1c5d5b15d 100644 --- a/src/test/regress/sql/metadata_sync_helpers.sql +++ b/src/test/regress/sql/metadata_sync_helpers.sql @@ -749,15 +749,6 @@ BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED; SELECT citus_internal_delete_shard_metadata(shardid) FROM shard_data; ROLLBACK; --- the user only allowed to delete shards in a distributed transaction -BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED; - SET application_name to 'citus_internal gpid=10000000001'; - \set VERBOSITY terse - WITH shard_data(shardid) - AS (VALUES (1420007)) - SELECT citus_internal_delete_shard_metadata(shardid) FROM shard_data; -ROLLBACK; - -- the user cannot delete non-existing shards BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED; SELECT assign_distributed_transaction_id(0, 8, '2021-07-09 15:41:55.542377+02'); diff --git a/src/test/regress/sql/multi_cluster_management.sql b/src/test/regress/sql/multi_cluster_management.sql index f9aa81836..d0bb8b16d 100644 --- a/src/test/regress/sql/multi_cluster_management.sql +++ b/src/test/regress/sql/multi_cluster_management.sql @@ -4,9 +4,12 @@ ALTER SEQUENCE pg_catalog.pg_dist_groupid_seq RESTART 1; -- Tests functions related to cluster membership --- add the nodes to the cluster +-- add the first node to the cluster in transactional mode SELECT 1 FROM master_add_node('localhost', :worker_1_port); +-- add the second node in nontransactional mode +SET citus.metadata_sync_mode TO 'nontransactional'; SELECT 1 FROM master_add_node('localhost', :worker_2_port); +RESET citus.metadata_sync_mode; -- I am coordinator SELECT citus_is_coordinator(); @@ -506,5 +509,19 @@ BEGIN; COMMIT; SELECT start_metadata_sync_to_all_nodes(); +-- nontransactional sync mode tests +SET citus.metadata_sync_mode TO 'nontransactional'; +-- do not allow 
diff --git a/src/test/regress/sql/upgrade_citus_finish_citus_upgrade.sql b/src/test/regress/sql/upgrade_citus_finish_citus_upgrade.sql
index bc2c40b0c..a326fb0a4 100644
--- a/src/test/regress/sql/upgrade_citus_finish_citus_upgrade.sql
+++ b/src/test/regress/sql/upgrade_citus_finish_citus_upgrade.sql
@@ -17,3 +17,8 @@ FROM pg_dist_node_metadata, pg_extension WHERE extname = 'citus';
 -- still, do not NOTICE the version as it changes per release
 SET client_min_messages TO WARNING;
 CALL citus_finish_citus_upgrade();
+
+-- we should be able to sync metadata in nontransactional way as well
+SET citus.metadata_sync_mode TO 'nontransactional';
+SELECT start_metadata_sync_to_all_nodes();
+RESET citus.metadata_sync_mode;
diff --git a/src/test/regress/sql/upgrade_post_11_after.sql b/src/test/regress/sql/upgrade_post_11_after.sql
index e38491593..946c52ae2 100644
--- a/src/test/regress/sql/upgrade_post_11_after.sql
+++ b/src/test/regress/sql/upgrade_post_11_after.sql
@@ -4,7 +4,7 @@ SET search_path = post_11_upgrade;
 SELECT pg_identify_object_as_address(classid, objid, objsubid) FROM pg_catalog.pg_dist_object WHERE objid IN ('post_11_upgrade'::regnamespace, 'post_11_upgrade.part_table'::regclass, 'post_11_upgrade.sensors'::regclass, 'post_11_upgrade.func_in_transaction_def'::regproc, 'post_11_upgrade.partial_index_test_config'::regconfig, 'post_11_upgrade.my_type'::regtype, 'post_11_upgrade.employees'::regclass, 'post_11_upgrade.view_for_upgrade_test'::regclass, 'post_11_upgrade.my_type_for_view'::regtype, 'post_11_upgrade.view_for_upgrade_test_my_type'::regclass, 'post_11_upgrade.non_dist_table_for_view'::regclass, 'post_11_upgrade.non_dist_upgrade_test_view'::regclass, 'post_11_upgrade.non_dist_upgrade_test_view_local_join'::regclass, 'post_11_upgrade.non_dist_upgrade_multiple_dist_view'::regclass, 'post_11_upgrade.non_dist_upgrade_ref_view'::regclass, 'post_11_upgrade.non_dist_upgrade_ref_view_2'::regclass, 'post_11_upgrade.reporting_line'::regclass, 'post_11_upgrade.v_test_1'::regclass, 'post_11_upgrade.v_test_2'::regclass, 'post_11_upgrade.owned_by_extension_table'::regclass, 'post_11_upgrade.materialized_view'::regclass, 'post_11_upgrade.owned_by_extension_view'::regclass, 'post_11_upgrade.local_type'::regtype, 'post_11_upgrade.non_dist_dist_table_for_view'::regclass, 'post_11_upgrade.depends_on_nothing_1'::regclass, 'post_11_upgrade.depends_on_nothing_2'::regclass, 'post_11_upgrade.depends_on_pg'::regclass, 'post_11_upgrade.depends_on_citus'::regclass, 'post_11_upgrade.depends_on_seq'::regclass, 'post_11_upgrade.depends_on_seq_and_no_support'::regclass) ORDER BY 1;
 -- on all nodes
-SELECT run_command_on_workers($$SELECT array_agg(pg_identify_object_as_address(classid, objid, objsubid)) FROM pg_catalog.pg_dist_object WHERE objid IN ('post_11_upgrade'::regnamespace, 'post_11_upgrade.part_table'::regclass, 'post_11_upgrade.sensors'::regclass, 'post_11_upgrade.func_in_transaction_def'::regproc, 'post_11_upgrade.partial_index_test_config'::regconfig, 'post_11_upgrade.my_type'::regtype, 'post_11_upgrade.view_for_upgrade_test'::regclass, 'post_11_upgrade.view_for_upgrade_test_my_type'::regclass, 'post_11_upgrade.non_dist_upgrade_ref_view_2'::regclass, 'post_11_upgrade.reporting_line'::regclass) ORDER BY 1;$$) ORDER BY 1;
+SELECT run_command_on_workers($$SELECT array_agg(worker_object) FROM (SELECT pg_identify_object_as_address(classid, objid, objsubid) worker_object FROM pg_catalog.pg_dist_object WHERE objid IN ('post_11_upgrade'::regnamespace, 'post_11_upgrade.part_table'::regclass, 'post_11_upgrade.sensors'::regclass, 'post_11_upgrade.func_in_transaction_def'::regproc, 'post_11_upgrade.partial_index_test_config'::regconfig, 'post_11_upgrade.my_type'::regtype, 'post_11_upgrade.view_for_upgrade_test'::regclass, 'post_11_upgrade.view_for_upgrade_test_my_type'::regclass, 'post_11_upgrade.non_dist_upgrade_ref_view_2'::regclass, 'post_11_upgrade.reporting_line'::regclass) ORDER BY 1) worker_objects;$$) ORDER BY 1;
 -- Create the necessary test utility function
 CREATE OR REPLACE FUNCTION activate_node_snapshot()
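The upgrade_post_11_after.sql hunk above moves the ORDER BY into a subquery that feeds array_agg. Read as an interpretation of the change rather than a statement from the patch itself: an ORDER BY attached to the outer query orders result rows, not the elements inside the aggregated array, so the worker output was not guaranteed to be deterministic. A generic illustration of the two deterministic forms, where the ordered_objects alias exists only for this sketch:

-- order the elements inside the aggregate
SELECT array_agg(objid ORDER BY objid) FROM pg_catalog.pg_dist_object;
-- or order rows in a subquery and aggregate the result, as the rewritten test does
SELECT array_agg(objid)
FROM (SELECT objid FROM pg_catalog.pg_dist_object ORDER BY objid) ordered_objects;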