diff --git a/src/backend/distributed/commands/function.c b/src/backend/distributed/commands/function.c index 398d803d6..a9c29b3e1 100644 --- a/src/backend/distributed/commands/function.c +++ b/src/backend/distributed/commands/function.c @@ -1147,7 +1147,7 @@ TriggerSyncMetadataToPrimaryNodes(void) /* let the maintanince deamon know about the metadata sync */ if (triggerMetadataSync) { - TriggerMetadataSync(MyDatabaseId); + TriggerMetadataSyncOnCommit(); } } diff --git a/src/backend/distributed/metadata/node_metadata.c b/src/backend/distributed/metadata/node_metadata.c index 65176152a..335cb115b 100644 --- a/src/backend/distributed/metadata/node_metadata.c +++ b/src/backend/distributed/metadata/node_metadata.c @@ -444,7 +444,7 @@ SetUpDistributedTableDependencies(WorkerNode *newWorkerNode) { MarkNodeHasMetadata(newWorkerNode->workerName, newWorkerNode->workerPort, true); - TriggerMetadataSync(MyDatabaseId); + TriggerMetadataSyncOnCommit(); } } } @@ -810,7 +810,7 @@ master_update_node(PG_FUNCTION_ARGS) */ if (UnsetMetadataSyncedForAll()) { - TriggerMetadataSync(MyDatabaseId); + TriggerMetadataSyncOnCommit(); } if (handle != NULL) diff --git a/src/backend/distributed/test/metadata_sync.c b/src/backend/distributed/test/metadata_sync.c index c2dc1ce4f..47e12ce7a 100644 --- a/src/backend/distributed/test/metadata_sync.c +++ b/src/backend/distributed/test/metadata_sync.c @@ -135,7 +135,7 @@ wait_until_metadata_sync(PG_FUNCTION_ARGS) Datum trigger_metadata_sync(PG_FUNCTION_ARGS) { - TriggerMetadataSync(MyDatabaseId); + TriggerMetadataSyncOnCommit(); PG_RETURN_VOID(); } diff --git a/src/backend/distributed/transaction/transaction_management.c b/src/backend/distributed/transaction/transaction_management.c index 1999c836c..96a4180a4 100644 --- a/src/backend/distributed/transaction/transaction_management.c +++ b/src/backend/distributed/transaction/transaction_management.c @@ -28,6 +28,7 @@ #include "distributed/listutils.h" #include "distributed/local_executor.h" #include "distributed/locally_reserved_shared_connections.h" +#include "distributed/maintenanced.h" #include "distributed/multi_executor.h" #include "distributed/multi_explain.h" #include "distributed/repartition_join_execution.h" @@ -102,6 +103,9 @@ bool CoordinatedTransactionUses2PC = false; /* if disabled, distributed statements in a function may run as separate transactions */ bool FunctionOpensTransactionBlock = true; +/* if true, we should trigger metadata sync on commit */ +bool MetadataSyncOnCommit = false; + /* transaction management functions */ static void CoordinatedTransactionCallback(XactEvent event, void *arg); @@ -262,6 +266,15 @@ CoordinatedTransactionCallback(XactEvent event, void *arg) AfterXactConnectionHandling(true); } + /* + * Changes to catalog tables are now visible to the metadata sync + * daemon, so we can trigger metadata sync if necessary. + */ + if (MetadataSyncOnCommit) + { + TriggerMetadataSync(MyDatabaseId); + } + ResetGlobalVariables(); /* @@ -474,6 +487,7 @@ ResetGlobalVariables() activeSetStmts = NULL; CoordinatedTransactionUses2PC = false; TransactionModifiedNodeMetadata = false; + MetadataSyncOnCommit = false; ResetWorkerErrorIndication(); } @@ -728,3 +742,15 @@ MaybeExecutingUDF(void) { return ExecutorLevel > 1 || (ExecutorLevel == 1 && PlannerLevel > 0); } + + +/* + * TriggerMetadataSyncOnCommit sets a flag to do metadata sync on commit. + * This is because new metadata only becomes visible to the metadata sync + * daemon after commit happens. + */ +void +TriggerMetadataSyncOnCommit(void) +{ + MetadataSyncOnCommit = true; +} diff --git a/src/include/distributed/transaction_management.h b/src/include/distributed/transaction_management.h index a0a595fac..f6bac9100 100644 --- a/src/include/distributed/transaction_management.h +++ b/src/include/distributed/transaction_management.h @@ -121,6 +121,7 @@ extern void InitializeTransactionManagement(void); /* other functions */ extern List * ActiveSubXactContexts(void); extern StringInfo BeginAndSetDistributedTransactionIdCommand(void); +extern void TriggerMetadataSyncOnCommit(void); #endif /* TRANSACTION_MANAGMENT_H */ diff --git a/src/test/regress/expected/isolation_metadata_sync_deadlock.out b/src/test/regress/expected/isolation_metadata_sync_deadlock.out index 0977caeea..e19498f6f 100644 --- a/src/test/regress/expected/isolation_metadata_sync_deadlock.out +++ b/src/test/regress/expected/isolation_metadata_sync_deadlock.out @@ -1,6 +1,6 @@ Parsed test spec with 3 sessions -starting permutation: enable-deadlock-detection reload-conf s2-start-session-level-connection s1-begin s1-update-1 s2-begin-on-worker s2-update-2-on-worker s2-truncate-on-worker s3-invalidate-metadata s3-resync s2-update-1-on-worker s1-update-2 s1-commit s2-commit-on-worker disable-deadlock-detection reload-conf s2-stop-connection +starting permutation: enable-deadlock-detection reload-conf s2-start-session-level-connection s1-begin s1-update-1 s2-begin-on-worker s2-update-2-on-worker s2-truncate-on-worker s3-invalidate-metadata s3-resync s3-wait s2-update-1-on-worker s1-update-2 s1-commit s2-commit-on-worker disable-deadlock-detection reload-conf s2-stop-connection create_distributed_table @@ -48,11 +48,13 @@ step s3-invalidate-metadata: step s3-resync: SELECT trigger_metadata_sync(); - SELECT pg_sleep(2); trigger_metadata_sync +step s3-wait: + SELECT pg_sleep(2); + pg_sleep @@ -96,7 +98,7 @@ restore_isolation_tester_func -starting permutation: increase-retry-interval reload-conf s2-start-session-level-connection s2-begin-on-worker s2-truncate-on-worker s3-invalidate-metadata s3-resync s1-count-daemons s1-cancel-metadata-sync s1-count-daemons reset-retry-interval reload-conf s2-commit-on-worker s2-stop-connection s3-resync +starting permutation: increase-retry-interval reload-conf s2-start-session-level-connection s2-begin-on-worker s2-truncate-on-worker s3-invalidate-metadata s3-resync s3-wait s1-count-daemons s1-cancel-metadata-sync s1-count-daemons reset-retry-interval reload-conf s2-commit-on-worker s2-stop-connection s3-resync s3-wait create_distributed_table @@ -132,11 +134,13 @@ step s3-invalidate-metadata: step s3-resync: SELECT trigger_metadata_sync(); - SELECT pg_sleep(2); trigger_metadata_sync +step s3-wait: + SELECT pg_sleep(2); + pg_sleep @@ -185,11 +189,13 @@ stop_session_level_connection_to_node step s3-resync: SELECT trigger_metadata_sync(); - SELECT pg_sleep(2); trigger_metadata_sync +step s3-wait: + SELECT pg_sleep(2); + pg_sleep diff --git a/src/test/regress/spec/isolation_metadata_sync_deadlock.spec b/src/test/regress/spec/isolation_metadata_sync_deadlock.spec index 9aa9a05b2..c5cabfd84 100644 --- a/src/test/regress/spec/isolation_metadata_sync_deadlock.spec +++ b/src/test/regress/spec/isolation_metadata_sync_deadlock.spec @@ -134,6 +134,10 @@ step "s3-invalidate-metadata" step "s3-resync" { SELECT trigger_metadata_sync(); +} + +step "s3-wait" +{ SELECT pg_sleep(2); } @@ -142,8 +146,8 @@ step "s3-resync" // causes the concurrent metadata sync to be blocked. But s2 and s1 themselves are // themselves involved in a distributed deadlock. // See https://github.com/citusdata/citus/issues/4393 for more details. -permutation "enable-deadlock-detection" "reload-conf" "s2-start-session-level-connection" "s1-begin" "s1-update-1" "s2-begin-on-worker" "s2-update-2-on-worker" "s2-truncate-on-worker" "s3-invalidate-metadata" "s3-resync" "s2-update-1-on-worker" "s1-update-2" "s1-commit" "s2-commit-on-worker" "disable-deadlock-detection" "reload-conf" "s2-stop-connection" +permutation "enable-deadlock-detection" "reload-conf" "s2-start-session-level-connection" "s1-begin" "s1-update-1" "s2-begin-on-worker" "s2-update-2-on-worker" "s2-truncate-on-worker" "s3-invalidate-metadata" "s3-resync" "s3-wait" "s2-update-1-on-worker" "s1-update-2" "s1-commit" "s2-commit-on-worker" "disable-deadlock-detection" "reload-conf" "s2-stop-connection" // Test that when metadata sync is waiting for locks, cancelling it terminates it. // This is important in cases where the metadata sync daemon itself is involved in a deadlock. -permutation "increase-retry-interval" "reload-conf" "s2-start-session-level-connection" "s2-begin-on-worker" "s2-truncate-on-worker" "s3-invalidate-metadata" "s3-resync" "s1-count-daemons" "s1-cancel-metadata-sync" "s1-count-daemons" "reset-retry-interval" "reload-conf" "s2-commit-on-worker" "s2-stop-connection" "s3-resync" +permutation "increase-retry-interval" "reload-conf" "s2-start-session-level-connection" "s2-begin-on-worker" "s2-truncate-on-worker" "s3-invalidate-metadata" "s3-resync" "s3-wait" "s1-count-daemons" "s1-cancel-metadata-sync" "s1-count-daemons" "reset-retry-interval" "reload-conf" "s2-commit-on-worker" "s2-stop-connection" "s3-resync" "s3-wait"