diff --git a/src/backend/distributed/transaction/transaction_management.c b/src/backend/distributed/transaction/transaction_management.c
index 7a211b384..2012ca5dd 100644
--- a/src/backend/distributed/transaction/transaction_management.c
+++ b/src/backend/distributed/transaction/transaction_management.c
@@ -135,7 +135,16 @@ UseCoordinatedTransaction(void)
 	}
 
 	CurrentCoordinatedTransactionState = COORD_TRANS_STARTED;
-	AssignDistributedTransactionId();
+
+	/*
+	 * If assign_distributed_transaction_id() has been called, we should reuse
+	 * that identifier so distributed deadlock detection works properly.
+	 */
+	DistributedTransactionId *transactionId = GetCurrentDistributedTransactionId();
+	if (transactionId->transactionNumber == 0)
+	{
+		AssignDistributedTransactionId();
+	}
 }
 
 
diff --git a/src/backend/distributed/utils/reference_table_utils.c b/src/backend/distributed/utils/reference_table_utils.c
index 558a67656..fe2711554 100644
--- a/src/backend/distributed/utils/reference_table_utils.c
+++ b/src/backend/distributed/utils/reference_table_utils.c
@@ -210,11 +210,15 @@ EnsureReferenceTablesExistOnAllNodes(void)
 
 		if (PQstatus(connection->pgConn) == CONNECTION_OK)
 		{
+			UseCoordinatedTransaction();
+
+			RemoteTransactionBegin(connection);
 			StringInfo placementCopyCommand =
 				CopyShardPlacementToWorkerNodeQuery(sourceShardPlacement,
 													newWorkerNode,
 													TRANSFER_MODE_AUTOMATIC);
 			ExecuteCriticalRemoteCommand(connection, placementCopyCommand->data);
+			RemoteTransactionCommit(connection);
 		}
 		else
 		{
diff --git a/src/test/regress/expected/isolation_dump_global_wait_edges.out b/src/test/regress/expected/isolation_dump_global_wait_edges.out
index 9384bd287..71a7f1a7a 100644
--- a/src/test/regress/expected/isolation_dump_global_wait_edges.out
+++ b/src/test/regress/expected/isolation_dump_global_wait_edges.out
@@ -28,11 +28,11 @@ step detector-dump-wait-edges:
 
 waiting_transaction_numblocking_transaction_numblocking_transaction_waiting
 
-392            391            f              
+390            389            f              
 transactionnumberwaitingtransactionnumbers
 
-391                           
-392            391            
+389                           
+390            389            
 step s1-abort:
     ABORT;
 
@@ -75,14 +75,14 @@ step detector-dump-wait-edges:
 
 waiting_transaction_numblocking_transaction_numblocking_transaction_waiting
 
-396            395            f              
-397            395            f              
-397            396            t              
+394            393            f              
+395            393            f              
+395            394            t              
 transactionnumberwaitingtransactionnumbers
 
-395                           
-396            395            
-397            395,396        
+393                           
+394            393            
+395            393,394        
 step s1-abort:
     ABORT;
 
diff --git a/src/test/regress/expected/multi_replicate_reference_table.out b/src/test/regress/expected/multi_replicate_reference_table.out
index bcbc634d2..140c2fbbb 100644
--- a/src/test/regress/expected/multi_replicate_reference_table.out
+++ b/src/test/regress/expected/multi_replicate_reference_table.out
@@ -1014,6 +1014,38 @@ SELECT stop_metadata_sync_to_node('localhost', :worker_1_port);
 
 (1 row)
 
+--
+-- The following case used to get stuck on create_distributed_table() instead
+-- of detecting the distributed deadlock.
+--
+SET citus.replicate_reference_tables_on_activate TO off;
+SET citus.shard_replication_factor TO 1;
+select master_remove_node('localhost', :worker_2_port);
+ master_remove_node
+---------------------------------------------------------------------
+
+(1 row)
+
+CREATE TABLE ref (a int primary key, b int);
+SELECT create_reference_table('ref');
+ create_reference_table
+---------------------------------------------------------------------
+
+(1 row)
+
+CREATE TABLE test (x int, y int references ref(a));
+select 1 FROM master_add_node('localhost', :worker_2_port);
+ ?column?
+---------------------------------------------------------------------
+        1
+(1 row)
+
+BEGIN;
+DROP TABLE test;
+CREATE TABLE test (x int, y int references ref(a));
+SELECT create_distributed_table('test','x');
+ERROR:  canceling the transaction since it was involved in a distributed deadlock
+END;
 -- test adding an invalid node while we have reference tables to replicate
 -- set client message level to ERROR and verbosity to terse to supporess
 -- OS-dependent host name resolution warnings
diff --git a/src/test/regress/sql/multi_replicate_reference_table.sql b/src/test/regress/sql/multi_replicate_reference_table.sql
index 4535aebfc..4a8b76d4d 100644
--- a/src/test/regress/sql/multi_replicate_reference_table.sql
+++ b/src/test/regress/sql/multi_replicate_reference_table.sql
@@ -634,6 +634,25 @@ SET search_path TO replicate_reference_table;
 
 SELECT stop_metadata_sync_to_node('localhost', :worker_1_port);
 
+--
+-- The following case used to get stuck on create_distributed_table() instead
+-- of detecting the distributed deadlock.
+--
+SET citus.replicate_reference_tables_on_activate TO off;
+SET citus.shard_replication_factor TO 1;
+
+select master_remove_node('localhost', :worker_2_port);
+
+CREATE TABLE ref (a int primary key, b int);
+SELECT create_reference_table('ref');
+CREATE TABLE test (x int, y int references ref(a));
+select 1 FROM master_add_node('localhost', :worker_2_port);
+BEGIN;
+DROP TABLE test;
+CREATE TABLE test (x int, y int references ref(a));
+SELECT create_distributed_table('test','x');
+END;
+
 -- test adding an invalid node while we have reference tables to replicate
 -- set client message level to ERROR and verbosity to terse to supporess
 -- OS-dependent host name resolution warnings
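Not part of the patch, illustration only: a minimal standalone C sketch of the "assign only if unset" guard that the transaction_management.c hunk introduces. The types and helpers below are simplified stand-ins (the real DistributedTransactionId carries more than a bare counter); only the control flow is meant to match the hunk.

/*
 * sketch.c -- illustrative only, not part of the patch.
 *
 * Models the guard added to UseCoordinatedTransaction(): generate a new
 * distributed transaction id only when none has been assigned yet, so an id
 * propagated from another node via assign_distributed_transaction_id() is
 * reused rather than overwritten.
 */
#include <stdio.h>
#include <stdint.h>

typedef struct DistributedTransactionId
{
	uint64_t transactionNumber;		/* 0 means "not assigned yet" */
} DistributedTransactionId;

static DistributedTransactionId currentTransactionId = { 0 };
static uint64_t transactionNumberSequence = 0;

static DistributedTransactionId *
GetCurrentDistributedTransactionId(void)
{
	return &currentTransactionId;
}

static void
AssignDistributedTransactionId(void)
{
	currentTransactionId.transactionNumber = ++transactionNumberSequence;
}

/* mirrors the patched guard in UseCoordinatedTransaction() */
static void
UseCoordinatedTransaction(void)
{
	DistributedTransactionId *transactionId = GetCurrentDistributedTransactionId();
	if (transactionId->transactionNumber == 0)
	{
		AssignDistributedTransactionId();
	}
}

int
main(void)
{
	/* simulate an id already propagated from the coordinator to this backend */
	currentTransactionId.transactionNumber = 123;

	UseCoordinatedTransaction();

	/* prints 123: the propagated id survives instead of being replaced */
	printf("transaction number: %llu\n",
		   (unsigned long long) currentTransactionId.transactionNumber);
	return 0;
}

The guard is what lets a worker that already received an id keep it, so the deadlock detector can attribute that backend's lock waits to the originating distributed transaction instead of a fresh, unrelated one.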