Detect deadlocks in replicate_reference_tables()

pull/3764/head
Hadi Moshayedi 2020-04-15 10:58:23 -07:00
parent df9048ebaa
commit 59b9a4e5a1
5 changed files with 74 additions and 10 deletions

View File

@ -135,7 +135,16 @@ UseCoordinatedTransaction(void)
}
CurrentCoordinatedTransactionState = COORD_TRANS_STARTED;
AssignDistributedTransactionId();
/*
* If assign_distributed_transaction_id() has been called, we should reuse
* that identifier so distributed deadlock detection works properly.
*/
DistributedTransactionId *transactionId = GetCurrentDistributedTransactionId();
if (transactionId->transactionNumber == 0)
{
AssignDistributedTransactionId();
}
}

View File

@ -210,11 +210,15 @@ EnsureReferenceTablesExistOnAllNodes(void)
if (PQstatus(connection->pgConn) == CONNECTION_OK)
{
UseCoordinatedTransaction();
RemoteTransactionBegin(connection);
StringInfo placementCopyCommand =
CopyShardPlacementToWorkerNodeQuery(sourceShardPlacement,
newWorkerNode,
TRANSFER_MODE_AUTOMATIC);
ExecuteCriticalRemoteCommand(connection, placementCopyCommand->data);
RemoteTransactionCommit(connection);
}
else
{

View File

@ -28,11 +28,11 @@ step detector-dump-wait-edges:
waiting_transaction_numblocking_transaction_numblocking_transaction_waiting
392 391 f
390 389 f
transactionnumberwaitingtransactionnumbers
391
392 391
389
390 389
step s1-abort:
ABORT;
@ -75,14 +75,14 @@ step detector-dump-wait-edges:
waiting_transaction_numblocking_transaction_numblocking_transaction_waiting
396 395 f
397 395 f
397 396 t
394 393 f
395 393 f
395 394 t
transactionnumberwaitingtransactionnumbers
395
396 395
397 395,396
393
394 393
395 393,394
step s1-abort:
ABORT;

View File

@ -1014,6 +1014,38 @@ SELECT stop_metadata_sync_to_node('localhost', :worker_1_port);
(1 row)
--
-- The following case used to get stuck on create_distributed_table() instead
-- of detecting the distributed deadlock.
--
SET citus.replicate_reference_tables_on_activate TO off;
SET citus.shard_replication_factor TO 1;
select master_remove_node('localhost', :worker_2_port);
master_remove_node
---------------------------------------------------------------------
(1 row)
CREATE TABLE ref (a int primary key, b int);
SELECT create_reference_table('ref');
create_reference_table
---------------------------------------------------------------------
(1 row)
CREATE TABLE test (x int, y int references ref(a));
select 1 FROM master_add_node('localhost', :worker_2_port);
?column?
---------------------------------------------------------------------
1
(1 row)
BEGIN;
DROP TABLE test;
CREATE TABLE test (x int, y int references ref(a));
SELECT create_distributed_table('test','x');
ERROR: canceling the transaction since it was involved in a distributed deadlock
END;
-- test adding an invalid node while we have reference tables to replicate
-- set client message level to ERROR and verbosity to terse to supporess
-- OS-dependent host name resolution warnings

View File

@ -634,6 +634,25 @@ SET search_path TO replicate_reference_table;
SELECT stop_metadata_sync_to_node('localhost', :worker_1_port);
--
-- The following case used to get stuck on create_distributed_table() instead
-- of detecting the distributed deadlock.
--
SET citus.replicate_reference_tables_on_activate TO off;
SET citus.shard_replication_factor TO 1;
select master_remove_node('localhost', :worker_2_port);
CREATE TABLE ref (a int primary key, b int);
SELECT create_reference_table('ref');
CREATE TABLE test (x int, y int references ref(a));
select 1 FROM master_add_node('localhost', :worker_2_port);
BEGIN;
DROP TABLE test;
CREATE TABLE test (x int, y int references ref(a));
SELECT create_distributed_table('test','x');
END;
-- test adding an invalid node while we have reference tables to replicate
-- set client message level to ERROR and verbosity to terse to supporess
-- OS-dependent host name resolution warnings