mirror of https://github.com/citusdata/citus.git
Change the order in which the locks are acquired (#7542)
This PR changes the order in which the locks are acquired (for the target and reference tables), when a modify request is initiated from a worker node that is not the "FirstWorkerNode". To prevent concurrent writes, locks are acquired on the first worker node for the replicated tables. When the update statement originates from the first worker node, it acquires the lock on the reference table(s) first, followed by the target table(s). However, if the update statement is initiated in another worker node, the lock requests are sent to the first worker in a different order. This PR unifies the modification order on the first worker node. With the third commit, independent of the node that received the request, the locks are acquired for the modified table and then the reference tables on the first node. The first commit shows a sample output for the test prior to the fix. Fixes #7477 --------- Co-authored-by: Jelte Fennema-Nio <jelte.fennema@microsoft.com>pull/7555/head^2
parent
12f56438fc
commit
8afa2d0386
|
@ -707,13 +707,27 @@ SerializeNonCommutativeWrites(List *shardIntervalList, LOCKMODE lockMode)
|
|||
}
|
||||
|
||||
List *replicatedShardList = NIL;
|
||||
if (AnyTableReplicated(shardIntervalList, &replicatedShardList))
|
||||
{
|
||||
if (ClusterHasKnownMetadataWorkers() && !IsFirstWorkerNode())
|
||||
bool anyTableReplicated = AnyTableReplicated(shardIntervalList, &replicatedShardList);
|
||||
|
||||
/*
|
||||
* Acquire locks on the modified table.
|
||||
* If the table is replicated, the locks are first acquired on the first worker node then locally.
|
||||
* But if we're already on the first worker, acquiring on the first worker node and locally are the same operation.
|
||||
* So we only acquire locally in that case.
|
||||
*/
|
||||
if (anyTableReplicated && ClusterHasKnownMetadataWorkers() && !IsFirstWorkerNode())
|
||||
{
|
||||
LockShardListResourcesOnFirstWorker(lockMode, replicatedShardList);
|
||||
}
|
||||
LockShardListResources(shardIntervalList, lockMode);
|
||||
|
||||
/*
|
||||
* Next, acquire locks on the reference tables that are referenced by a foreign key if there are any.
|
||||
* Note that LockReferencedReferenceShardResources() first acquires locks on the first worker,
|
||||
* then locally.
|
||||
*/
|
||||
if (anyTableReplicated)
|
||||
{
|
||||
ShardInterval *firstShardInterval =
|
||||
(ShardInterval *) linitial(replicatedShardList);
|
||||
if (ReferenceTableShardId(firstShardInterval->shardId))
|
||||
|
@ -728,8 +742,6 @@ SerializeNonCommutativeWrites(List *shardIntervalList, LOCKMODE lockMode)
|
|||
LockReferencedReferenceShardResources(firstShardInterval->shardId, lockMode);
|
||||
}
|
||||
}
|
||||
|
||||
LockShardListResources(shardIntervalList, lockMode);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,62 @@
|
|||
--- Test for updating a table that has a foreign key reference to another reference table.
|
||||
--- Issue #7477: Distributed deadlock after issuing a simple UPDATE statement
|
||||
--- https://github.com/citusdata/citus/issues/7477
|
||||
CREATE TABLE table1 (id INT PRIMARY KEY);
|
||||
SELECT create_reference_table('table1');
|
||||
create_reference_table
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
INSERT INTO table1 VALUES (1);
|
||||
CREATE TABLE table2 (
|
||||
id INT,
|
||||
info TEXT,
|
||||
CONSTRAINT table1_id_fk FOREIGN KEY (id) REFERENCES table1 (id)
|
||||
);
|
||||
SELECT create_reference_table('table2');
|
||||
create_reference_table
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
INSERT INTO table2 VALUES (1, 'test');
|
||||
--- Runs the update command in parallel on workers.
|
||||
--- Due to bug #7477, before the fix, the result is non-deterministic
|
||||
--- and have several rows of the form:
|
||||
--- localhost | 57638 | f | ERROR: deadlock detected
|
||||
--- localhost | 57637 | f | ERROR: deadlock detected
|
||||
--- localhost | 57637 | f | ERROR: canceling the transaction since it was involved in a distributed deadlock
|
||||
SELECT * FROM master_run_on_worker(
|
||||
ARRAY['localhost', 'localhost','localhost', 'localhost','localhost',
|
||||
'localhost','localhost', 'localhost','localhost', 'localhost']::text[],
|
||||
ARRAY[57638, 57637, 57637, 57638, 57637, 57638, 57637, 57638, 57638, 57637]::int[],
|
||||
ARRAY['UPDATE table2 SET info = ''test_update'' WHERE id = 1',
|
||||
'UPDATE table2 SET info = ''test_update'' WHERE id = 1',
|
||||
'UPDATE table2 SET info = ''test_update'' WHERE id = 1',
|
||||
'UPDATE table2 SET info = ''test_update'' WHERE id = 1',
|
||||
'UPDATE table2 SET info = ''test_update'' WHERE id = 1',
|
||||
'UPDATE table2 SET info = ''test_update'' WHERE id = 1',
|
||||
'UPDATE table2 SET info = ''test_update'' WHERE id = 1',
|
||||
'UPDATE table2 SET info = ''test_update'' WHERE id = 1',
|
||||
'UPDATE table2 SET info = ''test_update'' WHERE id = 1',
|
||||
'UPDATE table2 SET info = ''test_update'' WHERE id = 1'
|
||||
]::text[],
|
||||
true);
|
||||
node_name | node_port | success | result
|
||||
---------------------------------------------------------------------
|
||||
localhost | 57638 | t | UPDATE 1
|
||||
localhost | 57637 | t | UPDATE 1
|
||||
localhost | 57637 | t | UPDATE 1
|
||||
localhost | 57638 | t | UPDATE 1
|
||||
localhost | 57637 | t | UPDATE 1
|
||||
localhost | 57638 | t | UPDATE 1
|
||||
localhost | 57637 | t | UPDATE 1
|
||||
localhost | 57638 | t | UPDATE 1
|
||||
localhost | 57638 | t | UPDATE 1
|
||||
localhost | 57637 | t | UPDATE 1
|
||||
(10 rows)
|
||||
|
||||
--- cleanup
|
||||
DROP TABLE table2;
|
||||
DROP TABLE table1;
|
|
@ -103,7 +103,7 @@ test: multi_dropped_column_aliases foreign_key_restriction_enforcement
|
|||
test: binary_protocol
|
||||
test: alter_table_set_access_method
|
||||
test: alter_distributed_table
|
||||
test: issue_5248 issue_5099 issue_5763 issue_6543 issue_6758
|
||||
test: issue_5248 issue_5099 issue_5763 issue_6543 issue_6758 issue_7477
|
||||
test: object_propagation_debug
|
||||
test: undistribute_table
|
||||
test: run_command_on_all_nodes
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
|
||||
--- Test for updating a table that has a foreign key reference to another reference table.
|
||||
--- Issue #7477: Distributed deadlock after issuing a simple UPDATE statement
|
||||
--- https://github.com/citusdata/citus/issues/7477
|
||||
|
||||
CREATE TABLE table1 (id INT PRIMARY KEY);
|
||||
SELECT create_reference_table('table1');
|
||||
INSERT INTO table1 VALUES (1);
|
||||
|
||||
CREATE TABLE table2 (
|
||||
id INT,
|
||||
info TEXT,
|
||||
CONSTRAINT table1_id_fk FOREIGN KEY (id) REFERENCES table1 (id)
|
||||
);
|
||||
SELECT create_reference_table('table2');
|
||||
INSERT INTO table2 VALUES (1, 'test');
|
||||
|
||||
--- Runs the update command in parallel on workers.
|
||||
--- Due to bug #7477, before the fix, the result is non-deterministic
|
||||
--- and have several rows of the form:
|
||||
--- localhost | 57638 | f | ERROR: deadlock detected
|
||||
--- localhost | 57637 | f | ERROR: deadlock detected
|
||||
--- localhost | 57637 | f | ERROR: canceling the transaction since it was involved in a distributed deadlock
|
||||
|
||||
SELECT * FROM master_run_on_worker(
|
||||
ARRAY['localhost', 'localhost','localhost', 'localhost','localhost',
|
||||
'localhost','localhost', 'localhost','localhost', 'localhost']::text[],
|
||||
ARRAY[57638, 57637, 57637, 57638, 57637, 57638, 57637, 57638, 57638, 57637]::int[],
|
||||
ARRAY['UPDATE table2 SET info = ''test_update'' WHERE id = 1',
|
||||
'UPDATE table2 SET info = ''test_update'' WHERE id = 1',
|
||||
'UPDATE table2 SET info = ''test_update'' WHERE id = 1',
|
||||
'UPDATE table2 SET info = ''test_update'' WHERE id = 1',
|
||||
'UPDATE table2 SET info = ''test_update'' WHERE id = 1',
|
||||
'UPDATE table2 SET info = ''test_update'' WHERE id = 1',
|
||||
'UPDATE table2 SET info = ''test_update'' WHERE id = 1',
|
||||
'UPDATE table2 SET info = ''test_update'' WHERE id = 1',
|
||||
'UPDATE table2 SET info = ''test_update'' WHERE id = 1',
|
||||
'UPDATE table2 SET info = ''test_update'' WHERE id = 1'
|
||||
]::text[],
|
||||
true);
|
||||
|
||||
--- cleanup
|
||||
DROP TABLE table2;
|
||||
DROP TABLE table1;
|
Loading…
Reference in New Issue