diff --git a/src/backend/columnar/columnar_tableam.c b/src/backend/columnar/columnar_tableam.c index 40486d08f..ca3a5f4c4 100644 --- a/src/backend/columnar/columnar_tableam.c +++ b/src/backend/columnar/columnar_tableam.c @@ -2946,7 +2946,7 @@ MajorVersionsCompatibleColumnar(char *leftVersion, char *rightVersion) } else { - rightComparisionLimit = strlen(leftVersion); + rightComparisionLimit = strlen(rightVersion); } /* we can error out early if hypens are not in the same position */ diff --git a/src/backend/distributed/operations/shard_rebalancer.c b/src/backend/distributed/operations/shard_rebalancer.c index d1868d3c4..03dc4c1b8 100644 --- a/src/backend/distributed/operations/shard_rebalancer.c +++ b/src/backend/distributed/operations/shard_rebalancer.c @@ -384,6 +384,7 @@ CheckRebalanceStateInvariants(const RebalanceState *state) Assert(shardCost->cost <= prevShardCost->cost); } totalCost += shardCost->cost; + prevShardCost = shardCost; } /* Check that utilization field is up to date. */ diff --git a/src/backend/distributed/planner/function_call_delegation.c b/src/backend/distributed/planner/function_call_delegation.c index f17b02347..4a79dc25a 100644 --- a/src/backend/distributed/planner/function_call_delegation.c +++ b/src/backend/distributed/planner/function_call_delegation.c @@ -91,6 +91,10 @@ bool InDelegatedFunctionCall = false; static bool contain_param_walker(Node *node, void *context) { + if (node == NULL) + { + return false; + } if (IsA(node, Param)) { Param *paramNode = (Param *) node; diff --git a/src/backend/distributed/test/metadata_sync.c b/src/backend/distributed/test/metadata_sync.c index dec20c772..ce025cff9 100644 --- a/src/backend/distributed/test/metadata_sync.c +++ b/src/backend/distributed/test/metadata_sync.c @@ -50,6 +50,13 @@ activate_node_snapshot(PG_FUNCTION_ARGS) * so we are using first primary worker node just for test purposes. */ WorkerNode *dummyWorkerNode = GetFirstPrimaryWorkerNode(); + if (dummyWorkerNode == NULL) + { + ereport(ERROR, (errmsg("no worker nodes found"), + errdetail("Function activate_node_snapshot is meant to be " + "used when running tests on a multi-node cluster " + "with workers."))); + } /* * Create MetadataSyncContext which is used throughout nodes' activation. diff --git a/src/backend/distributed/utils/resource_lock.c b/src/backend/distributed/utils/resource_lock.c index 13e88a16e..8ac269e43 100644 --- a/src/backend/distributed/utils/resource_lock.c +++ b/src/backend/distributed/utils/resource_lock.c @@ -707,13 +707,27 @@ SerializeNonCommutativeWrites(List *shardIntervalList, LOCKMODE lockMode) } List *replicatedShardList = NIL; - if (AnyTableReplicated(shardIntervalList, &replicatedShardList)) - { - if (ClusterHasKnownMetadataWorkers() && !IsFirstWorkerNode()) - { - LockShardListResourcesOnFirstWorker(lockMode, replicatedShardList); - } + bool anyTableReplicated = AnyTableReplicated(shardIntervalList, &replicatedShardList); + /* + * Acquire locks on the modified table. + * If the table is replicated, the locks are first acquired on the first worker node then locally. + * But if we're already on the first worker, acquiring on the first worker node and locally are the same operation. + * So we only acquire locally in that case. + */ + if (anyTableReplicated && ClusterHasKnownMetadataWorkers() && !IsFirstWorkerNode()) + { + LockShardListResourcesOnFirstWorker(lockMode, replicatedShardList); + } + LockShardListResources(shardIntervalList, lockMode); + + /* + * Next, acquire locks on the reference tables that are referenced by a foreign key if there are any. + * Note that LockReferencedReferenceShardResources() first acquires locks on the first worker, + * then locally. + */ + if (anyTableReplicated) + { ShardInterval *firstShardInterval = (ShardInterval *) linitial(replicatedShardList); if (ReferenceTableShardId(firstShardInterval->shardId)) @@ -728,8 +742,6 @@ SerializeNonCommutativeWrites(List *shardIntervalList, LOCKMODE lockMode) LockReferencedReferenceShardResources(firstShardInterval->shardId, lockMode); } } - - LockShardListResources(shardIntervalList, lockMode); } diff --git a/src/test/regress/expected/function_with_case_when.out b/src/test/regress/expected/function_with_case_when.out new file mode 100644 index 000000000..18df5be0a --- /dev/null +++ b/src/test/regress/expected/function_with_case_when.out @@ -0,0 +1,32 @@ +CREATE SCHEMA function_with_case; +SET search_path TO function_with_case; +-- create function +CREATE OR REPLACE FUNCTION test_err(v1 text) + RETURNS text + LANGUAGE plpgsql + SECURITY DEFINER +AS $function$ + +begin + return v1 || ' - ok'; +END; +$function$; +do $$ declare + lNewValues text; + val text; +begin + val = 'test'; + lNewValues = test_err(v1 => case when val::text = 'test'::text then 'yes' else 'no' end); + raise notice 'lNewValues= %', lNewValues; +end;$$ ; +NOTICE: lNewValues= yes - ok +CONTEXT: PL/pgSQL function inline_code_block line XX at RAISE +-- call function +SELECT test_err('test'); + test_err +--------------------------------------------------------------------- + test - ok +(1 row) + +DROP SCHEMA function_with_case CASCADE; +NOTICE: drop cascades to function test_err(text) diff --git a/src/test/regress/expected/issue_7477.out b/src/test/regress/expected/issue_7477.out new file mode 100644 index 000000000..224d85c6e --- /dev/null +++ b/src/test/regress/expected/issue_7477.out @@ -0,0 +1,62 @@ +--- Test for updating a table that has a foreign key reference to another reference table. +--- Issue #7477: Distributed deadlock after issuing a simple UPDATE statement +--- https://github.com/citusdata/citus/issues/7477 +CREATE TABLE table1 (id INT PRIMARY KEY); +SELECT create_reference_table('table1'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO table1 VALUES (1); +CREATE TABLE table2 ( + id INT, + info TEXT, + CONSTRAINT table1_id_fk FOREIGN KEY (id) REFERENCES table1 (id) + ); +SELECT create_reference_table('table2'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO table2 VALUES (1, 'test'); +--- Runs the update command in parallel on workers. +--- Due to bug #7477, before the fix, the result is non-deterministic +--- and have several rows of the form: +--- localhost | 57638 | f | ERROR: deadlock detected +--- localhost | 57637 | f | ERROR: deadlock detected +--- localhost | 57637 | f | ERROR: canceling the transaction since it was involved in a distributed deadlock +SELECT * FROM master_run_on_worker( + ARRAY['localhost', 'localhost','localhost', 'localhost','localhost', + 'localhost','localhost', 'localhost','localhost', 'localhost']::text[], + ARRAY[57638, 57637, 57637, 57638, 57637, 57638, 57637, 57638, 57638, 57637]::int[], + ARRAY['UPDATE table2 SET info = ''test_update'' WHERE id = 1', + 'UPDATE table2 SET info = ''test_update'' WHERE id = 1', + 'UPDATE table2 SET info = ''test_update'' WHERE id = 1', + 'UPDATE table2 SET info = ''test_update'' WHERE id = 1', + 'UPDATE table2 SET info = ''test_update'' WHERE id = 1', + 'UPDATE table2 SET info = ''test_update'' WHERE id = 1', + 'UPDATE table2 SET info = ''test_update'' WHERE id = 1', + 'UPDATE table2 SET info = ''test_update'' WHERE id = 1', + 'UPDATE table2 SET info = ''test_update'' WHERE id = 1', + 'UPDATE table2 SET info = ''test_update'' WHERE id = 1' + ]::text[], + true); + node_name | node_port | success | result +--------------------------------------------------------------------- + localhost | 57638 | t | UPDATE 1 + localhost | 57637 | t | UPDATE 1 + localhost | 57637 | t | UPDATE 1 + localhost | 57638 | t | UPDATE 1 + localhost | 57637 | t | UPDATE 1 + localhost | 57638 | t | UPDATE 1 + localhost | 57637 | t | UPDATE 1 + localhost | 57638 | t | UPDATE 1 + localhost | 57638 | t | UPDATE 1 + localhost | 57637 | t | UPDATE 1 +(10 rows) + +--- cleanup +DROP TABLE table2; +DROP TABLE table1; diff --git a/src/test/regress/multi_schedule b/src/test/regress/multi_schedule index 3fec50aac..af5921e60 100644 --- a/src/test/regress/multi_schedule +++ b/src/test/regress/multi_schedule @@ -103,13 +103,14 @@ test: multi_dropped_column_aliases foreign_key_restriction_enforcement test: binary_protocol test: alter_table_set_access_method test: alter_distributed_table -test: issue_5248 issue_5099 issue_5763 issue_6543 issue_6758 +test: issue_5248 issue_5099 issue_5763 issue_6543 issue_6758 issue_7477 test: object_propagation_debug test: undistribute_table test: run_command_on_all_nodes test: background_task_queue_monitor test: other_databases grant_role_from_non_maindb role_operations_from_non_maindb seclabel_non_maindb test: citus_internal_access +test: function_with_case_when # Causal clock test test: clock diff --git a/src/test/regress/sql/function_with_case_when.sql b/src/test/regress/sql/function_with_case_when.sql new file mode 100644 index 000000000..03c6678e4 --- /dev/null +++ b/src/test/regress/sql/function_with_case_when.sql @@ -0,0 +1,27 @@ +CREATE SCHEMA function_with_case; +SET search_path TO function_with_case; + +-- create function +CREATE OR REPLACE FUNCTION test_err(v1 text) + RETURNS text + LANGUAGE plpgsql + SECURITY DEFINER +AS $function$ + +begin + return v1 || ' - ok'; +END; +$function$; +do $$ declare + lNewValues text; + val text; +begin + val = 'test'; + lNewValues = test_err(v1 => case when val::text = 'test'::text then 'yes' else 'no' end); + raise notice 'lNewValues= %', lNewValues; +end;$$ ; + +-- call function +SELECT test_err('test'); + +DROP SCHEMA function_with_case CASCADE; diff --git a/src/test/regress/sql/issue_7477.sql b/src/test/regress/sql/issue_7477.sql new file mode 100644 index 000000000..b9c1578e9 --- /dev/null +++ b/src/test/regress/sql/issue_7477.sql @@ -0,0 +1,44 @@ + +--- Test for updating a table that has a foreign key reference to another reference table. +--- Issue #7477: Distributed deadlock after issuing a simple UPDATE statement +--- https://github.com/citusdata/citus/issues/7477 + +CREATE TABLE table1 (id INT PRIMARY KEY); +SELECT create_reference_table('table1'); +INSERT INTO table1 VALUES (1); + +CREATE TABLE table2 ( + id INT, + info TEXT, + CONSTRAINT table1_id_fk FOREIGN KEY (id) REFERENCES table1 (id) + ); +SELECT create_reference_table('table2'); +INSERT INTO table2 VALUES (1, 'test'); + +--- Runs the update command in parallel on workers. +--- Due to bug #7477, before the fix, the result is non-deterministic +--- and have several rows of the form: +--- localhost | 57638 | f | ERROR: deadlock detected +--- localhost | 57637 | f | ERROR: deadlock detected +--- localhost | 57637 | f | ERROR: canceling the transaction since it was involved in a distributed deadlock + +SELECT * FROM master_run_on_worker( + ARRAY['localhost', 'localhost','localhost', 'localhost','localhost', + 'localhost','localhost', 'localhost','localhost', 'localhost']::text[], + ARRAY[57638, 57637, 57637, 57638, 57637, 57638, 57637, 57638, 57638, 57637]::int[], + ARRAY['UPDATE table2 SET info = ''test_update'' WHERE id = 1', + 'UPDATE table2 SET info = ''test_update'' WHERE id = 1', + 'UPDATE table2 SET info = ''test_update'' WHERE id = 1', + 'UPDATE table2 SET info = ''test_update'' WHERE id = 1', + 'UPDATE table2 SET info = ''test_update'' WHERE id = 1', + 'UPDATE table2 SET info = ''test_update'' WHERE id = 1', + 'UPDATE table2 SET info = ''test_update'' WHERE id = 1', + 'UPDATE table2 SET info = ''test_update'' WHERE id = 1', + 'UPDATE table2 SET info = ''test_update'' WHERE id = 1', + 'UPDATE table2 SET info = ''test_update'' WHERE id = 1' + ]::text[], + true); + +--- cleanup +DROP TABLE table2; +DROP TABLE table1;