From 1a9066c34abd7f65fa19915d97094ccdd89daf99 Mon Sep 17 00:00:00 2001 From: aykut-bozkurt <51649454+aykut-bozkurt@users.noreply.github.com> Date: Tue, 11 Apr 2023 11:27:16 +0300 Subject: [PATCH] fixes update propagation bug when `citus_set_coordinator_host` is called more than once (#6837) DESCRIPTION: Fixes update propagation bug when `citus_set_coordinator_host` is called more than once. Fixes https://github.com/citusdata/citus/issues/6731. (cherry picked from commit a20f7e1a55ce9738a7bf2db446d9c578e6bc48ff) --- .../distributed/metadata/node_metadata.c | 26 +++++++++++++++---- .../failure_mx_metadata_sync_multi_trans.out | 16 ++++++++++++ .../failure_mx_metadata_sync_multi_trans.sql | 5 ++++ 3 files changed, 42 insertions(+), 5 deletions(-) diff --git a/src/backend/distributed/metadata/node_metadata.c b/src/backend/distributed/metadata/node_metadata.c index 2639b79f0..1c0314a49 100644 --- a/src/backend/distributed/metadata/node_metadata.c +++ b/src/backend/distributed/metadata/node_metadata.c @@ -108,7 +108,8 @@ static void BlockDistributedQueriesOnMetadataNodes(void); static WorkerNode * TupleToWorkerNode(TupleDesc tupleDescriptor, HeapTuple heapTuple); static bool NodeIsLocal(WorkerNode *worker); static void SetLockTimeoutLocally(int32 lock_cooldown); -static void UpdateNodeLocation(int32 nodeId, char *newNodeName, int32 newNodePort); +static void UpdateNodeLocation(int32 nodeId, char *newNodeName, int32 newNodePort, + bool localOnly); static bool UnsetMetadataSyncedForAllWorkers(void); static char * GetMetadataSyncCommandToSetNodeColumn(WorkerNode *workerNode, int columnIndex, @@ -231,8 +232,8 @@ citus_set_coordinator_host(PG_FUNCTION_ARGS) * do not need to worry about concurrent changes (e.g. deletion) and * can proceed to update immediately. */ - - UpdateNodeLocation(coordinatorNode->nodeId, nodeNameString, nodePort); + bool localOnly = false; + UpdateNodeLocation(coordinatorNode->nodeId, nodeNameString, nodePort, localOnly); /* clear cached plans that have the old host/port */ ResetPlanCache(); @@ -1290,7 +1291,8 @@ citus_update_node(PG_FUNCTION_ARGS) */ ResetPlanCache(); - UpdateNodeLocation(nodeId, newNodeNameString, newNodePort); + bool localOnly = true; + UpdateNodeLocation(nodeId, newNodeNameString, newNodePort, localOnly); /* we should be able to find the new node from the metadata */ workerNode = FindWorkerNodeAnyCluster(newNodeNameString, newNodePort); @@ -1352,7 +1354,7 @@ SetLockTimeoutLocally(int32 lockCooldown) static void -UpdateNodeLocation(int32 nodeId, char *newNodeName, int32 newNodePort) +UpdateNodeLocation(int32 nodeId, char *newNodeName, int32 newNodePort, bool localOnly) { const bool indexOK = true; @@ -1396,6 +1398,20 @@ UpdateNodeLocation(int32 nodeId, char *newNodeName, int32 newNodePort) CommandCounterIncrement(); + if (!localOnly && EnableMetadataSync) + { + WorkerNode *updatedNode = FindWorkerNodeAnyCluster(newNodeName, newNodePort); + Assert(updatedNode->nodeId == nodeId); + + /* send the delete command to all primary nodes with metadata */ + char *nodeDeleteCommand = NodeDeleteCommand(updatedNode->nodeId); + SendCommandToWorkersWithMetadata(nodeDeleteCommand); + + /* send the insert command to all primary nodes with metadata */ + char *nodeInsertCommand = NodeListInsertCommand(list_make1(updatedNode)); + SendCommandToWorkersWithMetadata(nodeInsertCommand); + } + systable_endscan(scanDescriptor); table_close(pgDistNode, NoLock); } diff --git a/src/test/regress/expected/failure_mx_metadata_sync_multi_trans.out b/src/test/regress/expected/failure_mx_metadata_sync_multi_trans.out index 3a39f3644..fd0fc632b 100644 --- a/src/test/regress/expected/failure_mx_metadata_sync_multi_trans.out +++ b/src/test/regress/expected/failure_mx_metadata_sync_multi_trans.out @@ -52,12 +52,28 @@ CREATE TABLE loc1 (id int PRIMARY KEY); INSERT INTO loc1 SELECT i FROM generate_series(1,100) i; CREATE TABLE loc2 (id int REFERENCES loc1(id)); INSERT INTO loc2 SELECT i FROM generate_series(1,100) i; +-- citus_set_coordinator_host with wrong port +SELECT citus_set_coordinator_host('localhost', 9999); + citus_set_coordinator_host +--------------------------------------------------------------------- + +(1 row) + +-- citus_set_coordinator_host with correct port SELECT citus_set_coordinator_host('localhost', :master_port); citus_set_coordinator_host --------------------------------------------------------------------- (1 row) +-- show coordinator port is correct on all workers +SELECT * FROM run_command_on_workers($$SELECT row(nodename,nodeport) FROM pg_dist_node WHERE groupid = 0$$); + nodename | nodeport | success | result +--------------------------------------------------------------------- + localhost | 9060 | t | (localhost,57636) + localhost | 57637 | t | (localhost,57636) +(2 rows) + SELECT citus_add_local_table_to_metadata('loc1', cascade_via_foreign_keys => true); citus_add_local_table_to_metadata --------------------------------------------------------------------- diff --git a/src/test/regress/sql/failure_mx_metadata_sync_multi_trans.sql b/src/test/regress/sql/failure_mx_metadata_sync_multi_trans.sql index efd4879bd..0540827e7 100644 --- a/src/test/regress/sql/failure_mx_metadata_sync_multi_trans.sql +++ b/src/test/regress/sql/failure_mx_metadata_sync_multi_trans.sql @@ -42,7 +42,12 @@ INSERT INTO loc1 SELECT i FROM generate_series(1,100) i; CREATE TABLE loc2 (id int REFERENCES loc1(id)); INSERT INTO loc2 SELECT i FROM generate_series(1,100) i; +-- citus_set_coordinator_host with wrong port +SELECT citus_set_coordinator_host('localhost', 9999); +-- citus_set_coordinator_host with correct port SELECT citus_set_coordinator_host('localhost', :master_port); +-- show coordinator port is correct on all workers +SELECT * FROM run_command_on_workers($$SELECT row(nodename,nodeport) FROM pg_dist_node WHERE groupid = 0$$); SELECT citus_add_local_table_to_metadata('loc1', cascade_via_foreign_keys => true); -- Create partitioned distributed table