Merge pull request #6057 from citusdata/fix_read_rep_error

Fix errors while promoting read-replicas to primary
pull/6060/head
Önder Kalacı 2022-07-13 15:14:21 +02:00 committed by GitHub
commit beebbfc9ff
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 72 additions and 6 deletions

View File

@ -2505,7 +2505,7 @@ SchemaOwnerName(Oid objectId)
static bool
HasMetadataWorkers(void)
{
List *workerNodeList = ActivePrimaryNonCoordinatorNodeList(NoLock);
List *workerNodeList = ActiveReadableNonCoordinatorNodeList();
WorkerNode *workerNode = NULL;
foreach_ptr(workerNode, workerNodeList)

View File

@ -1687,7 +1687,7 @@ citus_is_coordinator(PG_FUNCTION_ARGS)
bool isCoordinator = false;
if (GetLocalGroupId() == COORDINATOR_GROUP_ID &&
ActivePrimaryNodeCount() > 0)
ActiveReadableNodeCount() > 0)
{
isCoordinator = true;
}

View File

@ -94,12 +94,12 @@ ActivePrimaryNonCoordinatorNodeCount(void)
/*
* ActivePrimaryNodeCount returns the number of groups with a primary in the cluster.
* ActiveReadableNodeCount returns the number of nodes in the cluster.
*/
uint32
ActivePrimaryNodeCount(void)
ActiveReadableNodeCount(void)
{
List *nodeList = ActivePrimaryNodeList(NoLock);
List *nodeList = ActiveReadableNodeList();
return list_length(nodeList);
}

View File

@ -72,7 +72,7 @@ extern WorkerNode * WorkerGetRoundRobinCandidateNode(List *workerNodeList,
uint64 shardId,
uint32 placementIndex);
extern uint32 ActivePrimaryNonCoordinatorNodeCount(void);
extern uint32 ActivePrimaryNodeCount(void);
extern uint32 ActiveReadableNodeCount(void);
extern List * ActivePrimaryNonCoordinatorNodeList(LOCKMODE lockMode);
extern List * ActivePrimaryNodeList(LOCKMODE lockMode);
extern List * ActivePrimaryRemoteNodeList(LOCKMODE lockMode);

View File

@ -354,6 +354,33 @@ ERROR: writing to worker nodes is not currently allowed
DETAIL: citus.use_secondary_nodes is set to 'always'
SELECT * FROM citus_local_table ORDER BY a;
ERROR: there is a shard placement in node group 0 but there are no nodes in that group
\c "port=57636 dbname=regression options='-c\ citus.use_secondary_nodes=always\ -c\ citus.cluster_name=second-cluster'"
-- when an existing read-replica is forked to become
-- another primary node, we sometimes have to use citus.use_secondary_nodes=always
-- even if the node is not in recovery mode. In those cases, allow LOCK
-- command on local / metadata tables, and also certain UDFs
SHOW citus.use_secondary_nodes;
citus.use_secondary_nodes
---------------------------------------------------------------------
always
(1 row)
SELECT pg_is_in_recovery();
pg_is_in_recovery
---------------------------------------------------------------------
f
(1 row)
SELECT citus_is_coordinator();
citus_is_coordinator
---------------------------------------------------------------------
t
(1 row)
BEGIN;
LOCK TABLE pg_dist_node IN SHARE ROW EXCLUSIVE MODE;
LOCK TABLE local IN SHARE ROW EXCLUSIVE MODE;
COMMIT;
\c -reuse-previous=off regression - - :master_port
DROP TABLE the_table;
DROP TABLE reference_table;

View File

@ -141,6 +141,25 @@ ORDER BY
localhost | 9072
(2 rows)
-- basic helper utilities should work fine
SELECT citus_is_coordinator();
citus_is_coordinator
---------------------------------------------------------------------
t
(1 row)
SELECT count(*) FROM citus_lock_waits;
count
---------------------------------------------------------------------
0
(1 row)
SELECT count(*) FROM citus_dist_stat_activity WHERE global_pid = citus_backend_gpid();
count
---------------------------------------------------------------------
1
(1 row)
-- okay, now let's play with nodecluster. If we change the cluster of our follower node
-- queries should start failing again, since there are no worker nodes in the new cluster
\c "port=9070 dbname=regression options='-c\ citus.use_secondary_nodes=always\ -c\ citus.cluster_name=second-cluster'"

View File

@ -163,6 +163,20 @@ SELECT * FROM reference_table ORDER BY a;
INSERT INTO citus_local_table (a, b, z) VALUES (1, 2, 3);
SELECT * FROM citus_local_table ORDER BY a;
\c "port=57636 dbname=regression options='-c\ citus.use_secondary_nodes=always\ -c\ citus.cluster_name=second-cluster'"
-- when an existing read-replica is forked to become
-- another primary node, we sometimes have to use citus.use_secondary_nodes=always
-- even if the node is not in recovery mode. In those cases, allow LOCK
-- command on local / metadata tables, and also certain UDFs
SHOW citus.use_secondary_nodes;
SELECT pg_is_in_recovery();
SELECT citus_is_coordinator();
BEGIN;
LOCK TABLE pg_dist_node IN SHARE ROW EXCLUSIVE MODE;
LOCK TABLE local IN SHARE ROW EXCLUSIVE MODE;
COMMIT;
\c -reuse-previous=off regression - - :master_port
DROP TABLE the_table;
DROP TABLE reference_table;

View File

@ -89,6 +89,12 @@ FROM
ORDER BY
node_name, node_port;
-- basic helper utilities should work fine
SELECT citus_is_coordinator();
SELECT count(*) FROM citus_lock_waits;
SELECT count(*) FROM citus_dist_stat_activity WHERE global_pid = citus_backend_gpid();
-- okay, now let's play with nodecluster. If we change the cluster of our follower node
-- queries should start failing again, since there are no worker nodes in the new cluster