Merge pull request #6057 from citusdata/fix_read_rep_error

Fix errors while promoting read-replicas to primary
pull/6060/head
Önder Kalacı 2022-07-13 15:14:21 +02:00 committed by GitHub
commit beebbfc9ff
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 72 additions and 6 deletions

View File

@ -2505,7 +2505,7 @@ SchemaOwnerName(Oid objectId)
static bool
HasMetadataWorkers(void)
{
List *workerNodeList = ActivePrimaryNonCoordinatorNodeList(NoLock);
List *workerNodeList = ActiveReadableNonCoordinatorNodeList();
WorkerNode *workerNode = NULL;
foreach_ptr(workerNode, workerNodeList)

View File

@ -1687,7 +1687,7 @@ citus_is_coordinator(PG_FUNCTION_ARGS)
bool isCoordinator = false;
if (GetLocalGroupId() == COORDINATOR_GROUP_ID &&
ActivePrimaryNodeCount() > 0)
ActiveReadableNodeCount() > 0)
{
isCoordinator = true;
}

View File

@ -94,12 +94,12 @@ ActivePrimaryNonCoordinatorNodeCount(void)
/*
* ActivePrimaryNodeCount returns the number of groups with a primary in the cluster.
* ActiveReadableNodeCount returns the number of nodes in the cluster.
*/
uint32
ActivePrimaryNodeCount(void)
ActiveReadableNodeCount(void)
{
List *nodeList = ActivePrimaryNodeList(NoLock);
List *nodeList = ActiveReadableNodeList();
return list_length(nodeList);
}

View File

@ -72,7 +72,7 @@ extern WorkerNode * WorkerGetRoundRobinCandidateNode(List *workerNodeList,
uint64 shardId,
uint32 placementIndex);
extern uint32 ActivePrimaryNonCoordinatorNodeCount(void);
extern uint32 ActivePrimaryNodeCount(void);
extern uint32 ActiveReadableNodeCount(void);
extern List * ActivePrimaryNonCoordinatorNodeList(LOCKMODE lockMode);
extern List * ActivePrimaryNodeList(LOCKMODE lockMode);
extern List * ActivePrimaryRemoteNodeList(LOCKMODE lockMode);

View File

@ -354,6 +354,33 @@ ERROR: writing to worker nodes is not currently allowed
DETAIL: citus.use_secondary_nodes is set to 'always'
SELECT * FROM citus_local_table ORDER BY a;
ERROR: there is a shard placement in node group 0 but there are no nodes in that group
\c "port=57636 dbname=regression options='-c\ citus.use_secondary_nodes=always\ -c\ citus.cluster_name=second-cluster'"
-- when an existing read-replica is forked to become
-- another primary node, we sometimes have to use citus.use_secondary_nodes=always
-- even if the node is not in recovery mode. In those cases, allow LOCK
-- command on local / metadata tables, and also certain UDFs
SHOW citus.use_secondary_nodes;
citus.use_secondary_nodes
---------------------------------------------------------------------
always
(1 row)
SELECT pg_is_in_recovery();
pg_is_in_recovery
---------------------------------------------------------------------
f
(1 row)
SELECT citus_is_coordinator();
citus_is_coordinator
---------------------------------------------------------------------
t
(1 row)
BEGIN;
LOCK TABLE pg_dist_node IN SHARE ROW EXCLUSIVE MODE;
LOCK TABLE local IN SHARE ROW EXCLUSIVE MODE;
COMMIT;
\c -reuse-previous=off regression - - :master_port
DROP TABLE the_table;
DROP TABLE reference_table;

View File

@ -141,6 +141,25 @@ ORDER BY
localhost | 9072
(2 rows)
-- basic helper utilities should work fine
SELECT citus_is_coordinator();
citus_is_coordinator
---------------------------------------------------------------------
t
(1 row)
SELECT count(*) FROM citus_lock_waits;
count
---------------------------------------------------------------------
0
(1 row)
SELECT count(*) FROM citus_dist_stat_activity WHERE global_pid = citus_backend_gpid();
count
---------------------------------------------------------------------
1
(1 row)
-- okay, now let's play with nodecluster. If we change the cluster of our follower node
-- queries should start failing again, since there are no worker nodes in the new cluster
\c "port=9070 dbname=regression options='-c\ citus.use_secondary_nodes=always\ -c\ citus.cluster_name=second-cluster'"

View File

@ -163,6 +163,20 @@ SELECT * FROM reference_table ORDER BY a;
INSERT INTO citus_local_table (a, b, z) VALUES (1, 2, 3);
SELECT * FROM citus_local_table ORDER BY a;
\c "port=57636 dbname=regression options='-c\ citus.use_secondary_nodes=always\ -c\ citus.cluster_name=second-cluster'"
-- when an existing read-replica is forked to become
-- another primary node, we sometimes have to use citus.use_secondary_nodes=always
-- even if the node is not in recovery mode. In those cases, allow LOCK
-- command on local / metadata tables, and also certain UDFs
SHOW citus.use_secondary_nodes;
SELECT pg_is_in_recovery();
SELECT citus_is_coordinator();
BEGIN;
LOCK TABLE pg_dist_node IN SHARE ROW EXCLUSIVE MODE;
LOCK TABLE local IN SHARE ROW EXCLUSIVE MODE;
COMMIT;
\c -reuse-previous=off regression - - :master_port
DROP TABLE the_table;
DROP TABLE reference_table;

View File

@ -89,6 +89,12 @@ FROM
ORDER BY
node_name, node_port;
-- basic helper utilities should work fine
SELECT citus_is_coordinator();
SELECT count(*) FROM citus_lock_waits;
SELECT count(*) FROM citus_dist_stat_activity WHERE global_pid = citus_backend_gpid();
-- okay, now let's play with nodecluster. If we change the cluster of our follower node
-- queries should start failing again, since there are no worker nodes in the new cluster