From e67899a620e2c1ef0fe6f273d2bf62937935f9d3 Mon Sep 17 00:00:00 2001
From: Jelte Fennema
Date: Tue, 31 Jan 2023 12:18:29 +0100
Subject: [PATCH] Fix background rebalance when reference table has no PK
 (#6682)

DESCRIPTION: Fix background rebalance when reference table has no PK

For the background rebalance we would always fail if a reference table
that was not replicated to all nodes did not have a PK (or replica
identity), even when force_logical or block_writes was used as the
shard transfer mode. This fixes that and adds some regression tests.

Fixes #6680

(cherry picked from commit 14c31fbb07d5560f5ed5b36ead2b2fec378bbe26)
---
 .../distributed/operations/shard_rebalancer.c |  5 +-
 .../regress/expected/background_rebalance.out | 89 +++++++++++++++++++
 src/test/regress/sql/background_rebalance.sql | 31 ++++++-
 3 files changed, 123 insertions(+), 2 deletions(-)

diff --git a/src/backend/distributed/operations/shard_rebalancer.c b/src/backend/distributed/operations/shard_rebalancer.c
index beab2be47..2bfae36fc 100644
--- a/src/backend/distributed/operations/shard_rebalancer.c
+++ b/src/backend/distributed/operations/shard_rebalancer.c
@@ -1818,7 +1818,10 @@ RebalanceTableShardsBackground(RebalanceOptions *options, Oid shardReplicationMo
 
 	if (HasNodesWithMissingReferenceTables(&referenceTableIdList))
 	{
-		VerifyTablesHaveReplicaIdentity(referenceTableIdList);
+		if (shardTransferMode == TRANSFER_MODE_AUTOMATIC)
+		{
+			VerifyTablesHaveReplicaIdentity(referenceTableIdList);
+		}
 
 		/*
 		 * Reference tables need to be copied to (newly-added) nodes, this needs to be the
diff --git a/src/test/regress/expected/background_rebalance.out b/src/test/regress/expected/background_rebalance.out
index 8843654d6..091b20b7d 100644
--- a/src/test/regress/expected/background_rebalance.out
+++ b/src/test/regress/expected/background_rebalance.out
@@ -210,5 +210,94 @@ SELECT citus_rebalance_stop();
 (1 row)
 
 RESET ROLE;
+SET citus.replicate_reference_tables_on_activate = false;
+CREATE TABLE ref_no_pk(a int);
+SELECT create_reference_table('ref_no_pk');
+ create_reference_table
+---------------------------------------------------------------------
+
+(1 row)
+
+CREATE TABLE ref_with_pk(a int primary key);
+SELECT create_reference_table('ref_with_pk');
+ create_reference_table
+---------------------------------------------------------------------
+
+(1 row)
+
+-- Add coordinator so there's a node which doesn't have the reference tables
+SELECT 1 FROM citus_add_node('localhost', :master_port, groupId=>0);
+NOTICE: localhost:xxxxx is the coordinator and already contains metadata, skipping syncing the metadata
+ ?column?
+---------------------------------------------------------------------
+ 1
+(1 row)
+
+-- fails
+BEGIN;
+SELECT 1 FROM citus_rebalance_start();
+ERROR: cannot use logical replication to transfer shards of the relation ref_no_pk since it doesn't have a REPLICA IDENTITY or PRIMARY KEY
+DETAIL: UPDATE and DELETE commands on the shard will error out during logical replication unless there is a REPLICA IDENTITY or PRIMARY KEY.
+HINT: If you wish to continue without a replica identity set the shard_transfer_mode to 'force_logical' or 'block_writes'.
+ROLLBACK;
+-- success
+BEGIN;
+SELECT 1 FROM citus_rebalance_start(shard_transfer_mode := 'force_logical');
+NOTICE: Scheduled 1 moves as job xxx
+DETAIL: Rebalance scheduled as background job
+HINT: To monitor progress, run: SELECT * FROM pg_dist_background_task WHERE job_id = xxx ORDER BY task_id ASC; or SELECT * FROM get_rebalance_progress();
+ ?column?
+---------------------------------------------------------------------
+ 1
+(1 row)
+
+ROLLBACK;
+-- success
+BEGIN;
+SELECT 1 FROM citus_rebalance_start(shard_transfer_mode := 'block_writes');
+NOTICE: Scheduled 1 moves as job xxx
+DETAIL: Rebalance scheduled as background job
+HINT: To monitor progress, run: SELECT * FROM pg_dist_background_task WHERE job_id = xxx ORDER BY task_id ASC; or SELECT * FROM get_rebalance_progress();
+ ?column?
+---------------------------------------------------------------------
+ 1
+(1 row)
+
+ROLLBACK;
+-- fails
+SELECT 1 FROM citus_rebalance_start();
+ERROR: cannot use logical replication to transfer shards of the relation ref_no_pk since it doesn't have a REPLICA IDENTITY or PRIMARY KEY
+DETAIL: UPDATE and DELETE commands on the shard will error out during logical replication unless there is a REPLICA IDENTITY or PRIMARY KEY.
+HINT: If you wish to continue without a replica identity set the shard_transfer_mode to 'force_logical' or 'block_writes'.
+-- succeeds
+SELECT 1 FROM citus_rebalance_start(shard_transfer_mode := 'force_logical');
+NOTICE: Scheduled 1 moves as job xxx
+DETAIL: Rebalance scheduled as background job
+HINT: To monitor progress, run: SELECT * FROM pg_dist_background_task WHERE job_id = xxx ORDER BY task_id ASC; or SELECT * FROM get_rebalance_progress();
+ ?column?
+---------------------------------------------------------------------
+ 1
+(1 row)
+
+-- wait for success
+SELECT citus_rebalance_wait();
+ citus_rebalance_wait
+---------------------------------------------------------------------
+
+(1 row)
+
+-- Remove coordinator again to allow rerunning of this test
+SELECT 1 FROM citus_remove_node('localhost', :master_port);
+ ?column?
+---------------------------------------------------------------------
+ 1
+(1 row)
+
+SELECT public.wait_until_metadata_sync(30000);
+ wait_until_metadata_sync
+---------------------------------------------------------------------
+
+(1 row)
+
 SET client_min_messages TO WARNING;
 DROP SCHEMA background_rebalance CASCADE;
diff --git a/src/test/regress/sql/background_rebalance.sql b/src/test/regress/sql/background_rebalance.sql
index 6528c71b7..ad36412f5 100644
--- a/src/test/regress/sql/background_rebalance.sql
+++ b/src/test/regress/sql/background_rebalance.sql
@@ -61,7 +61,6 @@ SELECT citus_rebalance_wait();
 
 DROP TABLE t1;
 
-
 -- make sure a non-super user can stop rebalancing
 CREATE USER non_super_user_rebalance WITH LOGIN;
 GRANT ALL ON SCHEMA background_rebalance TO non_super_user_rebalance;
@@ -76,7 +75,37 @@ SELECT 1 FROM citus_rebalance_start();
 
 SELECT citus_rebalance_stop();
 RESET ROLE;
 
+SET citus.replicate_reference_tables_on_activate = false;
+CREATE TABLE ref_no_pk(a int);
+SELECT create_reference_table('ref_no_pk');
+CREATE TABLE ref_with_pk(a int primary key);
+SELECT create_reference_table('ref_with_pk');
+-- Add coordinator so there's a node which doesn't have the reference tables
+SELECT 1 FROM citus_add_node('localhost', :master_port, groupId=>0);
+-- fails
+BEGIN;
+SELECT 1 FROM citus_rebalance_start();
+ROLLBACK;
+-- success
+BEGIN;
+SELECT 1 FROM citus_rebalance_start(shard_transfer_mode := 'force_logical');
+ROLLBACK;
+-- success
+BEGIN;
+SELECT 1 FROM citus_rebalance_start(shard_transfer_mode := 'block_writes');
+ROLLBACK;
+
+-- fails
+SELECT 1 FROM citus_rebalance_start();
+-- succeeds
+SELECT 1 FROM citus_rebalance_start(shard_transfer_mode := 'force_logical');
+-- wait for success
+SELECT citus_rebalance_wait();
+
+-- Remove coordinator again to allow rerunning of this test
+SELECT 1 FROM citus_remove_node('localhost', :master_port);
+SELECT public.wait_until_metadata_sync(30000);
 
 SET client_min_messages TO WARNING;
 DROP SCHEMA background_rebalance CASCADE;
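
A minimal usage sketch of the behavior this patch enables, for illustration only and not part of the patch itself. It assumes a cluster where at least one node is still missing the reference table's placement (the HasNodesWithMissingReferenceTables() case guarded above); the table name events_ref is hypothetical.

-- a reference table with no PRIMARY KEY or REPLICA IDENTITY
CREATE TABLE events_ref(a int);
SELECT create_reference_table('events_ref');
-- the default (automatic) transfer mode still errors out and hints at the
-- explicit modes, exactly as in the regression test above
SELECT citus_rebalance_start();
-- with this fix, an explicit mode no longer trips the replica identity check
-- while the background job is being scheduled; 'block_writes' works the same
SELECT citus_rebalance_start(shard_transfer_mode := 'force_logical');
SELECT citus_rebalance_wait();

The only behavioral change is in scheduling: VerifyTablesHaveReplicaIdentity() is now called only for TRANSFER_MODE_AUTOMATIC, so the two explicit transfer modes can proceed for reference tables such as ref_no_pk.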