Fix background rebalance when reference table has no PK (#6682)

DESCRIPTION: Fix background rebalance when reference table has no PK

For the background rebalance we would always fail if a reference table
that was not replicated to all nodes would not have a PK (or replica
identity). Even when we used force_logical or block_writes as the shard
transfer mode. This fixes that and adds some regression tests.

Fixes #6680

(cherry picked from commit 14c31fbb07)
release-11.1-jelte
Jelte Fennema 2023-01-31 12:18:29 +01:00 committed by Jelte Fennema
parent d41f88b6f8
commit e67899a620
3 changed files with 123 additions and 2 deletions

View File

@ -1818,7 +1818,10 @@ RebalanceTableShardsBackground(RebalanceOptions *options, Oid shardReplicationMo
if (HasNodesWithMissingReferenceTables(&referenceTableIdList))
{
VerifyTablesHaveReplicaIdentity(referenceTableIdList);
if (shardTransferMode == TRANSFER_MODE_AUTOMATIC)
{
VerifyTablesHaveReplicaIdentity(referenceTableIdList);
}
/*
* Reference tables need to be copied to (newly-added) nodes, this needs to be the

View File

@ -210,5 +210,94 @@ SELECT citus_rebalance_stop();
(1 row)
RESET ROLE;
SET citus.replicate_reference_tables_on_activate = false;
CREATE TABLE ref_no_pk(a int);
SELECT create_reference_table('ref_no_pk');
create_reference_table
---------------------------------------------------------------------
(1 row)
CREATE TABLE ref_with_pk(a int primary key);
SELECT create_reference_table('ref_with_pk');
create_reference_table
---------------------------------------------------------------------
(1 row)
-- Add coordinator so there's a node which doesn't have the reference tables
SELECT 1 FROM citus_add_node('localhost', :master_port, groupId=>0);
NOTICE: localhost:xxxxx is the coordinator and already contains metadata, skipping syncing the metadata
?column?
---------------------------------------------------------------------
1
(1 row)
-- fails
BEGIN;
SELECT 1 FROM citus_rebalance_start();
ERROR: cannot use logical replication to transfer shards of the relation ref_no_pk since it doesn't have a REPLICA IDENTITY or PRIMARY KEY
DETAIL: UPDATE and DELETE commands on the shard will error out during logical replication unless there is a REPLICA IDENTITY or PRIMARY KEY.
HINT: If you wish to continue without a replica identity set the shard_transfer_mode to 'force_logical' or 'block_writes'.
ROLLBACK;
-- success
BEGIN;
SELECT 1 FROM citus_rebalance_start(shard_transfer_mode := 'force_logical');
NOTICE: Scheduled 1 moves as job xxx
DETAIL: Rebalance scheduled as background job
HINT: To monitor progress, run: SELECT * FROM pg_dist_background_task WHERE job_id = xxx ORDER BY task_id ASC; or SELECT * FROM get_rebalance_progress();
?column?
---------------------------------------------------------------------
1
(1 row)
ROLLBACK;
-- success
BEGIN;
SELECT 1 FROM citus_rebalance_start(shard_transfer_mode := 'block_writes');
NOTICE: Scheduled 1 moves as job xxx
DETAIL: Rebalance scheduled as background job
HINT: To monitor progress, run: SELECT * FROM pg_dist_background_task WHERE job_id = xxx ORDER BY task_id ASC; or SELECT * FROM get_rebalance_progress();
?column?
---------------------------------------------------------------------
1
(1 row)
ROLLBACK;
-- fails
SELECT 1 FROM citus_rebalance_start();
ERROR: cannot use logical replication to transfer shards of the relation ref_no_pk since it doesn't have a REPLICA IDENTITY or PRIMARY KEY
DETAIL: UPDATE and DELETE commands on the shard will error out during logical replication unless there is a REPLICA IDENTITY or PRIMARY KEY.
HINT: If you wish to continue without a replica identity set the shard_transfer_mode to 'force_logical' or 'block_writes'.
-- succeeds
SELECT 1 FROM citus_rebalance_start(shard_transfer_mode := 'force_logical');
NOTICE: Scheduled 1 moves as job xxx
DETAIL: Rebalance scheduled as background job
HINT: To monitor progress, run: SELECT * FROM pg_dist_background_task WHERE job_id = xxx ORDER BY task_id ASC; or SELECT * FROM get_rebalance_progress();
?column?
---------------------------------------------------------------------
1
(1 row)
-- wait for success
SELECT citus_rebalance_wait();
citus_rebalance_wait
---------------------------------------------------------------------
(1 row)
-- Remove coordinator again to allow rerunning of this test
SELECT 1 FROM citus_remove_node('localhost', :master_port);
?column?
---------------------------------------------------------------------
1
(1 row)
SELECT public.wait_until_metadata_sync(30000);
wait_until_metadata_sync
---------------------------------------------------------------------
(1 row)
SET client_min_messages TO WARNING;
DROP SCHEMA background_rebalance CASCADE;

View File

@ -61,7 +61,6 @@ SELECT citus_rebalance_wait();
DROP TABLE t1;
-- make sure a non-super user can stop rebalancing
CREATE USER non_super_user_rebalance WITH LOGIN;
GRANT ALL ON SCHEMA background_rebalance TO non_super_user_rebalance;
@ -76,7 +75,37 @@ SELECT 1 FROM citus_rebalance_start();
SELECT citus_rebalance_stop();
RESET ROLE;
SET citus.replicate_reference_tables_on_activate = false;
CREATE TABLE ref_no_pk(a int);
SELECT create_reference_table('ref_no_pk');
CREATE TABLE ref_with_pk(a int primary key);
SELECT create_reference_table('ref_with_pk');
-- Add coordinator so there's a node which doesn't have the reference tables
SELECT 1 FROM citus_add_node('localhost', :master_port, groupId=>0);
-- fails
BEGIN;
SELECT 1 FROM citus_rebalance_start();
ROLLBACK;
-- success
BEGIN;
SELECT 1 FROM citus_rebalance_start(shard_transfer_mode := 'force_logical');
ROLLBACK;
-- success
BEGIN;
SELECT 1 FROM citus_rebalance_start(shard_transfer_mode := 'block_writes');
ROLLBACK;
-- fails
SELECT 1 FROM citus_rebalance_start();
-- succeeds
SELECT 1 FROM citus_rebalance_start(shard_transfer_mode := 'force_logical');
-- wait for success
SELECT citus_rebalance_wait();
-- Remove coordinator again to allow rerunning of this test
SELECT 1 FROM citus_remove_node('localhost', :master_port);
SELECT public.wait_until_metadata_sync(30000);
SET client_min_messages TO WARNING;
DROP SCHEMA background_rebalance CASCADE;