-- Test creation of mx tables and metadata syncing

SELECT nextval('pg_catalog.pg_dist_placement_placementid_seq') AS last_placement_id
\gset
SELECT nextval('pg_catalog.pg_dist_groupid_seq') AS last_group_id \gset
SELECT nextval('pg_catalog.pg_dist_node_nodeid_seq') AS last_node_id \gset
SELECT nextval('pg_catalog.pg_dist_colocationid_seq') AS last_colocation_id \gset
SELECT nextval('pg_catalog.pg_dist_shardid_seq') AS last_shard_id \gset
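-- Note: \gset stores each output column of the preceding query into a psql
-- variable of the same name (e.g. :last_placement_id). The values saved here
-- are used in the cleanup section at the end to restart these sequences.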

SET citus.replication_model TO streaming;
SET citus.shard_count TO 8;
SET citus.shard_replication_factor TO 1;

-- set sync intervals to less than 15s so wait_until_metadata_sync never times out
ALTER SYSTEM SET citus.metadata_sync_interval TO 3000;
ALTER SYSTEM SET citus.metadata_sync_retry_interval TO 500;
SELECT pg_reload_conf();
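-- ALTER SYSTEM only writes the new values to postgresql.auto.conf;
-- pg_reload_conf() makes them take effect without a server restart.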

CREATE FUNCTION wait_until_metadata_sync(timeout INTEGER DEFAULT 15000)
    RETURNS void
    LANGUAGE C STRICT
    AS 'citus';
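-- wait_until_metadata_sync() blocks until the maintenance daemon has marked
-- metadata as synced for the nodes, or the given timeout (in milliseconds) expires.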

-- Verifies that pg_dist_node and pg_dist_placement in the given worker match the ones on the coordinator
CREATE FUNCTION verify_metadata(hostname TEXT, port INTEGER, master_port INTEGER DEFAULT 57636)
    RETURNS BOOLEAN
    LANGUAGE sql
    AS $$
    WITH dist_node_summary AS (
        SELECT 'SELECT jsonb_agg(ROW(nodeid, groupid, nodename, nodeport, isactive) ORDER BY nodeid) FROM pg_dist_node' as query
    ), dist_node_check AS (
        SELECT count(distinct result) = 1 AS matches
        FROM dist_node_summary CROSS JOIN LATERAL
            master_run_on_worker(ARRAY[hostname, 'localhost'], ARRAY[port, master_port],
                                 ARRAY[dist_node_summary.query, dist_node_summary.query],
                                 false)
    ), dist_placement_summary AS (
        SELECT 'SELECT jsonb_agg(pg_dist_placement ORDER BY shardid) FROM pg_dist_placement' AS query
    ), dist_placement_check AS (
        SELECT count(distinct result) = 1 AS matches
        FROM dist_placement_summary CROSS JOIN LATERAL
            master_run_on_worker(ARRAY[hostname, 'localhost'], ARRAY[port, master_port],
                                 ARRAY[dist_placement_summary.query, dist_placement_summary.query],
                                 false)
    )
    SELECT dist_node_check.matches AND dist_placement_check.matches
    FROM dist_node_check CROSS JOIN dist_placement_check
$$;
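-- Example usage (as done below): SELECT verify_metadata('localhost', :worker_1_port);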

-- Simulates a readonly node by setting default_transaction_read_only.
CREATE FUNCTION mark_node_readonly(hostname TEXT, port INTEGER, isreadonly BOOLEAN)
    RETURNS TEXT
    LANGUAGE sql
    AS $$
    SELECT master_run_on_worker(ARRAY[hostname], ARRAY[port],
           ARRAY['ALTER SYSTEM SET default_transaction_read_only TO ' || isreadonly::TEXT], false);
    SELECT result FROM
        master_run_on_worker(ARRAY[hostname], ARRAY[port],
                             ARRAY['SELECT pg_reload_conf()'], false);
$$;
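-- Example usage (as done below): SELECT mark_node_readonly('localhost', :worker_2_port, TRUE);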

-- add a node to the cluster
SELECT master_add_node('localhost', :worker_1_port) AS nodeid_1 \gset
SELECT nodeid, nodename, nodeport, hasmetadata, metadatasynced FROM pg_dist_node;

-- create a couple of tables
CREATE TABLE ref_table(a int primary key);
SELECT create_reference_table('ref_table');

CREATE TABLE dist_table_1(a int primary key, b int references ref_table(a));
SELECT create_distributed_table('dist_table_1', 'a');

-- update the node
SELECT 1 FROM master_update_node((SELECT nodeid FROM pg_dist_node),
                                 'localhost', :worker_2_port);
SELECT nodeid, nodename, nodeport, hasmetadata, metadatasynced FROM pg_dist_node;

-- start syncing metadata to the node
SELECT 1 FROM start_metadata_sync_to_node('localhost', :worker_2_port);
SELECT nodeid, nodename, nodeport, hasmetadata, metadatasynced FROM pg_dist_node;

--------------------------------------------------------------------------
-- Test that maintenance daemon syncs after master_update_node
--------------------------------------------------------------------------

-- Update the node again. We do this in repeatable read, so we only see the
-- changes made by master_update_node(). This is to avoid inconsistent results
-- if the maintenance daemon does the metadata sync too fast.
BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;
SELECT nodeid, nodename, nodeport, hasmetadata, metadatasynced FROM pg_dist_node;
SELECT 1 FROM master_update_node(:nodeid_1, 'localhost', :worker_1_port);
SELECT nodeid, nodename, nodeport, hasmetadata, metadatasynced FROM pg_dist_node;
END;

-- wait until the maintenance daemon does the next metadata sync, and then
-- check if metadata is synced again
SELECT wait_until_metadata_sync();
SELECT nodeid, hasmetadata, metadatasynced FROM pg_dist_node;

SELECT verify_metadata('localhost', :worker_1_port);

-- Update the node to a non-existent node. This is to simulate updating to
-- an unwriteable node.
BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;
SELECT nodeid, nodename, nodeport, hasmetadata, metadatasynced FROM pg_dist_node;
SELECT 1 FROM master_update_node(:nodeid_1, 'localhost', 12345);
SELECT nodeid, nodename, nodeport, hasmetadata, metadatasynced FROM pg_dist_node;
END;

-- the maintenance daemon's metadata sync should fail, because the node is still unwriteable.
SELECT wait_until_metadata_sync();
SELECT nodeid, hasmetadata, metadatasynced FROM pg_dist_node;

-- update it back to :worker_1_port, now metadata should be synced
SELECT 1 FROM master_update_node(:nodeid_1, 'localhost', :worker_1_port);
SELECT wait_until_metadata_sync();
SELECT nodeid, hasmetadata, metadatasynced FROM pg_dist_node;

--------------------------------------------------------------------------
-- Test updating a node when another node is in readonly-mode
--------------------------------------------------------------------------

SELECT master_add_node('localhost', :worker_2_port) AS nodeid_2 \gset
SELECT 1 FROM start_metadata_sync_to_node('localhost', :worker_2_port);

-- Create a table with shards on both nodes
CREATE TABLE dist_table_2(a int);
SELECT create_distributed_table('dist_table_2', 'a');
INSERT INTO dist_table_2 SELECT i FROM generate_series(1, 100) i;

SELECT mark_node_readonly('localhost', :worker_2_port, TRUE);

-- Now updating the other node should try syncing to worker 2, but instead of
-- failing, it should just warn and mark the readonly node as not synced.
SELECT 1 FROM master_update_node(:nodeid_1, 'localhost', 12345);
SELECT nodeid, hasmetadata, metadatasynced FROM pg_dist_node ORDER BY nodeid;

-- worker_2 is out of sync, so further updates aren't sent to it and
-- we shouldn't see the warnings.
SELECT 1 FROM master_update_node(:nodeid_1, 'localhost', 23456);
SELECT nodeid, hasmetadata, metadatasynced FROM pg_dist_node ORDER BY nodeid;

-- Make the node writeable.
SELECT mark_node_readonly('localhost', :worker_2_port, FALSE);
SELECT wait_until_metadata_sync();

-- Mark the node readonly again, so the following master_update_node warns
SELECT mark_node_readonly('localhost', :worker_2_port, TRUE);

-- Revert the nodeport of worker 1. Metadata propagation to worker 2 should
-- still fail, but after the failure we should still be able to read from
-- worker 2 in the same transaction!
BEGIN;
SELECT 1 FROM master_update_node(:nodeid_1, 'localhost', :worker_1_port);
SELECT count(*) FROM dist_table_2;
END;

SELECT wait_until_metadata_sync();

-- Make the node writeable.
SELECT mark_node_readonly('localhost', :worker_2_port, FALSE);
SELECT wait_until_metadata_sync();

SELECT 1 FROM master_update_node(:nodeid_1, 'localhost', :worker_1_port);
SELECT verify_metadata('localhost', :worker_1_port),
       verify_metadata('localhost', :worker_2_port);

--------------------------------------------------------------------------
-- Test that master_update_node rolls back properly
--------------------------------------------------------------------------
BEGIN;
SELECT 1 FROM master_update_node(:nodeid_1, 'localhost', 12345);
ROLLBACK;

SELECT verify_metadata('localhost', :worker_1_port),
       verify_metadata('localhost', :worker_2_port);

-- cleanup
DROP TABLE dist_table_1, ref_table, dist_table_2;
TRUNCATE pg_dist_colocation;
SELECT count(*) FROM (SELECT master_remove_node(nodename, nodeport) FROM pg_dist_node) t;
ALTER SEQUENCE pg_catalog.pg_dist_groupid_seq RESTART :last_group_id;
ALTER SEQUENCE pg_catalog.pg_dist_node_nodeid_seq RESTART :last_node_id;
ALTER SEQUENCE pg_catalog.pg_dist_colocationid_seq RESTART :last_colocation_id;
ALTER SEQUENCE pg_catalog.pg_dist_placement_placementid_seq RESTART :last_placement_id;
ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART :last_shard_id;

RESET citus.shard_count;
RESET citus.shard_replication_factor;
RESET citus.replication_model;