CREATE SCHEMA local_shard_execution;
SET search_path TO local_shard_execution;

SET citus.shard_count TO 4;
SET citus.shard_replication_factor TO 1;
SET citus.replication_model TO 'streaming';
SET citus.next_shard_id TO 1470000;

CREATE TABLE reference_table (key int PRIMARY KEY);
SELECT create_reference_table('reference_table');
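-- the new reference table should now be registered in the Citus metadata; one
-- quick, read-only way to check (reference tables are expected to show partmethod 'n'):
SELECT partmethod FROM pg_dist_partition WHERE logicalrelid = 'reference_table'::regclass;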
CREATE TABLE distributed_table (key int PRIMARY KEY , value text, age bigint CHECK (age > 10), FOREIGN KEY (key) REFERENCES reference_table(key) ON DELETE CASCADE);
SELECT create_distributed_table('distributed_table','key');

CREATE TABLE second_distributed_table (key int PRIMARY KEY , value text, FOREIGN KEY (key) REFERENCES distributed_table(key) ON DELETE CASCADE);
SELECT create_distributed_table('second_distributed_table','key');

-- ingest some data to enable some tests with data
INSERT INTO reference_table VALUES (1);
INSERT INTO distributed_table VALUES (1, '1', 20);
INSERT INTO second_distributed_table VALUES (1, '1');

-- a simple test setup for a distributed partitioned table
CREATE TABLE collections_list (
	key bigserial,
	ser bigserial,
	ts timestamptz,
	collection_id integer,
	value numeric,
	PRIMARY KEY(key, collection_id)
) PARTITION BY LIST (collection_id );

SELECT create_distributed_table('collections_list', 'key');

CREATE TABLE collections_list_0
	PARTITION OF collections_list (key, ser, ts, collection_id, value)
	FOR VALUES IN ( 0 );
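-- both the parent and its new partition are expected to show up as distributed
-- tables at this point; a quick way to verify from the metadata:
SELECT logicalrelid FROM pg_dist_partition
	WHERE logicalrelid::text LIKE 'collections_list%' ORDER BY logicalrelid::text;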
-- create a volatile function that returns the local node id
CREATE OR REPLACE FUNCTION get_local_node_id_volatile()
RETURNS INT AS $$
DECLARE localGroupId int;
BEGIN
	SELECT groupid INTO localGroupId FROM pg_dist_local_group;
	RETURN localGroupId;
END; $$ language plpgsql VOLATILE;
SELECT create_distributed_function('get_local_node_id_volatile()');
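-- as a quick sanity check, the same group id can be read straight from the
-- metadata; it is 0 on the coordinator and non-zero on worker nodes
SELECT groupid FROM pg_dist_local_group;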
-- test case for issue #3556
CREATE TABLE accounts (id text PRIMARY KEY);
CREATE TABLE stats (account_id text PRIMARY KEY, spent int);

SELECT create_distributed_table('accounts', 'id', colocate_with => 'none');
SELECT create_distributed_table('stats', 'account_id', colocate_with => 'accounts');

INSERT INTO accounts (id) VALUES ('foo');
INSERT INTO stats (account_id, spent) VALUES ('foo', 100);

-- connect to the first worker node and get ready for the tests
\c - - - :worker_1_port
SET search_path TO local_shard_execution;

-- returns true if, given the distribution key filter
-- on the distributed table (e.g., WHERE key = 1), we'd hit a shard
-- placement that is local to this node
CREATE OR REPLACE FUNCTION shard_of_distribution_column_is_local(dist_key int) RETURNS bool AS $$

	DECLARE shard_is_local BOOLEAN := FALSE;

	BEGIN

		WITH local_shard_ids AS (SELECT get_shard_id_for_distribution_column('local_shard_execution.distributed_table', dist_key)),
			all_local_shard_ids_on_node AS (SELECT shardid FROM pg_dist_placement WHERE groupid IN (SELECT groupid FROM pg_dist_local_group))
		SELECT
			true INTO shard_is_local
		FROM
			local_shard_ids
		WHERE
			get_shard_id_for_distribution_column IN (SELECT * FROM all_local_shard_ids_on_node);

		IF shard_is_local IS NULL THEN
			shard_is_local = FALSE;
		END IF;

		RETURN shard_is_local;
	END;
$$ LANGUAGE plpgsql;
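-- the same check can also be expressed as a standalone query over the catalogs
-- the function above uses; a minimal sketch for distribution key value 1:
SELECT get_shard_id_for_distribution_column('local_shard_execution.distributed_table', 1) IN
	(SELECT shardid FROM pg_dist_placement WHERE groupid IN (SELECT groupid FROM pg_dist_local_group));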
-- test case for issue #3556
SET citus.log_intermediate_results TO TRUE;
SET client_min_messages TO DEBUG1;

SELECT *
FROM
(
    WITH accounts_cte AS (
        SELECT id AS account_id
        FROM accounts
    ),
    joined_stats_cte_1 AS (
        SELECT spent, account_id
        FROM stats
            INNER JOIN accounts_cte USING (account_id)
    ),
    joined_stats_cte_2 AS (
        SELECT spent, account_id
        FROM joined_stats_cte_1
            INNER JOIN accounts_cte USING (account_id)
    )
    SELECT SUM(spent) OVER (PARTITION BY coalesce(account_id, NULL))
    FROM accounts_cte
        INNER JOIN joined_stats_cte_2 USING (account_id)
) inner_query;

SET citus.log_intermediate_results TO DEFAULT;
SET client_min_messages TO DEFAULT;

-- pick some example values that reside on the shards locally and remotely

-- distribution key values of 1, 6, 500 and 701 are LOCAL to shards,
-- we'll use these values in the tests
SELECT shard_of_distribution_column_is_local(1);
SELECT shard_of_distribution_column_is_local(6);
SELECT shard_of_distribution_column_is_local(500);
SELECT shard_of_distribution_column_is_local(701);

-- distribution key values of 11 and 12 are REMOTE to shards
SELECT shard_of_distribution_column_is_local(11);
SELECT shard_of_distribution_column_is_local(12);

-- enable logging to see which tasks are executed locally
SET citus.log_local_commands TO ON;
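-- a quick way to confirm the setting is on for this session:
SHOW citus.log_local_commands;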
-- first, make sure that local execution works fine
-- with simple queries that are not in transaction blocks
SELECT count(*) FROM distributed_table WHERE key = 1;

-- multiple tasks both of which are local should NOT use local execution
-- because local execution means executing the tasks sequentially on this node,
-- so the executor favors parallel execution even if everything is local to the node
SELECT count(*) FROM distributed_table WHERE key IN (1,6);

-- queries that hit any remote shards should NOT use local execution
SELECT count(*) FROM distributed_table WHERE key IN (1,11);
SELECT count(*) FROM distributed_table;

-- modifications also follow the same rules
INSERT INTO reference_table VALUES (1) ON CONFLICT DO NOTHING;
INSERT INTO distributed_table VALUES (1, '1', 21) ON CONFLICT DO NOTHING;

-- local query
DELETE FROM distributed_table WHERE key = 1 AND age = 21;

-- hitting multiple shards, so should be a distributed execution
DELETE FROM distributed_table WHERE age = 21;

-- although both shards are local, the executor chooses parallel execution
-- over the wire because, as noted above, local execution is sequential
DELETE FROM second_distributed_table WHERE key IN (1,6);

-- similarly, any multi-shard query just follows distributed execution
DELETE FROM second_distributed_table;

-- load some more data for the following tests
INSERT INTO second_distributed_table VALUES (1, '1');

-- INSERT .. SELECT hitting a single (co-located) shard should
-- be executed locally
INSERT INTO distributed_table
SELECT
	distributed_table.*
FROM
	distributed_table, second_distributed_table
WHERE
	distributed_table.key = 1 and distributed_table.key=second_distributed_table.key
ON CONFLICT(key) DO UPDATE SET value = '22'
RETURNING *;

-- INSERT .. SELECT hitting multiple shards should go through distributed execution
INSERT INTO distributed_table
SELECT
	distributed_table.*
FROM
	distributed_table, second_distributed_table
WHERE
	distributed_table.key != 1 and distributed_table.key=second_distributed_table.key
ON CONFLICT(key) DO UPDATE SET value = '22'
RETURNING *;

-- INSERT..SELECT via the coordinator consists of two steps, a SELECT followed by a COPY;
-- that's why local execution is disallowed even if the SELECT
-- could be executed locally
INSERT INTO distributed_table SELECT * FROM distributed_table WHERE key = 1 OFFSET 0 ON CONFLICT DO NOTHING;
INSERT INTO distributed_table SELECT 1, '1',15 FROM distributed_table WHERE key = 2 LIMIT 1 ON CONFLICT DO NOTHING;

-- sanity check: multi-shard INSERT..SELECT pushdown goes through distributed execution
INSERT INTO distributed_table SELECT * FROM distributed_table ON CONFLICT DO NOTHING;
-- EXPLAIN for local execution just works fine
-- though going through distributed execution
EXPLAIN (COSTS OFF) SELECT * FROM distributed_table WHERE key = 1 AND age = 20;

EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM distributed_table WHERE key = 1 AND age = 20;

EXPLAIN (COSTS OFF) DELETE FROM distributed_table WHERE key = 1 AND age = 20;

EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) DELETE FROM distributed_table WHERE key = 1 AND age = 20;
-- show that EXPLAIN ANALYZE deleted the row and cascaded the delete
SELECT * FROM distributed_table WHERE key = 1 AND age = 20 ORDER BY 1,2,3;
SELECT * FROM second_distributed_table WHERE key = 1 ORDER BY 1,2;
-- put rows back for other tests
INSERT INTO distributed_table VALUES (1, '22', 20);
INSERT INTO second_distributed_table VALUES (1, '1');

-- COPY always happens via distributed execution irrespective of the
-- shards that are accessed
COPY reference_table FROM STDIN;
6
11
\.

COPY distributed_table FROM STDIN WITH CSV;
6,'6',25
11,'11',121
\.

COPY second_distributed_table FROM STDIN WITH CSV;
6,'6'
\.
-- the behaviour in transaction blocks is the following:
-- (a) Unless the first query is a local query, always use distributed execution.
-- (b) If the executor has used local execution, it has to use local execution
--     for the remainder of the transaction block. If that's not possible, the
--     executor has to error out.

-- ROLLBACK should be able to roll back local execution
BEGIN;
	INSERT INTO distributed_table VALUES (1, '11',21) ON CONFLICT(key) DO UPDATE SET value = '29' RETURNING *;
	SELECT * FROM distributed_table WHERE key = 1 ORDER BY 1,2,3;
ROLLBACK;

-- make sure that the value has been rolled back
SELECT * FROM distributed_table WHERE key = 1 ORDER BY 1,2,3;

-- ROLLBACK should be able to roll back both the local and distributed executions
BEGIN;
	INSERT INTO distributed_table VALUES (1, '11',21) ON CONFLICT(key) DO UPDATE SET value = '29' RETURNING *;
	DELETE FROM distributed_table;

	-- DELETE should cascade, and we should not see any rows
	SELECT count(*) FROM second_distributed_table;
ROLLBACK;

-- make sure that everything has been rolled back
SELECT * FROM distributed_table WHERE key = 1 ORDER BY 1,2,3;
SELECT count(*) FROM second_distributed_table;
SELECT * FROM second_distributed_table;
-- very simple examples: the SELECTs should see the modifications
-- made earlier in the transaction
BEGIN;
	-- INSERT is executed locally
	INSERT INTO distributed_table VALUES (1, '11',21) ON CONFLICT(key) DO UPDATE SET value = '23' RETURNING *;

	-- since the INSERT is executed locally, the SELECT should also be
	-- executed locally and see the changes
	SELECT * FROM distributed_table WHERE key = 1 ORDER BY 1,2,3;

	-- multi-shard SELECTs are now forced to use local execution on
	-- the shards that reside on this node
	SELECT * FROM distributed_table WHERE value = '23' ORDER BY 1,2,3;

	-- similarly, multi-shard modifications should use local execution
	-- on the shards that reside on this node
	DELETE FROM distributed_table WHERE value = '23';

	-- make sure that the value is deleted
	SELECT * FROM distributed_table WHERE value = '23' ORDER BY 1,2,3;
COMMIT;

-- make sure that we've committed everything
SELECT * FROM distributed_table WHERE key = 1 ORDER BY 1,2,3;

-- if we start with a distributed execution, we should keep
-- using that and never switch back to local execution
BEGIN;
	DELETE FROM distributed_table WHERE value = '11';

	-- although this command could have been executed
	-- locally, it is not going to be executed locally
	SELECT * FROM distributed_table WHERE key = 1 ORDER BY 1,2,3;

	-- but we can still execute parallel queries, even if
	-- they are utility commands
	TRUNCATE distributed_table CASCADE;

	-- TRUNCATE cascaded into second_distributed_table
	SELECT count(*) FROM second_distributed_table;
ROLLBACK;
-- load some data so that foreign keys won't complain with the next tests
INSERT INTO reference_table SELECT i FROM generate_series(500, 600) i;

-- show that cascading foreign keys just work fine with local execution
BEGIN;
	INSERT INTO reference_table VALUES (701);
	INSERT INTO distributed_table VALUES (701, '701', 701);
	INSERT INTO second_distributed_table VALUES (701, '701');

	DELETE FROM reference_table WHERE key = 701;

	SELECT count(*) FROM distributed_table WHERE key = 701;
	SELECT count(*) FROM second_distributed_table WHERE key = 701;

	-- multi-shard commands should also see the changes
	SELECT count(*) FROM distributed_table WHERE key > 700;

	-- we can still do multi-shard commands
	DELETE FROM distributed_table;
ROLLBACK;

-- multiple queries hitting different shards can be executed locally
BEGIN;
	SELECT count(*) FROM distributed_table WHERE key = 1;
	SELECT count(*) FROM distributed_table WHERE key = 6;
	SELECT count(*) FROM distributed_table WHERE key = 500;
ROLLBACK;

-- a local query followed by a TRUNCATE command can be executed locally
BEGIN;
	SELECT count(*) FROM distributed_table WHERE key = 1;
	TRUNCATE distributed_table CASCADE;
ROLLBACK;

-- a local query is followed by a command that cannot be executed locally
BEGIN;
	SELECT count(*) FROM distributed_table WHERE key = 1;

	INSERT INTO distributed_table (key) SELECT i FROM generate_series(1,1) i;
ROLLBACK;

-- a local query is followed by a command that cannot be executed locally
BEGIN;
	SELECT count(*) FROM distributed_table WHERE key = 1;

	INSERT INTO distributed_table (key) SELECT key+1 FROM distributed_table;
ROLLBACK;
INSERT INTO distributed_table VALUES (1, '11',21) ON CONFLICT(key) DO UPDATE SET value = '29' RETURNING *;

-- EXPLAIN ANALYZE can follow a local modification in the same transaction block
BEGIN;
	DELETE FROM distributed_table WHERE key = 1;
	EXPLAIN ANALYZE DELETE FROM distributed_table WHERE key = 1;
ROLLBACK;

BEGIN;
	INSERT INTO distributed_table VALUES (11, '111',29) ON CONFLICT(key) DO UPDATE SET value = '29' RETURNING *;

	-- this is already disallowed on the worker nodes, adding it in case we
	-- support DDLs from the worker nodes in the future
	ALTER TABLE distributed_table ADD COLUMN x INT;
ROLLBACK;

BEGIN;
	INSERT INTO distributed_table VALUES (11, '111',29) ON CONFLICT(key) DO UPDATE SET value = '29' RETURNING *;

	-- this is already disallowed because VACUUM cannot be executed in a transaction block,
	-- adding it in case this is supported some day
	VACUUM second_distributed_table;
ROLLBACK;

-- make sure that functions can use local execution
CREATE OR REPLACE PROCEDURE only_local_execution() AS $$
	DECLARE cnt INT;
	BEGIN
		INSERT INTO distributed_table VALUES (1, '11',21) ON CONFLICT(key) DO UPDATE SET value = '29';
		SELECT count(*) INTO cnt FROM distributed_table WHERE key = 1;
		DELETE FROM distributed_table WHERE key = 1;
	END;
$$ LANGUAGE plpgsql;

CALL only_local_execution();

-- insert a row that we need in the next tests
INSERT INTO distributed_table VALUES (1, '11',21) ON CONFLICT(key) DO UPDATE SET value = '29';
-- make sure that functions can use local execution
CREATE OR REPLACE PROCEDURE only_local_execution_with_function_evaluation() AS $$
	DECLARE nodeId INT;
	BEGIN
		-- fast path router
		SELECT get_local_node_id_volatile() INTO nodeId FROM distributed_table WHERE key = 1;
		IF nodeId <= 0 THEN
			RAISE NOTICE 'unexpected node id';
		END IF;

		-- regular router
		SELECT get_local_node_id_volatile() INTO nodeId FROM distributed_table d1 JOIN distributed_table d2 USING (key) WHERE d1.key = 1;
		IF nodeId <= 0 THEN
			RAISE NOTICE 'unexpected node id';
		END IF;
	END;
$$ LANGUAGE plpgsql;

CALL only_local_execution_with_function_evaluation();

CREATE OR REPLACE PROCEDURE only_local_execution_with_params(int) AS $$
	DECLARE cnt INT;
	BEGIN
		INSERT INTO distributed_table VALUES ($1, '11',21) ON CONFLICT(key) DO UPDATE SET value = '29';
		SELECT count(*) INTO cnt FROM distributed_table WHERE key = $1;
		DELETE FROM distributed_table WHERE key = $1;
	END;
$$ LANGUAGE plpgsql;

CALL only_local_execution_with_params(1);

CREATE OR REPLACE PROCEDURE only_local_execution_with_function_evaluation_param(int) AS $$
	DECLARE nodeId INT;
	BEGIN
		-- fast path router
		SELECT get_local_node_id_volatile() INTO nodeId FROM distributed_table WHERE key = $1;
		IF nodeId <= 0 THEN
			RAISE NOTICE 'unexpected node id';
		END IF;

		-- regular router
		SELECT get_local_node_id_volatile() INTO nodeId FROM distributed_table d1 JOIN distributed_table d2 USING (key) WHERE d1.key = $1;
		IF nodeId <= 0 THEN
			RAISE NOTICE 'unexpected node id';
		END IF;
	END;
$$ LANGUAGE plpgsql;

CALL only_local_execution_with_function_evaluation_param(1);

CREATE OR REPLACE PROCEDURE local_execution_followed_by_dist() AS $$
	DECLARE cnt INT;
	BEGIN
		INSERT INTO distributed_table VALUES (1, '11',21) ON CONFLICT(key) DO UPDATE SET value = '29';
		SELECT count(*) INTO cnt FROM distributed_table WHERE key = 1;
		DELETE FROM distributed_table;
		SELECT count(*) INTO cnt FROM distributed_table;
	END;
$$ LANGUAGE plpgsql;

CALL local_execution_followed_by_dist();

-- test CTEs, including modifying CTEs
WITH local_insert AS (INSERT INTO distributed_table VALUES (1, '11',21) ON CONFLICT(key) DO UPDATE SET value = '29' RETURNING *),
distributed_local_mixed AS (SELECT * FROM reference_table WHERE key IN (SELECT key FROM local_insert))
SELECT * FROM local_insert, distributed_local_mixed;

-- since we start with parallel execution, we do not switch back to local execution in the
-- latter CTEs
WITH distributed_local_mixed AS (SELECT * FROM distributed_table),
local_insert AS (INSERT INTO distributed_table VALUES (1, '11',21) ON CONFLICT(key) DO UPDATE SET value = '29' RETURNING *)
SELECT * FROM local_insert, distributed_local_mixed ORDER BY 1,2,3,4,5;

-- router CTE pushdown
WITH all_data AS (SELECT * FROM distributed_table WHERE key = 1)
SELECT
	count(*)
FROM
	distributed_table, all_data
WHERE
	distributed_table.key = all_data.key AND distributed_table.key = 1;
INSERT INTO reference_table VALUES (2);
INSERT INTO distributed_table VALUES (2, '29', 29);
INSERT INTO second_distributed_table VALUES (2, '29');

-- a single-shard query that is not local, followed by a local query
WITH all_data AS (SELECT * FROM second_distributed_table WHERE key = 2)
SELECT
	distributed_table.key
FROM
	distributed_table, all_data
WHERE
	distributed_table.value = all_data.value AND distributed_table.key = 1
ORDER BY
	1 DESC;

-- a multi-shard CTE is followed by a query which could be executed locally, but
-- since the query started with a parallel query, it doesn't use local execution
-- note that if we allow Postgres to inline the CTE (e.g., not have the EXISTS
-- subquery), then it'd push down the filters and the query becomes a single-shard,
-- locally executable query
WITH all_data AS (SELECT * FROM distributed_table)
SELECT
	count(*)
FROM
	distributed_table, all_data
WHERE
	distributed_table.key = all_data.key AND distributed_table.key = 1
	AND EXISTS (SELECT * FROM all_data);

-- in pg12, the following CTE can be inlined, yet the query still becomes
-- a subquery that needs to be recursively planned and a parallel
-- query, so it does not use local execution
WITH all_data AS (SELECT age FROM distributed_table)
SELECT
	count(*)
FROM
	distributed_table, all_data
WHERE
	distributed_table.key = all_data.age AND distributed_table.key = 1;

-- get ready for the next commands
TRUNCATE reference_table, distributed_table, second_distributed_table;

-- local execution of RETURNING for reference tables
INSERT INTO reference_table VALUES (1),(2),(3),(4),(5),(6) RETURNING *;

-- local execution of multi-row INSERTs
INSERT INTO distributed_table VALUES (1, '11',21), (5,'55',22) ON CONFLICT(key) DO UPDATE SET value = (EXCLUDED.value::int + 1)::text RETURNING *;

-- distributed execution of multi-row INSERTs, where part of the execution
-- could have been done via local execution but the executor chooses the other way around
-- because the command is a multi-shard query
INSERT INTO distributed_table VALUES (1, '11',21), (2,'22',22), (3,'33',33), (4,'44',44),(5,'55',55) ON CONFLICT(key) DO UPDATE SET value = (EXCLUDED.value::int + 1)::text RETURNING *;

PREPARE local_prepare_no_param AS SELECT count(*) FROM distributed_table WHERE key = 1;
PREPARE local_prepare_no_param_subquery AS
SELECT DISTINCT trim(value) FROM (
	SELECT value FROM distributed_table
	WHERE
		key IN (1, 6, 500, 701)
		AND (select 2) > random()
	order by 1
	limit 2
	) t;
PREPARE local_prepare_param (int) AS SELECT count(*) FROM distributed_table WHERE key = $1;
PREPARE remote_prepare_param (int) AS SELECT count(*) FROM distributed_table WHERE key != $1;
BEGIN;
	-- 6 local executions without params
	EXECUTE local_prepare_no_param;
	EXECUTE local_prepare_no_param;
	EXECUTE local_prepare_no_param;
	EXECUTE local_prepare_no_param;
	EXECUTE local_prepare_no_param;
	EXECUTE local_prepare_no_param;

	-- 6 local executions without params and with some subqueries
	EXECUTE local_prepare_no_param_subquery;
	EXECUTE local_prepare_no_param_subquery;
	EXECUTE local_prepare_no_param_subquery;
	EXECUTE local_prepare_no_param_subquery;
	EXECUTE local_prepare_no_param_subquery;
	EXECUTE local_prepare_no_param_subquery;

	-- 6 local executions with params
	EXECUTE local_prepare_param(1);
	EXECUTE local_prepare_param(5);
	EXECUTE local_prepare_param(6);
	EXECUTE local_prepare_param(1);
	EXECUTE local_prepare_param(5);
	EXECUTE local_prepare_param(6);

	-- followed by a non-local execution
	EXECUTE remote_prepare_param(1);
COMMIT;
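-- one way to peek at the plan used for one of the prepared statements above
-- (e.g., to see whether a generic plan has kicked in) is EXPLAIN EXECUTE; a minimal sketch:
EXPLAIN (COSTS OFF) EXECUTE local_prepare_param(1);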
PREPARE local_insert_prepare_no_param AS INSERT INTO distributed_table VALUES (1+0*random(), '11',21::int) ON CONFLICT(key) DO UPDATE SET value = '29' || '28' RETURNING *, key + 1, value || '30', age * 15;
PREPARE local_insert_prepare_param (int) AS INSERT INTO distributed_table VALUES ($1+0*random(), '11',21::int) ON CONFLICT(key) DO UPDATE SET value = '29' || '28' RETURNING *, key + 1, value || '30', age * 15;
BEGIN;
	-- 6 local executions without params
	EXECUTE local_insert_prepare_no_param;
	EXECUTE local_insert_prepare_no_param;
	EXECUTE local_insert_prepare_no_param;
	EXECUTE local_insert_prepare_no_param;
	EXECUTE local_insert_prepare_no_param;
	EXECUTE local_insert_prepare_no_param;

	-- 6 local executions with params
	EXECUTE local_insert_prepare_param(1);
	EXECUTE local_insert_prepare_param(5);
	EXECUTE local_insert_prepare_param(6);
	EXECUTE local_insert_prepare_param(1);
	EXECUTE local_insert_prepare_param(5);
	EXECUTE local_insert_prepare_param(6);

	-- followed by a non-local execution
	EXECUTE remote_prepare_param(2);
COMMIT;

PREPARE local_multi_row_insert_prepare_no_param AS
	INSERT INTO distributed_table VALUES (1,'55', 21), (5,'15',33) ON CONFLICT (key) WHERE key > 3 and key < 4 DO UPDATE SET value = '88' || EXCLUDED.value;

PREPARE local_multi_row_insert_prepare_no_param_multi_shard AS
	INSERT INTO distributed_table VALUES (6,'55', 21), (5,'15',33) ON CONFLICT (key) WHERE key > 3 AND key < 4 DO UPDATE SET value = '88' || EXCLUDED.value;

PREPARE local_multi_row_insert_prepare_params(int,int) AS
	INSERT INTO distributed_table VALUES ($1,'55', 21), ($2,'15',33) ON CONFLICT (key) WHERE key > 3 and key < 4 DO UPDATE SET value = '88' || EXCLUDED.value;
INSERT INTO reference_table VALUES (11);
BEGIN;
	EXECUTE local_multi_row_insert_prepare_no_param;
	EXECUTE local_multi_row_insert_prepare_no_param;
	EXECUTE local_multi_row_insert_prepare_no_param;
	EXECUTE local_multi_row_insert_prepare_no_param;
	EXECUTE local_multi_row_insert_prepare_no_param;
	EXECUTE local_multi_row_insert_prepare_no_param;

	EXECUTE local_multi_row_insert_prepare_no_param_multi_shard;
	EXECUTE local_multi_row_insert_prepare_no_param_multi_shard;
	EXECUTE local_multi_row_insert_prepare_no_param_multi_shard;
	EXECUTE local_multi_row_insert_prepare_no_param_multi_shard;
	EXECUTE local_multi_row_insert_prepare_no_param_multi_shard;
	EXECUTE local_multi_row_insert_prepare_no_param_multi_shard;

	EXECUTE local_multi_row_insert_prepare_params(1,6);
	EXECUTE local_multi_row_insert_prepare_params(1,5);
	EXECUTE local_multi_row_insert_prepare_params(6,5);
	EXECUTE local_multi_row_insert_prepare_params(5,1);
	EXECUTE local_multi_row_insert_prepare_params(5,6);
	EXECUTE local_multi_row_insert_prepare_params(5,1);

	-- one task is remote
	EXECUTE local_multi_row_insert_prepare_params(5,11);
ROLLBACK;

-- failures of local execution should roll back both the
-- local and the remote executions

-- fail on a local execution
BEGIN;
	INSERT INTO distributed_table VALUES (1, '11',21) ON CONFLICT(key) DO UPDATE SET value = '100' RETURNING *;

	UPDATE distributed_table SET value = '200';

	INSERT INTO distributed_table VALUES (1, '100',21) ON CONFLICT(key) DO UPDATE SET value = (1 / (100.0 - EXCLUDED.value::int))::text RETURNING *;
ROLLBACK;

-- we've rolled back everything
SELECT count(*) FROM distributed_table WHERE value = '200';

-- RETURNING should just work fine for reference tables
INSERT INTO reference_table VALUES (500) RETURNING *;
DELETE FROM reference_table WHERE key = 500 RETURNING *;

-- should be able to skip local execution even in a sequential mode of execution
BEGIN;
	SET LOCAL citus.multi_shard_modify_mode TO sequential;

	DELETE FROM distributed_table;
	INSERT INTO distributed_table VALUES (1, '11',21) ON CONFLICT(key) DO UPDATE SET value = '100' RETURNING *;
ROLLBACK;

-- sequential execution should just work fine after a local execution
BEGIN;
	SET citus.multi_shard_modify_mode TO sequential;

	INSERT INTO distributed_table VALUES (1, '11',21) ON CONFLICT(key) DO UPDATE SET value = '100' RETURNING *;
	DELETE FROM distributed_table;
ROLLBACK;

-- load some data so that foreign keys won't complain with the next tests
TRUNCATE reference_table CASCADE;
INSERT INTO reference_table SELECT i FROM generate_series(500, 600) i;
INSERT INTO distributed_table SELECT i, i::text, i % 10 + 25 FROM generate_series(500, 600) i;

-- show that both local and mixed local-distributed executions
-- report the number of rows processed correctly
BEGIN;
	DELETE FROM distributed_table WHERE key = 500;

	DELETE FROM distributed_table WHERE value != '123123213123213';
ROLLBACK;

BEGIN;

	DELETE FROM reference_table WHERE key = 500 RETURNING *;

	DELETE FROM reference_table;
ROLLBACK;

-- task-tracker select execution
BEGIN;
	DELETE FROM distributed_table WHERE key = 500;

	SET LOCAL citus.task_executor_type = 'task-tracker';

	SELECT count(*) FROM distributed_table;
ROLLBACK;

-- the following modification should not be executed locally
-- because a task-tracker query has already been executed in the transaction
BEGIN;
	SET LOCAL citus.task_executor_type = 'task-tracker';
	SET LOCAL client_min_messages TO INFO;
	SELECT count(*) FROM distributed_table;
	SET LOCAL client_min_messages TO LOG;

	DELETE FROM distributed_table WHERE key = 500;
ROLLBACK;

-- probably not a realistic case since views are not very
-- well supported with MX
CREATE VIEW v_local_query_execution AS
SELECT * FROM distributed_table WHERE key = 500;

SELECT * FROM v_local_query_execution;

-- similar test, but this time the view itself is a non-local
-- query, but the query on the view is local
CREATE VIEW v_local_query_execution_2 AS
SELECT * FROM distributed_table;

SELECT * FROM v_local_query_execution_2 WHERE key = 500;

-- even if we switch from remote execution -> local execution,
-- we are able to use remote execution after rollback
BEGIN;
	SAVEPOINT my_savepoint;

	SELECT count(*) FROM distributed_table;

	DELETE FROM distributed_table WHERE key = 500;

	ROLLBACK TO SAVEPOINT my_savepoint;

	DELETE FROM distributed_table WHERE key = 500;

COMMIT;

-- even if we switch from local execution -> remote execution,
-- we are able to use local execution after rollback
BEGIN;

	SAVEPOINT my_savepoint;

	DELETE FROM distributed_table WHERE key = 500;

	SELECT count(*) FROM distributed_table;

	ROLLBACK TO SAVEPOINT my_savepoint;

	DELETE FROM distributed_table WHERE key = 500;

COMMIT;

-- sanity check: local execution on partitions
INSERT INTO collections_list (collection_id) VALUES (0) RETURNING *;

BEGIN;
	INSERT INTO collections_list (key, collection_id) VALUES (1,0);
	SELECT count(*) FROM collections_list_0 WHERE key = 1;
	SELECT count(*) FROM collections_list;
	SELECT * FROM collections_list ORDER BY 1,2,3,4;
COMMIT;

TRUNCATE collections_list;

-- make sure that even if local execution is used, the sequence values
-- are generated locally
ALTER SEQUENCE collections_list_key_seq NO MINVALUE NO MAXVALUE;
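-- for reference, the current state of the sequence on this node can be
-- inspected directly; for example:
SELECT last_value FROM collections_list_key_seq;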

PREPARE serial_prepared_local AS INSERT INTO collections_list (collection_id) VALUES (0) RETURNING key, ser;

SELECT setval('collections_list_key_seq', 4);
EXECUTE serial_prepared_local;
SELECT setval('collections_list_key_seq', 5);
EXECUTE serial_prepared_local;
SELECT setval('collections_list_key_seq', 499);
EXECUTE serial_prepared_local;
SELECT setval('collections_list_key_seq', 700);
EXECUTE serial_prepared_local;
SELECT setval('collections_list_key_seq', 708);
EXECUTE serial_prepared_local;
SELECT setval('collections_list_key_seq', 709);
EXECUTE serial_prepared_local;

-- and, one remote test
SELECT setval('collections_list_key_seq', 10);
EXECUTE serial_prepared_local;

-- the final queries for the following CTEs are going to happen on the intermediate results only;
-- one of them will be executed remotely, and the other locally
-- Citus currently doesn't allow using task_assignment_policy for intermediate results
WITH distributed_local_mixed AS (INSERT INTO reference_table VALUES (1000) RETURNING *) SELECT * FROM distributed_local_mixed;

-- clean the table for the next tests
SET search_path TO local_shard_execution;
TRUNCATE distributed_table CASCADE;

-- load some data on a remote shard
INSERT INTO reference_table (key) VALUES (1), (2);
INSERT INTO distributed_table (key) VALUES (2);
BEGIN;
	-- local execution followed by a distributed query
	INSERT INTO distributed_table (key) VALUES (1);
	DELETE FROM distributed_table RETURNING key;
COMMIT;

-- a similar test with a reference table
TRUNCATE reference_table CASCADE;

-- load some data on a remote shard
INSERT INTO reference_table (key) VALUES (2);
BEGIN;
	-- local execution followed by a distributed query
	INSERT INTO reference_table (key) VALUES (1);
	DELETE FROM reference_table RETURNING key;
COMMIT;

-- however complex the query is, local execution can handle it
SET client_min_messages TO LOG;
SET citus.log_local_commands TO ON;
WITH cte_1 AS
  (SELECT *
   FROM
     (WITH cte_1 AS
        (SELECT *
         FROM distributed_table
         WHERE key = 1) SELECT *
      FROM cte_1) AS foo)
SELECT count(*)
FROM cte_1
JOIN distributed_table USING (key)
WHERE distributed_table.key = 1
  AND distributed_table.key IN
    (SELECT key
     FROM distributed_table
     WHERE key = 1);

RESET client_min_messages;
RESET citus.log_local_commands;

\c - - - :master_port
SET citus.next_shard_id TO 1480000;
-- local execution with a custom type
SET citus.replication_model TO "streaming";
SET citus.shard_replication_factor TO 1;
CREATE TYPE invite_resp AS ENUM ('yes', 'no', 'maybe');

CREATE TABLE event_responses (
	event_id int,
	user_id int,
	response invite_resp,
	primary key (event_id, user_id)
);

SELECT create_distributed_table('event_responses', 'event_id');

CREATE OR REPLACE PROCEDURE register_for_event(p_event_id int, p_user_id int, p_choice invite_resp)
LANGUAGE plpgsql AS $fn$
BEGIN
	INSERT INTO event_responses VALUES (p_event_id, p_user_id, p_choice)
	ON CONFLICT (event_id, user_id)
	DO UPDATE SET response = EXCLUDED.response;

	PERFORM count(*) FROM event_responses WHERE event_id = p_event_id;

	PERFORM count(*) FROM event_responses WHERE event_id = p_event_id AND false;

	UPDATE event_responses SET response = p_choice WHERE event_id = p_event_id;

END;
$fn$;

SELECT create_distributed_function('register_for_event(int,int,invite_resp)', 'p_event_id', 'event_responses');

-- call the procedure multiple times to make sure it works after the 5th time (postgres binds values after the 5th time);
-- after the 6th, the local execution caches the local plans and reuses them
-- execute it both locally and remotely
CALL register_for_event(16, 1, 'yes');
CALL register_for_event(16, 1, 'yes');
CALL register_for_event(16, 1, 'yes');
CALL register_for_event(16, 1, 'yes');
CALL register_for_event(16, 1, 'yes');
CALL register_for_event(16, 1, 'yes');
CALL register_for_event(16, 1, 'yes');
CALL register_for_event(16, 1, 'yes');

\c - - - :worker_2_port
CALL register_for_event(16, 1, 'yes');
CALL register_for_event(16, 1, 'yes');
CALL register_for_event(16, 1, 'yes');
CALL register_for_event(16, 1, 'yes');
CALL register_for_event(16, 1, 'yes');
CALL register_for_event(16, 1, 'yes');

CALL register_for_event(16, 1, 'yes');
CALL register_for_event(16, 1, 'yes');

-- values 16, 17 and 19 hit the same
-- shard, so we're re-using the same cached
-- plans per statement across different distribution
-- key values
CALL register_for_event(17, 1, 'yes');
CALL register_for_event(19, 1, 'yes');
CALL register_for_event(17, 1, 'yes');
CALL register_for_event(19, 1, 'yes');

-- should work fine if the logs are enabled
\set VERBOSITY terse
SET citus.log_local_commands TO ON;
SET client_min_messages TO DEBUG2;
CALL register_for_event(19, 1, 'yes');

-- should be fine even if no parameters exist in the query
SELECT count(*) FROM event_responses WHERE event_id = 16;
SELECT count(*) FROM event_responses WHERE event_id = 16;
UPDATE event_responses SET response = 'no' WHERE event_id = 16;
INSERT INTO event_responses VALUES (16, 666, 'maybe')
ON CONFLICT (event_id, user_id)
DO UPDATE SET response = EXCLUDED.response RETURNING *;

-- multi-row INSERTs hitting the same shard
INSERT INTO event_responses VALUES (16, 666, 'maybe'), (17, 777, 'no')
ON CONFLICT (event_id, user_id)
DO UPDATE SET response = EXCLUDED.response RETURNING *;

-- now, similar tests with some settings changed
SET citus.enable_local_execution TO false;
SET citus.enable_fast_path_router_planner TO false;
CALL register_for_event(19, 1, 'yes');

-- should be fine even if no parameters exist in the query
SELECT count(*) FROM event_responses WHERE event_id = 16;
SELECT count(*) FROM event_responses WHERE event_id = 16;
UPDATE event_responses SET response = 'no' WHERE event_id = 16;
INSERT INTO event_responses VALUES (16, 666, 'maybe')
ON CONFLICT (event_id, user_id)
DO UPDATE SET response = EXCLUDED.response RETURNING *;

-- multi-row INSERTs hitting the same shard
INSERT INTO event_responses VALUES (16, 666, 'maybe'), (17, 777, 'no')
ON CONFLICT (event_id, user_id)
DO UPDATE SET response = EXCLUDED.response RETURNING *;

\c - - - :master_port

SET client_min_messages TO ERROR;
SET search_path TO public;
DROP SCHEMA local_shard_execution CASCADE;