mirror of https://github.com/citusdata/citus.git
360 lines
12 KiB
SQL
360 lines
12 KiB
SQL
-- Fixtures for the local / distributed table join tests. Everything is created
-- inside the local_table_join schema, which is dropped with CASCADE at the end
-- of this script.
CREATE SCHEMA local_table_join;
SET search_path TO local_table_join;

-- plain table: it is never passed to any create_*_table() call in this script
CREATE TABLE postgres_table (key int, value text, value_2 jsonb);

-- same column layout, registered as a reference table
CREATE TABLE reference_table (key int, value text, value_2 jsonb);
SELECT create_reference_table('reference_table');

-- same column layout, distributed on key, without any primary key or unique index
CREATE TABLE distributed_table (key int, value text, value_2 jsonb);
SELECT create_distributed_table('distributed_table', 'key');

-- distributed on key, with a primary key on the distribution column
CREATE TABLE distributed_table_pkey (key int primary key, value text, value_2 jsonb);
SELECT create_distributed_table('distributed_table_pkey', 'key');

-- distributed on key, with an explicit unique index on the distribution column
-- NOTE(review): the primary key already creates a unique index on key, so
-- key_index is a second unique index on the same column. Confirm whether this
-- table was meant to be declared without the primary key so that the
-- "unique index only" case differs from distributed_table_pkey.
CREATE TABLE distributed_table_windex (key int primary key, value text, value_2 jsonb);
SELECT create_distributed_table('distributed_table_windex', 'key');
CREATE UNIQUE INDEX key_index ON distributed_table_windex (key);

-- range-partitioned on key with two partitions, [0, 10) and [10, 20),
-- then distributed on key
CREATE TABLE distributed_partitioned_table(key int, value text) PARTITION BY RANGE (key);
CREATE TABLE distributed_partitioned_table_1 PARTITION OF distributed_partitioned_table FOR VALUES FROM (0) TO (10);
CREATE TABLE distributed_partitioned_table_2 PARTITION OF distributed_partitioned_table FOR VALUES FROM (10) TO (20);
SELECT create_distributed_table('distributed_partitioned_table', 'key');
-- Raise message verbosity for the queries that follow.
-- NOTE(review): presumably this is what makes the planner's recursive-planning
-- decisions visible in the expected output -- confirm against the .out file.
SET client_min_messages TO DEBUG1;


-- Run the same two joins (local-distributed and local-reference) under each
-- explicit value of citus.local_table_join_policy.

-- the user doesn't allow local / distributed table join
SET citus.local_table_join_policy TO 'never';
SELECT count(*) FROM postgres_table JOIN distributed_table USING(key);
SELECT count(*) FROM postgres_table JOIN reference_table USING(key);

-- the user prefers local table recursively planned
SET citus.local_table_join_policy TO 'prefer-local';
SELECT count(*) FROM postgres_table JOIN distributed_table USING(key);
SELECT count(*) FROM postgres_table JOIN reference_table USING(key);


-- the user prefers distributed table recursively planned
SET citus.local_table_join_policy TO 'prefer-distributed';
SELECT count(*) FROM postgres_table JOIN distributed_table USING(key);
SELECT count(*) FROM postgres_table JOIN reference_table USING(key);
-- update/delete
-- NOTE(review): no statement follows the heading above; the UPDATE ... FROM
-- cases come later in this script and no DELETE statement is visible in it.

-- auto tests

-- switch back to the default policy, which is auto
SET citus.local_table_join_policy to 'auto';

-- on the auto mode, the local tables should be recursively planned
-- unless a unique index exists in a column for distributed table
SELECT count(*) FROM distributed_table JOIN postgres_table USING(key);
SELECT count(*) FROM reference_table JOIN postgres_table USING(key);
SELECT count(*) FROM distributed_table JOIN postgres_table USING(key) JOIN reference_table USING (key);

-- partitioned tables should work as well
-- (plain join, join with an equality filter on the partition/distribution
-- column, and a three-way join that also involves the reference table)
SELECT count(*) FROM distributed_partitioned_table JOIN postgres_table USING(key);
SELECT count(*) FROM distributed_partitioned_table JOIN postgres_table USING(key) WHERE distributed_partitioned_table.key = 10;
SELECT count(*) FROM distributed_partitioned_table JOIN postgres_table USING(key) JOIN reference_table USING (key);
-- Joins against distributed_table_pkey, whose primary key is on "key".

-- a unique index on key so dist table should be recursively planned
SELECT count(*) FROM postgres_table JOIN distributed_table_pkey USING(key);
SELECT count(*) FROM postgres_table JOIN distributed_table_pkey USING(value);
SELECT count(*) FROM postgres_table JOIN distributed_table_pkey ON postgres_table.key = distributed_table_pkey.key;
SELECT count(*) FROM postgres_table JOIN distributed_table_pkey ON distributed_table_pkey.key = 10;

-- it should favor distributed table only if it has equality on the unique column
SELECT count(*) FROM postgres_table JOIN distributed_table_pkey ON distributed_table_pkey.key > 10;
SELECT count(*) FROM postgres_table JOIN distributed_table_pkey ON distributed_table_pkey.key < 10;
-- NOTE(review): the next statement repeats the "key = 10" join condition that
-- was already exercised a few statements above.
SELECT count(*) FROM postgres_table JOIN distributed_table_pkey ON distributed_table_pkey.key = 10;
-- NOTE(review): the next two statements are identical to each other.
SELECT count(*) FROM postgres_table JOIN distributed_table_pkey ON distributed_table_pkey.key = 10 AND distributed_table_pkey.key > 10 ;
SELECT count(*) FROM postgres_table JOIN distributed_table_pkey ON distributed_table_pkey.key = 10 AND distributed_table_pkey.key > 10 ;
SELECT count(*) FROM postgres_table JOIN distributed_table_pkey ON distributed_table_pkey.key = 10 AND distributed_table_pkey.key > 10 AND postgres_table.key = 5;

-- OR-ed conditions on the unique column: equality mixed with a range, several
-- equalities, an equality against a scalar subquery, and nested AND branches
SELECT count(*) FROM postgres_table JOIN distributed_table_pkey ON distributed_table_pkey.key = 10 OR distributed_table_pkey.key > 10;
SELECT count(*) FROM postgres_table JOIN distributed_table_pkey ON distributed_table_pkey.key = 10 OR distributed_table_pkey.key = 20;
SELECT count(*) FROM postgres_table JOIN distributed_table_pkey ON distributed_table_pkey.key = 10 OR distributed_table_pkey.key = 20 OR distributed_table_pkey.key = 30;
SELECT count(*) FROM postgres_table JOIN distributed_table_pkey ON distributed_table_pkey.key = 10 OR distributed_table_pkey.key = (
    SELECT count(*) FROM distributed_table_pkey
);
SELECT count(*) FROM postgres_table JOIN distributed_table_pkey ON distributed_table_pkey.key = 10 OR (distributed_table_pkey.key = 5 and distributed_table_pkey.key > 15);
SELECT count(*) FROM postgres_table JOIN distributed_table_pkey ON distributed_table_pkey.key = 10 OR (distributed_table_pkey.key > 10 and distributed_table_pkey.key > 15);
SELECT count(*) FROM postgres_table JOIN distributed_table_pkey ON distributed_table_pkey.key = 10 OR (distributed_table_pkey.key > 10 and distributed_table_pkey.value = 'notext');
SELECT count(*) FROM postgres_table JOIN distributed_table_pkey ON distributed_table_pkey.key = 10 OR (distributed_table_pkey.key = 10 and distributed_table_pkey.value = 'notext');

-- the equality filter is on the local table only
SELECT count(*) FROM postgres_table JOIN distributed_table_pkey ON postgres_table.key = 10;
-- local table joined with a subquery on the distributed table
SELECT count(*) FROM postgres_table JOIN (SELECT * FROM distributed_table) d1 USING(key);
-- since this is already router plannable, we don't recursively plan the postgres table
SELECT count(*) FROM postgres_table JOIN (SELECT * FROM distributed_table LIMIT 1) d1 USING(key);
-- Same join shapes as the primary-key case, this time against
-- distributed_table_windex.

-- a unique index on key so dist table should be recursively planned
SELECT count(*) FROM postgres_table JOIN distributed_table_windex USING(key);
SELECT count(*) FROM postgres_table JOIN distributed_table_windex USING(value);
SELECT count(*) FROM postgres_table JOIN distributed_table_windex ON postgres_table.key = distributed_table_windex.key;
SELECT count(*) FROM postgres_table JOIN distributed_table_windex ON distributed_table_windex.key = 10;

-- no unique index on value so local table should be recursively planned.
SELECT count(*) FROM distributed_table JOIN postgres_table USING(key) WHERE distributed_table.value = 'test';

-- equality filter on key; distributed_table has no unique index on any column
SELECT count(*) FROM distributed_table JOIN postgres_table USING(key) WHERE distributed_table.key = 1;


-- if both local and distributed tables have a filter, we prefer local unless distributed table has unique indexes on any equality filter
SELECT count(*) FROM distributed_table JOIN postgres_table USING(key) WHERE distributed_table.value = 'test' AND postgres_table.value = 'test';
SELECT count(*) FROM distributed_table JOIN postgres_table USING(key) WHERE distributed_table.value = 'test' OR postgres_table.value = 'test';
-- multiple local/distributed tables
-- only local tables are recursively planned
SELECT count(*) FROM distributed_table d1 JOIN postgres_table p1 USING(key) JOIN distributed_table d2 USING(key) JOIN postgres_table p2 USING(key);


-- same four-way join, with a filter on a non-key column of one distributed table
SELECT
    count(*)
FROM
    distributed_table d1 JOIN postgres_table p1 USING(key) JOIN distributed_table d2 USING(key) JOIN postgres_table p2 USING(key)
WHERE
    d1.value = '1';

-- if the filter is on the JOIN key, we can recursively plan the local
-- tables as filters are pushed down to the local tables
SELECT
    count(*)
FROM
    distributed_table d1 JOIN postgres_table p1 USING(key) JOIN distributed_table d2 USING(key) JOIN postgres_table p2 USING(key)
WHERE
    d1.key = 1;
-- UPDATE ... FROM joins under the 'auto' policy. The policy is set explicitly
-- here so this section does not depend on the preceding statements.
SET citus.local_table_join_policy to 'auto';

-- we can support modification queries as well

-- target: local table, joined with a distributed table
UPDATE
    postgres_table
SET
    value = 'test'
FROM
    distributed_table
WHERE
    distributed_table.key = postgres_table.key;


-- target: distributed table without unique index, joined with the local table
UPDATE
    distributed_table
SET
    value = 'test'
FROM
    postgres_table
WHERE
    distributed_table.key = postgres_table.key;

-- target: distributed table with a primary key on key
UPDATE
    distributed_table_pkey
SET
    value = 'test'
FROM
    postgres_table
WHERE
    distributed_table_pkey.key = postgres_table.key;

-- target: distributed table with a unique index on key
UPDATE
    distributed_table_windex
SET
    value = 'test'
FROM
    postgres_table
WHERE
    distributed_table_windex.key = postgres_table.key;
-- in case of update/delete we always recursively plan
-- the tables other than target table no matter what the policy is

-- The same four UPDATE ... FROM statements as under 'auto', repeated with the
-- policy set to 'prefer-local'.
SET citus.local_table_join_policy TO 'prefer-local';

UPDATE
    postgres_table
SET
    value = 'test'
FROM
    distributed_table
WHERE
    distributed_table.key = postgres_table.key;


UPDATE
    distributed_table
SET
    value = 'test'
FROM
    postgres_table
WHERE
    distributed_table.key = postgres_table.key;

UPDATE
    distributed_table_pkey
SET
    value = 'test'
FROM
    postgres_table
WHERE
    distributed_table_pkey.key = postgres_table.key;

UPDATE
    distributed_table_windex
SET
    value = 'test'
FROM
    postgres_table
WHERE
    distributed_table_windex.key = postgres_table.key;
-- The same four UPDATE ... FROM statements once more, with the policy set to
-- 'prefer-distributed'.
SET citus.local_table_join_policy TO 'prefer-distributed';

UPDATE
    postgres_table
SET
    value = 'test'
FROM
    distributed_table
WHERE
    distributed_table.key = postgres_table.key;


UPDATE
    distributed_table
SET
    value = 'test'
FROM
    postgres_table
WHERE
    distributed_table.key = postgres_table.key;

UPDATE
    distributed_table_pkey
SET
    value = 'test'
FROM
    postgres_table
WHERE
    distributed_table_pkey.key = postgres_table.key;

UPDATE
    distributed_table_windex
SET
    value = 'test'
FROM
    postgres_table
WHERE
    distributed_table_windex.key = postgres_table.key;
-- modifications with multiple tables

-- distributed target, two references to the local table in FROM
UPDATE
    distributed_table
SET
    value = 'test'
FROM
    postgres_table p1, postgres_table p2
WHERE
    distributed_table.key = p1.key AND p1.key = p2.key;

-- local target, joined with a subquery on the distributed table
UPDATE
    postgres_table
SET
    value = 'test'
FROM
    (SELECT * FROM distributed_table) d1
WHERE
    d1.key = postgres_table.key;

-- local target, joined with a LIMIT 1 subquery on the distributed table
UPDATE
    postgres_table
SET
    value = 'test'
FROM
    (SELECT * FROM distributed_table LIMIT 1) d1
WHERE
    d1.key = postgres_table.key;

-- distributed target, joined with the local table and a second reference to
-- the same distributed table
UPDATE
    distributed_table
SET
    value = 'test'
FROM
    postgres_table p1, distributed_table d2
WHERE
    distributed_table.key = p1.key AND p1.key = d2.key;

-- pretty inefficient plan as it requires
-- recursive planning of 2 distributed tables
UPDATE
    postgres_table
SET
    value = 'test'
FROM
    distributed_table d1, distributed_table d2
WHERE
    postgres_table.key = d1.key AND d1.key = d2.key;
-- currently can't plan subquery-local table join
-- (the distributed table is wrapped in two levels of subquery before being
-- joined with the local table)
SELECT count(*)
FROM
    (SELECT * FROM (SELECT * FROM distributed_table) d1) d2
JOIN postgres_table
USING(key);
---------------------------------------------------------

-- Only ERROR-level messages are shown while the node with group id 0 is added
-- and the citus local table is created; DEBUG1 is restored right afterwards.
-- NOTE(review): :master_port is a psql variable supplied by the test harness,
-- and group 0 is presumably the coordinator itself -- confirm.
SET client_min_messages to ERROR;
SELECT master_add_node('localhost', :master_port, groupId => 0);


CREATE TABLE citus_local(key int, value text);
SELECT create_citus_local_table('citus_local');
SET client_min_messages TO DEBUG1;

-- same for citus local table - distributed table joins
-- a unique index on key so dist table should be recursively planned
SELECT count(*) FROM citus_local JOIN distributed_table_windex USING(key);
SELECT count(*) FROM citus_local JOIN distributed_table_windex USING(value);
SELECT count(*) FROM citus_local JOIN distributed_table_windex ON citus_local.key = distributed_table_windex.key;
SELECT count(*) FROM citus_local JOIN distributed_table_windex ON distributed_table_windex.key = 10;

-- no unique index, citus local table should be recursively planned
SELECT count(*) FROM citus_local JOIN distributed_table USING(key);
SELECT count(*) FROM citus_local JOIN distributed_table USING(value);
SELECT count(*) FROM citus_local JOIN distributed_table ON citus_local.key = distributed_table.key;
SELECT count(*) FROM citus_local JOIN distributed_table ON distributed_table.key = 10;

-- citus local, distributed, plain local and reference table in a single join
SELECT count(*) FROM citus_local JOIN distributed_table USING(key) JOIN postgres_table USING (key) JOIN reference_table USING(key);

-- partitioned distributed table joined with all other table kinds; the two
-- filters on key (> 10 and = 10) cannot both be true
SELECT count(*) FROM distributed_partitioned_table JOIN postgres_table USING(key) JOIN reference_table USING (key)
    JOIN citus_local USING(key) WHERE distributed_partitioned_table.key > 10 and distributed_partitioned_table.key = 10;
-- update

-- distributed target (unique index on key), joined with the citus local table
UPDATE
    distributed_table_windex
SET
    value = 'test'
FROM
    citus_local
WHERE
    distributed_table_windex.key = citus_local.key;

-- citus local target, joined with the distributed table
UPDATE
    citus_local
SET
    value = 'test'
FROM
    distributed_table_windex
WHERE
    distributed_table_windex.key = citus_local.key;
-- Cleanup: drop the citus local table, restore the default message level,
-- remove the node that was added for the citus local table tests, and finally
-- drop the schema together with every remaining test table.
DROP TABLE citus_local;
RESET client_min_messages;
SELECT master_remove_node('localhost', :master_port);
-- NOTE(review): terse verbosity presumably keeps the per-object detail of the
-- cascading drop out of the expected output -- confirm against the .out file.
\set VERBOSITY terse
DROP SCHEMA local_table_join CASCADE;