add basic regression tests

pull/4358/head
Onder Kalaci 2020-08-26 10:06:32 +02:00 committed by Sait Talha Nisanci
parent 594e001f3b
commit 945193555b
3 changed files with 396 additions and 1 deletions

View File

@ -0,0 +1,257 @@
CREATE SCHEMA local_table_join;
SET search_path TO local_table_join;
CREATE TABLE postgres_table (key int, value text, value_2 jsonb);
CREATE TABLE reference_table (key int, value text, value_2 jsonb);
SELECT create_reference_table('reference_table');
create_reference_table
---------------------------------------------------------------------
(1 row)
CREATE TABLE distributed_table (key int, value text, value_2 jsonb);
SELECT create_distributed_table('distributed_table', 'key');
create_distributed_table
---------------------------------------------------------------------
(1 row)
SET client_min_messages TO DEBUG1;
-- the user doesn't allow local / distributed table joinn
SET citus.local_table_join_policy TO 'never';
SELECT count(*) FROM postgres_table JOIN distributed_table USING(key);
ERROR: relation postgres_table is not distributed
SELECT count(*) FROM postgres_table JOIN reference_table USING(key);
ERROR: relation postgres_table is not distributed
-- the user prefers local table recursively planned
SET citus.local_table_join_policy TO 'pull-local';
SELECT count(*) FROM postgres_table JOIN distributed_table USING(key);
DEBUG: Wrapping local relation "postgres_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.postgres_table WHERE true OFFSET 0
DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.postgres_table WHERE true OFFSET 0
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) postgres_table JOIN local_table_join.distributed_table USING (key))
count
---------------------------------------------------------------------
0
(1 row)
SELECT count(*) FROM postgres_table JOIN reference_table USING(key);
DEBUG: Wrapping local relation "postgres_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.postgres_table WHERE true OFFSET 0
DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.postgres_table WHERE true OFFSET 0
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) postgres_table JOIN local_table_join.reference_table USING (key))
count
---------------------------------------------------------------------
0
(1 row)
-- the user prefers distributed table recursively planned
SET citus.local_table_join_policy TO 'pull-distributed';
SELECT count(*) FROM postgres_table JOIN distributed_table USING(key);
DEBUG: Wrapping local relation "distributed_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.distributed_table WHERE true OFFSET 0
DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.distributed_table WHERE true OFFSET 0
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (local_table_join.postgres_table JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) distributed_table USING (key))
count
---------------------------------------------------------------------
0
(1 row)
SELECT count(*) FROM postgres_table JOIN reference_table USING(key);
DEBUG: Wrapping local relation "reference_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.reference_table WHERE true OFFSET 0
DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.reference_table WHERE true OFFSET 0
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (local_table_join.postgres_table JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) reference_table USING (key))
count
---------------------------------------------------------------------
0
(1 row)
-- update/delete
-- auto tests
-- switch back to the default policy, which is auto
RESET citus.local_table_join_policy;
-- on the default mode, the local tables should be recursively planned
SELECT count(*) FROM distributed_table JOIN postgres_table USING(key);
DEBUG: Wrapping local relation "postgres_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.postgres_table WHERE true OFFSET 0
DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.postgres_table WHERE true OFFSET 0
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (local_table_join.distributed_table JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) postgres_table USING (key))
count
---------------------------------------------------------------------
0
(1 row)
SELECT count(*) FROM reference_table JOIN postgres_table USING(key);
DEBUG: Wrapping local relation "postgres_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.postgres_table WHERE true OFFSET 0
DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.postgres_table WHERE true OFFSET 0
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (local_table_join.reference_table JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) postgres_table USING (key))
count
---------------------------------------------------------------------
0
(1 row)
SELECT count(*) FROM distributed_table JOIN postgres_table USING(key) JOIN reference_table USING (key);
DEBUG: Wrapping local relation "postgres_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.postgres_table WHERE true OFFSET 0
DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.postgres_table WHERE true OFFSET 0
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((local_table_join.distributed_table JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) postgres_table USING (key)) JOIN local_table_join.reference_table USING (key))
count
---------------------------------------------------------------------
0
(1 row)
-- this is a contreversial part that we should discuss further
-- if the distributed table has at least one filter, we prefer
-- recursively planning of the distributed table
SELECT count(*) FROM distributed_table JOIN postgres_table USING(key) WHERE distributed_table.value = 'test';
DEBUG: Wrapping local relation "distributed_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.distributed_table WHERE (value OPERATOR(pg_catalog.=) 'test'::text) OFFSET 0
DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.distributed_table WHERE (value OPERATOR(pg_catalog.=) 'test'::text) OFFSET 0
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) distributed_table JOIN local_table_join.postgres_table USING (key)) WHERE (distributed_table.value OPERATOR(pg_catalog.=) 'test'::text)
count
---------------------------------------------------------------------
0
(1 row)
-- but if the filters can be pushed downn to the local table via the join
-- we are smart about recursively planning the local table
SELECT count(*) FROM distributed_table JOIN postgres_table USING(key) WHERE distributed_table.key = 1;
DEBUG: Wrapping local relation "postgres_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.postgres_table WHERE (key OPERATOR(pg_catalog.=) 1) OFFSET 0
DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.postgres_table WHERE (key OPERATOR(pg_catalog.=) 1) OFFSET 0
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (local_table_join.distributed_table JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) postgres_table USING (key)) WHERE (distributed_table.key OPERATOR(pg_catalog.=) 1)
count
---------------------------------------------------------------------
0
(1 row)
-- if both local and distributed tables have a filter, we prefer local
SELECT count(*) FROM distributed_table JOIN postgres_table USING(key) WHERE distributed_table.value = 'test' AND postgres_table.value = 'test';
DEBUG: Wrapping local relation "postgres_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.postgres_table WHERE (value OPERATOR(pg_catalog.=) 'test'::text) OFFSET 0
DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.postgres_table WHERE (value OPERATOR(pg_catalog.=) 'test'::text) OFFSET 0
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (local_table_join.distributed_table JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) postgres_table USING (key)) WHERE ((distributed_table.value OPERATOR(pg_catalog.=) 'test'::text) AND (postgres_table.value OPERATOR(pg_catalog.=) 'test'::text))
count
---------------------------------------------------------------------
0
(1 row)
SELECT count(*) FROM distributed_table JOIN postgres_table USING(key) WHERE distributed_table.value = 'test' OR postgres_table.value = 'test';
DEBUG: Wrapping local relation "postgres_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.postgres_table WHERE true OFFSET 0
DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.postgres_table WHERE true OFFSET 0
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (local_table_join.distributed_table JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) postgres_table USING (key)) WHERE ((distributed_table.value OPERATOR(pg_catalog.=) 'test'::text) OR (postgres_table.value OPERATOR(pg_catalog.=) 'test'::text))
count
---------------------------------------------------------------------
0
(1 row)
-- multiple local/distributed tables
-- only local tables are recursively planned
SELECT count(*) FROM distributed_table d1 JOIN postgres_table p1 USING(key) JOIN distributed_table d2 USING(key) JOIN postgres_table p2 USING(key);
DEBUG: Wrapping local relation "postgres_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.postgres_table p1 WHERE true OFFSET 0
DEBUG: Wrapping local relation "postgres_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.postgres_table p2 WHERE true OFFSET 0
DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.postgres_table p1 WHERE true OFFSET 0
DEBUG: generating subplan XXX_2 for subquery SELECT key, value, value_2 FROM local_table_join.postgres_table p2 WHERE true OFFSET 0
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (((local_table_join.distributed_table d1 JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) p1 USING (key)) JOIN local_table_join.distributed_table d2 USING (key)) JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) p2 USING (key))
count
---------------------------------------------------------------------
0
(1 row)
-- if one of the distributed tables have a filter, we'll prefer recursive planning of it as well
-- it actually leads to a poor plan as we need to recursively plan local tables anyway as it is
-- joined with another distributed table
SELECT
count(*)
FROM
distributed_table d1 JOIN postgres_table p1 USING(key) JOIN distributed_table d2 USING(key) JOIN postgres_table p2 USING(key)
WHERE
d1.value = '1';
DEBUG: Wrapping local relation "distributed_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.distributed_table d1 WHERE (value OPERATOR(pg_catalog.=) '1'::text) OFFSET 0
DEBUG: Wrapping local relation "postgres_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.postgres_table p1 WHERE true OFFSET 0
DEBUG: Wrapping local relation "postgres_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.postgres_table p2 WHERE true OFFSET 0
DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.distributed_table d1 WHERE (value OPERATOR(pg_catalog.=) '1'::text) OFFSET 0
DEBUG: generating subplan XXX_2 for subquery SELECT key, value, value_2 FROM local_table_join.postgres_table p1 WHERE true OFFSET 0
DEBUG: generating subplan XXX_3 for subquery SELECT key, value, value_2 FROM local_table_join.postgres_table p2 WHERE true OFFSET 0
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((((SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) d1 JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) p1 USING (key)) JOIN local_table_join.distributed_table d2 USING (key)) JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) p2 USING (key)) WHERE (d1.value OPERATOR(pg_catalog.=) '1'::text)
count
---------------------------------------------------------------------
0
(1 row)
-- if the filter is on the JOIN key, we can recursively plan the local
-- tables as filters are pushded down to the local tables
SELECT
count(*)
FROM
distributed_table d1 JOIN postgres_table p1 USING(key) JOIN distributed_table d2 USING(key) JOIN postgres_table p2 USING(key)
WHERE
d1.key = 1;
DEBUG: Wrapping local relation "postgres_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.postgres_table p1 WHERE (key OPERATOR(pg_catalog.=) 1) OFFSET 0
DEBUG: Wrapping local relation "postgres_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.postgres_table p2 WHERE (key OPERATOR(pg_catalog.=) 1) OFFSET 0
DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.postgres_table p1 WHERE (key OPERATOR(pg_catalog.=) 1) OFFSET 0
DEBUG: generating subplan XXX_2 for subquery SELECT key, value, value_2 FROM local_table_join.postgres_table p2 WHERE (key OPERATOR(pg_catalog.=) 1) OFFSET 0
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (((local_table_join.distributed_table d1 JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) p1 USING (key)) JOIN local_table_join.distributed_table d2 USING (key)) JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) p2 USING (key)) WHERE (d1.key OPERATOR(pg_catalog.=) 1)
count
---------------------------------------------------------------------
0
(1 row)
-- we can support modification queries as well
UPDATE
postgres_table
SET
value = 'test'
FROM
distributed_table
WHERE
distributed_table.key = postgres_table.key;
DEBUG: Wrapping local relation "distributed_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.distributed_table WHERE true OFFSET 0
DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.distributed_table WHERE true OFFSET 0
DEBUG: Plan XXX query after replacing subqueries and CTEs: UPDATE local_table_join.postgres_table SET value = 'test'::text FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) distributed_table WHERE (distributed_table.key OPERATOR(pg_catalog.=) postgres_table.key)
UPDATE
distributed_table
SET
value = 'test'
FROM
postgres_table
WHERE
distributed_table.key = postgres_table.key;
DEBUG: Wrapping local relation "postgres_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.postgres_table WHERE true OFFSET 0
DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.postgres_table WHERE true OFFSET 0
DEBUG: Plan XXX query after replacing subqueries and CTEs: UPDATE local_table_join.distributed_table SET value = 'test'::text FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) postgres_table WHERE (distributed_table.key OPERATOR(pg_catalog.=) postgres_table.key)
-- modifications with multiple tables
UPDATE
distributed_table
SET
value = 'test'
FROM
postgres_table p1, postgres_table p2
WHERE
distributed_table.key = p1.key AND p1.key = p2.key;
DEBUG: Wrapping local relation "postgres_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.postgres_table p1 WHERE true OFFSET 0
DEBUG: Wrapping local relation "postgres_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.postgres_table p2 WHERE true OFFSET 0
DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.postgres_table p1 WHERE true OFFSET 0
DEBUG: generating subplan XXX_2 for subquery SELECT key, value, value_2 FROM local_table_join.postgres_table p2 WHERE true OFFSET 0
DEBUG: Plan XXX query after replacing subqueries and CTEs: UPDATE local_table_join.distributed_table SET value = 'test'::text FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) p1, (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) p2 WHERE ((distributed_table.key OPERATOR(pg_catalog.=) p1.key) AND (p1.key OPERATOR(pg_catalog.=) p2.key))
UPDATE
distributed_table
SET
value = 'test'
FROM
postgres_table p1, distributed_table d2
WHERE
distributed_table.key = p1.key AND p1.key = d2.key;
DEBUG: Wrapping local relation "postgres_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.postgres_table p1 WHERE true OFFSET 0
DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.postgres_table p1 WHERE true OFFSET 0
DEBUG: Plan XXX query after replacing subqueries and CTEs: UPDATE local_table_join.distributed_table SET value = 'test'::text FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) p1, local_table_join.distributed_table d2 WHERE ((distributed_table.key OPERATOR(pg_catalog.=) p1.key) AND (p1.key OPERATOR(pg_catalog.=) d2.key))
-- pretty inefficient plan as it requires
-- recursive planninng of 2 distributed tables
UPDATE
postgres_table
SET
value = 'test'
FROM
distributed_table d1, distributed_table d2
WHERE
postgres_table.key = d1.key AND d1.key = d2.key;
DEBUG: Wrapping local relation "distributed_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.distributed_table d1 WHERE true OFFSET 0
DEBUG: Wrapping local relation "distributed_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.distributed_table d2 WHERE true OFFSET 0
DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.distributed_table d1 WHERE true OFFSET 0
DEBUG: generating subplan XXX_2 for subquery SELECT key, value, value_2 FROM local_table_join.distributed_table d2 WHERE true OFFSET 0
DEBUG: Plan XXX query after replacing subqueries and CTEs: UPDATE local_table_join.postgres_table SET value = 'test'::text FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) d1, (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) d2 WHERE ((postgres_table.key OPERATOR(pg_catalog.=) d1.key) AND (d1.key OPERATOR(pg_catalog.=) d2.key))
\set VERBOSITY terse
RESET client_min_messages;
DROP SCHEMA local_table_join CASCADE;
NOTICE: drop cascades to 3 other objects

View File

@ -282,7 +282,7 @@ test: multi_colocated_shard_transfer
# ----------
# multi_citus_tools tests utility functions written for citus tools
# ----------
test: multi_citus_tools
test: multi_citus_tools local_table_join
# ----------
# node_conninfo_reload tests that node_conninfo changes take effect

View File

@ -0,0 +1,138 @@
CREATE SCHEMA local_table_join;
SET search_path TO local_table_join;
CREATE TABLE postgres_table (key int, value text, value_2 jsonb);
CREATE TABLE reference_table (key int, value text, value_2 jsonb);
SELECT create_reference_table('reference_table');
CREATE TABLE distributed_table (key int, value text, value_2 jsonb);
SELECT create_distributed_table('distributed_table', 'key');
SET client_min_messages TO DEBUG1;
-- the user doesn't allow local / distributed table joinn
SET citus.local_table_join_policy TO 'never';
SELECT count(*) FROM postgres_table JOIN distributed_table USING(key);
SELECT count(*) FROM postgres_table JOIN reference_table USING(key);
-- the user prefers local table recursively planned
SET citus.local_table_join_policy TO 'pull-local';
SELECT count(*) FROM postgres_table JOIN distributed_table USING(key);
SELECT count(*) FROM postgres_table JOIN reference_table USING(key);
-- the user prefers distributed table recursively planned
SET citus.local_table_join_policy TO 'pull-distributed';
SELECT count(*) FROM postgres_table JOIN distributed_table USING(key);
SELECT count(*) FROM postgres_table JOIN reference_table USING(key);
-- update/delete
-- auto tests
-- switch back to the default policy, which is auto
RESET citus.local_table_join_policy;
-- on the default mode, the local tables should be recursively planned
SELECT count(*) FROM distributed_table JOIN postgres_table USING(key);
SELECT count(*) FROM reference_table JOIN postgres_table USING(key);
SELECT count(*) FROM distributed_table JOIN postgres_table USING(key) JOIN reference_table USING (key);
-- this is a contreversial part that we should discuss further
-- if the distributed table has at least one filter, we prefer
-- recursively planning of the distributed table
SELECT count(*) FROM distributed_table JOIN postgres_table USING(key) WHERE distributed_table.value = 'test';
-- but if the filters can be pushed downn to the local table via the join
-- we are smart about recursively planning the local table
SELECT count(*) FROM distributed_table JOIN postgres_table USING(key) WHERE distributed_table.key = 1;
-- if both local and distributed tables have a filter, we prefer local
SELECT count(*) FROM distributed_table JOIN postgres_table USING(key) WHERE distributed_table.value = 'test' AND postgres_table.value = 'test';
SELECT count(*) FROM distributed_table JOIN postgres_table USING(key) WHERE distributed_table.value = 'test' OR postgres_table.value = 'test';
-- multiple local/distributed tables
-- only local tables are recursively planned
SELECT count(*) FROM distributed_table d1 JOIN postgres_table p1 USING(key) JOIN distributed_table d2 USING(key) JOIN postgres_table p2 USING(key);
-- if one of the distributed tables have a filter, we'll prefer recursive planning of it as well
-- it actually leads to a poor plan as we need to recursively plan local tables anyway as it is
-- joined with another distributed table
SELECT
count(*)
FROM
distributed_table d1 JOIN postgres_table p1 USING(key) JOIN distributed_table d2 USING(key) JOIN postgres_table p2 USING(key)
WHERE
d1.value = '1';
-- if the filter is on the JOIN key, we can recursively plan the local
-- tables as filters are pushded down to the local tables
SELECT
count(*)
FROM
distributed_table d1 JOIN postgres_table p1 USING(key) JOIN distributed_table d2 USING(key) JOIN postgres_table p2 USING(key)
WHERE
d1.key = 1;
-- we can support modification queries as well
UPDATE
postgres_table
SET
value = 'test'
FROM
distributed_table
WHERE
distributed_table.key = postgres_table.key;
UPDATE
distributed_table
SET
value = 'test'
FROM
postgres_table
WHERE
distributed_table.key = postgres_table.key;
-- modifications with multiple tables
UPDATE
distributed_table
SET
value = 'test'
FROM
postgres_table p1, postgres_table p2
WHERE
distributed_table.key = p1.key AND p1.key = p2.key;
UPDATE
distributed_table
SET
value = 'test'
FROM
postgres_table p1, distributed_table d2
WHERE
distributed_table.key = p1.key AND p1.key = d2.key;
-- pretty inefficient plan as it requires
-- recursive planninng of 2 distributed tables
UPDATE
postgres_table
SET
value = 'test'
FROM
distributed_table d1, distributed_table d2
WHERE
postgres_table.key = d1.key AND d1.key = d2.key;
\set VERBOSITY terse
RESET client_min_messages;
DROP SCHEMA local_table_join CASCADE;