diff --git a/src/test/regress/expected/local_table_join.out b/src/test/regress/expected/local_table_join.out new file mode 100644 index 000000000..38f6f9cb2 --- /dev/null +++ b/src/test/regress/expected/local_table_join.out @@ -0,0 +1,257 @@ +CREATE SCHEMA local_table_join; +SET search_path TO local_table_join; +CREATE TABLE postgres_table (key int, value text, value_2 jsonb); +CREATE TABLE reference_table (key int, value text, value_2 jsonb); +SELECT create_reference_table('reference_table'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE distributed_table (key int, value text, value_2 jsonb); +SELECT create_distributed_table('distributed_table', 'key'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SET client_min_messages TO DEBUG1; +-- the user doesn't allow local / distributed table joinn +SET citus.local_table_join_policy TO 'never'; +SELECT count(*) FROM postgres_table JOIN distributed_table USING(key); +ERROR: relation postgres_table is not distributed +SELECT count(*) FROM postgres_table JOIN reference_table USING(key); +ERROR: relation postgres_table is not distributed +-- the user prefers local table recursively planned +SET citus.local_table_join_policy TO 'pull-local'; +SELECT count(*) FROM postgres_table JOIN distributed_table USING(key); +DEBUG: Wrapping local relation "postgres_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.postgres_table WHERE true OFFSET 0 +DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.postgres_table WHERE true OFFSET 0 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) postgres_table JOIN local_table_join.distributed_table USING (key)) + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM postgres_table JOIN reference_table USING(key); +DEBUG: Wrapping local relation "postgres_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.postgres_table WHERE true OFFSET 0 +DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.postgres_table WHERE true OFFSET 0 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) postgres_table JOIN local_table_join.reference_table USING (key)) + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- the user prefers distributed table recursively planned +SET citus.local_table_join_policy TO 'pull-distributed'; +SELECT count(*) FROM postgres_table JOIN distributed_table USING(key); +DEBUG: Wrapping local relation "distributed_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.distributed_table WHERE true OFFSET 0 +DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.distributed_table WHERE true OFFSET 0 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (local_table_join.postgres_table JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) distributed_table USING (key)) + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM postgres_table JOIN reference_table USING(key); +DEBUG: Wrapping local relation "reference_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.reference_table WHERE true OFFSET 0 +DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.reference_table WHERE true OFFSET 0 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (local_table_join.postgres_table JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) reference_table USING (key)) + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- update/delete +-- auto tests +-- switch back to the default policy, which is auto +RESET citus.local_table_join_policy; +-- on the default mode, the local tables should be recursively planned +SELECT count(*) FROM distributed_table JOIN postgres_table USING(key); +DEBUG: Wrapping local relation "postgres_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.postgres_table WHERE true OFFSET 0 +DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.postgres_table WHERE true OFFSET 0 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (local_table_join.distributed_table JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) postgres_table USING (key)) + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM reference_table JOIN postgres_table USING(key); +DEBUG: Wrapping local relation "postgres_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.postgres_table WHERE true OFFSET 0 +DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.postgres_table WHERE true OFFSET 0 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (local_table_join.reference_table JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) postgres_table USING (key)) + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM distributed_table JOIN postgres_table USING(key) JOIN reference_table USING (key); +DEBUG: Wrapping local relation "postgres_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.postgres_table WHERE true OFFSET 0 +DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.postgres_table WHERE true OFFSET 0 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((local_table_join.distributed_table JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) postgres_table USING (key)) JOIN local_table_join.reference_table USING (key)) + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- this is a contreversial part that we should discuss further +-- if the distributed table has at least one filter, we prefer +-- recursively planning of the distributed table +SELECT count(*) FROM distributed_table JOIN postgres_table USING(key) WHERE distributed_table.value = 'test'; +DEBUG: Wrapping local relation "distributed_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.distributed_table WHERE (value OPERATOR(pg_catalog.=) 'test'::text) OFFSET 0 +DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.distributed_table WHERE (value OPERATOR(pg_catalog.=) 'test'::text) OFFSET 0 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) distributed_table JOIN local_table_join.postgres_table USING (key)) WHERE (distributed_table.value OPERATOR(pg_catalog.=) 'test'::text) + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- but if the filters can be pushed downn to the local table via the join +-- we are smart about recursively planning the local table +SELECT count(*) FROM distributed_table JOIN postgres_table USING(key) WHERE distributed_table.key = 1; +DEBUG: Wrapping local relation "postgres_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.postgres_table WHERE (key OPERATOR(pg_catalog.=) 1) OFFSET 0 +DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.postgres_table WHERE (key OPERATOR(pg_catalog.=) 1) OFFSET 0 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (local_table_join.distributed_table JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) postgres_table USING (key)) WHERE (distributed_table.key OPERATOR(pg_catalog.=) 1) + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- if both local and distributed tables have a filter, we prefer local +SELECT count(*) FROM distributed_table JOIN postgres_table USING(key) WHERE distributed_table.value = 'test' AND postgres_table.value = 'test'; +DEBUG: Wrapping local relation "postgres_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.postgres_table WHERE (value OPERATOR(pg_catalog.=) 'test'::text) OFFSET 0 +DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.postgres_table WHERE (value OPERATOR(pg_catalog.=) 'test'::text) OFFSET 0 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (local_table_join.distributed_table JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) postgres_table USING (key)) WHERE ((distributed_table.value OPERATOR(pg_catalog.=) 'test'::text) AND (postgres_table.value OPERATOR(pg_catalog.=) 'test'::text)) + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM distributed_table JOIN postgres_table USING(key) WHERE distributed_table.value = 'test' OR postgres_table.value = 'test'; +DEBUG: Wrapping local relation "postgres_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.postgres_table WHERE true OFFSET 0 +DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.postgres_table WHERE true OFFSET 0 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (local_table_join.distributed_table JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) postgres_table USING (key)) WHERE ((distributed_table.value OPERATOR(pg_catalog.=) 'test'::text) OR (postgres_table.value OPERATOR(pg_catalog.=) 'test'::text)) + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- multiple local/distributed tables +-- only local tables are recursively planned +SELECT count(*) FROM distributed_table d1 JOIN postgres_table p1 USING(key) JOIN distributed_table d2 USING(key) JOIN postgres_table p2 USING(key); +DEBUG: Wrapping local relation "postgres_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.postgres_table p1 WHERE true OFFSET 0 +DEBUG: Wrapping local relation "postgres_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.postgres_table p2 WHERE true OFFSET 0 +DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.postgres_table p1 WHERE true OFFSET 0 +DEBUG: generating subplan XXX_2 for subquery SELECT key, value, value_2 FROM local_table_join.postgres_table p2 WHERE true OFFSET 0 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (((local_table_join.distributed_table d1 JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) p1 USING (key)) JOIN local_table_join.distributed_table d2 USING (key)) JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) p2 USING (key)) + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- if one of the distributed tables have a filter, we'll prefer recursive planning of it as well +-- it actually leads to a poor plan as we need to recursively plan local tables anyway as it is +-- joined with another distributed table +SELECT + count(*) +FROM + distributed_table d1 JOIN postgres_table p1 USING(key) JOIN distributed_table d2 USING(key) JOIN postgres_table p2 USING(key) +WHERE + d1.value = '1'; +DEBUG: Wrapping local relation "distributed_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.distributed_table d1 WHERE (value OPERATOR(pg_catalog.=) '1'::text) OFFSET 0 +DEBUG: Wrapping local relation "postgres_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.postgres_table p1 WHERE true OFFSET 0 +DEBUG: Wrapping local relation "postgres_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.postgres_table p2 WHERE true OFFSET 0 +DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.distributed_table d1 WHERE (value OPERATOR(pg_catalog.=) '1'::text) OFFSET 0 +DEBUG: generating subplan XXX_2 for subquery SELECT key, value, value_2 FROM local_table_join.postgres_table p1 WHERE true OFFSET 0 +DEBUG: generating subplan XXX_3 for subquery SELECT key, value, value_2 FROM local_table_join.postgres_table p2 WHERE true OFFSET 0 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((((SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) d1 JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) p1 USING (key)) JOIN local_table_join.distributed_table d2 USING (key)) JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) p2 USING (key)) WHERE (d1.value OPERATOR(pg_catalog.=) '1'::text) + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- if the filter is on the JOIN key, we can recursively plan the local +-- tables as filters are pushded down to the local tables +SELECT + count(*) +FROM + distributed_table d1 JOIN postgres_table p1 USING(key) JOIN distributed_table d2 USING(key) JOIN postgres_table p2 USING(key) +WHERE + d1.key = 1; +DEBUG: Wrapping local relation "postgres_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.postgres_table p1 WHERE (key OPERATOR(pg_catalog.=) 1) OFFSET 0 +DEBUG: Wrapping local relation "postgres_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.postgres_table p2 WHERE (key OPERATOR(pg_catalog.=) 1) OFFSET 0 +DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.postgres_table p1 WHERE (key OPERATOR(pg_catalog.=) 1) OFFSET 0 +DEBUG: generating subplan XXX_2 for subquery SELECT key, value, value_2 FROM local_table_join.postgres_table p2 WHERE (key OPERATOR(pg_catalog.=) 1) OFFSET 0 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (((local_table_join.distributed_table d1 JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) p1 USING (key)) JOIN local_table_join.distributed_table d2 USING (key)) JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) p2 USING (key)) WHERE (d1.key OPERATOR(pg_catalog.=) 1) + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- we can support modification queries as well +UPDATE + postgres_table +SET + value = 'test' +FROM + distributed_table +WHERE + distributed_table.key = postgres_table.key; +DEBUG: Wrapping local relation "distributed_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.distributed_table WHERE true OFFSET 0 +DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.distributed_table WHERE true OFFSET 0 +DEBUG: Plan XXX query after replacing subqueries and CTEs: UPDATE local_table_join.postgres_table SET value = 'test'::text FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) distributed_table WHERE (distributed_table.key OPERATOR(pg_catalog.=) postgres_table.key) +UPDATE + distributed_table +SET + value = 'test' +FROM + postgres_table +WHERE + distributed_table.key = postgres_table.key; +DEBUG: Wrapping local relation "postgres_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.postgres_table WHERE true OFFSET 0 +DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.postgres_table WHERE true OFFSET 0 +DEBUG: Plan XXX query after replacing subqueries and CTEs: UPDATE local_table_join.distributed_table SET value = 'test'::text FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) postgres_table WHERE (distributed_table.key OPERATOR(pg_catalog.=) postgres_table.key) +-- modifications with multiple tables +UPDATE + distributed_table +SET + value = 'test' +FROM + postgres_table p1, postgres_table p2 +WHERE + distributed_table.key = p1.key AND p1.key = p2.key; +DEBUG: Wrapping local relation "postgres_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.postgres_table p1 WHERE true OFFSET 0 +DEBUG: Wrapping local relation "postgres_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.postgres_table p2 WHERE true OFFSET 0 +DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.postgres_table p1 WHERE true OFFSET 0 +DEBUG: generating subplan XXX_2 for subquery SELECT key, value, value_2 FROM local_table_join.postgres_table p2 WHERE true OFFSET 0 +DEBUG: Plan XXX query after replacing subqueries and CTEs: UPDATE local_table_join.distributed_table SET value = 'test'::text FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) p1, (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) p2 WHERE ((distributed_table.key OPERATOR(pg_catalog.=) p1.key) AND (p1.key OPERATOR(pg_catalog.=) p2.key)) +UPDATE + distributed_table +SET + value = 'test' +FROM + postgres_table p1, distributed_table d2 +WHERE + distributed_table.key = p1.key AND p1.key = d2.key; +DEBUG: Wrapping local relation "postgres_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.postgres_table p1 WHERE true OFFSET 0 +DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.postgres_table p1 WHERE true OFFSET 0 +DEBUG: Plan XXX query after replacing subqueries and CTEs: UPDATE local_table_join.distributed_table SET value = 'test'::text FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) p1, local_table_join.distributed_table d2 WHERE ((distributed_table.key OPERATOR(pg_catalog.=) p1.key) AND (p1.key OPERATOR(pg_catalog.=) d2.key)) +-- pretty inefficient plan as it requires +-- recursive planninng of 2 distributed tables +UPDATE + postgres_table +SET + value = 'test' +FROM + distributed_table d1, distributed_table d2 +WHERE + postgres_table.key = d1.key AND d1.key = d2.key; +DEBUG: Wrapping local relation "distributed_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.distributed_table d1 WHERE true OFFSET 0 +DEBUG: Wrapping local relation "distributed_table" to a subquery: SELECT key, value, value_2 FROM local_table_join.distributed_table d2 WHERE true OFFSET 0 +DEBUG: generating subplan XXX_1 for subquery SELECT key, value, value_2 FROM local_table_join.distributed_table d1 WHERE true OFFSET 0 +DEBUG: generating subplan XXX_2 for subquery SELECT key, value, value_2 FROM local_table_join.distributed_table d2 WHERE true OFFSET 0 +DEBUG: Plan XXX query after replacing subqueries and CTEs: UPDATE local_table_join.postgres_table SET value = 'test'::text FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) d1, (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.value_2 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, value_2 jsonb)) d2 WHERE ((postgres_table.key OPERATOR(pg_catalog.=) d1.key) AND (d1.key OPERATOR(pg_catalog.=) d2.key)) +\set VERBOSITY terse +RESET client_min_messages; +DROP SCHEMA local_table_join CASCADE; +NOTICE: drop cascades to 3 other objects diff --git a/src/test/regress/multi_schedule b/src/test/regress/multi_schedule index 39329b2d7..4da3a305d 100644 --- a/src/test/regress/multi_schedule +++ b/src/test/regress/multi_schedule @@ -282,7 +282,7 @@ test: multi_colocated_shard_transfer # ---------- # multi_citus_tools tests utility functions written for citus tools # ---------- -test: multi_citus_tools +test: multi_citus_tools local_table_join # ---------- # node_conninfo_reload tests that node_conninfo changes take effect diff --git a/src/test/regress/sql/local_table_join.sql b/src/test/regress/sql/local_table_join.sql new file mode 100644 index 000000000..0b0285af9 --- /dev/null +++ b/src/test/regress/sql/local_table_join.sql @@ -0,0 +1,138 @@ +CREATE SCHEMA local_table_join; +SET search_path TO local_table_join; + + +CREATE TABLE postgres_table (key int, value text, value_2 jsonb); +CREATE TABLE reference_table (key int, value text, value_2 jsonb); +SELECT create_reference_table('reference_table'); +CREATE TABLE distributed_table (key int, value text, value_2 jsonb); +SELECT create_distributed_table('distributed_table', 'key'); + +SET client_min_messages TO DEBUG1; + + +-- the user doesn't allow local / distributed table joinn +SET citus.local_table_join_policy TO 'never'; +SELECT count(*) FROM postgres_table JOIN distributed_table USING(key); +SELECT count(*) FROM postgres_table JOIN reference_table USING(key); + +-- the user prefers local table recursively planned +SET citus.local_table_join_policy TO 'pull-local'; +SELECT count(*) FROM postgres_table JOIN distributed_table USING(key); +SELECT count(*) FROM postgres_table JOIN reference_table USING(key); + + +-- the user prefers distributed table recursively planned +SET citus.local_table_join_policy TO 'pull-distributed'; +SELECT count(*) FROM postgres_table JOIN distributed_table USING(key); +SELECT count(*) FROM postgres_table JOIN reference_table USING(key); + + +-- update/delete +-- auto tests + +-- switch back to the default policy, which is auto +RESET citus.local_table_join_policy; + +-- on the default mode, the local tables should be recursively planned +SELECT count(*) FROM distributed_table JOIN postgres_table USING(key); +SELECT count(*) FROM reference_table JOIN postgres_table USING(key); +SELECT count(*) FROM distributed_table JOIN postgres_table USING(key) JOIN reference_table USING (key); + + + +-- this is a contreversial part that we should discuss further +-- if the distributed table has at least one filter, we prefer +-- recursively planning of the distributed table +SELECT count(*) FROM distributed_table JOIN postgres_table USING(key) WHERE distributed_table.value = 'test'; + +-- but if the filters can be pushed downn to the local table via the join +-- we are smart about recursively planning the local table +SELECT count(*) FROM distributed_table JOIN postgres_table USING(key) WHERE distributed_table.key = 1; + + +-- if both local and distributed tables have a filter, we prefer local +SELECT count(*) FROM distributed_table JOIN postgres_table USING(key) WHERE distributed_table.value = 'test' AND postgres_table.value = 'test'; +SELECT count(*) FROM distributed_table JOIN postgres_table USING(key) WHERE distributed_table.value = 'test' OR postgres_table.value = 'test'; + + +-- multiple local/distributed tables +-- only local tables are recursively planned +SELECT count(*) FROM distributed_table d1 JOIN postgres_table p1 USING(key) JOIN distributed_table d2 USING(key) JOIN postgres_table p2 USING(key); + + +-- if one of the distributed tables have a filter, we'll prefer recursive planning of it as well +-- it actually leads to a poor plan as we need to recursively plan local tables anyway as it is +-- joined with another distributed table +SELECT + count(*) +FROM + distributed_table d1 JOIN postgres_table p1 USING(key) JOIN distributed_table d2 USING(key) JOIN postgres_table p2 USING(key) +WHERE + d1.value = '1'; + +-- if the filter is on the JOIN key, we can recursively plan the local +-- tables as filters are pushded down to the local tables +SELECT + count(*) +FROM + distributed_table d1 JOIN postgres_table p1 USING(key) JOIN distributed_table d2 USING(key) JOIN postgres_table p2 USING(key) +WHERE + d1.key = 1; + + +-- we can support modification queries as well +UPDATE + postgres_table +SET + value = 'test' +FROM + distributed_table +WHERE + distributed_table.key = postgres_table.key; + + +UPDATE + distributed_table +SET + value = 'test' +FROM + postgres_table +WHERE + distributed_table.key = postgres_table.key; + +-- modifications with multiple tables +UPDATE + distributed_table +SET + value = 'test' +FROM + postgres_table p1, postgres_table p2 +WHERE + distributed_table.key = p1.key AND p1.key = p2.key; + + +UPDATE + distributed_table +SET + value = 'test' +FROM + postgres_table p1, distributed_table d2 +WHERE + distributed_table.key = p1.key AND p1.key = d2.key; + +-- pretty inefficient plan as it requires +-- recursive planninng of 2 distributed tables +UPDATE + postgres_table +SET + value = 'test' +FROM + distributed_table d1, distributed_table d2 +WHERE + postgres_table.key = d1.key AND d1.key = d2.key; + + +\set VERBOSITY terse +RESET client_min_messages; +DROP SCHEMA local_table_join CASCADE;