From 6b2a412c12d885e01bc5ec4ab0577e5b6a507cbd Mon Sep 17 00:00:00 2001 From: Onder Kalaci Date: Mon, 26 Nov 2018 12:43:11 +0300 Subject: [PATCH] Add tests for filter pushdown --- ...relation_planning_restriction_pushdown.out | 536 ++++++++++++++++++ src/test/regress/multi_schedule | 2 +- ...relation_planning_restriction_pushdown.sql | 349 ++++++++++++ 3 files changed, 886 insertions(+), 1 deletion(-) create mode 100644 src/test/regress/expected/recursive_relation_planning_restriction_pushdown.out create mode 100644 src/test/regress/sql/recursive_relation_planning_restriction_pushdown.sql diff --git a/src/test/regress/expected/recursive_relation_planning_restriction_pushdown.out b/src/test/regress/expected/recursive_relation_planning_restriction_pushdown.out new file mode 100644 index 000000000..18c6f26a3 --- /dev/null +++ b/src/test/regress/expected/recursive_relation_planning_restriction_pushdown.out @@ -0,0 +1,536 @@ +---------------------------------------------------- +-- recursive_relation_planning_restirction_pushdown +-- In this test file, we mosly test whether Citus +-- can successfully pushdown filters to the subquery +-- that is +---------------------------------------------------- +-- all the queries in this file have the +-- same tables/subqueries combination as below +-- because this test aims to hold the query planning +-- steady, but mostly ensure that filters are handled +-- properly. Note that u2 is the relation that is +-- recursively planned +-- Setting the debug level so that filters can be observed +SET client_min_messages TO DEBUG1; +-- no filters on u2 +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1); +DEBUG: generating subplan 1_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true +DEBUG: generating subplan 1_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 1 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('1_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('1_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) + count +------- + 38501 +(1 row) + +-- scalar array expressions can be pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE u2.value_1 > ANY(ARRAY[2, 1, 6]); +DEBUG: generating subplan 4_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE (value_1 OPERATOR(pg_catalog.>) ANY ('{2,1,6}'::integer[])) +DEBUG: generating subplan 4_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 4 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('4_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('4_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (users_table.value_1 OPERATOR(pg_catalog.>) ANY (ARRAY[2, 1, 6])) + count +------- + 33398 +(1 row) + +-- array operators on the table can be pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE ARRAY[u2.value_1, u2.value_2] @> (ARRAY[2, 3]); +DEBUG: generating subplan 7_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE (ARRAY[value_1, value_2] OPERATOR(pg_catalog.@>) '{2,3}'::integer[]) +DEBUG: generating subplan 7_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 7 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('7_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('7_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (ARRAY[users_table.value_1, users_table.value_2] OPERATOR(pg_catalog.@>) ARRAY[2, 3]) + count +------- + 4704 +(1 row) + +-- array operators on different tables cannot be pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE ARRAY[u2.value_1, u1.user_id] @> (ARRAY[2, 3]); +DEBUG: generating subplan 10_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true +DEBUG: generating subplan 10_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 10 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('10_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('10_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (ARRAY[users_table.value_1, u1.user_id] OPERATOR(pg_catalog.@>) ARRAY[2, 3]) + count +------- + 3352 +(1 row) + +-- coerced expressions can be pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE (u2.value_1/2.0 > 2)::int::bool::text::bool; +DEBUG: generating subplan 13_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE (((((((value_1)::numeric OPERATOR(pg_catalog./) 2.0) OPERATOR(pg_catalog.>) '2'::numeric))::integer)::boolean)::text)::boolean +DEBUG: generating subplan 13_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 13 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('13_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('13_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (((((((users_table.value_1)::numeric OPERATOR(pg_catalog./) 2.0) OPERATOR(pg_catalog.>) (2)::numeric))::integer)::boolean)::text)::boolean + count +------- + 729 +(1 row) + +-- case expression on a single table can be pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE (CASE WHEN u2.value_1 > 3 THEN u2.value_1 > 2 ELSE false END); +DEBUG: generating subplan 16_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE CASE WHEN (value_1 OPERATOR(pg_catalog.>) 3) THEN (value_1 OPERATOR(pg_catalog.>) 2) ELSE false END +DEBUG: generating subplan 16_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 16 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('16_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('16_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE CASE WHEN (users_table.value_1 OPERATOR(pg_catalog.>) 3) THEN (users_table.value_1 OPERATOR(pg_catalog.>) 2) ELSE false END + count +------- + 9990 +(1 row) + +-- case expression multiple tables cannot be pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE (CASE WHEN u1.value_1 > 4000 THEN u2.value_1 / 100 > 1 ELSE false END); +DEBUG: generating subplan 19_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true +DEBUG: generating subplan 19_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 19 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('19_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('19_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE CASE WHEN (u1.value_1 OPERATOR(pg_catalog.>) 4000) THEN ((users_table.value_1 OPERATOR(pg_catalog./) 100) OPERATOR(pg_catalog.>) 1) ELSE false END + count +------- + 0 +(1 row) + +-- coalesce expressions can be pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE COALESCE((u2.user_id/5.0)::int::bool, false); +DEBUG: generating subplan 22_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE COALESCE(((((user_id)::numeric OPERATOR(pg_catalog./) 5.0))::integer)::boolean, false) +DEBUG: generating subplan 22_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 22 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('22_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('22_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE COALESCE(((((users_table.user_id)::numeric OPERATOR(pg_catalog./) 5.0))::integer)::boolean, false) + count +------- + 28198 +(1 row) + +-- nullif expressions can be pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE NULLIF((u2.value_2/5.0)::int::bool, false); +DEBUG: generating subplan 25_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE NULLIF(((((value_2)::numeric OPERATOR(pg_catalog./) 5.0))::integer)::boolean, false) +DEBUG: generating subplan 25_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 25 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('25_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('25_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE NULLIF(((((users_table.value_2)::numeric OPERATOR(pg_catalog./) 5.0))::integer)::boolean, false) + count +------- + 18895 +(1 row) + +-- null test can be pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE u2.value_3 IS NOT NULL; +DEBUG: generating subplan 28_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE (value_3 IS NOT NULL) +DEBUG: generating subplan 28_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 28 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('28_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('28_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (users_table.value_3 IS NOT NULL) + count +------- + 38501 +(1 row) + +-- functions can be pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE isfinite(u2.time); +DEBUG: generating subplan 31_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE isfinite("time") +DEBUG: generating subplan 31_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 31 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('31_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('31_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE isfinite(users_table."time") + count +------- + 38501 +(1 row) + +-- functions with multiple tables cannot be pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE int4smaller(u2.value_1, u1.value_1) = 55; +DEBUG: generating subplan 34_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true +DEBUG: generating subplan 34_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 34 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('34_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('34_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (int4smaller(users_table.value_1, u1.value_1) OPERATOR(pg_catalog.=) 55) + count +------- + 0 +(1 row) + +-- functions with multiple columns from the same tables can be pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE int4smaller(u2.value_1, u2.value_2) = u2.value_1; +DEBUG: generating subplan 37_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE (value_1 OPERATOR(pg_catalog.=) int4smaller(value_1, value_2)) +DEBUG: generating subplan 37_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 37 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('37_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('37_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (int4smaller(users_table.value_1, users_table.value_2) OPERATOR(pg_catalog.=) users_table.value_1) + count +------- + 20686 +(1 row) + +-- row expressions can be pushdown +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE row(u2.value_1, 2, 3) > row(u2.value_2, 2, 3); +DEBUG: generating subplan 40_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE (ROW(value_1, 2, 3) OPERATOR(pg_catalog.>) ROW(value_2, 2, 3)) +DEBUG: generating subplan 40_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 40 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('40_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('40_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (ROW(users_table.value_1, 2, 3) OPERATOR(pg_catalog.>) ROW(users_table.value_2, 2, 3)) + count +------- + 17815 +(1 row) + +-- multiple expression from the same table can be pushed down together +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) + WHERE + (u2.user_id/1.0)::int::bool::text::bool AND + CASE WHEN u2.value_1 > 4000 THEN u2.value_2 / 100 > 1 ELSE false END AND + COALESCE((u2.user_id/50000)::bool, false) AND + NULLIF((u2.value_3/50000)::int::bool, false) AND + isfinite(u2.time) AND + u2.value_4 IS DISTINCT FROM 50040 AND + row(u2.value_4, 2, 3) > row(2000, 2, 3); +DEBUG: generating subplan 43_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE (((((((user_id)::numeric OPERATOR(pg_catalog./) 1.0))::integer)::boolean)::text)::boolean AND CASE WHEN (value_1 OPERATOR(pg_catalog.>) 4000) THEN ((value_2 OPERATOR(pg_catalog./) 100) OPERATOR(pg_catalog.>) 1) ELSE false END AND COALESCE(((user_id OPERATOR(pg_catalog./) 50000))::boolean, false) AND NULLIF((((value_3 OPERATOR(pg_catalog./) '50000'::double precision))::integer)::boolean, false) AND isfinite("time") AND (value_4 IS DISTINCT FROM 50040) AND (ROW(value_4, 2, 3) OPERATOR(pg_catalog.>) ROW(2000, 2, 3))) +DEBUG: generating subplan 43_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 43 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('43_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('43_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (((((((users_table.user_id)::numeric OPERATOR(pg_catalog./) 1.0))::integer)::boolean)::text)::boolean AND CASE WHEN (users_table.value_1 OPERATOR(pg_catalog.>) 4000) THEN ((users_table.value_2 OPERATOR(pg_catalog./) 100) OPERATOR(pg_catalog.>) 1) ELSE false END AND COALESCE(((users_table.user_id OPERATOR(pg_catalog./) 50000))::boolean, false) AND NULLIF((((users_table.value_3 OPERATOR(pg_catalog./) (50000)::double precision))::integer)::boolean, false) AND isfinite(users_table."time") AND (users_table.value_4 IS DISTINCT FROM 50040) AND (ROW(users_table.value_4, 2, 3) OPERATOR(pg_catalog.>) ROW(2000, 2, 3))) + count +------- + 0 +(1 row) + +-- subqueries are not pushdown +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE u2.value_1 > + (SELECT avg(user_id) + FROM events_table); +DEBUG: generating subplan 46_1 for subquery SELECT avg(user_id) AS avg FROM public.events_table +DEBUG: generating subplan 46_2 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true +DEBUG: generating subplan 46_3 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 46 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('46_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('46_3'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE ((users_table.value_1)::numeric OPERATOR(pg_catalog.>) (SELECT intermediate_result.avg FROM read_intermediate_result('46_1'::text, 'binary'::citus_copy_format) intermediate_result(avg numeric))) + count +------- + 9990 +(1 row) + +-- even subqueries with constant values are not pushdowned +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE u2.value_1 > + (SELECT 5); +DEBUG: generating subplan 50_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true +DEBUG: generating subplan 50_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 50 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('50_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('50_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (users_table.value_1 OPERATOR(pg_catalog.>) (SELECT 5)) + count +------- + 0 +(1 row) + +-- filters involving multiple tables aren't pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE u2.value_1 * u1.user_id > 25; +DEBUG: generating subplan 53_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true +DEBUG: generating subplan 53_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 53 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('53_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('53_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE ((users_table.value_1 OPERATOR(pg_catalog.*) u1.user_id) OPERATOR(pg_catalog.>) 25) + count +------- + 162 +(1 row) + +-- filter on other tables can only be pushdown +-- as long as they are equality filters on the +-- joining column +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE u1.value_1 = 3; +DEBUG: generating subplan 56_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE (value_1 OPERATOR(pg_catalog.=) 3) +DEBUG: generating subplan 56_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 56 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('56_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('56_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (u1.value_1 OPERATOR(pg_catalog.=) 3) + count +------- + 17576 +(1 row) + +-- but not when the filter is gt, lt or any other thing +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE u1.value_1 > 3; +DEBUG: generating subplan 59_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true +DEBUG: generating subplan 59_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 59 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('59_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('59_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (u1.value_1 OPERATOR(pg_catalog.>) 3) + count +------- + 9990 +(1 row) + +-- when the filter is on another column than the +-- join column, that's obviously not pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE u1.value_2 = 3; +DEBUG: generating subplan 62_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true +DEBUG: generating subplan 62_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 62 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('62_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('62_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (u1.value_2 OPERATOR(pg_catalog.=) 3) + count +------- + 5618 +(1 row) + +-- or filters on the same table is pushdown +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE u2.value_1 > 4 OR u2.value_4 = 4; +DEBUG: generating subplan 65_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE ((value_1 OPERATOR(pg_catalog.>) 4) OR (value_4 OPERATOR(pg_catalog.=) 4)) +DEBUG: generating subplan 65_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 65 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('65_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('65_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE ((users_table.value_1 OPERATOR(pg_catalog.>) 4) OR (users_table.value_4 OPERATOR(pg_catalog.=) 4)) + count +------- + 729 +(1 row) + +-- and filters on the same table is pushdown +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE u2.value_1 > 2 and u2.value_4 IS NULL; +DEBUG: generating subplan 68_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE ((value_1 OPERATOR(pg_catalog.>) 2) AND (value_4 IS NULL)) +DEBUG: generating subplan 68_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 68 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('68_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('68_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE ((users_table.value_1 OPERATOR(pg_catalog.>) 2) AND (users_table.value_4 IS NULL)) + count +------- + 27566 +(1 row) + +-- filters on different tables are pushdown +-- only the ones that are not ANDed +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE (u2.value_1 > 2 OR u2.value_4 IS NULL) AND (u2.user_id > 4 OR u1.user_id > 3); +DEBUG: generating subplan 71_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE ((value_1 OPERATOR(pg_catalog.>) 2) OR (value_4 IS NULL)) +DEBUG: generating subplan 71_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 71 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('71_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('71_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (((users_table.value_1 OPERATOR(pg_catalog.>) 2) OR (users_table.value_4 IS NULL)) AND ((users_table.user_id OPERATOR(pg_catalog.>) 4) OR (u1.user_id OPERATOR(pg_catalog.>) 3))) + count +------- + 27405 +(1 row) + +-- see the comment above +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE (u2.value_1 > 2 OR u2.value_4 IS NULL) OR (u2.user_id > 4 AND u1.user_id > 3); +DEBUG: generating subplan 74_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE ((value_1 OPERATOR(pg_catalog.>) 2) OR (value_4 IS NULL) OR (user_id OPERATOR(pg_catalog.>) 4)) +DEBUG: generating subplan 74_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 74 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('74_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('74_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE ((users_table.value_1 OPERATOR(pg_catalog.>) 2) OR (users_table.value_4 IS NULL) OR ((users_table.user_id OPERATOR(pg_catalog.>) 4) AND (u1.user_id OPERATOR(pg_catalog.>) 3))) + count +------- + 38501 +(1 row) + +-- see the comment above +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE (u2.value_1 > 2 OR u1.value_4 IS NULL) AND (u2.user_id > 4 AND u1.user_id > 3); +DEBUG: generating subplan 77_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE (user_id OPERATOR(pg_catalog.>) 4) +DEBUG: generating subplan 77_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 77 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('77_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('77_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (((users_table.value_1 OPERATOR(pg_catalog.>) 2) OR (u1.value_4 IS NULL)) AND ((users_table.user_id OPERATOR(pg_catalog.>) 4) AND (u1.user_id OPERATOR(pg_catalog.>) 3))) + count +------- + 7883 +(1 row) + +-- see the comment above +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE (u2.value_1 > 2 OR u1.value_4 IS NULL) AND (u2.user_id > 4 OR u1.user_id > 3); +DEBUG: generating subplan 80_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true +DEBUG: generating subplan 80_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 80 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('80_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('80_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (((users_table.value_1 OPERATOR(pg_catalog.>) 2) OR (u1.value_4 IS NULL)) AND ((users_table.user_id OPERATOR(pg_catalog.>) 4) OR (u1.user_id OPERATOR(pg_catalog.>) 3))) + count +------- + 27405 +(1 row) + +-- see the comment above +-- but volatile functions are not pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE (u2.value_1 > 2 OR u1.value_4 IS NULL) AND (u2.user_id = 10000 * random() OR u1.user_id > 3); +DEBUG: generating subplan 83_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true +DEBUG: generating subplan 83_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 83 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('83_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('83_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (((users_table.value_1 OPERATOR(pg_catalog.>) 2) OR (u1.value_4 IS NULL)) AND (((users_table.user_id)::double precision OPERATOR(pg_catalog.=) ((10000)::double precision OPERATOR(pg_catalog.*) random())) OR (u1.user_id OPERATOR(pg_catalog.>) 3))) + count +------- + 22183 +(1 row) + +-- TODO: constant results should be pushed down, but not supported yet +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE (u2.value_1 > 2 AND false); +DEBUG: generating subplan 86_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true +DEBUG: generating subplan 86_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 86 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('86_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('86_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE ((users_table.value_1 OPERATOR(pg_catalog.>) 2) AND false) + count +------- + 0 +(1 row) + +-- TODO: what should the behaviour be? +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN LATERAL + (SELECT value_1, + random() + FROM users_table + WHERE u2.value_2 = 15) AS u3 USING (value_1) +WHERE (u2.value_1 > 2 + AND FALSE); +DEBUG: generating subplan 89_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true +DEBUG: Plan 89 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('89_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN LATERAL (SELECT users_table_1.value_1, random() AS random FROM public.users_table users_table_1 WHERE (users_table.value_2 OPERATOR(pg_catalog.=) 15)) u3 USING (value_1)) WHERE ((users_table.value_1 OPERATOR(pg_catalog.>) 2) AND false) +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator diff --git a/src/test/regress/multi_schedule b/src/test/regress/multi_schedule index 0bc772b5d..c6e5d3956 100644 --- a/src/test/regress/multi_schedule +++ b/src/test/regress/multi_schedule @@ -54,7 +54,7 @@ test: multi_partitioning_utils multi_partitioning replicated_partitioned_table test: subquery_basics subquery_local_tables subquery_executors subquery_and_cte set_operations set_operation_and_local_tables test: subqueries_deep subquery_view subquery_partitioning subquery_complex_target_list subqueries_not_supported subquery_in_where test: non_colocated_leaf_subquery_joins non_colocated_subquery_joins non_colocated_join_order -test: subquery_prepared_statements +test: subquery_prepared_statements recursive_relation_planning_restriction_pushdown # ---------- # Miscellaneous tests to check our query planning behavior diff --git a/src/test/regress/sql/recursive_relation_planning_restriction_pushdown.sql b/src/test/regress/sql/recursive_relation_planning_restriction_pushdown.sql new file mode 100644 index 000000000..5b0edba2d --- /dev/null +++ b/src/test/regress/sql/recursive_relation_planning_restriction_pushdown.sql @@ -0,0 +1,349 @@ +---------------------------------------------------- +-- recursive_relation_planning_restirction_pushdown +-- In this test file, we mosly test whether Citus +-- can successfully pushdown filters to the subquery +-- that is +---------------------------------------------------- + +-- all the queries in this file have the +-- same tables/subqueries combination as below +-- because this test aims to hold the query planning +-- steady, but mostly ensure that filters are handled +-- properly. Note that u2 is the relation that is +-- recursively planned + +-- Setting the debug level so that filters can be observed +SET client_min_messages TO DEBUG1; + +-- no filters on u2 +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1); + + +-- scalar array expressions can be pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE u2.value_1 > ANY(ARRAY[2, 1, 6]); + + +-- array operators on the table can be pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE ARRAY[u2.value_1, u2.value_2] @> (ARRAY[2, 3]); + +-- array operators on different tables cannot be pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE ARRAY[u2.value_1, u1.user_id] @> (ARRAY[2, 3]); + + +-- coerced expressions can be pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE (u2.value_1/2.0 > 2)::int::bool::text::bool; + + +-- case expression on a single table can be pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE (CASE WHEN u2.value_1 > 3 THEN u2.value_1 > 2 ELSE false END); + + +-- case expression multiple tables cannot be pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE (CASE WHEN u1.value_1 > 4000 THEN u2.value_1 / 100 > 1 ELSE false END); + + +-- coalesce expressions can be pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE COALESCE((u2.user_id/5.0)::int::bool, false); + + +-- nullif expressions can be pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE NULLIF((u2.value_2/5.0)::int::bool, false); + + +-- null test can be pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE u2.value_3 IS NOT NULL; + + +-- functions can be pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE isfinite(u2.time); + +-- functions with multiple tables cannot be pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE int4smaller(u2.value_1, u1.value_1) = 55; + +-- functions with multiple columns from the same tables can be pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE int4smaller(u2.value_1, u2.value_2) = u2.value_1; + +-- row expressions can be pushdown +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE row(u2.value_1, 2, 3) > row(u2.value_2, 2, 3); + +-- multiple expression from the same table can be pushed down together +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) + WHERE + (u2.user_id/1.0)::int::bool::text::bool AND + CASE WHEN u2.value_1 > 4000 THEN u2.value_2 / 100 > 1 ELSE false END AND + COALESCE((u2.user_id/50000)::bool, false) AND + NULLIF((u2.value_3/50000)::int::bool, false) AND + isfinite(u2.time) AND + u2.value_4 IS DISTINCT FROM 50040 AND + row(u2.value_4, 2, 3) > row(2000, 2, 3); + + +-- subqueries are not pushdown +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE u2.value_1 > + (SELECT avg(user_id) + FROM events_table); + +-- even subqueries with constant values are not pushdowned +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE u2.value_1 > + (SELECT 5); + +-- filters involving multiple tables aren't pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE u2.value_1 * u1.user_id > 25; + + +-- filter on other tables can only be pushdown +-- as long as they are equality filters on the +-- joining column +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE u1.value_1 = 3; + +-- but not when the filter is gt, lt or any other thing +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE u1.value_1 > 3; + +-- when the filter is on another column than the +-- join column, that's obviously not pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE u1.value_2 = 3; + + +-- or filters on the same table is pushdown +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE u2.value_1 > 4 OR u2.value_4 = 4; + +-- and filters on the same table is pushdown +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE u2.value_1 > 2 and u2.value_4 IS NULL; + + +-- filters on different tables are pushdown +-- only the ones that are not ANDed +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE (u2.value_1 > 2 OR u2.value_4 IS NULL) AND (u2.user_id > 4 OR u1.user_id > 3); + +-- see the comment above +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE (u2.value_1 > 2 OR u2.value_4 IS NULL) OR (u2.user_id > 4 AND u1.user_id > 3); + +-- see the comment above +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE (u2.value_1 > 2 OR u1.value_4 IS NULL) AND (u2.user_id > 4 AND u1.user_id > 3); + +-- see the comment above +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE (u2.value_1 > 2 OR u1.value_4 IS NULL) AND (u2.user_id > 4 OR u1.user_id > 3); + +-- see the comment above +-- but volatile functions are not pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE (u2.value_1 > 2 OR u1.value_4 IS NULL) AND (u2.user_id = 10000 * random() OR u1.user_id > 3); + +-- TODO: constant results should be pushed down, but not supported yet +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE (u2.value_1 > 2 AND false); + +-- TODO: what should the behaviour be? +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN LATERAL + (SELECT value_1, + random() + FROM users_table + WHERE u2.value_2 = 15) AS u3 USING (value_1) +WHERE (u2.value_1 > 2 + AND FALSE); + + + +