Add tests for filter pushdown

recursively_plan_tables
Onder Kalaci 2018-11-26 12:43:11 +03:00
parent 3ac4c1c3a2
commit 6b2a412c12
3 changed files with 886 additions and 1 deletions

View File

@ -0,0 +1,536 @@
----------------------------------------------------
-- recursive_relation_planning_restirction_pushdown
-- In this test file, we mostly test whether Citus
-- can successfully push down filters to the subquery
-- that is being recursively planned
----------------------------------------------------
-- all the queries in this file have the
-- same tables/subqueries combination as below
-- because this test aims to hold the query planning
-- steady, but mostly ensure that filters are handled
-- properly. Note that u2 is the relation that is
-- recursively planned
-- Setting the debug level so that filters can be observed
SET client_min_messages TO DEBUG1;
-- no filters on u2
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1);
DEBUG: generating subplan 1_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true
DEBUG: generating subplan 1_2 for subquery SELECT value_1, random() AS random FROM public.users_table
DEBUG: Plan 1 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('1_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('1_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1))
count
-------
38501
(1 row)
-- scalar array expressions can be pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE u2.value_1 > ANY(ARRAY[2, 1, 6]);
DEBUG: generating subplan 4_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE (value_1 OPERATOR(pg_catalog.>) ANY ('{2,1,6}'::integer[]))
DEBUG: generating subplan 4_2 for subquery SELECT value_1, random() AS random FROM public.users_table
DEBUG: Plan 4 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('4_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('4_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (users_table.value_1 OPERATOR(pg_catalog.>) ANY (ARRAY[2, 1, 6]))
count
-------
33398
(1 row)
-- array operators on the table can be pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE ARRAY[u2.value_1, u2.value_2] @> (ARRAY[2, 3]);
DEBUG: generating subplan 7_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE (ARRAY[value_1, value_2] OPERATOR(pg_catalog.@>) '{2,3}'::integer[])
DEBUG: generating subplan 7_2 for subquery SELECT value_1, random() AS random FROM public.users_table
DEBUG: Plan 7 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('7_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('7_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (ARRAY[users_table.value_1, users_table.value_2] OPERATOR(pg_catalog.@>) ARRAY[2, 3])
count
-------
4704
(1 row)
-- array operators on different tables cannot be pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE ARRAY[u2.value_1, u1.user_id] @> (ARRAY[2, 3]);
DEBUG: generating subplan 10_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true
DEBUG: generating subplan 10_2 for subquery SELECT value_1, random() AS random FROM public.users_table
DEBUG: Plan 10 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('10_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('10_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (ARRAY[users_table.value_1, u1.user_id] OPERATOR(pg_catalog.@>) ARRAY[2, 3])
count
-------
3352
(1 row)
-- coerced expressions can be pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE (u2.value_1/2.0 > 2)::int::bool::text::bool;
DEBUG: generating subplan 13_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE (((((((value_1)::numeric OPERATOR(pg_catalog./) 2.0) OPERATOR(pg_catalog.>) '2'::numeric))::integer)::boolean)::text)::boolean
DEBUG: generating subplan 13_2 for subquery SELECT value_1, random() AS random FROM public.users_table
DEBUG: Plan 13 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('13_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('13_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (((((((users_table.value_1)::numeric OPERATOR(pg_catalog./) 2.0) OPERATOR(pg_catalog.>) (2)::numeric))::integer)::boolean)::text)::boolean
count
-------
729
(1 row)
-- case expression on a single table can be pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE (CASE WHEN u2.value_1 > 3 THEN u2.value_1 > 2 ELSE false END);
DEBUG: generating subplan 16_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE CASE WHEN (value_1 OPERATOR(pg_catalog.>) 3) THEN (value_1 OPERATOR(pg_catalog.>) 2) ELSE false END
DEBUG: generating subplan 16_2 for subquery SELECT value_1, random() AS random FROM public.users_table
DEBUG: Plan 16 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('16_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('16_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE CASE WHEN (users_table.value_1 OPERATOR(pg_catalog.>) 3) THEN (users_table.value_1 OPERATOR(pg_catalog.>) 2) ELSE false END
count
-------
9990
(1 row)
-- case expressions on multiple tables cannot be pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE (CASE WHEN u1.value_1 > 4000 THEN u2.value_1 / 100 > 1 ELSE false END);
DEBUG: generating subplan 19_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true
DEBUG: generating subplan 19_2 for subquery SELECT value_1, random() AS random FROM public.users_table
DEBUG: Plan 19 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('19_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('19_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE CASE WHEN (u1.value_1 OPERATOR(pg_catalog.>) 4000) THEN ((users_table.value_1 OPERATOR(pg_catalog./) 100) OPERATOR(pg_catalog.>) 1) ELSE false END
count
-------
0
(1 row)
-- coalesce expressions can be pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE COALESCE((u2.user_id/5.0)::int::bool, false);
DEBUG: generating subplan 22_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE COALESCE(((((user_id)::numeric OPERATOR(pg_catalog./) 5.0))::integer)::boolean, false)
DEBUG: generating subplan 22_2 for subquery SELECT value_1, random() AS random FROM public.users_table
DEBUG: Plan 22 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('22_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('22_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE COALESCE(((((users_table.user_id)::numeric OPERATOR(pg_catalog./) 5.0))::integer)::boolean, false)
count
-------
28198
(1 row)
-- nullif expressions can be pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE NULLIF((u2.value_2/5.0)::int::bool, false);
DEBUG: generating subplan 25_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE NULLIF(((((value_2)::numeric OPERATOR(pg_catalog./) 5.0))::integer)::boolean, false)
DEBUG: generating subplan 25_2 for subquery SELECT value_1, random() AS random FROM public.users_table
DEBUG: Plan 25 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('25_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('25_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE NULLIF(((((users_table.value_2)::numeric OPERATOR(pg_catalog./) 5.0))::integer)::boolean, false)
count
-------
18895
(1 row)
-- null test can be pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE u2.value_3 IS NOT NULL;
DEBUG: generating subplan 28_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE (value_3 IS NOT NULL)
DEBUG: generating subplan 28_2 for subquery SELECT value_1, random() AS random FROM public.users_table
DEBUG: Plan 28 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('28_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('28_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (users_table.value_3 IS NOT NULL)
count
-------
38501
(1 row)
-- functions can be pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE isfinite(u2.time);
DEBUG: generating subplan 31_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE isfinite("time")
DEBUG: generating subplan 31_2 for subquery SELECT value_1, random() AS random FROM public.users_table
DEBUG: Plan 31 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('31_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('31_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE isfinite(users_table."time")
count
-------
38501
(1 row)
-- functions with multiple tables cannot be pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE int4smaller(u2.value_1, u1.value_1) = 55;
DEBUG: generating subplan 34_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true
DEBUG: generating subplan 34_2 for subquery SELECT value_1, random() AS random FROM public.users_table
DEBUG: Plan 34 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('34_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('34_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (int4smaller(users_table.value_1, u1.value_1) OPERATOR(pg_catalog.=) 55)
count
-------
0
(1 row)
-- functions with multiple columns from the same table can be pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE int4smaller(u2.value_1, u2.value_2) = u2.value_1;
DEBUG: generating subplan 37_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE (value_1 OPERATOR(pg_catalog.=) int4smaller(value_1, value_2))
DEBUG: generating subplan 37_2 for subquery SELECT value_1, random() AS random FROM public.users_table
DEBUG: Plan 37 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('37_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('37_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (int4smaller(users_table.value_1, users_table.value_2) OPERATOR(pg_catalog.=) users_table.value_1)
count
-------
20686
(1 row)
-- row expressions can be pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE row(u2.value_1, 2, 3) > row(u2.value_2, 2, 3);
DEBUG: generating subplan 40_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE (ROW(value_1, 2, 3) OPERATOR(pg_catalog.>) ROW(value_2, 2, 3))
DEBUG: generating subplan 40_2 for subquery SELECT value_1, random() AS random FROM public.users_table
DEBUG: Plan 40 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('40_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('40_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (ROW(users_table.value_1, 2, 3) OPERATOR(pg_catalog.>) ROW(users_table.value_2, 2, 3))
count
-------
17815
(1 row)
-- multiple expressions from the same table can be pushed down together
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE
(u2.user_id/1.0)::int::bool::text::bool AND
CASE WHEN u2.value_1 > 4000 THEN u2.value_2 / 100 > 1 ELSE false END AND
COALESCE((u2.user_id/50000)::bool, false) AND
NULLIF((u2.value_3/50000)::int::bool, false) AND
isfinite(u2.time) AND
u2.value_4 IS DISTINCT FROM 50040 AND
row(u2.value_4, 2, 3) > row(2000, 2, 3);
DEBUG: generating subplan 43_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE (((((((user_id)::numeric OPERATOR(pg_catalog./) 1.0))::integer)::boolean)::text)::boolean AND CASE WHEN (value_1 OPERATOR(pg_catalog.>) 4000) THEN ((value_2 OPERATOR(pg_catalog./) 100) OPERATOR(pg_catalog.>) 1) ELSE false END AND COALESCE(((user_id OPERATOR(pg_catalog./) 50000))::boolean, false) AND NULLIF((((value_3 OPERATOR(pg_catalog./) '50000'::double precision))::integer)::boolean, false) AND isfinite("time") AND (value_4 IS DISTINCT FROM 50040) AND (ROW(value_4, 2, 3) OPERATOR(pg_catalog.>) ROW(2000, 2, 3)))
DEBUG: generating subplan 43_2 for subquery SELECT value_1, random() AS random FROM public.users_table
DEBUG: Plan 43 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('43_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('43_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (((((((users_table.user_id)::numeric OPERATOR(pg_catalog./) 1.0))::integer)::boolean)::text)::boolean AND CASE WHEN (users_table.value_1 OPERATOR(pg_catalog.>) 4000) THEN ((users_table.value_2 OPERATOR(pg_catalog./) 100) OPERATOR(pg_catalog.>) 1) ELSE false END AND COALESCE(((users_table.user_id OPERATOR(pg_catalog./) 50000))::boolean, false) AND NULLIF((((users_table.value_3 OPERATOR(pg_catalog./) (50000)::double precision))::integer)::boolean, false) AND isfinite(users_table."time") AND (users_table.value_4 IS DISTINCT FROM 50040) AND (ROW(users_table.value_4, 2, 3) OPERATOR(pg_catalog.>) ROW(2000, 2, 3)))
count
-------
0
(1 row)
-- subqueries are not pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE u2.value_1 >
(SELECT avg(user_id)
FROM events_table);
DEBUG: generating subplan 46_1 for subquery SELECT avg(user_id) AS avg FROM public.events_table
DEBUG: generating subplan 46_2 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true
DEBUG: generating subplan 46_3 for subquery SELECT value_1, random() AS random FROM public.users_table
DEBUG: Plan 46 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('46_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('46_3'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE ((users_table.value_1)::numeric OPERATOR(pg_catalog.>) (SELECT intermediate_result.avg FROM read_intermediate_result('46_1'::text, 'binary'::citus_copy_format) intermediate_result(avg numeric)))
count
-------
9990
(1 row)
-- even subqueries with constant values are not pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE u2.value_1 >
(SELECT 5);
DEBUG: generating subplan 50_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true
DEBUG: generating subplan 50_2 for subquery SELECT value_1, random() AS random FROM public.users_table
DEBUG: Plan 50 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('50_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('50_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (users_table.value_1 OPERATOR(pg_catalog.>) (SELECT 5))
count
-------
0
(1 row)
-- filters involving multiple tables aren't pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE u2.value_1 * u1.user_id > 25;
DEBUG: generating subplan 53_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true
DEBUG: generating subplan 53_2 for subquery SELECT value_1, random() AS random FROM public.users_table
DEBUG: Plan 53 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('53_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('53_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE ((users_table.value_1 OPERATOR(pg_catalog.*) u1.user_id) OPERATOR(pg_catalog.>) 25)
count
-------
162
(1 row)
-- filters on other tables can be pushed down
-- only when they are equality filters on the
-- joining column
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE u1.value_1 = 3;
DEBUG: generating subplan 56_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE (value_1 OPERATOR(pg_catalog.=) 3)
DEBUG: generating subplan 56_2 for subquery SELECT value_1, random() AS random FROM public.users_table
DEBUG: Plan 56 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('56_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('56_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (u1.value_1 OPERATOR(pg_catalog.=) 3)
count
-------
17576
(1 row)
-- but not when the filter uses >, < or any other non-equality operator
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE u1.value_1 > 3;
DEBUG: generating subplan 59_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true
DEBUG: generating subplan 59_2 for subquery SELECT value_1, random() AS random FROM public.users_table
DEBUG: Plan 59 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('59_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('59_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (u1.value_1 OPERATOR(pg_catalog.>) 3)
count
-------
9990
(1 row)
-- when the filter is on another column than the
-- join column, that's obviously not pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE u1.value_2 = 3;
DEBUG: generating subplan 62_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true
DEBUG: generating subplan 62_2 for subquery SELECT value_1, random() AS random FROM public.users_table
DEBUG: Plan 62 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('62_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('62_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (u1.value_2 OPERATOR(pg_catalog.=) 3)
count
-------
5618
(1 row)
-- OR-ed filters on the same table are pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE u2.value_1 > 4 OR u2.value_4 = 4;
DEBUG: generating subplan 65_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE ((value_1 OPERATOR(pg_catalog.>) 4) OR (value_4 OPERATOR(pg_catalog.=) 4))
DEBUG: generating subplan 65_2 for subquery SELECT value_1, random() AS random FROM public.users_table
DEBUG: Plan 65 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('65_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('65_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE ((users_table.value_1 OPERATOR(pg_catalog.>) 4) OR (users_table.value_4 OPERATOR(pg_catalog.=) 4))
count
-------
729
(1 row)
-- AND-ed filters on the same table are pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE u2.value_1 > 2 and u2.value_4 IS NULL;
DEBUG: generating subplan 68_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE ((value_1 OPERATOR(pg_catalog.>) 2) AND (value_4 IS NULL))
DEBUG: generating subplan 68_2 for subquery SELECT value_1, random() AS random FROM public.users_table
DEBUG: Plan 68 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('68_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('68_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE ((users_table.value_1 OPERATOR(pg_catalog.>) 2) AND (users_table.value_4 IS NULL))
count
-------
27566
(1 row)
-- when filters on different tables are combined, only the
-- AND-ed conditions that reference u2 alone are pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE (u2.value_1 > 2 OR u2.value_4 IS NULL) AND (u2.user_id > 4 OR u1.user_id > 3);
DEBUG: generating subplan 71_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE ((value_1 OPERATOR(pg_catalog.>) 2) OR (value_4 IS NULL))
DEBUG: generating subplan 71_2 for subquery SELECT value_1, random() AS random FROM public.users_table
DEBUG: Plan 71 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('71_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('71_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (((users_table.value_1 OPERATOR(pg_catalog.>) 2) OR (users_table.value_4 IS NULL)) AND ((users_table.user_id OPERATOR(pg_catalog.>) 4) OR (u1.user_id OPERATOR(pg_catalog.>) 3)))
count
-------
27405
(1 row)
-- see the comment above
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE (u2.value_1 > 2 OR u2.value_4 IS NULL) OR (u2.user_id > 4 AND u1.user_id > 3);
DEBUG: generating subplan 74_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE ((value_1 OPERATOR(pg_catalog.>) 2) OR (value_4 IS NULL) OR (user_id OPERATOR(pg_catalog.>) 4))
DEBUG: generating subplan 74_2 for subquery SELECT value_1, random() AS random FROM public.users_table
DEBUG: Plan 74 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('74_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('74_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE ((users_table.value_1 OPERATOR(pg_catalog.>) 2) OR (users_table.value_4 IS NULL) OR ((users_table.user_id OPERATOR(pg_catalog.>) 4) AND (u1.user_id OPERATOR(pg_catalog.>) 3)))
count
-------
38501
(1 row)
-- see the comment above
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE (u2.value_1 > 2 OR u1.value_4 IS NULL) AND (u2.user_id > 4 AND u1.user_id > 3);
DEBUG: generating subplan 77_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE (user_id OPERATOR(pg_catalog.>) 4)
DEBUG: generating subplan 77_2 for subquery SELECT value_1, random() AS random FROM public.users_table
DEBUG: Plan 77 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('77_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('77_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (((users_table.value_1 OPERATOR(pg_catalog.>) 2) OR (u1.value_4 IS NULL)) AND ((users_table.user_id OPERATOR(pg_catalog.>) 4) AND (u1.user_id OPERATOR(pg_catalog.>) 3)))
count
-------
7883
(1 row)
-- see the comment above
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE (u2.value_1 > 2 OR u1.value_4 IS NULL) AND (u2.user_id > 4 OR u1.user_id > 3);
DEBUG: generating subplan 80_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true
DEBUG: generating subplan 80_2 for subquery SELECT value_1, random() AS random FROM public.users_table
DEBUG: Plan 80 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('80_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('80_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (((users_table.value_1 OPERATOR(pg_catalog.>) 2) OR (u1.value_4 IS NULL)) AND ((users_table.user_id OPERATOR(pg_catalog.>) 4) OR (u1.user_id OPERATOR(pg_catalog.>) 3)))
count
-------
27405
(1 row)
-- see the comment above
-- but volatile functions are not pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE (u2.value_1 > 2 OR u1.value_4 IS NULL) AND (u2.user_id = 10000 * random() OR u1.user_id > 3);
DEBUG: generating subplan 83_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true
DEBUG: generating subplan 83_2 for subquery SELECT value_1, random() AS random FROM public.users_table
DEBUG: Plan 83 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('83_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('83_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (((users_table.value_1 OPERATOR(pg_catalog.>) 2) OR (u1.value_4 IS NULL)) AND (((users_table.user_id)::double precision OPERATOR(pg_catalog.=) ((10000)::double precision OPERATOR(pg_catalog.*) random())) OR (u1.user_id OPERATOR(pg_catalog.>) 3)))
count
-------
22183
(1 row)
-- TODO: constant results should be pushed down, but not supported yet
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE (u2.value_1 > 2 AND false);
DEBUG: generating subplan 86_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true
DEBUG: generating subplan 86_2 for subquery SELECT value_1, random() AS random FROM public.users_table
DEBUG: Plan 86 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('86_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('86_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE ((users_table.value_1 OPERATOR(pg_catalog.>) 2) AND false)
count
-------
0
(1 row)
-- TODO: what should the behaviour be?
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN LATERAL
(SELECT value_1,
random()
FROM users_table
WHERE u2.value_2 = 15) AS u3 USING (value_1)
WHERE (u2.value_1 > 2
AND FALSE);
DEBUG: generating subplan 89_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true
DEBUG: Plan 89 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('89_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN LATERAL (SELECT users_table_1.value_1, random() AS random FROM public.users_table users_table_1 WHERE (users_table.value_2 OPERATOR(pg_catalog.=) 15)) u3 USING (value_1)) WHERE ((users_table.value_1 OPERATOR(pg_catalog.>) 2) AND false)
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator

View File

@ -54,7 +54,7 @@ test: multi_partitioning_utils multi_partitioning replicated_partitioned_table
test: subquery_basics subquery_local_tables subquery_executors subquery_and_cte set_operations set_operation_and_local_tables test: subquery_basics subquery_local_tables subquery_executors subquery_and_cte set_operations set_operation_and_local_tables
test: subqueries_deep subquery_view subquery_partitioning subquery_complex_target_list subqueries_not_supported subquery_in_where test: subqueries_deep subquery_view subquery_partitioning subquery_complex_target_list subqueries_not_supported subquery_in_where
test: non_colocated_leaf_subquery_joins non_colocated_subquery_joins non_colocated_join_order test: non_colocated_leaf_subquery_joins non_colocated_subquery_joins non_colocated_join_order
test: subquery_prepared_statements test: subquery_prepared_statements recursive_relation_planning_restriction_pushdown
# ---------- # ----------
# Miscellaneous tests to check our query planning behavior # Miscellaneous tests to check our query planning behavior

View File

@ -0,0 +1,349 @@
----------------------------------------------------
-- recursive_relation_planning_restriction_pushdown
-- In this test file, we mostly test whether Citus
-- can successfully push down filters to the subquery
-- that is generated for the recursively planned relation
----------------------------------------------------
-- all the queries in this file have the
-- same tables/subqueries combination as below
-- because this test aims to hold the query planning
-- steady, but mostly ensure that filters are handled
-- properly. Note that u2 is the relation that is
-- recursively planned
-- Setting the debug level so that filters can be observed
SET client_min_messages TO DEBUG1;
-- no filters on u2
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1);
-- scalar array expressions can be pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE u2.value_1 > ANY(ARRAY[2, 1, 6]);
-- array operators on the table can be pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE ARRAY[u2.value_1, u2.value_2] @> (ARRAY[2, 3]);
-- array operators on different tables cannot be pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE ARRAY[u2.value_1, u1.user_id] @> (ARRAY[2, 3]);
-- coerced expressions can be pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE (u2.value_1/2.0 > 2)::int::bool::text::bool;
-- case expression on a single table can be pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE (CASE WHEN u2.value_1 > 3 THEN u2.value_1 > 2 ELSE false END);
-- case expressions on multiple tables cannot be pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE (CASE WHEN u1.value_1 > 4000 THEN u2.value_1 / 100 > 1 ELSE false END);
-- coalesce expressions can be pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE COALESCE((u2.user_id/5.0)::int::bool, false);
-- nullif expressions can be pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE NULLIF((u2.value_2/5.0)::int::bool, false);
-- null test can be pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE u2.value_3 IS NOT NULL;
-- functions can be pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE isfinite(u2.time);
-- functions with multiple tables cannot be pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE int4smaller(u2.value_1, u1.value_1) = 55;
-- functions with multiple columns from the same table can be pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE int4smaller(u2.value_1, u2.value_2) = u2.value_1;
-- row expressions can be pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE row(u2.value_1, 2, 3) > row(u2.value_2, 2, 3);
-- multiple expressions from the same table can be pushed down together
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE
(u2.user_id/1.0)::int::bool::text::bool AND
CASE WHEN u2.value_1 > 4000 THEN u2.value_2 / 100 > 1 ELSE false END AND
COALESCE((u2.user_id/50000)::bool, false) AND
NULLIF((u2.value_3/50000)::int::bool, false) AND
isfinite(u2.time) AND
u2.value_4 IS DISTINCT FROM 50040 AND
row(u2.value_4, 2, 3) > row(2000, 2, 3);
-- subqueries are not pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE u2.value_1 >
(SELECT avg(user_id)
FROM events_table);
-- even subqueries with constant values are not pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE u2.value_1 >
(SELECT 5);
-- filters involving multiple tables aren't pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE u2.value_1 * u1.user_id > 25;
-- filters on other tables can only be pushed down
-- as long as they are equality filters on the
-- joining column
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE u1.value_1 = 3;
-- but not when the filter is gt, lt or any other thing
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE u1.value_1 > 3;
-- when the filter is on another column than the
-- join column, that's obviously not pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE u1.value_2 = 3;
-- OR-ed filters on the same table are pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE u2.value_1 > 4 OR u2.value_4 = 4;
-- AND-ed filters on the same table are pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE u2.value_1 > 2 and u2.value_4 IS NULL;
-- when filters on different tables are combined, only the
-- AND-ed conditions that reference u2 alone are pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE (u2.value_1 > 2 OR u2.value_4 IS NULL) AND (u2.user_id > 4 OR u1.user_id > 3);
-- see the comment above
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE (u2.value_1 > 2 OR u2.value_4 IS NULL) OR (u2.user_id > 4 AND u1.user_id > 3);
-- see the comment above
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE (u2.value_1 > 2 OR u1.value_4 IS NULL) AND (u2.user_id > 4 AND u1.user_id > 3);
-- see the comment above
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE (u2.value_1 > 2 OR u1.value_4 IS NULL) AND (u2.user_id > 4 OR u1.user_id > 3);
-- see the comment above
-- but volatile functions are not pushed down
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE (u2.value_1 > 2 OR u1.value_4 IS NULL) AND (u2.user_id = 10000 * random() OR u1.user_id > 3);
-- TODO: constant results should be pushed down, but not supported yet
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN
(SELECT value_1,
random()
FROM users_table) AS u3 USING (value_1)
WHERE (u2.value_1 > 2 AND false);
-- TODO: what should the behaviour be?
SELECT count(*)
FROM users_table u1
JOIN users_table u2 USING(value_1)
JOIN LATERAL
(SELECT value_1,
random()
FROM users_table
WHERE u2.value_2 = 15) AS u3 USING (value_1)
WHERE (u2.value_1 > 2
AND FALSE);