mirror of https://github.com/citusdata/citus.git
Phase - II: recursively plan non-recurring subqueries too
parent
f339450a9d
commit
f52381387e
|
@ -878,12 +878,14 @@ RecursivelyPlanDistributedJoinNode(Node *node, Query *query,
|
|||
else if (distributedRte->rtekind == RTE_SUBQUERY)
|
||||
{
|
||||
/*
|
||||
* XXX: Similar to JoinExpr, we don't know how to recursively plan distributed
|
||||
* subqueries within join expressions yet.
|
||||
* We don't try logging the subquery here because RecursivelyPlanSubquery
|
||||
* will anyway do so if the query doesn't reference the outer query.
|
||||
*/
|
||||
ereport(DEBUG4, (errmsg("recursive planner cannot plan distributed "
|
||||
"subqueries within join expressions yet")));
|
||||
return;
|
||||
ereport(DEBUG1, (errmsg("recursively planning the distributed subquery "
|
||||
"since it is part of a distributed join node "
|
||||
"that is outer joined with a recurring rel")));
|
||||
|
||||
RecursivelyPlanSubquery(distributedRte->subquery, recursivePlanningContext);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
@ -1357,22 +1357,17 @@ DEBUG: Router planner cannot handle multi-shard select queries
|
|||
{ "8" : "test18", "8" : "test28", "8" : "test38", "8" : "test48", "8" : "test58", "8" : "test68", "8" : "test78", "8" : "test8", "8" : "test88", "8" : "test98" }
|
||||
(1 row)
|
||||
|
||||
-- this test can only work if the CTE is recursively
|
||||
-- planned
|
||||
WITH b AS (SELECT * FROM test_table)
|
||||
SELECT count(*) FROM (SELECT key as x FROM test_table OFFSET 0) as ref LEFT JOIN b ON (ref.x = b.key);
|
||||
DEBUG: CTE b is going to be inlined via distributed planning
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: generating subplan XXX_1 for subquery SELECT key AS x FROM cte_inline.test_table OFFSET 0
|
||||
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.x FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) ref LEFT JOIN (SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table) b ON ((ref.x OPERATOR(pg_catalog.=) b.key)))
|
||||
DEBUG: recursively planning right side of the left join since the outer side is a recurring rel
|
||||
DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: generating subplan XXX_1 for CTE b: SELECT key, value, other_value FROM cte_inline.test_table
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: generating subplan XXX_2 for subquery SELECT key AS x FROM cte_inline.test_table OFFSET 0
|
||||
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.x FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) ref LEFT JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) b ON ((ref.x OPERATOR(pg_catalog.=) b.key)))
|
||||
DEBUG: generating subplan XXX_2 for subquery SELECT key, value, other_value FROM cte_inline.test_table
|
||||
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.x FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) ref LEFT JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) b ON ((ref.x OPERATOR(pg_catalog.=) b.key)))
|
||||
DEBUG: Creating router plan
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
|
@ -1397,8 +1392,6 @@ DEBUG: Router planner cannot handle multi-shard select queries
|
|||
480
|
||||
(1 row)
|
||||
|
||||
-- cte a has to be recursively planned because of OFFSET 0
|
||||
-- after that, cte b also requires recursive planning
|
||||
WITH a AS (SELECT * FROM test_table OFFSET 0),
|
||||
b AS (SELECT * FROM test_table)
|
||||
SELECT min(a.key) FROM a LEFT JOIN b ON (a.value = b.value);
|
||||
|
@ -1407,13 +1400,10 @@ DEBUG: CTE b is going to be inlined via distributed planning
|
|||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: generating subplan XXX_1 for subquery SELECT key, value, other_value FROM cte_inline.test_table OFFSET 0
|
||||
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT min(a.key) AS min FROM ((SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) a LEFT JOIN (SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table) b ON ((a.value OPERATOR(pg_catalog.=) b.value)))
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: generating subplan XXX_1 for CTE a: SELECT key, value, other_value FROM cte_inline.test_table OFFSET 0
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: generating subplan XXX_2 for CTE b: SELECT key, value, other_value FROM cte_inline.test_table
|
||||
DEBUG: recursively planning right side of the left join since the outer side is a recurring rel
|
||||
DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: generating subplan XXX_2 for subquery SELECT key, value, other_value FROM cte_inline.test_table
|
||||
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT min(a.key) AS min FROM ((SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) a LEFT JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) b ON ((a.value OPERATOR(pg_catalog.=) b.value)))
|
||||
DEBUG: Creating router plan
|
||||
min
|
||||
|
@ -1450,14 +1440,11 @@ DEBUG: CTE cte_2 is going to be inlined via distributed planning
|
|||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: generating subplan XXX_1 for subquery SELECT value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 3)
|
||||
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT value FROM ((SELECT test_table.value FROM cte_inline.test_table WHERE (test_table.key OPERATOR(pg_catalog.>) 1)) cte_1 FULL JOIN (SELECT intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_2 USING (value)) ORDER BY value DESC LIMIT 3
|
||||
DEBUG: recursively planning left side of the full join since the other side is a recurring rel
|
||||
DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 1)
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: generating subplan XXX_2 for CTE cte_2: SELECT value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 3)
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT value FROM ((SELECT intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_1 FULL JOIN (SELECT intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_2 USING (value)) ORDER BY value DESC LIMIT 3
|
||||
DEBUG: generating subplan XXX_2 for subquery SELECT value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 1)
|
||||
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT value FROM ((SELECT intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_1 FULL JOIN (SELECT intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_2 USING (value)) ORDER BY value DESC LIMIT 3
|
||||
DEBUG: Creating router plan
|
||||
value
|
||||
---------------------------------------------------------------------
|
||||
|
|
|
@ -1357,22 +1357,17 @@ DEBUG: Router planner cannot handle multi-shard select queries
|
|||
{ "8" : "test18", "8" : "test28", "8" : "test38", "8" : "test48", "8" : "test58", "8" : "test68", "8" : "test78", "8" : "test8", "8" : "test88", "8" : "test98" }
|
||||
(1 row)
|
||||
|
||||
-- this test can only work if the CTE is recursively
|
||||
-- planned
|
||||
WITH b AS (SELECT * FROM test_table)
|
||||
SELECT count(*) FROM (SELECT key as x FROM test_table OFFSET 0) as ref LEFT JOIN b ON (ref.x = b.key);
|
||||
DEBUG: CTE b is going to be inlined via distributed planning
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: generating subplan XXX_1 for subquery SELECT key AS x FROM cte_inline.test_table OFFSET 0
|
||||
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.x FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) ref LEFT JOIN (SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table) b ON ((ref.x OPERATOR(pg_catalog.=) b.key)))
|
||||
DEBUG: recursively planning right side of the left join since the outer side is a recurring rel
|
||||
DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: generating subplan XXX_1 for CTE b: SELECT key, value, other_value FROM cte_inline.test_table
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: generating subplan XXX_2 for subquery SELECT key AS x FROM cte_inline.test_table OFFSET 0
|
||||
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.x FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) ref LEFT JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) b ON ((ref.x OPERATOR(pg_catalog.=) b.key)))
|
||||
DEBUG: generating subplan XXX_2 for subquery SELECT key, value, other_value FROM cte_inline.test_table
|
||||
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.x FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) ref LEFT JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) b ON ((ref.x OPERATOR(pg_catalog.=) b.key)))
|
||||
DEBUG: Creating router plan
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
|
@ -1397,8 +1392,6 @@ DEBUG: Router planner cannot handle multi-shard select queries
|
|||
480
|
||||
(1 row)
|
||||
|
||||
-- cte a has to be recursively planned because of OFFSET 0
|
||||
-- after that, cte b also requires recursive planning
|
||||
WITH a AS (SELECT * FROM test_table OFFSET 0),
|
||||
b AS (SELECT * FROM test_table)
|
||||
SELECT min(a.key) FROM a LEFT JOIN b ON (a.value = b.value);
|
||||
|
@ -1407,13 +1400,10 @@ DEBUG: CTE b is going to be inlined via distributed planning
|
|||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: generating subplan XXX_1 for subquery SELECT key, value, other_value FROM cte_inline.test_table OFFSET 0
|
||||
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT min(a.key) AS min FROM ((SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) a LEFT JOIN (SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table) b ON ((a.value OPERATOR(pg_catalog.=) b.value)))
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: generating subplan XXX_1 for CTE a: SELECT key, value, other_value FROM cte_inline.test_table OFFSET 0
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: generating subplan XXX_2 for CTE b: SELECT key, value, other_value FROM cte_inline.test_table
|
||||
DEBUG: recursively planning right side of the left join since the outer side is a recurring rel
|
||||
DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: generating subplan XXX_2 for subquery SELECT key, value, other_value FROM cte_inline.test_table
|
||||
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT min(a.key) AS min FROM ((SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) a LEFT JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) b ON ((a.value OPERATOR(pg_catalog.=) b.value)))
|
||||
DEBUG: Creating router plan
|
||||
min
|
||||
|
@ -1450,14 +1440,11 @@ DEBUG: CTE cte_2 is going to be inlined via distributed planning
|
|||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: generating subplan XXX_1 for subquery SELECT value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 3)
|
||||
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT value FROM ((SELECT test_table.value FROM cte_inline.test_table WHERE (test_table.key OPERATOR(pg_catalog.>) 1)) cte_1 FULL JOIN (SELECT intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_2 USING (value)) ORDER BY value DESC LIMIT 3
|
||||
DEBUG: recursively planning left side of the full join since the other side is a recurring rel
|
||||
DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 1)
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: generating subplan XXX_2 for CTE cte_2: SELECT value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 3)
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT value FROM ((SELECT intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_1 FULL JOIN (SELECT intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_2 USING (value)) ORDER BY value DESC LIMIT 3
|
||||
DEBUG: generating subplan XXX_2 for subquery SELECT value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 1)
|
||||
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT value FROM ((SELECT intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_1 FULL JOIN (SELECT intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_2 USING (value)) ORDER BY value DESC LIMIT 3
|
||||
DEBUG: Creating router plan
|
||||
value
|
||||
---------------------------------------------------------------------
|
||||
|
|
|
@ -107,7 +107,7 @@ FROM (
|
|||
ERROR: the query contains a join that requires repartitioning
|
||||
HINT: Set citus.enable_repartition_joins to on to enable repartitioning
|
||||
-- the LEFT JOIN condition is not on the partition column (i.e., it is part_key divided by 2)
|
||||
-- still, recursive planning will kick in to plan some part of the query
|
||||
-- but, we can plan the query thanks to recursive planning
|
||||
SET client_min_messages TO DEBUG1;
|
||||
INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg )
|
||||
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event)
|
||||
|
@ -145,14 +145,22 @@ FROM (
|
|||
AND e.event_type IN (106, 107, 108)
|
||||
) t2 ON (t1.user_id = (t2.user_id)/2)
|
||||
GROUP BY t1.user_id, hasdone_event
|
||||
) t GROUP BY user_id, hasdone_event;
|
||||
) t GROUP BY user_id, hasdone_event
|
||||
RETURNING user_id, value_1_agg, value_2_agg;
|
||||
DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries
|
||||
DEBUG: generating subplan XXX_1 for subquery SELECT u.user_id, 'step=>1'::text AS event, e."time" FROM public.users_table u, public.events_table e WHERE ((u.user_id OPERATOR(pg_catalog.=) e.user_id) AND (u.user_id OPERATOR(pg_catalog.>=) 10) AND (u.user_id OPERATOR(pg_catalog.<=) 25) AND (e.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[100, 101, 102])))
|
||||
DEBUG: generating subplan XXX_2 for subquery SELECT u.user_id, 'step=>2'::text AS event, e."time" FROM public.users_table u, public.events_table e WHERE ((u.user_id OPERATOR(pg_catalog.=) e.user_id) AND (u.user_id OPERATOR(pg_catalog.>=) 10) AND (u.user_id OPERATOR(pg_catalog.<=) 25) AND (e.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[103, 104, 105])))
|
||||
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT intermediate_result.user_id, intermediate_result.event, intermediate_result."time" FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, event text, "time" timestamp without time zone) UNION SELECT intermediate_result.user_id, intermediate_result.event, intermediate_result."time" FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, event text, "time" timestamp without time zone)
|
||||
DEBUG: generating subplan XXX_1 for subquery SELECT u.user_id, 'step=>1'::text AS event, e."time" FROM public.users_table u, public.events_table e WHERE ((u.user_id OPERATOR(pg_catalog.=) e.user_id) AND (u.user_id OPERATOR(pg_catalog.>=) 10) AND (u.user_id OPERATOR(pg_catalog.<=) 25) AND (e.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[100, 101, 102]))) UNION SELECT u.user_id, 'step=>2'::text AS event, e."time" FROM public.users_table u, public.events_table e WHERE ((u.user_id OPERATOR(pg_catalog.=) e.user_id) AND (u.user_id OPERATOR(pg_catalog.>=) 10) AND (u.user_id OPERATOR(pg_catalog.<=) 25) AND (e.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[103, 104, 105])))
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table
|
||||
DEBUG: recursively planning right side of the left join since the outer side is a recurring rel
|
||||
DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel
|
||||
DEBUG: generating subplan XXX_2 for subquery SELECT DISTINCT user_id, 'Has done event'::text AS hasdone_event FROM public.events_table e WHERE ((user_id OPERATOR(pg_catalog.>=) 10) AND (user_id OPERATOR(pg_catalog.<=) 25) AND (event_type OPERATOR(pg_catalog.=) ANY (ARRAY[106, 107, 108])))
|
||||
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, int4(sum(array_length(events_table, 1))) AS value_1_agg, length(hasdone_event) AS value_2_agg FROM (SELECT t1.user_id, array_agg(t1.event ORDER BY t1."time") AS events_table, COALESCE(t2.hasdone_event, 'Has not done event'::text) AS hasdone_event FROM ((SELECT intermediate_result.user_id, intermediate_result.event, intermediate_result."time" FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, event text, "time" timestamp without time zone)) t1 LEFT JOIN (SELECT intermediate_result.user_id, intermediate_result.hasdone_event FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, hasdone_event text)) t2 ON ((t1.user_id OPERATOR(pg_catalog.=) (t2.user_id OPERATOR(pg_catalog./) 2)))) GROUP BY t1.user_id, t2.hasdone_event) t GROUP BY user_id, hasdone_event
|
||||
DEBUG: Collecting INSERT ... SELECT results on coordinator
|
||||
user_id | value_1_agg | value_2_agg
|
||||
---------------------------------------------------------------------
|
||||
(0 rows)
|
||||
|
||||
RESET client_min_messages;
|
||||
---------------------------------------------------------------------
|
||||
---------------------------------------------------------------------
|
||||
|
@ -229,7 +237,7 @@ ORDER BY
|
|||
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
|
||||
-- not pushable since the JOIN condition is not equi JOIN
|
||||
-- (subquery_1 JOIN subquery_2)
|
||||
-- still, recursive planning will kick in
|
||||
-- but, we can plan the query thanks to recursive planning
|
||||
SET client_min_messages TO DEBUG1;
|
||||
INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg)
|
||||
SELECT
|
||||
|
@ -295,14 +303,22 @@ WHERE
|
|||
GROUP BY
|
||||
count_pay, user_id
|
||||
ORDER BY
|
||||
count_pay;
|
||||
count_pay
|
||||
RETURNING user_id, value_1_agg, value_2_agg;
|
||||
DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries
|
||||
DEBUG: generating subplan XXX_1 for subquery SELECT users_table.user_id, 'action=>1'::text AS event, events_table."time" FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (users_table.user_id OPERATOR(pg_catalog.>=) 10) AND (users_table.user_id OPERATOR(pg_catalog.<=) 70) AND (events_table.event_type OPERATOR(pg_catalog.>) 10) AND (events_table.event_type OPERATOR(pg_catalog.<) 12))
|
||||
DEBUG: generating subplan XXX_2 for subquery SELECT users_table.user_id, 'action=>2'::text AS event, events_table."time" FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (users_table.user_id OPERATOR(pg_catalog.>=) 10) AND (users_table.user_id OPERATOR(pg_catalog.<=) 70) AND (events_table.event_type OPERATOR(pg_catalog.>) 12) AND (events_table.event_type OPERATOR(pg_catalog.<) 14))
|
||||
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT intermediate_result.user_id, intermediate_result.event, intermediate_result."time" FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, event text, "time" timestamp without time zone) UNION SELECT intermediate_result.user_id, intermediate_result.event, intermediate_result."time" FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, event text, "time" timestamp without time zone)
|
||||
DEBUG: generating subplan XXX_1 for subquery SELECT users_table.user_id, 'action=>1'::text AS event, events_table."time" FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (users_table.user_id OPERATOR(pg_catalog.>=) 10) AND (users_table.user_id OPERATOR(pg_catalog.<=) 70) AND (events_table.event_type OPERATOR(pg_catalog.>) 10) AND (events_table.event_type OPERATOR(pg_catalog.<) 12)) UNION SELECT users_table.user_id, 'action=>2'::text AS event, events_table."time" FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (users_table.user_id OPERATOR(pg_catalog.>=) 10) AND (users_table.user_id OPERATOR(pg_catalog.<=) 70) AND (events_table.event_type OPERATOR(pg_catalog.>) 12) AND (events_table.event_type OPERATOR(pg_catalog.<) 14))
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table
|
||||
DEBUG: recursively planning right side of the left join since the outer side is a recurring rel
|
||||
DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel
|
||||
DEBUG: generating subplan XXX_2 for subquery SELECT user_id, count(*) AS count_pay FROM public.users_table WHERE ((user_id OPERATOR(pg_catalog.>=) 10) AND (user_id OPERATOR(pg_catalog.<=) 70) AND (value_1 OPERATOR(pg_catalog.>) 15) AND (value_1 OPERATOR(pg_catalog.<) 17)) GROUP BY user_id HAVING (count(*) OPERATOR(pg_catalog.>) 1)
|
||||
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, int4(avg(array_length(events_table, 1))) AS value_1_agg, int4(count_pay) AS value_2_agg FROM (SELECT subquery_1.user_id, array_agg(subquery_1.event ORDER BY subquery_1."time") AS events_table, COALESCE(subquery_2.count_pay, (0)::bigint) AS count_pay FROM ((SELECT intermediate_result.user_id, intermediate_result.event, intermediate_result."time" FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, event text, "time" timestamp without time zone)) subquery_1 LEFT JOIN (SELECT intermediate_result.user_id, intermediate_result.count_pay FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, count_pay bigint)) subquery_2 ON ((subquery_1.user_id OPERATOR(pg_catalog.>) subquery_2.user_id))) GROUP BY subquery_1.user_id, subquery_2.count_pay) subquery_top WHERE (array_ndims(events_table) OPERATOR(pg_catalog.>) 0) GROUP BY count_pay, user_id ORDER BY count_pay
|
||||
DEBUG: Collecting INSERT ... SELECT results on coordinator
|
||||
user_id | value_1_agg | value_2_agg
|
||||
---------------------------------------------------------------------
|
||||
(0 rows)
|
||||
|
||||
RESET client_min_messages;
|
||||
---------------------------------------------------------------------
|
||||
---------------------------------------------------------------------
|
||||
|
|
|
@ -679,8 +679,14 @@ SELECT * FROM
|
|||
WHERE
|
||||
user_id > 2 and value_2 = 1) as foo_in ON (event_val_2 = user_id)) as foo LEFT JOIN
|
||||
(SELECT user_id as user_user_id FROM users_table) as fooo ON (user_id = user_user_id)) as bar;
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
user_id
|
||||
---------------------------------------------------------------------
|
||||
|
||||
3
|
||||
5
|
||||
4
|
||||
(4 rows)
|
||||
|
||||
-- we could even support the following where the subquery
|
||||
-- on the outer part of the left join contains a reference table
|
||||
SELECT max(events_all.cnt), events_all.usr_id
|
||||
|
@ -1126,7 +1132,7 @@ count(*) AS cnt, "generated_group_field"
|
|||
84 | 0
|
||||
(6 rows)
|
||||
|
||||
-- RIGHT JOINs used with INNER JOINs should error out since reference table exist in the
|
||||
-- RIGHT JOINs used with INNER JOINs should work even if the reference table exists in the
|
||||
-- right side of the RIGHT JOIN.
|
||||
SELECT
|
||||
count(*) AS cnt, "generated_group_field"
|
||||
|
@ -1164,8 +1170,16 @@ count(*) AS cnt, "generated_group_field"
|
|||
ORDER BY
|
||||
cnt DESC, generated_group_field ASC
|
||||
LIMIT 10;
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
cnt | generated_group_field
|
||||
---------------------------------------------------------------------
|
||||
1007 | 2
|
||||
952 | 5
|
||||
773 | 1
|
||||
696 | 3
|
||||
433 | 4
|
||||
190 | 0
|
||||
(6 rows)
|
||||
|
||||
-- right join where the inner part of the join includes a reference table
|
||||
-- joined with hash partitioned table using non-equi join
|
||||
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event
|
||||
|
@ -1316,8 +1330,27 @@ FROM
|
|||
ORDER BY
|
||||
user_id
|
||||
limit 50;
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
user_id | lastseen
|
||||
---------------------------------------------------------------------
|
||||
1 |
|
||||
1 |
|
||||
2 | Thu Nov 23 17:26:14.563216 2017
|
||||
2 | Thu Nov 23 17:26:14.563216 2017
|
||||
2 | Thu Nov 23 17:26:14.563216 2017
|
||||
3 | Thu Nov 23 18:08:26.550729 2017
|
||||
3 | Thu Nov 23 18:08:26.550729 2017
|
||||
3 | Thu Nov 23 18:08:26.550729 2017
|
||||
4 |
|
||||
4 |
|
||||
4 |
|
||||
5 |
|
||||
5 |
|
||||
5 |
|
||||
5 |
|
||||
5 |
|
||||
6 |
|
||||
(17 rows)
|
||||
|
||||
--
|
||||
-- UNIONs and JOINs with reference tables, should error out
|
||||
--
|
||||
|
@ -1553,9 +1586,6 @@ DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS c
|
|||
67
|
||||
(1 row)
|
||||
|
||||
-- we could not push this query not due to non colocated
|
||||
-- subqueries (i.e., they are recursively planned)
|
||||
-- but due to outer join restrictions
|
||||
SELECT
|
||||
count(*) AS cnt, "generated_group_field"
|
||||
FROM
|
||||
|
@ -1593,9 +1623,20 @@ count(*) AS cnt, "generated_group_field"
|
|||
cnt DESC, generated_group_field ASC
|
||||
LIMIT 10;
|
||||
DEBUG: generating subplan XXX_1 for subquery SELECT user_id, value_2 AS generated_group_field FROM public.users_table users
|
||||
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS cnt, generated_group_field FROM (SELECT "eventQuery".user_id, random() AS random, "eventQuery".generated_group_field FROM (SELECT multi_group_wrapper_1."time", multi_group_wrapper_1.event_user_id, multi_group_wrapper_1.user_id, left_group_by_1.generated_group_field, random() AS random FROM ((SELECT temp_data_queries."time", temp_data_queries.event_user_id, user_filters_1.user_id FROM ((SELECT events."time", events.user_id AS event_user_id FROM public.events_table events WHERE (events.user_id OPERATOR(pg_catalog.>) 2)) temp_data_queries JOIN (SELECT users.user_id FROM public.users_reference_table users WHERE ((users.user_id OPERATOR(pg_catalog.>) 2) AND (users.value_2 OPERATOR(pg_catalog.=) 5))) user_filters_1 ON ((temp_data_queries.event_user_id OPERATOR(pg_catalog.<) user_filters_1.user_id)))) multi_group_wrapper_1 RIGHT JOIN (SELECT intermediate_result.user_id, intermediate_result.generated_group_field FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, generated_group_field integer)) left_group_by_1 ON ((left_group_by_1.user_id OPERATOR(pg_catalog.>) multi_group_wrapper_1.event_user_id)))) "eventQuery") "pushedDownQuery" GROUP BY generated_group_field ORDER BY (count(*)) DESC, generated_group_field LIMIT 10
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table
|
||||
DEBUG: recursively planning left side of the right join since the outer side is a recurring rel
|
||||
DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel
|
||||
DEBUG: generating subplan XXX_2 for subquery SELECT temp_data_queries."time", temp_data_queries.event_user_id, user_filters_1.user_id FROM ((SELECT events."time", events.user_id AS event_user_id FROM public.events_table events WHERE (events.user_id OPERATOR(pg_catalog.>) 2)) temp_data_queries JOIN (SELECT users.user_id FROM public.users_reference_table users WHERE ((users.user_id OPERATOR(pg_catalog.>) 2) AND (users.value_2 OPERATOR(pg_catalog.=) 5))) user_filters_1 ON ((temp_data_queries.event_user_id OPERATOR(pg_catalog.<) user_filters_1.user_id)))
|
||||
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS cnt, generated_group_field FROM (SELECT "eventQuery".user_id, random() AS random, "eventQuery".generated_group_field FROM (SELECT multi_group_wrapper_1."time", multi_group_wrapper_1.event_user_id, multi_group_wrapper_1.user_id, left_group_by_1.generated_group_field, random() AS random FROM ((SELECT intermediate_result."time", intermediate_result.event_user_id, intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result("time" timestamp without time zone, event_user_id integer, user_id integer)) multi_group_wrapper_1 RIGHT JOIN (SELECT intermediate_result.user_id, intermediate_result.generated_group_field FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, generated_group_field integer)) left_group_by_1 ON ((left_group_by_1.user_id OPERATOR(pg_catalog.>) multi_group_wrapper_1.event_user_id)))) "eventQuery") "pushedDownQuery" GROUP BY generated_group_field ORDER BY (count(*)) DESC, generated_group_field LIMIT 10
|
||||
cnt | generated_group_field
|
||||
---------------------------------------------------------------------
|
||||
2042 | 1
|
||||
1675 | 2
|
||||
1470 | 4
|
||||
1259 | 3
|
||||
941 | 0
|
||||
686 | 5
|
||||
(6 rows)
|
||||
|
||||
RESET client_min_messages;
|
||||
-- two hash partitioned relations are not joined
|
||||
-- on partition keys although reference table is fine
|
||||
|
@ -2132,12 +2173,14 @@ SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INN
|
|||
6
|
||||
(1 row)
|
||||
|
||||
-- one example where unsupported outer join is deep inside a subquery
|
||||
-- one example where supported outer join is deep inside a subquery
|
||||
SELECT *, random() FROM (
|
||||
SELECT *,random() FROM user_buy_test_table WHERE user_id > (
|
||||
SELECT count(*) FROM (SELECT *,random() FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as bar) as foo LEFT JOIN (SELECT *, random() FROM (SELECT *,random() FROM user_buy_test_table d1 JOIN user_buy_test_table d2 USING (user_id)) as bar_inner ) as bar ON true)) as boo;
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
user_id | item_id | buy_count | random | random
|
||||
---------------------------------------------------------------------
|
||||
(0 rows)
|
||||
|
||||
SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN user_buy_test_table u1 on ref1.id = u1.user_id) as foo LEFT JOIN user_buy_test_table ON (foo.id = user_buy_test_table.user_id);
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
|
|
|
@ -534,8 +534,13 @@ WHERE
|
|||
GROUP BY user_id
|
||||
HAVING count(*) > 1 AND sum(value_2) > 29
|
||||
ORDER BY 1;
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table
|
||||
user_id
|
||||
---------------------------------------------------------------------
|
||||
2
|
||||
3
|
||||
4
|
||||
(3 rows)
|
||||
|
||||
-- NOT EXISTS query has non-equi join
|
||||
SELECT user_id, array_length(events_table, 1)
|
||||
FROM (
|
||||
|
|
|
@ -715,8 +715,6 @@ DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS c
|
|||
(1 row)
|
||||
|
||||
-- recursive planning should kick in for outer joins as well
|
||||
-- but this time recursive planning might convert the query
|
||||
-- into a not supported join
|
||||
SELECT true AS valid FROM explain_json_2($$
|
||||
|
||||
SELECT
|
||||
|
@ -729,8 +727,15 @@ SELECT true AS valid FROM explain_json_2($$
|
|||
$$);
|
||||
DEBUG: function does not have co-located tables
|
||||
DEBUG: generating subplan XXX_1 for subquery SELECT value_2, random() AS random FROM public.users_table
|
||||
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT users_table.value_2, random() AS random FROM public.users_table) u1 RIGHT JOIN (SELECT intermediate_result.value_2, intermediate_result.random FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer, random double precision)) u2 USING (value_2))
|
||||
ERROR: cannot pushdown the subquery
|
||||
DEBUG: recursively planning left side of the right join since the outer side is a recurring rel
|
||||
DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel
|
||||
DEBUG: generating subplan XXX_2 for subquery SELECT value_2, random() AS random FROM public.users_table
|
||||
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.value_2, intermediate_result.random FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer, random double precision)) u1 RIGHT JOIN (SELECT intermediate_result.value_2, intermediate_result.random FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer, random double precision)) u2 USING (value_2))
|
||||
valid
|
||||
---------------------------------------------------------------------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
-- set operations may produce not very efficient plans
|
||||
-- although we could have picked a as our anchor subquery,
|
||||
-- we pick foo in this case and recursively plan a
|
||||
|
|
|
@ -70,19 +70,34 @@ DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id FROM
|
|||
ERROR: cannot handle complex subqueries when the router executor is disabled
|
||||
SET citus.enable_router_execution TO true;
|
||||
-- OUTER JOINs where the outer part is recursively planned and not the other way
|
||||
-- around is not supported
|
||||
-- around are supported
|
||||
SELECT
|
||||
foo.value_2
|
||||
INTO result_table
|
||||
FROM
|
||||
(SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4) LIMIT 5) as foo
|
||||
(SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4) ORDER BY users_table.value_2 LIMIT 5) as foo
|
||||
LEFT JOIN
|
||||
(SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar
|
||||
ON(foo.value_2 = bar.value_2);
|
||||
DEBUG: push down of limit count: 5
|
||||
DEBUG: generating subplan XXX_1 for subquery SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4]))) LIMIT 5
|
||||
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT foo.value_2 FROM ((SELECT intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) foo LEFT JOIN (SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))) bar ON ((foo.value_2 OPERATOR(pg_catalog.=) bar.value_2)))
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table
|
||||
DEBUG: generating subplan XXX_1 for subquery SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4]))) ORDER BY users_table.value_2 LIMIT 5
|
||||
DEBUG: recursively planning right side of the left join since the outer side is a recurring rel
|
||||
DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel
|
||||
DEBUG: generating subplan XXX_2 for subquery SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))
|
||||
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT foo.value_2 FROM ((SELECT intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) foo LEFT JOIN (SELECT intermediate_result.value_2 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) bar ON ((foo.value_2 OPERATOR(pg_catalog.=) bar.value_2)))
|
||||
SELECT COUNT(*) = 60 FROM result_table WHERE value_2 = 0;
|
||||
?column?
|
||||
---------------------------------------------------------------------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
SELECT COUNT(*) = 0 FROM result_table WHERE value_2 != 0;
|
||||
?column?
|
||||
---------------------------------------------------------------------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
DROP TABLE result_table;
|
||||
-- We do not support GROUPING SETS in subqueries
|
||||
-- This also includes ROLLUP or CUBE clauses
|
||||
SELECT * FROM (SELECT user_id, value_1 FROM users_table GROUP BY GROUPING SETS ((user_id), (value_1))) s;
|
||||
|
|
|
@ -840,8 +840,18 @@ LEFT JOIN
|
|||
WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed
|
||||
ON distinct_undistribured.user_id = exsist_in_distributed.user_id
|
||||
ORDER BY 2 DESC, 1 DESC;
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table
|
||||
user_id | user_id
|
||||
---------------------------------------------------------------------
|
||||
7 |
|
||||
6 |
|
||||
4 |
|
||||
2 |
|
||||
8 | 8
|
||||
5 | 5
|
||||
3 | 3
|
||||
1 | 1
|
||||
(8 rows)
|
||||
|
||||
-- similar query as the above, but this time
|
||||
-- use NOT EXISTS, which is a pretty common struct
|
||||
WITH distinct_undistribured AS
|
||||
|
@ -855,9 +865,20 @@ LEFT JOIN
|
|||
WHERE NOT EXISTS
|
||||
(SELECT NULL
|
||||
FROM distinct_undistribured
|
||||
WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed ON distinct_undistribured.user_id = exsist_in_distributed.user_id;
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table
|
||||
WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed ON distinct_undistribured.user_id = exsist_in_distributed.user_id
|
||||
ORDER BY 1,2;
|
||||
user_id | user_id
|
||||
---------------------------------------------------------------------
|
||||
1 |
|
||||
2 |
|
||||
3 |
|
||||
4 |
|
||||
5 |
|
||||
6 |
|
||||
7 |
|
||||
8 |
|
||||
(8 rows)
|
||||
|
||||
-- same NOT EXISTS struct, but with CTE
|
||||
-- so should work
|
||||
WITH distinct_undistribured AS (
|
||||
|
@ -905,8 +926,11 @@ LEFT JOIN
|
|||
(SELECT NULL
|
||||
FROM distinct_undistribured d2
|
||||
WHERE d1.user_id = d2.user_id )) AS bar USING (user_id);
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
8
|
||||
(1 row)
|
||||
|
||||
-- should work fine with materialized ctes
|
||||
WITH distinct_undistribured AS MATERIALIZED (
|
||||
SELECT DISTINCT user_id
|
||||
|
@ -951,8 +975,18 @@ LEFT JOIN
|
|||
WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed
|
||||
ON distinct_undistribured.user_id = exsist_in_distributed.user_id
|
||||
ORDER BY 2 DESC, 1 DESC;
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table
|
||||
user_id | user_id
|
||||
---------------------------------------------------------------------
|
||||
7 |
|
||||
6 |
|
||||
4 |
|
||||
2 |
|
||||
8 | 8
|
||||
5 | 5
|
||||
3 | 3
|
||||
1 | 1
|
||||
(8 rows)
|
||||
|
||||
WITH distinct_undistribured AS MATERIALIZED
|
||||
(SELECT DISTINCT user_id
|
||||
FROM test_cte)
|
||||
|
@ -964,9 +998,20 @@ LEFT JOIN
|
|||
WHERE NOT EXISTS
|
||||
(SELECT NULL
|
||||
FROM distinct_undistribured
|
||||
WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed ON distinct_undistribured.user_id = exsist_in_distributed.user_id;
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table
|
||||
WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed ON distinct_undistribured.user_id = exsist_in_distributed.user_id
|
||||
ORDER BY 1,2;
|
||||
user_id | user_id
|
||||
---------------------------------------------------------------------
|
||||
1 |
|
||||
2 |
|
||||
3 |
|
||||
4 |
|
||||
5 |
|
||||
6 |
|
||||
7 |
|
||||
8 |
|
||||
(8 rows)
|
||||
|
||||
-- NOT EXISTS struct, with cte inlining disabled
|
||||
WITH distinct_undistribured AS MATERIALIZED(
|
||||
SELECT DISTINCT user_id
|
||||
|
@ -1013,8 +1058,11 @@ LEFT JOIN
|
|||
(SELECT NULL
|
||||
FROM distinct_undistribured d2
|
||||
WHERE d1.user_id = d2.user_id )) AS bar USING (user_id);
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
8
|
||||
(1 row)
|
||||
|
||||
-- some test with failures
|
||||
WITH a AS MATERIALIZED (SELECT * FROM users_table LIMIT 10)
|
||||
SELECT user_id/0 FROM users_table JOIN a USING (user_id);
|
||||
|
|
|
@ -566,8 +566,6 @@ EXECUTE retry_planning(4);
|
|||
EXECUTE retry_planning(5);
|
||||
EXECUTE retry_planning(6);
|
||||
|
||||
-- this test can only work if the CTE is recursively
|
||||
-- planned
|
||||
WITH b AS (SELECT * FROM test_table)
|
||||
SELECT count(*) FROM (SELECT key as x FROM test_table OFFSET 0) as ref LEFT JOIN b ON (ref.x = b.key);
|
||||
|
||||
|
@ -578,8 +576,6 @@ WITH a AS (SELECT * FROM test_table),
|
|||
b AS (SELECT * FROM test_table)
|
||||
SELECT count(*) FROM a LEFT JOIN b ON (a.value = b.value);
|
||||
|
||||
-- cte a has to be recursively planned because of OFFSET 0
|
||||
-- after that, cte b also requires recursive planning
|
||||
WITH a AS (SELECT * FROM test_table OFFSET 0),
|
||||
b AS (SELECT * FROM test_table)
|
||||
SELECT min(a.key) FROM a LEFT JOIN b ON (a.value = b.value);
|
||||
|
|
|
@ -112,7 +112,7 @@ FROM (
|
|||
) t GROUP BY user_id, hasdone_event;
|
||||
|
||||
-- the LEFT JOIN condition is not on the partition column (i.e., it is part_key divided by 2)
|
||||
-- still, recursive planning will kick in to plan some part of the query
|
||||
-- but, we can plan the query thanks to recursive planning
|
||||
SET client_min_messages TO DEBUG1;
|
||||
|
||||
INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg )
|
||||
|
@ -153,7 +153,8 @@ FROM (
|
|||
|
||||
) t2 ON (t1.user_id = (t2.user_id)/2)
|
||||
GROUP BY t1.user_id, hasdone_event
|
||||
) t GROUP BY user_id, hasdone_event;
|
||||
) t GROUP BY user_id, hasdone_event
|
||||
RETURNING user_id, value_1_agg, value_2_agg;
|
||||
RESET client_min_messages;
|
||||
|
||||
------------------------------------
|
||||
|
@ -232,7 +233,7 @@ ORDER BY
|
|||
|
||||
-- not pushable since the JOIN condition is not equi JOIN
|
||||
-- (subquery_1 JOIN subquery_2)
|
||||
-- still, recursive planning will kick in
|
||||
-- but, we can plan the query thanks to recursive planning
|
||||
SET client_min_messages TO DEBUG1;
|
||||
INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg)
|
||||
SELECT
|
||||
|
@ -298,7 +299,8 @@ WHERE
|
|||
GROUP BY
|
||||
count_pay, user_id
|
||||
ORDER BY
|
||||
count_pay;
|
||||
count_pay
|
||||
RETURNING user_id, value_1_agg, value_2_agg;
|
||||
RESET client_min_messages;
|
||||
|
||||
------------------------------------
|
||||
|
|
|
@ -723,7 +723,7 @@ count(*) AS cnt, "generated_group_field"
|
|||
cnt DESC, generated_group_field ASC
|
||||
LIMIT 10;
|
||||
|
||||
-- RIGHT JOINs used with INNER JOINs should error out since reference table exist in the
|
||||
-- RIGHT JOINs used with INNER JOINs should work even if the reference table exist in the
|
||||
-- right side of the RIGHT JOIN.
|
||||
SELECT
|
||||
count(*) AS cnt, "generated_group_field"
|
||||
|
@ -1097,9 +1097,6 @@ SELECT count(*) FROM
|
|||
ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_2
|
||||
WHERE subquery_1.user_id != subquery_2.user_id ;
|
||||
|
||||
-- we could not push this query not due to non colocated
|
||||
-- subqueries (i.e., they are recursively planned)
|
||||
-- but due to outer join restrictions
|
||||
SELECT
|
||||
count(*) AS cnt, "generated_group_field"
|
||||
FROM
|
||||
|
@ -1449,7 +1446,7 @@ SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INN
|
|||
SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON (user_buy_test_table.user_id > 19);
|
||||
SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON (foo.id = user_buy_test_table.user_id);
|
||||
|
||||
-- one example where unsupported outer join is deep inside a subquery
|
||||
-- one example where supported outer join is deep inside a subquery
|
||||
SELECT *, random() FROM (
|
||||
SELECT *,random() FROM user_buy_test_table WHERE user_id > (
|
||||
SELECT count(*) FROM (SELECT *,random() FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as bar) as foo LEFT JOIN (SELECT *, random() FROM (SELECT *,random() FROM user_buy_test_table d1 JOIN user_buy_test_table d2 USING (user_id)) as bar_inner ) as bar ON true)) as boo;
|
||||
|
|
|
@ -532,8 +532,6 @@ $$);
|
|||
|
||||
|
||||
-- recursive planning should kick in for outer joins as well
|
||||
-- but this time recursive planning might convert the query
|
||||
-- into a not supported join
|
||||
SELECT true AS valid FROM explain_json_2($$
|
||||
|
||||
SELECT
|
||||
|
|
|
@ -68,15 +68,19 @@ FROM
|
|||
SET citus.enable_router_execution TO true;
|
||||
|
||||
-- OUTER JOINs where the outer part is recursively planned and not the other way
|
||||
-- around is not supported
|
||||
-- around are supported
|
||||
SELECT
|
||||
foo.value_2
|
||||
INTO result_table
|
||||
FROM
|
||||
(SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4) LIMIT 5) as foo
|
||||
(SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4) ORDER BY users_table.value_2 LIMIT 5) as foo
|
||||
LEFT JOIN
|
||||
(SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar
|
||||
ON(foo.value_2 = bar.value_2);
|
||||
|
||||
SELECT COUNT(*) = 60 FROM result_table WHERE value_2 = 0;
|
||||
SELECT COUNT(*) = 0 FROM result_table WHERE value_2 != 0;
|
||||
DROP TABLE result_table;
|
||||
|
||||
-- We do not support GROUPING SETS in subqueries
|
||||
-- This also includes ROLLUP or CUBE clauses
|
||||
|
|
|
@ -573,7 +573,8 @@ LEFT JOIN
|
|||
WHERE NOT EXISTS
|
||||
(SELECT NULL
|
||||
FROM distinct_undistribured
|
||||
WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed ON distinct_undistribured.user_id = exsist_in_distributed.user_id;
|
||||
WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed ON distinct_undistribured.user_id = exsist_in_distributed.user_id
|
||||
ORDER BY 1,2;
|
||||
|
||||
-- same NOT EXISTS struct, but with CTE
|
||||
-- so should work
|
||||
|
@ -658,7 +659,8 @@ LEFT JOIN
|
|||
WHERE NOT EXISTS
|
||||
(SELECT NULL
|
||||
FROM distinct_undistribured
|
||||
WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed ON distinct_undistribured.user_id = exsist_in_distributed.user_id;
|
||||
WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed ON distinct_undistribured.user_id = exsist_in_distributed.user_id
|
||||
ORDER BY 1,2;
|
||||
|
||||
-- NOT EXISTS struct, with cte inlining disabled
|
||||
WITH distinct_undistribured AS MATERIALIZED(
|
||||
|
|
Loading…
Reference in New Issue