diff --git a/src/backend/distributed/planner/query_pushdown_planning.c b/src/backend/distributed/planner/query_pushdown_planning.c index 0cb935a13..07bb8a9b2 100644 --- a/src/backend/distributed/planner/query_pushdown_planning.c +++ b/src/backend/distributed/planner/query_pushdown_planning.c @@ -928,19 +928,91 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi "functions"; } - if (subqueryTree->limitOffset) + /* + * Correlated subqueries are effectively functions that are repeatedly called + * for the values of the vars that point to the outer query. We can liberally + * push down SQL features within such a function, as long as co-located join + * checks are applied. + */ + if (!contain_vars_of_level((Node *) subqueryTree, 1)) { - preconditionsSatisfied = false; - errorDetail = "Offset clause is currently unsupported when a subquery " - "references a column from another query"; - } + if (subqueryTree->limitOffset) + { + preconditionsSatisfied = false; + errorDetail = "Offset clause is currently unsupported when a subquery " + "references a column from another query"; + } - /* limit is not supported when SubqueryPushdown is not set */ - if (subqueryTree->limitCount && !SubqueryPushdown) - { - preconditionsSatisfied = false; - errorDetail = "Limit in subquery is currently unsupported when a " - "subquery references a column from another query"; + /* limit is not supported when SubqueryPushdown is not set */ + if (subqueryTree->limitCount && !SubqueryPushdown) + { + preconditionsSatisfied = false; + errorDetail = "Limit in subquery is currently unsupported when a " + "subquery references a column from another query"; + } + + /* group clause list must include partition column */ + if (subqueryTree->groupClause) + { + List *groupClauseList = subqueryTree->groupClause; + List *targetEntryList = subqueryTree->targetList; + List *groupTargetEntryList = GroupTargetEntryList(groupClauseList, + targetEntryList); + bool groupOnPartitionColumn = + TargetListOnPartitionColumn(subqueryTree, groupTargetEntryList); + if (!groupOnPartitionColumn) + { + preconditionsSatisfied = false; + errorDetail = "Group by list without partition column is currently " + "unsupported when a subquery references a column " + "from another query"; + } + } + + /* we don't support aggregates without group by */ + if (subqueryTree->hasAggs && (subqueryTree->groupClause == NULL)) + { + preconditionsSatisfied = false; + errorDetail = "Aggregates without group by are currently unsupported " + "when a subquery references a column from another query"; + } + + /* having clause without group by on partition column is not supported */ + if (subqueryTree->havingQual && (subqueryTree->groupClause == NULL)) + { + preconditionsSatisfied = false; + errorDetail = "Having qual without group by on partition column is " + "currently unsupported when a subquery references " + "a column from another query"; + } + + /* + * We support window functions when the window function + * is partitioned on distribution column. + */ + if (subqueryTree->hasWindowFuncs && !SafeToPushdownWindowFunction(subqueryTree, + &errorInfo)) + { + errorDetail = (char *) errorInfo->data; + preconditionsSatisfied = false; + } + + /* distinct clause list must include partition column */ + if (subqueryTree->distinctClause) + { + List *distinctClauseList = subqueryTree->distinctClause; + List *targetEntryList = subqueryTree->targetList; + List *distinctTargetEntryList = GroupTargetEntryList(distinctClauseList, + targetEntryList); + bool distinctOnPartitionColumn = + TargetListOnPartitionColumn(subqueryTree, distinctTargetEntryList); + if (!distinctOnPartitionColumn) + { + preconditionsSatisfied = false; + errorDetail = "Distinct on columns without partition column is " + "currently unsupported"; + } + } } /* @@ -981,24 +1053,6 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi errorDetail = "For Update/Share commands are currently unsupported"; } - /* group clause list must include partition column */ - if (subqueryTree->groupClause) - { - List *groupClauseList = subqueryTree->groupClause; - List *targetEntryList = subqueryTree->targetList; - List *groupTargetEntryList = GroupTargetEntryList(groupClauseList, - targetEntryList); - bool groupOnPartitionColumn = TargetListOnPartitionColumn(subqueryTree, - groupTargetEntryList); - if (!groupOnPartitionColumn) - { - preconditionsSatisfied = false; - errorDetail = "Group by list without partition column is currently " - "unsupported when a subquery references a column " - "from another query"; - } - } - /* grouping sets are not allowed in subqueries*/ if (subqueryTree->groupingSets) { @@ -1007,51 +1061,6 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi "or ROLLUP"; } - /* - * We support window functions when the window function - * is partitioned on distribution column. - */ - if (subqueryTree->hasWindowFuncs && !SafeToPushdownWindowFunction(subqueryTree, - &errorInfo)) - { - errorDetail = (char *) errorInfo->data; - preconditionsSatisfied = false; - } - - /* we don't support aggregates without group by */ - if (subqueryTree->hasAggs && (subqueryTree->groupClause == NULL)) - { - preconditionsSatisfied = false; - errorDetail = "Aggregates without group by are currently unsupported " - "when a subquery references a column from another query"; - } - - /* having clause without group by on partition column is not supported */ - if (subqueryTree->havingQual && (subqueryTree->groupClause == NULL)) - { - preconditionsSatisfied = false; - errorDetail = "Having qual without group by on partition column is " - "currently unsupported when a subquery references " - "a column from another query"; - } - - /* distinct clause list must include partition column */ - if (subqueryTree->distinctClause) - { - List *distinctClauseList = subqueryTree->distinctClause; - List *targetEntryList = subqueryTree->targetList; - List *distinctTargetEntryList = GroupTargetEntryList(distinctClauseList, - targetEntryList); - bool distinctOnPartitionColumn = - TargetListOnPartitionColumn(subqueryTree, distinctTargetEntryList); - if (!distinctOnPartitionColumn) - { - preconditionsSatisfied = false; - errorDetail = "Distinct on columns without partition column is " - "currently unsupported"; - } - } - deferredError = DeferErrorIfFromClauseRecurs(subqueryTree); if (deferredError) { diff --git a/src/test/regress/expected/multi_insert_select_window.out b/src/test/regress/expected/multi_insert_select_window.out index 2f522b127..0cf605990 100644 --- a/src/test/regress/expected/multi_insert_select_window.out +++ b/src/test/regress/expected/multi_insert_select_window.out @@ -773,8 +773,6 @@ WHERE ) GROUP BY user_id; -ERROR: cannot push down this subquery -DETAIL: Window functions without PARTITION BY on distribution column is currently unsupported INSERT INTO agg_results_window(user_id, value_2_agg) SELECT * FROM ( SELECT diff --git a/src/test/regress/expected/multi_mx_router_planner.out b/src/test/regress/expected/multi_mx_router_planner.out index 9486225c5..f122bb4d0 100644 --- a/src/test/regress/expected/multi_mx_router_planner.out +++ b/src/test/regress/expected/multi_mx_router_planner.out @@ -488,7 +488,6 @@ DEBUG: query has a single distribution column value: 1 SELECT a.title AS name, (SELECT a2.id FROM articles_single_shard_hash_mx a2 WHERE a.id = a2.id LIMIT 1) AS special_price FROM articles_hash_mx a; DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: skipping recursive planning for the subquery since it contains references to outer queries ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -- simple lookup query SELECT * diff --git a/src/test/regress/expected/multi_router_planner.out b/src/test/regress/expected/multi_router_planner.out index f55c98050..047d77176 100644 --- a/src/test/regress/expected/multi_router_planner.out +++ b/src/test/regress/expected/multi_router_planner.out @@ -677,7 +677,6 @@ DEBUG: query has a single distribution column value: 1 SELECT a.title AS name, (SELECT a2.id FROM articles_single_shard_hash a2 WHERE a.id = a2.id LIMIT 1) AS special_price FROM articles_hash a; DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: skipping recursive planning for the subquery since it contains references to outer queries ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -- simple lookup query SELECT * diff --git a/src/test/regress/expected/multi_router_planner_fast_path.out b/src/test/regress/expected/multi_router_planner_fast_path.out index 003c78162..1fef5c721 100644 --- a/src/test/regress/expected/multi_router_planner_fast_path.out +++ b/src/test/regress/expected/multi_router_planner_fast_path.out @@ -423,7 +423,6 @@ DEBUG: query has a single distribution column value: 1 SELECT a.title AS name, (SELECT a2.id FROM articles_hash a2 WHERE a.id = a2.id LIMIT 1) AS special_price FROM articles_hash a; DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: skipping recursive planning for the subquery since it contains references to outer queries ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -- simple lookup query just works SELECT * diff --git a/src/test/regress/expected/multi_subquery_complex_queries.out b/src/test/regress/expected/multi_subquery_complex_queries.out index d9309efe2..1d526f575 100644 --- a/src/test/regress/expected/multi_subquery_complex_queries.out +++ b/src/test/regress/expected/multi_subquery_complex_queries.out @@ -1275,8 +1275,16 @@ FROM ORDER BY user_id limit 50; -ERROR: cannot push down this subquery -DETAIL: Limit in subquery is currently unsupported when a subquery references a column from another query + user_id | lastseen +--------------------------------------------------------------------- + 2 | Thu Nov 23 17:26:14.563216 2017 + 3 | Thu Nov 23 18:08:26.550729 2017 + 3 | Thu Nov 23 17:26:14.563216 2017 + 5 | Thu Nov 23 18:08:26.550729 2017 + 5 | Thu Nov 23 17:26:14.563216 2017 + 6 | Thu Nov 23 18:08:26.550729 2017 +(6 rows) + -- we recursively plan some queries but fail in the end -- since some_users_data since it has a reference -- from an outer query which is not recursively planned @@ -1312,8 +1320,12 @@ FROM ORDER BY user_id limit 50; -ERROR: cannot push down this subquery -DETAIL: Limit in subquery is currently unsupported when a subquery references a column from another query + user_id | lastseen +--------------------------------------------------------------------- + 4 | Thu Nov 23 17:26:14.563216 2017 + 6 | Thu Nov 23 18:08:26.550729 2017 +(2 rows) + -- LATERAL JOINs used with INNER JOINs SET citus.subquery_pushdown to ON; NOTICE: Setting citus.subquery_pushdown flag is discouraged becuase it forces the planner to pushdown certain queries, skipping relevant correctness checks. @@ -1502,11 +1514,28 @@ LIMIT 10; DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM public.users_table users WHERE ((user_id OPERATOR(pg_catalog.>) 1) AND (user_id OPERATOR(pg_catalog.<) 4) AND (value_2 OPERATOR(pg_catalog.>) 3)) -DEBUG: skipping recursive planning for the subquery since it contains references to outer queries DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: skipping recursive planning for the subquery since it contains references to outer queries -ERROR: cannot push down this subquery -DETAIL: Limit in subquery is currently unsupported when a subquery references a column from another query +DEBUG: push down of limit count: 10 +DEBUG: generating subplan XXX_2 for subquery SELECT filter_users_1.user_id, last_events_1."time" AS lastseen FROM ((SELECT user_where_1_1.user_id FROM ((SELECT users.user_id FROM public.users_table users WHERE ((users.user_id OPERATOR(pg_catalog.>) 1) AND (users.user_id OPERATOR(pg_catalog.<) 4) AND (users.value_1 OPERATOR(pg_catalog.>) 2))) user_where_1_1 JOIN (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) user_where_1_join_1 ON ((user_where_1_1.user_id OPERATOR(pg_catalog.<>) user_where_1_join_1.user_id)))) filter_users_1 JOIN LATERAL (SELECT events.user_id, events."time" FROM public.events_table events WHERE ((events.user_id OPERATOR(pg_catalog.>) 1) AND (events.user_id OPERATOR(pg_catalog.<) 4) AND (events.user_id OPERATOR(pg_catalog.=) filter_users_1.user_id)) ORDER BY events."time" DESC LIMIT 1) last_events_1 ON (true)) ORDER BY last_events_1."time" DESC LIMIT 10 +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: push down of limit count: 10 +DEBUG: generating subplan XXX_3 for subquery SELECT some_users_data.user_id, some_recent_users.lastseen FROM ((SELECT intermediate_result.user_id, intermediate_result.lastseen FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, lastseen timestamp without time zone)) some_recent_users JOIN LATERAL (SELECT users.user_id FROM public.users_table users WHERE ((users.user_id OPERATOR(pg_catalog.=) some_recent_users.user_id) AND (users.value_2 OPERATOR(pg_catalog.>) 4)) LIMIT 1) some_users_data ON (true)) ORDER BY some_recent_users.lastseen DESC LIMIT 10 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, lastseen FROM (SELECT intermediate_result.user_id, intermediate_result.lastseen FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, lastseen timestamp without time zone)) some_users ORDER BY user_id DESC, lastseen DESC LIMIT 10 +DEBUG: Creating router plan + user_id | lastseen +--------------------------------------------------------------------- + 3 | Thu Nov 23 18:08:26.550729 2017 + 3 | Thu Nov 23 18:08:26.550729 2017 + 3 | Thu Nov 23 18:08:26.550729 2017 + 3 | Thu Nov 23 18:08:26.550729 2017 + 3 | Thu Nov 23 18:08:26.550729 2017 + 3 | Thu Nov 23 18:08:26.550729 2017 + 3 | Thu Nov 23 18:08:26.550729 2017 + 3 | Thu Nov 23 18:08:26.550729 2017 + 3 | Thu Nov 23 18:08:26.550729 2017 + 3 | Thu Nov 23 18:08:26.550729 2017 +(10 rows) + SET citus.enable_repartition_joins to ON; SET client_min_messages TO DEBUG1; -- recursively planner since the inner JOIN is not on the partition key @@ -1563,8 +1592,25 @@ ORDER BY user_id DESC, lastseen DESC LIMIT 10; DEBUG: generating subplan XXX_1 for subquery SELECT user_id, value_1 FROM public.users_table users WHERE ((user_id OPERATOR(pg_catalog.>) 1) AND (user_id OPERATOR(pg_catalog.<) 4) AND (value_2 OPERATOR(pg_catalog.>) 3)) -ERROR: cannot push down this subquery -DETAIL: Limit in subquery is currently unsupported when a subquery references a column from another query +DEBUG: push down of limit count: 10 +DEBUG: generating subplan XXX_2 for subquery SELECT filter_users_1.user_id, last_events_1."time" AS lastseen FROM ((SELECT user_where_1_1.user_id FROM ((SELECT users.user_id FROM public.users_table users WHERE ((users.user_id OPERATOR(pg_catalog.>) 1) AND (users.user_id OPERATOR(pg_catalog.<) 4) AND (users.value_1 OPERATOR(pg_catalog.>) 2))) user_where_1_1 JOIN (SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer)) user_where_1_join_1 ON ((user_where_1_1.user_id OPERATOR(pg_catalog.=) user_where_1_join_1.value_1)))) filter_users_1 JOIN LATERAL (SELECT events.user_id, events."time" FROM public.events_table events WHERE ((events.user_id OPERATOR(pg_catalog.>) 1) AND (events.user_id OPERATOR(pg_catalog.<) 4) AND (events.user_id OPERATOR(pg_catalog.=) filter_users_1.user_id)) ORDER BY events."time" DESC LIMIT 1) last_events_1 ON (true)) ORDER BY last_events_1."time" DESC LIMIT 10 +DEBUG: push down of limit count: 10 +DEBUG: generating subplan XXX_3 for subquery SELECT some_users_data.user_id, some_recent_users.lastseen FROM ((SELECT intermediate_result.user_id, intermediate_result.lastseen FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, lastseen timestamp without time zone)) some_recent_users JOIN LATERAL (SELECT users.user_id FROM public.users_table users WHERE ((users.user_id OPERATOR(pg_catalog.=) some_recent_users.user_id) AND (users.value_2 OPERATOR(pg_catalog.>) 4)) LIMIT 1) some_users_data ON (true)) ORDER BY some_recent_users.lastseen DESC LIMIT 10 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, lastseen FROM (SELECT intermediate_result.user_id, intermediate_result.lastseen FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, lastseen timestamp without time zone)) some_users ORDER BY user_id DESC, lastseen DESC LIMIT 10 + user_id | lastseen +--------------------------------------------------------------------- + 3 | Thu Nov 23 18:08:26.550729 2017 + 3 | Thu Nov 23 18:08:26.550729 2017 + 3 | Thu Nov 23 18:08:26.550729 2017 + 3 | Thu Nov 23 18:08:26.550729 2017 + 3 | Thu Nov 23 18:08:26.550729 2017 + 3 | Thu Nov 23 18:08:26.550729 2017 + 3 | Thu Nov 23 18:08:26.550729 2017 + 3 | Thu Nov 23 18:08:26.550729 2017 + 3 | Thu Nov 23 18:08:26.550729 2017 + 3 | Thu Nov 23 18:08:26.550729 2017 +(10 rows) + SET citus.enable_repartition_joins to OFF; RESET client_min_messages; -- not supported since upper LATERAL JOIN is not equi join @@ -1676,8 +1722,20 @@ FROM ORDER BY user_id DESC, lastseen DESC LIMIT 10; -ERROR: cannot push down this subquery -DETAIL: Limit in subquery is currently unsupported when a subquery references a column from another query + user_id | lastseen +--------------------------------------------------------------------- + 5 | Thu Nov 23 17:26:14.563216 2017 + 5 | Thu Nov 23 17:26:14.563216 2017 + 5 | Thu Nov 23 17:26:14.563216 2017 + 5 | Thu Nov 23 17:26:14.563216 2017 + 5 | Thu Nov 23 17:26:14.563216 2017 + 5 | Thu Nov 23 17:26:14.563216 2017 + 5 | Thu Nov 23 17:26:14.563216 2017 + 5 | Thu Nov 23 17:26:14.563216 2017 + 5 | Thu Nov 23 17:26:14.563216 2017 + 5 | Thu Nov 23 17:26:14.563216 2017 +(10 rows) + -- NESTED INNER JOINs SELECT count(*) AS value, "generated_group_field" @@ -2321,8 +2379,10 @@ FROM ORDER BY value_2 DESC, user_id DESC LIMIT 10; -ERROR: cannot push down this subquery -DETAIL: Limit in subquery is currently unsupported when a subquery references a column from another query + user_id | value_2 +--------------------------------------------------------------------- +(0 rows) + -- lets test some unsupported set operations -- not supported since we use INTERSECT SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType diff --git a/src/test/regress/expected/multi_subquery_in_where_clause.out b/src/test/regress/expected/multi_subquery_in_where_clause.out index a7cac9cbc..c5066b275 100644 --- a/src/test/regress/expected/multi_subquery_in_where_clause.out +++ b/src/test/regress/expected/multi_subquery_in_where_clause.out @@ -610,8 +610,10 @@ WHERE user_id OFFSET 3 ); -ERROR: cannot push down this subquery -DETAIL: Offset clause is currently unsupported when a subquery references a column from another query + user_id +--------------------------------------------------------------------- +(0 rows) + -- we can detect unsupported subqueries even if they appear -- in WHERE subquery -> FROM subquery -> WHERE subquery -- but we can recursively plan that anyway diff --git a/src/test/regress/expected/subqueries_not_supported.out b/src/test/regress/expected/subqueries_not_supported.out index 0bb5da386..fcd33386e 100644 --- a/src/test/regress/expected/subqueries_not_supported.out +++ b/src/test/regress/expected/subqueries_not_supported.out @@ -83,46 +83,6 @@ DEBUG: generating subplan XXX_1 for subquery SELECT users_table.value_2 FROM pu DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT foo.value_2 FROM ((SELECT intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) foo LEFT JOIN (SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))) bar ON ((foo.value_2 OPERATOR(pg_catalog.=) bar.value_2))) ERROR: cannot pushdown the subquery DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join --- Aggregates in subquery without partition column can be planned recursively --- unless there is a reference to an outer query -SELECT - * -FROM - users_table -WHERE - user_id IN - ( - SELECT - SUM(events_table.user_id) - FROM - events_table - WHERE - users_table.user_id = events_table.user_id - ) -; -ERROR: cannot push down this subquery -DETAIL: Aggregates without group by are currently unsupported when a subquery references a column from another query --- Having qual without group by on partition column can be planned recursively --- unless there is a reference to an outer query -SELECT - * -FROM - users_table -WHERE - user_id IN - ( - SELECT - SUM(events_table.user_id) - FROM - events_table - WHERE - events_table.user_id = users_table.user_id - HAVING - MIN(value_2) > 2 - ) -; -ERROR: cannot push down this subquery -DETAIL: Having qual without group by on partition column is currently unsupported when a subquery references a column from another query -- We do not support GROUPING SETS in subqueries -- This also includes ROLLUP or CUBE clauses SELECT * FROM (SELECT user_id, value_1 FROM users_table GROUP BY GROUPING SETS ((user_id), (value_1))) s; diff --git a/src/test/regress/expected/subquery_in_targetlist.out b/src/test/regress/expected/subquery_in_targetlist.out index d3352e32a..d266ff3b0 100644 --- a/src/test/regress/expected/subquery_in_targetlist.out +++ b/src/test/regress/expected/subquery_in_targetlist.out @@ -31,14 +31,20 @@ ORDER BY 1,2 LIMIT 1; SELECT event_type, (SELECT max(time) FROM users_table WHERE user_id = e.user_id) FROM events_table e ORDER BY 1,2 LIMIT 1; -ERROR: cannot push down this subquery -DETAIL: Aggregates without group by are currently unsupported when a subquery references a column from another query + event_type | max +--------------------------------------------------------------------- + 0 | Thu Nov 23 13:52:54.83829 2017 +(1 row) + -- correlated subquery wtth limit SELECT event_type, (SELECT time FROM users_table WHERE user_id = e.user_id ORDER BY time LIMIT 1) FROM events_table e ORDER BY 1,2 LIMIT 1; -ERROR: cannot push down this subquery -DETAIL: Limit in subquery is currently unsupported when a subquery references a column from another query + event_type | time +--------------------------------------------------------------------- + 0 | Wed Nov 22 18:19:49.944985 2017 +(1 row) + -- correlated subquery with group by distribution column SELECT event_type, (SELECT max(time) FROM users_table WHERE user_id = e.user_id GROUP BY user_id) FROM events_table e @@ -52,8 +58,11 @@ ORDER BY 1,2 LIMIT 1; SELECT event_type, (SELECT max(time) FROM users_table WHERE user_id = e.user_id GROUP BY e.user_id) FROM events_table e ORDER BY 1,2 LIMIT 1; -ERROR: cannot push down this subquery -DETAIL: Group by list without partition column is currently unsupported when a subquery references a column from another query + event_type | max +--------------------------------------------------------------------- + 0 | Thu Nov 23 13:52:54.83829 2017 +(1 row) + -- correlated subquery co-located join in outer query SELECT event_type, (SELECT max(time) FROM users_table WHERE user_id = e.user_id GROUP BY user_id) FROM users_table u JOIN events_table e USING (user_id) @@ -81,8 +90,11 @@ ERROR: complex joins are only supported when all distributed tables are co-loca SELECT event_type, (SELECT max(time) FROM users_reference_table WHERE user_id = e.value_2) FROM events_table e ORDER BY 1,2 LIMIT 1; -ERROR: cannot push down this subquery -DETAIL: Aggregates without group by are currently unsupported when a subquery references a column from another query + event_type | max +--------------------------------------------------------------------- + 0 | Thu Nov 23 13:52:54.83829 2017 +(1 row) + -- correlated subquery with reference table and group by SELECT event_type, (SELECT max(time) FROM users_reference_table WHERE user_id = e.value_2 GROUP BY user_id) FROM events_table e diff --git a/src/test/regress/expected/subquery_in_where.out b/src/test/regress/expected/subquery_in_where.out index 39221a7e5..138c2fcc9 100644 --- a/src/test/regress/expected/subquery_in_where.out +++ b/src/test/regress/expected/subquery_in_where.out @@ -691,6 +691,48 @@ DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS c 101 (1 row) +-- correlated subquery with aggregate in WHERE +SELECT + * +FROM + users_table +WHERE + user_id IN + ( + SELECT + SUM(events_table.user_id) + FROM + events_table + WHERE + users_table.user_id = events_table.user_id + ) +; + user_id | time | value_1 | value_2 | value_3 | value_4 +--------------------------------------------------------------------- +(0 rows) + +-- correlated subquery with aggregate in HAVING +SELECT + * +FROM + users_table +WHERE + user_id IN + ( + SELECT + SUM(events_table.user_id) + FROM + events_table + WHERE + events_table.user_id = users_table.user_id + HAVING + MIN(value_2) > 2 + ) +; + user_id | time | value_1 | value_2 | value_3 | value_4 +--------------------------------------------------------------------- +(0 rows) + -- Local tables also planned recursively, so using it as part of the FROM clause -- make the clause recurring CREATE TABLE local_table(id int, value_1 int); diff --git a/src/test/regress/expected/with_set_operations.out b/src/test/regress/expected/with_set_operations.out index c3ccbd4bd..461a184e5 100644 --- a/src/test/regress/expected/with_set_operations.out +++ b/src/test/regress/expected/with_set_operations.out @@ -413,7 +413,14 @@ DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT user_id FROM public.users DEBUG: generating subplan XXX_2 for CTE cte_1: SELECT user_id FROM public.users_table DEBUG: generating subplan XXX_3 for subquery SELECT cte_1.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte_1 UNION SELECT cte_1.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte_1 DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT DISTINCT user_id FROM public.events_table WHERE (event_type OPERATOR(pg_catalog.=) ANY (SELECT users_table.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) foo, public.users_table WHERE ((users_table.value_2 OPERATOR(pg_catalog.=) foo.user_id) AND (events_table.user_id OPERATOR(pg_catalog.=) users_table.user_id)) LIMIT 5)) ORDER BY user_id DESC -ERROR: cannot push down this subquery -DETAIL: Limit in subquery is currently unsupported when a subquery references a column from another query + user_id +--------------------------------------------------------------------- + 5 + 4 + 3 + 2 + 1 +(5 rows) + SET client_min_messages TO DEFAULT; SET search_path TO public; diff --git a/src/test/regress/sql/subqueries_not_supported.sql b/src/test/regress/sql/subqueries_not_supported.sql index b08fe254c..242623a3f 100644 --- a/src/test/regress/sql/subqueries_not_supported.sql +++ b/src/test/regress/sql/subqueries_not_supported.sql @@ -78,46 +78,6 @@ FROM ON(foo.value_2 = bar.value_2); --- Aggregates in subquery without partition column can be planned recursively --- unless there is a reference to an outer query -SELECT - * -FROM - users_table -WHERE - user_id IN - ( - SELECT - SUM(events_table.user_id) - FROM - events_table - WHERE - users_table.user_id = events_table.user_id - ) -; - - --- Having qual without group by on partition column can be planned recursively --- unless there is a reference to an outer query -SELECT - * -FROM - users_table -WHERE - user_id IN - ( - SELECT - SUM(events_table.user_id) - FROM - events_table - WHERE - events_table.user_id = users_table.user_id - HAVING - MIN(value_2) > 2 - ) -; - - -- We do not support GROUPING SETS in subqueries -- This also includes ROLLUP or CUBE clauses SELECT * FROM (SELECT user_id, value_1 FROM users_table GROUP BY GROUPING SETS ((user_id), (value_1))) s; diff --git a/src/test/regress/sql/subquery_in_where.sql b/src/test/regress/sql/subquery_in_where.sql index 884307845..539fa27f7 100644 --- a/src/test/regress/sql/subquery_in_where.sql +++ b/src/test/regress/sql/subquery_in_where.sql @@ -502,6 +502,43 @@ WHERE value_1 IN (SELECT value_1 FROM users_Table) OR (EXISTS (SELECT * FROM events_table)); +-- correlated subquery with aggregate in WHERE +SELECT + * +FROM + users_table +WHERE + user_id IN + ( + SELECT + SUM(events_table.user_id) + FROM + events_table + WHERE + users_table.user_id = events_table.user_id + ) +; + +-- correlated subquery with aggregate in HAVING +SELECT + * +FROM + users_table +WHERE + user_id IN + ( + SELECT + SUM(events_table.user_id) + FROM + events_table + WHERE + events_table.user_id = users_table.user_id + HAVING + MIN(value_2) > 2 + ) +; + + -- Local tables also planned recursively, so using it as part of the FROM clause -- make the clause recurring CREATE TABLE local_table(id int, value_1 int);