diff --git a/src/backend/distributed/planner/multi_logical_optimizer.c b/src/backend/distributed/planner/multi_logical_optimizer.c index c32bbe7a0..27b28653a 100644 --- a/src/backend/distributed/planner/multi_logical_optimizer.c +++ b/src/backend/distributed/planner/multi_logical_optimizer.c @@ -2978,12 +2978,12 @@ FindReferencedTableColumn(Expr *columnExpression, List *parentQueryList, Query * /* * ExtractQueryWalker walks over a query, and finds all queries in the query - * tree and returns these queries. + * tree and returns these queries. Note that the function also recurses into + * the subqueries in WHERE clause. */ bool ExtractQueryWalker(Node *node, List **queryList) { - bool walkerResult = false; if (node == NULL) { return false; @@ -2994,11 +2994,10 @@ ExtractQueryWalker(Node *node, List **queryList) Query *query = (Query *) node; (*queryList) = lappend(*queryList, query); - walkerResult = query_tree_walker(query, ExtractQueryWalker, queryList, - QTW_EXAMINE_RTES); + return query_tree_walker(query, ExtractQueryWalker, queryList, 0); } - return walkerResult; + return expression_tree_walker(node, ExtractQueryWalker, queryList); } diff --git a/src/backend/distributed/planner/multi_logical_planner.c b/src/backend/distributed/planner/multi_logical_planner.c index 1fb012375..99c889876 100644 --- a/src/backend/distributed/planner/multi_logical_planner.c +++ b/src/backend/distributed/planner/multi_logical_planner.c @@ -394,11 +394,9 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery, PlannerRestrictionContext * plannerRestrictionContext) { - ListCell *rangeTableEntryCell = NULL; - ListCell *sublinkCell = NULL; - List *subqueryEntryList = NIL; bool outerMostQueryHasLimit = false; - List *sublinkList = NIL; + ListCell *subqueryCell = NULL; + List *subqueryList = NIL; DeferredErrorMessage *error = NULL; RelationRestrictionContext *relationRestrictionContext = plannerRestrictionContext->relationRestrictionContext; @@ -437,13 +435,20 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery, "equality operator.", NULL); } - subqueryEntryList = SubqueryEntryList(originalQuery); - foreach(rangeTableEntryCell, subqueryEntryList) - { - RangeTblEntry *rangeTableEntry = lfirst(rangeTableEntryCell); - Query *subquery = rangeTableEntry->subquery; + /* + * We first extract all the queries that appear in the orignal query. Later, + * we delete the original query given that error rules does not apply to the + * top level query. + */ + ExtractQueryWalker((Node *) originalQuery, &subqueryList); + subqueryList = list_delete(subqueryList, originalQuery); - error = DeferErrorIfCannotPushdownSubquery(subquery, outerMostQueryHasLimit); + /* iterate on the subquery list and error out accordingly */ + foreach(subqueryCell, subqueryList) + { + Query *subquery = lfirst(subqueryCell); + error = DeferErrorIfCannotPushdownSubquery(subquery, + outerMostQueryHasLimit); if (error) { return error; @@ -456,23 +461,6 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery, } } - sublinkList = SublinkList(originalQuery); - foreach(sublinkCell, sublinkList) - { - SubLink *sublink = (SubLink *) lfirst(sublinkCell); - Node *subselect = sublink->subselect; - - if (subselect && IsA(subselect, Query)) - { - error = DeferErrorIfCannotPushdownSubquery((Query *) subselect, - outerMostQueryHasLimit); - if (error) - { - return error; - } - } - } - return NULL; } @@ -489,6 +477,18 @@ DeferErrorIfUnsupportedFilters(Query *subquery) ListCell *queryCell = NULL; List *subqueryOpExpressionList = NIL; List *relationIdList = RelationIdList(subquery); + Var *partitionColumn = NULL; + Oid relationId = InvalidOid; + + /* + * If there are no appropriate relations, we're going to error out on + * DeferErrorIfCannotPushdownSubquery(). It may happen once the subquery + * does not include a relation. + */ + if (relationIdList == NIL) + { + return NULL; + } /* * Get relation id of any relation in the subquery and create partiton column @@ -496,8 +496,8 @@ DeferErrorIfUnsupportedFilters(Query *subquery) * expressions on different tables. Then we compare these operator expressions * to see if they consist of same operator and constant value. */ - Oid relationId = linitial_oid(relationIdList); - Var *partitionColumn = PartitionColumn(relationId, 0); + relationId = linitial_oid(relationIdList); + partitionColumn = PartitionColumn(relationId, 0); ExtractQueryWalker((Node *) subquery, &queryList); foreach(queryCell, queryList) @@ -617,8 +617,6 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi { bool preconditionsSatisfied = true; char *errorDetail = NULL; - List *subqueryEntryList = NIL; - ListCell *rangeTableEntryCell = NULL; DeferredErrorMessage *deferredError = NULL; deferredError = DeferErrorIfUnsupportedTableCombination(subqueryTree); @@ -748,41 +746,20 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi errorDetail, NULL); } - /* recursively do same check for subqueries of this query */ - subqueryEntryList = SubqueryEntryList(subqueryTree); - foreach(rangeTableEntryCell, subqueryEntryList) - { - RangeTblEntry *rangeTableEntry = - (RangeTblEntry *) lfirst(rangeTableEntryCell); - - Query *innerSubquery = rangeTableEntry->subquery; - deferredError = DeferErrorIfCannotPushdownSubquery(innerSubquery, - outerMostQueryHasLimit); - if (deferredError) - { - return deferredError; - } - } - return NULL; } /* * DeferErrorIfUnsupportedUnionQuery is a helper function for ErrorIfCannotPushdownSubquery(). - * It basically iterates over the subqueries that reside under the given set operations. - * * The function also errors out for set operations INTERSECT and EXCEPT. */ static DeferredErrorMessage * DeferErrorIfUnsupportedUnionQuery(Query *subqueryTree, bool outerMostQueryHasLimit) { - List *rangeTableIndexList = NIL; - ListCell *rangeTableIndexCell = NULL; List *setOperationStatementList = NIL; ListCell *setOperationStatmentCell = NULL; - List *rangeTableList = subqueryTree->rtable; ExtractSetOperationStatmentWalker((Node *) subqueryTree->setOperations, &setOperationStatementList); @@ -799,24 +776,6 @@ DeferErrorIfUnsupportedUnionQuery(Query *subqueryTree, } } - ExtractRangeTableIndexWalker((Node *) subqueryTree->setOperations, - &rangeTableIndexList); - foreach(rangeTableIndexCell, rangeTableIndexList) - { - int rangeTableIndex = lfirst_int(rangeTableIndexCell); - RangeTblEntry *rangeTableEntry = rt_fetch(rangeTableIndex, rangeTableList); - DeferredErrorMessage *deferredError = NULL; - - Assert(rangeTableEntry->rtekind == RTE_SUBQUERY); - - deferredError = DeferErrorIfCannotPushdownSubquery(rangeTableEntry->subquery, - outerMostQueryHasLimit); - if (deferredError) - { - return deferredError; - } - } - return NULL; } diff --git a/src/backend/distributed/planner/multi_router_planner.c b/src/backend/distributed/planner/multi_router_planner.c index 5fe534e9c..a694c8b8d 100644 --- a/src/backend/distributed/planner/multi_router_planner.c +++ b/src/backend/distributed/planner/multi_router_planner.c @@ -2981,8 +2981,6 @@ CopyRelationRestrictionContext(RelationRestrictionContext *oldContext) /* not copyable, but readonly */ newRestriction->plannerInfo = oldRestriction->plannerInfo; - newRestriction->prunedShardIntervalList = - copyObject(oldRestriction->prunedShardIntervalList); newContext->relationRestrictionList = lappend(newContext->relationRestrictionList, newRestriction); diff --git a/src/test/regress/expected/multi_insert_select.out b/src/test/regress/expected/multi_insert_select.out index d099a635c..77272acfe 100644 --- a/src/test/regress/expected/multi_insert_select.out +++ b/src/test/regress/expected/multi_insert_select.out @@ -130,7 +130,7 @@ FROM raw_events_first WHERE user_id < 0; -NOTICE: evaluating on master +ERROR: Subqueries without relations are not allowed in INSERT ... SELECT queries -- make sure we don't evaluate stable functions with column arguments CREATE OR REPLACE FUNCTION evaluate_on_master(x int) RETURNS int LANGUAGE plpgsql STABLE @@ -672,11 +672,7 @@ INSERT INTO agg_events raw_events_first.user_id, (SELECT * FROM sub_cte) FROM raw_events_first; -DEBUG: distributed statement: INSERT INTO public.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) WITH sub_cte AS (SELECT 1) SELECT user_id, (SELECT sub_cte."?column?" FROM sub_cte) FROM public.raw_events_first_13300000 raw_events_first WHERE ((worker_hash(user_id) >= '-2147483648'::integer) AND (worker_hash(user_id) <= '-1073741825'::integer)) -DEBUG: distributed statement: INSERT INTO public.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) WITH sub_cte AS (SELECT 1) SELECT user_id, (SELECT sub_cte."?column?" FROM sub_cte) FROM public.raw_events_first_13300001 raw_events_first WHERE ((worker_hash(user_id) >= '-1073741824'::integer) AND (worker_hash(user_id) <= '-1'::integer)) -DEBUG: distributed statement: INSERT INTO public.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) WITH sub_cte AS (SELECT 1) SELECT user_id, (SELECT sub_cte."?column?" FROM sub_cte) FROM public.raw_events_first_13300002 raw_events_first WHERE ((worker_hash(user_id) >= 0) AND (worker_hash(user_id) <= 1073741823)) -DEBUG: distributed statement: INSERT INTO public.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) WITH sub_cte AS (SELECT 1) SELECT user_id, (SELECT sub_cte."?column?" FROM sub_cte) FROM public.raw_events_first_13300003 raw_events_first WHERE ((worker_hash(user_id) >= 1073741824) AND (worker_hash(user_id) <= 2147483647)) -DEBUG: Plan is router executable +ERROR: Subqueries without relations are not allowed in INSERT ... SELECT queries -- We do not support any set operations INSERT INTO raw_events_first(user_id) @@ -1581,6 +1577,30 @@ ERROR: grouping sets are not allowed in INSERT ... SELECT queries SET client_min_messages TO INFO; -- avoid constraint violations TRUNCATE raw_events_first; +-- we don't support LIMIT even if it exists in the subqueries +-- in where clause +INSERT INTO agg_events(user_id) +SELECT user_id +FROM users_table +WHERE user_id + IN (SELECT + user_id + FROM ( + ( + SELECT + user_id + FROM + ( + SELECT + e1.user_id + FROM + users_table u1, events_table e1 + WHERE + e1.user_id = u1.user_id LIMIT 3 + ) as f_inner + ) + ) AS f2); +ERROR: LIMIT clauses are not allowed in INSERT ... SELECT queries -- Altering a table and selecting from it using a multi-shard statement -- in the same transaction is allowed because we will use the same -- connections for all co-located placements. diff --git a/src/test/regress/expected/multi_subquery_complex_queries.out b/src/test/regress/expected/multi_subquery_complex_queries.out index fb17b1f6e..2414ee2d2 100644 --- a/src/test/regress/expected/multi_subquery_complex_queries.out +++ b/src/test/regress/expected/multi_subquery_complex_queries.out @@ -2438,7 +2438,7 @@ FROM FROM (SELECT 1 as user_id, now(), 3 AS event - ) events_subquery_4) OFFSET 3) t1 + ) events_subquery_4)) t1 GROUP BY "t1"."user_id") AS t) "q" INNER JOIN (SELECT @@ -2498,7 +2498,7 @@ FROM FROM (SELECT 1 as user_id, now(), 3 AS event - ) events_subquery_4) OFFSET 3) t1 + ) events_subquery_4)) t1 GROUP BY "t1"."user_id") AS t) "q" INNER JOIN (SELECT random()::int as user_id) AS t diff --git a/src/test/regress/expected/multi_subquery_in_where_clause.out b/src/test/regress/expected/multi_subquery_in_where_clause.out index 52e4a69ec..cc6e455bd 100644 --- a/src/test/regress/expected/multi_subquery_in_where_clause.out +++ b/src/test/regress/expected/multi_subquery_in_where_clause.out @@ -530,6 +530,35 @@ WHERE ); ERROR: cannot push down this subquery DETAIL: Offset clause is currently unsupported +-- we can detect errors even if they appear in WHERE -> FROM -> WHERE +-- subqueries +SELECT user_id +FROM users_table +WHERE user_id + IN (SELECT + f_inner.user_id + FROM + ( + SELECT + e1.user_id + FROM + users_table u1, events_table e1 + WHERE + e1.user_id = u1.user_id + ) as f_inner, + ( + SELECT + e1.user_id + FROM + users_table u1, events_table e1 + WHERE + e1.user_id = u1.user_id + AND e1.user_id IN (SELECT user_id FROM users_table LIMIT 3 ) + ) as f_outer + WHERE f_inner.user_id = f_outer.user_id + ); +ERROR: cannot push down this subquery +DETAIL: Limit in subquery is currently unsupported -- semi join is not on the partition key for the third subquery SELECT user_id FROM users_table diff --git a/src/test/regress/sql/multi_insert_select.sql b/src/test/regress/sql/multi_insert_select.sql index 000dc1755..513512292 100644 --- a/src/test/regress/sql/multi_insert_select.sql +++ b/src/test/regress/sql/multi_insert_select.sql @@ -1311,6 +1311,30 @@ SET client_min_messages TO INFO; -- avoid constraint violations TRUNCATE raw_events_first; +-- we don't support LIMIT even if it exists in the subqueries +-- in where clause +INSERT INTO agg_events(user_id) +SELECT user_id +FROM users_table +WHERE user_id + IN (SELECT + user_id + FROM ( + ( + SELECT + user_id + FROM + ( + SELECT + e1.user_id + FROM + users_table u1, events_table e1 + WHERE + e1.user_id = u1.user_id LIMIT 3 + ) as f_inner + ) + ) AS f2); + -- Altering a table and selecting from it using a multi-shard statement -- in the same transaction is allowed because we will use the same -- connections for all co-located placements. diff --git a/src/test/regress/sql/multi_subquery_complex_queries.sql b/src/test/regress/sql/multi_subquery_complex_queries.sql index 656dc7e4e..300a79a94 100644 --- a/src/test/regress/sql/multi_subquery_complex_queries.sql +++ b/src/test/regress/sql/multi_subquery_complex_queries.sql @@ -2216,7 +2216,7 @@ FROM FROM (SELECT 1 as user_id, now(), 3 AS event - ) events_subquery_4) OFFSET 3) t1 + ) events_subquery_4)) t1 GROUP BY "t1"."user_id") AS t) "q" INNER JOIN (SELECT @@ -2275,7 +2275,7 @@ FROM FROM (SELECT 1 as user_id, now(), 3 AS event - ) events_subquery_4) OFFSET 3) t1 + ) events_subquery_4)) t1 GROUP BY "t1"."user_id") AS t) "q" INNER JOIN (SELECT random()::int as user_id) AS t diff --git a/src/test/regress/sql/multi_subquery_in_where_clause.sql b/src/test/regress/sql/multi_subquery_in_where_clause.sql index ae880400d..75ebf2ef2 100644 --- a/src/test/regress/sql/multi_subquery_in_where_clause.sql +++ b/src/test/regress/sql/multi_subquery_in_where_clause.sql @@ -462,6 +462,34 @@ WHERE OFFSET 3 ); +-- we can detect errors even if they appear in WHERE -> FROM -> WHERE +-- subqueries +SELECT user_id +FROM users_table +WHERE user_id + IN (SELECT + f_inner.user_id + FROM + ( + SELECT + e1.user_id + FROM + users_table u1, events_table e1 + WHERE + e1.user_id = u1.user_id + ) as f_inner, + ( + SELECT + e1.user_id + FROM + users_table u1, events_table e1 + WHERE + e1.user_id = u1.user_id + AND e1.user_id IN (SELECT user_id FROM users_table LIMIT 3 ) + ) as f_outer + WHERE f_inner.user_id = f_outer.user_id + ); + -- semi join is not on the partition key for the third subquery SELECT user_id FROM users_table