diff --git a/src/backend/distributed/planner/multi_logical_planner.c b/src/backend/distributed/planner/multi_logical_planner.c index 51e1c48ed..db9aea8be 100644 --- a/src/backend/distributed/planner/multi_logical_planner.c +++ b/src/backend/distributed/planner/multi_logical_planner.c @@ -56,6 +56,21 @@ typedef struct QualifierWalkerContext } QualifierWalkerContext; +/* + * RecurringTuplesType is used to distinguish different types of expressions + * that always produce the same set of tuples when a shard is queried. We make + * this distinction to produce relevant error messages when recurring tuples + * are used in a way that would give incorrect results. + */ +typedef enum RecurringTuplesType +{ + RECURRING_TUPLES_INVALID = 0, + RECURRING_TUPLES_REFERENCE_TABLE, + RECURRING_TUPLES_FUNCTION, + RECURRING_TUPLES_EMPTY_JOIN_TREE +} RecurringTuplesType; + + /* Function pointer type definition for apply join rule functions */ typedef MultiNode *(*RuleApplyFunction) (MultiNode *leftNode, MultiNode *rightNode, Var *partitionColumn, JoinType joinType, @@ -90,9 +105,9 @@ static FieldSelect * CompositeFieldRecursive(Expr *expression, Query *query); static bool FullCompositeFieldList(List *compositeFieldList); static MultiNode * MultiPlanTree(Query *queryTree); static void ErrorIfQueryNotSupported(Query *queryTree); -static bool HasUnsupportedReferenceTableJoin( +static DeferredErrorMessage * DeferredErrorIfUnsupportedRecurringTuplesJoin( PlannerRestrictionContext *plannerRestrictionContext); -static bool ShouldRecurseForReferenceTableJoinChecks(RelOptInfo *relOptInfo); +static bool ShouldRecurseForRecurringTuplesJoinChecks(RelOptInfo *relOptInfo); static bool HasUnsupportedJoinWalker(Node *node, void *context); static bool ErrorHintRequired(const char *errorHint, Query *queryTree); static DeferredErrorMessage * DeferErrorIfUnsupportedSubqueryRepartition(Query * @@ -102,8 +117,10 @@ static bool HasOuterJoin(Query *queryTree); static bool HasOuterJoinWalker(Node *node, void *maxJoinLevel); static bool HasComplexJoinOrder(Query *queryTree); static bool HasComplexRangeTableType(Query *queryTree); -static bool RelationInfoContainsReferenceTable(PlannerInfo *plannerInfo, - RelOptInfo *relationInfo); +static bool RelationInfoContainsRecurringTuples(PlannerInfo *plannerInfo, + RelOptInfo *relationInfo, + RecurringTuplesType *recurType); +static bool HasRecurringTuples(Node *node, RecurringTuplesType *recurType); static void ValidateClauseList(List *clauseList); static void ValidateSubqueryPushdownClauseList(List *clauseList); static bool ExtractFromExpressionWalker(Node *node, @@ -564,13 +581,10 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery, } /* we shouldn't allow reference tables in the outer part of outer joins */ - if (HasUnsupportedReferenceTableJoin(plannerRestrictionContext)) + error = DeferredErrorIfUnsupportedRecurringTuplesJoin(plannerRestrictionContext); + if (error) { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "cannot pushdown the subquery", - "There exist a reference table in the outer part of the " - "outer join", - NULL); + return error; } /* @@ -612,7 +626,7 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery, * sublinks into joins. * * In some cases, sublinks are pulled up and converted into outer joins. Those cases - * are already handled with HasUnsupportedReferenceTableJoin(). + * are already handled with DeferredErrorIfUnsupportedRecurringTuplesJoin(). * * If the sublinks are not pulled up, we should still error out in if any reference table * appears in the FROM clause of a subquery. @@ -622,19 +636,39 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery, static DeferredErrorMessage * DeferErrorIfUnsupportedSublinkAndReferenceTable(Query *queryTree) { + RecurringTuplesType recurType = RECURRING_TUPLES_INVALID; + if (!queryTree->hasSubLinks) { return NULL; } - if (HasReferenceTable((Node *) queryTree->rtable)) + if (HasRecurringTuples((Node *) queryTree->rtable, &recurType)) { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "cannot pushdown the subquery", - "Reference tables are not allowed in FROM " - "clause when the query has subqueries in " - "WHERE clause", - NULL); + if (recurType == RECURRING_TUPLES_REFERENCE_TABLE) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "cannot pushdown the subquery", + "Reference tables are not allowed in FROM " + "clause when the query has subqueries in " + "WHERE clause", NULL); + } + else if (recurType == RECURRING_TUPLES_FUNCTION) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "cannot pushdown the subquery", + "Functions are not allowed in FROM " + "clause when the query has subqueries in " + "WHERE clause", NULL); + } + else + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "cannot pushdown the subquery", + "Subqueries without FROM are not allowed in FROM " + "clause when the outer query has subqueries in " + "WHERE clause", NULL); + } } return NULL; @@ -800,10 +834,12 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi return deferredError; } - if (subqueryTree->rtable == NIL) + if (subqueryTree->rtable == NIL && + contain_mutable_functions((Node *) subqueryTree->targetList)) { preconditionsSatisfied = false; - errorDetail = "Subqueries without relations are unsupported"; + errorDetail = "Subqueries without a FROM clause can only contain immutable " + "functions"; } if (subqueryTree->limitOffset) @@ -948,6 +984,7 @@ DeferErrorIfUnsupportedUnionQuery(Query *subqueryTree, { List *setOperationStatementList = NIL; ListCell *setOperationStatmentCell = NULL; + RecurringTuplesType recurType = RECURRING_TUPLES_INVALID; ExtractSetOperationStatmentWalker((Node *) subqueryTree->setOperations, &setOperationStatementList); @@ -973,12 +1010,9 @@ DeferErrorIfUnsupportedUnionQuery(Query *subqueryTree, leftArgRTI = ((RangeTblRef *) leftArg)->rtindex; leftArgSubquery = (Node *) rt_fetch(leftArgRTI, subqueryTree->rtable)->subquery; - if (HasReferenceTable(leftArgSubquery)) + if (HasRecurringTuples(leftArgSubquery, &recurType)) { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "cannot push down this subquery ", - "Reference tables are not supported with union" - " operator", NULL); + break; } } @@ -988,16 +1022,36 @@ DeferErrorIfUnsupportedUnionQuery(Query *subqueryTree, rightArgRTI = ((RangeTblRef *) rightArg)->rtindex; rightArgSubquery = (Node *) rt_fetch(rightArgRTI, subqueryTree->rtable)->subquery; - if (HasReferenceTable(rightArgSubquery)) + if (HasRecurringTuples(rightArgSubquery, &recurType)) { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "cannot push down this subquery", - "Reference tables are not supported with union" - " operator", NULL); + break; } } } + if (recurType == RECURRING_TUPLES_REFERENCE_TABLE) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "cannot push down this subquery", + "Reference tables are not supported with union operator", + NULL); + } + else if (recurType == RECURRING_TUPLES_FUNCTION) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "cannot push down this subquery", + "Table functions are not supported with union operator", + NULL); + } + else if (recurType == RECURRING_TUPLES_EMPTY_JOIN_TREE) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "cannot push down this subquery", + "Subqueries without a FROM clause are not supported with " + "union operator", NULL); + } + + return NULL; } @@ -1043,16 +1097,15 @@ DeferErrorIfUnsupportedTableCombination(Query *queryTree) List *rangeTableList = queryTree->rtable; List *joinTreeTableIndexList = NIL; ListCell *joinTreeTableIndexCell = NULL; - bool unsupporteTableCombination = false; + bool unsupportedTableCombination = false; char *errorDetail = NULL; - uint32 relationRangeTableCount = 0; - uint32 subqueryRangeTableCount = 0; /* * Extract all range table indexes from the join tree. Note that sub-queries * that get pulled up by PostgreSQL don't appear in this join tree. */ ExtractRangeTableIndexWalker((Node *) queryTree->jointree, &joinTreeTableIndexList); + foreach(joinTreeTableIndexCell, joinTreeTableIndexList) { /* @@ -1066,28 +1119,50 @@ DeferErrorIfUnsupportedTableCombination(Query *queryTree) (RangeTblEntry *) list_nth(rangeTableList, rangeTableListIndex); /* - * Check if the range table in the join tree is a simple relation or a - * subquery. + * Check if the range table in the join tree is a simple relation, a + * subquery, or immutable function. */ - if (rangeTableEntry->rtekind == RTE_RELATION) + if (rangeTableEntry->rtekind == RTE_RELATION || + rangeTableEntry->rtekind == RTE_SUBQUERY) { - relationRangeTableCount++; + /* accepted */ } - else if (rangeTableEntry->rtekind == RTE_SUBQUERY) + else if (rangeTableEntry->rtekind == RTE_FUNCTION) { - subqueryRangeTableCount++; + if (contain_mutable_functions((Node *) rangeTableEntry->functions)) + { + unsupportedTableCombination = true; + errorDetail = "Only immutable functions can be used as a table " + "expressions in a multi-shard query"; + } + else + { + /* immutable function RTEs are treated as reference tables */ + } + } + else if (rangeTableEntry->rtekind == RTE_CTE) + { + unsupportedTableCombination = true; + errorDetail = "CTEs in multi-shard queries are currently unsupported"; + break; + } + else if (rangeTableEntry->rtekind == RTE_VALUES) + { + unsupportedTableCombination = true; + errorDetail = "VALUES in multi-shard queries is currently unsupported"; + break; } else { - unsupporteTableCombination = true; - errorDetail = "Table expressions other than simple relations and " - "subqueries are currently unsupported"; + unsupportedTableCombination = true; + errorDetail = "Table expressions other than relations, subqueries, " + "and immutable functions are currently unsupported"; break; } } /* finally check and error out if not satisfied */ - if (unsupporteTableCombination) + if (unsupportedTableCombination) { return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, "cannot push down this subquery", @@ -1685,7 +1760,7 @@ MultiPlanTree(Query *queryTree) /* - * HasUnsupportedReferenceTableJoin returns true if there exists a outer join + * DeferredErrorIfUnsupportedRecurringTuplesJoin returns true if there exists a outer join * between reference table and distributed tables which does not follow * the rules : * - Reference tables can not be located in the outer part of the semi join or the @@ -1700,12 +1775,14 @@ MultiPlanTree(Query *queryTree) * definitely have duplicate rows. Beside, reference tables can not be used * with full outer joins because of the same reason. */ -static bool -HasUnsupportedReferenceTableJoin(PlannerRestrictionContext *plannerRestrictionContext) +static DeferredErrorMessage * +DeferredErrorIfUnsupportedRecurringTuplesJoin( + PlannerRestrictionContext *plannerRestrictionContext) { List *joinRestrictionList = plannerRestrictionContext->joinRestrictionContext->joinRestrictionList; ListCell *joinRestrictionCell = NULL; + RecurringTuplesType recurType = RECURRING_TUPLES_INVALID; foreach(joinRestrictionCell, joinRestrictionList) { @@ -1718,36 +1795,62 @@ HasUnsupportedReferenceTableJoin(PlannerRestrictionContext *plannerRestrictionCo if (joinType == JOIN_SEMI || joinType == JOIN_ANTI || joinType == JOIN_LEFT) { - if (ShouldRecurseForReferenceTableJoinChecks(outerrel) && - RelationInfoContainsReferenceTable(plannerInfo, outerrel)) + if (ShouldRecurseForRecurringTuplesJoinChecks(outerrel) && + RelationInfoContainsRecurringTuples(plannerInfo, outerrel, &recurType)) { - return true; + break; } } else if (joinType == JOIN_FULL) { - if ((ShouldRecurseForReferenceTableJoinChecks(innerrel) && - RelationInfoContainsReferenceTable(plannerInfo, innerrel)) || - (ShouldRecurseForReferenceTableJoinChecks(outerrel) && - RelationInfoContainsReferenceTable(plannerInfo, outerrel))) + if ((ShouldRecurseForRecurringTuplesJoinChecks(innerrel) && + RelationInfoContainsRecurringTuples(plannerInfo, innerrel, + &recurType)) || + (ShouldRecurseForRecurringTuplesJoinChecks(outerrel) && + RelationInfoContainsRecurringTuples(plannerInfo, outerrel, &recurType))) { - return true; + break; } } } - return false; + if (recurType == RECURRING_TUPLES_REFERENCE_TABLE) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "cannot pushdown the subquery", + "There exist a reference table in the outer " + "part of the outer join", NULL); + } + else if (recurType == RECURRING_TUPLES_FUNCTION) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "cannot pushdown the subquery", + "There exist a table function in the outer " + "part of the outer join", NULL); + } + else if (recurType == RECURRING_TUPLES_EMPTY_JOIN_TREE) + { + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "cannot pushdown the subquery", + "There exist a subquery without FROM in the outer " + "part of the outer join", NULL); + } + + return NULL; } /* - * ShouldRecurseForReferenceTableJoinChecks is a helper function for deciding - * on whether the input relOptInfo should be checked for unsupported reference - * tables. + * ShouldRecurseForRecurringTuplesJoinChecks is a helper function for deciding + * on whether the input relOptInfo should be checked for table expressions that + * generate the same tuples in every query on a shard. We use this to avoid + * redundant checks and false positives in complex join trees. */ static bool -ShouldRecurseForReferenceTableJoinChecks(RelOptInfo *relOptInfo) +ShouldRecurseForRecurringTuplesJoinChecks(RelOptInfo *relOptInfo) { + bool shouldRecurse = true; + /* * We shouldn't recursively go down for joins since we're already * going to process each join seperately. Otherwise we'd restrict @@ -1784,23 +1887,39 @@ ShouldRecurseForReferenceTableJoinChecks(RelOptInfo *relOptInfo) if (list_length(subroot->join_rel_list) > 0) { - return false; + RelOptInfo *subqueryJoin = linitial(subroot->join_rel_list); + + /* + * Subqueries without relations (e.g. SELECT 1) are a little funny. + * They are treated as having a join, but the join is between 0 + * relations and won't be in the join restriction list and therefore + * won't be revisited in DeferredErrorIfUnsupportedRecurringTuplesJoin. + * + * We therefore only skip joins with >0 relations. + */ + if (bms_num_members(subqueryJoin->relids) > 0) + { + shouldRecurse = false; + } } } - return true; + return shouldRecurse; } /* - * RelationInfoContainsReferenceTable checks whether the relationInfo - * contains any reference tables. If found, the function returns true. + * RelationInfoContainsRecurringTuples checks whether the relationInfo + * contains any recurring table expression, namely a reference table, + * or immutable function. If found, RelationInfoContainsRecurringTuples + * returns true. * * Note that since relation ids of relationInfo indexes to the range * table entry list of planner info, planner info is also passed. */ static bool -RelationInfoContainsReferenceTable(PlannerInfo *plannerInfo, RelOptInfo *relationInfo) +RelationInfoContainsRecurringTuples(PlannerInfo *plannerInfo, RelOptInfo *relationInfo, + RecurringTuplesType *recurType) { Relids relids = bms_copy(relationInfo->relids); int relationId = -1; @@ -1810,7 +1929,7 @@ RelationInfoContainsReferenceTable(PlannerInfo *plannerInfo, RelOptInfo *relatio RangeTblEntry *rangeTableEntry = plannerInfo->simple_rte_array[relationId]; /* relationInfo has this range table entry */ - if (HasReferenceTable((Node *) rangeTableEntry)) + if (HasRecurringTuples((Node *) rangeTableEntry, recurType)) { return true; } @@ -1821,29 +1940,71 @@ RelationInfoContainsReferenceTable(PlannerInfo *plannerInfo, RelOptInfo *relatio /* - * HasReferenceTable checks whether there exist a reference table in the - * given node. + * HasRecurringTuples returns whether any part of the expression will generate + * the same set of tuples in every query on shards when executing a distributed + * query. */ bool -HasReferenceTable(Node *node) +HasRecurringTuples(Node *node, RecurringTuplesType *recurType) { - List *relationList = NIL; - ListCell *relationCell = NULL; - ExtractRangeTableRelationWalkerWithRTEExpand(node, &relationList); - - foreach(relationCell, relationList) + if (node == NULL) { - RangeTblEntry *rangeTableEntry = (RangeTblEntry *) lfirst(relationCell); - Oid relationId = rangeTableEntry->relid; - - if (IsDistributedTable(relationId) && PartitionMethod(relationId) == - DISTRIBUTE_BY_NONE) - { - return true; - } + return false; } - return false; + if (IsA(node, RangeTblEntry)) + { + RangeTblEntry *rangeTableEntry = (RangeTblEntry *) node; + + if (rangeTableEntry->rtekind == RTE_RELATION) + { + Oid relationId = rangeTableEntry->relid; + if (IsDistributedTable(relationId) && + PartitionMethod(relationId) == DISTRIBUTE_BY_NONE) + { + *recurType = RECURRING_TUPLES_REFERENCE_TABLE; + + /* + * Tuples from reference tables will recur in every query on shards + * that includes it. + */ + return true; + } + } + else if (rangeTableEntry->rtekind == RTE_FUNCTION) + { + *recurType = RECURRING_TUPLES_FUNCTION; + + /* + * Tuples from functions will recur in every query on shards that includes + * it. + */ + return true; + } + + return range_table_walker(list_make1(rangeTableEntry), HasRecurringTuples, + recurType, 0); + } + else if (IsA(node, Query)) + { + Query *query = (Query *) node; + + if (query->rtable == NIL) + { + *recurType = RECURRING_TUPLES_EMPTY_JOIN_TREE; + + /* + * Queries with empty join trees will recur in every query on shards + * that includes it. + */ + return true; + } + + return query_tree_walker((Query *) node, HasRecurringTuples, + recurType, QTW_EXAMINE_RTES); + } + + return expression_tree_walker(node, HasRecurringTuples, recurType); } @@ -3212,54 +3373,6 @@ ExtractRangeTableRelationWalker(Node *node, List **rangeTableRelationList) } -/* - * ExtractRangeTableRelationWalkerWithRTEExpand obtains the list of relations - * from the given node. Note that the difference between this function and - * ExtractRangeTableRelationWalker is that this one recursively - * walk into range table entries if it can. - */ -bool -ExtractRangeTableRelationWalkerWithRTEExpand(Node *node, List **rangeTableRelationList) -{ - bool walkIsComplete = false; - - if (node == NULL) - { - return walkIsComplete; - } - else if (IsA(node, RangeTblEntry)) - { - RangeTblEntry *rangeTableEntry = (RangeTblEntry *) node; - List *rangeTableList = list_make1(rangeTableEntry); - - if (rangeTableEntry->rtekind == RTE_RELATION) - { - (*rangeTableRelationList) = lappend(*rangeTableRelationList, rangeTableEntry); - } - else - { - walkIsComplete = range_table_walker(rangeTableList, - ExtractRangeTableRelationWalkerWithRTEExpand, - rangeTableRelationList, 0); - } - } - else if (IsA(node, Query)) - { - walkIsComplete = query_tree_walker((Query *) node, - ExtractRangeTableRelationWalkerWithRTEExpand, - rangeTableRelationList, QTW_EXAMINE_RTES); - } - else - { - walkIsComplete = expression_tree_walker(node, - ExtractRangeTableRelationWalkerWithRTEExpand, - rangeTableRelationList); - } - - return walkIsComplete; -} - - /* * ExtractRangeTableEntryWalker walks over a query tree, and finds all range * table entries. For recursing into the query tree, this function uses the diff --git a/src/backend/distributed/planner/relation_restriction_equivalence.c b/src/backend/distributed/planner/relation_restriction_equivalence.c index 786df8736..4fc40b535 100644 --- a/src/backend/distributed/planner/relation_restriction_equivalence.c +++ b/src/backend/distributed/planner/relation_restriction_equivalence.c @@ -1314,6 +1314,12 @@ ContainsUnionSubquery(Query *queryTree) return false; } + /* subquery without FROM */ + if (joiningRangeTableCount == 0) + { + return false; + } + subqueryRteIndex = linitial_int(joinTreeTableIndexList); rangeTableEntry = rt_fetch(subqueryRteIndex, rangeTableList); if (rangeTableEntry->rtekind != RTE_SUBQUERY) diff --git a/src/include/distributed/multi_logical_planner.h b/src/include/distributed/multi_logical_planner.h index 2465b93ad..693902ab4 100644 --- a/src/include/distributed/multi_logical_planner.h +++ b/src/include/distributed/multi_logical_planner.h @@ -210,8 +210,6 @@ extern List * TableEntryList(List *rangeTableList); extern List * UsedTableEntryList(Query *query); extern bool ExtractRangeTableRelationWalker(Node *node, List **rangeTableList); extern bool ExtractRangeTableEntryWalker(Node *node, List **rangeTableList); -extern bool ExtractRangeTableRelationWalkerWithRTEExpand(Node *node, - List **rangeTableList); extern List * pull_var_clause_default(Node *node); extern bool OperatorImplementsEquality(Oid opno); diff --git a/src/include/distributed/multi_physical_planner.h b/src/include/distributed/multi_physical_planner.h index 1e16286c4..7b491ca2d 100644 --- a/src/include/distributed/multi_physical_planner.h +++ b/src/include/distributed/multi_physical_planner.h @@ -287,7 +287,6 @@ extern Const * MakeInt4Constant(Datum constantValue); extern int CompareShardPlacements(const void *leftElement, const void *rightElement); extern bool ShardIntervalsOverlap(ShardInterval *firstInterval, ShardInterval *secondInterval); -extern bool HasReferenceTable(Node *node); /* function declarations for Task and Task list operations */ extern bool TasksEqual(const Task *a, const Task *b); diff --git a/src/test/regress/expected/multi_insert_select_non_pushable_queries.out b/src/test/regress/expected/multi_insert_select_non_pushable_queries.out index a1cfaea68..80e6963a2 100644 --- a/src/test/regress/expected/multi_insert_select_non_pushable_queries.out +++ b/src/test/regress/expected/multi_insert_select_non_pushable_queries.out @@ -694,7 +694,7 @@ FROM ( users_table.value_1 > 10 AND users_table.value_1 < 12 ) u LEFT JOIN LATERAL ( SELECT event_type, time - FROM events_table, (SELECT 1 as x) as f + FROM events_table, (SELECT random()::int as x) as f WHERE user_id = u.user_id AND events_table.event_type > 10 AND events_table.event_type < 12 ) t ON true @@ -702,4 +702,4 @@ FROM ( ) AS shard_union ORDER BY user_lastseen DESC; ERROR: cannot push down this subquery -DETAIL: Subqueries without relations are unsupported +DETAIL: Subqueries without a FROM clause can only contain immutable functions diff --git a/src/test/regress/expected/multi_subquery.out b/src/test/regress/expected/multi_subquery.out index 01a87d0d8..981ce47af 100644 --- a/src/test/regress/expected/multi_subquery.out +++ b/src/test/regress/expected/multi_subquery.out @@ -71,6 +71,12 @@ FROM l_orderkey) AS unit_prices; ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +-- Subqueries without relation with a volatile functions (non-constant) +SELECT count(*) FROM ( + SELECT l_orderkey FROM lineitem_subquery JOIN (SELECT random()::int r) sub ON (l_orderkey = r) +) b; +ERROR: cannot push down this subquery +DETAIL: Subqueries without a FROM clause can only contain immutable functions -- Check that we error out if there is non relation subqueries SELECT count(*) FROM ( @@ -78,7 +84,7 @@ SELECT count(*) FROM (SELECT 1::bigint) ) b; ERROR: cannot push down this subquery -DETAIL: Subqueries without relations are unsupported +DETAIL: Subqueries without a FROM clause are not supported with union operator -- Check that we error out if queries in union do not include partition columns. SELECT count(*) FROM ( diff --git a/src/test/regress/expected/multi_subquery_behavioral_analytics.out b/src/test/regress/expected/multi_subquery_behavioral_analytics.out index 0ff786728..f47f4b28b 100644 --- a/src/test/regress/expected/multi_subquery_behavioral_analytics.out +++ b/src/test/regress/expected/multi_subquery_behavioral_analytics.out @@ -1803,7 +1803,7 @@ ORDER BY 1,2; (2 rows) DROP FUNCTION array_index(ANYARRAY, ANYELEMENT); --- a not supported query due to constant range table entry +-- a query with a constant subquery SELECT count(*) as subquery_count FROM ( SELECT @@ -1822,9 +1822,12 @@ FROM ( ON a.user_id = b.user_id WHERE b.user_id IS NULL GROUP BY a.user_id; -ERROR: cannot push down this subquery -DETAIL: Subqueries without relations are unsupported --- same with INNER JOIN + subquery_count +---------------- + 1 +(1 row) + +-- volatile function in the subquery SELECT count(*) as subquery_count FROM ( SELECT @@ -1838,13 +1841,13 @@ FROM ( ) as a INNER JOIN ( SELECT - 1 as user_id + random()::int as user_id ) AS b ON a.user_id = b.user_id WHERE b.user_id IS NULL GROUP BY a.user_id; ERROR: cannot push down this subquery -DETAIL: Subqueries without relations are unsupported +DETAIL: Subqueries without a FROM clause can only contain immutable functions -- this is slightly different, we use RTE_VALUEs here SELECT Count(*) AS subquery_count FROM (SELECT @@ -1866,7 +1869,7 @@ FROM (SELECT WHERE b.user_id IS NULL GROUP BY a.user_id; ERROR: cannot push down this subquery -DETAIL: Table expressions other than simple relations and subqueries are currently unsupported +DETAIL: VALUES in multi-shard queries is currently unsupported -- same query without LIMIT/OFFSET returns 30 rows SET client_min_messages TO DEBUG1; -- now, lets use a simple expression on the LIMIT and explicit coercion on the OFFSET diff --git a/src/test/regress/expected/multi_subquery_complex_queries.out b/src/test/regress/expected/multi_subquery_complex_queries.out index 6f11c2db1..b4591c669 100644 --- a/src/test/regress/expected/multi_subquery_complex_queries.out +++ b/src/test/regress/expected/multi_subquery_complex_queries.out @@ -2440,7 +2440,7 @@ GROUP BY ORDER BY types; ERROR: cannot push down this subquery -DETAIL: Subqueries without relations are unsupported +DETAIL: Subqueries without a FROM clause are not supported with union operator -- similar to the above, but constant rte is on the right side of the query SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM @@ -2495,5 +2495,5 @@ GROUP BY ORDER BY types; ERROR: cannot push down this subquery -DETAIL: Subqueries without relations are unsupported +DETAIL: Subqueries without a FROM clause are not supported with union operator SET citus.enable_router_execution TO TRUE; diff --git a/src/test/regress/expected/multi_subquery_complex_reference_clause.out b/src/test/regress/expected/multi_subquery_complex_reference_clause.out index 3f0ad2069..7dd1b14e6 100644 --- a/src/test/regress/expected/multi_subquery_complex_reference_clause.out +++ b/src/test/regress/expected/multi_subquery_complex_reference_clause.out @@ -228,6 +228,91 @@ FROM 97 | 71002659 (10 rows) +-- table function can be the inner relationship in a join +SELECT count(*) FROM + (SELECT random() FROM user_buy_test_table JOIN generate_series(1,10) AS users_ref_test_table(id) + ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1; + count +------- + 10 +(1 row) + +-- table function cannot be used without subquery pushdown +SELECT count(*) FROM user_buy_test_table JOIN generate_series(1,10) AS users_ref_test_table(id) + ON user_buy_test_table.item_id = users_ref_test_table.id; +ERROR: could not run distributed query with complex table expressions +HINT: Consider using an equality filter on the distributed table's partition column. +-- table function can be the inner relationship in an outer join +SELECT count(*) FROM + (SELECT random() FROM user_buy_test_table LEFT JOIN generate_series(1,10) AS users_ref_test_table(id) + ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1; + count +------- + 10 +(1 row) + +-- table function cannot be the outer relationship in an outer join +SELECT count(*) FROM + (SELECT random() FROM user_buy_test_table RIGHT JOIN generate_series(1,10) AS users_ref_test_table(id) + ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1; +ERROR: cannot pushdown the subquery +DETAIL: There exist a table function in the outer part of the outer join +-- volatile functions cannot be used as table expressions +SELECT count(*) FROM + (SELECT random() FROM user_buy_test_table JOIN random() AS users_ref_test_table(id) + ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1; +ERROR: cannot push down this subquery +DETAIL: Only immutable functions can be used as a table expressions in a multi-shard query +-- cannot sneak in a volatile function as a parameter +SELECT count(*) FROM + (SELECT random() FROM user_buy_test_table JOIN generate_series(random()::int,10) AS users_ref_test_table(id) + ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1; +ERROR: cannot push down this subquery +DETAIL: Only immutable functions can be used as a table expressions in a multi-shard query +-- cannot perform a union with table function +SELECT count(*) FROM + (SELECT user_id FROM user_buy_test_table + UNION ALL + SELECT id FROM generate_series(1,10) AS users_ref_test_table(id)) subquery_1; +ERROR: cannot push down this subquery +DETAIL: Table functions are not supported with union operator +-- subquery without FROM can be the inner relationship in a join +SELECT count(*) FROM + (SELECT random() FROM user_buy_test_table JOIN (SELECT 4 AS id) users_ref_test_table + ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1; + count +------- + 1 +(1 row) + +-- subquery without FROM triggers subquery pushdown +SELECT count(*) FROM user_buy_test_table JOIN (SELECT 5 AS id) users_ref_test_table +ON user_buy_test_table.item_id = users_ref_test_table.id; + count +------- + 1 +(1 row) + +-- subquery without FROM can be the inner relationship in an outer join +SELECT count(*) FROM user_buy_test_table LEFT JOIN (SELECT 5 AS id) users_ref_test_table +ON user_buy_test_table.item_id = users_ref_test_table.id; + count +------- + 4 +(1 row) + +-- subquery without FROM cannot be the outer relationship in an outer join +SELECT count(*) FROM user_buy_test_table RIGHT JOIN (SELECT 5 AS id) users_ref_test_table +ON user_buy_test_table.item_id = users_ref_test_table.id; +ERROR: cannot pushdown the subquery +DETAIL: There exist a subquery without FROM in the outer part of the outer join +-- cannot perform a union with subquery without FROM +SELECT count(*) FROM + (SELECT user_id FROM user_buy_test_table + UNION ALL + SELECT id FROM (SELECT 5 AS id) users_ref_test_table) subquery_1; +ERROR: cannot push down this subquery +DETAIL: Subqueries without a FROM clause are not supported with union operator -- should be able to pushdown since reference table is in the -- inner part of the left join SELECT diff --git a/src/test/regress/expected/multi_subquery_in_where_clause.out b/src/test/regress/expected/multi_subquery_in_where_clause.out index eaa0254b0..a42ce49f4 100644 --- a/src/test/regress/expected/multi_subquery_in_where_clause.out +++ b/src/test/regress/expected/multi_subquery_in_where_clause.out @@ -499,16 +499,34 @@ FROM ( ORDER BY 2 DESC, 1; ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. --- subquery in where clause doesn't have a relation -SELECT +-- subquery in where clause doesn't have a relation, but is constant +SELECT user_id -FROM +FROM users_table -WHERE - value_2 > - (SELECT 1); +WHERE + value_2 > + (SELECT 1) +ORDER BY 1 ASC +LIMIT 2; + user_id +--------- + 0 + 0 +(2 rows) + +-- subquery in where clause has a volatile function and no relation +SELECT + user_id +FROM + users_table +WHERE + value_2 > + (SELECT random()) +ORDER BY 1 ASC +LIMIT 2; ERROR: cannot push down this subquery -DETAIL: Subqueries without relations are unsupported +DETAIL: Subqueries without a FROM clause can only contain immutable functions -- OFFSET is not supported in the subquey SELECT user_id diff --git a/src/test/regress/expected/multi_subquery_in_where_reference_clause.out b/src/test/regress/expected/multi_subquery_in_where_reference_clause.out index ec1399f65..cf3f46876 100644 --- a/src/test/regress/expected/multi_subquery_in_where_reference_clause.out +++ b/src/test/regress/expected/multi_subquery_in_where_reference_clause.out @@ -64,6 +64,40 @@ WHERE LIMIT 3; ERROR: cannot pushdown the subquery DETAIL: Reference tables are not allowed in FROM clause when the query has subqueries in WHERE clause +-- immutable functions are also treated as reference tables +SELECT + user_id +FROM + (SELECT user_id FROM generate_series(1,10) AS series(user_id)) users_reference_table +WHERE + NOT EXISTS + (SELECT + value_2 + FROM + events_table + WHERE + users_reference_table.user_id = events_table.user_id + ) +LIMIT 3; +ERROR: cannot pushdown the subquery +DETAIL: Functions are not allowed in FROM clause when the query has subqueries in WHERE clause +-- subqueries without FROM are also treated as reference tables +SELECT + user_id +FROM + (SELECT 5 AS user_id) users_reference_table +WHERE + NOT EXISTS + (SELECT + value_2 + FROM + events_table + WHERE + users_reference_table.user_id = events_table.user_id + ) +LIMIT 3; +ERROR: cannot pushdown the subquery +DETAIL: Subqueries without FROM are not allowed in FROM clause when the outer query has subqueries in WHERE clause -- subqueries in WHERE with IN operator without equality SELECT users_table.user_id, count(*) @@ -88,6 +122,54 @@ LIMIT 3; 46 | 115 (3 rows) +-- immutable functions are also treated as reference tables +SELECT + users_table.user_id, count(*) +FROM + users_table +WHERE + value_2 IN + (SELECT + value_2 + FROM + generate_series(1,10) AS events_reference_table(user_id) + WHERE + users_table.user_id > events_reference_table.user_id + ) +GROUP BY users_table.user_id +ORDER BY 2 DESC, 1 DESC +LIMIT 3; + user_id | count +---------+------- + 12 | 121 + 87 | 117 + 59 | 115 +(3 rows) + +-- immutable functions are also treated as reference tables +SELECT + users_table.user_id, count(*) +FROM + users_table +WHERE + value_2 IN + (SELECT + value_2 + FROM + (SELECT 5 AS user_id) AS events_reference_table + WHERE + users_table.user_id > events_reference_table.user_id + ) +GROUP BY users_table.user_id +ORDER BY 2 DESC, 1 DESC +LIMIT 3; + user_id | count +---------+------- + 12 | 121 + 87 | 117 + 59 | 115 +(3 rows) + -- should error out since reference table exist on the left side -- of the left lateral join SELECT user_id, value_2 FROM users_table WHERE diff --git a/src/test/regress/expected/multi_subquery_union.out b/src/test/regress/expected/multi_subquery_union.out index eddb00f1f..69acc4edc 100644 --- a/src/test/regress/expected/multi_subquery_union.out +++ b/src/test/regress/expected/multi_subquery_union.out @@ -914,7 +914,7 @@ FROM (SELECT 1) ) b; ERROR: cannot push down this subquery -DETAIL: Subqueries without relations are unsupported +DETAIL: Subqueries without a FROM clause are not supported with union operator -- we don't support subqueries without relations SELECT * @@ -925,7 +925,7 @@ FROM (SELECT (random() * 100)::int) ) b; ERROR: cannot push down this subquery -DETAIL: Subqueries without relations are unsupported +DETAIL: Subqueries without a FROM clause are not supported with union operator -- we don't support subqueries without relations SELECT user_id, value_3 @@ -946,7 +946,7 @@ FROM ORDER BY 1 DESC, 2 DESC LIMIT 5; ERROR: cannot push down this subquery -DETAIL: Subqueries without relations are unsupported +DETAIL: Subqueries without a FROM clause are not supported with union operator SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM ( SELECT *, random() @@ -990,7 +990,7 @@ FROM GROUP BY types ORDER BY types; ERROR: cannot push down this subquery -DETAIL: Subqueries without relations are unsupported +DETAIL: Subqueries without a FROM clause are not supported with union operator SET citus.enable_router_execution TO true; DROP TABLE events_reference_table; DROP TABLE users_reference_table; diff --git a/src/test/regress/expected/multi_view.out b/src/test/regress/expected/multi_view.out index 50c0a5039..dd55fb597 100644 --- a/src/test/regress/expected/multi_view.out +++ b/src/test/regress/expected/multi_view.out @@ -715,12 +715,12 @@ CREATE VIEW cte_view_1 AS WITH c1 AS (SELECT * FROM users_table WHERE value_1 = 15) SELECT * FROM c1 WHERE value_2 < 500; SELECT * FROM cte_view_1; ERROR: cannot push down this subquery -DETAIL: Table expressions other than simple relations and subqueries are currently unsupported +DETAIL: CTEs in multi-shard queries are currently unsupported -- this is single shard query but still not supported since it has view + cte -- router planner can't detect it SELECT * FROM cte_view_1 WHERE user_id = 8; ERROR: cannot push down this subquery -DETAIL: Table expressions other than simple relations and subqueries are currently unsupported +DETAIL: CTEs in multi-shard queries are currently unsupported -- if CTE itself prunes down to a single shard than the view is supported (router plannable) CREATE VIEW cte_view_2 AS WITH c1 AS (SELECT * FROM users_table WHERE user_id = 8) SELECT * FROM c1 WHERE value_1 = 15; diff --git a/src/test/regress/sql/multi_insert_select_non_pushable_queries.sql b/src/test/regress/sql/multi_insert_select_non_pushable_queries.sql index a4fb4d06b..98159f36b 100644 --- a/src/test/regress/sql/multi_insert_select_non_pushable_queries.sql +++ b/src/test/regress/sql/multi_insert_select_non_pushable_queries.sql @@ -681,7 +681,7 @@ FROM ( users_table.value_1 > 10 AND users_table.value_1 < 12 ) u LEFT JOIN LATERAL ( SELECT event_type, time - FROM events_table, (SELECT 1 as x) as f + FROM events_table, (SELECT random()::int as x) as f WHERE user_id = u.user_id AND events_table.event_type > 10 AND events_table.event_type < 12 ) t ON true diff --git a/src/test/regress/sql/multi_subquery.sql b/src/test/regress/sql/multi_subquery.sql index 800e5389c..2b2afd586 100644 --- a/src/test/regress/sql/multi_subquery.sql +++ b/src/test/regress/sql/multi_subquery.sql @@ -70,6 +70,11 @@ FROM GROUP BY l_orderkey) AS unit_prices; +-- Subqueries without relation with a volatile functions (non-constant) +SELECT count(*) FROM ( + SELECT l_orderkey FROM lineitem_subquery JOIN (SELECT random()::int r) sub ON (l_orderkey = r) +) b; + -- Check that we error out if there is non relation subqueries SELECT count(*) FROM ( diff --git a/src/test/regress/sql/multi_subquery_behavioral_analytics.sql b/src/test/regress/sql/multi_subquery_behavioral_analytics.sql index 051a1265f..2d8ed41b1 100644 --- a/src/test/regress/sql/multi_subquery_behavioral_analytics.sql +++ b/src/test/regress/sql/multi_subquery_behavioral_analytics.sql @@ -1432,7 +1432,7 @@ SELECT * FROM run_command_on_workers('DROP FUNCTION array_index(ANYARRAY, ANYELE ORDER BY 1,2; DROP FUNCTION array_index(ANYARRAY, ANYELEMENT); --- a not supported query due to constant range table entry +-- a query with a constant subquery SELECT count(*) as subquery_count FROM ( SELECT @@ -1452,7 +1452,7 @@ FROM ( WHERE b.user_id IS NULL GROUP BY a.user_id; --- same with INNER JOIN +-- volatile function in the subquery SELECT count(*) as subquery_count FROM ( SELECT @@ -1466,7 +1466,7 @@ FROM ( ) as a INNER JOIN ( SELECT - 1 as user_id + random()::int as user_id ) AS b ON a.user_id = b.user_id WHERE b.user_id IS NULL diff --git a/src/test/regress/sql/multi_subquery_complex_reference_clause.sql b/src/test/regress/sql/multi_subquery_complex_reference_clause.sql index 731daa60a..55dbb2e30 100644 --- a/src/test/regress/sql/multi_subquery_complex_reference_clause.sql +++ b/src/test/regress/sql/multi_subquery_complex_reference_clause.sql @@ -136,6 +136,64 @@ FROM ) as foo GROUP BY user_id ORDER BY 2 DESC LIMIT 10; +-- table function can be the inner relationship in a join +SELECT count(*) FROM + (SELECT random() FROM user_buy_test_table JOIN generate_series(1,10) AS users_ref_test_table(id) + ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1; + +-- table function cannot be used without subquery pushdown +SELECT count(*) FROM user_buy_test_table JOIN generate_series(1,10) AS users_ref_test_table(id) + ON user_buy_test_table.item_id = users_ref_test_table.id; + +-- table function can be the inner relationship in an outer join +SELECT count(*) FROM + (SELECT random() FROM user_buy_test_table LEFT JOIN generate_series(1,10) AS users_ref_test_table(id) + ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1; + +-- table function cannot be the outer relationship in an outer join +SELECT count(*) FROM + (SELECT random() FROM user_buy_test_table RIGHT JOIN generate_series(1,10) AS users_ref_test_table(id) + ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1; + +-- volatile functions cannot be used as table expressions +SELECT count(*) FROM + (SELECT random() FROM user_buy_test_table JOIN random() AS users_ref_test_table(id) + ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1; + +-- cannot sneak in a volatile function as a parameter +SELECT count(*) FROM + (SELECT random() FROM user_buy_test_table JOIN generate_series(random()::int,10) AS users_ref_test_table(id) + ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1; + +-- cannot perform a union with table function +SELECT count(*) FROM + (SELECT user_id FROM user_buy_test_table + UNION ALL + SELECT id FROM generate_series(1,10) AS users_ref_test_table(id)) subquery_1; + +-- subquery without FROM can be the inner relationship in a join +SELECT count(*) FROM + (SELECT random() FROM user_buy_test_table JOIN (SELECT 4 AS id) users_ref_test_table + ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1; + +-- subquery without FROM triggers subquery pushdown +SELECT count(*) FROM user_buy_test_table JOIN (SELECT 5 AS id) users_ref_test_table +ON user_buy_test_table.item_id = users_ref_test_table.id; + +-- subquery without FROM can be the inner relationship in an outer join +SELECT count(*) FROM user_buy_test_table LEFT JOIN (SELECT 5 AS id) users_ref_test_table +ON user_buy_test_table.item_id = users_ref_test_table.id; + +-- subquery without FROM cannot be the outer relationship in an outer join +SELECT count(*) FROM user_buy_test_table RIGHT JOIN (SELECT 5 AS id) users_ref_test_table +ON user_buy_test_table.item_id = users_ref_test_table.id; + +-- cannot perform a union with subquery without FROM +SELECT count(*) FROM + (SELECT user_id FROM user_buy_test_table + UNION ALL + SELECT id FROM (SELECT 5 AS id) users_ref_test_table) subquery_1; + -- should be able to pushdown since reference table is in the -- inner part of the left join SELECT diff --git a/src/test/regress/sql/multi_subquery_in_where_clause.sql b/src/test/regress/sql/multi_subquery_in_where_clause.sql index 6d60a2c1b..472e1bc3c 100644 --- a/src/test/regress/sql/multi_subquery_in_where_clause.sql +++ b/src/test/regress/sql/multi_subquery_in_where_clause.sql @@ -433,14 +433,27 @@ FROM ( ) q ORDER BY 2 DESC, 1; --- subquery in where clause doesn't have a relation -SELECT +-- subquery in where clause doesn't have a relation, but is constant +SELECT user_id -FROM +FROM users_table -WHERE - value_2 > - (SELECT 1); +WHERE + value_2 > + (SELECT 1) +ORDER BY 1 ASC +LIMIT 2; + +-- subquery in where clause has a volatile function and no relation +SELECT + user_id +FROM + users_table +WHERE + value_2 > + (SELECT random()) +ORDER BY 1 ASC +LIMIT 2; -- OFFSET is not supported in the subquey SELECT diff --git a/src/test/regress/sql/multi_subquery_in_where_reference_clause.sql b/src/test/regress/sql/multi_subquery_in_where_reference_clause.sql index c996df7af..8bc716cd0 100644 --- a/src/test/regress/sql/multi_subquery_in_where_reference_clause.sql +++ b/src/test/regress/sql/multi_subquery_in_where_reference_clause.sql @@ -55,6 +55,38 @@ WHERE ) LIMIT 3; +-- immutable functions are also treated as reference tables +SELECT + user_id +FROM + (SELECT user_id FROM generate_series(1,10) AS series(user_id)) users_reference_table +WHERE + NOT EXISTS + (SELECT + value_2 + FROM + events_table + WHERE + users_reference_table.user_id = events_table.user_id + ) +LIMIT 3; + +-- subqueries without FROM are also treated as reference tables +SELECT + user_id +FROM + (SELECT 5 AS user_id) users_reference_table +WHERE + NOT EXISTS + (SELECT + value_2 + FROM + events_table + WHERE + users_reference_table.user_id = events_table.user_id + ) +LIMIT 3; + -- subqueries in WHERE with IN operator without equality SELECT users_table.user_id, count(*) @@ -73,6 +105,43 @@ GROUP BY users_table.user_id ORDER BY 2 DESC, 1 DESC LIMIT 3; +-- immutable functions are also treated as reference tables +SELECT + users_table.user_id, count(*) +FROM + users_table +WHERE + value_2 IN + (SELECT + value_2 + FROM + generate_series(1,10) AS events_reference_table(user_id) + WHERE + users_table.user_id > events_reference_table.user_id + ) +GROUP BY users_table.user_id +ORDER BY 2 DESC, 1 DESC +LIMIT 3; + +-- immutable functions are also treated as reference tables +SELECT + users_table.user_id, count(*) +FROM + users_table +WHERE + value_2 IN + (SELECT + value_2 + FROM + (SELECT 5 AS user_id) AS events_reference_table + WHERE + users_table.user_id > events_reference_table.user_id + ) +GROUP BY users_table.user_id +ORDER BY 2 DESC, 1 DESC +LIMIT 3; + + -- should error out since reference table exist on the left side -- of the left lateral join SELECT user_id, value_2 FROM users_table WHERE