diff --git a/src/backend/distributed/planner/insert_select_planner.c b/src/backend/distributed/planner/insert_select_planner.c index 9f9dde1cb..9e1b6a5f9 100644 --- a/src/backend/distributed/planner/insert_select_planner.c +++ b/src/backend/distributed/planner/insert_select_planner.c @@ -215,7 +215,7 @@ CreateDistributedInsertSelectPlan(Query *originalQuery, RelationRestrictionContext *relationRestrictionContext = plannerRestrictionContext->relationRestrictionContext; bool allReferenceTables = relationRestrictionContext->allReferenceTables; - bool queryContainsDistributionKeyEquality = false; + bool allDistributionKeysInQueryAreEqual = false; distributedPlan->operation = originalQuery->commandType; @@ -232,8 +232,8 @@ CreateDistributedInsertSelectPlan(Query *originalQuery, return distributedPlan; } - queryContainsDistributionKeyEquality = - QueryContainsDistributionKeyEquality(plannerRestrictionContext, originalQuery); + allDistributionKeysInQueryAreEqual = + AllDistributionKeysInQueryAreEqual(originalQuery, plannerRestrictionContext); /* * Plan select query for each shard in the target table. Do so by replacing the @@ -253,7 +253,7 @@ CreateDistributedInsertSelectPlan(Query *originalQuery, modifyTask = RouterModifyTaskForShardInterval(originalQuery, targetShardInterval, relationRestrictionContext, taskIdIndex, - queryContainsDistributionKeyEquality); + allDistributionKeysInQueryAreEqual); /* add the task if it could be created */ if (modifyTask != NULL) diff --git a/src/backend/distributed/planner/multi_logical_planner.c b/src/backend/distributed/planner/multi_logical_planner.c index a2cf53a03..da683914a 100644 --- a/src/backend/distributed/planner/multi_logical_planner.c +++ b/src/backend/distributed/planner/multi_logical_planner.c @@ -152,13 +152,13 @@ static MultiNode * ApplyCartesianProduct(MultiNode *leftNode, MultiNode *rightNo * functions will be removed with upcoming subqery changes. 
*/ static bool ShouldUseSubqueryPushDown(Query *originalQuery, Query *rewrittenQuery); +static bool JoinTreeContainsSubqueryWalker(Node *joinTreeNode, void *context); static bool IsFunctionRTE(Node *node); -static bool FindNodeCheck(Node *node, bool (*check)(Node *)); +static bool IsNodeQuery(Node *node); static MultiNode * SubqueryMultiNodeTree(Query *originalQuery, Query *queryTree, PlannerRestrictionContext * plannerRestrictionContext); -static bool ExtractSublinkWalker(Node *node, List **sublinkList); static MultiNode * SubqueryPushdownMultiNodeTree(Query *queryTree); static List * CreateSubqueryTargetEntryList(List *columnList); @@ -218,7 +218,7 @@ ShouldUseSubqueryPushDown(Query *originalQuery, Query *rewrittenQuery) * given that if postgres already flattened the subqueries, MultiPlanTree() * can plan corresponding distributed plan. */ - if (SubqueryEntryList(rewrittenQuery) != NIL) + if (JoinTreeContainsSubquery(rewrittenQuery)) { return true; } @@ -229,7 +229,7 @@ ShouldUseSubqueryPushDown(Query *originalQuery, Query *rewrittenQuery) * standard_planner() may replace the sublinks with anti/semi joins and * MultiPlanTree() cannot plan such queries. */ - if (SublinkList(originalQuery) != NIL) + if (WhereClauseContainsSubquery(originalQuery)) { return true; } @@ -257,6 +257,55 @@ ShouldUseSubqueryPushDown(Query *originalQuery, Query *rewrittenQuery) } +/* + * JoinTreeContainsSubquery returns true if the input query contains any subqueries + * in the join tree (e.g., FROM clause). + */ +bool +JoinTreeContainsSubquery(Query *query) +{ + FromExpr *joinTree = query->jointree; + + if (!joinTree) + { + return false; + } + + return JoinTreeContainsSubqueryWalker((Node *) joinTree, query); +} + + +/* + * JoinTreeContainsSubqueryWalker returns true if the input joinTreeNode + * references to a subquery. Otherwise, recurses into the expression. 
+ */ +static bool +JoinTreeContainsSubqueryWalker(Node *joinTreeNode, void *context) +{ + if (joinTreeNode == NULL) + { + return false; + } + + if (IsA(joinTreeNode, RangeTblRef)) + { + Query *query = (Query *) context; + + RangeTblRef *rangeTableRef = (RangeTblRef *) joinTreeNode; + RangeTblEntry *rangeTableEntry = rt_fetch(rangeTableRef->rtindex, query->rtable); + + if (rangeTableEntry->rtekind == RTE_SUBQUERY) + { + return true; + } + + return false; + } + + return expression_tree_walker(joinTreeNode, JoinTreeContainsSubqueryWalker, context); +} + + /* * IsFunctionRTE determines whether the given node is a function RTE. */ @@ -283,7 +332,7 @@ IsFunctionRTE(Node *node) * To call this function directly with an RTE, use: * range_table_walker(rte, FindNodeCheck, check, QTW_EXAMINE_RTES) */ -static bool +bool FindNodeCheck(Node *node, bool (*check)(Node *)) { if (node == NULL) @@ -311,53 +360,38 @@ FindNodeCheck(Node *node, bool (*check)(Node *)) /* - * SublinkList finds the subquery nodes in the where clause of the given query. Note - * that the function should be called on the original query given that postgres - * standard_planner() may convert the subqueries in WHERE clause to joins. + * WhereClauseContainsSubquery returns true if the input query contains + * any subqueries in the WHERE clause. */ -List * -SublinkList(Query *originalQuery) +bool +WhereClauseContainsSubquery(Query *query) { - FromExpr *joinTree = originalQuery->jointree; + FromExpr *joinTree = query->jointree; Node *queryQuals = NULL; - List *sublinkList = NIL; if (!joinTree) { - return NIL; + return false; } queryQuals = joinTree->quals; - ExtractSublinkWalker(queryQuals, &sublinkList); - return sublinkList; + return FindNodeCheck(queryQuals, IsNodeQuery); } /* - * ExtractSublinkWalker walks over a quals node, and finds all sublinks - * in that node. + * IsNodeQuery returns true if the given node is a Query. 
*/ static bool -ExtractSublinkWalker(Node *node, List **sublinkList) +IsNodeQuery(Node *node) { - bool walkerResult = false; if (node == NULL) { return false; } - if (IsA(node, SubLink)) - { - (*sublinkList) = lappend(*sublinkList, node); - } - else - { - walkerResult = expression_tree_walker(node, ExtractSublinkWalker, - sublinkList); - } - - return walkerResult; + return IsA(node, Query); } @@ -2055,7 +2089,7 @@ DeferErrorIfQueryNotSupported(Query *queryTree) * There could be Sublinks in the target list as well. To produce better * error messages we're checking sublinks in the where clause. */ - if (queryTree->hasSubLinks && SublinkList(queryTree) == NIL) + if (queryTree->hasSubLinks && !WhereClauseContainsSubquery(queryTree)) { preconditionsSatisfied = false; errorMessage = "could not run distributed query with subquery outside the " diff --git a/src/backend/distributed/planner/query_colocation_checker.c b/src/backend/distributed/planner/query_colocation_checker.c new file mode 100644 index 000000000..f3154e168 --- /dev/null +++ b/src/backend/distributed/planner/query_colocation_checker.c @@ -0,0 +1,314 @@ +/*------------------------------------------------------------------------- + * + * query_colocation_checker.c implements the logic for determining + * whether any subqueries in a given query are co-located (e.g., + * distribution keys of the relations inside subqueries are equal). + * + * The main logic behind non colocated subquery joins is that we pick + * an anchor range table entry and check for distribution key equality + * of any other subqueries in the given query. If for a given subquery, + * we cannot find distribution key equality with the anchor rte, we + * recursively plan that subquery. + * + * We also used a hacky solution for picking relations as the anchor range + * table entries. The hack is that we wrap them into a subquery. 
This is only + * necessary since some of the attribute equivalence checks are based on + * queries rather than range table entries. + * + * Copyright (c) 2018, Citus Data, Inc. + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "distributed/multi_logical_planner.h" +#include "distributed/query_colocation_checker.h" +#include "distributed/pg_dist_partition.h" +#include "distributed/relation_restriction_equivalence.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "parser/parsetree.h" +#include "parser/parse_relation.h" +#include "optimizer/planner.h" +#include "optimizer/prep.h" + + +static RangeTblEntry * AnchorRte(Query *subquery); +static Query * WrapRteRelationIntoSubquery(RangeTblEntry *rteRelation); +static List * UnionRelationRestrictionLists(List *firstRelationList, + List *secondRelationList); + + +/* + * CreateColocatedJoinChecker is a helper function that simply calculates + * a ColocatedJoinChecker with the given input and returns it. + */ +ColocatedJoinChecker +CreateColocatedJoinChecker(Query *subquery, PlannerRestrictionContext *restrictionContext) +{ + ColocatedJoinChecker colocatedJoinChecker = { 0 }; + + RangeTblEntry *anchorRangeTblEntry = NULL; + Query *anchorSubquery = NULL; + PlannerRestrictionContext *anchorPlannerRestrictionContext = NULL; + RelationRestrictionContext *anchorRelationRestrictionContext = NULL; + List *anchorRestrictionEquivalences = NIL; + + /* if no anchor can be picked, return the zeroed checker with a NIL anchor list */ + anchorRangeTblEntry = AnchorRte(subquery); + if (anchorRangeTblEntry == NULL) + { + colocatedJoinChecker.anchorRelationRestrictionList = NIL; + + return colocatedJoinChecker; + } + + if (anchorRangeTblEntry->rtekind == RTE_RELATION) + { + /* + * If we get a relation as our anchor, wrap into a subquery.
The only + * reason that we wrap the relation into a subquery is that some of the utility + * functions (i.e., FilterPlannerRestrictionForQuery()) rely on queries + * not relations. + */ + anchorSubquery = WrapRteRelationIntoSubquery(anchorRangeTblEntry); + } + else if (anchorRangeTblEntry->rtekind == RTE_SUBQUERY) + { + anchorSubquery = anchorRangeTblEntry->subquery; + } + else + { + /* we don't expect any other RTE type here */ + pg_unreachable(); + } + + anchorPlannerRestrictionContext = + FilterPlannerRestrictionForQuery(restrictionContext, anchorSubquery); + anchorRelationRestrictionContext = + anchorPlannerRestrictionContext->relationRestrictionContext; + anchorRestrictionEquivalences = + GenerateAllAttributeEquivalences(anchorPlannerRestrictionContext); + + /* fill the non colocated planning context */ + colocatedJoinChecker.subquery = subquery; + colocatedJoinChecker.subqueryPlannerRestriction = restrictionContext; + + colocatedJoinChecker.anchorRelationRestrictionList = + anchorRelationRestrictionContext->relationRestrictionList; + colocatedJoinChecker.anchorAttributeEquivalences = anchorRestrictionEquivalences; + + return colocatedJoinChecker; +} + + +/* + * AnchorRte gets a query and searches for a relation or a subquery within + * the join tree of the query such that we can use it as our anchor range + * table entry during our non colocated subquery planning. + * + * The function returns NULL if it cannot find a proper range table entry for our + * purposes. See the function for the details. + */ +static RangeTblEntry * +AnchorRte(Query *subquery) +{ + FromExpr *joinTree = subquery->jointree; + Relids joinRelIds = get_relids_in_jointree((Node *) joinTree, false); + int currentRTEIndex = -1; + RangeTblEntry *anchorRangeTblEntry = NULL; + + /* + * Pick a random anchor relation or subquery (i.e., the first) for now. We + * might consider picking a better rte as the anchor. 
For example, we could + * iterate on the joinRelIds, and check which rteIndex has more distribution + * key equality with the other rteIndexes. For the time being, the current primitive + * approach helps us in many cases. + */ + while ((currentRTEIndex = bms_next_member(joinRelIds, currentRTEIndex)) >= 0) + { + RangeTblEntry *currentRte = rt_fetch(currentRTEIndex, subquery->rtable); + + /* + * We always prefer distributed relations if we can find any. The + * reason is that Citus is currently able to recursively plan + * subqueries, but not relations. + * + * For the subqueries, make sure that the subquery contains at least one + * distributed table and doesn't have a set operation. + * + * TODO: The set operation restriction might sound weird, but, the restriction + * equivalence generation functions ignore set operations. We should + * integrate the logic in SafeToPushdownUnionSubquery() to + * GenerateAllAttributeEquivalences() such that the latter becomes aware of + * the set operations. + */ + if (anchorRangeTblEntry == NULL && currentRte->rtekind == RTE_SUBQUERY && + QueryContainsDistributedTableRTE(currentRte->subquery) && + currentRte->subquery->setOperations == NULL && + !ContainsUnionSubquery(currentRte->subquery)) + { + /* found a subquery, keep it if we cannot find a relation */ + anchorRangeTblEntry = currentRte; + } + else if (currentRte->rtekind == RTE_RELATION) + { + Oid relationId = currentRte->relid; + + if (PartitionMethod(relationId) == DISTRIBUTE_BY_NONE) + { + /* + * Reference tables should not be the anchor rte since they + * don't have a distribution key. + */ + continue; + } + + anchorRangeTblEntry = currentRte; + break; + } + } + + return anchorRangeTblEntry; +} + + +/* + * SubqueryColocated returns true if the input subquery has a distribution + * key equality with the anchor subquery. In other words, we refer to the + * distribution key equality of relations as "colocation" in this context.
+ */ +bool +SubqueryColocated(Query *subquery, ColocatedJoinChecker *checker) +{ + List *anchorRelationRestrictionList = checker->anchorRelationRestrictionList; + List *anchorAttributeEquivalances = checker->anchorAttributeEquivalences; + + PlannerRestrictionContext *restrictionContext = checker->subqueryPlannerRestriction; + PlannerRestrictionContext *filteredPlannerContext = + FilterPlannerRestrictionForQuery(restrictionContext, subquery); + List *filteredRestrictionList = + filteredPlannerContext->relationRestrictionContext->relationRestrictionList; + + List *unionedRelationRestrictionList = NULL; + RelationRestrictionContext *unionedRelationRestrictionContext = NULL; + PlannerRestrictionContext *unionedPlannerRestrictionContext = NULL; + + /* + * We merge the relation restrictions of the input subquery and the anchor + * restrictions to form a temporary relation restriction context. The aim of + * forming this temporary context is to check whether the context contains + * distribution key equality or not. + */ + unionedRelationRestrictionList = + UnionRelationRestrictionLists(anchorRelationRestrictionList, + filteredRestrictionList); + + /* + * We already have the attributeEquivalances, thus, only need to prepare + * the planner restrictions with unioned relations for our purpose of + * distribution key equality. Note that we don't need to calculate the + * join restrictions, we're already relying on the attributeEquivalances + * provided by the context. 
+ */ + unionedRelationRestrictionContext = palloc0(sizeof(RelationRestrictionContext)); + unionedRelationRestrictionContext->relationRestrictionList = + unionedRelationRestrictionList; + + unionedPlannerRestrictionContext = palloc0(sizeof(PlannerRestrictionContext)); + unionedPlannerRestrictionContext->relationRestrictionContext = + unionedRelationRestrictionContext; + + if (!RestrictionEquivalenceForPartitionKeysViaEquivalances( + unionedPlannerRestrictionContext, + anchorAttributeEquivalances)) + { + return false; + } + + return true; +} + + +/* + * WrapRteRelationIntoSubquery wraps the given relation range table entry + * in a newly constructed "(SELECT * FROM table_name as anchor_relation)" query. + * + * Note that the query returned by this function does not contain any filters or + * projections. The returned query should be used cautiously and it is mostly + * designed for generating a stub query. + */ +static Query * +WrapRteRelationIntoSubquery(RangeTblEntry *rteRelation) +{ + Query *subquery = makeNode(Query); + RangeTblRef *newRangeTableRef = NULL; + RangeTblEntry *newRangeTableEntry = NULL; + Var *targetColumn = NULL; + TargetEntry *targetEntry = NULL; + + subquery->commandType = CMD_SELECT; + + /* we copy the input rteRelation to preserve the rteIdentity */ + newRangeTableEntry = copyObject(rteRelation); + subquery->rtable = list_make1(newRangeTableEntry); + + /* build the only range table reference and use it as the FROM expression */ + newRangeTableRef = makeNode(RangeTblRef); + newRangeTableRef->rtindex = 1; + subquery->jointree = makeFromExpr(list_make1(newRangeTableRef), NULL); + + /* Need the whole row as a junk var */ + targetColumn = makeWholeRowVar(newRangeTableEntry, newRangeTableRef->rtindex, 0, + false); + + /* create a dummy target entry */ + targetEntry = makeTargetEntry((Expr *) targetColumn, 1, "wholerow", true); + + subquery->targetList = lappend(subquery->targetList, targetEntry); + + return subquery; +} + + +/* + * UnionRelationRestrictionLists
merges two relation restriction lists + * and returns a newly allocated list. The merged relation restriction + * list doesn't contain any duplicate elements. + */ +static List * +UnionRelationRestrictionLists(List *firstRelationList, List *secondRelationList) +{ + RelationRestrictionContext *unionedRestrictionContext = NULL; + List *unionedRelationRestrictionList = NULL; + ListCell *relationRestrictionCell = NULL; + Relids rteIdentities = NULL; + List *allRestrictionList = NIL; + + /* list_concat destructively modifies the first list, thus copy it */ + firstRelationList = list_copy(firstRelationList); + allRestrictionList = list_concat(firstRelationList, secondRelationList); + + foreach(relationRestrictionCell, allRestrictionList) + { + RelationRestriction *restriction = + (RelationRestriction *) lfirst(relationRestrictionCell); + int rteIdentity = GetRTEIdentity(restriction->rte); + + /* already have the same rte identity, skip (the earlier entry is kept) */ + if (bms_is_member(rteIdentity, rteIdentities)) + { + continue; + } + + unionedRelationRestrictionList = + lappend(unionedRelationRestrictionList, restriction); + + rteIdentities = bms_add_member(rteIdentities, rteIdentity); + } + + unionedRestrictionContext = palloc0(sizeof(RelationRestrictionContext)); /* NOTE(review): never read or returned below; looks like dead code */ + unionedRestrictionContext->relationRestrictionList = unionedRelationRestrictionList; + + return unionedRelationRestrictionList; +} diff --git a/src/backend/distributed/planner/recursive_planning.c b/src/backend/distributed/planner/recursive_planning.c index acb818d1b..583ef3489 100644 --- a/src/backend/distributed/planner/recursive_planning.c +++ b/src/backend/distributed/planner/recursive_planning.c @@ -62,12 +62,15 @@ #include "distributed/multi_logical_planner.h" #include "distributed/multi_router_planner.h" #include "distributed/multi_physical_planner.h" -#include "distributed/recursive_planning.h" #include "distributed/multi_server_executor.h" +#include "distributed/query_colocation_checker.h" +#include "distributed/recursive_planning.h"
#include "distributed/relation_restriction_equivalence.h" #include "lib/stringinfo.h" #include "optimizer/planner.h" +#include "optimizer/prep.h" #include "parser/parsetree.h" +#include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "nodes/nodes.h" #include "nodes/pg_list.h" @@ -86,7 +89,7 @@ typedef struct RecursivePlanningContext { int level; uint64 planId; - bool queryContainsDistributionKeyEquality; /* used for some optimizations */ + bool allDistributionKeysInQueryAreEqual; /* used for some optimizations */ List *subPlanList; PlannerRestrictionContext *plannerRestrictionContext; } RecursivePlanningContext; @@ -116,12 +119,24 @@ typedef struct VarLevelsUpWalkerContext static DeferredErrorMessage * RecursivelyPlanSubqueriesAndCTEs(Query *query, RecursivePlanningContext * context); - static bool ShouldRecursivelyPlanNonColocatedSubqueries(Query *subquery, RecursivePlanningContext * context); +static bool ContainsSubquery(Query *query); static void RecursivelyPlanNonColocatedSubqueries(Query *subquery, RecursivePlanningContext *context); +static void RecursivelyPlanNonColocatedJoinWalker(Node *joinNode, + ColocatedJoinChecker * + colocatedJoinChecker, + RecursivePlanningContext * + recursivePlanningContext); +static void RecursivelyPlanNonColocatedSubqueriesInWhere(Query *query, + ColocatedJoinChecker * + colocatedJoinChecker, + RecursivePlanningContext * + recursivePlanningContext); +static List * SublinkList(Query *originalQuery); +static bool ExtractSublinkWalker(Node *node, List **sublinkList); static bool ShouldRecursivelyPlanAllSubqueriesInWhere(Query *query); static bool RecursivelyPlanAllSubqueries(Node *node, RecursivePlanningContext *planningContext); @@ -130,9 +145,9 @@ static DeferredErrorMessage * RecursivelyPlanCTEs(Query *query, static bool RecursivelyPlanSubqueryWalker(Node *node, RecursivePlanningContext *context); static bool ShouldRecursivelyPlanSubquery(Query *subquery, RecursivePlanningContext *context); -static bool 
SubqueryContainsDistributionKeyEquality(Query *subquery, - PlannerRestrictionContext * - restrictionContext); +static bool AllDistributionKeysInSubqueryAreEqual(Query *subquery, + PlannerRestrictionContext * + restrictionContext); static bool ShouldRecursivelyPlanSetOperation(Query *query, RecursivePlanningContext *context); static void RecursivelyPlanSetOperations(Query *query, Node *node, @@ -185,8 +200,8 @@ GenerateSubplansForSubqueriesAndCTEs(uint64 planId, Query *originalQuery, * calculating this wouldn't help us at all, we should individually check * each each subquery and subquery joins among subqueries. */ - context.queryContainsDistributionKeyEquality = - QueryContainsDistributionKeyEquality(plannerRestrictionContext, originalQuery); + context.allDistributionKeysInQueryAreEqual = + AllDistributionKeysInQueryAreEqual(originalQuery, plannerRestrictionContext); error = RecursivelyPlanSubqueriesAndCTEs(originalQuery, &context); if (error != NULL) @@ -278,6 +293,10 @@ RecursivelyPlanSubqueriesAndCTEs(Query *query, RecursivePlanningContext *context RecursivelyPlanAllSubqueries((Node *) query->jointree->quals, context); } + /* + * If the query doesn't have distribution key equality, + * recursively plan some of its subqueries. + */ if (ShouldRecursivelyPlanNonColocatedSubqueries(query, context)) { RecursivelyPlanNonColocatedSubqueries(query, context); @@ -299,8 +318,11 @@ static bool ShouldRecursivelyPlanNonColocatedSubqueries(Query *subquery, RecursivePlanningContext *context) { - /* if the input query already contains the equality, simply return */ - if (context->queryContainsDistributionKeyEquality) + /* + * If the input query already contains the equality, simply return since it is not + * possible to find any non colocated subqueries. 
+ */ + if (context->allDistributionKeysInQueryAreEqual) { return false; } @@ -313,18 +335,11 @@ ShouldRecursivelyPlanNonColocatedSubqueries(Query *subquery, * only bother non-colocated subquery joins, which only happens when * there are subqueries. */ - if (SubqueryEntryList(subquery) == NIL && SublinkList(subquery) == NIL) + if (!ContainsSubquery(subquery)) { return false; } - /* - * At this point, we might be recursively planning a a subquery which will be pulled - * by PostgreSQL standard_planner (i.e., tpch_7_nested). However, checking for those - * cases are pretty complicated and, seems not super useful thing to implement. - */ - - /* direct joins with local tables are not supported by any of Citus planners */ if (FindNodeCheckInRangeTableList(subquery->rtable, IsLocalTableRTE)) { @@ -334,8 +349,8 @@ ShouldRecursivelyPlanNonColocatedSubqueries(Query *subquery, /* * Finally, check whether this subquery contains distribution key equality or not. */ - if (!SubqueryContainsDistributionKeyEquality(subquery, - context->plannerRestrictionContext)) + if (!AllDistributionKeysInSubqueryAreEqual(subquery, + context->plannerRestrictionContext)) { return true; } @@ -344,9 +359,228 @@ ShouldRecursivelyPlanNonColocatedSubqueries(Query *subquery, } +/* + * ContainsSubquery returns true if the input query contains any subqueries + * in the FROM or WHERE clauses. + */ +static bool +ContainsSubquery(Query *query) +{ + return JoinTreeContainsSubquery(query) || WhereClauseContainsSubquery(query); +} + + +/* + * RecursivelyPlanNonColocatedSubqueries gets a query which includes one or more + * other subqueries that are not joined on their distribution keys. The function + * tries to recursively plan some of the subqueries to make the input query + * executable by Citus. + * + * The function picks an anchor subquery and iterates on the remaining subqueries. 
+ * Whenever it finds a non colocated subquery with the anchor subquery, the function + * decides to recursively plan the non colocated subquery. + * + * The function first handles subqueries in FROM clause (i.e., jointree->fromlist) and then + * subqueries in WHERE clause (i.e., jointree->quals). + * + * The function does not treat outer joins separately. Thus, we might end up with + * a query where the function decides to recursively plan an outer side of an outer + * join (i.e., LEFT side of LEFT JOIN). For simplicity, we chose to do so and handle + * outer joins with a separate pass on the join tree. + */ static void -RecursivelyPlanNonColocatedSubqueries(Query *query, RecursivePlanningContext *context) -{ } +RecursivelyPlanNonColocatedSubqueries(Query *subquery, RecursivePlanningContext *context) +{ + ColocatedJoinChecker colocatedJoinChecker; + + FromExpr *joinTree = subquery->jointree; + PlannerRestrictionContext *restrictionContext = NULL; + + /* create the context for the non colocated subquery planning */ + restrictionContext = context->plannerRestrictionContext; + colocatedJoinChecker = CreateColocatedJoinChecker(subquery, restrictionContext); + + /* + * Although this is a rare case, we weren't able to pick an anchor + * range table entry, so we cannot continue. + */ + if (colocatedJoinChecker.anchorRelationRestrictionList == NIL) + { + return; + } + + /* handle from clause subqueries first */ + RecursivelyPlanNonColocatedJoinWalker((Node *) joinTree, &colocatedJoinChecker, + context); + + /* handle subqueries in WHERE clause */ + RecursivelyPlanNonColocatedSubqueriesInWhere(subquery, &colocatedJoinChecker, + context); +} + + +/* + * RecursivelyPlanNonColocatedJoinWalker gets a join node and walks over it to find + * subqueries that live under the node. + * + * When a subquery is found, it is checked whether the subquery is colocated with the + * anchor subquery specified in the colocatedJoinChecker. If not, + * the subquery is recursively planned.
+ */ +static void +RecursivelyPlanNonColocatedJoinWalker(Node *joinNode, + ColocatedJoinChecker *colocatedJoinChecker, + RecursivePlanningContext *recursivePlanningContext) +{ + if (joinNode == NULL) + { + return; + } + else if (IsA(joinNode, FromExpr)) + { + FromExpr *fromExpr = (FromExpr *) joinNode; + ListCell *fromExprCell; + + /* + * For each element of the from list, check whether the element is + * colocated with the anchor subquery by recursing until we + * find the subqueries. + */ + foreach(fromExprCell, fromExpr->fromlist) + { + Node *fromElement = (Node *) lfirst(fromExprCell); + + RecursivelyPlanNonColocatedJoinWalker(fromElement, colocatedJoinChecker, + recursivePlanningContext); + } + } + else if (IsA(joinNode, JoinExpr)) + { + JoinExpr *joinExpr = (JoinExpr *) joinNode; + + /* recurse into the left subtree */ + RecursivelyPlanNonColocatedJoinWalker(joinExpr->larg, colocatedJoinChecker, + recursivePlanningContext); + + /* recurse into the right subtree */ + RecursivelyPlanNonColocatedJoinWalker(joinExpr->rarg, colocatedJoinChecker, + recursivePlanningContext); + } + else if (IsA(joinNode, RangeTblRef)) + { + int rangeTableIndex = ((RangeTblRef *) joinNode)->rtindex; + List *rangeTableList = colocatedJoinChecker->subquery->rtable; + RangeTblEntry *rte = rt_fetch(rangeTableIndex, rangeTableList); + Query *subquery = NULL; + + /* we're only interested in subqueries for now */ + if (rte->rtekind != RTE_SUBQUERY) + { + return; + } + + /* + * If the subquery is not colocated with the anchor subquery, + * recursively plan it. + */ + subquery = rte->subquery; + if (!SubqueryColocated(subquery, colocatedJoinChecker)) + { + RecursivelyPlanSubquery(subquery, recursivePlanningContext); + } + } + else + { + pg_unreachable(); + } +} + + +/* + * RecursivelyPlanNonColocatedSubqueriesInWhere gets a query and walks over its sublinks + * to find subqueries that live in WHERE clause.
+ * + * When a subquery is found, it is checked whether the subquery is colocated with the + * anchor subquery specified in the colocatedJoinChecker. If not, + * the subquery is recursively planned. + */ +static void +RecursivelyPlanNonColocatedSubqueriesInWhere(Query *query, + ColocatedJoinChecker *colocatedJoinChecker, + RecursivePlanningContext * + recursivePlanningContext) +{ + List *sublinkList = SublinkList(query); + ListCell *sublinkCell = NULL; + + foreach(sublinkCell, sublinkList) + { + SubLink *sublink = (SubLink *) lfirst(sublinkCell); + Query *subselect = (Query *) sublink->subselect; + + /* subselect is probably never NULL, but let's keep the check anyway */ + if (subselect == NULL) + { + continue; + } + + if (!SubqueryColocated(subselect, colocatedJoinChecker)) + { + RecursivelyPlanSubquery(subselect, recursivePlanningContext); + } + } +} + + +/* + * SublinkList finds the subquery nodes in the where clause of the given query. Note + * that the function should be called on the original query given that postgres + * standard_planner() may convert the subqueries in WHERE clause to joins. + */ +static List * +SublinkList(Query *originalQuery) +{ + FromExpr *joinTree = originalQuery->jointree; + Node *queryQuals = NULL; + List *sublinkList = NIL; + + if (!joinTree) + { + return NIL; + } + + queryQuals = joinTree->quals; + ExtractSublinkWalker(queryQuals, &sublinkList); + + return sublinkList; +} + + +/* + * ExtractSublinkWalker walks over a quals node, and finds all sublinks + * in that node.
+ */ +static bool +ExtractSublinkWalker(Node *node, List **sublinkList) +{ + bool walkerResult = false; + if (node == NULL) + { + return false; + } + + if (IsA(node, SubLink)) + { + (*sublinkList) = lappend(*sublinkList, node); + } + else + { + walkerResult = expression_tree_walker(node, ExtractSublinkWalker, + sublinkList); + } + + return walkerResult; +} /* @@ -632,12 +866,13 @@ ShouldRecursivelyPlanSubquery(Query *subquery, RecursivePlanningContext *context * If the input query to the planner doesn't contain distribution key equality, * we should further check whether this individual subquery contains or not. * - * If all the relations are not joined on their distribution keys for the - * subquery, we cannot pushdown the it, thus, recursively plan it. + * If all relations are not joined on their distribution keys for the given + * subquery, we cannot push it down and therefore we should try to + * recursively plan it. */ - if (!context->queryContainsDistributionKeyEquality && - !SubqueryContainsDistributionKeyEquality(subquery, - context->plannerRestrictionContext)) + if (!context->allDistributionKeysInQueryAreEqual && + !AllDistributionKeysInSubqueryAreEqual(subquery, + context->plannerRestrictionContext)) { return true; } @@ -663,16 +898,16 @@ ShouldRecursivelyPlanSubquery(Query *subquery, RecursivePlanningContext *context /* - * SubqueryContainsDistributionKeyEquality is a wrapper function - * for QueryContainsDistributionKeyEquality(). Here, we filter the + * AllDistributionKeysInSubqueryAreEqual is a wrapper function + * for AllDistributionKeysInQueryAreEqual(). Here, we filter the * planner restrictions for the given subquery and do the restriction * equality checks on the filtered restriction.
*/ static bool -SubqueryContainsDistributionKeyEquality(Query *subquery, - PlannerRestrictionContext *restrictionContext) +AllDistributionKeysInSubqueryAreEqual(Query *subquery, + PlannerRestrictionContext *restrictionContext) { - bool queryContainsDistributionKeyEquality = false; + bool allDistributionKeysInSubqueryAreEqual = false; PlannerRestrictionContext *filteredRestrictionContext = NULL; /* we don't support distribution eq. checks for CTEs yet */ @@ -684,9 +919,9 @@ SubqueryContainsDistributionKeyEquality(Query *subquery, filteredRestrictionContext = FilterPlannerRestrictionForQuery(restrictionContext, subquery); - queryContainsDistributionKeyEquality = - QueryContainsDistributionKeyEquality(filteredRestrictionContext, subquery); - if (!queryContainsDistributionKeyEquality) + allDistributionKeysInSubqueryAreEqual = + AllDistributionKeysInQueryAreEqual(subquery, filteredRestrictionContext); + if (!allDistributionKeysInSubqueryAreEqual) { return false; } diff --git a/src/backend/distributed/planner/relation_restriction_equivalence.c b/src/backend/distributed/planner/relation_restriction_equivalence.c index 717f825c9..631152b8e 100644 --- a/src/backend/distributed/planner/relation_restriction_equivalence.c +++ b/src/backend/distributed/planner/relation_restriction_equivalence.c @@ -66,9 +66,8 @@ typedef struct AttributeEquivalenceClassMember static bool ContextContainsLocalRelation(RelationRestrictionContext *restrictionContext); static Var * FindTranslatedVar(List *appendRelList, Oid relationOid, Index relationRteIndex, Index *partitionKeyIndex); -static bool EquivalenceListContainsRelationsEquality(List *attributeEquivalenceList, - RelationRestrictionContext * - restrictionContext); +static bool ContainsMultipleDistributedRelations(PlannerRestrictionContext * + plannerRestrictionContext); static List * GenerateAttributeEquivalencesForRelationRestrictions( RelationRestrictionContext *restrictionContext); static AttributeEquivalenceClass * 
AttributeEquivalenceClassForEquivalenceClass( @@ -145,15 +144,15 @@ static bool JoinRestrictionListExistsInContext(JoinRestriction *joinRestrictionI /* - * QueryContainsDistributionKeyEquality returns true if either + * AllDistributionKeysInQueryAreEqual returns true if either * (i) there exists join in the query and all relations joined on their * partition keys * (ii) there exists only union set operations and all relations has * partition keys in the same ordinal position in the query */ bool -QueryContainsDistributionKeyEquality(PlannerRestrictionContext *plannerRestrictionContext, - Query *originalQuery) +AllDistributionKeysInQueryAreEqual(Query *originalQuery, + PlannerRestrictionContext *plannerRestrictionContext) { bool restrictionEquivalenceForPartitionKeys = false; RelationRestrictionContext *restrictionContext = NULL; @@ -430,6 +429,9 @@ FindTranslatedVar(List *appendRelList, Oid relationOid, Index relationRteIndex, * RTE_RELATION follows the above rule, we can conclude that all RTE_RELATIONs are * joined on their partition keys. * + * Before doing the expensive equality checks, we do a cheaper check to understand + * whether there is more than one distributed relation. If not, we exit early. + * * The function returns true if all relations are joined on their partition keys. * Otherwise, the function returns false. We ignore reference tables at all since * they don't have partition keys. @@ -451,15 +453,61 @@ FindTranslatedVar(List *appendRelList, Oid relationOid, Index relationRteIndex, * RestrictionEquivalenceForPartitionKeys uses both relation restrictions and join restrictions * to find as much as information that Postgres planner provides to extensions. For the * details of the usage, please see GenerateAttributeEquivalencesForRelationRestrictions() - * and GenerateAttributeEquivalencesForJoinRestrictions() + * and GenerateAttributeEquivalencesForJoinRestrictions().
*/ bool -RestrictionEquivalenceForPartitionKeys(PlannerRestrictionContext * - plannerRestrictionContext) +RestrictionEquivalenceForPartitionKeys(PlannerRestrictionContext *restrictionContext) +{ + List *attributeEquivalenceList = NIL; + + /* there is at most one distributed relation, no need to continue */ + if (!ContainsMultipleDistributedRelations(restrictionContext)) + { + return true; + } + + attributeEquivalenceList = GenerateAllAttributeEquivalences(restrictionContext); + + return RestrictionEquivalenceForPartitionKeysViaEquivalances(restrictionContext, + attributeEquivalenceList); +} + + +/* + * RestrictionEquivalenceForPartitionKeysViaEquivalances follows the same rules + * as RestrictionEquivalenceForPartitionKeys(). The only difference is that + * this function allows passing pre-computed attribute equivalences along with + * the planner restriction context. + */ +bool +RestrictionEquivalenceForPartitionKeysViaEquivalances(PlannerRestrictionContext * + plannerRestrictionContext, + List *allAttributeEquivalenceList) +{ + RelationRestrictionContext *restrictionContext = + plannerRestrictionContext->relationRestrictionContext; + + /* there is at most one distributed relation, no need to continue */ + if (!ContainsMultipleDistributedRelations(plannerRestrictionContext)) + { + return true; + } + + return EquivalenceListContainsRelationsEquality(allAttributeEquivalenceList, + restrictionContext); +} + + +/* + * ContainsMultipleDistributedRelations returns true if the input planner + * restriction context contains more than one distributed relation.
+ */ +static bool +ContainsMultipleDistributedRelations(PlannerRestrictionContext * + plannerRestrictionContext) { RelationRestrictionContext *restrictionContext = plannerRestrictionContext->relationRestrictionContext; - List *allAttributeEquivalenceList = NIL; uint32 referenceRelationCount = ReferenceRelationCount(restrictionContext); uint32 totalRelationCount = list_length(restrictionContext->relationRestrictionList); @@ -480,14 +528,10 @@ RestrictionEquivalenceForPartitionKeys(PlannerRestrictionContext * */ if (nonReferenceRelationCount <= 1) { - return true; + return false; } - allAttributeEquivalenceList = - GenerateAllAttributeEquivalences(plannerRestrictionContext); - - return EquivalenceListContainsRelationsEquality(allAttributeEquivalenceList, - restrictionContext); + return true; } @@ -555,7 +599,7 @@ ReferenceRelationCount(RelationRestrictionContext *restrictionContext) * whether all the relations exists in the common equivalence class. * */ -static bool +bool EquivalenceListContainsRelationsEquality(List *attributeEquivalenceList, RelationRestrictionContext *restrictionContext) { @@ -629,6 +673,11 @@ GenerateAttributeEquivalencesForRelationRestrictions(RelationRestrictionContext List *attributeEquivalenceList = NIL; ListCell *relationRestrictionCell = NULL; + if (restrictionContext == NULL) + { + return attributeEquivalenceList; + } + foreach(relationRestrictionCell, restrictionContext->relationRestrictionList) { RelationRestriction *relationRestriction = @@ -980,6 +1029,11 @@ GenerateAttributeEquivalencesForJoinRestrictions(JoinRestrictionContext * List *attributeEquivalenceList = NIL; ListCell *joinRestrictionCell = NULL; + if (joinRestrictionContext == NULL) + { + return attributeEquivalenceList; + } + foreach(joinRestrictionCell, joinRestrictionContext->joinRestrictionList) { JoinRestriction *joinRestriction = diff --git a/src/include/distributed/multi_logical_planner.h b/src/include/distributed/multi_logical_planner.h index 73ece0cda..9fac464dd 
100644 --- a/src/include/distributed/multi_logical_planner.h +++ b/src/include/distributed/multi_logical_planner.h @@ -187,7 +187,9 @@ extern bool SubqueryPushdown; extern MultiTreeRoot * MultiLogicalPlanCreate(Query *originalQuery, Query *queryTree, PlannerRestrictionContext * plannerRestrictionContext); -extern List * SublinkList(Query *originalQuery); +extern bool JoinTreeContainsSubquery(Query *query); +extern bool WhereClauseContainsSubquery(Query *query); +extern bool FindNodeCheck(Node *node, bool (*check)(Node *)); extern bool SingleRelationRepartitionSubquery(Query *queryTree); extern DeferredErrorMessage * DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool diff --git a/src/include/distributed/query_colocation_checker.h b/src/include/distributed/query_colocation_checker.h new file mode 100644 index 000000000..2654122ea --- /dev/null +++ b/src/include/distributed/query_colocation_checker.h @@ -0,0 +1,39 @@ +/*------------------------------------------------------------------------- + * + * query_colocation_checker.h + * General Citus planner code. + * + * Copyright (c) 2017, Citus Data, Inc. + *------------------------------------------------------------------------- + */ + +#ifndef QUERY_COLOCATION_CHECKER_H +#define QUERY_COLOCATION_CHECKER_H + + +#include "distributed/distributed_planner.h" +#include "nodes/parsenodes.h" +#include "nodes/primnodes.h" + + +/* + * ColocatedJoinChecker is a helper structure that is used to decide + * whether any subqueries should be recursively planned due to non + * colocated joins.
+ */ +typedef struct ColocatedJoinChecker +{ + Query *subquery; + List *anchorAttributeEquivalences; + List *anchorRelationRestrictionList; + PlannerRestrictionContext *subqueryPlannerRestriction; +} ColocatedJoinChecker; + + +extern ColocatedJoinChecker CreateColocatedJoinChecker(Query *subquery, + PlannerRestrictionContext * + restrictionContext); +extern bool SubqueryColocated(Query *subquery, ColocatedJoinChecker *context); + + +#endif /* QUERY_COLOCATION_CHECKER_H */ diff --git a/src/include/distributed/relation_restriction_equivalence.h b/src/include/distributed/relation_restriction_equivalence.h index 1f9ef5245..13fda045f 100644 --- a/src/include/distributed/relation_restriction_equivalence.h +++ b/src/include/distributed/relation_restriction_equivalence.h @@ -15,14 +15,18 @@ #include "distributed/distributed_planner.h" -extern bool QueryContainsDistributionKeyEquality(PlannerRestrictionContext * - plannerRestrictionContext, - Query *originalQuery); +extern bool AllDistributionKeysInQueryAreEqual(Query *originalQuery, + PlannerRestrictionContext * + plannerRestrictionContext); extern bool SafeToPushdownUnionSubquery(PlannerRestrictionContext * plannerRestrictionContext); extern bool ContainsUnionSubquery(Query *queryTree); extern bool RestrictionEquivalenceForPartitionKeys(PlannerRestrictionContext * plannerRestrictionContext); +bool RestrictionEquivalenceForPartitionKeysViaEquivalances(PlannerRestrictionContext * + plannerRestrictionContext, + List * + allAttributeEquivalenceList); extern List * GenerateAllAttributeEquivalences(PlannerRestrictionContext * plannerRestrictionContext); extern uint32 ReferenceRelationCount(RelationRestrictionContext *restrictionContext); @@ -34,4 +38,8 @@ extern PlannerRestrictionContext * FilterPlannerRestrictionForQuery( extern JoinRestrictionContext * RemoveDuplicateJoinRestrictions(JoinRestrictionContext * joinRestrictionContext); +extern bool EquivalenceListContainsRelationsEquality(List *attributeEquivalenceList, + 
RelationRestrictionContext * + restrictionContext); + #endif /* RELATION_RESTRICTION_EQUIVALENCE_H */ diff --git a/src/test/regress/expected/multi_insert_select_non_pushable_queries.out b/src/test/regress/expected/multi_insert_select_non_pushable_queries.out index 69e3c46df..c03c72f05 100644 --- a/src/test/regress/expected/multi_insert_select_non_pushable_queries.out +++ b/src/test/regress/expected/multi_insert_select_non_pushable_queries.out @@ -121,6 +121,8 @@ FROM ( ERROR: the query contains a join that requires repartitioning HINT: Set citus.enable_repartition_joins to on to enable repartitioning -- the LEFT JOIN conditon is not on the partition column (i.e., is it part_key divided by 2) +-- still, recursive planning will kick in to plan some part of the query +SET client_min_messages TO DEBUG1; INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg ) SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event) FROM ( @@ -159,7 +161,15 @@ FROM ( ) t2 ON (t1.user_id = (t2.user_id)/2) GROUP BY t1.user_id, hasdone_event ) t GROUP BY user_id, hasdone_event; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries +DEBUG: Collecting INSERT ... 
SELECT results on coordinator +DEBUG: generating subplan 10_1 for subquery SELECT u.user_id, 'step=>1'::text AS event, e."time" FROM public.users_table u, public.events_table e WHERE ((u.user_id = e.user_id) AND (u.user_id >= 10) AND (u.user_id <= 25) AND (e.event_type = ANY (ARRAY[100, 101, 102]))) +DEBUG: generating subplan 10_2 for subquery SELECT u.user_id, 'step=>2'::text AS event, e."time" FROM public.users_table u, public.events_table e WHERE ((u.user_id = e.user_id) AND (u.user_id >= 10) AND (u.user_id <= 25) AND (e.event_type = ANY (ARRAY[103, 104, 105]))) +DEBUG: Plan 10 query after replacing subqueries and CTEs: SELECT intermediate_result.user_id, intermediate_result.event, intermediate_result."time" FROM read_intermediate_result('10_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, event text, "time" timestamp without time zone) UNION SELECT intermediate_result.user_id, intermediate_result.event, intermediate_result."time" FROM read_intermediate_result('10_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, event text, "time" timestamp without time zone) +DEBUG: generating subplan 9_1 for subquery SELECT u.user_id, 'step=>1'::text AS event, e."time" FROM public.users_table u, public.events_table e WHERE ((u.user_id = e.user_id) AND (u.user_id >= 10) AND (u.user_id <= 25) AND (e.event_type = ANY (ARRAY[100, 101, 102]))) UNION SELECT u.user_id, 'step=>2'::text AS event, e."time" FROM public.users_table u, public.events_table e WHERE ((u.user_id = e.user_id) AND (u.user_id >= 10) AND (u.user_id <= 25) AND (e.event_type = ANY (ARRAY[103, 104, 105]))) +ERROR: cannot pushdown the subquery +DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join +RESET client_min_messages; ------------------------------------ ------------------------------------ -- Funnel, grouped by the number of times a user has done an event @@ -235,6 +245,8 @@ ORDER BY ERROR: complex joins are only supported when 
all distributed tables are joined on their distribution columns with equal operator -- not pushable since the JOIN condition is not equi JOIN -- (subquery_1 JOIN subquery_2) +-- still, recursive planning will kick in +SET client_min_messages TO DEBUG1; INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg) SELECT user_id, @@ -300,7 +312,15 @@ GROUP BY count_pay, user_id ORDER BY count_pay; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries +DEBUG: Collecting INSERT ... SELECT results on coordinator +DEBUG: generating subplan 19_1 for subquery SELECT users_table.user_id, 'action=>1'::text AS event, events_table."time" FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (users_table.user_id >= 10) AND (users_table.user_id <= 70) AND (events_table.event_type > 10) AND (events_table.event_type < 12)) +DEBUG: generating subplan 19_2 for subquery SELECT users_table.user_id, 'action=>2'::text AS event, events_table."time" FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (users_table.user_id >= 10) AND (users_table.user_id <= 70) AND (events_table.event_type > 12) AND (events_table.event_type < 14)) +DEBUG: Plan 19 query after replacing subqueries and CTEs: SELECT intermediate_result.user_id, intermediate_result.event, intermediate_result."time" FROM read_intermediate_result('19_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, event text, "time" timestamp without time zone) UNION SELECT intermediate_result.user_id, intermediate_result.event, intermediate_result."time" FROM read_intermediate_result('19_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, event text, "time" timestamp without time zone) +DEBUG: generating subplan 18_1 for subquery SELECT 
users_table.user_id, 'action=>1'::text AS event, events_table."time" FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (users_table.user_id >= 10) AND (users_table.user_id <= 70) AND (events_table.event_type > 10) AND (events_table.event_type < 12)) UNION SELECT users_table.user_id, 'action=>2'::text AS event, events_table."time" FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (users_table.user_id >= 10) AND (users_table.user_id <= 70) AND (events_table.event_type > 12) AND (events_table.event_type < 14)) +ERROR: cannot pushdown the subquery +DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join +RESET client_min_messages; ------------------------------------ ------------------------------------ -- Most recently seen users_table events_table diff --git a/src/test/regress/expected/multi_mx_router_planner.out b/src/test/regress/expected/multi_mx_router_planner.out index 5fa9ae5ff..2f5a4e150 100644 --- a/src/test/regress/expected/multi_mx_router_planner.out +++ b/src/test/regress/expected/multi_mx_router_planner.out @@ -441,71 +441,36 @@ DEBUG: push down of limit count: 5 -- subqueries are supported in FROM clause but they are not router plannable SELECT articles_hash_mx.id,test.word_count FROM articles_hash_mx, (SELECT id, word_count FROM articles_hash_mx) AS test WHERE test.id = articles_hash_mx.id -ORDER BY articles_hash_mx.id; -DEBUG: join prunable for task partitionId 0 and 1 -DEBUG: join prunable for task partitionId 0 and 2 -DEBUG: join prunable for task partitionId 0 and 3 -DEBUG: join prunable for task partitionId 1 and 0 -DEBUG: join prunable for task partitionId 1 and 2 -DEBUG: join prunable for task partitionId 1 and 3 -DEBUG: join prunable for task partitionId 2 and 0 -DEBUG: join prunable for task partitionId 2 and 1 -DEBUG: join prunable for task partitionId 2 and 3 -DEBUG: join prunable for task partitionId 3 and 0 -DEBUG: 
join prunable for task partitionId 3 and 1 -DEBUG: join prunable for task partitionId 3 and 2 -DEBUG: pruning merge fetch taskId 1 -DETAIL: Creating dependency on merge taskId 5 -DEBUG: pruning merge fetch taskId 2 -DETAIL: Creating dependency on merge taskId 5 -DEBUG: pruning merge fetch taskId 4 -DETAIL: Creating dependency on merge taskId 8 -DEBUG: pruning merge fetch taskId 5 -DETAIL: Creating dependency on merge taskId 8 -DEBUG: pruning merge fetch taskId 7 -DETAIL: Creating dependency on merge taskId 11 -DEBUG: pruning merge fetch taskId 8 -DETAIL: Creating dependency on merge taskId 11 -DEBUG: pruning merge fetch taskId 10 -DETAIL: Creating dependency on merge taskId 14 -DEBUG: pruning merge fetch taskId 11 -DETAIL: Creating dependency on merge taskId 14 -ERROR: the query contains a join that requires repartitioning -HINT: Set citus.enable_repartition_joins to on to enable repartitioning +ORDER BY test.word_count DESC, articles_hash_mx.id LIMIT 5; +DEBUG: generating subplan 85_1 for subquery SELECT id, word_count FROM public.articles_hash_mx +DEBUG: Plan 85 query after replacing subqueries and CTEs: SELECT articles_hash_mx.id, test.word_count FROM public.articles_hash_mx, (SELECT intermediate_result.id, intermediate_result.word_count FROM read_intermediate_result('85_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, word_count integer)) test WHERE (test.id = articles_hash_mx.id) ORDER BY test.word_count DESC, articles_hash_mx.id LIMIT 5 +DEBUG: push down of limit count: 5 + id | word_count +----+------------ + 50 | 19519 + 14 | 19094 + 48 | 18610 + 12 | 18185 + 46 | 17702 +(5 rows) + SELECT articles_hash_mx.id,test.word_count FROM articles_hash_mx, (SELECT id, word_count FROM articles_hash_mx) AS test WHERE test.id = articles_hash_mx.id and articles_hash_mx.author_id = 1 ORDER BY articles_hash_mx.id; -DEBUG: join prunable for task partitionId 0 and 1 -DEBUG: join prunable for task partitionId 0 and 2 -DEBUG: join prunable for task 
partitionId 0 and 3 -DEBUG: join prunable for task partitionId 1 and 0 -DEBUG: join prunable for task partitionId 1 and 2 -DEBUG: join prunable for task partitionId 1 and 3 -DEBUG: join prunable for task partitionId 2 and 0 -DEBUG: join prunable for task partitionId 2 and 1 -DEBUG: join prunable for task partitionId 2 and 3 -DEBUG: join prunable for task partitionId 3 and 0 -DEBUG: join prunable for task partitionId 3 and 1 -DEBUG: join prunable for task partitionId 3 and 2 -DEBUG: pruning merge fetch taskId 1 -DETAIL: Creating dependency on merge taskId 3 -DEBUG: pruning merge fetch taskId 2 -DETAIL: Creating dependency on merge taskId 5 -DEBUG: pruning merge fetch taskId 4 -DETAIL: Creating dependency on merge taskId 5 -DEBUG: pruning merge fetch taskId 5 -DETAIL: Creating dependency on merge taskId 8 -DEBUG: pruning merge fetch taskId 7 -DETAIL: Creating dependency on merge taskId 7 -DEBUG: pruning merge fetch taskId 8 -DETAIL: Creating dependency on merge taskId 11 -DEBUG: pruning merge fetch taskId 10 -DETAIL: Creating dependency on merge taskId 9 -DEBUG: pruning merge fetch taskId 11 -DETAIL: Creating dependency on merge taskId 14 -ERROR: the query contains a join that requires repartitioning -HINT: Set citus.enable_repartition_joins to on to enable repartitioning +DEBUG: generating subplan 87_1 for subquery SELECT id, word_count FROM public.articles_hash_mx +DEBUG: Plan 87 query after replacing subqueries and CTEs: SELECT articles_hash_mx.id, test.word_count FROM public.articles_hash_mx, (SELECT intermediate_result.id, intermediate_result.word_count FROM read_intermediate_result('87_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, word_count integer)) test WHERE ((test.id = articles_hash_mx.id) AND (articles_hash_mx.author_id = 1)) ORDER BY articles_hash_mx.id +DEBUG: Creating router plan +DEBUG: Plan is router executable + id | word_count +----+------------ + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + -- 
subqueries are not supported in SELECT clause SELECT a.title AS name, (SELECT a2.id FROM articles_single_shard_hash_mx a2 WHERE a.id = a2.id LIMIT 1) AS special_price FROM articles_hash_mx a; @@ -610,10 +575,10 @@ SELECT a.author_id as first_author, b.word_count as second_word_count WHERE a.author_id = 2 and a.author_id = b.author_id LIMIT 3; DEBUG: Found no worker with all shard placements -DEBUG: generating subplan 94_1 for CTE single_shard: SELECT id, author_id, title, word_count FROM public.articles_single_shard_hash_mx +DEBUG: generating subplan 96_1 for CTE single_shard: SELECT id, author_id, title, word_count FROM public.articles_single_shard_hash_mx DEBUG: Creating router plan DEBUG: Plan is router executable -DEBUG: Plan 94 query after replacing subqueries and CTEs: SELECT a.author_id AS first_author, b.word_count AS second_word_count FROM public.articles_hash_mx a, (SELECT intermediate_result.id, intermediate_result.author_id, intermediate_result.title, intermediate_result.word_count FROM read_intermediate_result('94_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint, title character varying(20), word_count integer)) b WHERE ((a.author_id = 2) AND (a.author_id = b.author_id)) LIMIT 3 +DEBUG: Plan 96 query after replacing subqueries and CTEs: SELECT a.author_id AS first_author, b.word_count AS second_word_count FROM public.articles_hash_mx a, (SELECT intermediate_result.id, intermediate_result.author_id, intermediate_result.title, intermediate_result.word_count FROM read_intermediate_result('96_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint, title character varying(20), word_count integer)) b WHERE ((a.author_id = 2) AND (a.author_id = b.author_id)) LIMIT 3 DEBUG: Creating router plan DEBUG: Plan is router executable first_author | second_word_count @@ -798,9 +763,9 @@ SET client_min_messages TO DEBUG1; UNION (SELECT * FROM articles_hash_mx WHERE author_id = 2) ORDER BY 1,2; 
-DEBUG: generating subplan 108_1 for subquery SELECT id, author_id, title, word_count FROM public.articles_hash_mx WHERE (author_id = 1) -DEBUG: generating subplan 108_2 for subquery SELECT id, author_id, title, word_count FROM public.articles_hash_mx WHERE (author_id = 2) -DEBUG: Plan 108 query after replacing subqueries and CTEs: SELECT intermediate_result.id, intermediate_result.author_id, intermediate_result.title, intermediate_result.word_count FROM read_intermediate_result('108_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint, title character varying(20), word_count integer) UNION SELECT intermediate_result.id, intermediate_result.author_id, intermediate_result.title, intermediate_result.word_count FROM read_intermediate_result('108_2'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint, title character varying(20), word_count integer) ORDER BY 1, 2 +DEBUG: generating subplan 110_1 for subquery SELECT id, author_id, title, word_count FROM public.articles_hash_mx WHERE (author_id = 1) +DEBUG: generating subplan 110_2 for subquery SELECT id, author_id, title, word_count FROM public.articles_hash_mx WHERE (author_id = 2) +DEBUG: Plan 110 query after replacing subqueries and CTEs: SELECT intermediate_result.id, intermediate_result.author_id, intermediate_result.title, intermediate_result.word_count FROM read_intermediate_result('110_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint, title character varying(20), word_count integer) UNION SELECT intermediate_result.id, intermediate_result.author_id, intermediate_result.title, intermediate_result.word_count FROM read_intermediate_result('110_2'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint, title character varying(20), word_count integer) ORDER BY 1, 2 id | author_id | title | word_count ----+-----------+--------------+------------ 1 | 1 | arsenous | 9572 diff --git 
a/src/test/regress/expected/multi_router_planner.out b/src/test/regress/expected/multi_router_planner.out index 64cc2548f..3f7215f17 100644 --- a/src/test/regress/expected/multi_router_planner.out +++ b/src/test/regress/expected/multi_router_planner.out @@ -556,71 +556,36 @@ DEBUG: Plan is router executable -- subqueries are supported in FROM clause but they are not router plannable SELECT articles_hash.id,test.word_count FROM articles_hash, (SELECT id, word_count FROM articles_hash) AS test WHERE test.id = articles_hash.id -ORDER BY articles_hash.id; -DEBUG: join prunable for task partitionId 0 and 1 -DEBUG: join prunable for task partitionId 0 and 2 -DEBUG: join prunable for task partitionId 0 and 3 -DEBUG: join prunable for task partitionId 1 and 0 -DEBUG: join prunable for task partitionId 1 and 2 -DEBUG: join prunable for task partitionId 1 and 3 -DEBUG: join prunable for task partitionId 2 and 0 -DEBUG: join prunable for task partitionId 2 and 1 -DEBUG: join prunable for task partitionId 2 and 3 -DEBUG: join prunable for task partitionId 3 and 0 -DEBUG: join prunable for task partitionId 3 and 1 -DEBUG: join prunable for task partitionId 3 and 2 -DEBUG: pruning merge fetch taskId 1 -DETAIL: Creating dependency on merge taskId 5 -DEBUG: pruning merge fetch taskId 2 -DETAIL: Creating dependency on merge taskId 5 -DEBUG: pruning merge fetch taskId 4 -DETAIL: Creating dependency on merge taskId 8 -DEBUG: pruning merge fetch taskId 5 -DETAIL: Creating dependency on merge taskId 8 -DEBUG: pruning merge fetch taskId 7 -DETAIL: Creating dependency on merge taskId 11 -DEBUG: pruning merge fetch taskId 8 -DETAIL: Creating dependency on merge taskId 11 -DEBUG: pruning merge fetch taskId 10 -DETAIL: Creating dependency on merge taskId 14 -DEBUG: pruning merge fetch taskId 11 -DETAIL: Creating dependency on merge taskId 14 -ERROR: the query contains a join that requires repartitioning -HINT: Set citus.enable_repartition_joins to on to enable repartitioning +ORDER BY 
test.word_count DESC, articles_hash.id LIMIT 5; +DEBUG: generating subplan 88_1 for subquery SELECT id, word_count FROM public.articles_hash +DEBUG: Plan 88 query after replacing subqueries and CTEs: SELECT articles_hash.id, test.word_count FROM public.articles_hash, (SELECT intermediate_result.id, intermediate_result.word_count FROM read_intermediate_result('88_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, word_count integer)) test WHERE (test.id = articles_hash.id) ORDER BY test.word_count DESC, articles_hash.id LIMIT 5 +DEBUG: push down of limit count: 5 + id | word_count +----+------------ + 50 | 19519 + 14 | 19094 + 48 | 18610 + 12 | 18185 + 46 | 17702 +(5 rows) + SELECT articles_hash.id,test.word_count FROM articles_hash, (SELECT id, word_count FROM articles_hash) AS test WHERE test.id = articles_hash.id and articles_hash.author_id = 1 ORDER BY articles_hash.id; -DEBUG: join prunable for task partitionId 0 and 1 -DEBUG: join prunable for task partitionId 0 and 2 -DEBUG: join prunable for task partitionId 0 and 3 -DEBUG: join prunable for task partitionId 1 and 0 -DEBUG: join prunable for task partitionId 1 and 2 -DEBUG: join prunable for task partitionId 1 and 3 -DEBUG: join prunable for task partitionId 2 and 0 -DEBUG: join prunable for task partitionId 2 and 1 -DEBUG: join prunable for task partitionId 2 and 3 -DEBUG: join prunable for task partitionId 3 and 0 -DEBUG: join prunable for task partitionId 3 and 1 -DEBUG: join prunable for task partitionId 3 and 2 -DEBUG: pruning merge fetch taskId 1 -DETAIL: Creating dependency on merge taskId 3 -DEBUG: pruning merge fetch taskId 2 -DETAIL: Creating dependency on merge taskId 5 -DEBUG: pruning merge fetch taskId 4 -DETAIL: Creating dependency on merge taskId 5 -DEBUG: pruning merge fetch taskId 5 -DETAIL: Creating dependency on merge taskId 8 -DEBUG: pruning merge fetch taskId 7 -DETAIL: Creating dependency on merge taskId 7 -DEBUG: pruning merge fetch taskId 8 -DETAIL: Creating 
dependency on merge taskId 11 -DEBUG: pruning merge fetch taskId 10 -DETAIL: Creating dependency on merge taskId 9 -DEBUG: pruning merge fetch taskId 11 -DETAIL: Creating dependency on merge taskId 14 -ERROR: the query contains a join that requires repartitioning -HINT: Set citus.enable_repartition_joins to on to enable repartitioning +DEBUG: generating subplan 90_1 for subquery SELECT id, word_count FROM public.articles_hash +DEBUG: Plan 90 query after replacing subqueries and CTEs: SELECT articles_hash.id, test.word_count FROM public.articles_hash, (SELECT intermediate_result.id, intermediate_result.word_count FROM read_intermediate_result('90_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, word_count integer)) test WHERE ((test.id = articles_hash.id) AND (articles_hash.author_id = 1)) ORDER BY articles_hash.id +DEBUG: Creating router plan +DEBUG: Plan is router executable + id | word_count +----+------------ + 1 | 9572 + 11 | 1347 + 21 | 5890 + 31 | 7271 + 41 | 11814 +(5 rows) + -- subqueries are not supported in SELECT clause SELECT a.title AS name, (SELECT a2.id FROM articles_single_shard_hash a2 WHERE a.id = a2.id LIMIT 1) AS special_price FROM articles_hash a; @@ -725,10 +690,10 @@ SELECT a.author_id as first_author, b.word_count as second_word_count WHERE a.author_id = 2 and a.author_id = b.author_id LIMIT 3; DEBUG: Found no worker with all shard placements -DEBUG: generating subplan 97_1 for CTE single_shard: SELECT id, author_id, title, word_count FROM public.articles_single_shard_hash +DEBUG: generating subplan 99_1 for CTE single_shard: SELECT id, author_id, title, word_count FROM public.articles_single_shard_hash DEBUG: Creating router plan DEBUG: Plan is router executable -DEBUG: Plan 97 query after replacing subqueries and CTEs: SELECT a.author_id AS first_author, b.word_count AS second_word_count FROM public.articles_hash a, (SELECT intermediate_result.id, intermediate_result.author_id, intermediate_result.title, 
intermediate_result.word_count FROM read_intermediate_result('97_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint, title character varying(20), word_count integer)) b WHERE ((a.author_id = 2) AND (a.author_id = b.author_id)) LIMIT 3 +DEBUG: Plan 99 query after replacing subqueries and CTEs: SELECT a.author_id AS first_author, b.word_count AS second_word_count FROM public.articles_hash a, (SELECT intermediate_result.id, intermediate_result.author_id, intermediate_result.title, intermediate_result.word_count FROM read_intermediate_result('99_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint, title character varying(20), word_count integer)) b WHERE ((a.author_id = 2) AND (a.author_id = b.author_id)) LIMIT 3 DEBUG: Creating router plan DEBUG: Plan is router executable first_author | second_word_count diff --git a/src/test/regress/expected/multi_simple_queries_0.out b/src/test/regress/expected/multi_simple_queries_0.out index 7f681d277..b52940258 100644 --- a/src/test/regress/expected/multi_simple_queries_0.out +++ b/src/test/regress/expected/multi_simple_queries_0.out @@ -215,60 +215,7 @@ ERROR: Complex subqueries and CTEs are not supported when task_executor_type is SELECT articles.id,test.word_count FROM articles, (SELECT id, word_count FROM articles) AS test WHERE test.id = articles.id ORDER BY articles.id; - id | word_count -----+------------ - 1 | 9572 - 2 | 13642 - 3 | 10480 - 4 | 14551 - 5 | 11389 - 6 | 15459 - 7 | 12298 - 8 | 16368 - 9 | 438 - 10 | 17277 - 11 | 1347 - 12 | 18185 - 13 | 2255 - 14 | 19094 - 15 | 3164 - 16 | 2 - 17 | 4073 - 18 | 911 - 19 | 4981 - 20 | 1820 - 21 | 5890 - 22 | 2728 - 23 | 6799 - 24 | 3637 - 25 | 7707 - 26 | 4545 - 27 | 8616 - 28 | 5454 - 29 | 9524 - 30 | 6363 - 31 | 7271 - 32 | 11342 - 33 | 8180 - 34 | 12250 - 35 | 9089 - 36 | 13159 - 37 | 9997 - 38 | 14067 - 39 | 10906 - 40 | 14976 - 41 | 11814 - 42 | 15885 - 43 | 12723 - 44 | 16793 - 45 | 864 - 46 | 17702 
- 47 | 1772 - 48 | 18610 - 49 | 2681 - 50 | 19519 -(50 rows) - +ERROR: Complex subqueries and CTEs are not supported when task_executor_type is set to 'task-tracker' -- subqueries are not supported in SELECT clause SELECT a.title AS name, (SELECT a2.id FROM articles_single_shard a2 WHERE a.id = a2.id LIMIT 1) AS special_price FROM articles a; diff --git a/src/test/regress/expected/multi_subquery_behavioral_analytics.out b/src/test/regress/expected/multi_subquery_behavioral_analytics.out index f919fc19b..db4877dd8 100644 --- a/src/test/regress/expected/multi_subquery_behavioral_analytics.out +++ b/src/test/regress/expected/multi_subquery_behavioral_analytics.out @@ -779,7 +779,8 @@ SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM assets; DROP TABLE assets; -- count number of distinct users who have value_1 equal to 5 or 13 but not 3 --- original query that fails +-- is recusrively planned +SET client_min_messages TO DEBUG1; SELECT count(*) FROM ( SELECT @@ -794,7 +795,14 @@ SELECT count(*) FROM HAVING count(distinct value_1) = 2 ) as foo; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +DEBUG: generating subplan 23_1 for subquery SELECT user_id FROM public.users_table WHERE (value_1 = 4) +DEBUG: Plan 23 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT users_table.user_id FROM public.users_table WHERE (((users_table.value_1 = 1) OR (users_table.value_1 = 3)) AND (NOT (users_table.user_id IN (SELECT intermediate_result.user_id FROM read_intermediate_result('23_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer))))) GROUP BY users_table.user_id HAVING (count(DISTINCT users_table.value_1) = 2)) foo + count +------- + 1 +(1 row) + +RESET client_min_messages; -- previous push down query SELECT subquery_count FROM (SELECT count(*) as subquery_count FROM diff --git a/src/test/regress/expected/multi_subquery_complex_queries.out 
b/src/test/regress/expected/multi_subquery_complex_queries.out index b139ee711..a41ca453f 100644 --- a/src/test/regress/expected/multi_subquery_complex_queries.out +++ b/src/test/regress/expected/multi_subquery_complex_queries.out @@ -442,7 +442,8 @@ DEBUG: Plan 16 query after replacing subqueries and CTEs: SELECT event_types AS RESET client_min_messages; SET citus.enable_repartition_joins to OFF; --- not supported since the join is not equi join +-- recursively planned since the join is not equi join +SET client_min_messages TO DEBUG1; SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM ( SELECT *, random() @@ -503,7 +504,17 @@ GROUP BY types ORDER BY types; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +DEBUG: generating subplan 22_1 for subquery SELECT user_id, "time", unnest(collected_events) AS event_types FROM (SELECT t1.user_id, min(t1."time") AS "time", array_agg(t1.event ORDER BY t1."time", t1.event DESC) AS collected_events FROM (SELECT events_subquery_1.user_id, events_subquery_1."time", events_subquery_1.event FROM (SELECT events.user_id, events."time", 0 AS event FROM public.events_table events WHERE (events.event_type = ANY (ARRAY[1, 2]))) events_subquery_1 UNION SELECT events_subquery_2.user_id, events_subquery_2."time", events_subquery_2.event FROM (SELECT events.user_id, events."time", 1 AS event FROM public.events_table events WHERE (events.event_type = ANY (ARRAY[3, 4]))) events_subquery_2 UNION SELECT events_subquery_3.user_id, events_subquery_3."time", events_subquery_3.event FROM (SELECT events.user_id, events."time", 2 AS event FROM public.events_table events WHERE (events.event_type = ANY (ARRAY[5, 6]))) events_subquery_3 UNION SELECT events_subquery_4.user_id, events_subquery_4."time", events_subquery_4.event FROM (SELECT events.user_id, events."time", 3 AS event FROM public.events_table events WHERE (events.event_type = ANY 
(ARRAY[4, 5]))) events_subquery_4) t1 GROUP BY t1.user_id) t +DEBUG: Plan 22 query after replacing subqueries and CTEs: SELECT event_types AS types, count(*) AS sumofeventtype FROM (SELECT q.user_id, q."time", q.event_types, t.user_id, random() AS random FROM ((SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.event_types FROM read_intermediate_result('22_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, event_types integer)) q JOIN (SELECT users.user_id FROM public.users_table users WHERE ((users.value_1 > 0) AND (users.value_1 < 4))) t ON ((t.user_id <> q.user_id)))) final_query(user_id, "time", event_types, user_id_1, random) GROUP BY event_types ORDER BY event_types + types | sumofeventtype +-------+---------------- + 0 | 2088 + 1 | 2163 + 2 | 397 + 3 | 1397 +(4 rows) + +RESET client_min_messages; -- not supported since subquery 3 includes a JOIN with non-equi join SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM @@ -981,8 +992,9 @@ LIMIT 10; 6 | 72 (2 rows) --- not supported since the join between t and t2 is not equi join +-- recursively planned since the join between t and t2 is not equi join -- union all with inner and left joins +SET client_min_messages TO DEBUG1; SELECT user_id, count(*) as cnt FROM (SELECT first_query.user_id, random() @@ -1054,7 +1066,15 @@ INNER JOIN GROUP BY user_id ORDER BY cnt DESC, user_id DESC LIMIT 10; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +DEBUG: generating subplan 42_1 for subquery SELECT DISTINCT user_id FROM public.events_table events WHERE (event_type = ANY (ARRAY[0, 6])) GROUP BY user_id +DEBUG: Plan 42 query after replacing subqueries and CTEs: SELECT user_id, count(*) AS cnt FROM (SELECT first_query.user_id, random() AS random FROM ((SELECT t.user_id, t."time", unnest(t.collected_events) AS event_types FROM 
(SELECT t1.user_id, min(t1."time") AS "time", array_agg(t1.event ORDER BY t1."time", t1.event DESC) AS collected_events FROM (SELECT events_subquery_1.user_id, events_subquery_1."time", events_subquery_1.event FROM (SELECT events.user_id, events."time", 0 AS event FROM public.events_table events WHERE (events.event_type = ANY (ARRAY[1, 2]))) events_subquery_1 UNION ALL SELECT events_subquery_2.user_id, events_subquery_2."time", events_subquery_2.event FROM (SELECT events.user_id, events."time", 1 AS event FROM public.events_table events WHERE (events.event_type = ANY (ARRAY[3, 4]))) events_subquery_2 UNION ALL SELECT events_subquery_3.user_id, events_subquery_3."time", events_subquery_3.event FROM (SELECT events.user_id, events."time", 2 AS event FROM public.events_table events WHERE (events.event_type = ANY (ARRAY[5, 6]))) events_subquery_3 UNION ALL SELECT events_subquery_4.user_id, events_subquery_4."time", events_subquery_4.event FROM (SELECT events.user_id, events."time", 3 AS event FROM public.events_table events WHERE (events.event_type = ANY (ARRAY[1, 6]))) events_subquery_4) t1 GROUP BY t1.user_id) t) first_query JOIN (SELECT t.user_id FROM ((SELECT users.user_id FROM public.users_table users WHERE ((users.value_1 > 0) AND (users.value_1 < 4))) t LEFT JOIN (SELECT intermediate_result.user_id FROM read_intermediate_result('42_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) t2 ON ((t2.user_id > t.user_id))) WHERE (t2.user_id IS NULL)) second_query ON ((first_query.user_id = second_query.user_id)))) final_query GROUP BY user_id ORDER BY (count(*)) DESC, user_id DESC LIMIT 10 + user_id | cnt +---------+----- + 5 | 324 + 6 | 72 +(2 rows) + +RESET client_min_messages; -- -- Union, inner join and left join -- @@ -1425,7 +1445,8 @@ LIMIT 10; (1 row) SET citus.subquery_pushdown to OFF; --- not supported since the inner JOIN is not equi join +-- not supported since the inner JOIN is not equi join and LATERAL JOIN prevents recursive 
planning +SET client_min_messages TO DEBUG2; SELECT user_id, lastseen FROM (SELECT @@ -1478,7 +1499,11 @@ FROM ORDER BY user_id DESC LIMIT 10; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +DEBUG: generating subplan 53_1 for subquery SELECT user_id FROM public.users_table users WHERE ((user_id > 1) AND (user_id < 4) AND (value_2 > 3)) +DEBUG: skipping recursive planning for the subquery since it contains references to outer queries +DEBUG: skipping recursive planning for the subquery since it contains references to outer queries +ERROR: cannot push down this subquery +DETAIL: Limit in subquery is currently unsupported when a subquery references a column from another query SET citus.enable_repartition_joins to ON; SET client_min_messages TO DEBUG1; -- recursively planner since the inner JOIN is not on the partition key @@ -1534,9 +1559,7 @@ FROM ORDER BY user_id DESC LIMIT 10; -DEBUG: cannot use real time executor with repartition jobs -HINT: Since you enabled citus.enable_repartition_joins Citus chose to use task-tracker. 
-DEBUG: generating subplan 55_1 for subquery SELECT user_where_1_1.user_id FROM ((SELECT users.user_id FROM public.users_table users WHERE ((users.user_id > 1) AND (users.user_id < 4) AND (users.value_1 > 2))) user_where_1_1 JOIN (SELECT users.user_id, users.value_1 FROM public.users_table users WHERE ((users.user_id > 1) AND (users.user_id < 4) AND (users.value_2 > 3))) user_where_1_join_1 ON ((user_where_1_1.user_id = user_where_1_join_1.value_1))) +DEBUG: generating subplan 56_1 for subquery SELECT user_id, value_1 FROM public.users_table users WHERE ((user_id > 1) AND (user_id < 4) AND (value_2 > 3)) ERROR: cannot push down this subquery DETAIL: Limit in subquery is currently unsupported when a subquery references a column from another query SET citus.enable_repartition_joins to OFF; @@ -1746,11 +1769,8 @@ GROUP BY "generated_group_field" ORDER BY generated_group_field DESC, value DESC; -DEBUG: cannot use real time executor with repartition jobs -HINT: Since you enabled citus.enable_repartition_joins Citus chose to use task-tracker. 
-DEBUG: generating subplan 63_1 for subquery SELECT user_where_1_1.real_user_id FROM ((SELECT users.user_id AS real_user_id FROM public.users_table users WHERE ((users.user_id > 1) AND (users.user_id < 4) AND (users.value_2 > 3))) user_where_1_1 JOIN (SELECT users.user_id, users.value_2 FROM public.users_table users WHERE ((users.user_id > 1) AND (users.user_id < 4) AND (users.value_3 > (3)::double precision))) user_where_1_join_1 ON ((user_where_1_1.real_user_id = user_where_1_join_1.value_2))) -DEBUG: generating subplan 63_2 for subquery SELECT DISTINCT real_user_id, generated_group_field FROM (SELECT "eventQuery".real_user_id, "eventQuery"."time", random() AS random, "eventQuery".value_2 AS generated_group_field FROM (SELECT temp_data_queries."time", temp_data_queries.user_id, temp_data_queries.value_2, user_filters_1.real_user_id FROM ((SELECT events."time", events.user_id, events.value_2 FROM public.events_table events WHERE ((events.user_id > 1) AND (events.user_id < 4) AND (events.event_type = ANY (ARRAY[4, 5])))) temp_data_queries JOIN (SELECT intermediate_result.real_user_id FROM read_intermediate_result('63_1'::text, 'binary'::citus_copy_format) intermediate_result(real_user_id integer)) user_filters_1 ON ((temp_data_queries.user_id = user_filters_1.real_user_id)))) "eventQuery") "pushedDownQuery" -DEBUG: Plan 63 query after replacing subqueries and CTEs: SELECT count(*) AS value, generated_group_field FROM (SELECT intermediate_result.real_user_id, intermediate_result.generated_group_field FROM read_intermediate_result('63_2'::text, 'binary'::citus_copy_format) intermediate_result(real_user_id integer, generated_group_field integer)) "pushedDownQuery" GROUP BY generated_group_field ORDER BY generated_group_field DESC, (count(*)) DESC +DEBUG: generating subplan 64_1 for subquery SELECT user_id, value_2 FROM public.users_table users WHERE ((user_id > 1) AND (user_id < 4) AND (value_3 > (3)::double precision)) +DEBUG: Plan 64 query after replacing subqueries 
and CTEs: SELECT count(*) AS value, generated_group_field FROM (SELECT DISTINCT "pushedDownQuery_1".real_user_id, "pushedDownQuery_1".generated_group_field FROM (SELECT "eventQuery".real_user_id, "eventQuery"."time", random() AS random, "eventQuery".value_2 AS generated_group_field FROM (SELECT temp_data_queries."time", temp_data_queries.user_id, temp_data_queries.value_2, user_filters_1.real_user_id FROM ((SELECT events."time", events.user_id, events.value_2 FROM public.events_table events WHERE ((events.user_id > 1) AND (events.user_id < 4) AND (events.event_type = ANY (ARRAY[4, 5])))) temp_data_queries JOIN (SELECT user_where_1_1.real_user_id FROM ((SELECT users.user_id AS real_user_id FROM public.users_table users WHERE ((users.user_id > 1) AND (users.user_id < 4) AND (users.value_2 > 3))) user_where_1_1 JOIN (SELECT intermediate_result.user_id, intermediate_result.value_2 FROM read_intermediate_result('64_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_2 integer)) user_where_1_join_1 ON ((user_where_1_1.real_user_id = user_where_1_join_1.value_2)))) user_filters_1 ON ((temp_data_queries.user_id = user_filters_1.real_user_id)))) "eventQuery") "pushedDownQuery_1") "pushedDownQuery" GROUP BY generated_group_field ORDER BY generated_group_field DESC, (count(*)) DESC value | generated_group_field -------+----------------------- 1 | 5 @@ -1759,7 +1779,7 @@ DEBUG: Plan 63 query after replacing subqueries and CTEs: SELECT count(*) AS va 1 | 0 (4 rows) --- recursive planning didn't kick-in since the non-equi join is among subqueries +-- recursive planning kicked-in since the non-equi join is among subqueries SELECT count(*) AS value, "generated_group_field" FROM @@ -1801,7 +1821,16 @@ GROUP BY "generated_group_field" ORDER BY generated_group_field DESC, value DESC; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +DEBUG: generating subplan 66_1 for 
subquery SELECT user_id, value_2 FROM public.users_table users WHERE ((user_id > 1) AND (user_id < 4) AND (value_3 > (3)::double precision)) +DEBUG: Plan 66 query after replacing subqueries and CTEs: SELECT count(*) AS value, generated_group_field FROM (SELECT DISTINCT "pushedDownQuery_1".real_user_id, "pushedDownQuery_1".generated_group_field FROM (SELECT "eventQuery".real_user_id, "eventQuery"."time", random() AS random, "eventQuery".value_2 AS generated_group_field FROM (SELECT temp_data_queries."time", temp_data_queries.user_id, temp_data_queries.value_2, user_filters_1.real_user_id FROM ((SELECT events."time", events.user_id, events.value_2 FROM public.events_table events WHERE ((events.user_id > 1) AND (events.user_id < 4) AND (events.event_type = ANY (ARRAY[4, 5])))) temp_data_queries JOIN (SELECT user_where_1_1.real_user_id FROM ((SELECT users.user_id AS real_user_id FROM public.users_table users WHERE ((users.user_id > 1) AND (users.user_id < 4) AND (users.value_2 > 3))) user_where_1_1 JOIN (SELECT intermediate_result.user_id, intermediate_result.value_2 FROM read_intermediate_result('66_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_2 integer)) user_where_1_join_1 ON ((user_where_1_1.real_user_id >= user_where_1_join_1.user_id)))) user_filters_1 ON ((temp_data_queries.user_id = user_filters_1.real_user_id)))) "eventQuery") "pushedDownQuery_1") "pushedDownQuery" GROUP BY generated_group_field ORDER BY generated_group_field DESC, (count(*)) DESC + value | generated_group_field +-------+----------------------- + 1 | 5 + 2 | 2 + 2 | 1 + 1 | 0 +(4 rows) + SET citus.enable_repartition_joins to OFF; RESET client_min_messages; @@ -1854,8 +1883,9 @@ ORDER BY cnt, value_3 DESC LIMIT 10; SET citus.enable_repartition_joins to ON; SET client_min_messages TO DEBUG1; --- not supported since there is no column equality at all --- but still recursive planning is tried +-- although there is no column equality at all +-- still recursive 
planning plans "some_users_data" +-- and the query becomes OK SELECT "value_3", count(*) AS cnt FROM @@ -1892,8 +1922,18 @@ FROM ) segmentalias_1) "tempQuery" GROUP BY "value_3" ORDER BY cnt, value_3 DESC LIMIT 10; -ERROR: cannot perform distributed planning on this query -DETAIL: Cartesian products are currently unsupported +DEBUG: generating subplan 69_1 for subquery SELECT user_id FROM public.users_table users WHERE ((user_id > 1) AND (user_id < 4) AND (value_2 > 3)) +DEBUG: Plan 69 query after replacing subqueries and CTEs: SELECT value_3, count(*) AS cnt FROM (SELECT segmentalias_1.value_3, segmentalias_1.user_id, random() AS random FROM (SELECT users_in_segment_1.user_id, users_in_segment_1.value_3 FROM ((SELECT all_buckets_1.user_id, (all_buckets_1.value_3 * (2)::double precision) AS value_3 FROM (SELECT simple_user_where_1.user_id, simple_user_where_1.value_3 FROM (SELECT users.user_id, users.value_3 FROM public.users_table users WHERE ((users.user_id > 1) AND (users.user_id < 4) AND (users.value_2 > 2))) simple_user_where_1) all_buckets_1) users_in_segment_1 JOIN (SELECT intermediate_result.user_id FROM read_intermediate_result('69_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) some_users_data ON (true))) segmentalias_1) "tempQuery" GROUP BY value_3 ORDER BY (count(*)), value_3 DESC LIMIT 10 + value_3 | cnt +---------+----- + 0 | 14 + 10 | 42 + 4 | 42 + 8 | 56 + 6 | 56 + 2 | 70 +(6 rows) + SET citus.enable_repartition_joins to OFF; RESET client_min_messages; -- nested LATERAL JOINs diff --git a/src/test/regress/expected/multi_subquery_complex_reference_clause.out b/src/test/regress/expected/multi_subquery_complex_reference_clause.out index c8b35ef33..c02bbf347 100644 --- a/src/test/regress/expected/multi_subquery_complex_reference_clause.out +++ b/src/test/regress/expected/multi_subquery_complex_reference_clause.out @@ -1171,16 +1171,24 @@ SELECT count(*) FROM ON user_buy_test_table.item_id > users_ref_test_table.id) 
subquery_1; ERROR: cannot pushdown the subquery DETAIL: There exist a reference table in the outer part of the outer join --- we don't allow non equi join among hash partitioned tables +-- we do allow non equi join among subqueries via recursive planning +SET client_min_messages TO DEBUG1; SELECT count(*) FROM (SELECT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1, (SELECT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_2 WHERE subquery_1.user_id != subquery_2.user_id ; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator --- we cannot push this query since hash partitioned tables --- are not joined on partition keys with equality +DEBUG: generating subplan 79_1 for subquery SELECT user_buy_test_table.user_id, random() AS random FROM (public.user_buy_test_table LEFT JOIN public.users_ref_test_table ON ((user_buy_test_table.user_id > users_ref_test_table.id))) +DEBUG: Plan 79 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT user_buy_test_table.user_id, random() AS random FROM (public.user_buy_test_table LEFT JOIN public.users_ref_test_table ON ((user_buy_test_table.item_id > users_ref_test_table.id)))) subquery_1, (SELECT intermediate_result.user_id, intermediate_result.random FROM read_intermediate_result('79_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, random double precision)) subquery_2 WHERE (subquery_1.user_id <> subquery_2.user_id) + count +------- + 67 +(1 row) + +-- we could not push this query not due to non colocated +-- subqueries (i.e., they are recursively planned) +-- but due to outer join restrictions SELECT count(*) AS cnt, "generated_group_field" FROM @@ -1217,7 +1225,11 @@ count(*) AS 
cnt, "generated_group_field" ORDER BY cnt DESC, generated_group_field ASC LIMIT 10; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +DEBUG: generating subplan 81_1 for subquery SELECT user_id, value_2 AS generated_group_field FROM public.users_table users +DEBUG: Plan 81 query after replacing subqueries and CTEs: SELECT count(*) AS cnt, generated_group_field FROM (SELECT "eventQuery".user_id, random() AS random, "eventQuery".generated_group_field FROM (SELECT multi_group_wrapper_1."time", multi_group_wrapper_1.event_user_id, multi_group_wrapper_1.user_id, left_group_by_1.generated_group_field, random() AS random FROM ((SELECT temp_data_queries."time", temp_data_queries.event_user_id, user_filters_1.user_id FROM ((SELECT events."time", events.user_id AS event_user_id FROM public.events_table events WHERE (events.user_id > 2)) temp_data_queries JOIN (SELECT users.user_id FROM public.users_reference_table users WHERE ((users.user_id > 2) AND (users.value_2 = 5))) user_filters_1 ON ((temp_data_queries.event_user_id < user_filters_1.user_id)))) multi_group_wrapper_1 RIGHT JOIN (SELECT intermediate_result.user_id, intermediate_result.generated_group_field FROM read_intermediate_result('81_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, generated_group_field integer)) left_group_by_1 ON ((left_group_by_1.user_id > multi_group_wrapper_1.event_user_id)))) "eventQuery") "pushedDownQuery" GROUP BY generated_group_field ORDER BY (count(*)) DESC, generated_group_field LIMIT 10 +ERROR: cannot pushdown the subquery +DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join +RESET client_min_messages; -- two hash partitioned relations are not joined -- on partiton keys although reference table is fine -- to push down diff --git a/src/test/regress/expected/multi_subquery_in_where_clause.out 
b/src/test/regress/expected/multi_subquery_in_where_clause.out index 5040ef724..83c061abf 100644 --- a/src/test/regress/expected/multi_subquery_in_where_clause.out +++ b/src/test/regress/expected/multi_subquery_in_where_clause.out @@ -646,13 +646,24 @@ WHERE user_id 5 (2 rows) --- semi join is not on the partition key for the third subquery +-- semi join is not on the partition key for the third subquery, and recursively planned +SET client_min_messages TO DEBUG1; SELECT user_id FROM users_table WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 1 AND value_1 <= 2) AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 3 AND value_1 <= 4) - AND value_2 IN (SELECT user_id FROM users_table WHERE value_1 >= 5 AND value_1 <= 6); -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator + AND value_2 IN (SELECT user_id FROM users_table WHERE value_1 >= 5 AND value_1 <= 6) ORDER BY 1 DESC LIMIT 3; +DEBUG: generating subplan 26_1 for subquery SELECT user_id FROM public.users_table WHERE ((value_1 >= 5) AND (value_1 <= 6)) +DEBUG: Plan 26 query after replacing subqueries and CTEs: SELECT user_id FROM public.users_table WHERE ((user_id IN (SELECT users_table_1.user_id FROM public.users_table users_table_1 WHERE ((users_table_1.value_1 >= 1) AND (users_table_1.value_1 <= 2)))) AND (user_id IN (SELECT users_table_1.user_id FROM public.users_table users_table_1 WHERE ((users_table_1.value_1 >= 3) AND (users_table_1.value_1 <= 4)))) AND (value_2 IN (SELECT intermediate_result.user_id FROM read_intermediate_result('26_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)))) ORDER BY user_id DESC LIMIT 3 +DEBUG: push down of limit count: 3 + user_id +--------- + 6 + 6 + 6 +(3 rows) + +RESET client_min_messages; CREATE FUNCTION test_join_function(integer, integer) RETURNS bool AS 'select $1 > $2;' LANGUAGE SQL diff --git a/src/test/regress/expected/multi_view.out 
b/src/test/regress/expected/multi_view.out index a34c38b16..10d5f6f93 100644 --- a/src/test/regress/expected/multi_view.out +++ b/src/test/regress/expected/multi_view.out @@ -172,17 +172,27 @@ SELECT o_orderkey, l_linenumber FROM priority_orders left join air_shipped_linei -- repartition query on view join -- it passes planning, fails at execution stage -SELECT * FROM priority_orders JOIN air_shipped_lineitems ON (o_custkey = l_suppkey); -ERROR: the query contains a join that requires repartitioning -HINT: Set citus.enable_repartition_joins to on to enable repartitioning -SET citus.task_executor_type to "task-tracker"; +SET client_min_messages TO DEBUG1; +SELECT * FROM priority_orders JOIN air_shipped_lineitems ON (o_custkey = l_suppkey) ORDER BY o_orderkey DESC, o_custkey DESC, o_orderpriority DESC LIMIT 5; +DEBUG: generating subplan 22_1 for subquery SELECT lineitem_hash_part.l_orderkey, lineitem_hash_part.l_partkey, lineitem_hash_part.l_suppkey, lineitem_hash_part.l_linenumber, lineitem_hash_part.l_quantity, lineitem_hash_part.l_extendedprice, lineitem_hash_part.l_discount, lineitem_hash_part.l_tax, lineitem_hash_part.l_returnflag, lineitem_hash_part.l_linestatus, lineitem_hash_part.l_shipdate, lineitem_hash_part.l_commitdate, lineitem_hash_part.l_receiptdate, lineitem_hash_part.l_shipinstruct, lineitem_hash_part.l_shipmode, lineitem_hash_part.l_comment FROM public.lineitem_hash_part WHERE (lineitem_hash_part.l_shipmode = 'AIR'::bpchar) +DEBUG: Plan 22 query after replacing subqueries and CTEs: SELECT priority_orders.o_orderkey, priority_orders.o_custkey, priority_orders.o_orderstatus, priority_orders.o_totalprice, priority_orders.o_orderdate, priority_orders.o_orderpriority, priority_orders.o_clerk, priority_orders.o_shippriority, priority_orders.o_comment, air_shipped_lineitems.l_orderkey, air_shipped_lineitems.l_partkey, air_shipped_lineitems.l_suppkey, air_shipped_lineitems.l_linenumber, air_shipped_lineitems.l_quantity, 
air_shipped_lineitems.l_extendedprice, air_shipped_lineitems.l_discount, air_shipped_lineitems.l_tax, air_shipped_lineitems.l_returnflag, air_shipped_lineitems.l_linestatus, air_shipped_lineitems.l_shipdate, air_shipped_lineitems.l_commitdate, air_shipped_lineitems.l_receiptdate, air_shipped_lineitems.l_shipinstruct, air_shipped_lineitems.l_shipmode, air_shipped_lineitems.l_comment FROM ((SELECT orders_hash_part.o_orderkey, orders_hash_part.o_custkey, orders_hash_part.o_orderstatus, orders_hash_part.o_totalprice, orders_hash_part.o_orderdate, orders_hash_part.o_orderpriority, orders_hash_part.o_clerk, orders_hash_part.o_shippriority, orders_hash_part.o_comment FROM public.orders_hash_part WHERE (orders_hash_part.o_orderpriority < '3-MEDIUM'::bpchar)) priority_orders JOIN (SELECT intermediate_result.l_orderkey, intermediate_result.l_partkey, intermediate_result.l_suppkey, intermediate_result.l_linenumber, intermediate_result.l_quantity, intermediate_result.l_extendedprice, intermediate_result.l_discount, intermediate_result.l_tax, intermediate_result.l_returnflag, intermediate_result.l_linestatus, intermediate_result.l_shipdate, intermediate_result.l_commitdate, intermediate_result.l_receiptdate, intermediate_result.l_shipinstruct, intermediate_result.l_shipmode, intermediate_result.l_comment FROM read_intermediate_result('22_1'::text, 'binary'::citus_copy_format) intermediate_result(l_orderkey bigint, l_partkey integer, l_suppkey integer, l_linenumber integer, l_quantity numeric(15,2), l_extendedprice numeric(15,2), l_discount numeric(15,2), l_tax numeric(15,2), l_returnflag character(1), l_linestatus character(1), l_shipdate date, l_commitdate date, l_receiptdate date, l_shipinstruct character(25), l_shipmode character(10), l_comment character varying(44))) air_shipped_lineitems ON ((priority_orders.o_custkey = air_shipped_lineitems.l_suppkey))) ORDER BY priority_orders.o_orderkey DESC, priority_orders.o_custkey DESC, priority_orders.o_orderpriority DESC LIMIT 5 
+DEBUG: push down of limit count: 5 + o_orderkey | o_custkey | o_orderstatus | o_totalprice | o_orderdate | o_orderpriority | o_clerk | o_shippriority | o_comment | l_orderkey | l_partkey | l_suppkey | l_linenumber | l_quantity | l_extendedprice | l_discount | l_tax | l_returnflag | l_linestatus | l_shipdate | l_commitdate | l_receiptdate | l_shipinstruct | l_shipmode | l_comment +------------+-----------+---------------+--------------+-------------+-----------------+-----------------+----------------+-------------------------------------------------------+------------+-----------+-----------+--------------+------------+-----------------+------------+-------+--------------+--------------+------------+--------------+---------------+---------------------------+------------+------------------------------------------- + 14821 | 1435 | O | 322002.95 | 06-12-1998 | 2-HIGH | Clerk#000000630 | 0 | n packages are furiously ironic ideas. d | 1607 | 118923 | 1435 | 2 | 37.00 | 71851.04 | 0.05 | 0.02 | N | O | 02-27-1996 | 02-18-1996 | 03-16-1996 | NONE | AIR | alongside + 14790 | 613 | O | 270163.54 | 08-21-1996 | 2-HIGH | Clerk#000000347 | 0 | p. regular deposits wake. final n | 2629 | 123076 | 613 | 2 | 31.00 | 34071.17 | 0.08 | 0.03 | N | O | 05-24-1998 | 05-26-1998 | 06-10-1998 | COLLECT COD | AIR | ate blithely bold, regular deposits. bold + 14758 | 1225 | F | 37812.49 | 10-27-1993 | 2-HIGH | Clerk#000000687 | 0 | ages nag about the furio | 9156 | 176190 | 1225 | 2 | 22.00 | 27856.18 | 0.03 | 0.00 | R | F | 02-08-1994 | 04-01-1994 | 02-24-1994 | DELIVER IN PERSON | AIR | equests dete + 14725 | 569 | O | 261801.45 | 06-17-1995 | 2-HIGH | Clerk#000000177 | 0 | ng asymptotes. 
final, ironic accounts cajole after | 14688 | 173017 | 569 | 3 | 10.00 | 10900.10 | 0.02 | 0.08 | N | O | 03-14-1997 | 04-22-1997 | 04-05-1997 | COLLECT COD | AIR | riously even packages sleep a + 14657 | 370 | F | 116160.53 | 02-28-1994 | 1-URGENT | Clerk#000000756 | 0 | ly across the ironic, ironic instructions. bold ideas | 5153 | 67863 | 370 | 3 | 30.00 | 54925.80 | 0.09 | 0.01 | N | O | 11-10-1995 | 11-14-1995 | 11-16-1995 | DELIVER IN PERSON | AIR | beans sleep bl +(5 rows) + +RESET client_min_messages; SELECT count(*) FROM priority_orders JOIN air_shipped_lineitems ON (o_custkey = l_suppkey); count ------- 192 (1 row) -SET citus.task_executor_type to DEFAULT; -- materialized views work -- insert into... select works with views CREATE TABLE temp_lineitem(LIKE lineitem_hash_part); @@ -411,6 +421,7 @@ ORDER BY 2 DESC, 1; (3 rows) -- non-partition key joins are not supported inside subquery +-- since the join with a table SELECT * FROM (SELECT ru.user_id, count(*) FROM recent_users ru @@ -419,7 +430,7 @@ SELECT * FROM GROUP BY ru.user_id ORDER BY 2 DESC, 1) s1 ORDER BY 2 DESC, 1; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +ERROR: bogus varno: 3 -- join between views -- recent users who has an event in recent events SELECT ru.user_id FROM recent_users ru JOIN recent_events re USING(user_id) GROUP BY ru.user_id ORDER BY ru.user_id; @@ -506,6 +517,7 @@ ORDER BY 2 DESC, 1; (3 rows) -- event vs table non-partition-key join is not supported +-- given that we cannot recursively plan tables yet SELECT * FROM (SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event FROM recent_users ru @@ -513,7 +525,7 @@ SELECT * FROM ON(ru.user_id = et.event_type) ) s1 ORDER BY 2 DESC, 1; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +ERROR: bogus varno: 3 -- create a select only view 
CREATE VIEW selected_users AS SELECT * FROM users_table WHERE value_1 >= 1 and value_1 <3; CREATE VIEW recent_selected_users AS SELECT su.* FROM selected_users su JOIN recent_users ru USING(user_id); @@ -845,7 +857,7 @@ EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USIN -> Sort Sort Key: remote_scan."time" DESC -> Custom Scan (Citus Real-Time) - -> Distributed Subplan 93_1 + -> Distributed Subplan 95_1 -> Limit -> Sort Sort Key: max((max(remote_scan.lastseen))) DESC diff --git a/src/test/regress/expected/non_colocated_leaf_subquery_joins.out b/src/test/regress/expected/non_colocated_leaf_subquery_joins.out index d71f9b886..d5c0d4512 100644 --- a/src/test/regress/expected/non_colocated_leaf_subquery_joins.out +++ b/src/test/regress/expected/non_colocated_leaf_subquery_joins.out @@ -157,8 +157,8 @@ DEBUG: Plan 14 query after replacing subqueries and CTEs: SELECT event, array_l t (1 row) - -- should not recursively plan any subquery given that we don't support - -- non-colocated subquery joins among the subqueries yet + -- should recursively plan bar subquery given that it is not joined + -- on the distribution key with bar SELECT true AS valid FROM explain_json($$SELECT count(*) FROM @@ -166,7 +166,13 @@ FROM (SELECT users_table.user_id, value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar WHERE foo.user_id = bar.value_1;$$); -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +DEBUG: generating subplan 20_1 for subquery SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[5, 6, 7, 8]))) +DEBUG: Plan 20 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT users_table.user_id, random() AS random FROM public.users_table, 
public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[1, 2, 3, 4])))) foo, (SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('20_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer)) bar WHERE (foo.user_id = bar.value_1) + valid +------- + t +(1 row) + SET log_error_verbosity TO DEFAULT; SET client_min_messages TO DEFAULT; SET citus.enable_repartition_joins TO DEFAULT; diff --git a/src/test/regress/expected/non_colocated_subquery_joins.out b/src/test/regress/expected/non_colocated_subquery_joins.out new file mode 100644 index 000000000..ae130da41 --- /dev/null +++ b/src/test/regress/expected/non_colocated_subquery_joins.out @@ -0,0 +1,917 @@ +-- =================================================================== +-- test recursive planning functionality for non-colocated subqueries +-- We prefered to use EXPLAIN almost all the queries here, +-- otherwise the execution time of so many repartition queries would +-- be too high for the regression tests. Also, note that we're mostly +-- interested in recurive planning side of the things, thus supressing +-- the actual explain output. +-- =================================================================== +SET client_min_messages TO DEBUG1; +CREATE SCHEMA non_colocated_subquery; +SET search_path TO non_colocated_subquery, public; +-- we don't use the data anyway +CREATE TABLE users_table_local AS SELECT * FROM users_table LIMIT 0; +DEBUG: push down of limit count: 0 +CREATE TABLE events_table_local AS SELECT * FROM events_table LIMIT 0; +DEBUG: push down of limit count: 0 +SET citus.enable_repartition_joins TO ON; +\set VERBOSITY terse +-- Function that parses explain output as JSON +-- copied from multi_explain.sql and had to give +-- a different name via postfix to prevent concurrent +-- create/drop etc. 
+CREATE OR REPLACE FUNCTION explain_json_2(query text) +RETURNS jsonb +AS $BODY$ +DECLARE + result jsonb; +BEGIN + EXECUTE format('EXPLAIN (FORMAT JSON) %s', query) INTO result; + RETURN result; +END; +$BODY$ LANGUAGE plpgsql; +-- leaf queries contain colocated joins +-- but not the subquery +SELECT true AS valid FROM explain_json_2($$ + SELECT + foo.value_2 + FROM + (SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo, + (SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar + WHERE + foo.value_2 = bar.value_2; +$$); +DEBUG: generating subplan 3_1 for subquery SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[5, 6, 7, 8]))) +DEBUG: Plan 3 query after replacing subqueries and CTEs: SELECT foo.value_2 FROM (SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[1, 2, 3, 4])))) foo, (SELECT intermediate_result.value_2 FROM read_intermediate_result('3_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) bar WHERE (foo.value_2 = bar.value_2) + valid +------- + t +(1 row) + +-- simple non colocated join with subqueries in WHERE clause +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + events_table + WHERE + event_type + IN + (SELECT event_type FROM events_table WHERE user_id < 100); + +$$); +DEBUG: generating subplan 5_1 for subquery SELECT event_type FROM public.events_table WHERE (user_id < 100) +DEBUG: Plan 5 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM public.events_table WHERE (event_type IN (SELECT intermediate_result.event_type FROM read_intermediate_result('5_1'::text, 
'binary'::citus_copy_format) intermediate_result(event_type integer))) + valid +------- + t +(1 row) + +-- simple non colocated join with subqueries in WHERE clause with NOT IN +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + events_table + WHERE + user_id + NOT IN + (SELECT user_id FROM events_table WHERE event_type = 2); +$$); +DEBUG: generating subplan 7_1 for subquery SELECT user_id FROM public.events_table WHERE (event_type = 2) +DEBUG: Plan 7 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM public.events_table WHERE (NOT (user_id IN (SELECT intermediate_result.user_id FROM read_intermediate_result('7_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)))) + valid +------- + t +(1 row) + +-- Subqueries in WHERE and FROM are mixed +-- In this query, only subquery in WHERE is not a colocated join +SELECT true AS valid FROM explain_json_2($$ + + SELECT + foo.user_id + FROM + (SELECT users_table.user_id, event_type FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo, + (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar + WHERE + foo.user_id = bar.user_id AND + foo.event_type IN (SELECT event_type FROM events_table WHERE user_id < 3); + +$$); +DEBUG: generating subplan 9_1 for subquery SELECT event_type FROM public.events_table WHERE (user_id < 3) +DEBUG: Plan 9 query after replacing subqueries and CTEs: SELECT foo.user_id FROM (SELECT users_table.user_id, events_table.event_type FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[1, 2, 3, 4])))) foo, (SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[5, 6, 7, 8])))) bar WHERE 
((foo.user_id = bar.user_id) AND (foo.event_type IN (SELECT intermediate_result.event_type FROM read_intermediate_result('9_1'::text, 'binary'::citus_copy_format) intermediate_result(event_type integer)))) + valid +------- + t +(1 row) + +-- Subqueries in WHERE and FROM are mixed +-- In this query, one of the joins in the FROM clause is not colocated +SELECT true AS valid FROM explain_json_2($$ + + SELECT + foo.user_id + FROM + (SELECT users_table.user_id, event_type FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo, + (SELECT (users_table.user_id / 2) as user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar + WHERE + foo.user_id = bar.user_id AND + foo.user_id IN (SELECT user_id FROM events_table WHERE user_id < 10); +$$); +DEBUG: generating subplan 11_1 for subquery SELECT (users_table.user_id / 2) AS user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[5, 6, 7, 8]))) +DEBUG: Plan 11 query after replacing subqueries and CTEs: SELECT foo.user_id FROM (SELECT users_table.user_id, events_table.event_type FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[1, 2, 3, 4])))) foo, (SELECT intermediate_result.user_id FROM read_intermediate_result('11_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) bar WHERE ((foo.user_id = bar.user_id) AND (foo.user_id IN (SELECT events_table.user_id FROM public.events_table WHERE (events_table.user_id < 10)))) + valid +------- + t +(1 row) + +-- Subqueries in WHERE and FROM are mixed +-- In this query, both the joins in the FROM clause is not colocated +SELECT true AS valid FROM explain_json_2($$ + + SELECT + foo.user_id + FROM + (SELECT users_table.user_id, event_type FROM users_table, 
events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo, + (SELECT (users_table.user_id / 2) as user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar + WHERE + foo.user_id = bar.user_id AND + foo.user_id NOT IN (SELECT user_id FROM events_table WHERE user_id < 10); +$$); +DEBUG: generating subplan 13_1 for subquery SELECT (users_table.user_id / 2) AS user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[5, 6, 7, 8]))) +DEBUG: generating subplan 13_2 for subquery SELECT user_id FROM public.events_table WHERE (user_id < 10) +DEBUG: Plan 13 query after replacing subqueries and CTEs: SELECT foo.user_id FROM (SELECT users_table.user_id, events_table.event_type FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[1, 2, 3, 4])))) foo, (SELECT intermediate_result.user_id FROM read_intermediate_result('13_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) bar WHERE ((foo.user_id = bar.user_id) AND (NOT (foo.user_id IN (SELECT intermediate_result.user_id FROM read_intermediate_result('13_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer))))) + valid +------- + t +(1 row) + +-- Subqueries in WHERE and FROM are mixed +-- In this query, one of the joins in the FROM clause is not colocated and subquery in WHERE clause is not colocated +-- similar to the above, but, this time bar is the anchor subquery +SELECT true AS valid FROM explain_json_2($$ + SELECT + foo.user_id + FROM + (SELECT users_table.user_id, event_type FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) as foo, + (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND 
event_type IN (5,6,7,8)) as bar + WHERE + foo.user_id = bar.user_id AND + foo.event_type IN (SELECT event_type FROM events_table WHERE user_id < 4); +$$); +DEBUG: cannot use real time executor with repartition jobs +DEBUG: generating subplan 16_1 for subquery SELECT users_table.user_id, events_table.event_type FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.value_2) AND (events_table.event_type = ANY (ARRAY[1, 2, 3, 4]))) +DEBUG: generating subplan 16_2 for subquery SELECT event_type FROM public.events_table WHERE (user_id < 4) +DEBUG: Plan 16 query after replacing subqueries and CTEs: SELECT foo.user_id FROM (SELECT intermediate_result.user_id, intermediate_result.event_type FROM read_intermediate_result('16_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, event_type integer)) foo, (SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[5, 6, 7, 8])))) bar WHERE ((foo.user_id = bar.user_id) AND (foo.event_type IN (SELECT intermediate_result.event_type FROM read_intermediate_result('16_2'::text, 'binary'::citus_copy_format) intermediate_result(event_type integer)))) +ERROR: cannot pushdown the subquery +-- The inner subqueries and the subquery in WHERE are non-located joins +SELECT true AS valid FROM explain_json_2($$ + SELECT foo_top.*, events_table.user_id FROM + ( + + SELECT + foo.user_id, random() + FROM + (SELECT users_table.user_id, event_type FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) as foo, + (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.event_type AND event_type IN (5,6,7,8)) as bar + WHERE + foo.user_id = bar.user_id AND + foo.event_type IN (SELECT event_type FROM events_table WHERE user_id = 5) + + ) as foo_top, events_table WHERE events_table.user_id = 
foo_top.user_id; +$$); +DEBUG: cannot use real time executor with repartition jobs +DEBUG: generating subplan 19_1 for subquery SELECT users_table.user_id, events_table.event_type FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.value_2) AND (events_table.event_type = ANY (ARRAY[1, 2, 3, 4]))) +DEBUG: cannot use real time executor with repartition jobs +DEBUG: generating subplan 19_2 for subquery SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.event_type) AND (events_table.event_type = ANY (ARRAY[5, 6, 7, 8]))) +DEBUG: generating subplan 19_3 for subquery SELECT event_type FROM public.events_table WHERE (user_id = 5) +DEBUG: generating subplan 19_4 for subquery SELECT foo.user_id, random() AS random FROM (SELECT intermediate_result.user_id, intermediate_result.event_type FROM read_intermediate_result('19_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, event_type integer)) foo, (SELECT intermediate_result.user_id FROM read_intermediate_result('19_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) bar WHERE ((foo.user_id = bar.user_id) AND (foo.event_type IN (SELECT intermediate_result.event_type FROM read_intermediate_result('19_3'::text, 'binary'::citus_copy_format) intermediate_result(event_type integer)))) +DEBUG: Plan 19 query after replacing subqueries and CTEs: SELECT foo_top.user_id, foo_top.random, events_table.user_id FROM (SELECT intermediate_result.user_id, intermediate_result.random FROM read_intermediate_result('19_4'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, random double precision)) foo_top, public.events_table WHERE (events_table.user_id = foo_top.user_id) + valid +------- + t +(1 row) + +-- Slightly more complex query where there are 5 joins, 1 of them is non-colocated +SELECT true AS valid FROM explain_json_2($$ + + SELECT * FROM + ( + SELECT + foo1.user_id, 
random() + FROM + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo1, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as foo2, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as foo3, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (13,14,15,16)) as foo4, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (17,18,19,20)) as foo5 + + WHERE + + foo1.user_id = foo4.user_id AND + foo1.user_id = foo2.user_id AND + foo1.user_id = foo3.user_id AND + foo1.user_id = foo4.user_id AND + foo1.user_id = foo5.value_1 + ) as foo_top; + +$$); +DEBUG: generating subplan 24_1 for subquery SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[17, 18, 19, 20]))) +DEBUG: Plan 24 query after replacing subqueries and CTEs: SELECT user_id, random FROM (SELECT foo1.user_id, random() AS random FROM (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[1, 2, 3, 4])))) foo1, (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[5, 6, 7, 8])))) foo2, (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id 
= events_table.user_id) AND (events_table.event_type = ANY (ARRAY[9, 10, 11, 12])))) foo3, (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[13, 14, 15, 16])))) foo4, (SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('24_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer)) foo5 WHERE ((foo1.user_id = foo4.user_id) AND (foo1.user_id = foo2.user_id) AND (foo1.user_id = foo3.user_id) AND (foo1.user_id = foo4.user_id) AND (foo1.user_id = foo5.value_1))) foo_top + valid +------- + t +(1 row) + +-- Very similar to the above query +-- One of the queries is not joined on partition key, but this time subquery itself +SELECT true AS valid FROM explain_json_2($$ + + SELECT * FROM + ( + SELECT + foo1.user_id, random() + FROM + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo1, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as foo2, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as foo3, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (13,14,15,16)) as foo4, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (17,18,19,20)) as foo5 + + WHERE + + foo1.user_id = foo4.user_id AND + foo1.user_id = foo2.user_id AND + foo1.user_id = foo3.user_id AND + foo1.user_id = foo4.user_id AND + foo1.user_id = foo5.user_id + ) as foo_top; 
+$$); +DEBUG: cannot use real time executor with repartition jobs +DEBUG: generating subplan 26_1 for subquery SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.value_2) AND (events_table.event_type = ANY (ARRAY[17, 18, 19, 20]))) +DEBUG: Plan 26 query after replacing subqueries and CTEs: SELECT user_id, random FROM (SELECT foo1.user_id, random() AS random FROM (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[1, 2, 3, 4])))) foo1, (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[5, 6, 7, 8])))) foo2, (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[9, 10, 11, 12])))) foo3, (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[13, 14, 15, 16])))) foo4, (SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('26_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer)) foo5 WHERE ((foo1.user_id = foo4.user_id) AND (foo1.user_id = foo2.user_id) AND (foo1.user_id = foo3.user_id) AND (foo1.user_id = foo4.user_id) AND (foo1.user_id = foo5.user_id))) foo_top + valid +------- + t +(1 row) + +-- There are two non colocated joins, one is in the one of the leaf queries, +-- the other is on the top-level subquery +SELECT true AS valid FROM explain_json_2($$ + + SELECT * FROM + ( + SELECT + foo1.user_id, random() + FROM + (SELECT users_table.user_id, users_table.value_1 FROM 
users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo1, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (5,6,7,8)) as foo2, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as foo3, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (13,14,15,16)) as foo4, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (17,18,19,20)) as foo5 + WHERE + foo1.user_id = foo4.user_id AND + foo1.user_id = foo2.user_id AND + foo1.user_id = foo3.user_id AND + foo1.user_id = foo4.user_id AND + foo1.user_id = foo5.value_1 + ) as foo_top; +$$); +DEBUG: cannot use real time executor with repartition jobs +DEBUG: generating subplan 28_1 for subquery SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.value_2) AND (events_table.event_type = ANY (ARRAY[5, 6, 7, 8]))) +DEBUG: generating subplan 28_2 for subquery SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[17, 18, 19, 20]))) +DEBUG: Plan 28 query after replacing subqueries and CTEs: SELECT user_id, random FROM (SELECT foo1.user_id, random() AS random FROM (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[1, 2, 3, 4])))) foo1, (SELECT intermediate_result.user_id, intermediate_result.value_1 FROM 
read_intermediate_result('28_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer)) foo2, (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[9, 10, 11, 12])))) foo3, (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[13, 14, 15, 16])))) foo4, (SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('28_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer)) foo5 WHERE ((foo1.user_id = foo4.user_id) AND (foo1.user_id = foo2.user_id) AND (foo1.user_id = foo3.user_id) AND (foo1.user_id = foo4.user_id) AND (foo1.user_id = foo5.value_1))) foo_top + valid +------- + t +(1 row) + +-- a similar query to the above, but, this sime the second +-- non colocated join is on the already recursively planned subquery +-- the results should be the same +SELECT true AS valid FROM explain_json_2($$ + + SELECT * FROM + ( + SELECT + foo1.user_id, random() + FROM + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo1, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (5,6,7,8)) as foo2, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as foo3, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (13,14,15,16)) as foo4, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, 
events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (17,18,19,20)) as foo5 + WHERE + foo1.user_id = foo4.user_id AND + foo1.user_id = foo2.user_id AND + foo1.user_id = foo3.user_id AND + foo1.user_id = foo4.user_id AND + foo2.user_id = foo5.value_1 + ) as foo_top; +$$); +DEBUG: cannot use real time executor with repartition jobs +DEBUG: generating subplan 31_1 for subquery SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.value_2) AND (events_table.event_type = ANY (ARRAY[5, 6, 7, 8]))) +DEBUG: generating subplan 31_2 for subquery SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[17, 18, 19, 20]))) +DEBUG: Plan 31 query after replacing subqueries and CTEs: SELECT user_id, random FROM (SELECT foo1.user_id, random() AS random FROM (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[1, 2, 3, 4])))) foo1, (SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('31_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer)) foo2, (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[9, 10, 11, 12])))) foo3, (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[13, 14, 15, 16])))) foo4, (SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('31_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id 
integer, value_1 integer)) foo5 WHERE ((foo1.user_id = foo4.user_id) AND (foo1.user_id = foo2.user_id) AND (foo1.user_id = foo3.user_id) AND (foo1.user_id = foo4.user_id) AND (foo2.user_id = foo5.value_1))) foo_top + valid +------- + t +(1 row) + +-- Deeper subqueries are non-colocated +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + ( + SELECT + foo.user_id + FROM + (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) as foo, + (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar + WHERE + foo.user_id = bar.user_id) as foo_top JOIN + + ( + SELECT + foo.user_id + FROM + (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) as foo, + (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar + WHERE + foo.user_id = bar.user_id) as bar_top + ON (foo_top.user_id = bar_top.user_id); +$$); +DEBUG: cannot use real time executor with repartition jobs +DEBUG: generating subplan 34_1 for subquery SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.value_2) AND (events_table.event_type = ANY (ARRAY[1, 2, 3, 4]))) +DEBUG: cannot use real time executor with repartition jobs +DEBUG: generating subplan 34_2 for subquery SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.value_2) AND (events_table.event_type = ANY (ARRAY[1, 2, 3, 4]))) +DEBUG: Plan 34 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT foo.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('34_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) 
foo, (SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[5, 6, 7, 8])))) bar WHERE (foo.user_id = bar.user_id)) foo_top JOIN (SELECT foo.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('34_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) foo, (SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[5, 6, 7, 8])))) bar WHERE (foo.user_id = bar.user_id)) bar_top ON ((foo_top.user_id = bar_top.user_id))) + valid +------- + t +(1 row) + +-- Top level Subquery is not colocated +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + ( + SELECT + foo.user_id, foo.value_2 + FROM + (SELECT DISTINCT users_table.user_id, users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo, + (SELECT DISTINCT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar + WHERE + foo.user_id = bar.user_id) as foo_top JOIN + + ( + SELECT + foo.user_id + FROM + (SELECT DISTINCT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as foo, + (SELECT DISTINCT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (13,14,15,16)) as bar + WHERE + foo.user_id = bar.user_id) as bar_top + ON (foo_top.value_2 = bar_top.user_id); + +$$); +DEBUG: generating subplan 37_1 for subquery SELECT foo.user_id FROM (SELECT DISTINCT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[9, 10, 11, 12])))) foo, (SELECT 
DISTINCT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[13, 14, 15, 16])))) bar WHERE (foo.user_id = bar.user_id) +DEBUG: Plan 37 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT foo.user_id, foo.value_2 FROM (SELECT DISTINCT users_table.user_id, users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[1, 2, 3, 4])))) foo, (SELECT DISTINCT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[5, 6, 7, 8])))) bar WHERE (foo.user_id = bar.user_id)) foo_top JOIN (SELECT intermediate_result.user_id FROM read_intermediate_result('37_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) bar_top ON ((foo_top.value_2 = bar_top.user_id))) + valid +------- + t +(1 row) + +-- Top level Subquery is not colocated as the above +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + ( + SELECT + foo.user_id, foo.value_2 + FROM + (SELECT DISTINCT users_table.user_id, users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo, + (SELECT DISTINCT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar + WHERE + foo.user_id = bar.user_id) as foo_top JOIN + ( + SELECT + foo.user_id + FROM + (SELECT DISTINCT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as foo, + (SELECT DISTINCT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (13,14,15,16)) as bar + WHERE + foo.user_id = bar.user_id) as 
bar_top + ON (foo_top.value_2 = bar_top.user_id); +$$); +DEBUG: cannot use real time executor with repartition jobs +DEBUG: generating subplan 39_1 for subquery SELECT DISTINCT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.value_2) AND (events_table.event_type = ANY (ARRAY[13, 14, 15, 16]))) +DEBUG: generating subplan 39_2 for subquery SELECT foo.user_id FROM (SELECT DISTINCT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[9, 10, 11, 12])))) foo, (SELECT intermediate_result.user_id FROM read_intermediate_result('39_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) bar WHERE (foo.user_id = bar.user_id) +DEBUG: Plan 39 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT foo.user_id, foo.value_2 FROM (SELECT DISTINCT users_table.user_id, users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[1, 2, 3, 4])))) foo, (SELECT DISTINCT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[5, 6, 7, 8])))) bar WHERE (foo.user_id = bar.user_id)) foo_top JOIN (SELECT intermediate_result.user_id FROM read_intermediate_result('39_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) bar_top ON ((foo_top.value_2 = bar_top.user_id))) + valid +------- + t +(1 row) + +-- non colocated joins are deep inside the query +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + ( + SELECT * FROM + (SELECT DISTINCT users_table.user_id FROM users_table, + (SELECT events_table.user_id as my_users FROM events_table, users_table WHERE events_table.event_type = users_table.user_id) as foo + WHERE foo.my_users = 
users_table.user_id) as mid_level_query + ) as bar; +$$); +DEBUG: cannot use real time executor with repartition jobs +DEBUG: generating subplan 42_1 for subquery SELECT events_table.user_id AS my_users FROM public.events_table, public.users_table WHERE (events_table.event_type = users_table.user_id) +DEBUG: Plan 42 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT mid_level_query.user_id FROM (SELECT DISTINCT users_table.user_id FROM public.users_table, (SELECT intermediate_result.my_users FROM read_intermediate_result('42_1'::text, 'binary'::citus_copy_format) intermediate_result(my_users integer)) foo WHERE (foo.my_users = users_table.user_id)) mid_level_query) bar + valid +------- + t +(1 row) + +-- similar to the above, with relation rtes +-- we're able to recursively plan foo +-- note that if we haven't added random() to the subquery, we'd be able run the query +-- via regular repartitioning since PostgreSQL would pull the query up +SELECT true AS valid FROM explain_json_2($$ + + SELECT count(*) FROM ( SELECT * FROM + (SELECT DISTINCT users_table.user_id FROM users_table, + (SELECT events_table.event_type as my_users, random() FROM events_table, users_table WHERE events_table.user_id = users_table.user_id) as foo + WHERE foo.my_users = users_table.user_id) as mid_level_query ) as bar; + +$$); +DEBUG: generating subplan 44_1 for subquery SELECT events_table.event_type AS my_users, random() AS random FROM public.events_table, public.users_table WHERE (events_table.user_id = users_table.user_id) +DEBUG: Plan 44 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT mid_level_query.user_id FROM (SELECT DISTINCT users_table.user_id FROM public.users_table, (SELECT intermediate_result.my_users, intermediate_result.random FROM read_intermediate_result('44_1'::text, 'binary'::citus_copy_format) intermediate_result(my_users integer, random double precision)) foo WHERE (foo.my_users = users_table.user_id)) 
mid_level_query) bar + valid +------- + t +(1 row) + +-- same as the above query, but, one level deeper subquery + SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + ( + SELECT * FROM + (SELECT DISTINCT users_table.user_id FROM users_table, + (SELECT events_table.user_id as my_users FROM events_table, + (SELECT events_table.user_id, random() FROM users_table, events_table WHERE users_table.user_id = events_table.user_id) as selected_users + WHERE events_table.event_type = selected_users.user_id) as foo + + WHERE foo.my_users = users_table.user_id) as mid_level_query + ) as bar; + $$); +DEBUG: generating subplan 46_1 for subquery SELECT events_table.user_id, random() AS random FROM public.users_table, public.events_table WHERE (users_table.user_id = events_table.user_id) +DEBUG: Plan 46 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT mid_level_query.user_id FROM (SELECT DISTINCT users_table.user_id FROM public.users_table, (SELECT events_table.user_id AS my_users FROM public.events_table, (SELECT intermediate_result.user_id, intermediate_result.random FROM read_intermediate_result('46_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, random double precision)) selected_users WHERE (events_table.event_type = selected_users.user_id)) foo WHERE (foo.my_users = users_table.user_id)) mid_level_query) bar + valid +------- + t +(1 row) + +-- deeper query, subquery in WHERE clause +-- this time successfull plan the query since the join on the relation and +-- the subquery on the distribution key +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + ( + SELECT * FROM + (SELECT DISTINCT users_table.user_id FROM users_table, + + + (SELECT events_table.user_id as my_users FROM events_table, + (SELECT events_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND + + users_table.user_id IN (SELECT value_2 FROM events_table) + + ) as 
selected_users + WHERE events_table.user_id = selected_users.user_id) as foo + + WHERE foo.my_users = users_table.user_id) as mid_level_query + + ) as bar; + +$$); +DEBUG: generating subplan 48_1 for subquery SELECT value_2 FROM public.events_table +DEBUG: Plan 48 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT mid_level_query.user_id FROM (SELECT DISTINCT users_table.user_id FROM public.users_table, (SELECT events_table.user_id AS my_users FROM public.events_table, (SELECT events_table_1.user_id FROM public.users_table users_table_1, public.events_table events_table_1 WHERE ((users_table_1.user_id = events_table_1.user_id) AND (users_table_1.user_id IN (SELECT intermediate_result.value_2 FROM read_intermediate_result('48_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer))))) selected_users WHERE (events_table.user_id = selected_users.user_id)) foo WHERE (foo.my_users = users_table.user_id)) mid_level_query) bar + valid +------- + t +(1 row) + +-- should recursively plan the subquery in WHERE clause +SELECT true AS valid FROM explain_json_2($$SELECT + count(*) +FROM + users_table +WHERE + value_1 + IN + (SELECT + users_table.user_id + FROM + users_table, events_table + WHERE + users_table.user_id = events_table.value_2 AND event_type IN (5,6));$$); +DEBUG: cannot use real time executor with repartition jobs +DEBUG: generating subplan 50_1 for subquery SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.value_2) AND (events_table.event_type = ANY (ARRAY[5, 6]))) +DEBUG: Plan 50 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM public.users_table WHERE (value_1 IN (SELECT intermediate_result.user_id FROM read_intermediate_result('50_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer))) + valid +------- + t +(1 row) + +-- leaf subquery repartitioning should work fine when used with CTEs +SELECT true AS 
valid FROM explain_json_2($$ + WITH q1 AS (SELECT user_id FROM users_table) +SELECT count(*) FROM q1, (SELECT + users_table.user_id, random() + FROM + users_table, events_table + WHERE + users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) as bar WHERE bar.user_id = q1.user_id ;$$); +DEBUG: generating subplan 52_1 for CTE q1: SELECT user_id FROM public.users_table +DEBUG: cannot use real time executor with repartition jobs +DEBUG: generating subplan 52_2 for subquery SELECT users_table.user_id, random() AS random FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.value_2) AND (events_table.event_type = ANY (ARRAY[1, 2, 3, 4]))) +DEBUG: Plan 52 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('52_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) q1, (SELECT intermediate_result.user_id, intermediate_result.random FROM read_intermediate_result('52_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, random double precision)) bar WHERE (bar.user_id = q1.user_id) + valid +------- + t +(1 row) + +-- subquery joins should work fine when used with CTEs +SELECT true AS valid FROM explain_json_2($$ + WITH q1 AS (SELECT user_id FROM users_table) + SELECT count(*) FROM q1, (SELECT + users_table.user_id, random() + FROM + users_table, events_table + WHERE + users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as bar WHERE bar.user_id = q1.user_id ;$$); +DEBUG: generating subplan 55_1 for CTE q1: SELECT user_id FROM public.users_table +DEBUG: Plan 55 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('55_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) q1, (SELECT users_table.user_id, random() AS random FROM public.users_table, public.events_table WHERE 
((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[1, 2, 3, 4])))) bar WHERE (bar.user_id = q1.user_id) + valid +------- + t +(1 row) + +-- should work fine within UNIONs +SELECT true AS valid FROM explain_json_2($$ + (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) UNION + (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8));$$); +DEBUG: cannot use real time executor with repartition jobs +DEBUG: generating subplan 57_1 for subquery SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.value_2) AND (events_table.event_type = ANY (ARRAY[1, 2, 3, 4]))) +DEBUG: generating subplan 57_2 for subquery SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[5, 6, 7, 8]))) +DEBUG: Plan 57 query after replacing subqueries and CTEs: SELECT intermediate_result.user_id FROM read_intermediate_result('57_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) UNION SELECT intermediate_result.user_id FROM read_intermediate_result('57_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) + valid +------- + t +(1 row) + +-- should work fine within leaf queries of deeper subqueries +SELECT true AS valid FROM explain_json_2($$ +SELECT event, array_length(events_table, 1) +FROM ( + SELECT event, array_agg(t.user_id) AS events_table + FROM ( + SELECT + DISTINCT ON(e.event_type::text) e.event_type::text as event, e.time, e.user_id + FROM + users_table AS u, + events_table AS e, + (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (5,6,7,8)) as bar + WHERE u.user_id = e.user_id AND + u.user_id IN + ( 
+ SELECT + user_id + FROM + users_table + WHERE value_2 >= 5 + AND EXISTS (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) + LIMIT 5 + ) + ) t, users_table WHERE users_table.value_1 = t.event::int + GROUP BY event +) q +ORDER BY 2 DESC, 1; +$$); +DEBUG: cannot use real time executor with repartition jobs +DEBUG: generating subplan 60_1 for subquery SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.value_2) AND (events_table.event_type = ANY (ARRAY[1, 2, 3, 4]))) +DEBUG: push down of limit count: 5 +DEBUG: generating subplan 60_2 for subquery SELECT user_id FROM public.users_table WHERE ((value_2 >= 5) AND (EXISTS (SELECT intermediate_result.user_id FROM read_intermediate_result('60_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)))) LIMIT 5 +DEBUG: cannot use real time executor with repartition jobs +DEBUG: generating subplan 60_3 for subquery SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.value_2) AND (events_table.event_type = ANY (ARRAY[5, 6, 7, 8]))) +DEBUG: generating subplan 60_4 for subquery SELECT DISTINCT ON ((e.event_type)::text) (e.event_type)::text AS event, e."time", e.user_id FROM public.users_table u, public.events_table e, (SELECT intermediate_result.user_id FROM read_intermediate_result('60_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) bar WHERE ((u.user_id = e.user_id) AND (u.user_id IN (SELECT intermediate_result.user_id FROM read_intermediate_result('60_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)))) +DEBUG: generating subplan 60_5 for subquery SELECT t.event, array_agg(t.user_id) AS events_table FROM (SELECT intermediate_result.event, intermediate_result."time", intermediate_result.user_id FROM read_intermediate_result('60_4'::text, 
'binary'::citus_copy_format) intermediate_result(event text, "time" timestamp without time zone, user_id integer)) t, public.users_table WHERE (users_table.value_1 = (t.event)::integer) GROUP BY t.event +DEBUG: Plan 60 query after replacing subqueries and CTEs: SELECT event, array_length(events_table, 1) AS array_length FROM (SELECT intermediate_result.event, intermediate_result.events_table FROM read_intermediate_result('60_5'::text, 'binary'::citus_copy_format) intermediate_result(event text, events_table integer[])) q ORDER BY (array_length(events_table, 1)) DESC, event + valid +------- + t +(1 row) + +-- this is also supported since we can recursively plan relations as well +-- the relations are joined under a join tree with an alias +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + (users_table u1 JOIN users_table u2 using(value_1)) a JOIN (SELECT value_1, random() FROM users_table) as u3 USING (value_1); +$$); +DEBUG: generating subplan 66_1 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 66 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN public.users_table u2 USING (value_1)) a(value_1, user_id, "time", value_2, value_3, value_4, user_id_1, time_1, value_2_1, value_3_1, value_4_1) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('66_1'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +-- a very similar query to the above +-- however, this time we users a subquery instead of join alias, and it works +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + (SELECT * FROM users_table u1 JOIN users_table u2 using(value_1)) a JOIN (SELECT value_1, random() FROM users_table) as u3 
USING (value_1); +$$); +DEBUG: cannot use real time executor with repartition jobs +DEBUG: generating subplan 68_1 for subquery SELECT u1.value_1, u1.user_id, u1."time", u1.value_2, u1.value_3, u1.value_4, u2.user_id, u2."time", u2.value_2, u2.value_3, u2.value_4 FROM (public.users_table u1 JOIN public.users_table u2 USING (value_1)) +DEBUG: Plan 68 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.value_1, intermediate_result.user_id, intermediate_result."time", intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4, intermediate_result.user_id_1 AS user_id, intermediate_result.time_1 AS "time", intermediate_result.value_2_1 AS value_2, intermediate_result.value_3_1 AS value_3, intermediate_result.value_4_1 AS value_4 FROM read_intermediate_result('68_1'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, user_id integer, "time" timestamp without time zone, value_2 integer, value_3 double precision, value_4 bigint, user_id_1 integer, time_1 timestamp without time zone, value_2_1 integer, value_3_1 double precision, value_4_1 bigint)) a(value_1, user_id, "time", value_2, value_3, value_4, user_id_1, time_1, value_2_1, value_3_1, value_4_1) JOIN (SELECT users_table.value_1, random() AS random FROM public.users_table) u3 USING (value_1)) + valid +------- + t +(1 row) + +-- a similar query to the above, this time subquery is on the left +-- and the relation is on the right of the join tree +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + (SELECT value_2, random() FROM users_table) as u1 + JOIN + events_table + using (value_2); +$$); +DEBUG: generating subplan 70_1 for subquery SELECT value_2, random() AS random FROM public.users_table +DEBUG: Plan 70 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.value_2, intermediate_result.random FROM read_intermediate_result('70_1'::text, 
'binary'::citus_copy_format) intermediate_result(value_2 integer, random double precision)) u1 JOIN public.events_table USING (value_2)) + valid +------- + t +(1 row) + +-- recursive planning should kick in for outer joins as well +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + (SELECT value_2, random() FROM users_table) as u1 + LEFT JOIN + (SELECT value_2, random() FROM users_table) as u2 + USING(value_2); +$$); +DEBUG: generating subplan 72_1 for subquery SELECT value_2, random() AS random FROM public.users_table +DEBUG: Plan 72 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT users_table.value_2, random() AS random FROM public.users_table) u1 LEFT JOIN (SELECT intermediate_result.value_2, intermediate_result.random FROM read_intermediate_result('72_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer, random double precision)) u2 USING (value_2)) + valid +------- + t +(1 row) + +-- recursive planning should kick in for outer joins as well +-- but this time recursive planning might convert the query +-- into a not supported join +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + (SELECT value_2, random() FROM users_table) as u1 + RIGHT JOIN + (SELECT value_2, random() FROM users_table) as u2 + USING(value_2); +$$); +DEBUG: generating subplan 74_1 for subquery SELECT value_2, random() AS random FROM public.users_table +DEBUG: Plan 74 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT users_table.value_2, random() AS random FROM public.users_table) u1 RIGHT JOIN (SELECT intermediate_result.value_2, intermediate_result.random FROM read_intermediate_result('74_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer, random double precision)) u2 USING (value_2)) +ERROR: cannot pushdown the subquery +-- set operations may produce not very efficient plans +-- although we could have picked a as our anchor subquery, 
+-- we pick foo in this case and recursively plan a +SELECT true AS valid FROM explain_json_2($$ + + SELECT * FROM + ( + ( + SELECT user_id FROM users_table + UNION + SELECT user_id FROM users_table + ) a + JOIN + (SELECT value_1 FROM users_table) as foo ON (a.user_id = foo.value_1) + ); +$$); +DEBUG: generating subplan 77_1 for subquery SELECT user_id FROM public.users_table +DEBUG: generating subplan 77_2 for subquery SELECT user_id FROM public.users_table +DEBUG: Plan 77 query after replacing subqueries and CTEs: SELECT intermediate_result.user_id FROM read_intermediate_result('77_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) UNION SELECT intermediate_result.user_id FROM read_intermediate_result('77_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) +DEBUG: generating subplan 76_1 for subquery SELECT users_table.user_id FROM public.users_table UNION SELECT users_table.user_id FROM public.users_table +DEBUG: Plan 76 query after replacing subqueries and CTEs: SELECT a.user_id, foo.value_1 FROM ((SELECT intermediate_result.user_id FROM read_intermediate_result('76_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) a JOIN (SELECT users_table.value_1 FROM public.users_table) foo ON ((a.user_id = foo.value_1))) + valid +------- + t +(1 row) + +-- we could do the same with regular tables as well +SELECT true AS valid FROM explain_json_2($$ + + SELECT * FROM + ( + ( + SELECT user_id FROM users_table + UNION + SELECT user_id FROM users_table + ) a + JOIN + users_table as foo ON (a.user_id = foo.value_1) + ); +$$); +DEBUG: generating subplan 81_1 for subquery SELECT user_id FROM public.users_table +DEBUG: generating subplan 81_2 for subquery SELECT user_id FROM public.users_table +DEBUG: Plan 81 query after replacing subqueries and CTEs: SELECT intermediate_result.user_id FROM read_intermediate_result('81_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) UNION SELECT 
intermediate_result.user_id FROM read_intermediate_result('81_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) +DEBUG: generating subplan 80_1 for subquery SELECT users_table.user_id FROM public.users_table UNION SELECT users_table.user_id FROM public.users_table +DEBUG: Plan 80 query after replacing subqueries and CTEs: SELECT a.user_id, foo.user_id, foo."time", foo.value_1, foo.value_2, foo.value_3, foo.value_4 FROM ((SELECT intermediate_result.user_id FROM read_intermediate_result('80_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) a JOIN public.users_table foo ON ((a.user_id = foo.value_1))) + valid +------- + t +(1 row) + +-- this time the the plan is optimial, we are +-- able to keep the UNION query given that foo +-- is the anchor +SELECT true AS valid FROM explain_json_2($$ + + SELECT * FROM + ( + (SELECT user_id FROM users_table) as foo + JOIN + ( + SELECT user_id FROM users_table WHERE user_id IN (1,2,3,4) + UNION + SELECT user_id FROM users_table WHERE user_id IN (5,6,7,8) + ) a + + ON (a.user_id = foo.user_id) + JOIN + + (SELECT value_1 FROM users_table) as bar + + ON(foo.user_id = bar.value_1) + ); +$$); +DEBUG: generating subplan 84_1 for subquery SELECT value_1 FROM public.users_table +DEBUG: Plan 84 query after replacing subqueries and CTEs: SELECT foo.user_id, a.user_id, bar.value_1 FROM (((SELECT users_table.user_id FROM public.users_table) foo JOIN (SELECT users_table.user_id FROM public.users_table WHERE (users_table.user_id = ANY (ARRAY[1, 2, 3, 4])) UNION SELECT users_table.user_id FROM public.users_table WHERE (users_table.user_id = ANY (ARRAY[5, 6, 7, 8]))) a ON ((a.user_id = foo.user_id))) JOIN (SELECT intermediate_result.value_1 FROM read_intermediate_result('84_1'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer)) bar ON ((foo.user_id = bar.value_1))) + valid +------- + t +(1 row) + +-- it should be safe to recursively plan non colocated subqueries +-- inside a 
CTE +SELECT true AS valid FROM explain_json_2($$ + + WITH non_colocated_subquery AS + ( + SELECT + foo.value_2 + FROM + (SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo, + (SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar + WHERE + foo.value_2 = bar.value_2 + ), + non_colocated_subquery_2 AS + ( + SELECT + count(*) as cnt + FROM + events_table + WHERE + event_type + IN + (SELECT event_type FROM events_table WHERE user_id < 4) + ) + SELECT + * + FROM + non_colocated_subquery, non_colocated_subquery_2 + WHERE + non_colocated_subquery.value_2 != non_colocated_subquery_2.cnt +$$); +DEBUG: generating subplan 86_1 for CTE non_colocated_subquery: SELECT foo.value_2 FROM (SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[1, 2, 3, 4])))) foo, (SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[5, 6, 7, 8])))) bar WHERE (foo.value_2 = bar.value_2) +DEBUG: generating subplan 87_1 for subquery SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[5, 6, 7, 8]))) +DEBUG: Plan 87 query after replacing subqueries and CTEs: SELECT foo.value_2 FROM (SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[1, 2, 3, 4])))) foo, (SELECT intermediate_result.value_2 FROM read_intermediate_result('87_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) bar WHERE (foo.value_2 = bar.value_2) +DEBUG: generating subplan 86_2 for CTE 
non_colocated_subquery_2: SELECT count(*) AS cnt FROM public.events_table WHERE (event_type IN (SELECT events_table_1.event_type FROM public.events_table events_table_1 WHERE (events_table_1.user_id < 4))) +DEBUG: generating subplan 89_1 for subquery SELECT event_type FROM public.events_table WHERE (user_id < 4) +DEBUG: Plan 89 query after replacing subqueries and CTEs: SELECT count(*) AS cnt FROM public.events_table WHERE (event_type IN (SELECT intermediate_result.event_type FROM read_intermediate_result('89_1'::text, 'binary'::citus_copy_format) intermediate_result(event_type integer))) +DEBUG: Plan 86 query after replacing subqueries and CTEs: SELECT non_colocated_subquery.value_2, non_colocated_subquery_2.cnt FROM (SELECT intermediate_result.value_2 FROM read_intermediate_result('86_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) non_colocated_subquery, (SELECT intermediate_result.cnt FROM read_intermediate_result('86_2'::text, 'binary'::citus_copy_format) intermediate_result(cnt bigint)) non_colocated_subquery_2 WHERE (non_colocated_subquery.value_2 <> non_colocated_subquery_2.cnt) + valid +------- + t +(1 row) + +-- non colocated subquery joins should work fine along with local tables +SELECT true AS valid FROM explain_json_2($$ + SELECT + count(*) + FROM + (SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo, + (SELECT users_table_local.value_2 FROM users_table_local, events_table_local WHERE users_table_local.user_id = events_table_local.user_id AND event_type IN (5,6,7,8)) as bar, + (SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as baz + WHERE + foo.value_2 = bar.value_2 + AND + foo.value_2 = baz.value_2 +$$); +DEBUG: generating subplan 91_1 for subquery SELECT users_table_local.value_2 FROM non_colocated_subquery.users_table_local, 
non_colocated_subquery.events_table_local WHERE ((users_table_local.user_id = events_table_local.user_id) AND (events_table_local.event_type = ANY (ARRAY[5, 6, 7, 8]))) +DEBUG: generating subplan 91_2 for subquery SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[9, 10, 11, 12]))) +DEBUG: Plan 91 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[1, 2, 3, 4])))) foo, (SELECT intermediate_result.value_2 FROM read_intermediate_result('91_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) bar, (SELECT intermediate_result.value_2 FROM read_intermediate_result('91_2'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) baz WHERE ((foo.value_2 = bar.value_2) AND (foo.value_2 = baz.value_2)) + valid +------- + t +(1 row) + +-- a combination of subqueries in FROM and WHERE clauses +-- we actually recursively plan non colocated subqueries +-- pretty accurate, however, we hit our join checks, which seems too restrictive +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + (SELECT user_id FROM users_table) as foo + JOIN + ( + SELECT user_id FROM users_table WHERE user_id IN (1,2,3,4) + UNION + SELECT user_id FROM users_table WHERE user_id IN (5,6,7,8) + ) a + + ON (a.user_id = foo.user_id) + JOIN + + (SELECT value_1, value_2 FROM users_table) as bar + + ON(foo.user_id = bar.value_1) + WHERE + value_2 IN (SELECT value_1 FROM users_table WHERE value_2 < 1) + AND + value_1 IN (SELECT value_2 FROM users_table WHERE value_1 < 2) + AND + foo.user_id IN (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2)) +$$); +DEBUG: generating 
subplan 93_1 for subquery SELECT value_1, value_2 FROM public.users_table +DEBUG: generating subplan 93_2 for subquery SELECT value_1 FROM public.users_table WHERE (value_2 < 1) +DEBUG: generating subplan 93_3 for subquery SELECT value_2 FROM public.users_table WHERE (value_1 < 2) +DEBUG: Plan 93 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (((SELECT users_table.user_id FROM public.users_table) foo JOIN (SELECT users_table.user_id FROM public.users_table WHERE (users_table.user_id = ANY (ARRAY[1, 2, 3, 4])) UNION SELECT users_table.user_id FROM public.users_table WHERE (users_table.user_id = ANY (ARRAY[5, 6, 7, 8]))) a ON ((a.user_id = foo.user_id))) JOIN (SELECT intermediate_result.value_1, intermediate_result.value_2 FROM read_intermediate_result('93_1'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, value_2 integer)) bar ON ((foo.user_id = bar.value_1))) WHERE ((bar.value_2 IN (SELECT intermediate_result.value_1 FROM read_intermediate_result('93_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer))) AND (bar.value_1 IN (SELECT intermediate_result.value_2 FROM read_intermediate_result('93_3'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer))) AND (foo.user_id IN (SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[1, 2])))))) +ERROR: cannot pushdown the subquery +-- make sure that we don't pick the refeence table as +-- the anchor +SELECT true AS valid FROM explain_json_2($$ + + SELECT count(*) + FROM + users_reference_table AS users_table_ref, + (SELECT user_id FROM users_Table) AS foo, + (SELECT user_id, value_2 FROM events_Table) AS bar + WHERE + users_table_ref.user_id = foo.user_id + AND foo.user_id = bar.value_2; +$$); +DEBUG: generating subplan 97_1 for subquery SELECT user_id, value_2 FROM public.events_table +DEBUG: Plan 97 query after replacing 
subqueries and CTEs: SELECT count(*) AS count FROM public.users_reference_table users_table_ref, (SELECT users_table.user_id FROM public.users_table) foo, (SELECT intermediate_result.user_id, intermediate_result.value_2 FROM read_intermediate_result('97_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_2 integer)) bar WHERE ((users_table_ref.user_id = foo.user_id) AND (foo.user_id = bar.value_2)) + valid +------- + t +(1 row) + +RESET client_min_messages; +DROP FUNCTION explain_json_2(text); +SET search_path TO 'public'; +DROP SCHEMA non_colocated_subquery CASCADE; +NOTICE: drop cascades to 2 other objects diff --git a/src/test/regress/expected/set_operations.out b/src/test/regress/expected/set_operations.out index b976ac7fd..2ec8c9c6e 100644 --- a/src/test/regress/expected/set_operations.out +++ b/src/test/regress/expected/set_operations.out @@ -639,17 +639,27 @@ SELECT * FROM test a WHERE x IN (SELECT x FROM test b WHERE y = 1 UNION SELECT x 2 | 2 (2 rows) --- subquery union in WHERE clause with partition column equality, without implicit join on partition column +-- subquery union in WHERE clause with partition column equality, without implicit join on partition column is recursively planned SELECT * FROM test a WHERE x NOT IN (SELECT x FROM test b WHERE y = 1 UNION SELECT x FROM test c WHERE y = 2) ORDER BY 1,2; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator --- subquery union in WHERE clause without parition column equality is recursively planned -SELECT * FROM test a WHERE x IN (SELECT x FROM test b UNION SELECT y FROM test c) ORDER BY 1,2; -DEBUG: generating subplan 137_1 for subquery SELECT x FROM recursive_union.test b -DEBUG: generating subplan 137_2 for subquery SELECT y FROM recursive_union.test c +DEBUG: generating subplan 137_1 for subquery SELECT x FROM recursive_union.test b WHERE (y = 1) +DEBUG: generating subplan 137_2 for subquery 
SELECT x FROM recursive_union.test c WHERE (y = 2) +DEBUG: Plan 137 query after replacing subqueries and CTEs: SELECT intermediate_result.x FROM read_intermediate_result('137_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer) UNION SELECT intermediate_result.x FROM read_intermediate_result('137_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer) DEBUG: Creating router plan DEBUG: Plan is router executable -DEBUG: generating subplan 137_3 for subquery SELECT intermediate_result.x FROM read_intermediate_result('137_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer) UNION SELECT intermediate_result.y FROM read_intermediate_result('137_2'::text, 'binary'::citus_copy_format) intermediate_result(y integer) -DEBUG: Plan 137 query after replacing subqueries and CTEs: SELECT x, y FROM recursive_union.test a WHERE (x IN (SELECT intermediate_result.x FROM read_intermediate_result('137_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer))) ORDER BY x, y +DEBUG: generating subplan 136_1 for subquery SELECT b.x FROM recursive_union.test b WHERE (b.y = 1) UNION SELECT c.x FROM recursive_union.test c WHERE (c.y = 2) +DEBUG: Plan 136 query after replacing subqueries and CTEs: SELECT x, y FROM recursive_union.test a WHERE (NOT (x IN (SELECT intermediate_result.x FROM read_intermediate_result('136_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer)))) ORDER BY x, y + x | y +---+--- +(0 rows) + +-- subquery union in WHERE clause without parition column equality is recursively planned +SELECT * FROM test a WHERE x IN (SELECT x FROM test b UNION SELECT y FROM test c) ORDER BY 1,2; +DEBUG: generating subplan 140_1 for subquery SELECT x FROM recursive_union.test b +DEBUG: generating subplan 140_2 for subquery SELECT y FROM recursive_union.test c +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 140_3 for subquery SELECT intermediate_result.x FROM 
read_intermediate_result('140_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer) UNION SELECT intermediate_result.y FROM read_intermediate_result('140_2'::text, 'binary'::citus_copy_format) intermediate_result(y integer) +DEBUG: Plan 140 query after replacing subqueries and CTEs: SELECT x, y FROM recursive_union.test a WHERE (x IN (SELECT intermediate_result.x FROM read_intermediate_result('140_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer))) ORDER BY x, y x | y ---+--- 1 | 1 @@ -658,22 +668,24 @@ DEBUG: Plan 137 query after replacing subqueries and CTEs: SELECT x, y FROM rec -- correlated subquery with union in WHERE clause SELECT * FROM test a WHERE x IN (SELECT x FROM test b UNION SELECT y FROM test c WHERE a.x = c.x) ORDER BY 1,2; -DEBUG: generating subplan 141_1 for subquery SELECT x FROM recursive_union.test b +DEBUG: generating subplan 144_1 for subquery SELECT x FROM recursive_union.test b DEBUG: skipping recursive planning for the subquery since it contains references to outer queries DEBUG: skipping recursive planning for the subquery since it contains references to outer queries -DEBUG: Plan 141 query after replacing subqueries and CTEs: SELECT x, y FROM recursive_union.test a WHERE (x IN (SELECT intermediate_result.x FROM read_intermediate_result('141_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer) UNION SELECT c.y FROM recursive_union.test c WHERE (a.x = c.x))) ORDER BY x, y +DEBUG: skipping recursive planning for the subquery since it contains references to outer queries +DEBUG: Plan 144 query after replacing subqueries and CTEs: SELECT x, y FROM recursive_union.test a WHERE (x IN (SELECT intermediate_result.x FROM read_intermediate_result('144_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer) UNION SELECT c.y FROM recursive_union.test c WHERE (a.x = c.x))) ORDER BY x, y +DEBUG: skipping recursive planning for the subquery since it contains references to outer queries 
DEBUG: skipping recursive planning for the subquery since it contains references to outer queries DEBUG: skipping recursive planning for the subquery since it contains references to outer queries ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- force unions to be planned while subqueries are being planned SELECT * FROM ((SELECT * FROM test) UNION (SELECT * FROM test) ORDER BY 1,2 LIMIT 5) as foo ORDER BY 1 DESC LIMIT 3; -DEBUG: generating subplan 144_1 for subquery SELECT x, y FROM recursive_union.test -DEBUG: generating subplan 144_2 for subquery SELECT x, y FROM recursive_union.test -DEBUG: Plan 144 query after replacing subqueries and CTEs: SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('144_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) UNION SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('144_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) ORDER BY 1, 2 LIMIT 5 +DEBUG: generating subplan 147_1 for subquery SELECT x, y FROM recursive_union.test +DEBUG: generating subplan 147_2 for subquery SELECT x, y FROM recursive_union.test +DEBUG: Plan 147 query after replacing subqueries and CTEs: SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('147_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) UNION SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('147_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) ORDER BY 1, 2 LIMIT 5 DEBUG: Creating router plan DEBUG: Plan is router executable -DEBUG: generating subplan 143_1 for subquery SELECT test.x, test.y FROM recursive_union.test UNION SELECT test.x, test.y FROM recursive_union.test ORDER BY 1, 2 LIMIT 5 -DEBUG: Plan 143 query after replacing subqueries and CTEs: SELECT x, y FROM 
(SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('143_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) foo ORDER BY x DESC LIMIT 3 +DEBUG: generating subplan 146_1 for subquery SELECT test.x, test.y FROM recursive_union.test UNION SELECT test.x, test.y FROM recursive_union.test ORDER BY 1, 2 LIMIT 5 +DEBUG: Plan 146 query after replacing subqueries and CTEs: SELECT x, y FROM (SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('146_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) foo ORDER BY x DESC LIMIT 3 DEBUG: Creating router plan DEBUG: Plan is router executable x | y @@ -684,12 +696,12 @@ DEBUG: Plan is router executable -- distinct and count distinct should work without any problems select count(DISTINCT t.x) FROM ((SELECT DISTINCT x FROM test) UNION (SELECT DISTINCT y FROM test)) as t(x) ORDER BY 1; -DEBUG: generating subplan 147_1 for subquery SELECT DISTINCT y FROM recursive_union.test -DEBUG: generating subplan 147_2 for subquery SELECT DISTINCT x FROM recursive_union.test +DEBUG: generating subplan 150_1 for subquery SELECT DISTINCT y FROM recursive_union.test +DEBUG: generating subplan 150_2 for subquery SELECT DISTINCT x FROM recursive_union.test DEBUG: Creating router plan DEBUG: Plan is router executable -DEBUG: generating subplan 147_3 for subquery SELECT intermediate_result.x FROM read_intermediate_result('147_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer) UNION SELECT intermediate_result.y FROM read_intermediate_result('147_1'::text, 'binary'::citus_copy_format) intermediate_result(y integer) -DEBUG: Plan 147 query after replacing subqueries and CTEs: SELECT count(DISTINCT x) AS count FROM (SELECT intermediate_result.x FROM read_intermediate_result('147_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) t(x) ORDER BY (count(DISTINCT x)) +DEBUG: generating subplan 150_3 for 
subquery SELECT intermediate_result.x FROM read_intermediate_result('150_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer) UNION SELECT intermediate_result.y FROM read_intermediate_result('150_1'::text, 'binary'::citus_copy_format) intermediate_result(y integer) +DEBUG: Plan 150 query after replacing subqueries and CTEs: SELECT count(DISTINCT x) AS count FROM (SELECT intermediate_result.x FROM read_intermediate_result('150_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) t(x) ORDER BY (count(DISTINCT x)) DEBUG: Creating router plan DEBUG: Plan is router executable count @@ -698,12 +710,12 @@ DEBUG: Plan is router executable (1 row) select count(DISTINCT t.x) FROM ((SELECT count(DISTINCT x) FROM test) UNION (SELECT count(DISTINCT y) FROM test)) as t(x) ORDER BY 1; -DEBUG: generating subplan 151_1 for subquery SELECT count(DISTINCT x) AS count FROM recursive_union.test -DEBUG: generating subplan 151_2 for subquery SELECT count(DISTINCT y) AS count FROM recursive_union.test +DEBUG: generating subplan 154_1 for subquery SELECT count(DISTINCT x) AS count FROM recursive_union.test +DEBUG: generating subplan 154_2 for subquery SELECT count(DISTINCT y) AS count FROM recursive_union.test DEBUG: Creating router plan DEBUG: Plan is router executable -DEBUG: generating subplan 151_3 for subquery SELECT intermediate_result.count FROM read_intermediate_result('151_1'::text, 'binary'::citus_copy_format) intermediate_result(count bigint) UNION SELECT intermediate_result.count FROM read_intermediate_result('151_2'::text, 'binary'::citus_copy_format) intermediate_result(count bigint) -DEBUG: Plan 151 query after replacing subqueries and CTEs: SELECT count(DISTINCT x) AS count FROM (SELECT intermediate_result.count FROM read_intermediate_result('151_3'::text, 'binary'::citus_copy_format) intermediate_result(count bigint)) t(x) ORDER BY (count(DISTINCT x)) +DEBUG: generating subplan 154_3 for subquery SELECT intermediate_result.count FROM 
read_intermediate_result('154_1'::text, 'binary'::citus_copy_format) intermediate_result(count bigint) UNION SELECT intermediate_result.count FROM read_intermediate_result('154_2'::text, 'binary'::citus_copy_format) intermediate_result(count bigint) +DEBUG: Plan 154 query after replacing subqueries and CTEs: SELECT count(DISTINCT x) AS count FROM (SELECT intermediate_result.count FROM read_intermediate_result('154_3'::text, 'binary'::citus_copy_format) intermediate_result(count bigint)) t(x) ORDER BY (count(DISTINCT x)) DEBUG: Creating router plan DEBUG: Plan is router executable count @@ -713,12 +725,12 @@ DEBUG: Plan is router executable -- other agg. distincts are also supported when group by includes partition key select avg(DISTINCT t.x) FROM ((SELECT avg(DISTINCT y) FROM test GROUP BY x) UNION (SELECT avg(DISTINCT y) FROM test GROUP BY x)) as t(x) ORDER BY 1; -DEBUG: generating subplan 155_1 for subquery SELECT avg(DISTINCT y) AS avg FROM recursive_union.test GROUP BY x -DEBUG: generating subplan 155_2 for subquery SELECT avg(DISTINCT y) AS avg FROM recursive_union.test GROUP BY x +DEBUG: generating subplan 158_1 for subquery SELECT avg(DISTINCT y) AS avg FROM recursive_union.test GROUP BY x +DEBUG: generating subplan 158_2 for subquery SELECT avg(DISTINCT y) AS avg FROM recursive_union.test GROUP BY x DEBUG: Creating router plan DEBUG: Plan is router executable -DEBUG: generating subplan 155_3 for subquery SELECT intermediate_result.avg FROM read_intermediate_result('155_1'::text, 'binary'::citus_copy_format) intermediate_result(avg numeric) UNION SELECT intermediate_result.avg FROM read_intermediate_result('155_2'::text, 'binary'::citus_copy_format) intermediate_result(avg numeric) -DEBUG: Plan 155 query after replacing subqueries and CTEs: SELECT avg(DISTINCT x) AS avg FROM (SELECT intermediate_result.avg FROM read_intermediate_result('155_3'::text, 'binary'::citus_copy_format) intermediate_result(avg numeric)) t(x) ORDER BY (avg(DISTINCT x)) +DEBUG: 
generating subplan 158_3 for subquery SELECT intermediate_result.avg FROM read_intermediate_result('158_1'::text, 'binary'::citus_copy_format) intermediate_result(avg numeric) UNION SELECT intermediate_result.avg FROM read_intermediate_result('158_2'::text, 'binary'::citus_copy_format) intermediate_result(avg numeric) +DEBUG: Plan 158 query after replacing subqueries and CTEs: SELECT avg(DISTINCT x) AS avg FROM (SELECT intermediate_result.avg FROM read_intermediate_result('158_3'::text, 'binary'::citus_copy_format) intermediate_result(avg numeric)) t(x) ORDER BY (avg(DISTINCT x)) DEBUG: Creating router plan DEBUG: Plan is router executable avg @@ -765,9 +777,9 @@ DEBUG: pruning merge fetch taskId 11 DETAIL: Creating dependency on merge taskId 24 DEBUG: cannot use real time executor with repartition jobs HINT: Since you enabled citus.enable_repartition_joins Citus chose to use task-tracker. -DEBUG: generating subplan 161_1 for subquery SELECT t1.x FROM recursive_union.test t1, recursive_union.test t2 WHERE (t1.x = t2.y) LIMIT 0 -DEBUG: generating subplan 161_2 for subquery SELECT x FROM recursive_union.test -DEBUG: Plan 161 query after replacing subqueries and CTEs: SELECT intermediate_result.x FROM read_intermediate_result('161_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer) INTERSECT SELECT intermediate_result.x FROM read_intermediate_result('161_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer) ORDER BY 1 DESC +DEBUG: generating subplan 164_1 for subquery SELECT t1.x FROM recursive_union.test t1, recursive_union.test t2 WHERE (t1.x = t2.y) LIMIT 0 +DEBUG: generating subplan 164_2 for subquery SELECT x FROM recursive_union.test +DEBUG: Plan 164 query after replacing subqueries and CTEs: SELECT intermediate_result.x FROM read_intermediate_result('164_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer) INTERSECT SELECT intermediate_result.x FROM read_intermediate_result('164_1'::text, 
'binary'::citus_copy_format) intermediate_result(x integer) ORDER BY 1 DESC DEBUG: Creating router plan DEBUG: Plan is router executable x @@ -806,9 +818,9 @@ DEBUG: pruning merge fetch taskId 11 DETAIL: Creating dependency on merge taskId 24 DEBUG: cannot use real time executor with repartition jobs HINT: Since you enabled citus.enable_repartition_joins Citus chose to use task-tracker. -DEBUG: generating subplan 164_1 for subquery SELECT t1.x FROM recursive_union.test t1, recursive_union.test t2 WHERE (t1.x = t2.y) -DEBUG: generating subplan 164_2 for subquery SELECT x FROM recursive_union.test -DEBUG: Plan 164 query after replacing subqueries and CTEs: SELECT intermediate_result.x FROM read_intermediate_result('164_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer) INTERSECT SELECT intermediate_result.x FROM read_intermediate_result('164_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer) ORDER BY 1 DESC +DEBUG: generating subplan 167_1 for subquery SELECT t1.x FROM recursive_union.test t1, recursive_union.test t2 WHERE (t1.x = t2.y) +DEBUG: generating subplan 167_2 for subquery SELECT x FROM recursive_union.test +DEBUG: Plan 167 query after replacing subqueries and CTEs: SELECT intermediate_result.x FROM read_intermediate_result('167_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer) INTERSECT SELECT intermediate_result.x FROM read_intermediate_result('167_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer) ORDER BY 1 DESC DEBUG: Creating router plan DEBUG: Plan is router executable x @@ -821,12 +833,12 @@ SET citus.enable_repartition_joins TO OFF; -- this should be recursively planned CREATE VIEW set_view_recursive AS (SELECT y FROM test) UNION (SELECT y FROM test); SELECT * FROM set_view_recursive ORDER BY 1 DESC; -DEBUG: generating subplan 167_1 for subquery SELECT y FROM recursive_union.test -DEBUG: generating subplan 167_2 for subquery SELECT y FROM recursive_union.test +DEBUG: 
generating subplan 170_1 for subquery SELECT y FROM recursive_union.test +DEBUG: generating subplan 170_2 for subquery SELECT y FROM recursive_union.test DEBUG: Creating router plan DEBUG: Plan is router executable -DEBUG: generating subplan 167_3 for subquery SELECT intermediate_result.y FROM read_intermediate_result('167_1'::text, 'binary'::citus_copy_format) intermediate_result(y integer) UNION SELECT intermediate_result.y FROM read_intermediate_result('167_2'::text, 'binary'::citus_copy_format) intermediate_result(y integer) -DEBUG: Plan 167 query after replacing subqueries and CTEs: SELECT y FROM (SELECT intermediate_result.y FROM read_intermediate_result('167_3'::text, 'binary'::citus_copy_format) intermediate_result(y integer)) set_view_recursive ORDER BY y DESC +DEBUG: generating subplan 170_3 for subquery SELECT intermediate_result.y FROM read_intermediate_result('170_1'::text, 'binary'::citus_copy_format) intermediate_result(y integer) UNION SELECT intermediate_result.y FROM read_intermediate_result('170_2'::text, 'binary'::citus_copy_format) intermediate_result(y integer) +DEBUG: Plan 170 query after replacing subqueries and CTEs: SELECT y FROM (SELECT intermediate_result.y FROM read_intermediate_result('170_3'::text, 'binary'::citus_copy_format) intermediate_result(y integer)) set_view_recursive ORDER BY y DESC DEBUG: Creating router plan DEBUG: Plan is router executable y @@ -847,12 +859,12 @@ SELECT * FROM set_view_pushdown ORDER BY 1 DESC; -- this should be recursively planned CREATE VIEW set_view_recursive_second AS SELECT u.x, test.y FROM ((SELECT x, y FROM test) UNION (SELECT 1, 1 FROM test)) u JOIN test USING (x) ORDER BY 1,2; SELECT * FROM set_view_recursive_second; -DEBUG: generating subplan 172_1 for subquery SELECT x, y FROM recursive_union.test -DEBUG: generating subplan 172_2 for subquery SELECT 1, 1 FROM recursive_union.test +DEBUG: generating subplan 175_1 for subquery SELECT x, y FROM recursive_union.test +DEBUG: generating subplan 175_2 
for subquery SELECT 1, 1 FROM recursive_union.test DEBUG: Creating router plan DEBUG: Plan is router executable -DEBUG: generating subplan 172_3 for subquery SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('172_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) UNION SELECT intermediate_result."?column?", intermediate_result."?column?_1" AS "?column?" FROM read_intermediate_result('172_2'::text, 'binary'::citus_copy_format) intermediate_result("?column?" integer, "?column?_1" integer) -DEBUG: Plan 172 query after replacing subqueries and CTEs: SELECT x, y FROM (SELECT u.x, test.y FROM ((SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('172_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) u JOIN recursive_union.test USING (x)) ORDER BY u.x, test.y) set_view_recursive_second +DEBUG: generating subplan 175_3 for subquery SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('175_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) UNION SELECT intermediate_result."?column?", intermediate_result."?column?_1" AS "?column?" FROM read_intermediate_result('175_2'::text, 'binary'::citus_copy_format) intermediate_result("?column?" 
integer, "?column?_1" integer) +DEBUG: Plan 175 query after replacing subqueries and CTEs: SELECT x, y FROM (SELECT u.x, test.y FROM ((SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('175_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) u JOIN recursive_union.test USING (x)) ORDER BY u.x, test.y) set_view_recursive_second x | y ---+--- 1 | 1 @@ -861,19 +873,19 @@ DEBUG: Plan 172 query after replacing subqueries and CTEs: SELECT x, y FROM (SE -- this should create lots of recursive calls since both views and set operations lead to recursive plans :) ((SELECT x FROM set_view_recursive_second) INTERSECT (SELECT * FROM set_view_recursive)) EXCEPT (SELECT * FROM set_view_pushdown); -DEBUG: generating subplan 176_1 for subquery SELECT x, y FROM recursive_union.test -DEBUG: generating subplan 176_2 for subquery SELECT 1, 1 FROM recursive_union.test +DEBUG: generating subplan 179_1 for subquery SELECT x, y FROM recursive_union.test +DEBUG: generating subplan 179_2 for subquery SELECT 1, 1 FROM recursive_union.test DEBUG: Creating router plan DEBUG: Plan is router executable -DEBUG: generating subplan 176_3 for subquery SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('176_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) UNION SELECT intermediate_result."?column?", intermediate_result."?column?_1" AS "?column?" FROM read_intermediate_result('176_2'::text, 'binary'::citus_copy_format) intermediate_result("?column?" 
integer, "?column?_1" integer) -DEBUG: generating subplan 176_4 for subquery SELECT y FROM recursive_union.test -DEBUG: generating subplan 176_5 for subquery SELECT y FROM recursive_union.test +DEBUG: generating subplan 179_3 for subquery SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('179_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) UNION SELECT intermediate_result."?column?", intermediate_result."?column?_1" AS "?column?" FROM read_intermediate_result('179_2'::text, 'binary'::citus_copy_format) intermediate_result("?column?" integer, "?column?_1" integer) +DEBUG: generating subplan 179_4 for subquery SELECT y FROM recursive_union.test +DEBUG: generating subplan 179_5 for subquery SELECT y FROM recursive_union.test DEBUG: Creating router plan DEBUG: Plan is router executable -DEBUG: generating subplan 176_6 for subquery SELECT intermediate_result.y FROM read_intermediate_result('176_4'::text, 'binary'::citus_copy_format) intermediate_result(y integer) UNION SELECT intermediate_result.y FROM read_intermediate_result('176_5'::text, 'binary'::citus_copy_format) intermediate_result(y integer) -DEBUG: generating subplan 176_7 for subquery SELECT x FROM (SELECT u.x, test.y FROM ((SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('176_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) u JOIN recursive_union.test USING (x)) ORDER BY u.x, test.y) set_view_recursive_second -DEBUG: generating subplan 176_8 for subquery SELECT x FROM (SELECT test.x FROM recursive_union.test UNION SELECT test.x FROM recursive_union.test) set_view_pushdown -DEBUG: Plan 176 query after replacing subqueries and CTEs: (SELECT intermediate_result.x FROM read_intermediate_result('176_7'::text, 'binary'::citus_copy_format) intermediate_result(x integer) INTERSECT SELECT set_view_recursive.y FROM (SELECT intermediate_result.y FROM read_intermediate_result('176_6'::text, 
'binary'::citus_copy_format) intermediate_result(y integer)) set_view_recursive) EXCEPT SELECT intermediate_result.x FROM read_intermediate_result('176_8'::text, 'binary'::citus_copy_format) intermediate_result(x integer) +DEBUG: generating subplan 179_6 for subquery SELECT intermediate_result.y FROM read_intermediate_result('179_4'::text, 'binary'::citus_copy_format) intermediate_result(y integer) UNION SELECT intermediate_result.y FROM read_intermediate_result('179_5'::text, 'binary'::citus_copy_format) intermediate_result(y integer) +DEBUG: generating subplan 179_7 for subquery SELECT x FROM (SELECT u.x, test.y FROM ((SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('179_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) u JOIN recursive_union.test USING (x)) ORDER BY u.x, test.y) set_view_recursive_second +DEBUG: generating subplan 179_8 for subquery SELECT x FROM (SELECT test.x FROM recursive_union.test UNION SELECT test.x FROM recursive_union.test) set_view_pushdown +DEBUG: Plan 179 query after replacing subqueries and CTEs: (SELECT intermediate_result.x FROM read_intermediate_result('179_7'::text, 'binary'::citus_copy_format) intermediate_result(x integer) INTERSECT SELECT set_view_recursive.y FROM (SELECT intermediate_result.y FROM read_intermediate_result('179_6'::text, 'binary'::citus_copy_format) intermediate_result(y integer)) set_view_recursive) EXCEPT SELECT intermediate_result.x FROM read_intermediate_result('179_8'::text, 'binary'::citus_copy_format) intermediate_result(x integer) DEBUG: Creating router plan DEBUG: Plan is router executable x diff --git a/src/test/regress/expected/subqueries_not_supported.out b/src/test/regress/expected/subqueries_not_supported.out index cea279dd7..34c9ee79a 100644 --- a/src/test/regress/expected/subqueries_not_supported.out +++ b/src/test/regress/expected/subqueries_not_supported.out @@ -101,16 +101,6 @@ LIMIT 10) as foo; ERROR: could not run 
distributed query because the window function that is used cannot be pushed down HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column --- top level join is not on the distribution key thus not supported --- (use random to prevent Postgres to pull subqueries) -SELECT - foo.value_2 -FROM - (SELECT users_table.value_2, random() FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo, - (SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar -WHERE - foo.value_2 = bar.value_2; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- OUTER JOINs where the outer part is recursively planned and not the other way -- around is not supported SELECT @@ -121,8 +111,8 @@ FROM (SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar ON(foo.value_2 = bar.value_2); DEBUG: push down of limit count: 5 -DEBUG: generating subplan 15_1 for subquery SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[1, 2, 3, 4]))) LIMIT 5 -DEBUG: Plan 15 query after replacing subqueries and CTEs: SELECT foo.value_2 FROM ((SELECT intermediate_result.value_2 FROM read_intermediate_result('15_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) foo LEFT JOIN (SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[5, 6, 7, 8])))) bar ON ((foo.value_2 = bar.value_2))) +DEBUG: 
generating subplan 14_1 for subquery SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[1, 2, 3, 4]))) LIMIT 5 +DEBUG: Plan 14 query after replacing subqueries and CTEs: SELECT foo.value_2 FROM ((SELECT intermediate_result.value_2 FROM read_intermediate_result('14_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) foo LEFT JOIN (SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.user_id) AND (events_table.event_type = ANY (ARRAY[5, 6, 7, 8])))) bar ON ((foo.value_2 = bar.value_2))) ERROR: cannot pushdown the subquery DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join -- Aggregates in subquery without partition column can be planned recursively diff --git a/src/test/regress/expected/subquery_executors.out b/src/test/regress/expected/subquery_executors.out index 00c44ea49..3f689625d 100644 --- a/src/test/regress/expected/subquery_executors.out +++ b/src/test/regress/expected/subquery_executors.out @@ -24,7 +24,7 @@ DEBUG: Plan 2 query after replacing subqueries and CTEs: SELECT count(*) AS cou (1 row) -- subquery with router but not logical plannable --- should fail +-- bar is recursively planned SELECT count(*) FROM @@ -35,7 +35,13 @@ FROM SELECT user_id FROM users_table ) as bar WHERE foo.counter = bar.user_id; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +DEBUG: generating subplan 4_1 for subquery SELECT user_id FROM public.users_table +DEBUG: Plan 4 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT users_table.user_id, sum(users_table.value_2) OVER (PARTITION BY users_table.user_id) AS counter FROM public.users_table WHERE (users_table.user_id = 15)) foo, (SELECT intermediate_result.user_id FROM 
read_intermediate_result('4_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) bar WHERE (foo.counter = bar.user_id) + count +------- + 0 +(1 row) + -- subquery with real-time query SELECT count(*) @@ -47,8 +53,8 @@ FROM SELECT user_id FROM users_table ) as bar WHERE foo.value_2 = bar.user_id; -DEBUG: generating subplan 5_1 for subquery SELECT value_2 FROM public.users_table WHERE (user_id <> 15) OFFSET 0 -DEBUG: Plan 5 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.value_2 FROM read_intermediate_result('5_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) foo, (SELECT users_table.user_id FROM public.users_table) bar WHERE (foo.value_2 = bar.user_id) +DEBUG: generating subplan 6_1 for subquery SELECT value_2 FROM public.users_table WHERE (user_id <> 15) OFFSET 0 +DEBUG: Plan 6 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.value_2 FROM read_intermediate_result('6_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) foo, (SELECT users_table.user_id FROM public.users_table) bar WHERE (foo.value_2 = bar.user_id) count ------- 1612 @@ -68,8 +74,8 @@ FROM WHERE foo.value_2 = bar.user_id; DEBUG: cannot use real time executor with repartition jobs HINT: Since you enabled citus.enable_repartition_joins Citus chose to use task-tracker. 
-DEBUG: generating subplan 7_1 for subquery SELECT DISTINCT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.value_2) AND (users_table.user_id < 2)) -DEBUG: Plan 7 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.value_2 FROM read_intermediate_result('7_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) foo, (SELECT users_table.user_id FROM public.users_table) bar WHERE (foo.value_2 = bar.user_id) +DEBUG: generating subplan 8_1 for subquery SELECT DISTINCT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.value_2) AND (users_table.user_id < 2)) +DEBUG: Plan 8 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.value_2 FROM read_intermediate_result('8_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) foo, (SELECT users_table.user_id FROM public.users_table) bar WHERE (foo.value_2 = bar.user_id) count ------- 58 @@ -92,13 +98,13 @@ FROM SELECT user_id FROM users_table_local WHERE user_id = 2 ) baw WHERE foo.value_2 = bar.user_id AND baz.value_2 = bar.user_id AND bar.user_id = baw.user_id; -DEBUG: generating subplan 9_1 for subquery SELECT value_2 FROM public.users_table WHERE (user_id = 15) OFFSET 0 -DEBUG: generating subplan 9_2 for subquery SELECT user_id FROM public.users_table OFFSET 0 +DEBUG: generating subplan 10_1 for subquery SELECT value_2 FROM public.users_table WHERE (user_id = 15) OFFSET 0 +DEBUG: generating subplan 10_2 for subquery SELECT user_id FROM public.users_table OFFSET 0 DEBUG: cannot use real time executor with repartition jobs HINT: Since you enabled citus.enable_repartition_joins Citus chose to use task-tracker. 
-DEBUG: generating subplan 9_3 for subquery SELECT DISTINCT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.value_2) AND (users_table.user_id < 2)) -DEBUG: generating subplan 9_4 for subquery SELECT user_id FROM subquery_executor.users_table_local WHERE (user_id = 2) -DEBUG: Plan 9 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.value_2 FROM read_intermediate_result('9_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) foo, (SELECT intermediate_result.user_id FROM read_intermediate_result('9_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) bar, (SELECT intermediate_result.value_2 FROM read_intermediate_result('9_3'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) baz, (SELECT intermediate_result.user_id FROM read_intermediate_result('9_4'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) baw WHERE ((foo.value_2 = bar.user_id) AND (baz.value_2 = bar.user_id) AND (bar.user_id = baw.user_id)) +DEBUG: generating subplan 10_3 for subquery SELECT DISTINCT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id = events_table.value_2) AND (users_table.user_id < 2)) +DEBUG: generating subplan 10_4 for subquery SELECT user_id FROM subquery_executor.users_table_local WHERE (user_id = 2) +DEBUG: Plan 10 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.value_2 FROM read_intermediate_result('10_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) foo, (SELECT intermediate_result.user_id FROM read_intermediate_result('10_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) bar, (SELECT intermediate_result.value_2 FROM read_intermediate_result('10_3'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) baz, 
(SELECT intermediate_result.user_id FROM read_intermediate_result('10_4'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) baw WHERE ((foo.value_2 = bar.user_id) AND (baz.value_2 = bar.user_id) AND (bar.user_id = baw.user_id)) count ------- 0 @@ -116,9 +122,9 @@ FROM SELECT user_id FROM users_table WHERE user_id = 2 OFFSET 0 ) as bar WHERE foo.value_2 = bar.user_id; -DEBUG: generating subplan 13_1 for subquery SELECT value_2 FROM public.users_table WHERE (user_id = 1) OFFSET 0 -DEBUG: generating subplan 13_2 for subquery SELECT user_id FROM public.users_table WHERE (user_id = 2) OFFSET 0 -DEBUG: Plan 13 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.value_2 FROM read_intermediate_result('13_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) foo, (SELECT intermediate_result.user_id FROM read_intermediate_result('13_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) bar WHERE (foo.value_2 = bar.user_id) +DEBUG: generating subplan 14_1 for subquery SELECT value_2 FROM public.users_table WHERE (user_id = 1) OFFSET 0 +DEBUG: generating subplan 14_2 for subquery SELECT user_id FROM public.users_table WHERE (user_id = 2) OFFSET 0 +DEBUG: Plan 14 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.value_2 FROM read_intermediate_result('14_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) foo, (SELECT intermediate_result.user_id FROM read_intermediate_result('14_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) bar WHERE (foo.value_2 = bar.user_id) count ------- 18 @@ -135,8 +141,8 @@ FROM SELECT user_id FROM users_table WHERE user_id != 2 ) as bar WHERE foo.value_2 = bar.user_id; -DEBUG: generating subplan 16_1 for subquery SELECT value_2 FROM public.users_table WHERE (user_id = 1) OFFSET 0 -DEBUG: Plan 16 query after replacing subqueries and 
CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.value_2 FROM read_intermediate_result('16_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) foo, (SELECT users_table.user_id FROM public.users_table WHERE (users_table.user_id <> 2)) bar WHERE (foo.value_2 = bar.user_id) +DEBUG: generating subplan 17_1 for subquery SELECT value_2 FROM public.users_table WHERE (user_id = 1) OFFSET 0 +DEBUG: Plan 17 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.value_2 FROM read_intermediate_result('17_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) foo, (SELECT users_table.user_id FROM public.users_table WHERE (users_table.user_id <> 2)) bar WHERE (foo.value_2 = bar.user_id) count ------- 103 diff --git a/src/test/regress/multi_schedule b/src/test/regress/multi_schedule index 6a2903d22..34d1153e3 100644 --- a/src/test/regress/multi_schedule +++ b/src/test/regress/multi_schedule @@ -44,7 +44,8 @@ test: multi_partitioning_utils multi_partitioning # ---------- test: subquery_basics subquery_local_tables subquery_executors subquery_and_cte set_operations set_operation_and_local_tables test: subqueries_deep subquery_view subquery_partitioning subquery_complex_target_list subqueries_not_supported subquery_in_where -test: subquery_prepared_statements non_colocated_leaf_subquery_joins +test: non_colocated_leaf_subquery_joins non_colocated_subquery_joins +test: subquery_prepared_statements # ---------- # Miscellaneous tests to check our query planning behavior diff --git a/src/test/regress/sql/multi_insert_select_non_pushable_queries.sql b/src/test/regress/sql/multi_insert_select_non_pushable_queries.sql index 4314af701..d26b668b0 100644 --- a/src/test/regress/sql/multi_insert_select_non_pushable_queries.sql +++ b/src/test/regress/sql/multi_insert_select_non_pushable_queries.sql @@ -124,6 +124,9 @@ FROM ( ) t GROUP BY user_id, hasdone_event; -- the LEFT JOIN conditon is 
not on the partition column (i.e., is it part_key divided by 2) +-- still, recursive planning will kick in to plan some part of the query +SET client_min_messages TO DEBUG1; + INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg ) SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event) FROM ( @@ -163,6 +166,7 @@ FROM ( ) t2 ON (t1.user_id = (t2.user_id)/2) GROUP BY t1.user_id, hasdone_event ) t GROUP BY user_id, hasdone_event; +RESET client_min_messages; ------------------------------------ ------------------------------------ @@ -240,6 +244,8 @@ ORDER BY -- not pushable since the JOIN condition is not equi JOIN -- (subquery_1 JOIN subquery_2) +-- still, recursive planning will kick in +SET client_min_messages TO DEBUG1; INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg) SELECT user_id, @@ -305,6 +311,7 @@ GROUP BY count_pay, user_id ORDER BY count_pay; +RESET client_min_messages; ------------------------------------ ------------------------------------ diff --git a/src/test/regress/sql/multi_mx_router_planner.sql b/src/test/regress/sql/multi_mx_router_planner.sql index 53cb0ea10..00cc2e68f 100644 --- a/src/test/regress/sql/multi_mx_router_planner.sql +++ b/src/test/regress/sql/multi_mx_router_planner.sql @@ -222,7 +222,7 @@ SELECT * FROM articles_hash_mx, position('om' in 'Thomas') WHERE author_id = 1 o -- subqueries are supported in FROM clause but they are not router plannable SELECT articles_hash_mx.id,test.word_count FROM articles_hash_mx, (SELECT id, word_count FROM articles_hash_mx) AS test WHERE test.id = articles_hash_mx.id -ORDER BY articles_hash_mx.id; +ORDER BY test.word_count DESC, articles_hash_mx.id LIMIT 5; SELECT articles_hash_mx.id,test.word_count diff --git a/src/test/regress/sql/multi_router_planner.sql b/src/test/regress/sql/multi_router_planner.sql index 5041d8355..5c1f4cc5d 100644 --- a/src/test/regress/sql/multi_router_planner.sql +++ b/src/test/regress/sql/multi_router_planner.sql @@ -291,7 
+291,7 @@ ORDER BY articles_hash.id; -- subqueries are supported in FROM clause but they are not router plannable SELECT articles_hash.id,test.word_count FROM articles_hash, (SELECT id, word_count FROM articles_hash) AS test WHERE test.id = articles_hash.id -ORDER BY articles_hash.id; +ORDER BY test.word_count DESC, articles_hash.id LIMIT 5; SELECT articles_hash.id,test.word_count diff --git a/src/test/regress/sql/multi_subquery_behavioral_analytics.sql b/src/test/regress/sql/multi_subquery_behavioral_analytics.sql index 7b9091ca1..74ad8d8b7 100644 --- a/src/test/regress/sql/multi_subquery_behavioral_analytics.sql +++ b/src/test/regress/sql/multi_subquery_behavioral_analytics.sql @@ -643,7 +643,8 @@ SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM assets; DROP TABLE assets; -- count number of distinct users who have value_1 equal to 5 or 13 but not 3 --- original query that fails +-- is recusrively planned +SET client_min_messages TO DEBUG1; SELECT count(*) FROM ( SELECT @@ -659,6 +660,8 @@ SELECT count(*) FROM count(distinct value_1) = 2 ) as foo; +RESET client_min_messages; + -- previous push down query SELECT subquery_count FROM (SELECT count(*) as subquery_count FROM diff --git a/src/test/regress/sql/multi_subquery_complex_queries.sql b/src/test/regress/sql/multi_subquery_complex_queries.sql index b5b089c8a..319cc743c 100644 --- a/src/test/regress/sql/multi_subquery_complex_queries.sql +++ b/src/test/regress/sql/multi_subquery_complex_queries.sql @@ -397,7 +397,8 @@ ORDER BY RESET client_min_messages; SET citus.enable_repartition_joins to OFF; --- not supported since the join is not equi join +-- recursively planned since the join is not equi join +SET client_min_messages TO DEBUG1; SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM ( SELECT *, random() @@ -458,6 +459,7 @@ GROUP BY types ORDER BY types; +RESET client_min_messages; -- not supported since subquery 3 includes a JOIN with non-equi join SELECT 
("final_query"."event_types") as types, count(*) AS sumOfEventType @@ -897,8 +899,9 @@ GROUP BY user_id ORDER BY cnt DESC, user_id DESC LIMIT 10; --- not supported since the join between t and t2 is not equi join +-- recursively planned since the join between t and t2 is not equi join -- union all with inner and left joins +SET client_min_messages TO DEBUG1; SELECT user_id, count(*) as cnt FROM (SELECT first_query.user_id, random() @@ -971,6 +974,8 @@ GROUP BY user_id ORDER BY cnt DESC, user_id DESC LIMIT 10; +RESET client_min_messages; + -- -- Union, inner join and left join -- @@ -1309,7 +1314,8 @@ LIMIT 10; SET citus.subquery_pushdown to OFF; --- not supported since the inner JOIN is not equi join +-- not supported since the inner JOIN is not equi join and LATERAL JOIN prevents recursive planning +SET client_min_messages TO DEBUG2; SELECT user_id, lastseen FROM (SELECT @@ -1625,7 +1631,7 @@ GROUP BY ORDER BY generated_group_field DESC, value DESC; --- recursive planning didn't kick-in since the non-equi join is among subqueries +-- recursive planning kicked-in since the non-equi join is among subqueries SELECT count(*) AS value, "generated_group_field" FROM @@ -1715,8 +1721,9 @@ ORDER BY cnt, value_3 DESC LIMIT 10; SET citus.enable_repartition_joins to ON; SET client_min_messages TO DEBUG1; --- not supported since there is no column equality at all --- but still recursive planning is tried +-- although there is no column equality at all +-- still recursive planning plans "some_users_data" +-- and the query becomes OK SELECT "value_3", count(*) AS cnt FROM diff --git a/src/test/regress/sql/multi_subquery_complex_reference_clause.sql b/src/test/regress/sql/multi_subquery_complex_reference_clause.sql index 334462226..24aedcef8 100644 --- a/src/test/regress/sql/multi_subquery_complex_reference_clause.sql +++ b/src/test/regress/sql/multi_subquery_complex_reference_clause.sql @@ -908,7 +908,8 @@ SELECT count(*) FROM (SELECT random() FROM users_ref_test_table LEFT JOIN 
user_buy_test_table ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1; --- we don't allow non equi join among hash partitioned tables +-- we do allow non equi join among subqueries via recursive planning +SET client_min_messages TO DEBUG1; SELECT count(*) FROM (SELECT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1, @@ -916,8 +917,9 @@ SELECT count(*) FROM ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_2 WHERE subquery_1.user_id != subquery_2.user_id ; --- we cannot push this query since hash partitioned tables --- are not joined on partition keys with equality +-- we could not push this query not due to non colocated +-- subqueries (i.e., they are recursively planned) +-- but due to outer join restrictions SELECT count(*) AS cnt, "generated_group_field" FROM @@ -955,6 +957,9 @@ count(*) AS cnt, "generated_group_field" cnt DESC, generated_group_field ASC LIMIT 10; +RESET client_min_messages; + + -- two hash partitioned relations are not joined -- on partiton keys although reference table is fine -- to push down diff --git a/src/test/regress/sql/multi_subquery_in_where_clause.sql b/src/test/regress/sql/multi_subquery_in_where_clause.sql index d152d7a03..4ae327e57 100644 --- a/src/test/regress/sql/multi_subquery_in_where_clause.sql +++ b/src/test/regress/sql/multi_subquery_in_where_clause.sql @@ -547,12 +547,14 @@ WHERE user_id WHERE f_inner.user_id = f_outer.user_id ) ORDER BY 1 LIMIT 3; --- semi join is not on the partition key for the third subquery +-- semi join is not on the partition key for the third subquery, and recursively planned +SET client_min_messages TO DEBUG1; SELECT user_id FROM users_table WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 1 AND value_1 <= 2) AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 3 AND value_1 <= 4) - AND value_2 IN (SELECT user_id FROM 
users_table WHERE value_1 >= 5 AND value_1 <= 6); + AND value_2 IN (SELECT user_id FROM users_table WHERE value_1 >= 5 AND value_1 <= 6) ORDER BY 1 DESC LIMIT 3; +RESET client_min_messages; CREATE FUNCTION test_join_function(integer, integer) RETURNS bool AS 'select $1 > $2;' diff --git a/src/test/regress/sql/multi_view.sql b/src/test/regress/sql/multi_view.sql index 60574b151..cd89cdd83 100644 --- a/src/test/regress/sql/multi_view.sql +++ b/src/test/regress/sql/multi_view.sql @@ -80,11 +80,11 @@ SELECT o_orderkey, l_linenumber FROM priority_orders left join air_shipped_linei -- repartition query on view join -- it passes planning, fails at execution stage -SELECT * FROM priority_orders JOIN air_shipped_lineitems ON (o_custkey = l_suppkey); +SET client_min_messages TO DEBUG1; +SELECT * FROM priority_orders JOIN air_shipped_lineitems ON (o_custkey = l_suppkey) ORDER BY o_orderkey DESC, o_custkey DESC, o_orderpriority DESC LIMIT 5; +RESET client_min_messages; -SET citus.task_executor_type to "task-tracker"; SELECT count(*) FROM priority_orders JOIN air_shipped_lineitems ON (o_custkey = l_suppkey); -SET citus.task_executor_type to DEFAULT; -- materialized views work -- insert into... 
select works with views @@ -217,6 +217,7 @@ SELECT * FROM ORDER BY 2 DESC, 1; -- non-partition key joins are not supported inside subquery +-- since the join with a table SELECT * FROM (SELECT ru.user_id, count(*) FROM recent_users ru @@ -263,6 +264,7 @@ SELECT * FROM ORDER BY 2 DESC, 1; -- event vs table non-partition-key join is not supported +-- given that we cannot recursively plan tables yet SELECT * FROM (SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event FROM recent_users ru diff --git a/src/test/regress/sql/non_colocated_leaf_subquery_joins.sql b/src/test/regress/sql/non_colocated_leaf_subquery_joins.sql index 854c041a4..46271ad66 100644 --- a/src/test/regress/sql/non_colocated_leaf_subquery_joins.sql +++ b/src/test/regress/sql/non_colocated_leaf_subquery_joins.sql @@ -105,8 +105,8 @@ FROM ( ORDER BY 2 DESC, 1; $$); - -- should not recursively plan any subquery given that we don't support - -- non-colocated subquery joins among the subqueries yet + -- should recursively plan bar subquery given that it is not joined + -- on the distribution key with bar SELECT true AS valid FROM explain_json($$SELECT count(*) FROM diff --git a/src/test/regress/sql/non_colocated_subquery_joins.sql b/src/test/regress/sql/non_colocated_subquery_joins.sql new file mode 100644 index 000000000..2d77d86f7 --- /dev/null +++ b/src/test/regress/sql/non_colocated_subquery_joins.sql @@ -0,0 +1,700 @@ +-- =================================================================== +-- test recursive planning functionality for non-colocated subqueries +-- We prefered to use EXPLAIN almost all the queries here, +-- otherwise the execution time of so many repartition queries would +-- be too high for the regression tests. Also, note that we're mostly +-- interested in recurive planning side of the things, thus supressing +-- the actual explain output. 
+-- =================================================================== + +SET client_min_messages TO DEBUG1; + +CREATE SCHEMA non_colocated_subquery; + +SET search_path TO non_colocated_subquery, public; + +-- we don't use the data anyway +CREATE TABLE users_table_local AS SELECT * FROM users_table LIMIT 0; +CREATE TABLE events_table_local AS SELECT * FROM events_table LIMIT 0; + + +SET citus.enable_repartition_joins TO ON; +\set VERBOSITY terse + +-- Function that parses explain output as JSON +-- copied from multi_explain.sql and had to give +-- a different name via postfix to prevent concurrent +-- create/drop etc. +CREATE OR REPLACE FUNCTION explain_json_2(query text) +RETURNS jsonb +AS $BODY$ +DECLARE + result jsonb; +BEGIN + EXECUTE format('EXPLAIN (FORMAT JSON) %s', query) INTO result; + RETURN result; +END; +$BODY$ LANGUAGE plpgsql; + + +-- leaf queries contain colocated joins +-- but not the subquery +SELECT true AS valid FROM explain_json_2($$ + SELECT + foo.value_2 + FROM + (SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo, + (SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar + WHERE + foo.value_2 = bar.value_2; +$$); + + +-- simple non colocated join with subqueries in WHERE clause +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + events_table + WHERE + event_type + IN + (SELECT event_type FROM events_table WHERE user_id < 100); + +$$); + +-- simple non colocated join with subqueries in WHERE clause with NOT IN +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + events_table + WHERE + user_id + NOT IN + (SELECT user_id FROM events_table WHERE event_type = 2); +$$); + + +-- Subqueries in WHERE and FROM are mixed +-- In this query, only subquery in WHERE is not a colocated join +SELECT true AS valid FROM explain_json_2($$ 
+ + SELECT + foo.user_id + FROM + (SELECT users_table.user_id, event_type FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo, + (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar + WHERE + foo.user_id = bar.user_id AND + foo.event_type IN (SELECT event_type FROM events_table WHERE user_id < 3); + +$$); + + +-- Subqueries in WHERE and FROM are mixed +-- In this query, one of the joins in the FROM clause is not colocated +SELECT true AS valid FROM explain_json_2($$ + + SELECT + foo.user_id + FROM + (SELECT users_table.user_id, event_type FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo, + (SELECT (users_table.user_id / 2) as user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar + WHERE + foo.user_id = bar.user_id AND + foo.user_id IN (SELECT user_id FROM events_table WHERE user_id < 10); +$$); + +-- Subqueries in WHERE and FROM are mixed +-- In this query, both the joins in the FROM clause is not colocated +SELECT true AS valid FROM explain_json_2($$ + + SELECT + foo.user_id + FROM + (SELECT users_table.user_id, event_type FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo, + (SELECT (users_table.user_id / 2) as user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar + WHERE + foo.user_id = bar.user_id AND + foo.user_id NOT IN (SELECT user_id FROM events_table WHERE user_id < 10); +$$); + + +-- Subqueries in WHERE and FROM are mixed +-- In this query, one of the joins in the FROM clause is not colocated and subquery in WHERE clause is not colocated +-- similar to the above, but, this time bar is the anchor subquery +SELECT true AS valid FROM 
explain_json_2($$ + SELECT + foo.user_id + FROM + (SELECT users_table.user_id, event_type FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) as foo, + (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar + WHERE + foo.user_id = bar.user_id AND + foo.event_type IN (SELECT event_type FROM events_table WHERE user_id < 4); +$$); + + + +-- The inner subqueries and the subquery in WHERE are non-located joins +SELECT true AS valid FROM explain_json_2($$ + SELECT foo_top.*, events_table.user_id FROM + ( + + SELECT + foo.user_id, random() + FROM + (SELECT users_table.user_id, event_type FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) as foo, + (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.event_type AND event_type IN (5,6,7,8)) as bar + WHERE + foo.user_id = bar.user_id AND + foo.event_type IN (SELECT event_type FROM events_table WHERE user_id = 5) + + ) as foo_top, events_table WHERE events_table.user_id = foo_top.user_id; +$$); + +-- Slightly more complex query where there are 5 joins, 1 of them is non-colocated +SELECT true AS valid FROM explain_json_2($$ + + SELECT * FROM + ( + SELECT + foo1.user_id, random() + FROM + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo1, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as foo2, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as foo3, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE 
users_table.user_id = events_table.user_id AND event_type IN (13,14,15,16)) as foo4, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (17,18,19,20)) as foo5 + + WHERE + + foo1.user_id = foo4.user_id AND + foo1.user_id = foo2.user_id AND + foo1.user_id = foo3.user_id AND + foo1.user_id = foo4.user_id AND + foo1.user_id = foo5.value_1 + ) as foo_top; + +$$); + + + +-- Very similar to the above query +-- One of the queries is not joined on partition key, but this time subquery itself +SELECT true AS valid FROM explain_json_2($$ + + SELECT * FROM + ( + SELECT + foo1.user_id, random() + FROM + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo1, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as foo2, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as foo3, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (13,14,15,16)) as foo4, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (17,18,19,20)) as foo5 + + WHERE + + foo1.user_id = foo4.user_id AND + foo1.user_id = foo2.user_id AND + foo1.user_id = foo3.user_id AND + foo1.user_id = foo4.user_id AND + foo1.user_id = foo5.user_id + ) as foo_top; +$$); + + +-- There are two non colocated joins, one is in the one of the leaf queries, +-- the other is on the top-level subquery +SELECT true AS valid FROM explain_json_2($$ + + SELECT * FROM + ( + SELECT + foo1.user_id, random() + FROM + (SELECT 
users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo1, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (5,6,7,8)) as foo2, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as foo3, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (13,14,15,16)) as foo4, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (17,18,19,20)) as foo5 + WHERE + foo1.user_id = foo4.user_id AND + foo1.user_id = foo2.user_id AND + foo1.user_id = foo3.user_id AND + foo1.user_id = foo4.user_id AND + foo1.user_id = foo5.value_1 + ) as foo_top; +$$); + + +-- a similar query to the above, but, this sime the second +-- non colocated join is on the already recursively planned subquery +-- the results should be the same +SELECT true AS valid FROM explain_json_2($$ + + SELECT * FROM + ( + SELECT + foo1.user_id, random() + FROM + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo1, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (5,6,7,8)) as foo2, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as foo3, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (13,14,15,16)) as 
foo4, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (17,18,19,20)) as foo5 + WHERE + foo1.user_id = foo4.user_id AND + foo1.user_id = foo2.user_id AND + foo1.user_id = foo3.user_id AND + foo1.user_id = foo4.user_id AND + foo2.user_id = foo5.value_1 + ) as foo_top; +$$); + +-- Deeper subqueries are non-colocated +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + ( + SELECT + foo.user_id + FROM + (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) as foo, + (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar + WHERE + foo.user_id = bar.user_id) as foo_top JOIN + + ( + SELECT + foo.user_id + FROM + (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) as foo, + (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar + WHERE + foo.user_id = bar.user_id) as bar_top + ON (foo_top.user_id = bar_top.user_id); +$$); + + + +-- Top level Subquery is not colocated +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + ( + SELECT + foo.user_id, foo.value_2 + FROM + (SELECT DISTINCT users_table.user_id, users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo, + (SELECT DISTINCT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar + WHERE + foo.user_id = bar.user_id) as foo_top JOIN + + ( + SELECT + foo.user_id + FROM + (SELECT DISTINCT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = 
events_table.user_id AND event_type IN (9,10,11,12)) as foo, + (SELECT DISTINCT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (13,14,15,16)) as bar + WHERE + foo.user_id = bar.user_id) as bar_top + ON (foo_top.value_2 = bar_top.user_id); + +$$); + +-- Top level Subquery is not colocated as the above +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + ( + SELECT + foo.user_id, foo.value_2 + FROM + (SELECT DISTINCT users_table.user_id, users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo, + (SELECT DISTINCT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar + WHERE + foo.user_id = bar.user_id) as foo_top JOIN + ( + SELECT + foo.user_id + FROM + (SELECT DISTINCT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as foo, + (SELECT DISTINCT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (13,14,15,16)) as bar + WHERE + foo.user_id = bar.user_id) as bar_top + ON (foo_top.value_2 = bar_top.user_id); +$$); + + + +-- non colocated joins are deep inside the query +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + ( + SELECT * FROM + (SELECT DISTINCT users_table.user_id FROM users_table, + (SELECT events_table.user_id as my_users FROM events_table, users_table WHERE events_table.event_type = users_table.user_id) as foo + WHERE foo.my_users = users_table.user_id) as mid_level_query + ) as bar; +$$); + +-- similar to the above, with relation rtes +-- we're able to recursively plan foo +-- note that if we haven't added random() to the subquery, we'd be able run the query +-- via regular repartitioning since PostgreSQL would pull the query up 
+SELECT true AS valid FROM explain_json_2($$ + + SELECT count(*) FROM ( SELECT * FROM + (SELECT DISTINCT users_table.user_id FROM users_table, + (SELECT events_table.event_type as my_users, random() FROM events_table, users_table WHERE events_table.user_id = users_table.user_id) as foo + WHERE foo.my_users = users_table.user_id) as mid_level_query ) as bar; + +$$); + + +-- same as the above query, but, one level deeper subquery + SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + ( + SELECT * FROM + (SELECT DISTINCT users_table.user_id FROM users_table, + (SELECT events_table.user_id as my_users FROM events_table, + (SELECT events_table.user_id, random() FROM users_table, events_table WHERE users_table.user_id = events_table.user_id) as selected_users + WHERE events_table.event_type = selected_users.user_id) as foo + + WHERE foo.my_users = users_table.user_id) as mid_level_query + ) as bar; + $$); + +-- deeper query, subquery in WHERE clause +-- this time successfull plan the query since the join on the relation and +-- the subquery on the distribution key +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + ( + SELECT * FROM + (SELECT DISTINCT users_table.user_id FROM users_table, + + + (SELECT events_table.user_id as my_users FROM events_table, + (SELECT events_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND + + users_table.user_id IN (SELECT value_2 FROM events_table) + + ) as selected_users + WHERE events_table.user_id = selected_users.user_id) as foo + + WHERE foo.my_users = users_table.user_id) as mid_level_query + + ) as bar; + +$$); + +-- should recursively plan the subquery in WHERE clause +SELECT true AS valid FROM explain_json_2($$SELECT + count(*) +FROM + users_table +WHERE + value_1 + IN + (SELECT + users_table.user_id + FROM + users_table, events_table + WHERE + users_table.user_id = events_table.value_2 AND event_type IN (5,6));$$); + +-- leaf subquery 
repartitioning should work fine when used with CTEs +SELECT true AS valid FROM explain_json_2($$ + WITH q1 AS (SELECT user_id FROM users_table) +SELECT count(*) FROM q1, (SELECT + users_table.user_id, random() + FROM + users_table, events_table + WHERE + users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) as bar WHERE bar.user_id = q1.user_id ;$$); + +-- subquery joins should work fine when used with CTEs +SELECT true AS valid FROM explain_json_2($$ + WITH q1 AS (SELECT user_id FROM users_table) + SELECT count(*) FROM q1, (SELECT + users_table.user_id, random() + FROM + users_table, events_table + WHERE + users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as bar WHERE bar.user_id = q1.user_id ;$$); + + +-- should work fine within UNIONs +SELECT true AS valid FROM explain_json_2($$ + (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) UNION + (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8));$$); + +-- should work fine within leaf queries of deeper subqueries +SELECT true AS valid FROM explain_json_2($$ +SELECT event, array_length(events_table, 1) +FROM ( + SELECT event, array_agg(t.user_id) AS events_table + FROM ( + SELECT + DISTINCT ON(e.event_type::text) e.event_type::text as event, e.time, e.user_id + FROM + users_table AS u, + events_table AS e, + (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (5,6,7,8)) as bar + WHERE u.user_id = e.user_id AND + u.user_id IN + ( + SELECT + user_id + FROM + users_table + WHERE value_2 >= 5 + AND EXISTS (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) + LIMIT 5 + ) + ) t, users_table WHERE users_table.value_1 = t.event::int + GROUP BY event +) q +ORDER BY 
2 DESC, 1; +$$); + + + +-- this is also supported since we can recursively plan relations as well +-- the relations are joined under a join tree with an alias +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + (users_table u1 JOIN users_table u2 using(value_1)) a JOIN (SELECT value_1, random() FROM users_table) as u3 USING (value_1); +$$); + +-- a very similar query to the above +-- however, this time we use a subquery instead of join alias, and it works +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + (SELECT * FROM users_table u1 JOIN users_table u2 using(value_1)) a JOIN (SELECT value_1, random() FROM users_table) as u3 USING (value_1); +$$); + +-- a similar query to the above, this time subquery is on the left +-- and the relation is on the right of the join tree +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + (SELECT value_2, random() FROM users_table) as u1 + JOIN + events_table + using (value_2); +$$); + + + +-- recursive planning should kick in for outer joins as well +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + (SELECT value_2, random() FROM users_table) as u1 + LEFT JOIN + (SELECT value_2, random() FROM users_table) as u2 + USING(value_2); +$$); + + +-- recursive planning should kick in for outer joins as well +-- but this time recursive planning might convert the query +-- into a not supported join +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + (SELECT value_2, random() FROM users_table) as u1 + RIGHT JOIN + (SELECT value_2, random() FROM users_table) as u2 + USING(value_2); +$$); + + +-- set operations may produce not very efficient plans +-- although we could have picked a as our anchor subquery, +-- we pick foo in this case and recursively plan a +SELECT true AS valid FROM explain_json_2($$ + + SELECT * FROM + ( + ( + SELECT user_id FROM users_table + UNION + SELECT user_id FROM users_table + ) a + JOIN + 
(SELECT value_1 FROM users_table) as foo ON (a.user_id = foo.value_1) + ); +$$); + +-- we could do the same with regular tables as well +SELECT true AS valid FROM explain_json_2($$ + + SELECT * FROM + ( + ( + SELECT user_id FROM users_table + UNION + SELECT user_id FROM users_table + ) a + JOIN + users_table as foo ON (a.user_id = foo.value_1) + ); +$$); + +-- this time the plan is optimal, we are +-- able to keep the UNION query given that foo +-- is the anchor +SELECT true AS valid FROM explain_json_2($$ + + SELECT * FROM + ( + (SELECT user_id FROM users_table) as foo + JOIN + ( + SELECT user_id FROM users_table WHERE user_id IN (1,2,3,4) + UNION + SELECT user_id FROM users_table WHERE user_id IN (5,6,7,8) + ) a + + ON (a.user_id = foo.user_id) + JOIN + + (SELECT value_1 FROM users_table) as bar + + ON(foo.user_id = bar.value_1) + ); +$$); + +-- it should be safe to recursively plan non colocated subqueries +-- inside a CTE +SELECT true AS valid FROM explain_json_2($$ + + WITH non_colocated_subquery AS + ( + SELECT + foo.value_2 + FROM + (SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo, + (SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar + WHERE + foo.value_2 = bar.value_2 + ), + non_colocated_subquery_2 AS + ( + SELECT + count(*) as cnt + FROM + events_table + WHERE + event_type + IN + (SELECT event_type FROM events_table WHERE user_id < 4) + ) + SELECT + * + FROM + non_colocated_subquery, non_colocated_subquery_2 + WHERE + non_colocated_subquery.value_2 != non_colocated_subquery_2.cnt +$$); + +-- non colocated subquery joins should work fine along with local tables +SELECT true AS valid FROM explain_json_2($$ + SELECT + count(*) + FROM + (SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN 
(1,2,3,4)) as foo, + (SELECT users_table_local.value_2 FROM users_table_local, events_table_local WHERE users_table_local.user_id = events_table_local.user_id AND event_type IN (5,6,7,8)) as bar, + (SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as baz + WHERE + foo.value_2 = bar.value_2 + AND + foo.value_2 = baz.value_2 +$$); + +-- a combination of subqueries in FROM and WHERE clauses +-- we actually recursively plan non colocated subqueries +-- pretty accurate, however, we hit our join checks, which seems too restrictive +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + (SELECT user_id FROM users_table) as foo + JOIN + ( + SELECT user_id FROM users_table WHERE user_id IN (1,2,3,4) + UNION + SELECT user_id FROM users_table WHERE user_id IN (5,6,7,8) + ) a + + ON (a.user_id = foo.user_id) + JOIN + + (SELECT value_1, value_2 FROM users_table) as bar + + ON(foo.user_id = bar.value_1) + WHERE + value_2 IN (SELECT value_1 FROM users_table WHERE value_2 < 1) + AND + value_1 IN (SELECT value_2 FROM users_table WHERE value_1 < 2) + AND + foo.user_id IN (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2)) +$$); + +-- make sure that we don't pick the reference table as +-- the anchor +SELECT true AS valid FROM explain_json_2($$ + + SELECT count(*) + FROM + users_reference_table AS users_table_ref, + (SELECT user_id FROM users_Table) AS foo, + (SELECT user_id, value_2 FROM events_Table) AS bar + WHERE + users_table_ref.user_id = foo.user_id + AND foo.user_id = bar.value_2; +$$); + +RESET client_min_messages; +DROP FUNCTION explain_json_2(text); + +SET search_path TO 'public'; +DROP SCHEMA non_colocated_subquery CASCADE; diff --git a/src/test/regress/sql/set_operations.sql b/src/test/regress/sql/set_operations.sql index c911f3e00..4af2da87d 100644 --- 
a/src/test/regress/sql/set_operations.sql +++ b/src/test/regress/sql/set_operations.sql @@ -121,7 +121,7 @@ SELECT * FROM ((SELECT * FROM test) UNION (SELECT * FROM ref WHERE a IN (SELECT -- subquery union in WHERE clause with partition column equality and implicit join is pushed down SELECT * FROM test a WHERE x IN (SELECT x FROM test b WHERE y = 1 UNION SELECT x FROM test c WHERE y = 2) ORDER BY 1,2; --- subquery union in WHERE clause with partition column equality, without implicit join on partition column +-- subquery union in WHERE clause with partition column equality, without implicit join on partition column is recursively planned SELECT * FROM test a WHERE x NOT IN (SELECT x FROM test b WHERE y = 1 UNION SELECT x FROM test c WHERE y = 2) ORDER BY 1,2; -- subquery union in WHERE clause without parition column equality is recursively planned diff --git a/src/test/regress/sql/subqueries_not_supported.sql b/src/test/regress/sql/subqueries_not_supported.sql index cdee91c68..f6d868f2b 100644 --- a/src/test/regress/sql/subqueries_not_supported.sql +++ b/src/test/regress/sql/subqueries_not_supported.sql @@ -96,17 +96,6 @@ ORDER BY LIMIT 10) as foo; --- top level join is not on the distribution key thus not supported --- (use random to prevent Postgres to pull subqueries) -SELECT - foo.value_2 -FROM - (SELECT users_table.value_2, random() FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo, - (SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar -WHERE - foo.value_2 = bar.value_2; - - -- OUTER JOINs where the outer part is recursively planned and not the other way -- around is not supported SELECT diff --git a/src/test/regress/sql/subquery_executors.sql b/src/test/regress/sql/subquery_executors.sql index ca718d13b..7668d4908 100644 --- a/src/test/regress/sql/subquery_executors.sql +++ 
b/src/test/regress/sql/subquery_executors.sql @@ -22,7 +22,7 @@ FROM WHERE foo.value_2 = bar.user_id; -- subquery with router but not logical plannable --- should fail +-- bar is recursively planned SELECT count(*) FROM