diff --git a/src/backend/distributed/planner/multi_logical_optimizer.c b/src/backend/distributed/planner/multi_logical_optimizer.c index 93a6c9165..35a664006 100644 --- a/src/backend/distributed/planner/multi_logical_optimizer.c +++ b/src/backend/distributed/planner/multi_logical_optimizer.c @@ -42,6 +42,7 @@ #include "parser/parse_agg.h" #include "parser/parse_coerce.h" #include "parser/parse_oper.h" +#include "parser/parsetree.h" #include "utils/builtins.h" #include "utils/fmgroids.h" #include "utils/lsyscache.h" @@ -151,8 +152,9 @@ static void ErrorIfContainsUnsupportedSubquery(MultiNode *logicalPlanNode, PlannerRestrictionContext * plannerRestrictionContext); static void ErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerQueryHasLimit); +static void ErrorIfUnsupportedSetOperation(Query *subqueryTree, bool outerQueryHasLimit); +static bool ExtractSetOperationStatmentWalker(Node *node, List **setOperationList); static void ErrorIfUnsupportedTableCombination(Query *queryTree); -static void ErrorIfUnsupportedUnionQuery(Query *unionQuery); static bool TargetListOnPartitionColumn(Query *query, List *targetEntryList); static FieldSelect * CompositeFieldRecursive(Expr *expression, Query *query); static bool FullCompositeFieldList(List *compositeFieldList); @@ -2945,18 +2947,7 @@ ErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerQueryHasLimit) if (subqueryTree->setOperations) { - SetOperationStmt *setOperationStatement = - (SetOperationStmt *) subqueryTree->setOperations; - - if (setOperationStatement->op == SETOP_UNION) - { - ErrorIfUnsupportedUnionQuery(subqueryTree); - } - else - { - preconditionsSatisfied = false; - errorDetail = "Intersect and Except are currently unsupported"; - } + ErrorIfUnsupportedSetOperation(subqueryTree, outerQueryHasLimit); } if (subqueryTree->hasRecursive) @@ -3047,6 +3038,77 @@ ErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerQueryHasLimit) } +/* + * ErrorIfUnsupportedSetOperation is a helper function for 
ErrorIfCannotPushdownSubquery(). + * It basically iterates over the subqueries that reside under the given set operations. + * + * The function also errors out for set operations INTERSECT and EXCEPT. + */ +static void +ErrorIfUnsupportedSetOperation(Query *subqueryTree, bool outerQueryHasLimit) +{ + List *rangeTableList = subqueryTree->rtable; + List *rangeTableIndexList = NIL; + ListCell *rangeTableIndexCell = NULL; + List *setOperationStatementList = NIL; + ListCell *setOperationStatmentCell = NULL; + + ExtractSetOperationStatmentWalker((Node *) subqueryTree->setOperations, + &setOperationStatementList); + foreach(setOperationStatmentCell, setOperationStatementList) + { + SetOperationStmt *setOperation = + (SetOperationStmt *) lfirst(setOperationStatmentCell); + + if (setOperation->op != SETOP_UNION) + { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot push down this subquery"), + errdetail("Intersect and Except are currently unsupported"))); + } + } + + ExtractRangeTableIndexWalker((Node *) subqueryTree->setOperations, + &rangeTableIndexList); + foreach(rangeTableIndexCell, rangeTableIndexList) + { + int rangeTableIndex = lfirst_int(rangeTableIndexCell); + RangeTblEntry *rangeTableEntry = rt_fetch(rangeTableIndex, rangeTableList); + + Assert(rangeTableEntry->rtekind == RTE_SUBQUERY); + + ErrorIfCannotPushdownSubquery(rangeTableEntry->subquery, outerQueryHasLimit); + } +} + + +/* + * ExtractSetOperationStatmentWalker walks over a set operation statement + * and finds all set operations in the tree. 
+ */ +static bool +ExtractSetOperationStatmentWalker(Node *node, List **setOperationList) +{ + bool walkerResult = false; + if (node == NULL) + { + return false; + } + + if (IsA(node, SetOperationStmt)) + { + SetOperationStmt *setOperation = (SetOperationStmt *) node; + + (*setOperationList) = lappend(*setOperationList, setOperation); + } + + walkerResult = expression_tree_walker(node, ExtractSetOperationStatmentWalker, + setOperationList); + + return walkerResult; +} + + /* * ErrorIfUnsupportedTableCombination checks if the given query tree contains any * unsupported range table combinations. For this, the function walks over all @@ -3103,103 +3165,6 @@ ErrorIfUnsupportedTableCombination(Query *queryTree) } -/* - * ErrorIfUnsupportedUnionQuery checks if the given union query is a supported - * one., otherwise it errors out. For these purpose it checks tree conditions; - * a. Are count of partition column filters same for union subqueries. - * b. Are target lists of union subquries include partition column. - * c. Is it a union clause without All option. - * - * Note that we check equality of filters in ErrorIfUnsupportedFilters(). We - * allow leaf queries not having a filter clause on the partition column. We - * check if a leaf query has a filter on the partition column, it must be same - * with other queries or if leaf query must not have any filter on the partition - * column, both are ok. Because joins and nested queries are transitive, it is - * enough one leaf query to have a filter on the partition column. But unions - * are not transitive, so here we check if they have same count of filters on - * the partition column. If count is more than 0, we already checked that they - * are same, of if count is 0 then both don't have any filter on the partition - * column. 
- */ -static void -ErrorIfUnsupportedUnionQuery(Query *unionQuery) -{ - bool supportedUnionQuery = true; - bool leftQueryOnPartitionColumn = false; - bool rightQueryOnPartitionColumn = false; - List *rangeTableList = unionQuery->rtable; - SetOperationStmt *unionStatement = (SetOperationStmt *) unionQuery->setOperations; - Query *leftQuery = NULL; - Query *rightQuery = NULL; - List *leftOpExpressionList = NIL; - List *rightOpExpressionList = NIL; - uint32 leftOpExpressionCount = 0; - uint32 rightOpExpressionCount = 0; - char *errorDetail = NULL; - - RangeTblRef *leftRangeTableReference = (RangeTblRef *) unionStatement->larg; - RangeTblRef *rightRangeTableReference = (RangeTblRef *) unionStatement->rarg; - - int leftTableIndex = leftRangeTableReference->rtindex - 1; - int rightTableIndex = rightRangeTableReference->rtindex - 1; - - RangeTblEntry *leftRangeTableEntry = (RangeTblEntry *) list_nth(rangeTableList, - leftTableIndex); - RangeTblEntry *rightRangeTableEntry = (RangeTblEntry *) list_nth(rangeTableList, - rightTableIndex); - - Assert(leftRangeTableEntry->rtekind == RTE_SUBQUERY); - Assert(rightRangeTableEntry->rtekind == RTE_SUBQUERY); - - leftQuery = leftRangeTableEntry->subquery; - rightQuery = rightRangeTableEntry->subquery; - - /* - * Check if subqueries of union have same count of filters on partition - * column. 
- */ - leftOpExpressionList = PartitionColumnOpExpressionList(leftQuery); - rightOpExpressionList = PartitionColumnOpExpressionList(rightQuery); - - leftOpExpressionCount = list_length(leftOpExpressionList); - rightOpExpressionCount = list_length(rightOpExpressionList); - - if (leftOpExpressionCount != rightOpExpressionCount) - { - supportedUnionQuery = false; - errorDetail = "Union clauses need to have same count of filters on " - "partition column"; - } - - /* check if union subqueries have partition column in their target lists */ - leftQueryOnPartitionColumn = TargetListOnPartitionColumn(leftQuery, - leftQuery->targetList); - rightQueryOnPartitionColumn = TargetListOnPartitionColumn(rightQuery, - rightQuery->targetList); - - if (!(leftQueryOnPartitionColumn && rightQueryOnPartitionColumn)) - { - supportedUnionQuery = false; - errorDetail = "Union clauses need to select partition columns"; - } - - /* check if it is a union all operation */ - if (unionStatement->all) - { - supportedUnionQuery = false; - errorDetail = "Union All clauses are currently unsupported"; - } - - /* finally check and error out if not satisfied */ - if (!supportedUnionQuery) - { - ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot push down this subquery"), - errdetail("%s", errorDetail))); - } -} - - /* * GroupTargetEntryList walks over group clauses in the given list, finds * matching target entries and return them in a new list. 
diff --git a/src/backend/distributed/planner/relation_restriction_equivalence.c b/src/backend/distributed/planner/relation_restriction_equivalence.c index d7dbc75ae..7d7992cda 100644 --- a/src/backend/distributed/planner/relation_restriction_equivalence.c +++ b/src/backend/distributed/planner/relation_restriction_equivalence.c @@ -69,6 +69,29 @@ static AttributeEquivalenceClass * AttributeEquivalenceClassForEquivalenceClass( static void AddToAttributeEquivalenceClass(AttributeEquivalenceClass ** attributeEquivalanceClass, PlannerInfo *root, Var *varToBeAdded); +static void AddRteSubqueryToAttributeEquivalenceClass(AttributeEquivalenceClass * + *attributeEquivalanceClass, + RangeTblEntry * + rangeTableEntry, + PlannerInfo *root, + Var *varToBeAdded); +static Query * GetTargetSubquery(PlannerInfo *root, RangeTblEntry *rangeTableEntry, + Var *varToBeAdded); +static void AddUnionAllSetOperationsToAttributeEquivalenceClass( + AttributeEquivalenceClass ** + attributeEquivalanceClass, + PlannerInfo *root, + Var *varToBeAdded); +static void AddUnionSetOperationsToAttributeEquivalenceClass(AttributeEquivalenceClass ** + attributeEquivalenceClass, + PlannerInfo *root, + SetOperationStmt * + setOperation, + Var *varToBeAdded); +static void AddRteRelationToAttributeEquivalenceClass(AttributeEquivalenceClass ** + attrEquivalenceClass, + RangeTblEntry *rangeTableEntry, + Var *varToBeAdded); static Var * GetVarFromAssignedParam(List *parentPlannerParamList, Param *plannerParam); static List * GenerateAttributeEquivalencesForJoinRestrictions(JoinRestrictionContext @@ -632,106 +655,271 @@ GenerateAttributeEquivalencesForJoinRestrictions(JoinRestrictionContext * * - Generate an AttributeEquivalenceMember and add to the input * AttributeEquivalenceClass * - If the RTE that corresponds to a subquery - * - Find the corresponding target entry via varno - * - if subquery entry is a set operation (i.e., only UNION/UNION ALL allowed) - * - recursively add both left and right sides of the 
set operation's + * - If the RTE that corresponds to a UNION ALL subquery + * - Iterate on each of the appendRels (i.e., each of the UNION ALL query) + * - Recursively add all children of the set operation's + * corresponding target entries + * - If the corresponding subquery entry is a UNION set operation + * - Recursively add all children of the set operation's * corresponding target entries - * - if subquery is not a set operation - * - recursively try to add the corresponding target entry to the + * - If the corresponding subquery is a regular subquery (i.e., No set operations) + * - Recursively try to add the corresponding target entry to the * equivalence class - * - * Note that this function only adds partition keys to the attributeEquivalanceClass. - * This implies that there wouldn't be any columns for reference tables. */ static void AddToAttributeEquivalenceClass(AttributeEquivalenceClass **attributeEquivalanceClass, PlannerInfo *root, Var *varToBeAdded) { - RangeTblEntry *rangeTableEntry = root->simple_rte_array[varToBeAdded->varno]; + RangeTblEntry *rangeTableEntry = NULL; + /* punt if it's a whole-row var rather than a plain column reference */ + if (varToBeAdded->varattno == InvalidAttrNumber) + { + return; + } + + /* we also don't want to process ctid, tableoid etc */ + if (varToBeAdded->varattno < InvalidAttrNumber) + { + return; + } + + rangeTableEntry = root->simple_rte_array[varToBeAdded->varno]; if (rangeTableEntry->rtekind == RTE_RELATION) { - AttributeEquivalenceClassMember *attributeEqMember = NULL; - Oid relationId = rangeTableEntry->relid; - Var *relationPartitionKey = NULL; - - if (PartitionMethod(relationId) == DISTRIBUTE_BY_NONE) - { - return; - } - - relationPartitionKey = PartitionKey(relationId); - if (relationPartitionKey->varattno != varToBeAdded->varattno) - { - return; - } - - attributeEqMember = palloc0(sizeof(AttributeEquivalenceClassMember)); - - attributeEqMember->varattno = varToBeAdded->varattno; - attributeEqMember->varno 
= varToBeAdded->varno; - attributeEqMember->rteIdentity = GetRTEIdentity(rangeTableEntry); - attributeEqMember->relationId = rangeTableEntry->relid; - - (*attributeEquivalanceClass)->equivalentAttributes = - lappend((*attributeEquivalanceClass)->equivalentAttributes, - attributeEqMember); + AddRteRelationToAttributeEquivalenceClass(attributeEquivalanceClass, + rangeTableEntry, + varToBeAdded); } - else if (rangeTableEntry->rtekind == RTE_SUBQUERY && !rangeTableEntry->inh) + else if (rangeTableEntry->rtekind == RTE_SUBQUERY) { - Query *subquery = rangeTableEntry->subquery; - RelOptInfo *baseRelOptInfo = NULL; - TargetEntry *subqueryTargetEntry = NULL; + AddRteSubqueryToAttributeEquivalenceClass(attributeEquivalanceClass, + rangeTableEntry, root, + varToBeAdded); + } +} - /* punt if it's a whole-row var rather than a plain column reference */ - if (varToBeAdded->varattno == InvalidAttrNumber) - { - return; - } - /* we also don't want to process ctid, tableoid etc */ - if (varToBeAdded->varattno < InvalidAttrNumber) - { - return; - } +/* + * AddRteSubqueryToAttributeEquivalenceClass adds the given var to the given + * attribute equivalence class. + * + * The main algorithm is outlined in AddToAttributeEquivalenceClass(). 
+ */ +static void +AddRteSubqueryToAttributeEquivalenceClass(AttributeEquivalenceClass + **attributeEquivalanceClass, + RangeTblEntry *rangeTableEntry, + PlannerInfo *root, Var *varToBeAdded) +{ + RelOptInfo *baseRelOptInfo = find_base_rel(root, varToBeAdded->varno); + TargetEntry *subqueryTargetEntry = NULL; + Query *targetSubquery = GetTargetSubquery(root, rangeTableEntry, varToBeAdded); - baseRelOptInfo = find_base_rel(root, varToBeAdded->varno); + subqueryTargetEntry = (targetSubquery == NULL) ? NULL : + get_tle_by_resno(targetSubquery->targetList, varToBeAdded->varattno); - /* If the subquery hasn't been planned yet, we have to punt */ + /* punt if the subquery is not planned yet or has no matching target entry */ + if (subqueryTargetEntry == NULL || subqueryTargetEntry->resjunk) + { + return; + } + + /* we're only interested in Vars */ + if (!IsA(subqueryTargetEntry->expr, Var)) + { + return; + } + + varToBeAdded = (Var *) subqueryTargetEntry->expr; + + /* + * "inh" flag is set either when inheritance or "UNION ALL" exists in the + * subquery. Here we're only interested in the "UNION ALL" case. + * + * Else, we check one more thing: Does the subquery contain a "UNION" query. + * If so, we recursively traverse the whole "UNION" tree and add the corresponding + * target list elements to the attribute equivalence. + * + * Finally, if it is a regular subquery (i.e., does not contain UNION or UNION ALL), + * we simply recurse to find the corresponding RTE_RELATION to add to the + * equivalence class. + * + * Note that we're treating "UNION" and "UNION ALL" clauses differently given + * that postgres planner processes/plans them separately. 
+ */ + if (rangeTableEntry->inh) + { + AddUnionAllSetOperationsToAttributeEquivalenceClass(attributeEquivalanceClass, + root, varToBeAdded); + } + else if (targetSubquery->setOperations) + { + AddUnionSetOperationsToAttributeEquivalenceClass(attributeEquivalanceClass, + baseRelOptInfo->subroot, + (SetOperationStmt *) + targetSubquery->setOperations, + varToBeAdded); + } + else if (varToBeAdded && IsA(varToBeAdded, Var) && varToBeAdded->varlevelsup == 0) + { + AddToAttributeEquivalenceClass(attributeEquivalanceClass, + baseRelOptInfo->subroot, varToBeAdded); + } +} + + +/* + * GetTargetSubquery returns the corresponding subquery for the given planner root, + * range table entry and the var. + * + * The aim of this function is to simplify extracting the subquery in case of "UNION ALL" + * queries. + */ +static Query * +GetTargetSubquery(PlannerInfo *root, RangeTblEntry *rangeTableEntry, Var *varToBeAdded) +{ + Query *targetSubquery = NULL; + + /* + * For subqueries other than "UNION ALL", find the corresponding targetSubquery. See + * the details of how we process subqueries in the below comments. 
+ */ + if (!rangeTableEntry->inh) + { + RelOptInfo *baseRelOptInfo = find_base_rel(root, varToBeAdded->varno); + + /* If the targetSubquery hasn't been planned yet, we have to punt */ if (baseRelOptInfo->subroot == NULL) { - return; + return NULL; } Assert(IsA(baseRelOptInfo->subroot, PlannerInfo)); - subquery = baseRelOptInfo->subroot->parse; - Assert(IsA(subquery, Query)); - - /* Get the subquery output expression referenced by the upper Var */ - subqueryTargetEntry = get_tle_by_resno(subquery->targetList, - varToBeAdded->varattno); - if (subqueryTargetEntry == NULL || subqueryTargetEntry->resjunk) - { - ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("subquery %s does not have attribute %d", - rangeTableEntry->eref->aliasname, - varToBeAdded->varattno))); - } - - if (!IsA(subqueryTargetEntry->expr, Var)) - { - return; - } - - varToBeAdded = (Var *) subqueryTargetEntry->expr; - - if (varToBeAdded && IsA(varToBeAdded, Var) && varToBeAdded->varlevelsup == 0) - { - AddToAttributeEquivalenceClass(attributeEquivalanceClass, - baseRelOptInfo->subroot, varToBeAdded); - } + targetSubquery = baseRelOptInfo->subroot->parse; + Assert(IsA(targetSubquery, Query)); } + else + { + targetSubquery = rangeTableEntry->subquery; + } + + return targetSubquery; +} + + +/* + * AddUnionAllSetOperationsToAttributeEquivalenceClass recursively iterates on all the + * append rels, sets the varno's accordingly and adds the + * var to the given equivalence class. 
+ */ +static void +AddUnionAllSetOperationsToAttributeEquivalenceClass(AttributeEquivalenceClass ** + attributeEquivalanceClass, + PlannerInfo *root, + Var *varToBeAdded) +{ + List *appendRelList = root->append_rel_list; + ListCell *appendRelCell = NULL; + + /* iterate on the queries that are part of UNION ALL subselects */ + foreach(appendRelCell, appendRelList) + { + AppendRelInfo *appendRelInfo = (AppendRelInfo *) lfirst(appendRelCell); + + /* + * We're only interested in UNION ALL clauses and parent_reloid is invalid + * only for UNION ALL (i.e., equals to a legitimate Oid for inheritance) + */ + if (appendRelInfo->parent_reloid != InvalidOid) + { + continue; + } + + /* set the varno accordingly for this specific child */ + varToBeAdded->varno = appendRelInfo->child_relid; + + AddToAttributeEquivalenceClass(attributeEquivalanceClass, root, + varToBeAdded); + } +} + + +/* + * AddUnionSetOperationsToAttributeEquivalenceClass recursively iterates on all the + * setOperations and adds each corresponding target entry to the given equivalence + * class. + * + * Although the function silently accepts INTERSECT and EXCEPT set operations, they are + * rejected later in the planning. We prefer this behavior to provide better error + * messages. 
+ */ +static void +AddUnionSetOperationsToAttributeEquivalenceClass(AttributeEquivalenceClass ** + attributeEquivalenceClass, + PlannerInfo *root, + SetOperationStmt *setOperation, + Var *varToBeAdded) +{ + List *rangeTableIndexList = NIL; + ListCell *rangeTableIndexCell = NULL; + + ExtractRangeTableIndexWalker((Node *) setOperation, &rangeTableIndexList); + + foreach(rangeTableIndexCell, rangeTableIndexList) + { + int rangeTableIndex = lfirst_int(rangeTableIndexCell); + + varToBeAdded->varno = rangeTableIndex; + AddToAttributeEquivalenceClass(attributeEquivalenceClass, root, varToBeAdded); + } +} + + +/* + * AddRteRelationToAttributeEquivalenceClass adds the given var to the given equivalence + * class using the rteIdentity provided by the rangeTableEntry. Note that + * rteIdentities are only assigned to RTE_RELATIONs and this function asserts + * the input rte to be an RTE_RELATION. + * + * Note that this function only adds partition keys to the attributeEquivalanceClass. + * This implies that there wouldn't be any columns for reference tables. 
+ */ +static void +AddRteRelationToAttributeEquivalenceClass(AttributeEquivalenceClass ** + attrEquivalenceClass, + RangeTblEntry *rangeTableEntry, + Var *varToBeAdded) +{ + AttributeEquivalenceClassMember *attributeEqMember = NULL; + Oid relationId = InvalidOid; + Var *relationPartitionKey = NULL; + + Assert(rangeTableEntry->rtekind == RTE_RELATION); + + relationId = rangeTableEntry->relid; + if (PartitionMethod(relationId) == DISTRIBUTE_BY_NONE) + { + return; + } + + relationPartitionKey = PartitionKey(relationId); + if (relationPartitionKey->varattno != varToBeAdded->varattno) + { + return; + } + + attributeEqMember = palloc0(sizeof(AttributeEquivalenceClassMember)); + + attributeEqMember->varattno = varToBeAdded->varattno; + attributeEqMember->varno = varToBeAdded->varno; + attributeEqMember->rteIdentity = GetRTEIdentity(rangeTableEntry); + attributeEqMember->relationId = rangeTableEntry->relid; + + (*attrEquivalenceClass)->equivalentAttributes = + lappend((*attrEquivalenceClass)->equivalentAttributes, + attributeEqMember); } @@ -821,8 +1009,10 @@ static bool AttributeEquivalancesAreEqual(AttributeEquivalenceClass *firstAttributeEquivalance, AttributeEquivalenceClass *secondAttributeEquivalance) { - List *firstEquivalenceMemberList = firstAttributeEquivalance->equivalentAttributes; - List *secondEquivalenceMemberList = secondAttributeEquivalance->equivalentAttributes; + List *firstEquivalenceMemberList = + firstAttributeEquivalance->equivalentAttributes; + List *secondEquivalenceMemberList = + secondAttributeEquivalance->equivalentAttributes; ListCell *firstAttributeEquivalanceCell = NULL; ListCell *secondAttributeEquivalanceCell = NULL; diff --git a/src/test/regress/expected/.gitignore b/src/test/regress/expected/.gitignore index 9711f0c5e..7d87f6e93 100644 --- a/src/test/regress/expected/.gitignore +++ b/src/test/regress/expected/.gitignore @@ -17,3 +17,4 @@ /worker_copy.out /multi_complex_count_distinct.out /multi_mx_copy_data.out 
+/multi_insert_select_behavioral_analytics_create_table.out diff --git a/src/test/regress/expected/multi_subquery_behavioral_analytics.out b/src/test/regress/expected/multi_subquery_behavioral_analytics.out index 69097972d..df832e96d 100644 --- a/src/test/regress/expected/multi_subquery_behavioral_analytics.out +++ b/src/test/regress/expected/multi_subquery_behavioral_analytics.out @@ -38,7 +38,6 @@ ORDER BY 2 DESC, 1; ------------------------------------ -- Funnel grouped by whether or not a user has done an event -- This has multiple subqueries joinin at the top level --- Query will be supported when we enable unions ------------------------------------ SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event FROM ( @@ -77,8 +76,18 @@ FROM ( GROUP BY t1.user_id, hasdone_event ) t GROUP BY user_id, hasdone_event ORDER BY user_id; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. + user_id | sum | length | hasdone_event +---------+-----+--------+-------------------- + 10 | 1 | 18 | Has not done event + 12 | 1 | 14 | Has done event + 13 | 2 | 18 | Has not done event + 15 | 1 | 18 | Has not done event + 17 | 1 | 18 | Has not done event + 19 | 1 | 14 | Has done event + 20 | 2 | 18 | Has not done event + 23 | 1 | 18 | Has not done event +(8 rows) + -- same query but multiple joins are one level below, returns count of row instead of actual rows SELECT count(*) FROM ( @@ -119,8 +128,11 @@ FROM ( GROUP BY t1.user_id, hasdone_event ) t GROUP BY user_id, hasdone_event ORDER BY user_id) u; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. 
+ count +------- + 8 +(1 row) + -- Same queries written without unions SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event FROM ( @@ -202,7 +214,6 @@ FROM ( ------------------------------------ -- Funnel, grouped by the number of times a user has done an event --- These will be supported when we add unions ------------------------------------ SELECT user_id, @@ -268,8 +279,18 @@ GROUP BY count_pay, user_id ORDER BY event_average DESC, count_pay DESC, user_id DESC; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. + user_id | event_average | count_pay +---------+------------------------+----------- + 69 | 1.00000000000000000000 | 0 + 65 | 1.00000000000000000000 | 0 + 58 | 1.00000000000000000000 | 0 + 49 | 1.00000000000000000000 | 0 + 40 | 1.00000000000000000000 | 0 + 32 | 1.00000000000000000000 | 0 + 29 | 1.00000000000000000000 | 0 + 18 | 1.00000000000000000000 | 0 +(8 rows) + SELECT user_id, avg(array_length(events_table, 1)) AS event_average, @@ -336,8 +357,18 @@ HAVING avg(array_length(events_table, 1)) > 0 ORDER BY event_average DESC, count_pay DESC, user_id DESC; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. 
+ user_id | event_average | count_pay +---------+------------------------+----------- + 69 | 1.00000000000000000000 | 0 + 65 | 1.00000000000000000000 | 0 + 58 | 1.00000000000000000000 | 0 + 49 | 1.00000000000000000000 | 0 + 40 | 1.00000000000000000000 | 0 + 32 | 1.00000000000000000000 | 0 + 29 | 1.00000000000000000000 | 0 + 18 | 1.00000000000000000000 | 0 +(8 rows) + -- Same queries rewritten without using unions SELECT user_id, diff --git a/src/test/regress/expected/multi_subquery_complex_queries.out b/src/test/regress/expected/multi_subquery_complex_queries.out index 7939f58b6..6c8c70f1b 100644 --- a/src/test/regress/expected/multi_subquery_complex_queries.out +++ b/src/test/regress/expected/multi_subquery_complex_queries.out @@ -10,165 +10,1053 @@ ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 1400000; SET citus.subquery_pushdown TO TRUE; SET citus.enable_router_execution TO FALSE; -- - -- NOT SUPPORTED YET BUT SHOULD BE ADDED + -- UNIONs and JOINs mixed -- ---SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType ---FROM --- ( SELECT *, random() --- FROM --- ( SELECT "t"."user_id", --- "t"."time", --- unnest("t"."collected_events") AS "event_types" --- FROM --- ( SELECT "t1"."user_id", --- min("t1"."time") AS "time", --- array_agg(("t1"."event") --- ORDER BY time ASC, event DESC) AS collected_events --- FROM ( --- (SELECT * --- FROM --- (SELECT "events"."user_id", --- "events"."time", --- 0 AS event --- FROM events_table as "events" --- WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) --- UNION --- (SELECT * --- FROM --- (SELECT "events"."user_id", "events"."time", 1 AS event --- FROM events_table as "events" --- WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) --- UNION --- (SELECT * --- FROM --- (SELECT "events"."user_id", "events"."time", 2 AS event --- FROM events_table as "events" --- WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) --- UNION --- (SELECT * --- FROM --- (SELECT 
"events"."user_id", "events"."time", 3 AS event --- FROM events_table as "events" --- WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 --- GROUP BY "t1"."user_id") AS t) "q" ---INNER JOIN --- (SELECT "users"."user_id" --- FROM users_table as "users" --- WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id = q.user_id)) as final_query ---GROUP BY types ---ORDER BY types; +SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT *, random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT "events"."user_id", + "events"."time", + 0 AS event + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 1 AS event + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 2 AS event + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 3 AS event + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "q" +INNER JOIN + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id = q.user_id)) as final_query +GROUP BY types +ORDER BY types; + types | sumofeventtype +-------+---------------- + 0 | 115 + 1 | 82 + 2 | 160 + 3 | 158 +(4 rows) + +-- same query with target entries shuffled inside UNIONs +SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT *, random() + FROM + ( SELECT 
"t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT + "events"."time", + 0 AS event, + "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION + (SELECT * + FROM + (SELECT "events"."time", 1 AS event, "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION + (SELECT * + FROM + (SELECT "events"."time", 2 AS event, "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION + (SELECT * + FROM + (SELECT "events"."time", 3 AS event, "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "q" +INNER JOIN + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id = q.user_id)) as final_query +GROUP BY types +ORDER BY types; + types | sumofeventtype +-------+---------------- + 0 | 115 + 1 | 82 + 2 | 160 + 3 | 158 +(4 rows) + +-- not supported since events_subquery_2 doesn't have partition key on the target list +-- within the shuffled target list +SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT *, random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT + "events"."time", + 0 AS event, + "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION + (SELECT * + FROM + (SELECT 
"events"."time", 1 AS event, "events"."user_id" * 2 + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION + (SELECT * + FROM + (SELECT "events"."time", 2 AS event, "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION + (SELECT * + FROM + (SELECT "events"."time", 3 AS event, "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "q" +INNER JOIN + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id = q.user_id)) as final_query +GROUP BY types +ORDER BY types; +ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys +DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +-- not supported since events_subquery_2 doesn't have partition key on the target list +SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT *, random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT + "events"."time", + 0 AS event, + "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION + (SELECT * + FROM + (SELECT "events"."time", 1 AS event, "events"."value_2" as user_id + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION + (SELECT * + FROM + (SELECT "events"."time", 2 AS event, "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION + (SELECT * + FROM + 
(SELECT "events"."time", 3 AS event, "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "q" +INNER JOIN + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id = q.user_id)) as final_query +GROUP BY types +ORDER BY types; +ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys +DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +-- not supported since events_subquery_2 doesn't have partition key on the target list +SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT *, random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT "events"."user_id", + "events"."time", + 0 AS event + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 1 AS event + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 2 AS event + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 3 AS event + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "q" +INNER JOIN + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id = q.user_id)) as final_query +GROUP BY types +ORDER 
BY types; + types | sumofeventtype +-------+---------------- + 0 | 115 + 1 | 82 + 2 | 160 + 3 | 158 +(4 rows) + +-- we can support arbitrary subqueries within UNIONs +SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT *, random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT + "events"."time", + 0 AS event, + "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION + (SELECT * + FROM + ( + SELECT * FROM + ( + SELECT + max("events"."time"), + 0 AS event, + "events"."user_id" + FROM + events_table as "events", users_table as "users" + WHERE + events.user_id = users.user_id AND + event_type IN (10, 11, 12, 13, 14, 15) + GROUP BY "events"."user_id" + ) as events_subquery_5 + ) events_subquery_2) + UNION + (SELECT * + FROM + (SELECT "events"."time", 2 AS event, "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION + (SELECT * + FROM + (SELECT "events"."time", 3 AS event, "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "q" +INNER JOIN + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id = q.user_id)) as final_query +GROUP BY types +ORDER BY types; + types | sumofeventtype +-------+---------------- + 0 | 115 + 2 | 160 + 3 | 158 +(3 rows) + +-- not supported since events_subquery_5 is not joined on partition key +SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT *, random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS 
"event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT + "events"."time", + 0 AS event, + "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION + (SELECT * + FROM + ( + SELECT * FROM + ( + SELECT + max("events"."time"), + 0 AS event, + "events"."user_id" + FROM + events_table as "events", users_table as "users" + WHERE + events.user_id = users.value_2 AND + event_type IN (10, 11, 12, 13, 14, 15) + GROUP BY "events"."user_id" + ) as events_subquery_5 + ) events_subquery_2) + UNION + (SELECT * + FROM + (SELECT "events"."time", 2 AS event, "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION + (SELECT * + FROM + (SELECT "events"."time", 3 AS event, "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "q" +INNER JOIN + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id = q.user_id)) as final_query +GROUP BY types +ORDER BY types; +ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys +DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. 
+-- not supported since the join is not equi join +SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT *, random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT "events"."user_id", + "events"."time", + 0 AS event + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 1 AS event + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 2 AS event + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 3 AS event + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "q" +INNER JOIN + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id != q.user_id)) as final_query +GROUP BY types +ORDER BY types; +ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys +DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. 
+-- not supported since subquery 3 includes a JOIN with non-equi join +SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT *, random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT + "events"."time", + 0 AS event, + "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION + (SELECT * + FROM + (SELECT "events"."time", 1 AS event, "events"."value_2" as user_id + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION + (SELECT * + FROM + (SELECT "events"."time", 2 AS event, "events"."user_id" + FROM events_table as "events", users_table as "users" + WHERE event_type IN (20, 21, 22, 23, 24, 25) AND users.user_id != events.user_id) events_subquery_3) + UNION + (SELECT * + FROM + (SELECT "events"."time", 3 AS event, "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "q" +INNER JOIN + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id = q.user_id)) as final_query +GROUP BY types +ORDER BY types; +ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys +DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. 
+-- similar query with more union statements (to enable UNION tree become larger) +SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT *, random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT "events"."user_id", + "events"."time", + 0 AS event + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 1 AS event + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 2 AS event + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 3 AS event + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 4 AS event + FROM events_table as "events" + WHERE event_type IN (31, 32, 33, 34, 35, 36)) events_subquery_5) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 5 AS event + FROM events_table as "events" + WHERE event_type IN (37, 38, 39, 40, 41, 42)) events_subquery_6) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 6 AS event + FROM events_table as "events" + WHERE event_type IN (50, 51, 52, 53, 54, 55)) events_subquery_6) + ) t1 + GROUP BY "t1"."user_id") AS t) "q" +INNER JOIN + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id = q.user_id)) as final_query +GROUP BY types +ORDER BY types; + types | sumofeventtype 
+-------+---------------- + 0 | 115 + 1 | 82 + 2 | 160 + 3 | 158 + 4 | 117 + 5 | 98 + 6 | 167 +(7 rows) + +-- +-- UNION ALL Queries +-- +SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT *, random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT "events"."user_id", + "events"."time", + 0 AS event + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION ALL + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 1 AS event + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION ALL + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 2 AS event + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION ALL + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 3 AS event + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "q" +INNER JOIN + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id = q.user_id)) as final_query +GROUP BY types +ORDER BY types; + types | sumofeventtype +-------+---------------- + 0 | 115 + 1 | 82 + 2 | 160 + 3 | 158 +(4 rows) + +-- same query target list entries shuffled +SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT *, random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT + 
"events"."time", + 0 AS event, + "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION ALL + (SELECT * + FROM + (SELECT "events"."time", 1 AS event, "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION ALL + (SELECT * + FROM + (SELECT "events"."time", 2 AS event, "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION ALL + (SELECT * + FROM + (SELECT "events"."time", 3 AS event, "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "q" +INNER JOIN + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id = q.user_id)) as final_query +GROUP BY types +ORDER BY types; + types | sumofeventtype +-------+---------------- + 0 | 115 + 1 | 82 + 2 | 160 + 3 | 158 +(4 rows) + +-- not supported since subquery 3 does not have partition key +SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT *, random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT "events"."user_id", + "events"."time", + 0 AS event + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION ALL + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 1 AS event + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION ALL + (SELECT * + FROM + (SELECT "events"."value_2", "events"."time", 2 AS event + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) 
events_subquery_3) + UNION ALL + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 3 AS event + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "q" +INNER JOIN + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id = q.user_id)) as final_query +GROUP BY types +ORDER BY types; +ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys +DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +-- not supported since events_subquery_4 does not have partition key on the +-- target list +SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT *, random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT + "events"."time", + 0 AS event, + "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION ALL + (SELECT * + FROM + (SELECT "events"."time", 1 AS event, "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION ALL + (SELECT * + FROM + (SELECT "events"."time", 2 AS event, "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION ALL + (SELECT * + FROM + (SELECT "events"."time", 3 AS event, 2 * "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "q" +INNER JOIN + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 
70) AS t ON (t.user_id = q.user_id)) as final_query +GROUP BY types +ORDER BY types; +ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys +DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +-- union all with inner and left joins +SELECT user_id, count(*) as cnt +FROM + (SELECT first_query.user_id, + Random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT "events"."user_id", + "events"."time", + 0 AS event + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION ALL + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 1 AS event + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION ALL + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 2 AS event + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION ALL + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 3 AS event + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "first_query" +INNER JOIN + (SELECT "t"."user_id" + FROM + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t + LEFT OUTER JOIN + ( + SELECT DISTINCT "events"."user_id" as user_id + FROM events_table as "events" + WHERE event_type IN (35, 36, 37, 38) + GROUP BY user_id + ) as t2 + on (t2.user_id = t.user_id) WHERE t2.user_id is NULL) as second_query + ON ("first_query".user_id = "second_query".user_id)) as final_query +GROUP BY user_id ORDER BY cnt DESC, user_id DESC LIMIT 10; + user_id | 
cnt +---------+----- + 27 | 35 + 87 | 27 + 74 | 20 + 72 | 16 + 12 | 16 + 66 | 15 + 56 | 15 + 40 | 15 + 23 | 12 + 59 | 10 +(10 rows) + +-- not supported since the join between t and t2 is not equi join +-- union all with inner and left joins +SELECT user_id, count(*) as cnt +FROM + (SELECT first_query.user_id, + Random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT "events"."user_id", + "events"."time", + 0 AS event + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION ALL + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 1 AS event + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION ALL + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 2 AS event + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION ALL + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 3 AS event + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "first_query" +INNER JOIN + (SELECT "t"."user_id" + FROM + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t + LEFT OUTER JOIN + ( + SELECT DISTINCT "events"."user_id" as user_id + FROM events_table as "events" + WHERE event_type IN (35, 36, 37, 38) + GROUP BY user_id + ) as t2 + on (t2.user_id > t.user_id) WHERE t2.user_id is NULL) as second_query + ON ("first_query".user_id = "second_query".user_id)) as final_query +GROUP BY user_id ORDER BY cnt DESC, user_id DESC LIMIT 10; +ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys +DETAIL: Each 
relation should be joined with at least one another relation using distribution keys and equality operator. -- - -- NOT SUPPORTED YET BUT SHOULD BE ADDED + -- Union, inner join and left join -- ---SELECT user_id, count(*) as cnt ---FROM --- (SELECT first_query.user_id, --- Random() --- FROM --- ( SELECT "t"."user_id", --- "t"."time", --- unnest("t"."collected_events") AS "event_types" --- FROM --- ( SELECT "t1"."user_id", --- min("t1"."time") AS "time", --- array_agg(("t1"."event") --- ORDER BY time ASC, event DESC) AS collected_events --- FROM ( --- (SELECT * --- FROM --- (SELECT "events"."user_id", --- "events"."time", --- 0 AS event --- FROM events_table as "events" --- WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) --- UNION --- (SELECT * --- FROM --- (SELECT "events"."user_id", "events"."time", 1 AS event --- FROM events_table as "events" --- WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) --- UNION --- (SELECT * --- FROM --- (SELECT "events"."user_id", "events"."time", 2 AS event --- FROM events_table as "events" --- WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) --- UNION --- (SELECT * --- FROM --- (SELECT "events"."user_id", "events"."time", 3 AS event --- FROM events_table as "events" --- WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 --- GROUP BY "t1"."user_id") AS t) "first_query" ---INNER JOIN --- (SELECT "t"."user_id" --- FROM --- (SELECT "users"."user_id" --- FROM users_table as "users" --- WHERE value_1 > 50 and value_1 < 70) AS t --- left OUTER JOIN --- (SELECT DISTINCT("events"."user_id") --- FROM events_table as "events" --- WHERE event_type IN (26, 27, 28, 29, 30, 13) --- GROUP BY "events"."user_id") as t2 on t2.user_id = t.user_id ) as second_query --- ON ("first_query".user_id = "second_query".user_id)) as final_query ---GROUP BY user_id ORDER BY cnt DESC, user_id DESC LIMIT 10; +SELECT user_id, count(*) as cnt +FROM + (SELECT first_query.user_id, + Random() + FROM + ( 
SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT "events"."user_id", + "events"."time", + 0 AS event + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 1 AS event + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 2 AS event + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 3 AS event + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "first_query" +INNER JOIN + (SELECT "t"."user_id" + FROM + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t + LEFT OUTER JOIN + ( + SELECT DISTINCT "events"."user_id" as user_id + FROM events_table as "events" + WHERE event_type IN (35, 36, 37, 38) + GROUP BY user_id + ) as t2 + on (t2.user_id = t.user_id) WHERE t2.user_id is NULL) as second_query + ON ("first_query".user_id = "second_query".user_id)) as final_query +GROUP BY user_id ORDER BY cnt DESC, user_id DESC LIMIT 10; + user_id | cnt +---------+----- + 27 | 35 + 87 | 27 + 74 | 20 + 72 | 16 + 12 | 16 + 66 | 15 + 56 | 15 + 40 | 15 + 23 | 12 + 59 | 10 +(10 rows) + -- - -- NOT SUPPORTED YET BUT SHOULD BE ADDED + -- Unions, left / inner joins -- ---SELECT user_id, count(*) as cnt ---FROM --- (SELECT first_query.user_id, --- Random() --- FROM --- ( SELECT "t"."user_id", --- "t"."time", --- unnest("t"."collected_events") AS "event_types" --- FROM --- ( SELECT "t1"."user_id", 
--- min("t1"."time") AS "time", --- array_agg(("t1"."event") --- ORDER BY time ASC, event DESC) AS collected_events --- FROM ( --- (SELECT * --- FROM --- (SELECT "events"."user_id", --- "events"."time", --- 0 AS event --- FROM events_table as "events" --- WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) --- UNION --- (SELECT * --- FROM --- (SELECT "events"."user_id", "events"."time", 1 AS event --- FROM events_table as "events" --- WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) --- UNION --- (SELECT * --- FROM --- (SELECT "events"."user_id", "events"."time", 2 AS event --- FROM events_table as "events" --- WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) --- UNION --- (SELECT * --- FROM --- (SELECT "events"."user_id", "events"."time", 3 AS event --- FROM events_table as "events" --- WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 --- GROUP BY "t1"."user_id") AS t) "first_query" ---INNER JOIN --- (SELECT "t"."user_id" --- FROM --- (SELECT "users"."user_id" --- FROM users_table as "users" --- WHERE value_1 > 50 and value_1 < 70) AS t --- left OUTER JOIN --- (SELECT DISTINCT("events"."user_id") --- FROM events_table as "events" --- WHERE event_type IN (26, 27, 28, 29, 30, 13) --- GROUP BY "events"."user_id") as t2 on t2.user_id = t.user_id ) as second_query --- ON ("first_query".user_id = "second_query".user_id)) as final_query ---GROUP BY user_id ORDER BY cnt DESC, user_id DESC LIMIT 10; +SELECT user_id, count(*) as cnt +FROM + (SELECT first_query.user_id, + Random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT "events"."user_id", + "events"."time", + 0 AS event + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION 
+ (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 1 AS event + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 2 AS event + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 3 AS event + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "first_query" +INNER JOIN + (SELECT "t"."user_id" + FROM + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t + left OUTER JOIN + (SELECT DISTINCT("events"."user_id") + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13) + GROUP BY user_id + ) as t2 on t2.user_id = t.user_id where t2.user_id is NULL) as second_query + ON ("first_query".user_id = "second_query".user_id)) as final_query +GROUP BY user_id ORDER BY cnt DESC, user_id DESC LIMIT 10; + user_id | cnt +---------+----- + 66 | 15 + 23 | 12 + 98 | 9 + 22 | 9 + 83 | 8 + 78 | 8 + 32 | 8 + 89 | 6 + 80 | 6 + 75 | 6 +(10 rows) + -- Simple LATERAL JOINs with GROUP BYs in each side SELECT * FROM @@ -1029,5 +1917,115 @@ FROM limit 10; ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. 
+-- lets test some unsupported set operations +-- not supported since we use INTERSECT +SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT *, random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT "events"."user_id", + "events"."time", + 0 AS event + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 1 AS event + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 2 AS event + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + INTERSECT + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 3 AS event + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "q" +INNER JOIN + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id = q.user_id)) as final_query +GROUP BY types +ORDER BY types; +ERROR: cannot push down this subquery +DETAIL: Intersect and Except are currently unsupported +-- not supported due to offset +SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT *, random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT "events"."user_id", + "events"."time", + 0 AS event + FROM events_table as 
"events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 1 AS event + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 2 AS event + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 3 AS event + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13) ) events_subquery_4) OFFSET 3 ) t1 + GROUP BY "t1"."user_id") AS t) "q" +INNER JOIN + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id = q.user_id)) as final_query +GROUP BY types +ORDER BY types; +ERROR: cannot push down this subquery +DETAIL: Offset clause is currently unsupported +-- not supported due to window functions +SELECT user_id, + some_vals +FROM ( + SELECT * , + Row_number() over (PARTITION BY "user_id" ORDER BY "user_id") AS "some_vals", + Random() + FROM users_table + ) user_id +ORDER BY 1, + 2 limit 10; +ERROR: cannot push down this subquery +DETAIL: Window functions are currently unsupported SET citus.subquery_pushdown TO FALSE; SET citus.enable_router_execution TO TRUE; diff --git a/src/test/regress/input/multi_subquery.source b/src/test/regress/input/multi_subquery.source index 3077f89ed..8b42e4dcb 100644 --- a/src/test/regress/input/multi_subquery.source +++ b/src/test/regress/input/multi_subquery.source @@ -172,12 +172,12 @@ FROM l_orderkey) AS unit_prices; -- Check that we error out if there is union all. 
- -SELECT count(*) FROM -( - (SELECT l_orderkey FROM lineitem_subquery) UNION ALL - (SELECT 1::bigint) -) b; +-- TEMPORARLY DISABLE UNION ALL +-- SELECT count(*) FROM +-- ( +-- (SELECT l_orderkey FROM lineitem_subquery) UNION ALL +-- (SELECT 1::bigint) +--) b; --- -- TEMPORARLY DISABLE UNIONS WITHOUT JOINS diff --git a/src/test/regress/output/multi_subquery.source b/src/test/regress/output/multi_subquery.source index 5c0d6dab3..8e6b7ca96 100644 --- a/src/test/regress/output/multi_subquery.source +++ b/src/test/regress/output/multi_subquery.source @@ -172,13 +172,12 @@ FROM ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. -- Check that we error out if there is union all. -SELECT count(*) FROM -( - (SELECT l_orderkey FROM lineitem_subquery) UNION ALL - (SELECT 1::bigint) -) b; -ERROR: cannot push down this subquery -DETAIL: Union All clauses are currently unsupported +-- TEMPORARLY DISABLE UNION ALL +-- SELECT count(*) FROM +-- ( +-- (SELECT l_orderkey FROM lineitem_subquery) UNION ALL +-- (SELECT 1::bigint) +--) b; --- -- TEMPORARLY DISABLE UNIONS WITHOUT JOINS --- @@ -984,8 +983,12 @@ FROM hasdone) AS subquery_top GROUP BY hasdone; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. 
+ event_average | hasdone +--------------------+--------------------- + 4.0000000000000000 | Has not done paying + 2.5000000000000000 | Has done paying +(2 rows) + -- Union, left join and having subquery pushdown SELECT avg(array_length(events, 1)) AS event_average, @@ -1056,8 +1059,11 @@ GROUP BY count_pay ORDER BY count_pay; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. + event_average | count_pay +--------------------+----------- + 3.0000000000000000 | 0 +(1 row) + -- Lateral join subquery pushdown SELECT @@ -1226,8 +1232,51 @@ FROM hasdone) AS subquery_top GROUP BY hasdone; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + HashAggregate (cost=0.00..0.00 rows=0 width=0) + Group Key: remote_scan.hasdone + -> Custom Scan (Citus Real-Time) (cost=0.00..0.00 rows=0 width=0) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=57637 dbname=regression + -> GroupAggregate (cost=100.80..100.85 rows=2 width=48) + Group Key: subquery_top.hasdone + -> Sort (cost=100.80..100.80 rows=2 width=64) + Sort Key: subquery_top.hasdone + -> Subquery Scan on subquery_top (cost=100.72..100.79 rows=2 width=64) + -> GroupAggregate (cost=100.72..100.77 rows=2 width=112) + Group Key: subquery_1.tenant_id, subquery_1.user_id, ('Has done paying'::text) + -> Sort (cost=100.72..100.72 rows=2 width=88) + Sort Key: subquery_1.tenant_id, subquery_1.user_id, ('Has done 
paying'::text) + -> Hash Right Join (cost=100.55..100.71 rows=2 width=88) + Hash Cond: (events.composite_id = subquery_1.composite_id) + -> Unique (cost=76.64..76.64 rows=1 width=80) + -> Sort (cost=76.64..76.64 rows=1 width=80) + Sort Key: ((events.composite_id).tenant_id), ((events.composite_id).user_id) + -> Seq Scan on events_270011 events (cost=0.00..76.62 rows=1 width=80) + Filter: (((event_type)::text = 'pay'::text) AND (composite_id >= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type)) + -> Hash (cost=23.89..23.89 rows=2 width=88) + -> Subquery Scan on subquery_1 (cost=23.84..23.89 rows=2 width=88) + -> Unique (cost=23.84..23.87 rows=2 width=88) + -> Sort (cost=23.84..23.84 rows=2 width=88) + Sort Key: (((NULL::user_composite_type)).tenant_id), (((NULL::user_composite_type)).user_id), (NULL::user_composite_type), ('action=>1'::text), events_1.event_time + -> Append (cost=0.00..23.83 rows=2 width=88) + -> Nested Loop (cost=0.00..11.90 rows=1 width=88) + Join Filter: ((NULL::user_composite_type) = events_1.composite_id) + -> Result (cost=0.00..0.01 rows=1 width=40) + One-Time Filter: false + -> Seq Scan on events_270011 events_1 (cost=0.00..11.62 rows=1 width=40) + Filter: ((event_type)::text = 'click'::text) + -> Nested Loop (cost=0.00..11.90 rows=1 width=88) + Join Filter: ((NULL::user_composite_type) = events_2.composite_id) + -> Result (cost=0.00..0.01 rows=1 width=40) + One-Time Filter: false + -> Seq Scan on events_270011 events_2 (cost=0.00..11.62 rows=1 width=40) + Filter: ((event_type)::text = 'submit'::text) +(41 rows) + -- Union, left join and having subquery pushdown EXPLAIN SELECT avg(array_length(events, 1)) AS event_average, @@ -1298,8 +1347,7 @@ GROUP BY count_pay ORDER BY count_pay; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys 
and equality operator. +ERROR: bogus varattno for OUTER_VAR var: 3 -- Lateral join subquery pushdown EXPLAIN SELECT tenant_id, diff --git a/src/test/regress/output/multi_subquery_0.source b/src/test/regress/output/multi_subquery_0.source index 8a16c9be4..dd0846a16 100644 --- a/src/test/regress/output/multi_subquery_0.source +++ b/src/test/regress/output/multi_subquery_0.source @@ -172,13 +172,12 @@ FROM ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. -- Check that we error out if there is union all. -SELECT count(*) FROM -( - (SELECT l_orderkey FROM lineitem_subquery) UNION ALL - (SELECT 1::bigint) -) b; -ERROR: cannot push down this subquery -DETAIL: Union All clauses are currently unsupported +-- TEMPORARLY DISABLE UNION ALL +-- SELECT count(*) FROM +-- ( +-- (SELECT l_orderkey FROM lineitem_subquery) UNION ALL +-- (SELECT 1::bigint) +--) b; --- -- TEMPORARLY DISABLE UNIONS WITHOUT JOINS --- @@ -984,8 +983,12 @@ FROM hasdone) AS subquery_top GROUP BY hasdone; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. + event_average | hasdone +--------------------+--------------------- + 4.0000000000000000 | Has not done paying + 2.5000000000000000 | Has done paying +(2 rows) + -- Union, left join and having subquery pushdown SELECT avg(array_length(events, 1)) AS event_average, @@ -1056,8 +1059,11 @@ GROUP BY count_pay ORDER BY count_pay; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. 
+ event_average | count_pay +--------------------+----------- + 3.0000000000000000 | 0 +(1 row) + -- Lateral join subquery pushdown SELECT @@ -1221,8 +1227,40 @@ FROM hasdone) AS subquery_top GROUP BY hasdone; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + HashAggregate (cost=0.00..0.00 rows=0 width=0) + Group Key: remote_scan.hasdone + -> Custom Scan (Citus Real-Time) (cost=0.00..0.00 rows=0 width=0) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=57637 dbname=regression + -> HashAggregate (cost=77.02..77.04 rows=2 width=64) + Group Key: COALESCE(('Has done paying'::text), 'Has not done paying'::text) + -> GroupAggregate (cost=76.93..76.98 rows=2 width=88) + Group Key: subquery_1.tenant_id, subquery_1.user_id, ('Has done paying'::text) + -> Sort (cost=76.93..76.93 rows=2 width=88) + Sort Key: subquery_1.tenant_id, subquery_1.user_id, ('Has done paying'::text) + -> Hash Right Join (cost=76.76..76.92 rows=2 width=88) + Hash Cond: (events.composite_id = subquery_1.composite_id) + -> Unique (cost=76.64..76.64 rows=1 width=32) + -> Sort (cost=76.64..76.64 rows=1 width=32) + Sort Key: ((events.composite_id).tenant_id), ((events.composite_id).user_id) + -> Seq Scan on events_270011 events (cost=0.00..76.62 rows=1 width=32) + Filter: (((event_type)::text = 'pay'::text) AND (composite_id >= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type)) + -> Hash (cost=0.10..0.10 rows=2 width=88) + -> Subquery Scan on subquery_1 (cost=0.05..0.10 rows=2 width=88) + -> 
Unique (cost=0.05..0.08 rows=2 width=0) + -> Sort (cost=0.05..0.06 rows=2 width=0) + Sort Key: ((composite_id).tenant_id), ((composite_id).user_id), composite_id, ('action=>1'::text), event_time + -> Append (cost=0.00..0.04 rows=2 width=0) + -> Result (cost=0.00..0.01 rows=1 width=0) + One-Time Filter: false + -> Result (cost=0.00..0.01 rows=1 width=0) + One-Time Filter: false +(30 rows) + -- Union, left join and having subquery pushdown EXPLAIN SELECT avg(array_length(events, 1)) AS event_average, @@ -1293,8 +1331,7 @@ GROUP BY count_pay ORDER BY count_pay; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: bogus varattno for OUTER_VAR var: 3 -- Lateral join subquery pushdown EXPLAIN SELECT tenant_id, diff --git a/src/test/regress/sql/.gitignore b/src/test/regress/sql/.gitignore index a41eca963..98052d0c7 100644 --- a/src/test/regress/sql/.gitignore +++ b/src/test/regress/sql/.gitignore @@ -16,3 +16,4 @@ /worker_copy.sql /multi_complex_count_distinct.sql /multi_mx_copy_data.sql +/multi_insert_select_behavioral_analytics_create_table.sql diff --git a/src/test/regress/sql/multi_subquery_behavioral_analytics.sql b/src/test/regress/sql/multi_subquery_behavioral_analytics.sql index c4222d004..8607f347a 100644 --- a/src/test/regress/sql/multi_subquery_behavioral_analytics.sql +++ b/src/test/regress/sql/multi_subquery_behavioral_analytics.sql @@ -33,7 +33,6 @@ ORDER BY 2 DESC, 1; ------------------------------------ -- Funnel grouped by whether or not a user has done an event -- This has multiple subqueries joinin at the top level --- Query will be supported when we enable unions ------------------------------------ SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event FROM ( @@ -180,7 +179,6 @@ FROM ( ------------------------------------ -- Funnel, grouped by the 
number of times a user has done an event --- These will be supported when we add unions ------------------------------------ SELECT user_id, diff --git a/src/test/regress/sql/multi_subquery_complex_queries.sql b/src/test/regress/sql/multi_subquery_complex_queries.sql index 1f9d9ed65..f7281f75c 100644 --- a/src/test/regress/sql/multi_subquery_complex_queries.sql +++ b/src/test/regress/sql/multi_subquery_complex_queries.sql @@ -12,168 +12,960 @@ SET citus.subquery_pushdown TO TRUE; SET citus.enable_router_execution TO FALSE; -- - -- NOT SUPPORTED YET BUT SHOULD BE ADDED + -- UNIONs and JOINs mixed -- ---SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType ---FROM --- ( SELECT *, random() --- FROM --- ( SELECT "t"."user_id", --- "t"."time", --- unnest("t"."collected_events") AS "event_types" --- FROM --- ( SELECT "t1"."user_id", --- min("t1"."time") AS "time", --- array_agg(("t1"."event") --- ORDER BY time ASC, event DESC) AS collected_events --- FROM ( --- (SELECT * --- FROM --- (SELECT "events"."user_id", --- "events"."time", --- 0 AS event --- FROM events_table as "events" --- WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) --- UNION --- (SELECT * --- FROM --- (SELECT "events"."user_id", "events"."time", 1 AS event --- FROM events_table as "events" --- WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) --- UNION --- (SELECT * --- FROM --- (SELECT "events"."user_id", "events"."time", 2 AS event --- FROM events_table as "events" --- WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) --- UNION --- (SELECT * --- FROM --- (SELECT "events"."user_id", "events"."time", 3 AS event --- FROM events_table as "events" --- WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 --- GROUP BY "t1"."user_id") AS t) "q" ---INNER JOIN --- (SELECT "users"."user_id" --- FROM users_table as "users" --- WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id = q.user_id)) as final_query ---GROUP BY types 
---ORDER BY types; +SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT *, random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT "events"."user_id", + "events"."time", + 0 AS event + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 1 AS event + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 2 AS event + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 3 AS event + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "q" +INNER JOIN + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id = q.user_id)) as final_query +GROUP BY types +ORDER BY types; + +-- same query with target entries shuffled inside UNIONs +SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT *, random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT + "events"."time", + 0 AS event, + "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION + (SELECT * + FROM + (SELECT "events"."time", 1 AS event, 
"events"."user_id" + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION + (SELECT * + FROM + (SELECT "events"."time", 2 AS event, "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION + (SELECT * + FROM + (SELECT "events"."time", 3 AS event, "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "q" +INNER JOIN + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id = q.user_id)) as final_query +GROUP BY types +ORDER BY types; + +-- not supported since events_subquery_2 doesn't have partition key on the target list +-- within the shuffled target list +SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT *, random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT + "events"."time", + 0 AS event, + "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION + (SELECT * + FROM + (SELECT "events"."time", 1 AS event, "events"."user_id" * 2 + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION + (SELECT * + FROM + (SELECT "events"."time", 2 AS event, "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION + (SELECT * + FROM + (SELECT "events"."time", 3 AS event, "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "q" +INNER JOIN + (SELECT 
"users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id = q.user_id)) as final_query +GROUP BY types +ORDER BY types; + +-- not supported since events_subquery_2 doesn't have partition key on the target list +SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT *, random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT + "events"."time", + 0 AS event, + "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION + (SELECT * + FROM + (SELECT "events"."time", 1 AS event, "events"."value_2" as user_id + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION + (SELECT * + FROM + (SELECT "events"."time", 2 AS event, "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION + (SELECT * + FROM + (SELECT "events"."time", 3 AS event, "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "q" +INNER JOIN + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id = q.user_id)) as final_query +GROUP BY types +ORDER BY types; + +-- not supported since events_subquery_2 doesn't have partition key on the target list +SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT *, random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event 
DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT "events"."user_id", + "events"."time", + 0 AS event + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 1 AS event + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 2 AS event + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 3 AS event + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "q" +INNER JOIN + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id = q.user_id)) as final_query +GROUP BY types +ORDER BY types; + +-- we can support arbitrary subqueries within UNIONs +SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT *, random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT + "events"."time", + 0 AS event, + "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION + (SELECT * + FROM + ( + SELECT * FROM + ( + SELECT + max("events"."time"), + 0 AS event, + "events"."user_id" + FROM + events_table as "events", users_table as "users" + WHERE + events.user_id = users.user_id AND + event_type IN (10, 11, 12, 13, 14, 15) + GROUP BY "events"."user_id" + ) as events_subquery_5 + + ) events_subquery_2) + UNION + (SELECT * + FROM + (SELECT "events"."time", 2 AS event, 
"events"."user_id" + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION + (SELECT * + FROM + (SELECT "events"."time", 3 AS event, "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "q" +INNER JOIN + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id = q.user_id)) as final_query +GROUP BY types +ORDER BY types; + +-- not supported since events_subquery_5 is not joined on partition key +SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT *, random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT + "events"."time", + 0 AS event, + "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION + (SELECT * + FROM + ( + SELECT * FROM + ( + SELECT + max("events"."time"), + 0 AS event, + "events"."user_id" + FROM + events_table as "events", users_table as "users" + WHERE + events.user_id = users.value_2 AND + event_type IN (10, 11, 12, 13, 14, 15) + GROUP BY "events"."user_id" + ) as events_subquery_5 + + ) events_subquery_2) + UNION + (SELECT * + FROM + (SELECT "events"."time", 2 AS event, "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION + (SELECT * + FROM + (SELECT "events"."time", 3 AS event, "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "q" +INNER JOIN + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and 
value_1 < 70) AS t ON (t.user_id = q.user_id)) as final_query +GROUP BY types +ORDER BY types; + +-- not supported since the join is not equi join +SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT *, random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT "events"."user_id", + "events"."time", + 0 AS event + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 1 AS event + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 2 AS event + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 3 AS event + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "q" +INNER JOIN + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id != q.user_id)) as final_query +GROUP BY types +ORDER BY types; + +-- not supported since subquery 3 includes a JOIN with non-equi join +SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT *, random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT + "events"."time", + 0 AS event, + "events"."user_id" + FROM events_table as 
"events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION + (SELECT * + FROM + (SELECT "events"."time", 1 AS event, "events"."value_2" as user_id + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION + (SELECT * + FROM + (SELECT "events"."time", 2 AS event, "events"."user_id" + FROM events_table as "events", users_table as "users" + WHERE event_type IN (20, 21, 22, 23, 24, 25) AND users.user_id != events.user_id) events_subquery_3) + UNION + (SELECT * + FROM + (SELECT "events"."time", 3 AS event, "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "q" +INNER JOIN + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id = q.user_id)) as final_query +GROUP BY types +ORDER BY types; + +-- similar query with more union statements (to enable UNION tree become larger) +SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT *, random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT "events"."user_id", + "events"."time", + 0 AS event + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 1 AS event + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 2 AS event + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 3 AS event + 
FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4) + + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 4 AS event + FROM events_table as "events" + WHERE event_type IN (31, 32, 33, 34, 35, 36)) events_subquery_5) + + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 5 AS event + FROM events_table as "events" + WHERE event_type IN (37, 38, 39, 40, 41, 42)) events_subquery_6) + + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 6 AS event + FROM events_table as "events" + WHERE event_type IN (50, 51, 52, 53, 54, 55)) events_subquery_6) + ) t1 + GROUP BY "t1"."user_id") AS t) "q" +INNER JOIN + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id = q.user_id)) as final_query +GROUP BY types +ORDER BY types; + + +-- +-- UNION ALL Queries +-- +SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT *, random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT "events"."user_id", + "events"."time", + 0 AS event + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION ALL + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 1 AS event + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION ALL + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 2 AS event + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION ALL + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 3 AS event + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 
13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "q" +INNER JOIN + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id = q.user_id)) as final_query +GROUP BY types +ORDER BY types; + +-- same query target list entries shuffled +SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT *, random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT + "events"."time", + 0 AS event, + "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION ALL + (SELECT * + FROM + (SELECT "events"."time", 1 AS event, "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION ALL + (SELECT * + FROM + (SELECT "events"."time", 2 AS event, "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION ALL + (SELECT * + FROM + (SELECT "events"."time", 3 AS event, "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "q" +INNER JOIN + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id = q.user_id)) as final_query +GROUP BY types +ORDER BY types; + +-- not supported since subquery 3 does not have partition key +SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT *, random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY 
TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT "events"."user_id", + "events"."time", + 0 AS event + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION ALL + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 1 AS event + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION ALL + (SELECT * + FROM + (SELECT "events"."value_2", "events"."time", 2 AS event + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION ALL + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 3 AS event + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "q" +INNER JOIN + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id = q.user_id)) as final_query +GROUP BY types +ORDER BY types; + +-- not supported since events_subquery_4 does not have partition key on the +-- target list +SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT *, random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT + "events"."time", + 0 AS event, + "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION ALL + (SELECT * + FROM + (SELECT "events"."time", 1 AS event, "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION ALL + (SELECT * + FROM + (SELECT "events"."time", 2 AS event, "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 
23, 24, 25) ) events_subquery_3) + UNION ALL + (SELECT * + FROM + (SELECT "events"."time", 3 AS event, 2 * "events"."user_id" + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "q" +INNER JOIN + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id = q.user_id)) as final_query +GROUP BY types +ORDER BY types; + +-- union all with inner and left joins +SELECT user_id, count(*) as cnt +FROM + (SELECT first_query.user_id, + Random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT "events"."user_id", + "events"."time", + 0 AS event + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION ALL + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 1 AS event + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION ALL + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 2 AS event + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION ALL + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 3 AS event + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "first_query" +INNER JOIN + (SELECT "t"."user_id" + FROM + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t + LEFT OUTER JOIN + ( + SELECT DISTINCT "events"."user_id" as user_id + FROM events_table as "events" + WHERE event_type IN (35, 36, 37, 38) + GROUP BY user_id + ) as t2 + on (t2.user_id = t.user_id) WHERE t2.user_id is NULL) as 
second_query + ON ("first_query".user_id = "second_query".user_id)) as final_query +GROUP BY user_id ORDER BY cnt DESC, user_id DESC LIMIT 10; + +-- not supported since the join between t and t2 is not equi join +-- union all with inner and left joins +SELECT user_id, count(*) as cnt +FROM + (SELECT first_query.user_id, + Random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT "events"."user_id", + "events"."time", + 0 AS event + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION ALL + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 1 AS event + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION ALL + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 2 AS event + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION ALL + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 3 AS event + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "first_query" +INNER JOIN + (SELECT "t"."user_id" + FROM + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t + LEFT OUTER JOIN + ( + SELECT DISTINCT "events"."user_id" as user_id + FROM events_table as "events" + WHERE event_type IN (35, 36, 37, 38) + GROUP BY user_id + ) as t2 + on (t2.user_id > t.user_id) WHERE t2.user_id is NULL) as second_query + ON ("first_query".user_id = "second_query".user_id)) as final_query +GROUP BY user_id ORDER BY cnt DESC, user_id DESC LIMIT 10; -- - -- NOT SUPPORTED YET BUT SHOULD BE ADDED + -- Union, inner join and left join -- ---SELECT user_id, 
count(*) as cnt ---FROM --- (SELECT first_query.user_id, --- Random() --- FROM --- ( SELECT "t"."user_id", --- "t"."time", --- unnest("t"."collected_events") AS "event_types" --- FROM --- ( SELECT "t1"."user_id", --- min("t1"."time") AS "time", --- array_agg(("t1"."event") --- ORDER BY time ASC, event DESC) AS collected_events --- FROM ( --- (SELECT * --- FROM --- (SELECT "events"."user_id", --- "events"."time", --- 0 AS event --- FROM events_table as "events" --- WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) --- UNION --- (SELECT * --- FROM --- (SELECT "events"."user_id", "events"."time", 1 AS event --- FROM events_table as "events" --- WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) --- UNION --- (SELECT * --- FROM --- (SELECT "events"."user_id", "events"."time", 2 AS event --- FROM events_table as "events" --- WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) --- UNION --- (SELECT * --- FROM --- (SELECT "events"."user_id", "events"."time", 3 AS event --- FROM events_table as "events" --- WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 --- GROUP BY "t1"."user_id") AS t) "first_query" ---INNER JOIN --- (SELECT "t"."user_id" --- FROM --- (SELECT "users"."user_id" --- FROM users_table as "users" --- WHERE value_1 > 50 and value_1 < 70) AS t --- left OUTER JOIN --- (SELECT DISTINCT("events"."user_id") --- FROM events_table as "events" --- WHERE event_type IN (26, 27, 28, 29, 30, 13) --- GROUP BY "events"."user_id") as t2 on t2.user_id = t.user_id ) as second_query --- ON ("first_query".user_id = "second_query".user_id)) as final_query ---GROUP BY user_id ORDER BY cnt DESC, user_id DESC LIMIT 10; - +SELECT user_id, count(*) as cnt +FROM + (SELECT first_query.user_id, + Random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS 
collected_events + FROM ( + (SELECT * + FROM + (SELECT "events"."user_id", + "events"."time", + 0 AS event + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 1 AS event + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 2 AS event + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 3 AS event + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "first_query" +INNER JOIN + (SELECT "t"."user_id" + FROM + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t + LEFT OUTER JOIN + ( + SELECT DISTINCT "events"."user_id" as user_id + FROM events_table as "events" + WHERE event_type IN (35, 36, 37, 38) + GROUP BY user_id + ) as t2 + on (t2.user_id = t.user_id) WHERE t2.user_id is NULL) as second_query + ON ("first_query".user_id = "second_query".user_id)) as final_query +GROUP BY user_id ORDER BY cnt DESC, user_id DESC LIMIT 10; -- - -- NOT SUPPORTED YET BUT SHOULD BE ADDED + -- Unions, left / inner joins -- ---SELECT user_id, count(*) as cnt ---FROM --- (SELECT first_query.user_id, --- Random() --- FROM --- ( SELECT "t"."user_id", --- "t"."time", --- unnest("t"."collected_events") AS "event_types" --- FROM --- ( SELECT "t1"."user_id", --- min("t1"."time") AS "time", --- array_agg(("t1"."event") --- ORDER BY time ASC, event DESC) AS collected_events --- FROM ( --- (SELECT * --- FROM --- (SELECT "events"."user_id", --- "events"."time", --- 0 AS event --- FROM events_table as "events" --- WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) --- UNION --- (SELECT * --- FROM --- 
(SELECT "events"."user_id", "events"."time", 1 AS event --- FROM events_table as "events" --- WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) --- UNION --- (SELECT * --- FROM --- (SELECT "events"."user_id", "events"."time", 2 AS event --- FROM events_table as "events" --- WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) --- UNION --- (SELECT * --- FROM --- (SELECT "events"."user_id", "events"."time", 3 AS event --- FROM events_table as "events" --- WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 --- GROUP BY "t1"."user_id") AS t) "first_query" ---INNER JOIN --- (SELECT "t"."user_id" --- FROM --- (SELECT "users"."user_id" --- FROM users_table as "users" --- WHERE value_1 > 50 and value_1 < 70) AS t --- left OUTER JOIN --- (SELECT DISTINCT("events"."user_id") --- FROM events_table as "events" --- WHERE event_type IN (26, 27, 28, 29, 30, 13) --- GROUP BY "events"."user_id") as t2 on t2.user_id = t.user_id ) as second_query --- ON ("first_query".user_id = "second_query".user_id)) as final_query ---GROUP BY user_id ORDER BY cnt DESC, user_id DESC LIMIT 10; +SELECT user_id, count(*) as cnt +FROM + (SELECT first_query.user_id, + Random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT "events"."user_id", + "events"."time", + 0 AS event + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 1 AS event + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 2 AS event + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + 
UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 3 AS event + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "first_query" +INNER JOIN + (SELECT "t"."user_id" + FROM + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t + left OUTER JOIN + (SELECT DISTINCT("events"."user_id") + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13) + GROUP BY user_id + ) as t2 on t2.user_id = t.user_id where t2.user_id is NULL) as second_query + ON ("first_query".user_id = "second_query".user_id)) as final_query +GROUP BY user_id ORDER BY cnt DESC, user_id DESC LIMIT 10; -- Simple LATERAL JOINs with GROUP BYs in each side @@ -886,5 +1678,113 @@ FROM order BY value_2 DESC, user_id DESC limit 10; +-- lets test some unsupported set operations + +-- not supported since we use INTERSECT +SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT *, random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT "events"."user_id", + "events"."time", + 0 AS event + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 1 AS event + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 2 AS event + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + INTERSECT + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 3 AS event + FROM events_table as "events" + WHERE event_type IN 
(26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "q" +INNER JOIN + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id = q.user_id)) as final_query +GROUP BY types +ORDER BY types; + +-- not supported due to offset +SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT *, random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") + ORDER BY TIME ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT "events"."user_id", + "events"."time", + 0 AS event + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 1 AS event + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 2 AS event + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 3 AS event + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13) ) events_subquery_4) OFFSET 3 ) t1 + GROUP BY "t1"."user_id") AS t) "q" +INNER JOIN + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id = q.user_id)) as final_query +GROUP BY types +ORDER BY types; + +-- not supported due to window functions +SELECT user_id, + some_vals +FROM ( + SELECT * , + Row_number() over (PARTITION BY "user_id" ORDER BY "user_id") AS "some_vals", + Random() + FROM users_table + ) user_id +ORDER BY 1, + 2 limit 10; + SET citus.subquery_pushdown TO FALSE; SET citus.enable_router_execution TO TRUE;