diff --git a/src/backend/distributed/planner/multi_logical_planner.c b/src/backend/distributed/planner/multi_logical_planner.c index 5f6bc5037..d9fbfa839 100644 --- a/src/backend/distributed/planner/multi_logical_planner.c +++ b/src/backend/distributed/planner/multi_logical_planner.c @@ -97,9 +97,6 @@ static bool RangeTableArrayContainsAnyRTEIdentities(RangeTblEntry **rangeTableEn queryRteIdentities); static Relids QueryRteIdentities(Query *queryTree); static DeferredErrorMessage * DeferErrorIfFromClauseRecurs(Query *queryTree); -static DeferredErrorMessage * DeferErrorIfUnsupportedUnionQuery(Query *queryTree, - bool - outerMostQueryHasLimit); static bool ExtractSetOperationStatmentWalker(Node *node, List **setOperationList); static DeferredErrorMessage * DeferErrorIfUnsupportedTableCombination(Query *queryTree); static bool WindowPartitionOnDistributionColumn(Query *query); @@ -985,8 +982,7 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi if (subqueryTree->setOperations) { - deferredError = DeferErrorIfUnsupportedUnionQuery(subqueryTree, - outerMostQueryHasLimit); + deferredError = DeferErrorIfUnsupportedUnionQuery(subqueryTree); if (deferredError) { return deferredError; @@ -1098,9 +1094,8 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi * DeferErrorIfUnsupportedUnionQuery is a helper function for ErrorIfCannotPushdownSubquery(). * The function also errors out for set operations INTERSECT and EXCEPT. */ -static DeferredErrorMessage * -DeferErrorIfUnsupportedUnionQuery(Query *subqueryTree, - bool outerMostQueryHasLimit) +DeferredErrorMessage * +DeferErrorIfUnsupportedUnionQuery(Query *subqueryTree) { List *setOperationStatementList = NIL; ListCell *setOperationStatmentCell = NULL; @@ -1534,6 +1529,17 @@ FindNodeCheckInRangeTableList(List *rtable, bool (*check)(Node *)) } +/* + * QueryContainsDistributedTableRTE determines whether the given + * query contains a distributed table. + */ +bool +QueryContainsDistributedTableRTE(Query *query) +{ + return FindNodeCheck((Node *) query, IsDistributedTableRTE); +} + + /* * IsDistributedTableRTE gets a node and returns true if the node * is a range table relation entry that points to a distributed diff --git a/src/backend/distributed/planner/recursive_planning.c b/src/backend/distributed/planner/recursive_planning.c index fe0cd5b8e..fd5797701 100644 --- a/src/backend/distributed/planner/recursive_planning.c +++ b/src/backend/distributed/planner/recursive_planning.c @@ -67,10 +67,12 @@ #include "distributed/relation_restriction_equivalence.h" #include "lib/stringinfo.h" #include "optimizer/planner.h" +#include "parser/parsetree.h" #include "nodes/nodeFuncs.h" #include "nodes/nodes.h" #include "nodes/pg_list.h" #include "nodes/primnodes.h" +#include "nodes/relation.h" #include "utils/builtins.h" #include "utils/guc.h" @@ -117,6 +119,10 @@ static DeferredErrorMessage * RecursivelyPlanCTEs(Query *query, RecursivePlanningContext *context); static bool RecursivelyPlanSubqueryWalker(Node *node, RecursivePlanningContext *context); static bool ShouldRecursivelyPlanSubquery(Query *subquery); +static bool ShouldRecursivelyPlanSetOperation(Query *query, + RecursivePlanningContext *context); +static void RecursivelyPlanSetOperations(Query *query, Node *node, + RecursivePlanningContext *context); static bool IsLocalTableRTE(Node *node); static void RecursivelyPlanSubquery(Query *subquery, RecursivePlanningContext *planningContext); @@ -217,6 +223,21 @@ RecursivelyPlanSubqueriesAndCTEs(Query *query, RecursivePlanningContext *context /* descend into subqueries */ query_tree_walker(query, RecursivelyPlanSubqueryWalker, context, 0); + /* + * At this point, all CTEs, leaf subqueries containing local tables and + * non-pushdownable subqueries have been replaced. We now check for + * combinations of subqueries that cannot be pushed down (e.g. + * UNION ). + * + * This code also runs for the top-level query, which allows us to support + * top-level set operations. + */ + + if (ShouldRecursivelyPlanSetOperation(query, context)) + { + RecursivelyPlanSetOperations(query, (Node *) query->setOperations, context); + } + return NULL; } @@ -450,20 +471,104 @@ ShouldRecursivelyPlanSubquery(Query *subquery) return false; } - /* - * Even if we could recursively plan the subquery, we should ensure - * that the subquery doesn't contain any references to the outer - * queries. - */ - if (ContainsReferencesToOuterQuery(subquery)) - { - elog(DEBUG2, "skipping recursive planning for the subquery since it " - "contains references to outer queries"); + return true; +} + +/* + * ShouldRecursivelyPlanSetOperation determines whether the leaf queries of a + * set operations tree need to be recursively planned in order to support the + * query as a whole. + */ +static bool +ShouldRecursivelyPlanSetOperation(Query *query, RecursivePlanningContext *context) +{ + PlannerRestrictionContext *filteredRestrictionContext = NULL; + + SetOperationStmt *setOperations = (SetOperationStmt *) query->setOperations; + if (setOperations == NULL) + { return false; } - return true; + if (context->level == 0) + { + /* + * We cannot push down top-level set operation. Recursively plan the + * leaf nodes such that it becomes a router query. + */ + return true; + } + + if (setOperations->op != SETOP_UNION) + { + /* + * We can only push down UNION operaionts, plan other set operations + * recursively. + */ + return true; + } + + if (DeferErrorIfUnsupportedUnionQuery(query) != NULL) + { + /* + * If at least one leaf query in the union is recurring, then all + * leaf nodes need to be recurring. + */ + return true; + } + + filteredRestrictionContext = + FilterPlannerRestrictionForQuery(context->plannerRestrictionContext, query); + if (!SafeToPushdownUnionSubquery(filteredRestrictionContext)) + { + /* + * The distribution column is not in the same place in all sides + * of the union, meaning we cannot determine distribution column + * equivalence. Recursive planning is necessary. + */ + return true; + } + + return false; +} + + +/* + * RecursivelyPlanSetOperations descends into a tree of set operations + * (e.g. UNION, INTERSECTS) and recursively plans all leaf nodes that + * contain distributed tables. + */ +static void +RecursivelyPlanSetOperations(Query *query, Node *node, + RecursivePlanningContext *context) +{ + if (IsA(node, SetOperationStmt)) + { + SetOperationStmt *setOperations = (SetOperationStmt *) node; + + RecursivelyPlanSetOperations(query, setOperations->larg, context); + RecursivelyPlanSetOperations(query, setOperations->rarg, context); + } + else if (IsA(node, RangeTblRef)) + { + RangeTblRef *rangeTableRef = (RangeTblRef *) node; + RangeTblEntry *rangeTableEntry = rt_fetch(rangeTableRef->rtindex, + query->rtable); + Query *subquery = rangeTableEntry->subquery; + + if (rangeTableEntry->rtekind == RTE_SUBQUERY && + QueryContainsDistributedTableRTE(subquery)) + { + RecursivelyPlanSubquery(subquery, context); + } + } + else + { + ereport(ERROR, (errmsg("unexpected node type (%d) while " + "expecting set operations or " + "range table references", nodeTag(node)))); + } } @@ -513,6 +618,13 @@ IsLocalTableRTE(Node *node) /* * RecursivelyPlanQuery recursively plans a query, replaces it with a * result query and returns the subplan. + * + * Before we recursively plan the given subquery, we should ensure + * that the subquery doesn't contain any references to the outer + * queries (i.e., such queries cannot be separately planned). In + * that case, the function doesn't recursively plan the input query + * and immediately returns. Later, the planner decides on what to do + * with the query. */ static void RecursivelyPlanSubquery(Query *subquery, RecursivePlanningContext *planningContext) @@ -524,6 +636,14 @@ RecursivelyPlanSubquery(Query *subquery, RecursivePlanningContext *planningConte Query *resultQuery = NULL; Query *debugQuery = NULL; + if (ContainsReferencesToOuterQuery(subquery)) + { + elog(DEBUG2, "skipping recursive planning for the subquery since it " + "contains references to outer queries"); + + return; + } + /* * Subquery will go through the standard planner, thus to properly deparse it * we keep its copy: debugQuery. diff --git a/src/backend/distributed/planner/relation_restriction_equivalence.c b/src/backend/distributed/planner/relation_restriction_equivalence.c index 3f71cbaa5..204059b5e 100644 --- a/src/backend/distributed/planner/relation_restriction_equivalence.c +++ b/src/backend/distributed/planner/relation_restriction_equivalence.c @@ -11,6 +11,7 @@ #include "postgres.h" #include "distributed/distributed_planner.h" +#include "distributed/metadata_cache.h" #include "distributed/multi_logical_planner.h" #include "distributed/multi_logical_optimizer.h" #include "distributed/pg_dist_partition.h" @@ -1215,7 +1216,15 @@ AddRteRelationToAttributeEquivalenceClass(AttributeEquivalenceClass ** { AttributeEquivalenceClassMember *attributeEqMember = NULL; Oid relationId = rangeTableEntry->relid; - Var *relationPartitionKey = DistPartitionKey(relationId); + Var *relationPartitionKey = NULL; + + /* we don't consider local tables in the equality on columns */ + if (!IsDistributedTable(relationId)) + { + return; + } + + relationPartitionKey = DistPartitionKey(relationId); Assert(rangeTableEntry->rtekind == RTE_RELATION); diff --git a/src/include/distributed/multi_logical_planner.h b/src/include/distributed/multi_logical_planner.h index b1e1d55da..b96af6713 100644 --- a/src/include/distributed/multi_logical_planner.h +++ b/src/include/distributed/multi_logical_planner.h @@ -191,12 +191,14 @@ extern bool SingleRelationRepartitionSubquery(Query *queryTree); extern DeferredErrorMessage * DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLimit); +extern DeferredErrorMessage * DeferErrorIfUnsupportedUnionQuery(Query *queryTree); extern PlannerRestrictionContext * FilterPlannerRestrictionForQuery( PlannerRestrictionContext *plannerRestrictionContext, Query *query); extern bool SafeToPushdownWindowFunction(Query *query, StringInfo *errorDetail); extern bool TargetListOnPartitionColumn(Query *query, List *targetEntryList); extern bool FindNodeCheckInRangeTableList(List *rtable, bool (*check)(Node *)); +extern bool QueryContainsDistributedTableRTE(Query *query); extern bool ContainsReadIntermediateResultFunction(Node *node); extern MultiNode * ParentNode(MultiNode *multiNode); extern MultiNode * ChildNode(MultiUnaryNode *multiNode); diff --git a/src/test/regress/expected/multi_insert_select.out b/src/test/regress/expected/multi_insert_select.out index 16dfdb01d..5a658902a 100644 --- a/src/test/regress/expected/multi_insert_select.out +++ b/src/test/regress/expected/multi_insert_select.out @@ -721,15 +721,24 @@ FROM DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries DEBUG: Collecting INSERT ... SELECT results on coordinator ROLLBACK; --- We do not support any set operations +-- We do support set operations through recursive planning +BEGIN; +SET LOCAL client_min_messages TO DEBUG; INSERT INTO raw_events_first(user_id) (SELECT user_id FROM raw_events_first) INTERSECT (SELECT user_id FROM raw_events_first); DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries DEBUG: Collecting INSERT ... SELECT results on coordinator -ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT -HINT: Consider using an equality filter on the distributed table's partition column. +DEBUG: generating subplan 58_1 for subquery SELECT user_id FROM public.raw_events_first +DEBUG: generating subplan 58_2 for subquery SELECT user_id FROM public.raw_events_first +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 58_3 for subquery SELECT intermediate_result.user_id FROM read_intermediate_result('58_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) INTERSECT SELECT intermediate_result.user_id FROM read_intermediate_result('58_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) +DEBUG: Plan 58 query after replacing subqueries and CTEs: SELECT user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('58_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) citus_insert_select_subquery +DEBUG: Creating router plan +DEBUG: Plan is router executable +ROLLBACK; -- If the query is router plannable then it is executed via the coordinator INSERT INTO raw_events_first(user_id) @@ -1078,8 +1087,8 @@ DEBUG: join prunable for intervals [0,1073741823] and [1073741824,2147483647] DEBUG: join prunable for intervals [1073741824,2147483647] and [-2147483648,-1073741825] DEBUG: join prunable for intervals [1073741824,2147483647] and [-1073741824,-1] DEBUG: join prunable for intervals [1073741824,2147483647] and [0,1073741823] -DEBUG: generating subplan 86_1 for subquery SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.value_3 AS id FROM public.raw_events_first, public.raw_events_second WHERE (raw_events_first.user_id = raw_events_second.user_id) GROUP BY raw_events_second.value_3 -DEBUG: Plan 86 query after replacing subqueries and CTEs: SELECT id, v1, v4 FROM (SELECT intermediate_result.v4, intermediate_result.v1, intermediate_result.id FROM read_intermediate_result('86_1'::text, 'binary'::citus_copy_format) intermediate_result(v4 numeric, v1 bigint, id double precision)) foo +DEBUG: generating subplan 88_1 for subquery SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.value_3 AS id FROM public.raw_events_first, public.raw_events_second WHERE (raw_events_first.user_id = raw_events_second.user_id) GROUP BY raw_events_second.value_3 +DEBUG: Plan 88 query after replacing subqueries and CTEs: SELECT id, v1, v4 FROM (SELECT intermediate_result.v4, intermediate_result.v1, intermediate_result.id FROM read_intermediate_result('88_1'::text, 'binary'::citus_copy_format) intermediate_result(v4 numeric, v1 bigint, id double precision)) foo DEBUG: Creating router plan DEBUG: Plan is router executable ERROR: the partition column of table public.agg_events cannot be NULL @@ -1209,8 +1218,8 @@ DEBUG: join prunable for intervals [0,1073741823] and [1073741824,2147483647] DEBUG: join prunable for intervals [1073741824,2147483647] and [-2147483648,-1073741825] DEBUG: join prunable for intervals [1073741824,2147483647] and [-1073741824,-1] DEBUG: join prunable for intervals [1073741824,2147483647] and [0,1073741823] -DEBUG: generating subplan 105_1 for subquery SELECT sum(raw_events_second.value_4) AS v4, raw_events_second.value_1 AS v1, sum(raw_events_second.user_id) AS id FROM public.raw_events_first, public.raw_events_second WHERE (raw_events_first.user_id = raw_events_second.user_id) GROUP BY raw_events_second.value_1 HAVING (sum(raw_events_second.value_4) > (10)::numeric) -DEBUG: Plan 105 query after replacing subqueries and CTEs: SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM public.raw_events_first, public.reference_table WHERE (raw_events_first.user_id = reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT intermediate_result.v4, intermediate_result.v1, intermediate_result.id FROM read_intermediate_result('105_1'::text, 'binary'::citus_copy_format) intermediate_result(v4 numeric, v1 integer, id bigint)) foo2) f2 ON ((f.id = f2.id))) +DEBUG: generating subplan 107_1 for subquery SELECT sum(raw_events_second.value_4) AS v4, raw_events_second.value_1 AS v1, sum(raw_events_second.user_id) AS id FROM public.raw_events_first, public.raw_events_second WHERE (raw_events_first.user_id = raw_events_second.user_id) GROUP BY raw_events_second.value_1 HAVING (sum(raw_events_second.value_4) > (10)::numeric) +DEBUG: Plan 107 query after replacing subqueries and CTEs: SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM public.raw_events_first, public.reference_table WHERE (raw_events_first.user_id = reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT intermediate_result.v4, intermediate_result.v1, intermediate_result.id FROM read_intermediate_result('107_1'::text, 'binary'::citus_copy_format) intermediate_result(v4 numeric, v1 integer, id bigint)) foo2) f2 ON ((f.id = f2.id))) -- the second part of the query is not routable since -- GROUP BY not on the partition column (i.e., value_1) and thus join -- on f.id = f2.id is not on the partition key (instead on the sum of partition key) @@ -1251,8 +1260,8 @@ DEBUG: join prunable for intervals [0,1073741823] and [1073741824,2147483647] DEBUG: join prunable for intervals [1073741824,2147483647] and [-2147483648,-1073741825] DEBUG: join prunable for intervals [1073741824,2147483647] and [-1073741824,-1] DEBUG: join prunable for intervals [1073741824,2147483647] and [0,1073741823] -DEBUG: generating subplan 108_1 for subquery SELECT sum(raw_events_second.value_4) AS v4, raw_events_second.value_1 AS v1, sum(raw_events_second.user_id) AS id FROM public.raw_events_first, public.raw_events_second WHERE (raw_events_first.user_id = raw_events_second.user_id) GROUP BY raw_events_second.value_1 HAVING (sum(raw_events_second.value_4) > (10)::numeric) -DEBUG: Plan 108 query after replacing subqueries and CTEs: SELECT f.id FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM public.raw_events_first, public.reference_table WHERE (raw_events_first.user_id = reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT intermediate_result.v4, intermediate_result.v1, intermediate_result.id FROM read_intermediate_result('108_1'::text, 'binary'::citus_copy_format) intermediate_result(v4 numeric, v1 integer, id bigint)) foo2) f2 ON ((f.id = f2.id))) +DEBUG: generating subplan 110_1 for subquery SELECT sum(raw_events_second.value_4) AS v4, raw_events_second.value_1 AS v1, sum(raw_events_second.user_id) AS id FROM public.raw_events_first, public.raw_events_second WHERE (raw_events_first.user_id = raw_events_second.user_id) GROUP BY raw_events_second.value_1 HAVING (sum(raw_events_second.value_4) > (10)::numeric) +DEBUG: Plan 110 query after replacing subqueries and CTEs: SELECT f.id FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM public.raw_events_first, public.reference_table WHERE (raw_events_first.user_id = reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT intermediate_result.v4, intermediate_result.v1, intermediate_result.id FROM read_intermediate_result('110_1'::text, 'binary'::citus_copy_format) intermediate_result(v4 numeric, v1 integer, id bigint)) foo2) f2 ON ((f.id = f2.id))) -- cannot pushdown the query since the JOIN is not equi JOIN INSERT INTO agg_events (user_id, value_4_agg) diff --git a/src/test/regress/expected/multi_insert_select_non_pushable_queries.out b/src/test/regress/expected/multi_insert_select_non_pushable_queries.out index b42c1b99a..69e3c46df 100644 --- a/src/test/regress/expected/multi_insert_select_non_pushable_queries.out +++ b/src/test/regress/expected/multi_insert_select_non_pushable_queries.out @@ -118,7 +118,8 @@ FROM ( ) t2 ON (t1.user_id = t2.user_id) GROUP BY t1.user_id, hasdone_event ) t GROUP BY user_id, hasdone_event; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +ERROR: the query contains a join that requires repartitioning +HINT: Set citus.enable_repartition_joins to on to enable repartitioning -- the LEFT JOIN conditon is not on the partition column (i.e., is it part_key divided by 2) INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg ) SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event) diff --git a/src/test/regress/expected/multi_mx_router_planner.out b/src/test/regress/expected/multi_mx_router_planner.out index 49486173f..5fb094fc8 100644 --- a/src/test/regress/expected/multi_mx_router_planner.out +++ b/src/test/regress/expected/multi_mx_router_planner.out @@ -792,21 +792,36 @@ DEBUG: Plan is router executable az (4 rows) --- union queries are not supported if not router plannable --- there is an inconsistency on shard pruning between --- ubuntu/mac disabling log messages for this queries only -SET client_min_messages to 'NOTICE'; +-- union queries are supported through recursive planning +SET client_min_messages TO DEBUG1; (SELECT * FROM articles_hash_mx WHERE author_id = 1) UNION -(SELECT * FROM articles_hash_mx WHERE author_id = 2); -ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT -HINT: Consider using an equality filter on the distributed table's partition column. +(SELECT * FROM articles_hash_mx WHERE author_id = 2) +ORDER BY 1,2; +DEBUG: generating subplan 108_1 for subquery SELECT id, author_id, title, word_count FROM public.articles_hash_mx WHERE (author_id = 1) +DEBUG: generating subplan 108_2 for subquery SELECT id, author_id, title, word_count FROM public.articles_hash_mx WHERE (author_id = 2) +DEBUG: Plan 108 query after replacing subqueries and CTEs: SELECT intermediate_result.id, intermediate_result.author_id, intermediate_result.title, intermediate_result.word_count FROM read_intermediate_result('108_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint, title character varying(20), word_count integer) UNION SELECT intermediate_result.id, intermediate_result.author_id, intermediate_result.title, intermediate_result.word_count FROM read_intermediate_result('108_2'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint, title character varying(20), word_count integer) ORDER BY 1, 2 + id | author_id | title | word_count +----+-----------+--------------+------------ + 1 | 1 | arsenous | 9572 + 2 | 2 | abducing | 13642 + 11 | 1 | alamo | 1347 + 12 | 2 | archiblast | 18185 + 21 | 1 | arcading | 5890 + 22 | 2 | antipope | 2728 + 31 | 1 | athwartships | 7271 + 32 | 2 | amazon | 11342 + 41 | 1 | aznavour | 11814 + 42 | 2 | ausable | 15885 +(10 rows) + SELECT * FROM ( (SELECT * FROM articles_hash_mx WHERE author_id = 1) UNION (SELECT * FROM articles_hash_mx WHERE author_id = 2)) uu ORDER BY 1, 2 LIMIT 5; +DEBUG: push down of limit count: 5 id | author_id | title | word_count ----+-----------+------------+------------ 1 | 1 | arsenous | 9572 diff --git a/src/test/regress/expected/multi_router_planner.out b/src/test/regress/expected/multi_router_planner.out index 8cecadc78..c90de03b6 100644 --- a/src/test/regress/expected/multi_router_planner.out +++ b/src/test/regress/expected/multi_router_planner.out @@ -906,15 +906,35 @@ DEBUG: Plan is router executable az (4 rows) --- top-level union queries are not supported if not router plannable --- there is an inconsistency on shard pruning between --- ubuntu/mac disabling log messages for this queries only +-- top-level union queries are supported through recursive planning SET client_min_messages to 'NOTICE'; -(SELECT * FROM articles_hash WHERE author_id = 1) +( + (SELECT * FROM articles_hash WHERE author_id = 1) + UNION + (SELECT * FROM articles_hash WHERE author_id = 3) +) UNION -(SELECT * FROM articles_hash WHERE author_id = 2); -ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT -HINT: Consider using an equality filter on the distributed table's partition column. +(SELECT * FROM articles_hash WHERE author_id = 2) +ORDER BY 1,2,3; + id | author_id | title | word_count +----+-----------+--------------+------------ + 1 | 1 | arsenous | 9572 + 2 | 2 | abducing | 13642 + 3 | 3 | asternal | 10480 + 11 | 1 | alamo | 1347 + 12 | 2 | archiblast | 18185 + 13 | 3 | aseyev | 2255 + 21 | 1 | arcading | 5890 + 22 | 2 | antipope | 2728 + 23 | 3 | abhorring | 6799 + 31 | 1 | athwartships | 7271 + 32 | 2 | amazon | 11342 + 33 | 3 | autochrome | 8180 + 41 | 1 | aznavour | 11814 + 42 | 2 | ausable | 15885 + 43 | 3 | affixal | 12723 +(15 rows) + -- unions in subqueries are supported with subquery pushdown SELECT * FROM ( (SELECT * FROM articles_hash WHERE author_id = 1) diff --git a/src/test/regress/expected/multi_simple_queries.out b/src/test/regress/expected/multi_simple_queries.out index e50233fbd..6f2227013 100644 --- a/src/test/regress/expected/multi_simple_queries.out +++ b/src/test/regress/expected/multi_simple_queries.out @@ -167,11 +167,24 @@ SELECT author_id, sum(word_count) AS corpus_size FROM articles 8 | 55410 (3 rows) --- UNION/INTERSECT queries are unsupported if on multiple shards +-- UNION/INTERSECT queries are supported if on multiple shards SELECT * FROM articles WHERE author_id = 10 UNION -SELECT * FROM articles WHERE author_id = 2; -ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT -HINT: Consider using an equality filter on the distributed table's partition column. +SELECT * FROM articles WHERE author_id = 2 +ORDER BY 1,2,3; + id | author_id | title | word_count +----+-----------+------------+------------ + 2 | 2 | abducing | 13642 + 10 | 10 | aggrandize | 17277 + 12 | 2 | archiblast | 18185 + 20 | 10 | absentness | 1820 + 22 | 2 | antipope | 2728 + 30 | 10 | andelee | 6363 + 32 | 2 | amazon | 11342 + 40 | 10 | attemper | 14976 + 42 | 2 | ausable | 15885 + 50 | 10 | anjanette | 19519 +(10 rows) + -- queries using CTEs are supported WITH long_names AS ( SELECT id FROM authors WHERE char_length(name) > 15 ) SELECT title FROM articles ORDER BY 1 LIMIT 5; diff --git a/src/test/regress/expected/multi_simple_queries_0.out b/src/test/regress/expected/multi_simple_queries_0.out index c04e36c28..7f681d277 100644 --- a/src/test/regress/expected/multi_simple_queries_0.out +++ b/src/test/regress/expected/multi_simple_queries_0.out @@ -167,11 +167,24 @@ SELECT author_id, sum(word_count) AS corpus_size FROM articles 8 | 55410 (3 rows) --- UNION/INTERSECT queries are unsupported if on multiple shards +-- UNION/INTERSECT queries are supported if on multiple shards SELECT * FROM articles WHERE author_id = 10 UNION -SELECT * FROM articles WHERE author_id = 2; -ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT -HINT: Consider using an equality filter on the distributed table's partition column. +SELECT * FROM articles WHERE author_id = 2 +ORDER BY 1,2,3; + id | author_id | title | word_count +----+-----------+------------+------------ + 2 | 2 | abducing | 13642 + 10 | 10 | aggrandize | 17277 + 12 | 2 | archiblast | 18185 + 20 | 10 | absentness | 1820 + 22 | 2 | antipope | 2728 + 30 | 10 | andelee | 6363 + 32 | 2 | amazon | 11342 + 40 | 10 | attemper | 14976 + 42 | 2 | ausable | 15885 + 50 | 10 | anjanette | 19519 +(10 rows) + -- queries using CTEs are supported WITH long_names AS ( SELECT id FROM authors WHERE char_length(name) > 15 ) SELECT title FROM articles ORDER BY 1 LIMIT 5; diff --git a/src/test/regress/expected/multi_subquery.out b/src/test/regress/expected/multi_subquery.out index 8c5f18bfc..53f41d0fc 100644 --- a/src/test/regress/expected/multi_subquery.out +++ b/src/test/regress/expected/multi_subquery.out @@ -3,7 +3,6 @@ -- -- no need to set shardid sequence given that we're not creating any shards SET citus.next_shard_id TO 570032; -SET citus.enable_router_execution TO FALSE; -- Check that we error out if shard min/max values are not exactly same. SELECT avg(unit_price) @@ -39,7 +38,11 @@ FROM lineitem_subquery GROUP BY l_suppkey) AS order_counts; -ERROR: cannot handle complex subqueries when the router executor is disabled + avg +-------------------- + 1.7199369356456930 +(1 row) + -- Check that we error out if join is not on partition columns. SELECT avg(unit_price) @@ -76,22 +79,45 @@ SELECT count(*) FROM ( 0 (1 row) --- Check that we error out if there is non relation subqueries +SET client_min_messages TO DEBUG; +-- If there is non relation subqueries then we recursively plan SELECT count(*) FROM ( (SELECT l_orderkey FROM lineitem_subquery) UNION ALL (SELECT 1::bigint) ) b; -ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT --- Check that we error out if queries in union do not include partition columns. +DEBUG: generating subplan 7_1 for subquery SELECT l_orderkey FROM public.lineitem_subquery +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 7_2 for subquery SELECT intermediate_result.l_orderkey FROM read_intermediate_result('7_1'::text, 'binary'::citus_copy_format) intermediate_result(l_orderkey bigint) UNION ALL SELECT (1)::bigint AS int8 +DEBUG: Plan 7 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.l_orderkey FROM read_intermediate_result('7_2'::text, 'binary'::citus_copy_format) intermediate_result(l_orderkey bigint)) b +DEBUG: Creating router plan +DEBUG: Plan is router executable + count +------- + 12001 +(1 row) + +-- If queries in union do not include partition columns then we recursively plan SELECT count(*) FROM ( (SELECT l_orderkey FROM lineitem_subquery) UNION (SELECT l_partkey FROM lineitem_subquery) ) b; -ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position -DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column --- Check that we run union queries if partition column is selected. +DEBUG: generating subplan 10_1 for subquery SELECT l_orderkey FROM public.lineitem_subquery +DEBUG: generating subplan 10_2 for subquery SELECT l_partkey FROM public.lineitem_subquery +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 10_3 for subquery SELECT intermediate_result.l_orderkey FROM read_intermediate_result('10_1'::text, 'binary'::citus_copy_format) intermediate_result(l_orderkey bigint) UNION SELECT intermediate_result.l_partkey FROM read_intermediate_result('10_2'::text, 'binary'::citus_copy_format) intermediate_result(l_partkey integer) +DEBUG: Plan 10 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.l_orderkey FROM read_intermediate_result('10_3'::text, 'binary'::citus_copy_format) intermediate_result(l_orderkey bigint)) b +DEBUG: Creating router plan +DEBUG: Plan is router executable + count +------- + 14496 +(1 row) + +-- Check that we push down union queries if partition column is selected (no DEBUG messages) SELECT count(*) FROM ( (SELECT l_orderkey FROM lineitem_subquery) UNION @@ -102,6 +128,7 @@ SELECT count(*) FROM 2985 (1 row) +RESET client_min_messages; -- we'd error out if inner query has Limit but subquery_pushdown is not set -- but we recursively plan the query SELECT @@ -692,6 +719,8 @@ SELECT master_create_worker_shards('subquery_pruning_varchar_test_table', 4, 1); (1 row) +-- temporarily disable router executor to test pruning behaviour of subquery pushdown +SET citus.enable_router_execution TO off; SET client_min_messages TO DEBUG2; SELECT * FROM (SELECT count(*) FROM subquery_pruning_varchar_test_table WHERE a = 'onder' GROUP BY a) @@ -747,6 +776,7 @@ AS foo; (0 rows) DROP TABLE subquery_pruning_varchar_test_table; +RESET citus.enable_router_execution; -- Simple join subquery pushdown SELECT avg(array_length(events, 1)) AS event_average @@ -1001,4 +1031,3 @@ $f$); DROP FUNCTION run_command_on_master_and_workers(p_sql text); SET client_min_messages TO DEFAULT; SET citus.subquery_pushdown to OFF; -SET citus.enable_router_execution TO 'true'; diff --git a/src/test/regress/expected/multi_subquery_complex_queries.out b/src/test/regress/expected/multi_subquery_complex_queries.out index 2b649ae05..1fb18774f 100644 --- a/src/test/regress/expected/multi_subquery_complex_queries.out +++ b/src/test/regress/expected/multi_subquery_complex_queries.out @@ -7,7 +7,6 @@ -- SET citus.next_shard_id TO 1400000; ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 1400000; -SET citus.enable_router_execution TO FALSE; -- -- UNIONs and JOINs mixed -- @@ -144,7 +143,7 @@ ORDER BY 3 | 268 (4 rows) --- not supported since events_subquery_2 doesn't have partition key on the target list +-- supported through recursive planning since events_subquery_2 doesn't have partition key on the target list -- within the shuffled target list SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM @@ -202,8 +201,15 @@ GROUP BY types ORDER BY types; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator --- not supported since events_subquery_2 doesn't have partition key on the target list + types | sumofeventtype +-------+---------------- + 0 | 449 + 1 | 234 + 2 | 75 + 3 | 268 +(4 rows) + +-- supported through recursive planning since events_subquery_2 doesn't have partition key on the target list SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM ( SELECT *, random() @@ -260,7 +266,14 @@ GROUP BY types ORDER BY types; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator + types | sumofeventtype +-------+---------------- + 0 | 449 + 1 | 369 + 2 | 75 + 3 | 268 +(4 rows) + -- we can support arbitrary subqueries within UNIONs SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM @@ -749,7 +762,7 @@ ORDER BY types; 3 | 268 (4 rows) --- not supported since subquery 3 does not have partition key +-- supported through recursive planning since subquery 3 does not have partition key SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM ( SELECT *, random() @@ -800,8 +813,15 @@ INNER JOIN WHERE value_1 > 0 and value_1 < 4) AS t ON (t.user_id = q.user_id)) as final_query GROUP BY types ORDER BY types; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator --- not supported since events_subquery_4 does not have partition key on the + types | sumofeventtype +-------+---------------- + 0 | 449 + 1 | 433 + 2 | 62 + 3 | 268 +(4 rows) + +-- supported through recursive planning since events_subquery_4 does not have partition key on the -- target list SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM @@ -857,7 +877,14 @@ INNER JOIN ON (t.user_id = q.user_id)) as final_query GROUP BY types ORDER BY types; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator + types | sumofeventtype +-------+---------------- + 0 | 449 + 1 | 433 + 2 | 75 + 3 | 124 +(4 rows) + -- union all with inner and left joins SELECT user_id, count(*) as cnt FROM @@ -2263,12 +2290,16 @@ GROUP BY types ORDER BY types; -ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT -HINT: Consider using an equality filter on the distributed table's partition column. --- not supported due to offset + types | sumofeventtype +-------+---------------- + 0 | 449 + 1 | 433 +(2 rows) + +-- supported through recursive planning SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM - ( SELECT *, random() + ( SELECT * FROM ( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types" FROM @@ -2312,7 +2343,7 @@ FROM FROM events_table as "events" WHERE - event_type IN (4, 5)) events_subquery_4) OFFSET 3) t1 + event_type IN (4, 5)) events_subquery_4) ORDER BY 1, 2 OFFSET 3) t1 GROUP BY "t1"."user_id") AS t) "q" INNER JOIN (SELECT @@ -2326,8 +2357,14 @@ GROUP BY types ORDER BY types; -ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT -HINT: Consider using an equality filter on the distributed table's partition column. + types | sumofeventtype +-------+---------------- + 0 | 449 + 1 | 425 + 2 | 75 + 3 | 251 +(4 rows) + -- not supported due to non relation rte SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM @@ -2386,8 +2423,14 @@ GROUP BY types ORDER BY types; -ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT -HINT: Consider using an equality filter on the distributed table's partition column. + types | sumofeventtype +-------+---------------- + 0 | 449 + 1 | 433 + 2 | 75 + 3 | 4 +(4 rows) + -- similar to the above, but constant rte is on the right side of the query SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM @@ -2435,12 +2478,17 @@ FROM ) events_subquery_4)) t1 GROUP BY "t1"."user_id") AS t) "q" INNER JOIN - (SELECT random()::int as user_id) AS t + (SELECT 1 as user_id) AS t ON (t.user_id = q.user_id)) as final_query GROUP BY types ORDER BY types; -ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT -HINT: Consider using an equality filter on the distributed table's partition column. -SET citus.enable_router_execution TO TRUE; + types | sumofeventtype +-------+---------------- + 0 | 4 + 1 | 8 + 2 | 1 + 3 | 1 +(4 rows) + diff --git a/src/test/regress/expected/multi_subquery_complex_reference_clause.out b/src/test/regress/expected/multi_subquery_complex_reference_clause.out index 81d3d371e..c8b35ef33 100644 --- a/src/test/regress/expected/multi_subquery_complex_reference_clause.out +++ b/src/test/regress/expected/multi_subquery_complex_reference_clause.out @@ -6,7 +6,6 @@ -- We don't need shard id sequence here, so commented out to prevent conflicts with concurrent tests -- SET citus.next_shard_id TO 1400000; ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 1400000; -SET citus.enable_router_execution TO FALSE; CREATE TABLE user_buy_test_table(user_id int, item_id int, buy_count int); SELECT create_distributed_table('user_buy_test_table', 'user_id'); create_distributed_table @@ -261,23 +260,52 @@ SELECT count(*) FROM user_buy_test_table RIGHT JOIN (SELECT * FROM generate_seri ON user_buy_test_table.item_id = users_ref_test_table.id; ERROR: cannot pushdown the subquery DETAIL: There exist a table function in the outer part of the outer join --- volatile functions cannot be used as table expressions +-- volatile functions can be used as table expressions through recursive planning +SET client_min_messages TO DEBUG; SELECT count(*) FROM (SELECT random() FROM user_buy_test_table JOIN random() AS users_ref_test_table(id) ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1; -ERROR: cannot handle complex subqueries when the router executor is disabled --- cannot sneak in a volatile function as a parameter +DEBUG: generating subplan 30_1 for subquery SELECT random() AS random FROM (public.user_buy_test_table JOIN random() users_ref_test_table(id) ON (((user_buy_test_table.item_id)::double precision > users_ref_test_table.id))) +DEBUG: Plan 30 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.random FROM read_intermediate_result('30_1'::text, 'binary'::citus_copy_format) intermediate_result(random double precision)) subquery_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + count +------- + 4 +(1 row) + +-- can sneak in a volatile function as a parameter SELECT count(*) FROM - (SELECT random() FROM user_buy_test_table JOIN generate_series(random()::int,10) AS users_ref_test_table(id) - ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1; -ERROR: cannot handle complex subqueries when the router executor is disabled --- cannot perform a union with table function + (SELECT item_id FROM user_buy_test_table JOIN generate_series(random()::int,10) AS users_ref_test_table(id) + ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1 +WHERE item_id = 6; +DEBUG: generating subplan 32_1 for subquery SELECT user_buy_test_table.item_id FROM (public.user_buy_test_table JOIN generate_series((random())::integer, 10) users_ref_test_table(id) ON ((user_buy_test_table.item_id > users_ref_test_table.id))) +DEBUG: Plan 32 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.item_id FROM read_intermediate_result('32_1'::text, 'binary'::citus_copy_format) intermediate_result(item_id integer)) subquery_1 WHERE (item_id = 6) +DEBUG: Creating router plan +DEBUG: Plan is router executable + count +------- + 0 +(1 row) + +-- can perform a union with table function through recursive planning SELECT count(*) FROM (SELECT user_id FROM user_buy_test_table UNION ALL SELECT id FROM generate_series(1,10) AS users_ref_test_table(id)) subquery_1; -ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT -HINT: Consider using an equality filter on the distributed table's partition column. +DEBUG: generating subplan 34_1 for subquery SELECT user_id FROM public.user_buy_test_table +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 34_2 for subquery SELECT intermediate_result.user_id FROM read_intermediate_result('34_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) UNION ALL SELECT users_ref_test_table.id FROM generate_series(1, 10) users_ref_test_table(id) +DEBUG: Plan 34 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('34_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) subquery_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + count +------- + 14 +(1 row) + +RESET client_min_messages; -- subquery without FROM can be the inner relationship in a join SELECT count(*) FROM (SELECT random() FROM user_buy_test_table JOIN (SELECT 4 AS id) users_ref_test_table @@ -308,13 +336,16 @@ SELECT count(*) FROM user_buy_test_table RIGHT JOIN (SELECT 5 AS id) users_ref_t ON user_buy_test_table.item_id = users_ref_test_table.id; ERROR: cannot pushdown the subquery DETAIL: There exist a subquery without FROM in the outer part of the outer join --- cannot perform a union with subquery without FROM +-- can perform a union with subquery without FROM SELECT count(*) FROM (SELECT user_id FROM user_buy_test_table UNION ALL SELECT id FROM (SELECT 5 AS id) users_ref_test_table) subquery_1; -ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT -HINT: Consider using an equality filter on the distributed table's partition column. + count +------- + 5 +(1 row) + -- should be able to pushdown since reference table is in the -- inner part of the left join SELECT @@ -983,10 +1014,18 @@ INNER JOIN value_1 > 2 and value_1 < 4) AS t ON (t.user_id = q.user_id)) as final_query ORDER BY - types; -ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT -HINT: Consider using an equality filter on the distributed table's partition column. - -- reference table exist in the subquery of union, should error out + types +LIMIT 5; + types +------- + 0 + 0 + 0 + 0 + 0 +(5 rows) + + -- reference table exist in the subquery of union SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM ( SELECT @@ -1057,8 +1096,13 @@ GROUP BY types ORDER BY types; -ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT -HINT: Consider using an equality filter on the distributed table's partition column. + types | sumofeventtype +-------+---------------- + 0 | 217 + 2 | 191 + 3 | 31 +(3 rows) + -- -- Should error out with UNION ALL Queries on reference tables -- @@ -1112,8 +1156,14 @@ INNER JOIN WHERE value_1 > 2 and value_1 < 4) AS t ON (t.user_id = q.user_id)) as final_query GROUP BY types ORDER BY types; -ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT -HINT: Consider using an equality filter on the distributed table's partition column. + types | sumofeventtype +-------+---------------- + 0 | 191 + 1 | 191 + 2 | 31 + 3 | 120 +(4 rows) + -- just a sanity check that we don't allow this if the reference table is on the -- left part of the left join SELECT count(*) FROM @@ -1201,15 +1251,25 @@ SELECT foo.user_id FROM (0 rows) -- not pushdownable since group by is on the reference table column --- recursively planned, but hits unsupported clause type error on the top level query +-- recursively planned SELECT foo.user_id FROM ( SELECT r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id) GROUP BY r.user_id -) as foo; -ERROR: cannot handle complex subqueries when the router executor is disabled +) as foo +ORDER BY 1 DESC; + user_id +--------- + 6 + 5 + 4 + 3 + 2 + 1 +(6 rows) + -- not pushdownable since the group by contains at least one distributed table --- recursively planned, but hits unsupported clause type error on the top level query +-- recursively planned SELECT foo.user_id FROM ( SELECT r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id) @@ -1224,18 +1284,38 @@ ORDER BY 1 LIMIT 3; (3 rows) -- not pushdownable since distinct is on the reference table column --- recursively planned, but hits unsupported clause type error on the top level query +-- recursively planned SELECT foo.user_id FROM ( SELECT DISTINCT r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id) -) as foo; -ERROR: cannot handle complex subqueries when the router executor is disabled +) as foo +ORDER BY 1 DESC +LIMIT 5; + user_id +--------- + 6 + 6 + 6 + 6 + 6 +(5 rows) + -- not supported since distinct on is on the reference table column +-- but recursively planned SELECT foo.user_id FROM ( SELECT DISTINCT ON(r.user_id) r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id) ) as foo; -ERROR: cannot handle complex subqueries when the router executor is disabled + user_id +--------- + 1 + 2 + 3 + 4 + 5 + 6 +(6 rows) + -- supported since the distinct on contains at least one distributed table SELECT foo.user_id FROM ( @@ -1313,20 +1393,36 @@ SELECT * FROM ( SELECT DISTINCT users_reference_table.user_id FROM users_reference_table, events_table WHERE users_reference_table.user_id = events_table.value_4 ) as foo; -ERROR: cannot handle complex subqueries when the router executor is disabled + user_id +--------- +(0 rows) + SELECT * FROM ( SELECT users_reference_table.user_id FROM users_reference_table, events_table WHERE users_reference_table.user_id = events_table.value_4 GROUP BY 1 ) as foo; -ERROR: cannot handle complex subqueries when the router executor is disabled + user_id +--------- +(0 rows) + -- similiar to the above examples, this time there is a subquery -- whose output is not in the DISTINCT clause SELECT * FROM ( SELECT DISTINCT users_reference_table.user_id FROM users_reference_table, (SELECT user_id, random() FROM events_table) as us_events WHERE users_reference_table.user_id = us_events.user_id -) as foo; -ERROR: cannot handle complex subqueries when the router executor is disabled +) as foo +ORDER BY 1; + user_id +--------- + 1 + 2 + 3 + 4 + 5 + 6 +(6 rows) + -- the following query is safe to push down since the DISTINCT clause include distribution column SELECT * FROM ( @@ -1357,7 +1453,14 @@ SELECT * FROM ) as foo ORDER BY 1 DESC LIMIT 4; -ERROR: cannot handle complex subqueries when the router executor is disabled + user_id | value_4 +---------+--------- + 6 | + 5 | + 4 | + 3 | +(4 rows) + -- test the read_intermediate_result() for GROUP BYs BEGIN; diff --git a/src/test/regress/expected/multi_subquery_in_where_clause.out b/src/test/regress/expected/multi_subquery_in_where_clause.out index f8cc556b8..5040ef724 100644 --- a/src/test/regress/expected/multi_subquery_in_where_clause.out +++ b/src/test/regress/expected/multi_subquery_in_where_clause.out @@ -534,7 +534,8 @@ WHERE GROUP BY user_id HAVING count(*) > 1 AND sum(value_2) > 29 ORDER BY 1; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +ERROR: cannot pushdown the subquery +DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join -- NOT EXISTS query has non-equi join SELECT user_id, array_length(events_table, 1) FROM ( diff --git a/src/test/regress/expected/multi_subquery_union.out b/src/test/regress/expected/multi_subquery_union.out index 8f70a1490..696a366c2 100644 --- a/src/test/regress/expected/multi_subquery_union.out +++ b/src/test/regress/expected/multi_subquery_union.out @@ -4,7 +4,6 @@ -- the tables that are used depends to multi_insert_select_behavioral_analytics_create_table.sql -- We don't need shard id sequence here, so commented out to prevent conflicts with concurrent tests -- SET citus.next_shard_id TO 1400000; -SET citus.enable_router_execution TO false; -- a very simple union query SELECT user_id, counter FROM ( @@ -47,8 +46,15 @@ FROM ( ) user_id ORDER BY 2 DESC,1 LIMIT 5; -ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT -HINT: Consider using an equality filter on the distributed table's partition column. + user_id | counter +---------+--------- + 2 | 5 + 3 | 5 + 4 | 5 + 1 | 4 + 2 | 4 +(5 rows) + -- the same query with union all SELECT user_id, counter FROM ( @@ -76,8 +82,15 @@ FROM ( ) user_id ORDER BY 2 DESC,1 LIMIT 5; -ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT -HINT: Consider using an equality filter on the distributed table's partition column. + user_id | counter +---------+--------- + 2 | 5 + 2 | 5 + 3 | 5 + 3 | 5 + 4 | 5 +(5 rows) + -- the same query with group by SELECT user_id, sum(counter) FROM ( @@ -212,8 +225,14 @@ FROM ( SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 5 and value_1 < 6 GROUP BY user_id HAVING sum(value_2) > 25 ) user_id GROUP BY user_id ORDER BY 1 DESC LIMIT 5; -ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT -HINT: Consider using an equality filter on the distributed table's partition column. + sum +----- + 135 + 87 + 85 + 69 +(4 rows) + -- similar query as above, with UNION ALL SELECT sum(counter) FROM ( @@ -329,8 +348,15 @@ FROM ( user_id)) AS ftop ORDER BY 2 DESC, 1 DESC LIMIT 5; -ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT -HINT: Consider using an equality filter on the distributed table's partition column. + user_id | sum +---------+----- + 2 | 107 + 3 | 101 + 5 | 94 + 4 | 91 + 1 | 62 +(5 rows) + -- top level unions are wrapped into top level aggregations SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM @@ -597,8 +623,11 @@ FROM UNION ALL (SELECT user_id FROM events_reference_table) ) b; -ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT -HINT: Consider using an equality filter on the distributed table's partition column. + count +------- + 202 +(1 row) + -- similar query without top level agg SELECT user_id @@ -738,19 +767,27 @@ LIMIT 5; (5 rows) -- now lets also have some unsupported queries --- group by is not on the partition key --- but we can still recursively plan it, though that is not suffient for pushdown --- of the whole query +-- group by is not on the partition key, supported through recursive planning SELECT user_id, sum(counter) FROM ( SELECT user_id, sum(value_2) AS counter FROM events_table GROUP BY user_id UNION SELECT value_1 as user_id, sum(value_2) AS counter FROM users_table GROUP BY value_1 ) user_id -GROUP BY user_id; -ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT -HINT: Consider using an equality filter on the distributed table's partition column. --- partition key is not selected +GROUP BY user_id +ORDER BY 1,2; + user_id | sum +---------+----- + 0 | 31 + 1 | 76 + 2 | 99 + 3 | 119 + 4 | 94 + 5 | 57 + 6 | 22 +(7 rows) + +-- partition key is not selected, supported through recursive planning SELECT sum(counter) FROM ( SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 1 GROUP BY user_id HAVING sum(value_2) > 25 @@ -764,9 +801,16 @@ FROM ( SELECT 2 * user_id, sum(value_2) AS counter FROM users_table where value_1 < 5 and value_1 < 6 GROUP BY user_id HAVING sum(value_2) > 25 ) user_id GROUP BY user_id ORDER BY 1 DESC LIMIT 5; -ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position -DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column --- excepts within unions are not supported + sum +----- + 80 + 76 + 55 + 50 + 43 +(5 rows) + +-- excepts within unions are supported through recursive planning SELECT * FROM ( ( @@ -787,9 +831,24 @@ UNION SELECT user_id, sum(value_2) AS counter FROM events_table GROUP BY user_id ) user_id_2 GROUP BY user_id) -) as ftop; -ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT -HINT: Consider using an equality filter on the distributed table's partition column. +) as ftop +ORDER BY 1,2; + user_id | sum +---------+----- + 1 | 20 + 1 | 62 + 2 | 50 + 2 | 107 + 3 | 55 + 3 | 101 + 4 | 50 + 4 | 91 + 5 | 63 + 5 | 94 + 6 | 21 + 6 | 43 +(12 rows) + -- non-equi join are not supported since there is no equivalence between the partition column SELECT user_id, sum(counter) FROM ( @@ -798,8 +857,7 @@ FROM ( SELECT events_table.user_id, sum(events_table.value_2) AS counter FROM events_table, users_table WHERE users_table.user_id > events_table.user_id GROUP BY 1 ) user_id GROUP BY user_id; -ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position -DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- non-equi join also not supported for UNION ALL SELECT user_id, sum(counter) FROM ( @@ -808,8 +866,7 @@ FROM ( SELECT events_table.user_id, sum(events_table.value_2) AS counter FROM events_table, users_table WHERE users_table.user_id > events_table.user_id GROUP BY 1 ) user_id GROUP BY user_id; -ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position -DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- joins inside unions are supported -- slightly more comlex than the above SELECT * FROM ( @@ -942,11 +999,18 @@ SELECT user_id, sum(counter) FROM ( SELECT user_id, sum(value_2) AS counter FROM events_table GROUP BY user_id UNION - SELECT user_id, sum(value_2) AS counter FROM users_table GROUP BY user_id OFFSET 4 + SELECT user_id, sum(value_2) AS counter FROM users_table GROUP BY user_id ORDER BY user_id OFFSET 4 ) user_id -GROUP BY user_id; -ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT -HINT: Consider using an equality filter on the distributed table's partition column. +GROUP BY user_id +ORDER BY 1,2; + user_id | sum +---------+----- + 3 | 101 + 4 | 91 + 5 | 94 + 6 | 43 +(4 rows) + -- lower level union does not return partition key with the other relations SELECT * FROM ( @@ -986,9 +1050,30 @@ FROM ( GROUP BY user_id) user_id_2 GROUP BY - user_id)) AS ftop; -ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position -DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column + user_id)) AS ftop +ORDER BY 1,2; + user_id | sum +---------+----- + 1 | 20 + 1 | 62 + 2 | 50 + 2 | 107 + 3 | 55 + 3 | 101 + 4 | 50 + 4 | 91 + 5 | 63 + 5 | 94 + 6 | 21 + 6 | 43 + 22 | 6 + 31 | 5 + 41 | 4 + 42 | 1 + 46 | 3 + 57 | 2 +(18 rows) + -- some UNION all queries that are going to be pulled up SELECT count(*) @@ -998,8 +1083,11 @@ FROM UNION ALL (SELECT 2 * user_id FROM events_table) ) b; -ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position -DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column + count +------- + 202 +(1 row) + -- last query does not have partition key SELECT user_id, value_3 @@ -1019,9 +1107,16 @@ FROM ) b ORDER BY 1 DESC, 2 DESC LIMIT 5; -ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position -DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column --- we don't allow joins within unions + user_id | value_3 +---------+--------- + 6 | 5 + 6 | 5 + 6 | 3 + 6 | 3 + 6 | 3 +(5 rows) + +-- we allow joins within unions SELECT count(*) FROM @@ -1030,11 +1125,12 @@ FROM UNION ALL (SELECT users_table.user_id FROM events_table, users_table WHERE events_table.user_id = users_table.user_id) ) b; -ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position -DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column --- we don't support pushing down subqueries without relations --- recursive planning can replace that query, though the whole --- query is not safe to pushdown + count +------- + 1850 +(1 row) + +-- we support unions on subqueries without relations through recursive planning SELECT count(*) FROM @@ -1043,22 +1139,26 @@ FROM UNION ALL (SELECT 1) ) b; -ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT -HINT: Consider using an equality filter on the distributed table's partition column. --- we don't support pushing down subqueries without relations --- recursive planning can replace that query, though the whole --- query is not safe to pushdown + count +------- + 102 +(1 row) + +-- we support pushing down subqueries without relations through recursive planning SELECT - * + count(*) FROM ( (SELECT user_id FROM users_table) UNION ALL (SELECT (random() * 100)::int) ) b; -ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT -HINT: Consider using an equality filter on the distributed table's partition column. --- we don't support subqueries without relations + count +------- + 102 +(1 row) + +-- we support subqueries without relations within a union SELECT user_id, value_3 FROM @@ -1077,11 +1177,16 @@ FROM ) b ORDER BY 1 DESC, 2 DESC LIMIT 5; -ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT -HINT: Consider using an equality filter on the distributed table's partition column. --- we don't support pushing down subqueries without relations --- recursive planning can replace that query, though the whole --- query is not safe to pushdown + user_id | value_3 +---------+--------- + 6 | 5 + 6 | 5 + 6 | 3 + 6 | 3 + 6 | 3 +(5 rows) + +-- we support pushing down subqueries without relations through recursive planning SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM ( SELECT *, random() @@ -1124,8 +1229,13 @@ FROM ) as final_query GROUP BY types ORDER BY types; -ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT -HINT: Consider using an equality filter on the distributed table's partition column. -SET citus.enable_router_execution TO true; + types | sumofeventtype +-------+---------------- + 0 | 43 + 1 | 42 + 2 | 28 + 3 | 1 +(4 rows) + DROP TABLE events_reference_table; DROP TABLE users_reference_table; diff --git a/src/test/regress/expected/multi_view.out b/src/test/regress/expected/multi_view.out index d997e68f2..aefa72866 100644 --- a/src/test/regress/expected/multi_view.out +++ b/src/test/regress/expected/multi_view.out @@ -543,12 +543,21 @@ SELECT count(*) FROM events_table et WHERE et.user_id IN (SELECT user_id FROM re 15 (1 row) --- expected this to work but it did not +-- union between views is supported through recursive planning (SELECT user_id FROM recent_users) UNION -(SELECT user_id FROM selected_users); -ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT -HINT: Consider using an equality filter on the distributed table's partition column. +(SELECT user_id FROM selected_users) +ORDER BY 1; + user_id +--------- + 1 + 2 + 3 + 4 + 5 + 6 +(6 rows) + -- wrapping it inside a SELECT * works SELECT * FROM ( @@ -587,15 +596,18 @@ SELECT count(*) 1 (1 row) --- expected this to work but it does not +-- UNION ALL between views is supported through recursive planning SELECT count(*) FROM ( (SELECT user_id FROM recent_users) UNION ALL (SELECT user_id FROM selected_users) ) u WHERE user_id < 2 AND user_id > 0; -ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position -DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column + count +------- + 2 +(1 row) + -- expand view definitions and re-run last 2 queries SELECT count(*) FROM ( @@ -620,8 +632,11 @@ SELECT count(*) UNION ALL (SELECT user_id FROM (SELECT * FROM users_table WHERE value_1 >= 1 and value_1 < 3) bb) ) u WHERE user_id < 2 AND user_id > 0; -ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position -DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column + count +------- + 2 +(1 row) + -- test distinct -- distinct is supported if it is on a partition key CREATE VIEW distinct_user_with_value_1_3 AS SELECT DISTINCT user_id FROM users_table WHERE value_1 = 3; @@ -830,7 +845,7 @@ EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USIN -> Sort Sort Key: remote_scan."time" DESC -> Custom Scan (Citus Real-Time) - -> Distributed Subplan 83_1 + -> Distributed Subplan 91_1 -> Limit -> Sort Sort Key: max((max(remote_scan.lastseen))) DESC diff --git a/src/test/regress/expected/set_operation_and_local_tables.out b/src/test/regress/expected/set_operation_and_local_tables.out new file mode 100644 index 000000000..12b6fbb87 --- /dev/null +++ b/src/test/regress/expected/set_operation_and_local_tables.out @@ -0,0 +1,321 @@ +CREATE SCHEMA recursive_set_local; +SET search_path TO recursive_set_local, public; +CREATE TABLE recursive_set_local.test (x int, y int); +SELECT create_distributed_table('test', 'x'); + create_distributed_table +-------------------------- + +(1 row) + +CREATE TABLE recursive_set_local.ref (a int, b int); +SELECT create_reference_table('ref'); + create_reference_table +------------------------ + +(1 row) + +CREATE TABLE recursive_set_local.local_test (x int, y int); +INSERT INTO test VALUES (1,1), (2,2); +INSERT INTO ref VALUES (2,2), (3,3); +INSERT INTO local_test VALUES (3,3), (4,4); +SET client_min_messages TO DEBUG; +-- we should be able to run set operations with local tables +(SELECT x FROM test) INTERSECT (SELECT x FROM local_test) ORDER BY 1 DESC; +DEBUG: generating subplan 3_1 for subquery SELECT x FROM recursive_set_local.local_test +DEBUG: generating subplan 3_2 for subquery SELECT x FROM recursive_set_local.test +DEBUG: Plan 3 query after replacing subqueries and CTEs: SELECT intermediate_result.x FROM read_intermediate_result('3_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer) INTERSECT SELECT intermediate_result.x FROM read_intermediate_result('3_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer) ORDER BY 1 DESC +DEBUG: Creating router plan +DEBUG: Plan is router executable + x +--- +(0 rows) + +-- we should be able to run set operations with generate series +(SELECT x FROM test) INTERSECT (SELECT i FROM generate_series(0, 100) i) ORDER BY 1 DESC; +DEBUG: generating subplan 5_1 for subquery SELECT x FROM recursive_set_local.test +DEBUG: Plan 5 query after replacing subqueries and CTEs: SELECT intermediate_result.x FROM read_intermediate_result('5_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer) INTERSECT SELECT i.i FROM generate_series(0, 100) i(i) ORDER BY 1 DESC +DEBUG: Creating router plan +DEBUG: Plan is router executable + x +--- + 2 + 1 +(2 rows) + +-- we'd first recursively plan the query with "test", thus don't need to recursively +-- plan other query +(SELECT x FROM test LIMIT 5) INTERSECT (SELECT i FROM generate_series(0, 100) i) ORDER BY 1 DESC; +DEBUG: push down of limit count: 5 +DEBUG: generating subplan 7_1 for subquery SELECT x FROM recursive_set_local.test LIMIT 5 +DEBUG: Plan 7 query after replacing subqueries and CTEs: SELECT intermediate_result.x FROM read_intermediate_result('7_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer) INTERSECT SELECT i.i FROM generate_series(0, 100) i(i) ORDER BY 1 DESC +DEBUG: Creating router plan +DEBUG: Plan is router executable + x +--- + 2 + 1 +(2 rows) + +-- this doesn't require any recursive planning +(SELECT a FROM ref) INTERSECT (SELECT i FROM generate_series(0, 100) i) ORDER BY 1 DESC; +DEBUG: Creating router plan +DEBUG: Plan is router executable + a +--- + 3 + 2 +(2 rows) + +-- same query with a failure on the worker (i.e., division by zero) +(SELECT x FROM test) INTERSECT (SELECT i/0 FROM generate_series(0, 100) i) ORDER BY 1 DESC; +DEBUG: generating subplan 10_1 for subquery SELECT x FROM recursive_set_local.test +DEBUG: Plan 10 query after replacing subqueries and CTEs: SELECT intermediate_result.x FROM read_intermediate_result('10_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer) INTERSECT SELECT (i.i / 0) FROM generate_series(0, 100) i(i) ORDER BY 1 DESC +DEBUG: Creating router plan +DEBUG: Plan is router executable +WARNING: division by zero +CONTEXT: while executing command on localhost:57637 +ERROR: could not receive query results +-- we should be able to run set operations with generate series and local tables as well +((SELECT x FROM local_test) UNION ALL (SELECT x FROM test)) INTERSECT (SELECT i FROM generate_series(0, 100) i) ORDER BY 1 DESC; +DEBUG: generating subplan 12_1 for subquery SELECT x FROM recursive_set_local.local_test +DEBUG: generating subplan 12_2 for subquery SELECT x FROM recursive_set_local.test +DEBUG: Plan 12 query after replacing subqueries and CTEs: (SELECT intermediate_result.x FROM read_intermediate_result('12_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer) UNION ALL SELECT intermediate_result.x FROM read_intermediate_result('12_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) INTERSECT SELECT i.i FROM generate_series(0, 100) i(i) ORDER BY 1 DESC +DEBUG: Creating router plan +DEBUG: Plan is router executable + x +--- + 4 + 3 + 2 + 1 +(4 rows) + +-- two local tables are on different leaf queries, so safe to plan & execute +((SELECT x FROM local_test) UNION ALL (SELECT x FROM test)) INTERSECT (SELECT x FROM local_test) ORDER BY 1 DESC; +DEBUG: generating subplan 14_1 for subquery SELECT x FROM recursive_set_local.local_test +DEBUG: generating subplan 14_2 for subquery SELECT x FROM recursive_set_local.local_test +DEBUG: generating subplan 14_3 for subquery SELECT x FROM recursive_set_local.test +DEBUG: Plan 14 query after replacing subqueries and CTEs: (SELECT intermediate_result.x FROM read_intermediate_result('14_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer) UNION ALL SELECT intermediate_result.x FROM read_intermediate_result('14_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) INTERSECT SELECT intermediate_result.x FROM read_intermediate_result('14_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer) ORDER BY 1 DESC +DEBUG: Creating router plan +DEBUG: Plan is router executable + x +--- + 4 + 3 +(2 rows) + +-- use ctes inside unions along with local tables on the top level +WITH +cte_1 AS (SELECT user_id FROM users_table), +cte_2 AS (SELECT user_id FROM events_table) +((SELECT * FROM cte_1) UNION (SELECT * FROM cte_2) UNION (SELECT x FROM local_test)) INTERSECT (SELECT i FROM generate_series(0, 100) i) +ORDER BY 1 DESC; +DEBUG: generating subplan 16_1 for CTE cte_1: SELECT user_id FROM public.users_table +DEBUG: generating subplan 16_2 for CTE cte_2: SELECT user_id FROM public.events_table +DEBUG: generating subplan 16_3 for subquery SELECT x FROM recursive_set_local.local_test +DEBUG: Plan 16 query after replacing subqueries and CTEs: (SELECT cte_1.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('16_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte_1 UNION SELECT cte_2.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('16_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte_2 UNION SELECT intermediate_result.x FROM read_intermediate_result('16_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) INTERSECT SELECT i.i FROM generate_series(0, 100) i(i) ORDER BY 1 DESC +DEBUG: Creating router plan +DEBUG: Plan is router executable + user_id +--------- + 6 + 5 + 4 + 3 + 2 + 1 +(6 rows) + +-- CTEs inside subqueries unioned with local table +-- final query is real-time +SELECT + count(*) +FROM + ( + ((WITH cte_1 AS (SELECT x FROM test) SELECT * FROM cte_1) UNION + (WITH cte_1 AS (SELECT a FROM ref) SELECT * FROM cte_1)) INTERSECT + (SELECT x FROM local_test) + ) as foo, + test + WHERE test.y = foo.x; +DEBUG: generating subplan 19_1 for CTE cte_1: SELECT x FROM recursive_set_local.test +DEBUG: generating subplan 19_2 for CTE cte_1: SELECT a FROM recursive_set_local.ref +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 19_3 for subquery SELECT x FROM recursive_set_local.local_test +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 19_4 for subquery (SELECT cte_1.x FROM (SELECT intermediate_result.x FROM read_intermediate_result('19_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) cte_1 UNION SELECT cte_1.a FROM (SELECT intermediate_result.a FROM read_intermediate_result('19_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) cte_1) INTERSECT SELECT intermediate_result.x FROM read_intermediate_result('19_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer) +DEBUG: Plan 19 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.x FROM read_intermediate_result('19_4'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) foo, recursive_set_local.test WHERE (test.y = foo.x) + count +------- + 0 +(1 row) + +-- CTEs inside subqueries unioned with local table +-- final query is router +SELECT + count(*) +FROM + ( + ((WITH cte_1 AS (SELECT x FROM test) SELECT * FROM cte_1) UNION + (WITH cte_1 AS (SELECT a FROM ref) SELECT * FROM cte_1)) INTERSECT + (SELECT x FROM local_test) + ) as foo, + ref + WHERE ref.a = foo.x; +DEBUG: generating subplan 23_1 for CTE cte_1: SELECT x FROM recursive_set_local.test +DEBUG: generating subplan 23_2 for CTE cte_1: SELECT a FROM recursive_set_local.ref +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 23_3 for subquery SELECT x FROM recursive_set_local.local_test +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 23_4 for subquery (SELECT cte_1.x FROM (SELECT intermediate_result.x FROM read_intermediate_result('23_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) cte_1 UNION SELECT cte_1.a FROM (SELECT intermediate_result.a FROM read_intermediate_result('23_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) cte_1) INTERSECT SELECT intermediate_result.x FROM read_intermediate_result('23_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer) +DEBUG: Plan 23 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.x FROM read_intermediate_result('23_4'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) foo, recursive_set_local.ref WHERE (ref.a = foo.x) +DEBUG: Creating router plan +DEBUG: Plan is router executable + count +------- + 1 +(1 row) + +-- subquery union in WHERE clause without parition column equality is recursively planned including the local tables +SELECT * FROM test a WHERE x IN (SELECT x FROM test b UNION SELECT y FROM test c UNION SELECT y FROM local_test d) ORDER BY 1,2; +DEBUG: generating subplan 27_1 for subquery SELECT y FROM recursive_set_local.local_test d +DEBUG: generating subplan 27_2 for subquery SELECT x FROM recursive_set_local.test b +DEBUG: generating subplan 27_3 for subquery SELECT y FROM recursive_set_local.test c +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 27_4 for subquery SELECT intermediate_result.x FROM read_intermediate_result('27_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer) UNION SELECT intermediate_result.y FROM read_intermediate_result('27_3'::text, 'binary'::citus_copy_format) intermediate_result(y integer) UNION SELECT intermediate_result.y FROM read_intermediate_result('27_1'::text, 'binary'::citus_copy_format) intermediate_result(y integer) +DEBUG: Plan 27 query after replacing subqueries and CTEs: SELECT x, y FROM recursive_set_local.test a WHERE (x IN (SELECT intermediate_result.x FROM read_intermediate_result('27_4'::text, 'binary'::citus_copy_format) intermediate_result(x integer))) ORDER BY x, y + x | y +---+--- + 1 | 1 + 2 | 2 +(2 rows) + +-- same query with subquery in where is wrapped in CTE +SELECT * FROM test a WHERE x IN (WITH cte AS (SELECT x FROM test b UNION SELECT y FROM test c UNION SELECT y FROM local_test d) SELECT * FROM cte) ORDER BY 1,2; +DEBUG: generating subplan 31_1 for CTE cte: SELECT b.x FROM recursive_set_local.test b UNION SELECT c.y FROM recursive_set_local.test c UNION SELECT d.y FROM recursive_set_local.local_test d +DEBUG: generating subplan 32_1 for subquery SELECT y FROM recursive_set_local.local_test d +DEBUG: generating subplan 32_2 for subquery SELECT x FROM recursive_set_local.test b +DEBUG: generating subplan 32_3 for subquery SELECT y FROM recursive_set_local.test c +DEBUG: Plan 32 query after replacing subqueries and CTEs: SELECT intermediate_result.x FROM read_intermediate_result('32_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer) UNION SELECT intermediate_result.y FROM read_intermediate_result('32_3'::text, 'binary'::citus_copy_format) intermediate_result(y integer) UNION SELECT intermediate_result.y FROM read_intermediate_result('32_1'::text, 'binary'::citus_copy_format) intermediate_result(y integer) +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: Plan 31 query after replacing subqueries and CTEs: SELECT x, y FROM recursive_set_local.test a WHERE (x IN (SELECT cte.x FROM (SELECT intermediate_result.x FROM read_intermediate_result('31_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) cte)) ORDER BY x, y + x | y +---+--- + 1 | 1 + 2 | 2 +(2 rows) + +-- not supported since local table is joined with a set operation +SELECT * FROM ((SELECT * FROM test) EXCEPT (SELECT * FROM test ORDER BY x LIMIT 1)) u JOIN local_test USING (x) ORDER BY 1,2; +DEBUG: push down of limit count: 1 +DEBUG: generating subplan 35_1 for subquery SELECT x, y FROM recursive_set_local.test ORDER BY x LIMIT 1 +DEBUG: generating subplan 35_2 for subquery SELECT x, y FROM recursive_set_local.test +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 35_3 for subquery SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('35_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) EXCEPT SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('35_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) +DEBUG: Plan 35 query after replacing subqueries and CTEs: SELECT u.x, u.y, local_test.y FROM ((SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('35_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) u JOIN recursive_set_local.local_test USING (x)) ORDER BY u.x, u.y +ERROR: relation local_test is not distributed +-- though we replace some queries including the local query, the intermediate result is on the outer part of an outer join +SELECT * FROM ((SELECT * FROM local_test) INTERSECT (SELECT * FROM test ORDER BY x LIMIT 1)) u LEFT JOIN test USING (x) ORDER BY 1,2; +DEBUG: generating subplan 39_1 for subquery SELECT x, y FROM recursive_set_local.local_test +DEBUG: push down of limit count: 1 +DEBUG: generating subplan 39_2 for subquery SELECT x, y FROM recursive_set_local.test ORDER BY x LIMIT 1 +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 39_3 for subquery SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('39_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) INTERSECT SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('39_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) +DEBUG: Plan 39 query after replacing subqueries and CTEs: SELECT u.x, u.y, test.y FROM ((SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('39_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) u LEFT JOIN recursive_set_local.test USING (x)) ORDER BY u.x, u.y +ERROR: cannot pushdown the subquery +DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join +-- we replace some queries including the local query, the intermediate result is on the inner part of an outer join +SELECT * FROM ((SELECT * FROM local_test) INTERSECT (SELECT * FROM test ORDER BY x LIMIT 1)) u RIGHT JOIN test USING (x) ORDER BY 1,2; +DEBUG: generating subplan 42_1 for subquery SELECT x, y FROM recursive_set_local.local_test +DEBUG: push down of limit count: 1 +DEBUG: generating subplan 42_2 for subquery SELECT x, y FROM recursive_set_local.test ORDER BY x LIMIT 1 +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 42_3 for subquery SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('42_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) INTERSECT SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('42_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) +DEBUG: Plan 42 query after replacing subqueries and CTEs: SELECT test.x, u.y, test.y FROM ((SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('42_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) u RIGHT JOIN recursive_set_local.test USING (x)) ORDER BY test.x, u.y + x | y | y +---+---+--- + 1 | | 1 + 2 | | 2 +(2 rows) + +-- recurively plan left part of the join, and run a final real-time query +SELECT * FROM ((SELECT * FROM local_test) INTERSECT (SELECT * FROM test ORDER BY x LIMIT 1)) u INNER JOIN test USING (x) ORDER BY 1,2; +DEBUG: generating subplan 45_1 for subquery SELECT x, y FROM recursive_set_local.local_test +DEBUG: push down of limit count: 1 +DEBUG: generating subplan 45_2 for subquery SELECT x, y FROM recursive_set_local.test ORDER BY x LIMIT 1 +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 45_3 for subquery SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('45_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) INTERSECT SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('45_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) +DEBUG: Plan 45 query after replacing subqueries and CTEs: SELECT u.x, u.y, test.y FROM ((SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('45_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) u JOIN recursive_set_local.test USING (x)) ORDER BY u.x, u.y + x | y | y +---+---+--- +(0 rows) + +-- distributed table in WHERE clause, but not FROM clause still disallowed +SELECT * FROM ((SELECT x FROM test) UNION (SELECT x FROM (SELECT x FROM local_test) as foo WHERE x IN (SELECT x FROM test))) u ORDER BY 1; +DEBUG: generating subplan 48_1 for subquery SELECT x FROM recursive_set_local.local_test +ERROR: cannot pushdown the subquery +DETAIL: Complex subqueries and CTEs are not allowed in the FROM clause when the query has subqueries in the WHERE clause +SET citus.enable_repartition_joins TO ON; +-- repartition is recursively planned before the set operation +(SELECT x FROM test) INTERSECT (SELECT t1.x FROM test as t1, test as t2 WHERE t1.x = t2.y LIMIT 2) INTERSECT (((SELECT x FROM local_test) UNION ALL (SELECT x FROM test)) INTERSECT (SELECT i FROM generate_series(0, 100) i)) ORDER BY 1 DESC; +DEBUG: push down of limit count: 2 +DEBUG: join prunable for task partitionId 0 and 1 +DEBUG: join prunable for task partitionId 0 and 2 +DEBUG: join prunable for task partitionId 0 and 3 +DEBUG: join prunable for task partitionId 1 and 0 +DEBUG: join prunable for task partitionId 1 and 2 +DEBUG: join prunable for task partitionId 1 and 3 +DEBUG: join prunable for task partitionId 2 and 0 +DEBUG: join prunable for task partitionId 2 and 1 +DEBUG: join prunable for task partitionId 2 and 3 +DEBUG: join prunable for task partitionId 3 and 0 +DEBUG: join prunable for task partitionId 3 and 1 +DEBUG: join prunable for task partitionId 3 and 2 +DEBUG: pruning merge fetch taskId 1 +DETAIL: Creating dependency on merge taskId 9 +DEBUG: pruning merge fetch taskId 2 +DETAIL: Creating dependency on merge taskId 9 +DEBUG: pruning merge fetch taskId 4 +DETAIL: Creating dependency on merge taskId 14 +DEBUG: pruning merge fetch taskId 5 +DETAIL: Creating dependency on merge taskId 14 +DEBUG: pruning merge fetch taskId 7 +DETAIL: Creating dependency on merge taskId 19 +DEBUG: pruning merge fetch taskId 8 +DETAIL: Creating dependency on merge taskId 19 +DEBUG: pruning merge fetch taskId 10 +DETAIL: Creating dependency on merge taskId 24 +DEBUG: pruning merge fetch taskId 11 +DETAIL: Creating dependency on merge taskId 24 +DEBUG: cannot use real time executor with repartition jobs +HINT: Since you enabled citus.enable_repartition_joins Citus chose to use task-tracker. +DEBUG: generating subplan 50_1 for subquery SELECT t1.x FROM recursive_set_local.test t1, recursive_set_local.test t2 WHERE (t1.x = t2.y) LIMIT 2 +DEBUG: generating subplan 50_2 for subquery SELECT x FROM recursive_set_local.local_test +DEBUG: generating subplan 50_3 for subquery SELECT x FROM recursive_set_local.test +DEBUG: generating subplan 50_4 for subquery SELECT x FROM recursive_set_local.test +DEBUG: Plan 50 query after replacing subqueries and CTEs: SELECT intermediate_result.x FROM read_intermediate_result('50_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer) INTERSECT SELECT intermediate_result.x FROM read_intermediate_result('50_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer) INTERSECT ((SELECT intermediate_result.x FROM read_intermediate_result('50_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer) UNION ALL SELECT intermediate_result.x FROM read_intermediate_result('50_4'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) INTERSECT SELECT i.i FROM generate_series(0, 100) i(i)) ORDER BY 1 DESC +DEBUG: Creating router plan +DEBUG: Plan is router executable + x +--- + 2 + 1 +(2 rows) + +SET citus.enable_repartition_joins TO OFF; +RESET client_min_messages; +DROP SCHEMA recursive_set_local CASCADE; +NOTICE: drop cascades to 3 other objects +DETAIL: drop cascades to table test +drop cascades to table ref +drop cascades to table local_test diff --git a/src/test/regress/expected/set_operations.out b/src/test/regress/expected/set_operations.out new file mode 100644 index 000000000..e93e68a35 --- /dev/null +++ b/src/test/regress/expected/set_operations.out @@ -0,0 +1,868 @@ +CREATE SCHEMA recursive_union; +SET search_path TO recursive_union, public; +CREATE TABLE recursive_union.test (x int, y int); +SELECT create_distributed_table('test', 'x'); + create_distributed_table +-------------------------- + +(1 row) + +CREATE TABLE recursive_union.ref (a int, b int); +SELECT create_reference_table('ref'); + create_reference_table +------------------------ + +(1 row) + +INSERT INTO test VALUES (1,1), (2,2); +INSERT INTO ref VALUES (2,2), (3,3); +-- top-level set operations are supported through recursive planning +SET client_min_messages TO DEBUG; +(SELECT * FROM test) UNION (SELECT * FROM test) ORDER BY 1,2; +DEBUG: generating subplan 3_1 for subquery SELECT x, y FROM recursive_union.test +DEBUG: generating subplan 3_2 for subquery SELECT x, y FROM recursive_union.test +DEBUG: Plan 3 query after replacing subqueries and CTEs: SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('3_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) UNION SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('3_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) ORDER BY 1, 2 +DEBUG: Creating router plan +DEBUG: Plan is router executable + x | y +---+--- + 1 | 1 + 2 | 2 +(2 rows) + +(SELECT * FROM test) UNION (SELECT * FROM ref) ORDER BY 1,2; +DEBUG: generating subplan 6_1 for subquery SELECT x, y FROM recursive_union.test +DEBUG: Plan 6 query after replacing subqueries and CTEs: SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('6_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) UNION SELECT ref.a, ref.b FROM recursive_union.ref ORDER BY 1, 2 +DEBUG: Creating router plan +DEBUG: Plan is router executable + x | y +---+--- + 1 | 1 + 2 | 2 + 3 | 3 +(3 rows) + +(SELECT * FROM ref) UNION (SELECT * FROM ref) ORDER BY 1,2; +DEBUG: Creating router plan +DEBUG: Plan is router executable + a | b +---+--- + 2 | 2 + 3 | 3 +(2 rows) + +(SELECT * FROM test) UNION ALL (SELECT * FROM test) ORDER BY 1,2; +DEBUG: generating subplan 9_1 for subquery SELECT x, y FROM recursive_union.test +DEBUG: generating subplan 9_2 for subquery SELECT x, y FROM recursive_union.test +DEBUG: Plan 9 query after replacing subqueries and CTEs: SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('9_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) UNION ALL SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('9_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) ORDER BY 1, 2 +DEBUG: Creating router plan +DEBUG: Plan is router executable + x | y +---+--- + 1 | 1 + 1 | 1 + 2 | 2 + 2 | 2 +(4 rows) + +(SELECT * FROM test) UNION ALL (SELECT * FROM ref) ORDER BY 1,2; +DEBUG: generating subplan 12_1 for subquery SELECT x, y FROM recursive_union.test +DEBUG: Plan 12 query after replacing subqueries and CTEs: SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('12_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) UNION ALL SELECT ref.a, ref.b FROM recursive_union.ref ORDER BY 1, 2 +DEBUG: Creating router plan +DEBUG: Plan is router executable + x | y +---+--- + 1 | 1 + 2 | 2 + 2 | 2 + 3 | 3 +(4 rows) + +(SELECT * FROM ref) UNION ALL (SELECT * FROM ref) ORDER BY 1,2; +DEBUG: Creating router plan +DEBUG: Plan is router executable + a | b +---+--- + 2 | 2 + 2 | 2 + 3 | 3 + 3 | 3 +(4 rows) + +(SELECT * FROM test) INTERSECT (SELECT * FROM test) ORDER BY 1,2; +DEBUG: generating subplan 15_1 for subquery SELECT x, y FROM recursive_union.test +DEBUG: generating subplan 15_2 for subquery SELECT x, y FROM recursive_union.test +DEBUG: Plan 15 query after replacing subqueries and CTEs: SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('15_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) INTERSECT SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('15_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) ORDER BY 1, 2 +DEBUG: Creating router plan +DEBUG: Plan is router executable + x | y +---+--- + 1 | 1 + 2 | 2 +(2 rows) + +(SELECT * FROM test) INTERSECT (SELECT * FROM ref) ORDER BY 1,2; +DEBUG: generating subplan 18_1 for subquery SELECT x, y FROM recursive_union.test +DEBUG: Plan 18 query after replacing subqueries and CTEs: SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('18_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) INTERSECT SELECT ref.a, ref.b FROM recursive_union.ref ORDER BY 1, 2 +DEBUG: Creating router plan +DEBUG: Plan is router executable + x | y +---+--- + 2 | 2 +(1 row) + +(SELECT * FROM ref) INTERSECT (SELECT * FROM ref) ORDER BY 1,2; +DEBUG: Creating router plan +DEBUG: Plan is router executable + a | b +---+--- + 2 | 2 + 3 | 3 +(2 rows) + +(SELECT * FROM test) INTERSECT ALL (SELECT * FROM test) ORDER BY 1,2; +DEBUG: generating subplan 21_1 for subquery SELECT x, y FROM recursive_union.test +DEBUG: generating subplan 21_2 for subquery SELECT x, y FROM recursive_union.test +DEBUG: Plan 21 query after replacing subqueries and CTEs: SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('21_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) INTERSECT ALL SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('21_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) ORDER BY 1, 2 +DEBUG: Creating router plan +DEBUG: Plan is router executable + x | y +---+--- + 1 | 1 + 2 | 2 +(2 rows) + +(SELECT * FROM test) INTERSECT ALL (SELECT * FROM ref) ORDER BY 1,2; +DEBUG: generating subplan 24_1 for subquery SELECT x, y FROM recursive_union.test +DEBUG: Plan 24 query after replacing subqueries and CTEs: SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('24_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) INTERSECT ALL SELECT ref.a, ref.b FROM recursive_union.ref ORDER BY 1, 2 +DEBUG: Creating router plan +DEBUG: Plan is router executable + x | y +---+--- + 2 | 2 +(1 row) + +(SELECT * FROM ref) INTERSECT ALL (SELECT * FROM ref) ORDER BY 1,2; +DEBUG: Creating router plan +DEBUG: Plan is router executable + a | b +---+--- + 2 | 2 + 3 | 3 +(2 rows) + +(SELECT * FROM test) EXCEPT (SELECT * FROM test) ORDER BY 1,2; +DEBUG: generating subplan 27_1 for subquery SELECT x, y FROM recursive_union.test +DEBUG: generating subplan 27_2 for subquery SELECT x, y FROM recursive_union.test +DEBUG: Plan 27 query after replacing subqueries and CTEs: SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('27_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) EXCEPT SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('27_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) ORDER BY 1, 2 +DEBUG: Creating router plan +DEBUG: Plan is router executable + x | y +---+--- +(0 rows) + +(SELECT * FROM test) EXCEPT (SELECT * FROM ref) ORDER BY 1,2; +DEBUG: generating subplan 30_1 for subquery SELECT x, y FROM recursive_union.test +DEBUG: Plan 30 query after replacing subqueries and CTEs: SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('30_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) EXCEPT SELECT ref.a, ref.b FROM recursive_union.ref ORDER BY 1, 2 +DEBUG: Creating router plan +DEBUG: Plan is router executable + x | y +---+--- + 1 | 1 +(1 row) + +(SELECT * FROM ref) EXCEPT (SELECT * FROM ref) ORDER BY 1,2; +DEBUG: Creating router plan +DEBUG: Plan is router executable + a | b +---+--- +(0 rows) + +(SELECT * FROM test) EXCEPT ALL (SELECT * FROM test) ORDER BY 1,2; +DEBUG: generating subplan 33_1 for subquery SELECT x, y FROM recursive_union.test +DEBUG: generating subplan 33_2 for subquery SELECT x, y FROM recursive_union.test +DEBUG: Plan 33 query after replacing subqueries and CTEs: SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('33_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) EXCEPT ALL SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('33_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) ORDER BY 1, 2 +DEBUG: Creating router plan +DEBUG: Plan is router executable + x | y +---+--- +(0 rows) + +(SELECT * FROM test) EXCEPT ALL (SELECT * FROM ref) ORDER BY 1,2; +DEBUG: generating subplan 36_1 for subquery SELECT x, y FROM recursive_union.test +DEBUG: Plan 36 query after replacing subqueries and CTEs: SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('36_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) EXCEPT ALL SELECT ref.a, ref.b FROM recursive_union.ref ORDER BY 1, 2 +DEBUG: Creating router plan +DEBUG: Plan is router executable + x | y +---+--- + 1 | 1 +(1 row) + +(SELECT * FROM ref) EXCEPT ALL (SELECT * FROM ref) ORDER BY 1,2; +DEBUG: Creating router plan +DEBUG: Plan is router executable + a | b +---+--- +(0 rows) + +-- more complex set operation trees are supported +(SELECT * FROM test) +INTERSECT +(SELECT * FROM ref) +UNION ALL +(SELECT s, s FROM generate_series(1,10) s) +EXCEPT +(SELECT 1,1) +UNION +(SELECT test.x, ref.a FROM test LEFT JOIN ref ON (x = a)) +ORDER BY 1,2; +DEBUG: generating subplan 39_1 for subquery SELECT x, y FROM recursive_union.test +DEBUG: generating subplan 39_2 for subquery SELECT test.x, ref.a FROM (recursive_union.test LEFT JOIN recursive_union.ref ON ((test.x = ref.a))) +DEBUG: Plan 39 query after replacing subqueries and CTEs: (((SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('39_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) INTERSECT SELECT ref.a, ref.b FROM recursive_union.ref) UNION ALL SELECT s.s, s.s FROM generate_series(1, 10) s(s)) EXCEPT SELECT 1, 1) UNION SELECT intermediate_result.x, intermediate_result.a FROM read_intermediate_result('39_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer, a integer) ORDER BY 1, 2 +DEBUG: Creating router plan +DEBUG: Plan is router executable + x | y +----+---- + 1 | + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 10 | 10 +(10 rows) + +-- within a subquery, some unions can be pushed down +SELECT * FROM ((SELECT * FROM test) UNION (SELECT * FROM test)) u ORDER BY 1,2; + x | y +---+--- + 1 | 1 + 2 | 2 +(2 rows) + +SELECT * FROM ((SELECT x, y FROM test) UNION (SELECT y, x FROM test)) u ORDER BY 1,2; +DEBUG: generating subplan 43_1 for subquery SELECT x, y FROM recursive_union.test +DEBUG: generating subplan 43_2 for subquery SELECT y, x FROM recursive_union.test +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 43_3 for subquery SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('43_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) UNION SELECT intermediate_result.y, intermediate_result.x FROM read_intermediate_result('43_2'::text, 'binary'::citus_copy_format) intermediate_result(y integer, x integer) +DEBUG: Plan 43 query after replacing subqueries and CTEs: SELECT x, y FROM (SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('43_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) u ORDER BY x, y +DEBUG: Creating router plan +DEBUG: Plan is router executable + x | y +---+--- + 1 | 1 + 2 | 2 +(2 rows) + +SELECT * FROM ((SELECT * FROM test) UNION (SELECT * FROM ref)) u ORDER BY 1,2; +DEBUG: generating subplan 47_1 for subquery SELECT x, y FROM recursive_union.test +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 47_2 for subquery SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('47_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) UNION SELECT ref.a, ref.b FROM recursive_union.ref +DEBUG: Plan 47 query after replacing subqueries and CTEs: SELECT x, y FROM (SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('47_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) u ORDER BY x, y +DEBUG: Creating router plan +DEBUG: Plan is router executable + x | y +---+--- + 1 | 1 + 2 | 2 + 3 | 3 +(3 rows) + +SELECT * FROM ((SELECT * FROM ref) UNION (SELECT * FROM ref)) u ORDER BY 1,2; +DEBUG: Creating router plan +DEBUG: Plan is router executable + a | b +---+--- + 2 | 2 + 3 | 3 +(2 rows) + +SELECT * FROM ((SELECT * FROM test) UNION ALL (SELECT * FROM test)) u ORDER BY 1,2; + x | y +---+--- + 1 | 1 + 1 | 1 + 2 | 2 + 2 | 2 +(4 rows) + +SELECT * FROM ((SELECT x, y FROM test) UNION ALL (SELECT y, x FROM test)) u ORDER BY 1,2; + x | y +---+--- + 1 | 1 + 1 | 1 + 2 | 2 + 2 | 2 +(4 rows) + +SELECT * FROM ((SELECT * FROM test) UNION ALL (SELECT * FROM ref)) u ORDER BY 1,2; +DEBUG: generating subplan 53_1 for subquery SELECT x, y FROM recursive_union.test +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 53_2 for subquery SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('53_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) UNION ALL SELECT ref.a, ref.b FROM recursive_union.ref +DEBUG: Plan 53 query after replacing subqueries and CTEs: SELECT x, y FROM (SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('53_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) u ORDER BY x, y +DEBUG: Creating router plan +DEBUG: Plan is router executable + x | y +---+--- + 1 | 1 + 2 | 2 + 2 | 2 + 3 | 3 +(4 rows) + +SELECT * FROM ((SELECT * FROM ref) UNION ALL (SELECT * FROM ref)) u ORDER BY 1,2; +DEBUG: Creating router plan +DEBUG: Plan is router executable + a | b +---+--- + 2 | 2 + 2 | 2 + 3 | 3 + 3 | 3 +(4 rows) + +SELECT * FROM ((SELECT * FROM test) INTERSECT (SELECT * FROM test)) u ORDER BY 1,2; +DEBUG: generating subplan 57_1 for subquery SELECT x, y FROM recursive_union.test +DEBUG: generating subplan 57_2 for subquery SELECT x, y FROM recursive_union.test +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 57_3 for subquery SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('57_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) INTERSECT SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('57_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) +DEBUG: Plan 57 query after replacing subqueries and CTEs: SELECT x, y FROM (SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('57_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) u ORDER BY x, y +DEBUG: Creating router plan +DEBUG: Plan is router executable + x | y +---+--- + 1 | 1 + 2 | 2 +(2 rows) + +SELECT * FROM ((SELECT x, y FROM test) INTERSECT (SELECT y, x FROM test)) u ORDER BY 1,2; +DEBUG: generating subplan 61_1 for subquery SELECT x, y FROM recursive_union.test +DEBUG: generating subplan 61_2 for subquery SELECT y, x FROM recursive_union.test +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 61_3 for subquery SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('61_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) INTERSECT SELECT intermediate_result.y, intermediate_result.x FROM read_intermediate_result('61_2'::text, 'binary'::citus_copy_format) intermediate_result(y integer, x integer) +DEBUG: Plan 61 query after replacing subqueries and CTEs: SELECT x, y FROM (SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('61_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) u ORDER BY x, y +DEBUG: Creating router plan +DEBUG: Plan is router executable + x | y +---+--- + 1 | 1 + 2 | 2 +(2 rows) + +SELECT * FROM ((SELECT * FROM test) INTERSECT (SELECT * FROM ref)) u ORDER BY 1,2; +DEBUG: generating subplan 65_1 for subquery SELECT x, y FROM recursive_union.test +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 65_2 for subquery SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('65_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) INTERSECT SELECT ref.a, ref.b FROM recursive_union.ref +DEBUG: Plan 65 query after replacing subqueries and CTEs: SELECT x, y FROM (SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('65_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) u ORDER BY x, y +DEBUG: Creating router plan +DEBUG: Plan is router executable + x | y +---+--- + 2 | 2 +(1 row) + +SELECT * FROM ((SELECT * FROM ref) INTERSECT (SELECT * FROM ref)) u ORDER BY 1,2; +DEBUG: Creating router plan +DEBUG: Plan is router executable + a | b +---+--- + 2 | 2 + 3 | 3 +(2 rows) + +SELECT * FROM ((SELECT * FROM test) EXCEPT (SELECT * FROM test)) u ORDER BY 1,2; +DEBUG: generating subplan 69_1 for subquery SELECT x, y FROM recursive_union.test +DEBUG: generating subplan 69_2 for subquery SELECT x, y FROM recursive_union.test +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 69_3 for subquery SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('69_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) EXCEPT SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('69_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) +DEBUG: Plan 69 query after replacing subqueries and CTEs: SELECT x, y FROM (SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('69_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) u ORDER BY x, y +DEBUG: Creating router plan +DEBUG: Plan is router executable + x | y +---+--- +(0 rows) + +SELECT * FROM ((SELECT x, y FROM test) EXCEPT (SELECT y, x FROM test)) u ORDER BY 1,2; +DEBUG: generating subplan 73_1 for subquery SELECT x, y FROM recursive_union.test +DEBUG: generating subplan 73_2 for subquery SELECT y, x FROM recursive_union.test +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 73_3 for subquery SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('73_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) EXCEPT SELECT intermediate_result.y, intermediate_result.x FROM read_intermediate_result('73_2'::text, 'binary'::citus_copy_format) intermediate_result(y integer, x integer) +DEBUG: Plan 73 query after replacing subqueries and CTEs: SELECT x, y FROM (SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('73_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) u ORDER BY x, y +DEBUG: Creating router plan +DEBUG: Plan is router executable + x | y +---+--- +(0 rows) + +SELECT * FROM ((SELECT * FROM test) EXCEPT (SELECT * FROM ref)) u ORDER BY 1,2; +DEBUG: generating subplan 77_1 for subquery SELECT x, y FROM recursive_union.test +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 77_2 for subquery SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('77_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) EXCEPT SELECT ref.a, ref.b FROM recursive_union.ref +DEBUG: Plan 77 query after replacing subqueries and CTEs: SELECT x, y FROM (SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('77_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) u ORDER BY x, y +DEBUG: Creating router plan +DEBUG: Plan is router executable + x | y +---+--- + 1 | 1 +(1 row) + +SELECT * FROM ((SELECT * FROM ref) EXCEPT (SELECT * FROM ref)) u ORDER BY 1,2; +DEBUG: Creating router plan +DEBUG: Plan is router executable + a | b +---+--- +(0 rows) + +-- unions can even be pushed down within a join +SELECT * FROM ((SELECT * FROM test) UNION (SELECT * FROM test)) u JOIN test USING (x) ORDER BY 1,2; + x | y | y +---+---+--- + 1 | 1 | 1 + 2 | 2 | 2 +(2 rows) + +SELECT * FROM ((SELECT * FROM test) UNION ALL (SELECT * FROM test)) u LEFT JOIN test USING (x) ORDER BY 1,2; + x | y | y +---+---+--- + 1 | 1 | 1 + 1 | 1 | 1 + 2 | 2 | 2 + 2 | 2 | 2 +(4 rows) + +-- unions cannot be pushed down if one leaf recurs +SELECT * FROM ((SELECT * FROM test) UNION (SELECT * FROM test ORDER BY x LIMIT 1)) u JOIN test USING (x) ORDER BY 1,2; +DEBUG: push down of limit count: 1 +DEBUG: generating subplan 83_1 for subquery SELECT x, y FROM recursive_union.test ORDER BY x LIMIT 1 +DEBUG: generating subplan 83_2 for subquery SELECT x, y FROM recursive_union.test +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 83_3 for subquery SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('83_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) UNION SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('83_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) +DEBUG: Plan 83 query after replacing subqueries and CTEs: SELECT u.x, u.y, test.y FROM ((SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('83_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) u JOIN recursive_union.test USING (x)) ORDER BY u.x, u.y + x | y | y +---+---+--- + 1 | 1 | 1 + 2 | 2 | 2 +(2 rows) + +SELECT * FROM ((SELECT * FROM test) UNION ALL (SELECT * FROM test ORDER BY x LIMIT 1)) u LEFT JOIN test USING (x) ORDER BY 1,2; +DEBUG: push down of limit count: 1 +DEBUG: generating subplan 87_1 for subquery SELECT x, y FROM recursive_union.test ORDER BY x LIMIT 1 +DEBUG: generating subplan 87_2 for subquery SELECT x, y FROM recursive_union.test +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 87_3 for subquery SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('87_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) UNION ALL SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('87_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) +DEBUG: Plan 87 query after replacing subqueries and CTEs: SELECT u.x, u.y, test.y FROM ((SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('87_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) u LEFT JOIN recursive_union.test USING (x)) ORDER BY u.x, u.y +ERROR: cannot pushdown the subquery +DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join +-- unions in a join without partition column equality (column names from first query are used for join) +SELECT * FROM ((SELECT x, y FROM test) UNION (SELECT y, x FROM test)) u JOIN test USING (x) ORDER BY 1,2; +DEBUG: generating subplan 91_1 for subquery SELECT x, y FROM recursive_union.test +DEBUG: generating subplan 91_2 for subquery SELECT y, x FROM recursive_union.test +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 91_3 for subquery SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('91_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) UNION SELECT intermediate_result.y, intermediate_result.x FROM read_intermediate_result('91_2'::text, 'binary'::citus_copy_format) intermediate_result(y integer, x integer) +DEBUG: Plan 91 query after replacing subqueries and CTEs: SELECT u.x, u.y, test.y FROM ((SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('91_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) u JOIN recursive_union.test USING (x)) ORDER BY u.x, u.y + x | y | y +---+---+--- + 1 | 1 | 1 + 2 | 2 | 2 +(2 rows) + +SELECT * FROM ((SELECT x, y FROM test) UNION (SELECT 1, 1 FROM test)) u JOIN test USING (x) ORDER BY 1,2; +DEBUG: generating subplan 95_1 for subquery SELECT x, y FROM recursive_union.test +DEBUG: generating subplan 95_2 for subquery SELECT 1, 1 FROM recursive_union.test +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 95_3 for subquery SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('95_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) UNION SELECT intermediate_result."?column?", intermediate_result."?column?_1" AS "?column?" FROM read_intermediate_result('95_2'::text, 'binary'::citus_copy_format) intermediate_result("?column?" integer, "?column?_1" integer) +DEBUG: Plan 95 query after replacing subqueries and CTEs: SELECT u.x, u.y, test.y FROM ((SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('95_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) u JOIN recursive_union.test USING (x)) ORDER BY u.x, u.y + x | y | y +---+---+--- + 1 | 1 | 1 + 2 | 2 | 2 +(2 rows) + +-- a join between a set operation and a generate_series which is pushdownable + SELECT * FROM ((SELECT * FROM test) UNION (SELECT * FROM test ORDER BY x)) u JOIN generate_series(1,10) x USING (x) ORDER BY 1,2; + x | y +---+--- + 1 | 1 + 2 | 2 +(2 rows) + +-- a join between a set operation and a generate_series which is not pushdownable due to EXCEPT + SELECT * FROM ((SELECT * FROM test) EXCEPT (SELECT * FROM test ORDER BY x)) u JOIN generate_series(1,10) x USING (x) ORDER BY 1,2; +DEBUG: generating subplan 100_1 for subquery SELECT x, y FROM recursive_union.test +DEBUG: generating subplan 100_2 for subquery SELECT x, y FROM recursive_union.test ORDER BY x +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 100_3 for subquery SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('100_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) EXCEPT SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('100_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) +DEBUG: Plan 100 query after replacing subqueries and CTEs: SELECT u.x, u.y FROM ((SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('100_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) u JOIN generate_series(1, 10) x(x) USING (x)) ORDER BY u.x, u.y +DEBUG: Creating router plan +DEBUG: Plan is router executable + x | y +---+--- +(0 rows) + +-- subqueries in WHERE clause with set operations fails due to the current limitaions of recursive planning IN WHERE clause +SELECT * FROM ((SELECT * FROM test) UNION (SELECT * FROM test)) foo WHERE x IN (SELECT y FROM test); +ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position +DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column +-- subqueries in WHERE clause forced to be recursively planned +SELECT * FROM ((SELECT * FROM test) UNION (SELECT * FROM test)) foo WHERE x IN (SELECT y FROM test ORDER BY 1 LIMIT 4) ORDER BY 1; +DEBUG: push down of limit count: 4 +DEBUG: generating subplan 105_1 for subquery SELECT y FROM recursive_union.test ORDER BY y LIMIT 4 +DEBUG: Plan 105 query after replacing subqueries and CTEs: SELECT x, y FROM (SELECT test.x, test.y FROM recursive_union.test UNION SELECT test.x, test.y FROM recursive_union.test) foo WHERE (x IN (SELECT intermediate_result.y FROM read_intermediate_result('105_1'::text, 'binary'::citus_copy_format) intermediate_result(y integer))) ORDER BY x + x | y +---+--- + 1 | 1 + 2 | 2 +(2 rows) + +-- now both the set operations and the sublink is recursively planned +SELECT * FROM ((SELECT x,y FROM test) UNION (SELECT y,x FROM test)) foo WHERE x IN (SELECT y FROM test ORDER BY 1 LIMIT 4) ORDER BY 1; +DEBUG: push down of limit count: 4 +DEBUG: generating subplan 107_1 for subquery SELECT y FROM recursive_union.test ORDER BY y LIMIT 4 +DEBUG: generating subplan 107_2 for subquery SELECT x, y FROM recursive_union.test +DEBUG: generating subplan 107_3 for subquery SELECT y, x FROM recursive_union.test +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 107_4 for subquery SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('107_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) UNION SELECT intermediate_result.y, intermediate_result.x FROM read_intermediate_result('107_3'::text, 'binary'::citus_copy_format) intermediate_result(y integer, x integer) +DEBUG: Plan 107 query after replacing subqueries and CTEs: SELECT x, y FROM (SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('107_4'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) foo WHERE (x IN (SELECT intermediate_result.y FROM read_intermediate_result('107_1'::text, 'binary'::citus_copy_format) intermediate_result(y integer))) ORDER BY x +DEBUG: Creating router plan +DEBUG: Plan is router executable + x | y +---+--- + 1 | 1 + 2 | 2 +(2 rows) + +-- set operations are recursively planned and not the sublink, thus should error out +SELECT * FROM ((SELECT x,y FROM test) UNION (SELECT y,x FROM test)) foo WHERE x IN (SELECT y FROM test) ORDER BY 1; +DEBUG: generating subplan 112_1 for subquery SELECT x, y FROM recursive_union.test +DEBUG: generating subplan 112_2 for subquery SELECT y, x FROM recursive_union.test +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 112_3 for subquery SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('112_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) UNION SELECT intermediate_result.y, intermediate_result.x FROM read_intermediate_result('112_2'::text, 'binary'::citus_copy_format) intermediate_result(y integer, x integer) +DEBUG: Plan 112 query after replacing subqueries and CTEs: SELECT x, y FROM (SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('112_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) foo WHERE (x IN (SELECT test.y FROM recursive_union.test)) ORDER BY x +ERROR: cannot pushdown the subquery +DETAIL: Complex subqueries and CTEs are not allowed in the FROM clause when the query has subqueries in the WHERE clause +-- set operations works fine with pushdownable window functions +SELECT x, y, rnk FROM (SELECT *, rank() OVER my_win as rnk FROM test WINDOW my_win AS (PARTITION BY x ORDER BY y DESC)) as foo +UNION +SELECT x, y, rnk FROM (SELECT *, rank() OVER my_win as rnk FROM test WINDOW my_win AS (PARTITION BY x ORDER BY y DESC)) as bar +ORDER BY 1 DESC, 2 DESC, 3 DESC; +DEBUG: generating subplan 116_1 for subquery SELECT x, y, rnk FROM (SELECT test.x, test.y, rank() OVER my_win AS rnk FROM recursive_union.test WINDOW my_win AS (PARTITION BY test.x ORDER BY test.y DESC)) foo +DEBUG: generating subplan 116_2 for subquery SELECT x, y, rnk FROM (SELECT test.x, test.y, rank() OVER my_win AS rnk FROM recursive_union.test WINDOW my_win AS (PARTITION BY test.x ORDER BY test.y DESC)) bar +DEBUG: Plan 116 query after replacing subqueries and CTEs: SELECT intermediate_result.x, intermediate_result.y, intermediate_result.rnk FROM read_intermediate_result('116_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer, rnk bigint) UNION SELECT intermediate_result.x, intermediate_result.y, intermediate_result.rnk FROM read_intermediate_result('116_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer, rnk bigint) ORDER BY 1 DESC, 2 DESC, 3 DESC +DEBUG: Creating router plan +DEBUG: Plan is router executable + x | y | rnk +---+---+----- + 2 | 2 | 1 + 1 | 1 | 1 +(2 rows) + +-- set operations errors out with non-pushdownable window functions +SELECT x, y, rnk FROM (SELECT *, rank() OVER my_win as rnk FROM test WINDOW my_win AS (PARTITION BY y ORDER BY x DESC)) as foo +UNION +SELECT x, y, rnk FROM (SELECT *, rank() OVER my_win as rnk FROM test WINDOW my_win AS (PARTITION BY y ORDER BY x DESC)) as bar; +ERROR: could not run distributed query because the window function that is used cannot be pushed down +HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column +-- other set operations in joins also cannot be pushed down +SELECT * FROM ((SELECT * FROM test) EXCEPT (SELECT * FROM test ORDER BY x LIMIT 1)) u JOIN test USING (x) ORDER BY 1,2; +DEBUG: push down of limit count: 1 +DEBUG: generating subplan 121_1 for subquery SELECT x, y FROM recursive_union.test ORDER BY x LIMIT 1 +DEBUG: generating subplan 121_2 for subquery SELECT x, y FROM recursive_union.test +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 121_3 for subquery SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('121_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) EXCEPT SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('121_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) +DEBUG: Plan 121 query after replacing subqueries and CTEs: SELECT u.x, u.y, test.y FROM ((SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('121_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) u JOIN recursive_union.test USING (x)) ORDER BY u.x, u.y + x | y | y +---+---+--- + 2 | 2 | 2 +(1 row) + +SELECT * FROM ((SELECT * FROM test) INTERSECT (SELECT * FROM test ORDER BY x LIMIT 1)) u LEFT JOIN test USING (x) ORDER BY 1,2; +DEBUG: push down of limit count: 1 +DEBUG: generating subplan 125_1 for subquery SELECT x, y FROM recursive_union.test ORDER BY x LIMIT 1 +DEBUG: generating subplan 125_2 for subquery SELECT x, y FROM recursive_union.test +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 125_3 for subquery SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('125_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) INTERSECT SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('125_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) +DEBUG: Plan 125 query after replacing subqueries and CTEs: SELECT u.x, u.y, test.y FROM ((SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('125_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) u LEFT JOIN recursive_union.test USING (x)) ORDER BY u.x, u.y +ERROR: cannot pushdown the subquery +DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join +-- distributed table in WHERE clause, but not FROM clause still disallowed +SELECT * FROM ((SELECT * FROM test) UNION (SELECT * FROM ref WHERE a IN (SELECT x FROM test))) u ORDER BY 1,2; +ERROR: cannot pushdown the subquery +DETAIL: Reference tables are not allowed in FROM clause when the query has subqueries in WHERE clause +-- subquery union in WHERE clause with partition column equality and implicit join is pushed down +SELECT * FROM test a WHERE x IN (SELECT x FROM test b WHERE y = 1 UNION SELECT x FROM test c WHERE y = 2) ORDER BY 1,2; + x | y +---+--- + 1 | 1 + 2 | 2 +(2 rows) + +-- subquery union in WHERE clause with partition column equality, without implicit join on partition column +SELECT * FROM test a WHERE x NOT IN (SELECT x FROM test b WHERE y = 1 UNION SELECT x FROM test c WHERE y = 2) ORDER BY 1,2; +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +-- subquery union in WHERE clause without parition column equality is recursively planned +SELECT * FROM test a WHERE x IN (SELECT x FROM test b UNION SELECT y FROM test c) ORDER BY 1,2; +DEBUG: generating subplan 133_1 for subquery SELECT x FROM recursive_union.test b +DEBUG: generating subplan 133_2 for subquery SELECT y FROM recursive_union.test c +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 133_3 for subquery SELECT intermediate_result.x FROM read_intermediate_result('133_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer) UNION SELECT intermediate_result.y FROM read_intermediate_result('133_2'::text, 'binary'::citus_copy_format) intermediate_result(y integer) +DEBUG: Plan 133 query after replacing subqueries and CTEs: SELECT x, y FROM recursive_union.test a WHERE (x IN (SELECT intermediate_result.x FROM read_intermediate_result('133_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer))) ORDER BY x, y + x | y +---+--- + 1 | 1 + 2 | 2 +(2 rows) + +-- correlated subquery with union in WHERE clause +SELECT * FROM test a WHERE x IN (SELECT x FROM test b UNION SELECT y FROM test c WHERE a.x = c.x) ORDER BY 1,2; +DEBUG: generating subplan 137_1 for subquery SELECT x FROM recursive_union.test b +DEBUG: skipping recursive planning for the subquery since it contains references to outer queries +DEBUG: skipping recursive planning for the subquery since it contains references to outer queries +DEBUG: Plan 137 query after replacing subqueries and CTEs: SELECT x, y FROM recursive_union.test a WHERE (x IN (SELECT intermediate_result.x FROM read_intermediate_result('137_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer) UNION SELECT c.y FROM recursive_union.test c WHERE (a.x = c.x))) ORDER BY x, y +DEBUG: skipping recursive planning for the subquery since it contains references to outer queries +DEBUG: skipping recursive planning for the subquery since it contains references to outer queries +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +-- force unions to be planned while subqueries are being planned +SELECT * FROM ((SELECT * FROM test) UNION (SELECT * FROM test) ORDER BY 1,2 LIMIT 5) as foo ORDER BY 1 DESC LIMIT 3; +DEBUG: generating subplan 140_1 for subquery SELECT x, y FROM recursive_union.test +DEBUG: generating subplan 140_2 for subquery SELECT x, y FROM recursive_union.test +DEBUG: Plan 140 query after replacing subqueries and CTEs: SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('140_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) UNION SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('140_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) ORDER BY 1, 2 LIMIT 5 +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 139_1 for subquery SELECT test.x, test.y FROM recursive_union.test UNION SELECT test.x, test.y FROM recursive_union.test ORDER BY 1, 2 LIMIT 5 +DEBUG: Plan 139 query after replacing subqueries and CTEs: SELECT x, y FROM (SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('139_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) foo ORDER BY x DESC LIMIT 3 +DEBUG: Creating router plan +DEBUG: Plan is router executable + x | y +---+--- + 2 | 2 + 1 | 1 +(2 rows) + +-- distinct and count distinct should work without any problems +select count(DISTINCT t.x) FROM ((SELECT DISTINCT x FROM test) UNION (SELECT DISTINCT y FROM test)) as t(x) ORDER BY 1; +DEBUG: generating subplan 143_1 for subquery SELECT DISTINCT y FROM recursive_union.test +DEBUG: generating subplan 143_2 for subquery SELECT DISTINCT x FROM recursive_union.test +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 143_3 for subquery SELECT intermediate_result.x FROM read_intermediate_result('143_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer) UNION SELECT intermediate_result.y FROM read_intermediate_result('143_1'::text, 'binary'::citus_copy_format) intermediate_result(y integer) +DEBUG: Plan 143 query after replacing subqueries and CTEs: SELECT count(DISTINCT x) AS count FROM (SELECT intermediate_result.x FROM read_intermediate_result('143_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) t(x) ORDER BY (count(DISTINCT x)) +DEBUG: Creating router plan +DEBUG: Plan is router executable + count +------- + 2 +(1 row) + +select count(DISTINCT t.x) FROM ((SELECT count(DISTINCT x) FROM test) UNION (SELECT count(DISTINCT y) FROM test)) as t(x) ORDER BY 1; +DEBUG: generating subplan 147_1 for subquery SELECT count(DISTINCT x) AS count FROM recursive_union.test +DEBUG: generating subplan 147_2 for subquery SELECT count(DISTINCT y) AS count FROM recursive_union.test +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 147_3 for subquery SELECT intermediate_result.count FROM read_intermediate_result('147_1'::text, 'binary'::citus_copy_format) intermediate_result(count bigint) UNION SELECT intermediate_result.count FROM read_intermediate_result('147_2'::text, 'binary'::citus_copy_format) intermediate_result(count bigint) +DEBUG: Plan 147 query after replacing subqueries and CTEs: SELECT count(DISTINCT x) AS count FROM (SELECT intermediate_result.count FROM read_intermediate_result('147_3'::text, 'binary'::citus_copy_format) intermediate_result(count bigint)) t(x) ORDER BY (count(DISTINCT x)) +DEBUG: Creating router plan +DEBUG: Plan is router executable + count +------- + 1 +(1 row) + +-- other agg. distincts are also supported when group by includes partition key +select avg(DISTINCT t.x) FROM ((SELECT avg(DISTINCT y) FROM test GROUP BY x) UNION (SELECT avg(DISTINCT y) FROM test GROUP BY x)) as t(x) ORDER BY 1; +DEBUG: generating subplan 151_1 for subquery SELECT avg(DISTINCT y) AS avg FROM recursive_union.test GROUP BY x +DEBUG: generating subplan 151_2 for subquery SELECT avg(DISTINCT y) AS avg FROM recursive_union.test GROUP BY x +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 151_3 for subquery SELECT intermediate_result.avg FROM read_intermediate_result('151_1'::text, 'binary'::citus_copy_format) intermediate_result(avg numeric) UNION SELECT intermediate_result.avg FROM read_intermediate_result('151_2'::text, 'binary'::citus_copy_format) intermediate_result(avg numeric) +DEBUG: Plan 151 query after replacing subqueries and CTEs: SELECT avg(DISTINCT x) AS avg FROM (SELECT intermediate_result.avg FROM read_intermediate_result('151_3'::text, 'binary'::citus_copy_format) intermediate_result(avg numeric)) t(x) ORDER BY (avg(DISTINCT x)) +DEBUG: Creating router plan +DEBUG: Plan is router executable + avg +------------------------ + 1.50000000000000000000 +(1 row) + +-- other agg. distincts are not supported when group by doesn't include partition key +select count(DISTINCT t.x) FROM ((SELECT avg(DISTINCT y) FROM test GROUP BY y) UNION (SELECT avg(DISTINCT y) FROM test GROUP BY y)) as t(x) ORDER BY 1; +ERROR: cannot compute aggregate (distinct) +DETAIL: table partitioning is unsuitable for aggregate (distinct) +-- one of the leaves is a repartition join +SET citus.enable_repartition_joins TO ON; +-- repartition is recursively planned before the set operation +(SELECT x FROM test) INTERSECT (SELECT t1.x FROM test as t1, test as t2 WHERE t1.x = t2.y LIMIT 0) ORDER BY 1 DESC; +DEBUG: push down of limit count: 0 +DEBUG: join prunable for task partitionId 0 and 1 +DEBUG: join prunable for task partitionId 0 and 2 +DEBUG: join prunable for task partitionId 0 and 3 +DEBUG: join prunable for task partitionId 1 and 0 +DEBUG: join prunable for task partitionId 1 and 2 +DEBUG: join prunable for task partitionId 1 and 3 +DEBUG: join prunable for task partitionId 2 and 0 +DEBUG: join prunable for task partitionId 2 and 1 +DEBUG: join prunable for task partitionId 2 and 3 +DEBUG: join prunable for task partitionId 3 and 0 +DEBUG: join prunable for task partitionId 3 and 1 +DEBUG: join prunable for task partitionId 3 and 2 +DEBUG: pruning merge fetch taskId 1 +DETAIL: Creating dependency on merge taskId 9 +DEBUG: pruning merge fetch taskId 2 +DETAIL: Creating dependency on merge taskId 9 +DEBUG: pruning merge fetch taskId 4 +DETAIL: Creating dependency on merge taskId 14 +DEBUG: pruning merge fetch taskId 5 +DETAIL: Creating dependency on merge taskId 14 +DEBUG: pruning merge fetch taskId 7 +DETAIL: Creating dependency on merge taskId 19 +DEBUG: pruning merge fetch taskId 8 +DETAIL: Creating dependency on merge taskId 19 +DEBUG: pruning merge fetch taskId 10 +DETAIL: Creating dependency on merge taskId 24 +DEBUG: pruning merge fetch taskId 11 +DETAIL: Creating dependency on merge taskId 24 +DEBUG: cannot use real time executor with repartition jobs +HINT: Since you enabled citus.enable_repartition_joins Citus chose to use task-tracker. +DEBUG: generating subplan 157_1 for subquery SELECT t1.x FROM recursive_union.test t1, recursive_union.test t2 WHERE (t1.x = t2.y) LIMIT 0 +DEBUG: generating subplan 157_2 for subquery SELECT x FROM recursive_union.test +DEBUG: Plan 157 query after replacing subqueries and CTEs: SELECT intermediate_result.x FROM read_intermediate_result('157_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer) INTERSECT SELECT intermediate_result.x FROM read_intermediate_result('157_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer) ORDER BY 1 DESC +DEBUG: Creating router plan +DEBUG: Plan is router executable + x +--- +(0 rows) + +-- repartition is recursively planned with the set operation +(SELECT x FROM test) INTERSECT (SELECT t1.x FROM test as t1, test as t2 WHERE t1.x = t2.y) ORDER BY 1 DESC; +DEBUG: generating subplan 160_1 for subquery SELECT x FROM recursive_union.test +DEBUG: join prunable for task partitionId 0 and 1 +DEBUG: join prunable for task partitionId 0 and 2 +DEBUG: join prunable for task partitionId 0 and 3 +DEBUG: join prunable for task partitionId 1 and 0 +DEBUG: join prunable for task partitionId 1 and 2 +DEBUG: join prunable for task partitionId 1 and 3 +DEBUG: join prunable for task partitionId 2 and 0 +DEBUG: join prunable for task partitionId 2 and 1 +DEBUG: join prunable for task partitionId 2 and 3 +DEBUG: join prunable for task partitionId 3 and 0 +DEBUG: join prunable for task partitionId 3 and 1 +DEBUG: join prunable for task partitionId 3 and 2 +DEBUG: pruning merge fetch taskId 1 +DETAIL: Creating dependency on merge taskId 9 +DEBUG: pruning merge fetch taskId 2 +DETAIL: Creating dependency on merge taskId 9 +DEBUG: pruning merge fetch taskId 4 +DETAIL: Creating dependency on merge taskId 14 +DEBUG: pruning merge fetch taskId 5 +DETAIL: Creating dependency on merge taskId 14 +DEBUG: pruning merge fetch taskId 7 +DETAIL: Creating dependency on merge taskId 19 +DEBUG: pruning merge fetch taskId 8 +DETAIL: Creating dependency on merge taskId 19 +DEBUG: pruning merge fetch taskId 10 +DETAIL: Creating dependency on merge taskId 24 +DEBUG: pruning merge fetch taskId 11 +DETAIL: Creating dependency on merge taskId 24 +DEBUG: cannot use real time executor with repartition jobs +HINT: Since you enabled citus.enable_repartition_joins Citus chose to use task-tracker. +DEBUG: generating subplan 160_2 for subquery SELECT t1.x FROM recursive_union.test t1, recursive_union.test t2 WHERE (t1.x = t2.y) +DEBUG: Plan 160 query after replacing subqueries and CTEs: SELECT intermediate_result.x FROM read_intermediate_result('160_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer) INTERSECT SELECT intermediate_result.x FROM read_intermediate_result('160_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer) ORDER BY 1 DESC +DEBUG: Creating router plan +DEBUG: Plan is router executable + x +--- + 2 + 1 +(2 rows) + +SET citus.enable_repartition_joins TO OFF; +-- this should be recursively planned +CREATE VIEW set_view_recursive AS (SELECT y FROM test) UNION (SELECT y FROM test); +SELECT * FROM set_view_recursive ORDER BY 1 DESC; +DEBUG: generating subplan 163_1 for subquery SELECT y FROM recursive_union.test +DEBUG: generating subplan 163_2 for subquery SELECT y FROM recursive_union.test +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 163_3 for subquery SELECT intermediate_result.y FROM read_intermediate_result('163_1'::text, 'binary'::citus_copy_format) intermediate_result(y integer) UNION SELECT intermediate_result.y FROM read_intermediate_result('163_2'::text, 'binary'::citus_copy_format) intermediate_result(y integer) +DEBUG: Plan 163 query after replacing subqueries and CTEs: SELECT y FROM (SELECT intermediate_result.y FROM read_intermediate_result('163_3'::text, 'binary'::citus_copy_format) intermediate_result(y integer)) set_view_recursive ORDER BY y DESC +DEBUG: Creating router plan +DEBUG: Plan is router executable + y +--- + 2 + 1 +(2 rows) + +-- this should be pushed down +CREATE VIEW set_view_pushdown AS (SELECT x FROM test) UNION (SELECT x FROM test); +SELECT * FROM set_view_pushdown ORDER BY 1 DESC; + x +--- + 2 + 1 +(2 rows) + +-- this should be recursively planned +CREATE VIEW set_view_recursive_second AS SELECT u.x, test.y FROM ((SELECT x, y FROM test) UNION (SELECT 1, 1 FROM test)) u JOIN test USING (x) ORDER BY 1,2; +SELECT * FROM set_view_recursive_second; +DEBUG: generating subplan 168_1 for subquery SELECT x, y FROM recursive_union.test +DEBUG: generating subplan 168_2 for subquery SELECT 1, 1 FROM recursive_union.test +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 168_3 for subquery SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('168_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) UNION SELECT intermediate_result."?column?", intermediate_result."?column?_1" AS "?column?" FROM read_intermediate_result('168_2'::text, 'binary'::citus_copy_format) intermediate_result("?column?" integer, "?column?_1" integer) +DEBUG: Plan 168 query after replacing subqueries and CTEs: SELECT x, y FROM (SELECT u.x, test.y FROM ((SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('168_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) u JOIN recursive_union.test USING (x)) ORDER BY u.x, test.y) set_view_recursive_second + x | y +---+--- + 1 | 1 + 2 | 2 +(2 rows) + +-- this should create lots of recursive calls since both views and set operations lead to recursive plans :) +((SELECT x FROM set_view_recursive_second) INTERSECT (SELECT * FROM set_view_recursive)) EXCEPT (SELECT * FROM set_view_pushdown); +DEBUG: generating subplan 172_1 for subquery SELECT x, y FROM recursive_union.test +DEBUG: generating subplan 172_2 for subquery SELECT 1, 1 FROM recursive_union.test +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 172_3 for subquery SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('172_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) UNION SELECT intermediate_result."?column?", intermediate_result."?column?_1" AS "?column?" FROM read_intermediate_result('172_2'::text, 'binary'::citus_copy_format) intermediate_result("?column?" integer, "?column?_1" integer) +DEBUG: generating subplan 172_4 for subquery SELECT y FROM recursive_union.test +DEBUG: generating subplan 172_5 for subquery SELECT y FROM recursive_union.test +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 172_6 for subquery SELECT intermediate_result.y FROM read_intermediate_result('172_4'::text, 'binary'::citus_copy_format) intermediate_result(y integer) UNION SELECT intermediate_result.y FROM read_intermediate_result('172_5'::text, 'binary'::citus_copy_format) intermediate_result(y integer) +DEBUG: generating subplan 172_7 for subquery SELECT x FROM (SELECT u.x, test.y FROM ((SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('172_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) u JOIN recursive_union.test USING (x)) ORDER BY u.x, test.y) set_view_recursive_second +DEBUG: generating subplan 172_8 for subquery SELECT x FROM (SELECT test.x FROM recursive_union.test UNION SELECT test.x FROM recursive_union.test) set_view_pushdown +DEBUG: Plan 172 query after replacing subqueries and CTEs: (SELECT intermediate_result.x FROM read_intermediate_result('172_7'::text, 'binary'::citus_copy_format) intermediate_result(x integer) INTERSECT SELECT set_view_recursive.y FROM (SELECT intermediate_result.y FROM read_intermediate_result('172_6'::text, 'binary'::citus_copy_format) intermediate_result(y integer)) set_view_recursive) EXCEPT SELECT intermediate_result.x FROM read_intermediate_result('172_8'::text, 'binary'::citus_copy_format) intermediate_result(x integer) +DEBUG: Creating router plan +DEBUG: Plan is router executable + x +--- +(0 rows) + +RESET client_min_messages; +DROP SCHEMA recursive_union CASCADE; +NOTICE: drop cascades to 5 other objects +DETAIL: drop cascades to table test +drop cascades to table ref +drop cascades to view set_view_recursive +drop cascades to view set_view_pushdown +drop cascades to view set_view_recursive_second diff --git a/src/test/regress/expected/with_basics.out b/src/test/regress/expected/with_basics.out index 5a4596c72..53096996a 100644 --- a/src/test/regress/expected/with_basics.out +++ b/src/test/regress/expected/with_basics.out @@ -563,7 +563,7 @@ LIMIT 1 | 90 (5 rows) --- SELECT * FROM (SELECT * FROM cte UNION SELECT * FROM distributed_table) a; should error out +-- SELECT * FROM (SELECT * FROM cte UNION SELECT * FROM distributed_table) a; should work WITH cte AS ( SELECT * FROM users_table ) @@ -574,8 +574,20 @@ ORDER BY 1,2,3,4,5,6 LIMIT 10; -ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT -HINT: Consider using an equality filter on the distributed table's partition column. + user_id | time | value_1 | value_2 | value_3 | value_4 +---------+---------------------------------+---------+---------+---------+--------- + 1 | Wed Nov 22 18:49:42.327403 2017 | 3 | 2 | 1 | + 1 | Wed Nov 22 19:03:01.772353 2017 | 4 | 1 | 2 | + 1 | Wed Nov 22 19:07:03.846437 2017 | 1 | 2 | 5 | + 1 | Wed Nov 22 20:56:21.122638 2017 | 2 | 4 | 4 | + 1 | Wed Nov 22 21:06:57.457147 2017 | 4 | 3 | 2 | + 1 | Wed Nov 22 21:47:04.188168 2017 | 4 | 2 | 0 | + 1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 | + 1 | Wed Nov 22 23:22:09.957743 2017 | 1 | 1 | 1 | + 1 | Thu Nov 23 00:42:37.237615 2017 | 2 | 4 | 3 | + 1 | Thu Nov 23 02:59:23.620864 2017 | 4 | 5 | 4 | +(10 rows) + SELECT * FROM ( SELECT * FROM (WITH cte AS ( SELECT * FROM users_table @@ -586,8 +598,20 @@ ORDER BY 1,2,3,4,5,6 LIMIT 10; -ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT -HINT: Consider using an equality filter on the distributed table's partition column. + user_id | time | value_1 | value_2 | value_3 | value_4 +---------+---------------------------------+---------+---------+---------+--------- + 1 | Wed Nov 22 18:49:42.327403 2017 | 3 | 2 | 1 | + 1 | Wed Nov 22 19:03:01.772353 2017 | 4 | 1 | 2 | + 1 | Wed Nov 22 19:07:03.846437 2017 | 1 | 2 | 5 | + 1 | Wed Nov 22 20:56:21.122638 2017 | 2 | 4 | 4 | + 1 | Wed Nov 22 21:06:57.457147 2017 | 4 | 3 | 2 | + 1 | Wed Nov 22 21:47:04.188168 2017 | 4 | 2 | 0 | + 1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 | + 1 | Wed Nov 22 23:22:09.957743 2017 | 1 | 1 | 1 | + 1 | Thu Nov 23 00:42:37.237615 2017 | 2 | 4 | 3 | + 1 | Thu Nov 23 02:59:23.620864 2017 | 4 | 5 | 4 | +(10 rows) + -- SELECT * FROM (SELECT * FROM cte UNION SELECT * FROM cte) a; should work WITH cte AS ( SELECT * FROM users_table WHERE user_id IN (1, 2) diff --git a/src/test/regress/expected/with_set_operations.out b/src/test/regress/expected/with_set_operations.out new file mode 100644 index 000000000..b62fd62d6 --- /dev/null +++ b/src/test/regress/expected/with_set_operations.out @@ -0,0 +1,417 @@ +-- =================================================================== +-- test recursive planning functionality with Set Operations and CTEs +-- =================================================================== +SET client_min_messages TO DEBUG1; +-- use ctes inside unions on the top level +WITH +cte_1 AS (SELECT user_id FROM users_table), +cte_2 AS (SELECT user_id FROM events_table) +(SELECT * FROM cte_1) UNION (SELECT * FROM cte_2) +ORDER BY 1 DESC; +DEBUG: generating subplan 1_1 for CTE cte_1: SELECT user_id FROM public.users_table +DEBUG: generating subplan 1_2 for CTE cte_2: SELECT user_id FROM public.events_table +DEBUG: Plan 1 query after replacing subqueries and CTEs: SELECT cte_1.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('1_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte_1 UNION SELECT cte_2.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('1_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte_2 ORDER BY 1 DESC + user_id +--------- + 6 + 5 + 4 + 3 + 2 + 1 +(6 rows) + +-- use ctes inside unions in a subquery +WITH +cte_1 AS (SELECT user_id FROM users_table), +cte_2 AS (SELECT user_id FROM events_table) +SELECT + count(*) +FROM ( + (SELECT * FROM cte_1) UNION (SELECT * FROM cte_2) + ) as foo; +DEBUG: generating subplan 4_1 for CTE cte_1: SELECT user_id FROM public.users_table +DEBUG: generating subplan 4_2 for CTE cte_2: SELECT user_id FROM public.events_table +DEBUG: generating subplan 4_3 for subquery SELECT cte_1.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('4_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte_1 UNION SELECT cte_2.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('4_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte_2 +DEBUG: Plan 4 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('4_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) foo + count +------- + 6 +(1 row) + +-- cte with unions of other ctes +WITH +cte_1 AS (SELECT user_id FROM users_table), +cte_2 AS (SELECT user_id FROM events_table), +cte_3 AS ((SELECT * FROM cte_1) UNION (SELECT * FROM cte_2)) +SELECT * FROM cte_3 ORDER BY 1 DESC; +DEBUG: generating subplan 8_1 for CTE cte_1: SELECT user_id FROM public.users_table +DEBUG: generating subplan 8_2 for CTE cte_2: SELECT user_id FROM public.events_table +DEBUG: generating subplan 8_3 for CTE cte_3: SELECT cte_1.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('8_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte_1 UNION SELECT cte_2.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('8_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte_2 +DEBUG: Plan 8 query after replacing subqueries and CTEs: SELECT user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('8_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte_3 ORDER BY user_id DESC + user_id +--------- + 6 + 5 + 4 + 3 + 2 + 1 +(6 rows) + +-- cte with unions of distributed table +WITH +cte_1 AS ((SELECT user_id FROM users_table) UNION (SELECT user_id FROM users_table)) +SELECT * FROM cte_1 ORDER BY 1 DESC; +DEBUG: generating subplan 12_1 for CTE cte_1: SELECT users_table.user_id FROM public.users_table UNION SELECT users_table.user_id FROM public.users_table +DEBUG: generating subplan 13_1 for subquery SELECT user_id FROM public.users_table +DEBUG: generating subplan 13_2 for subquery SELECT user_id FROM public.users_table +DEBUG: Plan 13 query after replacing subqueries and CTEs: SELECT intermediate_result.user_id FROM read_intermediate_result('13_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) UNION SELECT intermediate_result.user_id FROM read_intermediate_result('13_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) +DEBUG: Plan 12 query after replacing subqueries and CTEs: SELECT user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('12_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte_1 ORDER BY user_id DESC + user_id +--------- + 6 + 5 + 4 + 3 + 2 + 1 +(6 rows) + +-- cte with unions of tables is intersected with another query +WITH +cte_1 AS ((SELECT user_id FROM users_table) UNION (SELECT user_id FROM users_table)) +(SELECT * FROM cte_1) INTERSECT (SELECT user_id FROM users_table) ORDER BY 1 DESC; +DEBUG: generating subplan 16_1 for CTE cte_1: SELECT users_table.user_id FROM public.users_table UNION SELECT users_table.user_id FROM public.users_table +DEBUG: generating subplan 17_1 for subquery SELECT user_id FROM public.users_table +DEBUG: generating subplan 17_2 for subquery SELECT user_id FROM public.users_table +DEBUG: Plan 17 query after replacing subqueries and CTEs: SELECT intermediate_result.user_id FROM read_intermediate_result('17_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) UNION SELECT intermediate_result.user_id FROM read_intermediate_result('17_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) +DEBUG: generating subplan 16_2 for subquery SELECT user_id FROM public.users_table +DEBUG: Plan 16 query after replacing subqueries and CTEs: SELECT cte_1.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('16_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte_1 INTERSECT SELECT intermediate_result.user_id FROM read_intermediate_result('16_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) ORDER BY 1 DESC + user_id +--------- + 6 + 5 + 4 + 3 + 2 + 1 +(6 rows) + +-- cte with unions of tables is intersected with another query that involves except +WITH +cte_1 AS ((SELECT user_id FROM users_table) UNION (SELECT user_id FROM users_table)) +(SELECT * FROM cte_1) + INTERSECT +((SELECT user_id FROM events_table WHERE user_id < 3) EXCEPT (SELECT user_id FROM users_table WHERE user_id > 4)) ORDER BY 1 DESC; +DEBUG: generating subplan 21_1 for CTE cte_1: SELECT users_table.user_id FROM public.users_table UNION SELECT users_table.user_id FROM public.users_table +DEBUG: generating subplan 22_1 for subquery SELECT user_id FROM public.users_table +DEBUG: generating subplan 22_2 for subquery SELECT user_id FROM public.users_table +DEBUG: Plan 22 query after replacing subqueries and CTEs: SELECT intermediate_result.user_id FROM read_intermediate_result('22_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) UNION SELECT intermediate_result.user_id FROM read_intermediate_result('22_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) +DEBUG: generating subplan 21_2 for subquery SELECT user_id FROM public.events_table WHERE (user_id < 3) +DEBUG: generating subplan 21_3 for subquery SELECT user_id FROM public.users_table WHERE (user_id > 4) +DEBUG: Plan 21 query after replacing subqueries and CTEs: SELECT cte_1.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('21_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte_1 INTERSECT (SELECT intermediate_result.user_id FROM read_intermediate_result('21_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) EXCEPT SELECT intermediate_result.user_id FROM read_intermediate_result('21_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) ORDER BY 1 DESC + user_id +--------- + 2 + 1 +(2 rows) + +-- CTE inside a top level EXCEPT +(WITH cte_1 AS (SELECT user_id FROM events_table WHERE user_id < 3) SELECT * FROM cte_1) INTERSECT (SELECT user_id FROM users_table) ORDER BY 1; +DEBUG: generating subplan 27_1 for CTE cte_1: SELECT user_id FROM public.events_table WHERE (user_id < 3) +DEBUG: generating subplan 27_2 for subquery SELECT user_id FROM public.users_table +DEBUG: Plan 27 query after replacing subqueries and CTEs: SELECT cte_1.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('27_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte_1 INTERSECT SELECT intermediate_result.user_id FROM read_intermediate_result('27_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) ORDER BY 1 + user_id +--------- + 1 + 2 +(2 rows) + +-- INTERSECT inside a CTE, which is inside a subquery +SELECT + DISTINCT users_table.user_id +FROM + users_table, + (WITH cte_1 AS (SELECT user_id FROM events_table WHERE user_id < 3 INTERSECT + SELECT user_id FROM events_table WHERE user_id < 2) + SELECT * FROM cte_1) as foo +WHERE + users_table.user_id = foo.user_id +ORDER BY 1 DESC; +DEBUG: generating subplan 30_1 for CTE cte_1: SELECT events_table.user_id FROM public.events_table WHERE (events_table.user_id < 3) INTERSECT SELECT events_table.user_id FROM public.events_table WHERE (events_table.user_id < 2) +DEBUG: generating subplan 31_1 for subquery SELECT user_id FROM public.events_table WHERE (user_id < 3) +DEBUG: generating subplan 31_2 for subquery SELECT user_id FROM public.events_table WHERE (user_id < 2) +DEBUG: Plan 31 query after replacing subqueries and CTEs: SELECT intermediate_result.user_id FROM read_intermediate_result('31_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) INTERSECT SELECT intermediate_result.user_id FROM read_intermediate_result('31_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) +DEBUG: Plan 30 query after replacing subqueries and CTEs: SELECT DISTINCT users_table.user_id FROM public.users_table, (SELECT cte_1.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('30_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte_1) foo WHERE (users_table.user_id = foo.user_id) ORDER BY users_table.user_id DESC + user_id +--------- + 1 +(1 row) + +-- UNION is created via outputs of CTEs, which is inside a subquery +-- and the subquery is joined with a distributed table +SELECT + count(*) +FROM + events_table, + ( + WITH + cte_1 AS (SELECT user_id FROM users_table), + cte_2 AS (SELECT user_id FROM events_table) + (SELECT * FROM cte_1) UNION (SELECT * FROM cte_2) + ) as foo +WHERE foo.user_id = events_table.event_type; +DEBUG: generating subplan 34_1 for CTE cte_1: SELECT user_id FROM public.users_table +DEBUG: generating subplan 34_2 for CTE cte_2: SELECT user_id FROM public.events_table +DEBUG: generating subplan 34_3 for subquery SELECT cte_1.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('34_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte_1 UNION SELECT cte_2.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('34_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte_2 +DEBUG: Plan 34 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM public.events_table, (SELECT intermediate_result.user_id FROM read_intermediate_result('34_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) foo WHERE (foo.user_id = events_table.event_type) + count +------- + 95 +(1 row) + +-- joins inside unions that are safe to pushdown +(SELECT DISTINCT events_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id ) +INTERSECT +(SELECT DISTINCT events_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id ) +ORDER BY 1 DESC; +DEBUG: generating subplan 38_1 for subquery SELECT DISTINCT events_table.user_id FROM public.users_table, public.events_table WHERE (users_table.user_id = events_table.user_id) +DEBUG: generating subplan 38_2 for subquery SELECT DISTINCT events_table.user_id FROM public.users_table, public.events_table WHERE (users_table.user_id = events_table.user_id) +DEBUG: Plan 38 query after replacing subqueries and CTEs: SELECT intermediate_result.user_id FROM read_intermediate_result('38_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) INTERSECT SELECT intermediate_result.user_id FROM read_intermediate_result('38_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) ORDER BY 1 DESC + user_id +--------- + 6 + 5 + 4 + 3 + 2 + 1 +(6 rows) + +-- joins inside unions that are not safe to pushdown +(SELECT DISTINCT events_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id LIMIT 10) +INTERSECT +(SELECT DISTINCT events_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id LIMIT 10) +ORDER BY 1 DESC; +DEBUG: push down of limit count: 10 +DEBUG: generating subplan 41_1 for subquery SELECT DISTINCT events_table.user_id FROM public.users_table, public.events_table WHERE (users_table.user_id = events_table.user_id) LIMIT 10 +DEBUG: push down of limit count: 10 +DEBUG: generating subplan 41_2 for subquery SELECT DISTINCT events_table.user_id FROM public.users_table, public.events_table WHERE (users_table.user_id = events_table.user_id) LIMIT 10 +DEBUG: Plan 41 query after replacing subqueries and CTEs: SELECT intermediate_result.user_id FROM read_intermediate_result('41_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) INTERSECT SELECT intermediate_result.user_id FROM read_intermediate_result('41_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) ORDER BY 1 DESC + user_id +--------- + 6 + 5 + 4 + 3 + 2 + 1 +(6 rows) + +-- joins inside unions that are not safe to pushdown inside a subquery +SELECT + count(*) +FROM + (SELECT DISTINCT value_2 FROM events_table) as events_table, + (WITH foo AS + ((SELECT DISTINCT events_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id ) + INTERSECT + (SELECT DISTINCT events_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id LIMIT 10)) + SELECT * FROM foo) + as foo +WHERE + foo.user_id = events_table.value_2; +DEBUG: generating subplan 44_1 for subquery SELECT DISTINCT value_2 FROM public.events_table +DEBUG: generating subplan 44_2 for CTE foo: SELECT DISTINCT events_table.user_id FROM public.users_table, public.events_table WHERE (users_table.user_id = events_table.user_id) INTERSECT (SELECT DISTINCT events_table.user_id FROM public.users_table, public.events_table WHERE (users_table.user_id = events_table.user_id) LIMIT 10) +DEBUG: push down of limit count: 10 +DEBUG: generating subplan 46_1 for subquery SELECT DISTINCT events_table.user_id FROM public.users_table, public.events_table WHERE (users_table.user_id = events_table.user_id) LIMIT 10 +DEBUG: generating subplan 46_2 for subquery SELECT DISTINCT events_table.user_id FROM public.users_table, public.events_table WHERE (users_table.user_id = events_table.user_id) +DEBUG: Plan 46 query after replacing subqueries and CTEs: SELECT intermediate_result.user_id FROM read_intermediate_result('46_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) INTERSECT SELECT intermediate_result.user_id FROM read_intermediate_result('46_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) +DEBUG: Plan 44 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.value_2 FROM read_intermediate_result('44_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) events_table, (SELECT foo_1.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('44_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) foo_1) foo WHERE (foo.user_id = events_table.value_2) + count +------- + 5 +(1 row) + +-- joins inside unions some safe to pushdown +SELECT + count(*) +FROM + (WITH events_table AS (SELECT DISTINCT user_id FROM events_table) SELECT * FROM events_table) as events_table, + ((SELECT DISTINCT events_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id ) + INTERSECT + (SELECT DISTINCT events_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id LIMIT 10)) as foo +WHERE + foo.user_id = events_table.user_id; +DEBUG: generating subplan 49_1 for CTE events_table: SELECT DISTINCT user_id FROM public.events_table +DEBUG: push down of limit count: 10 +DEBUG: generating subplan 49_2 for subquery SELECT DISTINCT events_table.user_id FROM public.users_table, public.events_table WHERE (users_table.user_id = events_table.user_id) LIMIT 10 +DEBUG: generating subplan 49_3 for subquery SELECT DISTINCT events_table.user_id FROM public.users_table, public.events_table WHERE (users_table.user_id = events_table.user_id) +DEBUG: generating subplan 49_4 for subquery SELECT intermediate_result.user_id FROM read_intermediate_result('49_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) INTERSECT SELECT intermediate_result.user_id FROM read_intermediate_result('49_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) +DEBUG: Plan 49 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT events_table_1.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('49_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) events_table_1) events_table, (SELECT intermediate_result.user_id FROM read_intermediate_result('49_4'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) foo WHERE (foo.user_id = events_table.user_id) + count +------- + 6 +(1 row) + +-- CTE inside unions +(WITH cte_1 AS (SELECT user_id FROM users_table) SELECT * FROM cte_1) UNION +(WITH cte_1 AS (SELECT user_id FROM users_table) SELECT * FROM cte_1) ORDER BY 1 DESC; +DEBUG: generating subplan 54_1 for CTE cte_1: SELECT user_id FROM public.users_table +DEBUG: generating subplan 54_2 for CTE cte_1: SELECT user_id FROM public.users_table +DEBUG: Plan 54 query after replacing subqueries and CTEs: SELECT cte_1.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('54_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte_1 UNION SELECT cte_1.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('54_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte_1 ORDER BY 1 DESC + user_id +--------- + 6 + 5 + 4 + 3 + 2 + 1 +(6 rows) + +-- more complex CTEs inside unions +SELECT + count(*) +FROM + ( + (WITH cte_1 AS (SELECT user_id FROM users_table) SELECT * FROM cte_1) UNION + (WITH cte_1 AS (SELECT user_id FROM users_table) SELECT * FROM cte_1) + ) as foo, + users_table + WHERE users_table.value_2 = foo.user_id; +DEBUG: generating subplan 57_1 for CTE cte_1: SELECT user_id FROM public.users_table +DEBUG: generating subplan 57_2 for CTE cte_1: SELECT user_id FROM public.users_table +DEBUG: generating subplan 57_3 for subquery SELECT cte_1.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('57_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte_1 UNION SELECT cte_1.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('57_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte_1 +DEBUG: Plan 57 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('57_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) foo, public.users_table WHERE (users_table.value_2 = foo.user_id) + count +------- + 92 +(1 row) + +-- CTEs with less alias than the input subquery +(WITH cte_1(x) AS (SELECT user_id, value_2 FROM users_table) SELECT * FROM cte_1) UNION +(WITH cte_1(x) AS (SELECT user_id, value_2 FROM users_table) SELECT * FROM cte_1) ORDER BY 1 DESC, 2 DESC LIMIT 5; +DEBUG: generating subplan 61_1 for CTE cte_1: SELECT user_id, value_2 FROM public.users_table +DEBUG: generating subplan 61_2 for CTE cte_1: SELECT user_id, value_2 FROM public.users_table +DEBUG: Plan 61 query after replacing subqueries and CTEs: SELECT cte_1.x, cte_1.value_2 FROM (SELECT intermediate_result.user_id AS x, intermediate_result.value_2 FROM read_intermediate_result('61_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_2 integer)) cte_1 UNION SELECT cte_1.x, cte_1.value_2 FROM (SELECT intermediate_result.user_id AS x, intermediate_result.value_2 FROM read_intermediate_result('61_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_2 integer)) cte_1 ORDER BY 1 DESC, 2 DESC LIMIT 5 + x | value_2 +---+--------- + 6 | 4 + 6 | 3 + 6 | 2 + 6 | 1 + 6 | 0 +(5 rows) + +-- simple subqueries in WHERE with unions +SELECT + count(*) +FROM + users_table +WHERE + value_2 IN + ( + WITH cte_1 AS + ( + (SELECT user_id FROM users_table) + UNION + (SELECT user_id FROM events_table) + ) SELECT DISTINCT user_id FROM cte_1 +) +ORDER BY 1 DESC; +DEBUG: generating subplan 64_1 for CTE cte_1: SELECT users_table.user_id FROM public.users_table UNION SELECT events_table.user_id FROM public.events_table +DEBUG: generating subplan 65_1 for subquery SELECT user_id FROM public.users_table +DEBUG: generating subplan 65_2 for subquery SELECT user_id FROM public.events_table +DEBUG: Plan 65 query after replacing subqueries and CTEs: SELECT intermediate_result.user_id FROM read_intermediate_result('65_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) UNION SELECT intermediate_result.user_id FROM read_intermediate_result('65_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) +DEBUG: Plan 64 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM public.users_table WHERE (value_2 IN (SELECT DISTINCT cte_1.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('64_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte_1)) ORDER BY (count(*)) DESC + count +------- + 92 +(1 row) + +-- simple subqueries in WHERE with unions and ctes +SELECT + count(*) +FROM + users_table +WHERE + value_2 IN + ( + WITH + cte_1 AS (SELECT user_id FROM users_table), + cte_2 AS (SELECT user_id FROM events_table) + (SELECT * FROM cte_1) UNION (SELECT * FROM cte_2) +) +ORDER BY 1 DESC; +DEBUG: generating subplan 68_1 for CTE cte_1: SELECT user_id FROM public.users_table +DEBUG: generating subplan 68_2 for CTE cte_2: SELECT user_id FROM public.events_table +DEBUG: generating subplan 68_3 for subquery SELECT cte_1.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('68_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte_1 UNION SELECT cte_2.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('68_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte_2 +DEBUG: Plan 68 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM public.users_table WHERE (value_2 IN (SELECT intermediate_result.user_id FROM read_intermediate_result('68_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer))) ORDER BY (count(*)) DESC + count +------- + 92 +(1 row) + +-- unions and ctes inside subqueries in where clause with a pushdownable correlated subquery +SELECT + DISTINCT user_id +FROM + events_table +WHERE + event_type IN +( + SELECT + users_table.user_id + FROM + ( + (WITH cte_1 AS (SELECT user_id FROM users_table) SELECT * FROM cte_1) UNION + (WITH cte_1 AS (SELECT user_id FROM users_table) SELECT * FROM cte_1) + ) as foo, + users_table + WHERE users_table.value_2 = foo.user_id AND events_table.user_id = users_table.user_id +) +ORDER BY 1 DESC; +DEBUG: generating subplan 72_1 for CTE cte_1: SELECT user_id FROM public.users_table +DEBUG: generating subplan 72_2 for CTE cte_1: SELECT user_id FROM public.users_table +DEBUG: generating subplan 72_3 for subquery SELECT cte_1.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('72_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte_1 UNION SELECT cte_1.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('72_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte_1 +DEBUG: Plan 72 query after replacing subqueries and CTEs: SELECT DISTINCT user_id FROM public.events_table WHERE (event_type IN (SELECT users_table.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('72_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) foo, public.users_table WHERE ((users_table.value_2 = foo.user_id) AND (events_table.user_id = users_table.user_id)))) ORDER BY user_id DESC + user_id +--------- + 5 + 4 + 3 + 2 + 1 +(5 rows) + +-- unions and ctes inside subqueries in where clause with a not pushdownable correlated subquery +-- should error out +SELECT + DISTINCT user_id +FROM + events_table +WHERE + event_type IN +( + SELECT + users_table.user_id + FROM + ( + (WITH cte_1 AS (SELECT user_id FROM users_table) SELECT * FROM cte_1) UNION + (WITH cte_1 AS (SELECT user_id FROM users_table) SELECT * FROM cte_1) + ) as foo, + users_table + WHERE users_table.value_2 = foo.user_id AND events_table.user_id = users_table.user_id + LIMIT 5 +) +ORDER BY 1 DESC; +DEBUG: generating subplan 76_1 for CTE cte_1: SELECT user_id FROM public.users_table +DEBUG: generating subplan 76_2 for CTE cte_1: SELECT user_id FROM public.users_table +DEBUG: generating subplan 76_3 for subquery SELECT cte_1.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('76_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte_1 UNION SELECT cte_1.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('76_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte_1 +DEBUG: Plan 76 query after replacing subqueries and CTEs: SELECT DISTINCT user_id FROM public.events_table WHERE (event_type IN (SELECT users_table.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('76_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) foo, public.users_table WHERE ((users_table.value_2 = foo.user_id) AND (events_table.user_id = users_table.user_id)) LIMIT 5)) ORDER BY user_id DESC +ERROR: cannot push down this subquery +DETAIL: Limit in subquery is currently unsupported when a subquery references a column from another query +SET client_min_messages TO DEFAULT; +SET search_path TO public; diff --git a/src/test/regress/multi_schedule b/src/test/regress/multi_schedule index 7c31a3981..7c54faad7 100644 --- a/src/test/regress/multi_schedule +++ b/src/test/regress/multi_schedule @@ -42,7 +42,7 @@ test: multi_partitioning_utils multi_partitioning # ---------- # Tests for recursive subquery planning # ---------- -test: subquery_basics subquery_local_tables subquery_executors subquery_and_cte +test: subquery_basics subquery_local_tables subquery_executors subquery_and_cte set_operations set_operation_and_local_tables test: subqueries_deep subquery_view subquery_partitioning subquery_complex_target_list subqueries_not_supported test: subquery_prepared_statements @@ -95,7 +95,8 @@ test: multi_large_table_join_planning multi_large_table_pruning multi_large_tabl # Tests for recursive planning. # --------- -test: with_nested with_where with_basics with_prepare with_modifying with_executors with_join with_partitioning +test: with_nested with_where with_basics with_prepare with_set_operations +test: with_modifying with_executors with_join with_partitioning # ---------- # Tests to check our large record loading and shard deletion behavior diff --git a/src/test/regress/sql/multi_insert_select.sql b/src/test/regress/sql/multi_insert_select.sql index e19ad1160..6d85c4230 100644 --- a/src/test/regress/sql/multi_insert_select.sql +++ b/src/test/regress/sql/multi_insert_select.sql @@ -538,11 +538,14 @@ FROM ROLLBACK; --- We do not support any set operations +-- We do support set operations through recursive planning +BEGIN; +SET LOCAL client_min_messages TO DEBUG; INSERT INTO raw_events_first(user_id) (SELECT user_id FROM raw_events_first) INTERSECT (SELECT user_id FROM raw_events_first); +ROLLBACK; -- If the query is router plannable then it is executed via the coordinator INSERT INTO diff --git a/src/test/regress/sql/multi_mx_router_planner.sql b/src/test/regress/sql/multi_mx_router_planner.sql index b2b97040f..f83a27f39 100644 --- a/src/test/regress/sql/multi_mx_router_planner.sql +++ b/src/test/regress/sql/multi_mx_router_planner.sql @@ -353,15 +353,12 @@ SELECT * FROM ( ) AS combination ORDER BY 1; --- union queries are not supported if not router plannable --- there is an inconsistency on shard pruning between --- ubuntu/mac disabling log messages for this queries only - -SET client_min_messages to 'NOTICE'; - +-- union queries are supported through recursive planning +SET client_min_messages TO DEBUG1; (SELECT * FROM articles_hash_mx WHERE author_id = 1) UNION -(SELECT * FROM articles_hash_mx WHERE author_id = 2); +(SELECT * FROM articles_hash_mx WHERE author_id = 2) +ORDER BY 1,2; SELECT * FROM ( diff --git a/src/test/regress/sql/multi_router_planner.sql b/src/test/regress/sql/multi_router_planner.sql index 01a6848cc..f5f15b30f 100644 --- a/src/test/regress/sql/multi_router_planner.sql +++ b/src/test/regress/sql/multi_router_planner.sql @@ -422,15 +422,18 @@ SELECT * FROM ( ) AS combination ORDER BY 1; --- top-level union queries are not supported if not router plannable --- there is an inconsistency on shard pruning between --- ubuntu/mac disabling log messages for this queries only +-- top-level union queries are supported through recursive planning SET client_min_messages to 'NOTICE'; -(SELECT * FROM articles_hash WHERE author_id = 1) +( + (SELECT * FROM articles_hash WHERE author_id = 1) + UNION + (SELECT * FROM articles_hash WHERE author_id = 3) +) UNION -(SELECT * FROM articles_hash WHERE author_id = 2); +(SELECT * FROM articles_hash WHERE author_id = 2) +ORDER BY 1,2,3; -- unions in subqueries are supported with subquery pushdown SELECT * FROM ( diff --git a/src/test/regress/sql/multi_simple_queries.sql b/src/test/regress/sql/multi_simple_queries.sql index 476fafa1d..18dcab0e8 100644 --- a/src/test/regress/sql/multi_simple_queries.sql +++ b/src/test/regress/sql/multi_simple_queries.sql @@ -116,9 +116,10 @@ SELECT author_id, sum(word_count) AS corpus_size FROM articles HAVING sum(word_count) > 40000 ORDER BY sum(word_count) DESC; --- UNION/INTERSECT queries are unsupported if on multiple shards +-- UNION/INTERSECT queries are supported if on multiple shards SELECT * FROM articles WHERE author_id = 10 UNION -SELECT * FROM articles WHERE author_id = 2; +SELECT * FROM articles WHERE author_id = 2 +ORDER BY 1,2,3; -- queries using CTEs are supported WITH long_names AS ( SELECT id FROM authors WHERE char_length(name) > 15 ) diff --git a/src/test/regress/sql/multi_subquery.sql b/src/test/regress/sql/multi_subquery.sql index d789774af..223f03aca 100644 --- a/src/test/regress/sql/multi_subquery.sql +++ b/src/test/regress/sql/multi_subquery.sql @@ -4,7 +4,6 @@ -- no need to set shardid sequence given that we're not creating any shards SET citus.next_shard_id TO 570032; -SET citus.enable_router_execution TO FALSE; -- Check that we error out if shard min/max values are not exactly same. SELECT @@ -74,7 +73,9 @@ SELECT count(*) FROM ( SELECT l_orderkey FROM lineitem_subquery JOIN (SELECT random()::int r) sub ON (l_orderkey = r) WHERE r > 10 ) b; --- Check that we error out if there is non relation subqueries +SET client_min_messages TO DEBUG; + +-- If there is non relation subqueries then we recursively plan SELECT count(*) FROM ( (SELECT l_orderkey FROM lineitem_subquery) UNION ALL @@ -82,21 +83,23 @@ SELECT count(*) FROM ) b; --- Check that we error out if queries in union do not include partition columns. - +-- If queries in union do not include partition columns then we recursively plan SELECT count(*) FROM ( (SELECT l_orderkey FROM lineitem_subquery) UNION (SELECT l_partkey FROM lineitem_subquery) ) b; --- Check that we run union queries if partition column is selected. +-- Check that we push down union queries if partition column is selected (no DEBUG messages) SELECT count(*) FROM ( (SELECT l_orderkey FROM lineitem_subquery) UNION (SELECT l_orderkey FROM lineitem_subquery) ) b; + +RESET client_min_messages; + -- we'd error out if inner query has Limit but subquery_pushdown is not set -- but we recursively plan the query SELECT @@ -516,6 +519,8 @@ CREATE TABLE subquery_pruning_varchar_test_table SELECT master_create_distributed_table('subquery_pruning_varchar_test_table', 'a', 'hash'); SELECT master_create_worker_shards('subquery_pruning_varchar_test_table', 4, 1); +-- temporarily disable router executor to test pruning behaviour of subquery pushdown +SET citus.enable_router_execution TO off; SET client_min_messages TO DEBUG2; SELECT * FROM @@ -560,6 +565,8 @@ AS foo; DROP TABLE subquery_pruning_varchar_test_table; +RESET citus.enable_router_execution; + -- Simple join subquery pushdown SELECT avg(array_length(events, 1)) AS event_average @@ -793,4 +800,3 @@ DROP FUNCTION run_command_on_master_and_workers(p_sql text); SET client_min_messages TO DEFAULT; SET citus.subquery_pushdown to OFF; -SET citus.enable_router_execution TO 'true'; diff --git a/src/test/regress/sql/multi_subquery_complex_queries.sql b/src/test/regress/sql/multi_subquery_complex_queries.sql index 2f85722d6..8502b66a5 100644 --- a/src/test/regress/sql/multi_subquery_complex_queries.sql +++ b/src/test/regress/sql/multi_subquery_complex_queries.sql @@ -8,8 +8,6 @@ -- SET citus.next_shard_id TO 1400000; ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 1400000; -SET citus.enable_router_execution TO FALSE; - -- -- UNIONs and JOINs mixed -- @@ -132,7 +130,7 @@ GROUP BY ORDER BY types; --- not supported since events_subquery_2 doesn't have partition key on the target list +-- supported through recursive planning since events_subquery_2 doesn't have partition key on the target list -- within the shuffled target list SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM @@ -191,7 +189,7 @@ GROUP BY ORDER BY types; --- not supported since events_subquery_2 doesn't have partition key on the target list +-- supported through recursive planning since events_subquery_2 doesn't have partition key on the target list SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM ( SELECT *, random() @@ -711,7 +709,7 @@ ON (t.user_id = q.user_id)) as final_query GROUP BY types ORDER BY types; --- not supported since subquery 3 does not have partition key +-- supported through recursive planning since subquery 3 does not have partition key SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM ( SELECT *, random() @@ -763,7 +761,7 @@ INNER JOIN GROUP BY types ORDER BY types; --- not supported since events_subquery_4 does not have partition key on the +-- supported through recursive planning since events_subquery_4 does not have partition key on the -- target list SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM @@ -2105,10 +2103,10 @@ GROUP BY ORDER BY types; --- not supported due to offset +-- supported through recursive planning SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM - ( SELECT *, random() + ( SELECT * FROM ( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types" FROM @@ -2152,7 +2150,7 @@ FROM FROM events_table as "events" WHERE - event_type IN (4, 5)) events_subquery_4) OFFSET 3) t1 + event_type IN (4, 5)) events_subquery_4) ORDER BY 1, 2 OFFSET 3) t1 GROUP BY "t1"."user_id") AS t) "q" INNER JOIN (SELECT @@ -2273,11 +2271,9 @@ FROM ) events_subquery_4)) t1 GROUP BY "t1"."user_id") AS t) "q" INNER JOIN - (SELECT random()::int as user_id) AS t + (SELECT 1 as user_id) AS t ON (t.user_id = q.user_id)) as final_query GROUP BY types ORDER BY types; - -SET citus.enable_router_execution TO TRUE; \ No newline at end of file diff --git a/src/test/regress/sql/multi_subquery_complex_reference_clause.sql b/src/test/regress/sql/multi_subquery_complex_reference_clause.sql index 74d6d7644..334462226 100644 --- a/src/test/regress/sql/multi_subquery_complex_reference_clause.sql +++ b/src/test/regress/sql/multi_subquery_complex_reference_clause.sql @@ -8,8 +8,6 @@ -- SET citus.next_shard_id TO 1400000; ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 1400000; -SET citus.enable_router_execution TO FALSE; - CREATE TABLE user_buy_test_table(user_id int, item_id int, buy_count int); SELECT create_distributed_table('user_buy_test_table', 'user_id'); INSERT INTO user_buy_test_table VALUES(1,2,1); @@ -161,22 +159,27 @@ SELECT count(*) FROM SELECT count(*) FROM user_buy_test_table RIGHT JOIN (SELECT * FROM generate_series(1,10) id) users_ref_test_table ON user_buy_test_table.item_id = users_ref_test_table.id; --- volatile functions cannot be used as table expressions +-- volatile functions can be used as table expressions through recursive planning +SET client_min_messages TO DEBUG; + SELECT count(*) FROM (SELECT random() FROM user_buy_test_table JOIN random() AS users_ref_test_table(id) ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1; --- cannot sneak in a volatile function as a parameter +-- can sneak in a volatile function as a parameter SELECT count(*) FROM - (SELECT random() FROM user_buy_test_table JOIN generate_series(random()::int,10) AS users_ref_test_table(id) - ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1; + (SELECT item_id FROM user_buy_test_table JOIN generate_series(random()::int,10) AS users_ref_test_table(id) + ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1 +WHERE item_id = 6; --- cannot perform a union with table function +-- can perform a union with table function through recursive planning SELECT count(*) FROM (SELECT user_id FROM user_buy_test_table UNION ALL SELECT id FROM generate_series(1,10) AS users_ref_test_table(id)) subquery_1; +RESET client_min_messages; + -- subquery without FROM can be the inner relationship in a join SELECT count(*) FROM (SELECT random() FROM user_buy_test_table JOIN (SELECT 4 AS id) users_ref_test_table @@ -194,7 +197,7 @@ ON user_buy_test_table.item_id = users_ref_test_table.id; SELECT count(*) FROM user_buy_test_table RIGHT JOIN (SELECT 5 AS id) users_ref_test_table ON user_buy_test_table.item_id = users_ref_test_table.id; --- cannot perform a union with subquery without FROM +-- can perform a union with subquery without FROM SELECT count(*) FROM (SELECT user_id FROM user_buy_test_table UNION ALL @@ -770,9 +773,10 @@ INNER JOIN value_1 > 2 and value_1 < 4) AS t ON (t.user_id = q.user_id)) as final_query ORDER BY - types; + types +LIMIT 5; - -- reference table exist in the subquery of union, should error out + -- reference table exist in the subquery of union SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM ( SELECT @@ -981,15 +985,16 @@ SELECT foo.user_id FROM ) as foo; -- not pushdownable since group by is on the reference table column --- recursively planned, but hits unsupported clause type error on the top level query +-- recursively planned SELECT foo.user_id FROM ( SELECT r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id) GROUP BY r.user_id -) as foo; +) as foo +ORDER BY 1 DESC; -- not pushdownable since the group by contains at least one distributed table --- recursively planned, but hits unsupported clause type error on the top level query +-- recursively planned SELECT foo.user_id FROM ( SELECT r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id) @@ -998,13 +1003,16 @@ SELECT foo.user_id FROM ORDER BY 1 LIMIT 3; -- not pushdownable since distinct is on the reference table column --- recursively planned, but hits unsupported clause type error on the top level query +-- recursively planned SELECT foo.user_id FROM ( SELECT DISTINCT r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id) -) as foo; +) as foo +ORDER BY 1 DESC +LIMIT 5; -- not supported since distinct on is on the reference table column +-- but recursively planned SELECT foo.user_id FROM ( SELECT DISTINCT ON(r.user_id) r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id) @@ -1069,7 +1077,8 @@ SELECT * FROM SELECT * FROM ( SELECT DISTINCT users_reference_table.user_id FROM users_reference_table, (SELECT user_id, random() FROM events_table) as us_events WHERE users_reference_table.user_id = us_events.user_id -) as foo; +) as foo +ORDER BY 1; -- the following query is safe to push down since the DISTINCT clause include distribution column SELECT * FROM diff --git a/src/test/regress/sql/multi_subquery_union.sql b/src/test/regress/sql/multi_subquery_union.sql index 5d80d656c..22ef07daa 100644 --- a/src/test/regress/sql/multi_subquery_union.sql +++ b/src/test/regress/sql/multi_subquery_union.sql @@ -6,7 +6,6 @@ -- We don't need shard id sequence here, so commented out to prevent conflicts with concurrent tests -- SET citus.next_shard_id TO 1400000; -SET citus.enable_router_execution TO false; -- a very simple union query SELECT user_id, counter FROM ( @@ -568,18 +567,17 @@ LIMIT 5; -- now lets also have some unsupported queries --- group by is not on the partition key --- but we can still recursively plan it, though that is not suffient for pushdown --- of the whole query +-- group by is not on the partition key, supported through recursive planning SELECT user_id, sum(counter) FROM ( SELECT user_id, sum(value_2) AS counter FROM events_table GROUP BY user_id UNION SELECT value_1 as user_id, sum(value_2) AS counter FROM users_table GROUP BY value_1 ) user_id -GROUP BY user_id; +GROUP BY user_id +ORDER BY 1,2; --- partition key is not selected +-- partition key is not selected, supported through recursive planning SELECT sum(counter) FROM ( SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 1 GROUP BY user_id HAVING sum(value_2) > 25 @@ -594,7 +592,7 @@ FROM ( ) user_id GROUP BY user_id ORDER BY 1 DESC LIMIT 5; --- excepts within unions are not supported +-- excepts within unions are supported through recursive planning SELECT * FROM ( ( @@ -615,7 +613,8 @@ UNION SELECT user_id, sum(value_2) AS counter FROM events_table GROUP BY user_id ) user_id_2 GROUP BY user_id) -) as ftop; +) as ftop +ORDER BY 1,2; -- non-equi join are not supported since there is no equivalence between the partition column SELECT user_id, sum(counter) @@ -719,9 +718,10 @@ SELECT user_id, sum(counter) FROM ( SELECT user_id, sum(value_2) AS counter FROM events_table GROUP BY user_id UNION - SELECT user_id, sum(value_2) AS counter FROM users_table GROUP BY user_id OFFSET 4 + SELECT user_id, sum(value_2) AS counter FROM users_table GROUP BY user_id ORDER BY user_id OFFSET 4 ) user_id -GROUP BY user_id; +GROUP BY user_id +ORDER BY 1,2; -- lower level union does not return partition key with the other relations SELECT * @@ -762,7 +762,8 @@ FROM ( GROUP BY user_id) user_id_2 GROUP BY - user_id)) AS ftop; + user_id)) AS ftop +ORDER BY 1,2; -- some UNION all queries that are going to be pulled up @@ -795,7 +796,7 @@ FROM ORDER BY 1 DESC, 2 DESC LIMIT 5; --- we don't allow joins within unions +-- we allow joins within unions SELECT count(*) FROM @@ -805,9 +806,7 @@ FROM (SELECT users_table.user_id FROM events_table, users_table WHERE events_table.user_id = users_table.user_id) ) b; --- we don't support pushing down subqueries without relations --- recursive planning can replace that query, though the whole --- query is not safe to pushdown +-- we support unions on subqueries without relations through recursive planning SELECT count(*) FROM @@ -817,11 +816,9 @@ FROM (SELECT 1) ) b; --- we don't support pushing down subqueries without relations --- recursive planning can replace that query, though the whole --- query is not safe to pushdown +-- we support pushing down subqueries without relations through recursive planning SELECT - * + count(*) FROM ( (SELECT user_id FROM users_table) @@ -829,7 +826,7 @@ FROM (SELECT (random() * 100)::int) ) b; --- we don't support subqueries without relations +-- we support subqueries without relations within a union SELECT user_id, value_3 FROM @@ -849,9 +846,7 @@ FROM ORDER BY 1 DESC, 2 DESC LIMIT 5; --- we don't support pushing down subqueries without relations --- recursive planning can replace that query, though the whole --- query is not safe to pushdown +-- we support pushing down subqueries without relations through recursive planning SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM ( SELECT *, random() @@ -895,7 +890,5 @@ FROM GROUP BY types ORDER BY types; -SET citus.enable_router_execution TO true; - DROP TABLE events_reference_table; DROP TABLE users_reference_table; diff --git a/src/test/regress/sql/multi_view.sql b/src/test/regress/sql/multi_view.sql index f9fdb18ff..60574b151 100644 --- a/src/test/regress/sql/multi_view.sql +++ b/src/test/regress/sql/multi_view.sql @@ -283,10 +283,11 @@ SELECT et.user_id, et.time FROM events_table et WHERE et.user_id IN (SELECT user -- it is supported when it is a router query SELECT count(*) FROM events_table et WHERE et.user_id IN (SELECT user_id FROM recent_selected_users WHERE user_id = 1); --- expected this to work but it did not +-- union between views is supported through recursive planning (SELECT user_id FROM recent_users) UNION -(SELECT user_id FROM selected_users); +(SELECT user_id FROM selected_users) +ORDER BY 1; -- wrapping it inside a SELECT * works SELECT * @@ -313,7 +314,7 @@ SELECT count(*) (SELECT user_id FROM selected_users) ) u WHERE user_id < 2 AND user_id > 0; --- expected this to work but it does not +-- UNION ALL between views is supported through recursive planning SELECT count(*) FROM ( (SELECT user_id FROM recent_users) diff --git a/src/test/regress/sql/set_operation_and_local_tables.sql b/src/test/regress/sql/set_operation_and_local_tables.sql new file mode 100644 index 000000000..fa2ddd88d --- /dev/null +++ b/src/test/regress/sql/set_operation_and_local_tables.sql @@ -0,0 +1,102 @@ +CREATE SCHEMA recursive_set_local; +SET search_path TO recursive_set_local, public; + +CREATE TABLE recursive_set_local.test (x int, y int); +SELECT create_distributed_table('test', 'x'); + +CREATE TABLE recursive_set_local.ref (a int, b int); +SELECT create_reference_table('ref'); + +CREATE TABLE recursive_set_local.local_test (x int, y int); + +INSERT INTO test VALUES (1,1), (2,2); +INSERT INTO ref VALUES (2,2), (3,3); +INSERT INTO local_test VALUES (3,3), (4,4); + +SET client_min_messages TO DEBUG; + +-- we should be able to run set operations with local tables +(SELECT x FROM test) INTERSECT (SELECT x FROM local_test) ORDER BY 1 DESC; + +-- we should be able to run set operations with generate series +(SELECT x FROM test) INTERSECT (SELECT i FROM generate_series(0, 100) i) ORDER BY 1 DESC; + +-- we'd first recursively plan the query with "test", thus don't need to recursively +-- plan other query +(SELECT x FROM test LIMIT 5) INTERSECT (SELECT i FROM generate_series(0, 100) i) ORDER BY 1 DESC; + +-- this doesn't require any recursive planning +(SELECT a FROM ref) INTERSECT (SELECT i FROM generate_series(0, 100) i) ORDER BY 1 DESC; + +-- same query with a failure on the worker (i.e., division by zero) +(SELECT x FROM test) INTERSECT (SELECT i/0 FROM generate_series(0, 100) i) ORDER BY 1 DESC; + +-- we should be able to run set operations with generate series and local tables as well +((SELECT x FROM local_test) UNION ALL (SELECT x FROM test)) INTERSECT (SELECT i FROM generate_series(0, 100) i) ORDER BY 1 DESC; + +-- two local tables are on different leaf queries, so safe to plan & execute +((SELECT x FROM local_test) UNION ALL (SELECT x FROM test)) INTERSECT (SELECT x FROM local_test) ORDER BY 1 DESC; + +-- use ctes inside unions along with local tables on the top level +WITH +cte_1 AS (SELECT user_id FROM users_table), +cte_2 AS (SELECT user_id FROM events_table) +((SELECT * FROM cte_1) UNION (SELECT * FROM cte_2) UNION (SELECT x FROM local_test)) INTERSECT (SELECT i FROM generate_series(0, 100) i) +ORDER BY 1 DESC; + +-- CTEs inside subqueries unioned with local table +-- final query is real-time +SELECT + count(*) +FROM + ( + ((WITH cte_1 AS (SELECT x FROM test) SELECT * FROM cte_1) UNION + (WITH cte_1 AS (SELECT a FROM ref) SELECT * FROM cte_1)) INTERSECT + (SELECT x FROM local_test) + ) as foo, + test + WHERE test.y = foo.x; + +-- CTEs inside subqueries unioned with local table +-- final query is router +SELECT + count(*) +FROM + ( + ((WITH cte_1 AS (SELECT x FROM test) SELECT * FROM cte_1) UNION + (WITH cte_1 AS (SELECT a FROM ref) SELECT * FROM cte_1)) INTERSECT + (SELECT x FROM local_test) + ) as foo, + ref + WHERE ref.a = foo.x; + +-- subquery union in WHERE clause without parition column equality is recursively planned including the local tables +SELECT * FROM test a WHERE x IN (SELECT x FROM test b UNION SELECT y FROM test c UNION SELECT y FROM local_test d) ORDER BY 1,2; + +-- same query with subquery in where is wrapped in CTE +SELECT * FROM test a WHERE x IN (WITH cte AS (SELECT x FROM test b UNION SELECT y FROM test c UNION SELECT y FROM local_test d) SELECT * FROM cte) ORDER BY 1,2; + +-- not supported since local table is joined with a set operation +SELECT * FROM ((SELECT * FROM test) EXCEPT (SELECT * FROM test ORDER BY x LIMIT 1)) u JOIN local_test USING (x) ORDER BY 1,2; + +-- though we replace some queries including the local query, the intermediate result is on the outer part of an outer join +SELECT * FROM ((SELECT * FROM local_test) INTERSECT (SELECT * FROM test ORDER BY x LIMIT 1)) u LEFT JOIN test USING (x) ORDER BY 1,2; + +-- we replace some queries including the local query, the intermediate result is on the inner part of an outer join +SELECT * FROM ((SELECT * FROM local_test) INTERSECT (SELECT * FROM test ORDER BY x LIMIT 1)) u RIGHT JOIN test USING (x) ORDER BY 1,2; + +-- recurively plan left part of the join, and run a final real-time query +SELECT * FROM ((SELECT * FROM local_test) INTERSECT (SELECT * FROM test ORDER BY x LIMIT 1)) u INNER JOIN test USING (x) ORDER BY 1,2; + +-- distributed table in WHERE clause, but not FROM clause still disallowed +SELECT * FROM ((SELECT x FROM test) UNION (SELECT x FROM (SELECT x FROM local_test) as foo WHERE x IN (SELECT x FROM test))) u ORDER BY 1; + +SET citus.enable_repartition_joins TO ON; + +-- repartition is recursively planned before the set operation +(SELECT x FROM test) INTERSECT (SELECT t1.x FROM test as t1, test as t2 WHERE t1.x = t2.y LIMIT 2) INTERSECT (((SELECT x FROM local_test) UNION ALL (SELECT x FROM test)) INTERSECT (SELECT i FROM generate_series(0, 100) i)) ORDER BY 1 DESC; + +SET citus.enable_repartition_joins TO OFF; + +RESET client_min_messages; +DROP SCHEMA recursive_set_local CASCADE; \ No newline at end of file diff --git a/src/test/regress/sql/set_operations.sql b/src/test/regress/sql/set_operations.sql new file mode 100644 index 000000000..866ea9f8d --- /dev/null +++ b/src/test/regress/sql/set_operations.sql @@ -0,0 +1,173 @@ +CREATE SCHEMA recursive_union; +SET search_path TO recursive_union, public; + +CREATE TABLE recursive_union.test (x int, y int); +SELECT create_distributed_table('test', 'x'); + +CREATE TABLE recursive_union.ref (a int, b int); +SELECT create_reference_table('ref'); + +INSERT INTO test VALUES (1,1), (2,2); +INSERT INTO ref VALUES (2,2), (3,3); + +-- top-level set operations are supported through recursive planning +SET client_min_messages TO DEBUG; + +(SELECT * FROM test) UNION (SELECT * FROM test) ORDER BY 1,2; +(SELECT * FROM test) UNION (SELECT * FROM ref) ORDER BY 1,2; +(SELECT * FROM ref) UNION (SELECT * FROM ref) ORDER BY 1,2; + +(SELECT * FROM test) UNION ALL (SELECT * FROM test) ORDER BY 1,2; +(SELECT * FROM test) UNION ALL (SELECT * FROM ref) ORDER BY 1,2; +(SELECT * FROM ref) UNION ALL (SELECT * FROM ref) ORDER BY 1,2; + +(SELECT * FROM test) INTERSECT (SELECT * FROM test) ORDER BY 1,2; +(SELECT * FROM test) INTERSECT (SELECT * FROM ref) ORDER BY 1,2; +(SELECT * FROM ref) INTERSECT (SELECT * FROM ref) ORDER BY 1,2; + +(SELECT * FROM test) INTERSECT ALL (SELECT * FROM test) ORDER BY 1,2; +(SELECT * FROM test) INTERSECT ALL (SELECT * FROM ref) ORDER BY 1,2; +(SELECT * FROM ref) INTERSECT ALL (SELECT * FROM ref) ORDER BY 1,2; + +(SELECT * FROM test) EXCEPT (SELECT * FROM test) ORDER BY 1,2; +(SELECT * FROM test) EXCEPT (SELECT * FROM ref) ORDER BY 1,2; +(SELECT * FROM ref) EXCEPT (SELECT * FROM ref) ORDER BY 1,2; + +(SELECT * FROM test) EXCEPT ALL (SELECT * FROM test) ORDER BY 1,2; +(SELECT * FROM test) EXCEPT ALL (SELECT * FROM ref) ORDER BY 1,2; +(SELECT * FROM ref) EXCEPT ALL (SELECT * FROM ref) ORDER BY 1,2; + +-- more complex set operation trees are supported +(SELECT * FROM test) +INTERSECT +(SELECT * FROM ref) +UNION ALL +(SELECT s, s FROM generate_series(1,10) s) +EXCEPT +(SELECT 1,1) +UNION +(SELECT test.x, ref.a FROM test LEFT JOIN ref ON (x = a)) +ORDER BY 1,2; + +-- within a subquery, some unions can be pushed down +SELECT * FROM ((SELECT * FROM test) UNION (SELECT * FROM test)) u ORDER BY 1,2; +SELECT * FROM ((SELECT x, y FROM test) UNION (SELECT y, x FROM test)) u ORDER BY 1,2; +SELECT * FROM ((SELECT * FROM test) UNION (SELECT * FROM ref)) u ORDER BY 1,2; +SELECT * FROM ((SELECT * FROM ref) UNION (SELECT * FROM ref)) u ORDER BY 1,2; + +SELECT * FROM ((SELECT * FROM test) UNION ALL (SELECT * FROM test)) u ORDER BY 1,2; +SELECT * FROM ((SELECT x, y FROM test) UNION ALL (SELECT y, x FROM test)) u ORDER BY 1,2; +SELECT * FROM ((SELECT * FROM test) UNION ALL (SELECT * FROM ref)) u ORDER BY 1,2; +SELECT * FROM ((SELECT * FROM ref) UNION ALL (SELECT * FROM ref)) u ORDER BY 1,2; + +SELECT * FROM ((SELECT * FROM test) INTERSECT (SELECT * FROM test)) u ORDER BY 1,2; +SELECT * FROM ((SELECT x, y FROM test) INTERSECT (SELECT y, x FROM test)) u ORDER BY 1,2; +SELECT * FROM ((SELECT * FROM test) INTERSECT (SELECT * FROM ref)) u ORDER BY 1,2; +SELECT * FROM ((SELECT * FROM ref) INTERSECT (SELECT * FROM ref)) u ORDER BY 1,2; + +SELECT * FROM ((SELECT * FROM test) EXCEPT (SELECT * FROM test)) u ORDER BY 1,2; +SELECT * FROM ((SELECT x, y FROM test) EXCEPT (SELECT y, x FROM test)) u ORDER BY 1,2; +SELECT * FROM ((SELECT * FROM test) EXCEPT (SELECT * FROM ref)) u ORDER BY 1,2; +SELECT * FROM ((SELECT * FROM ref) EXCEPT (SELECT * FROM ref)) u ORDER BY 1,2; + +-- unions can even be pushed down within a join +SELECT * FROM ((SELECT * FROM test) UNION (SELECT * FROM test)) u JOIN test USING (x) ORDER BY 1,2; +SELECT * FROM ((SELECT * FROM test) UNION ALL (SELECT * FROM test)) u LEFT JOIN test USING (x) ORDER BY 1,2; + +-- unions cannot be pushed down if one leaf recurs +SELECT * FROM ((SELECT * FROM test) UNION (SELECT * FROM test ORDER BY x LIMIT 1)) u JOIN test USING (x) ORDER BY 1,2; +SELECT * FROM ((SELECT * FROM test) UNION ALL (SELECT * FROM test ORDER BY x LIMIT 1)) u LEFT JOIN test USING (x) ORDER BY 1,2; + +-- unions in a join without partition column equality (column names from first query are used for join) +SELECT * FROM ((SELECT x, y FROM test) UNION (SELECT y, x FROM test)) u JOIN test USING (x) ORDER BY 1,2; +SELECT * FROM ((SELECT x, y FROM test) UNION (SELECT 1, 1 FROM test)) u JOIN test USING (x) ORDER BY 1,2; + +-- a join between a set operation and a generate_series which is pushdownable + SELECT * FROM ((SELECT * FROM test) UNION (SELECT * FROM test ORDER BY x)) u JOIN generate_series(1,10) x USING (x) ORDER BY 1,2; + +-- a join between a set operation and a generate_series which is not pushdownable due to EXCEPT + SELECT * FROM ((SELECT * FROM test) EXCEPT (SELECT * FROM test ORDER BY x)) u JOIN generate_series(1,10) x USING (x) ORDER BY 1,2; + +-- subqueries in WHERE clause with set operations fails due to the current limitaions of recursive planning IN WHERE clause +SELECT * FROM ((SELECT * FROM test) UNION (SELECT * FROM test)) foo WHERE x IN (SELECT y FROM test); + +-- subqueries in WHERE clause forced to be recursively planned +SELECT * FROM ((SELECT * FROM test) UNION (SELECT * FROM test)) foo WHERE x IN (SELECT y FROM test ORDER BY 1 LIMIT 4) ORDER BY 1; + +-- now both the set operations and the sublink is recursively planned +SELECT * FROM ((SELECT x,y FROM test) UNION (SELECT y,x FROM test)) foo WHERE x IN (SELECT y FROM test ORDER BY 1 LIMIT 4) ORDER BY 1; + +-- set operations are recursively planned and not the sublink, thus should error out +SELECT * FROM ((SELECT x,y FROM test) UNION (SELECT y,x FROM test)) foo WHERE x IN (SELECT y FROM test) ORDER BY 1; + +-- set operations works fine with pushdownable window functions +SELECT x, y, rnk FROM (SELECT *, rank() OVER my_win as rnk FROM test WINDOW my_win AS (PARTITION BY x ORDER BY y DESC)) as foo +UNION +SELECT x, y, rnk FROM (SELECT *, rank() OVER my_win as rnk FROM test WINDOW my_win AS (PARTITION BY x ORDER BY y DESC)) as bar +ORDER BY 1 DESC, 2 DESC, 3 DESC; + +-- set operations errors out with non-pushdownable window functions +SELECT x, y, rnk FROM (SELECT *, rank() OVER my_win as rnk FROM test WINDOW my_win AS (PARTITION BY y ORDER BY x DESC)) as foo +UNION +SELECT x, y, rnk FROM (SELECT *, rank() OVER my_win as rnk FROM test WINDOW my_win AS (PARTITION BY y ORDER BY x DESC)) as bar; + +-- other set operations in joins also cannot be pushed down +SELECT * FROM ((SELECT * FROM test) EXCEPT (SELECT * FROM test ORDER BY x LIMIT 1)) u JOIN test USING (x) ORDER BY 1,2; +SELECT * FROM ((SELECT * FROM test) INTERSECT (SELECT * FROM test ORDER BY x LIMIT 1)) u LEFT JOIN test USING (x) ORDER BY 1,2; + +-- distributed table in WHERE clause, but not FROM clause still disallowed +SELECT * FROM ((SELECT * FROM test) UNION (SELECT * FROM ref WHERE a IN (SELECT x FROM test))) u ORDER BY 1,2; + +-- subquery union in WHERE clause with partition column equality and implicit join is pushed down +SELECT * FROM test a WHERE x IN (SELECT x FROM test b WHERE y = 1 UNION SELECT x FROM test c WHERE y = 2) ORDER BY 1,2; + +-- subquery union in WHERE clause with partition column equality, without implicit join on partition column +SELECT * FROM test a WHERE x NOT IN (SELECT x FROM test b WHERE y = 1 UNION SELECT x FROM test c WHERE y = 2) ORDER BY 1,2; + +-- subquery union in WHERE clause without parition column equality is recursively planned +SELECT * FROM test a WHERE x IN (SELECT x FROM test b UNION SELECT y FROM test c) ORDER BY 1,2; + +-- correlated subquery with union in WHERE clause +SELECT * FROM test a WHERE x IN (SELECT x FROM test b UNION SELECT y FROM test c WHERE a.x = c.x) ORDER BY 1,2; + +-- force unions to be planned while subqueries are being planned +SELECT * FROM ((SELECT * FROM test) UNION (SELECT * FROM test) ORDER BY 1,2 LIMIT 5) as foo ORDER BY 1 DESC LIMIT 3; + +-- distinct and count distinct should work without any problems +select count(DISTINCT t.x) FROM ((SELECT DISTINCT x FROM test) UNION (SELECT DISTINCT y FROM test)) as t(x) ORDER BY 1; +select count(DISTINCT t.x) FROM ((SELECT count(DISTINCT x) FROM test) UNION (SELECT count(DISTINCT y) FROM test)) as t(x) ORDER BY 1; + +-- other agg. distincts are also supported when group by includes partition key +select avg(DISTINCT t.x) FROM ((SELECT avg(DISTINCT y) FROM test GROUP BY x) UNION (SELECT avg(DISTINCT y) FROM test GROUP BY x)) as t(x) ORDER BY 1; + +-- other agg. distincts are not supported when group by doesn't include partition key +select count(DISTINCT t.x) FROM ((SELECT avg(DISTINCT y) FROM test GROUP BY y) UNION (SELECT avg(DISTINCT y) FROM test GROUP BY y)) as t(x) ORDER BY 1; + +-- one of the leaves is a repartition join +SET citus.enable_repartition_joins TO ON; + +-- repartition is recursively planned before the set operation +(SELECT x FROM test) INTERSECT (SELECT t1.x FROM test as t1, test as t2 WHERE t1.x = t2.y LIMIT 0) ORDER BY 1 DESC; + +-- repartition is recursively planned with the set operation +(SELECT x FROM test) INTERSECT (SELECT t1.x FROM test as t1, test as t2 WHERE t1.x = t2.y) ORDER BY 1 DESC; + +SET citus.enable_repartition_joins TO OFF; + +-- this should be recursively planned +CREATE VIEW set_view_recursive AS (SELECT y FROM test) UNION (SELECT y FROM test); +SELECT * FROM set_view_recursive ORDER BY 1 DESC; + +-- this should be pushed down +CREATE VIEW set_view_pushdown AS (SELECT x FROM test) UNION (SELECT x FROM test); +SELECT * FROM set_view_pushdown ORDER BY 1 DESC; + +-- this should be recursively planned +CREATE VIEW set_view_recursive_second AS SELECT u.x, test.y FROM ((SELECT x, y FROM test) UNION (SELECT 1, 1 FROM test)) u JOIN test USING (x) ORDER BY 1,2; +SELECT * FROM set_view_recursive_second; + +-- this should create lots of recursive calls since both views and set operations lead to recursive plans :) +((SELECT x FROM set_view_recursive_second) INTERSECT (SELECT * FROM set_view_recursive)) EXCEPT (SELECT * FROM set_view_pushdown); + +RESET client_min_messages; +DROP SCHEMA recursive_union CASCADE; diff --git a/src/test/regress/sql/with_basics.sql b/src/test/regress/sql/with_basics.sql index b60d51b6b..c47ed3df0 100644 --- a/src/test/regress/sql/with_basics.sql +++ b/src/test/regress/sql/with_basics.sql @@ -354,7 +354,7 @@ ORDER BY LIMIT 5; --- SELECT * FROM (SELECT * FROM cte UNION SELECT * FROM distributed_table) a; should error out +-- SELECT * FROM (SELECT * FROM cte UNION SELECT * FROM distributed_table) a; should work WITH cte AS ( SELECT * FROM users_table ) diff --git a/src/test/regress/sql/with_set_operations.sql b/src/test/regress/sql/with_set_operations.sql new file mode 100644 index 000000000..1c212cdd4 --- /dev/null +++ b/src/test/regress/sql/with_set_operations.sql @@ -0,0 +1,214 @@ +-- =================================================================== +-- test recursive planning functionality with Set Operations and CTEs +-- =================================================================== + +SET client_min_messages TO DEBUG1; + +-- use ctes inside unions on the top level +WITH +cte_1 AS (SELECT user_id FROM users_table), +cte_2 AS (SELECT user_id FROM events_table) +(SELECT * FROM cte_1) UNION (SELECT * FROM cte_2) +ORDER BY 1 DESC; + + +-- use ctes inside unions in a subquery +WITH +cte_1 AS (SELECT user_id FROM users_table), +cte_2 AS (SELECT user_id FROM events_table) +SELECT + count(*) +FROM ( + (SELECT * FROM cte_1) UNION (SELECT * FROM cte_2) + ) as foo; + + +-- cte with unions of other ctes +WITH +cte_1 AS (SELECT user_id FROM users_table), +cte_2 AS (SELECT user_id FROM events_table), +cte_3 AS ((SELECT * FROM cte_1) UNION (SELECT * FROM cte_2)) +SELECT * FROM cte_3 ORDER BY 1 DESC; + +-- cte with unions of distributed table +WITH +cte_1 AS ((SELECT user_id FROM users_table) UNION (SELECT user_id FROM users_table)) +SELECT * FROM cte_1 ORDER BY 1 DESC; + +-- cte with unions of tables is intersected with another query +WITH +cte_1 AS ((SELECT user_id FROM users_table) UNION (SELECT user_id FROM users_table)) +(SELECT * FROM cte_1) INTERSECT (SELECT user_id FROM users_table) ORDER BY 1 DESC; + +-- cte with unions of tables is intersected with another query that involves except +WITH +cte_1 AS ((SELECT user_id FROM users_table) UNION (SELECT user_id FROM users_table)) +(SELECT * FROM cte_1) + INTERSECT +((SELECT user_id FROM events_table WHERE user_id < 3) EXCEPT (SELECT user_id FROM users_table WHERE user_id > 4)) ORDER BY 1 DESC; + + +-- CTE inside a top level EXCEPT +(WITH cte_1 AS (SELECT user_id FROM events_table WHERE user_id < 3) SELECT * FROM cte_1) INTERSECT (SELECT user_id FROM users_table) ORDER BY 1; + +-- INTERSECT inside a CTE, which is inside a subquery +SELECT + DISTINCT users_table.user_id +FROM + users_table, + (WITH cte_1 AS (SELECT user_id FROM events_table WHERE user_id < 3 INTERSECT + SELECT user_id FROM events_table WHERE user_id < 2) + SELECT * FROM cte_1) as foo +WHERE + users_table.user_id = foo.user_id +ORDER BY 1 DESC; + +-- UNION is created via outputs of CTEs, which is inside a subquery +-- and the subquery is joined with a distributed table +SELECT + count(*) +FROM + events_table, + ( + WITH + cte_1 AS (SELECT user_id FROM users_table), + cte_2 AS (SELECT user_id FROM events_table) + (SELECT * FROM cte_1) UNION (SELECT * FROM cte_2) + ) as foo +WHERE foo.user_id = events_table.event_type; + +-- joins inside unions that are safe to pushdown +(SELECT DISTINCT events_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id ) +INTERSECT +(SELECT DISTINCT events_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id ) +ORDER BY 1 DESC; + +-- joins inside unions that are not safe to pushdown +(SELECT DISTINCT events_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id LIMIT 10) +INTERSECT +(SELECT DISTINCT events_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id LIMIT 10) +ORDER BY 1 DESC; + +-- joins inside unions that are not safe to pushdown inside a subquery +SELECT + count(*) +FROM + (SELECT DISTINCT value_2 FROM events_table) as events_table, + (WITH foo AS + ((SELECT DISTINCT events_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id ) + INTERSECT + (SELECT DISTINCT events_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id LIMIT 10)) + SELECT * FROM foo) + as foo +WHERE + foo.user_id = events_table.value_2; + +-- joins inside unions some safe to pushdown +SELECT + count(*) +FROM + (WITH events_table AS (SELECT DISTINCT user_id FROM events_table) SELECT * FROM events_table) as events_table, + ((SELECT DISTINCT events_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id ) + INTERSECT + (SELECT DISTINCT events_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id LIMIT 10)) as foo +WHERE + foo.user_id = events_table.user_id; + +-- CTE inside unions +(WITH cte_1 AS (SELECT user_id FROM users_table) SELECT * FROM cte_1) UNION +(WITH cte_1 AS (SELECT user_id FROM users_table) SELECT * FROM cte_1) ORDER BY 1 DESC; + +-- more complex CTEs inside unions +SELECT + count(*) +FROM + ( + (WITH cte_1 AS (SELECT user_id FROM users_table) SELECT * FROM cte_1) UNION + (WITH cte_1 AS (SELECT user_id FROM users_table) SELECT * FROM cte_1) + ) as foo, + users_table + WHERE users_table.value_2 = foo.user_id; + +-- CTEs with less alias than the input subquery +(WITH cte_1(x) AS (SELECT user_id, value_2 FROM users_table) SELECT * FROM cte_1) UNION +(WITH cte_1(x) AS (SELECT user_id, value_2 FROM users_table) SELECT * FROM cte_1) ORDER BY 1 DESC, 2 DESC LIMIT 5; + + +-- simple subqueries in WHERE with unions +SELECT + count(*) +FROM + users_table +WHERE + value_2 IN + ( + WITH cte_1 AS + ( + (SELECT user_id FROM users_table) + UNION + (SELECT user_id FROM events_table) + ) SELECT DISTINCT user_id FROM cte_1 +) +ORDER BY 1 DESC; + +-- simple subqueries in WHERE with unions and ctes +SELECT + count(*) +FROM + users_table +WHERE + value_2 IN + ( + WITH + cte_1 AS (SELECT user_id FROM users_table), + cte_2 AS (SELECT user_id FROM events_table) + (SELECT * FROM cte_1) UNION (SELECT * FROM cte_2) +) +ORDER BY 1 DESC; + +-- unions and ctes inside subqueries in where clause with a pushdownable correlated subquery +SELECT + DISTINCT user_id +FROM + events_table +WHERE + event_type IN +( + SELECT + users_table.user_id + FROM + ( + (WITH cte_1 AS (SELECT user_id FROM users_table) SELECT * FROM cte_1) UNION + (WITH cte_1 AS (SELECT user_id FROM users_table) SELECT * FROM cte_1) + ) as foo, + users_table + WHERE users_table.value_2 = foo.user_id AND events_table.user_id = users_table.user_id +) +ORDER BY 1 DESC; + +-- unions and ctes inside subqueries in where clause with a not pushdownable correlated subquery +-- should error out +SELECT + DISTINCT user_id +FROM + events_table +WHERE + event_type IN +( + SELECT + users_table.user_id + FROM + ( + (WITH cte_1 AS (SELECT user_id FROM users_table) SELECT * FROM cte_1) UNION + (WITH cte_1 AS (SELECT user_id FROM users_table) SELECT * FROM cte_1) + ) as foo, + users_table + WHERE users_table.value_2 = foo.user_id AND events_table.user_id = users_table.user_id + LIMIT 5 +) +ORDER BY 1 DESC; + + +SET client_min_messages TO DEFAULT; + +SET search_path TO public; \ No newline at end of file