diff --git a/src/backend/distributed/planner/cte_inline.c b/src/backend/distributed/planner/cte_inline.c new file mode 100644 index 000000000..42fe06102 --- /dev/null +++ b/src/backend/distributed/planner/cte_inline.c @@ -0,0 +1,532 @@ +/*------------------------------------------------------------------------- + * + * cte_inline.c + * For multi-shard queries, Citus can only recursively plan CTEs. Instead, + * with the functions defined in this file, the certain CTEs can be inlined + * as subqueries in the query tree. In that case, more optimal distributed + * planning, the query pushdown planning, kicks in and the CTEs can actually + * be pushed down as long as it is safe to pushdown as a subquery. + * + * Most of the logic in this function is inspired (and some is copy & pasted) + * from PostgreSQL 12's CTE inlining feature. + * + * Copyright (c) Citus Data, Inc. + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "distributed/cte_inline.h" +#include "nodes/nodeFuncs.h" +#if PG_VERSION_NUM >= 120000 +#include "optimizer/optimizer.h" +#else +#include "optimizer/cost.h" +#include "optimizer/clauses.h" +#endif +#include "rewrite/rewriteManip.h" + +#if PG_VERSION_NUM < 120000 + +/* copy & paste from PG 12 */ +#define PG_12_QTW_EXAMINE_RTES_BEFORE 0x10 +#define PG_12_QTW_EXAMINE_RTES_AFTER 0x20 +bool pg_12_query_tree_walker(Query *query, + bool (*walker)(), + void *context, + int flags); +bool pg_12_range_table_walker(List *rtable, + bool (*walker)(), + void *context, + int flags); +#endif + +typedef struct inline_cte_walker_context +{ + const char *ctename; /* name and relative level of target CTE */ + int levelsup; + int refcount; /* number of remaining references */ + Query *ctequery; /* query to substitute */ + + List *aliascolnames; /* citus addition to Postgres' inline_cte_walker_context */ +} inline_cte_walker_context; + +/* copy & paste from Postgres source, moved into a function for readability 
*/ +static bool PostgreSQLCTEInlineCondition(CommonTableExpr *cte, CmdType cmdType); + +/* the following utility functions are copy & paste from PostgreSQL code */ +static void inline_cte(Query *mainQuery, CommonTableExpr *cte); +static bool inline_cte_walker(Node *node, inline_cte_walker_context *context); +static bool contain_dml(Node *node); +static bool contain_dml_walker(Node *node, void *context); + + +/* the following utility functions are related to Citus' logic */ +static bool RecursivelyInlineCteWalker(Node *node, void *context); +static void InlineCTEsInQueryTree(Query *query); +static bool QueryTreeContainsInlinableCteWalker(Node *node); + + +/* controlled via GUC */ +bool EnableCTEInlining = true; + +/* + * RecursivelyInlineCtesInQueryTree gets a query and recursively traverses the + * tree from top to bottom. On each level, the CTEs that are eligible for + * inlining are inlined as subqueries. This is useful in distributed planning + * because Citus' sub(query) planning logic is superior to CTE planning, where CTEs + * are always recursively planned, which might produce very slow executions. + */ +void +RecursivelyInlineCtesInQueryTree(Query *query) +{ + InlineCTEsInQueryTree(query); + + query_tree_walker(query, RecursivelyInlineCteWalker, NULL, 0); +} + + +/* + * RecursivelyInlineCteWalker recursively finds all the Query nodes and + * recursively inlines eligible CTEs. + */ +static bool +RecursivelyInlineCteWalker(Node *node, void *context) +{ + if (node == NULL) + { + return false; + } + + if (IsA(node, Query)) + { + Query *query = (Query *) node; + + InlineCTEsInQueryTree(query); + + query_tree_walker(query, RecursivelyInlineCteWalker, NULL, 0); + + /* we're done, no need to recurse anymore for this query */ + return false; + } + + return expression_tree_walker(node, RecursivelyInlineCteWalker, context); +} + + +/* + * InlineCTEsInQueryTree gets a query tree and tries to inline CTEs as subqueries + * in the query tree. 
+ * + * Most of the code is coming from PostgreSQL's CTE inlining logic, there are very + * few additions that Citus added, which are already commented in the code. + */ +void +InlineCTEsInQueryTree(Query *query) +{ + ListCell *cteCell = NULL; + + /* iterate on the copy of the list because we'll be modifying query->cteList */ + List *copyOfCteList = list_copy(query->cteList); + foreach(cteCell, copyOfCteList) + { + CommonTableExpr *cte = (CommonTableExpr *) lfirst(cteCell); + + /* + * First, make sure that Postgres is OK to inline the CTE. Later, check for + * distributed query planning constraints that might prevent inlining. + */ + if (PostgreSQLCTEInlineCondition(cte, query->commandType)) + { + elog(DEBUG1, "CTE %s is going to be inlined via " + "distributed planning", cte->ctename); + + /* do the hard work of cte inlining */ + inline_cte(query, cte); + + /* clean-up the necessary fields for distributed planning */ + cte->cterefcount = 0; + query->cteList = list_delete_ptr(query->cteList, cte); + } + } +} + + +/* + * QueryTreeContainsInlinableCTE recursively traverses the queryTree, and returns true + * if any of the (sub)queries in the queryTree contains at least one CTE. + */ +bool +QueryTreeContainsInlinableCTE(Query *queryTree) +{ + return QueryTreeContainsInlinableCteWalker((Node *) queryTree); +} + + +/* + * QueryTreeContainsInlinableCteWalker walks over the node, and returns true if any of + * the (sub)queries in the node contains at least one CTE. + */ +static bool +QueryTreeContainsInlinableCteWalker(Node *node) +{ + if (node == NULL) + { + return false; + } + + if (IsA(node, Query)) + { + Query *query = (Query *) node; + + ListCell *cteCell = NULL; + foreach(cteCell, query->cteList) + { + CommonTableExpr *cte = (CommonTableExpr *) lfirst(cteCell); + + if (PostgreSQLCTEInlineCondition(cte, query->commandType)) + { + /* + * Return true even if we can find a single CTE that is + * eligable for inlining. 
+ */ + return true; + } + } + + return query_tree_walker(query, QueryTreeContainsInlinableCteWalker, NULL, 0); + } + + return expression_tree_walker(node, QueryTreeContainsInlinableCteWalker, NULL); +} + + +/* + * PostgreSQLCTEInlineCondition returns true if the CTE is considered + * safe to inline by Postgres. + */ +static bool +PostgreSQLCTEInlineCondition(CommonTableExpr *cte, CmdType cmdType) +{ + /* + * Consider inlining the CTE (creating RTE_SUBQUERY RTE(s)) instead of + * implementing it as a separately-planned CTE. + * + * We cannot inline if any of these conditions hold: + * + * 1. The user said not to (the CTEMaterializeAlways option). + * + * 2. The CTE is recursive. + * + * 3. The CTE has side-effects; this includes either not being a plain + * SELECT, or containing volatile functions. Inlining might change + * the side-effects, which would be bad. + * + * Otherwise, we have an option whether to inline or not. That should + * always be a win if there's just a single reference, but if the CTE + * is multiply-referenced then it's unclear: inlining adds duplicate + * computations, but the ability to absorb restrictions from the outer + * query level could outweigh that. We do not have nearly enough + * information at this point to tell whether that's true, so we let + * the user express a preference. Our default behavior is to inline + * only singly-referenced CTEs, but a CTE marked CTEMaterializeNever + * will be inlined even if multiply referenced. 
+ */ + if ( +#if PG_VERSION_NUM >= 120000 + (cte->ctematerialized == CTEMaterializeNever || + (cte->ctematerialized == CTEMaterializeDefault && + cte->cterefcount == 1)) && +#else + + /* + * If referenced only once inlining would probably perform + * better, so for pg < 12, try inlining + */ + cte->cterefcount == 1 && +#endif + !cte->cterecursive && + cmdType == CMD_SELECT && + !contain_dml(cte->ctequery) && + !contain_volatile_functions(cte->ctequery)) + { + return true; + } + + return false; +} + + +/* *INDENT-OFF* */ +/* + * inline_cte: convert RTE_CTE references to given CTE into RTE_SUBQUERYs + */ +static void +inline_cte(Query *mainQuery, CommonTableExpr *cte) +{ + struct inline_cte_walker_context context; + + context.ctename = cte->ctename; + /* Start at levelsup = -1 because we'll immediately increment it */ + context.levelsup = -1; + context.refcount = cte->cterefcount; + context.ctequery = castNode(Query, cte->ctequery); + context.aliascolnames = cte->aliascolnames; + + (void) inline_cte_walker((Node *) mainQuery, &context); + + /* Assert we replaced all references */ + Assert(context.refcount == 0); +} + + +/* + * See PostgreSQL's source code at src/backend/optimizer/plan/subselect.c. + */ +static bool +inline_cte_walker(Node *node, inline_cte_walker_context *context) +{ + if (node == NULL) + return false; + if (IsA(node, Query)) + { + Query *query = (Query *) node; + + context->levelsup++; + + /* + * Visit the query's RTE nodes after their contents; otherwise + * query_tree_walker would descend into the newly inlined CTE query, + * which we don't want. 
+ */ +#if PG_VERSION_NUM < 120000 + (void) pg_12_query_tree_walker(query, inline_cte_walker, context, + PG_12_QTW_EXAMINE_RTES_AFTER); +#else + (void) query_tree_walker(query, inline_cte_walker, context, + QTW_EXAMINE_RTES_AFTER); +#endif + context->levelsup--; + + return false; + } + else if (IsA(node, RangeTblEntry)) + { + RangeTblEntry *rte = (RangeTblEntry *) node; + + if (rte->rtekind == RTE_CTE && + strcmp(rte->ctename, context->ctename) == 0 && + rte->ctelevelsup == context->levelsup) + { + /* + * Found a reference to replace. Generate a copy of the CTE query + * with appropriate level adjustment for outer references (e.g., + * to other CTEs). + */ + Query *newquery = copyObject(context->ctequery); + + if (context->levelsup > 0) + IncrementVarSublevelsUp((Node *) newquery, context->levelsup, 1); + + /* + * Convert the RTE_CTE RTE into a RTE_SUBQUERY. + * + * Historically, a FOR UPDATE clause has been treated as extending + * into views and subqueries, but not into CTEs. We preserve this + * distinction by not trying to push rowmarks into the new + * subquery. + */ + rte->rtekind = RTE_SUBQUERY; + rte->subquery = newquery; + rte->security_barrier = false; + + List *columnAliasList = context->aliascolnames; + int columnAliasCount = list_length(columnAliasList); + int columnIndex = 1; + for (; columnIndex < list_length(rte->subquery->targetList) + 1; ++columnIndex) + { + /* + * Rename the column only if a column alias is defined. + * Notice that column alias count could be less than actual + * column count. We only use provided aliases and keep the + * original column names if no alias is defined. 
*/ + if (columnAliasCount >= columnIndex) + { + Value *columnAlias = (Value *) list_nth(columnAliasList, columnIndex - 1); + Assert(IsA(columnAlias, String)); + TargetEntry *targetEntry = + list_nth(rte->subquery->targetList, columnIndex - 1); + Assert(IsA(columnAlias, String)); + targetEntry->resname = strVal(columnAlias); + } + } + + /* Zero out CTE-specific fields */ + rte->ctename = NULL; + rte->ctelevelsup = 0; + rte->self_reference = false; + rte->coltypes = NIL; + rte->coltypmods = NIL; + rte->colcollations = NIL; + + /* Count the number of replacements we've done */ + context->refcount--; + } + + return false; + } + + return expression_tree_walker(node, inline_cte_walker, context); +} + + +/* + * contain_dml: is any subquery not a plain SELECT? + * + * We reject SELECT FOR UPDATE/SHARE as well as INSERT etc. + */ +static bool +contain_dml(Node *node) +{ + return contain_dml_walker(node, NULL); +} + + +static bool +contain_dml_walker(Node *node, void *context) +{ + if (node == NULL) + return false; + if (IsA(node, Query)) + { + Query *query = (Query *) node; + + if (query->commandType != CMD_SELECT || + query->rowMarks != NIL) + return true; + + return query_tree_walker(query, contain_dml_walker, context, 0); + } + return expression_tree_walker(node, contain_dml_walker, context); +} + + +#if PG_VERSION_NUM < 120000 +/* + * pg_12_query_tree_walker is copied from Postgres 12's source + * code. The only difference from query_tree_walker is the two + * new flags added in Postgres 12: QTW_EXAMINE_RTES_AFTER + * and QTW_EXAMINE_RTES_BEFORE. 
*/ +bool +pg_12_query_tree_walker(Query *query, + bool (*walker) (), + void *context, + int flags) +{ + Assert(query != NULL && IsA(query, Query)); + + if (walker((Node *) query->targetList, context)) + return true; + if (walker((Node *) query->withCheckOptions, context)) + return true; + if (walker((Node *) query->onConflict, context)) + return true; + if (walker((Node *) query->returningList, context)) + return true; + if (walker((Node *) query->jointree, context)) + return true; + if (walker(query->setOperations, context)) + return true; + if (walker(query->havingQual, context)) + return true; + if (walker(query->limitOffset, context)) + return true; + if (walker(query->limitCount, context)) + return true; + if (!(flags & QTW_IGNORE_CTE_SUBQUERIES)) + { + if (walker((Node *) query->cteList, context)) + return true; + } + if (!(flags & QTW_IGNORE_RANGE_TABLE)) + { + if (pg_12_range_table_walker(query->rtable, walker, context, flags)) + return true; + } + return false; +} + +/* + * pg_12_range_table_walker is copied from Postgres 12's source + * code. The only difference from range_table_walker is the two + * new flags added in Postgres 12: QTW_EXAMINE_RTES_AFTER + * and QTW_EXAMINE_RTES_BEFORE. + */ +bool +pg_12_range_table_walker(List *rtable, + bool (*walker) (), + void *context, + int flags) +{ + ListCell *rt; + + foreach(rt, rtable) + { + RangeTblEntry *rte = (RangeTblEntry *) lfirst(rt); + + /* + * Walkers might need to examine the RTE node itself either before or + * after visiting its contents (or, conceivably, both). Note that if + * you specify neither flag, the walker won't visit the RTE at all. 
+ */ + if (flags & PG_12_QTW_EXAMINE_RTES_BEFORE) + if (walker(rte, context)) + return true; + + switch (rte->rtekind) + { + case RTE_RELATION: + if (walker(rte->tablesample, context)) + return true; + break; + case RTE_CTE: + case RTE_NAMEDTUPLESTORE: + /* nothing to do */ + break; + case RTE_SUBQUERY: + if (!(flags & QTW_IGNORE_RT_SUBQUERIES)) + if (walker(rte->subquery, context)) + return true; + break; + case RTE_JOIN: + if (!(flags & QTW_IGNORE_JOINALIASES)) + if (walker(rte->joinaliasvars, context)) + return true; + break; + case RTE_FUNCTION: + if (walker(rte->functions, context)) + return true; + break; + case RTE_TABLEFUNC: + if (walker(rte->tablefunc, context)) + return true; + break; + case RTE_VALUES: + if (walker(rte->values_lists, context)) + return true; + break; + } + + if (walker(rte->securityQuals, context)) + return true; + + if (flags & PG_12_QTW_EXAMINE_RTES_AFTER) + if (walker(rte, context)) + return true; + } + return false; +} +#endif + +/* *INDENT-ON* */ diff --git a/src/backend/distributed/planner/distributed_planner.c b/src/backend/distributed/planner/distributed_planner.c index 4aef0b89c..5d017c37d 100644 --- a/src/backend/distributed/planner/distributed_planner.c +++ b/src/backend/distributed/planner/distributed_planner.c @@ -19,6 +19,7 @@ #include "catalog/pg_type.h" #include "distributed/citus_nodefuncs.h" #include "distributed/citus_nodes.h" +#include "distributed/cte_inline.h" #include "distributed/function_call_delegation.h" #include "distributed/insert_select_planner.h" #include "distributed/intermediate_result_pruning.h" @@ -70,6 +71,15 @@ static bool ListContainsDistributedTableRTE(List *rangeTableList); static bool IsUpdateOrDelete(Query *query); static PlannedStmt * CreateDistributedPlannedStmt( DistributedPlanningContext *planContext); +static PlannedStmt * InlineCtesAndCreateDistributedPlannedStmt(uint64 planId, + DistributedPlanningContext + *planContext); +static PlannedStmt * TryCreateDistributedPlannedStmt(PlannedStmt 
*localPlan, + Query *originalQuery, + Query *query, ParamListInfo + boundParams, + PlannerRestrictionContext * + plannerRestrictionContext); static DistributedPlan * CreateDistributedPlan(uint64 planId, Query *originalQuery, Query *query, ParamListInfo boundParams, bool hasUnresolvedParams, @@ -606,6 +616,28 @@ CreateDistributedPlannedStmt(DistributedPlanningContext *planContext) JoinRestrictionContext *joinRestrictionContext = planContext->plannerRestrictionContext->joinRestrictionContext; + PlannedStmt *resultPlan = NULL; + + if (QueryTreeContainsInlinableCTE(planContext->originalQuery)) + { + /* + * Inlining CTEs as subqueries in the query can avoid recursively + * planning some (or all) of the CTEs. In other words, the inlined + * CTEs could become part of query pushdown planning, which is much + * more efficient than recursively planning. So, first try distributed + * planning on the inlined CTEs in the query tree. + * + * We also should fallback to distributed planning with non-inlined CTEs + * if the distributed planning fails with inlined CTEs, because recursively + * planning CTEs can provide full SQL coverage, although it might be slow. 
+ */ + resultPlan = InlineCtesAndCreateDistributedPlannedStmt(planId, planContext); + if (resultPlan != NULL) + { + return resultPlan; + } + } + if (HasUnresolvedExternParamsWalker((Node *) planContext->originalQuery, planContext->boundParams)) { @@ -664,7 +696,7 @@ CreateDistributedPlannedStmt(DistributedPlanningContext *planContext) distributedPlan->planId = planId; /* create final plan by combining local plan with distributed plan */ - PlannedStmt *resultPlan = FinalizePlan(planContext->plan, distributedPlan); + resultPlan = FinalizePlan(planContext->plan, distributedPlan); /* * As explained above, force planning costs to be unrealistically high if @@ -683,6 +715,113 @@ CreateDistributedPlannedStmt(DistributedPlanningContext *planContext) } +/* + * InlineCtesAndCreateDistributedPlannedStmt gets all the parameters required + * for creating a distributed planned statement. The function is primarily a + * wrapper on top of CreateDistributedPlannedStmt(), by first inlining the + * CTEs and calling CreateDistributedPlannedStmt() in PG_TRY() block. The + * function returns NULL if the planning fails on the query where eligable + * CTEs are inlined. + */ +static PlannedStmt * +InlineCtesAndCreateDistributedPlannedStmt(uint64 planId, + DistributedPlanningContext *planContext) +{ + if (!EnableCTEInlining) + { + /* + * In Postgres 12+, users can adjust whether to inline/not inline CTEs + * by [NOT] MATERIALIZED keywords. However, in PG 11, that's not possible. + * So, with this we provide a way to prevent CTE inlining on Postgres 11. + * + * The main use-case for this is not to have divergent test outputs between + * PG 11 vs PG 12, so not very much intended for users. + */ + return NULL; + } + + /* + * We'll inline the CTEs and try distributed planning, preserve the original + * query in case the planning fails and we fallback to recursive planning of + * CTEs. 
+ */ + Query *copyOfOriginalQuery = copyObject(planContext->originalQuery); + + RecursivelyInlineCtesInQueryTree(copyOfOriginalQuery); + + /* after inlining, we shouldn't have any inlinable CTEs */ + Assert(!QueryTreeContainsInlinableCTE(copyOfOriginalQuery)); + + /* simply recurse into CreateDistributedPlannedStmt() in a PG_TRY() block */ + PlannedStmt *result = TryCreateDistributedPlannedStmt(planContext->plan, + copyOfOriginalQuery, + planContext->query, + planContext->boundParams, + planContext-> + plannerRestrictionContext); + + return result; +} + + +/* + * TryCreateDistributedPlannedStmt is a wrapper around CreateDistributedPlannedStmt, simply + * calling it in PG_TRY()/PG_CATCH() block. The function returns a PlannedStmt if the input + * query can be planned by Citus. If not, the function returns NULL and generates a DEBUG4 + * message with the reason for the failure. + */ +static PlannedStmt * +TryCreateDistributedPlannedStmt(PlannedStmt *localPlan, + Query *originalQuery, + Query *query, ParamListInfo boundParams, + PlannerRestrictionContext *plannerRestrictionContext) +{ + MemoryContext savedContext = CurrentMemoryContext; + PlannedStmt *result = NULL; + + DistributedPlanningContext *planContext = palloc0(sizeof(DistributedPlanningContext)); + + planContext->plan = localPlan; + planContext->boundParams = boundParams; + planContext->originalQuery = originalQuery; + planContext->query = query; + planContext->plannerRestrictionContext = plannerRestrictionContext; + + + PG_TRY(); + { + result = CreateDistributedPlannedStmt(planContext); + } + PG_CATCH(); + { + MemoryContextSwitchTo(savedContext); + ErrorData *edata = CopyErrorData(); + FlushErrorState(); + + /* don't try to intercept PANIC or FATAL, let those breeze past us */ + if (edata->elevel != ERROR) + { + PG_RE_THROW(); + } + + ereport(DEBUG4, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("Planning after CTEs inlined failed with " + "\nmessage: %s\ndetail: %s\nhint: %s", + edata->message ? 
edata->message : "", + edata->detail ? edata->detail : "", + edata->hint ? edata->hint : ""))); + + /* leave the error handling system */ + FreeErrorData(edata); + + result = NULL; + } + PG_END_TRY(); + + return result; +} + + /* * CreateDistributedPlan generates a distributed plan for a query. * It goes through 3 steps: diff --git a/src/backend/distributed/shared_library_init.c b/src/backend/distributed/shared_library_init.c index ebdf44198..11cc2c565 100644 --- a/src/backend/distributed/shared_library_init.c +++ b/src/backend/distributed/shared_library_init.c @@ -30,6 +30,7 @@ #include "distributed/commands/multi_copy.h" #include "distributed/commands/utility_hook.h" #include "distributed/connection_management.h" +#include "distributed/cte_inline.h" #include "distributed/distributed_deadlock_detection.h" #include "distributed/intermediate_result_pruning.h" #include "distributed/local_executor.h" @@ -761,6 +762,24 @@ RegisterCitusConfigVariables(void) GUC_STANDARD, NULL, NULL, NULL); + /* + * We shouldn't need this variable after we drop support to PostgreSQL 11 and + * below. So, noting it here with PG_VERSION_NUM < 120000 + */ + DefineCustomBoolVariable( + "citus.enable_cte_inlining", + gettext_noop("When set to false, CTE inlining feature is disabled"), + gettext_noop("This feature is not intended for users. It is developed " + "to get consistent regression test outputs between Postgres 11" + "and Postgres 12. In Postgres 12+, the user can control the behaviour" + "by [NOT] MATERIALIZED keyword on CTEs. 
However, in PG 11, we cannot do " + "that."), + &EnableCTEInlining, + true, + PGC_SUSET, + GUC_NO_SHOW_ALL, + NULL, NULL, NULL); + DefineCustomEnumVariable( "citus.propagate_set_commands", gettext_noop("Sets which SET commands are propagated to workers."), diff --git a/src/include/distributed/cte_inline.h b/src/include/distributed/cte_inline.h new file mode 100644 index 000000000..09cac7bdb --- /dev/null +++ b/src/include/distributed/cte_inline.h @@ -0,0 +1,21 @@ +/*------------------------------------------------------------------------- + * + * cte_inline.h + * Functions and global variables to control cte inlining. + * + * Copyright (c) 2019, Citus Data, Inc. + * + *------------------------------------------------------------------------- + */ + +#ifndef CTE_INLINE_H +#define CTE_INLINE_H + +#include "nodes/parsenodes.h" + +extern bool EnableCTEInlining; + +extern void RecursivelyInlineCtesInQueryTree(Query *query); +extern bool QueryTreeContainsInlinableCTE(Query *queryTree); + +#endif /* CTE_INLINE_H */ diff --git a/src/test/regress/expected/cte_inline.out b/src/test/regress/expected/cte_inline.out new file mode 100644 index 000000000..e6d2145e3 --- /dev/null +++ b/src/test/regress/expected/cte_inline.out @@ -0,0 +1,1397 @@ +CREATE SCHEMA cte_inline; +SET search_path TO cte_inline; +SET citus.next_shard_id TO 1960000; +CREATE TABLE test_table (key int, value text, other_value jsonb); +SELECT create_distributed_table ('test_table', 'key'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- server version because CTE inlining might produce +-- different debug messages in PG 11 vs PG 12 +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int; + substring +--------------------------------------------------------------------- + 12 +(1 row) + +SET client_min_messages TO DEBUG; +-- Citus should not inline this CTE because otherwise it cannot +-- plan the query +WITH cte_1 AS (SELECT * 
FROM test_table) +SELECT + *, (SELECT 1) +FROM + cte_1; +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT key, value, other_value, (SELECT 1) FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + key | value | other_value | ?column? +--------------------------------------------------------------------- +(0 rows) + +-- Should still not be inlined even if NOT MATERIALIZED is passed +WITH cte_1 AS NOT MATERIALIZED (SELECT * FROM test_table) +SELECT + *, (SELECT 1) +FROM + cte_1; +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT key, value, other_value, (SELECT 1) FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + key | value | other_value | ?column? 
+--------------------------------------------------------------------- +(0 rows) + +-- the cte can be inlined because the unsupported +-- part of the query (subquery in WHERE clause) +-- doesn't access the cte +WITH cte_1 AS (SELECT * FROM test_table) +SELECT + count(*) +FROM + cte_1 +WHERE + key IN ( + SELECT + (SELECT 1) + FROM + test_table WHERE key = 1 + ); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Creating router plan +DEBUG: Plan is router executable +DETAIL: distribution column value: 1 +DEBUG: generating subplan XXX_1 for subquery SELECT (SELECT 1) FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.=) 1) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table) cte_1 WHERE (key OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result."?column?" FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result("?column?" integer))) +DEBUG: Router planner cannot handle multi-shard select queries + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- a similar query as the above, and this time the planning +-- fails, but it fails because the subquery in WHERE clause +-- cannot be planned by Citus +WITH cte_1 AS (SELECT * FROM test_table) +SELECT + count(*) +FROM + cte_1 +WHERE + key IN ( + SELECT + key + FROM + test_table + FOR UPDATE + ); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: SELECT FOR UPDATE with table replication factor > 1 not supported for non-reference tables. 
+DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: SELECT FOR UPDATE with table replication factor > 1 not supported for non-reference tables. +ERROR: could not run distributed query with FOR UPDATE/SHARE commands +HINT: Consider using an equality filter on the distributed table's partition column. +-- Citus does the inlining, the planning fails +-- and retries without inlining, which works +-- fine later via recursive planning +WITH cte_1 AS + (SELECT * + FROM test_table) +SELECT *, (SELECT 1) +FROM + (SELECT * + FROM cte_1) AS foo; +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT key, value, other_value, (SELECT 1) FROM (SELECT cte_1.key, cte_1.value, cte_1.other_value FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_1) foo +DEBUG: Creating router plan +DEBUG: Plan is router executable + key | value | other_value | ?column? 
+--------------------------------------------------------------------- +(0 rows) + +-- a little more complicated query tree +-- Citus does the inlining, the planning fails +-- and retries without inlining, which works +WITH top_cte AS + (SELECT * + FROM test_table) +SELECT * +FROM top_cte, + (WITH cte_1 AS + (SELECT * + FROM test_table) SELECT *, (SELECT 1) + FROM + (SELECT * + FROM cte_1) AS foo) AS bar; +DEBUG: CTE top_cte is going to be inlined via distributed planning +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE top_cte: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_2 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan XXX_3 for subquery SELECT key, value, other_value, (SELECT 1) FROM (SELECT cte_1.key, cte_1.value, cte_1.other_value FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_1) foo +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT top_cte.key, top_cte.value, top_cte.other_value, bar.key, bar.value, bar.other_value, bar."?column?" 
FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) top_cte, (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value, intermediate_result."?column?" FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb, "?column?" integer)) bar +DEBUG: Creating router plan +DEBUG: Plan is router executable + key | value | other_value | key | value | other_value | ?column? +--------------------------------------------------------------------- +(0 rows) + +-- CTE is used inside a subquery in WHERE clause +-- the query wouldn't work by inlining, so Citus +-- retries again via recursive planning, which +-- works fine +WITH cte_1 AS + (SELECT * + FROM test_table) +SELECT count(*) +FROM test_table +WHERE KEY IN + (SELECT (SELECT 1) + FROM + (SELECT *, + random() + FROM + (SELECT * + FROM cte_1) AS foo) AS bar); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan XXX_2 for subquery SELECT (SELECT 1) FROM (SELECT foo.key, foo.value, foo.other_value, random() AS random FROM (SELECT cte_1.key, cte_1.value, cte_1.other_value FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, 
other_value jsonb)) cte_1) foo) bar +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result."?column?" FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result("?column?" integer))) +DEBUG: Router planner cannot handle multi-shard select queries + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- cte_1 is used inside another CTE, but still +-- doesn't work when inlined because it is finally +-- used in an unsupported query +-- but still works fine because recursive planning +-- kicks in +WITH cte_1 AS + (SELECT * + FROM test_table) +SELECT (SELECT 1) AS KEY FROM ( + WITH cte_2 AS (SELECT *, random() + FROM (SELECT *,random() FROM cte_1) as foo) +SELECT *, random() FROM cte_2) as bar; +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_2: SELECT key, value, other_value, random, random() AS random FROM (SELECT cte_1.key, cte_1.value, cte_1.other_value, random() AS random FROM (SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table) cte_1) foo +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT (SELECT 1) AS key FROM (SELECT cte_2.key, cte_2.value, cte_2.other_value, cte_2.random, cte_2.random_1 AS random, random() AS random FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value, intermediate_result.random, intermediate_result.random_1 AS random FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb, random double precision, random_1 double precision)) cte_2(key, value, other_value, random, random_1)) bar(key, 
value, other_value, random, random_1, random_2) +DEBUG: Creating router plan +DEBUG: Plan is router executable + key +--------------------------------------------------------------------- +(0 rows) + +-- in this example, cte_2 can be inlined, because it is not used +-- on any query that Citus cannot plan. However, cte_1 should not be +-- inlined, because it is used with a subquery in target list +WITH cte_1 AS (SELECT * FROM test_table), + cte_2 AS (select * from test_table) +SELECT + * +FROM + (SELECT *, (SELECT 1) FROM cte_1) as foo + JOIN + cte_2 + ON (true); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_2 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for subquery SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT foo.key, foo.value, foo.other_value, foo."?column?", cte_2.key, cte_2.value, cte_2.other_value FROM ((SELECT cte_1.key, cte_1.value, cte_1.other_value, (SELECT 1) FROM (SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table) cte_1) foo JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_2 ON (true)) +DEBUG: Router planner cannot handle multi-shard select queries + key | value | other_value | ?column? 
| key | value | other_value +--------------------------------------------------------------------- +(0 rows) + +-- unreferenced CTEs are just ignored +-- by Citus/Postgres +WITH a AS (SELECT * FROM test_table) +SELECT + *, row_number() OVER () +FROM + test_table +WHERE + key = 1; +DEBUG: Creating router plan +DEBUG: Plan is router executable +DETAIL: distribution column value: 1 + key | value | other_value | row_number +--------------------------------------------------------------------- +(0 rows) + +-- router queries are affected by the distributed +-- cte inlining +WITH a AS (SELECT * FROM test_table WHERE key = 1) +SELECT + *, (SELECT 1) +FROM + a +WHERE + key = 1; +DEBUG: CTE a is going to be inlined via distributed planning +DEBUG: Creating router plan +DEBUG: Plan is router executable +DETAIL: distribution column value: 1 + key | value | other_value | ?column? +--------------------------------------------------------------------- +(0 rows) + +-- non router queries are affected by the distributed +-- cte inlining as well +WITH a AS (SELECT * FROM test_table) +SELECT + count(*) +FROM + a +WHERE + key = 1; +DEBUG: CTE a is going to be inlined via distributed planning +DEBUG: Creating router plan +DEBUG: Plan is router executable +DETAIL: distribution column value: 1 + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- explicitely using NOT MATERIALIZED should result in the same +WITH a AS NOT MATERIALIZED (SELECT * FROM test_table) +SELECT + count(*) +FROM + a +WHERE + key = 1; +DEBUG: CTE a is going to be inlined via distributed planning +DEBUG: Creating router plan +DEBUG: Plan is router executable +DETAIL: distribution column value: 1 + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- using MATERIALIZED should cause inlining not to happen +WITH a AS MATERIALIZED (SELECT * FROM test_table) +SELECT + count(*) +FROM + a +WHERE + key = 1; +DEBUG: Router planner cannot 
handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE a: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) a WHERE (key OPERATOR(pg_catalog.=) 1) +DEBUG: Creating router plan +DEBUG: Plan is router executable + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- EXPLAIN should show the difference between materialized an not materialized +EXPLAIN (COSTS OFF) WITH a AS (SELECT * FROM test_table) +SELECT + count(*) +FROM + a +WHERE + key = 1; +DEBUG: CTE a is going to be inlined via distributed planning +DEBUG: Creating router plan +DEBUG: Plan is router executable +DETAIL: distribution column value: 1 + QUERY PLAN +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) + Task Count: 1 + Tasks Shown: All + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Aggregate + -> Seq Scan on test_table_1960000 test_table + Filter: (key = 1) +(8 rows) + +EXPLAIN (COSTS OFF) WITH a AS MATERIALIZED (SELECT * FROM test_table) +SELECT + count(*) +FROM + a +WHERE + key = 1; +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE a: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key 
integer, value text, other_value jsonb)) a WHERE (key OPERATOR(pg_catalog.=) 1) +DEBUG: Creating router plan +DEBUG: Plan is router executable + QUERY PLAN +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) + -> Distributed Subplan XXX_1 + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on test_table_1960000 test_table + Task Count: 1 + Tasks Shown: All + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Aggregate + -> Function Scan on read_intermediate_result intermediate_result + Filter: (key = 1) +(15 rows) + +-- citus should not inline the CTE because it is used multiple times +WITH cte_1 AS (SELECT * FROM test_table) +SELECT + count(*) +FROM + cte_1 as first_entry + JOIN + cte_1 as second_entry + USING (key); +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) first_entry JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) second_entry USING (key)) +DEBUG: Creating router plan +DEBUG: Plan is router executable + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- NOT MATERIALIZED should cause the query to be inlined twice +WITH cte_1 AS NOT MATERIALIZED (SELECT * FROM test_table) 
+SELECT + count(*) +FROM + cte_1 as first_entry + JOIN + cte_1 as second_entry + USING (key); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1] +DEBUG: join prunable for intervals [-2147483648,-1073741825] and [0,1073741823] +DEBUG: join prunable for intervals [-2147483648,-1073741825] and [1073741824,2147483647] +DEBUG: join prunable for intervals [-1073741824,-1] and [-2147483648,-1073741825] +DEBUG: join prunable for intervals [-1073741824,-1] and [0,1073741823] +DEBUG: join prunable for intervals [-1073741824,-1] and [1073741824,2147483647] +DEBUG: join prunable for intervals [0,1073741823] and [-2147483648,-1073741825] +DEBUG: join prunable for intervals [0,1073741823] and [-1073741824,-1] +DEBUG: join prunable for intervals [0,1073741823] and [1073741824,2147483647] +DEBUG: join prunable for intervals [1073741824,2147483647] and [-2147483648,-1073741825] +DEBUG: join prunable for intervals [1073741824,2147483647] and [-1073741824,-1] +DEBUG: join prunable for intervals [1073741824,2147483647] and [0,1073741823] + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- EXPLAIN should show the differences between MATERIALIZED and NOT MATERIALIZED +EXPLAIN (COSTS OFF) WITH cte_1 AS (SELECT * FROM test_table) +SELECT + count(*) +FROM + cte_1 as first_entry + JOIN + cte_1 as second_entry + USING (key); +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 
'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) first_entry JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) second_entry USING (key)) +DEBUG: Creating router plan +DEBUG: Plan is router executable + QUERY PLAN +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) + -> Distributed Subplan XXX_1 + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on test_table_1960000 test_table + Task Count: 1 + Tasks Shown: All + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (intermediate_result.key = intermediate_result_1.key) + -> Sort + Sort Key: intermediate_result.key + -> Function Scan on read_intermediate_result intermediate_result + -> Sort + Sort Key: intermediate_result_1.key + -> Function Scan on read_intermediate_result intermediate_result_1 +(21 rows) + +EXPLAIN (COSTS OFF) WITH cte_1 AS NOT MATERIALIZED (SELECT * FROM test_table) +SELECT + count(*) +FROM + cte_1 as first_entry + JOIN + cte_1 as second_entry + USING (key); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1] +DEBUG: join prunable for intervals [-2147483648,-1073741825] and [0,1073741823] +DEBUG: join prunable for intervals [-2147483648,-1073741825] and [1073741824,2147483647] +DEBUG: join prunable for intervals [-1073741824,-1] and [-2147483648,-1073741825] +DEBUG: join prunable for intervals [-1073741824,-1] and [0,1073741823] +DEBUG: join prunable for intervals [-1073741824,-1] and [1073741824,2147483647] +DEBUG: 
join prunable for intervals [0,1073741823] and [-2147483648,-1073741825] +DEBUG: join prunable for intervals [0,1073741823] and [-1073741824,-1] +DEBUG: join prunable for intervals [0,1073741823] and [1073741824,2147483647] +DEBUG: join prunable for intervals [1073741824,2147483647] and [-2147483648,-1073741825] +DEBUG: join prunable for intervals [1073741824,2147483647] and [-1073741824,-1] +DEBUG: join prunable for intervals [1073741824,2147483647] and [0,1073741823] + QUERY PLAN +--------------------------------------------------------------------- + Aggregate + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Aggregate + -> Hash Join + Hash Cond: (test_table.key = test_table_1.key) + -> Seq Scan on test_table_1960000 test_table + -> Hash + -> Seq Scan on test_table_1960000 test_table_1 +(12 rows) + +-- ctes with volatile functions are not +-- inlined +WITH cte_1 AS (SELECT *, random() FROM test_table) +SELECT + * +FROM + cte_1; +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value, random() AS random FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT key, value, other_value, random FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value, intermediate_result.random FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb, random double precision)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + key | value | other_value | random +--------------------------------------------------------------------- +(0 rows) + +-- even with NOT MATERIALIZED volatile functions should not be inlined +WITH cte_1 AS NOT MATERIALIZED (SELECT *, random() FROM 
test_table) +SELECT + * +FROM + cte_1; +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value, random() AS random FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT key, value, other_value, random FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value, intermediate_result.random FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb, random double precision)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + key | value | other_value | random +--------------------------------------------------------------------- +(0 rows) + +-- cte_1 should be able to inlined even if +-- it is used one level below +WITH cte_1 AS (SELECT * FROM test_table) +SELECT + * +FROM +( + WITH ct2 AS (SELECT * FROM cte_1) + SELECT * FROM ct2 +) as foo; +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE ct2 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries + key | value | other_value +--------------------------------------------------------------------- +(0 rows) + +-- a similar query, but there is also +-- one more cte, which relies on the previous +-- CTE +WITH cte_1 AS (SELECT * FROM test_table) +SELECT + * +FROM +( + WITH cte_2 AS (SELECT * FROM cte_1), + cte_3 AS (SELECT * FROM cte_2) + SELECT * FROM cte_3 +) as foo; +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_2 is going to be inlined via distributed planning +DEBUG: CTE cte_3 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries + key | value | other_value 
+--------------------------------------------------------------------- +(0 rows) + +-- inlined CTE contains a reference to outer query +-- should be fine (because we pushdown the whole query) +SELECT * + FROM + (SELECT * + FROM test_table) AS test_table_cte + JOIN LATERAL + (WITH bar AS (SELECT * + FROM test_table + WHERE key = test_table_cte.key) + SELECT * + FROM + bar + LEFT JOIN test_table u2 ON u2.key = bar.key) AS foo ON TRUE; +DEBUG: CTE bar is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries + key | value | other_value | key | value | other_value | key | value | other_value +--------------------------------------------------------------------- +(0 rows) + +-- inlined CTE contains a reference to outer query +-- should be fine (even if the recursive planning fails +-- to recursively plan the query) +SELECT * + FROM + (SELECT * + FROM test_table) AS test_table_cte + JOIN LATERAL + (WITH bar AS (SELECT * + FROM test_table + WHERE key = test_table_cte.key) + SELECT * + FROM + bar + LEFT JOIN test_table u2 ON u2.key = bar.value::int) AS foo ON TRUE; +DEBUG: CTE bar is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: skipping recursive planning for the subquery since it contains references to outer queries +DEBUG: skipping recursive planning for the subquery since it contains references to outer queries +DEBUG: skipping recursive planning for the subquery since it contains references to outer queries +DEBUG: Router planner cannot handle multi-shard select queries +ERROR: CTEs that refer to other subqueries are not supported in multi-shard queries +-- inlined CTE can recursively planned later, that's the decision +-- recursive planning makes +-- LIMIT 5 in cte2 triggers recusrive planning, after cte inlining +WITH cte_1 AS (SELECT * FROM test_table) +SELECT + * +FROM +( + WITH ct2 AS (SELECT * FROM cte_1 LIMIT 5) + SELECT * FROM ct2 +) as 
foo; +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE ct2 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: push down of limit count: 5 +DEBUG: generating subplan XXX_1 for subquery SELECT key, value, other_value FROM (SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table) cte_1 LIMIT 5 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT key, value, other_value FROM (SELECT ct2.key, ct2.value, ct2.other_value FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) ct2) foo +DEBUG: Creating router plan +DEBUG: Plan is router executable + key | value | other_value +--------------------------------------------------------------------- +(0 rows) + +-- all nested CTEs can be inlinied +WITH cte_1 AS ( + WITH cte_1 AS ( + WITH cte_1 AS ( + WITH cte_1 AS ( + WITH cte_1 AS ( + WITH cte_1 AS ( + WITH cte_1 AS (SELECT count(*), key FROM test_table GROUP BY key) + SELECT * FROM cte_1) + SELECT * FROM cte_1 WHERE key = 1) + SELECT * FROM cte_1 WHERE key = 2) + SELECT * FROM cte_1 WHERE key = 3) + SELECT * FROM cte_1 WHERE key = 4) + SELECT * FROM cte_1 WHERE key = 5) +SELECT * FROM cte_1 WHERE key = 6; +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Creating router plan 
+DEBUG: Plan is router executable +DETAIL: distribution column value: 1 + count | key +--------------------------------------------------------------------- +(0 rows) + +-- ctes can be inlined even if they are used +-- in set operations +WITH cte_1 AS (SELECT * FROM test_table), + cte_2 AS (SELECT * FROM test_table) +(SELECT * FROM cte_1 EXCEPT SELECT * FROM test_table) +UNION +(SELECT * FROM cte_2); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_2 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for subquery SELECT key, value, other_value FROM (SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table) cte_1 +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_2 for subquery SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_3 for subquery SELECT key, value, other_value FROM (SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table) cte_2 +DEBUG: Plan XXX query after replacing subqueries and CTEs: (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb) EXCEPT SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) UNION SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value 
text, other_value jsonb) +DEBUG: Creating router plan +DEBUG: Plan is router executable + key | value | other_value +--------------------------------------------------------------------- +(0 rows) + +-- cte_1 is going to be inlined even inside another set operation +WITH cte_1 AS (SELECT * FROM test_table), + cte_2 AS (SELECT * FROM test_table) +(SELECT *, (SELECT 1) FROM cte_1 EXCEPT SELECT *, 1 FROM test_table) +UNION +(SELECT *, 1 FROM cte_2); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_2 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_2 for CTE cte_2: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan XXX_3 for subquery SELECT key, value, other_value, (SELECT 1) FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_1 +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_4 for subquery SELECT key, value, other_value, 1 FROM cte_inline.test_table +DEBUG: Plan XXX query after replacing subqueries and CTEs: (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value, intermediate_result."?column?" 
FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb, "?column?" integer) EXCEPT SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value, intermediate_result."?column?" FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb, "?column?" integer)) UNION SELECT cte_2.key, cte_2.value, cte_2.other_value, 1 FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_2 +DEBUG: Creating router plan +DEBUG: Plan is router executable + key | value | other_value | ?column? +--------------------------------------------------------------------- +(0 rows) + +-- cte_1 is safe to inline, even if because after inlining +-- it'd be in a query tree where there is a query that is +-- not supported by Citus unless recursively planned +-- cte_2 is on another queryTree, should be fine +WITH cte_1 AS (SELECT * FROM test_table), + cte_2 AS (SELECT * FROM test_table) +(SELECT *, (SELECT key FROM cte_1) FROM test_table) +UNION +(SELECT *, 1 FROM cte_2); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_2 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_2 for CTE cte_2: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: 
Router planner cannot handle multi-shard select queries +ERROR: could not run distributed query with subquery outside the FROM, WHERE and HAVING clauses +HINT: Consider using an equality filter on the distributed table's partition column. +-- after inlining CTEs, the query becomes +-- subquery pushdown with set operations +WITH cte_1 AS (SELECT * FROM test_table), + cte_2 AS (SELECT * FROM test_table) +SELECT * FROM +( + SELECT * FROM cte_1 + UNION + SELECT * FROM cte_2 +) as bar; +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_2 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries + key | value | other_value +--------------------------------------------------------------------- +(0 rows) + +-- cte LEFT JOIN subquery should only work +-- when CTE is inlined, as Citus currently +-- doesn't know how to handle intermediate +-- results in the outer parts of outer +-- queries +WITH cte AS (SELECT * FROM test_table) +SELECT + count(*) +FROM + cte LEFT JOIN test_table USING (key); +DEBUG: CTE cte is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- the CTEs are very simple, so postgres +-- can pull-up the subqueries after inlining +-- the CTEs, and the query that we send to workers +-- becomes a join between two tables +WITH cte_1 AS (SELECT key FROM test_table), + cte_2 AS (SELECT key FROM test_table) +SELECT + count(*) +FROM + cte_1 JOIN cte_2 USING (key); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_2 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1] +DEBUG: join prunable for intervals [-2147483648,-1073741825] and [0,1073741823] +DEBUG: join 
prunable for intervals [-2147483648,-1073741825] and [1073741824,2147483647] +DEBUG: join prunable for intervals [-1073741824,-1] and [-2147483648,-1073741825] +DEBUG: join prunable for intervals [-1073741824,-1] and [0,1073741823] +DEBUG: join prunable for intervals [-1073741824,-1] and [1073741824,2147483647] +DEBUG: join prunable for intervals [0,1073741823] and [-2147483648,-1073741825] +DEBUG: join prunable for intervals [0,1073741823] and [-1073741824,-1] +DEBUG: join prunable for intervals [0,1073741823] and [1073741824,2147483647] +DEBUG: join prunable for intervals [1073741824,2147483647] and [-2147483648,-1073741825] +DEBUG: join prunable for intervals [1073741824,2147483647] and [-1073741824,-1] +DEBUG: join prunable for intervals [1073741824,2147483647] and [0,1073741823] + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- the following query is kind of interesting +-- During INSERT .. SELECT via coordinator, +-- Citus moves the CTEs into SELECT part, and plans/execute +-- the SELECT separately. Thus, fist_table_cte can be inlined +-- by Citus -- but not by Postgres +WITH fist_table_cte AS + (SELECT * FROM test_table) +INSERT INTO test_table + (key, value) + SELECT + key, value + FROM + fist_table_cte; +DEBUG: distributed INSERT ... SELECT can only select from distributed tables +DEBUG: Collecting INSERT ... 
SELECT results on coordinator +DEBUG: CTE fist_table_cte is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +-- the following INSERT..SELECT is even more interesting +-- the CTE becomes pushdownable +INSERT INTO test_table +WITH fist_table_cte AS + (SELECT * FROM test_table) + SELECT + key, value + FROM + fist_table_cte; +DEBUG: CTE fist_table_cte is going to be inlined via distributed planning +DEBUG: distributed statement: INSERT INTO cte_inline.test_table_1960000 AS citus_table_alias (key, value) SELECT key, value FROM (SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table_1960000 test_table) fist_table_cte WHERE ((worker_hash(key) OPERATOR(pg_catalog.>=) '-2147483648'::integer) AND (worker_hash(key) OPERATOR(pg_catalog.<=) '-1073741825'::integer)) +DEBUG: distributed statement: INSERT INTO cte_inline.test_table_1960001 AS citus_table_alias (key, value) SELECT key, value FROM (SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table_1960001 test_table) fist_table_cte WHERE ((worker_hash(key) OPERATOR(pg_catalog.>=) '-1073741824'::integer) AND (worker_hash(key) OPERATOR(pg_catalog.<=) '-1'::integer)) +DEBUG: distributed statement: INSERT INTO cte_inline.test_table_1960002 AS citus_table_alias (key, value) SELECT key, value FROM (SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table_1960002 test_table) fist_table_cte WHERE ((worker_hash(key) OPERATOR(pg_catalog.>=) 0) AND (worker_hash(key) OPERATOR(pg_catalog.<=) 1073741823)) +DEBUG: distributed statement: INSERT INTO cte_inline.test_table_1960003 AS citus_table_alias (key, value) SELECT key, value FROM (SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table_1960003 test_table) fist_table_cte WHERE ((worker_hash(key) OPERATOR(pg_catalog.>=) 1073741824) AND (worker_hash(key) OPERATOR(pg_catalog.<=) 2147483647)) 
+DEBUG: Plan is router executable +-- update/delete/modifying ctes +-- we don't support any cte inlining in modifications +-- queries and modifying CTEs +WITH cte_1 AS (SELECT * FROM test_table) + DELETE FROM test_table WHERE key NOT IN (SELECT key FROM cte_1); +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: DELETE FROM cte_inline.test_table WHERE (NOT (key OPERATOR(pg_catalog.=) ANY (SELECT cte_1.key FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_1))) +DEBUG: Creating router plan +DEBUG: Plan is router executable +-- NOT MATERIALIZED should not CTEs that are used in a modifying query, because +-- we de still don't support it +WITH cte_1 AS NOT MATERIALIZED (SELECT * FROM test_table) + DELETE FROM test_table WHERE key NOT IN (SELECT key FROM cte_1); +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: DELETE FROM cte_inline.test_table WHERE (NOT (key OPERATOR(pg_catalog.=) ANY (SELECT cte_1.key FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_1))) +DEBUG: Creating router plan +DEBUG: Plan is 
router executable +-- we don't inline CTEs if they are modifying CTEs +WITH cte_1 AS (DELETE FROM test_table RETURNING key) +SELECT * FROM cte_1; +DEBUG: data-modifying statements are not supported in the WITH clauses of distributed queries +DEBUG: generating subplan XXX_1 for CTE cte_1: DELETE FROM cte_inline.test_table RETURNING key +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT key FROM (SELECT intermediate_result.key FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + key +--------------------------------------------------------------------- +(0 rows) + +-- NOT MATERIALIZED should not affect modifying CTEs +WITH cte_1 AS NOT MATERIALIZED (DELETE FROM test_table RETURNING key) +SELECT * FROM cte_1; +DEBUG: data-modifying statements are not supported in the WITH clauses of distributed queries +DEBUG: generating subplan XXX_1 for CTE cte_1: DELETE FROM cte_inline.test_table RETURNING key +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT key FROM (SELECT intermediate_result.key FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + key +--------------------------------------------------------------------- +(0 rows) + +-- cte with column aliases +SELECT * FROM test_table, +(WITH cte_1 (x,y) AS (SELECT * FROM test_table), + cte_2 (z,y) AS (SELECT value, other_value, key FROM test_table), + cte_3 (t,m) AS (SELECT z, y, key as cte_2_key FROM cte_2) + SELECT * FROM cte_2, cte_3) as bar; +DEBUG: CTE cte_3 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_2: 
SELECT value, other_value, key FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT test_table.key, test_table.value, test_table.other_value, bar.z, bar.y, bar.key, bar.t, bar.m, bar.cte_2_key FROM cte_inline.test_table, (SELECT cte_2.z, cte_2.y, cte_2.key, cte_3.t, cte_3.m, cte_3.cte_2_key FROM (SELECT intermediate_result.value AS z, intermediate_result.other_value AS y, intermediate_result.key FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value text, other_value jsonb, key integer)) cte_2, (SELECT cte_2_1.z AS t, cte_2_1.y AS m, cte_2_1.key AS cte_2_key FROM (SELECT intermediate_result.value AS z, intermediate_result.other_value AS y, intermediate_result.key FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value text, other_value jsonb, key integer)) cte_2_1) cte_3) bar +DEBUG: Router planner cannot handle multi-shard select queries + key | value | other_value | z | y | key | t | m | cte_2_key +--------------------------------------------------------------------- +(0 rows) + +-- cte used in HAVING subquery just works fine +-- even if it is inlined +WITH cte_1 AS (SELECT max(key) as max FROM test_table) +SELECT + key, count(*) +FROM + test_table +GROUP BY + key +HAVING + (count(*) > (SELECT max FROM cte_1)); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for subquery SELECT max(key) AS max FROM cte_inline.test_table +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT key, count(*) AS count FROM cte_inline.test_table GROUP BY key HAVING (count(*) OPERATOR(pg_catalog.>) (SELECT cte_1.max FROM (SELECT intermediate_result.max FROM read_intermediate_result('XXX_1'::text, 
'binary'::citus_copy_format) intermediate_result(max integer)) cte_1)) +DEBUG: Router planner cannot handle multi-shard select queries + key | count +--------------------------------------------------------------------- +(0 rows) + +-- cte used in ORDER BY just works fine +-- even if it is inlined +WITH cte_1 AS (SELECT max(key) as max FROM test_table) +SELECT + key +FROM + test_table JOIN cte_1 ON (key = max) +ORDER BY + cte_1.max; +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for subquery SELECT max(key) AS max FROM cte_inline.test_table +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT test_table.key FROM (cte_inline.test_table JOIN (SELECT intermediate_result.max FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(max integer)) cte_1 ON ((test_table.key OPERATOR(pg_catalog.=) cte_1.max))) ORDER BY cte_1.max +DEBUG: Router planner cannot handle multi-shard select queries + key +--------------------------------------------------------------------- +(0 rows) + +PREPARE inlined_cte_without_params AS + WITH cte_1 AS (SELECT count(*) FROM test_table GROUP BY key) + SELECT * FROM cte_1; +PREPARE non_inlined_cte_without_params AS + WITH cte_1 AS (SELECT * FROM test_table) + SELECT + *, (SELECT 1) + FROM + cte_1; +PREPARE inlined_cte_has_parameter_on_non_dist_key(int) AS + WITH cte_1 AS (SELECT count(*) FROM test_table WHERE value::int = $1 GROUP BY key) + SELECT * FROM cte_1; +PREPARE inlined_cte_has_parameter_on_dist_key(int) AS + WITH cte_1 AS (SELECT count(*) FROM test_table WHERE key > $1 GROUP BY key) + SELECT * FROM cte_1; +PREPARE non_inlined_cte_has_parameter_on_dist_key(int) AS + WITH cte_1 AS (SELECT * FROM test_table where key > $1) + SELECT + *, (SELECT 1) + FROM + cte_1; +PREPARE retry_planning(int) AS + WITH cte_1 AS 
(SELECT * FROM test_table WHERE key > $1) + SELECT json_object_agg(DISTINCT key, value) FROM cte_1; +EXECUTE inlined_cte_without_params; +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries + count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE inlined_cte_without_params; + count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE inlined_cte_without_params; + count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE inlined_cte_without_params; + count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE inlined_cte_without_params; + count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE inlined_cte_without_params; + count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE non_inlined_cte_without_params; +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT key, value, other_value, (SELECT 1) FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + key | value | other_value | ?column? 
+--------------------------------------------------------------------- +(0 rows) + +EXECUTE non_inlined_cte_without_params; + key | value | other_value | ?column? +--------------------------------------------------------------------- +(0 rows) + +EXECUTE non_inlined_cte_without_params; + key | value | other_value | ?column? +--------------------------------------------------------------------- +(0 rows) + +EXECUTE non_inlined_cte_without_params; + key | value | other_value | ?column? +--------------------------------------------------------------------- +(0 rows) + +EXECUTE non_inlined_cte_without_params; + key | value | other_value | ?column? +--------------------------------------------------------------------- +(0 rows) + +EXECUTE non_inlined_cte_without_params; + key | value | other_value | ?column? +--------------------------------------------------------------------- +(0 rows) + +EXECUTE inlined_cte_has_parameter_on_non_dist_key(1); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries + count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE inlined_cte_has_parameter_on_non_dist_key(2); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries + count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE inlined_cte_has_parameter_on_non_dist_key(3); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries + count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE inlined_cte_has_parameter_on_non_dist_key(4); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries + count +--------------------------------------------------------------------- +(0 rows) 
+ +EXECUTE inlined_cte_has_parameter_on_non_dist_key(5); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries + count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE inlined_cte_has_parameter_on_non_dist_key(6); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries + count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE inlined_cte_has_parameter_on_dist_key(1); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries + count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE inlined_cte_has_parameter_on_dist_key(2); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries + count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE inlined_cte_has_parameter_on_dist_key(3); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries + count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE inlined_cte_has_parameter_on_dist_key(4); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries + count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE inlined_cte_has_parameter_on_dist_key(5); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries + count 
+--------------------------------------------------------------------- +(0 rows) + +EXECUTE inlined_cte_has_parameter_on_dist_key(6); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries + count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE non_inlined_cte_has_parameter_on_dist_key(1); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 1) +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT key, value, other_value, (SELECT 1) FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + key | value | other_value | ?column? 
+--------------------------------------------------------------------- +(0 rows) + +EXECUTE non_inlined_cte_has_parameter_on_dist_key(2); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 2) +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT key, value, other_value, (SELECT 1) FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + key | value | other_value | ?column? +--------------------------------------------------------------------- +(0 rows) + +EXECUTE non_inlined_cte_has_parameter_on_dist_key(3); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 3) +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT key, value, other_value, (SELECT 1) FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + key | value | other_value | ?column? 
+--------------------------------------------------------------------- +(0 rows) + +EXECUTE non_inlined_cte_has_parameter_on_dist_key(4); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 4) +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT key, value, other_value, (SELECT 1) FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + key | value | other_value | ?column? +--------------------------------------------------------------------- +(0 rows) + +EXECUTE non_inlined_cte_has_parameter_on_dist_key(5); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 5) +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT key, value, other_value, (SELECT 1) FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + key | value | other_value | ?column? 
+--------------------------------------------------------------------- +(0 rows) + +EXECUTE non_inlined_cte_has_parameter_on_dist_key(6); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 6) +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT key, value, other_value, (SELECT 1) FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + key | value | other_value | ?column? 
+--------------------------------------------------------------------- +(0 rows) + +EXECUTE retry_planning(1); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries + json_object_agg +--------------------------------------------------------------------- + +(1 row) + +EXECUTE retry_planning(2); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries + json_object_agg +--------------------------------------------------------------------- + +(1 row) + +EXECUTE retry_planning(3); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries + json_object_agg +--------------------------------------------------------------------- + +(1 row) + +EXECUTE retry_planning(4); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries + json_object_agg +--------------------------------------------------------------------- + +(1 row) + +EXECUTE retry_planning(5); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries + json_object_agg +--------------------------------------------------------------------- + +(1 row) + +EXECUTE retry_planning(6); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries + json_object_agg +--------------------------------------------------------------------- + +(1 row) + +-- this test can only work if the CTE is recursively +-- planned +WITH b AS (SELECT * FROM test_table) +SELECT * FROM (SELECT key as x FROM test_table OFFSET 0) as ref LEFT JOIN b ON (ref.x = b.key); +DEBUG: CTE b is going to 
be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for subquery SELECT key AS x FROM cte_inline.test_table OFFSET 0 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT ref.x, b.key, b.value, b.other_value FROM ((SELECT intermediate_result.x FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) ref LEFT JOIN (SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table) b ON ((ref.x OPERATOR(pg_catalog.=) b.key))) +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE b: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_2 for subquery SELECT key AS x FROM cte_inline.test_table OFFSET 0 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT ref.x, b.key, b.value, b.other_value FROM ((SELECT intermediate_result.x FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) ref LEFT JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) b ON ((ref.x OPERATOR(pg_catalog.=) b.key))) +DEBUG: Creating router plan +DEBUG: Plan is router executable + x | key | value | other_value +--------------------------------------------------------------------- +(0 rows) + +-- this becomes a non-colocated subquery join +-- because after the CTEs are inlined the joins +-- become a non-colocated subquery join +WITH a AS (SELECT * FROM 
test_table), +b AS (SELECT * FROM test_table) +SELECT * FROM a LEFT JOIN b ON (a.value = b.value); +DEBUG: CTE a is going to be inlined via distributed planning +DEBUG: CTE b is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for subquery SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT a.key, a.value, a.other_value, b.key, b.value, b.other_value FROM ((SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table) a LEFT JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) b ON ((a.value OPERATOR(pg_catalog.=) b.value))) +DEBUG: Router planner cannot handle multi-shard select queries + key | value | other_value | key | value | other_value +--------------------------------------------------------------------- +(0 rows) + +-- cte a has to be recursively planned because of OFFSET 0 +-- after that, cte b also requires recursive planning +WITH a AS (SELECT * FROM test_table OFFSET 0), +b AS (SELECT * FROM test_table) +SELECT * FROM a LEFT JOIN b ON (a.value = b.value); +DEBUG: CTE a is going to be inlined via distributed planning +DEBUG: CTE b is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for subquery SELECT key, value, other_value FROM cte_inline.test_table OFFSET 0 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT a.key, a.value, a.other_value, b.key, b.value, b.other_value FROM ((SELECT intermediate_result.key, intermediate_result.value, 
intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) a LEFT JOIN (SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table) b ON ((a.value OPERATOR(pg_catalog.=) b.value))) +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE a: SELECT key, value, other_value FROM cte_inline.test_table OFFSET 0 +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_2 for CTE b: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT a.key, a.value, a.other_value, b.key, b.value, b.other_value FROM ((SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) a LEFT JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) b ON ((a.value OPERATOR(pg_catalog.=) b.value))) +DEBUG: Creating router plan +DEBUG: Plan is router executable + key | value | other_value | key | value | other_value +--------------------------------------------------------------------- +(0 rows) + +-- after both CTEs are inlined, this becomes non-colocated subquery join +WITH cte_1 AS (SELECT * FROM test_table), +cte_2 AS (SELECT * FROM test_table) +SELECT * FROM cte_1 JOIN cte_2 ON (cte_1.value > cte_2.value); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_2 is going to be inlined via distributed 
planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for subquery SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT cte_1.key, cte_1.value, cte_1.other_value, cte_2.key, cte_2.value, cte_2.other_value FROM ((SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table) cte_1 JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_2 ON ((cte_1.value OPERATOR(pg_catalog.>) cte_2.value))) +DEBUG: Router planner cannot handle multi-shard select queries + key | value | other_value | key | value | other_value +--------------------------------------------------------------------- +(0 rows) + +-- full join is only supported when both sides are +-- recursively planned +WITH cte_1 AS (SELECT value FROM test_table WHERE key > 1), + cte_2 AS (SELECT value FROM test_table WHERE key > 3) +SELECT * FROM cte_1 FULL JOIN cte_2 USING (value); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_2 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for subquery SELECT value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 3) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT value FROM ((SELECT test_table.value FROM cte_inline.test_table WHERE (test_table.key OPERATOR(pg_catalog.>) 1)) cte_1 FULL JOIN (SELECT intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_2 USING (value)) +DEBUG: Router 
planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 1) +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_2 for CTE cte_2: SELECT value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 3) +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT value FROM ((SELECT intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_1 FULL JOIN (SELECT intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_2 USING (value)) +DEBUG: Creating router plan +DEBUG: Plan is router executable + value +--------------------------------------------------------------------- +(0 rows) + +-- an unsupported agg. for multi-shard queries +-- so CTE has to be recursively planned +WITH cte_1 AS (SELECT * FROM test_table WHERE key > 1) +SELECT json_object_agg(DISTINCT key, value) FROM cte_1; +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries + json_object_agg +--------------------------------------------------------------------- + +(1 row) + +-- both cte_1 and cte_2 are going to be inlined. +-- later, cte_2 is recursively planned since it doesn't have +-- GROUP BY but aggragate in a subquery. 
+-- this is an important example of being able to recursively plan +-- "some" of the CTEs +WITH cte_1 AS (SELECT value FROM test_table WHERE key > 1), + cte_2 AS (SELECT max(value) as value FROM test_table WHERE key > 3) +SELECT * FROM cte_1 JOIN cte_2 USING (value); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_2 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for subquery SELECT max(value) AS value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 3) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT cte_1.value FROM ((SELECT test_table.value FROM cte_inline.test_table WHERE (test_table.key OPERATOR(pg_catalog.>) 1)) cte_1 JOIN (SELECT intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_2 USING (value)) +DEBUG: Router planner cannot handle multi-shard select queries + value +--------------------------------------------------------------------- +(0 rows) + +-- prevent DROP CASCADE to give notices +SET client_min_messages TO ERROR; +DROP SCHEMA cte_inline CASCADE; diff --git a/src/test/regress/expected/cte_inline_0.out b/src/test/regress/expected/cte_inline_0.out new file mode 100644 index 000000000..0ec39e590 --- /dev/null +++ b/src/test/regress/expected/cte_inline_0.out @@ -0,0 +1,1474 @@ +CREATE SCHEMA cte_inline; +SET search_path TO cte_inline; +SET citus.next_shard_id TO 1960000; +CREATE TABLE test_table (key int, value text, other_value jsonb); +SELECT create_distributed_table ('test_table', 'key'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- server version because CTE inlining might produce +-- different debug messages in PG 11 vs PG 12 +SHOW server_version \gset +SELECT 
substring(:'server_version', '\d+')::int; + substring +--------------------------------------------------------------------- + 11 +(1 row) + +SET client_min_messages TO DEBUG; +-- Citus should not inline this CTE because otherwise it cannot +-- plan the query +WITH cte_1 AS (SELECT * FROM test_table) +SELECT + *, (SELECT 1) +FROM + cte_1; +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT key, value, other_value, (SELECT 1) FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + key | value | other_value | ?column? 
+--------------------------------------------------------------------- +(0 rows) + +-- Should still not be inlined even if NOT MATERIALIZED is passed +WITH cte_1 AS NOT MATERIALIZED (SELECT * FROM test_table) +SELECT + *, (SELECT 1) +FROM + cte_1; +ERROR: syntax error at or near "NOT" +-- the cte can be inlined because the unsupported +-- part of the query (subquery in WHERE clause) +-- doesn't access the cte +WITH cte_1 AS (SELECT * FROM test_table) +SELECT + count(*) +FROM + cte_1 +WHERE + key IN ( + SELECT + (SELECT 1) + FROM + test_table WHERE key = 1 + ); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Creating router plan +DEBUG: Plan is router executable +DETAIL: distribution column value: 1 +DEBUG: generating subplan XXX_1 for subquery SELECT (SELECT 1) FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.=) 1) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table) cte_1 WHERE (key OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result."?column?" FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result("?column?" integer))) +DEBUG: Router planner cannot handle multi-shard select queries + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- a similar query as the above, and this time the planning +-- fails, but it fails because the subquery in WHERE clause +-- cannot be planned by Citus +WITH cte_1 AS (SELECT * FROM test_table) +SELECT + count(*) +FROM + cte_1 +WHERE + key IN ( + SELECT + key + FROM + test_table + FOR UPDATE + ); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: SELECT FOR UPDATE with table replication factor > 1 not supported for non-reference tables. 
+DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: SELECT FOR UPDATE with table replication factor > 1 not supported for non-reference tables. +ERROR: could not run distributed query with FOR UPDATE/SHARE commands +HINT: Consider using an equality filter on the distributed table's partition column. +-- Citus does the inlining, the planning fails +-- and retries without inlining, which works +-- fine later via recursive planning +WITH cte_1 AS + (SELECT * + FROM test_table) +SELECT *, (SELECT 1) +FROM + (SELECT * + FROM cte_1) AS foo; +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT key, value, other_value, (SELECT 1) FROM (SELECT cte_1.key, cte_1.value, cte_1.other_value FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_1) foo +DEBUG: Creating router plan +DEBUG: Plan is router executable + key | value | other_value | ?column? 
+--------------------------------------------------------------------- +(0 rows) + +-- a little more complicated query tree +-- Citus does the inlining, the planning fails +-- and retries without inlining, which works +WITH top_cte AS + (SELECT * + FROM test_table) +SELECT * +FROM top_cte, + (WITH cte_1 AS + (SELECT * + FROM test_table) SELECT *, (SELECT 1) + FROM + (SELECT * + FROM cte_1) AS foo) AS bar; +DEBUG: CTE top_cte is going to be inlined via distributed planning +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE top_cte: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_2 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan XXX_3 for subquery SELECT key, value, other_value, (SELECT 1) FROM (SELECT cte_1.key, cte_1.value, cte_1.other_value FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_1) foo +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT top_cte.key, top_cte.value, top_cte.other_value, bar.key, bar.value, bar.other_value, bar."?column?" 
FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) top_cte, (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value, intermediate_result."?column?" FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb, "?column?" integer)) bar +DEBUG: Creating router plan +DEBUG: Plan is router executable + key | value | other_value | key | value | other_value | ?column? +--------------------------------------------------------------------- +(0 rows) + +-- CTE is used inside a subquery in WHERE clause +-- the query wouldn't work by inlining, so Citus +-- retries again via recursive planning, which +-- works fine +WITH cte_1 AS + (SELECT * + FROM test_table) +SELECT count(*) +FROM test_table +WHERE KEY IN + (SELECT (SELECT 1) + FROM + (SELECT *, + random() + FROM + (SELECT * + FROM cte_1) AS foo) AS bar); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan XXX_2 for subquery SELECT (SELECT 1) FROM (SELECT foo.key, foo.value, foo.other_value, random() AS random FROM (SELECT cte_1.key, cte_1.value, cte_1.other_value FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, 
other_value jsonb)) cte_1) foo) bar +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result."?column?" FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result("?column?" integer))) +DEBUG: Router planner cannot handle multi-shard select queries + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- cte_1 is used inside another CTE, but still +-- doesn't work when inlined because it is finally +-- used in an unsupported query +-- but still works fine because recursive planning +-- kicks in +WITH cte_1 AS + (SELECT * + FROM test_table) +SELECT (SELECT 1) AS KEY FROM ( + WITH cte_2 AS (SELECT *, random() + FROM (SELECT *,random() FROM cte_1) as foo) +SELECT *, random() FROM cte_2) as bar; +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_2: SELECT key, value, other_value, random, random() AS random FROM (SELECT cte_1.key, cte_1.value, cte_1.other_value, random() AS random FROM (SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table) cte_1) foo +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT (SELECT 1) AS key FROM (SELECT cte_2.key, cte_2.value, cte_2.other_value, cte_2.random, cte_2.random_1 AS random, random() AS random FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value, intermediate_result.random, intermediate_result.random_1 AS random FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb, random double precision, random_1 double precision)) cte_2(key, value, other_value, random, random_1)) bar(key, 
value, other_value, random, random_1, random_2) +DEBUG: Creating router plan +DEBUG: Plan is router executable + key +--------------------------------------------------------------------- +(0 rows) + +-- in this example, cte_2 can be inlined, because it is not used +-- on any query that Citus cannot plan. However, cte_1 should not be +-- inlined, because it is used with a subquery in target list +WITH cte_1 AS (SELECT * FROM test_table), + cte_2 AS (select * from test_table) +SELECT + * +FROM + (SELECT *, (SELECT 1) FROM cte_1) as foo + JOIN + cte_2 + ON (true); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_2 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for subquery SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT foo.key, foo.value, foo.other_value, foo."?column?", cte_2.key, cte_2.value, cte_2.other_value FROM ((SELECT cte_1.key, cte_1.value, cte_1.other_value, (SELECT 1) FROM (SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table) cte_1) foo JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_2 ON (true)) +DEBUG: Router planner cannot handle multi-shard select queries + key | value | other_value | ?column? 
| key | value | other_value +--------------------------------------------------------------------- +(0 rows) + +-- unreferenced CTEs are just ignored +-- by Citus/Postgres +WITH a AS (SELECT * FROM test_table) +SELECT + *, row_number() OVER () +FROM + test_table +WHERE + key = 1; +DEBUG: Creating router plan +DEBUG: Plan is router executable +DETAIL: distribution column value: 1 + key | value | other_value | row_number +--------------------------------------------------------------------- +(0 rows) + +-- router queries are affected by the distributed +-- cte inlining +WITH a AS (SELECT * FROM test_table WHERE key = 1) +SELECT + *, (SELECT 1) +FROM + a +WHERE + key = 1; +DEBUG: CTE a is going to be inlined via distributed planning +DEBUG: Creating router plan +DEBUG: Plan is router executable +DETAIL: distribution column value: 1 + key | value | other_value | ?column? +--------------------------------------------------------------------- +(0 rows) + +-- non router queries are affected by the distributed +-- cte inlining as well +WITH a AS (SELECT * FROM test_table) +SELECT + count(*) +FROM + a +WHERE + key = 1; +DEBUG: CTE a is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE a: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) a WHERE (key OPERATOR(pg_catalog.=) 1) +DEBUG: Creating router plan +DEBUG: Plan is router executable + count +--------------------------------------------------------------------- + 0 +(1 row) 
+ +-- explicitely using NOT MATERIALIZED should result in the same +WITH a AS NOT MATERIALIZED (SELECT * FROM test_table) +SELECT + count(*) +FROM + a +WHERE + key = 1; +ERROR: syntax error at or near "NOT" +-- using MATERIALIZED should cause inlining not to happen +WITH a AS MATERIALIZED (SELECT * FROM test_table) +SELECT + count(*) +FROM + a +WHERE + key = 1; +ERROR: syntax error at or near "MATERIALIZED" +-- EXPLAIN should show the difference between materialized an not materialized +EXPLAIN (COSTS OFF) WITH a AS (SELECT * FROM test_table) +SELECT + count(*) +FROM + a +WHERE + key = 1; +DEBUG: CTE a is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE a: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) a WHERE (key OPERATOR(pg_catalog.=) 1) +DEBUG: Creating router plan +DEBUG: Plan is router executable + QUERY PLAN +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) + -> Distributed Subplan XXX_1 + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on test_table_1960000 test_table + Task Count: 1 + Tasks Shown: All + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Aggregate + -> Function Scan on read_intermediate_result intermediate_result + Filter: (key = 1) +(15 rows) + +EXPLAIN (COSTS OFF) WITH a AS MATERIALIZED (SELECT * FROM test_table) +SELECT + 
count(*) +FROM + a +WHERE + key = 1; +ERROR: syntax error at or near "MATERIALIZED" +-- citus should not inline the CTE because it is used multiple times +WITH cte_1 AS (SELECT * FROM test_table) +SELECT + count(*) +FROM + cte_1 as first_entry + JOIN + cte_1 as second_entry + USING (key); +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) first_entry JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) second_entry USING (key)) +DEBUG: Creating router plan +DEBUG: Plan is router executable + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- NOT MATERIALIZED should cause the query to be inlined twice +WITH cte_1 AS NOT MATERIALIZED (SELECT * FROM test_table) +SELECT + count(*) +FROM + cte_1 as first_entry + JOIN + cte_1 as second_entry + USING (key); +ERROR: syntax error at or near "NOT" +-- EXPLAIN should show the differences between MATERIALIZED and NOT MATERIALIZED +EXPLAIN (COSTS OFF) WITH cte_1 AS (SELECT * FROM test_table) +SELECT + count(*) +FROM + cte_1 as first_entry + JOIN + cte_1 as second_entry + USING (key); +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select 
queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) first_entry JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) second_entry USING (key)) +DEBUG: Creating router plan +DEBUG: Plan is router executable + QUERY PLAN +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) + -> Distributed Subplan XXX_1 + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on test_table_1960000 test_table + Task Count: 1 + Tasks Shown: All + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Aggregate + -> Merge Join + Merge Cond: (intermediate_result.key = intermediate_result_1.key) + -> Sort + Sort Key: intermediate_result.key + -> Function Scan on read_intermediate_result intermediate_result + -> Sort + Sort Key: intermediate_result_1.key + -> Function Scan on read_intermediate_result intermediate_result_1 +(21 rows) + +EXPLAIN (COSTS OFF) WITH cte_1 AS NOT MATERIALIZED (SELECT * FROM test_table) +SELECT + count(*) +FROM + cte_1 as first_entry + JOIN + cte_1 as second_entry + USING (key); +ERROR: syntax error at or near "NOT" +-- ctes with volatile functions are not +-- inlined +WITH cte_1 AS (SELECT *, random() FROM test_table) +SELECT + * +FROM + cte_1; +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value, random() AS random FROM cte_inline.test_table +DEBUG: Router planner cannot 
handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT key, value, other_value, random FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value, intermediate_result.random FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb, random double precision)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + key | value | other_value | random +--------------------------------------------------------------------- +(0 rows) + +-- even with NOT MATERIALIZED volatile functions should not be inlined +WITH cte_1 AS NOT MATERIALIZED (SELECT *, random() FROM test_table) +SELECT + * +FROM + cte_1; +ERROR: syntax error at or near "NOT" +-- cte_1 should be able to inlined even if +-- it is used one level below +WITH cte_1 AS (SELECT * FROM test_table) +SELECT + * +FROM +( + WITH ct2 AS (SELECT * FROM cte_1) + SELECT * FROM ct2 +) as foo; +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE ct2 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries + key | value | other_value +--------------------------------------------------------------------- +(0 rows) + +-- a similar query, but there is also +-- one more cte, which relies on the previous +-- CTE +WITH cte_1 AS (SELECT * FROM test_table) +SELECT + * +FROM +( + WITH cte_2 AS (SELECT * FROM cte_1), + cte_3 AS (SELECT * FROM cte_2) + SELECT * FROM cte_3 +) as foo; +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_2 is going to be inlined via distributed planning +DEBUG: CTE cte_3 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries + key | value | other_value +--------------------------------------------------------------------- +(0 rows) + +-- inlined CTE contains a reference 
to outer query +-- should be fine (because we pushdown the whole query) +SELECT * + FROM + (SELECT * + FROM test_table) AS test_table_cte + JOIN LATERAL + (WITH bar AS (SELECT * + FROM test_table + WHERE key = test_table_cte.key) + SELECT * + FROM + bar + LEFT JOIN test_table u2 ON u2.key = bar.key) AS foo ON TRUE; +DEBUG: CTE bar is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: skipping recursive planning for the subquery since it contains references to outer queries +DEBUG: skipping recursive planning for the subquery since it contains references to outer queries +DEBUG: skipping recursive planning for the subquery since it contains references to outer queries +DEBUG: Router planner cannot handle multi-shard select queries +ERROR: CTEs that refer to other subqueries are not supported in multi-shard queries +-- inlined CTE contains a reference to outer query +-- should be fine (even if the recursive planning fails +-- to recursively plan the query) +SELECT * + FROM + (SELECT * + FROM test_table) AS test_table_cte + JOIN LATERAL + (WITH bar AS (SELECT * + FROM test_table + WHERE key = test_table_cte.key) + SELECT * + FROM + bar + LEFT JOIN test_table u2 ON u2.key = bar.value::int) AS foo ON TRUE; +DEBUG: CTE bar is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: skipping recursive planning for the subquery since it contains references to outer queries +DEBUG: skipping recursive planning for the subquery since it contains references to outer queries +DEBUG: skipping recursive planning for the subquery since it contains references to outer queries +DEBUG: Router planner cannot handle multi-shard select queries +ERROR: CTEs that refer to other subqueries are not supported in multi-shard queries +-- inlined CTE can recursively planned later, that's the decision +-- recursive planning makes +-- LIMIT 5 in cte2 triggers recusrive 
planning, after cte inlining +WITH cte_1 AS (SELECT * FROM test_table) +SELECT + * +FROM +( + WITH ct2 AS (SELECT * FROM cte_1 LIMIT 5) + SELECT * FROM ct2 +) as foo; +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE ct2 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: push down of limit count: 5 +DEBUG: generating subplan XXX_1 for subquery SELECT key, value, other_value FROM (SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table) cte_1 LIMIT 5 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT key, value, other_value FROM (SELECT ct2.key, ct2.value, ct2.other_value FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) ct2) foo +DEBUG: Creating router plan +DEBUG: Plan is router executable + key | value | other_value +--------------------------------------------------------------------- +(0 rows) + +-- all nested CTEs can be inlinied +WITH cte_1 AS ( + WITH cte_1 AS ( + WITH cte_1 AS ( + WITH cte_1 AS ( + WITH cte_1 AS ( + WITH cte_1 AS ( + WITH cte_1 AS (SELECT count(*), key FROM test_table GROUP BY key) + SELECT * FROM cte_1) + SELECT * FROM cte_1 WHERE key = 1) + SELECT * FROM cte_1 WHERE key = 2) + SELECT * FROM cte_1 WHERE key = 3) + SELECT * FROM cte_1 WHERE key = 4) + SELECT * FROM cte_1 WHERE key = 5) +SELECT * FROM cte_1 WHERE key = 6; +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_1 is going to be inlined via distributed planning 
+DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: WITH cte_1 AS (WITH cte_1 AS (WITH cte_1 AS (WITH cte_1 AS (WITH cte_1 AS (WITH cte_1 AS (SELECT count(*) AS count, test_table.key FROM cte_inline.test_table GROUP BY test_table.key) SELECT cte_1_5.count, cte_1_5.key FROM cte_1 cte_1_5) SELECT cte_1_4.count, cte_1_4.key FROM cte_1 cte_1_4 WHERE (cte_1_4.key OPERATOR(pg_catalog.=) 1)) SELECT cte_1_3.count, cte_1_3.key FROM cte_1 cte_1_3 WHERE (cte_1_3.key OPERATOR(pg_catalog.=) 2)) SELECT cte_1_2.count, cte_1_2.key FROM cte_1 cte_1_2 WHERE (cte_1_2.key OPERATOR(pg_catalog.=) 3)) SELECT cte_1_1.count, cte_1_1.key FROM cte_1 cte_1_1 WHERE (cte_1_1.key OPERATOR(pg_catalog.=) 4)) SELECT count, key FROM cte_1 WHERE (key OPERATOR(pg_catalog.=) 5) +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: WITH cte_1 AS (WITH cte_1 AS (WITH cte_1 AS (WITH cte_1 AS (WITH cte_1 AS (SELECT count(*) AS count, test_table.key FROM cte_inline.test_table GROUP BY test_table.key) SELECT cte_1_4.count, cte_1_4.key FROM cte_1 cte_1_4) SELECT cte_1_3.count, cte_1_3.key FROM cte_1 cte_1_3 WHERE (cte_1_3.key OPERATOR(pg_catalog.=) 1)) SELECT cte_1_2.count, cte_1_2.key FROM cte_1 cte_1_2 WHERE (cte_1_2.key OPERATOR(pg_catalog.=) 2)) 
SELECT cte_1_1.count, cte_1_1.key FROM cte_1 cte_1_1 WHERE (cte_1_1.key OPERATOR(pg_catalog.=) 3)) SELECT count, key FROM cte_1 WHERE (key OPERATOR(pg_catalog.=) 4) +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: WITH cte_1 AS (WITH cte_1 AS (WITH cte_1 AS (WITH cte_1 AS (SELECT count(*) AS count, test_table.key FROM cte_inline.test_table GROUP BY test_table.key) SELECT cte_1_3.count, cte_1_3.key FROM cte_1 cte_1_3) SELECT cte_1_2.count, cte_1_2.key FROM cte_1 cte_1_2 WHERE (cte_1_2.key OPERATOR(pg_catalog.=) 1)) SELECT cte_1_1.count, cte_1_1.key FROM cte_1 cte_1_1 WHERE (cte_1_1.key OPERATOR(pg_catalog.=) 2)) SELECT count, key FROM cte_1 WHERE (key OPERATOR(pg_catalog.=) 3) +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: WITH cte_1 AS (WITH cte_1 AS (WITH cte_1 AS (SELECT count(*) AS count, test_table.key FROM cte_inline.test_table GROUP BY test_table.key) SELECT cte_1_2.count, cte_1_2.key FROM cte_1 cte_1_2) SELECT cte_1_1.count, cte_1_1.key FROM cte_1 cte_1_1 WHERE (cte_1_1.key OPERATOR(pg_catalog.=) 1)) SELECT count, key FROM cte_1 WHERE (key OPERATOR(pg_catalog.=) 2) +DEBUG: CTE cte_1 is going to be inlined via distributed 
planning +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: WITH cte_1 AS (WITH cte_1 AS (SELECT count(*) AS count, test_table.key FROM cte_inline.test_table GROUP BY test_table.key) SELECT cte_1_1.count, cte_1_1.key FROM cte_1 cte_1_1) SELECT count, key FROM cte_1 WHERE (key OPERATOR(pg_catalog.=) 1) +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: WITH cte_1 AS (SELECT count(*) AS count, test_table.key FROM cte_inline.test_table GROUP BY test_table.key) SELECT count, key FROM cte_1 +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT count(*) AS count, key FROM cte_inline.test_table GROUP BY key +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count, key FROM (SELECT intermediate_result.count, intermediate_result.key FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(count bigint, key integer)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count, key FROM (SELECT intermediate_result.count, intermediate_result.key FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(count bigint, key integer)) cte_1 WHERE (key 
OPERATOR(pg_catalog.=) 1) +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count, key FROM (SELECT intermediate_result.count, intermediate_result.key FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(count bigint, key integer)) cte_1 WHERE (key OPERATOR(pg_catalog.=) 2) +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count, key FROM (SELECT intermediate_result.count, intermediate_result.key FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(count bigint, key integer)) cte_1 WHERE (key OPERATOR(pg_catalog.=) 3) +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count, key FROM (SELECT intermediate_result.count, intermediate_result.key FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(count bigint, key integer)) cte_1 WHERE (key OPERATOR(pg_catalog.=) 4) +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count, key FROM (SELECT intermediate_result.count, intermediate_result.key FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(count bigint, key integer)) cte_1 WHERE (key OPERATOR(pg_catalog.=) 5) +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count, key FROM (SELECT intermediate_result.count, intermediate_result.key FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(count bigint, key integer)) cte_1 WHERE (key OPERATOR(pg_catalog.=) 6) +DEBUG: Creating router plan +DEBUG: Plan is router executable + count | key 
+--------------------------------------------------------------------- +(0 rows) + +-- ctes can be inlined even if they are used +-- in set operations +WITH cte_1 AS (SELECT * FROM test_table), + cte_2 AS (SELECT * FROM test_table) +(SELECT * FROM cte_1 EXCEPT SELECT * FROM test_table) +UNION +(SELECT * FROM cte_2); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_2 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for subquery SELECT key, value, other_value FROM (SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table) cte_1 +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_2 for subquery SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_3 for subquery SELECT key, value, other_value FROM (SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table) cte_2 +DEBUG: Plan XXX query after replacing subqueries and CTEs: (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb) EXCEPT SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) UNION SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb) +DEBUG: Creating router plan +DEBUG: Plan is router executable + 
key | value | other_value +--------------------------------------------------------------------- +(0 rows) + +-- cte_1 is going to be inlined even inside another set operation +WITH cte_1 AS (SELECT * FROM test_table), + cte_2 AS (SELECT * FROM test_table) +(SELECT *, (SELECT 1) FROM cte_1 EXCEPT SELECT *, 1 FROM test_table) +UNION +(SELECT *, 1 FROM cte_2); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_2 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_2 for CTE cte_2: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan XXX_3 for subquery SELECT key, value, other_value, (SELECT 1) FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_1 +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_4 for subquery SELECT key, value, other_value, 1 FROM cte_inline.test_table +DEBUG: Plan XXX query after replacing subqueries and CTEs: (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value, intermediate_result."?column?" FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb, "?column?" 
integer) EXCEPT SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value, intermediate_result."?column?" FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb, "?column?" integer)) UNION SELECT cte_2.key, cte_2.value, cte_2.other_value, 1 FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_2 +DEBUG: Creating router plan +DEBUG: Plan is router executable + key | value | other_value | ?column? +--------------------------------------------------------------------- +(0 rows) + +-- cte_1 is safe to inline, even if because after inlining +-- it'd be in a query tree where there is a query that is +-- not supported by Citus unless recursively planned +-- cte_2 is on another queryTree, should be fine +WITH cte_1 AS (SELECT * FROM test_table), + cte_2 AS (SELECT * FROM test_table) +(SELECT *, (SELECT key FROM cte_1) FROM test_table) +UNION +(SELECT *, 1 FROM cte_2); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_2 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_2 for CTE cte_2: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +ERROR: could not run distributed query with subquery outside the FROM, WHERE and HAVING clauses 
+HINT: Consider using an equality filter on the distributed table's partition column. +-- after inlining CTEs, the query becomes +-- subquery pushdown with set operations +WITH cte_1 AS (SELECT * FROM test_table), + cte_2 AS (SELECT * FROM test_table) +SELECT * FROM +( + SELECT * FROM cte_1 + UNION + SELECT * FROM cte_2 +) as bar; +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_2 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries + key | value | other_value +--------------------------------------------------------------------- +(0 rows) + +-- cte LEFT JOIN subquery should only work +-- when CTE is inlined, as Citus currently +-- doesn't know how to handle intermediate +-- results in the outer parts of outer +-- queries +WITH cte AS (SELECT * FROM test_table) +SELECT + count(*) +FROM + cte LEFT JOIN test_table USING (key); +DEBUG: CTE cte is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for subquery SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte LEFT JOIN cte_inline.test_table USING (key)) +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT 
intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte LEFT JOIN cte_inline.test_table USING (key)) +DEBUG: Router planner cannot handle multi-shard select queries +ERROR: cannot pushdown the subquery +DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join +-- the CTEs are very simple, so postgres +-- can pull-up the subqueries after inlining +-- the CTEs, and the query that we send to workers +-- becomes a join between two tables +WITH cte_1 AS (SELECT key FROM test_table), + cte_2 AS (SELECT key FROM test_table) +SELECT + count(*) +FROM + cte_1 JOIN cte_2 USING (key); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_2 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for subquery SELECT key FROM cte_inline.test_table +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT test_table.key FROM cte_inline.test_table) cte_1 JOIN (SELECT intermediate_result.key FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer)) cte_2 USING (key)) +DEBUG: Router planner cannot handle multi-shard select queries + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- the following query is kind of interesting +-- During INSERT .. SELECT via coordinator, +-- Citus moves the CTEs into SELECT part, and plans/execute +-- the SELECT separately. 
Thus, fist_table_cte can be inlined +-- by Citus -- but not by Postgres +WITH fist_table_cte AS + (SELECT * FROM test_table) +INSERT INTO test_table + (key, value) + SELECT + key, value + FROM + fist_table_cte; +DEBUG: distributed INSERT ... SELECT can only select from distributed tables +DEBUG: Collecting INSERT ... SELECT results on coordinator +DEBUG: CTE fist_table_cte is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE fist_table_cte: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT key, value FROM (SELECT fist_table_cte.key, fist_table_cte.value FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) fist_table_cte) citus_insert_select_subquery +DEBUG: Creating router plan +DEBUG: Plan is router executable +-- the following INSERT..SELECT is even more interesting +-- the CTE becomes pushdownable +INSERT INTO test_table +WITH fist_table_cte AS + (SELECT * FROM test_table) + SELECT + key, value + FROM + fist_table_cte; +DEBUG: CTE fist_table_cte is going to be inlined via distributed planning +DEBUG: distributed statement: INSERT INTO cte_inline.test_table_1960000 AS citus_table_alias (key, value) SELECT key, value FROM (SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table_1960000 test_table) fist_table_cte WHERE ((worker_hash(key) OPERATOR(pg_catalog.>=) '-2147483648'::integer) AND (worker_hash(key) OPERATOR(pg_catalog.<=) '-1073741825'::integer)) +DEBUG: distributed statement: INSERT INTO cte_inline.test_table_1960001 AS citus_table_alias (key, 
value) SELECT key, value FROM (SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table_1960001 test_table) fist_table_cte WHERE ((worker_hash(key) OPERATOR(pg_catalog.>=) '-1073741824'::integer) AND (worker_hash(key) OPERATOR(pg_catalog.<=) '-1'::integer)) +DEBUG: distributed statement: INSERT INTO cte_inline.test_table_1960002 AS citus_table_alias (key, value) SELECT key, value FROM (SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table_1960002 test_table) fist_table_cte WHERE ((worker_hash(key) OPERATOR(pg_catalog.>=) 0) AND (worker_hash(key) OPERATOR(pg_catalog.<=) 1073741823)) +DEBUG: distributed statement: INSERT INTO cte_inline.test_table_1960003 AS citus_table_alias (key, value) SELECT key, value FROM (SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table_1960003 test_table) fist_table_cte WHERE ((worker_hash(key) OPERATOR(pg_catalog.>=) 1073741824) AND (worker_hash(key) OPERATOR(pg_catalog.<=) 2147483647)) +DEBUG: Plan is router executable +-- update/delete/modifying ctes +-- we don't support any cte inlining in modifications +-- queries and modifying CTEs +WITH cte_1 AS (SELECT * FROM test_table) + DELETE FROM test_table WHERE key NOT IN (SELECT key FROM cte_1); +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: DELETE FROM cte_inline.test_table WHERE (NOT (key OPERATOR(pg_catalog.=) ANY (SELECT cte_1.key FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_1))) 
+DEBUG: Creating router plan +DEBUG: Plan is router executable +-- NOT MATERIALIZED should not CTEs that are used in a modifying query, because +-- we de still don't support it +WITH cte_1 AS NOT MATERIALIZED (SELECT * FROM test_table) + DELETE FROM test_table WHERE key NOT IN (SELECT key FROM cte_1); +ERROR: syntax error at or near "NOT" +-- we don't inline CTEs if they are modifying CTEs +WITH cte_1 AS (DELETE FROM test_table RETURNING key) +SELECT * FROM cte_1; +DEBUG: data-modifying statements are not supported in the WITH clauses of distributed queries +DEBUG: generating subplan XXX_1 for CTE cte_1: DELETE FROM cte_inline.test_table RETURNING key +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT key FROM (SELECT intermediate_result.key FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + key +--------------------------------------------------------------------- +(0 rows) + +-- NOT MATERIALIZED should not affect modifying CTEs +WITH cte_1 AS NOT MATERIALIZED (DELETE FROM test_table RETURNING key) +SELECT * FROM cte_1; +ERROR: syntax error at or near "NOT" +-- cte with column aliases +SELECT * FROM test_table, +(WITH cte_1 (x,y) AS (SELECT * FROM test_table), + cte_2 (z,y) AS (SELECT value, other_value, key FROM test_table), + cte_3 (t,m) AS (SELECT z, y, key as cte_2_key FROM cte_2) + SELECT * FROM cte_2, cte_3) as bar; +DEBUG: CTE cte_3 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_2: SELECT value, other_value, key FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT test_table.key, test_table.value, test_table.other_value, bar.z, bar.y, bar.key, 
bar.t, bar.m, bar.cte_2_key FROM cte_inline.test_table, (SELECT cte_2.z, cte_2.y, cte_2.key, cte_3.t, cte_3.m, cte_3.cte_2_key FROM (SELECT intermediate_result.value AS z, intermediate_result.other_value AS y, intermediate_result.key FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value text, other_value jsonb, key integer)) cte_2, (SELECT cte_2_1.z AS t, cte_2_1.y AS m, cte_2_1.key AS cte_2_key FROM (SELECT intermediate_result.value AS z, intermediate_result.other_value AS y, intermediate_result.key FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value text, other_value jsonb, key integer)) cte_2_1) cte_3) bar +DEBUG: Router planner cannot handle multi-shard select queries + key | value | other_value | z | y | key | t | m | cte_2_key +--------------------------------------------------------------------- +(0 rows) + +-- cte used in HAVING subquery just works fine +-- even if it is inlined +WITH cte_1 AS (SELECT max(key) as max FROM test_table) +SELECT + key, count(*) +FROM + test_table +GROUP BY + key +HAVING + (count(*) > (SELECT max FROM cte_1)); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for subquery SELECT max(key) AS max FROM cte_inline.test_table +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT key, count(*) AS count FROM cte_inline.test_table GROUP BY key HAVING (count(*) OPERATOR(pg_catalog.>) (SELECT cte_1.max FROM (SELECT intermediate_result.max FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(max integer)) cte_1)) +DEBUG: Router planner cannot handle multi-shard select queries + key | count +--------------------------------------------------------------------- +(0 rows) + +-- cte used in ORDER BY just works fine +-- even 
if it is inlined +WITH cte_1 AS (SELECT max(key) as max FROM test_table) +SELECT + key +FROM + test_table JOIN cte_1 ON (key = max) +ORDER BY + cte_1.max; +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for subquery SELECT max(key) AS max FROM cte_inline.test_table +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT test_table.key FROM (cte_inline.test_table JOIN (SELECT intermediate_result.max FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(max integer)) cte_1 ON ((test_table.key OPERATOR(pg_catalog.=) cte_1.max))) ORDER BY cte_1.max +DEBUG: Router planner cannot handle multi-shard select queries + key +--------------------------------------------------------------------- +(0 rows) + +PREPARE inlined_cte_without_params AS + WITH cte_1 AS (SELECT count(*) FROM test_table GROUP BY key) + SELECT * FROM cte_1; +PREPARE non_inlined_cte_without_params AS + WITH cte_1 AS (SELECT * FROM test_table) + SELECT + *, (SELECT 1) + FROM + cte_1; +PREPARE inlined_cte_has_parameter_on_non_dist_key(int) AS + WITH cte_1 AS (SELECT count(*) FROM test_table WHERE value::int = $1 GROUP BY key) + SELECT * FROM cte_1; +PREPARE inlined_cte_has_parameter_on_dist_key(int) AS + WITH cte_1 AS (SELECT count(*) FROM test_table WHERE key > $1 GROUP BY key) + SELECT * FROM cte_1; +PREPARE non_inlined_cte_has_parameter_on_dist_key(int) AS + WITH cte_1 AS (SELECT * FROM test_table where key > $1) + SELECT + *, (SELECT 1) + FROM + cte_1; +PREPARE retry_planning(int) AS + WITH cte_1 AS (SELECT * FROM test_table WHERE key > $1) + SELECT json_object_agg(DISTINCT key, value) FROM cte_1; +EXECUTE inlined_cte_without_params; +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router 
planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT count(*) AS count FROM cte_inline.test_table GROUP BY key +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count FROM (SELECT intermediate_result.count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(count bigint)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE inlined_cte_without_params; + count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE inlined_cte_without_params; + count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE inlined_cte_without_params; + count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE inlined_cte_without_params; + count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE inlined_cte_without_params; + count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE non_inlined_cte_without_params; +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT key, value, other_value, (SELECT 1) FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_1 
+DEBUG: Creating router plan +DEBUG: Plan is router executable + key | value | other_value | ?column? +--------------------------------------------------------------------- +(0 rows) + +EXECUTE non_inlined_cte_without_params; + key | value | other_value | ?column? +--------------------------------------------------------------------- +(0 rows) + +EXECUTE non_inlined_cte_without_params; + key | value | other_value | ?column? +--------------------------------------------------------------------- +(0 rows) + +EXECUTE non_inlined_cte_without_params; + key | value | other_value | ?column? +--------------------------------------------------------------------- +(0 rows) + +EXECUTE non_inlined_cte_without_params; + key | value | other_value | ?column? +--------------------------------------------------------------------- +(0 rows) + +EXECUTE non_inlined_cte_without_params; + key | value | other_value | ?column? +--------------------------------------------------------------------- +(0 rows) + +EXECUTE inlined_cte_has_parameter_on_non_dist_key(1); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT count(*) AS count FROM cte_inline.test_table WHERE ((value)::integer OPERATOR(pg_catalog.=) 1) GROUP BY key +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count FROM (SELECT intermediate_result.count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(count bigint)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE inlined_cte_has_parameter_on_non_dist_key(2); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router 
planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT count(*) AS count FROM cte_inline.test_table WHERE ((value)::integer OPERATOR(pg_catalog.=) 2) GROUP BY key +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count FROM (SELECT intermediate_result.count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(count bigint)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE inlined_cte_has_parameter_on_non_dist_key(3); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT count(*) AS count FROM cte_inline.test_table WHERE ((value)::integer OPERATOR(pg_catalog.=) 3) GROUP BY key +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count FROM (SELECT intermediate_result.count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(count bigint)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE inlined_cte_has_parameter_on_non_dist_key(4); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT count(*) AS count FROM cte_inline.test_table WHERE ((value)::integer OPERATOR(pg_catalog.=) 4) GROUP BY key +DEBUG: 
Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count FROM (SELECT intermediate_result.count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(count bigint)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE inlined_cte_has_parameter_on_non_dist_key(5); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT count(*) AS count FROM cte_inline.test_table WHERE ((value)::integer OPERATOR(pg_catalog.=) 5) GROUP BY key +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count FROM (SELECT intermediate_result.count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(count bigint)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE inlined_cte_has_parameter_on_non_dist_key(6); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT count(*) AS count FROM cte_inline.test_table WHERE ((value)::integer OPERATOR(pg_catalog.=) 6) GROUP BY key +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count FROM (SELECT 
intermediate_result.count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(count bigint)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE inlined_cte_has_parameter_on_dist_key(1); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT count(*) AS count FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 1) GROUP BY key +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count FROM (SELECT intermediate_result.count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(count bigint)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE inlined_cte_has_parameter_on_dist_key(2); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT count(*) AS count FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 2) GROUP BY key +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count FROM (SELECT intermediate_result.count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(count bigint)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE 
inlined_cte_has_parameter_on_dist_key(3); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT count(*) AS count FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 3) GROUP BY key +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count FROM (SELECT intermediate_result.count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(count bigint)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE inlined_cte_has_parameter_on_dist_key(4); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT count(*) AS count FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 4) GROUP BY key +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count FROM (SELECT intermediate_result.count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(count bigint)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE inlined_cte_has_parameter_on_dist_key(5); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT count(*) AS count FROM 
cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 5) GROUP BY key +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count FROM (SELECT intermediate_result.count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(count bigint)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE inlined_cte_has_parameter_on_dist_key(6); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT count(*) AS count FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 6) GROUP BY key +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count FROM (SELECT intermediate_result.count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(count bigint)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + count +--------------------------------------------------------------------- +(0 rows) + +EXECUTE non_inlined_cte_has_parameter_on_dist_key(1); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 1) +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and 
CTEs: SELECT key, value, other_value, (SELECT 1) FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + key | value | other_value | ?column? +--------------------------------------------------------------------- +(0 rows) + +EXECUTE non_inlined_cte_has_parameter_on_dist_key(2); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 2) +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT key, value, other_value, (SELECT 1) FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + key | value | other_value | ?column? 
+--------------------------------------------------------------------- +(0 rows) + +EXECUTE non_inlined_cte_has_parameter_on_dist_key(3); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 3) +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT key, value, other_value, (SELECT 1) FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + key | value | other_value | ?column? +--------------------------------------------------------------------- +(0 rows) + +EXECUTE non_inlined_cte_has_parameter_on_dist_key(4); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 4) +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT key, value, other_value, (SELECT 1) FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + key | value | other_value | ?column? 
+--------------------------------------------------------------------- +(0 rows) + +EXECUTE non_inlined_cte_has_parameter_on_dist_key(5); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 5) +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT key, value, other_value, (SELECT 1) FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + key | value | other_value | ?column? +--------------------------------------------------------------------- +(0 rows) + +EXECUTE non_inlined_cte_has_parameter_on_dist_key(6); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 6) +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT key, value, other_value, (SELECT 1) FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_1 
+DEBUG: Creating router plan +DEBUG: Plan is router executable + key | value | other_value | ?column? +--------------------------------------------------------------------- +(0 rows) + +EXECUTE retry_planning(1); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 1) +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT json_object_agg(DISTINCT key, value) AS json_object_agg FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + json_object_agg +--------------------------------------------------------------------- + +(1 row) + +EXECUTE retry_planning(2); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 2) +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT json_object_agg(DISTINCT key, value) AS json_object_agg FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router 
executable + json_object_agg +--------------------------------------------------------------------- + +(1 row) + +EXECUTE retry_planning(3); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 3) +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT json_object_agg(DISTINCT key, value) AS json_object_agg FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + json_object_agg +--------------------------------------------------------------------- + +(1 row) + +EXECUTE retry_planning(4); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 4) +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT json_object_agg(DISTINCT key, value) AS json_object_agg FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + json_object_agg 
+--------------------------------------------------------------------- + +(1 row) + +EXECUTE retry_planning(5); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 5) +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT json_object_agg(DISTINCT key, value) AS json_object_agg FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + json_object_agg +--------------------------------------------------------------------- + +(1 row) + +EXECUTE retry_planning(6); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 6) +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT json_object_agg(DISTINCT key, value) AS json_object_agg FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_1 +DEBUG: Creating router plan 
+DEBUG: Plan is router executable + json_object_agg +--------------------------------------------------------------------- + +(1 row) + +-- this test can only work if the CTE is recursively +-- planned +WITH b AS (SELECT * FROM test_table) +SELECT * FROM (SELECT key as x FROM test_table OFFSET 0) as ref LEFT JOIN b ON (ref.x = b.key); +DEBUG: CTE b is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for subquery SELECT key AS x FROM cte_inline.test_table OFFSET 0 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT ref.x, b.key, b.value, b.other_value FROM ((SELECT intermediate_result.x FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) ref LEFT JOIN (SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table) b ON ((ref.x OPERATOR(pg_catalog.=) b.key))) +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE b: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_2 for subquery SELECT key AS x FROM cte_inline.test_table OFFSET 0 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT ref.x, b.key, b.value, b.other_value FROM ((SELECT intermediate_result.x FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) ref LEFT JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) b ON ((ref.x 
OPERATOR(pg_catalog.=) b.key))) +DEBUG: Creating router plan +DEBUG: Plan is router executable + x | key | value | other_value +--------------------------------------------------------------------- +(0 rows) + +-- this becomes a non-colocated subquery join +-- because after the CTEs are inlined the joins +-- become a non-colocated subquery join +WITH a AS (SELECT * FROM test_table), +b AS (SELECT * FROM test_table) +SELECT * FROM a LEFT JOIN b ON (a.value = b.value); +DEBUG: CTE a is going to be inlined via distributed planning +DEBUG: CTE b is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for subquery SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT a.key, a.value, a.other_value, b.key, b.value, b.other_value FROM ((SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table) a LEFT JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) b ON ((a.value OPERATOR(pg_catalog.=) b.value))) +DEBUG: Router planner cannot handle multi-shard select queries + key | value | other_value | key | value | other_value +--------------------------------------------------------------------- +(0 rows) + +-- cte a has to be recursively planned because of OFFSET 0 +-- after that, cte b also requires recursive planning +WITH a AS (SELECT * FROM test_table OFFSET 0), +b AS (SELECT * FROM test_table) +SELECT * FROM a LEFT JOIN b ON (a.value = b.value); +DEBUG: CTE a is going to be inlined via distributed planning +DEBUG: CTE b is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: 
Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for subquery SELECT key, value, other_value FROM cte_inline.test_table OFFSET 0 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT a.key, a.value, a.other_value, b.key, b.value, b.other_value FROM ((SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) a LEFT JOIN (SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table) b ON ((a.value OPERATOR(pg_catalog.=) b.value))) +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE a: SELECT key, value, other_value FROM cte_inline.test_table OFFSET 0 +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_2 for CTE b: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT a.key, a.value, a.other_value, b.key, b.value, b.other_value FROM ((SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) a LEFT JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) b ON ((a.value OPERATOR(pg_catalog.=) b.value))) +DEBUG: Creating router plan +DEBUG: Plan is router executable + key | value | other_value | key | value | other_value 
+--------------------------------------------------------------------- +(0 rows) + +-- after both CTEs are inlined, this becomes non-colocated subquery join +WITH cte_1 AS (SELECT * FROM test_table), +cte_2 AS (SELECT * FROM test_table) +SELECT * FROM cte_1 JOIN cte_2 ON (cte_1.value > cte_2.value); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_2 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for subquery SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT cte_1.key, cte_1.value, cte_1.other_value, cte_2.key, cte_2.value, cte_2.other_value FROM ((SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table) cte_1 JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_2 ON ((cte_1.value OPERATOR(pg_catalog.>) cte_2.value))) +DEBUG: Router planner cannot handle multi-shard select queries + key | value | other_value | key | value | other_value +--------------------------------------------------------------------- +(0 rows) + +-- full join is only supported when both sides are +-- recursively planned +WITH cte_1 AS (SELECT value FROM test_table WHERE key > 1), + cte_2 AS (SELECT value FROM test_table WHERE key > 3) +SELECT * FROM cte_1 FULL JOIN cte_2 USING (value); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_2 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for subquery SELECT value FROM 
cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 3) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT value FROM ((SELECT test_table.value FROM cte_inline.test_table WHERE (test_table.key OPERATOR(pg_catalog.>) 1)) cte_1 FULL JOIN (SELECT intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_2 USING (value)) +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 1) +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_2 for CTE cte_2: SELECT value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 3) +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT value FROM ((SELECT intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_1 FULL JOIN (SELECT intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_2 USING (value)) +DEBUG: Creating router plan +DEBUG: Plan is router executable + value +--------------------------------------------------------------------- +(0 rows) + +-- an unsupported agg. 
for multi-shard queries +-- so CTE has to be recursively planned +WITH cte_1 AS (SELECT * FROM test_table WHERE key > 1) +SELECT json_object_agg(DISTINCT key, value) FROM cte_1; +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 1) +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT json_object_agg(DISTINCT key, value) AS json_object_agg FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) cte_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable + json_object_agg +--------------------------------------------------------------------- + +(1 row) + +-- both cte_1 and cte_2 are going to be inlined. +-- later, cte_2 is recursively planned since it doesn't have +-- GROUP BY but aggragate in a subquery. 
+-- this is an important example of being able to recursively plan +-- "some" of the CTEs +WITH cte_1 AS (SELECT value FROM test_table WHERE key > 1), + cte_2 AS (SELECT max(value) as value FROM test_table WHERE key > 3) +SELECT * FROM cte_1 JOIN cte_2 USING (value); +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_2 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for subquery SELECT max(value) AS value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 3) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT cte_1.value FROM ((SELECT test_table.value FROM cte_inline.test_table WHERE (test_table.key OPERATOR(pg_catalog.>) 1)) cte_1 JOIN (SELECT intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_2 USING (value)) +DEBUG: Router planner cannot handle multi-shard select queries + value +--------------------------------------------------------------------- +(0 rows) + +-- prevent DROP CASCADE to give notices +SET client_min_messages TO ERROR; +DROP SCHEMA cte_inline CASCADE; diff --git a/src/test/regress/expected/dml_recursive.out b/src/test/regress/expected/dml_recursive.out index c159862a5..75ae7e6fb 100644 --- a/src/test/regress/expected/dml_recursive.out +++ b/src/test/regress/expected/dml_recursive.out @@ -297,6 +297,7 @@ ERROR: complex joins are only supported when all distributed tables are co-loca INSERT INTO second_distributed_table (tenant_id, dept) VALUES ('3', (WITH vals AS (SELECT 3) select * from vals)); +DEBUG: CTE vals is going to be inlined via distributed planning DEBUG: generating subplan XXX_1 for CTE vals: SELECT 3 DEBUG: Plan XXX query after replacing subqueries and CTEs: INSERT INTO recursive_dml_queries.second_distributed_table (tenant_id, dept) 
VALUES ('3'::text, (SELECT vals."?column?" FROM (SELECT intermediate_result."?column?" FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result("?column?" integer)) vals)) ERROR: subqueries are not supported within INSERT queries diff --git a/src/test/regress/expected/failure_cte_subquery.out b/src/test/regress/expected/failure_cte_subquery.out index 6ae86cfd9..ac1dc5aa2 100644 --- a/src/test/regress/expected/failure_cte_subquery.out +++ b/src/test/regress/expected/failure_cte_subquery.out @@ -3,6 +3,9 @@ SET SEARCH_PATH=cte_failure; SET citus.shard_count to 2; SET citus.shard_replication_factor to 1; SET citus.next_shard_id TO 16000000; +-- CTE inlining should not happen because +-- the tests rely on intermediate results +SET citus.enable_cte_inlining TO false; SELECT pg_backend_pid() as pid \gset CREATE TABLE users_table (user_id int, user_name text); CREATE TABLE events_table(user_id int, event_id int, event_type int); diff --git a/src/test/regress/expected/fast_path_router_modify.out b/src/test/regress/expected/fast_path_router_modify.out index 96745a84b..d995a1840 100644 --- a/src/test/regress/expected/fast_path_router_modify.out +++ b/src/test/regress/expected/fast_path_router_modify.out @@ -146,17 +146,15 @@ DEBUG: non-IMMUTABLE functions are not allowed in the RETURNING clause ERROR: non-IMMUTABLE functions are not allowed in the RETURNING clause -- modifying ctes are not supported via fast-path WITH t1 AS (DELETE FROM modify_fast_path WHERE key = 1), t2 AS (SELECT * FROM modify_fast_path) SELECT * FROM t2; +DEBUG: CTE t2 is going to be inlined via distributed planning DEBUG: data-modifying statements are not supported in the WITH clauses of distributed queries DEBUG: generating subplan XXX_1 for CTE t1: DELETE FROM fast_path_router_modify.modify_fast_path WHERE (key OPERATOR(pg_catalog.=) 1) DEBUG: Distributed planning for a fast-path router query DEBUG: Creating router plan DEBUG: Plan is router executable DETAIL: 
distribution column value: 1 -DEBUG: generating subplan XXX_2 for CTE t2: SELECT key, value_1, value_2 FROM fast_path_router_modify.modify_fast_path +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT key, value_1, value_2 FROM (SELECT modify_fast_path.key, modify_fast_path.value_1, modify_fast_path.value_2 FROM fast_path_router_modify.modify_fast_path) t2 DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT key, value_1, value_2 FROM (SELECT intermediate_result.key, intermediate_result.value_1, intermediate_result.value_2 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value_1 integer, value_2 text)) t2 -DEBUG: Creating router plan -DEBUG: Plan is router executable key | value_1 | value_2 --------------------------------------------------------------------- (0 rows) diff --git a/src/test/regress/expected/intermediate_result_pruning.out b/src/test/regress/expected/intermediate_result_pruning.out index c95fec934..3da52fee2 100644 --- a/src/test/regress/expected/intermediate_result_pruning.out +++ b/src/test/regress/expected/intermediate_result_pruning.out @@ -32,6 +32,8 @@ SELECT create_reference_table('ref_table'); (1 row) +-- prevent PG 11 - PG 12 outputs to diverge +SET citus.enable_cte_inlining TO false; -- load some data INSERT INTO table_1 VALUES (1, '1'), (2, '2'), (3, '3'), (4, '4'); INSERT INTO table_2 VALUES (3, '3'), (4, '4'), (5, '5'), (6, '6'); diff --git a/src/test/regress/expected/limit_intermediate_size.out b/src/test/regress/expected/limit_intermediate_size.out index de7c758bf..335eae25a 100644 --- a/src/test/regress/expected/limit_intermediate_size.out +++ b/src/test/regress/expected/limit_intermediate_size.out @@ -1,5 +1,7 @@ SET citus.enable_repartition_joins to ON; SET citus.task_executor_type to 'task-tracker'; +-- prevent PG 11 - PG 12 outputs to diverge +SET citus.enable_cte_inlining TO false; 
SET citus.max_intermediate_result_size TO 2; -- should fail because the copy size is ~4kB for each cte WITH cte AS @@ -296,4 +298,3 @@ SELECT * FROM cte ORDER BY 1,2,3,4,5 LIMIT 10; 1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 1 | 2 (10 rows) -SET citus.task_executor_type to 'adaptive'; diff --git a/src/test/regress/expected/local_shard_execution.out b/src/test/regress/expected/local_shard_execution.out index ebcac1d06..e123d0887 100644 --- a/src/test/regress/expected/local_shard_execution.out +++ b/src/test/regress/expected/local_shard_execution.out @@ -696,6 +696,9 @@ NOTICE: executing the command locally: SELECT key FROM local_shard_execution.re WITH distributed_local_mixed AS (SELECT * FROM distributed_table), local_insert AS (INSERT INTO distributed_table VALUES (1, '11',21) ON CONFLICT(key) DO UPDATE SET value = '29' RETURNING *) SELECT * FROM local_insert, distributed_local_mixed ORDER BY 1,2,3,4,5; +NOTICE: executing the command locally: INSERT INTO local_shard_execution.distributed_table_1470001 AS citus_table_alias (key, value, age) VALUES (1, '11'::text, 21) ON CONFLICT(key) DO UPDATE SET value = '29'::text RETURNING citus_table_alias.key, citus_table_alias.value, citus_table_alias.age +NOTICE: executing the command locally: SELECT worker_column_1 AS key, worker_column_2 AS value, worker_column_3 AS age, worker_column_4 AS key, worker_column_5 AS value, worker_column_6 AS age FROM (SELECT local_insert.key AS worker_column_1, local_insert.value AS worker_column_2, local_insert.age AS worker_column_3, distributed_local_mixed.key AS worker_column_4, distributed_local_mixed.value AS worker_column_5, distributed_local_mixed.age AS worker_column_6 FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.age FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, age bigint)) local_insert, (SELECT distributed_table.key, distributed_table.value, distributed_table.age FROM 
local_shard_execution.distributed_table_1470001 distributed_table) distributed_local_mixed) worker_subquery +NOTICE: executing the command locally: SELECT worker_column_1 AS key, worker_column_2 AS value, worker_column_3 AS age, worker_column_4 AS key, worker_column_5 AS value, worker_column_6 AS age FROM (SELECT local_insert.key AS worker_column_1, local_insert.value AS worker_column_2, local_insert.age AS worker_column_3, distributed_local_mixed.key AS worker_column_4, distributed_local_mixed.value AS worker_column_5, distributed_local_mixed.age AS worker_column_6 FROM (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.age FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, age bigint)) local_insert, (SELECT distributed_table.key, distributed_table.value, distributed_table.age FROM local_shard_execution.distributed_table_1470003 distributed_table) distributed_local_mixed) worker_subquery key | value | age | key | value | age --------------------------------------------------------------------- 1 | 29 | 21 | 1 | 11 | 21 @@ -709,7 +712,7 @@ FROM distributed_table, all_data WHERE distributed_table.key = all_data.key AND distributed_table.key = 1; -NOTICE: executing the command locally: WITH all_data AS (SELECT distributed_table_1.key, distributed_table_1.value, distributed_table_1.age FROM local_shard_execution.distributed_table_1470001 distributed_table_1 WHERE (distributed_table_1.key OPERATOR(pg_catalog.=) 1)) SELECT count(*) AS count FROM local_shard_execution.distributed_table_1470001 distributed_table, all_data WHERE ((distributed_table.key OPERATOR(pg_catalog.=) all_data.key) AND (distributed_table.key OPERATOR(pg_catalog.=) 1)) +NOTICE: executing the command locally: SELECT count(*) AS count FROM local_shard_execution.distributed_table_1470001 distributed_table, (SELECT distributed_table_1.key, distributed_table_1.value, distributed_table_1.age FROM 
local_shard_execution.distributed_table_1470001 distributed_table_1 WHERE (distributed_table_1.key OPERATOR(pg_catalog.=) 1)) all_data WHERE ((distributed_table.key OPERATOR(pg_catalog.=) all_data.key) AND (distributed_table.key OPERATOR(pg_catalog.=) 1)) count --------------------------------------------------------------------- 1 @@ -1367,6 +1370,7 @@ EXECUTE serial_prepared_local; -- Citus currently doesn't allow using task_assignment_policy for intermediate results WITH distributed_local_mixed AS (INSERT INTO reference_table VALUES (1000) RETURNING *) SELECT * FROM distributed_local_mixed; NOTICE: executing the command locally: INSERT INTO local_shard_execution.reference_table_1470000 (key) VALUES (1000) RETURNING key +NOTICE: executing the command locally: SELECT key FROM (SELECT intermediate_result.key FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer)) distributed_local_mixed key --------------------------------------------------------------------- 1000 diff --git a/src/test/regress/expected/multi_explain.out b/src/test/regress/expected/multi_explain.out index 251315e31..9da8e24e0 100644 --- a/src/test/regress/expected/multi_explain.out +++ b/src/test/regress/expected/multi_explain.out @@ -1251,6 +1251,8 @@ Custom Scan (Citus INSERT ... 
SELECT via coordinator) -> Function Scan on generate_series s -> Function Scan on generate_series s_1 -- explain with recursive planning +-- prevent PG 11 - PG 12 outputs to diverge +SET citus.enable_cte_inlining TO false; EXPLAIN (COSTS OFF, VERBOSE true) WITH keys AS ( SELECT DISTINCT l_orderkey FROM lineitem_hash_part @@ -1300,6 +1302,7 @@ Custom Scan (Citus Adaptive) -> Function Scan on pg_catalog.read_intermediate_result intermediate_result_1 Output: intermediate_result_1.l_orderkey Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) +SET citus.enable_cte_inlining TO true; SELECT true AS valid FROM explain_json($$ WITH result AS ( SELECT l_quantity, count(*) count_quantity FROM lineitem diff --git a/src/test/regress/expected/multi_insert_select.out b/src/test/regress/expected/multi_insert_select.out index 9f74e8588..1f504a725 100644 --- a/src/test/regress/expected/multi_insert_select.out +++ b/src/test/regress/expected/multi_insert_select.out @@ -680,7 +680,10 @@ DEBUG: Router planner cannot handle multi-shard select queries (9 rows) -- We support CTEs +-- but prefer to prevent inlining of the CTE +-- in order not to diverge from pg 11 vs pg 12 BEGIN; +SET LOCAL citus.enable_cte_inlining TO false; WITH fist_table_agg AS (SELECT max(value_1)+1 as v1_agg, user_id FROM raw_events_first GROUP BY user_id) INSERT INTO agg_events @@ -705,12 +708,10 @@ INSERT INTO agg_events raw_events_first.user_id, (SELECT * FROM sub_cte) FROM raw_events_first; +DEBUG: CTE sub_cte is going to be inlined via distributed planning DEBUG: Subqueries without relations are not allowed in distributed INSERT ... SELECT queries DEBUG: Collecting INSERT ... SELECT results on coordinator DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for CTE sub_cte: SELECT 1 -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, (SELECT sub_cte."?column?" FROM (SELECT intermediate_result."?column?" 
FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result("?column?" integer)) sub_cte) FROM public.raw_events_first -DEBUG: Router planner cannot handle multi-shard select queries ERROR: could not run distributed query with subquery outside the FROM, WHERE and HAVING clauses HINT: Consider using an equality filter on the distributed table's partition column. -- We support set operations via the coordinator diff --git a/src/test/regress/expected/multi_insert_select_conflict.out b/src/test/regress/expected/multi_insert_select_conflict.out index e1310c46b..f543ceb6f 100644 --- a/src/test/regress/expected/multi_insert_select_conflict.out +++ b/src/test/regress/expected/multi_insert_select_conflict.out @@ -172,6 +172,8 @@ DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT col_1, col_2 10 | 0 (10 rows) +-- prevent PG 11 - PG 12 outputs to diverge +SET citus.enable_cte_inlining TO false; -- Get the select part from cte and do nothing on conflict WITH cte AS( SELECT col_1, col_2 FROM source_table_1 @@ -205,6 +207,7 @@ SELECT * FROM target_table ORDER BY 1; 10 | 0 (10 rows) +SET citus.enable_cte_inlining TO true; -- Test with multiple CTEs WITH cte AS( SELECT col_1, col_2 FROM source_table_1 @@ -214,10 +217,8 @@ WITH cte AS( INSERT INTO target_table ((SELECT * FROM cte) UNION (SELECT * FROM cte_2)) ON CONFLICT(col_1) DO UPDATE SET col_2 = EXCLUDED.col_2 + 1; DEBUG: distributed INSERT ... SELECT can only select from distributed tables DEBUG: Collecting INSERT ... 
SELECT results on coordinator -DEBUG: generating subplan XXX_1 for CTE cte: SELECT col_1, col_2 FROM on_conflict.source_table_1 -DEBUG: generating subplan XXX_2 for CTE cte_2: SELECT col_1, col_2 FROM on_conflict.source_table_2 -DEBUG: generating subplan XXX_3 for subquery SELECT cte.col_1, cte.col_2 FROM (SELECT intermediate_result.col_1, intermediate_result.col_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(col_1 integer, col_2 integer)) cte UNION SELECT cte_2.col_1, cte_2.col_2 FROM (SELECT intermediate_result.col_1, intermediate_result.col_2 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(col_1 integer, col_2 integer)) cte_2 -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT col_1, col_2 FROM (SELECT intermediate_result.col_1, intermediate_result.col_2 FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(col_1 integer, col_2 integer)) citus_insert_select_subquery +DEBUG: CTE cte is going to be inlined via distributed planning +DEBUG: CTE cte_2 is going to be inlined via distributed planning SELECT * FROM target_table ORDER BY 1; col_1 | col_2 --------------------------------------------------------------------- @@ -233,6 +234,8 @@ SELECT * FROM target_table ORDER BY 1; 10 | 11 (10 rows) +-- prevent PG 11 - PG 12 outputs to diverge +SET citus.enable_cte_inlining TO false; WITH inserted_table AS ( WITH cte AS( SELECT col_1, col_2, col_3 FROM source_table_1 @@ -287,6 +290,7 @@ FROM cte, source_table_1 WHERE cte.col_1 = source_table_1.col_1 ON CONFLICT DO NOTHING; ERROR: cannot perform distributed planning for the given modification DETAIL: Select query cannot be pushed down to the worker. 
+SET citus.enable_cte_inlining TO true; -- Tests with foreign key to reference table CREATE TABLE test_ref_table (key int PRIMARY KEY); SELECT create_reference_table('test_ref_table'); @@ -499,6 +503,8 @@ SELECT * FROM target_table ORDER BY 1; 10 | 0 (10 rows) +-- prevent PG 11 - PG 12 outputs to diverge +SET citus.enable_cte_inlining TO false; WITH cte AS( SELECT col_1, col_2, col_3 FROM source_table_1 ), cte_2 AS( diff --git a/src/test/regress/expected/multi_mx_function_call_delegation.out b/src/test/regress/expected/multi_mx_function_call_delegation.out index 32dd3892d..23e6f5e33 100644 --- a/src/test/regress/expected/multi_mx_function_call_delegation.out +++ b/src/test/regress/expected/multi_mx_function_call_delegation.out @@ -403,13 +403,12 @@ DETAIL: A distributed function is created. To make sure subsequent commands see (1 row) +\set VERBOSITY terse select mx_call_func_raise(2); DEBUG: pushing down the function call DEBUG: warning -DETAIL: WARNING from localhost:xxxxx ERROR: error -CONTEXT: while executing command on localhost:xxxxx -PL/pgSQL function multi_mx_function_call_delegation.mx_call_func_raise(integer) line 4 at RAISE +\set VERBOSITY default -- Don't push-down when doing INSERT INTO ... 
SELECT func(); SET client_min_messages TO ERROR; CREATE TABLE test (x int primary key); @@ -468,9 +467,10 @@ WITH r AS ( ), t AS ( SELECT count(*) c FROM r ) SELECT * FROM test, t WHERE t.c=0; +DEBUG: CTE t is going to be inlined via distributed planning DEBUG: generating subplan XXX_1 for CTE r: SELECT multi_mx_function_call_delegation.delegated_function(10) AS delegated_function DEBUG: not pushing down function calls in CTEs or Subqueries -DEBUG: generating subplan XXX_2 for CTE t: SELECT count(*) AS c FROM (SELECT intermediate_result.delegated_function FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(delegated_function integer)) r +DEBUG: generating subplan XXX_2 for subquery SELECT count(*) AS c FROM (SELECT intermediate_result.delegated_function FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(delegated_function integer)) r DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT test.x, t.c FROM multi_mx_function_call_delegation.test, (SELECT intermediate_result.c FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(c bigint)) t WHERE (t.c OPERATOR(pg_catalog.=) 0) x | c --------------------------------------------------------------------- @@ -483,11 +483,13 @@ WITH r AS ( ), t AS ( SELECT count(*) c FROM s ) SELECT * FROM test, r, t WHERE t.c=0; -DEBUG: generating subplan XXX_1 for CTE r: SELECT count(*) AS count FROM multi_mx_function_call_delegation.test -DEBUG: generating subplan XXX_2 for CTE s: SELECT multi_mx_function_call_delegation.delegated_function(13) AS delegated_function +DEBUG: CTE r is going to be inlined via distributed planning +DEBUG: CTE t is going to be inlined via distributed planning +DEBUG: generating subplan XXX_1 for CTE s: SELECT multi_mx_function_call_delegation.delegated_function(13) AS delegated_function DEBUG: not pushing down function calls in CTEs or Subqueries -DEBUG: generating subplan 
XXX_3 for CTE t: SELECT count(*) AS c FROM (SELECT intermediate_result.delegated_function FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(delegated_function integer)) s -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT test.x, r.count, t.c FROM multi_mx_function_call_delegation.test, (SELECT intermediate_result.count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(count bigint)) r, (SELECT intermediate_result.c FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(c bigint)) t WHERE (t.c OPERATOR(pg_catalog.=) 0) +DEBUG: generating subplan XXX_2 for subquery SELECT count(*) AS count FROM multi_mx_function_call_delegation.test +DEBUG: generating subplan XXX_3 for subquery SELECT count(*) AS c FROM (SELECT intermediate_result.delegated_function FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(delegated_function integer)) s +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT test.x, r.count, t.c FROM multi_mx_function_call_delegation.test, (SELECT intermediate_result.count FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(count bigint)) r, (SELECT intermediate_result.c FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(c bigint)) t WHERE (t.c OPERATOR(pg_catalog.=) 0) x | count | c --------------------------------------------------------------------- (0 rows) diff --git a/src/test/regress/expected/multi_mx_router_planner.out b/src/test/regress/expected/multi_mx_router_planner.out index 791b86a46..7b28dedac 100644 --- a/src/test/regress/expected/multi_mx_router_planner.out +++ b/src/test/regress/expected/multi_mx_router_planner.out @@ -2,6 +2,9 @@ -- test router planner functionality for single shard select queries -- 
=================================================================== -- run all the router queries from the one of the workers +-- prevent PG 11 - PG 12 outputs to diverge +-- and CTE inlining is not relevant to router plannery anyway +SET citus.enable_cte_inlining TO false; \c - - - :worker_1_port -- this table is used in a CTE test CREATE TABLE authors_hash_mx ( name text, id bigint ); @@ -226,6 +229,7 @@ DEBUG: Plan is router executable -- queries with CTEs are supported WITH first_author AS ( SELECT id FROM articles_hash_mx WHERE author_id = 1) SELECT * FROM first_author; +DEBUG: CTE first_author is going to be inlined via distributed planning DEBUG: Creating router plan DEBUG: Plan is router executable DETAIL: distribution column value: 1 @@ -257,6 +261,8 @@ DETAIL: distribution column value: 1 WITH id_author AS ( SELECT id, author_id FROM articles_hash_mx WHERE author_id = 1), id_title AS (SELECT id, title from articles_hash_mx WHERE author_id = 1) SELECT * FROM id_author, id_title WHERE id_author.id = id_title.id; +DEBUG: CTE id_author is going to be inlined via distributed planning +DEBUG: CTE id_title is going to be inlined via distributed planning DEBUG: Creating router plan DEBUG: Plan is router executable DETAIL: distribution column value: 1 @@ -272,6 +278,8 @@ DETAIL: distribution column value: 1 WITH id_author AS ( SELECT id, author_id FROM articles_hash_mx WHERE author_id = 1), id_title AS (SELECT id, title from articles_hash_mx WHERE author_id = 3) SELECT * FROM id_author, id_title WHERE id_author.id = id_title.id; +DEBUG: CTE id_author is going to be inlined via distributed planning +DEBUG: CTE id_title is going to be inlined via distributed planning DEBUG: Creating router plan DEBUG: Plan is router executable id | author_id | id | title @@ -282,18 +290,17 @@ DEBUG: Plan is router executable WITH id_author AS ( SELECT id, author_id FROM articles_hash_mx WHERE author_id = 1), id_title AS (SELECT id, title from articles_hash_mx WHERE author_id = 2) 
SELECT * FROM id_author, id_title WHERE id_author.id = id_title.id; +DEBUG: CTE id_author is going to be inlined via distributed planning +DEBUG: CTE id_title is going to be inlined via distributed planning DEBUG: cannot run command which targets multiple shards -DEBUG: generating subplan XXX_1 for CTE id_author: SELECT id, author_id FROM public.articles_hash_mx WHERE (author_id OPERATOR(pg_catalog.=) 1) -DEBUG: Creating router plan -DEBUG: Plan is router executable -DETAIL: distribution column value: 1 -DEBUG: generating subplan XXX_2 for CTE id_title: SELECT id, title FROM public.articles_hash_mx WHERE (author_id OPERATOR(pg_catalog.=) 2) DEBUG: Creating router plan DEBUG: Plan is router executable DETAIL: distribution column value: 2 -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT id_author.id, id_author.author_id, id_title.id, id_title.title FROM (SELECT intermediate_result.id, intermediate_result.author_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint)) id_author, (SELECT intermediate_result.id, intermediate_result.title FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, title character varying(20))) id_title WHERE (id_author.id OPERATOR(pg_catalog.=) id_title.id) +DEBUG: generating subplan XXX_1 for subquery SELECT id, title FROM public.articles_hash_mx WHERE (author_id OPERATOR(pg_catalog.=) 2) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT id_author.id, id_author.author_id, id_title.id, id_title.title FROM (SELECT articles_hash_mx.id, articles_hash_mx.author_id FROM public.articles_hash_mx WHERE (articles_hash_mx.author_id OPERATOR(pg_catalog.=) 1)) id_author, (SELECT intermediate_result.id, intermediate_result.title FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, title character varying(20))) id_title WHERE (id_author.id 
OPERATOR(pg_catalog.=) id_title.id) DEBUG: Creating router plan DEBUG: Plan is router executable +DETAIL: distribution column value: 1 id | author_id | id | title --------------------------------------------------------------------- (0 rows) @@ -610,24 +617,17 @@ DETAIL: distribution column value: 10 -- following join is not router plannable since there are no -- workers containing both shards, but will work through recursive -- planning +SET client_min_messages TO INFO; WITH single_shard as (SELECT * FROM articles_single_shard_hash_mx) SELECT a.author_id as first_author, b.word_count as second_word_count FROM articles_hash_mx a, single_shard b WHERE a.author_id = 2 and a.author_id = b.author_id LIMIT 3; -DEBUG: Found no worker with all shard placements -DEBUG: found no worker with all shard placements -DEBUG: generating subplan XXX_1 for CTE single_shard: SELECT id, author_id, title, word_count FROM public.articles_single_shard_hash_mx -DEBUG: Creating router plan -DEBUG: Plan is router executable -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT a.author_id AS first_author, b.word_count AS second_word_count FROM public.articles_hash_mx a, (SELECT intermediate_result.id, intermediate_result.author_id, intermediate_result.title, intermediate_result.word_count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint, title character varying(20), word_count integer)) b WHERE ((a.author_id OPERATOR(pg_catalog.=) 2) AND (a.author_id OPERATOR(pg_catalog.=) b.author_id)) LIMIT 3 -DEBUG: Creating router plan -DEBUG: Plan is router executable -DETAIL: distribution column value: 2 first_author | second_word_count --------------------------------------------------------------------- (0 rows) +SET client_min_messages TO DEBUG; -- single shard select with limit is router plannable SELECT * FROM articles_hash_mx diff --git a/src/test/regress/expected/multi_read_from_secondaries.out 
b/src/test/regress/expected/multi_read_from_secondaries.out index a1425e0d9..cbab871e6 100644 --- a/src/test/regress/expected/multi_read_from_secondaries.out +++ b/src/test/regress/expected/multi_read_from_secondaries.out @@ -67,9 +67,10 @@ FROM dest_table.b IN (1,2,3,4) ) SELECT * FROM cte ORDER BY 1 DESC LIMIT 5 ) as foo ORDER BY 1; -DEBUG: generating subplan XXX_1 for CTE cte: SELECT DISTINCT dest_table.a FROM public.dest_table, public.source_table WHERE ((source_table.a OPERATOR(pg_catalog.=) dest_table.a) AND (dest_table.b OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4]))) -DEBUG: generating subplan XXX_2 for subquery SELECT a FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) cte ORDER BY a DESC LIMIT 5 -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT a FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) foo ORDER BY a +DEBUG: CTE cte is going to be inlined via distributed planning +DEBUG: push down of limit count: 5 +DEBUG: generating subplan XXX_1 for subquery SELECT a FROM (SELECT DISTINCT dest_table.a FROM public.dest_table, public.source_table WHERE ((source_table.a OPERATOR(pg_catalog.=) dest_table.a) AND (dest_table.b OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))) cte ORDER BY a DESC LIMIT 5 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT a FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) foo ORDER BY a a --------------------------------------------------------------------- (0 rows) diff --git a/src/test/regress/expected/multi_router_planner.out b/src/test/regress/expected/multi_router_planner.out index b20da4f38..a4db688af 100644 --- a/src/test/regress/expected/multi_router_planner.out +++ b/src/test/regress/expected/multi_router_planner.out @@ 
-6,6 +6,9 @@ SET citus.next_shard_id TO 840000; -- router planner, so we're disabling it in this file. We've bunch of -- other tests that triggers fast-path-router planner SET citus.enable_fast_path_router_planner TO false; +-- prevent PG 11 - PG 12 outputs to diverge +-- and CTE inlining is not relevant to router plannery anyway +SET citus.enable_cte_inlining TO false; CREATE TABLE articles_hash ( id bigint NOT NULL, author_id bigint NOT NULL, diff --git a/src/test/regress/expected/multi_router_planner_fast_path.out b/src/test/regress/expected/multi_router_planner_fast_path.out index 7799e6365..2a4e204dd 100644 --- a/src/test/regress/expected/multi_router_planner_fast_path.out +++ b/src/test/regress/expected/multi_router_planner_fast_path.out @@ -192,6 +192,7 @@ DEBUG: Plan is router executable -- queries with CTEs cannot go through fast-path planning WITH first_author AS ( SELECT id FROM articles_hash WHERE author_id = 1) SELECT * FROM first_author; +DEBUG: CTE first_author is going to be inlined via distributed planning DEBUG: Creating router plan DEBUG: Plan is router executable DETAIL: distribution column value: 1 @@ -208,6 +209,8 @@ DETAIL: distribution column value: 1 WITH id_author AS ( SELECT id, author_id FROM articles_hash WHERE author_id = 1), id_title AS (SELECT id, title from articles_hash WHERE author_id = 1) SELECT * FROM id_author, id_title WHERE id_author.id = id_title.id; +DEBUG: CTE id_author is going to be inlined via distributed planning +DEBUG: CTE id_title is going to be inlined via distributed planning DEBUG: Creating router plan DEBUG: Plan is router executable DETAIL: distribution column value: 1 @@ -225,20 +228,18 @@ DETAIL: distribution column value: 1 WITH id_author AS ( SELECT id, author_id FROM articles_hash WHERE author_id = 1), id_title AS (SELECT id, title from articles_hash WHERE author_id = 2) SELECT * FROM id_author, id_title WHERE id_author.id = id_title.id; +DEBUG: CTE id_author is going to be inlined via distributed planning 
+DEBUG: CTE id_title is going to be inlined via distributed planning DEBUG: cannot run command which targets multiple shards -DEBUG: generating subplan XXX_1 for CTE id_author: SELECT id, author_id FROM fast_path_router_select.articles_hash WHERE (author_id OPERATOR(pg_catalog.=) 1) -DEBUG: Distributed planning for a fast-path router query -DEBUG: Creating router plan -DEBUG: Plan is router executable -DETAIL: distribution column value: 1 -DEBUG: generating subplan XXX_2 for CTE id_title: SELECT id, title FROM fast_path_router_select.articles_hash WHERE (author_id OPERATOR(pg_catalog.=) 2) DEBUG: Distributed planning for a fast-path router query DEBUG: Creating router plan DEBUG: Plan is router executable DETAIL: distribution column value: 2 -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT id_author.id, id_author.author_id, id_title.id, id_title.title FROM (SELECT intermediate_result.id, intermediate_result.author_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, author_id bigint)) id_author, (SELECT intermediate_result.id, intermediate_result.title FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, title character varying(20))) id_title WHERE (id_author.id OPERATOR(pg_catalog.=) id_title.id) +DEBUG: generating subplan XXX_1 for subquery SELECT id, title FROM fast_path_router_select.articles_hash WHERE (author_id OPERATOR(pg_catalog.=) 2) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT id_author.id, id_author.author_id, id_title.id, id_title.title FROM (SELECT articles_hash.id, articles_hash.author_id FROM fast_path_router_select.articles_hash WHERE (articles_hash.author_id OPERATOR(pg_catalog.=) 1)) id_author, (SELECT intermediate_result.id, intermediate_result.title FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, title character varying(20))) id_title WHERE 
(id_author.id OPERATOR(pg_catalog.=) id_title.id) DEBUG: Creating router plan DEBUG: Plan is router executable +DETAIL: distribution column value: 1 id | author_id | id | title --------------------------------------------------------------------- (0 rows) diff --git a/src/test/regress/expected/multi_task_assignment_policy.out b/src/test/regress/expected/multi_task_assignment_policy.out index 67f29166d..e44d8ee9a 100644 --- a/src/test/regress/expected/multi_task_assignment_policy.out +++ b/src/test/regress/expected/multi_task_assignment_policy.out @@ -307,6 +307,8 @@ SELECT create_distributed_table('task_assignment_test_table_2', 'test_id'); SET citus.task_assignment_policy TO 'round-robin'; -- Run the query two times to make sure that it hits two different workers -- on consecutive runs +-- prevent PG 11 - PG 12 outputs to diverge +SET citus.enable_cte_inlining TO false; INSERT INTO explain_outputs SELECT parse_explain_output($cmd$ EXPLAIN WITH q1 AS (SELECT * FROM task_assignment_test_table_2) SELECT * FROM q1 diff --git a/src/test/regress/expected/non_colocated_leaf_subquery_joins.out b/src/test/regress/expected/non_colocated_leaf_subquery_joins.out index da9964f0e..6909696a2 100644 --- a/src/test/regress/expected/non_colocated_leaf_subquery_joins.out +++ b/src/test/regress/expected/non_colocated_leaf_subquery_joins.out @@ -89,9 +89,9 @@ SELECT count(*) FROM q1, (SELECT users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) as bar WHERE bar.user_id = q1.user_id ;$$); -DEBUG: generating subplan XXX_1 for CTE q1: SELECT user_id FROM public.users_table -DEBUG: generating subplan XXX_2 for subquery SELECT users_table.user_id, random() AS random FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4]))) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS 
count FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) q1, (SELECT intermediate_result.user_id, intermediate_result.random FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, random double precision)) bar WHERE (bar.user_id OPERATOR(pg_catalog.=) q1.user_id) +DEBUG: CTE q1 is going to be inlined via distributed planning +DEBUG: generating subplan XXX_1 for subquery SELECT users_table.user_id, random() AS random FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4]))) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT users_table.user_id FROM public.users_table) q1, (SELECT intermediate_result.user_id, intermediate_result.random FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, random double precision)) bar WHERE (bar.user_id OPERATOR(pg_catalog.=) q1.user_id) valid --------------------------------------------------------------------- t diff --git a/src/test/regress/expected/non_colocated_subquery_joins.out b/src/test/regress/expected/non_colocated_subquery_joins.out index 171781aaa..705b54bd2 100644 --- a/src/test/regress/expected/non_colocated_subquery_joins.out +++ b/src/test/regress/expected/non_colocated_subquery_joins.out @@ -542,15 +542,17 @@ SELECT count(*) FROM q1, (SELECT users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) as bar WHERE bar.user_id = q1.user_id ;$$); -DEBUG: generating subplan XXX_1 for CTE q1: SELECT user_id FROM public.users_table -DEBUG: generating subplan XXX_2 for subquery SELECT users_table.user_id, random() AS random FROM public.users_table, public.events_table WHERE 
((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4]))) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) q1, (SELECT intermediate_result.user_id, intermediate_result.random FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, random double precision)) bar WHERE (bar.user_id OPERATOR(pg_catalog.=) q1.user_id) +DEBUG: CTE q1 is going to be inlined via distributed planning +DEBUG: generating subplan XXX_1 for subquery SELECT users_table.user_id, random() AS random FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4]))) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT users_table.user_id FROM public.users_table) q1, (SELECT intermediate_result.user_id, intermediate_result.random FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, random double precision)) bar WHERE (bar.user_id OPERATOR(pg_catalog.=) q1.user_id) valid --------------------------------------------------------------------- t (1 row) -- subquery joins should work fine when used with CTEs +-- prevent PG 11 - PG 12 outputs to diverge +SET citus.enable_cte_inlining TO false; SELECT true AS valid FROM explain_json_2($$ WITH q1 AS (SELECT user_id FROM users_table) SELECT count(*) FROM q1, (SELECT @@ -566,6 +568,7 @@ DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS c t (1 row) +SET citus.enable_cte_inlining TO true; -- should work fine within UNIONs SELECT true AS valid FROM explain_json_2($$ (SELECT 
users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) UNION @@ -811,13 +814,12 @@ SELECT true AS valid FROM explain_json_2($$ WHERE non_colocated_subquery.value_2 != non_colocated_subquery_2.cnt $$); -DEBUG: generating subplan XXX_1 for CTE non_colocated_subquery: SELECT foo.value_2 FROM (SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))) foo, (SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))) bar WHERE (foo.value_2 OPERATOR(pg_catalog.=) bar.value_2) +DEBUG: CTE non_colocated_subquery is going to be inlined via distributed planning +DEBUG: CTE non_colocated_subquery_2 is going to be inlined via distributed planning DEBUG: generating subplan XXX_1 for subquery SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8]))) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT foo.value_2 FROM (SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))) foo, (SELECT intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) bar WHERE (foo.value_2 OPERATOR(pg_catalog.=) bar.value_2) -DEBUG: generating subplan XXX_2 for CTE non_colocated_subquery_2: SELECT count(*) AS cnt FROM public.events_table WHERE (event_type OPERATOR(pg_catalog.=) ANY 
(SELECT events_table_1.event_type FROM public.events_table events_table_1 WHERE (events_table_1.user_id OPERATOR(pg_catalog.<) 4))) -DEBUG: generating subplan XXX_1 for subquery SELECT event_type FROM public.events_table WHERE (user_id OPERATOR(pg_catalog.<) 4) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS cnt FROM public.events_table WHERE (event_type OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.event_type FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(event_type integer))) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT non_colocated_subquery.value_2, non_colocated_subquery_2.cnt FROM (SELECT intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) non_colocated_subquery, (SELECT intermediate_result.cnt FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(cnt bigint)) non_colocated_subquery_2 WHERE (non_colocated_subquery.value_2 OPERATOR(pg_catalog.<>) non_colocated_subquery_2.cnt) +DEBUG: generating subplan XXX_2 for subquery SELECT event_type FROM public.events_table WHERE (user_id OPERATOR(pg_catalog.<) 4) +DEBUG: generating subplan XXX_3 for subquery SELECT count(*) AS cnt FROM public.events_table WHERE (event_type OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.event_type FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(event_type integer))) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT non_colocated_subquery.value_2, non_colocated_subquery_2.cnt FROM (SELECT foo.value_2 FROM (SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))) foo, (SELECT intermediate_result.value_2 FROM 
read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) bar WHERE (foo.value_2 OPERATOR(pg_catalog.=) bar.value_2)) non_colocated_subquery, (SELECT intermediate_result.cnt FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(cnt bigint)) non_colocated_subquery_2 WHERE (non_colocated_subquery.value_2 OPERATOR(pg_catalog.<>) non_colocated_subquery_2.cnt) valid --------------------------------------------------------------------- t diff --git a/src/test/regress/expected/pg12.out b/src/test/regress/expected/pg12.out index 001910977..b32ef305e 100644 --- a/src/test/regress/expected/pg12.out +++ b/src/test/regress/expected/pg12.out @@ -233,14 +233,12 @@ SELECT count(*) FROM cte1, single_hash_repartition_second WHERE cte1.id = single_hash_repartition_second.id AND single_hash_repartition_second.sum = 45; $Q$); - coordinator_plan + coordinator_plan --------------------------------------------------------------------- Aggregate -> Custom Scan (Citus Adaptive) - -> Distributed Subplan XXX_1 - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(5 rows) + Task Count: 4 +(3 rows) -- Foreign keys to partition tables CREATE TABLE collections_list ( diff --git a/src/test/regress/expected/set_operation_and_local_tables.out b/src/test/regress/expected/set_operation_and_local_tables.out index 9adcdb5ba..e4b847654 100644 --- a/src/test/regress/expected/set_operation_and_local_tables.out +++ b/src/test/regress/expected/set_operation_and_local_tables.out @@ -121,13 +121,15 @@ cte_1 AS (SELECT user_id FROM users_table), cte_2 AS (SELECT user_id FROM events_table) ((SELECT * FROM cte_1) UNION (SELECT * FROM cte_2) UNION (SELECT x FROM local_test)) INTERSECT (SELECT i FROM generate_series(0, 100) i) ORDER BY 1 DESC; +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_2 is going to be inlined via distributed planning DEBUG: Local tables cannot be used in distributed 
queries. -DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT user_id FROM public.users_table +DEBUG: generating subplan XXX_1 for subquery SELECT x FROM recursive_set_local.local_test DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_2 for CTE cte_2: SELECT user_id FROM public.events_table +DEBUG: generating subplan XXX_2 for subquery SELECT user_id FROM (SELECT users_table.user_id FROM public.users_table) cte_1 DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_3 for subquery SELECT x FROM recursive_set_local.local_test -DEBUG: Plan XXX query after replacing subqueries and CTEs: (SELECT cte_1.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte_1 UNION SELECT cte_2.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte_2 UNION SELECT intermediate_result.x FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) INTERSECT SELECT i.i FROM generate_series(0, 100) i(i) ORDER BY 1 DESC +DEBUG: generating subplan XXX_3 for subquery SELECT user_id FROM (SELECT events_table.user_id FROM public.events_table) cte_2 +DEBUG: Plan XXX query after replacing subqueries and CTEs: (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) UNION SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) UNION SELECT intermediate_result.x FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) INTERSECT SELECT i.i FROM generate_series(0, 100) i(i) ORDER BY 1 DESC DEBUG: Creating router plan DEBUG: Plan is 
router executable user_id @@ -152,18 +154,16 @@ FROM ) as foo, test WHERE test.y = foo.x; +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_1 is going to be inlined via distributed planning DEBUG: Local tables cannot be used in distributed queries. -DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT x FROM recursive_set_local.test +DEBUG: generating subplan XXX_1 for subquery SELECT x FROM recursive_set_local.local_test DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_2 for CTE cte_1: SELECT a FROM recursive_set_local.ref -DEBUG: Distributed planning for a fast-path router query +DEBUG: generating subplan XXX_2 for subquery SELECT x FROM (SELECT test.x FROM recursive_set_local.test) cte_1 DEBUG: Creating router plan DEBUG: Plan is router executable -DEBUG: generating subplan XXX_3 for subquery SELECT x FROM recursive_set_local.local_test -DEBUG: Creating router plan -DEBUG: Plan is router executable -DEBUG: generating subplan XXX_4 for subquery (SELECT cte_1.x FROM (SELECT intermediate_result.x FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) cte_1 UNION SELECT cte_1.a FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) cte_1) INTERSECT SELECT intermediate_result.x FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.x FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) foo, recursive_set_local.test WHERE (test.y OPERATOR(pg_catalog.=) foo.x) +DEBUG: generating subplan XXX_3 for subquery (SELECT intermediate_result.x FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer) 
UNION SELECT cte_1.a FROM (SELECT ref.a FROM recursive_set_local.ref) cte_1) INTERSECT SELECT intermediate_result.x FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.x FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) foo, recursive_set_local.test WHERE (test.y OPERATOR(pg_catalog.=) foo.x) DEBUG: Router planner cannot handle multi-shard select queries count --------------------------------------------------------------------- @@ -182,18 +182,16 @@ FROM ) as foo, ref WHERE ref.a = foo.x; +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_1 is going to be inlined via distributed planning DEBUG: Local tables cannot be used in distributed queries. -DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT x FROM recursive_set_local.test +DEBUG: generating subplan XXX_1 for subquery SELECT x FROM recursive_set_local.local_test DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_2 for CTE cte_1: SELECT a FROM recursive_set_local.ref -DEBUG: Distributed planning for a fast-path router query +DEBUG: generating subplan XXX_2 for subquery SELECT x FROM (SELECT test.x FROM recursive_set_local.test) cte_1 DEBUG: Creating router plan DEBUG: Plan is router executable -DEBUG: generating subplan XXX_3 for subquery SELECT x FROM recursive_set_local.local_test -DEBUG: Creating router plan -DEBUG: Plan is router executable -DEBUG: generating subplan XXX_4 for subquery (SELECT cte_1.x FROM (SELECT intermediate_result.x FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) cte_1 UNION SELECT cte_1.a FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) 
cte_1) INTERSECT SELECT intermediate_result.x FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.x FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) foo, recursive_set_local.ref WHERE (ref.a OPERATOR(pg_catalog.=) foo.x) +DEBUG: generating subplan XXX_3 for subquery (SELECT intermediate_result.x FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer) UNION SELECT cte_1.a FROM (SELECT ref.a FROM recursive_set_local.ref) cte_1) INTERSECT SELECT intermediate_result.x FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.x FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) foo, recursive_set_local.ref WHERE (ref.a OPERATOR(pg_catalog.=) foo.x) DEBUG: Creating router plan DEBUG: Plan is router executable count @@ -222,18 +220,17 @@ DEBUG: Router planner cannot handle multi-shard select queries -- same query with subquery in where is wrapped in CTE SELECT * FROM test a WHERE x IN (WITH cte AS (SELECT x FROM test b UNION SELECT y FROM test c UNION SELECT y FROM local_test d) SELECT * FROM cte) ORDER BY 1,2; -DEBUG: Local tables cannot be used in distributed queries. -DEBUG: generating subplan XXX_1 for CTE cte: SELECT b.x FROM recursive_set_local.test b UNION SELECT c.y FROM recursive_set_local.test c UNION SELECT d.y FROM recursive_set_local.local_test d +DEBUG: CTE cte is going to be inlined via distributed planning DEBUG: Local tables cannot be used in distributed queries. 
DEBUG: generating subplan XXX_1 for subquery SELECT y FROM recursive_set_local.local_test d DEBUG: Router planner cannot handle multi-shard select queries DEBUG: generating subplan XXX_2 for subquery SELECT x FROM recursive_set_local.test b DEBUG: Router planner cannot handle multi-shard select queries DEBUG: generating subplan XXX_3 for subquery SELECT y FROM recursive_set_local.test c -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT intermediate_result.x FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer) UNION SELECT intermediate_result.y FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(y integer) UNION SELECT intermediate_result.y FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(y integer) DEBUG: Creating router plan DEBUG: Plan is router executable -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT x, y FROM recursive_set_local.test a WHERE (x OPERATOR(pg_catalog.=) ANY (SELECT cte.x FROM (SELECT intermediate_result.x FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) cte)) ORDER BY x, y +DEBUG: generating subplan XXX_4 for subquery SELECT intermediate_result.x FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer) UNION SELECT intermediate_result.y FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(y integer) UNION SELECT intermediate_result.y FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(y integer) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT x, y FROM recursive_set_local.test a WHERE (x OPERATOR(pg_catalog.=) ANY (SELECT cte.x FROM (SELECT intermediate_result.x FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) 
cte)) ORDER BY x, y DEBUG: Router planner cannot handle multi-shard select queries x | y --------------------------------------------------------------------- diff --git a/src/test/regress/expected/single_hash_repartition_join.out b/src/test/regress/expected/single_hash_repartition_join.out index 3e81abb7c..8df46495a 100644 --- a/src/test/regress/expected/single_hash_repartition_join.out +++ b/src/test/regress/expected/single_hash_repartition_join.out @@ -266,6 +266,31 @@ FROM cte1, single_hash_repartition_first WHERE cte1.data > single_hash_repartition_first.id; +DEBUG: CTE cte1 is going to be inlined via distributed planning +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +LOG: join order: [ "single_hash_repartition_first" ][ single hash partition join "single_hash_repartition_second" ] +DEBUG: push down of limit count: 50 +DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1] +DEBUG: join prunable for intervals [-2147483648,-1073741825] and [0,1073741823] +DEBUG: join prunable for intervals [-2147483648,-1073741825] and [1073741824,2147483647] +DEBUG: join prunable for intervals [-1073741824,-1] and [-2147483648,-1073741825] +DEBUG: join prunable for intervals [-1073741824,-1] and [0,1073741823] +DEBUG: join prunable for intervals [-1073741824,-1] and [1073741824,2147483647] +DEBUG: join prunable for intervals [0,1073741823] and [-2147483648,-1073741825] +DEBUG: join prunable for intervals [0,1073741823] and [-1073741824,-1] +DEBUG: join prunable for intervals [0,1073741823] and [1073741824,2147483647] +DEBUG: join prunable for intervals [1073741824,2147483647] and [-2147483648,-1073741825] +DEBUG: join prunable for intervals [1073741824,2147483647] and [-1073741824,-1] +DEBUG: join prunable for intervals [1073741824,2147483647] and [0,1073741823] +DEBUG: pruning merge fetch taskId 1 +DETAIL: Creating dependency on merge taskId 5 +DEBUG: pruning merge 
fetch taskId 3 +DETAIL: Creating dependency on merge taskId 10 +DEBUG: pruning merge fetch taskId 5 +DETAIL: Creating dependency on merge taskId 15 +DEBUG: pruning merge fetch taskId 7 +DETAIL: Creating dependency on merge taskId 20 DEBUG: Router planner cannot handle multi-shard select queries DEBUG: generating subplan XXX_1 for CTE cte1: SELECT ((t1.id)::double precision OPERATOR(pg_catalog.*) t2.avg) AS data FROM single_hash_repartition.single_hash_repartition_first t1, single_hash_repartition.single_hash_repartition_second t2 WHERE ((t1.id OPERATOR(pg_catalog.=) t2.sum) AND (t1.sum OPERATOR(pg_catalog.>) 5000)) ORDER BY ((t1.id)::double precision OPERATOR(pg_catalog.*) t2.avg) DESC LIMIT 50 DEBUG: Router planner cannot handle multi-shard select queries diff --git a/src/test/regress/expected/subqueries_deep.out b/src/test/regress/expected/subqueries_deep.out index c270018c4..08524c086 100644 --- a/src/test/regress/expected/subqueries_deep.out +++ b/src/test/regress/expected/subqueries_deep.out @@ -16,7 +16,7 @@ FROM SELECT avg(event_type) as avg_val FROM - (SELECT + (SELECT event_type, users_table.user_id FROM users_table, (SELECT user_id, event_type FROM events_table WHERE value_2 < 3 ORDER BY 1, 2 OFFSET 3) as foo @@ -73,7 +73,8 @@ FROM ( GROUP BY event ) q ORDER BY 2 DESC, 1; -DEBUG: generating subplan XXX_1 for CTE cte: SELECT count(*) AS count FROM public.users_table +DEBUG: CTE cte is going to be inlined via distributed planning +DEBUG: generating subplan XXX_1 for subquery SELECT count(*) AS count FROM public.users_table DEBUG: push down of limit count: 5 DEBUG: generating subplan XXX_2 for subquery SELECT user_id FROM public.users_table WHERE ((value_2 OPERATOR(pg_catalog.>=) 5) AND (EXISTS (SELECT events_table.user_id FROM public.events_table WHERE ((events_table.event_type OPERATOR(pg_catalog.>) 1) AND (events_table.event_type OPERATOR(pg_catalog.<=) 3) AND (events_table.value_3 OPERATOR(pg_catalog.>) (1)::double precision) AND (events_table.user_id 
OPERATOR(pg_catalog.=) users_table.user_id)))) AND (NOT (EXISTS (SELECT events_table.user_id FROM public.events_table WHERE ((events_table.event_type OPERATOR(pg_catalog.>) 3) AND (events_table.event_type OPERATOR(pg_catalog.<=) 4) AND (events_table.value_3 OPERATOR(pg_catalog.>) (1)::double precision) AND (events_table.user_id OPERATOR(pg_catalog.=) users_table.user_id))))) AND (EXISTS (SELECT cte.count FROM (SELECT intermediate_result.count FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(count bigint)) cte))) LIMIT 5 DEBUG: generating subplan XXX_3 for subquery SELECT DISTINCT ON ((e.event_type)::text) (e.event_type)::text AS event, e."time", e.user_id FROM public.users_table u, public.events_table e WHERE ((u.user_id OPERATOR(pg_catalog.=) e.user_id) AND (u.user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)))) diff --git a/src/test/regress/expected/subquery_and_cte.out b/src/test/regress/expected/subquery_and_cte.out index ea6b28828..878486d33 100644 --- a/src/test/regress/expected/subquery_and_cte.out +++ b/src/test/regress/expected/subquery_and_cte.out @@ -2,6 +2,8 @@ -- test recursive planning functionality with subqueries and CTEs -- =================================================================== SET search_path TO subquery_and_ctes; +-- prevent PG 11 - PG 12 outputs to diverge +SET citus.enable_cte_inlining TO false; CREATE TABLE users_table_local AS SELECT * FROM users_table; CREATE TABLE dist_table (id int, value int); SELECT create_distributed_table('dist_table', 'id', colocate_with => 'users_table'); diff --git a/src/test/regress/expected/subquery_in_where.out b/src/test/regress/expected/subquery_in_where.out index 96cf8e7c9..62aefb80d 100644 --- a/src/test/regress/expected/subquery_in_where.out +++ b/src/test/regress/expected/subquery_in_where.out @@ -5,6 +5,8 @@ CREATE 
SCHEMA subquery_in_where; SET search_path TO subquery_in_where, public; SET client_min_messages TO DEBUG1; --CTEs can be used as a recurring tuple with subqueries in WHERE +-- prevent PG 11 - PG 12 outputs to diverge +SET citus.enable_cte_inlining TO false; WITH event_id AS (SELECT user_id AS events_user_id, time AS events_time, @@ -22,6 +24,7 @@ DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS c 101 (1 row) +SET citus.enable_cte_inlining TO true; --Correlated subqueries can not be used in WHERE clause WITH event_id AS (SELECT user_id AS events_user_id, @@ -33,6 +36,7 @@ FROM event_id WHERE events_user_id IN (SELECT user_id FROM users_table WHERE users_table.time = events_time); +DEBUG: CTE event_id is going to be inlined via distributed planning DEBUG: generating subplan XXX_1 for CTE event_id: SELECT user_id AS events_user_id, "time" AS events_time, event_type FROM public.events_table DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.events_user_id, intermediate_result.events_time, intermediate_result.event_type FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(events_user_id integer, events_time timestamp without time zone, event_type integer)) event_id WHERE (events_user_id OPERATOR(pg_catalog.=) ANY (SELECT users_table.user_id FROM public.users_table WHERE (users_table."time" OPERATOR(pg_catalog.=) event_id.events_time))) ERROR: cannot pushdown the subquery @@ -71,14 +75,15 @@ WHERE events_user_id IN ( FROM users_table ORDER BY 1 limit 10)); +DEBUG: CTE event_id is going to be inlined via distributed planning DEBUG: push down of limit count: 10 DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM public.users_table ORDER BY user_id LIMIT 10 DEBUG: push down of limit count: 10 DEBUG: generating subplan XXX_2 for subquery SELECT value_1 FROM public.users_table ORDER BY value_1 LIMIT 10 DEBUG: generating subplan 
XXX_3 for subquery SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) UNION ALL SELECT intermediate_result.value_1 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer) -DEBUG: generating subplan XXX_4 for CTE event_id: SELECT user_id AS events_user_id, "time" AS events_time, event_type FROM public.events_table -DEBUG: generating subplan XXX_5 for subquery SELECT events_user_id, events_time, event_type FROM (SELECT intermediate_result.events_user_id, intermediate_result.events_time, intermediate_result.event_type FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(events_user_id integer, events_time timestamp without time zone, event_type integer)) event_id ORDER BY events_user_id, events_time, event_type LIMIT 10 -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.events_user_id, intermediate_result.events_time, intermediate_result.event_type FROM read_intermediate_result('XXX_5'::text, 'binary'::citus_copy_format) intermediate_result(events_user_id integer, events_time timestamp without time zone, event_type integer)) sub_table WHERE (events_user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer))) +DEBUG: push down of limit count: 10 +DEBUG: generating subplan XXX_4 for subquery SELECT events_user_id, events_time, event_type FROM (SELECT events_table.user_id AS events_user_id, events_table."time" AS events_time, events_table.event_type FROM public.events_table) event_id ORDER BY events_user_id, events_time, event_type LIMIT 10 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.events_user_id, 
intermediate_result.events_time, intermediate_result.event_type FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(events_user_id integer, events_time timestamp without time zone, event_type integer)) sub_table WHERE (events_user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer))) count --------------------------------------------------------------------- 10 @@ -129,14 +134,15 @@ IN LIMIT 10 ); -DEBUG: generating subplan XXX_1 for CTE event_id: SELECT user_id AS events_user_id, "time" AS events_time, event_type FROM public.events_table -DEBUG: generating subplan XXX_2 for subquery SELECT events_user_id FROM (SELECT intermediate_result.events_user_id, intermediate_result.events_time, intermediate_result.event_type FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(events_user_id integer, events_time timestamp without time zone, event_type integer)) event_id ORDER BY events_user_id LIMIT 10 +DEBUG: CTE event_id is going to be inlined via distributed planning DEBUG: push down of limit count: 10 -DEBUG: generating subplan XXX_3 for subquery SELECT user_id FROM public.users_table ORDER BY user_id LIMIT 10 +DEBUG: generating subplan XXX_1 for subquery SELECT events_user_id FROM (SELECT events_table.user_id AS events_user_id, events_table."time" AS events_time, events_table.event_type FROM public.events_table) event_id ORDER BY events_user_id LIMIT 10 DEBUG: push down of limit count: 10 -DEBUG: generating subplan XXX_4 for subquery SELECT value_1 FROM public.users_table ORDER BY value_1 LIMIT 10 -DEBUG: generating subplan XXX_5 for subquery SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) UNION ALL SELECT intermediate_result.value_1 FROM 
read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_5'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) sub_table WHERE (user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.events_user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(events_user_id integer))) +DEBUG: generating subplan XXX_2 for subquery SELECT user_id FROM public.users_table ORDER BY user_id LIMIT 10 +DEBUG: push down of limit count: 10 +DEBUG: generating subplan XXX_3 for subquery SELECT value_1 FROM public.users_table ORDER BY value_1 LIMIT 10 +DEBUG: generating subplan XXX_4 for subquery SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) UNION ALL SELECT intermediate_result.value_1 FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) sub_table WHERE (user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.events_user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(events_user_id integer))) user_id --------------------------------------------------------------------- 1 @@ -324,12 +330,12 @@ SELECT COUNT(*) FROM cte; -DEBUG: generating subplan XXX_1 for CTE cte: SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM (SELECT users_table.user_id, users_table."time", users_table.value_1, users_table.value_2, users_table.value_3, users_table.value_4 FROM public.users_table ORDER 
BY users_table.user_id, users_table.value_2 DESC LIMIT 10) sub_table WHERE (user_id OPERATOR(pg_catalog.=) ANY (SELECT events_table.value_2 FROM public.events_table)) +DEBUG: CTE cte is going to be inlined via distributed planning DEBUG: push down of limit count: 10 DEBUG: generating subplan XXX_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table ORDER BY user_id, value_2 DESC LIMIT 10 DEBUG: generating subplan XXX_2 for subquery SELECT value_2 FROM public.events_table -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) sub_table WHERE (user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.value_2 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer))) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) cte +DEBUG: generating subplan XXX_3 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, 
intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) sub_table WHERE (user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.value_2 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer))) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) cte count --------------------------------------------------------------------- 10 diff --git a/src/test/regress/expected/subquery_partitioning.out b/src/test/regress/expected/subquery_partitioning.out index f8c7ceb1d..5e4c09cc4 100644 --- a/src/test/regress/expected/subquery_partitioning.out +++ b/src/test/regress/expected/subquery_partitioning.out @@ -206,18 +206,17 @@ FROM SELECT * FROM subquery_and_ctes ORDER BY 3 DESC, 1 DESC, 2 DESC, 4 DESC LIMIT 5; -DEBUG: generating subplan XXX_1 for CTE cte: WITH local_cte AS (SELECT users_table_local.user_id, users_table_local."time", users_table_local.value_1, users_table_local.value_2, users_table_local.value_3, users_table_local.value_4 FROM subquery_and_partitioning.users_table_local), dist_cte AS (SELECT events_table.user_id FROM public.events_table, (SELECT DISTINCT partitioning_test.value_1 FROM subquery_and_partitioning.partitioning_test OFFSET 0) foo WHERE ((events_table.user_id OPERATOR(pg_catalog.=) foo.value_1) AND (events_table.user_id 
OPERATOR(pg_catalog.=) ANY (SELECT DISTINCT users_table.value_1 FROM public.users_table ORDER BY users_table.value_1 LIMIT 3)))) SELECT dist_cte.user_id FROM (local_cte JOIN dist_cte ON ((dist_cte.user_id OPERATOR(pg_catalog.=) local_cte.user_id))) -DEBUG: generating subplan XXX_1 for CTE local_cte: SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM subquery_and_partitioning.users_table_local -DEBUG: generating subplan XXX_2 for CTE dist_cte: SELECT events_table.user_id FROM public.events_table, (SELECT DISTINCT partitioning_test.value_1 FROM subquery_and_partitioning.partitioning_test OFFSET 0) foo WHERE ((events_table.user_id OPERATOR(pg_catalog.=) foo.value_1) AND (events_table.user_id OPERATOR(pg_catalog.=) ANY (SELECT DISTINCT users_table.value_1 FROM public.users_table ORDER BY users_table.value_1 LIMIT 3))) +DEBUG: CTE cte is going to be inlined via distributed planning +DEBUG: CTE local_cte is going to be inlined via distributed planning +DEBUG: CTE dist_cte is going to be inlined via distributed planning +DEBUG: generating subplan XXX_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM subquery_and_partitioning.users_table_local DEBUG: push down of limit count: 3 -DEBUG: generating subplan XXX_1 for subquery SELECT DISTINCT value_1 FROM public.users_table ORDER BY value_1 LIMIT 3 -DEBUG: generating subplan XXX_2 for subquery SELECT DISTINCT value_1 FROM subquery_and_partitioning.partitioning_test OFFSET 0 -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT events_table.user_id FROM public.events_table, (SELECT intermediate_result.value_1 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer)) foo WHERE ((events_table.user_id OPERATOR(pg_catalog.=) foo.value_1) AND (events_table.user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.value_1 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_1 
integer)))) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT dist_cte.user_id FROM ((SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) local_cte JOIN (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) dist_cte ON ((dist_cte.user_id OPERATOR(pg_catalog.=) local_cte.user_id))) +DEBUG: generating subplan XXX_2 for subquery SELECT DISTINCT value_1 FROM public.users_table ORDER BY value_1 LIMIT 3 +DEBUG: generating subplan XXX_3 for subquery SELECT DISTINCT value_1 FROM subquery_and_partitioning.partitioning_test OFFSET 0 DEBUG: push down of limit count: 5 -DEBUG: generating subplan XXX_2 for subquery SELECT DISTINCT events_table.user_id FROM subquery_and_partitioning.partitioning_test, public.events_table WHERE ((events_table.user_id OPERATOR(pg_catalog.=) partitioning_test.id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4]))) ORDER BY events_table.user_id DESC LIMIT 5 -DEBUG: generating subplan XXX_3 for subquery SELECT count(*) AS cnt FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte, (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) foo WHERE (foo.user_id OPERATOR(pg_catalog.=) cte.user_id) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT cnt, user_id, "time", value_1, value_2, value_3, value_4 FROM (SELECT foo.cnt, users_table.user_id, users_table."time", 
users_table.value_1, users_table.value_2, users_table.value_3, users_table.value_4 FROM (SELECT intermediate_result.cnt FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(cnt bigint)) foo, public.users_table WHERE (foo.cnt OPERATOR(pg_catalog.>) users_table.value_2)) subquery_and_ctes ORDER BY "time" DESC, cnt DESC, user_id DESC, value_1 DESC LIMIT 5 +DEBUG: generating subplan XXX_4 for subquery SELECT DISTINCT events_table.user_id FROM subquery_and_partitioning.partitioning_test, public.events_table WHERE ((events_table.user_id OPERATOR(pg_catalog.=) partitioning_test.id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4]))) ORDER BY events_table.user_id DESC LIMIT 5 +DEBUG: generating subplan XXX_5 for subquery SELECT count(*) AS cnt FROM (SELECT dist_cte.user_id FROM ((SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) local_cte JOIN (SELECT events_table.user_id FROM public.events_table, (SELECT intermediate_result.value_1 FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer)) foo_1 WHERE ((events_table.user_id OPERATOR(pg_catalog.=) foo_1.value_1) AND (events_table.user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.value_1 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer))))) dist_cte ON ((dist_cte.user_id OPERATOR(pg_catalog.=) local_cte.user_id)))) cte, (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) foo WHERE (foo.user_id 
OPERATOR(pg_catalog.=) cte.user_id) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT cnt, user_id, "time", value_1, value_2, value_3, value_4 FROM (SELECT foo.cnt, users_table.user_id, users_table."time", users_table.value_1, users_table.value_2, users_table.value_3, users_table.value_4 FROM (SELECT intermediate_result.cnt FROM read_intermediate_result('XXX_5'::text, 'binary'::citus_copy_format) intermediate_result(cnt bigint)) foo, public.users_table WHERE (foo.cnt OPERATOR(pg_catalog.>) users_table.value_2)) subquery_and_ctes ORDER BY "time" DESC, cnt DESC, user_id DESC, value_1 DESC LIMIT 5 DEBUG: push down of limit count: 5 cnt | user_id | time | value_1 | value_2 | value_3 | value_4 --------------------------------------------------------------------- diff --git a/src/test/regress/expected/subquery_view.out b/src/test/regress/expected/subquery_view.out index fcb5d0eba..502f19f3e 100644 --- a/src/test/regress/expected/subquery_view.out +++ b/src/test/regress/expected/subquery_view.out @@ -392,18 +392,17 @@ FROM SELECT * FROM subquery_and_ctes ORDER BY 3 DESC, 1 DESC, 2 DESC, 4 DESC LIMIT 5; -DEBUG: generating subplan XXX_1 for CTE cte: WITH local_cte AS (SELECT users_table_local.user_id, users_table_local."time", users_table_local.value_1, users_table_local.value_2, users_table_local.value_3, users_table_local.value_4 FROM subquery_view.users_table_local), dist_cte AS (SELECT events_table.user_id FROM public.events_table, (SELECT DISTINCT users_table.value_2 FROM public.users_table OFFSET 0) foo WHERE ((events_table.user_id OPERATOR(pg_catalog.=) foo.value_2) AND (events_table.user_id OPERATOR(pg_catalog.=) ANY (SELECT DISTINCT users_table.value_1 FROM public.users_table ORDER BY users_table.value_1 LIMIT 3)))) SELECT dist_cte.user_id FROM (local_cte JOIN dist_cte ON ((dist_cte.user_id OPERATOR(pg_catalog.=) local_cte.user_id))) -DEBUG: generating subplan XXX_1 for CTE local_cte: SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM 
subquery_view.users_table_local -DEBUG: generating subplan XXX_2 for CTE dist_cte: SELECT events_table.user_id FROM public.events_table, (SELECT DISTINCT users_table.value_2 FROM public.users_table OFFSET 0) foo WHERE ((events_table.user_id OPERATOR(pg_catalog.=) foo.value_2) AND (events_table.user_id OPERATOR(pg_catalog.=) ANY (SELECT DISTINCT users_table.value_1 FROM public.users_table ORDER BY users_table.value_1 LIMIT 3))) +DEBUG: CTE cte is going to be inlined via distributed planning +DEBUG: CTE local_cte is going to be inlined via distributed planning +DEBUG: CTE dist_cte is going to be inlined via distributed planning +DEBUG: generating subplan XXX_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM subquery_view.users_table_local DEBUG: push down of limit count: 3 -DEBUG: generating subplan XXX_1 for subquery SELECT DISTINCT value_1 FROM public.users_table ORDER BY value_1 LIMIT 3 -DEBUG: generating subplan XXX_2 for subquery SELECT DISTINCT value_2 FROM public.users_table OFFSET 0 -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT events_table.user_id FROM public.events_table, (SELECT intermediate_result.value_2 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) foo WHERE ((events_table.user_id OPERATOR(pg_catalog.=) foo.value_2) AND (events_table.user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.value_1 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer)))) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT dist_cte.user_id FROM ((SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 
integer, value_2 integer, value_3 double precision, value_4 bigint)) local_cte JOIN (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) dist_cte ON ((dist_cte.user_id OPERATOR(pg_catalog.=) local_cte.user_id))) +DEBUG: generating subplan XXX_2 for subquery SELECT DISTINCT value_1 FROM public.users_table ORDER BY value_1 LIMIT 3 +DEBUG: generating subplan XXX_3 for subquery SELECT DISTINCT value_2 FROM public.users_table OFFSET 0 DEBUG: push down of limit count: 5 -DEBUG: generating subplan XXX_2 for subquery SELECT DISTINCT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4]))) ORDER BY users_table.user_id DESC LIMIT 5 -DEBUG: generating subplan XXX_3 for subquery SELECT count(*) AS cnt FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte, (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) foo WHERE (foo.user_id OPERATOR(pg_catalog.=) cte.user_id) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT cnt, user_id, "time", value_1, value_2, value_3, value_4 FROM (SELECT foo.cnt, users_table.user_id, users_table."time", users_table.value_1, users_table.value_2, users_table.value_3, users_table.value_4 FROM (SELECT intermediate_result.cnt FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(cnt bigint)) foo, public.users_table WHERE (foo.cnt OPERATOR(pg_catalog.>) users_table.value_2)) subquery_and_ctes ORDER BY "time" DESC, cnt DESC, user_id DESC, value_1 DESC LIMIT 5 +DEBUG: generating subplan XXX_4 for subquery SELECT DISTINCT users_table.user_id FROM 
public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4]))) ORDER BY users_table.user_id DESC LIMIT 5 +DEBUG: generating subplan XXX_5 for subquery SELECT count(*) AS cnt FROM (SELECT dist_cte.user_id FROM ((SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) local_cte JOIN (SELECT events_table.user_id FROM public.events_table, (SELECT intermediate_result.value_2 FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) foo_1 WHERE ((events_table.user_id OPERATOR(pg_catalog.=) foo_1.value_2) AND (events_table.user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.value_1 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer))))) dist_cte ON ((dist_cte.user_id OPERATOR(pg_catalog.=) local_cte.user_id)))) cte, (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) foo WHERE (foo.user_id OPERATOR(pg_catalog.=) cte.user_id) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT cnt, user_id, "time", value_1, value_2, value_3, value_4 FROM (SELECT foo.cnt, users_table.user_id, users_table."time", users_table.value_1, users_table.value_2, users_table.value_3, users_table.value_4 FROM (SELECT intermediate_result.cnt FROM read_intermediate_result('XXX_5'::text, 'binary'::citus_copy_format) intermediate_result(cnt bigint)) foo, public.users_table WHERE (foo.cnt 
OPERATOR(pg_catalog.>) users_table.value_2)) subquery_and_ctes ORDER BY "time" DESC, cnt DESC, user_id DESC, value_1 DESC LIMIT 5 DEBUG: push down of limit count: 5 cnt | user_id | time | value_1 | value_2 | value_3 | value_4 --------------------------------------------------------------------- @@ -437,6 +436,8 @@ SELECT time, event_type, value_2, value_3 FROM events_table WHERE foo.user_id = events_table.value_2; +-- prevent PG 11 - PG 12 outputs to diverge +SET citus.enable_cte_inlining TO false; SELECT * FROM subquery_and_ctes_second ORDER BY 3 DESC, 2 DESC, 1 DESC LIMIT 5; @@ -458,6 +459,7 @@ DEBUG: push down of limit count: 5 Thu Nov 23 09:33:16.992454 2017 | 3 | 4 | 1 (5 rows) +SET citus.enable_cte_inlining TO true; CREATE VIEW deep_subquery AS SELECT count(*) FROM diff --git a/src/test/regress/expected/with_dml.out b/src/test/regress/expected/with_dml.out index 3e33fe52f..5936d623f 100644 --- a/src/test/regress/expected/with_dml.out +++ b/src/test/regress/expected/with_dml.out @@ -97,7 +97,8 @@ DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition DETAIL: Subquery contains an expression that is not a simple column reference in the same position as the target table's partition column. HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. DEBUG: Collecting INSERT ... 
SELECT results on coordinator -DEBUG: generating subplan XXX_1 for CTE ids_to_insert: SELECT (((tenant_id)::integer OPERATOR(pg_catalog.*) 100))::text AS tenant_id FROM with_dml.distributed_table WHERE (dept OPERATOR(pg_catalog.>) 7) +DEBUG: CTE ids_to_insert is going to be inlined via distributed planning +DEBUG: generating subplan XXX_1 for subquery SELECT (((tenant_id)::integer OPERATOR(pg_catalog.*) 100))::text AS tenant_id FROM with_dml.distributed_table WHERE (dept OPERATOR(pg_catalog.>) 7) DEBUG: generating subplan XXX_2 for subquery SELECT DISTINCT ids_to_insert.tenant_id FROM (SELECT intermediate_result.tenant_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(tenant_id text)) ids_to_insert, with_dml.distributed_table WHERE (distributed_table.tenant_id OPERATOR(pg_catalog.<) ids_to_insert.tenant_id) DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT tenant_id FROM (SELECT intermediate_result.tenant_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(tenant_id text)) citus_insert_select_subquery -- not a very meaningful query @@ -145,7 +146,8 @@ SET dept = SELECT DISTINCT tenant_id::int FROM distributed_table ) select * from vals where tenant_id = 8 ) WHERE dept = 8; -DEBUG: generating subplan XXX_1 for CTE vals: SELECT DISTINCT (tenant_id)::integer AS tenant_id FROM with_dml.distributed_table +DEBUG: CTE vals is going to be inlined via distributed planning +DEBUG: generating subplan XXX_1 for subquery SELECT DISTINCT (tenant_id)::integer AS tenant_id FROM with_dml.distributed_table DEBUG: Plan XXX query after replacing subqueries and CTEs: UPDATE with_dml.second_distributed_table SET dept = (SELECT vals.tenant_id FROM (SELECT intermediate_result.tenant_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(tenant_id integer)) vals WHERE (vals.tenant_id OPERATOR(pg_catalog.=) 8)) WHERE (dept 
OPERATOR(pg_catalog.=) 8) -- Subquery inside the UPDATE statement UPDATE diff --git a/src/test/regress/expected/with_join.out b/src/test/regress/expected/with_join.out index 12d855c3e..ba2ffaa6a 100644 --- a/src/test/regress/expected/with_join.out +++ b/src/test/regress/expected/with_join.out @@ -152,6 +152,9 @@ LIMIT (5 rows) -- cte LEFT JOIN distributed_table should error out +-- as long as the CTE is recursively planned +-- prevent PG 11 - PG 12 outputs to diverge +SET citus.enable_cte_inlining TO false; WITH cte AS ( SELECT * FROM users_table WHERE user_id = 1 ORDER BY value_1 ) @@ -167,6 +170,7 @@ LIMIT 5; ERROR: cannot pushdown the subquery DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join +SET citus.enable_cte_inlining TO true; -- cte RIGHT JOIN distributed_table should work WITH cte AS ( SELECT * FROM users_table WHERE user_id = 1 ORDER BY value_1 @@ -214,6 +218,8 @@ LIMIT (5 rows) -- distributed_table RIGHT JOIN cte should error out +-- prevent PG 11 - PG 12 outputs to diverge +SET citus.enable_cte_inlining TO false; WITH cte AS ( SELECT * FROM users_table WHERE value_1 = 1 ORDER BY value_1 ) @@ -245,6 +251,7 @@ LIMIT 5; ERROR: cannot pushdown the subquery DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join +SET citus.enable_cte_inlining TO false; -- Joins with reference tables are planned as router queries WITH cte AS ( SELECT value_2, max(user_id) AS user_id FROM users_table WHERE value_2 = 1 GROUP BY value_2 HAVING count(*) > 1 diff --git a/src/test/regress/expected/with_modifying.out b/src/test/regress/expected/with_modifying.out index cb2279724..064df3aa4 100644 --- a/src/test/regress/expected/with_modifying.out +++ b/src/test/regress/expected/with_modifying.out @@ -586,6 +586,8 @@ raw_data AS ( SELECT * FROM raw_data ORDER BY val; ERROR: cannot perform distributed planning for the given modification DETAIL: Recursively planned distributed modifications with ctid on where clause are not 
+-- Needed because of CTE inlining triggering https://github.com/citusdata/citus/issues/3189
subquery_partitioning subquery_complex_target_list subqueries_not_supported subquery_in_where test: non_colocated_leaf_subquery_joins non_colocated_subquery_joins non_colocated_join_order -test: subquery_prepared_statements pg12 +test: subquery_prepared_statements pg12 cte_inline # ---------- # Miscellaneous tests to check our query planning behavior diff --git a/src/test/regress/sql/cte_inline.sql b/src/test/regress/sql/cte_inline.sql new file mode 100644 index 000000000..a8ee57df6 --- /dev/null +++ b/src/test/regress/sql/cte_inline.sql @@ -0,0 +1,588 @@ +CREATE SCHEMA cte_inline; +SET search_path TO cte_inline; +SET citus.next_shard_id TO 1960000; +CREATE TABLE test_table (key int, value text, other_value jsonb); +SELECT create_distributed_table ('test_table', 'key'); + +-- server version because CTE inlining might produce +-- different debug messages in PG 11 vs PG 12 +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int; + +SET client_min_messages TO DEBUG; + +-- Citus should not inline this CTE because otherwise it cannot +-- plan the query +WITH cte_1 AS (SELECT * FROM test_table) +SELECT + *, (SELECT 1) +FROM + cte_1; + +-- Should still not be inlined even if NOT MATERIALIZED is passed +WITH cte_1 AS NOT MATERIALIZED (SELECT * FROM test_table) +SELECT + *, (SELECT 1) +FROM + cte_1; + +-- the cte can be inlined because the unsupported +-- part of the query (subquery in WHERE clause) +-- doesn't access the cte +WITH cte_1 AS (SELECT * FROM test_table) +SELECT + count(*) +FROM + cte_1 +WHERE + key IN ( + SELECT + (SELECT 1) + FROM + test_table WHERE key = 1 + ); + +-- a similar query as the above, and this time the planning +-- fails, but it fails because the subquery in WHERE clause +-- cannot be planned by Citus +WITH cte_1 AS (SELECT * FROM test_table) +SELECT + count(*) +FROM + cte_1 +WHERE + key IN ( + SELECT + key + FROM + test_table + FOR UPDATE + ); + +-- Citus does the inlining, the planning fails +-- and retries without inlining, 
which works +-- fine later via recursive planning +WITH cte_1 AS + (SELECT * + FROM test_table) +SELECT *, (SELECT 1) +FROM + (SELECT * + FROM cte_1) AS foo; + +-- a little more complicated query tree +-- Citus does the inlining, the planning fails +-- and retries without inlining, which works +WITH top_cte AS + (SELECT * + FROM test_table) +SELECT * +FROM top_cte, + (WITH cte_1 AS + (SELECT * + FROM test_table) SELECT *, (SELECT 1) + FROM + (SELECT * + FROM cte_1) AS foo) AS bar; + +-- CTE is used inside a subquery in WHERE clause +-- the query wouldn't work by inlining, so Citus +-- retries again via recursive planning, which +-- works fine +WITH cte_1 AS + (SELECT * + FROM test_table) +SELECT count(*) +FROM test_table +WHERE KEY IN + (SELECT (SELECT 1) + FROM + (SELECT *, + random() + FROM + (SELECT * + FROM cte_1) AS foo) AS bar); + +-- cte_1 is used inside another CTE, but still +-- doesn't work when inlined because it is finally +-- used in an unsupported query +-- but still works fine because recursive planning +-- kicks in +WITH cte_1 AS + (SELECT * + FROM test_table) +SELECT (SELECT 1) AS KEY FROM ( + WITH cte_2 AS (SELECT *, random() + FROM (SELECT *,random() FROM cte_1) as foo) +SELECT *, random() FROM cte_2) as bar; + +-- in this example, cte_2 can be inlined, because it is not used +-- on any query that Citus cannot plan. 
However, cte_1 should not be +-- inlined, because it is used with a subquery in target list +WITH cte_1 AS (SELECT * FROM test_table), + cte_2 AS (select * from test_table) +SELECT + * +FROM + (SELECT *, (SELECT 1) FROM cte_1) as foo + JOIN + cte_2 + ON (true); + +-- unreferenced CTEs are just ignored +-- by Citus/Postgres +WITH a AS (SELECT * FROM test_table) +SELECT + *, row_number() OVER () +FROM + test_table +WHERE + key = 1; + +-- router queries are affected by the distributed +-- cte inlining +WITH a AS (SELECT * FROM test_table WHERE key = 1) +SELECT + *, (SELECT 1) +FROM + a +WHERE + key = 1; + +-- non router queries are affected by the distributed +-- cte inlining as well +WITH a AS (SELECT * FROM test_table) +SELECT + count(*) +FROM + a +WHERE + key = 1; + +-- explicitely using NOT MATERIALIZED should result in the same +WITH a AS NOT MATERIALIZED (SELECT * FROM test_table) +SELECT + count(*) +FROM + a +WHERE + key = 1; + +-- using MATERIALIZED should cause inlining not to happen +WITH a AS MATERIALIZED (SELECT * FROM test_table) +SELECT + count(*) +FROM + a +WHERE + key = 1; + +-- EXPLAIN should show the difference between materialized an not materialized +EXPLAIN (COSTS OFF) WITH a AS (SELECT * FROM test_table) +SELECT + count(*) +FROM + a +WHERE + key = 1; + +EXPLAIN (COSTS OFF) WITH a AS MATERIALIZED (SELECT * FROM test_table) +SELECT + count(*) +FROM + a +WHERE + key = 1; + + +-- citus should not inline the CTE because it is used multiple times +WITH cte_1 AS (SELECT * FROM test_table) +SELECT + count(*) +FROM + cte_1 as first_entry + JOIN + cte_1 as second_entry + USING (key); + +-- NOT MATERIALIZED should cause the query to be inlined twice +WITH cte_1 AS NOT MATERIALIZED (SELECT * FROM test_table) +SELECT + count(*) +FROM + cte_1 as first_entry + JOIN + cte_1 as second_entry + USING (key); + +-- EXPLAIN should show the differences between MATERIALIZED and NOT MATERIALIZED +EXPLAIN (COSTS OFF) WITH cte_1 AS (SELECT * FROM test_table) +SELECT + 
+-- LIMIT 5 in cte2 triggers recursive planning, after cte inlining
+-- all nested CTEs can be inlined
+-- NOT MATERIALIZED should not inline CTEs that are used in a modifying query, because
+-- we still don't support it
fine +-- even if it is inlined +WITH cte_1 AS (SELECT max(key) as max FROM test_table) +SELECT + key, count(*) +FROM + test_table +GROUP BY + key +HAVING + (count(*) > (SELECT max FROM cte_1)); + +-- cte used in ORDER BY just works fine +-- even if it is inlined +WITH cte_1 AS (SELECT max(key) as max FROM test_table) +SELECT + key +FROM + test_table JOIN cte_1 ON (key = max) +ORDER BY + cte_1.max; + +PREPARE inlined_cte_without_params AS + WITH cte_1 AS (SELECT count(*) FROM test_table GROUP BY key) + SELECT * FROM cte_1; +PREPARE non_inlined_cte_without_params AS + WITH cte_1 AS (SELECT * FROM test_table) + SELECT + *, (SELECT 1) + FROM + cte_1; +PREPARE inlined_cte_has_parameter_on_non_dist_key(int) AS + WITH cte_1 AS (SELECT count(*) FROM test_table WHERE value::int = $1 GROUP BY key) + SELECT * FROM cte_1; +PREPARE inlined_cte_has_parameter_on_dist_key(int) AS + WITH cte_1 AS (SELECT count(*) FROM test_table WHERE key > $1 GROUP BY key) + SELECT * FROM cte_1; +PREPARE non_inlined_cte_has_parameter_on_dist_key(int) AS + WITH cte_1 AS (SELECT * FROM test_table where key > $1) + SELECT + *, (SELECT 1) + FROM + cte_1; +PREPARE retry_planning(int) AS + WITH cte_1 AS (SELECT * FROM test_table WHERE key > $1) + SELECT json_object_agg(DISTINCT key, value) FROM cte_1; + + +EXECUTE inlined_cte_without_params; +EXECUTE inlined_cte_without_params; +EXECUTE inlined_cte_without_params; +EXECUTE inlined_cte_without_params; +EXECUTE inlined_cte_without_params; +EXECUTE inlined_cte_without_params; + +EXECUTE non_inlined_cte_without_params; +EXECUTE non_inlined_cte_without_params; +EXECUTE non_inlined_cte_without_params; +EXECUTE non_inlined_cte_without_params; +EXECUTE non_inlined_cte_without_params; +EXECUTE non_inlined_cte_without_params; + +EXECUTE inlined_cte_has_parameter_on_non_dist_key(1); +EXECUTE inlined_cte_has_parameter_on_non_dist_key(2); +EXECUTE inlined_cte_has_parameter_on_non_dist_key(3); +EXECUTE inlined_cte_has_parameter_on_non_dist_key(4); +EXECUTE 
inlined_cte_has_parameter_on_non_dist_key(5); +EXECUTE inlined_cte_has_parameter_on_non_dist_key(6); + +EXECUTE inlined_cte_has_parameter_on_dist_key(1); +EXECUTE inlined_cte_has_parameter_on_dist_key(2); +EXECUTE inlined_cte_has_parameter_on_dist_key(3); +EXECUTE inlined_cte_has_parameter_on_dist_key(4); +EXECUTE inlined_cte_has_parameter_on_dist_key(5); +EXECUTE inlined_cte_has_parameter_on_dist_key(6); + +EXECUTE non_inlined_cte_has_parameter_on_dist_key(1); +EXECUTE non_inlined_cte_has_parameter_on_dist_key(2); +EXECUTE non_inlined_cte_has_parameter_on_dist_key(3); +EXECUTE non_inlined_cte_has_parameter_on_dist_key(4); +EXECUTE non_inlined_cte_has_parameter_on_dist_key(5); +EXECUTE non_inlined_cte_has_parameter_on_dist_key(6); + +EXECUTE retry_planning(1); +EXECUTE retry_planning(2); +EXECUTE retry_planning(3); +EXECUTE retry_planning(4); +EXECUTE retry_planning(5); +EXECUTE retry_planning(6); + +-- this test can only work if the CTE is recursively +-- planned +WITH b AS (SELECT * FROM test_table) +SELECT * FROM (SELECT key as x FROM test_table OFFSET 0) as ref LEFT JOIN b ON (ref.x = b.key); + +-- this becomes a non-colocated subquery join +-- because after the CTEs are inlined the joins +-- become a non-colocated subquery join +WITH a AS (SELECT * FROM test_table), +b AS (SELECT * FROM test_table) +SELECT * FROM a LEFT JOIN b ON (a.value = b.value); + +-- cte a has to be recursively planned because of OFFSET 0 +-- after that, cte b also requires recursive planning +WITH a AS (SELECT * FROM test_table OFFSET 0), +b AS (SELECT * FROM test_table) +SELECT * FROM a LEFT JOIN b ON (a.value = b.value); + +-- after both CTEs are inlined, this becomes non-colocated subquery join +WITH cte_1 AS (SELECT * FROM test_table), +cte_2 AS (SELECT * FROM test_table) +SELECT * FROM cte_1 JOIN cte_2 ON (cte_1.value > cte_2.value); + +-- full join is only supported when both sides are +-- recursively planned +WITH cte_1 AS (SELECT value FROM test_table WHERE key > 1), + cte_2 AS 
(SELECT value FROM test_table WHERE key > 3)
+SELECT * FROM cte_1 FULL JOIN cte_2 USING (value);
+
+-- an unsupported agg. for multi-shard queries
+-- so CTE has to be recursively planned
+WITH cte_1 AS (SELECT * FROM test_table WHERE key > 1)
+SELECT json_object_agg(DISTINCT key, value) FROM cte_1;
+
+-- both cte_1 and cte_2 are going to be inlined.
+-- later, cte_2 is recursively planned since it doesn't have
+-- GROUP BY but aggregate in a subquery.
+-- this is an important example of being able to recursively plan
+-- "some" of the CTEs
+WITH cte_1 AS (SELECT value FROM test_table WHERE key > 1),
+ cte_2 AS (SELECT max(value) as value FROM test_table WHERE key > 3)
+SELECT * FROM cte_1 JOIN cte_2 USING (value);
+
+
+-- prevent DROP CASCADE to give notices
+SET client_min_messages TO ERROR;
+DROP SCHEMA cte_inline CASCADE;
diff --git a/src/test/regress/sql/failure_cte_subquery.sql b/src/test/regress/sql/failure_cte_subquery.sql
index 2e6ee026c..c4341ef4b 100644
--- a/src/test/regress/sql/failure_cte_subquery.sql
+++ b/src/test/regress/sql/failure_cte_subquery.sql
@@ -5,6 +5,10 @@ SET citus.shard_count to 2;
SET citus.shard_replication_factor to 1;
SET citus.next_shard_id TO 16000000;
+-- CTE inlining should not happen because
+-- the tests rely on intermediate results
+SET citus.enable_cte_inlining TO false;
+
SELECT pg_backend_pid() as pid \gset
CREATE TABLE users_table (user_id int, user_name text);
diff --git a/src/test/regress/sql/intermediate_result_pruning.sql b/src/test/regress/sql/intermediate_result_pruning.sql
index 22a61d4e4..8aee49fcc 100644
--- a/src/test/regress/sql/intermediate_result_pruning.sql
+++ b/src/test/regress/sql/intermediate_result_pruning.sql
@@ -19,6 +19,8 @@ SELECT create_distributed_table('table_3', 'key');
CREATE TABLE ref_table (key int, value text);
SELECT create_reference_table('ref_table');
+-- prevent PG 11 - PG 12 outputs to diverge
+SET citus.enable_cte_inlining TO false;
-- load some data
INSERT INTO table_1 VALUES (1,
'1'), (2, '2'), (3, '3'), (4, '4'); diff --git a/src/test/regress/sql/limit_intermediate_size.sql b/src/test/regress/sql/limit_intermediate_size.sql index 34f88dbf1..6207faef1 100644 --- a/src/test/regress/sql/limit_intermediate_size.sql +++ b/src/test/regress/sql/limit_intermediate_size.sql @@ -1,6 +1,9 @@ SET citus.enable_repartition_joins to ON; SET citus.task_executor_type to 'task-tracker'; +-- prevent PG 11 - PG 12 outputs to diverge +SET citus.enable_cte_inlining TO false; + SET citus.max_intermediate_result_size TO 2; -- should fail because the copy size is ~4kB for each cte WITH cte AS @@ -229,5 +232,3 @@ WITH cte AS ( cte2.user_id = cte3.user_id AND cte2.user_id = 1 ) SELECT * FROM cte ORDER BY 1,2,3,4,5 LIMIT 10; - -SET citus.task_executor_type to 'adaptive'; diff --git a/src/test/regress/sql/multi_explain.sql b/src/test/regress/sql/multi_explain.sql index a9ec11eab..d67d69d87 100644 --- a/src/test/regress/sql/multi_explain.sql +++ b/src/test/regress/sql/multi_explain.sql @@ -559,6 +559,8 @@ INSERT INTO lineitem_hash_part ( SELECT s FROM generate_series(5,10) s); -- explain with recursive planning +-- prevent PG 11 - PG 12 outputs to diverge +SET citus.enable_cte_inlining TO false; EXPLAIN (COSTS OFF, VERBOSE true) WITH keys AS ( SELECT DISTINCT l_orderkey FROM lineitem_hash_part @@ -569,6 +571,8 @@ series AS ( SELECT l_orderkey FROM series JOIN keys ON (s = l_orderkey) ORDER BY s; +SET citus.enable_cte_inlining TO true; + SELECT true AS valid FROM explain_json($$ WITH result AS ( SELECT l_quantity, count(*) count_quantity FROM lineitem diff --git a/src/test/regress/sql/multi_insert_select.sql b/src/test/regress/sql/multi_insert_select.sql index 1f5464064..bb9ea4619 100644 --- a/src/test/regress/sql/multi_insert_select.sql +++ b/src/test/regress/sql/multi_insert_select.sql @@ -515,7 +515,10 @@ INSERT INTO agg_events (value_1_agg, user_id) SELECT user_id, value_1_agg FROM agg_events ORDER BY 1,2; -- We support CTEs +-- but prefer to prevent inlining of 
the CTE +-- in order not to diverge from pg 11 vs pg 12 BEGIN; +SET LOCAL citus.enable_cte_inlining TO false; WITH fist_table_agg AS (SELECT max(value_1)+1 as v1_agg, user_id FROM raw_events_first GROUP BY user_id) INSERT INTO agg_events diff --git a/src/test/regress/sql/multi_insert_select_conflict.sql b/src/test/regress/sql/multi_insert_select_conflict.sql index 521c1afd0..08619d9e1 100644 --- a/src/test/regress/sql/multi_insert_select_conflict.sql +++ b/src/test/regress/sql/multi_insert_select_conflict.sql @@ -103,12 +103,16 @@ WITH inserted_table AS ( ON CONFLICT(col_1) DO UPDATE SET col_2 = 0 RETURNING * ) SELECT * FROM inserted_table ORDER BY 1; +-- prevent PG 11 - PG 12 outputs to diverge +SET citus.enable_cte_inlining TO false; + -- Get the select part from cte and do nothing on conflict WITH cte AS( SELECT col_1, col_2 FROM source_table_1 ) INSERT INTO target_table SELECT * FROM cte ON CONFLICT DO NOTHING; + -- Get the select part from cte and update on conflict WITH cte AS( SELECT col_1, col_2 FROM source_table_1 @@ -116,6 +120,8 @@ WITH cte AS( INSERT INTO target_table SELECT * FROM cte ON CONFLICT(col_1) DO UPDATE SET col_2 = EXCLUDED.col_2 + 1; SELECT * FROM target_table ORDER BY 1; +SET citus.enable_cte_inlining TO true; + -- Test with multiple CTEs WITH cte AS( SELECT col_1, col_2 FROM source_table_1 @@ -125,6 +131,9 @@ WITH cte AS( INSERT INTO target_table ((SELECT * FROM cte) UNION (SELECT * FROM cte_2)) ON CONFLICT(col_1) DO UPDATE SET col_2 = EXCLUDED.col_2 + 1; SELECT * FROM target_table ORDER BY 1; +-- prevent PG 11 - PG 12 outputs to diverge +SET citus.enable_cte_inlining TO false; + WITH inserted_table AS ( WITH cte AS( SELECT col_1, col_2, col_3 FROM source_table_1 @@ -159,6 +168,8 @@ SELECT FROM cte, source_table_1 WHERE cte.col_1 = source_table_1.col_1 ON CONFLICT DO NOTHING; +SET citus.enable_cte_inlining TO true; + -- Tests with foreign key to reference table CREATE TABLE test_ref_table (key int PRIMARY KEY); SELECT 
create_reference_table('test_ref_table');
@@ -288,6 +299,9 @@ FROM (
ON CONFLICT(col_1) DO UPDATE SET col_2 = 0;
SELECT * FROM target_table ORDER BY 1;
+-- prevent PG 11 - PG 12 outputs to diverge
+SET citus.enable_cte_inlining TO false;
+
WITH cte AS(
SELECT col_1, col_2, col_3 FROM source_table_1
), cte_2 AS(
diff --git a/src/test/regress/sql/multi_mx_function_call_delegation.sql b/src/test/regress/sql/multi_mx_function_call_delegation.sql
index 6f58ac8a2..febc44363 100644
--- a/src/test/regress/sql/multi_mx_function_call_delegation.sql
+++ b/src/test/regress/sql/multi_mx_function_call_delegation.sql
@@ -164,8 +164,9 @@ BEGIN
    RAISE EXCEPTION 'error';
END;$$;
select create_distributed_function('mx_call_func_raise(int)', '$1', 'mx_call_dist_table_1');
+\set VERBOSITY terse
select mx_call_func_raise(2);
-
+\set VERBOSITY default
-- Don't push-down when doing INSERT INTO ... SELECT func();
SET client_min_messages TO ERROR;
CREATE TABLE test (x int primary key);
diff --git a/src/test/regress/sql/multi_mx_router_planner.sql b/src/test/regress/sql/multi_mx_router_planner.sql
index 8f93f1b3e..628118f70 100644
--- a/src/test/regress/sql/multi_mx_router_planner.sql
+++ b/src/test/regress/sql/multi_mx_router_planner.sql
@@ -6,6 +6,10 @@
-- run all the router queries from the one of the workers
+-- prevent PG 11 - PG 12 outputs to diverge
+-- and CTE inlining is not relevant to router planner anyway
+SET citus.enable_cte_inlining TO false;
+
\c - - - :worker_1_port
-- this table is used in a CTE test
CREATE TABLE authors_hash_mx ( name text, id bigint );
@@ -273,11 +277,13 @@ SELECT a.author_id as first_author, b.word_count as second_word_count
-- following join is not router plannable since there are no
-- workers containing both shards, but will work through recursive
-- planning
+SET client_min_messages TO INFO;
WITH single_shard as (SELECT * FROM articles_single_shard_hash_mx)
SELECT a.author_id as first_author, b.word_count as second_word_count
FROM articles_hash_mx a,
single_shard b
WHERE a.author_id = 2 and a.author_id = b.author_id
LIMIT 3;
+SET client_min_messages TO DEBUG;
-- single shard select with limit is router plannable
SELECT *
diff --git a/src/test/regress/sql/multi_router_planner.sql b/src/test/regress/sql/multi_router_planner.sql
index 18d14f544..a9783b105 100644
--- a/src/test/regress/sql/multi_router_planner.sql
+++ b/src/test/regress/sql/multi_router_planner.sql
@@ -10,6 +10,10 @@ SET citus.next_shard_id TO 840000;
-- other tests that triggers fast-path-router planner
SET citus.enable_fast_path_router_planner TO false;
+-- prevent PG 11 - PG 12 outputs to diverge
+-- and CTE inlining is not relevant to router planner anyway
+SET citus.enable_cte_inlining TO false;
+
CREATE TABLE articles_hash (
	id bigint NOT NULL,
	author_id bigint NOT NULL,
diff --git a/src/test/regress/sql/multi_task_assignment_policy.sql b/src/test/regress/sql/multi_task_assignment_policy.sql
index 03ead23c2..855b1ef82 100644
--- a/src/test/regress/sql/multi_task_assignment_policy.sql
+++ b/src/test/regress/sql/multi_task_assignment_policy.sql
@@ -240,6 +240,8 @@ SET citus.task_assignment_policy TO 'round-robin';
-- Run the query two times to make sure that it hits two different workers
-- on consecutive runs
+-- prevent PG 11 - PG 12 outputs to diverge
+SET citus.enable_cte_inlining TO false;
INSERT INTO explain_outputs
SELECT parse_explain_output($cmd$
EXPLAIN WITH q1 AS (SELECT * FROM task_assignment_test_table_2)
SELECT * FROM q1
diff --git a/src/test/regress/sql/non_colocated_subquery_joins.sql b/src/test/regress/sql/non_colocated_subquery_joins.sql
index 9b74ba28f..925974ca9 100644
--- a/src/test/regress/sql/non_colocated_subquery_joins.sql
+++ b/src/test/regress/sql/non_colocated_subquery_joins.sql
@@ -437,6 +437,8 @@ SELECT count(*) FROM q1, (SELECT
users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) as bar WHERE bar.user_id = q1.user_id
;$$);
-- subquery joins should work fine when used with CTEs
+-- prevent PG 11
- PG 12 outputs to diverge +SET citus.enable_cte_inlining TO false; SELECT true AS valid FROM explain_json_2($$ WITH q1 AS (SELECT user_id FROM users_table) SELECT count(*) FROM q1, (SELECT @@ -445,6 +447,7 @@ SELECT true AS valid FROM explain_json_2($$ users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as bar WHERE bar.user_id = q1.user_id ;$$); +SET citus.enable_cte_inlining TO true; -- should work fine within UNIONs diff --git a/src/test/regress/sql/subquery_and_cte.sql b/src/test/regress/sql/subquery_and_cte.sql index 272384176..1752a2bb6 100644 --- a/src/test/regress/sql/subquery_and_cte.sql +++ b/src/test/regress/sql/subquery_and_cte.sql @@ -3,6 +3,9 @@ -- =================================================================== SET search_path TO subquery_and_ctes; +-- prevent PG 11 - PG 12 outputs to diverge +SET citus.enable_cte_inlining TO false; + CREATE TABLE users_table_local AS SELECT * FROM users_table; CREATE TABLE dist_table (id int, value int); diff --git a/src/test/regress/sql/subquery_in_where.sql b/src/test/regress/sql/subquery_in_where.sql index 993f0e481..df637ce19 100644 --- a/src/test/regress/sql/subquery_in_where.sql +++ b/src/test/regress/sql/subquery_in_where.sql @@ -7,6 +7,8 @@ SET search_path TO subquery_in_where, public; SET client_min_messages TO DEBUG1; --CTEs can be used as a recurring tuple with subqueries in WHERE +-- prevent PG 11 - PG 12 outputs to diverge +SET citus.enable_cte_inlining TO false; WITH event_id AS (SELECT user_id AS events_user_id, time AS events_time, @@ -16,6 +18,7 @@ SELECT Count(*) FROM event_id WHERE events_user_id IN (SELECT user_id FROM users_table); +SET citus.enable_cte_inlining TO true; --Correlated subqueries can not be used in WHERE clause WITH event_id diff --git a/src/test/regress/sql/subquery_view.sql b/src/test/regress/sql/subquery_view.sql index fd7c8e662..91541a189 100644 --- a/src/test/regress/sql/subquery_view.sql +++ 
b/src/test/regress/sql/subquery_view.sql
@@ -339,11 +339,14 @@ SELECT time, event_type, value_2, value_3 FROM
	WHERE
		foo.user_id = events_table.value_2;
-
+-- prevent PG 11 - PG 12 outputs to diverge
+SET citus.enable_cte_inlining TO false;
SELECT * FROM subquery_and_ctes_second
ORDER BY 3 DESC, 2 DESC, 1 DESC
LIMIT 5;
+SET citus.enable_cte_inlining TO true;
+
CREATE VIEW deep_subquery AS
SELECT count(*)
FROM
diff --git a/src/test/regress/sql/with_join.sql b/src/test/regress/sql/with_join.sql
index 0157199c2..85c2c122a 100644
--- a/src/test/regress/sql/with_join.sql
+++ b/src/test/regress/sql/with_join.sql
@@ -127,6 +127,9 @@ LIMIT 5;
-- cte LEFT JOIN distributed_table should error out
+-- as long as the CTE is recursively planned
+-- prevent PG 11 - PG 12 outputs to diverge
+SET citus.enable_cte_inlining TO false;
WITH cte AS (
  SELECT * FROM users_table WHERE user_id = 1 ORDER BY value_1
)
@@ -141,6 +144,8 @@ ORDER BY
LIMIT
  5;
+SET citus.enable_cte_inlining TO true;
+
-- cte RIGHT JOIN distributed_table should work
WITH cte AS (
  SELECT * FROM users_table WHERE user_id = 1 ORDER BY value_1
@@ -172,6 +177,8 @@ LIMIT
  5;
-- distributed_table RIGHT JOIN cte should error out
+-- prevent PG 11 - PG 12 outputs to diverge
+SET citus.enable_cte_inlining TO false;
WITH cte AS (
  SELECT * FROM users_table WHERE value_1 = 1 ORDER BY value_1
)
@@ -201,6 +208,8 @@ ORDER BY
LIMIT
  5;
+SET citus.enable_cte_inlining TO false;
+
-- Joins with reference tables are planned as router queries
WITH cte AS (
	SELECT value_2, max(user_id) AS user_id FROM users_table WHERE value_2 = 1 GROUP BY value_2 HAVING count(*) > 1
diff --git a/src/test/regress/sql/with_modifying.sql b/src/test/regress/sql/with_modifying.sql
index bc0315ee6..f49d53be0 100644
--- a/src/test/regress/sql/with_modifying.sql
+++ b/src/test/regress/sql/with_modifying.sql
@@ -354,6 +354,8 @@ raw_data AS (
)
SELECT * FROM raw_data ORDER BY val;
+-- Needed because of CTE inlining triggering
https://github.com/citusdata/citus/issues/3189 +SET citus.enable_cte_inlining TO FALSE; WITH added_data AS ( INSERT INTO modify_table VALUES (1, trunc(10 * random())), (1, trunc(random())) RETURNING * ), @@ -364,6 +366,18 @@ raw_data AS ( DELETE FROM modify_table WHERE id = 1 AND val IN (SELECT val FROM select_data) RETURNING * ) SELECT COUNT(*) FROM raw_data; +SET citus.enable_cte_inlining TO TRUE; + +WITH added_data AS ( + INSERT INTO modify_table VALUES (1, trunc(10 * random())), (1, trunc(random())) RETURNING * +), +select_data AS ( + SELECT val, '2011-01-01' FROM added_data WHERE id = 1 +), +raw_data AS ( + DELETE FROM modify_table WHERE id = 1 AND val IN (SELECT val FROM select_data) RETURNING * +) +SELECT COUNT(*) FROM raw_data; INSERT INTO modify_table VALUES (1,2), (1,6), (2, 3), (3, 5); WITH select_data AS ( diff --git a/src/test/regress/sql/with_set_operations.sql b/src/test/regress/sql/with_set_operations.sql index e6b5caecd..bbfe707a1 100644 --- a/src/test/regress/sql/with_set_operations.sql +++ b/src/test/regress/sql/with_set_operations.sql @@ -4,6 +4,9 @@ SET client_min_messages TO DEBUG1; +-- prevent PG 11 - PG 12 outputs to diverge +SET citus.enable_cte_inlining TO false; + -- use ctes inside unions on the top level WITH cte_1 AS (SELECT user_id FROM users_table),