citus/src/backend/distributed/planner/cte_inline.c

374 lines
10 KiB
C

/*-------------------------------------------------------------------------
*
* cte_inline.c
* For multi-shard queries, Citus can only recursively plan CTEs. Instead,
* with the functions defined in this file, certain CTEs can be inlined
* as subqueries in the query tree. In that case, a more optimal distributed
* planning path, query pushdown planning, kicks in and the CTEs can actually
* be pushed down as long as it is safe to push them down as subqueries.
*
*
* Copyright (c) Citus Data, Inc.
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/optimizer.h"
#include "rewrite/rewriteManip.h"
#include "pg_version_compat.h"
#include "pg_version_constants.h"
#include "distributed/cte_inline.h"
/*
 * State carried by inline_cte_walker while the references to a single CTE
 * are replaced with subqueries.
 */
typedef struct inline_cte_walker_context
{
	/* name of the target CTE; compared against the ctename of RTE_CTE entries */
	const char *ctename;

	/* nesting depth of the Query being visited, relative to the target CTE */
	int levelsup;

	/* number of references that still have to be replaced */
	int refcount;

	/* query of the CTE; a copy of it is substituted for each reference */
	Query *ctequery;

	/* Citus addition to Postgres' struct: column aliases defined on the CTE */
	List *aliascolnames;
} inline_cte_walker_context;
/* copied from the PostgreSQL source and moved into a function for readability */
static bool PostgreSQLCTEInlineCondition(CommonTableExpr *cte, CmdType cmdType);
/* the following utility functions are copied from the PostgreSQL source */
static void inline_cte(Query *mainQuery, CommonTableExpr *cte);
static bool inline_cte_walker(Node *node, inline_cte_walker_context *context);
static bool contain_dml(Node *node);
static bool contain_dml_walker(Node *node, void *context);
/* the following utility functions implement Citus' own logic */
static bool RecursivelyInlineCteWalker(Node *node, void *context);
static void InlineCTEsInQueryTree(Query *query);
static bool QueryTreeContainsInlinableCteWalker(Node *node, void *context);
/*
 * RecursivelyInlineCtesInQueryTree gets a query and traverses the tree from
 * top to bottom. On each level, the CTEs that are eligible for inlining are
 * inlined as subqueries.
 *
 * This is useful in distributed planning because Citus' (sub)query planning
 * logic is superior to CTE planning, where CTEs are always recursively
 * planned, which might produce very slow executions.
 */
void
RecursivelyInlineCtesInQueryTree(Query *query)
{
	/* start with the CTEs that are defined on the given query itself ... */
	InlineCTEsInQueryTree(query);

	/* ... and then repeat the same for every Query node nested below it */
	(void) query_tree_walker(query, RecursivelyInlineCteWalker, NULL, 0);
}
/*
 * RecursivelyInlineCteWalker finds all the Query nodes reachable from the
 * given node and inlines the eligible CTEs of each of them.
 */
static bool
RecursivelyInlineCteWalker(Node *node, void *context)
{
	if (node == NULL)
	{
		return false;
	}

	if (!IsA(node, Query))
	{
		/* not a query: keep searching the expression for nested Query nodes */
		return expression_tree_walker(node, RecursivelyInlineCteWalker, context);
	}

	Query *nestedQuery = (Query *) node;

	InlineCTEsInQueryTree(nestedQuery);
	(void) query_tree_walker(nestedQuery, RecursivelyInlineCteWalker, NULL, 0);

	/* this query is fully handled, there is nothing left to recurse into */
	return false;
}
/*
 * InlineCTEsInQueryTree gets a query tree and tries to inline the CTEs listed
 * in query->cteList as subqueries in the query tree.
 *
 * Every CTE that satisfies PostgreSQLCTEInlineCondition() has its references
 * replaced via inline_cte(), gets its cterefcount reset to 0 and is removed
 * from query->cteList. CTEs that do not satisfy the condition are left as is.
 *
 * Most of the code comes from PostgreSQL's CTE inlining logic; the few Citus
 * additions are commented in the code.
 */
static void
InlineCTEsInQueryTree(Query *query)
{
	ListCell *cteCell = NULL;

	/* iterate on a copy of the list because we'll be modifying query->cteList */
	List *copyOfCteList = list_copy(query->cteList);

	foreach(cteCell, copyOfCteList)
	{
		CommonTableExpr *cte = (CommonTableExpr *) lfirst(cteCell);

		/* only inline the CTEs that Postgres itself considers safe to inline */
		if (!PostgreSQLCTEInlineCondition(cte, query->commandType))
		{
			continue;
		}

		elog(DEBUG1, "CTE %s is going to be inlined via "
					 "distributed planning", cte->ctename);

		/* do the hard work of cte inlining */
		inline_cte(query, cte);

		/* clean-up the necessary fields for distributed planning */
		cte->cterefcount = 0;
		query->cteList = list_delete_ptr(query->cteList, cte);
	}

	/* releases only the cells of the shallow copy, the CTEs are untouched */
	list_free(copyOfCteList);
}
/*
 * QueryTreeContainsInlinableCTE recursively traverses the queryTree, and
 * returns true if any of the (sub)queries in the queryTree contains at least
 * one CTE that is eligible for inlining.
 */
bool
QueryTreeContainsInlinableCTE(Query *queryTree)
{
	Node *rootNode = (Node *) queryTree;

	return QueryTreeContainsInlinableCteWalker(rootNode, NULL);
}
/*
 * QueryTreeContainsInlinableCteWalker walks over the node, and returns true
 * if any of the (sub)queries in the node contains at least one CTE that is
 * eligible for inlining according to PostgreSQLCTEInlineCondition().
 */
static bool
QueryTreeContainsInlinableCteWalker(Node *node, void *context)
{
	if (node == NULL)
	{
		return false;
	}

	if (!IsA(node, Query))
	{
		return expression_tree_walker(node, QueryTreeContainsInlinableCteWalker, NULL);
	}

	Query *currentQuery = (Query *) node;
	ListCell *cteCell = NULL;

	foreach(cteCell, currentQuery->cteList)
	{
		CommonTableExpr *candidateCte = (CommonTableExpr *) lfirst(cteCell);
		bool inlinable =
			PostgreSQLCTEInlineCondition(candidateCte, currentQuery->commandType);

		if (inlinable)
		{
			/* a single inlinable CTE is enough to answer the question */
			return true;
		}
	}

	/* nothing inlinable on this level, continue with the nested queries */
	return query_tree_walker(currentQuery, QueryTreeContainsInlinableCteWalker, NULL, 0);
}
/*
 * PostgreSQLCTEInlineCondition returns true if the CTE is considered
 * safe to inline by Postgres.
 *
 * cte is the CTE to check; cmdType is the command type of the query the CTE
 * belongs to (callers in this file pass query->commandType).
 */
static bool
PostgreSQLCTEInlineCondition(CommonTableExpr *cte, CmdType cmdType)
{
	/*
	 * Consider inlining the CTE (creating RTE_SUBQUERY RTE(s)) instead of
	 * implementing it as a separately-planned CTE.
	 *
	 * Inlining is ruled out when any of the following holds:
	 *
	 * 1. The user said not to (the CTEMaterializeAlways option).
	 *
	 * 2. The CTE is recursive.
	 *
	 * 3. The CTE has side-effects; this includes either not being a plain
	 *    SELECT, or containing volatile functions. Inlining might change
	 *    the side-effects, which would be bad.
	 *
	 * Otherwise inlining is optional. It should always be a win for a CTE
	 * with a single reference. For a multiply-referenced CTE it is unclear:
	 * inlining duplicates the computation, but the ability to absorb
	 * restrictions from the outer query level could outweigh that. There is
	 * not nearly enough information at this point to decide, so the user's
	 * preference is followed: by default only singly-referenced CTEs are
	 * inlined, while a CTE marked CTEMaterializeNever is inlined even if it
	 * is referenced multiple times.
	 */
	bool inliningRequested = (cte->ctematerialized == CTEMaterializeNever);
	bool singleReferenceDefault = (cte->ctematerialized == CTEMaterializeDefault &&
								   cte->cterefcount == 1);

	if (!inliningRequested && !singleReferenceDefault)
	{
		return false;
	}

	if (cte->cterecursive)
	{
		return false;
	}

	if (cmdType != CMD_SELECT)
	{
		return false;
	}

	if (contain_dml(cte->ctequery))
	{
		return false;
	}

	if (contain_volatile_functions(cte->ctequery))
	{
		return false;
	}

	return true;
}
/* *INDENT-OFF* */
/*
 * inline_cte: convert RTE_CTE references to given CTE into RTE_SUBQUERYs
 *
 * mainQuery is the query tree that is searched for references, cte is the
 * CTE whose query gets substituted for them.
 */
static void
inline_cte(Query *mainQuery, CommonTableExpr *cte)
{
	inline_cte_walker_context context = {
		.ctename = cte->ctename,

		/* Start at levelsup = -1 because we'll immediately increment it */
		.levelsup = -1,
		.refcount = cte->cterefcount,
		.ctequery = castNode(Query, cte->ctequery),
		.aliascolnames = cte->aliascolnames
	};

	(void) inline_cte_walker((Node *) mainQuery, &context);

	/* Assert we replaced all references */
	Assert(context.refcount == 0);
}
/*
 * inline_cte_walker converts every RTE_CTE range table entry that refers to
 * the CTE described by context into an RTE_SUBQUERY that holds a copy of the
 * CTE's query.
 *
 * A range table entry is a reference to the target CTE when its ctename equals
 * context->ctename and its ctelevelsup equals context->levelsup, the nesting
 * depth that is maintained while descending into Query nodes. For each
 * replacement context->refcount is decremented.
 *
 * Returns false for NULL, Query and RangeTblEntry nodes; any other node is
 * handed over to expression_tree_walker().
 *
 * See PostgreSQL's source code at src/backend/optimizer/plan/subselect.c.
 */
static bool
inline_cte_walker(Node *node, inline_cte_walker_context *context)
{
	if (node == NULL)
	{
		return false;
	}

	if (IsA(node, Query))
	{
		Query *query = (Query *) node;

		context->levelsup++;

		/*
		 * Visit the query's RTE nodes after their contents; otherwise
		 * query_tree_walker would descend into the newly inlined CTE query,
		 * which we don't want.
		 */
		(void) query_tree_walker(query, inline_cte_walker, context,
								 QTW_EXAMINE_RTES_AFTER);

		context->levelsup--;

		return false;
	}
	else if (IsA(node, RangeTblEntry))
	{
		RangeTblEntry *rte = (RangeTblEntry *) node;

		if (rte->rtekind == RTE_CTE &&
			strcmp(rte->ctename, context->ctename) == 0 &&
			rte->ctelevelsup == context->levelsup)
		{
			/*
			 * Found a reference to replace. Generate a copy of the CTE query
			 * with appropriate level adjustment for outer references (e.g.,
			 * to other CTEs).
			 */
			Query *newquery = copyObject(context->ctequery);

			if (context->levelsup > 0)
			{
				IncrementVarSublevelsUp((Node *) newquery, context->levelsup, 1);
			}

			/*
			 * Convert the RTE_CTE RTE into a RTE_SUBQUERY.
			 *
			 * Historically, a FOR UPDATE clause has been treated as extending
			 * into views and subqueries, but not into CTEs. We preserve this
			 * distinction by not trying to push rowmarks into the new
			 * subquery.
			 */
			rte->rtekind = RTE_SUBQUERY;
			rte->subquery = newquery;
			rte->security_barrier = false;

			/*
			 * Citus addition: rename the columns of the subquery to the
			 * column aliases defined on the CTE. The alias count could be
			 * less than the actual column count; forboth stops at the end of
			 * the shorter list, so only the provided aliases are applied and
			 * the remaining columns keep their original names.
			 */
			ListCell *aliasCell = NULL;
			ListCell *targetCell = NULL;

			forboth(aliasCell, context->aliascolnames,
					targetCell, rte->subquery->targetList)
			{
				String *columnAlias = (String *) lfirst(aliasCell);
				TargetEntry *targetEntry = (TargetEntry *) lfirst(targetCell);

				Assert(IsA(columnAlias, String));
				Assert(IsA(targetEntry, TargetEntry));

				targetEntry->resname = strVal(columnAlias);
			}

			/* Zero out CTE-specific fields */
			rte->ctename = NULL;
			rte->ctelevelsup = 0;
			rte->self_reference = false;
			rte->coltypes = NIL;
			rte->coltypmods = NIL;
			rte->colcollations = NIL;

			/* Count the number of replacements we've done */
			context->refcount--;
		}

		return false;
	}

	return expression_tree_walker(node, inline_cte_walker, context);
}
/*
 * contain_dml: is any subquery not a plain SELECT?
 *
 * Both SELECT FOR UPDATE/SHARE and non-SELECT commands such as INSERT are
 * rejected, i.e. reported as true. The actual check is done by
 * contain_dml_walker, which does not use its context argument.
 */
static bool
contain_dml(Node *node)
{
	void *unusedContext = NULL;

	return contain_dml_walker(node, unusedContext);
}
/*
 * contain_dml_walker returns true as soon as it finds a Query node that is
 * not a SELECT or that carries row marks; otherwise it keeps walking the
 * nested queries and expressions.
 */
static bool
contain_dml_walker(Node *node, void *context)
{
	if (node == NULL)
	{
		return false;
	}

	if (!IsA(node, Query))
	{
		return expression_tree_walker(node, contain_dml_walker, context);
	}

	Query *query = (Query *) node;
	bool isPlainSelect = (query->commandType == CMD_SELECT &&
						  query->rowMarks == NIL);

	if (!isPlainSelect)
	{
		return true;
	}

	return query_tree_walker(query, contain_dml_walker, context, 0);
}
/* *INDENT-ON* */