mirror of https://github.com/citusdata/citus.git
374 lines
10 KiB
C
374 lines
10 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* cte_inline.c
|
|
* For multi-shard queries, Citus can only recursively plan CTEs. Instead,
|
|
* with the functions defined in this file, certain CTEs can be inlined
|
|
* as subqueries in the query tree. In that case, more optimal distributed
|
|
* planning, the query pushdown planning, kicks in and the CTEs can actually
|
|
* be pushed down as long as it is safe to pushdown as a subquery.
|
|
*
|
|
*
|
|
* Copyright (c) Citus Data, Inc.
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
#include "postgres.h"
|
|
|
|
#include "nodes/nodeFuncs.h"
|
|
#include "optimizer/optimizer.h"
|
|
#include "rewrite/rewriteManip.h"
|
|
|
|
#include "pg_version_compat.h"
|
|
#include "pg_version_constants.h"
|
|
|
|
#include "distributed/cte_inline.h"
|
|
|
|
typedef struct inline_cte_walker_context
|
|
{
|
|
const char *ctename; /* name and relative level of target CTE */
|
|
int levelsup;
|
|
int refcount; /* number of remaining references */
|
|
Query *ctequery; /* query to substitute */
|
|
|
|
List *aliascolnames; /* citus addition to Postgres' inline_cte_walker_context */
|
|
} inline_cte_walker_context;
|
|
|
|
/* copy & paste from Postgres source, moved into a function for readability */
|
|
static bool PostgreSQLCTEInlineCondition(CommonTableExpr *cte, CmdType cmdType);
|
|
|
|
/* the following utility functions are copy & paste from PostgreSQL code */
|
|
static void inline_cte(Query *mainQuery, CommonTableExpr *cte);
|
|
static bool inline_cte_walker(Node *node, inline_cte_walker_context *context);
|
|
static bool contain_dml(Node *node);
|
|
static bool contain_dml_walker(Node *node, void *context);
|
|
|
|
|
|
/* the following utility functions are related to Citus' logic */
|
|
static bool RecursivelyInlineCteWalker(Node *node, void *context);
|
|
static void InlineCTEsInQueryTree(Query *query);
|
|
static bool QueryTreeContainsInlinableCteWalker(Node *node, void *context);
|
|
|
|
|
|
/*
|
|
* RecursivelyInlineCtesInQueryTree gets a query and recursively traverses the
|
|
* tree from top to bottom. On each level, the CTEs that are eligable for
|
|
* inlining are inlined as subqueries. This is useful in distributed planning
|
|
* because Citus' sub(query) planning logic superior to CTE planning, where CTEs
|
|
* are always recursively planned, which might produce very slow executions.
|
|
*/
|
|
void
|
|
RecursivelyInlineCtesInQueryTree(Query *query)
|
|
{
|
|
InlineCTEsInQueryTree(query);
|
|
|
|
query_tree_walker(query, RecursivelyInlineCteWalker, NULL, 0);
|
|
}
|
|
|
|
|
|
/*
|
|
* RecursivelyInlineCteWalker recursively finds all the Query nodes and
|
|
* recursively inline eligable ctes.
|
|
*/
|
|
static bool
|
|
RecursivelyInlineCteWalker(Node *node, void *context)
|
|
{
|
|
if (node == NULL)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
if (IsA(node, Query))
|
|
{
|
|
Query *query = (Query *) node;
|
|
|
|
InlineCTEsInQueryTree(query);
|
|
|
|
query_tree_walker(query, RecursivelyInlineCteWalker, NULL, 0);
|
|
|
|
/* we're done, no need to recurse anymore for this query */
|
|
return false;
|
|
}
|
|
|
|
return expression_tree_walker(node, RecursivelyInlineCteWalker, context);
|
|
}
|
|
|
|
|
|
/*
|
|
* InlineCTEsInQueryTree gets a query tree and tries to inline CTEs as subqueries
|
|
* in the query tree.
|
|
*
|
|
* Most of the code is coming from PostgreSQL's CTE inlining logic, there are very
|
|
* few additions that Citus added, which are already commented in the code.
|
|
*/
|
|
void
|
|
InlineCTEsInQueryTree(Query *query)
|
|
{
|
|
ListCell *cteCell = NULL;
|
|
|
|
/* iterate on the copy of the list because we'll be modifying query->cteList */
|
|
List *copyOfCteList = list_copy(query->cteList);
|
|
foreach(cteCell, copyOfCteList)
|
|
{
|
|
CommonTableExpr *cte = (CommonTableExpr *) lfirst(cteCell);
|
|
|
|
/*
|
|
* First, make sure that Postgres is OK to inline the CTE. Later, check for
|
|
* distributed query planning constraints that might prevent inlining.
|
|
*/
|
|
if (PostgreSQLCTEInlineCondition(cte, query->commandType))
|
|
{
|
|
elog(DEBUG1, "CTE %s is going to be inlined via "
|
|
"distributed planning", cte->ctename);
|
|
|
|
/* do the hard work of cte inlining */
|
|
inline_cte(query, cte);
|
|
|
|
/* clean-up the necessary fields for distributed planning */
|
|
cte->cterefcount = 0;
|
|
query->cteList = list_delete_ptr(query->cteList, cte);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* QueryTreeContainsInlinableCTE recursively traverses the queryTree, and returns true
|
|
* if any of the (sub)queries in the queryTree contains at least one CTE.
|
|
*/
|
|
bool
|
|
QueryTreeContainsInlinableCTE(Query *queryTree)
|
|
{
|
|
return QueryTreeContainsInlinableCteWalker((Node *) queryTree, NULL);
|
|
}
|
|
|
|
|
|
/*
|
|
* QueryTreeContainsInlinableCteWalker walks over the node, and returns true if any of
|
|
* the (sub)queries in the node contains at least one CTE.
|
|
*/
|
|
static bool
|
|
QueryTreeContainsInlinableCteWalker(Node *node, void *context)
|
|
{
|
|
if (node == NULL)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
if (IsA(node, Query))
|
|
{
|
|
Query *query = (Query *) node;
|
|
|
|
ListCell *cteCell = NULL;
|
|
foreach(cteCell, query->cteList)
|
|
{
|
|
CommonTableExpr *cte = (CommonTableExpr *) lfirst(cteCell);
|
|
|
|
if (PostgreSQLCTEInlineCondition(cte, query->commandType))
|
|
{
|
|
/*
|
|
* Return true even if we can find a single CTE that is
|
|
* eligable for inlining.
|
|
*/
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return query_tree_walker(query, QueryTreeContainsInlinableCteWalker, NULL, 0);
|
|
}
|
|
|
|
return expression_tree_walker(node, QueryTreeContainsInlinableCteWalker, NULL);
|
|
}
|
|
|
|
|
|
/*
|
|
* PostgreSQLCTEInlineCondition returns true if the CTE is considered
|
|
* safe to inline by Postgres.
|
|
*/
|
|
static bool
|
|
PostgreSQLCTEInlineCondition(CommonTableExpr *cte, CmdType cmdType)
|
|
{
|
|
/*
|
|
* Consider inlining the CTE (creating RTE_SUBQUERY RTE(s)) instead of
|
|
* implementing it as a separately-planned CTE.
|
|
*
|
|
* We cannot inline if any of these conditions hold:
|
|
*
|
|
* 1. The user said not to (the CTEMaterializeAlways option).
|
|
*
|
|
* 2. The CTE is recursive.
|
|
*
|
|
* 3. The CTE has side-effects; this includes either not being a plain
|
|
* SELECT, or containing volatile functions. Inlining might change
|
|
* the side-effects, which would be bad.
|
|
*
|
|
* Otherwise, we have an option whether to inline or not. That should
|
|
* always be a win if there's just a single reference, but if the CTE
|
|
* is multiply-referenced then it's unclear: inlining adds duplicate
|
|
* computations, but the ability to absorb restrictions from the outer
|
|
* query level could outweigh that. We do not have nearly enough
|
|
* information at this point to tell whether that's true, so we let
|
|
* the user express a preference. Our default behavior is to inline
|
|
* only singly-referenced CTEs, but a CTE marked CTEMaterializeNever
|
|
* will be inlined even if multiply referenced.
|
|
*/
|
|
if (
|
|
(cte->ctematerialized == CTEMaterializeNever ||
|
|
(cte->ctematerialized == CTEMaterializeDefault &&
|
|
cte->cterefcount == 1)) &&
|
|
!cte->cterecursive &&
|
|
cmdType == CMD_SELECT &&
|
|
!contain_dml(cte->ctequery) &&
|
|
!contain_volatile_functions(cte->ctequery))
|
|
{
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
|
|
/* *INDENT-OFF* */
|
|
/*
|
|
* inline_cte: convert RTE_CTE references to given CTE into RTE_SUBQUERYs
|
|
*/
|
|
static void
|
|
inline_cte(Query *mainQuery, CommonTableExpr *cte)
|
|
{
|
|
struct inline_cte_walker_context context;
|
|
|
|
context.ctename = cte->ctename;
|
|
/* Start at levelsup = -1 because we'll immediately increment it */
|
|
context.levelsup = -1;
|
|
context.refcount = cte->cterefcount;
|
|
context.ctequery = castNode(Query, cte->ctequery);
|
|
context.aliascolnames = cte->aliascolnames;
|
|
|
|
(void) inline_cte_walker((Node *) mainQuery, &context);
|
|
|
|
/* Assert we replaced all references */
|
|
Assert(context.refcount == 0);
|
|
}
|
|
|
|
|
|
/*
|
|
* See PostgreSQL's source code at src/backend/optimizer/plan/subselect.c.
|
|
*/
|
|
static bool
|
|
inline_cte_walker(Node *node, inline_cte_walker_context *context)
|
|
{
|
|
if (node == NULL)
|
|
return false;
|
|
if (IsA(node, Query))
|
|
{
|
|
Query *query = (Query *) node;
|
|
|
|
context->levelsup++;
|
|
|
|
(void) query_tree_walker(query, inline_cte_walker, context,
|
|
QTW_EXAMINE_RTES_AFTER);
|
|
context->levelsup--;
|
|
|
|
return false;
|
|
}
|
|
else if (IsA(node, RangeTblEntry))
|
|
{
|
|
RangeTblEntry *rte = (RangeTblEntry *) node;
|
|
|
|
if (rte->rtekind == RTE_CTE &&
|
|
strcmp(rte->ctename, context->ctename) == 0 &&
|
|
rte->ctelevelsup == context->levelsup)
|
|
{
|
|
/*
|
|
* Found a reference to replace. Generate a copy of the CTE query
|
|
* with appropriate level adjustment for outer references (e.g.,
|
|
* to other CTEs).
|
|
*/
|
|
Query *newquery = copyObject(context->ctequery);
|
|
|
|
if (context->levelsup > 0)
|
|
IncrementVarSublevelsUp((Node *) newquery, context->levelsup, 1);
|
|
|
|
/*
|
|
* Convert the RTE_CTE RTE into a RTE_SUBQUERY.
|
|
*
|
|
* Historically, a FOR UPDATE clause has been treated as extending
|
|
* into views and subqueries, but not into CTEs. We preserve this
|
|
* distinction by not trying to push rowmarks into the new
|
|
* subquery.
|
|
*/
|
|
rte->rtekind = RTE_SUBQUERY;
|
|
rte->subquery = newquery;
|
|
rte->security_barrier = false;
|
|
|
|
List *columnAliasList = context->aliascolnames;
|
|
int columnAliasCount = list_length(columnAliasList);
|
|
int columnIndex = 1;
|
|
for (; columnIndex < list_length(rte->subquery->targetList) + 1; ++columnIndex)
|
|
{
|
|
/*
|
|
* Rename the column only if a column alias is defined.
|
|
* Notice that column alias count could be less than actual
|
|
* column count. We only use provided aliases and keep the
|
|
* original column names if no alias is defined.
|
|
*/
|
|
if (columnAliasCount >= columnIndex)
|
|
{
|
|
String *columnAlias = (String *) list_nth(columnAliasList, columnIndex - 1);
|
|
Assert(IsA(columnAlias, String));
|
|
TargetEntry *targetEntry =
|
|
list_nth(rte->subquery->targetList, columnIndex - 1);
|
|
Assert(IsA(columnAlias, String));
|
|
targetEntry->resname = strVal(columnAlias);
|
|
}
|
|
}
|
|
|
|
/* Zero out CTE-specific fields */
|
|
rte->ctename = NULL;
|
|
rte->ctelevelsup = 0;
|
|
rte->self_reference = false;
|
|
rte->coltypes = NIL;
|
|
rte->coltypmods = NIL;
|
|
rte->colcollations = NIL;
|
|
|
|
/* Count the number of replacements we've done */
|
|
context->refcount--;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
return expression_tree_walker(node, inline_cte_walker, context);
|
|
}
|
|
|
|
|
|
/*
|
|
* contain_dml: is any subquery not a plain SELECT?
|
|
*
|
|
* We reject SELECT FOR UPDATE/SHARE as well as INSERT etc.
|
|
*/
|
|
static bool
|
|
contain_dml(Node *node)
|
|
{
|
|
return contain_dml_walker(node, NULL);
|
|
}
|
|
|
|
|
|
static bool
|
|
contain_dml_walker(Node *node, void *context)
|
|
{
|
|
if (node == NULL)
|
|
return false;
|
|
if (IsA(node, Query))
|
|
{
|
|
Query *query = (Query *) node;
|
|
|
|
if (query->commandType != CMD_SELECT ||
|
|
query->rowMarks != NIL)
|
|
return true;
|
|
|
|
return query_tree_walker(query, contain_dml_walker, context, 0);
|
|
}
|
|
return expression_tree_walker(node, contain_dml_walker, context);
|
|
}
|
|
|
|
/* *INDENT-ON* */
|