Add Citus' CTE inlining functions

With this commit we add the necessary Citus functions to inline CTEs
in a queryTree.

You might ask, why do we need to inline CTEs if Postgres is already
going to do it?

Few reasons behind this decision:

- One technical note here is that Citus does the recursive CTE planning
  by checking the originalQuery, which is the query that has not gone
  through the standard_planner().

  CTEs in Citus are super powerful. They are practically key for full SQL
  coverage for multi-shard queries. With CTEs, you can always reduce
  any multi-shard query into a router query via recursive
  planning (thus full SQL coverage).
  We cannot let CTE inlining break that. The main idea is Citus should
  be able to retry planning if anything goes wrong after CTE inlining.

  So, by taking ownership of CTE inlining on the originalQuery, Citus
  can fall back to recursive planning of CTEs if the planning with the
  inlined query fails. It would have been a lot harder if we had relied
  on standard_planner() to have the inlined CTEs on the original query.

- We want to have this feature in PostgreSQL 11 as well, but Postgres
  only inlines in version 12
pull/3161/head
Onder Kalaci 2019-12-10 17:35:04 +01:00 committed by Jelte Fennema
parent 1856ab6cdd
commit 01a5800ee8
2 changed files with 157 additions and 0 deletions

View File

@ -15,6 +15,7 @@
*/ */
#include "postgres.h" #include "postgres.h"
#include "distributed/cte_inline.h"
#include "nodes/nodeFuncs.h" #include "nodes/nodeFuncs.h"
#if PG_VERSION_NUM >= 120000 #if PG_VERSION_NUM >= 120000
#include "optimizer/optimizer.h" #include "optimizer/optimizer.h"
@ -57,6 +58,143 @@ static bool contain_dml(Node *node);
static bool contain_dml_walker(Node *node, void *context); static bool contain_dml_walker(Node *node, void *context);
/* the following utility functions are related to Citus' logic */
/* tree walker that inlines the eligible CTEs of every Query node it visits */
static bool RecursivelyInlineCteWalker(Node *node, void *context);
/* inlines the eligible CTEs found in the cteList of a single query */
static void InlineCTEsInQueryTree(Query *query);
/* tree walker that returns true once a Query node with an inlinable CTE is found */
static bool QueryTreeContainsInlinableCteWalker(Node *node);
/*
 * RecursivelyInlineCtesInQueryTree walks the given query tree from the top
 * level downwards and, at each level, replaces the CTEs that are eligible for
 * inlining with subqueries. The query is modified in place.
 *
 * This is useful in distributed planning because Citus' (sub)query planning
 * logic is superior to CTE planning, where CTEs are always recursively planned,
 * which might produce very slow executions.
 */
void
RecursivelyInlineCtesInQueryTree(Query *query)
{
	/* handle the CTEs that are defined directly on the top-level query */
	InlineCTEsInQueryTree(query);

	/* then visit every nested Query; the walker's return value is not needed */
	(void) query_tree_walker(query, RecursivelyInlineCteWalker, NULL, 0);
}
/*
 * RecursivelyInlineCteWalker is the tree walker behind
 * RecursivelyInlineCtesInQueryTree. For every Query node it reaches, it inlines
 * the eligible CTEs of that query and then descends into the query itself.
 *
 * The walker always returns false for Query nodes, so finding a Query never
 * aborts the traversal.
 */
static bool
RecursivelyInlineCteWalker(Node *node, void *context)
{
	if (node == NULL)
	{
		return false;
	}

	if (!IsA(node, Query))
	{
		/* not a query: keep looking for Query nodes further down the expression */
		return expression_tree_walker(node, RecursivelyInlineCteWalker, context);
	}
	else
	{
		Query *nestedQuery = (Query *) node;

		/* inline on this level first, then continue with the levels below */
		InlineCTEsInQueryTree(nestedQuery);
		query_tree_walker(nestedQuery, RecursivelyInlineCteWalker, NULL, 0);

		/* this query has been fully processed, nothing more to recurse into */
		return false;
	}
}
/*
 * InlineCTEsInQueryTree gets a query and tries to inline the CTEs in its
 * cteList as subqueries. Only the CTEs defined on the given query are
 * considered; the function itself does not descend into nested queries.
 *
 * Every CTE that is inlined is also removed from query->cteList and has its
 * reference count reset, so the query is modified in place.
 *
 * Most of the code is coming from PostgreSQL's CTE inlining logic, there are very
 * few additions that Citus added, which are already commented in the code.
 *
 * The function is file-local; the definition is marked static so that it agrees
 * with its forward declaration.
 */
static void
InlineCTEsInQueryTree(Query *query)
{
	ListCell *cteCell = NULL;

	/* iterate on the copy of the list because we'll be modifying query->cteList */
	List *copyOfCteList = list_copy(query->cteList);

	foreach(cteCell, copyOfCteList)
	{
		CommonTableExpr *cte = (CommonTableExpr *) lfirst(cteCell);

		/* skip the CTEs that Postgres does not consider safe to inline */
		if (!PostgreSQLCTEInlineCondition(cte, query->commandType))
		{
			continue;
		}

		elog(DEBUG2, "CTE %s is going to be inlined via "
					 "distributed planning", cte->ctename);

		/* do the hard work of cte inlining */
		inline_cte(query, cte);

		/* clean-up the necessary fields for distributed planning */
		cte->cterefcount = 0;
		query->cteList = list_delete_ptr(query->cteList, cte);
	}
}
/*
 * QueryTreeContainsInlinableCTE recursively traverses the queryTree, and returns
 * true if any of the (sub)queries in the queryTree contains at least one CTE
 * that is eligible for inlining.
 */
bool
QueryTreeContainsInlinableCTE(Query *queryTree)
{
	/* the walker operates on generic nodes, so hand the query over as a Node */
	Node *rootNode = (Node *) queryTree;
	bool containsInlinableCte = QueryTreeContainsInlinableCteWalker(rootNode);

	return containsInlinableCte;
}
/*
 * QueryTreeContainsInlinableCteWalker walks over the node, and returns true as
 * soon as it reaches a (sub)query that has at least one CTE eligible for
 * inlining. It returns false when no such CTE exists anywhere under the node.
 *
 * NOTE(review): this walker takes only the node, although it is handed to
 * query_tree_walker()/expression_tree_walker() together with a NULL context;
 * confirm that this is fine for all supported PostgreSQL versions.
 */
static bool
QueryTreeContainsInlinableCteWalker(Node *node)
{
	Query *currentQuery = NULL;
	ListCell *candidateCell = NULL;

	if (node == NULL)
	{
		return false;
	}

	if (!IsA(node, Query))
	{
		/* not a query: keep searching below this expression node */
		return expression_tree_walker(node, QueryTreeContainsInlinableCteWalker, NULL);
	}

	currentQuery = (Query *) node;

	foreach(candidateCell, currentQuery->cteList)
	{
		CommonTableExpr *candidateCte = (CommonTableExpr *) lfirst(candidateCell);

		/* a single inlinable CTE is enough to answer the question */
		if (PostgreSQLCTEInlineCondition(candidateCte, currentQuery->commandType))
		{
			return true;
		}
	}

	/* nothing inlinable on this level, check the queries nested in it */
	return query_tree_walker(currentQuery, QueryTreeContainsInlinableCteWalker, NULL, 0);
}
/* /*
* PostgreSQLCTEInlineCondition returns true if the CTE is considered * PostgreSQLCTEInlineCondition returns true if the CTE is considered
* safe to inline by Postgres. * safe to inline by Postgres.

View File

@ -0,0 +1,19 @@
/*-------------------------------------------------------------------------
*
* cte_inline.h
* Functions and global variables to control cte inlining.
*
* Copyright (c) 2019, Citus Data, Inc.
*
*-------------------------------------------------------------------------
*/
#ifndef CTE_INLINE_H
#define CTE_INLINE_H
#include "nodes/parsenodes.h"
/*
 * Inlines, as subqueries, the CTEs that are eligible for inlining on every
 * level of the given query tree. The query is modified in place.
 */
extern void RecursivelyInlineCtesInQueryTree(Query *query);
/*
 * Returns true if any (sub)query of the given query tree has at least one CTE
 * that is eligible for inlining. The query tree is only inspected.
 */
extern bool QueryTreeContainsInlinableCTE(Query *queryTree);
#endif /* CTE_INLINE_H */