citus/src/backend/distributed/planner/combine_query_planner.c

366 lines
13 KiB
C

/*-------------------------------------------------------------------------
*
* combine_query_planner.c
* Routines for planning the combine query that runs on the coordinator
* to combine results from the workers.
*
* Copyright (c) Citus Data, Inc.
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "catalog/pg_type.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/planner.h"
#include "rewrite/rewriteManip.h"
#include "pg_version_constants.h"
#include "distributed/citus_ruleutils.h"
#include "distributed/combine_query_planner.h"
#include "distributed/insert_select_planner.h"
#include "distributed/listutils.h"
#include "distributed/metadata_cache.h"
#include "distributed/multi_physical_planner.h"
/* forward declarations of the static helpers defined further down in this file */
static List * RemoteScanTargetList(List *workerTargetList);
static PlannedStmt * BuildSelectStatementViaStdPlanner(Query *combineQuery,
List *remoteScanTargetList,
CustomScan *remoteScan);
static Plan * CitusCustomScanPathPlan(PlannerInfo *root, RelOptInfo *rel,
struct CustomPath *best_path, List *tlist,
List *clauses, List *custom_plans);
/*
 * Exported planner state. BuildSelectStatementViaStdPlanner sets the flag to true
 * and the pointer to its remoteScan right before it calls standard_planner, and
 * resets them to false/NULL once the call returns or throws.
 * NOTE(review): the code that reads these two globals is not visible here;
 * presumably it is the hook that swaps the citus_extradata_container range
 * table entry for the CustomScan - confirm.
 */
bool ReplaceCitusExtraDataContainer = false;
CustomScan *ReplaceCitusExtraDataContainerWithCustomScan = NULL;
/*
 * CitusCustomScanPathMethods defines the methods for the custom path we insert into
 * the planner while planning the part of the query that is executed on the node
 * coordinating the query.
 *
 * CreateCitusCustomScanPath installs this table on every path it creates. The
 * PlanCustomPath callback is CitusCustomScanPathPlan, which returns the remote
 * CustomScan stored on the path.
 */
static CustomPathMethods CitusCustomScanPathMethods = {
.CustomName = "CitusCustomScanPath",
.PlanCustomPath = CitusCustomScanPathPlan,
};
/*
 * PlanCombineQuery builds the coordinator-side plan of a distributed plan.
 *
 * distributedPlan supplies the combine query and the worker job; remoteScan is
 * the custom scan node that wraps the remote part of the plan. The target list
 * of the worker job query is translated into a target list for the remote scan,
 * after which the combine query is handed to the standard planner.
 *
 * The returned select plan runs on the coordinator over the tuples produced by
 * the remote scan, i.e. after the result files have been retrieved from the
 * worker nodes and loaded into the tuple store of the given custom scan.
 */
PlannedStmt *
PlanCombineQuery(DistributedPlan *distributedPlan, CustomScan *remoteScan)
{
	List *workerTargetList = distributedPlan->workerJob->jobQuery->targetList;
	List *remoteScanTargetList = RemoteScanTargetList(workerTargetList);

	PlannedStmt *combinePlan =
		BuildSelectStatementViaStdPlanner(distributedPlan->combineQuery,
										  remoteScanTargetList,
										  remoteScan);

	return combinePlan;
}
/*
 * RemoteScanTargetList derives the target list of the remote scan on the
 * coordinator from the target list of the worker query.
 *
 * Every non-junk worker target entry yields one entry whose expression is a Var
 * on range table index 1. Attribute numbers are handed out consecutively,
 * starting at 1, in the order of the worker target list; resjunk entries are
 * skipped and do not consume an attribute number.
 */
static List *
RemoteScanTargetList(List *workerTargetList)
{
	const Index remoteScanVarno = 1;
	AttrNumber nextAttno = 1;
	List *resultTargetList = NIL;
	ListCell *cell = NULL;

	foreach(cell, workerTargetList)
	{
		TargetEntry *workerEntry = (TargetEntry *) lfirst(cell);

		if (!workerEntry->resjunk)
		{
			Var *column = makeVarFromTargetEntry(remoteScanVarno, workerEntry);
			Oid columnType = column->vartype;

			/* both the actual and the syntactic attribute number follow our counter */
			column->varattno = nextAttno;
			column->varattnosyn = nextAttno;
			nextAttno++;

			/* record (array) columns take their typmod from blessing the expression */
			if (columnType == RECORDOID || columnType == RECORDARRAYOID)
			{
				column->vartypmod = BlessRecordExpression(workerEntry->expr);
			}

			/*
			 * A remote scan target entry consists of two pieces: the expression,
			 * which becomes the column created above, and the sort and group
			 * clause references, which are carried over implicitly by copying
			 * the worker target entry. Any change to the worker target entry's
			 * sort and group clauses will *break* us here.
			 */
			TargetEntry *remoteEntry = flatCopyTargetEntry(workerEntry);
			remoteEntry->expr = (Expr *) column;

			resultTargetList = lappend(resultTargetList, remoteEntry);
		}
	}

	return resultTargetList;
}
/*
 * CreateCitusCustomScanPath creates the custom path node that hands back the
 * given CustomScan when the path ends up in the best_path during postgres
 * planning. It is used from the set relation hook of postgres while planning
 * the part of the query that is executed on the coordinating node.
 *
 * Only relOptInfo and remoteScan are used here; root, restrictionIndex and rte
 * are not referenced by this function.
 */
Path *
CreateCitusCustomScanPath(PlannerInfo *root, RelOptInfo *relOptInfo,
						  Index restrictionIndex, RangeTblEntry *rte,
						  CustomScan *remoteScan)
{
	CitusCustomScanPath *citusPath = (CitusCustomScanPath *) newNode(
		sizeof(CitusCustomScanPath), T_CustomPath);
	CustomPath *customPath = &citusPath->custom_path;
	Path *basePath = &customPath->path;

	basePath->pathtype = T_CustomScan;
	basePath->parent = relOptInfo;
	basePath->pathtarget = relOptInfo->reltarget;

	customPath->methods = &CitusCustomScanPathMethods;

#if (PG_VERSION_NUM >= PG_VERSION_15)

	/* necessary to avoid extra Result node in PG15 */
	customPath->flags = CUSTOMPATH_SUPPORT_PROJECTION;
#endif

	/*
	 * The row estimate of 100k is somewhat arbitrary and leaves room for more
	 * accurate costing that would produce better plans.
	 *
	 * With 100k rows the postgres planner produces plans much like the old citus
	 * planner did. That planner hardcoded Hash Aggregates for most operations,
	 * unless a postgres guc was set that disallowed hash aggregates.
	 *
	 * Ideally we would supply estimates close to what postgres estimates on the
	 * workers, so the standard planner can pick an optimal plan for the
	 * combineQuery.
	 */
	basePath->rows = 100000;

	citusPath->remoteScan = remoteScan;

	return (Path *) citusPath;
}
/*
 * CitusCustomScanPathPlan is the PlanCustomPath callback of CitusCustomScanPath.
 * It is called for the path in best_path once the postgres planner has
 * evaluated all possible paths.
 *
 * The returned Plan node is the CustomScan stored on the path, which is able to
 * execute the distributed part of the query.
 *
 * The clauses passed in might not have been applied to the plan yet, so they are
 * appended to the quals of the scan. Those quals are evaluated before tuples
 * scanned from the workers are returned to the plan above ours, which keeps
 * non-matching tuples out of the final result.
 *
 * root and custom_plans are not used.
 */
static Plan *
CitusCustomScanPathPlan(PlannerInfo *root,
						RelOptInfo *rel,
						struct CustomPath *best_path,
						List *tlist,
						List *clauses,
						List *custom_plans)
{
	CustomScan *remoteScan = ((CitusCustomScanPath *) best_path)->remoteScan;

	/*
	 * The standard planner may have pruned columns from the target list. One
	 * situation in which this happens is a CASE that is proven to always take
	 * the same branch, which makes the column of the other branch useless:
	 *   CASE WHEN ... <> NULL
	 *     THEN ...
	 *     ELSE ...
	 *   END
	 * Nothing is equal to NULL, so this always ends up in the ELSE branch. The
	 * final target list the planner needs from our node is passed in as tlist.
	 * Placing it on our scan makes the internal rows get projected to it.
	 */
	remoteScan->scan.plan.targetlist = tlist;

	/*
	 * The custom_scan_tlist contains target entries for the "output" of the call
	 * to citus_extradata_container, which is actually replaced by a CustomScan.
	 * Those entries are initialized with varno 1 (see RemoteScanTargetList),
	 * since that is currently the only relation in the join tree of the
	 * combineQuery.
	 *
	 * When the citus_extradata_container function call is not the first relation
	 * in the flattened rtable of the entire plan, varno points to the wrong
	 * relation and has to be updated.
	 *
	 * Example:
	 *   When the combineQuery field of the DistributedPlan is
	 *   INSERT INTO local SELECT .. FROM citus_extradata_container.
	 *   In that case the varno of citus_extradata_container should be 3, because
	 *   it is preceded by the range table entries for "local" and the subquery.
	 */
	if (rel->relid != 1)
	{
		TargetEntry *scanEntry = NULL;

		foreach_declared_ptr(scanEntry, remoteScan->custom_scan_tlist)
		{
			/* we created this list, so we know it only contains Var */
			Assert(IsA(scanEntry->expr, Var));

			((Var *) scanEntry->expr)->varno = rel->relid;
		}
	}

	/* clauses might have been added by the planner, need to add them to our scan */
	RestrictInfo *clauseInfo = NULL;

	foreach_declared_ptr(clauseInfo, clauses)
	{
		remoteScan->scan.plan.qual = lappend(remoteScan->scan.plan.qual,
											 clauseInfo->clause);
	}

	return (Plan *) remoteScan;
}
/*
 * BuildSelectStatementViaStdPlanner produces the PlannedStmt that combines the
 * combineQuery with the remoteScan, using postgres' standard_planner to plan the
 * combineQuery.
 *
 * remoteScanTargetList is copied onto the remoteScan twice (as custom_scan_tlist
 * and as plan target list) and also provides the column names for the alias of
 * the citus_extradata_container range table entry.
 */
static PlannedStmt *
BuildSelectStatementViaStdPlanner(Query *combineQuery, List *remoteScanTargetList,
								  CustomScan *remoteScan)
{
	/*
	 * The standard planner scribbles on the target list, while the
	 * custom_scan_tlist must stay untouched. Hence each consumer gets its own
	 * copy. The original remoteScanTargetList is only read below, to collect the
	 * column names for the alias of the CustomScan (which shows up in the
	 * combineQuery as the citus_extradata_container function call).
	 */
	remoteScan->custom_scan_tlist = copyObject(remoteScanTargetList);
	remoteScan->scan.plan.targetlist = copyObject(remoteScanTargetList);

	/*
	 * Overwrite the alias of the range table entry that describes the custom
	 * scan. Ideally the correct column names and alias would already have been
	 * set when the extra data container was inserted into the combine query;
	 * that could be improved in the future.
	 */
	RangeTblEntry *containerRTE = NULL;
	FindCitusExtradataContainerRTE((Node *) combineQuery, &containerRTE);

	if (containerRTE != NULL)
	{
		/* one alias column name per entry of the remoteScanTargetList */
		List *aliasColumnNames = NIL;
		TargetEntry *scanEntry = NULL;

		foreach_declared_ptr(scanEntry, remoteScanTargetList)
		{
			aliasColumnNames = lappend(aliasColumnNames,
									   makeString(scanEntry->resname));
		}

		containerRTE->eref = makeAlias("remote_scan", aliasColumnNames);
	}

	/*
	 * Log the combine query at DEBUG4. Serializing the query is relatively cpu
	 * intensive, so it is only done when that level is actually being logged.
	 */
	const int combineQueryLogLevel = DEBUG4;

	if (IsLoggableLevel(combineQueryLogLevel))
	{
		StringInfo combineQueryText = makeStringInfo();

		pg_get_query_def(combineQuery, combineQueryText);
		elog(combineQueryLogLevel, "combine query: %s", combineQueryText->data);
	}

	PlannedStmt *plannedCombineStmt = NULL;

	PG_TRY();
	{
		/* this code must not be entered recursively, the asserts verify that */
		Assert(ReplaceCitusExtraDataContainer == false);
		Assert(ReplaceCitusExtraDataContainerWithCustomScan == NULL);

		/* expose the remote scan only while the standard planner is running */
		ReplaceCitusExtraDataContainer = true;
		ReplaceCitusExtraDataContainerWithCustomScan = remoteScan;

		plannedCombineStmt = standard_planner(combineQuery, NULL, 0, NULL);

		ReplaceCitusExtraDataContainer = false;
		ReplaceCitusExtraDataContainerWithCustomScan = NULL;
	}
	PG_CATCH();
	{
		/* restore the globals before propagating the planner error */
		ReplaceCitusExtraDataContainer = false;
		ReplaceCitusExtraDataContainerWithCustomScan = NULL;

		PG_RE_THROW();
	}
	PG_END_TRY();

	Assert(plannedCombineStmt != NULL);

	return plannedCombineStmt;
}
/*
 * FindCitusExtradataContainerRTE walks the given node tree looking for the range
 * table entry that refers to the citus_extradata_container function.
 *
 * A matching entry is a function RTE with exactly one function whose expression
 * is a FuncExpr calling CitusExtraDataContainerFuncId(). When one is found, its
 * pointer is stored in *result and true is returned. Otherwise *result is left
 * untouched and the function returns whatever the tree walkers return.
 */
bool
FindCitusExtradataContainerRTE(Node *node, RangeTblEntry **result)
{
	if (node == NULL)
	{
		return false;
	}

	if (IsA(node, Query))
	{
		/* have the walker hand us each RTE before it examines the RTE's contents */
		return query_tree_walker((Query *) node, FindCitusExtradataContainerRTE, result,
								 QTW_EXAMINE_RTES_BEFORE);
	}

	if (!IsA(node, RangeTblEntry))
	{
		return expression_tree_walker(node, FindCitusExtradataContainerRTE, result);
	}

	/*
	 * From here on node is a range table entry. Every early exit below returns
	 * false; query_tree_walker descends into RTEs.
	 */
	RangeTblEntry *candidateRTE = castNode(RangeTblEntry, node);

	if (candidateRTE->rtekind != RTE_FUNCTION ||
		list_length(candidateRTE->functions) != 1)
	{
		return false;
	}

	RangeTblFunction *candidateFunction =
		(RangeTblFunction *) linitial(candidateRTE->functions);

	if (!IsA(candidateFunction->funcexpr, FuncExpr))
	{
		return false;
	}

	FuncExpr *functionCall = castNode(FuncExpr, candidateFunction->funcexpr);

	if (functionCall->funcid != CitusExtraDataContainerFuncId())
	{
		return false;
	}

	*result = candidateRTE;
	return true;
}