citus/src/backend/distributed/planner/multi_planner.c

286 lines
7.8 KiB
C

/*-------------------------------------------------------------------------
*
* multi_planner.c
* General Citus planner code.
*
* Copyright (c) 2012-2015, Citus Data, Inc.
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <limits.h>
#include "catalog/pg_type.h"
#include "distributed/citus_nodefuncs.h"
#include "distributed/citus_nodes.h"
#include "distributed/metadata_cache.h"
#include "distributed/multi_planner.h"
#include "distributed/multi_logical_optimizer.h"
#include "distributed/multi_logical_planner.h"
#include "distributed/multi_physical_planner.h"
#include "distributed/modify_planner.h"
#include "executor/executor.h"
#include "optimizer/planner.h"
#include "utils/memutils.h"
/* local function forward declarations */
static void CheckNodeIsDumpable(Node *node);
static MultiPlan * CreatePhysicalPlan(Query *parse);
static char * GetMultiPlanString(PlannedStmt *result);
static PlannedStmt * MultiQueryContainerNode(PlannedStmt *result, MultiPlan *multiPlan);
/*
 * multi_planner is the distributed planner hook. Every query is first planned
 * by the standard postgres planner; queries that need distributed planning
 * additionally get a physical plan, which is stored in the returned planned
 * statement.
 */
PlannedStmt *
multi_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
{
	MultiPlan *distributedPlan = NULL;

	/*
	 * The standard planner has to run first, because the Citus planner
	 * relies on parse tree transformations made by postgres' planner.
	 */
	PlannedStmt *plannedStatement = standard_planner(parse, cursorOptions,
													 boundParams);

	/* purely local queries are returned exactly as postgres planned them */
	if (!NeedsDistributedPlanning(parse))
	{
		return plannedStatement;
	}

	/* store the data required for distributed execution in the statement */
	distributedPlan = CreatePhysicalPlan(parse);

	return MultiQueryContainerNode(plannedStatement, distributedPlan);
}
/*
 * CreatePhysicalPlan encapsulates the logic needed to transform a particular
 * query into a physical plan. For modifications (INSERT, UPDATE, DELETE),
 * queries immediately enter the physical planning stage, since they are
 * essentially "routed" to remote target shards. All other queries go through
 * the full logical plan/optimize/physical plan process needed to produce
 * distributed query plans.
 *
 * parse is the query to plan. The logical planner is handed a copy of it
 * rather than the caller's tree; the modification path receives parse itself.
 *
 * Returns the physical plan created for the query.
 */
static MultiPlan *
CreatePhysicalPlan(Query *parse)
{
	CmdType commandType = parse->commandType;
	Query *parseCopy = NULL;
	MultiTreeRoot *logicalPlan = NULL;

	if (commandType == CMD_INSERT || commandType == CMD_UPDATE ||
		commandType == CMD_DELETE)
	{
		/* modifications go directly from a query to a physical plan */
		return MultiModifyPlanCreate(parse);
	}

	/*
	 * Only the logical planning path consumes a copy of the query, so the
	 * copy is made here instead of up front; modifications would otherwise
	 * pay for a deep copy that is never used.
	 */
	parseCopy = copyObject(parse);

	/* create and optimize logical plan */
	logicalPlan = MultiLogicalPlanCreate(parseCopy);
	MultiLogicalPlanOptimize(logicalPlan);

	/*
	 * This check is here to make it likely that all node types used in
	 * Citus are dumpable. Explain can dump logical and physical plans
	 * using the extended outfuncs infrastructure, but it's infeasible to
	 * test most plans. MultiQueryContainerNode always serializes the
	 * physical plan, so there's no need to check that separately.
	 */
	CheckNodeIsDumpable((Node *) logicalPlan);

	/* create the physical plan */
	return MultiPhysicalPlanCreate(logicalPlan);
}
/*
 * GetMultiPlan returns the associated MultiPlan for a PlannedStmt if the
 * statement requires distributed execution, NULL otherwise.
 *
 * The plan is rebuilt from the serialized string that GetMultiPlanString
 * extracts from the statement's plan tree.
 */
MultiPlan *
GetMultiPlan(PlannedStmt *result)
{
	MultiPlan *multiPlan = NULL;
	char *serializedMultiPlan = GetMultiPlanString(result);

	/*
	 * GetMultiPlanString returns NULL for statements that are not
	 * distributed. Honor the documented contract for those instead of
	 * passing a NULL string on to the deserializer.
	 */
	if (serializedMultiPlan == NULL)
	{
		return NULL;
	}

	multiPlan = (MultiPlan *) CitusStringToNode(serializedMultiPlan);
	Assert(CitusIsA(multiPlan, MultiPlan));

	return multiPlan;
}
/*
 * HasCitusToplevelNode returns true if the passed in statement requires
 * distributed execution, i.e. if a serialized distributed plan can be
 * extracted from it, and false otherwise.
 */
bool
HasCitusToplevelNode(PlannedStmt *result)
{
	/*
	 * Without the extension being loaded the statement can't be a
	 * distributed one. Bail out right away, as part of the infrastructure
	 * required by the further checks might not be present.
	 */
	if (!CitusHasBeenLoaded())
	{
		return false;
	}

	/* distributed statements are exactly those carrying a plan string */
	return GetMultiPlanString(result) != NULL;
}
/*
 * MultiQueryContainerNode creates the top-level planTree node for a
 * distributed statement. That top-level node is a) recognizable by the
 * executor hooks, allowing them to redirect execution, b) contains the
 * parameters required for distributed execution.
 *
 * The exact representation of the top-level node is an implementation detail
 * which should not be referred to outside this file, as it's likely to become
 * version dependent. Use GetMultiPlan() and HasCitusToplevelNode() to access.
 *
 * Internally the data is stored as arguments to a 'citus_extradata_container'
 * function, which has to be removed from the really executed plan tree before
 * query execution.
 *
 * result is the statement produced by the postgres planner; its planTree is
 * replaced by the container node. multiPlan is the distributed plan to embed.
 * Returns the modified result.
 */
static PlannedStmt *
MultiQueryContainerNode(PlannedStmt *result, MultiPlan *multiPlan)
{
	FunctionScan *fauxFunctionScan = NULL;
	RangeTblFunction *fauxFunction = NULL;
	FuncExpr *fauxFuncExpr = NULL;
	Const *multiPlanData = NULL;
	char *serializedPlan = NULL;

	/* pass multiPlan serialized as a constant function argument */
	serializedPlan = CitusNodeToString(multiPlan);

	multiPlanData = makeNode(Const);
	multiPlanData->consttype = CSTRINGOID;

	/*
	 * The type length of cstring is -2, which marks the datum as a
	 * NUL-terminated string. A positive length would declare a fixed-length
	 * value instead, so generic datum handling (e.g. copying the node) would
	 * duplicate exactly that many bytes and lose the terminator.
	 */
	multiPlanData->constlen = -2;
	multiPlanData->constvalue = CStringGetDatum(serializedPlan);
	multiPlanData->constbyval = false;
	multiPlanData->location = -1;

	/* call to the container function, with the plan as its sole argument */
	fauxFuncExpr = makeNode(FuncExpr);
	fauxFuncExpr->funcid = CitusExtraDataContainerFuncId();
	fauxFuncExpr->funcretset = true;
	fauxFuncExpr->location = -1;
	fauxFuncExpr->args = list_make1(multiPlanData);

	fauxFunction = makeNode(RangeTblFunction);
	fauxFunction->funcexpr = (Node *) fauxFuncExpr;

	fauxFunctionScan = makeNode(FunctionScan);
	fauxFunctionScan->functions = lappend(fauxFunctionScan->functions, fauxFunction);

	/*
	 * Add set returning function to target list if the original (postgres
	 * created) plan doesn't support backward scans; doing so prevents
	 * backward scans being supported by the new plantree as well. This is
	 * ugly as hell, but until we can rely on custom scans (which can signal
	 * this via CUSTOMPATH_SUPPORT_BACKWARD_SCAN), there's not really a pretty
	 * method to achieve this.
	 *
	 * FIXME: This should really be done on the master select plan.
	 */
	if (!ExecSupportsBackwardScan(result->planTree))
	{
		FuncExpr *funcExpr = makeNode(FuncExpr);
		funcExpr->funcretset = true;

		fauxFunctionScan->scan.plan.targetlist =
			lappend(fauxFunctionScan->scan.plan.targetlist,
					funcExpr);
	}

	/* the container node takes the place of the postgres-created plan tree */
	result->planTree = (Plan *) fauxFunctionScan;

	return result;
}
/*
 * GetMultiPlanString returns the string representing the distributed plan
 * stored in the passed in statement, or NULL if the statement's plan tree
 * is not a distributed one.
 *
 * A distributed plan tree is a FunctionScan over exactly one function, that
 * function being a call to the extra data container function. An error is
 * raised if such a call doesn't have exactly one argument.
 */
static char *
GetMultiPlanString(PlannedStmt *result)
{
	FunctionScan *scanNode = NULL;
	RangeTblFunction *rangeFunction = NULL;
	FuncExpr *containerCall = NULL;
	Const *planConst = NULL;

	/* the top-level node has to be a function scan ... */
	if (!IsA(result->planTree, FunctionScan))
	{
		return NULL;
	}

	/* ... over a single function ... */
	scanNode = (FunctionScan *) result->planTree;
	if (list_length(scanNode->functions) != 1)
	{
		return NULL;
	}

	/* ... which is a plain call to the container function */
	rangeFunction = linitial(scanNode->functions);
	if (!IsA(rangeFunction->funcexpr, FuncExpr) ||
		((FuncExpr *) rangeFunction->funcexpr)->funcid !=
		CitusExtraDataContainerFuncId())
	{
		return NULL;
	}

	containerCall = (FuncExpr *) rangeFunction->funcexpr;
	if (list_length(containerCall->args) != 1)
	{
		ereport(ERROR, (errmsg("unexpected number of function arguments to "
							   "citus_extradata_container")));
	}

	/* the sole argument is the cstring constant holding the plan */
	planConst = (Const *) linitial(containerCall->args);
	Assert(IsA(planConst, Const));
	Assert(planConst->consttype == CSTRINGOID);

	return DatumGetCString(planConst->constvalue);
}
/*
 * CheckNodeIsDumpable checks that the passed node can be dumped using
 * CitusNodeToString(). As this check is expensive, it's only active when
 * assertions are enabled; otherwise the function does nothing.
 */
static void
CheckNodeIsDumpable(Node *node)
{
#ifdef USE_ASSERT_CHECKING

	/* only the dump itself matters, the produced string is discarded */
	char *serializedNode = CitusNodeToString(node);

	pfree(serializedNode);
#endif
}