mirror of https://github.com/citusdata/citus.git
648 lines
21 KiB
C
648 lines
21 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* multi_master_planner.c
|
|
* Routines for building create table and select into table statements on the
|
|
* master node.
|
|
*
|
|
* Copyright (c) Citus Data, Inc.
|
|
*
|
|
* $Id$
|
|
*
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
|
|
#include "postgres.h"
|
|
|
|
#include "catalog/pg_type.h"
|
|
#include "commands/extension.h"
|
|
#include "distributed/citus_ruleutils.h"
|
|
#include "distributed/function_utils.h"
|
|
#include "distributed/listutils.h"
|
|
#include "distributed/multi_logical_optimizer.h"
|
|
#include "distributed/multi_master_planner.h"
|
|
#include "distributed/multi_physical_planner.h"
|
|
#include "distributed/distributed_planner.h"
|
|
#include "distributed/multi_server_executor.h"
|
|
#include "distributed/version_compat.h"
|
|
#include "distributed/worker_protocol.h"
|
|
#include "nodes/makefuncs.h"
|
|
#include "nodes/nodeFuncs.h"
|
|
#include "nodes/print.h"
|
|
#include "optimizer/clauses.h"
|
|
#include "optimizer/cost.h"
|
|
#include "optimizer/planmain.h"
|
|
#include "optimizer/tlist.h"
|
|
#include "optimizer/subselect.h"
|
|
#if PG_VERSION_NUM >= 120000
|
|
#include "optimizer/optimizer.h"
|
|
#else
|
|
#include "optimizer/var.h"
|
|
#endif
|
|
#include "utils/builtins.h"
|
|
#include "utils/guc.h"
|
|
#include "utils/memutils.h"
|
|
#include "utils/rel.h"
|
|
#include "utils/syscache.h"
|
|
#include "utils/lsyscache.h"
|
|
|
|
|
|
static List * MasterTargetList(List *workerTargetList);
|
|
static PlannedStmt * BuildSelectStatement(Query *masterQuery, List *masterTargetList,
|
|
CustomScan *remoteScan);
|
|
static Agg * BuildAggregatePlan(PlannerInfo *root, Query *masterQuery, Plan *subPlan);
|
|
static bool HasDistinctAggregate(Query *masterQuery);
|
|
static bool UseGroupAggregateWithHLL(Query *masterQuery);
|
|
static bool QueryContainsAggregateWithHLL(Query *query);
|
|
static Plan * BuildDistinctPlan(Query *masterQuery, Plan *subPlan);
|
|
static Agg * makeAggNode(List *groupClauseList, List *havingQual,
|
|
AggStrategy aggrStrategy, List *queryTargetList, Plan *subPlan);
|
|
static void FinalizeStatement(PlannerInfo *root, PlannedStmt *stmt, Plan *topLevelPlan);
|
|
|
|
|
|
/*
|
|
* MasterNodeSelectPlan takes in a distributed plan and a custom scan node which
|
|
* wraps remote part of the plan. This function finds the master node query
|
|
* structure in the multi plan, and builds the final select plan to execute on
|
|
* the tuples returned by remote scan on the master node. Note that this select
|
|
* plan is executed after result files are retrieved from worker nodes and
|
|
* filled into the tuple store inside provided custom scan.
|
|
*/
|
|
PlannedStmt *
|
|
MasterNodeSelectPlan(DistributedPlan *distributedPlan, CustomScan *remoteScan)
|
|
{
|
|
Query *masterQuery = distributedPlan->masterQuery;
|
|
|
|
Job *workerJob = distributedPlan->workerJob;
|
|
List *workerTargetList = workerJob->jobQuery->targetList;
|
|
List *masterTargetList = MasterTargetList(workerTargetList);
|
|
|
|
PlannedStmt *masterSelectPlan = BuildSelectStatement(masterQuery, masterTargetList,
|
|
remoteScan);
|
|
|
|
return masterSelectPlan;
|
|
}
|
|
|
|
|
|
/*
|
|
* MasterTargetList uses the given worker target list's expressions, and creates
|
|
* a target list for the master node. This master target list keeps the
|
|
* temporary table's columns on the master node.
|
|
*/
|
|
static List *
|
|
MasterTargetList(List *workerTargetList)
|
|
{
|
|
List *masterTargetList = NIL;
|
|
const Index tableId = 1;
|
|
AttrNumber columnId = 1;
|
|
|
|
ListCell *workerTargetCell = NULL;
|
|
foreach(workerTargetCell, workerTargetList)
|
|
{
|
|
TargetEntry *workerTargetEntry = (TargetEntry *) lfirst(workerTargetCell);
|
|
|
|
if (workerTargetEntry->resjunk)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
Var *masterColumn = makeVarFromTargetEntry(tableId, workerTargetEntry);
|
|
masterColumn->varattno = columnId;
|
|
masterColumn->varoattno = columnId;
|
|
columnId++;
|
|
|
|
if (masterColumn->vartype == RECORDOID)
|
|
{
|
|
masterColumn->vartypmod = BlessRecordExpression(workerTargetEntry->expr);
|
|
}
|
|
|
|
/*
|
|
* The master target entry has two pieces to it. The first piece is the
|
|
* target entry's expression, which we set to the newly created column.
|
|
* The second piece is sort and group clauses that we implicitly copy
|
|
* from the worker target entry. Note that any changes to worker target
|
|
* entry's sort and group clauses will *break* us here.
|
|
*/
|
|
TargetEntry *masterTargetEntry = flatCopyTargetEntry(workerTargetEntry);
|
|
masterTargetEntry->expr = (Expr *) masterColumn;
|
|
masterTargetList = lappend(masterTargetList, masterTargetEntry);
|
|
}
|
|
|
|
return masterTargetList;
|
|
}
|
|
|
|
|
|
/*
|
|
* BuildSelectStatement builds the final select statement to run on the master
|
|
* node, before returning results to the user. The function first gets the custom
|
|
* scan node for all results fetched to the master, and layers aggregation, sort
|
|
* and limit plans on top of the scan statement if necessary.
|
|
*/
|
|
static PlannedStmt *
|
|
BuildSelectStatement(Query *masterQuery, List *masterTargetList, CustomScan *remoteScan)
|
|
{
|
|
/* top level select query should have only one range table entry */
|
|
Assert(list_length(masterQuery->rtable) == 1);
|
|
Agg *aggregationPlan = NULL;
|
|
Plan *topLevelPlan = NULL;
|
|
List *sortClauseList = copyObject(masterQuery->sortClause);
|
|
List *columnNameList = NIL;
|
|
TargetEntry *targetEntry = NULL;
|
|
|
|
PlannerGlobal *glob = makeNode(PlannerGlobal);
|
|
PlannerInfo *root = makeNode(PlannerInfo);
|
|
root->parse = masterQuery;
|
|
root->glob = glob;
|
|
root->query_level = 1;
|
|
root->planner_cxt = CurrentMemoryContext;
|
|
root->wt_param_id = -1;
|
|
|
|
|
|
/* (1) make PlannedStmt and set basic information */
|
|
PlannedStmt *selectStatement = makeNode(PlannedStmt);
|
|
selectStatement->canSetTag = true;
|
|
selectStatement->relationOids = NIL;
|
|
selectStatement->commandType = CMD_SELECT;
|
|
|
|
|
|
remoteScan->custom_scan_tlist = masterTargetList;
|
|
|
|
/* (2) add an aggregation plan if needed */
|
|
if (masterQuery->hasAggs || masterQuery->groupClause)
|
|
{
|
|
remoteScan->scan.plan.targetlist = masterTargetList;
|
|
|
|
aggregationPlan = BuildAggregatePlan(root, masterQuery, &remoteScan->scan.plan);
|
|
topLevelPlan = (Plan *) aggregationPlan;
|
|
selectStatement->planTree = topLevelPlan;
|
|
}
|
|
else
|
|
{
|
|
/* otherwise set the final projections on the scan plan directly */
|
|
|
|
/*
|
|
* The masterTargetList contains all columns that we fetch from
|
|
* the worker as non-resjunk.
|
|
*
|
|
* Here the output of the plan node determines the output of the query.
|
|
* We therefore use the targetList of masterQuery, which has non-output
|
|
* columns set as resjunk.
|
|
*/
|
|
remoteScan->scan.plan.targetlist = masterQuery->targetList;
|
|
topLevelPlan = &remoteScan->scan.plan;
|
|
}
|
|
|
|
/*
|
|
* (3) create distinct plan if needed.
|
|
*
|
|
* distinct on() requires sort + unique plans. Unique itself is not enough
|
|
* as it only compares the current value with previous one when checking
|
|
* uniqueness, thus ordering is necessary. If already has order by
|
|
* clause we append distinct clauses to the end of it. Postgresql requires
|
|
* that if both distinct on() and order by exists, ordering shall start
|
|
* on distinct clauses. Therefore we can safely append distinct clauses to
|
|
* the end of order by clauses. Although the same column may appear more
|
|
* than once in order by clauses, created plan uses only one instance, for
|
|
* example order by a,b,a,a,b,c is translated to equivalent order by a,b,c.
|
|
*
|
|
* If the query has distinct clause but not distinct on, we first create
|
|
* distinct plan that is either HashAggreate or Sort + Unique plans depending
|
|
* on hashable property of columns in distinct clause. If there is order by
|
|
* clause, it is handled after distinct planning.
|
|
*/
|
|
if (masterQuery->hasDistinctOn)
|
|
{
|
|
ListCell *distinctCell = NULL;
|
|
foreach(distinctCell, masterQuery->distinctClause)
|
|
{
|
|
SortGroupClause *singleDistinctClause = lfirst(distinctCell);
|
|
Index sortGroupRef = singleDistinctClause->tleSortGroupRef;
|
|
|
|
if (get_sortgroupref_clause_noerr(sortGroupRef, sortClauseList) == NULL)
|
|
{
|
|
sortClauseList = lappend(sortClauseList, singleDistinctClause);
|
|
}
|
|
}
|
|
}
|
|
else if (masterQuery->distinctClause)
|
|
{
|
|
Plan *distinctPlan = BuildDistinctPlan(masterQuery, topLevelPlan);
|
|
topLevelPlan = distinctPlan;
|
|
}
|
|
|
|
/* (4) add a sorting plan if needed */
|
|
if (sortClauseList)
|
|
{
|
|
Sort *sortPlan = make_sort_from_sortclauses(sortClauseList, topLevelPlan);
|
|
|
|
/* just for reproducible costs between different PostgreSQL versions */
|
|
sortPlan->plan.startup_cost = 0;
|
|
sortPlan->plan.total_cost = 0;
|
|
sortPlan->plan.plan_rows = 0;
|
|
|
|
topLevelPlan = (Plan *) sortPlan;
|
|
}
|
|
|
|
/*
|
|
* (5) add a unique plan for distinctOn.
|
|
* If the query has distinct on we add a sort clause in step 3. Therefore
|
|
* Step 4 always creates a sort plan.
|
|
* */
|
|
if (masterQuery->hasDistinctOn)
|
|
{
|
|
Assert(IsA(topLevelPlan, Sort));
|
|
topLevelPlan =
|
|
(Plan *) make_unique_from_sortclauses(topLevelPlan,
|
|
masterQuery->distinctClause);
|
|
}
|
|
|
|
/* (5) add a limit plan if needed */
|
|
if (masterQuery->limitCount || masterQuery->limitOffset)
|
|
{
|
|
Node *limitCount = masterQuery->limitCount;
|
|
Node *limitOffset = masterQuery->limitOffset;
|
|
Limit *limitPlan = make_limit(topLevelPlan, limitOffset, limitCount);
|
|
topLevelPlan = (Plan *) limitPlan;
|
|
}
|
|
|
|
/*
|
|
* (6) set top level plan in the plantree and copy over some things from
|
|
* PlannerInfo
|
|
*/
|
|
FinalizeStatement(root, selectStatement, topLevelPlan);
|
|
|
|
/*
|
|
* (7) Replace rangetable with one with nice names to show in EXPLAIN plans
|
|
*/
|
|
foreach_ptr(targetEntry, masterTargetList)
|
|
{
|
|
columnNameList = lappend(columnNameList, makeString(targetEntry->resname));
|
|
}
|
|
|
|
RangeTblEntry *customScanRangeTableEntry = linitial(selectStatement->rtable);
|
|
customScanRangeTableEntry->eref = makeAlias("remote_scan", columnNameList);
|
|
|
|
return selectStatement;
|
|
}
|
|
|
|
|
|
/*
|
|
* FinalizeStatement sets some necessary fields on the final statement and its
|
|
* plan to make it work with the regular postgres executor. This code is copied
|
|
* almost verbatim from standard_planner in the PG source code.
|
|
*
|
|
* Modifications from original code:
|
|
* - Added SS_attach_initplans call
|
|
*/
|
|
static void
|
|
FinalizeStatement(PlannerInfo *root, PlannedStmt *result, Plan *top_plan)
|
|
{
|
|
ListCell *lp,
|
|
*lr;
|
|
PlannerGlobal *glob = root->glob;
|
|
|
|
/* Taken from create_plan */
|
|
SS_attach_initplans(root, top_plan);
|
|
|
|
/*
|
|
* If any Params were generated, run through the plan tree and compute
|
|
* each plan node's extParam/allParam sets. Ideally we'd merge this into
|
|
* set_plan_references' tree traversal, but for now it has to be separate
|
|
* because we need to visit subplans before not after main plan.
|
|
*/
|
|
if (glob->paramExecTypes != NIL)
|
|
{
|
|
Assert(list_length(glob->subplans) == list_length(glob->subroots));
|
|
forboth(lp, glob->subplans, lr, glob->subroots)
|
|
{
|
|
Plan *subplan = (Plan *) lfirst(lp);
|
|
PlannerInfo *subroot = lfirst_node(PlannerInfo, lr);
|
|
|
|
SS_finalize_plan(subroot, subplan);
|
|
}
|
|
SS_finalize_plan(root, top_plan);
|
|
}
|
|
|
|
/* final cleanup of the plan */
|
|
Assert(glob->finalrtable == NIL);
|
|
Assert(glob->finalrowmarks == NIL);
|
|
Assert(glob->resultRelations == NIL);
|
|
Assert(glob->rootResultRelations == NIL);
|
|
|
|
top_plan = set_plan_references(root, top_plan);
|
|
|
|
/* ... and the subplans (both regular subplans and initplans) */
|
|
Assert(list_length(glob->subplans) == list_length(glob->subroots));
|
|
forboth(lp, glob->subplans, lr, glob->subroots)
|
|
{
|
|
Plan *subplan = (Plan *) lfirst(lp);
|
|
PlannerInfo *subroot = lfirst_node(PlannerInfo, lr);
|
|
|
|
lfirst(lp) = set_plan_references(subroot, subplan);
|
|
}
|
|
result->transientPlan = glob->transientPlan;
|
|
result->dependsOnRole = glob->dependsOnRole;
|
|
result->parallelModeNeeded = glob->parallelModeNeeded;
|
|
result->planTree = top_plan;
|
|
|
|
result->rtable = glob->finalrtable;
|
|
result->resultRelations = glob->resultRelations;
|
|
#if PG_VERSION_NUM < 120000
|
|
result->nonleafResultRelations = glob->nonleafResultRelations;
|
|
#endif
|
|
result->rootResultRelations = glob->rootResultRelations;
|
|
result->subplans = glob->subplans;
|
|
result->rewindPlanIDs = glob->rewindPlanIDs;
|
|
result->rowMarks = glob->finalrowmarks;
|
|
result->relationOids = glob->relationOids;
|
|
result->invalItems = glob->invalItems;
|
|
result->paramExecTypes = glob->paramExecTypes;
|
|
}
|
|
|
|
|
|
/*
|
|
* BuildAggregatePlan creates and returns an aggregate plan. This aggregate plan
|
|
* builds aggreation and grouping operators (if any) that are to be executed on
|
|
* the master node.
|
|
*/
|
|
static Agg *
|
|
BuildAggregatePlan(PlannerInfo *root, Query *masterQuery, Plan *subPlan)
|
|
{
|
|
/* assert that we need to build an aggregate plan */
|
|
Assert(masterQuery->hasAggs || masterQuery->groupClause);
|
|
AggClauseCosts aggregateCosts;
|
|
AggStrategy aggregateStrategy = AGG_PLAIN;
|
|
List *groupColumnList = masterQuery->groupClause;
|
|
List *aggregateTargetList = masterQuery->targetList;
|
|
|
|
/*
|
|
* Replaces SubLink nodes with SubPlan nodes in the having section of the
|
|
* query. (and creates the subplans in root->subplans)
|
|
*
|
|
* Would be nice if we could use masterQuery->hasSubLinks to only call
|
|
* these when that is true. However, for some reason hasSubLinks is false
|
|
* even when there are SubLinks.
|
|
*/
|
|
Node *havingQual = SS_process_sublinks(root, masterQuery->havingQual, true);
|
|
|
|
/*
|
|
* Right now this is not really needed, since we don't support correlated
|
|
* subqueries anyway. Once we do calling this is critical to do right after
|
|
* calling SS_process_sublinks, according to the postgres function comment.
|
|
*/
|
|
havingQual = SS_replace_correlation_vars(root, havingQual);
|
|
|
|
|
|
/* estimate aggregate execution costs */
|
|
memset(&aggregateCosts, 0, sizeof(AggClauseCosts));
|
|
get_agg_clause_costs(root, (Node *) aggregateTargetList, AGGSPLIT_SIMPLE,
|
|
&aggregateCosts);
|
|
|
|
get_agg_clause_costs(root, (Node *) havingQual, AGGSPLIT_SIMPLE, &aggregateCosts);
|
|
|
|
|
|
/* if we have grouping, then initialize appropriate information */
|
|
if (list_length(groupColumnList) > 0)
|
|
{
|
|
bool groupingIsHashable = grouping_is_hashable(groupColumnList);
|
|
bool groupingIsSortable = grouping_is_sortable(groupColumnList);
|
|
bool hasDistinctAggregate = HasDistinctAggregate(masterQuery);
|
|
|
|
if (!groupingIsHashable && !groupingIsSortable)
|
|
{
|
|
ereport(ERROR, (errmsg("grouped column list cannot be hashed or sorted")));
|
|
}
|
|
|
|
/*
|
|
* Postgres hash aggregate strategy does not support distinct aggregates
|
|
* in group and order by with aggregate operations.
|
|
* see nodeAgg.c:build_pertrans_for_aggref(). In that case we use
|
|
* sorted agg strategy, otherwise we use hash strategy.
|
|
*
|
|
* If the master query contains hll aggregate functions and the client set
|
|
* hll.force_groupagg to on, then we choose to use group aggregation.
|
|
*/
|
|
if (!enable_hashagg || !groupingIsHashable || hasDistinctAggregate ||
|
|
UseGroupAggregateWithHLL(masterQuery))
|
|
{
|
|
char *messageHint = NULL;
|
|
if (!enable_hashagg && groupingIsHashable)
|
|
{
|
|
messageHint = "Consider setting enable_hashagg to on.";
|
|
}
|
|
|
|
if (!groupingIsSortable)
|
|
{
|
|
ereport(ERROR, (errmsg("grouped column list must cannot be sorted"),
|
|
errdetail("Having a distinct aggregate requires "
|
|
"grouped column list to be sortable."),
|
|
messageHint ? errhint("%s", messageHint) : 0));
|
|
}
|
|
|
|
aggregateStrategy = AGG_SORTED;
|
|
subPlan = (Plan *) make_sort_from_sortclauses(groupColumnList, subPlan);
|
|
}
|
|
else
|
|
{
|
|
aggregateStrategy = AGG_HASHED;
|
|
}
|
|
}
|
|
|
|
/* finally create the plan */
|
|
Agg *aggregatePlan = makeAggNode(groupColumnList, (List *) havingQual,
|
|
aggregateStrategy, aggregateTargetList, subPlan);
|
|
|
|
/* just for reproducible costs between different PostgreSQL versions */
|
|
aggregatePlan->plan.startup_cost = 0;
|
|
aggregatePlan->plan.total_cost = 0;
|
|
aggregatePlan->plan.plan_rows = 0;
|
|
|
|
return aggregatePlan;
|
|
}
|
|
|
|
|
|
/*
|
|
* HasDistinctAggregate returns true if the query has a distinct
|
|
* aggregate in its target list or in having clause.
|
|
*/
|
|
static bool
|
|
HasDistinctAggregate(Query *masterQuery)
|
|
{
|
|
ListCell *allColumnCell = NULL;
|
|
|
|
List *targetVarList = pull_var_clause((Node *) masterQuery->targetList,
|
|
PVC_INCLUDE_AGGREGATES);
|
|
List *havingVarList = pull_var_clause(masterQuery->havingQual,
|
|
PVC_INCLUDE_AGGREGATES);
|
|
|
|
List *allColumnList = list_concat(targetVarList, havingVarList);
|
|
foreach(allColumnCell, allColumnList)
|
|
{
|
|
Node *columnNode = lfirst(allColumnCell);
|
|
if (IsA(columnNode, Aggref))
|
|
{
|
|
Aggref *aggref = (Aggref *) columnNode;
|
|
if (aggref->aggdistinct != NIL)
|
|
{
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
|
|
/*
|
|
* UseGroupAggregateWithHLL first checks whether the HLL extension is loaded, if
|
|
* it is not then simply return false. Otherwise, checks whether the client set
|
|
* the hll.force_groupagg to on. If it is enabled and the master query contains
|
|
* hll aggregate function, it returns true.
|
|
*/
|
|
static bool
|
|
UseGroupAggregateWithHLL(Query *masterQuery)
|
|
{
|
|
Oid hllId = get_extension_oid(HLL_EXTENSION_NAME, true);
|
|
|
|
/* If HLL extension is not loaded, return false */
|
|
if (!OidIsValid(hllId))
|
|
{
|
|
return false;
|
|
}
|
|
|
|
/* If HLL is loaded but related GUC is not set, return false */
|
|
const char *gucStrValue = GetConfigOption(HLL_FORCE_GROUPAGG_GUC_NAME, true, false);
|
|
if (gucStrValue == NULL || strcmp(gucStrValue, "off") == 0)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
return QueryContainsAggregateWithHLL(masterQuery);
|
|
}
|
|
|
|
|
|
/*
|
|
* QueryContainsAggregateWithHLL returns true if the query has an hll aggregate
|
|
* function in it's target list.
|
|
*/
|
|
static bool
|
|
QueryContainsAggregateWithHLL(Query *query)
|
|
{
|
|
ListCell *varCell = NULL;
|
|
|
|
List *varList = pull_var_clause((Node *) query->targetList, PVC_INCLUDE_AGGREGATES);
|
|
foreach(varCell, varList)
|
|
{
|
|
Var *var = (Var *) lfirst(varCell);
|
|
if (nodeTag(var) == T_Aggref)
|
|
{
|
|
Aggref *aggref = (Aggref *) var;
|
|
int argCount = list_length(aggref->args);
|
|
Oid hllId = get_extension_oid(HLL_EXTENSION_NAME, false);
|
|
Oid hllSchemaOid = get_extension_schema(hllId);
|
|
const char *hllSchemaName = get_namespace_name(hllSchemaOid);
|
|
|
|
/*
|
|
* If the obtained oid is InvalidOid for addFunctionId, that means
|
|
* we don't have an hll_add_agg function with the given argument count.
|
|
* So, we don't need to double check whether the obtained id is valid.
|
|
*/
|
|
Oid addFunctionId = FunctionOidExtended(hllSchemaName, HLL_ADD_AGGREGATE_NAME,
|
|
argCount, true);
|
|
Oid unionFunctionId = FunctionOid(hllSchemaName, HLL_UNION_AGGREGATE_NAME, 1);
|
|
|
|
if (aggref->aggfnoid == addFunctionId || aggref->aggfnoid == unionFunctionId)
|
|
{
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
|
|
/*
|
|
* BuildDistinctPlan creates an returns a plan for distinct. Depending on
|
|
* availability of hash function it chooses HashAgg over Sort/Unique
|
|
* plans.
|
|
* This function has a potential performance issue since we blindly set
|
|
* Plan nodes without looking at cost. We might need to revisit this
|
|
* if we have performance issues with select distinct queries.
|
|
*/
|
|
static Plan *
|
|
BuildDistinctPlan(Query *masterQuery, Plan *subPlan)
|
|
{
|
|
Plan *distinctPlan = NULL;
|
|
List *distinctClauseList = masterQuery->distinctClause;
|
|
List *targetList = copyObject(masterQuery->targetList);
|
|
|
|
/*
|
|
* We don't need to add distinct plan if all of the columns used in group by
|
|
* clause also used in distinct clause, since group by clause guarantees the
|
|
* uniqueness of the target list for every row.
|
|
*/
|
|
if (IsGroupBySubsetOfDistinct(masterQuery->groupClause, masterQuery->distinctClause))
|
|
{
|
|
return subPlan;
|
|
}
|
|
|
|
Assert(masterQuery->distinctClause);
|
|
Assert(!masterQuery->hasDistinctOn);
|
|
|
|
/*
|
|
* Create group by plan with HashAggregate if all distinct
|
|
* members are hashable, and not containing distinct aggregate.
|
|
* Otherwise create sort+unique plan.
|
|
*/
|
|
bool distinctClausesHashable = grouping_is_hashable(distinctClauseList);
|
|
bool hasDistinctAggregate = HasDistinctAggregate(masterQuery);
|
|
|
|
if (enable_hashagg && distinctClausesHashable && !hasDistinctAggregate)
|
|
{
|
|
distinctPlan = (Plan *) makeAggNode(distinctClauseList, NIL, AGG_HASHED,
|
|
targetList, subPlan);
|
|
}
|
|
else
|
|
{
|
|
Sort *sortPlan = make_sort_from_sortclauses(masterQuery->distinctClause,
|
|
subPlan);
|
|
distinctPlan = (Plan *) make_unique_from_sortclauses((Plan *) sortPlan,
|
|
masterQuery->distinctClause);
|
|
}
|
|
|
|
return distinctPlan;
|
|
}
|
|
|
|
|
|
/*
|
|
* makeAggNode creates a "Agg" plan node. groupClauseList is a list of
|
|
* SortGroupClause's.
|
|
*/
|
|
static Agg *
|
|
makeAggNode(List *groupClauseList, List *havingQual, AggStrategy aggrStrategy,
|
|
List *queryTargetList, Plan *subPlan)
|
|
{
|
|
Agg *aggNode = NULL;
|
|
int groupColumnCount = list_length(groupClauseList);
|
|
AttrNumber *groupColumnIdArray =
|
|
extract_grouping_cols(groupClauseList, subPlan->targetlist);
|
|
Oid *groupColumnOpArray = extract_grouping_ops(groupClauseList);
|
|
const int rowEstimate = 10;
|
|
|
|
#if (PG_VERSION_NUM >= 120000)
|
|
aggNode = make_agg(queryTargetList, havingQual, aggrStrategy,
|
|
AGGSPLIT_SIMPLE, groupColumnCount, groupColumnIdArray,
|
|
groupColumnOpArray,
|
|
extract_grouping_collations(groupClauseList,
|
|
subPlan->targetlist),
|
|
NIL, NIL, rowEstimate, subPlan);
|
|
#else
|
|
aggNode = make_agg(queryTargetList, havingQual, aggrStrategy,
|
|
AGGSPLIT_SIMPLE, groupColumnCount, groupColumnIdArray,
|
|
groupColumnOpArray,
|
|
NIL, NIL, rowEstimate, subPlan);
|
|
#endif
|
|
|
|
return aggNode;
|
|
}
|