/*------------------------------------------------------------------------- * * multi_master_planner.c * Routines for building create table and select into table statements on the * master node. * * Copyright (c) Citus Data, Inc. * * $Id$ * *------------------------------------------------------------------------- */ #include "postgres.h" #include "catalog/pg_type.h" #include "commands/extension.h" #include "distributed/citus_ruleutils.h" #include "distributed/function_utils.h" #include "distributed/listutils.h" #include "distributed/multi_logical_optimizer.h" #include "distributed/multi_master_planner.h" #include "distributed/multi_physical_planner.h" #include "distributed/distributed_planner.h" #include "distributed/multi_server_executor.h" #include "distributed/version_compat.h" #include "distributed/worker_protocol.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "nodes/print.h" #include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/planmain.h" #include "optimizer/tlist.h" #include "optimizer/subselect.h" #if PG_VERSION_NUM >= 120000 #include "optimizer/optimizer.h" #else #include "optimizer/var.h" #endif #include "utils/builtins.h" #include "utils/guc.h" #include "utils/memutils.h" #include "utils/rel.h" #include "utils/syscache.h" #include "utils/lsyscache.h" static List * MasterTargetList(List *workerTargetList); static PlannedStmt * BuildSelectStatement(Query *masterQuery, List *masterTargetList, CustomScan *remoteScan); static Agg * BuildAggregatePlan(PlannerInfo *root, Query *masterQuery, Plan *subPlan); static bool HasDistinctAggregate(Query *masterQuery); static bool UseGroupAggregateWithHLL(Query *masterQuery); static bool QueryContainsAggregateWithHLL(Query *query); static Plan * BuildDistinctPlan(Query *masterQuery, Plan *subPlan); static Agg * makeAggNode(List *groupClauseList, List *havingQual, AggStrategy aggrStrategy, List *queryTargetList, Plan *subPlan); static void FinalizeStatement(PlannerInfo *root, PlannedStmt *stmt, Plan *topLevelPlan); /* * MasterNodeSelectPlan takes in a distributed plan and a custom scan node which * wraps remote part of the plan. This function finds the master node query * structure in the multi plan, and builds the final select plan to execute on * the tuples returned by remote scan on the master node. Note that this select * plan is executed after result files are retrieved from worker nodes and * filled into the tuple store inside provided custom scan. */ PlannedStmt * MasterNodeSelectPlan(DistributedPlan *distributedPlan, CustomScan *remoteScan) { Query *masterQuery = distributedPlan->masterQuery; Job *workerJob = distributedPlan->workerJob; List *workerTargetList = workerJob->jobQuery->targetList; List *masterTargetList = MasterTargetList(workerTargetList); PlannedStmt *masterSelectPlan = BuildSelectStatement(masterQuery, masterTargetList, remoteScan); return masterSelectPlan; } /* * MasterTargetList uses the given worker target list's expressions, and creates * a target list for the master node. This master target list keeps the * temporary table's columns on the master node. */ static List * MasterTargetList(List *workerTargetList) { List *masterTargetList = NIL; const Index tableId = 1; AttrNumber columnId = 1; ListCell *workerTargetCell = NULL; foreach(workerTargetCell, workerTargetList) { TargetEntry *workerTargetEntry = (TargetEntry *) lfirst(workerTargetCell); if (workerTargetEntry->resjunk) { continue; } Var *masterColumn = makeVarFromTargetEntry(tableId, workerTargetEntry); masterColumn->varattno = columnId; masterColumn->varoattno = columnId; columnId++; if (masterColumn->vartype == RECORDOID) { masterColumn->vartypmod = BlessRecordExpression(workerTargetEntry->expr); } /* * The master target entry has two pieces to it. The first piece is the * target entry's expression, which we set to the newly created column. * The second piece is sort and group clauses that we implicitly copy * from the worker target entry. Note that any changes to worker target * entry's sort and group clauses will *break* us here. */ TargetEntry *masterTargetEntry = flatCopyTargetEntry(workerTargetEntry); masterTargetEntry->expr = (Expr *) masterColumn; masterTargetList = lappend(masterTargetList, masterTargetEntry); } return masterTargetList; } /* * BuildSelectStatement builds the final select statement to run on the master * node, before returning results to the user. The function first gets the custom * scan node for all results fetched to the master, and layers aggregation, sort * and limit plans on top of the scan statement if necessary. */ static PlannedStmt * BuildSelectStatement(Query *masterQuery, List *masterTargetList, CustomScan *remoteScan) { /* top level select query should have only one range table entry */ Assert(list_length(masterQuery->rtable) == 1); Agg *aggregationPlan = NULL; Plan *topLevelPlan = NULL; List *sortClauseList = copyObject(masterQuery->sortClause); List *columnNameList = NIL; TargetEntry *targetEntry = NULL; PlannerGlobal *glob = makeNode(PlannerGlobal); PlannerInfo *root = makeNode(PlannerInfo); root->parse = masterQuery; root->glob = glob; root->query_level = 1; root->planner_cxt = CurrentMemoryContext; root->wt_param_id = -1; /* (1) make PlannedStmt and set basic information */ PlannedStmt *selectStatement = makeNode(PlannedStmt); selectStatement->canSetTag = true; selectStatement->relationOids = NIL; selectStatement->commandType = CMD_SELECT; remoteScan->custom_scan_tlist = masterTargetList; /* (2) add an aggregation plan if needed */ if (masterQuery->hasAggs || masterQuery->groupClause) { remoteScan->scan.plan.targetlist = masterTargetList; aggregationPlan = BuildAggregatePlan(root, masterQuery, &remoteScan->scan.plan); topLevelPlan = (Plan *) aggregationPlan; selectStatement->planTree = topLevelPlan; } else { /* otherwise set the final projections on the scan plan directly */ /* * The masterTargetList contains all columns that we fetch from * the worker as non-resjunk. * * Here the output of the plan node determines the output of the query. * We therefore use the targetList of masterQuery, which has non-output * columns set as resjunk. */ remoteScan->scan.plan.targetlist = masterQuery->targetList; topLevelPlan = &remoteScan->scan.plan; } /* * (3) create distinct plan if needed. * * distinct on() requires sort + unique plans. Unique itself is not enough * as it only compares the current value with previous one when checking * uniqueness, thus ordering is necessary. If already has order by * clause we append distinct clauses to the end of it. Postgresql requires * that if both distinct on() and order by exists, ordering shall start * on distinct clauses. Therefore we can safely append distinct clauses to * the end of order by clauses. Although the same column may appear more * than once in order by clauses, created plan uses only one instance, for * example order by a,b,a,a,b,c is translated to equivalent order by a,b,c. * * If the query has distinct clause but not distinct on, we first create * distinct plan that is either HashAggreate or Sort + Unique plans depending * on hashable property of columns in distinct clause. If there is order by * clause, it is handled after distinct planning. */ if (masterQuery->hasDistinctOn) { ListCell *distinctCell = NULL; foreach(distinctCell, masterQuery->distinctClause) { SortGroupClause *singleDistinctClause = lfirst(distinctCell); Index sortGroupRef = singleDistinctClause->tleSortGroupRef; if (get_sortgroupref_clause_noerr(sortGroupRef, sortClauseList) == NULL) { sortClauseList = lappend(sortClauseList, singleDistinctClause); } } } else if (masterQuery->distinctClause) { Plan *distinctPlan = BuildDistinctPlan(masterQuery, topLevelPlan); topLevelPlan = distinctPlan; } /* (4) add a sorting plan if needed */ if (sortClauseList) { Sort *sortPlan = make_sort_from_sortclauses(sortClauseList, topLevelPlan); /* just for reproducible costs between different PostgreSQL versions */ sortPlan->plan.startup_cost = 0; sortPlan->plan.total_cost = 0; sortPlan->plan.plan_rows = 0; topLevelPlan = (Plan *) sortPlan; } /* * (5) add a unique plan for distinctOn. * If the query has distinct on we add a sort clause in step 3. Therefore * Step 4 always creates a sort plan. * */ if (masterQuery->hasDistinctOn) { Assert(IsA(topLevelPlan, Sort)); topLevelPlan = (Plan *) make_unique_from_sortclauses(topLevelPlan, masterQuery->distinctClause); } /* (5) add a limit plan if needed */ if (masterQuery->limitCount || masterQuery->limitOffset) { Node *limitCount = masterQuery->limitCount; Node *limitOffset = masterQuery->limitOffset; Limit *limitPlan = make_limit(topLevelPlan, limitOffset, limitCount); topLevelPlan = (Plan *) limitPlan; } /* * (6) set top level plan in the plantree and copy over some things from * PlannerInfo */ FinalizeStatement(root, selectStatement, topLevelPlan); /* * (7) Replace rangetable with one with nice names to show in EXPLAIN plans */ foreach_ptr(targetEntry, masterTargetList) { columnNameList = lappend(columnNameList, makeString(targetEntry->resname)); } RangeTblEntry *customScanRangeTableEntry = linitial(selectStatement->rtable); customScanRangeTableEntry->eref = makeAlias("remote_scan", columnNameList); return selectStatement; } /* * FinalizeStatement sets some necessary fields on the final statement and its * plan to make it work with the regular postgres executor. This code is copied * almost verbatim from standard_planner in the PG source code. * * Modifications from original code: * - Added SS_attach_initplans call */ static void FinalizeStatement(PlannerInfo *root, PlannedStmt *result, Plan *top_plan) { ListCell *lp, *lr; PlannerGlobal *glob = root->glob; /* Taken from create_plan */ SS_attach_initplans(root, top_plan); /* * If any Params were generated, run through the plan tree and compute * each plan node's extParam/allParam sets. Ideally we'd merge this into * set_plan_references' tree traversal, but for now it has to be separate * because we need to visit subplans before not after main plan. */ if (glob->paramExecTypes != NIL) { Assert(list_length(glob->subplans) == list_length(glob->subroots)); forboth(lp, glob->subplans, lr, glob->subroots) { Plan *subplan = (Plan *) lfirst(lp); PlannerInfo *subroot = lfirst_node(PlannerInfo, lr); SS_finalize_plan(subroot, subplan); } SS_finalize_plan(root, top_plan); } /* final cleanup of the plan */ Assert(glob->finalrtable == NIL); Assert(glob->finalrowmarks == NIL); Assert(glob->resultRelations == NIL); Assert(glob->rootResultRelations == NIL); top_plan = set_plan_references(root, top_plan); /* ... and the subplans (both regular subplans and initplans) */ Assert(list_length(glob->subplans) == list_length(glob->subroots)); forboth(lp, glob->subplans, lr, glob->subroots) { Plan *subplan = (Plan *) lfirst(lp); PlannerInfo *subroot = lfirst_node(PlannerInfo, lr); lfirst(lp) = set_plan_references(subroot, subplan); } result->transientPlan = glob->transientPlan; result->dependsOnRole = glob->dependsOnRole; result->parallelModeNeeded = glob->parallelModeNeeded; result->planTree = top_plan; result->rtable = glob->finalrtable; result->resultRelations = glob->resultRelations; #if PG_VERSION_NUM < 120000 result->nonleafResultRelations = glob->nonleafResultRelations; #endif result->rootResultRelations = glob->rootResultRelations; result->subplans = glob->subplans; result->rewindPlanIDs = glob->rewindPlanIDs; result->rowMarks = glob->finalrowmarks; result->relationOids = glob->relationOids; result->invalItems = glob->invalItems; result->paramExecTypes = glob->paramExecTypes; } /* * BuildAggregatePlan creates and returns an aggregate plan. This aggregate plan * builds aggreation and grouping operators (if any) that are to be executed on * the master node. */ static Agg * BuildAggregatePlan(PlannerInfo *root, Query *masterQuery, Plan *subPlan) { /* assert that we need to build an aggregate plan */ Assert(masterQuery->hasAggs || masterQuery->groupClause); AggClauseCosts aggregateCosts; AggStrategy aggregateStrategy = AGG_PLAIN; List *groupColumnList = masterQuery->groupClause; List *aggregateTargetList = masterQuery->targetList; /* * Replaces SubLink nodes with SubPlan nodes in the having section of the * query. (and creates the subplans in root->subplans) * * Would be nice if we could use masterQuery->hasSubLinks to only call * these when that is true. However, for some reason hasSubLinks is false * even when there are SubLinks. */ Node *havingQual = SS_process_sublinks(root, masterQuery->havingQual, true); /* * Right now this is not really needed, since we don't support correlated * subqueries anyway. Once we do calling this is critical to do right after * calling SS_process_sublinks, according to the postgres function comment. */ havingQual = SS_replace_correlation_vars(root, havingQual); /* estimate aggregate execution costs */ memset(&aggregateCosts, 0, sizeof(AggClauseCosts)); get_agg_clause_costs(root, (Node *) aggregateTargetList, AGGSPLIT_SIMPLE, &aggregateCosts); get_agg_clause_costs(root, (Node *) havingQual, AGGSPLIT_SIMPLE, &aggregateCosts); /* if we have grouping, then initialize appropriate information */ if (list_length(groupColumnList) > 0) { bool groupingIsHashable = grouping_is_hashable(groupColumnList); bool groupingIsSortable = grouping_is_sortable(groupColumnList); bool hasDistinctAggregate = HasDistinctAggregate(masterQuery); if (!groupingIsHashable && !groupingIsSortable) { ereport(ERROR, (errmsg("grouped column list cannot be hashed or sorted"))); } /* * Postgres hash aggregate strategy does not support distinct aggregates * in group and order by with aggregate operations. * see nodeAgg.c:build_pertrans_for_aggref(). In that case we use * sorted agg strategy, otherwise we use hash strategy. * * If the master query contains hll aggregate functions and the client set * hll.force_groupagg to on, then we choose to use group aggregation. */ if (!enable_hashagg || !groupingIsHashable || hasDistinctAggregate || UseGroupAggregateWithHLL(masterQuery)) { char *messageHint = NULL; if (!enable_hashagg && groupingIsHashable) { messageHint = "Consider setting enable_hashagg to on."; } if (!groupingIsSortable) { ereport(ERROR, (errmsg("grouped column list must cannot be sorted"), errdetail("Having a distinct aggregate requires " "grouped column list to be sortable."), messageHint ? errhint("%s", messageHint) : 0)); } aggregateStrategy = AGG_SORTED; subPlan = (Plan *) make_sort_from_sortclauses(groupColumnList, subPlan); } else { aggregateStrategy = AGG_HASHED; } } /* finally create the plan */ Agg *aggregatePlan = makeAggNode(groupColumnList, (List *) havingQual, aggregateStrategy, aggregateTargetList, subPlan); /* just for reproducible costs between different PostgreSQL versions */ aggregatePlan->plan.startup_cost = 0; aggregatePlan->plan.total_cost = 0; aggregatePlan->plan.plan_rows = 0; return aggregatePlan; } /* * HasDistinctAggregate returns true if the query has a distinct * aggregate in its target list or in having clause. */ static bool HasDistinctAggregate(Query *masterQuery) { ListCell *allColumnCell = NULL; List *targetVarList = pull_var_clause((Node *) masterQuery->targetList, PVC_INCLUDE_AGGREGATES); List *havingVarList = pull_var_clause(masterQuery->havingQual, PVC_INCLUDE_AGGREGATES); List *allColumnList = list_concat(targetVarList, havingVarList); foreach(allColumnCell, allColumnList) { Node *columnNode = lfirst(allColumnCell); if (IsA(columnNode, Aggref)) { Aggref *aggref = (Aggref *) columnNode; if (aggref->aggdistinct != NIL) { return true; } } } return false; } /* * UseGroupAggregateWithHLL first checks whether the HLL extension is loaded, if * it is not then simply return false. Otherwise, checks whether the client set * the hll.force_groupagg to on. If it is enabled and the master query contains * hll aggregate function, it returns true. */ static bool UseGroupAggregateWithHLL(Query *masterQuery) { Oid hllId = get_extension_oid(HLL_EXTENSION_NAME, true); /* If HLL extension is not loaded, return false */ if (!OidIsValid(hllId)) { return false; } /* If HLL is loaded but related GUC is not set, return false */ const char *gucStrValue = GetConfigOption(HLL_FORCE_GROUPAGG_GUC_NAME, true, false); if (gucStrValue == NULL || strcmp(gucStrValue, "off") == 0) { return false; } return QueryContainsAggregateWithHLL(masterQuery); } /* * QueryContainsAggregateWithHLL returns true if the query has an hll aggregate * function in it's target list. */ static bool QueryContainsAggregateWithHLL(Query *query) { ListCell *varCell = NULL; List *varList = pull_var_clause((Node *) query->targetList, PVC_INCLUDE_AGGREGATES); foreach(varCell, varList) { Var *var = (Var *) lfirst(varCell); if (nodeTag(var) == T_Aggref) { Aggref *aggref = (Aggref *) var; int argCount = list_length(aggref->args); Oid hllId = get_extension_oid(HLL_EXTENSION_NAME, false); Oid hllSchemaOid = get_extension_schema(hllId); const char *hllSchemaName = get_namespace_name(hllSchemaOid); /* * If the obtained oid is InvalidOid for addFunctionId, that means * we don't have an hll_add_agg function with the given argument count. * So, we don't need to double check whether the obtained id is valid. */ Oid addFunctionId = FunctionOidExtended(hllSchemaName, HLL_ADD_AGGREGATE_NAME, argCount, true); Oid unionFunctionId = FunctionOid(hllSchemaName, HLL_UNION_AGGREGATE_NAME, 1); if (aggref->aggfnoid == addFunctionId || aggref->aggfnoid == unionFunctionId) { return true; } } } return false; } /* * BuildDistinctPlan creates an returns a plan for distinct. Depending on * availability of hash function it chooses HashAgg over Sort/Unique * plans. * This function has a potential performance issue since we blindly set * Plan nodes without looking at cost. We might need to revisit this * if we have performance issues with select distinct queries. */ static Plan * BuildDistinctPlan(Query *masterQuery, Plan *subPlan) { Plan *distinctPlan = NULL; List *distinctClauseList = masterQuery->distinctClause; List *targetList = copyObject(masterQuery->targetList); /* * We don't need to add distinct plan if all of the columns used in group by * clause also used in distinct clause, since group by clause guarantees the * uniqueness of the target list for every row. */ if (IsGroupBySubsetOfDistinct(masterQuery->groupClause, masterQuery->distinctClause)) { return subPlan; } Assert(masterQuery->distinctClause); Assert(!masterQuery->hasDistinctOn); /* * Create group by plan with HashAggregate if all distinct * members are hashable, and not containing distinct aggregate. * Otherwise create sort+unique plan. */ bool distinctClausesHashable = grouping_is_hashable(distinctClauseList); bool hasDistinctAggregate = HasDistinctAggregate(masterQuery); if (enable_hashagg && distinctClausesHashable && !hasDistinctAggregate) { distinctPlan = (Plan *) makeAggNode(distinctClauseList, NIL, AGG_HASHED, targetList, subPlan); } else { Sort *sortPlan = make_sort_from_sortclauses(masterQuery->distinctClause, subPlan); distinctPlan = (Plan *) make_unique_from_sortclauses((Plan *) sortPlan, masterQuery->distinctClause); } return distinctPlan; } /* * makeAggNode creates a "Agg" plan node. groupClauseList is a list of * SortGroupClause's. */ static Agg * makeAggNode(List *groupClauseList, List *havingQual, AggStrategy aggrStrategy, List *queryTargetList, Plan *subPlan) { Agg *aggNode = NULL; int groupColumnCount = list_length(groupClauseList); AttrNumber *groupColumnIdArray = extract_grouping_cols(groupClauseList, subPlan->targetlist); Oid *groupColumnOpArray = extract_grouping_ops(groupClauseList); const int rowEstimate = 10; #if (PG_VERSION_NUM >= 120000) aggNode = make_agg(queryTargetList, havingQual, aggrStrategy, AGGSPLIT_SIMPLE, groupColumnCount, groupColumnIdArray, groupColumnOpArray, extract_grouping_collations(groupClauseList, subPlan->targetlist), NIL, NIL, rowEstimate, subPlan); #else aggNode = make_agg(queryTargetList, havingQual, aggrStrategy, AGGSPLIT_SIMPLE, groupColumnCount, groupColumnIdArray, groupColumnOpArray, NIL, NIL, rowEstimate, subPlan); #endif return aggNode; }