From 32def06ebd5339778095ae4e878cbc0e693048db Mon Sep 17 00:00:00 2001 From: Onder Kalaci Date: Mon, 27 Nov 2017 15:17:18 +0200 Subject: [PATCH 1/3] Split assigning RTE identities and partitioning related query modifications Note that we used to iterate over the RTEs once for performance reasons. However, keeping an extra copy of original query seems more costly and hard to maintain/explain. --- .../distributed/planner/distributed_planner.c | 61 ++++++++++++++----- 1 file changed, 45 insertions(+), 16 deletions(-) diff --git a/src/backend/distributed/planner/distributed_planner.c b/src/backend/distributed/planner/distributed_planner.c index ae163d0f6..e8a294528 100644 --- a/src/backend/distributed/planner/distributed_planner.c +++ b/src/backend/distributed/planner/distributed_planner.c @@ -45,9 +45,11 @@ static PlannedStmt * CreateDistributedPlan(PlannedStmt *localPlan, Query *origin Query *query, ParamListInfo boundParams, PlannerRestrictionContext * plannerRestrictionContext); -static void AdjustParseTree(Query *parse, bool assignRTEIdentities, - bool setPartitionedTablesInherited); + +static void AssignRTEIdentities(Query *queryTree); static void AssignRTEIdentity(RangeTblEntry *rangeTableEntry, int rteIdentifier); +static void AdjustPartitioningForDistributedPlanning(Query *parse, + bool setPartitionedTablesInherited); static PlannedStmt * FinalizePlan(PlannedStmt *localPlan, DistributedPlan *distributedPlan); static PlannedStmt * FinalizeNonRouterPlan(PlannedStmt *localPlan, @@ -71,20 +73,23 @@ distributed_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) bool needsDistributedPlanning = NeedsDistributedPlanning(parse); Query *originalQuery = NULL; PlannerRestrictionContext *plannerRestrictionContext = NULL; - bool assignRTEIdentities = false; bool setPartitionedTablesInherited = false; /* * standard_planner scribbles on it's input, but for deparsing we need the - * unmodified form. So copy once we're sure it's a distributed query. + * unmodified form. Note that we keep RTE_RELATIONs with their identities + * set, which doesn't break our goals, but, prevents us keeping an extra copy + * of the query tree. Note that we copy the query tree once we're sure it's a + * distributed query. */ if (needsDistributedPlanning) { - originalQuery = copyObject(parse); - assignRTEIdentities = true; setPartitionedTablesInherited = false; - AdjustParseTree(parse, assignRTEIdentities, setPartitionedTablesInherited); + AssignRTEIdentities(parse); + originalQuery = copyObject(parse); + + AdjustPartitioningForDistributedPlanning(parse, setPartitionedTablesInherited); } /* create a restriction context and put it at the end if context list */ @@ -114,10 +119,9 @@ distributed_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) if (needsDistributedPlanning) { - assignRTEIdentities = false; setPartitionedTablesInherited = true; - AdjustParseTree(parse, assignRTEIdentities, setPartitionedTablesInherited); + AdjustPartitioningForDistributedPlanning(parse, setPartitionedTablesInherited); } /* remove the context from the context list */ @@ -144,18 +148,15 @@ distributed_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) /* - * AdjustParseTree function modifies query tree by adding RTE identities to the - * RTE_RELATIONs and changing inh flag and relkind of partitioned tables. We - * perform these operations to ensure PostgreSQL's standard planner behaves as - * we need. + * AssignRTEIdentities function modifies query tree by adding RTE identities to the + * RTE_RELATIONs. * * Please note that, we want to avoid modifying query tree as much as possible * because if PostgreSQL changes the way it uses modified fields, that may break * our logic. */ static void -AdjustParseTree(Query *queryTree, bool assignRTEIdentities, - bool setPartitionedTablesInherited) +AssignRTEIdentities(Query *queryTree) { List *rangeTableList = NIL; ListCell *rangeTableCell = NULL; @@ -177,10 +178,38 @@ AdjustParseTree(Query *queryTree, bool assignRTEIdentities, * Note that we're only interested in RTE_RELATIONs and thus assigning * identifiers to those RTEs only. */ - if (assignRTEIdentities && rangeTableEntry->rtekind == RTE_RELATION) + if (rangeTableEntry->rtekind == RTE_RELATION) { AssignRTEIdentity(rangeTableEntry, rteIdentifier++); } + } +} + + +/* + * AdjustPartitioningForDistributedPlanning function modifies query tree by + * changing inh flag and relkind of partitioned tables. We want Postgres to + * treat partitioned tables as regular relations (i.e. we do not want to + * expand them to their partitions) since it breaks Citus planning in different + * ways. We let anything related to partitioning happen on the shards. + * + * Please note that, we want to avoid modifying query tree as much as possible + * because if PostgreSQL changes the way it uses modified fields, that may break + * our logic. + */ +static void +AdjustPartitioningForDistributedPlanning(Query *queryTree, + bool setPartitionedTablesInherited) +{ + List *rangeTableList = NIL; + ListCell *rangeTableCell = NULL; + + /* extract range table entries for simple relations only */ + ExtractRangeTableEntryWalker((Node *) queryTree, &rangeTableList); + + foreach(rangeTableCell, rangeTableList) + { + RangeTblEntry *rangeTableEntry = (RangeTblEntry *) lfirst(rangeTableCell); /* * We want Postgres to behave partitioned tables as regular relations From 26d9b58e9e4153d7fd91da4ca64c883984d7ac49 Mon Sep 17 00:00:00 2001 From: Onder Kalaci Date: Mon, 27 Nov 2017 15:28:03 +0200 Subject: [PATCH 2/3] Make sure that ExtractRangeTableRelationWalker never misses RTE_RELATION --- .../planner/multi_logical_planner.c | 42 ++++++++++++++----- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/src/backend/distributed/planner/multi_logical_planner.c b/src/backend/distributed/planner/multi_logical_planner.c index 274e0444a..0e87f8067 100644 --- a/src/backend/distributed/planner/multi_logical_planner.c +++ b/src/backend/distributed/planner/multi_logical_planner.c @@ -3385,24 +3385,46 @@ NeedsDistributedPlanning(Query *queryTree) /* - * ExtractRangeTableRelationWalker gathers all range table entries in a query - * and filters them to preserve only those of the RTE_RELATION type. + * ExtractRangeTableRelationWalker gathers all range table relation entries + * in a query. */ bool ExtractRangeTableRelationWalker(Node *node, List **rangeTableRelationList) { - List *rangeTableList = NIL; - ListCell *rangeTableCell = NULL; - bool walkIsComplete = ExtractRangeTableEntryWalker(node, &rangeTableList); + bool walkIsComplete = false; - foreach(rangeTableCell, rangeTableList) + if (node == NULL) { - RangeTblEntry *rangeTableEntry = (RangeTblEntry *) lfirst(rangeTableCell); - if (rangeTableEntry->rtekind == RTE_RELATION && - rangeTableEntry->relkind != RELKIND_VIEW) + return false; + } + + if (IsA(node, RangeTblEntry)) + { + RangeTblEntry *rangeTable = (RangeTblEntry *) node; + + if (rangeTable->rtekind == RTE_RELATION && rangeTable->relkind != RELKIND_VIEW) { - (*rangeTableRelationList) = lappend(*rangeTableRelationList, rangeTableEntry); + (*rangeTableRelationList) = lappend(*rangeTableRelationList, rangeTable); + + walkIsComplete = false; } + else + { + walkIsComplete = range_table_walker(list_make1(rangeTable), + ExtractRangeTableRelationWalker, + rangeTableRelationList, 0); + } + } + else if (IsA(node, Query)) + { + walkIsComplete = query_tree_walker((Query *) node, + ExtractRangeTableRelationWalker, + rangeTableRelationList, QTW_EXAMINE_RTES); + } + else + { + walkIsComplete = expression_tree_walker(node, ExtractRangeTableRelationWalker, + rangeTableRelationList); } return walkIsComplete; From 05fb0dd020488e1721ef88d1a69ba0be5321cc75 Mon Sep 17 00:00:00 2001 From: Onder Kalaci Date: Mon, 27 Nov 2017 15:35:44 +0200 Subject: [PATCH 3/3] Add infrastructure for filtering restriction contexts based on the input query In subquery pushdown, we first ensure that each relation is joined with at least on another relation on the partition keys. That's fine given that the decision is binary: pushdown the query at all or not. With recursive planning, we'd want to check whether any specific part of the query can be pushded down or not. Thus, we need the ability to understand which part(s) of the subquery is safe to pushdown. This commit adds the infrastructure for doing that. --- .../planner/multi_logical_planner.c | 218 +++++++++++++++++- .../relation_restriction_equivalence.c | 3 +- .../distributed/multi_logical_planner.h | 3 + .../relation_restriction_equivalence.h | 1 + 4 files changed, 222 insertions(+), 3 deletions(-) diff --git a/src/backend/distributed/planner/multi_logical_planner.c b/src/backend/distributed/planner/multi_logical_planner.c index 0e87f8067..ab7169095 100644 --- a/src/backend/distributed/planner/multi_logical_planner.c +++ b/src/backend/distributed/planner/multi_logical_planner.c @@ -85,6 +85,17 @@ static DeferredErrorMessage * DeferErrorIfUnsupportedSubqueryPushdown(Query * PlannerRestrictionContext * plannerRestrictionContext); +static RelationRestrictionContext * FilterRelationRestrictionContext( + RelationRestrictionContext *relationRestrictionContext, + Relids + queryRteIdentities); +static JoinRestrictionContext * FilterJoinRestrictionContext( + JoinRestrictionContext *joinRestrictionContext, Relids + queryRteIdentities); +static bool RangeTableArrayContainsAnyRTEIdentities(RangeTblEntry **rangeTableEntries, int + rangeTableArrayLength, Relids + queryRteIdentities); +static Relids QueryRteIdentities(Query *queryTree); static DeferredErrorMessage * DeferErrorIfUnsupportedSublinkAndReferenceTable( Query *queryTree); static DeferredErrorMessage * DeferErrorIfUnsupportedFilters(Query *subquery); @@ -704,6 +715,210 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery, } +/* + * FilterPlannerRestrictionForQuery gets a planner restriction context and + * set of rte identities. It returns the restrictions that that appear + * in the queryRteIdentities and returns a newly allocated + * PlannerRestrictionContext. The function also sets all the other fields of + * the PlannerRestrictionContext with respect to the filtered restrictions. + */ +PlannerRestrictionContext * +FilterPlannerRestrictionForQuery(PlannerRestrictionContext *plannerRestrictionContext, + Query *query) +{ + PlannerRestrictionContext *filteredPlannerRestrictionContext = NULL; + int referenceRelationCount = 0; + int totalRelationCount = 0; + + Relids queryRteIdentities = QueryRteIdentities(query); + + RelationRestrictionContext *relationRestrictionContext = + plannerRestrictionContext->relationRestrictionContext; + JoinRestrictionContext *joinRestrictionContext = + plannerRestrictionContext->joinRestrictionContext; + + RelationRestrictionContext *filteredRelationRestrictionContext = + FilterRelationRestrictionContext(relationRestrictionContext, queryRteIdentities); + + JoinRestrictionContext *filtererdJoinRestrictionContext = + FilterJoinRestrictionContext(joinRestrictionContext, queryRteIdentities); + + /* allocate the filtered planner restriction context and set all the fields */ + filteredPlannerRestrictionContext = palloc0(sizeof(PlannerRestrictionContext)); + + filteredPlannerRestrictionContext->memoryContext = + plannerRestrictionContext->memoryContext; + + totalRelationCount = list_length( + filteredRelationRestrictionContext->relationRestrictionList); + referenceRelationCount = ReferenceRelationCount(filteredRelationRestrictionContext); + + filteredRelationRestrictionContext->allReferenceTables = + (totalRelationCount == referenceRelationCount); + + /* we currently don't support local relations and we cannot come up to this point */ + filteredRelationRestrictionContext->hasLocalRelation = false; + filteredRelationRestrictionContext->hasDistributedRelation = true; + + /* finally set the relation and join restriction contexts */ + filteredPlannerRestrictionContext->relationRestrictionContext = + filteredRelationRestrictionContext; + filteredPlannerRestrictionContext->joinRestrictionContext = + filtererdJoinRestrictionContext; + + return filteredPlannerRestrictionContext; +} + + +/* + * FilterRelationRestrictionContext gets a relation restriction context and + * set of rte identities. It returns the relation restrictions that that appear + * in the queryRteIdentities and returns a newly allocated + * RelationRestrictionContext. + */ +static RelationRestrictionContext * +FilterRelationRestrictionContext(RelationRestrictionContext *relationRestrictionContext, + Relids queryRteIdentities) +{ + RelationRestrictionContext *filteredRestrictionContext = + palloc0(sizeof(RelationRestrictionContext)); + + ListCell *relationRestrictionCell = NULL; + + foreach(relationRestrictionCell, relationRestrictionContext->relationRestrictionList) + { + RelationRestriction *relationRestriction = + (RelationRestriction *) lfirst(relationRestrictionCell); + + int rteIdentity = GetRTEIdentity(relationRestriction->rte); + + if (bms_is_member(rteIdentity, queryRteIdentities)) + { + filteredRestrictionContext->relationRestrictionList = + lappend(filteredRestrictionContext->relationRestrictionList, + relationRestriction); + } + } + + return filteredRestrictionContext; +} + + +/* + * FilterJoinRestrictionContext gets a join restriction context and + * set of rte identities. It returns the join restrictions that that appear + * in the queryRteIdentities and returns a newly allocated + * JoinRestrictionContext. + * + * Note that the join restriction is added to the return context as soon as + * any range table entry that appear in the join belongs to queryRteIdentities. + */ +static JoinRestrictionContext * +FilterJoinRestrictionContext(JoinRestrictionContext *joinRestrictionContext, Relids + queryRteIdentities) +{ + JoinRestrictionContext *filtererdJoinRestrictionContext = + palloc0(sizeof(JoinRestrictionContext)); + + ListCell *joinRestrictionCell = NULL; + + foreach(joinRestrictionCell, joinRestrictionContext->joinRestrictionList) + { + JoinRestriction *joinRestriction = + (JoinRestriction *) lfirst(joinRestrictionCell); + RangeTblEntry **rangeTableEntries = + joinRestriction->plannerInfo->simple_rte_array; + int rangeTableArrayLength = joinRestriction->plannerInfo->simple_rel_array_size; + + if (RangeTableArrayContainsAnyRTEIdentities(rangeTableEntries, + rangeTableArrayLength, + queryRteIdentities)) + { + filtererdJoinRestrictionContext->joinRestrictionList = lappend( + filtererdJoinRestrictionContext->joinRestrictionList, + joinRestriction); + } + } + + return filtererdJoinRestrictionContext; +} + + +/* + * RangeTableArrayContainsAnyRTEIdentities returns true if any of the range table entries + * int rangeTableEntries array is an range table relation specified in queryRteIdentities. + */ +static bool +RangeTableArrayContainsAnyRTEIdentities(RangeTblEntry **rangeTableEntries, int + rangeTableArrayLength, Relids queryRteIdentities) +{ + int rteIndex = 0; + + /* simple_rte_array starts from 1, see plannerInfo struct */ + for (rteIndex = 1; rteIndex < rangeTableArrayLength; ++rteIndex) + { + RangeTblEntry *rangeTableEntry = rangeTableEntries[rteIndex]; + List *rangeTableRelationList = NULL; + ListCell *rteRelationCell = NULL; + + /* + * Get list of all RTE_RELATIONs in the given range table entry + * (i.e.,rangeTableEntry could be a subquery where we're interested + * in relations). + */ + ExtractRangeTableRelationWalker((Node *) rangeTableEntry, + &rangeTableRelationList); + + foreach(rteRelationCell, rangeTableRelationList) + { + RangeTblEntry *rteRelation = (RangeTblEntry *) lfirst(rteRelationCell); + int rteIdentity = 0; + + Assert(rteRelation->rtekind == RTE_RELATION); + + rteIdentity = GetRTEIdentity(rteRelation); + if (bms_is_member(rteIdentity, queryRteIdentities)) + { + return true; + } + } + } + + return false; +} + + +/* + * QueryRteIdentities gets a queryTree, find get all the rte identities assigned by + * us. + */ +static Relids +QueryRteIdentities(Query *queryTree) +{ + List *rangeTableList = NULL; + ListCell *rangeTableCell = NULL; + Relids queryRteIdentities = NULL; + + /* extract range table entries for simple relations only */ + ExtractRangeTableRelationWalker((Node *) queryTree, &rangeTableList); + + foreach(rangeTableCell, rangeTableList) + { + RangeTblEntry *rangeTableEntry = (RangeTblEntry *) lfirst(rangeTableCell); + int rteIdentity = 0; + + /* we're only interested in relations */ + Assert(rangeTableEntry->rtekind == RTE_RELATION); + + rteIdentity = GetRTEIdentity(rangeTableEntry); + + queryRteIdentities = bms_add_member(queryRteIdentities, rteIdentity); + } + + return queryRteIdentities; +} + + /* * DeferErrorIfUnsupportedSublinkAndReferenceTable returns a deferred error if the * given query is not suitable for subquery pushdown. @@ -3453,7 +3668,8 @@ ExtractRangeTableEntryWalker(Node *node, List **rangeTableList) } else if (IsA(node, Query)) { - walkIsComplete = query_tree_walker((Query *) node, ExtractRangeTableEntryWalker, + walkIsComplete = query_tree_walker((Query *) node, + ExtractRangeTableEntryWalker, rangeTableList, QTW_EXAMINE_RTES); } else diff --git a/src/backend/distributed/planner/relation_restriction_equivalence.c b/src/backend/distributed/planner/relation_restriction_equivalence.c index 69aff11d4..245fe82f9 100644 --- a/src/backend/distributed/planner/relation_restriction_equivalence.c +++ b/src/backend/distributed/planner/relation_restriction_equivalence.c @@ -62,7 +62,6 @@ typedef struct AttributeEquivalenceClassMember } AttributeEquivalenceClassMember; -static uint32 ReferenceRelationCount(RelationRestrictionContext *restrictionContext); static Var * FindTranslatedVar(List *appendRelList, Oid relationOid, Index relationRteIndex, Index *partitionKeyIndex); static bool EquivalenceListContainsRelationsEquality(List *attributeEquivalenceList, @@ -408,7 +407,7 @@ RestrictionEquivalenceForPartitionKeys(PlannerRestrictionContext * * ReferenceRelationCount iterates over the relations and returns the reference table * relation count. */ -static uint32 +uint32 ReferenceRelationCount(RelationRestrictionContext *restrictionContext) { ListCell *relationRestrictionCell = NULL; diff --git a/src/include/distributed/multi_logical_planner.h b/src/include/distributed/multi_logical_planner.h index 693902ab4..937f6ca38 100644 --- a/src/include/distributed/multi_logical_planner.h +++ b/src/include/distributed/multi_logical_planner.h @@ -187,6 +187,9 @@ extern MultiTreeRoot * MultiLogicalPlanCreate(Query *originalQuery, Query *query PlannerRestrictionContext * plannerRestrictionContext, ParamListInfo boundParams); +extern PlannerRestrictionContext * FilterPlannerRestrictionForQuery( + PlannerRestrictionContext *plannerRestrictionContext, + Query *query); extern bool SafeToPushdownWindowFunction(Query *query, StringInfo *errorDetail); extern bool TargetListOnPartitionColumn(Query *query, List *targetEntryList); extern bool NeedsDistributedPlanning(Query *queryTree); diff --git a/src/include/distributed/relation_restriction_equivalence.h b/src/include/distributed/relation_restriction_equivalence.h index ea49af5fe..ad515b423 100644 --- a/src/include/distributed/relation_restriction_equivalence.h +++ b/src/include/distributed/relation_restriction_equivalence.h @@ -18,6 +18,7 @@ extern bool ContainsUnionSubquery(Query *queryTree); extern bool RestrictionEquivalenceForPartitionKeys(PlannerRestrictionContext * plannerRestrictionContext); +extern uint32 ReferenceRelationCount(RelationRestrictionContext *restrictionContext); extern bool SafeToPushdownUnionSubquery(RelationRestrictionContext *restrictionContext); extern List * RelationIdList(Query *query);