From 2f2d35092411c2b5609ff199251bdb1947acb6c1 Mon Sep 17 00:00:00 2001 From: Onder Kalaci Date: Wed, 24 Jan 2018 14:30:18 +0200 Subject: [PATCH 1/3] Refactor relation restriction related codes This commit moves some of the functions to a more relevant source file. --- .../planner/multi_logical_planner.c | 215 ------------------ .../relation_restriction_equivalence.c | 215 ++++++++++++++++++ .../distributed/multi_logical_planner.h | 3 - .../relation_restriction_equivalence.h | 4 +- 4 files changed, 218 insertions(+), 219 deletions(-) diff --git a/src/backend/distributed/planner/multi_logical_planner.c b/src/backend/distributed/planner/multi_logical_planner.c index 89cb0ff2d..05550ed28 100644 --- a/src/backend/distributed/planner/multi_logical_planner.c +++ b/src/backend/distributed/planner/multi_logical_planner.c @@ -85,17 +85,6 @@ static DeferredErrorMessage * DeferErrorIfUnsupportedSubqueryPushdown(Query * PlannerRestrictionContext * plannerRestrictionContext); -static RelationRestrictionContext * FilterRelationRestrictionContext( - RelationRestrictionContext *relationRestrictionContext, - Relids - queryRteIdentities); -static JoinRestrictionContext * FilterJoinRestrictionContext( - JoinRestrictionContext *joinRestrictionContext, Relids - queryRteIdentities); -static bool RangeTableArrayContainsAnyRTEIdentities(RangeTblEntry **rangeTableEntries, int - rangeTableArrayLength, Relids - queryRteIdentities); -static Relids QueryRteIdentities(Query *queryTree); static DeferredErrorMessage * DeferErrorIfFromClauseRecurs(Query *queryTree); static bool ExtractSetOperationStatmentWalker(Node *node, List **setOperationList); static DeferredErrorMessage * DeferErrorIfUnsupportedTableCombination(Query *queryTree); @@ -613,210 +602,6 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery, } -/* - * FilterPlannerRestrictionForQuery gets a planner restriction context and - * set of rte identities. 
It returns the restrictions that that appear - * in the queryRteIdentities and returns a newly allocated - * PlannerRestrictionContext. The function also sets all the other fields of - * the PlannerRestrictionContext with respect to the filtered restrictions. - */ -PlannerRestrictionContext * -FilterPlannerRestrictionForQuery(PlannerRestrictionContext *plannerRestrictionContext, - Query *query) -{ - PlannerRestrictionContext *filteredPlannerRestrictionContext = NULL; - int referenceRelationCount = 0; - int totalRelationCount = 0; - - Relids queryRteIdentities = QueryRteIdentities(query); - - RelationRestrictionContext *relationRestrictionContext = - plannerRestrictionContext->relationRestrictionContext; - JoinRestrictionContext *joinRestrictionContext = - plannerRestrictionContext->joinRestrictionContext; - - RelationRestrictionContext *filteredRelationRestrictionContext = - FilterRelationRestrictionContext(relationRestrictionContext, queryRteIdentities); - - JoinRestrictionContext *filtererdJoinRestrictionContext = - FilterJoinRestrictionContext(joinRestrictionContext, queryRteIdentities); - - /* allocate the filtered planner restriction context and set all the fields */ - filteredPlannerRestrictionContext = palloc0(sizeof(PlannerRestrictionContext)); - - filteredPlannerRestrictionContext->memoryContext = - plannerRestrictionContext->memoryContext; - - totalRelationCount = list_length( - filteredRelationRestrictionContext->relationRestrictionList); - referenceRelationCount = ReferenceRelationCount(filteredRelationRestrictionContext); - - filteredRelationRestrictionContext->allReferenceTables = - (totalRelationCount == referenceRelationCount); - - /* we currently don't support local relations and we cannot come up to this point */ - filteredRelationRestrictionContext->hasLocalRelation = false; - filteredRelationRestrictionContext->hasDistributedRelation = true; - - /* finally set the relation and join restriction contexts */ - 
filteredPlannerRestrictionContext->relationRestrictionContext = - filteredRelationRestrictionContext; - filteredPlannerRestrictionContext->joinRestrictionContext = - filtererdJoinRestrictionContext; - - return filteredPlannerRestrictionContext; -} - - -/* - * FilterRelationRestrictionContext gets a relation restriction context and - * set of rte identities. It returns the relation restrictions that that appear - * in the queryRteIdentities and returns a newly allocated - * RelationRestrictionContext. - */ -static RelationRestrictionContext * -FilterRelationRestrictionContext(RelationRestrictionContext *relationRestrictionContext, - Relids queryRteIdentities) -{ - RelationRestrictionContext *filteredRestrictionContext = - palloc0(sizeof(RelationRestrictionContext)); - - ListCell *relationRestrictionCell = NULL; - - foreach(relationRestrictionCell, relationRestrictionContext->relationRestrictionList) - { - RelationRestriction *relationRestriction = - (RelationRestriction *) lfirst(relationRestrictionCell); - - int rteIdentity = GetRTEIdentity(relationRestriction->rte); - - if (bms_is_member(rteIdentity, queryRteIdentities)) - { - filteredRestrictionContext->relationRestrictionList = - lappend(filteredRestrictionContext->relationRestrictionList, - relationRestriction); - } - } - - return filteredRestrictionContext; -} - - -/* - * FilterJoinRestrictionContext gets a join restriction context and - * set of rte identities. It returns the join restrictions that that appear - * in the queryRteIdentities and returns a newly allocated - * JoinRestrictionContext. - * - * Note that the join restriction is added to the return context as soon as - * any range table entry that appear in the join belongs to queryRteIdentities. 
- */ -static JoinRestrictionContext * -FilterJoinRestrictionContext(JoinRestrictionContext *joinRestrictionContext, Relids - queryRteIdentities) -{ - JoinRestrictionContext *filtererdJoinRestrictionContext = - palloc0(sizeof(JoinRestrictionContext)); - - ListCell *joinRestrictionCell = NULL; - - foreach(joinRestrictionCell, joinRestrictionContext->joinRestrictionList) - { - JoinRestriction *joinRestriction = - (JoinRestriction *) lfirst(joinRestrictionCell); - RangeTblEntry **rangeTableEntries = - joinRestriction->plannerInfo->simple_rte_array; - int rangeTableArrayLength = joinRestriction->plannerInfo->simple_rel_array_size; - - if (RangeTableArrayContainsAnyRTEIdentities(rangeTableEntries, - rangeTableArrayLength, - queryRteIdentities)) - { - filtererdJoinRestrictionContext->joinRestrictionList = lappend( - filtererdJoinRestrictionContext->joinRestrictionList, - joinRestriction); - } - } - - return filtererdJoinRestrictionContext; -} - - -/* - * RangeTableArrayContainsAnyRTEIdentities returns true if any of the range table entries - * int rangeTableEntries array is an range table relation specified in queryRteIdentities. - */ -static bool -RangeTableArrayContainsAnyRTEIdentities(RangeTblEntry **rangeTableEntries, int - rangeTableArrayLength, Relids queryRteIdentities) -{ - int rteIndex = 0; - - /* simple_rte_array starts from 1, see plannerInfo struct */ - for (rteIndex = 1; rteIndex < rangeTableArrayLength; ++rteIndex) - { - RangeTblEntry *rangeTableEntry = rangeTableEntries[rteIndex]; - List *rangeTableRelationList = NULL; - ListCell *rteRelationCell = NULL; - - /* - * Get list of all RTE_RELATIONs in the given range table entry - * (i.e.,rangeTableEntry could be a subquery where we're interested - * in relations). 
- */ - ExtractRangeTableRelationWalker((Node *) rangeTableEntry, - &rangeTableRelationList); - - foreach(rteRelationCell, rangeTableRelationList) - { - RangeTblEntry *rteRelation = (RangeTblEntry *) lfirst(rteRelationCell); - int rteIdentity = 0; - - Assert(rteRelation->rtekind == RTE_RELATION); - - rteIdentity = GetRTEIdentity(rteRelation); - if (bms_is_member(rteIdentity, queryRteIdentities)) - { - return true; - } - } - } - - return false; -} - - -/* - * QueryRteIdentities gets a queryTree, find get all the rte identities assigned by - * us. - */ -static Relids -QueryRteIdentities(Query *queryTree) -{ - List *rangeTableList = NULL; - ListCell *rangeTableCell = NULL; - Relids queryRteIdentities = NULL; - - /* extract range table entries for simple relations only */ - ExtractRangeTableRelationWalker((Node *) queryTree, &rangeTableList); - - foreach(rangeTableCell, rangeTableList) - { - RangeTblEntry *rangeTableEntry = (RangeTblEntry *) lfirst(rangeTableCell); - int rteIdentity = 0; - - /* we're only interested in relations */ - Assert(rangeTableEntry->rtekind == RTE_RELATION); - - rteIdentity = GetRTEIdentity(rangeTableEntry); - - queryRteIdentities = bms_add_member(queryRteIdentities, rteIdentity); - } - - return queryRteIdentities; -} - - /* * DeferErrorIfFromClauseRecurs returns a deferred error if the * given query is not suitable for subquery pushdown. 
diff --git a/src/backend/distributed/planner/relation_restriction_equivalence.c b/src/backend/distributed/planner/relation_restriction_equivalence.c index 204059b5e..84bf4d4f8 100644 --- a/src/backend/distributed/planner/relation_restriction_equivalence.c +++ b/src/backend/distributed/planner/relation_restriction_equivalence.c @@ -127,6 +127,17 @@ static void ListConcatUniqueAttributeClassMemberLists(AttributeEquivalenceClass secondClass); static Index RelationRestrictionPartitionKeyIndex(RelationRestriction * relationRestriction); +static RelationRestrictionContext * FilterRelationRestrictionContext( + RelationRestrictionContext *relationRestrictionContext, + Relids + queryRteIdentities); +static JoinRestrictionContext * FilterJoinRestrictionContext( + JoinRestrictionContext *joinRestrictionContext, Relids + queryRteIdentities); +static bool RangeTableArrayContainsAnyRTEIdentities(RangeTblEntry **rangeTableEntries, int + rangeTableArrayLength, Relids + queryRteIdentities); +static Relids QueryRteIdentities(Query *queryTree); /* @@ -1518,3 +1529,207 @@ RelationIdList(Query *query) return relationIdList; } + + +/* + * FilterPlannerRestrictionForQuery gets a planner restriction context and + * set of rte identities. It returns the restrictions that that appear + * in the queryRteIdentities and returns a newly allocated + * PlannerRestrictionContext. The function also sets all the other fields of + * the PlannerRestrictionContext with respect to the filtered restrictions. 
+ */ +PlannerRestrictionContext * +FilterPlannerRestrictionForQuery(PlannerRestrictionContext *plannerRestrictionContext, + Query *query) +{ + PlannerRestrictionContext *filteredPlannerRestrictionContext = NULL; + int referenceRelationCount = 0; + int totalRelationCount = 0; + + Relids queryRteIdentities = QueryRteIdentities(query); + + RelationRestrictionContext *relationRestrictionContext = + plannerRestrictionContext->relationRestrictionContext; + JoinRestrictionContext *joinRestrictionContext = + plannerRestrictionContext->joinRestrictionContext; + + RelationRestrictionContext *filteredRelationRestrictionContext = + FilterRelationRestrictionContext(relationRestrictionContext, queryRteIdentities); + + JoinRestrictionContext *filtererdJoinRestrictionContext = + FilterJoinRestrictionContext(joinRestrictionContext, queryRteIdentities); + + /* allocate the filtered planner restriction context and set all the fields */ + filteredPlannerRestrictionContext = palloc0(sizeof(PlannerRestrictionContext)); + + filteredPlannerRestrictionContext->memoryContext = + plannerRestrictionContext->memoryContext; + + totalRelationCount = list_length( + filteredRelationRestrictionContext->relationRestrictionList); + referenceRelationCount = ReferenceRelationCount(filteredRelationRestrictionContext); + + filteredRelationRestrictionContext->allReferenceTables = + (totalRelationCount == referenceRelationCount); + + /* we currently don't support local relations and we cannot come up to this point */ + filteredRelationRestrictionContext->hasLocalRelation = false; + filteredRelationRestrictionContext->hasDistributedRelation = true; + + /* finally set the relation and join restriction contexts */ + filteredPlannerRestrictionContext->relationRestrictionContext = + filteredRelationRestrictionContext; + filteredPlannerRestrictionContext->joinRestrictionContext = + filtererdJoinRestrictionContext; + + return filteredPlannerRestrictionContext; +} + + +/* + * FilterRelationRestrictionContext gets 
a relation restriction context and + * set of rte identities. It returns the relation restrictions that that appear + * in the queryRteIdentities and returns a newly allocated + * RelationRestrictionContext. + */ +static RelationRestrictionContext * +FilterRelationRestrictionContext(RelationRestrictionContext *relationRestrictionContext, + Relids queryRteIdentities) +{ + RelationRestrictionContext *filteredRestrictionContext = + palloc0(sizeof(RelationRestrictionContext)); + + ListCell *relationRestrictionCell = NULL; + + foreach(relationRestrictionCell, relationRestrictionContext->relationRestrictionList) + { + RelationRestriction *relationRestriction = + (RelationRestriction *) lfirst(relationRestrictionCell); + + int rteIdentity = GetRTEIdentity(relationRestriction->rte); + + if (bms_is_member(rteIdentity, queryRteIdentities)) + { + filteredRestrictionContext->relationRestrictionList = + lappend(filteredRestrictionContext->relationRestrictionList, + relationRestriction); + } + } + + return filteredRestrictionContext; +} + + +/* + * FilterJoinRestrictionContext gets a join restriction context and + * set of rte identities. It returns the join restrictions that that appear + * in the queryRteIdentities and returns a newly allocated + * JoinRestrictionContext. + * + * Note that the join restriction is added to the return context as soon as + * any range table entry that appear in the join belongs to queryRteIdentities. 
+ */ +static JoinRestrictionContext * +FilterJoinRestrictionContext(JoinRestrictionContext *joinRestrictionContext, Relids + queryRteIdentities) +{ + JoinRestrictionContext *filtererdJoinRestrictionContext = + palloc0(sizeof(JoinRestrictionContext)); + + ListCell *joinRestrictionCell = NULL; + + foreach(joinRestrictionCell, joinRestrictionContext->joinRestrictionList) + { + JoinRestriction *joinRestriction = + (JoinRestriction *) lfirst(joinRestrictionCell); + RangeTblEntry **rangeTableEntries = + joinRestriction->plannerInfo->simple_rte_array; + int rangeTableArrayLength = joinRestriction->plannerInfo->simple_rel_array_size; + + if (RangeTableArrayContainsAnyRTEIdentities(rangeTableEntries, + rangeTableArrayLength, + queryRteIdentities)) + { + filtererdJoinRestrictionContext->joinRestrictionList = lappend( + filtererdJoinRestrictionContext->joinRestrictionList, + joinRestriction); + } + } + + return filtererdJoinRestrictionContext; +} + + +/* + * RangeTableArrayContainsAnyRTEIdentities returns true if any of the range table entries + * int rangeTableEntries array is an range table relation specified in queryRteIdentities. + */ +static bool +RangeTableArrayContainsAnyRTEIdentities(RangeTblEntry **rangeTableEntries, int + rangeTableArrayLength, Relids queryRteIdentities) +{ + int rteIndex = 0; + + /* simple_rte_array starts from 1, see plannerInfo struct */ + for (rteIndex = 1; rteIndex < rangeTableArrayLength; ++rteIndex) + { + RangeTblEntry *rangeTableEntry = rangeTableEntries[rteIndex]; + List *rangeTableRelationList = NULL; + ListCell *rteRelationCell = NULL; + + /* + * Get list of all RTE_RELATIONs in the given range table entry + * (i.e.,rangeTableEntry could be a subquery where we're interested + * in relations). 
+ */ + ExtractRangeTableRelationWalker((Node *) rangeTableEntry, + &rangeTableRelationList); + + foreach(rteRelationCell, rangeTableRelationList) + { + RangeTblEntry *rteRelation = (RangeTblEntry *) lfirst(rteRelationCell); + int rteIdentity = 0; + + Assert(rteRelation->rtekind == RTE_RELATION); + + rteIdentity = GetRTEIdentity(rteRelation); + if (bms_is_member(rteIdentity, queryRteIdentities)) + { + return true; + } + } + } + + return false; +} + + +/* + * QueryRteIdentities gets a queryTree, find get all the rte identities assigned by + * us. + */ +static Relids +QueryRteIdentities(Query *queryTree) +{ + List *rangeTableList = NULL; + ListCell *rangeTableCell = NULL; + Relids queryRteIdentities = NULL; + + /* extract range table entries for simple relations only */ + ExtractRangeTableRelationWalker((Node *) queryTree, &rangeTableList); + + foreach(rangeTableCell, rangeTableList) + { + RangeTblEntry *rangeTableEntry = (RangeTblEntry *) lfirst(rangeTableCell); + int rteIdentity = 0; + + /* we're only interested in relations */ + Assert(rangeTableEntry->rtekind == RTE_RELATION); + + rteIdentity = GetRTEIdentity(rangeTableEntry); + + queryRteIdentities = bms_add_member(queryRteIdentities, rteIdentity); + } + + return queryRteIdentities; +} diff --git a/src/include/distributed/multi_logical_planner.h b/src/include/distributed/multi_logical_planner.h index b96af6713..08f8cc2f5 100644 --- a/src/include/distributed/multi_logical_planner.h +++ b/src/include/distributed/multi_logical_planner.h @@ -192,9 +192,6 @@ extern DeferredErrorMessage * DeferErrorIfCannotPushdownSubquery(Query *subquery bool outerMostQueryHasLimit); extern DeferredErrorMessage * DeferErrorIfUnsupportedUnionQuery(Query *queryTree); -extern PlannerRestrictionContext * FilterPlannerRestrictionForQuery( - PlannerRestrictionContext *plannerRestrictionContext, - Query *query); extern bool SafeToPushdownWindowFunction(Query *query, StringInfo *errorDetail); extern bool TargetListOnPartitionColumn(Query 
*query, List *targetEntryList); extern bool FindNodeCheckInRangeTableList(List *rtable, bool (*check)(Node *)); diff --git a/src/include/distributed/relation_restriction_equivalence.h b/src/include/distributed/relation_restriction_equivalence.h index 051fde2f7..9dd0b8c90 100644 --- a/src/include/distributed/relation_restriction_equivalence.h +++ b/src/include/distributed/relation_restriction_equivalence.h @@ -22,6 +22,8 @@ extern uint32 ReferenceRelationCount(RelationRestrictionContext *restrictionCont extern bool SafeToPushdownUnionSubquery( PlannerRestrictionContext *plannerRestrictionContext); extern List * RelationIdList(Query *query); - +extern PlannerRestrictionContext * FilterPlannerRestrictionForQuery( + PlannerRestrictionContext *plannerRestrictionContext, + Query *query); #endif /* RELATION_RESTRICTION_EQUIVALENCE_H */ From c228d8ff3d2f438337b3ccf3fbaf9bfad58db988 Mon Sep 17 00:00:00 2001 From: Onder Kalaci Date: Wed, 31 Jan 2018 08:51:33 +0200 Subject: [PATCH 2/3] Refactor equivalence generation related codes This commit changes the APIs for restriction generation to make future changes simpler. 
--- .../relation_restriction_equivalence.c | 40 ++++++++++++++----- .../relation_restriction_equivalence.h | 2 + 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/src/backend/distributed/planner/relation_restriction_equivalence.c b/src/backend/distributed/planner/relation_restriction_equivalence.c index 84bf4d4f8..6b3c9e062 100644 --- a/src/backend/distributed/planner/relation_restriction_equivalence.c +++ b/src/backend/distributed/planner/relation_restriction_equivalence.c @@ -404,11 +404,6 @@ RestrictionEquivalenceForPartitionKeys(PlannerRestrictionContext * { RelationRestrictionContext *restrictionContext = plannerRestrictionContext->relationRestrictionContext; - JoinRestrictionContext *joinRestrictionContext = - plannerRestrictionContext->joinRestrictionContext; - - List *relationRestrictionAttributeEquivalenceList = NIL; - List *joinRestrictionAttributeEquivalenceList = NIL; List *allAttributeEquivalenceList = NIL; uint32 referenceRelationCount = ReferenceRelationCount(restrictionContext); @@ -433,20 +428,43 @@ RestrictionEquivalenceForPartitionKeys(PlannerRestrictionContext * return true; } + allAttributeEquivalenceList = + GenerateAllAttributeEquivalences(plannerRestrictionContext); + + return EquivalenceListContainsRelationsEquality(allAttributeEquivalenceList, + restrictionContext); +} + + +/* + * GenerateAllAttributeEquivalences gets the planner restriction context and returns + * the list of all attribute equivalences based on both join restrictions and relation + * restrictions. 
+ */ +List * +GenerateAllAttributeEquivalences(PlannerRestrictionContext *plannerRestrictionContext) +{ + RelationRestrictionContext *relationRestrictionContext = + plannerRestrictionContext->relationRestrictionContext; + JoinRestrictionContext *joinRestrictionContext = + plannerRestrictionContext->joinRestrictionContext; + + List *relationRestrictionAttributeEquivalenceList = NIL; + List *joinRestrictionAttributeEquivalenceList = NIL; + List *allAttributeEquivalenceList = NIL; + /* reset the equivalence id counter per call to prevent overflows */ attributeEquivalenceId = 1; relationRestrictionAttributeEquivalenceList = - GenerateAttributeEquivalencesForRelationRestrictions(restrictionContext); + GenerateAttributeEquivalencesForRelationRestrictions(relationRestrictionContext); joinRestrictionAttributeEquivalenceList = GenerateAttributeEquivalencesForJoinRestrictions(joinRestrictionContext); - allAttributeEquivalenceList = - list_concat(relationRestrictionAttributeEquivalenceList, - joinRestrictionAttributeEquivalenceList); + allAttributeEquivalenceList = list_concat(relationRestrictionAttributeEquivalenceList, + joinRestrictionAttributeEquivalenceList); - return EquivalenceListContainsRelationsEquality(allAttributeEquivalenceList, - restrictionContext); + return allAttributeEquivalenceList; } diff --git a/src/include/distributed/relation_restriction_equivalence.h b/src/include/distributed/relation_restriction_equivalence.h index 9dd0b8c90..41721dd32 100644 --- a/src/include/distributed/relation_restriction_equivalence.h +++ b/src/include/distributed/relation_restriction_equivalence.h @@ -18,6 +18,8 @@ extern bool ContainsUnionSubquery(Query *queryTree); extern bool RestrictionEquivalenceForPartitionKeys(PlannerRestrictionContext * plannerRestrictionContext); +extern List * GenerateAllAttributeEquivalences(PlannerRestrictionContext * + plannerRestrictionContext); extern uint32 ReferenceRelationCount(RelationRestrictionContext *restrictionContext); extern bool 
SafeToPushdownUnionSubquery( PlannerRestrictionContext *plannerRestrictionContext); From 94c5ac6ebb44c6ae5d8de400ccc1634490a65524 Mon Sep 17 00:00:00 2001 From: Onder Kalaci Date: Wed, 31 Jan 2018 11:09:11 +0200 Subject: [PATCH 3/3] Remove duplicate join restrictions We use PostgreSQL hooks to accumulate the join restrictions and PostgreSQL gives us all the join paths it tries while deciding on the join order. Thus, for queries that have many joins, this function is likely to remove lots of duplicate join restrictions. This becomes relevant for Citus on query pushdown check performance. --- .../distributed/planner/distributed_planner.c | 5 + .../relation_restriction_equivalence.c | 92 +++++++++++++++++++ .../relation_restriction_equivalence.h | 2 + 3 files changed, 99 insertions(+) diff --git a/src/backend/distributed/planner/distributed_planner.c b/src/backend/distributed/planner/distributed_planner.c index 6259bf2e4..45b2ed79c 100644 --- a/src/backend/distributed/planner/distributed_planner.c +++ b/src/backend/distributed/planner/distributed_planner.c @@ -478,12 +478,17 @@ CreateDistributedPlan(uint64 planId, PlannedStmt *localPlan, Query *originalQuer DistributedPlan *distributedPlan = NULL; PlannedStmt *resultPlan = NULL; bool hasUnresolvedParams = false; + JoinRestrictionContext *joinRestrictionContext = + plannerRestrictionContext->joinRestrictionContext; if (HasUnresolvedExternParamsWalker((Node *) originalQuery, boundParams)) { hasUnresolvedParams = true; } + plannerRestrictionContext->joinRestrictionContext = + RemoveDuplicateJoinRestrictions(joinRestrictionContext); + if (IsModifyCommand(query)) { EnsureModificationsCanRun(); diff --git a/src/backend/distributed/planner/relation_restriction_equivalence.c b/src/backend/distributed/planner/relation_restriction_equivalence.c index 6b3c9e062..f2ba53f14 100644 --- a/src/backend/distributed/planner/relation_restriction_equivalence.c +++ b/src/backend/distributed/planner/relation_restriction_equivalence.c @@ 
-138,6 +138,9 @@ static bool RangeTableArrayContainsAnyRTEIdentities(RangeTblEntry **rangeTableEn rangeTableArrayLength, Relids queryRteIdentities); static Relids QueryRteIdentities(Query *queryTree); +static bool JoinRestrictionListExistsInContext(JoinRestriction *joinRestrictionInput, + JoinRestrictionContext * + joinRestrictionContext); /* @@ -1751,3 +1754,92 @@ QueryRteIdentities(Query *queryTree) return queryRteIdentities; } + + +/* + * RemoveDuplicateJoinRestrictions gets a join restriction context and returns a + * newly allocated join restriction context where the duplicate join restrictions + * are removed. + * + * Note that we use PostgreSQL hooks to accumulate the join restrictions and PostgreSQL + * gives us all the join paths it tries while deciding on the join order. Thus, for + * queries that have many joins, this function is likely to remove lots of duplicate join + * restrictions. This becomes relevant for Citus on query pushdown check performance. + */ +JoinRestrictionContext * +RemoveDuplicateJoinRestrictions(JoinRestrictionContext *joinRestrictionContext) +{ + JoinRestrictionContext *filteredContext = palloc0(sizeof(JoinRestrictionContext)); + ListCell *joinRestrictionCell = NULL; + + filteredContext->joinRestrictionList = NIL; + + foreach(joinRestrictionCell, joinRestrictionContext->joinRestrictionList) + { + JoinRestriction *joinRestriction = lfirst(joinRestrictionCell); + + /* if we already have the same restrictions, skip */ + if (JoinRestrictionListExistsInContext(joinRestriction, filteredContext)) + { + continue; + } + + filteredContext->joinRestrictionList = + lappend(filteredContext->joinRestrictionList, joinRestriction); + } + + return filteredContext; +} + + +/* + * JoinRestrictionListExistsInContext returns true if the given joinRestrictionInput + * has an equivalent in the given joinRestrictionContext. 
+ */ +static bool +JoinRestrictionListExistsInContext(JoinRestriction *joinRestrictionInput, + JoinRestrictionContext *joinRestrictionContext) +{ + List *joinRestrictionList = joinRestrictionContext->joinRestrictionList; + List *inputJoinRestrictInfoList = joinRestrictionInput->joinRestrictInfoList; + + ListCell *joinRestrictionCell = NULL; + + foreach(joinRestrictionCell, joinRestrictionList) + { + JoinRestriction *joinRestriction = lfirst(joinRestrictionCell); + List *joinRestrictInfoList = joinRestriction->joinRestrictInfoList; + + /* obviously we shouldn't treat different join types as being the same */ + if (joinRestriction->joinType != joinRestrictionInput->joinType) + { + continue; + } + + /* + * If we're dealing with different queries, we shouldn't treat their + * restrictions as being the same. + */ + if (joinRestriction->plannerInfo != joinRestrictionInput->plannerInfo) + { + continue; + } + + /* + * We check whether every restriction in joinRestriction also appears in + * joinRestrictionInput, i.e. the list_difference() result below is empty. + * NOTE(review): this is a subset check on joinRestriction; confirm the intent. + * + * Also, note that list_difference() returns a list that contains all the + * cells in joinRestrictInfoList that are not in inputJoinRestrictInfoList. + * Finally, each element in these lists is a pointer to RestrictInfo + * structure, where equal() function is implemented for the struct. 
+ */ + if (list_difference(joinRestrictInfoList, inputJoinRestrictInfoList) == NIL) + { + return true; + } + } + + return false; +} diff --git a/src/include/distributed/relation_restriction_equivalence.h b/src/include/distributed/relation_restriction_equivalence.h index 41721dd32..c80225963 100644 --- a/src/include/distributed/relation_restriction_equivalence.h +++ b/src/include/distributed/relation_restriction_equivalence.h @@ -27,5 +27,7 @@ extern List * RelationIdList(Query *query); extern PlannerRestrictionContext * FilterPlannerRestrictionForQuery( PlannerRestrictionContext *plannerRestrictionContext, Query *query); +extern JoinRestrictionContext * RemoveDuplicateJoinRestrictions(JoinRestrictionContext * + joinRestrictionContext); #endif /* RELATION_RESTRICTION_EQUIVALENCE_H */