From 94c5ac6ebb44c6ae5d8de400ccc1634490a65524 Mon Sep 17 00:00:00 2001 From: Onder Kalaci Date: Wed, 31 Jan 2018 11:09:11 +0200 Subject: [PATCH] Remove duplicate join restrictions We use PostgreSQL hooks to accumulate the join restrictions and PostgreSQL gives us all the join paths it tries while deciding on the join order. Thus, for queries that have many joins, this function is likely to remove lots of duplicate join restrictions. This becomes relevant for Citus on query pushdown check performance. --- .../distributed/planner/distributed_planner.c | 5 + .../relation_restriction_equivalence.c | 92 +++++++++++++++++++ .../relation_restriction_equivalence.h | 2 + 3 files changed, 99 insertions(+) diff --git a/src/backend/distributed/planner/distributed_planner.c b/src/backend/distributed/planner/distributed_planner.c index 6259bf2e4..45b2ed79c 100644 --- a/src/backend/distributed/planner/distributed_planner.c +++ b/src/backend/distributed/planner/distributed_planner.c @@ -478,12 +478,17 @@ CreateDistributedPlan(uint64 planId, PlannedStmt *localPlan, Query *originalQuer DistributedPlan *distributedPlan = NULL; PlannedStmt *resultPlan = NULL; bool hasUnresolvedParams = false; + JoinRestrictionContext *joinRestrictionContext = + plannerRestrictionContext->joinRestrictionContext; if (HasUnresolvedExternParamsWalker((Node *) originalQuery, boundParams)) { hasUnresolvedParams = true; } + plannerRestrictionContext->joinRestrictionContext = + RemoveDuplicateJoinRestrictions(joinRestrictionContext); + if (IsModifyCommand(query)) { EnsureModificationsCanRun(); diff --git a/src/backend/distributed/planner/relation_restriction_equivalence.c b/src/backend/distributed/planner/relation_restriction_equivalence.c index 6b3c9e062..f2ba53f14 100644 --- a/src/backend/distributed/planner/relation_restriction_equivalence.c +++ b/src/backend/distributed/planner/relation_restriction_equivalence.c @@ -138,6 +138,9 @@ static bool RangeTableArrayContainsAnyRTEIdentities(RangeTblEntry 
**rangeTableEn rangeTableArrayLength, Relids queryRteIdentities); static Relids QueryRteIdentities(Query *queryTree); +static bool JoinRestrictionListExistsInContext(JoinRestriction *joinRestrictionInput, + JoinRestrictionContext * + joinRestrictionContext); /* @@ -1751,3 +1754,92 @@ QueryRteIdentities(Query *queryTree) return queryRteIdentities; } + + +/* + * RemoveDuplicateJoinRestrictions gets a join restriction context and returns a + * newly allocated join restriction context where the duplicate join restrictions + * are removed. + * + * Note that we use PostgreSQL hooks to accumulate the join restrictions and PostgreSQL + * gives us all the join paths it tries while deciding on the join order. Thus, for + * queries that have many joins, this function is likely to remove lots of duplicate join + * restrictions. This becomes relevant for Citus on query pushdown check performance. + */ +JoinRestrictionContext * +RemoveDuplicateJoinRestrictions(JoinRestrictionContext *joinRestrictionContext) +{ + JoinRestrictionContext *filteredContext = palloc0(sizeof(JoinRestrictionContext)); + ListCell *joinRestrictionCell = NULL; + + filteredContext->joinRestrictionList = NIL; + + foreach(joinRestrictionCell, joinRestrictionContext->joinRestrictionList) + { + JoinRestriction *joinRestriction = lfirst(joinRestrictionCell); + + /* if we already have the same restrictions, skip */ + if (JoinRestrictionListExistsInContext(joinRestriction, filteredContext)) + { + continue; + } + + filteredContext->joinRestrictionList = + lappend(filteredContext->joinRestrictionList, joinRestriction); + } + + return filteredContext; +} + + +/* + * JoinRestrictionListExistsInContext returns true if the given joinRestrictionInput + * has an equivalent in the given joinRestrictionContext. 
+ */ +static bool +JoinRestrictionListExistsInContext(JoinRestriction *joinRestrictionInput, + JoinRestrictionContext *joinRestrictionContext) +{ + List *joinRestrictionList = joinRestrictionContext->joinRestrictionList; + List *inputJoinRestrictInfoList = joinRestrictionInput->joinRestrictInfoList; + + ListCell *joinRestrictionCell = NULL; + + foreach(joinRestrictionCell, joinRestrictionList) + { + JoinRestriction *joinRestriction = lfirst(joinRestrictionCell); + List *joinRestrictInfoList = joinRestriction->joinRestrictInfoList; + + /* obviously we shouldn't treat different join types as being the same */ + if (joinRestriction->joinType != joinRestrictionInput->joinType) + { + continue; + } + + /* + * If we're dealing with different queries, we shouldn't treat their + * restrictions as being the same. + */ + if (joinRestriction->plannerInfo != joinRestrictionInput->plannerInfo) + { + continue; + } + + /* + * We check whether the restrictions in joinRestriction are a superset + * of the restrictions in joinRestrictionInput in the sense that all the + * restrictions in the latter already exist in the former. + * + * Note that list_difference() returns a list that contains all the + * cells in inputJoinRestrictInfoList that are not in joinRestrictInfoList, + * so an empty result means the input adds nothing new. Each element in + * these lists is a pointer to a RestrictInfo, which is compared by equal(). 
+ */ + if (list_difference(inputJoinRestrictInfoList, joinRestrictInfoList) == NIL) + { + return true; + } + } + + return false; +} diff --git a/src/include/distributed/relation_restriction_equivalence.h b/src/include/distributed/relation_restriction_equivalence.h index 41721dd32..c80225963 100644 --- a/src/include/distributed/relation_restriction_equivalence.h +++ b/src/include/distributed/relation_restriction_equivalence.h @@ -27,5 +27,7 @@ extern List * RelationIdList(Query *query); extern PlannerRestrictionContext * FilterPlannerRestrictionForQuery( PlannerRestrictionContext *plannerRestrictionContext, Query *query); +extern JoinRestrictionContext * RemoveDuplicateJoinRestrictions(JoinRestrictionContext * + joinRestrictionContext); #endif /* RELATION_RESTRICTION_EQUIVALENCE_H */