mirror of https://github.com/citusdata/citus.git
Remove duplicate join restrictions
We use PostgreSQL hooks to accumulate the join restrictions and PostgreSQL gives us all the join paths it tries while deciding on the join order. Thus, for queries that have many joins, this function is likely to remove lots of duplicate join restrictions. This becomes relevant for Citus on query pushdown check performance.pull/1989/head
parent
c228d8ff3d
commit
94c5ac6ebb
|
@ -478,12 +478,17 @@ CreateDistributedPlan(uint64 planId, PlannedStmt *localPlan, Query *originalQuer
|
|||
DistributedPlan *distributedPlan = NULL;
|
||||
PlannedStmt *resultPlan = NULL;
|
||||
bool hasUnresolvedParams = false;
|
||||
JoinRestrictionContext *joinRestrictionContext =
|
||||
plannerRestrictionContext->joinRestrictionContext;
|
||||
|
||||
if (HasUnresolvedExternParamsWalker((Node *) originalQuery, boundParams))
|
||||
{
|
||||
hasUnresolvedParams = true;
|
||||
}
|
||||
|
||||
plannerRestrictionContext->joinRestrictionContext =
|
||||
RemoveDuplicateJoinRestrictions(joinRestrictionContext);
|
||||
|
||||
if (IsModifyCommand(query))
|
||||
{
|
||||
EnsureModificationsCanRun();
|
||||
|
|
|
@ -138,6 +138,9 @@ static bool RangeTableArrayContainsAnyRTEIdentities(RangeTblEntry **rangeTableEn
|
|||
rangeTableArrayLength, Relids
|
||||
queryRteIdentities);
|
||||
static Relids QueryRteIdentities(Query *queryTree);
|
||||
static bool JoinRestrictionListExistsInContext(JoinRestriction *joinRestrictionInput,
|
||||
JoinRestrictionContext *
|
||||
joinRestrictionContext);
|
||||
|
||||
|
||||
/*
|
||||
|
@ -1751,3 +1754,92 @@ QueryRteIdentities(Query *queryTree)
|
|||
|
||||
return queryRteIdentities;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* RemoveDuplicateJoinRestrictions gets a join restriction context and returns a
|
||||
* newly allocated join restriction context where the duplicate join restrictions
|
||||
* removed.
|
||||
*
|
||||
* Note that we use PostgreSQL hooks to accumulate the join restrictions and PostgreSQL
|
||||
* gives us all the join paths it tries while deciding on the join order. Thus, for
|
||||
* queries that has many joins, this function is likely to remove lots of duplicate join
|
||||
* restrictions. This becomes relevant for Citus on query pushdown check peformance.
|
||||
*/
|
||||
JoinRestrictionContext *
|
||||
RemoveDuplicateJoinRestrictions(JoinRestrictionContext *joinRestrictionContext)
|
||||
{
|
||||
JoinRestrictionContext *filteredContext = palloc0(sizeof(JoinRestrictionContext));
|
||||
ListCell *joinRestrictionCell = NULL;
|
||||
|
||||
filteredContext->joinRestrictionList = NIL;
|
||||
|
||||
foreach(joinRestrictionCell, joinRestrictionContext->joinRestrictionList)
|
||||
{
|
||||
JoinRestriction *joinRestriction = lfirst(joinRestrictionCell);
|
||||
|
||||
/* if we already have the same restrictions, skip */
|
||||
if (JoinRestrictionListExistsInContext(joinRestriction, filteredContext))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
filteredContext->joinRestrictionList =
|
||||
lappend(filteredContext->joinRestrictionList, joinRestriction);
|
||||
}
|
||||
|
||||
return filteredContext;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* JoinRestrictionListExistsInContext returns true if the given joinRestrictionInput
|
||||
* has an equivalent of in the given joinRestrictionContext.
|
||||
*/
|
||||
static bool
|
||||
JoinRestrictionListExistsInContext(JoinRestriction *joinRestrictionInput,
|
||||
JoinRestrictionContext *joinRestrictionContext)
|
||||
{
|
||||
List *joinRestrictionList = joinRestrictionContext->joinRestrictionList;
|
||||
List *inputJoinRestrictInfoList = joinRestrictionInput->joinRestrictInfoList;
|
||||
|
||||
ListCell *joinRestrictionCell = NULL;
|
||||
|
||||
foreach(joinRestrictionCell, joinRestrictionList)
|
||||
{
|
||||
JoinRestriction *joinRestriction = lfirst(joinRestrictionCell);
|
||||
List *joinRestrictInfoList = joinRestriction->joinRestrictInfoList;
|
||||
|
||||
/* obviously we shouldn't treat different join types as being the same */
|
||||
if (joinRestriction->joinType != joinRestrictionInput->joinType)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we're dealing with different queries, we shouldn't treat their
|
||||
* restrictions as being the same.
|
||||
*/
|
||||
if (joinRestriction->plannerInfo != joinRestrictionInput->plannerInfo)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* We check whether the restrictions in joinRestriction is a super set
|
||||
* of the restrictions in joinRestrictionInput in the sense that all the
|
||||
* restrictions in the latter already exists in the former.
|
||||
*
|
||||
* Also, note that list_difference() returns a list that contains all the
|
||||
* cells in joinRestrictInfoList that are not in inputJoinRestrictInfoList.
|
||||
* Finally, each element in these lists is a pointer to RestrictInfo
|
||||
* structure, where equal() function is implemented for the struct.
|
||||
*/
|
||||
if (list_difference(joinRestrictInfoList, inputJoinRestrictInfoList) == NIL)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -27,5 +27,7 @@ extern List * RelationIdList(Query *query);
|
|||
extern PlannerRestrictionContext * FilterPlannerRestrictionForQuery(
|
||||
PlannerRestrictionContext *plannerRestrictionContext,
|
||||
Query *query);
|
||||
extern JoinRestrictionContext * RemoveDuplicateJoinRestrictions(JoinRestrictionContext *
|
||||
joinRestrictionContext);
|
||||
|
||||
#endif /* RELATION_RESTRICTION_EQUIVALENCE_H */
|
||||
|
|
Loading…
Reference in New Issue