onur-leftjoin_push-improvements
eaydingol 2025-07-31 17:15:38 +03:00
parent 665ae75a65
commit fc109f408b
6 changed files with 171 additions and 128 deletions

View File

@ -520,31 +520,33 @@ IsCitusTableTypeCacheEntry(CitusTableCacheEntry *tableEntry, CitusTableType tabl
tableEntry->colocationId, tableType);
}
/*
* IsFirstShard returns true if the given shardId is the first shard.
*/
* IsFirstShard returns true if the given shardId is the first shard.
*/
bool
IsFirstShard(CitusTableCacheEntry *tableEntry, uint64 shardId)
{
if (tableEntry == NULL || tableEntry->sortedShardIntervalArray == NULL)
{
return false;
}
if (tableEntry->sortedShardIntervalArray[0]->shardId == INVALID_SHARD_ID)
{
return false;
}
if (tableEntry == NULL || tableEntry->sortedShardIntervalArray == NULL)
{
return false;
}
if (tableEntry->sortedShardIntervalArray[0]->shardId == INVALID_SHARD_ID)
{
return false;
}
if (shardId == tableEntry->sortedShardIntervalArray[0]->shardId)
{
return true;
}
else
{
return false;
}
if (shardId == tableEntry->sortedShardIntervalArray[0]->shardId)
{
return true;
}
else
{
return false;
}
}
/*
* HasDistributionKey returns true if given Citus table has a distribution key.
*/

View File

@ -208,8 +208,8 @@ UpdateTaskQueryString(Query *query, Task *task)
/*
* UpdateWhereClauseForOuterJoin walks over the query tree and appends quals
* to the WHERE clause to filter w.r.to the distribution column of the corresponding shard.
* UpdateWhereClauseForOuterJoin walks over the query tree and appends quals
* to the WHERE clause to filter w.r.to the distribution column of the corresponding shard.
*/
bool
UpdateWhereClauseForOuterJoin(Node *node, List *relationShardList)
@ -221,50 +221,59 @@ UpdateWhereClauseForOuterJoin(Node *node, List *relationShardList)
if (!IsA(node, Query))
{
return expression_tree_walker(node, UpdateWhereClauseForOuterJoin, relationShardList);
return expression_tree_walker(node, UpdateWhereClauseForOuterJoin,
relationShardList);
}
Query *query = (Query *) node;
if (query->jointree == NULL)
{
return query_tree_walker(query, UpdateWhereClauseForOuterJoin, relationShardList, 0);
return query_tree_walker(query, UpdateWhereClauseForOuterJoin, relationShardList,
0);
}
FromExpr *fromExpr = query->jointree;
if(fromExpr == NULL || fromExpr->fromlist == NIL)
if (fromExpr == NULL || fromExpr->fromlist == NIL)
{
return query_tree_walker(query, UpdateWhereClauseForOuterJoin, relationShardList, 0);
return query_tree_walker(query, UpdateWhereClauseForOuterJoin, relationShardList,
0);
}
// TODO: generalize to the list
/* TODO: generalize to the list */
Node *firstFromItem = linitial(fromExpr->fromlist);
if (!IsA(firstFromItem, JoinExpr))
{
return query_tree_walker(query, UpdateWhereClauseForOuterJoin, relationShardList, 0);
return query_tree_walker(query, UpdateWhereClauseForOuterJoin, relationShardList,
0);
}
JoinExpr *joinExpr = (JoinExpr *) firstFromItem;
/*
* We need to find the outer table in the join clause to add the constraints w.r.to the shard
* intervals of the inner table.
* A representative inner table is sufficient as long as it is colocated with all other
* distributed tables in the join clause.
*/
* We need to find the outer table in the join clause to add the constraints w.r.to the shard
* intervals of the inner table.
* A representative inner table is sufficient as long as it is colocated with all other
* distributed tables in the join clause.
*/
RangeTblEntry *innerRte = NULL;
RangeTblEntry *outerRte = NULL;
int outerRtIndex = -1;
int attnum;
if(!CheckPushDownFeasibilityAndComputeIndexes(joinExpr, query, &outerRtIndex, &outerRte, &innerRte, &attnum))
if (!CheckPushDownFeasibilityAndComputeIndexes(joinExpr, query, &outerRtIndex,
&outerRte, &innerRte, &attnum))
{
return query_tree_walker(query, UpdateWhereClauseForOuterJoin, relationShardList, 0);
return query_tree_walker(query, UpdateWhereClauseForOuterJoin, relationShardList,
0);
}
if( attnum == InvalidAttrNumber)
if (attnum == InvalidAttrNumber)
{
return query_tree_walker(query, UpdateWhereClauseForOuterJoin, relationShardList, 0);
return query_tree_walker(query, UpdateWhereClauseForOuterJoin, relationShardList,
0);
}
ereport(DEBUG5, (errmsg("Distributed table from the inner part of the outer join: %s.", innerRte->eref->aliasname)));
ereport(DEBUG5, (errmsg(
"Distributed table from the inner part of the outer join: %s.",
innerRte->eref->aliasname)));
RelationShard *relationShard = FindRelationShard(innerRte->relid, relationShardList);
@ -274,26 +283,29 @@ UpdateWhereClauseForOuterJoin(Node *node, List *relationShardList)
CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(relationId);
Var *partitionColumnVar = cacheEntry->partitionColumn;
/*
* we will add constraints for the outer table,
* we create a Var node for the outer table's column that is compared with the distribution column.
*/
/*
* we will add constraints for the outer table,
* we create a Var node for the outer table's column that is compared with the distribution column.
*/
Var* outerTablePartitionColumnVar = makeVar(
outerRtIndex, attnum, partitionColumnVar->vartype,
partitionColumnVar->vartypmod,
partitionColumnVar->varcollid,
0);
Var *outerTablePartitionColumnVar = makeVar(
outerRtIndex, attnum, partitionColumnVar->vartype,
partitionColumnVar->vartypmod,
partitionColumnVar->varcollid,
0);
bool isFirstShard = IsFirstShard(cacheEntry, shardId);
/* load the interval for the shard and create constant nodes for the upper/lower bounds */
ShardInterval *shardInterval = LoadShardInterval(shardId);
Const *constNodeLowerBound = makeConst(INT4OID, -1, InvalidOid, sizeof(int32), shardInterval->minValue, false, true);
Const *constNodeUpperBound = makeConst(INT4OID, -1, InvalidOid, sizeof(int32), shardInterval->maxValue, false, true);
Const *constNodeZero = makeConst(INT4OID, -1, InvalidOid, sizeof(int32), Int32GetDatum(0), false, true);
Const *constNodeLowerBound = makeConst(INT4OID, -1, InvalidOid, sizeof(int32),
shardInterval->minValue, false, true);
Const *constNodeUpperBound = makeConst(INT4OID, -1, InvalidOid, sizeof(int32),
shardInterval->maxValue, false, true);
Const *constNodeZero = makeConst(INT4OID, -1, InvalidOid, sizeof(int32),
Int32GetDatum(0), false, true);
/* create a function expression node for the hash partition column */
/* create a function expression node for the hash partition column */
FuncExpr *hashFunction = makeNode(FuncExpr);
hashFunction->funcid = cacheEntry->hashFunction->fn_oid;
hashFunction->args = list_make1(outerTablePartitionColumnVar);
@ -301,49 +313,57 @@ UpdateWhereClauseForOuterJoin(Node *node, List *relationShardList)
hashFunction->funcretset = false;
/* create a function expression for the lower bound of the shard interval */
Oid resultTypeOid = get_func_rettype(cacheEntry->shardIntervalCompareFunction->fn_oid);
FuncExpr* lowerBoundFuncExpr = makeNode(FuncExpr);
Oid resultTypeOid = get_func_rettype(
cacheEntry->shardIntervalCompareFunction->fn_oid);
FuncExpr *lowerBoundFuncExpr = makeNode(FuncExpr);
lowerBoundFuncExpr->funcid = cacheEntry->shardIntervalCompareFunction->fn_oid;
lowerBoundFuncExpr->args = list_make2((Node *) constNodeLowerBound, (Node *) hashFunction);
lowerBoundFuncExpr->args = list_make2((Node *) constNodeLowerBound,
(Node *) hashFunction);
lowerBoundFuncExpr->funcresulttype = resultTypeOid;
lowerBoundFuncExpr->funcretset = false;
Oid lessThan = GetSysCacheOid(OPERNAMENSP, Anum_pg_operator_oid, CStringGetDatum("<"),
Oid lessThan = GetSysCacheOid(OPERNAMENSP, Anum_pg_operator_oid, CStringGetDatum("<"),
resultTypeOid, resultTypeOid, ObjectIdGetDatum(11));
/*
* Finally, check if the comparison result is less than 0, i.e.,
/*
* Finally, check if the comparison result is less than 0, i.e.,
* shardInterval->minValue < hash(partitionColumn)
* See SearchCachedShardInterval for the behavior at the boundaries.
*/
Expr *lowerBoundExpr = make_opclause(lessThan, BOOLOID, false, (Expr *) lowerBoundFuncExpr,
*/
Expr *lowerBoundExpr = make_opclause(lessThan, BOOLOID, false,
(Expr *) lowerBoundFuncExpr,
(Expr *) constNodeZero, InvalidOid, InvalidOid);
/* create a function expression for the upper bound of the shard interval */
FuncExpr* upperBoundFuncExpr = makeNode(FuncExpr);
FuncExpr *upperBoundFuncExpr = makeNode(FuncExpr);
upperBoundFuncExpr->funcid = cacheEntry->shardIntervalCompareFunction->fn_oid;
upperBoundFuncExpr->args = list_make2((Node *) hashFunction, (Expr *) constNodeUpperBound);
upperBoundFuncExpr->args = list_make2((Node *) hashFunction,
(Expr *) constNodeUpperBound);
upperBoundFuncExpr->funcresulttype = resultTypeOid;
upperBoundFuncExpr->funcretset = false;
Oid lessThanOrEqualTo = GetSysCacheOid(OPERNAMENSP, Anum_pg_operator_oid, CStringGetDatum("<="),
resultTypeOid, resultTypeOid, ObjectIdGetDatum(11));
Oid lessThanOrEqualTo = GetSysCacheOid(OPERNAMENSP, Anum_pg_operator_oid,
CStringGetDatum("<="),
resultTypeOid, resultTypeOid, ObjectIdGetDatum(
11));
/*
* Finally, check if the comparison result is less than or equal to 0, i.e.,
/*
* Finally, check if the comparison result is less than or equal to 0, i.e.,
* hash(partitionColumn) <= shardInterval->maxValue
* See SearchCachedShardInterval for the behavior at the boundaries.
*/
Expr *upperBoundExpr = make_opclause(lessThanOrEqualTo, BOOLOID, false, (Expr *) upperBoundFuncExpr,
*/
Expr *upperBoundExpr = make_opclause(lessThanOrEqualTo, BOOLOID, false,
(Expr *) upperBoundFuncExpr,
(Expr *) constNodeZero, InvalidOid, InvalidOid);
/* create a node for both upper and lower bound */
Node *shardIntervalBoundQuals = make_and_qual((Node *) lowerBoundExpr, (Node *) upperBoundExpr);
Node *shardIntervalBoundQuals = make_and_qual((Node *) lowerBoundExpr,
(Node *) upperBoundExpr);
/*
* Add a null test for the partition column for the first shard.
/*
* Add a null test for the partition column for the first shard.
* This is because we need to include the null values in exactly one of the shard queries.
* The null test is added as an OR clause to the existing AND clause.
*/
@ -354,7 +374,8 @@ UpdateWhereClauseForOuterJoin(Node *node, List *relationShardList)
nullTest->nulltesttype = IS_NULL; /* Check for IS NULL */
nullTest->arg = (Expr *) partitionColumnVar; /* The variable to check */
nullTest->argisrow = false;
shardIntervalBoundQuals = (Node *) make_orclause(list_make2(nullTest, shardIntervalBoundQuals));
shardIntervalBoundQuals = (Node *) make_orclause(list_make2(nullTest,
shardIntervalBoundQuals));
}
if (fromExpr->quals == NULL)

View File

@ -2240,6 +2240,7 @@ QueryPushdownSqlTaskList(Query *query, uint64 jobId,
}
prevShardCount = cacheEntry->shardIntervalArrayLength;
innerTableOfOuterJoin = false;
/*
* For left outer joins, we need to check if the table is in the inner
* part of the join. If it is, we need to mark this shard and add interval
@ -2571,7 +2572,7 @@ QueryPushdownTaskCreate(Query *originalQuery, int shardIndex,
* refutation depend on it being so. We need to make them explicit again so
* that the query string is generated as (...) AND (...) as opposed to
* (...), (...).
* TODO: do we need to run this before adding quals?
* TODO: do we need to run this before adding quals?
*/
if (taskQuery->jointree->quals != NULL && IsA(taskQuery->jointree->quals, List))
{

View File

@ -833,10 +833,10 @@ DeferredErrorIfUnsupportedRecurringTuplesJoin(
* contains recurring rels, must be an unsupported lateral outer
* join.
*/
/*
* For now only stop returning an error here.
* TODO: later add all required checks to push down the query here
*/
/*
* For now only stop returning an error here.
* TODO: later add all required checks to push down the query here
*/
continue;
recurType = FetchFirstRecurType(plannerInfo, outerrelRelids);

View File

@ -753,9 +753,9 @@ RecursivelyPlanRecurringTupleOuterJoinWalker(Node *node, Query *query,
{
/* <recurring> left join <distributed> */
if (leftNodeRecurs && !rightNodeRecurs)
{
if(chainedJoin || !CheckPushDownFeasibilityLeftJoin(joinExpr, query))
if (leftNodeRecurs && !rightNodeRecurs)
{
if (chainedJoin || !CheckPushDownFeasibilityLeftJoin(joinExpr, query))
{
ereport(DEBUG1, (errmsg("recursively planning right side of "
"the left join since the outer side "
@ -765,7 +765,8 @@ RecursivelyPlanRecurringTupleOuterJoinWalker(Node *node, Query *query,
}
else
{
ereport(DEBUG3, (errmsg("a push down safe left join with recurring left side")));
ereport(DEBUG3, (errmsg(
"a push down safe left join with recurring left side")));
leftNodeRecurs = false; /* left node will be pushed down */
}
}
@ -2666,7 +2667,8 @@ hasPseudoconstantQuals(RelationRestrictionContext *relationRestrictionContext)
* IsPushdownSafeForRTEInLeftJoin returns true if the given range table entry
* is safe for pushdown. Currently, we only allow reference tables.
*/
bool IsPushdownSafeForRTEInLeftJoin(RangeTblEntry *rte)
bool
IsPushdownSafeForRTEInLeftJoin(RangeTblEntry *rte)
{
if (IsCitusTable(rte->relid) && IsCitusTableType(rte->relid, REFERENCE_TABLE))
{
@ -2675,7 +2677,7 @@ bool IsPushdownSafeForRTEInLeftJoin(RangeTblEntry *rte)
else
{
ereport(DEBUG5, (errmsg("RTE type %d is not safe for pushdown",
rte->rtekind)));
rte->rtekind)));
return false;
}
}
@ -2684,33 +2686,36 @@ bool IsPushdownSafeForRTEInLeftJoin(RangeTblEntry *rte)
/*
* Recursively resolve a Var from a subquery target list to the base Var and RTE
*/
bool ResolveBaseVarFromSubquery(Var *var, Query *query,
Var **baseVar, RangeTblEntry **baseRte)
bool
ResolveBaseVarFromSubquery(Var *var, Query *query,
Var **baseVar, RangeTblEntry **baseRte)
{
TargetEntry *tle = get_tle_by_resno(query->targetList, var->varattno);
if (!tle || !IsA(tle->expr, Var))
return false;
TargetEntry *tle = get_tle_by_resno(query->targetList, var->varattno);
if (!tle || !IsA(tle->expr, Var))
{
return false;
}
Var *tleVar = (Var *) tle->expr;
RangeTblEntry *rte = rt_fetch(tleVar->varno, query->rtable);
Var *tleVar = (Var *) tle->expr;
RangeTblEntry *rte = rt_fetch(tleVar->varno, query->rtable);
if (rte == NULL)
{
return false;
}
if (rte->rtekind == RTE_RELATION || rte->rtekind == RTE_FUNCTION)
{
*baseVar = tleVar;
*baseRte = rte;
return true;
}
else if (rte->rtekind == RTE_SUBQUERY)
{
return ResolveBaseVarFromSubquery(tleVar, rte->subquery, baseVar, baseRte);
}
if (rte->rtekind == RTE_RELATION || rte->rtekind == RTE_FUNCTION)
{
*baseVar = tleVar;
*baseRte = rte;
return true;
}
else if (rte->rtekind == RTE_SUBQUERY)
{
return ResolveBaseVarFromSubquery(tleVar, rte->subquery, baseVar, baseRte);
}
return false;
return false;
}
@ -2718,28 +2723,30 @@ bool ResolveBaseVarFromSubquery(Var *var, Query *query,
* CheckPushDownConditionOnVarsForJoinPushdown checks if the inner variable
* from a join qual for a join pushdown. It returns true if it is valid,
* it is the partition column and hash distributed, otherwise it returns false.
*/
bool CheckPushDownConditionOnInnerVar(Var* innerVar, RangeTblEntry *rte)
*/
bool
CheckPushDownConditionOnInnerVar(Var *innerVar, RangeTblEntry *rte)
{
if (!innerVar || !rte)
{
return false;
}
if(innerVar->varattno == InvalidAttrNumber)
if (innerVar->varattno == InvalidAttrNumber)
{
return false;
}
CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(rte->relid);
if(!cacheEntry || GetCitusTableType(cacheEntry) != HASH_DISTRIBUTED)
if (!cacheEntry || GetCitusTableType(cacheEntry) != HASH_DISTRIBUTED)
{
return false;
}
/* Check if the inner variable is part of the distribution column */
if (cacheEntry->partitionColumn && innerVar->varattno == cacheEntry->partitionColumn->varattno)
if (cacheEntry->partitionColumn && innerVar->varattno ==
cacheEntry->partitionColumn->varattno)
{
return true;
}
@ -2754,25 +2761,27 @@ bool CheckPushDownConditionOnInnerVar(Var* innerVar, RangeTblEntry *rte)
* it computes the outer relation's range table index, the outer relation's
* range table entry, the inner (distributed) relation's range table entry, and the
* attribute number of the partition column in the outer relation.
*/
bool CheckPushDownFeasibilityAndComputeIndexes(JoinExpr *joinExpr, Query *query, int *outerRtIndex, RangeTblEntry **outerRte, RangeTblEntry **distRte, int *attnum)
*/
bool
CheckPushDownFeasibilityAndComputeIndexes(JoinExpr *joinExpr, Query *query,
int *outerRtIndex, RangeTblEntry **outerRte,
RangeTblEntry **distRte, int *attnum)
{
if(!IS_OUTER_JOIN(joinExpr->jointype))
if (!IS_OUTER_JOIN(joinExpr->jointype))
{
return false;
}
// TODO: generalize to right joins
if(joinExpr->jointype != JOIN_LEFT)
/* TODO: generalize to right joins */
if (joinExpr->jointype != JOIN_LEFT)
{
return false;
}
*outerRtIndex = (((RangeTblRef *)joinExpr->larg)->rtindex);
*outerRtIndex = (((RangeTblRef *) joinExpr->larg)->rtindex);
*outerRte = rt_fetch(*outerRtIndex, query->rtable);
if(!IsPushdownSafeForRTEInLeftJoin(*outerRte))
if (!IsPushdownSafeForRTEInLeftJoin(*outerRte))
{
return false;
}
@ -2780,7 +2789,8 @@ bool CheckPushDownFeasibilityAndComputeIndexes(JoinExpr *joinExpr, Query *query,
/* Push down for chained joins is not supported in this path. */
if (IsA(joinExpr->rarg, JoinExpr) || IsA(joinExpr->larg, JoinExpr))
{
ereport(DEBUG5, (errmsg("One side is a join expression, pushdown is not supported in this path.")));
ereport(DEBUG5, (errmsg(
"One side is a join expression, pushdown is not supported in this path.")));
return false;
}
@ -2799,7 +2809,7 @@ bool CheckPushDownFeasibilityAndComputeIndexes(JoinExpr *joinExpr, Query *query,
continue;
}
OpExpr *joinClauseExpr = castNode(OpExpr, joinClause);
Var *leftColumn = LeftColumnOrNULL(joinClauseExpr);
Var *rightColumn = RightColumnOrNULL(joinClauseExpr);
if (leftColumn == NULL || rightColumn == NULL)
@ -2808,12 +2818,13 @@ bool CheckPushDownFeasibilityAndComputeIndexes(JoinExpr *joinExpr, Query *query,
}
RangeTblEntry *rte;
Var *innerVar;
Var *innerVar;
if (leftColumn->varno == *outerRtIndex)
{
/* left column is the outer table of the comparison, get right */
rte = rt_fetch(rightColumn->varno, query->rtable);
innerVar = rightColumn;
/* additional constraints will be introduced on outer relation variable */
*attnum = leftColumn->varattno;
}
@ -2822,6 +2833,7 @@ bool CheckPushDownFeasibilityAndComputeIndexes(JoinExpr *joinExpr, Query *query,
/* right column is the outer table of the comparison, get left*/
rte = rt_fetch(leftColumn->varno, query->rtable);
innerVar = leftColumn;
/* additional constraints will be introduced on outer relation variable */
*attnum = rightColumn->varattno;
}
@ -2833,7 +2845,7 @@ bool CheckPushDownFeasibilityAndComputeIndexes(JoinExpr *joinExpr, Query *query,
/* the simple case, the inner table itself a Citus table */
if (rte && IsCitusTable(rte->relid))
{
if(CheckPushDownConditionOnInnerVar(innerVar, rte))
if (CheckPushDownConditionOnInnerVar(innerVar, rte))
{
*distRte = rte;
return true;
@ -2849,14 +2861,14 @@ bool CheckPushDownFeasibilityAndComputeIndexes(JoinExpr *joinExpr, Query *query,
{
if (baseRte && IsCitusTable(baseRte->relid))
{
if(CheckPushDownConditionOnInnerVar(baseVar, baseRte))
if (CheckPushDownConditionOnInnerVar(baseVar, baseRte))
{
*distRte = baseRte;
return true;
}
}
}
}
}
}
return false;
@ -2866,12 +2878,14 @@ bool CheckPushDownFeasibilityAndComputeIndexes(JoinExpr *joinExpr, Query *query,
/*
* Initializes input variables to call CheckPushDownFeasibilityAndComputeIndexes.
* See CheckPushDownFeasibilityAndComputeIndexes for more details.
*/
bool CheckPushDownFeasibilityLeftJoin(JoinExpr *joinExpr, Query *query)
*/
bool
CheckPushDownFeasibilityLeftJoin(JoinExpr *joinExpr, Query *query)
{
int outerRtIndex;
RangeTblEntry *outerRte = NULL;
RangeTblEntry *innerRte = NULL;
int attnum;
return CheckPushDownFeasibilityAndComputeIndexes(joinExpr, query, &outerRtIndex, &outerRte, &innerRte, &attnum);
}
return CheckPushDownFeasibilityAndComputeIndexes(joinExpr, query, &outerRtIndex,
&outerRte, &innerRte, &attnum);
}

View File

@ -52,8 +52,13 @@ extern bool IsRelationLocalTableOrMatView(Oid relationId);
extern bool ContainsReferencesToOuterQuery(Query *query);
extern void UpdateVarNosInNode(Node *node, Index newVarNo);
extern bool IsPushdownSafeForRTEInLeftJoin(RangeTblEntry *rte);
extern bool CheckPushDownFeasibilityAndComputeIndexes(JoinExpr *joinExpr, Query *query, int *outerRtIndex, RangeTblEntry **outerRte, RangeTblEntry **distRte, int *attnum);
extern bool CheckPushDownFeasibilityAndComputeIndexes(JoinExpr *joinExpr, Query *query,
int *outerRtIndex,
RangeTblEntry **outerRte,
RangeTblEntry **distRte,
int *attnum);
extern bool CheckPushDownFeasibilityLeftJoin(JoinExpr *joinExpr, Query *query);
bool ResolveBaseVarFromSubquery(Var *var, Query *query, Var **baseVar, RangeTblEntry **baseRte);
bool CheckPushDownConditionOnInnerVar(Var* innervar, RangeTblEntry *rte);
bool ResolveBaseVarFromSubquery(Var *var, Query *query, Var **baseVar,
RangeTblEntry **baseRte);
bool CheckPushDownConditionOnInnerVar(Var *innervar, RangeTblEntry *rte);
#endif /* RECURSIVE_PLANNING_H */