fixed join order when we have outer join

outer-join-noncolocated-dist-tables
aykutbozkurt 2022-12-18 21:25:24 +03:00
parent 47ff03123b
commit 17c6187448
6 changed files with 151 additions and 48 deletions

View File

@ -71,6 +71,8 @@ static JoinOrderNode * EvaluateJoinRules(List *joinedTableList,
TableEntry *candidateTable, TableEntry *candidateTable,
List *joinClauseList, JoinType joinType); List *joinClauseList, JoinType joinType);
static List * RangeTableIdList(List *tableList); static List * RangeTableIdList(List *tableList);
static JoinType JoinTypeBetweenTables(TableEntry *table1, TableEntry *table2,
JoinRestrictionContext *joinRestrictionContext);
static RuleEvalFunction JoinRuleEvalFunction(JoinRuleType ruleType); static RuleEvalFunction JoinRuleEvalFunction(JoinRuleType ruleType);
static char * JoinRuleName(JoinRuleType ruleType); static char * JoinRuleName(JoinRuleType ruleType);
static JoinOrderNode * ReferenceJoin(JoinOrderNode *joinNode, TableEntry *candidateTable, static JoinOrderNode * ReferenceJoin(JoinOrderNode *joinNode, TableEntry *candidateTable,
@ -98,7 +100,7 @@ static JoinOrderNode * CartesianProduct(JoinOrderNode *joinNode,
static JoinOrderNode * MakeJoinOrderNode(TableEntry *tableEntry, static JoinOrderNode * MakeJoinOrderNode(TableEntry *tableEntry,
JoinRuleType joinRuleType, JoinRuleType joinRuleType,
List *partitionColumnList, char partitionMethod, List *partitionColumnList, char partitionMethod,
TableEntry *anchorTable); TableEntry *anchorTable, JoinType joinType);
/* /*
@ -325,6 +327,112 @@ JoinOrderList(List *tableEntryList, List *joinClauseList)
} }
/*
* JoinTypeBetweenTables returns join type between given tables.
*/
static JoinType
JoinTypeBetweenTables(TableEntry *table1, TableEntry *table2,
JoinRestrictionContext *joinRestrictionContext)
{
uint32 rteIdx1 = table1->rangeTableId;
uint32 rteIdx2 = table2->rangeTableId;
JoinRestriction *joinRestriction = NULL;
foreach_ptr(joinRestriction, joinRestrictionContext->joinRestrictionList)
{
if (bms_is_member(rteIdx1, joinRestriction->innerrelRelids) &&
bms_is_member(rteIdx2, joinRestriction->outerrelRelids))
{
return joinRestriction->joinType;
}
else if (bms_is_member(rteIdx2, joinRestriction->innerrelRelids) &&
bms_is_member(rteIdx1, joinRestriction->outerrelRelids))
{
return joinRestriction->joinType;
}
}
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("no join is found between tables")));
}
/*
* FixedJoinOrderList returns the best fixed join order according to
* applicable join rules for the nodes in the list.
*/
List *
FixedJoinOrderList(List *tableEntryList, List *joinClauseList,
JoinRestrictionContext *joinRestrictionContext)
{
List *joinOrderList = NIL;
List *joinedTableList = NIL;
bool firstTable = true;
JoinOrderNode *currentJoinNode = NULL;
JoinOrderNode *nextJoinNode = NULL;
int tableCount = list_length(tableEntryList);
int tableIdx;
for (tableIdx = 1; tableIdx < tableCount; tableIdx++)
{
TableEntry *currentTable = (TableEntry *) list_nth(tableEntryList, tableIdx - 1);
TableEntry *nextTable = (TableEntry *) list_nth(tableEntryList, tableIdx);
JoinType joinType = JoinTypeBetweenTables(currentTable, nextTable,
joinRestrictionContext);
if (firstTable)
{
/* add first table into joinedtable list */
joinedTableList = lappend(joinedTableList, currentTable);
/* create join node for the first table */
JoinRuleType joinRule = JOIN_RULE_INVALID_FIRST;
Oid relationId = currentTable->relationId;
uint32 tableId = currentTable->rangeTableId;
Var *partitionColumn = PartitionColumn(relationId, tableId);
char partitionMethod = PartitionMethod(relationId);
currentJoinNode = MakeJoinOrderNode(currentTable, joinRule,
list_make1(partitionColumn),
partitionMethod,
currentTable, joinType);
joinOrderList = lappend(joinOrderList, currentJoinNode);
firstTable = false;
}
nextJoinNode = EvaluateJoinRules(joinedTableList,
currentJoinNode,
nextTable,
joinClauseList, joinType);
if (nextJoinNode == NULL)
{
/* there are no plans that we can create, time to error */
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg(
"complex joins are only supported when all distributed "
"tables are joined on their distribution columns with "
"equal operator")));
}
Assert(nextJoinNode != NULL);
joinOrderList = lappend(joinOrderList, nextJoinNode);
joinedTableList = lappend(joinedTableList, nextTable);
currentJoinNode = nextJoinNode;
}
/* if logging is enabled, print join order */
if (LogMultiJoinOrder)
{
PrintJoinOrderList(joinOrderList);
}
return joinOrderList;
}
/* /*
* JoinOrderForTable creates a join order whose first element is the given first * JoinOrderForTable creates a join order whose first element is the given first
* table. To determine each subsequent element in the join order, the function * table. To determine each subsequent element in the join order, the function
@ -349,7 +457,7 @@ JoinOrderForTable(TableEntry *firstTable, List *tableEntryList, List *joinClause
JoinOrderNode *firstJoinNode = MakeJoinOrderNode(firstTable, firstJoinRule, JoinOrderNode *firstJoinNode = MakeJoinOrderNode(firstTable, firstJoinRule,
list_make1(firstPartitionColumn), list_make1(firstPartitionColumn),
firstPartitionMethod, firstPartitionMethod,
firstTable); firstTable, JOIN_INNER);
/* add first node to the join order */ /* add first node to the join order */
List *joinOrderList = list_make1(firstJoinNode); List *joinOrderList = list_make1(firstJoinNode);
@ -831,7 +939,7 @@ ReferenceJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
return MakeJoinOrderNode(candidateTable, REFERENCE_JOIN, return MakeJoinOrderNode(candidateTable, REFERENCE_JOIN,
currentJoinNode->partitionColumnList, currentJoinNode->partitionColumnList,
currentJoinNode->partitionMethod, currentJoinNode->partitionMethod,
currentJoinNode->anchorTable); currentJoinNode->anchorTable, joinType);
} }
@ -883,7 +991,7 @@ CartesianProductReferenceJoin(JoinOrderNode *currentJoinNode, TableEntry *candid
return MakeJoinOrderNode(candidateTable, CARTESIAN_PRODUCT_REFERENCE_JOIN, return MakeJoinOrderNode(candidateTable, CARTESIAN_PRODUCT_REFERENCE_JOIN,
currentJoinNode->partitionColumnList, currentJoinNode->partitionColumnList,
currentJoinNode->partitionMethod, currentJoinNode->partitionMethod,
currentJoinNode->anchorTable); currentJoinNode->anchorTable, joinType);
} }
@ -955,7 +1063,7 @@ LocalJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
JoinOrderNode *nextJoinNode = MakeJoinOrderNode(candidateTable, LOCAL_PARTITION_JOIN, JoinOrderNode *nextJoinNode = MakeJoinOrderNode(candidateTable, LOCAL_PARTITION_JOIN,
currentPartitionColumnList, currentPartitionColumnList,
currentPartitionMethod, currentPartitionMethod,
currentAnchorTable); currentAnchorTable, joinType);
return nextJoinNode; return nextJoinNode;
@ -984,12 +1092,6 @@ SinglePartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
Var *candidatePartitionColumn = PartitionColumn(relationId, tableId); Var *candidatePartitionColumn = PartitionColumn(relationId, tableId);
char candidatePartitionMethod = PartitionMethod(relationId); char candidatePartitionMethod = PartitionMethod(relationId);
/* outer joins are not supported yet */
if (IS_OUTER_JOIN(joinType))
{
return NULL;
}
/* /*
* If we previously dual-hash re-partitioned the tables for a join or made * If we previously dual-hash re-partitioned the tables for a join or made
* cartesian product, we currently don't allow a single-repartition join. * cartesian product, we currently don't allow a single-repartition join.
@ -1018,14 +1120,14 @@ SinglePartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
return MakeJoinOrderNode(candidateTable, SINGLE_HASH_PARTITION_JOIN, return MakeJoinOrderNode(candidateTable, SINGLE_HASH_PARTITION_JOIN,
currentPartitionColumnList, currentPartitionColumnList,
currentPartitionMethod, currentPartitionMethod,
currentAnchorTable); currentAnchorTable, joinType);
} }
else if (candidatePartitionMethod == DISTRIBUTE_BY_RANGE) else if (candidatePartitionMethod == DISTRIBUTE_BY_RANGE)
{ {
return MakeJoinOrderNode(candidateTable, SINGLE_RANGE_PARTITION_JOIN, return MakeJoinOrderNode(candidateTable, SINGLE_RANGE_PARTITION_JOIN,
currentPartitionColumnList, currentPartitionColumnList,
currentPartitionMethod, currentPartitionMethod,
currentAnchorTable); currentAnchorTable, joinType);
} }
} }
@ -1057,7 +1159,7 @@ SinglePartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
SINGLE_HASH_PARTITION_JOIN, SINGLE_HASH_PARTITION_JOIN,
candidatePartitionColumnList, candidatePartitionColumnList,
candidatePartitionMethod, candidatePartitionMethod,
candidateTable); candidateTable, joinType);
} }
else if (currentPartitionMethod == DISTRIBUTE_BY_RANGE) else if (currentPartitionMethod == DISTRIBUTE_BY_RANGE)
{ {
@ -1065,7 +1167,7 @@ SinglePartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
SINGLE_RANGE_PARTITION_JOIN, SINGLE_RANGE_PARTITION_JOIN,
candidatePartitionColumnList, candidatePartitionColumnList,
candidatePartitionMethod, candidatePartitionMethod,
candidateTable); candidateTable, joinType);
} }
} }
} }
@ -1151,7 +1253,7 @@ DualPartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
DUAL_PARTITION_JOIN, DUAL_PARTITION_JOIN,
NIL, NIL,
REDISTRIBUTE_BY_HASH, REDISTRIBUTE_BY_HASH,
NULL); NULL, joinType);
} }
return NULL; return NULL;
@ -1212,7 +1314,7 @@ CartesianProduct(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
return MakeJoinOrderNode(candidateTable, CARTESIAN_PRODUCT, return MakeJoinOrderNode(candidateTable, CARTESIAN_PRODUCT,
currentJoinNode->partitionColumnList, currentJoinNode->partitionColumnList,
currentJoinNode->partitionMethod, currentJoinNode->partitionMethod,
NULL); NULL, joinType);
} }
return NULL; return NULL;
@ -1223,12 +1325,12 @@ CartesianProduct(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
JoinOrderNode * JoinOrderNode *
MakeJoinOrderNode(TableEntry *tableEntry, JoinRuleType joinRuleType, MakeJoinOrderNode(TableEntry *tableEntry, JoinRuleType joinRuleType,
List *partitionColumnList, char partitionMethod, List *partitionColumnList, char partitionMethod,
TableEntry *anchorTable) TableEntry *anchorTable, JoinType joinType)
{ {
JoinOrderNode *joinOrderNode = palloc0(sizeof(JoinOrderNode)); JoinOrderNode *joinOrderNode = palloc0(sizeof(JoinOrderNode));
joinOrderNode->tableEntry = tableEntry; joinOrderNode->tableEntry = tableEntry;
joinOrderNode->joinRuleType = joinRuleType; joinOrderNode->joinRuleType = joinRuleType;
joinOrderNode->joinType = JOIN_INNER; joinOrderNode->joinType = joinType;
joinOrderNode->partitionColumnList = partitionColumnList; joinOrderNode->partitionColumnList = partitionColumnList;
joinOrderNode->partitionMethod = partitionMethod; joinOrderNode->partitionMethod = partitionMethod;
joinOrderNode->joinClauseList = NIL; joinOrderNode->joinClauseList = NIL;

View File

@ -153,7 +153,8 @@ MultiLogicalPlanCreate(Query *originalQuery, Query *queryTree,
} }
else else
{ {
multiQueryNode = MultiNodeTree(queryTree); multiQueryNode = MultiNodeTree(queryTree,
plannerRestrictionContext->joinRestrictionContext);
} }
/* add a root node to serve as the permanent handle to the tree */ /* add a root node to serve as the permanent handle to the tree */
@ -562,7 +563,7 @@ SubqueryEntryList(Query *queryTree)
* group, and limit nodes if they appear in the original query tree. * group, and limit nodes if they appear in the original query tree.
*/ */
MultiNode * MultiNode *
MultiNodeTree(Query *queryTree) MultiNodeTree(Query *queryTree, JoinRestrictionContext *joinRestrictionContext)
{ {
List *rangeTableList = queryTree->rtable; List *rangeTableList = queryTree->rtable;
List *targetEntryList = queryTree->targetList; List *targetEntryList = queryTree->targetList;
@ -637,7 +638,8 @@ MultiNodeTree(Query *queryTree)
} }
/* recursively create child nested multitree */ /* recursively create child nested multitree */
MultiNode *subqueryExtendedNode = MultiNodeTree(subqueryTree); MultiNode *subqueryExtendedNode = MultiNodeTree(subqueryTree,
joinRestrictionContext);
SetChild((MultiUnaryNode *) subqueryCollectNode, (MultiNode *) subqueryNode); SetChild((MultiUnaryNode *) subqueryCollectNode, (MultiNode *) subqueryNode);
SetChild((MultiUnaryNode *) subqueryNode, subqueryExtendedNode); SetChild((MultiUnaryNode *) subqueryNode, subqueryExtendedNode);
@ -652,7 +654,6 @@ MultiNodeTree(Query *queryTree)
* entry list's memory, and JoinOrderList() shallow copies the list's * entry list's memory, and JoinOrderList() shallow copies the list's
* elements. * elements.
*/ */
joinClauseList = JoinClauseList(whereClauseList);
tableEntryList = UsedTableEntryList(queryTree); tableEntryList = UsedTableEntryList(queryTree);
/* build the list of multi table nodes */ /* build the list of multi table nodes */
@ -661,8 +662,24 @@ MultiNodeTree(Query *queryTree)
/* add collect nodes on top of the multi table nodes */ /* add collect nodes on top of the multi table nodes */
collectTableList = AddMultiCollectNodes(tableNodeList); collectTableList = AddMultiCollectNodes(tableNodeList);
/* find best join order for commutative inner joins */ if (FindNodeMatchingCheckFunction((Node *) queryTree->jointree, IsOuterJoinExpr))
joinOrderList = JoinOrderList(tableEntryList, joinClauseList); {
/* consider outer join qualifications as well */
List *allRestrictionClauseList = QualifierList(queryTree->jointree);
joinClauseList = JoinClauseList(allRestrictionClauseList);
/* we simply donot commute joins as we have at least 1 outer join */
joinOrderList = FixedJoinOrderList(tableEntryList, joinClauseList,
joinRestrictionContext);
}
else
{
/* only consider base qualifications */
joinClauseList = JoinClauseList(whereClauseList);
/* find best join order for commutative inner joins */
joinOrderList = JoinOrderList(tableEntryList, joinClauseList);
}
/* build join tree using the join order and collected tables */ /* build join tree using the join order and collected tables */
joinTreeNode = MultiJoinTree(joinOrderList, collectTableList, joinClauseList); joinTreeNode = MultiJoinTree(joinOrderList, collectTableList, joinClauseList);

View File

@ -82,7 +82,6 @@ int ValuesMaterializationThreshold = 100;
/* Local functions forward declarations */ /* Local functions forward declarations */
static bool JoinTreeContainsSubqueryWalker(Node *joinTreeNode, void *context); static bool JoinTreeContainsSubqueryWalker(Node *joinTreeNode, void *context);
static bool IsFunctionOrValuesRTE(Node *node); static bool IsFunctionOrValuesRTE(Node *node);
static bool IsOuterJoinExpr(Node *node);
static bool WindowPartitionOnDistributionColumn(Query *query); static bool WindowPartitionOnDistributionColumn(Query *query);
static DeferredErrorMessage * DeferErrorIfFromClauseRecurs(Query *queryTree); static DeferredErrorMessage * DeferErrorIfFromClauseRecurs(Query *queryTree);
static RecurringTuplesType FromClauseRecurringTupleType(Query *queryTree); static RecurringTuplesType FromClauseRecurringTupleType(Query *queryTree);
@ -184,26 +183,6 @@ ShouldUseSubqueryPushDown(Query *originalQuery, Query *rewrittenQuery,
return true; return true;
} }
/*
* We handle outer joins as subqueries, since the join order planner
* does not know how to handle them.
*/
if (FindNodeMatchingCheckFunction((Node *) originalQuery->jointree, IsOuterJoinExpr))
{
return true;
}
/*
* Original query may not have an outer join while rewritten query does.
* We should push down in this case.
* An example of this is https://github.com/citusdata/citus/issues/2739
* where postgres pulls-up the outer-join in the subquery.
*/
if (FindNodeMatchingCheckFunction((Node *) rewrittenQuery->jointree, IsOuterJoinExpr))
{
return true;
}
/* /*
* Some unsupported join clauses in logical planner * Some unsupported join clauses in logical planner
* may be supported by subquery pushdown planner. * may be supported by subquery pushdown planner.
@ -391,7 +370,7 @@ IsNodeSubquery(Node *node)
/* /*
* IsOuterJoinExpr returns whether the given node is an outer join expression. * IsOuterJoinExpr returns whether the given node is an outer join expression.
*/ */
static bool bool
IsOuterJoinExpr(Node *node) IsOuterJoinExpr(Node *node)
{ {
bool isOuterJoin = false; bool isOuterJoin = false;

View File

@ -19,6 +19,7 @@
#include "nodes/pg_list.h" #include "nodes/pg_list.h"
#include "nodes/primnodes.h" #include "nodes/primnodes.h"
#include "server/distributed/distributed_planner.h"
/* /*
@ -91,6 +92,8 @@ extern bool EnableSingleHashRepartitioning;
/* Function declaration for determining table join orders */ /* Function declaration for determining table join orders */
extern List * JoinExprList(FromExpr *fromExpr); extern List * JoinExprList(FromExpr *fromExpr);
extern List * JoinOrderList(List *rangeTableEntryList, List *joinClauseList); extern List * JoinOrderList(List *rangeTableEntryList, List *joinClauseList);
extern List * FixedJoinOrderList(List *rangeTableEntryList, List *joinClauseList,
JoinRestrictionContext *joinRestrictionContext);
extern bool IsApplicableJoinClause(List *leftTableIdList, uint32 rightTableId, extern bool IsApplicableJoinClause(List *leftTableIdList, uint32 rightTableId,
Node *joinClause); Node *joinClause);
extern List * ApplicableJoinClauses(List *leftTableIdList, uint32 rightTableId, extern List * ApplicableJoinClauses(List *leftTableIdList, uint32 rightTableId,

View File

@ -229,7 +229,8 @@ extern MultiProject * MultiProjectNode(List *targetEntryList);
extern MultiExtendedOp * MultiExtendedOpNode(Query *queryTree, Query *originalQuery); extern MultiExtendedOp * MultiExtendedOpNode(Query *queryTree, Query *originalQuery);
extern DeferredErrorMessage * DeferErrorIfUnsupportedSubqueryRepartition(Query * extern DeferredErrorMessage * DeferErrorIfUnsupportedSubqueryRepartition(Query *
subqueryTree); subqueryTree);
extern MultiNode * MultiNodeTree(Query *queryTree); extern MultiNode * MultiNodeTree(Query *queryTree,
JoinRestrictionContext *joinRestrictionContext);
#endif /* MULTI_LOGICAL_PLANNER_H */ #endif /* MULTI_LOGICAL_PLANNER_H */

View File

@ -33,6 +33,7 @@ extern bool HasEmptyJoinTree(Query *query);
extern bool WhereOrHavingClauseContainsSubquery(Query *query); extern bool WhereOrHavingClauseContainsSubquery(Query *query);
extern bool TargetListContainsSubquery(List *targetList); extern bool TargetListContainsSubquery(List *targetList);
extern bool SafeToPushdownWindowFunction(Query *query, StringInfo *errorDetail); extern bool SafeToPushdownWindowFunction(Query *query, StringInfo *errorDetail);
extern bool IsOuterJoinExpr(Node *node);
extern MultiNode * SubqueryMultiNodeTree(Query *originalQuery, extern MultiNode * SubqueryMultiNodeTree(Query *originalQuery,
Query *queryTree, Query *queryTree,
PlannerRestrictionContext * PlannerRestrictionContext *