diff --git a/src/backend/distributed/planner/multi_join_order.c b/src/backend/distributed/planner/multi_join_order.c index 9b2342b20..e4aabbbb1 100644 --- a/src/backend/distributed/planner/multi_join_order.c +++ b/src/backend/distributed/planner/multi_join_order.c @@ -71,6 +71,8 @@ static JoinOrderNode * EvaluateJoinRules(List *joinedTableList, TableEntry *candidateTable, List *joinClauseList, JoinType joinType); static List * RangeTableIdList(List *tableList); +static JoinType JoinTypeBetweenTables(TableEntry *table1, TableEntry *table2, + JoinRestrictionContext *joinRestrictionContext); static RuleEvalFunction JoinRuleEvalFunction(JoinRuleType ruleType); static char * JoinRuleName(JoinRuleType ruleType); static JoinOrderNode * ReferenceJoin(JoinOrderNode *joinNode, TableEntry *candidateTable, @@ -98,7 +100,7 @@ static JoinOrderNode * CartesianProduct(JoinOrderNode *joinNode, static JoinOrderNode * MakeJoinOrderNode(TableEntry *tableEntry, JoinRuleType joinRuleType, List *partitionColumnList, char partitionMethod, - TableEntry *anchorTable); + TableEntry *anchorTable, JoinType joinType); /* @@ -325,6 +327,112 @@ JoinOrderList(List *tableEntryList, List *joinClauseList) } +/* + * JoinTypeBetweenTables returns join type between given tables. + */ +static JoinType +JoinTypeBetweenTables(TableEntry *table1, TableEntry *table2, + JoinRestrictionContext *joinRestrictionContext) +{ + uint32 rteIdx1 = table1->rangeTableId; + uint32 rteIdx2 = table2->rangeTableId; + + JoinRestriction *joinRestriction = NULL; + foreach_ptr(joinRestriction, joinRestrictionContext->joinRestrictionList) + { + if (bms_is_member(rteIdx1, joinRestriction->innerrelRelids) && + bms_is_member(rteIdx2, joinRestriction->outerrelRelids)) + { + return joinRestriction->joinType; + } + else if (bms_is_member(rteIdx2, joinRestriction->innerrelRelids) && + bms_is_member(rteIdx1, joinRestriction->outerrelRelids)) + { + return joinRestriction->joinType; + } + } + + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("no join is found between tables"))); +} + + +/* + * FixedJoinOrderList returns the best fixed join order according to + * applicable join rules for the nodes in the list. + */ +List * +FixedJoinOrderList(List *tableEntryList, List *joinClauseList, + JoinRestrictionContext *joinRestrictionContext) +{ + List *joinOrderList = NIL; + List *joinedTableList = NIL; + bool firstTable = true; + JoinOrderNode *currentJoinNode = NULL; + JoinOrderNode *nextJoinNode = NULL; + + int tableCount = list_length(tableEntryList); + int tableIdx; + for (tableIdx = 1; tableIdx < tableCount; tableIdx++) + { + TableEntry *currentTable = (TableEntry *) list_nth(tableEntryList, tableIdx - 1); + TableEntry *nextTable = (TableEntry *) list_nth(tableEntryList, tableIdx); + JoinType joinType = JoinTypeBetweenTables(currentTable, nextTable, + joinRestrictionContext); + + if (firstTable) + { + /* add first table into joinedtable list */ + joinedTableList = lappend(joinedTableList, currentTable); + + /* create join node for the first table */ + JoinRuleType joinRule = JOIN_RULE_INVALID_FIRST; + Oid relationId = currentTable->relationId; + uint32 tableId = currentTable->rangeTableId; + Var *partitionColumn = PartitionColumn(relationId, tableId); + char partitionMethod = PartitionMethod(relationId); + + currentJoinNode = MakeJoinOrderNode(currentTable, joinRule, + list_make1(partitionColumn), + partitionMethod, + currentTable, joinType); + joinOrderList = lappend(joinOrderList, currentJoinNode); + + firstTable = false; + } + + nextJoinNode = EvaluateJoinRules(joinedTableList, + currentJoinNode, + nextTable, + joinClauseList, joinType); + + if (nextJoinNode == NULL) + { + /* there are no plans that we can create, time to error */ + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg( + "complex joins are only supported when all distributed " + "tables are joined on their distribution columns with " + "equal operator"))); + } + + Assert(nextJoinNode != NULL); + joinOrderList = lappend(joinOrderList, nextJoinNode); + joinedTableList = lappend(joinedTableList, nextTable); + + currentJoinNode = nextJoinNode; + } + + /* if logging is enabled, print join order */ + if (LogMultiJoinOrder) + { + PrintJoinOrderList(joinOrderList); + } + + return joinOrderList; +} + + /* * JoinOrderForTable creates a join order whose first element is the given first * table. To determine each subsequent element in the join order, the function @@ -349,7 +457,7 @@ JoinOrderForTable(TableEntry *firstTable, List *tableEntryList, List *joinClause JoinOrderNode *firstJoinNode = MakeJoinOrderNode(firstTable, firstJoinRule, list_make1(firstPartitionColumn), firstPartitionMethod, - firstTable); + firstTable, JOIN_INNER); /* add first node to the join order */ List *joinOrderList = list_make1(firstJoinNode); @@ -831,7 +939,7 @@ ReferenceJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable, return MakeJoinOrderNode(candidateTable, REFERENCE_JOIN, currentJoinNode->partitionColumnList, currentJoinNode->partitionMethod, - currentJoinNode->anchorTable); + currentJoinNode->anchorTable, joinType); } @@ -883,7 +991,7 @@ CartesianProductReferenceJoin(JoinOrderNode *currentJoinNode, TableEntry *candid return MakeJoinOrderNode(candidateTable, CARTESIAN_PRODUCT_REFERENCE_JOIN, currentJoinNode->partitionColumnList, currentJoinNode->partitionMethod, - currentJoinNode->anchorTable); + currentJoinNode->anchorTable, joinType); } @@ -955,7 +1063,7 @@ LocalJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable, JoinOrderNode *nextJoinNode = MakeJoinOrderNode(candidateTable, LOCAL_PARTITION_JOIN, currentPartitionColumnList, currentPartitionMethod, - currentAnchorTable); + currentAnchorTable, joinType); return nextJoinNode; @@ -984,12 +1092,6 @@ SinglePartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable, Var *candidatePartitionColumn = PartitionColumn(relationId, tableId); char candidatePartitionMethod = PartitionMethod(relationId); - /* outer joins are not supported yet */ - if (IS_OUTER_JOIN(joinType)) - { - return NULL; - } - /* * If we previously dual-hash re-partitioned the tables for a join or made * cartesian product, we currently don't allow a single-repartition join. @@ -1018,14 +1120,14 @@ SinglePartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable, return MakeJoinOrderNode(candidateTable, SINGLE_HASH_PARTITION_JOIN, currentPartitionColumnList, currentPartitionMethod, - currentAnchorTable); + currentAnchorTable, joinType); } else if (candidatePartitionMethod == DISTRIBUTE_BY_RANGE) { return MakeJoinOrderNode(candidateTable, SINGLE_RANGE_PARTITION_JOIN, currentPartitionColumnList, currentPartitionMethod, - currentAnchorTable); + currentAnchorTable, joinType); } } @@ -1057,7 +1159,7 @@ SinglePartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable, SINGLE_HASH_PARTITION_JOIN, candidatePartitionColumnList, candidatePartitionMethod, - candidateTable); + candidateTable, joinType); } else if (currentPartitionMethod == DISTRIBUTE_BY_RANGE) { @@ -1065,7 +1167,7 @@ SinglePartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable, SINGLE_RANGE_PARTITION_JOIN, candidatePartitionColumnList, candidatePartitionMethod, - candidateTable); + candidateTable, joinType); } } } @@ -1151,7 +1253,7 @@ DualPartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable, DUAL_PARTITION_JOIN, NIL, REDISTRIBUTE_BY_HASH, - NULL); + NULL, joinType); } return NULL; @@ -1212,7 +1314,7 @@ CartesianProduct(JoinOrderNode *currentJoinNode, TableEntry *candidateTable, return MakeJoinOrderNode(candidateTable, CARTESIAN_PRODUCT, currentJoinNode->partitionColumnList, currentJoinNode->partitionMethod, - NULL); + NULL, joinType); } return NULL; @@ -1223,12 +1325,12 @@ CartesianProduct(JoinOrderNode *currentJoinNode, TableEntry *candidateTable, JoinOrderNode * MakeJoinOrderNode(TableEntry *tableEntry, JoinRuleType joinRuleType, List *partitionColumnList, char partitionMethod, - TableEntry *anchorTable) + TableEntry *anchorTable, JoinType joinType) { JoinOrderNode *joinOrderNode = palloc0(sizeof(JoinOrderNode)); joinOrderNode->tableEntry = tableEntry; joinOrderNode->joinRuleType = joinRuleType; - joinOrderNode->joinType = JOIN_INNER; + joinOrderNode->joinType = joinType; joinOrderNode->partitionColumnList = partitionColumnList; joinOrderNode->partitionMethod = partitionMethod; joinOrderNode->joinClauseList = NIL; diff --git a/src/backend/distributed/planner/multi_logical_planner.c b/src/backend/distributed/planner/multi_logical_planner.c index 7e665b567..2b365e9aa 100644 --- a/src/backend/distributed/planner/multi_logical_planner.c +++ b/src/backend/distributed/planner/multi_logical_planner.c @@ -153,7 +153,8 @@ MultiLogicalPlanCreate(Query *originalQuery, Query *queryTree, } else { - multiQueryNode = MultiNodeTree(queryTree); + multiQueryNode = MultiNodeTree(queryTree, + plannerRestrictionContext->joinRestrictionContext); } /* add a root node to serve as the permanent handle to the tree */ @@ -562,7 +563,7 @@ SubqueryEntryList(Query *queryTree) * group, and limit nodes if they appear in the original query tree. */ MultiNode * -MultiNodeTree(Query *queryTree) +MultiNodeTree(Query *queryTree, JoinRestrictionContext *joinRestrictionContext) { List *rangeTableList = queryTree->rtable; List *targetEntryList = queryTree->targetList; @@ -637,7 +638,8 @@ MultiNodeTree(Query *queryTree) } /* recursively create child nested multitree */ - MultiNode *subqueryExtendedNode = MultiNodeTree(subqueryTree); + MultiNode *subqueryExtendedNode = MultiNodeTree(subqueryTree, + joinRestrictionContext); SetChild((MultiUnaryNode *) subqueryCollectNode, (MultiNode *) subqueryNode); SetChild((MultiUnaryNode *) subqueryNode, subqueryExtendedNode); @@ -652,7 +654,6 @@ MultiNodeTree(Query *queryTree) * entry list's memory, and JoinOrderList() shallow copies the list's * elements. */ - joinClauseList = JoinClauseList(whereClauseList); tableEntryList = UsedTableEntryList(queryTree); /* build the list of multi table nodes */ @@ -661,8 +662,24 @@ MultiNodeTree(Query *queryTree) /* add collect nodes on top of the multi table nodes */ collectTableList = AddMultiCollectNodes(tableNodeList); - /* find best join order for commutative inner joins */ - joinOrderList = JoinOrderList(tableEntryList, joinClauseList); + if (FindNodeMatchingCheckFunction((Node *) queryTree->jointree, IsOuterJoinExpr)) + { + /* consider outer join qualifications as well */ + List *allRestrictionClauseList = QualifierList(queryTree->jointree); + joinClauseList = JoinClauseList(allRestrictionClauseList); + + /* we simply donot commute joins as we have at least 1 outer join */ + joinOrderList = FixedJoinOrderList(tableEntryList, joinClauseList, + joinRestrictionContext); + } + else + { + /* only consider base qualifications */ + joinClauseList = JoinClauseList(whereClauseList); + + /* find best join order for commutative inner joins */ + joinOrderList = JoinOrderList(tableEntryList, joinClauseList); + } /* build join tree using the join order and collected tables */ joinTreeNode = MultiJoinTree(joinOrderList, collectTableList, joinClauseList); diff --git a/src/backend/distributed/planner/query_pushdown_planning.c b/src/backend/distributed/planner/query_pushdown_planning.c index 5cae19497..60a6b7a3a 100644 --- a/src/backend/distributed/planner/query_pushdown_planning.c +++ b/src/backend/distributed/planner/query_pushdown_planning.c @@ -82,7 +82,6 @@ int ValuesMaterializationThreshold = 100; /* Local functions forward declarations */ static bool JoinTreeContainsSubqueryWalker(Node *joinTreeNode, void *context); static bool IsFunctionOrValuesRTE(Node *node); -static bool IsOuterJoinExpr(Node *node); static bool WindowPartitionOnDistributionColumn(Query *query); static DeferredErrorMessage * DeferErrorIfFromClauseRecurs(Query *queryTree); static RecurringTuplesType FromClauseRecurringTupleType(Query *queryTree); @@ -184,26 +183,6 @@ ShouldUseSubqueryPushDown(Query *originalQuery, Query *rewrittenQuery, return true; } - /* - * We handle outer joins as subqueries, since the join order planner - * does not know how to handle them. - */ - if (FindNodeMatchingCheckFunction((Node *) originalQuery->jointree, IsOuterJoinExpr)) - { - return true; - } - - /* - * Original query may not have an outer join while rewritten query does. - * We should push down in this case. - * An example of this is https://github.com/citusdata/citus/issues/2739 - * where postgres pulls-up the outer-join in the subquery. - */ - if (FindNodeMatchingCheckFunction((Node *) rewrittenQuery->jointree, IsOuterJoinExpr)) - { - return true; - } - /* * Some unsupported join clauses in logical planner * may be supported by subquery pushdown planner. @@ -391,7 +370,7 @@ IsNodeSubquery(Node *node) /* * IsOuterJoinExpr returns whether the given node is an outer join expression. */ -static bool +bool IsOuterJoinExpr(Node *node) { bool isOuterJoin = false; diff --git a/src/include/distributed/multi_join_order.h b/src/include/distributed/multi_join_order.h index 92d1edaf2..21ee86806 100644 --- a/src/include/distributed/multi_join_order.h +++ b/src/include/distributed/multi_join_order.h @@ -19,6 +19,7 @@ #include "nodes/pg_list.h" #include "nodes/primnodes.h" +#include "server/distributed/distributed_planner.h" /* @@ -91,6 +92,8 @@ extern bool EnableSingleHashRepartitioning; /* Function declaration for determining table join orders */ extern List * JoinExprList(FromExpr *fromExpr); extern List * JoinOrderList(List *rangeTableEntryList, List *joinClauseList); +extern List * FixedJoinOrderList(List *rangeTableEntryList, List *joinClauseList, + JoinRestrictionContext *joinRestrictionContext); extern bool IsApplicableJoinClause(List *leftTableIdList, uint32 rightTableId, Node *joinClause); extern List * ApplicableJoinClauses(List *leftTableIdList, uint32 rightTableId, diff --git a/src/include/distributed/multi_logical_planner.h b/src/include/distributed/multi_logical_planner.h index 189170358..6f8ff6aec 100644 --- a/src/include/distributed/multi_logical_planner.h +++ b/src/include/distributed/multi_logical_planner.h @@ -229,7 +229,8 @@ extern MultiProject * MultiProjectNode(List *targetEntryList); extern MultiExtendedOp * MultiExtendedOpNode(Query *queryTree, Query *originalQuery); extern DeferredErrorMessage * DeferErrorIfUnsupportedSubqueryRepartition(Query * subqueryTree); -extern MultiNode * MultiNodeTree(Query *queryTree); +extern MultiNode * MultiNodeTree(Query *queryTree, + JoinRestrictionContext *joinRestrictionContext); #endif /* MULTI_LOGICAL_PLANNER_H */ diff --git a/src/include/distributed/query_pushdown_planning.h b/src/include/distributed/query_pushdown_planning.h index 061a4a730..d59fcf999 100644 --- a/src/include/distributed/query_pushdown_planning.h +++ b/src/include/distributed/query_pushdown_planning.h @@ -33,6 +33,7 @@ extern bool HasEmptyJoinTree(Query *query); extern bool WhereOrHavingClauseContainsSubquery(Query *query); extern bool TargetListContainsSubquery(List *targetList); extern bool SafeToPushdownWindowFunction(Query *query, StringInfo *errorDetail); +extern bool IsOuterJoinExpr(Node *node); extern MultiNode * SubqueryMultiNodeTree(Query *originalQuery, Query *queryTree, PlannerRestrictionContext *