/*------------------------------------------------------------------------- * * multi_join_order.c * * Routines for constructing the join order list using a rule-based approach. * * Copyright (c) Citus Data, Inc. * * $Id$ * *------------------------------------------------------------------------- */ #include "postgres.h" #include "distributed/pg_version_constants.h" #include #include "access/nbtree.h" #include "access/heapam.h" #include "access/htup_details.h" #include "catalog/pg_am.h" #include "distributed/listutils.h" #include "distributed/metadata_cache.h" #include "distributed/multi_join_order.h" #include "distributed/multi_physical_planner.h" #include "distributed/multi_router_planner.h" #include "distributed/pg_dist_partition.h" #include "distributed/shard_pruning.h" #include "distributed/worker_protocol.h" #include "lib/stringinfo.h" #include "optimizer/optimizer.h" #include "utils/builtins.h" #include "nodes/nodeFuncs.h" #include "nodes/nodes.h" #include "nodes/pathnodes.h" #include "utils/builtins.h" #include "utils/datum.h" #include "utils/lsyscache.h" #include "utils/rel.h" #include "utils/syscache.h" /* Config variables managed via guc.c */ bool LogMultiJoinOrder = false; /* print join order as a debugging aid */ bool EnableSingleHashRepartitioning = false; /* Function pointer type definition for join rule evaluation functions */ typedef JoinOrderNode *(*RuleEvalFunction) (JoinOrderNode *currentJoinNode, TableEntry *candidateTable, List *applicableJoinClauses, JoinType joinType); static char *RuleNameArray[JOIN_RULE_LAST] = { 0 }; /* ordered join rule names */ static RuleEvalFunction RuleEvalFunctionArray[JOIN_RULE_LAST] = { 0 }; /* join rules */ /* Local functions forward declarations */ static bool JoinExprListWalker(Node *node, List **joinList); static List * JoinOrderForTable(TableEntry *firstTable, List *tableEntryList, List *joinRestrictInfoListList, List *generatedEcJoinClauseList); static List * BestJoinOrder(List *candidateJoinOrders); static 
List * FewestOfJoinRuleType(List *candidateJoinOrders, JoinRuleType ruleType); static uint32 JoinRuleTypeCount(List *joinOrder, JoinRuleType ruleTypeToCount); static List * LatestLargeDataTransfer(List *candidateJoinOrders); static void PrintJoinOrderList(List *joinOrder); static uint32 LargeDataTransferLocation(List *joinOrder); static List * TableEntryListDifference(List *lhsTableList, List *rhsTableList); static bool ConvertSemiToInnerInJoinInfoContext(JoinInfoContext *joinOrderContext); static bool JoinInfoContextHasAntiJoin(JoinInfoContext *joinOrderContext); static List * FindApplicableJoinClausesForTables(List *joinRestrictInfoListList, List *generatedEcJoinClauseList, List *lhsTableIdList, uint32 rhsTableId, JoinType joinType); static const char * JoinTypeName(JoinType jointype); static List * ExtractPushdownJoinRestrictInfos(List *joinRestrictInfoList, RestrictInfo *joinRestrictInfo, JoinType joinType); /* Local functions forward declarations for join evaluations */ static JoinOrderNode * EvaluateJoinRules(List *joinedTableList, JoinOrderNode *currentJoinNode, TableEntry *candidateTable, List *joinRestrictInfoListList, List *generatedEcJoinClauseList, JoinType joinType); static List * RangeTableIdList(List *tableList); static TableEntry * TableEntryByRangeTableId(List *tableEntryList, uint32 rangeTableIdx); static RuleEvalFunction JoinRuleEvalFunction(JoinRuleType ruleType); static char * JoinRuleName(JoinRuleType ruleType); static JoinOrderNode * ReferenceJoin(JoinOrderNode *joinNode, TableEntry *candidateTable, List *applicableJoinClauses, JoinType joinType); static JoinOrderNode * CartesianProductReferenceJoin(JoinOrderNode *joinNode, TableEntry *candidateTable, List *applicableJoinClauses, JoinType joinType); static JoinOrderNode * LocalJoin(JoinOrderNode *joinNode, TableEntry *candidateTable, List *applicableJoinClauses, JoinType joinType); static bool JoinOnColumns(List *currentPartitionColumnList, Var *candidatePartitionColumn, List 
*joinClauseList); static JoinOrderNode * SinglePartitionJoin(JoinOrderNode *joinNode, TableEntry *candidateTable, List *applicableJoinClauses, JoinType joinType); static JoinOrderNode * DualPartitionJoin(JoinOrderNode *joinNode, TableEntry *candidateTable, List *applicableJoinClauses, JoinType joinType); static JoinOrderNode * CartesianProduct(JoinOrderNode *joinNode, TableEntry *candidateTable, List *applicableJoinClauses, JoinType joinType); static JoinOrderNode * MakeJoinOrderNode(TableEntry *tableEntry, JoinRuleType joinRuleType, List *partitionColumnList, char partitionMethod, TableEntry *anchorTable, JoinType joinType); /* * JoinExprList flattens the JoinExpr nodes in the FROM expression and translate implicit * joins to inner joins. This function does not consider (right-)nested joins. */ List * JoinExprList(FromExpr *fromExpr) { List *joinList = NIL; List *fromList = fromExpr->fromlist; ListCell *fromCell = NULL; foreach(fromCell, fromList) { Node *nextNode = (Node *) lfirst(fromCell); if (joinList != NIL) { /* multiple nodes in from clause, add an explicit join between them */ int nextRangeTableIndex = 0; /* find the left most range table in this node */ ExtractLeftMostRangeTableIndex((Node *) fromExpr, &nextRangeTableIndex); RangeTblRef *nextRangeTableRef = makeNode(RangeTblRef); nextRangeTableRef->rtindex = nextRangeTableIndex; /* join the previous node with nextRangeTableRef */ JoinExpr *newJoinExpr = makeNode(JoinExpr); newJoinExpr->jointype = JOIN_INNER; newJoinExpr->rarg = (Node *) nextRangeTableRef; newJoinExpr->quals = NULL; joinList = lappend(joinList, newJoinExpr); } JoinExprListWalker(nextNode, &joinList); } return joinList; } /* * JoinExprListWalker the JoinExpr nodes in a join tree in the order in which joins are * to be executed. If there are no joins then no elements are added to joinList. 
*/
static bool
JoinExprListWalker(Node *node, List **joinList)
{
	if (node == NULL)
	{
		return false;
	}

	if (!IsA(node, JoinExpr))
	{
		/* not a join: let the generic walker descend into the children */
		return expression_tree_walker(node, JoinExprListWalker, joinList);
	}

	/*
	 * Descend into the left argument first so that deeper (earlier) joins are
	 * appended before this one. The right argument is not visited.
	 */
	JoinExpr *joinExpr = (JoinExpr *) node;
	bool leftWalkResult = JoinExprListWalker(joinExpr->larg, joinList);

	*joinList = lappend(*joinList, joinExpr);

	return leftWalkResult;
}


/*
 * ExtractLeftMostRangeTableIndex stores the range table index of the left-most
 * leaf of a join tree into rangeTableIndex. It returns true once a RangeTblRef
 * has been found, which also stops the generic tree walk.
 */
bool
ExtractLeftMostRangeTableIndex(Node *node, int *rangeTableIndex)
{
	Assert(node != NULL);

	if (IsA(node, JoinExpr))
	{
		/* only the left branch can contain the left-most leaf */
		JoinExpr *joinExpr = (JoinExpr *) node;

		return ExtractLeftMostRangeTableIndex(joinExpr->larg, rangeTableIndex);
	}

	if (IsA(node, RangeTblRef))
	{
		*rangeTableIndex = ((RangeTblRef *) node)->rtindex;

		return true;
	}

	return expression_tree_walker(node, ExtractLeftMostRangeTableIndex,
								  rangeTableIndex);
}


/*
 * JoinOnColumns determines whether two columns are joined by a given join clause list.
*/ static bool JoinOnColumns(List *currentPartitionColumnList, Var *candidateColumn, List *joinClauseList) { if (candidateColumn == NULL || list_length(currentPartitionColumnList) == 0) { /* * LocalJoin can only be happening if we have both a current column and a target * column, otherwise we are not joining two local tables */ return false; } Var *currentColumn = NULL; foreach_ptr(currentColumn, currentPartitionColumnList) { Node *joinClause = NULL; foreach_ptr(joinClause, joinClauseList) { if (!NodeIsEqualsOpExpr(joinClause)) { continue; } OpExpr *joinClauseOpExpr = castNode(OpExpr, joinClause); Var *leftColumn = LeftColumnOrNULL(joinClauseOpExpr); Var *rightColumn = RightColumnOrNULL(joinClauseOpExpr); /* * Check if both join columns and both partition key columns match, since the * current and candidate column's can't be NULL we know they won't match if either * of the columns resolved to NULL above. */ if (equal(leftColumn, currentColumn) && equal(rightColumn, candidateColumn)) { return true; } if (equal(leftColumn, candidateColumn) && equal(rightColumn, currentColumn)) { return true; } } } return false; } /* * NodeIsEqualsOpExpr checks if the node is an OpExpr, where the operator * matches OperatorImplementsEquality. */ bool NodeIsEqualsOpExpr(Node *node) { if (!IsA(node, OpExpr)) { return false; } OpExpr *opExpr = castNode(OpExpr, node); return OperatorImplementsEquality(opExpr->opno); } /* * JoinOrderList calculates the best join order and join rules that apply given * the list of tables and join clauses. First, the function generates a set of * candidate join orders, each with a different table as its first table. Then, * the function chooses among these candidates the join order that transfers the * least amount of data across the network, and returns this join order. 
*/ List * JoinOrderList(List *tableEntryList, List *joinRestrictInfoListList, List *generatedEcJoinClauseList, List *pseudoClauseList) { List *candidateJoinOrderList = NIL; ListCell *tableEntryCell = NULL; foreach(tableEntryCell, tableEntryList) { TableEntry *startingTable = (TableEntry *) lfirst(tableEntryCell); /* each candidate join order starts with a different table */ List *candidateJoinOrder = JoinOrderForTable(startingTable, tableEntryList, joinRestrictInfoListList, generatedEcJoinClauseList); if (candidateJoinOrder != NULL) { candidateJoinOrderList = lappend(candidateJoinOrderList, candidateJoinOrder); } } if (list_length(candidateJoinOrderList) == 0) { /* there are no plans that we can create, time to error */ ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("complex joins are only supported when all distributed " "tables are joined on their distribution columns with " "equal operator"))); } List *bestJoinOrder = BestJoinOrder(candidateJoinOrderList); /* if logging is enabled, print join order */ if (LogMultiJoinOrder) { PrintJoinOrderList(bestJoinOrder); } return bestJoinOrder; } /* * TableEntryByRangeTableId returns TableEntry from given list with specified range table id. */ static TableEntry * TableEntryByRangeTableId(List *tableEntryList, uint32 rangeTableIdx) { TableEntry *tableEntry = NULL; foreach_ptr(tableEntry, tableEntryList) { if (tableEntry->rangeTableId == rangeTableIdx) { return tableEntry; } } ereport(ERROR, errmsg("Unexpected table entry!")); } /* * ExtractPushdownJoinRestrictInfos extracts clauses, which are pushed down and treated like a normal filter, * inside given join restriction infos. 
*/ static List * ExtractPushdownJoinRestrictInfos(List *restrictInfoListOfJoin, RestrictInfo *joinRestrictInfo, JoinType joinType) { List *joinFilterRestrictInfoList = NIL; /* left and right relids of the join restriction */ Bitmapset *joinRelids = bms_union(joinRestrictInfo->left_relids, joinRestrictInfo->right_relids); /* todo:aykut joinRelids should be taken from planner context */ RestrictInfo *restrictInfo = NULL; foreach_ptr(restrictInfo, restrictInfoListOfJoin) { if (!restrictInfo->can_join && RINFO_IS_PUSHED_DOWN(restrictInfo, joinRelids)) { joinFilterRestrictInfoList = lappend(joinFilterRestrictInfoList, restrictInfo); } } return joinFilterRestrictInfoList; } /* * FindApplicableJoinClausesForTables finds all applicable join clauses for given * left hand side tables and right hand side table. It encapsulates pushdownable * and nonpushdownable parts of the join clauses inside ApplicableJoinClauseContext. */ static List * FindApplicableJoinClausesForTables(List *joinRestrictInfoListList, List *generatedEcJoinClauseList, List *lhsTableIdList, uint32 rhsTableId, JoinType joinType) { List *applicableJoinClauseContextList = NIL; List *joinRestrictInfoList = NIL; foreach_ptr(joinRestrictInfoList, joinRestrictInfoListList) { RestrictInfo *joinRestrictInfo = NULL; foreach_ptr(joinRestrictInfo, joinRestrictInfoList) { Node *restrictClause = (Node *) joinRestrictInfo->clause; if (joinRestrictInfo->can_join && IsApplicableJoinClause(lhsTableIdList, rhsTableId, restrictClause)) { List *pushdownableJoinRestrictInfoList = ExtractPushdownJoinRestrictInfos( joinRestrictInfoList, joinRestrictInfo, joinType); List *pushdownableJoinRestrictClauseList = get_all_actual_clauses(pushdownableJoinRestrictInfoList); List *nonPushdownableJoinRestrictInfoList = list_difference( joinRestrictInfoList, pushdownableJoinRestrictInfoList); List *nonPushdownableJoinRestrictClauseList = get_all_actual_clauses(nonPushdownableJoinRestrictInfoList); ApplicableJoinClauseContext 
*applicableJoinClauseContext = palloc0( sizeof(ApplicableJoinClauseContext)); applicableJoinClauseContext->joinClauseList = get_all_actual_clauses( joinRestrictInfoList); applicableJoinClauseContext->pushdownableJoinClauseList = pushdownableJoinRestrictClauseList; applicableJoinClauseContext->nonPushdownableJoinClauseList = nonPushdownableJoinRestrictClauseList; applicableJoinClauseContextList = lappend(applicableJoinClauseContextList, applicableJoinClauseContext); } } } /* we can find applicable join clause inside generated implicit join clauses */ if (joinType == JOIN_INNER) { Node *ecClause = NULL; foreach_ptr(ecClause, generatedEcJoinClauseList) { if (IsApplicableJoinClause(lhsTableIdList, rhsTableId, ecClause)) { List *generatedJoinClauseList = list_make1(ecClause); ApplicableJoinClauseContext *applicableJoinClauseContext = palloc0( sizeof(ApplicableJoinClauseContext)); applicableJoinClauseContext->joinClauseList = generatedJoinClauseList; applicableJoinClauseContext->pushdownableJoinClauseList = NIL; applicableJoinClauseContext->nonPushdownableJoinClauseList = generatedJoinClauseList; applicableJoinClauseContextList = lappend(applicableJoinClauseContextList, applicableJoinClauseContext); } } } /* add an empty join clause list to be evaluated by cartesian rules */ List *emptyClauseList = NIL; ApplicableJoinClauseContext *emptyApplicableJoinClauseContext = palloc0( sizeof(ApplicableJoinClauseContext)); emptyApplicableJoinClauseContext->joinClauseList = emptyClauseList; emptyApplicableJoinClauseContext->pushdownableJoinClauseList = emptyClauseList; emptyApplicableJoinClauseContext->nonPushdownableJoinClauseList = emptyClauseList; applicableJoinClauseContextList = lappend(applicableJoinClauseContextList, emptyApplicableJoinClauseContext); return applicableJoinClauseContextList; } /* * FixedJoinOrderList returns the best fixed join order according to * applicable join rules for the nodes in the list. 
*/ List * FixedJoinOrderList(List *tableEntryList, JoinInfoContext *joinInfoContext, List *joinRestrictInfoListList, List *generatedEcJoinClauseList, List *pseudoClauseList) { /* we donot support anti joins as ruleutils files cannot deparse JOIN_ANTI */ if (JoinInfoContextHasAntiJoin(joinInfoContext)) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg( "complex joins are only supported when all distributed " "tables are joined on their distribution columns with " "equal operator"))); } /* * converts semi joins in given join info context to inner joins as we checked * at query_pushdown_planning that planner did not actually plan any semi join. * ruleutils files cannot deparse JOIN_SEMI, so we convert those to JOIN_INNER. */ ConvertSemiToInnerInJoinInfoContext(joinInfoContext); List *joinOrderList = NIL; List *joinedTableList = NIL; JoinOrderNode *nextJoinNode = NULL; /* fetch joininfo */ JoinInfo *firstJoinInfo = (JoinInfo *) list_nth(joinInfoContext->joinInfoList, 0); /* add first table into joinedtable list */ TableEntry *firstTable = TableEntryByRangeTableId(tableEntryList, firstJoinInfo->leftTableIdx); joinedTableList = lappend(joinedTableList, firstTable); /* create join node for the first table */ JoinRuleType joinRule = JOIN_RULE_INVALID_FIRST; Oid relationId = firstTable->relationId; uint32 tableId = firstTable->rangeTableId; Var *partitionColumn = PartitionColumn(relationId, tableId); char partitionMethod = PartitionMethod(relationId); JoinOrderNode *currentJoinNode = MakeJoinOrderNode(firstTable, joinRule, list_make1(partitionColumn), partitionMethod, firstTable, firstJoinInfo->joinType); joinOrderList = lappend(joinOrderList, currentJoinNode); JoinInfo *joinInfo = NULL; foreach_ptr(joinInfo, joinInfoContext->joinInfoList) { TableEntry *nextTable = TableEntryByRangeTableId(tableEntryList, joinInfo->rightTableIdx); nextJoinNode = EvaluateJoinRules(joinedTableList, currentJoinNode, nextTable, joinRestrictInfoListList, generatedEcJoinClauseList, 
joinInfo->joinType); if (nextJoinNode == NULL) { /* there are no plans that we can create, time to error */ ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg( "complex joins are only supported when all distributed " "tables are joined on their distribution columns with " "equal operator"))); } Assert(nextJoinNode != NULL); joinOrderList = lappend(joinOrderList, nextJoinNode); joinedTableList = lappend(joinedTableList, nextTable); currentJoinNode = nextJoinNode; } /* if logging is enabled, print join order */ if (LogMultiJoinOrder) { PrintJoinOrderList(joinOrderList); } return joinOrderList; } /* * JoinInfoContextHasAntiJoin returns true if given join info context contains * an anti join. */ static bool JoinInfoContextHasAntiJoin(JoinInfoContext *joinOrderContext) { JoinInfo *joinInfo = NULL; foreach_ptr(joinInfo, joinOrderContext->joinInfoList) { if (joinInfo->joinType == JOIN_ANTI) { return true; } } return false; } /* * ConvertSemiToInnerInJoinInfoContext converts semi joins in given join info context * to inner joins as we checked at query_pushdown_planning that planner did not actually * plan any semi join. ruleutils files cannot deparse JOIN_SEMI, so we convert those * to JOIN_INNER. */ static bool ConvertSemiToInnerInJoinInfoContext(JoinInfoContext *joinOrderContext) { JoinInfo *joinInfo = NULL; foreach_ptr(joinInfo, joinOrderContext->joinInfoList) { if (joinInfo->joinType == JOIN_SEMI) { joinInfo->joinType = JOIN_INNER; } } return false; } /* * JoinOrderForTable creates a join order whose first element is the given first * table. To determine each subsequent element in the join order, the function * then chooses the table that has the lowest ranking join rule, and with which * it can join the table to the previous table in the join order. The function * repeats this until it determines all elements in the join order list, and * returns this list. 
*/ static List * JoinOrderForTable(TableEntry *firstTable, List *tableEntryList, List *joinRestrictInfoListList, List *generatedEcJoinClauseList) { JoinRuleType firstJoinRule = JOIN_RULE_INVALID_FIRST; int joinedTableCount = 1; int totalTableCount = list_length(tableEntryList); /* create join node for the first table */ Oid firstRelationId = firstTable->relationId; uint32 firstTableId = firstTable->rangeTableId; Var *firstPartitionColumn = PartitionColumn(firstRelationId, firstTableId); char firstPartitionMethod = PartitionMethod(firstRelationId); JoinOrderNode *firstJoinNode = MakeJoinOrderNode(firstTable, firstJoinRule, list_make1(firstPartitionColumn), firstPartitionMethod, firstTable, JOIN_INNER); /* add first node to the join order */ List *joinOrderList = list_make1(firstJoinNode); List *joinedTableList = list_make1(firstTable); JoinOrderNode *currentJoinNode = firstJoinNode; /* loop until we join all remaining tables */ while (joinedTableCount < totalTableCount) { ListCell *pendingTableCell = NULL; JoinOrderNode *nextJoinNode = NULL; JoinRuleType nextJoinRuleType = JOIN_RULE_LAST; List *pendingTableList = TableEntryListDifference(tableEntryList, joinedTableList); /* * Iterate over all pending tables, and find the next best table to * join. The best table is the one whose join rule requires the least * amount of data transfer. 
*/ foreach(pendingTableCell, pendingTableList) { TableEntry *pendingTable = (TableEntry *) lfirst(pendingTableCell); JoinType joinType = JOIN_INNER; /* evaluate all join rules for this pending table */ JoinOrderNode *pendingJoinNode = EvaluateJoinRules(joinedTableList, currentJoinNode, pendingTable, joinRestrictInfoListList, generatedEcJoinClauseList, joinType); if (pendingJoinNode == NULL) { /* no join order could be generated, we try our next pending table */ continue; } /* if this rule is better than previous ones, keep it */ JoinRuleType pendingJoinRuleType = pendingJoinNode->joinRuleType; if (pendingJoinRuleType < nextJoinRuleType) { nextJoinNode = pendingJoinNode; nextJoinRuleType = pendingJoinRuleType; } } if (nextJoinNode == NULL) { /* * There is no next join node found, this will repeat indefinitely hence we * bail and let JoinOrderList try a new initial table */ return NULL; } Assert(nextJoinNode != NULL); TableEntry *nextJoinedTable = nextJoinNode->tableEntry; /* add next node to the join order */ joinOrderList = lappend(joinOrderList, nextJoinNode); joinedTableList = lappend(joinedTableList, nextJoinedTable); currentJoinNode = nextJoinNode; joinedTableCount++; } return joinOrderList; } /* * BestJoinOrder takes in a list of candidate join orders, and determines the * best join order among these candidates. The function uses two heuristics for * this. First, the function chooses join orders that have the fewest number of * join operators that cause large data transfers. Second, the function chooses * join orders where large data transfers occur later in the execution. */ static List * BestJoinOrder(List *candidateJoinOrders) { uint32 highestValidIndex = JOIN_RULE_LAST - 1; uint32 candidateCount PG_USED_FOR_ASSERTS_ONLY = 0; /* * We start with the highest ranking rule type (cartesian product), and walk * over these rules in reverse order. For each rule type, we then keep join * orders that only contain the fewest number of join rules of that type. 
* * For example, the algorithm chooses join orders like the following: * (a) The algorithm prefers join orders with 2 cartesian products (CP) to * those that have 3 or more, if there isn't a join order with fewer CPs. * (b) Assuming that all join orders have the same number of CPs, the * algorithm prefers join orders with 2 dual partitions (DP) to those that * have 3 or more, if there isn't a join order with fewer DPs; and so * forth. */ for (uint32 ruleTypeIndex = highestValidIndex; ruleTypeIndex > 0; ruleTypeIndex--) { JoinRuleType ruleType = (JoinRuleType) ruleTypeIndex; candidateJoinOrders = FewestOfJoinRuleType(candidateJoinOrders, ruleType); } /* * If there is a tie, we pick candidate join orders where large data * transfers happen at later stages of query execution. This results in more * data being filtered via joins, selections, and projections earlier on. */ candidateJoinOrders = LatestLargeDataTransfer(candidateJoinOrders); /* we should have at least one join order left after optimizations */ candidateCount = list_length(candidateJoinOrders); Assert(candidateCount > 0); /* * If there still is a tie, we pick the join order whose relation appeared * earliest in the query's range table entry list. */ List *bestJoinOrder = (List *) linitial(candidateJoinOrders); return bestJoinOrder; } /* * FewestOfJoinRuleType finds join orders that have the fewest number of times * the given join rule occurs in the candidate join orders, and filters all * other join orders. For example, if four candidate join orders have a join * rule appearing 3, 5, 3, and 6 times, only two join orders that have the join * rule appearing 3 times will be returned. 
*/
static List *
FewestOfJoinRuleType(List *candidateJoinOrders, JoinRuleType ruleType)
{
	List *winningJoinOrders = NULL;
	uint32 lowestCountSoFar = INT_MAX;

	List *candidateJoinOrder = NULL;
	foreach_ptr(candidateJoinOrder, candidateJoinOrders)
	{
		uint32 occurrenceCount = JoinRuleTypeCount(candidateJoinOrder, ruleType);

		if (occurrenceCount < lowestCountSoFar)
		{
			/* strictly better: forget everything collected so far */
			winningJoinOrders = list_make1(candidateJoinOrder);
			lowestCountSoFar = occurrenceCount;
		}
		else if (occurrenceCount == lowestCountSoFar)
		{
			/* ties with the current best are kept, in candidate order */
			winningJoinOrders = lappend(winningJoinOrders, candidateJoinOrder);
		}
	}

	return winningJoinOrders;
}


/* Returns how many nodes of the join order carry the given join rule type. */
static uint32
JoinRuleTypeCount(List *joinOrder, JoinRuleType ruleTypeToCount)
{
	uint32 matchCount = 0;

	JoinOrderNode *joinOrderNode = NULL;
	foreach_ptr(joinOrderNode, joinOrder)
	{
		if (joinOrderNode->joinRuleType == ruleTypeToCount)
		{
			matchCount++;
		}
	}

	return matchCount;
}


/*
 * LatestLargeDataTransfer finds and returns join orders where a large data
 * transfer join rule occurs as late as possible in the join order. Late large
 * data transfers result in more data being filtered before data gets shuffled
 * in the network.
*/ static List * LatestLargeDataTransfer(List *candidateJoinOrders) { List *latestJoinOrders = NIL; uint32 latestJoinLocation = 0; ListCell *joinOrderCell = NULL; foreach(joinOrderCell, candidateJoinOrders) { List *joinOrder = (List *) lfirst(joinOrderCell); uint32 joinRuleLocation = LargeDataTransferLocation(joinOrder); if (joinRuleLocation == latestJoinLocation) { latestJoinOrders = lappend(latestJoinOrders, joinOrder); } else if (joinRuleLocation > latestJoinLocation) { latestJoinOrders = list_make1(joinOrder); latestJoinLocation = joinRuleLocation; } } return latestJoinOrders; } /* * LargeDataTransferLocation finds the first location of a large data transfer * join rule, and returns that location. If the join order does not have any * large data transfer rules, the function returns one location past the end of * the join order list. */ static uint32 LargeDataTransferLocation(List *joinOrder) { uint32 joinRuleLocation = 0; ListCell *joinOrderNodeCell = NULL; foreach(joinOrderNodeCell, joinOrder) { JoinOrderNode *joinOrderNode = (JoinOrderNode *) lfirst(joinOrderNodeCell); JoinRuleType joinRuleType = joinOrderNode->joinRuleType; /* we consider the following join rules to cause large data transfers */ if (joinRuleType == SINGLE_HASH_PARTITION_JOIN || joinRuleType == SINGLE_RANGE_PARTITION_JOIN || joinRuleType == DUAL_PARTITION_JOIN || joinRuleType == CARTESIAN_PRODUCT) { break; } joinRuleLocation++; } return joinRuleLocation; } /* Output join name for given join type */ const char * JoinTypeName(JoinType jointype) { switch (jointype) { case JOIN_INNER: { return "INNER"; } case JOIN_LEFT: { return "LEFT"; } case JOIN_RIGHT: { return "RIGHT"; } case JOIN_FULL: { return "FULL"; } default: /* Shouldn't come here, but protect from buggy code. */ elog(ERROR, "unsupported join type %d", jointype); } /* Keep compiler happy */ return NULL; } /* Prints the join order list and join rules for debugging purposes. 
*/ static void PrintJoinOrderList(List *joinOrder) { StringInfo printBuffer = makeStringInfo(); ListCell *joinOrderNodeCell = NULL; bool firstJoinNode = true; foreach(joinOrderNodeCell, joinOrder) { JoinOrderNode *joinOrderNode = (JoinOrderNode *) lfirst(joinOrderNodeCell); Oid relationId = joinOrderNode->tableEntry->relationId; char *relationName = get_rel_name(relationId); if (firstJoinNode) { appendStringInfo(printBuffer, "[ \"%s\" ]", relationName); firstJoinNode = false; } else { JoinRuleType ruleType = (JoinRuleType) joinOrderNode->joinRuleType; char *ruleName = JoinRuleName(ruleType); JoinType joinType = joinOrderNode->joinType; const char *joinTypeName = JoinTypeName(joinType); appendStringInfo(printBuffer, "[ %s(%s) ", ruleName, joinTypeName); appendStringInfo(printBuffer, "\"%s\" ]", relationName); } } ereport(LOG, (errmsg("join order: %s", printBuffer->data))); } /* * TableEntryListDifference returns a list containing table entries that are in * the left-hand side table list, but not in the right-hand side table list. */ static List * TableEntryListDifference(List *lhsTableList, List *rhsTableList) { List *tableListDifference = NIL; ListCell *lhsTableCell = NULL; foreach(lhsTableCell, lhsTableList) { TableEntry *lhsTableEntry = (TableEntry *) lfirst(lhsTableCell); ListCell *rhsTableCell = NULL; bool lhsTableEntryExists = false; foreach(rhsTableCell, rhsTableList) { TableEntry *rhsTableEntry = (TableEntry *) lfirst(rhsTableCell); if ((lhsTableEntry->relationId == rhsTableEntry->relationId) && (lhsTableEntry->rangeTableId == rhsTableEntry->rangeTableId)) { lhsTableEntryExists = true; } } if (!lhsTableEntryExists) { tableListDifference = lappend(tableListDifference, lhsTableEntry); } } return tableListDifference; } /* * EvaluateJoinRules takes in a list of already joined tables and a candidate * next table, evaluates different join rules between the two tables, and finds * the best join rule that applies. 
The function returns the applicable join * order node which includes the join rule and the partition information. */ static JoinOrderNode * EvaluateJoinRules(List *joinedTableList, JoinOrderNode *currentJoinNode, TableEntry *candidateTable, List *joinRestrictInfoListList, List *generatedEcJoinClauseList, JoinType joinType) { JoinOrderNode *nextJoinNode = NULL; uint32 lowestValidIndex = JOIN_RULE_INVALID_FIRST + 1; uint32 highestValidIndex = JOIN_RULE_LAST - 1; /* * We first find all applicable join clauses between already joined tables * and the candidate table. */ List *joinedTableIdList = RangeTableIdList(joinedTableList); uint32 candidateTableId = candidateTable->rangeTableId; List *applicableJoinClauseContextList = FindApplicableJoinClausesForTables( joinRestrictInfoListList, generatedEcJoinClauseList, joinedTableIdList, candidateTableId, joinType); /* we then evaluate all join rules in order */ for (uint32 ruleIndex = lowestValidIndex; ruleIndex <= highestValidIndex; ruleIndex++) { JoinRuleType ruleType = (JoinRuleType) ruleIndex; RuleEvalFunction ruleEvalFunction = JoinRuleEvalFunction(ruleType); ApplicableJoinClauseContext *applicableJoinClauseContext = NULL; foreach_ptr(applicableJoinClauseContext, applicableJoinClauseContextList) { List *applicableJoinClauseList = applicableJoinClauseContext->nonPushdownableJoinClauseList; nextJoinNode = (*ruleEvalFunction)(currentJoinNode, candidateTable, applicableJoinClauseList, joinType); /* return after finding the first join rule that applies */ if (nextJoinNode != NULL) { nextJoinNode->joinType = joinType; nextJoinNode->joinClauseList = applicableJoinClauseContext->nonPushdownableJoinClauseList; nextJoinNode->applicableJoinClauseContext = applicableJoinClauseContext; return nextJoinNode; } } } Assert(nextJoinNode == NULL); return NULL; } /* Extracts range table identifiers from the given table list, and returns them. 
*/
static List *
RangeTableIdList(List *tableList)
{
	List *idList = NIL;

	TableEntry *tableEntry = NULL;
	foreach_ptr(tableEntry, tableList)
	{
		uint32 rangeTableId = tableEntry->rangeTableId;

		/* the result is an integer list, not a pointer list */
		idList = lappend_int(idList, rangeTableId);
	}

	return idList;
}


/*
 * JoinRuleEvalFunction returns the rule evaluation function that corresponds to
 * the given rule type. The lookup table is filled in on the first call; both
 * single partition rule types (range and hash) share SinglePartitionJoin.
 */
static RuleEvalFunction
JoinRuleEvalFunction(JoinRuleType ruleType)
{
	static bool lookupTableReady = false;

	if (!lookupTableReady)
	{
		RuleEvalFunctionArray[REFERENCE_JOIN] = &ReferenceJoin;
		RuleEvalFunctionArray[LOCAL_PARTITION_JOIN] = &LocalJoin;
		RuleEvalFunctionArray[SINGLE_RANGE_PARTITION_JOIN] = &SinglePartitionJoin;
		RuleEvalFunctionArray[SINGLE_HASH_PARTITION_JOIN] = &SinglePartitionJoin;
		RuleEvalFunctionArray[DUAL_PARTITION_JOIN] = &DualPartitionJoin;
		RuleEvalFunctionArray[CARTESIAN_PRODUCT_REFERENCE_JOIN] =
			&CartesianProductReferenceJoin;
		RuleEvalFunctionArray[CARTESIAN_PRODUCT] = &CartesianProduct;

		lookupTableReady = true;
	}

	RuleEvalFunction evalFunction = RuleEvalFunctionArray[ruleType];
	Assert(evalFunction != NULL);

	return evalFunction;
}


/* Returns a string name for the given join rule type.
*/ static char * JoinRuleName(JoinRuleType ruleType) { static bool ruleNamesInitialized = false; if (!ruleNamesInitialized) { /* use strdup() to be independent of memory contexts */ RuleNameArray[REFERENCE_JOIN] = strdup("reference join"); RuleNameArray[LOCAL_PARTITION_JOIN] = strdup("local partition join"); RuleNameArray[SINGLE_HASH_PARTITION_JOIN] = strdup("single hash partition join"); RuleNameArray[SINGLE_RANGE_PARTITION_JOIN] = strdup("single range partition join"); RuleNameArray[DUAL_PARTITION_JOIN] = strdup("dual partition join"); RuleNameArray[CARTESIAN_PRODUCT_REFERENCE_JOIN] = strdup( "cartesian product reference join"); RuleNameArray[CARTESIAN_PRODUCT] = strdup("cartesian product"); ruleNamesInitialized = true; } char *ruleName = RuleNameArray[ruleType]; Assert(ruleName != NULL); return ruleName; } /* * ReferenceJoin evaluates if the candidate table is a reference table for inner, * left and anti join. For right join, current join node must be represented by * a reference table. For full join, both of them must be a reference table. */ static JoinOrderNode * ReferenceJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable, List *applicableJoinClauses, JoinType joinType) { int applicableJoinCount = list_length(applicableJoinClauses); if (applicableJoinCount <= 0) { return NULL; } bool leftIsReferenceTable = IsCitusTableType( currentJoinNode->tableEntry->relationId, REFERENCE_TABLE); bool rightIsReferenceTable = IsCitusTableType(candidateTable->relationId, REFERENCE_TABLE); if (!IsSupportedReferenceJoin(joinType, leftIsReferenceTable, rightIsReferenceTable)) { return NULL; } return MakeJoinOrderNode(candidateTable, REFERENCE_JOIN, currentJoinNode->partitionColumnList, currentJoinNode->partitionMethod, currentJoinNode->anchorTable, joinType); } /* * IsSupportedReferenceJoin checks if with this join type we can safely do a simple join * on the reference table on all the workers. 
*/ bool IsSupportedReferenceJoin(JoinType joinType, bool leftIsReferenceTable, bool rightIsReferenceTable) { if ((joinType == JOIN_INNER || joinType == JOIN_LEFT || joinType == JOIN_ANTI) && rightIsReferenceTable) { return true; } else if ((joinType == JOIN_RIGHT) && leftIsReferenceTable) { return true; } else if (joinType == JOIN_FULL && leftIsReferenceTable && rightIsReferenceTable) { return true; } return false; } /* * ReferenceJoin evaluates if the candidate table is a reference table for inner, * left and anti join. For right join, current join node must be represented by * a reference table. For full join, both of them must be a reference table. */ static JoinOrderNode * CartesianProductReferenceJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable, List *applicableJoinClauses, JoinType joinType) { bool leftIsReferenceTable = IsCitusTableType( currentJoinNode->tableEntry->relationId, REFERENCE_TABLE); bool rightIsReferenceTable = IsCitusTableType(candidateTable->relationId, REFERENCE_TABLE); if (!IsSupportedReferenceJoin(joinType, leftIsReferenceTable, rightIsReferenceTable)) { return NULL; } return MakeJoinOrderNode(candidateTable, CARTESIAN_PRODUCT_REFERENCE_JOIN, currentJoinNode->partitionColumnList, currentJoinNode->partitionMethod, currentJoinNode->anchorTable, joinType); } /* * LocalJoin takes the current partition key column and the candidate table's * partition key column and the partition method for each table. The function * then evaluates if tables in the join order and the candidate table can be * joined locally, without any data transfers. If they can, the function returns * a join order node for a local join. Otherwise, the function returns null. * * Anchor table is used to decide whether the JoinOrderNode can be joined * locally with the candidate table. That table is updated by each join type * applied over JoinOrderNode. Note that, we lost the anchor table after * dual partitioning and cartesian product. 
*/
static JoinOrderNode *
LocalJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
		  List *applicableJoinClauses, JoinType joinType)
{
	Oid candidateRelationId = candidateTable->relationId;
	uint32 candidateTableId = candidateTable->rangeTableId;
	Var *candidateColumn = PartitionColumn(candidateRelationId, candidateTableId);
	List *partitionColumns = currentJoinNode->partitionColumnList;
	char candidateMethod = PartitionMethod(candidateRelationId);
	char currentMethod = currentJoinNode->partitionMethod;
	TableEntry *anchorTable = currentJoinNode->anchorTable;

	/*
	 * A missing anchor table means an earlier dual repartition or cartesian
	 * product discarded it; a local join is not allowed in that case.
	 */
	if (anchorTable == NULL)
	{
		return NULL;
	}

	/* both sides have to use the same partition method */
	if (candidateMethod != currentMethod)
	{
		return NULL;
	}

	/* the join has to be on the partition columns */
	if (!JoinOnColumns(partitionColumns, candidateColumn, applicableJoinClauses))
	{
		return NULL;
	}

	/* shard interval lists must have 1-1 matching for local joins */
	if (!CoPartitionedTables(anchorTable->relationId, candidateRelationId))
	{
		return NULL;
	}

	/*
	 * Remember the candidate's partition column on the resulting node, so that
	 * later joins that are colocated with the candidate table can be
	 * recognized as local joins too.
	 */
	partitionColumns = list_append_unique(partitionColumns, candidateColumn);

	return MakeJoinOrderNode(candidateTable, LOCAL_PARTITION_JOIN,
							 partitionColumns, currentMethod,
							 anchorTable, joinType);
}


/*
 * SinglePartitionJoin takes the current and the candidate table's partition keys
 * and methods.
The function then evaluates if either "tables in the join order"
 * or the candidate table is already partitioned on a join column. If they are,
 * the function returns a join order node with the already partitioned column as
 * the next partition key. Otherwise, the function returns null.
 */
static JoinOrderNode *
SinglePartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
					List *applicableJoinClauses, JoinType joinType)
{
	List *currentColumns = currentJoinNode->partitionColumnList;
	char currentMethod = currentJoinNode->partitionMethod;
	TableEntry *anchorTable = currentJoinNode->anchorTable;
	JoinRuleType previousRule = currentJoinNode->joinRuleType;

	Oid candidateRelationId = candidateTable->relationId;
	uint32 candidateTableId = candidateTable->rangeTableId;
	Var *candidateColumn = PartitionColumn(candidateRelationId, candidateTableId);
	char candidateMethod = PartitionMethod(candidateRelationId);

	/*
	 * A single repartition join is not allowed right after a dual repartition
	 * join or a cartesian product.
	 */
	if (previousRule == DUAL_PARTITION_JOIN || previousRule == CARTESIAN_PRODUCT)
	{
		return NULL;
	}

	/*
	 * Single range redistribution is only allowed when both sides are range
	 * distributed, since values of a non-range distributed table are not
	 * guaranteed to fall inside the shard ranges of the range distributed one.
	 * So bail out when exactly one of the two sides is range distributed.
	 */
	bool currentIsRange = (currentMethod == DISTRIBUTE_BY_RANGE);
	bool candidateIsRange = (candidateMethod == DISTRIBUTE_BY_RANGE);
	if (currentIsRange != candidateIsRange)
	{
		return NULL;
	}

	/*
	 * First option: a join clause on one of the current partition columns. The
	 * resulting node keeps the current partition columns, method and anchor.
	 */
	OpExpr *joinClause = SinglePartitionJoinClause(currentColumns,
												   applicableJoinClauses);
	if (joinClause != NULL)
	{
		if (!currentIsRange)
		{
			/*
			 * Single hash repartitioning may perform worse than dual hash
			 * repartitioning. Thus, we control it via a guc.
			 */
			if (!EnableSingleHashRepartitioning)
			{
				return NULL;
			}

			return MakeJoinOrderNode(candidateTable, SINGLE_HASH_PARTITION_JOIN,
									 currentColumns, currentMethod,
									 anchorTable, joinType);
		}
		else if (candidateIsRange)
		{
			return MakeJoinOrderNode(candidateTable, SINGLE_RANGE_PARTITION_JOIN,
									 currentColumns, currentMethod,
									 anchorTable, joinType);
		}
	}

	/*
	 * Second option, tried only when the first one produced no node: a join
	 * clause on the candidate table's partition column. Candidates without a
	 * distribution method cannot take this path.
	 */
	if (candidateMethod == DISTRIBUTE_BY_NONE)
	{
		return NULL;
	}

	/*
	 * Single-element list holding the candidate's partition column. When the
	 * rule applies, this list is stored on the resulting node so later local
	 * join checks can use it.
	 */
	List *candidateColumns = list_make1(candidateColumn);

	joinClause = SinglePartitionJoinClause(candidateColumns, applicableJoinClauses);
	if (joinClause == NULL)
	{
		return NULL;
	}

	if (!candidateIsRange)
	{
		/*
		 * Single hash repartitioning may perform worse than dual hash
		 * repartitioning. Thus, we control it via a guc.
		 */
		if (!EnableSingleHashRepartitioning)
		{
			return NULL;
		}

		return MakeJoinOrderNode(candidateTable, SINGLE_HASH_PARTITION_JOIN,
								 candidateColumns, candidateMethod,
								 candidateTable, joinType);
	}
	else if (currentIsRange)
	{
		return MakeJoinOrderNode(candidateTable, SINGLE_RANGE_PARTITION_JOIN,
								 candidateColumns, candidateMethod,
								 candidateTable, joinType);
	}

	return NULL;
}


/*
 * SinglePartitionJoinClause walks over the applicable join clause list, and
 * finds an applicable join clause for the given partition column. If no such
 * clause exists, the function returns NULL.
*/
OpExpr *
SinglePartitionJoinClause(List *partitionColumnList, List *applicableJoinClauses)
{
	/* nothing can match when there is no partition column at all */
	if (list_length(partitionColumnList) == 0)
	{
		return NULL;
	}

	Var *partitionColumn = NULL;
	foreach_ptr(partitionColumn, partitionColumnList)
	{
		Node *clause = NULL;
		foreach_ptr(clause, applicableJoinClauses)
		{
			if (!NodeIsEqualsOpExpr(clause))
			{
				continue;
			}

			OpExpr *equalsExpr = castNode(OpExpr, clause);
			Var *leftVar = LeftColumnOrNULL(equalsExpr);
			Var *rightVar = RightColumnOrNULL(equalsExpr);

			/* both sides must be plain column references */
			if (leftVar == NULL || rightVar == NULL)
			{
				continue;
			}

			/* one side of the clause has to be the partition column */
			bool usesPartitionColumn = equal(leftVar, partitionColumn) ||
									   equal(rightVar, partitionColumn);
			if (!usesPartitionColumn)
			{
				continue;
			}

			/*
			 * The join column types have to match as well. With different
			 * types we would use different hash functions for the two columns
			 * and repartition the data incorrectly.
			 */
			if (leftVar->vartype == rightVar->vartype)
			{
				return equalsExpr;
			}

			ereport(DEBUG1, (errmsg("single partition column types do not "
									"match")));
		}
	}

	return NULL;
}


/*
 * DualPartitionJoin evaluates if a join clause exists between "tables in the
 * join order" and the candidate table. If such a clause exists, both tables can
 * be repartitioned on the join column; and the function returns a join order
 * node with the join column as the next partition key. Otherwise, the function
 * returns null.
*/
static JoinOrderNode *
DualPartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
				  List *applicableJoinClauses, JoinType joinType)
{
	if (DualPartitionJoinClause(applicableJoinClauses) == NULL)
	{
		return NULL;
	}

	/* because of the dual partition, anchor table and partition column get lost */
	return MakeJoinOrderNode(candidateTable, DUAL_PARTITION_JOIN,
							 NIL, REDISTRIBUTE_BY_HASH, NULL, joinType);
}


/*
 * DualPartitionJoinClause scans the applicable join clauses and returns the
 * first equality clause whose two sides are both direct column references of
 * the same type. Returns NULL when no clause qualifies.
 */
OpExpr *
DualPartitionJoinClause(List *applicableJoinClauses)
{
	Node *clause = NULL;
	foreach_ptr(clause, applicableJoinClauses)
	{
		if (!NodeIsEqualsOpExpr(clause))
		{
			continue;
		}

		OpExpr *equalsExpr = castNode(OpExpr, clause);
		Var *leftVar = LeftColumnOrNULL(equalsExpr);
		Var *rightVar = RightColumnOrNULL(equalsExpr);

		/* both sides must be plain column references */
		if (leftVar == NULL || rightVar == NULL)
		{
			continue;
		}

		/* we only need to check that the join column types match */
		if (leftVar->vartype == rightVar->vartype)
		{
			return equalsExpr;
		}

		ereport(DEBUG1, (errmsg("dual partition column types do not match")));
	}

	return NULL;
}


/*
 * CartesianProduct always evaluates to true since all tables can be combined
 * using a cartesian product operator. This function acts as a catch-all rule,
 * in case none of the join rules apply.
*/ static JoinOrderNode * CartesianProduct(JoinOrderNode *currentJoinNode, TableEntry *candidateTable, List *applicableJoinClauses, JoinType joinType) { if (list_length(applicableJoinClauses) == 0) { /* Because of the cartesian product, anchor table information got lost */ return MakeJoinOrderNode(candidateTable, CARTESIAN_PRODUCT, currentJoinNode->partitionColumnList, currentJoinNode->partitionMethod, NULL, joinType); } return NULL; } /* Constructs and returns a join-order node with the given arguments */ JoinOrderNode * MakeJoinOrderNode(TableEntry *tableEntry, JoinRuleType joinRuleType, List *partitionColumnList, char partitionMethod, TableEntry *anchorTable, JoinType joinType) { JoinOrderNode *joinOrderNode = palloc0(sizeof(JoinOrderNode)); joinOrderNode->tableEntry = tableEntry; joinOrderNode->joinRuleType = joinRuleType; joinOrderNode->joinType = joinType; joinOrderNode->partitionColumnList = partitionColumnList; joinOrderNode->partitionMethod = partitionMethod; joinOrderNode->joinClauseList = NIL; joinOrderNode->applicableJoinClauseContext = NULL; joinOrderNode->anchorTable = anchorTable; return joinOrderNode; } /* * IsApplicableJoinClause tests if the current joinClause is applicable to the join at * hand. * * Given a list of left hand tables and a candidate right hand table the join clause is * valid if atleast 1 column is from the right hand table AND all columns can be found * in either the list of tables on the left *or* in the right hand table. */ bool IsApplicableJoinClause(List *leftTableIdList, uint32 rightTableId, Node *joinClause) { List *varList = pull_var_clause_default(joinClause); Var *var = NULL; bool joinContainsRightTable = false; foreach_ptr(var, varList) { uint32 columnTableId = var->varno; if (rightTableId == columnTableId) { joinContainsRightTable = true; } else if (!list_member_int(leftTableIdList, columnTableId)) { /* * We couldn't find this column either on the right hand side (first if * statement), nor in the list on the left. 
This join clause involves a table * not yet available during the candidate join. */ return false; } } /* * All columns referenced in this clause are available during this join, now the join * is applicable if we found our candidate table as well */ return joinContainsRightTable; } /* * IsApplicableFalseConstantJoinClause returns true if given restrictinfo is a constant false * filter which is applied to right table and also at least one of the table in left tables. */ bool IsApplicableFalseConstantJoinClause(List *leftTableIdList, uint32 rightTableId, RestrictInfo *restrictInfo) { /* find whether restrictinfo relids contain right table relid */ Relids restrictionRelIds = restrictInfo->required_relids; bool hasRightTable = bms_is_member(rightTableId, restrictionRelIds); /* convert left table id list to bitmapset */ Relids leftTableRelIds = NULL; int leftTableId = -1; foreach_int(leftTableId, leftTableIdList) { leftTableRelIds = bms_add_member(leftTableRelIds, leftTableId); } /* find whether restrictinfo relids contain any of the left table relids */ Relids intersectLeftRelids = bms_intersect(restrictionRelIds, leftTableRelIds); bool hasOneOfLeftTables = bms_num_members(intersectLeftRelids) > 0; return hasRightTable && hasOneOfLeftTables; } /* * Returns the left column only when directly referenced in the given join clause, * otherwise NULL is returned. */ Var * LeftColumnOrNULL(OpExpr *joinClause) { List *argumentList = joinClause->args; Node *leftArgument = (Node *) linitial(argumentList); leftArgument = strip_implicit_coercions(leftArgument); if (!IsA(leftArgument, Var)) { return NULL; } return castNode(Var, leftArgument); } /* * Returns the right column only when directly referenced in the given join clause, * otherwise NULL is returned. 
* */ Var * RightColumnOrNULL(OpExpr *joinClause) { List *argumentList = joinClause->args; Node *rightArgument = (Node *) lsecond(argumentList); rightArgument = strip_implicit_coercions(rightArgument); if (!IsA(rightArgument, Var)) { return NULL; } return castNode(Var, rightArgument); } /* * PartitionColumn builds the partition column for the given relation, and sets * the partition column's range table references to the given table identifier. * * Note that reference tables do not have partition column. Thus, this function * returns NULL when called for reference tables. */ Var * PartitionColumn(Oid relationId, uint32 rangeTableId) { Var *partitionKey = DistPartitionKey(relationId); Var *partitionColumn = NULL; /* short circuit for reference tables */ if (partitionKey == NULL) { return partitionColumn; } partitionColumn = partitionKey; partitionColumn->varno = rangeTableId; partitionColumn->varnosyn = rangeTableId; return partitionColumn; } /* * DistPartitionKey returns the partition key column for the given relation. Note * that in the context of distributed join and query planning, the callers of * this function *must* set the partition key column's range table reference * (varno) to match the table's location in the query range table list. * * Note that reference tables do not have partition column. Thus, this function * returns NULL when called for reference tables. */ Var * DistPartitionKey(Oid relationId) { CitusTableCacheEntry *partitionEntry = GetCitusTableCacheEntry(relationId); /* non-distributed tables do not have partition column */ if (IsCitusTableTypeCacheEntry(partitionEntry, CITUS_TABLE_WITH_NO_DIST_KEY)) { return NULL; } return copyObject(partitionEntry->partitionColumn); } /* * DistPartitionKeyOrError is the same as DistPartitionKey but errors out instead * of returning NULL if this is called with a relationId of a reference table. 
*/
Var *
DistPartitionKeyOrError(Oid relationId)
{
	Var *partitionKey = DistPartitionKey(relationId);

	if (partitionKey == NULL)
	{
		/* Oid is an unsigned type, so it is printed with %u rather than %d */
		ereport(ERROR, (errmsg(
							"no distribution column found for relation %u, because it is a reference table",
							relationId)));
	}

	return partitionKey;
}


/*
 * PartitionMethod returns the partition method recorded in the cache entry of
 * the given relation.
 */
char
PartitionMethod(Oid relationId)
{
	/* errors out if not a distributed table */
	CitusTableCacheEntry *partitionEntry = GetCitusTableCacheEntry(relationId);

	char partitionMethod = partitionEntry->partitionMethod;

	return partitionMethod;
}


/*
 * TableReplicationModel returns the replication model recorded in the cache
 * entry of the given relation.
 */
char
TableReplicationModel(Oid relationId)
{
	/* errors out if not a distributed table */
	CitusTableCacheEntry *partitionEntry = GetCitusTableCacheEntry(relationId);

	char replicationModel = partitionEntry->replicationModel;

	return replicationModel;
}