mirror of https://github.com/citusdata/citus.git
Feature: Expression in reference join (#3180)
DESCRIPTION: Expression in reference join Fixed: #2582 This patch allows arbitrary expressions in the join clause when joining to a reference table. An example of such joins could be found in CHbenCHmark queries 7, 8, 9 and 11; `mod((s_w_id * s_i_id),10000) = su_suppkey` and `ascii(substr(c_state,1,1)) = n2.n_nationkey`. Since the join is on a reference table these queries are able to be pushed down to the workers. To implement these queries we will widen the `IsJoinClause` predicate to not check if the expressions are a type `Var` after stripping the implicit coerciens. Instead we define a join clause when the `Var`'s in a clause come from more than 1 table. This allows more clauses to pass into the logical planner's `MultiNodeTree(...)` planning function. To compensate for this we tighten down the `LocalJoin`, `SinglePartitionJoin` and `DualPartitionJoin` to check for direct column references when planning. This allows the planner to work with arbitrary join expressions on reference tables.pull/3184/head
parent
a4c90b6ee1
commit
217890af5f
|
@ -18,6 +18,7 @@
|
|||
#include "access/heapam.h"
|
||||
#include "access/htup_details.h"
|
||||
#include "catalog/pg_am.h"
|
||||
#include "distributed/listutils.h"
|
||||
#include "distributed/metadata_cache.h"
|
||||
#include "distributed/multi_join_order.h"
|
||||
#include "distributed/multi_physical_planner.h"
|
||||
|
@ -208,37 +209,45 @@ ExtractLeftMostRangeTableIndex(Node *node, int *rangeTableIndex)
|
|||
|
||||
|
||||
/*
|
||||
* JoinOnColumns determines whether two columns are joined by a given join clause
|
||||
* list.
|
||||
* JoinOnColumns determines whether two columns are joined by a given join clause list.
|
||||
*/
|
||||
static bool
|
||||
JoinOnColumns(Var *currentColumn, Var *candidateColumn, List *joinClauseList)
|
||||
{
|
||||
ListCell *joinClauseCell = NULL;
|
||||
bool joinOnColumns = false;
|
||||
if (currentColumn == NULL || candidateColumn == NULL)
|
||||
{
|
||||
/*
|
||||
* LocalJoin can only be happening if we have both a current column and a target
|
||||
* column, otherwise we are not joining two local tables
|
||||
*/
|
||||
return false;
|
||||
}
|
||||
|
||||
foreach(joinClauseCell, joinClauseList)
|
||||
{
|
||||
OpExpr *joinClause = (OpExpr *) lfirst(joinClauseCell);
|
||||
Var *leftColumn = LeftColumn(joinClause);
|
||||
Var *rightColumn = RightColumn(joinClause);
|
||||
OpExpr *joinClause = castNode(OpExpr, lfirst(joinClauseCell));
|
||||
Var *leftColumn = LeftColumnOrNULL(joinClause);
|
||||
Var *rightColumn = RightColumnOrNULL(joinClause);
|
||||
|
||||
/* check if both join columns and both partition key columns match */
|
||||
/*
|
||||
* Check if both join columns and both partition key columns match, since the
|
||||
* current and candidate column's can't be NULL we know they wont match if either
|
||||
* of the columns resolved to NULL above.
|
||||
*/
|
||||
if (equal(leftColumn, currentColumn) &&
|
||||
equal(rightColumn, candidateColumn))
|
||||
{
|
||||
joinOnColumns = true;
|
||||
break;
|
||||
return true;
|
||||
}
|
||||
if (equal(leftColumn, candidateColumn) &&
|
||||
equal(rightColumn, currentColumn))
|
||||
{
|
||||
joinOnColumns = true;
|
||||
break;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return joinOnColumns;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
|
@ -265,7 +274,19 @@ JoinOrderList(List *tableEntryList, List *joinClauseList)
|
|||
candidateJoinOrder = JoinOrderForTable(startingTable, tableEntryList,
|
||||
joinClauseList);
|
||||
|
||||
candidateJoinOrderList = lappend(candidateJoinOrderList, candidateJoinOrder);
|
||||
if (candidateJoinOrder != NULL)
|
||||
{
|
||||
candidateJoinOrderList = lappend(candidateJoinOrderList, candidateJoinOrder);
|
||||
}
|
||||
}
|
||||
|
||||
if (list_length(candidateJoinOrderList) == 0)
|
||||
{
|
||||
/* there are no plans that we can create, time to error */
|
||||
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("complex joins are only supported when all distributed "
|
||||
"tables are joined on their distribution columns with "
|
||||
"equal operator")));
|
||||
}
|
||||
|
||||
bestJoinOrder = BestJoinOrder(candidateJoinOrderList);
|
||||
|
@ -341,6 +362,12 @@ JoinOrderForTable(TableEntry *firstTable, List *tableEntryList, List *joinClause
|
|||
pendingJoinNode = EvaluateJoinRules(joinedTableList, currentJoinNode,
|
||||
pendingTable, joinClauseList, joinType);
|
||||
|
||||
if (pendingJoinNode == NULL)
|
||||
{
|
||||
/* no join order could be generated, we try our next pending table */
|
||||
continue;
|
||||
}
|
||||
|
||||
/* if this rule is better than previous ones, keep it */
|
||||
pendingJoinRuleType = pendingJoinNode->joinRuleType;
|
||||
if (pendingJoinRuleType < nextJoinRuleType)
|
||||
|
@ -350,6 +377,15 @@ JoinOrderForTable(TableEntry *firstTable, List *tableEntryList, List *joinClause
|
|||
}
|
||||
}
|
||||
|
||||
if (nextJoinNode == NULL)
|
||||
{
|
||||
/*
|
||||
* There is no next join node found, this will repeat indefinitely hence we
|
||||
* bail and let JoinOrderList try a new initial table
|
||||
*/
|
||||
return NULL;
|
||||
}
|
||||
|
||||
Assert(nextJoinNode != NULL);
|
||||
nextJoinedTable = nextJoinNode->tableEntry;
|
||||
|
||||
|
@ -660,6 +696,11 @@ EvaluateJoinRules(List *joinedTableList, JoinOrderNode *currentJoinNode,
|
|||
}
|
||||
}
|
||||
|
||||
if (nextJoinNode == NULL)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
Assert(nextJoinNode != NULL);
|
||||
nextJoinNode->joinType = joinType;
|
||||
nextJoinNode->joinClauseList = applicableJoinClauses;
|
||||
|
@ -824,22 +865,18 @@ LocalJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
|
|||
char candidatePartitionMethod = PartitionMethod(relationId);
|
||||
char currentPartitionMethod = currentJoinNode->partitionMethod;
|
||||
TableEntry *currentAnchorTable = currentJoinNode->anchorTable;
|
||||
JoinRuleType currentJoinRuleType = currentJoinNode->joinRuleType;
|
||||
bool joinOnPartitionColumns = false;
|
||||
bool coPartitionedTables = false;
|
||||
|
||||
/*
|
||||
* If we previously dual-hash re-partitioned the tables for a join or made
|
||||
* cartesian product, we currently don't allow local join.
|
||||
* If we previously dual-hash re-partitioned the tables for a join or made cartesian
|
||||
* product, there is no anchor table anymore. In that case we don't allow local join.
|
||||
*/
|
||||
if (currentJoinRuleType == DUAL_PARTITION_JOIN ||
|
||||
currentJoinRuleType == CARTESIAN_PRODUCT)
|
||||
if (currentAnchorTable == NULL)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
Assert(currentAnchorTable != NULL);
|
||||
|
||||
/* the partition method should be the same for a local join */
|
||||
if (currentPartitionMethod != candidatePartitionMethod)
|
||||
{
|
||||
|
@ -883,7 +920,6 @@ static JoinOrderNode *
|
|||
SinglePartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
|
||||
List *applicableJoinClauses, JoinType joinType)
|
||||
{
|
||||
JoinOrderNode *nextJoinNode = NULL;
|
||||
Var *currentPartitionColumn = currentJoinNode->partitionColumn;
|
||||
char currentPartitionMethod = currentJoinNode->partitionMethod;
|
||||
TableEntry *currentAnchorTable = currentJoinNode->anchorTable;
|
||||
|
@ -927,22 +963,22 @@ SinglePartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
nextJoinNode = MakeJoinOrderNode(candidateTable, SINGLE_HASH_PARTITION_JOIN,
|
||||
currentPartitionColumn,
|
||||
currentPartitionMethod,
|
||||
currentAnchorTable);
|
||||
return MakeJoinOrderNode(candidateTable, SINGLE_HASH_PARTITION_JOIN,
|
||||
currentPartitionColumn,
|
||||
currentPartitionMethod,
|
||||
currentAnchorTable);
|
||||
}
|
||||
else
|
||||
{
|
||||
nextJoinNode = MakeJoinOrderNode(candidateTable, SINGLE_RANGE_PARTITION_JOIN,
|
||||
currentPartitionColumn,
|
||||
currentPartitionMethod,
|
||||
currentAnchorTable);
|
||||
return MakeJoinOrderNode(candidateTable, SINGLE_RANGE_PARTITION_JOIN,
|
||||
currentPartitionColumn,
|
||||
currentPartitionMethod,
|
||||
currentAnchorTable);
|
||||
}
|
||||
}
|
||||
|
||||
/* evaluate re-partitioning the current table only if the rule didn't apply above */
|
||||
if (nextJoinNode == NULL && candidatePartitionMethod != DISTRIBUTE_BY_NONE)
|
||||
if (candidatePartitionMethod != DISTRIBUTE_BY_NONE)
|
||||
{
|
||||
joinClause = SinglePartitionJoinClause(candidatePartitionColumn,
|
||||
applicableJoinClauses);
|
||||
|
@ -959,24 +995,24 @@ SinglePartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
nextJoinNode = MakeJoinOrderNode(candidateTable,
|
||||
SINGLE_HASH_PARTITION_JOIN,
|
||||
candidatePartitionColumn,
|
||||
candidatePartitionMethod,
|
||||
candidateTable);
|
||||
return MakeJoinOrderNode(candidateTable,
|
||||
SINGLE_HASH_PARTITION_JOIN,
|
||||
candidatePartitionColumn,
|
||||
candidatePartitionMethod,
|
||||
candidateTable);
|
||||
}
|
||||
else
|
||||
{
|
||||
nextJoinNode = MakeJoinOrderNode(candidateTable,
|
||||
SINGLE_RANGE_PARTITION_JOIN,
|
||||
candidatePartitionColumn,
|
||||
candidatePartitionMethod,
|
||||
candidateTable);
|
||||
return MakeJoinOrderNode(candidateTable,
|
||||
SINGLE_RANGE_PARTITION_JOIN,
|
||||
candidatePartitionColumn,
|
||||
candidatePartitionMethod,
|
||||
candidateTable);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nextJoinNode;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
|
@ -988,14 +1024,23 @@ SinglePartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
|
|||
OpExpr *
|
||||
SinglePartitionJoinClause(Var *partitionColumn, List *applicableJoinClauses)
|
||||
{
|
||||
OpExpr *joinClause = NULL;
|
||||
ListCell *applicableJoinClauseCell = NULL;
|
||||
if (partitionColumn == NULL)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
foreach(applicableJoinClauseCell, applicableJoinClauses)
|
||||
{
|
||||
OpExpr *applicableJoinClause = (OpExpr *) lfirst(applicableJoinClauseCell);
|
||||
Var *leftColumn = LeftColumn(applicableJoinClause);
|
||||
Var *rightColumn = RightColumn(applicableJoinClause);
|
||||
OpExpr *applicableJoinClause = castNode(OpExpr, lfirst(applicableJoinClauseCell));
|
||||
Var *leftColumn = LeftColumnOrNULL(applicableJoinClause);
|
||||
Var *rightColumn = RightColumnOrNULL(applicableJoinClause);
|
||||
if (leftColumn == NULL || rightColumn == NULL)
|
||||
{
|
||||
/* not a simple partition column join */
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* We first check if partition column matches either of the join columns
|
||||
|
@ -1007,8 +1052,7 @@ SinglePartitionJoinClause(Var *partitionColumn, List *applicableJoinClauses)
|
|||
{
|
||||
if (leftColumn->vartype == rightColumn->vartype)
|
||||
{
|
||||
joinClause = applicableJoinClause;
|
||||
break;
|
||||
return applicableJoinClause;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1017,7 +1061,7 @@ SinglePartitionJoinClause(Var *partitionColumn, List *applicableJoinClauses)
|
|||
}
|
||||
}
|
||||
|
||||
return joinClause;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
|
@ -1032,20 +1076,18 @@ static JoinOrderNode *
|
|||
DualPartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
|
||||
List *applicableJoinClauses, JoinType joinType)
|
||||
{
|
||||
/* Because of the dual partition, anchor table information got lost */
|
||||
TableEntry *anchorTable = NULL;
|
||||
JoinOrderNode *nextJoinNode = NULL;
|
||||
|
||||
OpExpr *joinClause = DualPartitionJoinClause(applicableJoinClauses);
|
||||
if (joinClause)
|
||||
{
|
||||
Var *nextPartitionColumn = LeftColumn(joinClause);
|
||||
nextJoinNode = MakeJoinOrderNode(candidateTable, DUAL_PARTITION_JOIN,
|
||||
nextPartitionColumn, REDISTRIBUTE_BY_HASH,
|
||||
anchorTable);
|
||||
/* because of the dual partition, anchor table and partition column get lost */
|
||||
return MakeJoinOrderNode(candidateTable,
|
||||
DUAL_PARTITION_JOIN,
|
||||
NULL,
|
||||
REDISTRIBUTE_BY_HASH,
|
||||
NULL);
|
||||
}
|
||||
|
||||
return nextJoinNode;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
|
@ -1057,20 +1099,23 @@ DualPartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
|
|||
OpExpr *
|
||||
DualPartitionJoinClause(List *applicableJoinClauses)
|
||||
{
|
||||
OpExpr *joinClause = NULL;
|
||||
ListCell *applicableJoinClauseCell = NULL;
|
||||
|
||||
foreach(applicableJoinClauseCell, applicableJoinClauses)
|
||||
{
|
||||
OpExpr *applicableJoinClause = (OpExpr *) lfirst(applicableJoinClauseCell);
|
||||
Var *leftColumn = LeftColumn(applicableJoinClause);
|
||||
Var *rightColumn = RightColumn(applicableJoinClause);
|
||||
Var *leftColumn = LeftColumnOrNULL(applicableJoinClause);
|
||||
Var *rightColumn = RightColumnOrNULL(applicableJoinClause);
|
||||
|
||||
if (leftColumn == NULL || rightColumn == NULL)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
/* we only need to check that the join column types match */
|
||||
if (leftColumn->vartype == rightColumn->vartype)
|
||||
{
|
||||
joinClause = applicableJoinClause;
|
||||
break;
|
||||
return applicableJoinClause;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1078,7 +1123,7 @@ DualPartitionJoinClause(List *applicableJoinClauses)
|
|||
}
|
||||
}
|
||||
|
||||
return joinClause;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
|
@ -1091,15 +1136,16 @@ static JoinOrderNode *
|
|||
CartesianProduct(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
|
||||
List *applicableJoinClauses, JoinType joinType)
|
||||
{
|
||||
/* Because of the cartesian product, anchor table information got lost */
|
||||
TableEntry *anchorTable = NULL;
|
||||
if (list_length(applicableJoinClauses) == 0)
|
||||
{
|
||||
/* Because of the cartesian product, anchor table information got lost */
|
||||
return MakeJoinOrderNode(candidateTable, CARTESIAN_PRODUCT,
|
||||
currentJoinNode->partitionColumn,
|
||||
currentJoinNode->partitionMethod,
|
||||
NULL);
|
||||
}
|
||||
|
||||
JoinOrderNode *nextJoinNode = MakeJoinOrderNode(candidateTable, CARTESIAN_PRODUCT,
|
||||
currentJoinNode->partitionColumn,
|
||||
currentJoinNode->partitionMethod,
|
||||
anchorTable);
|
||||
|
||||
return nextJoinNode;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
|
@ -1121,6 +1167,46 @@ MakeJoinOrderNode(TableEntry *tableEntry, JoinRuleType joinRuleType,
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* IsApplicableJoinClause tests if the current joinClause is applicable to the join at
|
||||
* hand.
|
||||
*
|
||||
* Given a list of left hand tables and a candidate right hand table the join clause is
|
||||
* valid if atleast 1 column is from the right hand table AND all columns can be found
|
||||
* in either the list of tables on the left *or* in the right hand table.
|
||||
*/
|
||||
bool
|
||||
IsApplicableJoinClause(List *leftTableIdList, uint32 rightTableId, OpExpr *joinClause)
|
||||
{
|
||||
List *varList = pull_var_clause_default((Node *) joinClause);
|
||||
Var *var = NULL;
|
||||
bool joinContainsRightTable = false;
|
||||
foreach_ptr(var, varList)
|
||||
{
|
||||
uint32 columnTableId = var->varno;
|
||||
if (rightTableId == columnTableId)
|
||||
{
|
||||
joinContainsRightTable = true;
|
||||
}
|
||||
else if (!list_member_int(leftTableIdList, columnTableId))
|
||||
{
|
||||
/*
|
||||
* We couldn't find this column either on the right hand side (first if
|
||||
* statement), nor in the list on the left. This join clause involves a table
|
||||
* not yet available during the candidate join.
|
||||
*/
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* All columns referenced in this clause are available during this join, now the join
|
||||
* is applicable if we found our candidate table as well
|
||||
*/
|
||||
return joinContainsRightTable;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ApplicableJoinClauses finds all join clauses that apply between the given
|
||||
* left table list and the right table, and returns these found join clauses.
|
||||
|
@ -1136,18 +1222,8 @@ ApplicableJoinClauses(List *leftTableIdList, uint32 rightTableId, List *joinClau
|
|||
|
||||
foreach(joinClauseCell, joinClauseList)
|
||||
{
|
||||
OpExpr *joinClause = (OpExpr *) lfirst(joinClauseCell);
|
||||
Var *joinLeftColumn = LeftColumn(joinClause);
|
||||
Var *joinRightColumn = RightColumn(joinClause);
|
||||
|
||||
uint32 joinLeftTableId = joinLeftColumn->varno;
|
||||
uint32 joinRightTableId = joinRightColumn->varno;
|
||||
|
||||
bool leftListHasJoinLeft = list_member_int(leftTableIdList, joinLeftTableId);
|
||||
bool leftListHasJoinRight = list_member_int(leftTableIdList, joinRightTableId);
|
||||
|
||||
if ((leftListHasJoinLeft && (rightTableId == joinRightTableId)) ||
|
||||
(leftListHasJoinRight && (rightTableId == joinLeftTableId)))
|
||||
OpExpr *joinClause = castNode(OpExpr, lfirst(joinClauseCell));
|
||||
if (IsApplicableJoinClause(leftTableIdList, rightTableId, joinClause))
|
||||
{
|
||||
applicableJoinClauses = lappend(applicableJoinClauses, joinClause);
|
||||
}
|
||||
|
@ -1157,37 +1233,41 @@ ApplicableJoinClauses(List *leftTableIdList, uint32 rightTableId, List *joinClau
|
|||
}
|
||||
|
||||
|
||||
/* Returns the left column in the given join clause. */
|
||||
/*
|
||||
* Returns the left column only when directly referenced in the given join clause,
|
||||
* otherwise NULL is returned.
|
||||
*/
|
||||
Var *
|
||||
LeftColumn(OpExpr *joinClause)
|
||||
LeftColumnOrNULL(OpExpr *joinClause)
|
||||
{
|
||||
List *argumentList = joinClause->args;
|
||||
Node *leftArgument = (Node *) linitial(argumentList);
|
||||
|
||||
List *varList = pull_var_clause_default(leftArgument);
|
||||
Var *leftColumn = NULL;
|
||||
|
||||
Assert(list_length(varList) == 1);
|
||||
leftColumn = (Var *) linitial(varList);
|
||||
|
||||
return leftColumn;
|
||||
leftArgument = strip_implicit_coercions(leftArgument);
|
||||
if (!IsA(leftArgument, Var))
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
return castNode(Var, leftArgument);
|
||||
}
|
||||
|
||||
|
||||
/* Returns the right column in the given join clause. */
|
||||
/*
|
||||
* Returns the right column only when directly referenced in the given join clause,
|
||||
* otherwise NULL is returned.
|
||||
* */
|
||||
Var *
|
||||
RightColumn(OpExpr *joinClause)
|
||||
RightColumnOrNULL(OpExpr *joinClause)
|
||||
{
|
||||
List *argumentList = joinClause->args;
|
||||
Node *rightArgument = (Node *) lsecond(argumentList);
|
||||
|
||||
List *varList = pull_var_clause_default(rightArgument);
|
||||
Var *rightColumn = NULL;
|
||||
|
||||
Assert(list_length(varList) == 1);
|
||||
rightColumn = (Var *) linitial(varList);
|
||||
|
||||
return rightColumn;
|
||||
rightArgument = strip_implicit_coercions(rightArgument);
|
||||
if (!IsA(rightArgument, Var))
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
return castNode(Var, rightArgument);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include "distributed/colocation_utils.h"
|
||||
#include "distributed/metadata_cache.h"
|
||||
#include "distributed/insert_select_planner.h"
|
||||
#include "distributed/listutils.h"
|
||||
#include "distributed/multi_logical_optimizer.h"
|
||||
#include "distributed/multi_logical_planner.h"
|
||||
#include "distributed/multi_physical_planner.h"
|
||||
|
@ -1387,62 +1388,59 @@ ExtractFromExpressionWalker(Node *node, QualifierWalkerContext *walkerContext)
|
|||
bool
|
||||
IsJoinClause(Node *clause)
|
||||
{
|
||||
bool isJoinClause = false;
|
||||
OpExpr *operatorExpression = NULL;
|
||||
List *argumentList = NIL;
|
||||
Node *leftArgument = NULL;
|
||||
Node *rightArgument = NULL;
|
||||
Node *strippedLeftArgument = NULL;
|
||||
Node *strippedRightArgument = NULL;
|
||||
bool equalsOperator = false;
|
||||
List *varList = NIL;
|
||||
Var *initialVar = NULL;
|
||||
Var *var = NULL;
|
||||
|
||||
if (!IsA(clause, OpExpr))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
operatorExpression = (OpExpr *) clause;
|
||||
argumentList = operatorExpression->args;
|
||||
operatorExpression = castNode(OpExpr, clause);
|
||||
equalsOperator = OperatorImplementsEquality(operatorExpression->opno);
|
||||
|
||||
/* join clauses must have two arguments */
|
||||
if (list_length(argumentList) != 2)
|
||||
if (!equalsOperator)
|
||||
{
|
||||
/*
|
||||
* The single and dual repartition join and local join planners expect the clauses
|
||||
* to be equi-join to calculate a hash on which to distribute.
|
||||
*
|
||||
* In the future we should move this clause to those planners and allow
|
||||
* non-equi-join's in the reference join and cartesian product. This is tracked in
|
||||
* https://github.com/citusdata/citus/issues/3198
|
||||
*/
|
||||
return false;
|
||||
}
|
||||
|
||||
/* get left and right side of the expression */
|
||||
leftArgument = (Node *) linitial(argumentList);
|
||||
rightArgument = (Node *) lsecond(argumentList);
|
||||
|
||||
strippedLeftArgument = strip_implicit_coercions(leftArgument);
|
||||
strippedRightArgument = strip_implicit_coercions(rightArgument);
|
||||
|
||||
/* each side of the expression should have only one column */
|
||||
if (IsA(strippedLeftArgument, Var) && IsA(strippedRightArgument, Var))
|
||||
/*
|
||||
* take all column references from the clause, if we find 2 column references from a
|
||||
* different relation we assume this is a join clause
|
||||
*/
|
||||
varList = pull_var_clause_default(clause);
|
||||
if (list_length(varList) <= 0)
|
||||
{
|
||||
Var *leftColumn = (Var *) strippedLeftArgument;
|
||||
Var *rightColumn = (Var *) strippedRightArgument;
|
||||
bool equiJoin = false;
|
||||
bool joinBetweenDifferentTables = false;
|
||||
/* no column references in query, not describing a join */
|
||||
return false;
|
||||
}
|
||||
initialVar = castNode(Var, linitial(varList));
|
||||
|
||||
bool equalsOperator = OperatorImplementsEquality(operatorExpression->opno);
|
||||
if (equalsOperator)
|
||||
foreach_ptr(var, varList)
|
||||
{
|
||||
if (var->varno != initialVar->varno)
|
||||
{
|
||||
equiJoin = true;
|
||||
}
|
||||
|
||||
if (leftColumn->varno != rightColumn->varno)
|
||||
{
|
||||
joinBetweenDifferentTables = true;
|
||||
}
|
||||
|
||||
/* codifies our logic for determining if this node is a join clause */
|
||||
if (equiJoin && joinBetweenDifferentTables)
|
||||
{
|
||||
isJoinClause = true;
|
||||
/*
|
||||
* this column reference comes from a different relation, hence describing a
|
||||
* join
|
||||
*/
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return isJoinClause;
|
||||
/* all column references were to the same relation, no join */
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
|
@ -2143,8 +2141,12 @@ ApplySinglePartitionJoin(MultiNode *leftNode, MultiNode *rightNode,
|
|||
joinClause = SinglePartitionJoinClause(partitionColumn, applicableJoinClauses);
|
||||
Assert(joinClause != NULL);
|
||||
|
||||
leftColumn = LeftColumn(joinClause);
|
||||
rightColumn = RightColumn(joinClause);
|
||||
/* both are verified in SinglePartitionJoinClause to not be NULL, assert is to guard */
|
||||
leftColumn = LeftColumnOrNULL(joinClause);
|
||||
rightColumn = RightColumnOrNULL(joinClause);
|
||||
|
||||
Assert(leftColumn != NULL);
|
||||
Assert(rightColumn != NULL);
|
||||
|
||||
if (equal(partitionColumn, leftColumn))
|
||||
{
|
||||
|
@ -2218,8 +2220,11 @@ ApplyDualPartitionJoin(MultiNode *leftNode, MultiNode *rightNode,
|
|||
joinClause = DualPartitionJoinClause(applicableJoinClauses);
|
||||
Assert(joinClause != NULL);
|
||||
|
||||
leftColumn = LeftColumn(joinClause);
|
||||
rightColumn = RightColumn(joinClause);
|
||||
/* both are verified in DualPartitionJoinClause to not be NULL, assert is to guard */
|
||||
leftColumn = LeftColumnOrNULL(joinClause);
|
||||
rightColumn = RightColumnOrNULL(joinClause);
|
||||
Assert(leftColumn != NULL);
|
||||
Assert(rightColumn != NULL);
|
||||
|
||||
rightTableIdList = OutputTableIdList(rightNode);
|
||||
rightTableId = (uint32) linitial_int(rightTableIdList);
|
||||
|
|
|
@ -3603,8 +3603,13 @@ JoinSequenceArray(List *rangeTableFragmentsList, Query *jobQuery, List *depended
|
|||
continue;
|
||||
}
|
||||
|
||||
leftColumn = LeftColumn(nextJoinClause);
|
||||
rightColumn = RightColumn(nextJoinClause);
|
||||
leftColumn = LeftColumnOrNULL(nextJoinClause);
|
||||
rightColumn = RightColumnOrNULL(nextJoinClause);
|
||||
if (leftColumn == NULL || rightColumn == NULL)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
leftRangeTableId = leftColumn->varno;
|
||||
rightRangeTableId = rightColumn->varno;
|
||||
|
||||
|
|
|
@ -82,13 +82,15 @@ extern bool EnableSingleHashRepartitioning;
|
|||
/* Function declaration for determining table join orders */
|
||||
extern List * JoinExprList(FromExpr *fromExpr);
|
||||
extern List * JoinOrderList(List *rangeTableEntryList, List *joinClauseList);
|
||||
extern bool IsApplicableJoinClause(List *leftTableIdList, uint32 rightTableId,
|
||||
OpExpr *joinClause);
|
||||
extern List * ApplicableJoinClauses(List *leftTableIdList, uint32 rightTableId,
|
||||
List *joinClauseList);
|
||||
extern OpExpr * SinglePartitionJoinClause(Var *partitionColumn,
|
||||
List *applicableJoinClauses);
|
||||
extern OpExpr * DualPartitionJoinClause(List *applicableJoinClauses);
|
||||
extern Var * LeftColumn(OpExpr *joinClause);
|
||||
extern Var * RightColumn(OpExpr *joinClause);
|
||||
extern Var * LeftColumnOrNULL(OpExpr *joinClause);
|
||||
extern Var * RightColumnOrNULL(OpExpr *joinClause);
|
||||
extern Var * PartitionColumn(Oid relationId, uint32 rangeTableId);
|
||||
extern Var * DistPartitionKey(Oid relationId);
|
||||
extern char PartitionMethod(Oid relationId);
|
||||
|
|
|
@ -0,0 +1,903 @@
|
|||
SET citus.next_shard_id TO 1650000;
|
||||
CREATE SCHEMA chbenchmark_all_queries;
|
||||
SET search_path TO chbenchmark_all_queries;
|
||||
SET citus.enable_repartition_joins TO on;
|
||||
CREATE TABLE order_line (
|
||||
ol_w_id int NOT NULL,
|
||||
ol_d_id int NOT NULL,
|
||||
ol_o_id int NOT NULL,
|
||||
ol_number int NOT NULL,
|
||||
ol_i_id int NOT NULL,
|
||||
ol_delivery_d timestamp NULL DEFAULT NULL,
|
||||
ol_amount decimal(6,2) NOT NULL,
|
||||
ol_supply_w_id int NOT NULL,
|
||||
ol_quantity decimal(2,0) NOT NULL,
|
||||
ol_dist_info char(24) NOT NULL,
|
||||
PRIMARY KEY (ol_w_id,ol_d_id,ol_o_id,ol_number)
|
||||
);
|
||||
CREATE TABLE new_order (
|
||||
no_w_id int NOT NULL,
|
||||
no_d_id int NOT NULL,
|
||||
no_o_id int NOT NULL,
|
||||
PRIMARY KEY (no_w_id,no_d_id,no_o_id)
|
||||
);
|
||||
CREATE TABLE stock (
|
||||
s_w_id int NOT NULL,
|
||||
s_i_id int NOT NULL,
|
||||
s_quantity decimal(4,0) NOT NULL,
|
||||
s_ytd decimal(8,2) NOT NULL,
|
||||
s_order_cnt int NOT NULL,
|
||||
s_remote_cnt int NOT NULL,
|
||||
s_data varchar(50) NOT NULL,
|
||||
s_dist_01 char(24) NOT NULL,
|
||||
s_dist_02 char(24) NOT NULL,
|
||||
s_dist_03 char(24) NOT NULL,
|
||||
s_dist_04 char(24) NOT NULL,
|
||||
s_dist_05 char(24) NOT NULL,
|
||||
s_dist_06 char(24) NOT NULL,
|
||||
s_dist_07 char(24) NOT NULL,
|
||||
s_dist_08 char(24) NOT NULL,
|
||||
s_dist_09 char(24) NOT NULL,
|
||||
s_dist_10 char(24) NOT NULL,
|
||||
PRIMARY KEY (s_w_id,s_i_id)
|
||||
);
|
||||
CREATE TABLE oorder (
|
||||
o_w_id int NOT NULL,
|
||||
o_d_id int NOT NULL,
|
||||
o_id int NOT NULL,
|
||||
o_c_id int NOT NULL,
|
||||
o_carrier_id int DEFAULT NULL,
|
||||
o_ol_cnt decimal(2,0) NOT NULL,
|
||||
o_all_local decimal(1,0) NOT NULL,
|
||||
o_entry_d timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
PRIMARY KEY (o_w_id,o_d_id,o_id),
|
||||
UNIQUE (o_w_id,o_d_id,o_c_id,o_id)
|
||||
);
|
||||
CREATE TABLE history (
|
||||
h_c_id int NOT NULL,
|
||||
h_c_d_id int NOT NULL,
|
||||
h_c_w_id int NOT NULL,
|
||||
h_d_id int NOT NULL,
|
||||
h_w_id int NOT NULL,
|
||||
h_date timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
h_amount decimal(6,2) NOT NULL,
|
||||
h_data varchar(24) NOT NULL
|
||||
);
|
||||
CREATE TABLE customer (
|
||||
c_w_id int NOT NULL,
|
||||
c_d_id int NOT NULL,
|
||||
c_id int NOT NULL,
|
||||
c_discount decimal(4,4) NOT NULL,
|
||||
c_credit char(2) NOT NULL,
|
||||
c_last varchar(16) NOT NULL,
|
||||
c_first varchar(16) NOT NULL,
|
||||
c_credit_lim decimal(12,2) NOT NULL,
|
||||
c_balance decimal(12,2) NOT NULL,
|
||||
c_ytd_payment float NOT NULL,
|
||||
c_payment_cnt int NOT NULL,
|
||||
c_delivery_cnt int NOT NULL,
|
||||
c_street_1 varchar(20) NOT NULL,
|
||||
c_street_2 varchar(20) NOT NULL,
|
||||
c_city varchar(20) NOT NULL,
|
||||
c_state char(2) NOT NULL,
|
||||
c_zip char(9) NOT NULL,
|
||||
c_phone char(16) NOT NULL,
|
||||
c_since timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
c_middle char(2) NOT NULL,
|
||||
c_data varchar(500) NOT NULL,
|
||||
PRIMARY KEY (c_w_id,c_d_id,c_id)
|
||||
);
|
||||
CREATE TABLE district (
|
||||
d_w_id int NOT NULL,
|
||||
d_id int NOT NULL,
|
||||
d_ytd decimal(12,2) NOT NULL,
|
||||
d_tax decimal(4,4) NOT NULL,
|
||||
d_next_o_id int NOT NULL,
|
||||
d_name varchar(10) NOT NULL,
|
||||
d_street_1 varchar(20) NOT NULL,
|
||||
d_street_2 varchar(20) NOT NULL,
|
||||
d_city varchar(20) NOT NULL,
|
||||
d_state char(2) NOT NULL,
|
||||
d_zip char(9) NOT NULL,
|
||||
PRIMARY KEY (d_w_id,d_id)
|
||||
);
|
||||
CREATE TABLE item (
|
||||
i_id int NOT NULL,
|
||||
i_name varchar(24) NOT NULL,
|
||||
i_price decimal(5,2) NOT NULL,
|
||||
i_data varchar(50) NOT NULL,
|
||||
i_im_id int NOT NULL,
|
||||
PRIMARY KEY (i_id)
|
||||
);
|
||||
CREATE TABLE warehouse (
|
||||
w_id int NOT NULL,
|
||||
w_ytd decimal(12,2) NOT NULL,
|
||||
w_tax decimal(4,4) NOT NULL,
|
||||
w_name varchar(10) NOT NULL,
|
||||
w_street_1 varchar(20) NOT NULL,
|
||||
w_street_2 varchar(20) NOT NULL,
|
||||
w_city varchar(20) NOT NULL,
|
||||
w_state char(2) NOT NULL,
|
||||
w_zip char(9) NOT NULL,
|
||||
PRIMARY KEY (w_id)
|
||||
);
|
||||
CREATE TABLE region (
|
||||
r_regionkey int not null,
|
||||
r_name char(55) not null,
|
||||
r_comment char(152) not null,
|
||||
PRIMARY KEY ( r_regionkey )
|
||||
);
|
||||
CREATE TABLE nation (
|
||||
n_nationkey int not null,
|
||||
n_name char(25) not null,
|
||||
n_regionkey int not null,
|
||||
n_comment char(152) not null,
|
||||
PRIMARY KEY ( n_nationkey )
|
||||
);
|
||||
CREATE TABLE supplier (
|
||||
su_suppkey int not null,
|
||||
su_name char(25) not null,
|
||||
su_address varchar(40) not null,
|
||||
su_nationkey int not null,
|
||||
su_phone char(15) not null,
|
||||
su_acctbal numeric(12,2) not null,
|
||||
su_comment char(101) not null,
|
||||
PRIMARY KEY ( su_suppkey )
|
||||
);
|
||||
SELECT create_distributed_table('order_line','ol_w_id');
|
||||
create_distributed_table
|
||||
--------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT create_distributed_table('new_order','no_w_id');
|
||||
create_distributed_table
|
||||
--------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT create_distributed_table('stock','s_w_id');
|
||||
create_distributed_table
|
||||
--------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT create_distributed_table('oorder','o_w_id');
|
||||
create_distributed_table
|
||||
--------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT create_distributed_table('history','h_w_id');
|
||||
create_distributed_table
|
||||
--------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT create_distributed_table('customer','c_w_id');
|
||||
create_distributed_table
|
||||
--------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT create_distributed_table('district','d_w_id');
|
||||
create_distributed_table
|
||||
--------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT create_distributed_table('warehouse','w_id');
|
||||
create_distributed_table
|
||||
--------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT create_reference_table('item');
|
||||
create_reference_table
|
||||
------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT create_reference_table('region');
|
||||
create_reference_table
|
||||
------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT create_reference_table('nation');
|
||||
create_reference_table
|
||||
------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT create_reference_table('supplier');
|
||||
create_reference_table
|
||||
------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
TRUNCATE order_line, new_order, stock, oorder, history, customer, district, warehouse, item, region, nation, supplier; -- for easy copy in development
|
||||
INSERT INTO supplier SELECT c, 'abc', 'def', c, 'ghi', c, 'jkl' FROM generate_series(0,10) AS c;
|
||||
INSERT INTO new_order SELECT c, c, c FROM generate_series(0,10) AS c;
|
||||
INSERT INTO stock SELECT c,c,c,c,c,c, 'abc','abc','abc','abc','abc','abc','abc','abc','abc','abc','abc' FROM generate_series(1,3) AS c;
|
||||
INSERT INTO stock SELECT c, 5000,c,c,c,c, 'abc','abc','abc','abc','abc','abc','abc','abc','abc','abc','abc' FROM generate_series(1,3) AS c; -- mod(2*5000,10000) == 0
|
||||
INSERT INTO order_line SELECT c, c, c, c, c, '2008-10-17 00:00:00.000000', c, c, c, 'abc' FROM generate_series(0,10) AS c;
|
||||
INSERT INTO oorder SELECT c, c, c, c, c, 1, 1, '2008-10-17 00:00:00.000000' FROM generate_series(0,10) AS c;
|
||||
INSERT INTO customer SELECT c, c, c, 0, 'XX', 'John', 'Doe', 1000, 0, 0, c, c, 'Name', 'Street', 'Some City', 'CA', '12345', '+1 000 0000000', '2007-01-02 00:00:00.000000', 'NA', 'nothing special' FROM generate_series(0,10) AS c;
|
||||
INSERT INTO item SELECT c, 'Keyboard', 50, 'co b', c FROM generate_series(0,10) AS c; --co% and %b filters all around
|
||||
INSERT INTO region VALUES
|
||||
(1, 'Not Europe', 'Big'),
|
||||
(2, 'Europe', 'Big');
|
||||
INSERT INTO nation VALUES
|
||||
(1, 'United States', 1, 'Also Big'),
|
||||
(4, 'The Netherlands', 2, 'Flat'),
|
||||
(9, 'Germany', 2, 'Germany must be in here for Q7'),
|
||||
(67, 'Cambodia', 2, 'I don''t understand how we got from California to Cambodia but I will take it, it also is not in Europe, but we need it to be for Q8');
|
||||
-- Query 1
|
||||
SELECT
|
||||
ol_number,
|
||||
sum(ol_quantity) as sum_qty,
|
||||
sum(ol_amount) as sum_amount,
|
||||
avg(ol_quantity) as avg_qty,
|
||||
avg(ol_amount) as avg_amount,
|
||||
count(*) as count_order
|
||||
FROM order_line
|
||||
WHERE ol_delivery_d > '2007-01-02 00:00:00.000000'
|
||||
GROUP BY ol_number
|
||||
ORDER BY ol_number;
|
||||
ol_number | sum_qty | sum_amount | avg_qty | avg_amount | count_order
|
||||
-----------+---------+------------+------------------------+------------------------+-------------
|
||||
0 | 0 | 0.00 | 0.00000000000000000000 | 0.00000000000000000000 | 1
|
||||
1 | 1 | 1.00 | 1.00000000000000000000 | 1.00000000000000000000 | 1
|
||||
2 | 2 | 2.00 | 2.0000000000000000 | 2.0000000000000000 | 1
|
||||
3 | 3 | 3.00 | 3.0000000000000000 | 3.0000000000000000 | 1
|
||||
4 | 4 | 4.00 | 4.0000000000000000 | 4.0000000000000000 | 1
|
||||
5 | 5 | 5.00 | 5.0000000000000000 | 5.0000000000000000 | 1
|
||||
6 | 6 | 6.00 | 6.0000000000000000 | 6.0000000000000000 | 1
|
||||
7 | 7 | 7.00 | 7.0000000000000000 | 7.0000000000000000 | 1
|
||||
8 | 8 | 8.00 | 8.0000000000000000 | 8.0000000000000000 | 1
|
||||
9 | 9 | 9.00 | 9.0000000000000000 | 9.0000000000000000 | 1
|
||||
10 | 10 | 10.00 | 10.0000000000000000 | 10.0000000000000000 | 1
|
||||
(11 rows)
|
||||
|
||||
-- Query 2
|
||||
SELECT
|
||||
su_suppkey,
|
||||
su_name,
|
||||
n_name,
|
||||
i_id,
|
||||
i_name,
|
||||
su_address,
|
||||
su_phone,
|
||||
su_comment
|
||||
FROM
|
||||
item,
|
||||
supplier,
|
||||
stock,
|
||||
nation,
|
||||
region,
|
||||
(SELECT
|
||||
s_i_id AS m_i_id,
|
||||
min(s_quantity) as m_s_quantity
|
||||
FROM
|
||||
stock,
|
||||
supplier,
|
||||
nation,
|
||||
region
|
||||
WHERE mod((s_w_id*s_i_id),10000)=su_suppkey
|
||||
AND su_nationkey=n_nationkey
|
||||
AND n_regionkey=r_regionkey
|
||||
AND r_name LIKE 'Europ%'
|
||||
GROUP BY s_i_id) m
|
||||
WHERE i_id = s_i_id
|
||||
AND mod((s_w_id * s_i_id), 10000) = su_suppkey
|
||||
AND su_nationkey = n_nationkey
|
||||
AND n_regionkey = r_regionkey
|
||||
AND i_data LIKE '%b'
|
||||
AND r_name LIKE 'Europ%'
|
||||
AND i_id = m_i_id
|
||||
AND s_quantity = m_s_quantity
|
||||
ORDER BY
|
||||
n_name,
|
||||
su_name,
|
||||
i_id;
|
||||
su_suppkey | su_name | n_name | i_id | i_name | su_address | su_phone | su_comment
|
||||
------------+---------------------------+---------------------------+------+----------+------------+-----------------+-------------------------------------------------------------------------------------------------------
|
||||
9 | abc | Germany | 3 | Keyboard | def | ghi | jkl
|
||||
4 | abc | The Netherlands | 2 | Keyboard | def | ghi | jkl
|
||||
(2 rows)
|
||||
|
||||
-- Query 3
|
||||
SELECT
|
||||
ol_o_id,
|
||||
ol_w_id,
|
||||
ol_d_id,
|
||||
sum(ol_amount) AS revenue,
|
||||
o_entry_d
|
||||
FROM
|
||||
customer,
|
||||
new_order,
|
||||
oorder,
|
||||
order_line
|
||||
WHERE c_state LIKE 'C%' -- used to ba A%, but C% works with our small data
|
||||
AND c_id = o_c_id
|
||||
AND c_w_id = o_w_id
|
||||
AND c_d_id = o_d_id
|
||||
AND no_w_id = o_w_id
|
||||
AND no_d_id = o_d_id
|
||||
AND no_o_id = o_id
|
||||
AND ol_w_id = o_w_id
|
||||
AND ol_d_id = o_d_id
|
||||
AND ol_o_id = o_id
|
||||
AND o_entry_d > '2007-01-02 00:00:00.000000'
|
||||
GROUP BY
|
||||
ol_o_id,
|
||||
ol_w_id,
|
||||
ol_d_id,
|
||||
o_entry_d
|
||||
ORDER BY
|
||||
revenue DESC,
|
||||
o_entry_d;
|
||||
ol_o_id | ol_w_id | ol_d_id | revenue | o_entry_d
|
||||
---------+---------+---------+---------+--------------------------
|
||||
10 | 10 | 10 | 10.00 | Fri Oct 17 00:00:00 2008
|
||||
9 | 9 | 9 | 9.00 | Fri Oct 17 00:00:00 2008
|
||||
8 | 8 | 8 | 8.00 | Fri Oct 17 00:00:00 2008
|
||||
7 | 7 | 7 | 7.00 | Fri Oct 17 00:00:00 2008
|
||||
6 | 6 | 6 | 6.00 | Fri Oct 17 00:00:00 2008
|
||||
5 | 5 | 5 | 5.00 | Fri Oct 17 00:00:00 2008
|
||||
4 | 4 | 4 | 4.00 | Fri Oct 17 00:00:00 2008
|
||||
3 | 3 | 3 | 3.00 | Fri Oct 17 00:00:00 2008
|
||||
2 | 2 | 2 | 2.00 | Fri Oct 17 00:00:00 2008
|
||||
1 | 1 | 1 | 1.00 | Fri Oct 17 00:00:00 2008
|
||||
0 | 0 | 0 | 0.00 | Fri Oct 17 00:00:00 2008
|
||||
(11 rows)
|
||||
|
||||
-- Query 4
|
||||
SELECT
|
||||
o_ol_cnt,
|
||||
count(*) as order_count
|
||||
FROM
|
||||
oorder
|
||||
WHERE o_entry_d >= '2007-01-02 00:00:00.000000'
|
||||
AND o_entry_d < '2012-01-02 00:00:00.000000'
|
||||
AND exists (SELECT *
|
||||
FROM order_line
|
||||
WHERE o_id = ol_o_id
|
||||
AND o_w_id = ol_w_id
|
||||
AND o_d_id = ol_d_id
|
||||
AND ol_delivery_d >= o_entry_d)
|
||||
GROUP BY o_ol_cnt
|
||||
ORDER BY o_ol_cnt;
|
||||
o_ol_cnt | order_count
|
||||
----------+-------------
|
||||
1 | 11
|
||||
(1 row)
|
||||
|
||||
-- Query 5
|
||||
SELECT
|
||||
n_name,
|
||||
sum(ol_amount) AS revenue
|
||||
FROM
|
||||
customer,
|
||||
oorder,
|
||||
order_line,
|
||||
stock,
|
||||
supplier,
|
||||
nation,
|
||||
region
|
||||
WHERE c_id = o_c_id
|
||||
AND c_w_id = o_w_id
|
||||
AND c_d_id = o_d_id
|
||||
AND ol_o_id = o_id
|
||||
AND ol_w_id = o_w_id
|
||||
AND ol_d_id=o_d_id
|
||||
AND ol_w_id = s_w_id
|
||||
AND ol_i_id = s_i_id
|
||||
AND mod((s_w_id * s_i_id),10000) = su_suppkey
|
||||
-- our dataset does not have the supplier in the same nation as the customer causing this
|
||||
-- join to filter out all the data. We verify later on that we can actually perform an
|
||||
-- ascii(substr(c_state,1,1)) == reference table column join later on so it should not
|
||||
-- matter we skip this filter here.
|
||||
--AND ascii(substr(c_state,1,1)) = su_nationkey
|
||||
AND su_nationkey = n_nationkey
|
||||
AND n_regionkey = r_regionkey
|
||||
AND r_name = 'Europe'
|
||||
AND o_entry_d >= '2007-01-02 00:00:00.000000'
|
||||
GROUP BY n_name
|
||||
ORDER BY revenue DESC;
|
||||
n_name | revenue
|
||||
---------------------------+---------
|
||||
Germany | 3.00
|
||||
The Netherlands | 2.00
|
||||
(2 rows)
|
||||
|
||||
-- Query 6
|
||||
SELECT
|
||||
sum(ol_amount) AS revenue
|
||||
FROM order_line
|
||||
WHERE ol_delivery_d >= '1999-01-01 00:00:00.000000'
|
||||
AND ol_delivery_d < '2020-01-01 00:00:00.000000'
|
||||
AND ol_quantity BETWEEN 1 AND 100000;
|
||||
revenue
|
||||
---------
|
||||
55.00
|
||||
(1 row)
|
||||
|
||||
-- Query 7
|
||||
SELECT
|
||||
su_nationkey as supp_nation,
|
||||
substr(c_state,1,1) as cust_nation,
|
||||
extract(year from o_entry_d) as l_year,
|
||||
sum(ol_amount) as revenue
|
||||
FROM
|
||||
supplier,
|
||||
stock,
|
||||
order_line,
|
||||
oorder,
|
||||
customer,
|
||||
nation n1,
|
||||
nation n2
|
||||
WHERE ol_supply_w_id = s_w_id
|
||||
AND ol_i_id = s_i_id
|
||||
AND mod((s_w_id * s_i_id), 10000) = su_suppkey
|
||||
AND ol_w_id = o_w_id
|
||||
AND ol_d_id = o_d_id
|
||||
AND ol_o_id = o_id
|
||||
AND c_id = o_c_id
|
||||
AND c_w_id = o_w_id
|
||||
AND c_d_id = o_d_id
|
||||
AND su_nationkey = n1.n_nationkey
|
||||
AND ascii(substr(c_state,1,1)) = n2.n_nationkey
|
||||
AND (
|
||||
(n1.n_name = 'Germany' AND n2.n_name = 'Cambodia')
|
||||
OR (n1.n_name = 'Cambodia' AND n2.n_name = 'Germany')
|
||||
)
|
||||
AND ol_delivery_d BETWEEN '2007-01-02 00:00:00.000000' AND '2012-01-02 00:00:00.000000'
|
||||
GROUP BY
|
||||
su_nationkey,
|
||||
substr(c_state,1,1),
|
||||
extract(year from o_entry_d)
|
||||
ORDER BY
|
||||
su_nationkey,
|
||||
cust_nation,
|
||||
l_year;
|
||||
supp_nation | cust_nation | l_year | revenue
|
||||
-------------+-------------+--------+---------
|
||||
9 | C | 2008 | 3.00
|
||||
(1 row)
|
||||
|
||||
-- Query 8
|
||||
SELECT
|
||||
extract(year from o_entry_d) as l_year,
|
||||
sum(case when n2.n_name = 'Germany' then ol_amount else 0 end) / sum(ol_amount) as mkt_share
|
||||
FROM
|
||||
item,
|
||||
supplier,
|
||||
stock,
|
||||
order_line,
|
||||
oorder,
|
||||
customer,
|
||||
nation n1,
|
||||
nation n2,
|
||||
region
|
||||
WHERE i_id = s_i_id
|
||||
AND ol_i_id = s_i_id
|
||||
AND ol_supply_w_id = s_w_id
|
||||
AND mod((s_w_id * s_i_id),10000) = su_suppkey
|
||||
AND ol_w_id = o_w_id
|
||||
AND ol_d_id = o_d_id
|
||||
AND ol_o_id = o_id
|
||||
AND c_id = o_c_id
|
||||
AND c_w_id = o_w_id
|
||||
AND c_d_id = o_d_id
|
||||
AND n1.n_nationkey = ascii(substr(c_state,1,1))
|
||||
AND n1.n_regionkey = r_regionkey
|
||||
AND ol_i_id < 1000
|
||||
AND r_name = 'Europe'
|
||||
AND su_nationkey = n2.n_nationkey
|
||||
AND o_entry_d BETWEEN '2007-01-02 00:00:00.000000' AND '2012-01-02 00:00:00.000000'
|
||||
AND i_data LIKE '%b'
|
||||
AND i_id = ol_i_id
|
||||
GROUP BY extract(YEAR FROM o_entry_d)
|
||||
ORDER BY l_year;
|
||||
l_year | mkt_share
|
||||
--------+------------------------
|
||||
2008 | 0.50000000000000000000
|
||||
(1 row)
|
||||
|
||||
-- Query 9
|
||||
SELECT
|
||||
n_name,
|
||||
extract(year from o_entry_d) as l_year,
|
||||
sum(ol_amount) as sum_profit
|
||||
FROM
|
||||
item,
|
||||
stock,
|
||||
supplier,
|
||||
order_line,
|
||||
oorder,
|
||||
nation
|
||||
WHERE ol_i_id = s_i_id
|
||||
AND ol_supply_w_id = s_w_id
|
||||
AND mod((s_w_id * s_i_id), 10000) = su_suppkey
|
||||
AND ol_w_id = o_w_id
|
||||
AND ol_d_id = o_d_id
|
||||
AND ol_o_id = o_id
|
||||
AND ol_i_id = i_id
|
||||
AND su_nationkey = n_nationkey
|
||||
AND i_data LIKE '%b' -- this used to be %BB but that will not work with our small dataset
|
||||
GROUP BY
|
||||
n_name,
|
||||
extract(YEAR FROM o_entry_d)
|
||||
ORDER BY
|
||||
n_name,
|
||||
l_year DESC;
|
||||
n_name | l_year | sum_profit
|
||||
---------------------------+--------+------------
|
||||
Germany | 2008 | 3.00
|
||||
The Netherlands | 2008 | 2.00
|
||||
United States | 2008 | 1.00
|
||||
(3 rows)
|
||||
|
||||
-- Query 10
|
||||
SELECT
|
||||
c_id,
|
||||
c_last,
|
||||
sum(ol_amount) AS revenue,
|
||||
c_city,
|
||||
c_phone,
|
||||
n_name
|
||||
FROM
|
||||
customer,
|
||||
oorder,
|
||||
order_line,
|
||||
nation
|
||||
WHERE c_id = o_c_id
|
||||
AND c_w_id = o_w_id
|
||||
AND c_d_id = o_d_id
|
||||
AND ol_w_id = o_w_id
|
||||
AND ol_d_id = o_d_id
|
||||
AND ol_o_id = o_id
|
||||
AND o_entry_d >= '2007-01-02 00:00:00.000000'
|
||||
AND o_entry_d <= ol_delivery_d
|
||||
AND n_nationkey = ascii(substr(c_state,1,1))
|
||||
GROUP BY
|
||||
c_id,
|
||||
c_last,
|
||||
c_city,
|
||||
c_phone,
|
||||
n_name
|
||||
ORDER BY revenue DESC;
|
||||
c_id | c_last | revenue | c_city | c_phone | n_name
|
||||
------+--------+---------+-----------+------------------+---------------------------
|
||||
10 | John | 10.00 | Some City | +1 000 0000000 | Cambodia
|
||||
9 | John | 9.00 | Some City | +1 000 0000000 | Cambodia
|
||||
8 | John | 8.00 | Some City | +1 000 0000000 | Cambodia
|
||||
7 | John | 7.00 | Some City | +1 000 0000000 | Cambodia
|
||||
6 | John | 6.00 | Some City | +1 000 0000000 | Cambodia
|
||||
5 | John | 5.00 | Some City | +1 000 0000000 | Cambodia
|
||||
4 | John | 4.00 | Some City | +1 000 0000000 | Cambodia
|
||||
3 | John | 3.00 | Some City | +1 000 0000000 | Cambodia
|
||||
2 | John | 2.00 | Some City | +1 000 0000000 | Cambodia
|
||||
1 | John | 1.00 | Some City | +1 000 0000000 | Cambodia
|
||||
0 | John | 0.00 | Some City | +1 000 0000000 | Cambodia
|
||||
(11 rows)
|
||||
|
||||
-- Query 11
|
||||
SELECT
|
||||
s_i_id,
|
||||
sum(s_order_cnt) AS ordercount
|
||||
FROM
|
||||
stock,
|
||||
supplier,
|
||||
nation
|
||||
WHERE mod((s_w_id * s_i_id),10000) = su_suppkey
|
||||
AND su_nationkey = n_nationkey
|
||||
AND n_name = 'Germany'
|
||||
GROUP BY s_i_id
|
||||
HAVING sum(s_order_cnt) >
|
||||
(SELECT sum(s_order_cnt) * .005
|
||||
FROM
|
||||
stock,
|
||||
supplier,
|
||||
nation
|
||||
WHERE mod((s_w_id * s_i_id),10000) = su_suppkey
|
||||
AND su_nationkey = n_nationkey
|
||||
AND n_name = 'Germany')
|
||||
ORDER BY ordercount DESC;
|
||||
s_i_id | ordercount
|
||||
--------+------------
|
||||
3 | 3
|
||||
(1 row)
|
||||
|
||||
-- Query 12
|
||||
SELECT
|
||||
o_ol_cnt,
|
||||
sum(case when o_carrier_id = 1 or o_carrier_id = 2 then 1 else 0 end) as high_line_count,
|
||||
sum(case when o_carrier_id <> 1 and o_carrier_id <> 2 then 1 else 0 end) as low_line_count
|
||||
FROM
|
||||
oorder,
|
||||
order_line
|
||||
WHERE ol_w_id = o_w_id
|
||||
AND ol_d_id = o_d_id
|
||||
AND ol_o_id = o_id
|
||||
AND o_entry_d <= ol_delivery_d
|
||||
AND ol_delivery_d < '2020-01-01 00:00:00.000000'
|
||||
GROUP BY o_ol_cnt
|
||||
ORDER BY o_ol_cnt;
|
||||
o_ol_cnt | high_line_count | low_line_count
|
||||
----------+-----------------+----------------
|
||||
1 | 2 | 9
|
||||
(1 row)
|
||||
|
||||
-- Query 13
|
||||
SELECT
|
||||
c_count,
|
||||
count(*) AS custdist
|
||||
FROM (SELECT
|
||||
c_id,
|
||||
count(o_id)
|
||||
FROM customer
|
||||
LEFT OUTER JOIN oorder ON (
|
||||
c_w_id = o_w_id
|
||||
AND c_d_id = o_d_id
|
||||
AND c_id = o_c_id
|
||||
AND o_carrier_id > 8)
|
||||
GROUP BY c_id) AS c_orders (c_id, c_count)
|
||||
GROUP BY c_count
|
||||
ORDER BY
|
||||
custdist DESC,
|
||||
c_count DESC;
|
||||
c_count | custdist
|
||||
---------+----------
|
||||
0 | 9
|
||||
1 | 2
|
||||
(2 rows)
|
||||
|
||||
-- Query 14
|
||||
SELECT
|
||||
100.00 * sum(CASE WHEN i_data LIKE 'PR%' THEN ol_amount ELSE 0 END) / (1+sum(ol_amount)) AS promo_revenue
|
||||
FROM
|
||||
order_line,
|
||||
item
|
||||
WHERE ol_i_id = i_id
|
||||
AND ol_delivery_d >= '2007-01-02 00:00:00.000000'
|
||||
AND ol_delivery_d < '2020-01-02 00:00:00.000000';
|
||||
promo_revenue
|
||||
------------------------
|
||||
0.00000000000000000000
|
||||
(1 row)
|
||||
|
||||
-- Query 15
|
||||
WITH revenue (supplier_no, total_revenue) AS (
|
||||
SELECT
|
||||
mod((s_w_id * s_i_id),10000) AS supplier_no,
|
||||
sum(ol_amount) AS total_revenue
|
||||
FROM
|
||||
order_line,
|
||||
stock
|
||||
WHERE ol_i_id = s_i_id
|
||||
AND ol_supply_w_id = s_w_id
|
||||
AND ol_delivery_d >= '2007-01-02 00:00:00.000000'
|
||||
GROUP BY mod((s_w_id * s_i_id),10000))
|
||||
SELECT
|
||||
su_suppkey,
|
||||
su_name,
|
||||
su_address,
|
||||
su_phone,
|
||||
total_revenue
|
||||
FROM
|
||||
supplier,
|
||||
revenue
|
||||
WHERE su_suppkey = supplier_no
|
||||
AND total_revenue = (SELECT max(total_revenue) FROM revenue)
|
||||
ORDER BY su_suppkey;
|
||||
su_suppkey | su_name | su_address | su_phone | total_revenue
|
||||
------------+---------------------------+------------+-----------------+---------------
|
||||
9 | abc | def | ghi | 3.00
|
||||
(1 row)
|
||||
|
||||
--Q16
|
||||
SELECT
|
||||
i_name,
|
||||
substr(i_data, 1, 3) AS brand,
|
||||
i_price,
|
||||
count(DISTINCT (mod((s_w_id * s_i_id),10000))) AS supplier_cnt
|
||||
FROM
|
||||
stock,
|
||||
item
|
||||
WHERE i_id = s_i_id
|
||||
AND i_data NOT LIKE 'zz%'
|
||||
AND (mod((s_w_id * s_i_id),10000) NOT IN
|
||||
(SELECT su_suppkey
|
||||
FROM supplier
|
||||
WHERE su_comment LIKE '%bad%'))
|
||||
GROUP BY
|
||||
i_name,
|
||||
substr(i_data, 1, 3),
|
||||
i_price
|
||||
ORDER BY supplier_cnt DESC;
|
||||
i_name | brand | i_price | supplier_cnt
|
||||
----------+-------+---------+--------------
|
||||
Keyboard | co | 50.00 | 3
|
||||
(1 row)
|
||||
|
||||
--Q17
|
||||
SELECT
|
||||
sum(ol_amount) / 2.0 AS avg_yearly
|
||||
FROM
|
||||
order_line,
|
||||
(SELECT
|
||||
i_id,
|
||||
avg(ol_quantity) AS a
|
||||
FROM
|
||||
item,
|
||||
order_line
|
||||
WHERE i_data LIKE '%b'
|
||||
AND ol_i_id = i_id
|
||||
GROUP BY i_id) t
|
||||
WHERE ol_i_id = t.i_id;
|
||||
avg_yearly
|
||||
---------------------
|
||||
27.5000000000000000
|
||||
(1 row)
|
||||
|
||||
-- this filter was at the end causing the dataset to be empty. it should not have any
|
||||
-- influence on how the query gets planned so I removed the clause
|
||||
--AND ol_quantity < t.a;
|
||||
-- Query 18
|
||||
SELECT
|
||||
c_last,
|
||||
c_id o_id,
|
||||
o_entry_d,
|
||||
o_ol_cnt,
|
||||
sum(ol_amount)
|
||||
FROM
|
||||
customer,
|
||||
oorder,
|
||||
order_line
|
||||
WHERE c_id = o_c_id
|
||||
AND c_w_id = o_w_id
|
||||
AND c_d_id = o_d_id
|
||||
AND ol_w_id = o_w_id
|
||||
AND ol_d_id = o_d_id
|
||||
AND ol_o_id = o_id
|
||||
GROUP BY
|
||||
o_id,
|
||||
o_w_id,
|
||||
o_d_id,
|
||||
c_id,
|
||||
c_last,
|
||||
o_entry_d,
|
||||
o_ol_cnt
|
||||
HAVING sum(ol_amount) > 5 -- was 200, but thats too big for the dataset
|
||||
ORDER BY
|
||||
sum(ol_amount) DESC,
|
||||
o_entry_d;
|
||||
c_last | o_id | o_entry_d | o_ol_cnt | sum
|
||||
--------+------+--------------------------+----------+-------
|
||||
John | 10 | Fri Oct 17 00:00:00 2008 | 1 | 10.00
|
||||
John | 9 | Fri Oct 17 00:00:00 2008 | 1 | 9.00
|
||||
John | 8 | Fri Oct 17 00:00:00 2008 | 1 | 8.00
|
||||
John | 7 | Fri Oct 17 00:00:00 2008 | 1 | 7.00
|
||||
John | 6 | Fri Oct 17 00:00:00 2008 | 1 | 6.00
|
||||
(5 rows)
|
||||
|
||||
-- Query 19
|
||||
SELECT
|
||||
sum(ol_amount) AS revenue
|
||||
FROM
|
||||
order_line,
|
||||
item
|
||||
WHERE ( ol_i_id = i_id
|
||||
AND i_data LIKE '%a'
|
||||
AND ol_quantity >= 1
|
||||
AND ol_quantity <= 10
|
||||
AND i_price BETWEEN 1 AND 400000
|
||||
AND ol_w_id IN (1,2,3))
|
||||
OR ( ol_i_id = i_id
|
||||
AND i_data LIKE '%b'
|
||||
AND ol_quantity >= 1
|
||||
AND ol_quantity <= 10
|
||||
AND i_price BETWEEN 1 AND 400000
|
||||
AND ol_w_id IN (1,2,4))
|
||||
OR ( ol_i_id = i_id
|
||||
AND i_data LIKE '%c'
|
||||
AND ol_quantity >= 1
|
||||
AND ol_quantity <= 10
|
||||
AND i_price BETWEEN 1 AND 400000
|
||||
AND ol_w_id IN (1,5,3));
|
||||
revenue
|
||||
---------
|
||||
7.00
|
||||
(1 row)
|
||||
|
||||
-- Query 20
|
||||
SELECT
|
||||
su_name,
|
||||
su_address
|
||||
FROM
|
||||
supplier,
|
||||
nation
|
||||
WHERE su_suppkey in
|
||||
(SELECT
|
||||
mod(s_i_id * s_w_id, 10000)
|
||||
FROM
|
||||
stock,
|
||||
order_line
|
||||
WHERE s_i_id IN
|
||||
(SELECT i_id
|
||||
FROM item
|
||||
WHERE i_data LIKE 'co%')
|
||||
AND ol_i_id = s_i_id
|
||||
AND ol_delivery_d > '2008-05-23 12:00:00' -- was 2010, but our order is in 2008
|
||||
GROUP BY s_i_id, s_w_id, s_quantity
|
||||
HAVING 2*s_quantity > sum(ol_quantity))
|
||||
AND su_nationkey = n_nationkey
|
||||
AND n_name = 'Germany'
|
||||
ORDER BY su_name;
|
||||
su_name | su_address
|
||||
---------------------------+------------
|
||||
abc | def
|
||||
(1 row)
|
||||
|
||||
-- Query 21
|
||||
-- DATA SET DOES NOT COVER THIS QUERY
|
||||
SELECT
|
||||
su_name,
|
||||
count(*) AS numwait
|
||||
FROM
|
||||
supplier,
|
||||
order_line l1,
|
||||
oorder,
|
||||
stock,
|
||||
nation
|
||||
WHERE ol_o_id = o_id
|
||||
AND ol_w_id = o_w_id
|
||||
AND ol_d_id = o_d_id
|
||||
AND ol_w_id = s_w_id
|
||||
AND ol_i_id = s_i_id
|
||||
AND mod((s_w_id * s_i_id),10000) = su_suppkey
|
||||
AND l1.ol_delivery_d > o_entry_d
|
||||
AND NOT exists (SELECT *
|
||||
FROM order_line l2
|
||||
WHERE l2.ol_o_id = l1.ol_o_id
|
||||
AND l2.ol_w_id = l1.ol_w_id
|
||||
AND l2.ol_d_id = l1.ol_d_id
|
||||
AND l2.ol_delivery_d > l1.ol_delivery_d)
|
||||
AND su_nationkey = n_nationkey
|
||||
AND n_name = 'Germany'
|
||||
GROUP BY su_name
|
||||
ORDER BY
|
||||
numwait desc,
|
||||
su_name;
|
||||
su_name | numwait
|
||||
---------+---------
|
||||
(0 rows)
|
||||
|
||||
-- Query 22
|
||||
-- DATA SET DOES NOT COVER THIS QUERY
|
||||
SELECT
|
||||
substr(c_state,1,1) AS country,
|
||||
count(*) AS numcust,
|
||||
sum(c_balance) AS totacctbal
|
||||
FROM customer
|
||||
WHERE substr(c_phone,1,1) in ('1','2','3','4','5','6','7')
|
||||
AND c_balance > (SELECT avg(c_BALANCE)
|
||||
FROM customer
|
||||
WHERE c_balance > 0.00
|
||||
AND substr(c_phone,1,1) in ('1','2','3','4','5','6','7'))
|
||||
AND NOT exists (SELECT *
|
||||
FROM oorder
|
||||
WHERE o_c_id = c_id
|
||||
AND o_w_id = c_w_id
|
||||
AND o_d_id = c_d_id)
|
||||
GROUP BY substr(c_state,1,1)
|
||||
ORDER BY substr(c_state,1,1);
|
||||
country | numcust | totacctbal
|
||||
---------+---------+------------
|
||||
(0 rows)
|
||||
|
||||
SET client_min_messages TO WARNING;
|
||||
DROP SCHEMA chbenchmark_all_queries CASCADE;
|
|
@ -0,0 +1,56 @@
|
|||
SET citus.next_shard_id TO 1670000;
|
||||
CREATE SCHEMA expression_reference_join;
|
||||
SET search_path TO expression_reference_join;
|
||||
SET citus.shard_count TO 4;
|
||||
SET citus.enable_repartition_joins TO on;
|
||||
CREATE TABLE ref (a int, b int);
|
||||
CREATE TABLE test (x int, y int);
|
||||
INSERT INTO ref VALUES
|
||||
(2,2),
|
||||
(4,4);
|
||||
INSERT INTO test VALUES
|
||||
(1,2),
|
||||
(2,2);
|
||||
SELECT create_reference_table('ref');
|
||||
NOTICE: Copying data from local table...
|
||||
create_reference_table
|
||||
------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT create_distributed_table('test', 'x');
|
||||
NOTICE: Copying data from local table...
|
||||
create_distributed_table
|
||||
--------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
-- PR 3180 implements expressions in join clauses to reference tables to support CHbenCHmark queries 7/8/9
|
||||
-- plannable as a repartition + reference join, now with an expression in the join clause
|
||||
SELECT *
|
||||
FROM
|
||||
test t1 JOIN test t2 USING (y), -- causes repartition, which makes this not routable or pushdownable
|
||||
ref a
|
||||
WHERE t2.y * 2 = a.a
|
||||
ORDER BY 1,2,3;
|
||||
y | x | x | a | b
|
||||
---+---+---+---+---
|
||||
2 | 1 | 1 | 4 | 4
|
||||
2 | 1 | 2 | 4 | 4
|
||||
2 | 2 | 1 | 4 | 4
|
||||
2 | 2 | 2 | 4 | 4
|
||||
(4 rows)
|
||||
|
||||
-- The join clause is wider than it used to be, causing this query to be recognized by the LogicalPlanner as a repartition join.
|
||||
-- Unplannable query due to a three-way join which causes no valid path (besides the cartesian product) to be found
|
||||
SELECT *
|
||||
FROM
|
||||
test t1 JOIN test t2 USING (y), -- causes repartition, which makes this not routable or pushdownable
|
||||
ref a,
|
||||
ref b
|
||||
WHERE t2.y - a.a - b.b = 0
|
||||
ORDER BY 1,2,3;
|
||||
ERROR: cannot perform distributed planning on this query
|
||||
DETAIL: Cartesian products are currently unsupported
|
||||
SET client_min_messages TO WARNING;
|
||||
DROP SCHEMA expression_reference_join CASCADE;
|
|
@ -243,7 +243,7 @@ DEBUG: dual partition column types do not match
|
|||
DEBUG: single partition column types do not match
|
||||
DEBUG: single partition column types do not match
|
||||
DEBUG: dual partition column types do not match
|
||||
LOG: join order: [ "single_hash_repartition_first" ][ local partition join "single_hash_repartition_first" ][ cartesian product "single_hash_repartition_second" ]
|
||||
LOG: join order: [ "single_hash_repartition_second" ][ cartesian product "single_hash_repartition_first" ][ dual partition join "single_hash_repartition_first" ]
|
||||
ERROR: cannot perform distributed planning on this query
|
||||
DETAIL: Cartesian products are currently unsupported
|
||||
-- single repartition query in CTE
|
||||
|
|
|
@ -73,7 +73,7 @@ test: multi_reference_table multi_select_for_update relation_access_tracking
|
|||
test: custom_aggregate_support aggregate_support
|
||||
test: multi_average_expression multi_working_columns multi_having_pushdown
|
||||
test: multi_array_agg multi_limit_clause multi_orderby_limit_pushdown
|
||||
test: multi_jsonb_agg multi_jsonb_object_agg multi_json_agg multi_json_object_agg bool_agg ch_bench_having ch_bench_subquery_repartition
|
||||
test: multi_jsonb_agg multi_jsonb_object_agg multi_json_agg multi_json_object_agg bool_agg ch_bench_having ch_bench_subquery_repartition chbenchmark_all_queries expression_reference_join
|
||||
test: multi_agg_type_conversion multi_count_type_conversion
|
||||
test: multi_partition_pruning single_hash_repartition_join
|
||||
test: multi_join_pruning multi_hash_pruning
|
||||
|
|
|
@ -0,0 +1,722 @@
|
|||
SET citus.next_shard_id TO 1650000;
|
||||
CREATE SCHEMA chbenchmark_all_queries;
|
||||
SET search_path TO chbenchmark_all_queries;
|
||||
|
||||
SET citus.enable_repartition_joins TO on;
|
||||
CREATE TABLE order_line (
|
||||
ol_w_id int NOT NULL,
|
||||
ol_d_id int NOT NULL,
|
||||
ol_o_id int NOT NULL,
|
||||
ol_number int NOT NULL,
|
||||
ol_i_id int NOT NULL,
|
||||
ol_delivery_d timestamp NULL DEFAULT NULL,
|
||||
ol_amount decimal(6,2) NOT NULL,
|
||||
ol_supply_w_id int NOT NULL,
|
||||
ol_quantity decimal(2,0) NOT NULL,
|
||||
ol_dist_info char(24) NOT NULL,
|
||||
PRIMARY KEY (ol_w_id,ol_d_id,ol_o_id,ol_number)
|
||||
);
|
||||
CREATE TABLE new_order (
|
||||
no_w_id int NOT NULL,
|
||||
no_d_id int NOT NULL,
|
||||
no_o_id int NOT NULL,
|
||||
PRIMARY KEY (no_w_id,no_d_id,no_o_id)
|
||||
);
|
||||
CREATE TABLE stock (
|
||||
s_w_id int NOT NULL,
|
||||
s_i_id int NOT NULL,
|
||||
s_quantity decimal(4,0) NOT NULL,
|
||||
s_ytd decimal(8,2) NOT NULL,
|
||||
s_order_cnt int NOT NULL,
|
||||
s_remote_cnt int NOT NULL,
|
||||
s_data varchar(50) NOT NULL,
|
||||
s_dist_01 char(24) NOT NULL,
|
||||
s_dist_02 char(24) NOT NULL,
|
||||
s_dist_03 char(24) NOT NULL,
|
||||
s_dist_04 char(24) NOT NULL,
|
||||
s_dist_05 char(24) NOT NULL,
|
||||
s_dist_06 char(24) NOT NULL,
|
||||
s_dist_07 char(24) NOT NULL,
|
||||
s_dist_08 char(24) NOT NULL,
|
||||
s_dist_09 char(24) NOT NULL,
|
||||
s_dist_10 char(24) NOT NULL,
|
||||
PRIMARY KEY (s_w_id,s_i_id)
|
||||
);
|
||||
CREATE TABLE oorder (
|
||||
o_w_id int NOT NULL,
|
||||
o_d_id int NOT NULL,
|
||||
o_id int NOT NULL,
|
||||
o_c_id int NOT NULL,
|
||||
o_carrier_id int DEFAULT NULL,
|
||||
o_ol_cnt decimal(2,0) NOT NULL,
|
||||
o_all_local decimal(1,0) NOT NULL,
|
||||
o_entry_d timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
PRIMARY KEY (o_w_id,o_d_id,o_id),
|
||||
UNIQUE (o_w_id,o_d_id,o_c_id,o_id)
|
||||
);
|
||||
CREATE TABLE history (
|
||||
h_c_id int NOT NULL,
|
||||
h_c_d_id int NOT NULL,
|
||||
h_c_w_id int NOT NULL,
|
||||
h_d_id int NOT NULL,
|
||||
h_w_id int NOT NULL,
|
||||
h_date timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
h_amount decimal(6,2) NOT NULL,
|
||||
h_data varchar(24) NOT NULL
|
||||
);
|
||||
CREATE TABLE customer (
|
||||
c_w_id int NOT NULL,
|
||||
c_d_id int NOT NULL,
|
||||
c_id int NOT NULL,
|
||||
c_discount decimal(4,4) NOT NULL,
|
||||
c_credit char(2) NOT NULL,
|
||||
c_last varchar(16) NOT NULL,
|
||||
c_first varchar(16) NOT NULL,
|
||||
c_credit_lim decimal(12,2) NOT NULL,
|
||||
c_balance decimal(12,2) NOT NULL,
|
||||
c_ytd_payment float NOT NULL,
|
||||
c_payment_cnt int NOT NULL,
|
||||
c_delivery_cnt int NOT NULL,
|
||||
c_street_1 varchar(20) NOT NULL,
|
||||
c_street_2 varchar(20) NOT NULL,
|
||||
c_city varchar(20) NOT NULL,
|
||||
c_state char(2) NOT NULL,
|
||||
c_zip char(9) NOT NULL,
|
||||
c_phone char(16) NOT NULL,
|
||||
c_since timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
c_middle char(2) NOT NULL,
|
||||
c_data varchar(500) NOT NULL,
|
||||
PRIMARY KEY (c_w_id,c_d_id,c_id)
|
||||
);
|
||||
CREATE TABLE district (
|
||||
d_w_id int NOT NULL,
|
||||
d_id int NOT NULL,
|
||||
d_ytd decimal(12,2) NOT NULL,
|
||||
d_tax decimal(4,4) NOT NULL,
|
||||
d_next_o_id int NOT NULL,
|
||||
d_name varchar(10) NOT NULL,
|
||||
d_street_1 varchar(20) NOT NULL,
|
||||
d_street_2 varchar(20) NOT NULL,
|
||||
d_city varchar(20) NOT NULL,
|
||||
d_state char(2) NOT NULL,
|
||||
d_zip char(9) NOT NULL,
|
||||
PRIMARY KEY (d_w_id,d_id)
|
||||
);
|
||||
CREATE TABLE item (
|
||||
i_id int NOT NULL,
|
||||
i_name varchar(24) NOT NULL,
|
||||
i_price decimal(5,2) NOT NULL,
|
||||
i_data varchar(50) NOT NULL,
|
||||
i_im_id int NOT NULL,
|
||||
PRIMARY KEY (i_id)
|
||||
);
|
||||
CREATE TABLE warehouse (
|
||||
w_id int NOT NULL,
|
||||
w_ytd decimal(12,2) NOT NULL,
|
||||
w_tax decimal(4,4) NOT NULL,
|
||||
w_name varchar(10) NOT NULL,
|
||||
w_street_1 varchar(20) NOT NULL,
|
||||
w_street_2 varchar(20) NOT NULL,
|
||||
w_city varchar(20) NOT NULL,
|
||||
w_state char(2) NOT NULL,
|
||||
w_zip char(9) NOT NULL,
|
||||
PRIMARY KEY (w_id)
|
||||
);
|
||||
CREATE TABLE region (
|
||||
r_regionkey int not null,
|
||||
r_name char(55) not null,
|
||||
r_comment char(152) not null,
|
||||
PRIMARY KEY ( r_regionkey )
|
||||
);
|
||||
CREATE TABLE nation (
|
||||
n_nationkey int not null,
|
||||
n_name char(25) not null,
|
||||
n_regionkey int not null,
|
||||
n_comment char(152) not null,
|
||||
PRIMARY KEY ( n_nationkey )
|
||||
);
|
||||
CREATE TABLE supplier (
|
||||
su_suppkey int not null,
|
||||
su_name char(25) not null,
|
||||
su_address varchar(40) not null,
|
||||
su_nationkey int not null,
|
||||
su_phone char(15) not null,
|
||||
su_acctbal numeric(12,2) not null,
|
||||
su_comment char(101) not null,
|
||||
PRIMARY KEY ( su_suppkey )
|
||||
);
|
||||
|
||||
SELECT create_distributed_table('order_line','ol_w_id');
|
||||
SELECT create_distributed_table('new_order','no_w_id');
|
||||
SELECT create_distributed_table('stock','s_w_id');
|
||||
SELECT create_distributed_table('oorder','o_w_id');
|
||||
SELECT create_distributed_table('history','h_w_id');
|
||||
SELECT create_distributed_table('customer','c_w_id');
|
||||
SELECT create_distributed_table('district','d_w_id');
|
||||
SELECT create_distributed_table('warehouse','w_id');
|
||||
SELECT create_reference_table('item');
|
||||
SELECT create_reference_table('region');
|
||||
SELECT create_reference_table('nation');
|
||||
SELECT create_reference_table('supplier');
|
||||
|
||||
TRUNCATE order_line, new_order, stock, oorder, history, customer, district, warehouse, item, region, nation, supplier; -- for easy copy in development
|
||||
INSERT INTO supplier SELECT c, 'abc', 'def', c, 'ghi', c, 'jkl' FROM generate_series(0,10) AS c;
|
||||
INSERT INTO new_order SELECT c, c, c FROM generate_series(0,10) AS c;
|
||||
INSERT INTO stock SELECT c,c,c,c,c,c, 'abc','abc','abc','abc','abc','abc','abc','abc','abc','abc','abc' FROM generate_series(1,3) AS c;
|
||||
INSERT INTO stock SELECT c, 5000,c,c,c,c, 'abc','abc','abc','abc','abc','abc','abc','abc','abc','abc','abc' FROM generate_series(1,3) AS c; -- mod(2*5000,10000) == 0
|
||||
INSERT INTO order_line SELECT c, c, c, c, c, '2008-10-17 00:00:00.000000', c, c, c, 'abc' FROM generate_series(0,10) AS c;
|
||||
INSERT INTO oorder SELECT c, c, c, c, c, 1, 1, '2008-10-17 00:00:00.000000' FROM generate_series(0,10) AS c;
|
||||
INSERT INTO customer SELECT c, c, c, 0, 'XX', 'John', 'Doe', 1000, 0, 0, c, c, 'Name', 'Street', 'Some City', 'CA', '12345', '+1 000 0000000', '2007-01-02 00:00:00.000000', 'NA', 'nothing special' FROM generate_series(0,10) AS c;
|
||||
INSERT INTO item SELECT c, 'Keyboard', 50, 'co b', c FROM generate_series(0,10) AS c; --co% and %b filters all around
|
||||
INSERT INTO region VALUES
|
||||
(1, 'Not Europe', 'Big'),
|
||||
(2, 'Europe', 'Big');
|
||||
INSERT INTO nation VALUES
|
||||
(1, 'United States', 1, 'Also Big'),
|
||||
(4, 'The Netherlands', 2, 'Flat'),
|
||||
(9, 'Germany', 2, 'Germany must be in here for Q7'),
|
||||
(67, 'Cambodia', 2, 'I don''t understand how we got from California to Cambodia but I will take it, it also is not in Europe, but we need it to be for Q8');
|
||||
|
||||
-- Query 1
|
||||
SELECT
|
||||
ol_number,
|
||||
sum(ol_quantity) as sum_qty,
|
||||
sum(ol_amount) as sum_amount,
|
||||
avg(ol_quantity) as avg_qty,
|
||||
avg(ol_amount) as avg_amount,
|
||||
count(*) as count_order
|
||||
FROM order_line
|
||||
WHERE ol_delivery_d > '2007-01-02 00:00:00.000000'
|
||||
GROUP BY ol_number
|
||||
ORDER BY ol_number;
|
||||
|
||||
-- Query 2
|
||||
SELECT
|
||||
su_suppkey,
|
||||
su_name,
|
||||
n_name,
|
||||
i_id,
|
||||
i_name,
|
||||
su_address,
|
||||
su_phone,
|
||||
su_comment
|
||||
FROM
|
||||
item,
|
||||
supplier,
|
||||
stock,
|
||||
nation,
|
||||
region,
|
||||
(SELECT
|
||||
s_i_id AS m_i_id,
|
||||
min(s_quantity) as m_s_quantity
|
||||
FROM
|
||||
stock,
|
||||
supplier,
|
||||
nation,
|
||||
region
|
||||
WHERE mod((s_w_id*s_i_id),10000)=su_suppkey
|
||||
AND su_nationkey=n_nationkey
|
||||
AND n_regionkey=r_regionkey
|
||||
AND r_name LIKE 'Europ%'
|
||||
GROUP BY s_i_id) m
|
||||
WHERE i_id = s_i_id
|
||||
AND mod((s_w_id * s_i_id), 10000) = su_suppkey
|
||||
AND su_nationkey = n_nationkey
|
||||
AND n_regionkey = r_regionkey
|
||||
AND i_data LIKE '%b'
|
||||
AND r_name LIKE 'Europ%'
|
||||
AND i_id = m_i_id
|
||||
AND s_quantity = m_s_quantity
|
||||
ORDER BY
|
||||
n_name,
|
||||
su_name,
|
||||
i_id;
|
||||
|
||||
-- Query 3
|
||||
SELECT
|
||||
ol_o_id,
|
||||
ol_w_id,
|
||||
ol_d_id,
|
||||
sum(ol_amount) AS revenue,
|
||||
o_entry_d
|
||||
FROM
|
||||
customer,
|
||||
new_order,
|
||||
oorder,
|
||||
order_line
|
||||
WHERE c_state LIKE 'C%' -- used to ba A%, but C% works with our small data
|
||||
AND c_id = o_c_id
|
||||
AND c_w_id = o_w_id
|
||||
AND c_d_id = o_d_id
|
||||
AND no_w_id = o_w_id
|
||||
AND no_d_id = o_d_id
|
||||
AND no_o_id = o_id
|
||||
AND ol_w_id = o_w_id
|
||||
AND ol_d_id = o_d_id
|
||||
AND ol_o_id = o_id
|
||||
AND o_entry_d > '2007-01-02 00:00:00.000000'
|
||||
GROUP BY
|
||||
ol_o_id,
|
||||
ol_w_id,
|
||||
ol_d_id,
|
||||
o_entry_d
|
||||
ORDER BY
|
||||
revenue DESC,
|
||||
o_entry_d;
|
||||
|
||||
-- Query 4
|
||||
SELECT
|
||||
o_ol_cnt,
|
||||
count(*) as order_count
|
||||
FROM
|
||||
oorder
|
||||
WHERE o_entry_d >= '2007-01-02 00:00:00.000000'
|
||||
AND o_entry_d < '2012-01-02 00:00:00.000000'
|
||||
AND exists (SELECT *
|
||||
FROM order_line
|
||||
WHERE o_id = ol_o_id
|
||||
AND o_w_id = ol_w_id
|
||||
AND o_d_id = ol_d_id
|
||||
AND ol_delivery_d >= o_entry_d)
|
||||
GROUP BY o_ol_cnt
|
||||
ORDER BY o_ol_cnt;
|
||||
|
||||
-- Query 5
|
||||
SELECT
|
||||
n_name,
|
||||
sum(ol_amount) AS revenue
|
||||
FROM
|
||||
customer,
|
||||
oorder,
|
||||
order_line,
|
||||
stock,
|
||||
supplier,
|
||||
nation,
|
||||
region
|
||||
WHERE c_id = o_c_id
|
||||
AND c_w_id = o_w_id
|
||||
AND c_d_id = o_d_id
|
||||
AND ol_o_id = o_id
|
||||
AND ol_w_id = o_w_id
|
||||
AND ol_d_id=o_d_id
|
||||
AND ol_w_id = s_w_id
|
||||
AND ol_i_id = s_i_id
|
||||
AND mod((s_w_id * s_i_id),10000) = su_suppkey
|
||||
-- our dataset does not have the supplier in the same nation as the customer causing this
|
||||
-- join to filter out all the data. We verify later on that we can actually perform an
|
||||
-- ascii(substr(c_state,1,1)) == reference table column join later on so it should not
|
||||
-- matter we skip this filter here.
|
||||
--AND ascii(substr(c_state,1,1)) = su_nationkey
|
||||
AND su_nationkey = n_nationkey
|
||||
AND n_regionkey = r_regionkey
|
||||
AND r_name = 'Europe'
|
||||
AND o_entry_d >= '2007-01-02 00:00:00.000000'
|
||||
GROUP BY n_name
|
||||
ORDER BY revenue DESC;
|
||||
|
||||
-- Query 6
|
||||
SELECT
|
||||
sum(ol_amount) AS revenue
|
||||
FROM order_line
|
||||
WHERE ol_delivery_d >= '1999-01-01 00:00:00.000000'
|
||||
AND ol_delivery_d < '2020-01-01 00:00:00.000000'
|
||||
AND ol_quantity BETWEEN 1 AND 100000;
|
||||
|
||||
-- Query 7
|
||||
SELECT
|
||||
su_nationkey as supp_nation,
|
||||
substr(c_state,1,1) as cust_nation,
|
||||
extract(year from o_entry_d) as l_year,
|
||||
sum(ol_amount) as revenue
|
||||
FROM
|
||||
supplier,
|
||||
stock,
|
||||
order_line,
|
||||
oorder,
|
||||
customer,
|
||||
nation n1,
|
||||
nation n2
|
||||
WHERE ol_supply_w_id = s_w_id
|
||||
AND ol_i_id = s_i_id
|
||||
AND mod((s_w_id * s_i_id), 10000) = su_suppkey
|
||||
AND ol_w_id = o_w_id
|
||||
AND ol_d_id = o_d_id
|
||||
AND ol_o_id = o_id
|
||||
AND c_id = o_c_id
|
||||
AND c_w_id = o_w_id
|
||||
AND c_d_id = o_d_id
|
||||
AND su_nationkey = n1.n_nationkey
|
||||
AND ascii(substr(c_state,1,1)) = n2.n_nationkey
|
||||
AND (
|
||||
(n1.n_name = 'Germany' AND n2.n_name = 'Cambodia')
|
||||
OR (n1.n_name = 'Cambodia' AND n2.n_name = 'Germany')
|
||||
)
|
||||
AND ol_delivery_d BETWEEN '2007-01-02 00:00:00.000000' AND '2012-01-02 00:00:00.000000'
|
||||
GROUP BY
|
||||
su_nationkey,
|
||||
substr(c_state,1,1),
|
||||
extract(year from o_entry_d)
|
||||
ORDER BY
|
||||
su_nationkey,
|
||||
cust_nation,
|
||||
l_year;
|
||||
|
||||
-- Query 8
|
||||
SELECT
|
||||
extract(year from o_entry_d) as l_year,
|
||||
sum(case when n2.n_name = 'Germany' then ol_amount else 0 end) / sum(ol_amount) as mkt_share
|
||||
FROM
|
||||
item,
|
||||
supplier,
|
||||
stock,
|
||||
order_line,
|
||||
oorder,
|
||||
customer,
|
||||
nation n1,
|
||||
nation n2,
|
||||
region
|
||||
WHERE i_id = s_i_id
|
||||
AND ol_i_id = s_i_id
|
||||
AND ol_supply_w_id = s_w_id
|
||||
AND mod((s_w_id * s_i_id),10000) = su_suppkey
|
||||
AND ol_w_id = o_w_id
|
||||
AND ol_d_id = o_d_id
|
||||
AND ol_o_id = o_id
|
||||
AND c_id = o_c_id
|
||||
AND c_w_id = o_w_id
|
||||
AND c_d_id = o_d_id
|
||||
AND n1.n_nationkey = ascii(substr(c_state,1,1))
|
||||
AND n1.n_regionkey = r_regionkey
|
||||
AND ol_i_id < 1000
|
||||
AND r_name = 'Europe'
|
||||
AND su_nationkey = n2.n_nationkey
|
||||
AND o_entry_d BETWEEN '2007-01-02 00:00:00.000000' AND '2012-01-02 00:00:00.000000'
|
||||
AND i_data LIKE '%b'
|
||||
AND i_id = ol_i_id
|
||||
GROUP BY extract(YEAR FROM o_entry_d)
|
||||
ORDER BY l_year;
|
||||
|
||||
-- Query 9
|
||||
SELECT
|
||||
n_name,
|
||||
extract(year from o_entry_d) as l_year,
|
||||
sum(ol_amount) as sum_profit
|
||||
FROM
|
||||
item,
|
||||
stock,
|
||||
supplier,
|
||||
order_line,
|
||||
oorder,
|
||||
nation
|
||||
WHERE ol_i_id = s_i_id
|
||||
AND ol_supply_w_id = s_w_id
|
||||
AND mod((s_w_id * s_i_id), 10000) = su_suppkey
|
||||
AND ol_w_id = o_w_id
|
||||
AND ol_d_id = o_d_id
|
||||
AND ol_o_id = o_id
|
||||
AND ol_i_id = i_id
|
||||
AND su_nationkey = n_nationkey
|
||||
AND i_data LIKE '%b' -- this used to be %BB but that will not work with our small dataset
|
||||
GROUP BY
|
||||
n_name,
|
||||
extract(YEAR FROM o_entry_d)
|
||||
ORDER BY
|
||||
n_name,
|
||||
l_year DESC;
|
||||
|
||||
-- Query 10
|
||||
SELECT
|
||||
c_id,
|
||||
c_last,
|
||||
sum(ol_amount) AS revenue,
|
||||
c_city,
|
||||
c_phone,
|
||||
n_name
|
||||
FROM
|
||||
customer,
|
||||
oorder,
|
||||
order_line,
|
||||
nation
|
||||
WHERE c_id = o_c_id
|
||||
AND c_w_id = o_w_id
|
||||
AND c_d_id = o_d_id
|
||||
AND ol_w_id = o_w_id
|
||||
AND ol_d_id = o_d_id
|
||||
AND ol_o_id = o_id
|
||||
AND o_entry_d >= '2007-01-02 00:00:00.000000'
|
||||
AND o_entry_d <= ol_delivery_d
|
||||
AND n_nationkey = ascii(substr(c_state,1,1))
|
||||
GROUP BY
|
||||
c_id,
|
||||
c_last,
|
||||
c_city,
|
||||
c_phone,
|
||||
n_name
|
||||
ORDER BY revenue DESC;
|
||||
|
||||
-- Query 11
|
||||
SELECT
|
||||
s_i_id,
|
||||
sum(s_order_cnt) AS ordercount
|
||||
FROM
|
||||
stock,
|
||||
supplier,
|
||||
nation
|
||||
WHERE mod((s_w_id * s_i_id),10000) = su_suppkey
|
||||
AND su_nationkey = n_nationkey
|
||||
AND n_name = 'Germany'
|
||||
GROUP BY s_i_id
|
||||
HAVING sum(s_order_cnt) >
|
||||
(SELECT sum(s_order_cnt) * .005
|
||||
FROM
|
||||
stock,
|
||||
supplier,
|
||||
nation
|
||||
WHERE mod((s_w_id * s_i_id),10000) = su_suppkey
|
||||
AND su_nationkey = n_nationkey
|
||||
AND n_name = 'Germany')
|
||||
ORDER BY ordercount DESC;
|
||||
|
||||
-- Query 12
|
||||
SELECT
|
||||
o_ol_cnt,
|
||||
sum(case when o_carrier_id = 1 or o_carrier_id = 2 then 1 else 0 end) as high_line_count,
|
||||
sum(case when o_carrier_id <> 1 and o_carrier_id <> 2 then 1 else 0 end) as low_line_count
|
||||
FROM
|
||||
oorder,
|
||||
order_line
|
||||
WHERE ol_w_id = o_w_id
|
||||
AND ol_d_id = o_d_id
|
||||
AND ol_o_id = o_id
|
||||
AND o_entry_d <= ol_delivery_d
|
||||
AND ol_delivery_d < '2020-01-01 00:00:00.000000'
|
||||
GROUP BY o_ol_cnt
|
||||
ORDER BY o_ol_cnt;
|
||||
|
||||
-- Query 13
|
||||
SELECT
|
||||
c_count,
|
||||
count(*) AS custdist
|
||||
FROM (SELECT
|
||||
c_id,
|
||||
count(o_id)
|
||||
FROM customer
|
||||
LEFT OUTER JOIN oorder ON (
|
||||
c_w_id = o_w_id
|
||||
AND c_d_id = o_d_id
|
||||
AND c_id = o_c_id
|
||||
AND o_carrier_id > 8)
|
||||
GROUP BY c_id) AS c_orders (c_id, c_count)
|
||||
GROUP BY c_count
|
||||
ORDER BY
|
||||
custdist DESC,
|
||||
c_count DESC;
|
||||
|
||||
-- Query 14
|
||||
SELECT
|
||||
100.00 * sum(CASE WHEN i_data LIKE 'PR%' THEN ol_amount ELSE 0 END) / (1+sum(ol_amount)) AS promo_revenue
|
||||
FROM
|
||||
order_line,
|
||||
item
|
||||
WHERE ol_i_id = i_id
|
||||
AND ol_delivery_d >= '2007-01-02 00:00:00.000000'
|
||||
AND ol_delivery_d < '2020-01-02 00:00:00.000000';
|
||||
|
||||
-- Query 15
|
||||
WITH revenue (supplier_no, total_revenue) AS (
|
||||
SELECT
|
||||
mod((s_w_id * s_i_id),10000) AS supplier_no,
|
||||
sum(ol_amount) AS total_revenue
|
||||
FROM
|
||||
order_line,
|
||||
stock
|
||||
WHERE ol_i_id = s_i_id
|
||||
AND ol_supply_w_id = s_w_id
|
||||
AND ol_delivery_d >= '2007-01-02 00:00:00.000000'
|
||||
GROUP BY mod((s_w_id * s_i_id),10000))
|
||||
SELECT
|
||||
su_suppkey,
|
||||
su_name,
|
||||
su_address,
|
||||
su_phone,
|
||||
total_revenue
|
||||
FROM
|
||||
supplier,
|
||||
revenue
|
||||
WHERE su_suppkey = supplier_no
|
||||
AND total_revenue = (SELECT max(total_revenue) FROM revenue)
|
||||
ORDER BY su_suppkey;
|
||||
|
||||
--Q16
|
||||
SELECT
|
||||
i_name,
|
||||
substr(i_data, 1, 3) AS brand,
|
||||
i_price,
|
||||
count(DISTINCT (mod((s_w_id * s_i_id),10000))) AS supplier_cnt
|
||||
FROM
|
||||
stock,
|
||||
item
|
||||
WHERE i_id = s_i_id
|
||||
AND i_data NOT LIKE 'zz%'
|
||||
AND (mod((s_w_id * s_i_id),10000) NOT IN
|
||||
(SELECT su_suppkey
|
||||
FROM supplier
|
||||
WHERE su_comment LIKE '%bad%'))
|
||||
GROUP BY
|
||||
i_name,
|
||||
substr(i_data, 1, 3),
|
||||
i_price
|
||||
ORDER BY supplier_cnt DESC;
|
||||
|
||||
--Q17
|
||||
SELECT
|
||||
sum(ol_amount) / 2.0 AS avg_yearly
|
||||
FROM
|
||||
order_line,
|
||||
(SELECT
|
||||
i_id,
|
||||
avg(ol_quantity) AS a
|
||||
FROM
|
||||
item,
|
||||
order_line
|
||||
WHERE i_data LIKE '%b'
|
||||
AND ol_i_id = i_id
|
||||
GROUP BY i_id) t
|
||||
WHERE ol_i_id = t.i_id;
|
||||
-- this filter was at the end causing the dataset to be empty. it should not have any
|
||||
-- influence on how the query gets planned so I removed the clause
|
||||
--AND ol_quantity < t.a;
|
||||
|
||||
-- Query 18
|
||||
SELECT
|
||||
c_last,
|
||||
c_id o_id,
|
||||
o_entry_d,
|
||||
o_ol_cnt,
|
||||
sum(ol_amount)
|
||||
FROM
|
||||
customer,
|
||||
oorder,
|
||||
order_line
|
||||
WHERE c_id = o_c_id
|
||||
AND c_w_id = o_w_id
|
||||
AND c_d_id = o_d_id
|
||||
AND ol_w_id = o_w_id
|
||||
AND ol_d_id = o_d_id
|
||||
AND ol_o_id = o_id
|
||||
GROUP BY
|
||||
o_id,
|
||||
o_w_id,
|
||||
o_d_id,
|
||||
c_id,
|
||||
c_last,
|
||||
o_entry_d,
|
||||
o_ol_cnt
|
||||
HAVING sum(ol_amount) > 5 -- was 200, but thats too big for the dataset
|
||||
ORDER BY
|
||||
sum(ol_amount) DESC,
|
||||
o_entry_d;
|
||||
|
||||
-- Query 19
|
||||
SELECT
|
||||
sum(ol_amount) AS revenue
|
||||
FROM
|
||||
order_line,
|
||||
item
|
||||
WHERE ( ol_i_id = i_id
|
||||
AND i_data LIKE '%a'
|
||||
AND ol_quantity >= 1
|
||||
AND ol_quantity <= 10
|
||||
AND i_price BETWEEN 1 AND 400000
|
||||
AND ol_w_id IN (1,2,3))
|
||||
OR ( ol_i_id = i_id
|
||||
AND i_data LIKE '%b'
|
||||
AND ol_quantity >= 1
|
||||
AND ol_quantity <= 10
|
||||
AND i_price BETWEEN 1 AND 400000
|
||||
AND ol_w_id IN (1,2,4))
|
||||
OR ( ol_i_id = i_id
|
||||
AND i_data LIKE '%c'
|
||||
AND ol_quantity >= 1
|
||||
AND ol_quantity <= 10
|
||||
AND i_price BETWEEN 1 AND 400000
|
||||
AND ol_w_id IN (1,5,3));
|
||||
|
||||
-- Query 20
|
||||
SELECT
|
||||
su_name,
|
||||
su_address
|
||||
FROM
|
||||
supplier,
|
||||
nation
|
||||
WHERE su_suppkey in
|
||||
(SELECT
|
||||
mod(s_i_id * s_w_id, 10000)
|
||||
FROM
|
||||
stock,
|
||||
order_line
|
||||
WHERE s_i_id IN
|
||||
(SELECT i_id
|
||||
FROM item
|
||||
WHERE i_data LIKE 'co%')
|
||||
AND ol_i_id = s_i_id
|
||||
AND ol_delivery_d > '2008-05-23 12:00:00' -- was 2010, but our order is in 2008
|
||||
GROUP BY s_i_id, s_w_id, s_quantity
|
||||
HAVING 2*s_quantity > sum(ol_quantity))
|
||||
AND su_nationkey = n_nationkey
|
||||
AND n_name = 'Germany'
|
||||
ORDER BY su_name;
|
||||
|
||||
-- Query 21
|
||||
-- DATA SET DOES NOT COVER THIS QUERY
|
||||
SELECT
|
||||
su_name,
|
||||
count(*) AS numwait
|
||||
FROM
|
||||
supplier,
|
||||
order_line l1,
|
||||
oorder,
|
||||
stock,
|
||||
nation
|
||||
WHERE ol_o_id = o_id
|
||||
AND ol_w_id = o_w_id
|
||||
AND ol_d_id = o_d_id
|
||||
AND ol_w_id = s_w_id
|
||||
AND ol_i_id = s_i_id
|
||||
AND mod((s_w_id * s_i_id),10000) = su_suppkey
|
||||
AND l1.ol_delivery_d > o_entry_d
|
||||
AND NOT exists (SELECT *
|
||||
FROM order_line l2
|
||||
WHERE l2.ol_o_id = l1.ol_o_id
|
||||
AND l2.ol_w_id = l1.ol_w_id
|
||||
AND l2.ol_d_id = l1.ol_d_id
|
||||
AND l2.ol_delivery_d > l1.ol_delivery_d)
|
||||
AND su_nationkey = n_nationkey
|
||||
AND n_name = 'Germany'
|
||||
GROUP BY su_name
|
||||
ORDER BY
|
||||
numwait desc,
|
||||
su_name;
|
||||
|
||||
-- Query 22
|
||||
-- DATA SET DOES NOT COVER THIS QUERY
|
||||
SELECT
|
||||
substr(c_state,1,1) AS country,
|
||||
count(*) AS numcust,
|
||||
sum(c_balance) AS totacctbal
|
||||
FROM customer
|
||||
WHERE substr(c_phone,1,1) in ('1','2','3','4','5','6','7')
|
||||
AND c_balance > (SELECT avg(c_BALANCE)
|
||||
FROM customer
|
||||
WHERE c_balance > 0.00
|
||||
AND substr(c_phone,1,1) in ('1','2','3','4','5','6','7'))
|
||||
AND NOT exists (SELECT *
|
||||
FROM oorder
|
||||
WHERE o_c_id = c_id
|
||||
AND o_w_id = c_w_id
|
||||
AND o_d_id = c_d_id)
|
||||
GROUP BY substr(c_state,1,1)
|
||||
ORDER BY substr(c_state,1,1);
|
||||
|
||||
SET client_min_messages TO WARNING;
|
||||
DROP SCHEMA chbenchmark_all_queries CASCADE;
|
|
@ -0,0 +1,41 @@
|
|||
SET citus.next_shard_id TO 1670000;
|
||||
CREATE SCHEMA expression_reference_join;
|
||||
SET search_path TO expression_reference_join;
|
||||
SET citus.shard_count TO 4;
|
||||
SET citus.enable_repartition_joins TO on;
|
||||
|
||||
CREATE TABLE ref (a int, b int);
|
||||
CREATE TABLE test (x int, y int);
|
||||
|
||||
INSERT INTO ref VALUES
|
||||
(2,2),
|
||||
(4,4);
|
||||
|
||||
INSERT INTO test VALUES
|
||||
(1,2),
|
||||
(2,2);
|
||||
|
||||
SELECT create_reference_table('ref');
|
||||
SELECT create_distributed_table('test', 'x');
|
||||
|
||||
-- PR 3180 implements expressions in join clauses to reference tables to support CHbenCHmark queries 7/8/9
|
||||
-- plannable as a repartition + reference join, now with an expression in the join clause
|
||||
SELECT *
|
||||
FROM
|
||||
test t1 JOIN test t2 USING (y), -- causes repartition, which makes this not routable or pushdownable
|
||||
ref a
|
||||
WHERE t2.y * 2 = a.a
|
||||
ORDER BY 1,2,3;
|
||||
|
||||
-- The join clause is wider than it used to be, causing this query to be recognized by the LogicalPlanner as a repartition join.
|
||||
-- Unplannable query due to a three-way join which causes no valid path (besides the cartesian product) to be found
|
||||
SELECT *
|
||||
FROM
|
||||
test t1 JOIN test t2 USING (y), -- causes repartition, which makes this not routable or pushdownable
|
||||
ref a,
|
||||
ref b
|
||||
WHERE t2.y - a.a - b.b = 0
|
||||
ORDER BY 1,2,3;
|
||||
|
||||
SET client_min_messages TO WARNING;
|
||||
DROP SCHEMA expression_reference_join CASCADE;
|
Loading…
Reference in New Issue