mirror of https://github.com/citusdata/citus.git
Feature: Expression in reference join (#3180)
DESCRIPTION: Expression in reference join Fixed: #2582 This patch allows arbitrary expressions in the join clause when joining to a reference table. An example of such joins could be found in CHbenCHmark queries 7, 8, 9 and 11; `mod((s_w_id * s_i_id),10000) = su_suppkey` and `ascii(substr(c_state,1,1)) = n2.n_nationkey`. Since the join is on a reference table these queries are able to be pushed down to the workers. To implement these queries we will widen the `IsJoinClause` predicate to not check if the expressions are a type `Var` after stripping the implicit coerciens. Instead we define a join clause when the `Var`'s in a clause come from more than 1 table. This allows more clauses to pass into the logical planner's `MultiNodeTree(...)` planning function. To compensate for this we tighten down the `LocalJoin`, `SinglePartitionJoin` and `DualPartitionJoin` to check for direct column references when planning. This allows the planner to work with arbitrary join expressions on reference tables.pull/3184/head
parent
a4c90b6ee1
commit
217890af5f
|
@ -18,6 +18,7 @@
|
||||||
#include "access/heapam.h"
|
#include "access/heapam.h"
|
||||||
#include "access/htup_details.h"
|
#include "access/htup_details.h"
|
||||||
#include "catalog/pg_am.h"
|
#include "catalog/pg_am.h"
|
||||||
|
#include "distributed/listutils.h"
|
||||||
#include "distributed/metadata_cache.h"
|
#include "distributed/metadata_cache.h"
|
||||||
#include "distributed/multi_join_order.h"
|
#include "distributed/multi_join_order.h"
|
||||||
#include "distributed/multi_physical_planner.h"
|
#include "distributed/multi_physical_planner.h"
|
||||||
|
@ -208,37 +209,45 @@ ExtractLeftMostRangeTableIndex(Node *node, int *rangeTableIndex)
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* JoinOnColumns determines whether two columns are joined by a given join clause
|
* JoinOnColumns determines whether two columns are joined by a given join clause list.
|
||||||
* list.
|
|
||||||
*/
|
*/
|
||||||
static bool
|
static bool
|
||||||
JoinOnColumns(Var *currentColumn, Var *candidateColumn, List *joinClauseList)
|
JoinOnColumns(Var *currentColumn, Var *candidateColumn, List *joinClauseList)
|
||||||
{
|
{
|
||||||
ListCell *joinClauseCell = NULL;
|
ListCell *joinClauseCell = NULL;
|
||||||
bool joinOnColumns = false;
|
if (currentColumn == NULL || candidateColumn == NULL)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* LocalJoin can only be happening if we have both a current column and a target
|
||||||
|
* column, otherwise we are not joining two local tables
|
||||||
|
*/
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
foreach(joinClauseCell, joinClauseList)
|
foreach(joinClauseCell, joinClauseList)
|
||||||
{
|
{
|
||||||
OpExpr *joinClause = (OpExpr *) lfirst(joinClauseCell);
|
OpExpr *joinClause = castNode(OpExpr, lfirst(joinClauseCell));
|
||||||
Var *leftColumn = LeftColumn(joinClause);
|
Var *leftColumn = LeftColumnOrNULL(joinClause);
|
||||||
Var *rightColumn = RightColumn(joinClause);
|
Var *rightColumn = RightColumnOrNULL(joinClause);
|
||||||
|
|
||||||
/* check if both join columns and both partition key columns match */
|
/*
|
||||||
|
* Check if both join columns and both partition key columns match, since the
|
||||||
|
* current and candidate column's can't be NULL we know they wont match if either
|
||||||
|
* of the columns resolved to NULL above.
|
||||||
|
*/
|
||||||
if (equal(leftColumn, currentColumn) &&
|
if (equal(leftColumn, currentColumn) &&
|
||||||
equal(rightColumn, candidateColumn))
|
equal(rightColumn, candidateColumn))
|
||||||
{
|
{
|
||||||
joinOnColumns = true;
|
return true;
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
if (equal(leftColumn, candidateColumn) &&
|
if (equal(leftColumn, candidateColumn) &&
|
||||||
equal(rightColumn, currentColumn))
|
equal(rightColumn, currentColumn))
|
||||||
{
|
{
|
||||||
joinOnColumns = true;
|
return true;
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return joinOnColumns;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -265,8 +274,20 @@ JoinOrderList(List *tableEntryList, List *joinClauseList)
|
||||||
candidateJoinOrder = JoinOrderForTable(startingTable, tableEntryList,
|
candidateJoinOrder = JoinOrderForTable(startingTable, tableEntryList,
|
||||||
joinClauseList);
|
joinClauseList);
|
||||||
|
|
||||||
|
if (candidateJoinOrder != NULL)
|
||||||
|
{
|
||||||
candidateJoinOrderList = lappend(candidateJoinOrderList, candidateJoinOrder);
|
candidateJoinOrderList = lappend(candidateJoinOrderList, candidateJoinOrder);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (list_length(candidateJoinOrderList) == 0)
|
||||||
|
{
|
||||||
|
/* there are no plans that we can create, time to error */
|
||||||
|
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||||
|
errmsg("complex joins are only supported when all distributed "
|
||||||
|
"tables are joined on their distribution columns with "
|
||||||
|
"equal operator")));
|
||||||
|
}
|
||||||
|
|
||||||
bestJoinOrder = BestJoinOrder(candidateJoinOrderList);
|
bestJoinOrder = BestJoinOrder(candidateJoinOrderList);
|
||||||
|
|
||||||
|
@ -341,6 +362,12 @@ JoinOrderForTable(TableEntry *firstTable, List *tableEntryList, List *joinClause
|
||||||
pendingJoinNode = EvaluateJoinRules(joinedTableList, currentJoinNode,
|
pendingJoinNode = EvaluateJoinRules(joinedTableList, currentJoinNode,
|
||||||
pendingTable, joinClauseList, joinType);
|
pendingTable, joinClauseList, joinType);
|
||||||
|
|
||||||
|
if (pendingJoinNode == NULL)
|
||||||
|
{
|
||||||
|
/* no join order could be generated, we try our next pending table */
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
/* if this rule is better than previous ones, keep it */
|
/* if this rule is better than previous ones, keep it */
|
||||||
pendingJoinRuleType = pendingJoinNode->joinRuleType;
|
pendingJoinRuleType = pendingJoinNode->joinRuleType;
|
||||||
if (pendingJoinRuleType < nextJoinRuleType)
|
if (pendingJoinRuleType < nextJoinRuleType)
|
||||||
|
@ -350,6 +377,15 @@ JoinOrderForTable(TableEntry *firstTable, List *tableEntryList, List *joinClause
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (nextJoinNode == NULL)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* There is no next join node found, this will repeat indefinitely hence we
|
||||||
|
* bail and let JoinOrderList try a new initial table
|
||||||
|
*/
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
Assert(nextJoinNode != NULL);
|
Assert(nextJoinNode != NULL);
|
||||||
nextJoinedTable = nextJoinNode->tableEntry;
|
nextJoinedTable = nextJoinNode->tableEntry;
|
||||||
|
|
||||||
|
@ -660,6 +696,11 @@ EvaluateJoinRules(List *joinedTableList, JoinOrderNode *currentJoinNode,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (nextJoinNode == NULL)
|
||||||
|
{
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
Assert(nextJoinNode != NULL);
|
Assert(nextJoinNode != NULL);
|
||||||
nextJoinNode->joinType = joinType;
|
nextJoinNode->joinType = joinType;
|
||||||
nextJoinNode->joinClauseList = applicableJoinClauses;
|
nextJoinNode->joinClauseList = applicableJoinClauses;
|
||||||
|
@ -824,22 +865,18 @@ LocalJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
|
||||||
char candidatePartitionMethod = PartitionMethod(relationId);
|
char candidatePartitionMethod = PartitionMethod(relationId);
|
||||||
char currentPartitionMethod = currentJoinNode->partitionMethod;
|
char currentPartitionMethod = currentJoinNode->partitionMethod;
|
||||||
TableEntry *currentAnchorTable = currentJoinNode->anchorTable;
|
TableEntry *currentAnchorTable = currentJoinNode->anchorTable;
|
||||||
JoinRuleType currentJoinRuleType = currentJoinNode->joinRuleType;
|
|
||||||
bool joinOnPartitionColumns = false;
|
bool joinOnPartitionColumns = false;
|
||||||
bool coPartitionedTables = false;
|
bool coPartitionedTables = false;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If we previously dual-hash re-partitioned the tables for a join or made
|
* If we previously dual-hash re-partitioned the tables for a join or made cartesian
|
||||||
* cartesian product, we currently don't allow local join.
|
* product, there is no anchor table anymore. In that case we don't allow local join.
|
||||||
*/
|
*/
|
||||||
if (currentJoinRuleType == DUAL_PARTITION_JOIN ||
|
if (currentAnchorTable == NULL)
|
||||||
currentJoinRuleType == CARTESIAN_PRODUCT)
|
|
||||||
{
|
{
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
Assert(currentAnchorTable != NULL);
|
|
||||||
|
|
||||||
/* the partition method should be the same for a local join */
|
/* the partition method should be the same for a local join */
|
||||||
if (currentPartitionMethod != candidatePartitionMethod)
|
if (currentPartitionMethod != candidatePartitionMethod)
|
||||||
{
|
{
|
||||||
|
@ -883,7 +920,6 @@ static JoinOrderNode *
|
||||||
SinglePartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
|
SinglePartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
|
||||||
List *applicableJoinClauses, JoinType joinType)
|
List *applicableJoinClauses, JoinType joinType)
|
||||||
{
|
{
|
||||||
JoinOrderNode *nextJoinNode = NULL;
|
|
||||||
Var *currentPartitionColumn = currentJoinNode->partitionColumn;
|
Var *currentPartitionColumn = currentJoinNode->partitionColumn;
|
||||||
char currentPartitionMethod = currentJoinNode->partitionMethod;
|
char currentPartitionMethod = currentJoinNode->partitionMethod;
|
||||||
TableEntry *currentAnchorTable = currentJoinNode->anchorTable;
|
TableEntry *currentAnchorTable = currentJoinNode->anchorTable;
|
||||||
|
@ -927,14 +963,14 @@ SinglePartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
nextJoinNode = MakeJoinOrderNode(candidateTable, SINGLE_HASH_PARTITION_JOIN,
|
return MakeJoinOrderNode(candidateTable, SINGLE_HASH_PARTITION_JOIN,
|
||||||
currentPartitionColumn,
|
currentPartitionColumn,
|
||||||
currentPartitionMethod,
|
currentPartitionMethod,
|
||||||
currentAnchorTable);
|
currentAnchorTable);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
nextJoinNode = MakeJoinOrderNode(candidateTable, SINGLE_RANGE_PARTITION_JOIN,
|
return MakeJoinOrderNode(candidateTable, SINGLE_RANGE_PARTITION_JOIN,
|
||||||
currentPartitionColumn,
|
currentPartitionColumn,
|
||||||
currentPartitionMethod,
|
currentPartitionMethod,
|
||||||
currentAnchorTable);
|
currentAnchorTable);
|
||||||
|
@ -942,7 +978,7 @@ SinglePartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* evaluate re-partitioning the current table only if the rule didn't apply above */
|
/* evaluate re-partitioning the current table only if the rule didn't apply above */
|
||||||
if (nextJoinNode == NULL && candidatePartitionMethod != DISTRIBUTE_BY_NONE)
|
if (candidatePartitionMethod != DISTRIBUTE_BY_NONE)
|
||||||
{
|
{
|
||||||
joinClause = SinglePartitionJoinClause(candidatePartitionColumn,
|
joinClause = SinglePartitionJoinClause(candidatePartitionColumn,
|
||||||
applicableJoinClauses);
|
applicableJoinClauses);
|
||||||
|
@ -959,7 +995,7 @@ SinglePartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
nextJoinNode = MakeJoinOrderNode(candidateTable,
|
return MakeJoinOrderNode(candidateTable,
|
||||||
SINGLE_HASH_PARTITION_JOIN,
|
SINGLE_HASH_PARTITION_JOIN,
|
||||||
candidatePartitionColumn,
|
candidatePartitionColumn,
|
||||||
candidatePartitionMethod,
|
candidatePartitionMethod,
|
||||||
|
@ -967,7 +1003,7 @@ SinglePartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
nextJoinNode = MakeJoinOrderNode(candidateTable,
|
return MakeJoinOrderNode(candidateTable,
|
||||||
SINGLE_RANGE_PARTITION_JOIN,
|
SINGLE_RANGE_PARTITION_JOIN,
|
||||||
candidatePartitionColumn,
|
candidatePartitionColumn,
|
||||||
candidatePartitionMethod,
|
candidatePartitionMethod,
|
||||||
|
@ -976,7 +1012,7 @@ SinglePartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return nextJoinNode;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -988,14 +1024,23 @@ SinglePartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
|
||||||
OpExpr *
|
OpExpr *
|
||||||
SinglePartitionJoinClause(Var *partitionColumn, List *applicableJoinClauses)
|
SinglePartitionJoinClause(Var *partitionColumn, List *applicableJoinClauses)
|
||||||
{
|
{
|
||||||
OpExpr *joinClause = NULL;
|
|
||||||
ListCell *applicableJoinClauseCell = NULL;
|
ListCell *applicableJoinClauseCell = NULL;
|
||||||
|
if (partitionColumn == NULL)
|
||||||
|
{
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
foreach(applicableJoinClauseCell, applicableJoinClauses)
|
foreach(applicableJoinClauseCell, applicableJoinClauses)
|
||||||
{
|
{
|
||||||
OpExpr *applicableJoinClause = (OpExpr *) lfirst(applicableJoinClauseCell);
|
OpExpr *applicableJoinClause = castNode(OpExpr, lfirst(applicableJoinClauseCell));
|
||||||
Var *leftColumn = LeftColumn(applicableJoinClause);
|
Var *leftColumn = LeftColumnOrNULL(applicableJoinClause);
|
||||||
Var *rightColumn = RightColumn(applicableJoinClause);
|
Var *rightColumn = RightColumnOrNULL(applicableJoinClause);
|
||||||
|
if (leftColumn == NULL || rightColumn == NULL)
|
||||||
|
{
|
||||||
|
/* not a simple partition column join */
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We first check if partition column matches either of the join columns
|
* We first check if partition column matches either of the join columns
|
||||||
|
@ -1007,8 +1052,7 @@ SinglePartitionJoinClause(Var *partitionColumn, List *applicableJoinClauses)
|
||||||
{
|
{
|
||||||
if (leftColumn->vartype == rightColumn->vartype)
|
if (leftColumn->vartype == rightColumn->vartype)
|
||||||
{
|
{
|
||||||
joinClause = applicableJoinClause;
|
return applicableJoinClause;
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -1017,7 +1061,7 @@ SinglePartitionJoinClause(Var *partitionColumn, List *applicableJoinClauses)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return joinClause;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -1032,20 +1076,18 @@ static JoinOrderNode *
|
||||||
DualPartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
|
DualPartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
|
||||||
List *applicableJoinClauses, JoinType joinType)
|
List *applicableJoinClauses, JoinType joinType)
|
||||||
{
|
{
|
||||||
/* Because of the dual partition, anchor table information got lost */
|
|
||||||
TableEntry *anchorTable = NULL;
|
|
||||||
JoinOrderNode *nextJoinNode = NULL;
|
|
||||||
|
|
||||||
OpExpr *joinClause = DualPartitionJoinClause(applicableJoinClauses);
|
OpExpr *joinClause = DualPartitionJoinClause(applicableJoinClauses);
|
||||||
if (joinClause)
|
if (joinClause)
|
||||||
{
|
{
|
||||||
Var *nextPartitionColumn = LeftColumn(joinClause);
|
/* because of the dual partition, anchor table and partition column get lost */
|
||||||
nextJoinNode = MakeJoinOrderNode(candidateTable, DUAL_PARTITION_JOIN,
|
return MakeJoinOrderNode(candidateTable,
|
||||||
nextPartitionColumn, REDISTRIBUTE_BY_HASH,
|
DUAL_PARTITION_JOIN,
|
||||||
anchorTable);
|
NULL,
|
||||||
|
REDISTRIBUTE_BY_HASH,
|
||||||
|
NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
return nextJoinNode;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -1057,20 +1099,23 @@ DualPartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
|
||||||
OpExpr *
|
OpExpr *
|
||||||
DualPartitionJoinClause(List *applicableJoinClauses)
|
DualPartitionJoinClause(List *applicableJoinClauses)
|
||||||
{
|
{
|
||||||
OpExpr *joinClause = NULL;
|
|
||||||
ListCell *applicableJoinClauseCell = NULL;
|
ListCell *applicableJoinClauseCell = NULL;
|
||||||
|
|
||||||
foreach(applicableJoinClauseCell, applicableJoinClauses)
|
foreach(applicableJoinClauseCell, applicableJoinClauses)
|
||||||
{
|
{
|
||||||
OpExpr *applicableJoinClause = (OpExpr *) lfirst(applicableJoinClauseCell);
|
OpExpr *applicableJoinClause = (OpExpr *) lfirst(applicableJoinClauseCell);
|
||||||
Var *leftColumn = LeftColumn(applicableJoinClause);
|
Var *leftColumn = LeftColumnOrNULL(applicableJoinClause);
|
||||||
Var *rightColumn = RightColumn(applicableJoinClause);
|
Var *rightColumn = RightColumnOrNULL(applicableJoinClause);
|
||||||
|
|
||||||
|
if (leftColumn == NULL || rightColumn == NULL)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
/* we only need to check that the join column types match */
|
/* we only need to check that the join column types match */
|
||||||
if (leftColumn->vartype == rightColumn->vartype)
|
if (leftColumn->vartype == rightColumn->vartype)
|
||||||
{
|
{
|
||||||
joinClause = applicableJoinClause;
|
return applicableJoinClause;
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -1078,7 +1123,7 @@ DualPartitionJoinClause(List *applicableJoinClauses)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return joinClause;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -1090,16 +1135,17 @@ DualPartitionJoinClause(List *applicableJoinClauses)
|
||||||
static JoinOrderNode *
|
static JoinOrderNode *
|
||||||
CartesianProduct(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
|
CartesianProduct(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
|
||||||
List *applicableJoinClauses, JoinType joinType)
|
List *applicableJoinClauses, JoinType joinType)
|
||||||
|
{
|
||||||
|
if (list_length(applicableJoinClauses) == 0)
|
||||||
{
|
{
|
||||||
/* Because of the cartesian product, anchor table information got lost */
|
/* Because of the cartesian product, anchor table information got lost */
|
||||||
TableEntry *anchorTable = NULL;
|
return MakeJoinOrderNode(candidateTable, CARTESIAN_PRODUCT,
|
||||||
|
|
||||||
JoinOrderNode *nextJoinNode = MakeJoinOrderNode(candidateTable, CARTESIAN_PRODUCT,
|
|
||||||
currentJoinNode->partitionColumn,
|
currentJoinNode->partitionColumn,
|
||||||
currentJoinNode->partitionMethod,
|
currentJoinNode->partitionMethod,
|
||||||
anchorTable);
|
NULL);
|
||||||
|
}
|
||||||
|
|
||||||
return nextJoinNode;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -1121,6 +1167,46 @@ MakeJoinOrderNode(TableEntry *tableEntry, JoinRuleType joinRuleType,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* IsApplicableJoinClause tests if the current joinClause is applicable to the join at
|
||||||
|
* hand.
|
||||||
|
*
|
||||||
|
* Given a list of left hand tables and a candidate right hand table the join clause is
|
||||||
|
* valid if atleast 1 column is from the right hand table AND all columns can be found
|
||||||
|
* in either the list of tables on the left *or* in the right hand table.
|
||||||
|
*/
|
||||||
|
bool
|
||||||
|
IsApplicableJoinClause(List *leftTableIdList, uint32 rightTableId, OpExpr *joinClause)
|
||||||
|
{
|
||||||
|
List *varList = pull_var_clause_default((Node *) joinClause);
|
||||||
|
Var *var = NULL;
|
||||||
|
bool joinContainsRightTable = false;
|
||||||
|
foreach_ptr(var, varList)
|
||||||
|
{
|
||||||
|
uint32 columnTableId = var->varno;
|
||||||
|
if (rightTableId == columnTableId)
|
||||||
|
{
|
||||||
|
joinContainsRightTable = true;
|
||||||
|
}
|
||||||
|
else if (!list_member_int(leftTableIdList, columnTableId))
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* We couldn't find this column either on the right hand side (first if
|
||||||
|
* statement), nor in the list on the left. This join clause involves a table
|
||||||
|
* not yet available during the candidate join.
|
||||||
|
*/
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* All columns referenced in this clause are available during this join, now the join
|
||||||
|
* is applicable if we found our candidate table as well
|
||||||
|
*/
|
||||||
|
return joinContainsRightTable;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* ApplicableJoinClauses finds all join clauses that apply between the given
|
* ApplicableJoinClauses finds all join clauses that apply between the given
|
||||||
* left table list and the right table, and returns these found join clauses.
|
* left table list and the right table, and returns these found join clauses.
|
||||||
|
@ -1136,18 +1222,8 @@ ApplicableJoinClauses(List *leftTableIdList, uint32 rightTableId, List *joinClau
|
||||||
|
|
||||||
foreach(joinClauseCell, joinClauseList)
|
foreach(joinClauseCell, joinClauseList)
|
||||||
{
|
{
|
||||||
OpExpr *joinClause = (OpExpr *) lfirst(joinClauseCell);
|
OpExpr *joinClause = castNode(OpExpr, lfirst(joinClauseCell));
|
||||||
Var *joinLeftColumn = LeftColumn(joinClause);
|
if (IsApplicableJoinClause(leftTableIdList, rightTableId, joinClause))
|
||||||
Var *joinRightColumn = RightColumn(joinClause);
|
|
||||||
|
|
||||||
uint32 joinLeftTableId = joinLeftColumn->varno;
|
|
||||||
uint32 joinRightTableId = joinRightColumn->varno;
|
|
||||||
|
|
||||||
bool leftListHasJoinLeft = list_member_int(leftTableIdList, joinLeftTableId);
|
|
||||||
bool leftListHasJoinRight = list_member_int(leftTableIdList, joinRightTableId);
|
|
||||||
|
|
||||||
if ((leftListHasJoinLeft && (rightTableId == joinRightTableId)) ||
|
|
||||||
(leftListHasJoinRight && (rightTableId == joinLeftTableId)))
|
|
||||||
{
|
{
|
||||||
applicableJoinClauses = lappend(applicableJoinClauses, joinClause);
|
applicableJoinClauses = lappend(applicableJoinClauses, joinClause);
|
||||||
}
|
}
|
||||||
|
@ -1157,37 +1233,41 @@ ApplicableJoinClauses(List *leftTableIdList, uint32 rightTableId, List *joinClau
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Returns the left column in the given join clause. */
|
/*
|
||||||
|
* Returns the left column only when directly referenced in the given join clause,
|
||||||
|
* otherwise NULL is returned.
|
||||||
|
*/
|
||||||
Var *
|
Var *
|
||||||
LeftColumn(OpExpr *joinClause)
|
LeftColumnOrNULL(OpExpr *joinClause)
|
||||||
{
|
{
|
||||||
List *argumentList = joinClause->args;
|
List *argumentList = joinClause->args;
|
||||||
Node *leftArgument = (Node *) linitial(argumentList);
|
Node *leftArgument = (Node *) linitial(argumentList);
|
||||||
|
|
||||||
List *varList = pull_var_clause_default(leftArgument);
|
leftArgument = strip_implicit_coercions(leftArgument);
|
||||||
Var *leftColumn = NULL;
|
if (!IsA(leftArgument, Var))
|
||||||
|
{
|
||||||
Assert(list_length(varList) == 1);
|
return NULL;
|
||||||
leftColumn = (Var *) linitial(varList);
|
}
|
||||||
|
return castNode(Var, leftArgument);
|
||||||
return leftColumn;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Returns the right column in the given join clause. */
|
/*
|
||||||
|
* Returns the right column only when directly referenced in the given join clause,
|
||||||
|
* otherwise NULL is returned.
|
||||||
|
* */
|
||||||
Var *
|
Var *
|
||||||
RightColumn(OpExpr *joinClause)
|
RightColumnOrNULL(OpExpr *joinClause)
|
||||||
{
|
{
|
||||||
List *argumentList = joinClause->args;
|
List *argumentList = joinClause->args;
|
||||||
Node *rightArgument = (Node *) lsecond(argumentList);
|
Node *rightArgument = (Node *) lsecond(argumentList);
|
||||||
|
|
||||||
List *varList = pull_var_clause_default(rightArgument);
|
rightArgument = strip_implicit_coercions(rightArgument);
|
||||||
Var *rightColumn = NULL;
|
if (!IsA(rightArgument, Var))
|
||||||
|
{
|
||||||
Assert(list_length(varList) == 1);
|
return NULL;
|
||||||
rightColumn = (Var *) linitial(varList);
|
}
|
||||||
|
return castNode(Var, rightArgument);
|
||||||
return rightColumn;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -23,6 +23,7 @@
|
||||||
#include "distributed/colocation_utils.h"
|
#include "distributed/colocation_utils.h"
|
||||||
#include "distributed/metadata_cache.h"
|
#include "distributed/metadata_cache.h"
|
||||||
#include "distributed/insert_select_planner.h"
|
#include "distributed/insert_select_planner.h"
|
||||||
|
#include "distributed/listutils.h"
|
||||||
#include "distributed/multi_logical_optimizer.h"
|
#include "distributed/multi_logical_optimizer.h"
|
||||||
#include "distributed/multi_logical_planner.h"
|
#include "distributed/multi_logical_planner.h"
|
||||||
#include "distributed/multi_physical_planner.h"
|
#include "distributed/multi_physical_planner.h"
|
||||||
|
@ -1387,62 +1388,59 @@ ExtractFromExpressionWalker(Node *node, QualifierWalkerContext *walkerContext)
|
||||||
bool
|
bool
|
||||||
IsJoinClause(Node *clause)
|
IsJoinClause(Node *clause)
|
||||||
{
|
{
|
||||||
bool isJoinClause = false;
|
|
||||||
OpExpr *operatorExpression = NULL;
|
OpExpr *operatorExpression = NULL;
|
||||||
List *argumentList = NIL;
|
bool equalsOperator = false;
|
||||||
Node *leftArgument = NULL;
|
List *varList = NIL;
|
||||||
Node *rightArgument = NULL;
|
Var *initialVar = NULL;
|
||||||
Node *strippedLeftArgument = NULL;
|
Var *var = NULL;
|
||||||
Node *strippedRightArgument = NULL;
|
|
||||||
|
|
||||||
if (!IsA(clause, OpExpr))
|
if (!IsA(clause, OpExpr))
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
operatorExpression = (OpExpr *) clause;
|
operatorExpression = castNode(OpExpr, clause);
|
||||||
argumentList = operatorExpression->args;
|
equalsOperator = OperatorImplementsEquality(operatorExpression->opno);
|
||||||
|
|
||||||
/* join clauses must have two arguments */
|
if (!equalsOperator)
|
||||||
if (list_length(argumentList) != 2)
|
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
* The single and dual repartition join and local join planners expect the clauses
|
||||||
|
* to be equi-join to calculate a hash on which to distribute.
|
||||||
|
*
|
||||||
|
* In the future we should move this clause to those planners and allow
|
||||||
|
* non-equi-join's in the reference join and cartesian product. This is tracked in
|
||||||
|
* https://github.com/citusdata/citus/issues/3198
|
||||||
|
*/
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* get left and right side of the expression */
|
/*
|
||||||
leftArgument = (Node *) linitial(argumentList);
|
* take all column references from the clause, if we find 2 column references from a
|
||||||
rightArgument = (Node *) lsecond(argumentList);
|
* different relation we assume this is a join clause
|
||||||
|
*/
|
||||||
strippedLeftArgument = strip_implicit_coercions(leftArgument);
|
varList = pull_var_clause_default(clause);
|
||||||
strippedRightArgument = strip_implicit_coercions(rightArgument);
|
if (list_length(varList) <= 0)
|
||||||
|
|
||||||
/* each side of the expression should have only one column */
|
|
||||||
if (IsA(strippedLeftArgument, Var) && IsA(strippedRightArgument, Var))
|
|
||||||
{
|
{
|
||||||
Var *leftColumn = (Var *) strippedLeftArgument;
|
/* no column references in query, not describing a join */
|
||||||
Var *rightColumn = (Var *) strippedRightArgument;
|
return false;
|
||||||
bool equiJoin = false;
|
|
||||||
bool joinBetweenDifferentTables = false;
|
|
||||||
|
|
||||||
bool equalsOperator = OperatorImplementsEquality(operatorExpression->opno);
|
|
||||||
if (equalsOperator)
|
|
||||||
{
|
|
||||||
equiJoin = true;
|
|
||||||
}
|
}
|
||||||
|
initialVar = castNode(Var, linitial(varList));
|
||||||
|
|
||||||
if (leftColumn->varno != rightColumn->varno)
|
foreach_ptr(var, varList)
|
||||||
{
|
{
|
||||||
joinBetweenDifferentTables = true;
|
if (var->varno != initialVar->varno)
|
||||||
}
|
|
||||||
|
|
||||||
/* codifies our logic for determining if this node is a join clause */
|
|
||||||
if (equiJoin && joinBetweenDifferentTables)
|
|
||||||
{
|
{
|
||||||
isJoinClause = true;
|
/*
|
||||||
|
* this column reference comes from a different relation, hence describing a
|
||||||
|
* join
|
||||||
|
*/
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return isJoinClause;
|
/* all column references were to the same relation, no join */
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -2143,8 +2141,12 @@ ApplySinglePartitionJoin(MultiNode *leftNode, MultiNode *rightNode,
|
||||||
joinClause = SinglePartitionJoinClause(partitionColumn, applicableJoinClauses);
|
joinClause = SinglePartitionJoinClause(partitionColumn, applicableJoinClauses);
|
||||||
Assert(joinClause != NULL);
|
Assert(joinClause != NULL);
|
||||||
|
|
||||||
leftColumn = LeftColumn(joinClause);
|
/* both are verified in SinglePartitionJoinClause to not be NULL, assert is to guard */
|
||||||
rightColumn = RightColumn(joinClause);
|
leftColumn = LeftColumnOrNULL(joinClause);
|
||||||
|
rightColumn = RightColumnOrNULL(joinClause);
|
||||||
|
|
||||||
|
Assert(leftColumn != NULL);
|
||||||
|
Assert(rightColumn != NULL);
|
||||||
|
|
||||||
if (equal(partitionColumn, leftColumn))
|
if (equal(partitionColumn, leftColumn))
|
||||||
{
|
{
|
||||||
|
@ -2218,8 +2220,11 @@ ApplyDualPartitionJoin(MultiNode *leftNode, MultiNode *rightNode,
|
||||||
joinClause = DualPartitionJoinClause(applicableJoinClauses);
|
joinClause = DualPartitionJoinClause(applicableJoinClauses);
|
||||||
Assert(joinClause != NULL);
|
Assert(joinClause != NULL);
|
||||||
|
|
||||||
leftColumn = LeftColumn(joinClause);
|
/* both are verified in DualPartitionJoinClause to not be NULL, assert is to guard */
|
||||||
rightColumn = RightColumn(joinClause);
|
leftColumn = LeftColumnOrNULL(joinClause);
|
||||||
|
rightColumn = RightColumnOrNULL(joinClause);
|
||||||
|
Assert(leftColumn != NULL);
|
||||||
|
Assert(rightColumn != NULL);
|
||||||
|
|
||||||
rightTableIdList = OutputTableIdList(rightNode);
|
rightTableIdList = OutputTableIdList(rightNode);
|
||||||
rightTableId = (uint32) linitial_int(rightTableIdList);
|
rightTableId = (uint32) linitial_int(rightTableIdList);
|
||||||
|
|
|
@ -3603,8 +3603,13 @@ JoinSequenceArray(List *rangeTableFragmentsList, Query *jobQuery, List *depended
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
leftColumn = LeftColumn(nextJoinClause);
|
leftColumn = LeftColumnOrNULL(nextJoinClause);
|
||||||
rightColumn = RightColumn(nextJoinClause);
|
rightColumn = RightColumnOrNULL(nextJoinClause);
|
||||||
|
if (leftColumn == NULL || rightColumn == NULL)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
leftRangeTableId = leftColumn->varno;
|
leftRangeTableId = leftColumn->varno;
|
||||||
rightRangeTableId = rightColumn->varno;
|
rightRangeTableId = rightColumn->varno;
|
||||||
|
|
||||||
|
|
|
@ -82,13 +82,15 @@ extern bool EnableSingleHashRepartitioning;
|
||||||
/* Function declaration for determining table join orders */
|
/* Function declaration for determining table join orders */
|
||||||
extern List * JoinExprList(FromExpr *fromExpr);
|
extern List * JoinExprList(FromExpr *fromExpr);
|
||||||
extern List * JoinOrderList(List *rangeTableEntryList, List *joinClauseList);
|
extern List * JoinOrderList(List *rangeTableEntryList, List *joinClauseList);
|
||||||
|
extern bool IsApplicableJoinClause(List *leftTableIdList, uint32 rightTableId,
|
||||||
|
OpExpr *joinClause);
|
||||||
extern List * ApplicableJoinClauses(List *leftTableIdList, uint32 rightTableId,
|
extern List * ApplicableJoinClauses(List *leftTableIdList, uint32 rightTableId,
|
||||||
List *joinClauseList);
|
List *joinClauseList);
|
||||||
extern OpExpr * SinglePartitionJoinClause(Var *partitionColumn,
|
extern OpExpr * SinglePartitionJoinClause(Var *partitionColumn,
|
||||||
List *applicableJoinClauses);
|
List *applicableJoinClauses);
|
||||||
extern OpExpr * DualPartitionJoinClause(List *applicableJoinClauses);
|
extern OpExpr * DualPartitionJoinClause(List *applicableJoinClauses);
|
||||||
extern Var * LeftColumn(OpExpr *joinClause);
|
extern Var * LeftColumnOrNULL(OpExpr *joinClause);
|
||||||
extern Var * RightColumn(OpExpr *joinClause);
|
extern Var * RightColumnOrNULL(OpExpr *joinClause);
|
||||||
extern Var * PartitionColumn(Oid relationId, uint32 rangeTableId);
|
extern Var * PartitionColumn(Oid relationId, uint32 rangeTableId);
|
||||||
extern Var * DistPartitionKey(Oid relationId);
|
extern Var * DistPartitionKey(Oid relationId);
|
||||||
extern char PartitionMethod(Oid relationId);
|
extern char PartitionMethod(Oid relationId);
|
||||||
|
|
|
@ -0,0 +1,903 @@
|
||||||
|
SET citus.next_shard_id TO 1650000;
|
||||||
|
CREATE SCHEMA chbenchmark_all_queries;
|
||||||
|
SET search_path TO chbenchmark_all_queries;
|
||||||
|
SET citus.enable_repartition_joins TO on;
|
||||||
|
CREATE TABLE order_line (
|
||||||
|
ol_w_id int NOT NULL,
|
||||||
|
ol_d_id int NOT NULL,
|
||||||
|
ol_o_id int NOT NULL,
|
||||||
|
ol_number int NOT NULL,
|
||||||
|
ol_i_id int NOT NULL,
|
||||||
|
ol_delivery_d timestamp NULL DEFAULT NULL,
|
||||||
|
ol_amount decimal(6,2) NOT NULL,
|
||||||
|
ol_supply_w_id int NOT NULL,
|
||||||
|
ol_quantity decimal(2,0) NOT NULL,
|
||||||
|
ol_dist_info char(24) NOT NULL,
|
||||||
|
PRIMARY KEY (ol_w_id,ol_d_id,ol_o_id,ol_number)
|
||||||
|
);
|
||||||
|
CREATE TABLE new_order (
|
||||||
|
no_w_id int NOT NULL,
|
||||||
|
no_d_id int NOT NULL,
|
||||||
|
no_o_id int NOT NULL,
|
||||||
|
PRIMARY KEY (no_w_id,no_d_id,no_o_id)
|
||||||
|
);
|
||||||
|
CREATE TABLE stock (
|
||||||
|
s_w_id int NOT NULL,
|
||||||
|
s_i_id int NOT NULL,
|
||||||
|
s_quantity decimal(4,0) NOT NULL,
|
||||||
|
s_ytd decimal(8,2) NOT NULL,
|
||||||
|
s_order_cnt int NOT NULL,
|
||||||
|
s_remote_cnt int NOT NULL,
|
||||||
|
s_data varchar(50) NOT NULL,
|
||||||
|
s_dist_01 char(24) NOT NULL,
|
||||||
|
s_dist_02 char(24) NOT NULL,
|
||||||
|
s_dist_03 char(24) NOT NULL,
|
||||||
|
s_dist_04 char(24) NOT NULL,
|
||||||
|
s_dist_05 char(24) NOT NULL,
|
||||||
|
s_dist_06 char(24) NOT NULL,
|
||||||
|
s_dist_07 char(24) NOT NULL,
|
||||||
|
s_dist_08 char(24) NOT NULL,
|
||||||
|
s_dist_09 char(24) NOT NULL,
|
||||||
|
s_dist_10 char(24) NOT NULL,
|
||||||
|
PRIMARY KEY (s_w_id,s_i_id)
|
||||||
|
);
|
||||||
|
CREATE TABLE oorder (
|
||||||
|
o_w_id int NOT NULL,
|
||||||
|
o_d_id int NOT NULL,
|
||||||
|
o_id int NOT NULL,
|
||||||
|
o_c_id int NOT NULL,
|
||||||
|
o_carrier_id int DEFAULT NULL,
|
||||||
|
o_ol_cnt decimal(2,0) NOT NULL,
|
||||||
|
o_all_local decimal(1,0) NOT NULL,
|
||||||
|
o_entry_d timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
PRIMARY KEY (o_w_id,o_d_id,o_id),
|
||||||
|
UNIQUE (o_w_id,o_d_id,o_c_id,o_id)
|
||||||
|
);
|
||||||
|
CREATE TABLE history (
|
||||||
|
h_c_id int NOT NULL,
|
||||||
|
h_c_d_id int NOT NULL,
|
||||||
|
h_c_w_id int NOT NULL,
|
||||||
|
h_d_id int NOT NULL,
|
||||||
|
h_w_id int NOT NULL,
|
||||||
|
h_date timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
h_amount decimal(6,2) NOT NULL,
|
||||||
|
h_data varchar(24) NOT NULL
|
||||||
|
);
|
||||||
|
CREATE TABLE customer (
|
||||||
|
c_w_id int NOT NULL,
|
||||||
|
c_d_id int NOT NULL,
|
||||||
|
c_id int NOT NULL,
|
||||||
|
c_discount decimal(4,4) NOT NULL,
|
||||||
|
c_credit char(2) NOT NULL,
|
||||||
|
c_last varchar(16) NOT NULL,
|
||||||
|
c_first varchar(16) NOT NULL,
|
||||||
|
c_credit_lim decimal(12,2) NOT NULL,
|
||||||
|
c_balance decimal(12,2) NOT NULL,
|
||||||
|
c_ytd_payment float NOT NULL,
|
||||||
|
c_payment_cnt int NOT NULL,
|
||||||
|
c_delivery_cnt int NOT NULL,
|
||||||
|
c_street_1 varchar(20) NOT NULL,
|
||||||
|
c_street_2 varchar(20) NOT NULL,
|
||||||
|
c_city varchar(20) NOT NULL,
|
||||||
|
c_state char(2) NOT NULL,
|
||||||
|
c_zip char(9) NOT NULL,
|
||||||
|
c_phone char(16) NOT NULL,
|
||||||
|
c_since timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
c_middle char(2) NOT NULL,
|
||||||
|
c_data varchar(500) NOT NULL,
|
||||||
|
PRIMARY KEY (c_w_id,c_d_id,c_id)
|
||||||
|
);
|
||||||
|
CREATE TABLE district (
|
||||||
|
d_w_id int NOT NULL,
|
||||||
|
d_id int NOT NULL,
|
||||||
|
d_ytd decimal(12,2) NOT NULL,
|
||||||
|
d_tax decimal(4,4) NOT NULL,
|
||||||
|
d_next_o_id int NOT NULL,
|
||||||
|
d_name varchar(10) NOT NULL,
|
||||||
|
d_street_1 varchar(20) NOT NULL,
|
||||||
|
d_street_2 varchar(20) NOT NULL,
|
||||||
|
d_city varchar(20) NOT NULL,
|
||||||
|
d_state char(2) NOT NULL,
|
||||||
|
d_zip char(9) NOT NULL,
|
||||||
|
PRIMARY KEY (d_w_id,d_id)
|
||||||
|
);
|
||||||
|
CREATE TABLE item (
|
||||||
|
i_id int NOT NULL,
|
||||||
|
i_name varchar(24) NOT NULL,
|
||||||
|
i_price decimal(5,2) NOT NULL,
|
||||||
|
i_data varchar(50) NOT NULL,
|
||||||
|
i_im_id int NOT NULL,
|
||||||
|
PRIMARY KEY (i_id)
|
||||||
|
);
|
||||||
|
CREATE TABLE warehouse (
|
||||||
|
w_id int NOT NULL,
|
||||||
|
w_ytd decimal(12,2) NOT NULL,
|
||||||
|
w_tax decimal(4,4) NOT NULL,
|
||||||
|
w_name varchar(10) NOT NULL,
|
||||||
|
w_street_1 varchar(20) NOT NULL,
|
||||||
|
w_street_2 varchar(20) NOT NULL,
|
||||||
|
w_city varchar(20) NOT NULL,
|
||||||
|
w_state char(2) NOT NULL,
|
||||||
|
w_zip char(9) NOT NULL,
|
||||||
|
PRIMARY KEY (w_id)
|
||||||
|
);
|
||||||
|
CREATE TABLE region (
|
||||||
|
r_regionkey int not null,
|
||||||
|
r_name char(55) not null,
|
||||||
|
r_comment char(152) not null,
|
||||||
|
PRIMARY KEY ( r_regionkey )
|
||||||
|
);
|
||||||
|
CREATE TABLE nation (
|
||||||
|
n_nationkey int not null,
|
||||||
|
n_name char(25) not null,
|
||||||
|
n_regionkey int not null,
|
||||||
|
n_comment char(152) not null,
|
||||||
|
PRIMARY KEY ( n_nationkey )
|
||||||
|
);
|
||||||
|
CREATE TABLE supplier (
|
||||||
|
su_suppkey int not null,
|
||||||
|
su_name char(25) not null,
|
||||||
|
su_address varchar(40) not null,
|
||||||
|
su_nationkey int not null,
|
||||||
|
su_phone char(15) not null,
|
||||||
|
su_acctbal numeric(12,2) not null,
|
||||||
|
su_comment char(101) not null,
|
||||||
|
PRIMARY KEY ( su_suppkey )
|
||||||
|
);
|
||||||
|
SELECT create_distributed_table('order_line','ol_w_id');
|
||||||
|
create_distributed_table
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT create_distributed_table('new_order','no_w_id');
|
||||||
|
create_distributed_table
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT create_distributed_table('stock','s_w_id');
|
||||||
|
create_distributed_table
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT create_distributed_table('oorder','o_w_id');
|
||||||
|
create_distributed_table
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT create_distributed_table('history','h_w_id');
|
||||||
|
create_distributed_table
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT create_distributed_table('customer','c_w_id');
|
||||||
|
create_distributed_table
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT create_distributed_table('district','d_w_id');
|
||||||
|
create_distributed_table
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT create_distributed_table('warehouse','w_id');
|
||||||
|
create_distributed_table
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT create_reference_table('item');
|
||||||
|
create_reference_table
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT create_reference_table('region');
|
||||||
|
create_reference_table
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT create_reference_table('nation');
|
||||||
|
create_reference_table
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT create_reference_table('supplier');
|
||||||
|
create_reference_table
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE order_line, new_order, stock, oorder, history, customer, district, warehouse, item, region, nation, supplier; -- for easy copy in development
|
||||||
|
INSERT INTO supplier SELECT c, 'abc', 'def', c, 'ghi', c, 'jkl' FROM generate_series(0,10) AS c;
|
||||||
|
INSERT INTO new_order SELECT c, c, c FROM generate_series(0,10) AS c;
|
||||||
|
INSERT INTO stock SELECT c,c,c,c,c,c, 'abc','abc','abc','abc','abc','abc','abc','abc','abc','abc','abc' FROM generate_series(1,3) AS c;
|
||||||
|
INSERT INTO stock SELECT c, 5000,c,c,c,c, 'abc','abc','abc','abc','abc','abc','abc','abc','abc','abc','abc' FROM generate_series(1,3) AS c; -- mod(2*5000,10000) == 0
|
||||||
|
INSERT INTO order_line SELECT c, c, c, c, c, '2008-10-17 00:00:00.000000', c, c, c, 'abc' FROM generate_series(0,10) AS c;
|
||||||
|
INSERT INTO oorder SELECT c, c, c, c, c, 1, 1, '2008-10-17 00:00:00.000000' FROM generate_series(0,10) AS c;
|
||||||
|
INSERT INTO customer SELECT c, c, c, 0, 'XX', 'John', 'Doe', 1000, 0, 0, c, c, 'Name', 'Street', 'Some City', 'CA', '12345', '+1 000 0000000', '2007-01-02 00:00:00.000000', 'NA', 'nothing special' FROM generate_series(0,10) AS c;
|
||||||
|
INSERT INTO item SELECT c, 'Keyboard', 50, 'co b', c FROM generate_series(0,10) AS c; --co% and %b filters all around
|
||||||
|
INSERT INTO region VALUES
|
||||||
|
(1, 'Not Europe', 'Big'),
|
||||||
|
(2, 'Europe', 'Big');
|
||||||
|
INSERT INTO nation VALUES
|
||||||
|
(1, 'United States', 1, 'Also Big'),
|
||||||
|
(4, 'The Netherlands', 2, 'Flat'),
|
||||||
|
(9, 'Germany', 2, 'Germany must be in here for Q7'),
|
||||||
|
(67, 'Cambodia', 2, 'I don''t understand how we got from California to Cambodia but I will take it, it also is not in Europe, but we need it to be for Q8');
|
||||||
|
-- Query 1
|
||||||
|
SELECT
|
||||||
|
ol_number,
|
||||||
|
sum(ol_quantity) as sum_qty,
|
||||||
|
sum(ol_amount) as sum_amount,
|
||||||
|
avg(ol_quantity) as avg_qty,
|
||||||
|
avg(ol_amount) as avg_amount,
|
||||||
|
count(*) as count_order
|
||||||
|
FROM order_line
|
||||||
|
WHERE ol_delivery_d > '2007-01-02 00:00:00.000000'
|
||||||
|
GROUP BY ol_number
|
||||||
|
ORDER BY ol_number;
|
||||||
|
ol_number | sum_qty | sum_amount | avg_qty | avg_amount | count_order
|
||||||
|
-----------+---------+------------+------------------------+------------------------+-------------
|
||||||
|
0 | 0 | 0.00 | 0.00000000000000000000 | 0.00000000000000000000 | 1
|
||||||
|
1 | 1 | 1.00 | 1.00000000000000000000 | 1.00000000000000000000 | 1
|
||||||
|
2 | 2 | 2.00 | 2.0000000000000000 | 2.0000000000000000 | 1
|
||||||
|
3 | 3 | 3.00 | 3.0000000000000000 | 3.0000000000000000 | 1
|
||||||
|
4 | 4 | 4.00 | 4.0000000000000000 | 4.0000000000000000 | 1
|
||||||
|
5 | 5 | 5.00 | 5.0000000000000000 | 5.0000000000000000 | 1
|
||||||
|
6 | 6 | 6.00 | 6.0000000000000000 | 6.0000000000000000 | 1
|
||||||
|
7 | 7 | 7.00 | 7.0000000000000000 | 7.0000000000000000 | 1
|
||||||
|
8 | 8 | 8.00 | 8.0000000000000000 | 8.0000000000000000 | 1
|
||||||
|
9 | 9 | 9.00 | 9.0000000000000000 | 9.0000000000000000 | 1
|
||||||
|
10 | 10 | 10.00 | 10.0000000000000000 | 10.0000000000000000 | 1
|
||||||
|
(11 rows)
|
||||||
|
|
||||||
|
-- Query 2
|
||||||
|
SELECT
|
||||||
|
su_suppkey,
|
||||||
|
su_name,
|
||||||
|
n_name,
|
||||||
|
i_id,
|
||||||
|
i_name,
|
||||||
|
su_address,
|
||||||
|
su_phone,
|
||||||
|
su_comment
|
||||||
|
FROM
|
||||||
|
item,
|
||||||
|
supplier,
|
||||||
|
stock,
|
||||||
|
nation,
|
||||||
|
region,
|
||||||
|
(SELECT
|
||||||
|
s_i_id AS m_i_id,
|
||||||
|
min(s_quantity) as m_s_quantity
|
||||||
|
FROM
|
||||||
|
stock,
|
||||||
|
supplier,
|
||||||
|
nation,
|
||||||
|
region
|
||||||
|
WHERE mod((s_w_id*s_i_id),10000)=su_suppkey
|
||||||
|
AND su_nationkey=n_nationkey
|
||||||
|
AND n_regionkey=r_regionkey
|
||||||
|
AND r_name LIKE 'Europ%'
|
||||||
|
GROUP BY s_i_id) m
|
||||||
|
WHERE i_id = s_i_id
|
||||||
|
AND mod((s_w_id * s_i_id), 10000) = su_suppkey
|
||||||
|
AND su_nationkey = n_nationkey
|
||||||
|
AND n_regionkey = r_regionkey
|
||||||
|
AND i_data LIKE '%b'
|
||||||
|
AND r_name LIKE 'Europ%'
|
||||||
|
AND i_id = m_i_id
|
||||||
|
AND s_quantity = m_s_quantity
|
||||||
|
ORDER BY
|
||||||
|
n_name,
|
||||||
|
su_name,
|
||||||
|
i_id;
|
||||||
|
su_suppkey | su_name | n_name | i_id | i_name | su_address | su_phone | su_comment
|
||||||
|
------------+---------------------------+---------------------------+------+----------+------------+-----------------+-------------------------------------------------------------------------------------------------------
|
||||||
|
9 | abc | Germany | 3 | Keyboard | def | ghi | jkl
|
||||||
|
4 | abc | The Netherlands | 2 | Keyboard | def | ghi | jkl
|
||||||
|
(2 rows)
|
||||||
|
|
||||||
|
-- Query 3
|
||||||
|
SELECT
|
||||||
|
ol_o_id,
|
||||||
|
ol_w_id,
|
||||||
|
ol_d_id,
|
||||||
|
sum(ol_amount) AS revenue,
|
||||||
|
o_entry_d
|
||||||
|
FROM
|
||||||
|
customer,
|
||||||
|
new_order,
|
||||||
|
oorder,
|
||||||
|
order_line
|
||||||
|
WHERE c_state LIKE 'C%' -- used to ba A%, but C% works with our small data
|
||||||
|
AND c_id = o_c_id
|
||||||
|
AND c_w_id = o_w_id
|
||||||
|
AND c_d_id = o_d_id
|
||||||
|
AND no_w_id = o_w_id
|
||||||
|
AND no_d_id = o_d_id
|
||||||
|
AND no_o_id = o_id
|
||||||
|
AND ol_w_id = o_w_id
|
||||||
|
AND ol_d_id = o_d_id
|
||||||
|
AND ol_o_id = o_id
|
||||||
|
AND o_entry_d > '2007-01-02 00:00:00.000000'
|
||||||
|
GROUP BY
|
||||||
|
ol_o_id,
|
||||||
|
ol_w_id,
|
||||||
|
ol_d_id,
|
||||||
|
o_entry_d
|
||||||
|
ORDER BY
|
||||||
|
revenue DESC,
|
||||||
|
o_entry_d;
|
||||||
|
ol_o_id | ol_w_id | ol_d_id | revenue | o_entry_d
|
||||||
|
---------+---------+---------+---------+--------------------------
|
||||||
|
10 | 10 | 10 | 10.00 | Fri Oct 17 00:00:00 2008
|
||||||
|
9 | 9 | 9 | 9.00 | Fri Oct 17 00:00:00 2008
|
||||||
|
8 | 8 | 8 | 8.00 | Fri Oct 17 00:00:00 2008
|
||||||
|
7 | 7 | 7 | 7.00 | Fri Oct 17 00:00:00 2008
|
||||||
|
6 | 6 | 6 | 6.00 | Fri Oct 17 00:00:00 2008
|
||||||
|
5 | 5 | 5 | 5.00 | Fri Oct 17 00:00:00 2008
|
||||||
|
4 | 4 | 4 | 4.00 | Fri Oct 17 00:00:00 2008
|
||||||
|
3 | 3 | 3 | 3.00 | Fri Oct 17 00:00:00 2008
|
||||||
|
2 | 2 | 2 | 2.00 | Fri Oct 17 00:00:00 2008
|
||||||
|
1 | 1 | 1 | 1.00 | Fri Oct 17 00:00:00 2008
|
||||||
|
0 | 0 | 0 | 0.00 | Fri Oct 17 00:00:00 2008
|
||||||
|
(11 rows)
|
||||||
|
|
||||||
|
-- Query 4
|
||||||
|
SELECT
|
||||||
|
o_ol_cnt,
|
||||||
|
count(*) as order_count
|
||||||
|
FROM
|
||||||
|
oorder
|
||||||
|
WHERE o_entry_d >= '2007-01-02 00:00:00.000000'
|
||||||
|
AND o_entry_d < '2012-01-02 00:00:00.000000'
|
||||||
|
AND exists (SELECT *
|
||||||
|
FROM order_line
|
||||||
|
WHERE o_id = ol_o_id
|
||||||
|
AND o_w_id = ol_w_id
|
||||||
|
AND o_d_id = ol_d_id
|
||||||
|
AND ol_delivery_d >= o_entry_d)
|
||||||
|
GROUP BY o_ol_cnt
|
||||||
|
ORDER BY o_ol_cnt;
|
||||||
|
o_ol_cnt | order_count
|
||||||
|
----------+-------------
|
||||||
|
1 | 11
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- Query 5
|
||||||
|
SELECT
|
||||||
|
n_name,
|
||||||
|
sum(ol_amount) AS revenue
|
||||||
|
FROM
|
||||||
|
customer,
|
||||||
|
oorder,
|
||||||
|
order_line,
|
||||||
|
stock,
|
||||||
|
supplier,
|
||||||
|
nation,
|
||||||
|
region
|
||||||
|
WHERE c_id = o_c_id
|
||||||
|
AND c_w_id = o_w_id
|
||||||
|
AND c_d_id = o_d_id
|
||||||
|
AND ol_o_id = o_id
|
||||||
|
AND ol_w_id = o_w_id
|
||||||
|
AND ol_d_id=o_d_id
|
||||||
|
AND ol_w_id = s_w_id
|
||||||
|
AND ol_i_id = s_i_id
|
||||||
|
AND mod((s_w_id * s_i_id),10000) = su_suppkey
|
||||||
|
-- our dataset does not have the supplier in the same nation as the customer causing this
|
||||||
|
-- join to filter out all the data. We verify later on that we can actually perform an
|
||||||
|
-- ascii(substr(c_state,1,1)) == reference table column join later on so it should not
|
||||||
|
-- matter we skip this filter here.
|
||||||
|
--AND ascii(substr(c_state,1,1)) = su_nationkey
|
||||||
|
AND su_nationkey = n_nationkey
|
||||||
|
AND n_regionkey = r_regionkey
|
||||||
|
AND r_name = 'Europe'
|
||||||
|
AND o_entry_d >= '2007-01-02 00:00:00.000000'
|
||||||
|
GROUP BY n_name
|
||||||
|
ORDER BY revenue DESC;
|
||||||
|
n_name | revenue
|
||||||
|
---------------------------+---------
|
||||||
|
Germany | 3.00
|
||||||
|
The Netherlands | 2.00
|
||||||
|
(2 rows)
|
||||||
|
|
||||||
|
-- Query 6
|
||||||
|
SELECT
|
||||||
|
sum(ol_amount) AS revenue
|
||||||
|
FROM order_line
|
||||||
|
WHERE ol_delivery_d >= '1999-01-01 00:00:00.000000'
|
||||||
|
AND ol_delivery_d < '2020-01-01 00:00:00.000000'
|
||||||
|
AND ol_quantity BETWEEN 1 AND 100000;
|
||||||
|
revenue
|
||||||
|
---------
|
||||||
|
55.00
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- Query 7
|
||||||
|
SELECT
|
||||||
|
su_nationkey as supp_nation,
|
||||||
|
substr(c_state,1,1) as cust_nation,
|
||||||
|
extract(year from o_entry_d) as l_year,
|
||||||
|
sum(ol_amount) as revenue
|
||||||
|
FROM
|
||||||
|
supplier,
|
||||||
|
stock,
|
||||||
|
order_line,
|
||||||
|
oorder,
|
||||||
|
customer,
|
||||||
|
nation n1,
|
||||||
|
nation n2
|
||||||
|
WHERE ol_supply_w_id = s_w_id
|
||||||
|
AND ol_i_id = s_i_id
|
||||||
|
AND mod((s_w_id * s_i_id), 10000) = su_suppkey
|
||||||
|
AND ol_w_id = o_w_id
|
||||||
|
AND ol_d_id = o_d_id
|
||||||
|
AND ol_o_id = o_id
|
||||||
|
AND c_id = o_c_id
|
||||||
|
AND c_w_id = o_w_id
|
||||||
|
AND c_d_id = o_d_id
|
||||||
|
AND su_nationkey = n1.n_nationkey
|
||||||
|
AND ascii(substr(c_state,1,1)) = n2.n_nationkey
|
||||||
|
AND (
|
||||||
|
(n1.n_name = 'Germany' AND n2.n_name = 'Cambodia')
|
||||||
|
OR (n1.n_name = 'Cambodia' AND n2.n_name = 'Germany')
|
||||||
|
)
|
||||||
|
AND ol_delivery_d BETWEEN '2007-01-02 00:00:00.000000' AND '2012-01-02 00:00:00.000000'
|
||||||
|
GROUP BY
|
||||||
|
su_nationkey,
|
||||||
|
substr(c_state,1,1),
|
||||||
|
extract(year from o_entry_d)
|
||||||
|
ORDER BY
|
||||||
|
su_nationkey,
|
||||||
|
cust_nation,
|
||||||
|
l_year;
|
||||||
|
supp_nation | cust_nation | l_year | revenue
|
||||||
|
-------------+-------------+--------+---------
|
||||||
|
9 | C | 2008 | 3.00
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- Query 8
|
||||||
|
SELECT
|
||||||
|
extract(year from o_entry_d) as l_year,
|
||||||
|
sum(case when n2.n_name = 'Germany' then ol_amount else 0 end) / sum(ol_amount) as mkt_share
|
||||||
|
FROM
|
||||||
|
item,
|
||||||
|
supplier,
|
||||||
|
stock,
|
||||||
|
order_line,
|
||||||
|
oorder,
|
||||||
|
customer,
|
||||||
|
nation n1,
|
||||||
|
nation n2,
|
||||||
|
region
|
||||||
|
WHERE i_id = s_i_id
|
||||||
|
AND ol_i_id = s_i_id
|
||||||
|
AND ol_supply_w_id = s_w_id
|
||||||
|
AND mod((s_w_id * s_i_id),10000) = su_suppkey
|
||||||
|
AND ol_w_id = o_w_id
|
||||||
|
AND ol_d_id = o_d_id
|
||||||
|
AND ol_o_id = o_id
|
||||||
|
AND c_id = o_c_id
|
||||||
|
AND c_w_id = o_w_id
|
||||||
|
AND c_d_id = o_d_id
|
||||||
|
AND n1.n_nationkey = ascii(substr(c_state,1,1))
|
||||||
|
AND n1.n_regionkey = r_regionkey
|
||||||
|
AND ol_i_id < 1000
|
||||||
|
AND r_name = 'Europe'
|
||||||
|
AND su_nationkey = n2.n_nationkey
|
||||||
|
AND o_entry_d BETWEEN '2007-01-02 00:00:00.000000' AND '2012-01-02 00:00:00.000000'
|
||||||
|
AND i_data LIKE '%b'
|
||||||
|
AND i_id = ol_i_id
|
||||||
|
GROUP BY extract(YEAR FROM o_entry_d)
|
||||||
|
ORDER BY l_year;
|
||||||
|
l_year | mkt_share
|
||||||
|
--------+------------------------
|
||||||
|
2008 | 0.50000000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- Query 9
|
||||||
|
SELECT
|
||||||
|
n_name,
|
||||||
|
extract(year from o_entry_d) as l_year,
|
||||||
|
sum(ol_amount) as sum_profit
|
||||||
|
FROM
|
||||||
|
item,
|
||||||
|
stock,
|
||||||
|
supplier,
|
||||||
|
order_line,
|
||||||
|
oorder,
|
||||||
|
nation
|
||||||
|
WHERE ol_i_id = s_i_id
|
||||||
|
AND ol_supply_w_id = s_w_id
|
||||||
|
AND mod((s_w_id * s_i_id), 10000) = su_suppkey
|
||||||
|
AND ol_w_id = o_w_id
|
||||||
|
AND ol_d_id = o_d_id
|
||||||
|
AND ol_o_id = o_id
|
||||||
|
AND ol_i_id = i_id
|
||||||
|
AND su_nationkey = n_nationkey
|
||||||
|
AND i_data LIKE '%b' -- this used to be %BB but that will not work with our small dataset
|
||||||
|
GROUP BY
|
||||||
|
n_name,
|
||||||
|
extract(YEAR FROM o_entry_d)
|
||||||
|
ORDER BY
|
||||||
|
n_name,
|
||||||
|
l_year DESC;
|
||||||
|
n_name | l_year | sum_profit
|
||||||
|
---------------------------+--------+------------
|
||||||
|
Germany | 2008 | 3.00
|
||||||
|
The Netherlands | 2008 | 2.00
|
||||||
|
United States | 2008 | 1.00
|
||||||
|
(3 rows)
|
||||||
|
|
||||||
|
-- Query 10
|
||||||
|
SELECT
|
||||||
|
c_id,
|
||||||
|
c_last,
|
||||||
|
sum(ol_amount) AS revenue,
|
||||||
|
c_city,
|
||||||
|
c_phone,
|
||||||
|
n_name
|
||||||
|
FROM
|
||||||
|
customer,
|
||||||
|
oorder,
|
||||||
|
order_line,
|
||||||
|
nation
|
||||||
|
WHERE c_id = o_c_id
|
||||||
|
AND c_w_id = o_w_id
|
||||||
|
AND c_d_id = o_d_id
|
||||||
|
AND ol_w_id = o_w_id
|
||||||
|
AND ol_d_id = o_d_id
|
||||||
|
AND ol_o_id = o_id
|
||||||
|
AND o_entry_d >= '2007-01-02 00:00:00.000000'
|
||||||
|
AND o_entry_d <= ol_delivery_d
|
||||||
|
AND n_nationkey = ascii(substr(c_state,1,1))
|
||||||
|
GROUP BY
|
||||||
|
c_id,
|
||||||
|
c_last,
|
||||||
|
c_city,
|
||||||
|
c_phone,
|
||||||
|
n_name
|
||||||
|
ORDER BY revenue DESC;
|
||||||
|
c_id | c_last | revenue | c_city | c_phone | n_name
|
||||||
|
------+--------+---------+-----------+------------------+---------------------------
|
||||||
|
10 | John | 10.00 | Some City | +1 000 0000000 | Cambodia
|
||||||
|
9 | John | 9.00 | Some City | +1 000 0000000 | Cambodia
|
||||||
|
8 | John | 8.00 | Some City | +1 000 0000000 | Cambodia
|
||||||
|
7 | John | 7.00 | Some City | +1 000 0000000 | Cambodia
|
||||||
|
6 | John | 6.00 | Some City | +1 000 0000000 | Cambodia
|
||||||
|
5 | John | 5.00 | Some City | +1 000 0000000 | Cambodia
|
||||||
|
4 | John | 4.00 | Some City | +1 000 0000000 | Cambodia
|
||||||
|
3 | John | 3.00 | Some City | +1 000 0000000 | Cambodia
|
||||||
|
2 | John | 2.00 | Some City | +1 000 0000000 | Cambodia
|
||||||
|
1 | John | 1.00 | Some City | +1 000 0000000 | Cambodia
|
||||||
|
0 | John | 0.00 | Some City | +1 000 0000000 | Cambodia
|
||||||
|
(11 rows)
|
||||||
|
|
||||||
|
-- Query 11
|
||||||
|
SELECT
|
||||||
|
s_i_id,
|
||||||
|
sum(s_order_cnt) AS ordercount
|
||||||
|
FROM
|
||||||
|
stock,
|
||||||
|
supplier,
|
||||||
|
nation
|
||||||
|
WHERE mod((s_w_id * s_i_id),10000) = su_suppkey
|
||||||
|
AND su_nationkey = n_nationkey
|
||||||
|
AND n_name = 'Germany'
|
||||||
|
GROUP BY s_i_id
|
||||||
|
HAVING sum(s_order_cnt) >
|
||||||
|
(SELECT sum(s_order_cnt) * .005
|
||||||
|
FROM
|
||||||
|
stock,
|
||||||
|
supplier,
|
||||||
|
nation
|
||||||
|
WHERE mod((s_w_id * s_i_id),10000) = su_suppkey
|
||||||
|
AND su_nationkey = n_nationkey
|
||||||
|
AND n_name = 'Germany')
|
||||||
|
ORDER BY ordercount DESC;
|
||||||
|
s_i_id | ordercount
|
||||||
|
--------+------------
|
||||||
|
3 | 3
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- Query 12
|
||||||
|
SELECT
|
||||||
|
o_ol_cnt,
|
||||||
|
sum(case when o_carrier_id = 1 or o_carrier_id = 2 then 1 else 0 end) as high_line_count,
|
||||||
|
sum(case when o_carrier_id <> 1 and o_carrier_id <> 2 then 1 else 0 end) as low_line_count
|
||||||
|
FROM
|
||||||
|
oorder,
|
||||||
|
order_line
|
||||||
|
WHERE ol_w_id = o_w_id
|
||||||
|
AND ol_d_id = o_d_id
|
||||||
|
AND ol_o_id = o_id
|
||||||
|
AND o_entry_d <= ol_delivery_d
|
||||||
|
AND ol_delivery_d < '2020-01-01 00:00:00.000000'
|
||||||
|
GROUP BY o_ol_cnt
|
||||||
|
ORDER BY o_ol_cnt;
|
||||||
|
o_ol_cnt | high_line_count | low_line_count
|
||||||
|
----------+-----------------+----------------
|
||||||
|
1 | 2 | 9
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- Query 13
|
||||||
|
SELECT
|
||||||
|
c_count,
|
||||||
|
count(*) AS custdist
|
||||||
|
FROM (SELECT
|
||||||
|
c_id,
|
||||||
|
count(o_id)
|
||||||
|
FROM customer
|
||||||
|
LEFT OUTER JOIN oorder ON (
|
||||||
|
c_w_id = o_w_id
|
||||||
|
AND c_d_id = o_d_id
|
||||||
|
AND c_id = o_c_id
|
||||||
|
AND o_carrier_id > 8)
|
||||||
|
GROUP BY c_id) AS c_orders (c_id, c_count)
|
||||||
|
GROUP BY c_count
|
||||||
|
ORDER BY
|
||||||
|
custdist DESC,
|
||||||
|
c_count DESC;
|
||||||
|
c_count | custdist
|
||||||
|
---------+----------
|
||||||
|
0 | 9
|
||||||
|
1 | 2
|
||||||
|
(2 rows)
|
||||||
|
|
||||||
|
-- Query 14
|
||||||
|
SELECT
|
||||||
|
100.00 * sum(CASE WHEN i_data LIKE 'PR%' THEN ol_amount ELSE 0 END) / (1+sum(ol_amount)) AS promo_revenue
|
||||||
|
FROM
|
||||||
|
order_line,
|
||||||
|
item
|
||||||
|
WHERE ol_i_id = i_id
|
||||||
|
AND ol_delivery_d >= '2007-01-02 00:00:00.000000'
|
||||||
|
AND ol_delivery_d < '2020-01-02 00:00:00.000000';
|
||||||
|
promo_revenue
|
||||||
|
------------------------
|
||||||
|
0.00000000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- Query 15
|
||||||
|
WITH revenue (supplier_no, total_revenue) AS (
|
||||||
|
SELECT
|
||||||
|
mod((s_w_id * s_i_id),10000) AS supplier_no,
|
||||||
|
sum(ol_amount) AS total_revenue
|
||||||
|
FROM
|
||||||
|
order_line,
|
||||||
|
stock
|
||||||
|
WHERE ol_i_id = s_i_id
|
||||||
|
AND ol_supply_w_id = s_w_id
|
||||||
|
AND ol_delivery_d >= '2007-01-02 00:00:00.000000'
|
||||||
|
GROUP BY mod((s_w_id * s_i_id),10000))
|
||||||
|
SELECT
|
||||||
|
su_suppkey,
|
||||||
|
su_name,
|
||||||
|
su_address,
|
||||||
|
su_phone,
|
||||||
|
total_revenue
|
||||||
|
FROM
|
||||||
|
supplier,
|
||||||
|
revenue
|
||||||
|
WHERE su_suppkey = supplier_no
|
||||||
|
AND total_revenue = (SELECT max(total_revenue) FROM revenue)
|
||||||
|
ORDER BY su_suppkey;
|
||||||
|
su_suppkey | su_name | su_address | su_phone | total_revenue
|
||||||
|
------------+---------------------------+------------+-----------------+---------------
|
||||||
|
9 | abc | def | ghi | 3.00
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
--Q16
|
||||||
|
SELECT
|
||||||
|
i_name,
|
||||||
|
substr(i_data, 1, 3) AS brand,
|
||||||
|
i_price,
|
||||||
|
count(DISTINCT (mod((s_w_id * s_i_id),10000))) AS supplier_cnt
|
||||||
|
FROM
|
||||||
|
stock,
|
||||||
|
item
|
||||||
|
WHERE i_id = s_i_id
|
||||||
|
AND i_data NOT LIKE 'zz%'
|
||||||
|
AND (mod((s_w_id * s_i_id),10000) NOT IN
|
||||||
|
(SELECT su_suppkey
|
||||||
|
FROM supplier
|
||||||
|
WHERE su_comment LIKE '%bad%'))
|
||||||
|
GROUP BY
|
||||||
|
i_name,
|
||||||
|
substr(i_data, 1, 3),
|
||||||
|
i_price
|
||||||
|
ORDER BY supplier_cnt DESC;
|
||||||
|
i_name | brand | i_price | supplier_cnt
|
||||||
|
----------+-------+---------+--------------
|
||||||
|
Keyboard | co | 50.00 | 3
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
--Q17
|
||||||
|
SELECT
|
||||||
|
sum(ol_amount) / 2.0 AS avg_yearly
|
||||||
|
FROM
|
||||||
|
order_line,
|
||||||
|
(SELECT
|
||||||
|
i_id,
|
||||||
|
avg(ol_quantity) AS a
|
||||||
|
FROM
|
||||||
|
item,
|
||||||
|
order_line
|
||||||
|
WHERE i_data LIKE '%b'
|
||||||
|
AND ol_i_id = i_id
|
||||||
|
GROUP BY i_id) t
|
||||||
|
WHERE ol_i_id = t.i_id;
|
||||||
|
avg_yearly
|
||||||
|
---------------------
|
||||||
|
27.5000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- this filter was at the end causing the dataset to be empty. it should not have any
|
||||||
|
-- influence on how the query gets planned so I removed the clause
|
||||||
|
--AND ol_quantity < t.a;
|
||||||
|
-- Query 18
|
||||||
|
SELECT
|
||||||
|
c_last,
|
||||||
|
c_id o_id,
|
||||||
|
o_entry_d,
|
||||||
|
o_ol_cnt,
|
||||||
|
sum(ol_amount)
|
||||||
|
FROM
|
||||||
|
customer,
|
||||||
|
oorder,
|
||||||
|
order_line
|
||||||
|
WHERE c_id = o_c_id
|
||||||
|
AND c_w_id = o_w_id
|
||||||
|
AND c_d_id = o_d_id
|
||||||
|
AND ol_w_id = o_w_id
|
||||||
|
AND ol_d_id = o_d_id
|
||||||
|
AND ol_o_id = o_id
|
||||||
|
GROUP BY
|
||||||
|
o_id,
|
||||||
|
o_w_id,
|
||||||
|
o_d_id,
|
||||||
|
c_id,
|
||||||
|
c_last,
|
||||||
|
o_entry_d,
|
||||||
|
o_ol_cnt
|
||||||
|
HAVING sum(ol_amount) > 5 -- was 200, but thats too big for the dataset
|
||||||
|
ORDER BY
|
||||||
|
sum(ol_amount) DESC,
|
||||||
|
o_entry_d;
|
||||||
|
c_last | o_id | o_entry_d | o_ol_cnt | sum
|
||||||
|
--------+------+--------------------------+----------+-------
|
||||||
|
John | 10 | Fri Oct 17 00:00:00 2008 | 1 | 10.00
|
||||||
|
John | 9 | Fri Oct 17 00:00:00 2008 | 1 | 9.00
|
||||||
|
John | 8 | Fri Oct 17 00:00:00 2008 | 1 | 8.00
|
||||||
|
John | 7 | Fri Oct 17 00:00:00 2008 | 1 | 7.00
|
||||||
|
John | 6 | Fri Oct 17 00:00:00 2008 | 1 | 6.00
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
-- Query 19
|
||||||
|
SELECT
|
||||||
|
sum(ol_amount) AS revenue
|
||||||
|
FROM
|
||||||
|
order_line,
|
||||||
|
item
|
||||||
|
WHERE ( ol_i_id = i_id
|
||||||
|
AND i_data LIKE '%a'
|
||||||
|
AND ol_quantity >= 1
|
||||||
|
AND ol_quantity <= 10
|
||||||
|
AND i_price BETWEEN 1 AND 400000
|
||||||
|
AND ol_w_id IN (1,2,3))
|
||||||
|
OR ( ol_i_id = i_id
|
||||||
|
AND i_data LIKE '%b'
|
||||||
|
AND ol_quantity >= 1
|
||||||
|
AND ol_quantity <= 10
|
||||||
|
AND i_price BETWEEN 1 AND 400000
|
||||||
|
AND ol_w_id IN (1,2,4))
|
||||||
|
OR ( ol_i_id = i_id
|
||||||
|
AND i_data LIKE '%c'
|
||||||
|
AND ol_quantity >= 1
|
||||||
|
AND ol_quantity <= 10
|
||||||
|
AND i_price BETWEEN 1 AND 400000
|
||||||
|
AND ol_w_id IN (1,5,3));
|
||||||
|
revenue
|
||||||
|
---------
|
||||||
|
7.00
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- Query 20
|
||||||
|
SELECT
|
||||||
|
su_name,
|
||||||
|
su_address
|
||||||
|
FROM
|
||||||
|
supplier,
|
||||||
|
nation
|
||||||
|
WHERE su_suppkey in
|
||||||
|
(SELECT
|
||||||
|
mod(s_i_id * s_w_id, 10000)
|
||||||
|
FROM
|
||||||
|
stock,
|
||||||
|
order_line
|
||||||
|
WHERE s_i_id IN
|
||||||
|
(SELECT i_id
|
||||||
|
FROM item
|
||||||
|
WHERE i_data LIKE 'co%')
|
||||||
|
AND ol_i_id = s_i_id
|
||||||
|
AND ol_delivery_d > '2008-05-23 12:00:00' -- was 2010, but our order is in 2008
|
||||||
|
GROUP BY s_i_id, s_w_id, s_quantity
|
||||||
|
HAVING 2*s_quantity > sum(ol_quantity))
|
||||||
|
AND su_nationkey = n_nationkey
|
||||||
|
AND n_name = 'Germany'
|
||||||
|
ORDER BY su_name;
|
||||||
|
su_name | su_address
|
||||||
|
---------------------------+------------
|
||||||
|
abc | def
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- Query 21
|
||||||
|
-- DATA SET DOES NOT COVER THIS QUERY
|
||||||
|
SELECT
|
||||||
|
su_name,
|
||||||
|
count(*) AS numwait
|
||||||
|
FROM
|
||||||
|
supplier,
|
||||||
|
order_line l1,
|
||||||
|
oorder,
|
||||||
|
stock,
|
||||||
|
nation
|
||||||
|
WHERE ol_o_id = o_id
|
||||||
|
AND ol_w_id = o_w_id
|
||||||
|
AND ol_d_id = o_d_id
|
||||||
|
AND ol_w_id = s_w_id
|
||||||
|
AND ol_i_id = s_i_id
|
||||||
|
AND mod((s_w_id * s_i_id),10000) = su_suppkey
|
||||||
|
AND l1.ol_delivery_d > o_entry_d
|
||||||
|
AND NOT exists (SELECT *
|
||||||
|
FROM order_line l2
|
||||||
|
WHERE l2.ol_o_id = l1.ol_o_id
|
||||||
|
AND l2.ol_w_id = l1.ol_w_id
|
||||||
|
AND l2.ol_d_id = l1.ol_d_id
|
||||||
|
AND l2.ol_delivery_d > l1.ol_delivery_d)
|
||||||
|
AND su_nationkey = n_nationkey
|
||||||
|
AND n_name = 'Germany'
|
||||||
|
GROUP BY su_name
|
||||||
|
ORDER BY
|
||||||
|
numwait desc,
|
||||||
|
su_name;
|
||||||
|
su_name | numwait
|
||||||
|
---------+---------
|
||||||
|
(0 rows)
|
||||||
|
|
||||||
|
-- Query 22
|
||||||
|
-- DATA SET DOES NOT COVER THIS QUERY
|
||||||
|
SELECT
|
||||||
|
substr(c_state,1,1) AS country,
|
||||||
|
count(*) AS numcust,
|
||||||
|
sum(c_balance) AS totacctbal
|
||||||
|
FROM customer
|
||||||
|
WHERE substr(c_phone,1,1) in ('1','2','3','4','5','6','7')
|
||||||
|
AND c_balance > (SELECT avg(c_BALANCE)
|
||||||
|
FROM customer
|
||||||
|
WHERE c_balance > 0.00
|
||||||
|
AND substr(c_phone,1,1) in ('1','2','3','4','5','6','7'))
|
||||||
|
AND NOT exists (SELECT *
|
||||||
|
FROM oorder
|
||||||
|
WHERE o_c_id = c_id
|
||||||
|
AND o_w_id = c_w_id
|
||||||
|
AND o_d_id = c_d_id)
|
||||||
|
GROUP BY substr(c_state,1,1)
|
||||||
|
ORDER BY substr(c_state,1,1);
|
||||||
|
country | numcust | totacctbal
|
||||||
|
---------+---------+------------
|
||||||
|
(0 rows)
|
||||||
|
|
||||||
|
SET client_min_messages TO WARNING;
|
||||||
|
DROP SCHEMA chbenchmark_all_queries CASCADE;
|
|
@ -0,0 +1,56 @@
|
||||||
|
SET citus.next_shard_id TO 1670000;
|
||||||
|
CREATE SCHEMA expression_reference_join;
|
||||||
|
SET search_path TO expression_reference_join;
|
||||||
|
SET citus.shard_count TO 4;
|
||||||
|
SET citus.enable_repartition_joins TO on;
|
||||||
|
CREATE TABLE ref (a int, b int);
|
||||||
|
CREATE TABLE test (x int, y int);
|
||||||
|
INSERT INTO ref VALUES
|
||||||
|
(2,2),
|
||||||
|
(4,4);
|
||||||
|
INSERT INTO test VALUES
|
||||||
|
(1,2),
|
||||||
|
(2,2);
|
||||||
|
SELECT create_reference_table('ref');
|
||||||
|
NOTICE: Copying data from local table...
|
||||||
|
create_reference_table
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT create_distributed_table('test', 'x');
|
||||||
|
NOTICE: Copying data from local table...
|
||||||
|
create_distributed_table
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- PR 3180 implements expressions in join clauses to reference tables to support CHbenCHmark queries 7/8/9
|
||||||
|
-- plannable as a repartition + reference join, now with an expression in the join clause
|
||||||
|
SELECT *
|
||||||
|
FROM
|
||||||
|
test t1 JOIN test t2 USING (y), -- causes repartition, which makes this not routable or pushdownable
|
||||||
|
ref a
|
||||||
|
WHERE t2.y * 2 = a.a
|
||||||
|
ORDER BY 1,2,3;
|
||||||
|
y | x | x | a | b
|
||||||
|
---+---+---+---+---
|
||||||
|
2 | 1 | 1 | 4 | 4
|
||||||
|
2 | 1 | 2 | 4 | 4
|
||||||
|
2 | 2 | 1 | 4 | 4
|
||||||
|
2 | 2 | 2 | 4 | 4
|
||||||
|
(4 rows)
|
||||||
|
|
||||||
|
-- The join clause is wider than it used to be, causing this query to be recognized by the LogicalPlanner as a repartition join.
|
||||||
|
-- Unplannable query due to a three-way join which causes no valid path (besides the cartesian product) to be found
|
||||||
|
SELECT *
|
||||||
|
FROM
|
||||||
|
test t1 JOIN test t2 USING (y), -- causes repartition, which makes this not routable or pushdownable
|
||||||
|
ref a,
|
||||||
|
ref b
|
||||||
|
WHERE t2.y - a.a - b.b = 0
|
||||||
|
ORDER BY 1,2,3;
|
||||||
|
ERROR: cannot perform distributed planning on this query
|
||||||
|
DETAIL: Cartesian products are currently unsupported
|
||||||
|
SET client_min_messages TO WARNING;
|
||||||
|
DROP SCHEMA expression_reference_join CASCADE;
|
|
@ -243,7 +243,7 @@ DEBUG: dual partition column types do not match
|
||||||
DEBUG: single partition column types do not match
|
DEBUG: single partition column types do not match
|
||||||
DEBUG: single partition column types do not match
|
DEBUG: single partition column types do not match
|
||||||
DEBUG: dual partition column types do not match
|
DEBUG: dual partition column types do not match
|
||||||
LOG: join order: [ "single_hash_repartition_first" ][ local partition join "single_hash_repartition_first" ][ cartesian product "single_hash_repartition_second" ]
|
LOG: join order: [ "single_hash_repartition_second" ][ cartesian product "single_hash_repartition_first" ][ dual partition join "single_hash_repartition_first" ]
|
||||||
ERROR: cannot perform distributed planning on this query
|
ERROR: cannot perform distributed planning on this query
|
||||||
DETAIL: Cartesian products are currently unsupported
|
DETAIL: Cartesian products are currently unsupported
|
||||||
-- single repartition query in CTE
|
-- single repartition query in CTE
|
||||||
|
|
|
@ -73,7 +73,7 @@ test: multi_reference_table multi_select_for_update relation_access_tracking
|
||||||
test: custom_aggregate_support aggregate_support
|
test: custom_aggregate_support aggregate_support
|
||||||
test: multi_average_expression multi_working_columns multi_having_pushdown
|
test: multi_average_expression multi_working_columns multi_having_pushdown
|
||||||
test: multi_array_agg multi_limit_clause multi_orderby_limit_pushdown
|
test: multi_array_agg multi_limit_clause multi_orderby_limit_pushdown
|
||||||
test: multi_jsonb_agg multi_jsonb_object_agg multi_json_agg multi_json_object_agg bool_agg ch_bench_having ch_bench_subquery_repartition
|
test: multi_jsonb_agg multi_jsonb_object_agg multi_json_agg multi_json_object_agg bool_agg ch_bench_having ch_bench_subquery_repartition chbenchmark_all_queries expression_reference_join
|
||||||
test: multi_agg_type_conversion multi_count_type_conversion
|
test: multi_agg_type_conversion multi_count_type_conversion
|
||||||
test: multi_partition_pruning single_hash_repartition_join
|
test: multi_partition_pruning single_hash_repartition_join
|
||||||
test: multi_join_pruning multi_hash_pruning
|
test: multi_join_pruning multi_hash_pruning
|
||||||
|
|
|
@ -0,0 +1,722 @@
|
||||||
|
SET citus.next_shard_id TO 1650000;
|
||||||
|
CREATE SCHEMA chbenchmark_all_queries;
|
||||||
|
SET search_path TO chbenchmark_all_queries;
|
||||||
|
|
||||||
|
SET citus.enable_repartition_joins TO on;
|
||||||
|
CREATE TABLE order_line (
|
||||||
|
ol_w_id int NOT NULL,
|
||||||
|
ol_d_id int NOT NULL,
|
||||||
|
ol_o_id int NOT NULL,
|
||||||
|
ol_number int NOT NULL,
|
||||||
|
ol_i_id int NOT NULL,
|
||||||
|
ol_delivery_d timestamp NULL DEFAULT NULL,
|
||||||
|
ol_amount decimal(6,2) NOT NULL,
|
||||||
|
ol_supply_w_id int NOT NULL,
|
||||||
|
ol_quantity decimal(2,0) NOT NULL,
|
||||||
|
ol_dist_info char(24) NOT NULL,
|
||||||
|
PRIMARY KEY (ol_w_id,ol_d_id,ol_o_id,ol_number)
|
||||||
|
);
|
||||||
|
CREATE TABLE new_order (
|
||||||
|
no_w_id int NOT NULL,
|
||||||
|
no_d_id int NOT NULL,
|
||||||
|
no_o_id int NOT NULL,
|
||||||
|
PRIMARY KEY (no_w_id,no_d_id,no_o_id)
|
||||||
|
);
|
||||||
|
CREATE TABLE stock (
|
||||||
|
s_w_id int NOT NULL,
|
||||||
|
s_i_id int NOT NULL,
|
||||||
|
s_quantity decimal(4,0) NOT NULL,
|
||||||
|
s_ytd decimal(8,2) NOT NULL,
|
||||||
|
s_order_cnt int NOT NULL,
|
||||||
|
s_remote_cnt int NOT NULL,
|
||||||
|
s_data varchar(50) NOT NULL,
|
||||||
|
s_dist_01 char(24) NOT NULL,
|
||||||
|
s_dist_02 char(24) NOT NULL,
|
||||||
|
s_dist_03 char(24) NOT NULL,
|
||||||
|
s_dist_04 char(24) NOT NULL,
|
||||||
|
s_dist_05 char(24) NOT NULL,
|
||||||
|
s_dist_06 char(24) NOT NULL,
|
||||||
|
s_dist_07 char(24) NOT NULL,
|
||||||
|
s_dist_08 char(24) NOT NULL,
|
||||||
|
s_dist_09 char(24) NOT NULL,
|
||||||
|
s_dist_10 char(24) NOT NULL,
|
||||||
|
PRIMARY KEY (s_w_id,s_i_id)
|
||||||
|
);
|
||||||
|
CREATE TABLE oorder (
|
||||||
|
o_w_id int NOT NULL,
|
||||||
|
o_d_id int NOT NULL,
|
||||||
|
o_id int NOT NULL,
|
||||||
|
o_c_id int NOT NULL,
|
||||||
|
o_carrier_id int DEFAULT NULL,
|
||||||
|
o_ol_cnt decimal(2,0) NOT NULL,
|
||||||
|
o_all_local decimal(1,0) NOT NULL,
|
||||||
|
o_entry_d timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
PRIMARY KEY (o_w_id,o_d_id,o_id),
|
||||||
|
UNIQUE (o_w_id,o_d_id,o_c_id,o_id)
|
||||||
|
);
|
||||||
|
CREATE TABLE history (
|
||||||
|
h_c_id int NOT NULL,
|
||||||
|
h_c_d_id int NOT NULL,
|
||||||
|
h_c_w_id int NOT NULL,
|
||||||
|
h_d_id int NOT NULL,
|
||||||
|
h_w_id int NOT NULL,
|
||||||
|
h_date timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
h_amount decimal(6,2) NOT NULL,
|
||||||
|
h_data varchar(24) NOT NULL
|
||||||
|
);
|
||||||
|
CREATE TABLE customer (
|
||||||
|
c_w_id int NOT NULL,
|
||||||
|
c_d_id int NOT NULL,
|
||||||
|
c_id int NOT NULL,
|
||||||
|
c_discount decimal(4,4) NOT NULL,
|
||||||
|
c_credit char(2) NOT NULL,
|
||||||
|
c_last varchar(16) NOT NULL,
|
||||||
|
c_first varchar(16) NOT NULL,
|
||||||
|
c_credit_lim decimal(12,2) NOT NULL,
|
||||||
|
c_balance decimal(12,2) NOT NULL,
|
||||||
|
c_ytd_payment float NOT NULL,
|
||||||
|
c_payment_cnt int NOT NULL,
|
||||||
|
c_delivery_cnt int NOT NULL,
|
||||||
|
c_street_1 varchar(20) NOT NULL,
|
||||||
|
c_street_2 varchar(20) NOT NULL,
|
||||||
|
c_city varchar(20) NOT NULL,
|
||||||
|
c_state char(2) NOT NULL,
|
||||||
|
c_zip char(9) NOT NULL,
|
||||||
|
c_phone char(16) NOT NULL,
|
||||||
|
c_since timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
c_middle char(2) NOT NULL,
|
||||||
|
c_data varchar(500) NOT NULL,
|
||||||
|
PRIMARY KEY (c_w_id,c_d_id,c_id)
|
||||||
|
);
|
||||||
|
CREATE TABLE district (
|
||||||
|
d_w_id int NOT NULL,
|
||||||
|
d_id int NOT NULL,
|
||||||
|
d_ytd decimal(12,2) NOT NULL,
|
||||||
|
d_tax decimal(4,4) NOT NULL,
|
||||||
|
d_next_o_id int NOT NULL,
|
||||||
|
d_name varchar(10) NOT NULL,
|
||||||
|
d_street_1 varchar(20) NOT NULL,
|
||||||
|
d_street_2 varchar(20) NOT NULL,
|
||||||
|
d_city varchar(20) NOT NULL,
|
||||||
|
d_state char(2) NOT NULL,
|
||||||
|
d_zip char(9) NOT NULL,
|
||||||
|
PRIMARY KEY (d_w_id,d_id)
|
||||||
|
);
|
||||||
|
CREATE TABLE item (
|
||||||
|
i_id int NOT NULL,
|
||||||
|
i_name varchar(24) NOT NULL,
|
||||||
|
i_price decimal(5,2) NOT NULL,
|
||||||
|
i_data varchar(50) NOT NULL,
|
||||||
|
i_im_id int NOT NULL,
|
||||||
|
PRIMARY KEY (i_id)
|
||||||
|
);
|
||||||
|
CREATE TABLE warehouse (
|
||||||
|
w_id int NOT NULL,
|
||||||
|
w_ytd decimal(12,2) NOT NULL,
|
||||||
|
w_tax decimal(4,4) NOT NULL,
|
||||||
|
w_name varchar(10) NOT NULL,
|
||||||
|
w_street_1 varchar(20) NOT NULL,
|
||||||
|
w_street_2 varchar(20) NOT NULL,
|
||||||
|
w_city varchar(20) NOT NULL,
|
||||||
|
w_state char(2) NOT NULL,
|
||||||
|
w_zip char(9) NOT NULL,
|
||||||
|
PRIMARY KEY (w_id)
|
||||||
|
);
|
||||||
|
CREATE TABLE region (
|
||||||
|
r_regionkey int not null,
|
||||||
|
r_name char(55) not null,
|
||||||
|
r_comment char(152) not null,
|
||||||
|
PRIMARY KEY ( r_regionkey )
|
||||||
|
);
|
||||||
|
CREATE TABLE nation (
|
||||||
|
n_nationkey int not null,
|
||||||
|
n_name char(25) not null,
|
||||||
|
n_regionkey int not null,
|
||||||
|
n_comment char(152) not null,
|
||||||
|
PRIMARY KEY ( n_nationkey )
|
||||||
|
);
|
||||||
|
CREATE TABLE supplier (
|
||||||
|
su_suppkey int not null,
|
||||||
|
su_name char(25) not null,
|
||||||
|
su_address varchar(40) not null,
|
||||||
|
su_nationkey int not null,
|
||||||
|
su_phone char(15) not null,
|
||||||
|
su_acctbal numeric(12,2) not null,
|
||||||
|
su_comment char(101) not null,
|
||||||
|
PRIMARY KEY ( su_suppkey )
|
||||||
|
);
|
||||||
|
|
||||||
|
SELECT create_distributed_table('order_line','ol_w_id');
|
||||||
|
SELECT create_distributed_table('new_order','no_w_id');
|
||||||
|
SELECT create_distributed_table('stock','s_w_id');
|
||||||
|
SELECT create_distributed_table('oorder','o_w_id');
|
||||||
|
SELECT create_distributed_table('history','h_w_id');
|
||||||
|
SELECT create_distributed_table('customer','c_w_id');
|
||||||
|
SELECT create_distributed_table('district','d_w_id');
|
||||||
|
SELECT create_distributed_table('warehouse','w_id');
|
||||||
|
SELECT create_reference_table('item');
|
||||||
|
SELECT create_reference_table('region');
|
||||||
|
SELECT create_reference_table('nation');
|
||||||
|
SELECT create_reference_table('supplier');
|
||||||
|
|
||||||
|
TRUNCATE order_line, new_order, stock, oorder, history, customer, district, warehouse, item, region, nation, supplier; -- for easy copy in development
|
||||||
|
INSERT INTO supplier SELECT c, 'abc', 'def', c, 'ghi', c, 'jkl' FROM generate_series(0,10) AS c;
|
||||||
|
INSERT INTO new_order SELECT c, c, c FROM generate_series(0,10) AS c;
|
||||||
|
INSERT INTO stock SELECT c,c,c,c,c,c, 'abc','abc','abc','abc','abc','abc','abc','abc','abc','abc','abc' FROM generate_series(1,3) AS c;
|
||||||
|
INSERT INTO stock SELECT c, 5000,c,c,c,c, 'abc','abc','abc','abc','abc','abc','abc','abc','abc','abc','abc' FROM generate_series(1,3) AS c; -- mod(2*5000,10000) == 0
|
||||||
|
INSERT INTO order_line SELECT c, c, c, c, c, '2008-10-17 00:00:00.000000', c, c, c, 'abc' FROM generate_series(0,10) AS c;
|
||||||
|
INSERT INTO oorder SELECT c, c, c, c, c, 1, 1, '2008-10-17 00:00:00.000000' FROM generate_series(0,10) AS c;
|
||||||
|
INSERT INTO customer SELECT c, c, c, 0, 'XX', 'John', 'Doe', 1000, 0, 0, c, c, 'Name', 'Street', 'Some City', 'CA', '12345', '+1 000 0000000', '2007-01-02 00:00:00.000000', 'NA', 'nothing special' FROM generate_series(0,10) AS c;
|
||||||
|
INSERT INTO item SELECT c, 'Keyboard', 50, 'co b', c FROM generate_series(0,10) AS c; --co% and %b filters all around
|
||||||
|
INSERT INTO region VALUES
|
||||||
|
(1, 'Not Europe', 'Big'),
|
||||||
|
(2, 'Europe', 'Big');
|
||||||
|
INSERT INTO nation VALUES
|
||||||
|
(1, 'United States', 1, 'Also Big'),
|
||||||
|
(4, 'The Netherlands', 2, 'Flat'),
|
||||||
|
(9, 'Germany', 2, 'Germany must be in here for Q7'),
|
||||||
|
(67, 'Cambodia', 2, 'I don''t understand how we got from California to Cambodia but I will take it, it also is not in Europe, but we need it to be for Q8');
|
||||||
|
|
||||||
|
-- Query 1
|
||||||
|
SELECT
|
||||||
|
ol_number,
|
||||||
|
sum(ol_quantity) as sum_qty,
|
||||||
|
sum(ol_amount) as sum_amount,
|
||||||
|
avg(ol_quantity) as avg_qty,
|
||||||
|
avg(ol_amount) as avg_amount,
|
||||||
|
count(*) as count_order
|
||||||
|
FROM order_line
|
||||||
|
WHERE ol_delivery_d > '2007-01-02 00:00:00.000000'
|
||||||
|
GROUP BY ol_number
|
||||||
|
ORDER BY ol_number;
|
||||||
|
|
||||||
|
-- Query 2
|
||||||
|
SELECT
|
||||||
|
su_suppkey,
|
||||||
|
su_name,
|
||||||
|
n_name,
|
||||||
|
i_id,
|
||||||
|
i_name,
|
||||||
|
su_address,
|
||||||
|
su_phone,
|
||||||
|
su_comment
|
||||||
|
FROM
|
||||||
|
item,
|
||||||
|
supplier,
|
||||||
|
stock,
|
||||||
|
nation,
|
||||||
|
region,
|
||||||
|
(SELECT
|
||||||
|
s_i_id AS m_i_id,
|
||||||
|
min(s_quantity) as m_s_quantity
|
||||||
|
FROM
|
||||||
|
stock,
|
||||||
|
supplier,
|
||||||
|
nation,
|
||||||
|
region
|
||||||
|
WHERE mod((s_w_id*s_i_id),10000)=su_suppkey
|
||||||
|
AND su_nationkey=n_nationkey
|
||||||
|
AND n_regionkey=r_regionkey
|
||||||
|
AND r_name LIKE 'Europ%'
|
||||||
|
GROUP BY s_i_id) m
|
||||||
|
WHERE i_id = s_i_id
|
||||||
|
AND mod((s_w_id * s_i_id), 10000) = su_suppkey
|
||||||
|
AND su_nationkey = n_nationkey
|
||||||
|
AND n_regionkey = r_regionkey
|
||||||
|
AND i_data LIKE '%b'
|
||||||
|
AND r_name LIKE 'Europ%'
|
||||||
|
AND i_id = m_i_id
|
||||||
|
AND s_quantity = m_s_quantity
|
||||||
|
ORDER BY
|
||||||
|
n_name,
|
||||||
|
su_name,
|
||||||
|
i_id;
|
||||||
|
|
||||||
|
-- Query 3
|
||||||
|
SELECT
|
||||||
|
ol_o_id,
|
||||||
|
ol_w_id,
|
||||||
|
ol_d_id,
|
||||||
|
sum(ol_amount) AS revenue,
|
||||||
|
o_entry_d
|
||||||
|
FROM
|
||||||
|
customer,
|
||||||
|
new_order,
|
||||||
|
oorder,
|
||||||
|
order_line
|
||||||
|
WHERE c_state LIKE 'C%' -- used to ba A%, but C% works with our small data
|
||||||
|
AND c_id = o_c_id
|
||||||
|
AND c_w_id = o_w_id
|
||||||
|
AND c_d_id = o_d_id
|
||||||
|
AND no_w_id = o_w_id
|
||||||
|
AND no_d_id = o_d_id
|
||||||
|
AND no_o_id = o_id
|
||||||
|
AND ol_w_id = o_w_id
|
||||||
|
AND ol_d_id = o_d_id
|
||||||
|
AND ol_o_id = o_id
|
||||||
|
AND o_entry_d > '2007-01-02 00:00:00.000000'
|
||||||
|
GROUP BY
|
||||||
|
ol_o_id,
|
||||||
|
ol_w_id,
|
||||||
|
ol_d_id,
|
||||||
|
o_entry_d
|
||||||
|
ORDER BY
|
||||||
|
revenue DESC,
|
||||||
|
o_entry_d;
|
||||||
|
|
||||||
|
-- Query 4
|
||||||
|
SELECT
|
||||||
|
o_ol_cnt,
|
||||||
|
count(*) as order_count
|
||||||
|
FROM
|
||||||
|
oorder
|
||||||
|
WHERE o_entry_d >= '2007-01-02 00:00:00.000000'
|
||||||
|
AND o_entry_d < '2012-01-02 00:00:00.000000'
|
||||||
|
AND exists (SELECT *
|
||||||
|
FROM order_line
|
||||||
|
WHERE o_id = ol_o_id
|
||||||
|
AND o_w_id = ol_w_id
|
||||||
|
AND o_d_id = ol_d_id
|
||||||
|
AND ol_delivery_d >= o_entry_d)
|
||||||
|
GROUP BY o_ol_cnt
|
||||||
|
ORDER BY o_ol_cnt;
|
||||||
|
|
||||||
|
-- Query 5
|
||||||
|
SELECT
|
||||||
|
n_name,
|
||||||
|
sum(ol_amount) AS revenue
|
||||||
|
FROM
|
||||||
|
customer,
|
||||||
|
oorder,
|
||||||
|
order_line,
|
||||||
|
stock,
|
||||||
|
supplier,
|
||||||
|
nation,
|
||||||
|
region
|
||||||
|
WHERE c_id = o_c_id
|
||||||
|
AND c_w_id = o_w_id
|
||||||
|
AND c_d_id = o_d_id
|
||||||
|
AND ol_o_id = o_id
|
||||||
|
AND ol_w_id = o_w_id
|
||||||
|
AND ol_d_id=o_d_id
|
||||||
|
AND ol_w_id = s_w_id
|
||||||
|
AND ol_i_id = s_i_id
|
||||||
|
AND mod((s_w_id * s_i_id),10000) = su_suppkey
|
||||||
|
-- our dataset does not have the supplier in the same nation as the customer causing this
|
||||||
|
-- join to filter out all the data. We verify later on that we can actually perform an
|
||||||
|
-- ascii(substr(c_state,1,1)) == reference table column join later on so it should not
|
||||||
|
-- matter we skip this filter here.
|
||||||
|
--AND ascii(substr(c_state,1,1)) = su_nationkey
|
||||||
|
AND su_nationkey = n_nationkey
|
||||||
|
AND n_regionkey = r_regionkey
|
||||||
|
AND r_name = 'Europe'
|
||||||
|
AND o_entry_d >= '2007-01-02 00:00:00.000000'
|
||||||
|
GROUP BY n_name
|
||||||
|
ORDER BY revenue DESC;
|
||||||
|
|
||||||
|
-- Query 6
|
||||||
|
SELECT
|
||||||
|
sum(ol_amount) AS revenue
|
||||||
|
FROM order_line
|
||||||
|
WHERE ol_delivery_d >= '1999-01-01 00:00:00.000000'
|
||||||
|
AND ol_delivery_d < '2020-01-01 00:00:00.000000'
|
||||||
|
AND ol_quantity BETWEEN 1 AND 100000;
|
||||||
|
|
||||||
|
-- Query 7
|
||||||
|
SELECT
|
||||||
|
su_nationkey as supp_nation,
|
||||||
|
substr(c_state,1,1) as cust_nation,
|
||||||
|
extract(year from o_entry_d) as l_year,
|
||||||
|
sum(ol_amount) as revenue
|
||||||
|
FROM
|
||||||
|
supplier,
|
||||||
|
stock,
|
||||||
|
order_line,
|
||||||
|
oorder,
|
||||||
|
customer,
|
||||||
|
nation n1,
|
||||||
|
nation n2
|
||||||
|
WHERE ol_supply_w_id = s_w_id
|
||||||
|
AND ol_i_id = s_i_id
|
||||||
|
AND mod((s_w_id * s_i_id), 10000) = su_suppkey
|
||||||
|
AND ol_w_id = o_w_id
|
||||||
|
AND ol_d_id = o_d_id
|
||||||
|
AND ol_o_id = o_id
|
||||||
|
AND c_id = o_c_id
|
||||||
|
AND c_w_id = o_w_id
|
||||||
|
AND c_d_id = o_d_id
|
||||||
|
AND su_nationkey = n1.n_nationkey
|
||||||
|
AND ascii(substr(c_state,1,1)) = n2.n_nationkey
|
||||||
|
AND (
|
||||||
|
(n1.n_name = 'Germany' AND n2.n_name = 'Cambodia')
|
||||||
|
OR (n1.n_name = 'Cambodia' AND n2.n_name = 'Germany')
|
||||||
|
)
|
||||||
|
AND ol_delivery_d BETWEEN '2007-01-02 00:00:00.000000' AND '2012-01-02 00:00:00.000000'
|
||||||
|
GROUP BY
|
||||||
|
su_nationkey,
|
||||||
|
substr(c_state,1,1),
|
||||||
|
extract(year from o_entry_d)
|
||||||
|
ORDER BY
|
||||||
|
su_nationkey,
|
||||||
|
cust_nation,
|
||||||
|
l_year;
|
||||||
|
|
||||||
|
-- Query 8
|
||||||
|
SELECT
|
||||||
|
extract(year from o_entry_d) as l_year,
|
||||||
|
sum(case when n2.n_name = 'Germany' then ol_amount else 0 end) / sum(ol_amount) as mkt_share
|
||||||
|
FROM
|
||||||
|
item,
|
||||||
|
supplier,
|
||||||
|
stock,
|
||||||
|
order_line,
|
||||||
|
oorder,
|
||||||
|
customer,
|
||||||
|
nation n1,
|
||||||
|
nation n2,
|
||||||
|
region
|
||||||
|
WHERE i_id = s_i_id
|
||||||
|
AND ol_i_id = s_i_id
|
||||||
|
AND ol_supply_w_id = s_w_id
|
||||||
|
AND mod((s_w_id * s_i_id),10000) = su_suppkey
|
||||||
|
AND ol_w_id = o_w_id
|
||||||
|
AND ol_d_id = o_d_id
|
||||||
|
AND ol_o_id = o_id
|
||||||
|
AND c_id = o_c_id
|
||||||
|
AND c_w_id = o_w_id
|
||||||
|
AND c_d_id = o_d_id
|
||||||
|
AND n1.n_nationkey = ascii(substr(c_state,1,1))
|
||||||
|
AND n1.n_regionkey = r_regionkey
|
||||||
|
AND ol_i_id < 1000
|
||||||
|
AND r_name = 'Europe'
|
||||||
|
AND su_nationkey = n2.n_nationkey
|
||||||
|
AND o_entry_d BETWEEN '2007-01-02 00:00:00.000000' AND '2012-01-02 00:00:00.000000'
|
||||||
|
AND i_data LIKE '%b'
|
||||||
|
AND i_id = ol_i_id
|
||||||
|
GROUP BY extract(YEAR FROM o_entry_d)
|
||||||
|
ORDER BY l_year;
|
||||||
|
|
||||||
|
-- Query 9
|
||||||
|
SELECT
|
||||||
|
n_name,
|
||||||
|
extract(year from o_entry_d) as l_year,
|
||||||
|
sum(ol_amount) as sum_profit
|
||||||
|
FROM
|
||||||
|
item,
|
||||||
|
stock,
|
||||||
|
supplier,
|
||||||
|
order_line,
|
||||||
|
oorder,
|
||||||
|
nation
|
||||||
|
WHERE ol_i_id = s_i_id
|
||||||
|
AND ol_supply_w_id = s_w_id
|
||||||
|
AND mod((s_w_id * s_i_id), 10000) = su_suppkey
|
||||||
|
AND ol_w_id = o_w_id
|
||||||
|
AND ol_d_id = o_d_id
|
||||||
|
AND ol_o_id = o_id
|
||||||
|
AND ol_i_id = i_id
|
||||||
|
AND su_nationkey = n_nationkey
|
||||||
|
AND i_data LIKE '%b' -- this used to be %BB but that will not work with our small dataset
|
||||||
|
GROUP BY
|
||||||
|
n_name,
|
||||||
|
extract(YEAR FROM o_entry_d)
|
||||||
|
ORDER BY
|
||||||
|
n_name,
|
||||||
|
l_year DESC;
|
||||||
|
|
||||||
|
-- Query 10
|
||||||
|
SELECT
|
||||||
|
c_id,
|
||||||
|
c_last,
|
||||||
|
sum(ol_amount) AS revenue,
|
||||||
|
c_city,
|
||||||
|
c_phone,
|
||||||
|
n_name
|
||||||
|
FROM
|
||||||
|
customer,
|
||||||
|
oorder,
|
||||||
|
order_line,
|
||||||
|
nation
|
||||||
|
WHERE c_id = o_c_id
|
||||||
|
AND c_w_id = o_w_id
|
||||||
|
AND c_d_id = o_d_id
|
||||||
|
AND ol_w_id = o_w_id
|
||||||
|
AND ol_d_id = o_d_id
|
||||||
|
AND ol_o_id = o_id
|
||||||
|
AND o_entry_d >= '2007-01-02 00:00:00.000000'
|
||||||
|
AND o_entry_d <= ol_delivery_d
|
||||||
|
AND n_nationkey = ascii(substr(c_state,1,1))
|
||||||
|
GROUP BY
|
||||||
|
c_id,
|
||||||
|
c_last,
|
||||||
|
c_city,
|
||||||
|
c_phone,
|
||||||
|
n_name
|
||||||
|
ORDER BY revenue DESC;
|
||||||
|
|
||||||
|
-- Query 11
|
||||||
|
SELECT
|
||||||
|
s_i_id,
|
||||||
|
sum(s_order_cnt) AS ordercount
|
||||||
|
FROM
|
||||||
|
stock,
|
||||||
|
supplier,
|
||||||
|
nation
|
||||||
|
WHERE mod((s_w_id * s_i_id),10000) = su_suppkey
|
||||||
|
AND su_nationkey = n_nationkey
|
||||||
|
AND n_name = 'Germany'
|
||||||
|
GROUP BY s_i_id
|
||||||
|
HAVING sum(s_order_cnt) >
|
||||||
|
(SELECT sum(s_order_cnt) * .005
|
||||||
|
FROM
|
||||||
|
stock,
|
||||||
|
supplier,
|
||||||
|
nation
|
||||||
|
WHERE mod((s_w_id * s_i_id),10000) = su_suppkey
|
||||||
|
AND su_nationkey = n_nationkey
|
||||||
|
AND n_name = 'Germany')
|
||||||
|
ORDER BY ordercount DESC;
|
||||||
|
|
||||||
|
-- Query 12
|
||||||
|
SELECT
|
||||||
|
o_ol_cnt,
|
||||||
|
sum(case when o_carrier_id = 1 or o_carrier_id = 2 then 1 else 0 end) as high_line_count,
|
||||||
|
sum(case when o_carrier_id <> 1 and o_carrier_id <> 2 then 1 else 0 end) as low_line_count
|
||||||
|
FROM
|
||||||
|
oorder,
|
||||||
|
order_line
|
||||||
|
WHERE ol_w_id = o_w_id
|
||||||
|
AND ol_d_id = o_d_id
|
||||||
|
AND ol_o_id = o_id
|
||||||
|
AND o_entry_d <= ol_delivery_d
|
||||||
|
AND ol_delivery_d < '2020-01-01 00:00:00.000000'
|
||||||
|
GROUP BY o_ol_cnt
|
||||||
|
ORDER BY o_ol_cnt;
|
||||||
|
|
||||||
|
-- Query 13
|
||||||
|
SELECT
|
||||||
|
c_count,
|
||||||
|
count(*) AS custdist
|
||||||
|
FROM (SELECT
|
||||||
|
c_id,
|
||||||
|
count(o_id)
|
||||||
|
FROM customer
|
||||||
|
LEFT OUTER JOIN oorder ON (
|
||||||
|
c_w_id = o_w_id
|
||||||
|
AND c_d_id = o_d_id
|
||||||
|
AND c_id = o_c_id
|
||||||
|
AND o_carrier_id > 8)
|
||||||
|
GROUP BY c_id) AS c_orders (c_id, c_count)
|
||||||
|
GROUP BY c_count
|
||||||
|
ORDER BY
|
||||||
|
custdist DESC,
|
||||||
|
c_count DESC;
|
||||||
|
|
||||||
|
-- Query 14
|
||||||
|
SELECT
|
||||||
|
100.00 * sum(CASE WHEN i_data LIKE 'PR%' THEN ol_amount ELSE 0 END) / (1+sum(ol_amount)) AS promo_revenue
|
||||||
|
FROM
|
||||||
|
order_line,
|
||||||
|
item
|
||||||
|
WHERE ol_i_id = i_id
|
||||||
|
AND ol_delivery_d >= '2007-01-02 00:00:00.000000'
|
||||||
|
AND ol_delivery_d < '2020-01-02 00:00:00.000000';
|
||||||
|
|
||||||
|
-- Query 15
|
||||||
|
WITH revenue (supplier_no, total_revenue) AS (
|
||||||
|
SELECT
|
||||||
|
mod((s_w_id * s_i_id),10000) AS supplier_no,
|
||||||
|
sum(ol_amount) AS total_revenue
|
||||||
|
FROM
|
||||||
|
order_line,
|
||||||
|
stock
|
||||||
|
WHERE ol_i_id = s_i_id
|
||||||
|
AND ol_supply_w_id = s_w_id
|
||||||
|
AND ol_delivery_d >= '2007-01-02 00:00:00.000000'
|
||||||
|
GROUP BY mod((s_w_id * s_i_id),10000))
|
||||||
|
SELECT
|
||||||
|
su_suppkey,
|
||||||
|
su_name,
|
||||||
|
su_address,
|
||||||
|
su_phone,
|
||||||
|
total_revenue
|
||||||
|
FROM
|
||||||
|
supplier,
|
||||||
|
revenue
|
||||||
|
WHERE su_suppkey = supplier_no
|
||||||
|
AND total_revenue = (SELECT max(total_revenue) FROM revenue)
|
||||||
|
ORDER BY su_suppkey;
|
||||||
|
|
||||||
|
--Q16
|
||||||
|
SELECT
|
||||||
|
i_name,
|
||||||
|
substr(i_data, 1, 3) AS brand,
|
||||||
|
i_price,
|
||||||
|
count(DISTINCT (mod((s_w_id * s_i_id),10000))) AS supplier_cnt
|
||||||
|
FROM
|
||||||
|
stock,
|
||||||
|
item
|
||||||
|
WHERE i_id = s_i_id
|
||||||
|
AND i_data NOT LIKE 'zz%'
|
||||||
|
AND (mod((s_w_id * s_i_id),10000) NOT IN
|
||||||
|
(SELECT su_suppkey
|
||||||
|
FROM supplier
|
||||||
|
WHERE su_comment LIKE '%bad%'))
|
||||||
|
GROUP BY
|
||||||
|
i_name,
|
||||||
|
substr(i_data, 1, 3),
|
||||||
|
i_price
|
||||||
|
ORDER BY supplier_cnt DESC;
|
||||||
|
|
||||||
|
--Q17
|
||||||
|
SELECT
|
||||||
|
sum(ol_amount) / 2.0 AS avg_yearly
|
||||||
|
FROM
|
||||||
|
order_line,
|
||||||
|
(SELECT
|
||||||
|
i_id,
|
||||||
|
avg(ol_quantity) AS a
|
||||||
|
FROM
|
||||||
|
item,
|
||||||
|
order_line
|
||||||
|
WHERE i_data LIKE '%b'
|
||||||
|
AND ol_i_id = i_id
|
||||||
|
GROUP BY i_id) t
|
||||||
|
WHERE ol_i_id = t.i_id;
|
||||||
|
-- this filter was at the end causing the dataset to be empty. it should not have any
|
||||||
|
-- influence on how the query gets planned so I removed the clause
|
||||||
|
--AND ol_quantity < t.a;
|
||||||
|
|
||||||
|
-- Query 18
|
||||||
|
SELECT
|
||||||
|
c_last,
|
||||||
|
c_id o_id,
|
||||||
|
o_entry_d,
|
||||||
|
o_ol_cnt,
|
||||||
|
sum(ol_amount)
|
||||||
|
FROM
|
||||||
|
customer,
|
||||||
|
oorder,
|
||||||
|
order_line
|
||||||
|
WHERE c_id = o_c_id
|
||||||
|
AND c_w_id = o_w_id
|
||||||
|
AND c_d_id = o_d_id
|
||||||
|
AND ol_w_id = o_w_id
|
||||||
|
AND ol_d_id = o_d_id
|
||||||
|
AND ol_o_id = o_id
|
||||||
|
GROUP BY
|
||||||
|
o_id,
|
||||||
|
o_w_id,
|
||||||
|
o_d_id,
|
||||||
|
c_id,
|
||||||
|
c_last,
|
||||||
|
o_entry_d,
|
||||||
|
o_ol_cnt
|
||||||
|
HAVING sum(ol_amount) > 5 -- was 200, but thats too big for the dataset
|
||||||
|
ORDER BY
|
||||||
|
sum(ol_amount) DESC,
|
||||||
|
o_entry_d;
|
||||||
|
|
||||||
|
-- Query 19
|
||||||
|
SELECT
|
||||||
|
sum(ol_amount) AS revenue
|
||||||
|
FROM
|
||||||
|
order_line,
|
||||||
|
item
|
||||||
|
WHERE ( ol_i_id = i_id
|
||||||
|
AND i_data LIKE '%a'
|
||||||
|
AND ol_quantity >= 1
|
||||||
|
AND ol_quantity <= 10
|
||||||
|
AND i_price BETWEEN 1 AND 400000
|
||||||
|
AND ol_w_id IN (1,2,3))
|
||||||
|
OR ( ol_i_id = i_id
|
||||||
|
AND i_data LIKE '%b'
|
||||||
|
AND ol_quantity >= 1
|
||||||
|
AND ol_quantity <= 10
|
||||||
|
AND i_price BETWEEN 1 AND 400000
|
||||||
|
AND ol_w_id IN (1,2,4))
|
||||||
|
OR ( ol_i_id = i_id
|
||||||
|
AND i_data LIKE '%c'
|
||||||
|
AND ol_quantity >= 1
|
||||||
|
AND ol_quantity <= 10
|
||||||
|
AND i_price BETWEEN 1 AND 400000
|
||||||
|
AND ol_w_id IN (1,5,3));
|
||||||
|
|
||||||
|
-- Query 20
|
||||||
|
SELECT
|
||||||
|
su_name,
|
||||||
|
su_address
|
||||||
|
FROM
|
||||||
|
supplier,
|
||||||
|
nation
|
||||||
|
WHERE su_suppkey in
|
||||||
|
(SELECT
|
||||||
|
mod(s_i_id * s_w_id, 10000)
|
||||||
|
FROM
|
||||||
|
stock,
|
||||||
|
order_line
|
||||||
|
WHERE s_i_id IN
|
||||||
|
(SELECT i_id
|
||||||
|
FROM item
|
||||||
|
WHERE i_data LIKE 'co%')
|
||||||
|
AND ol_i_id = s_i_id
|
||||||
|
AND ol_delivery_d > '2008-05-23 12:00:00' -- was 2010, but our order is in 2008
|
||||||
|
GROUP BY s_i_id, s_w_id, s_quantity
|
||||||
|
HAVING 2*s_quantity > sum(ol_quantity))
|
||||||
|
AND su_nationkey = n_nationkey
|
||||||
|
AND n_name = 'Germany'
|
||||||
|
ORDER BY su_name;
|
||||||
|
|
||||||
|
-- Query 21
|
||||||
|
-- DATA SET DOES NOT COVER THIS QUERY
|
||||||
|
SELECT
|
||||||
|
su_name,
|
||||||
|
count(*) AS numwait
|
||||||
|
FROM
|
||||||
|
supplier,
|
||||||
|
order_line l1,
|
||||||
|
oorder,
|
||||||
|
stock,
|
||||||
|
nation
|
||||||
|
WHERE ol_o_id = o_id
|
||||||
|
AND ol_w_id = o_w_id
|
||||||
|
AND ol_d_id = o_d_id
|
||||||
|
AND ol_w_id = s_w_id
|
||||||
|
AND ol_i_id = s_i_id
|
||||||
|
AND mod((s_w_id * s_i_id),10000) = su_suppkey
|
||||||
|
AND l1.ol_delivery_d > o_entry_d
|
||||||
|
AND NOT exists (SELECT *
|
||||||
|
FROM order_line l2
|
||||||
|
WHERE l2.ol_o_id = l1.ol_o_id
|
||||||
|
AND l2.ol_w_id = l1.ol_w_id
|
||||||
|
AND l2.ol_d_id = l1.ol_d_id
|
||||||
|
AND l2.ol_delivery_d > l1.ol_delivery_d)
|
||||||
|
AND su_nationkey = n_nationkey
|
||||||
|
AND n_name = 'Germany'
|
||||||
|
GROUP BY su_name
|
||||||
|
ORDER BY
|
||||||
|
numwait desc,
|
||||||
|
su_name;
|
||||||
|
|
||||||
|
-- Query 22
|
||||||
|
-- DATA SET DOES NOT COVER THIS QUERY
|
||||||
|
SELECT
|
||||||
|
substr(c_state,1,1) AS country,
|
||||||
|
count(*) AS numcust,
|
||||||
|
sum(c_balance) AS totacctbal
|
||||||
|
FROM customer
|
||||||
|
WHERE substr(c_phone,1,1) in ('1','2','3','4','5','6','7')
|
||||||
|
AND c_balance > (SELECT avg(c_BALANCE)
|
||||||
|
FROM customer
|
||||||
|
WHERE c_balance > 0.00
|
||||||
|
AND substr(c_phone,1,1) in ('1','2','3','4','5','6','7'))
|
||||||
|
AND NOT exists (SELECT *
|
||||||
|
FROM oorder
|
||||||
|
WHERE o_c_id = c_id
|
||||||
|
AND o_w_id = c_w_id
|
||||||
|
AND o_d_id = c_d_id)
|
||||||
|
GROUP BY substr(c_state,1,1)
|
||||||
|
ORDER BY substr(c_state,1,1);
|
||||||
|
|
||||||
|
SET client_min_messages TO WARNING;
|
||||||
|
DROP SCHEMA chbenchmark_all_queries CASCADE;
|
|
@ -0,0 +1,41 @@
|
||||||
|
SET citus.next_shard_id TO 1670000;
|
||||||
|
CREATE SCHEMA expression_reference_join;
|
||||||
|
SET search_path TO expression_reference_join;
|
||||||
|
SET citus.shard_count TO 4;
|
||||||
|
SET citus.enable_repartition_joins TO on;
|
||||||
|
|
||||||
|
CREATE TABLE ref (a int, b int);
|
||||||
|
CREATE TABLE test (x int, y int);
|
||||||
|
|
||||||
|
INSERT INTO ref VALUES
|
||||||
|
(2,2),
|
||||||
|
(4,4);
|
||||||
|
|
||||||
|
INSERT INTO test VALUES
|
||||||
|
(1,2),
|
||||||
|
(2,2);
|
||||||
|
|
||||||
|
SELECT create_reference_table('ref');
|
||||||
|
SELECT create_distributed_table('test', 'x');
|
||||||
|
|
||||||
|
-- PR 3180 implements expressions in join clauses to reference tables to support CHbenCHmark queries 7/8/9
|
||||||
|
-- plannable as a repartition + reference join, now with an expression in the join clause
|
||||||
|
SELECT *
|
||||||
|
FROM
|
||||||
|
test t1 JOIN test t2 USING (y), -- causes repartition, which makes this not routable or pushdownable
|
||||||
|
ref a
|
||||||
|
WHERE t2.y * 2 = a.a
|
||||||
|
ORDER BY 1,2,3;
|
||||||
|
|
||||||
|
-- The join clause is wider than it used to be, causing this query to be recognized by the LogicalPlanner as a repartition join.
|
||||||
|
-- Unplannable query due to a three-way join which causes no valid path (besides the cartesian product) to be found
|
||||||
|
SELECT *
|
||||||
|
FROM
|
||||||
|
test t1 JOIN test t2 USING (y), -- causes repartition, which makes this not routable or pushdownable
|
||||||
|
ref a,
|
||||||
|
ref b
|
||||||
|
WHERE t2.y - a.a - b.b = 0
|
||||||
|
ORDER BY 1,2,3;
|
||||||
|
|
||||||
|
SET client_min_messages TO WARNING;
|
||||||
|
DROP SCHEMA expression_reference_join CASCADE;
|
Loading…
Reference in New Issue