mirror of https://github.com/citusdata/citus.git
Merge pull request #1628 from citusdata/expand_subquery_reference_table
Expand subquery pushdown for reference tablespull/1653/head
commit
6c9dffccbf
|
@ -2880,6 +2880,17 @@ FindReferencedTableColumn(Expr *columnExpression, List *parentQueryList, Query *
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We currently don't support finding partition keys in the subqueries
|
||||||
|
* that references from outer subqueries. For example, in corrolated
|
||||||
|
* subqueries in WHERE clause, we don't support use of partition keys
|
||||||
|
* in the subquery that is referred from the outer query.
|
||||||
|
*/
|
||||||
|
if (candidateColumn->varlevelsup > 0)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
rangeTableEntryIndex = candidateColumn->varno - 1;
|
rangeTableEntryIndex = candidateColumn->varno - 1;
|
||||||
rangeTableEntry = list_nth(rangetableList, rangeTableEntryIndex);
|
rangeTableEntry = list_nth(rangetableList, rangeTableEntryIndex);
|
||||||
|
|
||||||
|
@ -3096,7 +3107,14 @@ PartitionColumnOpExpressionList(Query *query)
|
||||||
rangeTableEntryIndex = candidatePartitionColumn->varno - 1;
|
rangeTableEntryIndex = candidatePartitionColumn->varno - 1;
|
||||||
rangeTableEntry = list_nth(rangetableList, rangeTableEntryIndex);
|
rangeTableEntry = list_nth(rangetableList, rangeTableEntryIndex);
|
||||||
|
|
||||||
Assert(rangeTableEntry->rtekind == RTE_RELATION);
|
/*
|
||||||
|
* We currently don't support checking for equality when user refers
|
||||||
|
* to a column from the JOIN instead of the relation.
|
||||||
|
*/
|
||||||
|
if (rangeTableEntry->rtekind != RTE_RELATION)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
relationId = rangeTableEntry->relid;
|
relationId = rangeTableEntry->relid;
|
||||||
partitionColumn = DistPartitionKey(relationId);
|
partitionColumn = DistPartitionKey(relationId);
|
||||||
|
|
|
@ -31,6 +31,7 @@
|
||||||
#include "distributed/worker_protocol.h"
|
#include "distributed/worker_protocol.h"
|
||||||
#include "nodes/makefuncs.h"
|
#include "nodes/makefuncs.h"
|
||||||
#include "nodes/nodeFuncs.h"
|
#include "nodes/nodeFuncs.h"
|
||||||
|
#include "nodes/relation.h"
|
||||||
#include "optimizer/clauses.h"
|
#include "optimizer/clauses.h"
|
||||||
#include "optimizer/prep.h"
|
#include "optimizer/prep.h"
|
||||||
#include "optimizer/tlist.h"
|
#include "optimizer/tlist.h"
|
||||||
|
@ -69,6 +70,8 @@ static DeferredErrorMessage * DeferErrorIfUnsupportedSubqueryPushdown(Query *
|
||||||
PlannerRestrictionContext
|
PlannerRestrictionContext
|
||||||
*
|
*
|
||||||
plannerRestrictionContext);
|
plannerRestrictionContext);
|
||||||
|
static DeferredErrorMessage * DeferErrorIfUnsupportedSublinkAndReferenceTable(
|
||||||
|
Query *queryTree);
|
||||||
static DeferredErrorMessage * DeferErrorIfUnsupportedFilters(Query *subquery);
|
static DeferredErrorMessage * DeferErrorIfUnsupportedFilters(Query *subquery);
|
||||||
static bool EqualOpExpressionLists(List *firstOpExpressionList,
|
static bool EqualOpExpressionLists(List *firstOpExpressionList,
|
||||||
List *secondOpExpressionList);
|
List *secondOpExpressionList);
|
||||||
|
@ -87,6 +90,7 @@ static MultiNode * MultiPlanTree(Query *queryTree);
|
||||||
static void ErrorIfQueryNotSupported(Query *queryTree);
|
static void ErrorIfQueryNotSupported(Query *queryTree);
|
||||||
static bool HasUnsupportedReferenceTableJoin(
|
static bool HasUnsupportedReferenceTableJoin(
|
||||||
PlannerRestrictionContext *plannerRestrictionContext);
|
PlannerRestrictionContext *plannerRestrictionContext);
|
||||||
|
static bool ShouldRecurseForReferenceTableJoinChecks(RelOptInfo *relOptInfo);
|
||||||
static bool HasUnsupportedJoinWalker(Node *node, void *context);
|
static bool HasUnsupportedJoinWalker(Node *node, void *context);
|
||||||
static bool ErrorHintRequired(const char *errorHint, Query *queryTree);
|
static bool ErrorHintRequired(const char *errorHint, Query *queryTree);
|
||||||
static DeferredErrorMessage * DeferErrorIfUnsupportedSubqueryRepartition(Query *
|
static DeferredErrorMessage * DeferErrorIfUnsupportedSubqueryRepartition(Query *
|
||||||
|
@ -96,8 +100,8 @@ static bool HasOuterJoin(Query *queryTree);
|
||||||
static bool HasOuterJoinWalker(Node *node, void *maxJoinLevel);
|
static bool HasOuterJoinWalker(Node *node, void *maxJoinLevel);
|
||||||
static bool HasComplexJoinOrder(Query *queryTree);
|
static bool HasComplexJoinOrder(Query *queryTree);
|
||||||
static bool HasComplexRangeTableType(Query *queryTree);
|
static bool HasComplexRangeTableType(Query *queryTree);
|
||||||
static bool RelationInfoHasReferenceTable(PlannerInfo *plannerInfo,
|
static bool RelationInfoContainsReferenceTable(PlannerInfo *plannerInfo,
|
||||||
RelOptInfo *relationInfo);
|
RelOptInfo *relationInfo);
|
||||||
static void ValidateClauseList(List *clauseList);
|
static void ValidateClauseList(List *clauseList);
|
||||||
static void ValidateSubqueryPushdownClauseList(List *clauseList);
|
static void ValidateSubqueryPushdownClauseList(List *clauseList);
|
||||||
static bool ExtractFromExpressionWalker(Node *node,
|
static bool ExtractFromExpressionWalker(Node *node,
|
||||||
|
@ -542,7 +546,16 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery,
|
||||||
"one another relation using distribution keys and "
|
"one another relation using distribution keys and "
|
||||||
"equality operator.", NULL);
|
"equality operator.", NULL);
|
||||||
}
|
}
|
||||||
else if (HasUnsupportedReferenceTableJoin(plannerRestrictionContext))
|
|
||||||
|
/* we shouldn't allow reference tables in the FROM clause when the query has sublinks */
|
||||||
|
error = DeferErrorIfUnsupportedSublinkAndReferenceTable(originalQuery);
|
||||||
|
if (error)
|
||||||
|
{
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* we shouldn't allow reference tables in the outer part of outer joins */
|
||||||
|
if (HasUnsupportedReferenceTableJoin(plannerRestrictionContext))
|
||||||
{
|
{
|
||||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||||
"cannot pushdown the subquery",
|
"cannot pushdown the subquery",
|
||||||
|
@ -582,6 +595,43 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* DeferErrorIfUnsupportedSublinkAndReferenceTable returns a deferred error if the
|
||||||
|
* given query is not suitable for subquery pushdown.
|
||||||
|
*
|
||||||
|
* While planning sublinks, we rely on Postgres in the sense that it converts some of
|
||||||
|
* sublinks into joins.
|
||||||
|
*
|
||||||
|
* In some cases, sublinks are pulled up and converted into outer joins. Those cases
|
||||||
|
* are already handled with HasUnsupportedReferenceTableJoin().
|
||||||
|
*
|
||||||
|
* If the sublinks are not pulled up, we should still error out in if any reference table
|
||||||
|
* appears in the FROM clause of a subquery.
|
||||||
|
*
|
||||||
|
* Otherwise, the result would include duplicate rows.
|
||||||
|
*/
|
||||||
|
static DeferredErrorMessage *
|
||||||
|
DeferErrorIfUnsupportedSublinkAndReferenceTable(Query *queryTree)
|
||||||
|
{
|
||||||
|
if (!queryTree->hasSubLinks)
|
||||||
|
{
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (HasReferenceTable((Node *) queryTree->rtable))
|
||||||
|
{
|
||||||
|
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||||
|
"cannot pushdown the subquery",
|
||||||
|
"Reference tables are not allowed in FROM "
|
||||||
|
"clause when the query has subqueries in "
|
||||||
|
"WHERE clause",
|
||||||
|
NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* DeferErrorIfUnsupportedFilters checks if all leaf queries in the given query have
|
* DeferErrorIfUnsupportedFilters checks if all leaf queries in the given query have
|
||||||
* same filter on the partition column. Note that if there are queries without
|
* same filter on the partition column. Note that if there are queries without
|
||||||
|
@ -853,6 +903,14 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
deferredError = DeferErrorIfUnsupportedSublinkAndReferenceTable(subqueryTree);
|
||||||
|
if (deferredError)
|
||||||
|
{
|
||||||
|
preconditionsSatisfied = false;
|
||||||
|
errorDetail = (char *) deferredError->detail;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* finally check and return deferred if not satisfied */
|
/* finally check and return deferred if not satisfied */
|
||||||
if (!preconditionsSatisfied)
|
if (!preconditionsSatisfied)
|
||||||
{
|
{
|
||||||
|
@ -1027,7 +1085,7 @@ DeferErrorIfUnsupportedTableCombination(Query *queryTree)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* TargetListOnPartitionColumn checks if at least one target list entry is on
|
* TargetListOnPartitionColumn checks if at least one target list entry is on
|
||||||
* partition column or the table is a reference table.
|
* partition column.
|
||||||
*/
|
*/
|
||||||
static bool
|
static bool
|
||||||
TargetListOnPartitionColumn(Query *query, List *targetEntryList)
|
TargetListOnPartitionColumn(Query *query, List *targetEntryList)
|
||||||
|
@ -1047,15 +1105,11 @@ TargetListOnPartitionColumn(Query *query, List *targetEntryList)
|
||||||
|
|
||||||
FindReferencedTableColumn(targetExpression, NIL, query, &relationId, &column);
|
FindReferencedTableColumn(targetExpression, NIL, query, &relationId, &column);
|
||||||
|
|
||||||
/*
|
/* if the expression belongs to reference table directly returns false */
|
||||||
* If the expression belongs to reference table directly returns true.
|
|
||||||
* We can assume that target list entry always on partition column of
|
|
||||||
* reference tables.
|
|
||||||
*/
|
|
||||||
if (IsDistributedTable(relationId) && PartitionMethod(relationId) ==
|
if (IsDistributedTable(relationId) && PartitionMethod(relationId) ==
|
||||||
DISTRIBUTE_BY_NONE)
|
DISTRIBUTE_BY_NONE)
|
||||||
{
|
{
|
||||||
targetListOnPartitionColumn = true;
|
targetListOnPartitionColumn = false;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1459,15 +1513,18 @@ HasUnsupportedReferenceTableJoin(PlannerRestrictionContext *plannerRestrictionCo
|
||||||
|
|
||||||
if (joinType == JOIN_SEMI || joinType == JOIN_ANTI || joinType == JOIN_LEFT)
|
if (joinType == JOIN_SEMI || joinType == JOIN_ANTI || joinType == JOIN_LEFT)
|
||||||
{
|
{
|
||||||
if (RelationInfoHasReferenceTable(plannerInfo, outerrel))
|
if (ShouldRecurseForReferenceTableJoinChecks(outerrel) &&
|
||||||
|
RelationInfoContainsReferenceTable(plannerInfo, outerrel))
|
||||||
{
|
{
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (joinType == JOIN_FULL)
|
else if (joinType == JOIN_FULL)
|
||||||
{
|
{
|
||||||
if (RelationInfoHasReferenceTable(plannerInfo, innerrel) ||
|
if ((ShouldRecurseForReferenceTableJoinChecks(innerrel) &&
|
||||||
RelationInfoHasReferenceTable(plannerInfo, outerrel))
|
RelationInfoContainsReferenceTable(plannerInfo, innerrel)) ||
|
||||||
|
(ShouldRecurseForReferenceTableJoinChecks(outerrel) &&
|
||||||
|
RelationInfoContainsReferenceTable(plannerInfo, outerrel)))
|
||||||
{
|
{
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -1479,12 +1536,66 @@ HasUnsupportedReferenceTableJoin(PlannerRestrictionContext *plannerRestrictionCo
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* RelationInfoHasReferenceTable check whether the relationInfo has reference table.
|
* ShouldRecurseForReferenceTableJoinChecks is a helper function for deciding
|
||||||
* Since relation ids of relationInfo indexes to the range table entry list of
|
* on whether the input relOptInfo should be checked for unsupported reference
|
||||||
* planner info, planner info is also passed.
|
* tables.
|
||||||
*/
|
*/
|
||||||
static bool
|
static bool
|
||||||
RelationInfoHasReferenceTable(PlannerInfo *plannerInfo, RelOptInfo *relationInfo)
|
ShouldRecurseForReferenceTableJoinChecks(RelOptInfo *relOptInfo)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* We shouldn't recursively go down for joins since we're already
|
||||||
|
* going to process each join seperately. Otherwise we'd restrict
|
||||||
|
* the coverage. See the below sketch where (h) denotes a hash
|
||||||
|
* distributed relation, (r) denotes a reference table, (L) denotes
|
||||||
|
* LEFT JOIN and (I) denotes INNER JOIN. If we're to recurse into
|
||||||
|
* the inner join, we'd be preventing to push down the following
|
||||||
|
* join tree, which is actually safe to push down.
|
||||||
|
*
|
||||||
|
* (L)
|
||||||
|
* / \
|
||||||
|
* (I) h
|
||||||
|
* / \
|
||||||
|
* r h
|
||||||
|
*/
|
||||||
|
if (relOptInfo->reloptkind == RELOPT_JOINREL)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Note that we treat the same query where relations appear in subqueries
|
||||||
|
* differently. (i.e., use SELECT * FROM r; instead of r)
|
||||||
|
*
|
||||||
|
* In that case, to relax some restrictions, we do the following optimization:
|
||||||
|
* If the subplan (i.e., plannerInfo corresponding to the subquery) contains any
|
||||||
|
* joins, we skip reference table checks keeping in mind that the join is already
|
||||||
|
* going to be processed seperately. This optimization should suffice for many
|
||||||
|
* use cases.
|
||||||
|
*/
|
||||||
|
if (relOptInfo->reloptkind == RELOPT_BASEREL && relOptInfo->subroot != NULL)
|
||||||
|
{
|
||||||
|
PlannerInfo *subroot = relOptInfo->subroot;
|
||||||
|
|
||||||
|
if (list_length(subroot->join_rel_list) > 0)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* RelationInfoContainsReferenceTable checks whether the relationInfo
|
||||||
|
* contains any reference tables. If found, the function returns true.
|
||||||
|
*
|
||||||
|
* Note that since relation ids of relationInfo indexes to the range
|
||||||
|
* table entry list of planner info, planner info is also passed.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
RelationInfoContainsReferenceTable(PlannerInfo *plannerInfo, RelOptInfo *relationInfo)
|
||||||
{
|
{
|
||||||
Relids relids = bms_copy(relationInfo->relids);
|
Relids relids = bms_copy(relationInfo->relids);
|
||||||
int relationId = -1;
|
int relationId = -1;
|
||||||
|
@ -2927,9 +3038,17 @@ ExtractRangeTableRelationWalkerWithRTEExpand(Node *node, List **rangeTableRelati
|
||||||
rangeTableRelationList, 0);
|
rangeTableRelationList, 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (IsA(node, Query))
|
||||||
|
{
|
||||||
|
walkIsComplete = query_tree_walker((Query *) node,
|
||||||
|
ExtractRangeTableRelationWalkerWithRTEExpand,
|
||||||
|
rangeTableRelationList, QTW_EXAMINE_RTES);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
walkIsComplete = ExtractRangeTableRelationWalker(node, rangeTableRelationList);
|
walkIsComplete = expression_tree_walker(node,
|
||||||
|
ExtractRangeTableRelationWalkerWithRTEExpand,
|
||||||
|
rangeTableRelationList);
|
||||||
}
|
}
|
||||||
|
|
||||||
return walkIsComplete;
|
return walkIsComplete;
|
||||||
|
|
|
@ -62,6 +62,7 @@ typedef struct AttributeEquivalenceClassMember
|
||||||
} AttributeEquivalenceClassMember;
|
} AttributeEquivalenceClassMember;
|
||||||
|
|
||||||
|
|
||||||
|
static uint32 ReferenceRelationCount(RelationRestrictionContext *restrictionContext);
|
||||||
static Var * FindTranslatedVar(List *appendRelList, Oid relationOid,
|
static Var * FindTranslatedVar(List *appendRelList, Oid relationOid,
|
||||||
Index relationRteIndex, Index *partitionKeyIndex);
|
Index relationRteIndex, Index *partitionKeyIndex);
|
||||||
static bool EquivalenceListContainsRelationsEquality(List *attributeEquivalenceList,
|
static bool EquivalenceListContainsRelationsEquality(List *attributeEquivalenceList,
|
||||||
|
@ -329,9 +330,8 @@ FindTranslatedVar(List *appendRelList, Oid relationOid, Index relationRteIndex,
|
||||||
* joined on their partition keys.
|
* joined on their partition keys.
|
||||||
*
|
*
|
||||||
* The function returns true if all relations are joined on their partition keys.
|
* The function returns true if all relations are joined on their partition keys.
|
||||||
* Otherwise, the function returns false. In order to support reference tables
|
* Otherwise, the function returns false. We ignore reference tables at all since
|
||||||
* with subqueries, equality between attributes of reference tables and partition
|
* they don't have partition keys.
|
||||||
* key of distributed tables are also considered.
|
|
||||||
*
|
*
|
||||||
* In order to do that, we invented a new equivalence class namely:
|
* In order to do that, we invented a new equivalence class namely:
|
||||||
* AttributeEquivalenceClass. In very simple words, a AttributeEquivalenceClass is
|
* AttributeEquivalenceClass. In very simple words, a AttributeEquivalenceClass is
|
||||||
|
@ -365,14 +365,24 @@ RestrictionEquivalenceForPartitionKeys(PlannerRestrictionContext *
|
||||||
List *joinRestrictionAttributeEquivalenceList = NIL;
|
List *joinRestrictionAttributeEquivalenceList = NIL;
|
||||||
List *allAttributeEquivalenceList = NIL;
|
List *allAttributeEquivalenceList = NIL;
|
||||||
|
|
||||||
|
uint32 referenceRelationCount = ReferenceRelationCount(restrictionContext);
|
||||||
uint32 totalRelationCount = list_length(restrictionContext->relationRestrictionList);
|
uint32 totalRelationCount = list_length(restrictionContext->relationRestrictionList);
|
||||||
|
uint32 nonReferenceRelationCount = totalRelationCount - referenceRelationCount;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If the query includes only one relation, we should not check the partition
|
* If the query includes a single relation which is not a reference table,
|
||||||
* column equality. Single table should not need to fetch data from other nodes
|
* we should not check the partition column equality.
|
||||||
* except it's own node(s).
|
* Consider two example cases:
|
||||||
|
* (i) The query includes only a single colocated relation
|
||||||
|
* (ii) A colocated relation is joined with a (or multiple) reference
|
||||||
|
* table(s) where colocated relation is not joined on the partition key
|
||||||
|
*
|
||||||
|
* For the above two cases, we don't need to execute the partition column equality
|
||||||
|
* algorithm. The reason is that the essence of this function is to ensure that the
|
||||||
|
* tasks that are going to be created should not need data from other tasks. In both
|
||||||
|
* cases mentioned above, the necessary data per task would be on available.
|
||||||
*/
|
*/
|
||||||
if (totalRelationCount == 1)
|
if (nonReferenceRelationCount <= 1)
|
||||||
{
|
{
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -394,6 +404,31 @@ RestrictionEquivalenceForPartitionKeys(PlannerRestrictionContext *
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ReferenceRelationCount iterates over the relations and returns the reference table
|
||||||
|
* relation count.
|
||||||
|
*/
|
||||||
|
static uint32
|
||||||
|
ReferenceRelationCount(RelationRestrictionContext *restrictionContext)
|
||||||
|
{
|
||||||
|
ListCell *relationRestrictionCell = NULL;
|
||||||
|
uint32 referenceRelationCount = 0;
|
||||||
|
|
||||||
|
foreach(relationRestrictionCell, restrictionContext->relationRestrictionList)
|
||||||
|
{
|
||||||
|
RelationRestriction *relationRestriction =
|
||||||
|
(RelationRestriction *) lfirst(relationRestrictionCell);
|
||||||
|
|
||||||
|
if (PartitionMethod(relationRestriction->relationId) == DISTRIBUTE_BY_NONE)
|
||||||
|
{
|
||||||
|
referenceRelationCount++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return referenceRelationCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* EquivalenceListContainsRelationsEquality gets a list of attributed equivalence
|
* EquivalenceListContainsRelationsEquality gets a list of attributed equivalence
|
||||||
* list and a relation restriction context. The function first generates a common
|
* list and a relation restriction context. The function first generates a common
|
||||||
|
@ -434,6 +469,12 @@ EquivalenceListContainsRelationsEquality(List *attributeEquivalenceList,
|
||||||
(RelationRestriction *) lfirst(relationRestrictionCell);
|
(RelationRestriction *) lfirst(relationRestrictionCell);
|
||||||
int rteIdentity = GetRTEIdentity(relationRestriction->rte);
|
int rteIdentity = GetRTEIdentity(relationRestriction->rte);
|
||||||
|
|
||||||
|
/* we shouldn't check for the equality of reference tables */
|
||||||
|
if (PartitionMethod(relationRestriction->relationId) == DISTRIBUTE_BY_NONE)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if (!bms_is_member(rteIdentity, commonRteIdentities))
|
if (!bms_is_member(rteIdentity, commonRteIdentities))
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
|
@ -621,7 +662,7 @@ GetVarFromAssignedParam(List *parentPlannerParamList, Param *plannerParam)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* GenerateCommonEquivalence gets a list of unrelated AttributeEquiavalanceClass
|
* GenerateCommonEquivalence gets a list of unrelated AttributeEquiavalanceClass
|
||||||
* whose all members are partition keys or a column of reference table.
|
* whose all members are partition keys.
|
||||||
*
|
*
|
||||||
* With the equivalence classes, the function follows the algorithm
|
* With the equivalence classes, the function follows the algorithm
|
||||||
* outlined below:
|
* outlined below:
|
||||||
|
@ -1084,8 +1125,14 @@ AddRteRelationToAttributeEquivalenceClass(AttributeEquivalenceClass **
|
||||||
|
|
||||||
Assert(rangeTableEntry->rtekind == RTE_RELATION);
|
Assert(rangeTableEntry->rtekind == RTE_RELATION);
|
||||||
|
|
||||||
if (PartitionMethod(relationId) != DISTRIBUTE_BY_NONE &&
|
/* we don't need reference tables in the equality on columns */
|
||||||
relationPartitionKey->varattno != varToBeAdded->varattno)
|
if (relationPartitionKey == NULL)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* we're only interested in distribution columns */
|
||||||
|
if (relationPartitionKey->varattno != varToBeAdded->varattno)
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1122,7 +1122,7 @@ WHERE
|
||||||
colocated_table_test.value_1 = reference_table_test.value_1;
|
colocated_table_test.value_1 = reference_table_test.value_1;
|
||||||
DEBUG: only reference tables may be queried when targeting a reference table with distributed INSERT ... SELECT
|
DEBUG: only reference tables may be queried when targeting a reference table with distributed INSERT ... SELECT
|
||||||
DEBUG: Collecting INSERT ... SELECT results on coordinator
|
DEBUG: Collecting INSERT ... SELECT results on coordinator
|
||||||
-- not pushable due to lack of equality between partition column and column of reference table
|
-- safe to push down even lack of equality between partition column and column of reference table
|
||||||
INSERT INTO
|
INSERT INTO
|
||||||
colocated_table_test (value_1, value_2)
|
colocated_table_test (value_1, value_2)
|
||||||
SELECT
|
SELECT
|
||||||
|
@ -1132,9 +1132,13 @@ FROM
|
||||||
WHERE
|
WHERE
|
||||||
colocated_table_test_2.value_4 = reference_table_test.value_4
|
colocated_table_test_2.value_4 = reference_table_test.value_4
|
||||||
RETURNING value_1, value_2;
|
RETURNING value_1, value_2;
|
||||||
ERROR: cannot perform distributed planning for the given modification
|
value_1 | value_2
|
||||||
DETAIL: Select query cannot be pushed down to the worker.
|
---------+---------
|
||||||
-- some more complex queries (Note that there are more complex queries in multi_insert_select.sql)
|
1 | 1
|
||||||
|
2 | 2
|
||||||
|
(2 rows)
|
||||||
|
|
||||||
|
-- similar query with the above, this time partition key but without equality
|
||||||
INSERT INTO
|
INSERT INTO
|
||||||
colocated_table_test (value_1, value_2)
|
colocated_table_test (value_1, value_2)
|
||||||
SELECT
|
SELECT
|
||||||
|
@ -1142,10 +1146,13 @@ SELECT
|
||||||
FROM
|
FROM
|
||||||
colocated_table_test_2, reference_table_test
|
colocated_table_test_2, reference_table_test
|
||||||
WHERE
|
WHERE
|
||||||
colocated_table_test_2.value_2 = reference_table_test.value_2
|
colocated_table_test_2.value_1 > reference_table_test.value_2
|
||||||
RETURNING value_1, value_2;
|
RETURNING value_1, value_2;
|
||||||
ERROR: cannot perform distributed planning for the given modification
|
value_1 | value_2
|
||||||
DETAIL: Select query cannot be pushed down to the worker.
|
---------+---------
|
||||||
|
2 | 1
|
||||||
|
(1 row)
|
||||||
|
|
||||||
-- partition column value comes from reference table, goes via coordinator
|
-- partition column value comes from reference table, goes via coordinator
|
||||||
INSERT INTO
|
INSERT INTO
|
||||||
colocated_table_test (value_1, value_2)
|
colocated_table_test (value_1, value_2)
|
||||||
|
@ -1606,7 +1613,7 @@ INSERT INTO reference_table_test VALUES (2, 2.0, '2', '2016-12-02');
|
||||||
SELECT master_modify_multiple_shards('DELETE FROM colocated_table_test');
|
SELECT master_modify_multiple_shards('DELETE FROM colocated_table_test');
|
||||||
master_modify_multiple_shards
|
master_modify_multiple_shards
|
||||||
-------------------------------
|
-------------------------------
|
||||||
6
|
9
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
ROLLBACK;
|
ROLLBACK;
|
||||||
|
|
|
@ -60,18 +60,38 @@ SELECT count(*) FROM
|
||||||
1
|
1
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
-- Should not work, no equality between partition column and reference table
|
-- Should work, although no equality between partition column and reference table
|
||||||
SELECT * FROM
|
SELECT subquery_1.item_id FROM
|
||||||
(SELECT random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
(SELECT user_buy_test_table.item_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||||
ON user_buy_test_table.item_id = users_ref_test_table.id) subquery_1;
|
ON user_buy_test_table.item_id = users_ref_test_table.id) subquery_1
|
||||||
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
ORDER BY 1;
|
||||||
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
item_id
|
||||||
-- Should not work, no equality between partition column and reference table
|
---------
|
||||||
SELECT * FROM
|
2
|
||||||
(SELECT random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
3
|
||||||
ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_1;
|
4
|
||||||
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
5
|
||||||
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
(4 rows)
|
||||||
|
|
||||||
|
-- Should work, although no equality between partition column and reference table
|
||||||
|
SELECT subquery_1.user_id FROM
|
||||||
|
(SELECT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||||
|
ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_1
|
||||||
|
ORDER BY 1;
|
||||||
|
user_id
|
||||||
|
---------
|
||||||
|
1
|
||||||
|
2
|
||||||
|
3
|
||||||
|
3
|
||||||
|
7
|
||||||
|
7
|
||||||
|
7
|
||||||
|
7
|
||||||
|
7
|
||||||
|
7
|
||||||
|
(10 rows)
|
||||||
|
|
||||||
-- Shouldn't work, reference table at the outer side is not allowed
|
-- Shouldn't work, reference table at the outer side is not allowed
|
||||||
SELECT * FROM
|
SELECT * FROM
|
||||||
(SELECT random() FROM users_ref_test_table LEFT JOIN user_buy_test_table
|
(SELECT random() FROM users_ref_test_table LEFT JOIN user_buy_test_table
|
||||||
|
@ -93,6 +113,33 @@ SELECT * FROM
|
||||||
ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1;
|
ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1;
|
||||||
ERROR: cannot pushdown the subquery
|
ERROR: cannot pushdown the subquery
|
||||||
DETAIL: There exist a reference table in the outer part of the outer join
|
DETAIL: There exist a reference table in the outer part of the outer join
|
||||||
|
-- Equi join test with reference table on non-partition keys
|
||||||
|
SELECT count(*) FROM
|
||||||
|
(SELECT random() FROM user_buy_test_table JOIN users_ref_test_table
|
||||||
|
ON user_buy_test_table.item_id = users_ref_test_table.id) subquery_1;
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
4
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- Non-equi join test with reference table on non-partition keys
|
||||||
|
SELECT count(*) FROM
|
||||||
|
(SELECT random() FROM user_buy_test_table JOIN users_ref_test_table
|
||||||
|
ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1;
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
10
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- Non-equi left joins with reference tables on non-partition keys
|
||||||
|
SELECT count(*) FROM
|
||||||
|
(SELECT random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||||
|
ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1;
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
10
|
||||||
|
(1 row)
|
||||||
|
|
||||||
-- Should pass since reference table locates in the inner part of each left join
|
-- Should pass since reference table locates in the inner part of each left join
|
||||||
SELECT count(*) FROM
|
SELECT count(*) FROM
|
||||||
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
|
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
|
||||||
|
@ -105,13 +152,238 @@ SELECT count(*) FROM
|
||||||
2
|
2
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
-- Should not pass since reference table locates in the outer part of right join
|
-- two subqueries, each include joins with reference table
|
||||||
|
-- also, two hash distributed tables are joined on partition keys
|
||||||
|
SELECT count(*) FROM
|
||||||
|
(SELECT DISTINCT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||||
|
ON user_buy_test_table.item_id > users_ref_test_table.id AND users_ref_test_table.k_no > 88 AND user_buy_test_table.item_id < 88) subquery_1,
|
||||||
|
(SELECT DISTINCT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||||
|
ON user_buy_test_table.user_id > users_ref_test_table.id AND users_ref_test_table.k_no > 44 AND user_buy_test_table.user_id > 44) subquery_2
|
||||||
|
WHERE subquery_1.user_id = subquery_2.user_id ;
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
4
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- Should be able to push down since reference tables are inner joined
|
||||||
|
-- with hash distributed tables, the results of those joins are the parts of
|
||||||
|
-- an outer join
|
||||||
|
SELECT subquery_2.id FROM
|
||||||
|
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
|
||||||
|
ON tt1.user_id = tt2.user_id) subquery_1
|
||||||
|
RIGHT JOIN
|
||||||
|
(SELECT tt1.user_id, ref.id, random() FROM user_buy_test_table as tt1 JOIN users_ref_test_table as ref
|
||||||
|
ON tt1.user_id = ref.id) subquery_2 ON subquery_1.user_id = subquery_2.user_id ORDER BY 1 DESC LIMIT 5;
|
||||||
|
id
|
||||||
|
----
|
||||||
|
3
|
||||||
|
2
|
||||||
|
1
|
||||||
|
(3 rows)
|
||||||
|
|
||||||
|
-- the same query as the above, but this Citus fails to pushdown the query
|
||||||
|
-- since the outer part of the right join doesn't include any joins
|
||||||
SELECT * FROM
|
SELECT * FROM
|
||||||
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
|
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
|
||||||
ON tt1.user_id = tt2.user_id) subquery_1
|
ON tt1.user_id = tt2.user_id) subquery_1
|
||||||
RIGHT JOIN
|
RIGHT JOIN
|
||||||
(SELECT tt1.user_id, random() FROM user_buy_test_table as tt1 JOIN users_ref_test_table as ref
|
(SELECT *, random() FROM (SELECT tt1.user_id, random() FROM user_buy_test_table as tt1 JOIN users_ref_test_table as ref
|
||||||
ON tt1.user_id = ref.id) subquery_2 ON subquery_1.user_id = subquery_2.user_id;
|
ON tt1.user_id = ref.id) subquery_2_inner) subquery_2 ON subquery_1.user_id = subquery_2.user_id;
|
||||||
|
ERROR: cannot pushdown the subquery
|
||||||
|
DETAIL: There exist a reference table in the outer part of the outer join
|
||||||
|
-- should be able to pushdown since reference table is in the
|
||||||
|
-- inner part of the left join
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_1)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
users_table.user_id, users_table.value_1, random()
|
||||||
|
FROM
|
||||||
|
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
|
||||||
|
INNER JOIN events_reference_table ON (events_reference_table.value_2 = users_table.user_id)
|
||||||
|
) as foo
|
||||||
|
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||||
|
user_id | sum
|
||||||
|
---------+----------
|
||||||
|
12 | 92221920
|
||||||
|
17 | 89192642
|
||||||
|
96 | 85143744
|
||||||
|
45 | 84267456
|
||||||
|
90 | 84157047
|
||||||
|
43 | 82110240
|
||||||
|
1 | 81735612
|
||||||
|
72 | 78992640
|
||||||
|
67 | 72385516
|
||||||
|
97 | 71002659
|
||||||
|
(10 rows)
|
||||||
|
|
||||||
|
-- same query as above, reference table is wrapped into a subquery
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_1)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
users_table.user_id, users_table.value_1, random()
|
||||||
|
FROM
|
||||||
|
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
|
||||||
|
INNER JOIN (SELECT *, random() FROM events_reference_table) as ref_all ON (ref_all.value_2 = users_table.user_id)
|
||||||
|
) as foo
|
||||||
|
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||||
|
user_id | sum
|
||||||
|
---------+----------
|
||||||
|
12 | 92221920
|
||||||
|
17 | 89192642
|
||||||
|
96 | 85143744
|
||||||
|
45 | 84267456
|
||||||
|
90 | 84157047
|
||||||
|
43 | 82110240
|
||||||
|
1 | 81735612
|
||||||
|
72 | 78992640
|
||||||
|
67 | 72385516
|
||||||
|
97 | 71002659
|
||||||
|
(10 rows)
|
||||||
|
|
||||||
|
-- should be able to pushdown since reference table is in the
|
||||||
|
-- inner part of the left join
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_1)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
users_table.user_id, users_table.value_1, random()
|
||||||
|
FROM
|
||||||
|
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
|
||||||
|
LEFT JOIN events_reference_table ON (events_reference_table.value_2 = users_table.user_id)
|
||||||
|
) as foo
|
||||||
|
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||||
|
user_id | sum
|
||||||
|
---------+----------
|
||||||
|
12 | 92221920
|
||||||
|
17 | 89192642
|
||||||
|
96 | 85143744
|
||||||
|
45 | 84267456
|
||||||
|
90 | 84157047
|
||||||
|
43 | 82110240
|
||||||
|
1 | 81735612
|
||||||
|
72 | 78992640
|
||||||
|
67 | 72385516
|
||||||
|
97 | 71002659
|
||||||
|
(10 rows)
|
||||||
|
|
||||||
|
-- should not be able to pushdown since reference table is in the
|
||||||
|
-- direct outer part of the left join
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_1)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
users_table.user_id, users_table.value_1, random()
|
||||||
|
FROM
|
||||||
|
events_reference_table LEFT JOIN users_table ON (users_table.user_id = events_reference_table.value_2)
|
||||||
|
LEFT JOIN events_table ON (events_table.user_id = users_table.user_id)
|
||||||
|
) as foo
|
||||||
|
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||||
|
ERROR: cannot pushdown the subquery
|
||||||
|
DETAIL: There exist a reference table in the outer part of the outer join
|
||||||
|
-- should not be able to pushdown since reference table is in the
|
||||||
|
-- direct outer part of the left join wrapped into a subquery
|
||||||
|
SELECT
|
||||||
|
*
|
||||||
|
FROM
|
||||||
|
(SELECT *, random() FROM events_reference_table) as ref_all LEFT JOIN users_table
|
||||||
|
ON (users_table.user_id = ref_all.value_2);
|
||||||
|
ERROR: cannot pushdown the subquery
|
||||||
|
DETAIL: There exist a reference table in the outer part of the outer join
|
||||||
|
-- should not be able to pushdown since reference table is in the
|
||||||
|
-- outer part of the left join
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_1)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
users_table.user_id, users_table.value_1, random()
|
||||||
|
FROM
|
||||||
|
events_reference_table LEFT JOIN users_table ON (users_table.user_id = events_reference_table.value_2)
|
||||||
|
LEFT JOIN events_table ON (events_table.user_id = users_table.user_id)
|
||||||
|
) as foo
|
||||||
|
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||||
|
ERROR: cannot pushdown the subquery
|
||||||
|
DETAIL: There exist a reference table in the outer part of the outer join
|
||||||
|
-- should be able to pushdown since reference table is in the
|
||||||
|
-- inner part of the left join
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT DISTINCT foo.user_id
|
||||||
|
FROM
|
||||||
|
((SELECT
|
||||||
|
"events"."time", "events"."user_id" as event_user_id, value_2 as event_val_2, random()
|
||||||
|
FROM
|
||||||
|
events_reference_table as "events"
|
||||||
|
WHERE
|
||||||
|
event_type > 80) as "temp_data_queries"
|
||||||
|
INNER JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id"
|
||||||
|
FROM
|
||||||
|
users_table as "users"
|
||||||
|
WHERE
|
||||||
|
user_id > 80 and value_2 = 5) as foo_in ON (event_val_2 = user_id)) as foo LEFT JOIN
|
||||||
|
(SELECT user_id as user_user_id FROM users_table) as fooo ON (user_id = user_user_id)) as bar;
|
||||||
|
user_id
|
||||||
|
---------
|
||||||
|
89
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- the same query but this time reference table is in the outer part of the query
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT DISTINCT foo.user_id
|
||||||
|
FROM
|
||||||
|
((SELECT
|
||||||
|
"events"."time", "events"."user_id" as event_user_id, value_2 as event_val_2, random()
|
||||||
|
FROM
|
||||||
|
events_reference_table as "events"
|
||||||
|
WHERE
|
||||||
|
event_type > 80) as "temp_data_queries"
|
||||||
|
LEFT JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id"
|
||||||
|
FROM
|
||||||
|
users_table as "users"
|
||||||
|
WHERE
|
||||||
|
user_id > 80 and value_2 = 5) as foo_in ON (event_val_2 = user_id)) as foo LEFT JOIN
|
||||||
|
(SELECT user_id as user_user_id FROM users_table) as fooo ON (user_id = user_user_id)) as bar;
|
||||||
|
ERROR: cannot pushdown the subquery
|
||||||
|
DETAIL: There exist a reference table in the outer part of the outer join
|
||||||
|
-- we could even suuport the following where the subquery
|
||||||
|
-- on the outer part of the left join contains a reference table
|
||||||
|
SELECT max(events_all.cnt), events_all.usr_id
|
||||||
|
FROM
|
||||||
|
(SELECT users_table.user_id as usr_id,
|
||||||
|
count(*) as cnt
|
||||||
|
FROM events_reference_table
|
||||||
|
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id) GROUP BY users_table.user_id) AS events_all
|
||||||
|
LEFT JOIN events_table ON (events_all.usr_id = events_table.user_id) GROUP BY 2 ORDER BY 1 DESC, 2 DESC LIMIT 5;
|
||||||
|
max | usr_id
|
||||||
|
-------+--------
|
||||||
|
14605 | 23
|
||||||
|
13090 | 17
|
||||||
|
12915 | 25
|
||||||
|
12317 | 90
|
||||||
|
12285 | 87
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
-- but, we fail to pushdown the following query where join that reference table appears
|
||||||
|
-- wrapped into a subquery
|
||||||
|
SELECT max(events_all.cnt),
|
||||||
|
events_all.usr_id
|
||||||
|
FROM(
|
||||||
|
SELECT *, random() FROM
|
||||||
|
(SELECT users_table.user_id AS usr_id, count(*) AS cnt
|
||||||
|
FROM events_reference_table
|
||||||
|
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
|
||||||
|
GROUP BY users_table.user_id) AS events_all_inner) AS events_all
|
||||||
|
LEFT JOIN events_table ON (events_all.usr_id = events_table.user_id)
|
||||||
|
GROUP BY 2
|
||||||
|
ORDER BY 1 DESC,
|
||||||
|
2 DESC
|
||||||
|
LIMIT 5;
|
||||||
ERROR: cannot pushdown the subquery
|
ERROR: cannot pushdown the subquery
|
||||||
DETAIL: There exist a reference table in the outer part of the outer join
|
DETAIL: There exist a reference table in the outer part of the outer join
|
||||||
-- LATERAL JOINs used with INNER JOINs with reference tables
|
-- LATERAL JOINs used with INNER JOINs with reference tables
|
||||||
|
@ -193,10 +465,10 @@ SELECT
|
||||||
count(*) AS value, "generated_group_field"
|
count(*) AS value, "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
DISTINCT "pushedDownQuery"."real_user_id", "generated_group_field"
|
DISTINCT "pushedDownQuery"."user_id", "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"eventQuery"."real_user_id", "eventQuery"."time", random(), ("eventQuery"."value_2") AS "generated_group_field"
|
"eventQuery"."user_id", "eventQuery"."time", random(), ("eventQuery"."value_2") AS "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
*
|
*
|
||||||
|
@ -204,7 +476,7 @@ SELECT
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."time", "events"."user_id", "events"."value_2"
|
"events"."time", "events"."user_id", "events"."value_2"
|
||||||
FROM
|
FROM
|
||||||
events_reference_table as "events"
|
events_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 10 and user_id < 40 AND event_type IN (40, 41, 42, 43, 44, 45) ) "temp_data_queries"
|
user_id > 10 and user_id < 40 AND event_type IN (40, 41, 42, 43, 44, 45) ) "temp_data_queries"
|
||||||
INNER JOIN
|
INNER JOIN
|
||||||
|
@ -355,8 +627,8 @@ LIMIT 10;
|
||||||
(6 rows)
|
(6 rows)
|
||||||
|
|
||||||
SET citus.subquery_pushdown to OFF;
|
SET citus.subquery_pushdown to OFF;
|
||||||
-- LEFT JOINs used with INNER JOINs should error out since reference table exist in the
|
-- LEFT JOINs used with INNER JOINs should not error out since reference table joined
|
||||||
-- left side of the LEFT JOIN.
|
-- with hash table that Citus can push down
|
||||||
SELECT
|
SELECT
|
||||||
count(*) AS cnt, "generated_group_field"
|
count(*) AS cnt, "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
|
@ -393,8 +665,20 @@ count(*) AS cnt, "generated_group_field"
|
||||||
ORDER BY
|
ORDER BY
|
||||||
cnt DESC, generated_group_field ASC
|
cnt DESC, generated_group_field ASC
|
||||||
LIMIT 10;
|
LIMIT 10;
|
||||||
ERROR: cannot pushdown the subquery
|
cnt | generated_group_field
|
||||||
DETAIL: There exist a reference table in the outer part of the outer join
|
-----+-----------------------
|
||||||
|
176 | 551
|
||||||
|
176 | 569
|
||||||
|
176 | 645
|
||||||
|
176 | 713
|
||||||
|
176 | 734
|
||||||
|
88 | 3
|
||||||
|
88 | 5
|
||||||
|
88 | 15
|
||||||
|
88 | 32
|
||||||
|
88 | 68
|
||||||
|
(10 rows)
|
||||||
|
|
||||||
-- RIGHT JOINs used with INNER JOINs should error out since reference table exist in the
|
-- RIGHT JOINs used with INNER JOINs should error out since reference table exist in the
|
||||||
-- right side of the RIGHT JOIN.
|
-- right side of the RIGHT JOIN.
|
||||||
SELECT
|
SELECT
|
||||||
|
@ -435,6 +719,133 @@ count(*) AS cnt, "generated_group_field"
|
||||||
LIMIT 10;
|
LIMIT 10;
|
||||||
ERROR: cannot pushdown the subquery
|
ERROR: cannot pushdown the subquery
|
||||||
DETAIL: There exist a reference table in the outer part of the outer join
|
DETAIL: There exist a reference table in the outer part of the outer join
|
||||||
|
-- right join where the inner part of the join includes a reference table
|
||||||
|
-- joined with hash partitioned table using non-equi join
|
||||||
|
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
t1.user_id,
|
||||||
|
array_agg(event ORDER BY time) AS events_table,
|
||||||
|
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||||||
|
FROM (
|
||||||
|
(
|
||||||
|
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_reference_table AS e
|
||||||
|
WHERE u.user_id > e.user_id
|
||||||
|
AND u.user_id >= 10
|
||||||
|
AND u.user_id <= 25
|
||||||
|
AND e.event_type IN (100, 101, 102)
|
||||||
|
)
|
||||||
|
) t1 RIGHT JOIN (
|
||||||
|
SELECT DISTINCT user_id,
|
||||||
|
'Has done event'::TEXT AS hasdone_event
|
||||||
|
FROM events_table AS e
|
||||||
|
WHERE e.user_id >= 10
|
||||||
|
AND e.user_id <= 25
|
||||||
|
AND e.event_type IN (106, 107, 108)
|
||||||
|
) t2 ON (t1.user_id = t2.user_id)
|
||||||
|
GROUP BY t1.user_id, hasdone_event
|
||||||
|
) t GROUP BY user_id, hasdone_event
|
||||||
|
ORDER BY user_id;
|
||||||
|
user_id | sum | length | hasdone_event
|
||||||
|
---------+-----+--------+----------------
|
||||||
|
11 | 306 | 14 | Has done event
|
||||||
|
12 | 363 | 14 | Has done event
|
||||||
|
14 | 510 | 14 | Has done event
|
||||||
|
18 | 600 | 14 | Has done event
|
||||||
|
19 | 618 | 14 | Has done event
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
-- a similar query as the above, with non-partition key comparison
|
||||||
|
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
t1.user_id,
|
||||||
|
array_agg(event ORDER BY time) AS events_table,
|
||||||
|
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||||||
|
FROM (
|
||||||
|
(
|
||||||
|
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_reference_table AS e
|
||||||
|
WHERE u.value_1 > e.user_id
|
||||||
|
AND u.user_id >= 10
|
||||||
|
AND u.user_id <= 25
|
||||||
|
AND e.event_type >= 125 AND e.event_type < 130
|
||||||
|
)
|
||||||
|
) t1 RIGHT JOIN (
|
||||||
|
SELECT DISTINCT user_id,
|
||||||
|
'Has done event'::TEXT AS hasdone_event
|
||||||
|
FROM events_table AS e
|
||||||
|
WHERE e.user_id >= 10
|
||||||
|
AND e.user_id <= 25
|
||||||
|
AND e.event_type >= 130 AND e.event_type < 135
|
||||||
|
) t2 ON (t1.user_id = t2.user_id)
|
||||||
|
GROUP BY t1.user_id, hasdone_event
|
||||||
|
) t GROUP BY user_id, hasdone_event
|
||||||
|
ORDER BY user_id;
|
||||||
|
user_id | sum | length | hasdone_event
|
||||||
|
---------+------+--------+----------------
|
||||||
|
10 | 6018 | 14 | Has done event
|
||||||
|
16 | 5373 | 14 | Has done event
|
||||||
|
17 | 5683 | 14 | Has done event
|
||||||
|
18 | 5321 | 14 | Has done event
|
||||||
|
(4 rows)
|
||||||
|
|
||||||
|
-- LEFT JOINs used with INNER JOINs
|
||||||
|
-- events_table and users_reference_table joined
|
||||||
|
-- with event_table.non_part_key < reference_table.any_key
|
||||||
|
SELECT
|
||||||
|
count(*) AS cnt, "generated_group_field"
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"eventQuery"."user_id", random(), generated_group_field
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"multi_group_wrapper_1".*, generated_group_field, random()
|
||||||
|
FROM
|
||||||
|
(SELECT *
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"events"."time", "events"."user_id" as event_user_id
|
||||||
|
FROM
|
||||||
|
events_table as "events"
|
||||||
|
WHERE
|
||||||
|
user_id > 80) "temp_data_queries"
|
||||||
|
INNER JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id"
|
||||||
|
FROM
|
||||||
|
users_reference_table as "users"
|
||||||
|
WHERE
|
||||||
|
user_id > 80 and value_2 = 5) "user_filters_1"
|
||||||
|
ON ("temp_data_queries".event_user_id < "user_filters_1".user_id)) AS "multi_group_wrapper_1"
|
||||||
|
RIGHT JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
|
||||||
|
FROM
|
||||||
|
users_table as "users") "left_group_by_1"
|
||||||
|
ON ("left_group_by_1".user_id = "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
|
||||||
|
group BY
|
||||||
|
"generated_group_field"
|
||||||
|
ORDER BY
|
||||||
|
cnt DESC, generated_group_field ASC
|
||||||
|
LIMIT 10;
|
||||||
|
cnt | generated_group_field
|
||||||
|
-----+-----------------------
|
||||||
|
540 | 814
|
||||||
|
533 | 746
|
||||||
|
473 | 914
|
||||||
|
449 | 684
|
||||||
|
445 | 715
|
||||||
|
423 | 191
|
||||||
|
419 | 39
|
||||||
|
415 | 108
|
||||||
|
414 | 819
|
||||||
|
411 | 642
|
||||||
|
(10 rows)
|
||||||
|
|
||||||
-- Outer subquery with reference table
|
-- Outer subquery with reference table
|
||||||
SELECT "some_users_data".user_id, lastseen
|
SELECT "some_users_data".user_id, lastseen
|
||||||
FROM
|
FROM
|
||||||
|
@ -468,9 +879,9 @@ limit 50;
|
||||||
ERROR: cannot pushdown the subquery
|
ERROR: cannot pushdown the subquery
|
||||||
DETAIL: There exist a reference table in the outer part of the outer join
|
DETAIL: There exist a reference table in the outer part of the outer join
|
||||||
--
|
--
|
||||||
-- UNIONs and JOINs with reference tables, shoukld error out
|
-- UNIONs and JOINs with reference tables, should error out
|
||||||
--
|
--
|
||||||
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
SELECT ("final_query"."event_types") as types
|
||||||
FROM
|
FROM
|
||||||
( SELECT *, random()
|
( SELECT *, random()
|
||||||
FROM
|
FROM
|
||||||
|
@ -484,7 +895,7 @@ FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."user_id", "events"."time", 0 AS event
|
"events"."user_id", "events"."time", 0 AS event
|
||||||
FROM
|
FROM
|
||||||
events_reference_table as "events"
|
events_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1)
|
event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1)
|
||||||
UNION
|
UNION
|
||||||
|
@ -494,7 +905,7 @@ FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."user_id", "events"."time", 1 AS event
|
"events"."user_id", "events"."time", 1 AS event
|
||||||
FROM
|
FROM
|
||||||
events_table as "events"
|
events_reference_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
event_type IN (15, 16, 17, 18, 19) ) events_subquery_2)
|
event_type IN (15, 16, 17, 18, 19) ) events_subquery_2)
|
||||||
UNION
|
UNION
|
||||||
|
@ -526,11 +937,9 @@ INNER JOIN
|
||||||
WHERE
|
WHERE
|
||||||
value_1 > 50 and value_1 < 70) AS t
|
value_1 > 50 and value_1 < 70) AS t
|
||||||
ON (t.user_id = q.user_id)) as final_query
|
ON (t.user_id = q.user_id)) as final_query
|
||||||
GROUP BY
|
|
||||||
types
|
|
||||||
ORDER BY
|
ORDER BY
|
||||||
types;
|
types;
|
||||||
ERROR: cannot push down this subquery
|
ERROR: cannot push down this subquery
|
||||||
DETAIL: Reference tables are not supported with union operator
|
DETAIL: Reference tables are not supported with union operator
|
||||||
-- reference table exist in the subquery of union, should error out
|
-- reference table exist in the subquery of union, should error out
|
||||||
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
||||||
|
@ -560,15 +969,15 @@ FROM
|
||||||
SELECT * FROM
|
SELECT * FROM
|
||||||
(
|
(
|
||||||
SELECT
|
SELECT
|
||||||
max("events"."time"),
|
max("users"."time"),
|
||||||
0 AS event,
|
0 AS event,
|
||||||
"events"."user_id"
|
"users"."user_id"
|
||||||
FROM
|
FROM
|
||||||
events_reference_table as "events", users_table as "users"
|
events_reference_table as "events", users_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
events.user_id = users.user_id AND
|
events.user_id = users.user_id AND
|
||||||
event_type IN (10, 11, 12, 13, 14, 15)
|
event_type IN (10, 11, 12, 13, 14, 15)
|
||||||
GROUP BY "events"."user_id"
|
GROUP BY "users"."user_id"
|
||||||
) as events_subquery_5
|
) as events_subquery_5
|
||||||
) events_subquery_2)
|
) events_subquery_2)
|
||||||
UNION
|
UNION
|
||||||
|
@ -660,6 +1069,117 @@ GROUP BY types
|
||||||
ORDER BY types;
|
ORDER BY types;
|
||||||
ERROR: cannot push down this subquery
|
ERROR: cannot push down this subquery
|
||||||
DETAIL: Reference tables are not supported with union operator
|
DETAIL: Reference tables are not supported with union operator
|
||||||
|
-- just a sanity check that we don't allow this if the reference table is on the
|
||||||
|
-- left part of the left join
|
||||||
|
SELECT count(*) FROM
|
||||||
|
(SELECT random() FROM users_ref_test_table LEFT JOIN user_buy_test_table
|
||||||
|
ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1;
|
||||||
|
ERROR: cannot pushdown the subquery
|
||||||
|
DETAIL: There exist a reference table in the outer part of the outer join
|
||||||
|
-- we don't allow non equi join among hash partitioned tables
|
||||||
|
SELECT count(*) FROM
|
||||||
|
(SELECT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||||
|
ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1,
|
||||||
|
(SELECT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||||
|
ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_2
|
||||||
|
WHERE subquery_1.user_id != subquery_2.user_id ;
|
||||||
|
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
||||||
|
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
||||||
|
-- we cannot push this query since hash partitioned tables
|
||||||
|
-- are not joined on partition keys with equality
|
||||||
|
SELECT
|
||||||
|
count(*) AS cnt, "generated_group_field"
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"eventQuery"."user_id", random(), generated_group_field
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"multi_group_wrapper_1".*, generated_group_field, random()
|
||||||
|
FROM
|
||||||
|
(SELECT *
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"events"."time", "events"."user_id" as event_user_id
|
||||||
|
FROM
|
||||||
|
events_table as "events"
|
||||||
|
WHERE
|
||||||
|
user_id > 80) "temp_data_queries"
|
||||||
|
INNER JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id"
|
||||||
|
FROM
|
||||||
|
users_reference_table as "users"
|
||||||
|
WHERE
|
||||||
|
user_id > 80 and value_2 = 5) "user_filters_1"
|
||||||
|
ON ("temp_data_queries".event_user_id < "user_filters_1".user_id)) AS "multi_group_wrapper_1"
|
||||||
|
RIGHT JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
|
||||||
|
FROM
|
||||||
|
users_table as "users") "left_group_by_1"
|
||||||
|
ON ("left_group_by_1".user_id > "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
|
||||||
|
group BY
|
||||||
|
"generated_group_field"
|
||||||
|
ORDER BY
|
||||||
|
cnt DESC, generated_group_field ASC
|
||||||
|
LIMIT 10;
|
||||||
|
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
||||||
|
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
||||||
|
-- two hash partitioned relations are not joined
|
||||||
|
-- on partiton keys although reference table is fine
|
||||||
|
-- to push down
|
||||||
|
SELECT
|
||||||
|
u1.user_id, count(*)
|
||||||
|
FROM
|
||||||
|
events_table as e1, users_table as u1
|
||||||
|
WHERE
|
||||||
|
event_type IN
|
||||||
|
(SELECT
|
||||||
|
event_type
|
||||||
|
FROM
|
||||||
|
events_reference_table as e2
|
||||||
|
WHERE
|
||||||
|
value_2 = 15 AND
|
||||||
|
value_3 > 25 AND
|
||||||
|
e1.value_2 > e2.value_2
|
||||||
|
)
|
||||||
|
AND u1.user_id > e1.user_id
|
||||||
|
GROUP BY 1
|
||||||
|
ORDER BY 2 DESC, 1 DESC
|
||||||
|
LIMIT 5;
|
||||||
|
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
||||||
|
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
||||||
|
SELECT foo.user_id FROM
|
||||||
|
(
|
||||||
|
SELECT m.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
|
||||||
|
WHERE event_type > 100000
|
||||||
|
) as foo;
|
||||||
|
user_id
|
||||||
|
---------
|
||||||
|
(0 rows)
|
||||||
|
|
||||||
|
-- not supported since group by is on the reference table column
|
||||||
|
SELECT foo.user_id FROM
|
||||||
|
(
|
||||||
|
SELECT r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
|
||||||
|
GROUP BY r.user_id
|
||||||
|
) as foo;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Group by list without partition column is currently unsupported
|
||||||
|
-- not supported since distinct is on the reference table column
|
||||||
|
SELECT foo.user_id FROM
|
||||||
|
(
|
||||||
|
SELECT DISTINCT r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
|
||||||
|
) as foo;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Distinct on columns without partition column is currently unsupported
|
||||||
|
-- not supported since distinct on is on the reference table column
|
||||||
|
SELECT foo.user_id FROM
|
||||||
|
(
|
||||||
|
SELECT DISTINCT ON(r.user_id) r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
|
||||||
|
) as foo;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Distinct on columns without partition column is currently unsupported
|
||||||
DROP TABLE user_buy_test_table;
|
DROP TABLE user_buy_test_table;
|
||||||
DROP TABLE users_ref_test_table;
|
DROP TABLE users_ref_test_table;
|
||||||
DROP TABLE users_return_test_table;
|
DROP TABLE users_return_test_table;
|
||||||
|
|
|
@ -1,33 +1,5 @@
|
||||||
--
|
--
|
||||||
-- queries to test the subquery pushdown on reference tables
|
-- queries to test the subquery pushdown on reference tables
|
||||||
-- subqueries in WHERE with greater operator
|
|
||||||
SELECT
|
|
||||||
user_id
|
|
||||||
FROM
|
|
||||||
users_table
|
|
||||||
WHERE
|
|
||||||
value_2 >
|
|
||||||
(SELECT
|
|
||||||
max(value_2)
|
|
||||||
FROM
|
|
||||||
events_reference_table
|
|
||||||
WHERE
|
|
||||||
users_table.user_id = events_reference_table.user_id AND event_type = 50
|
|
||||||
GROUP BY
|
|
||||||
user_id
|
|
||||||
)
|
|
||||||
GROUP BY user_id
|
|
||||||
HAVING count(*) > 66
|
|
||||||
ORDER BY user_id
|
|
||||||
LIMIT 5;
|
|
||||||
user_id
|
|
||||||
---------
|
|
||||||
49
|
|
||||||
55
|
|
||||||
56
|
|
||||||
63
|
|
||||||
(4 rows)
|
|
||||||
|
|
||||||
-- subqueries in WHERE with IN operator
|
-- subqueries in WHERE with IN operator
|
||||||
SELECT
|
SELECT
|
||||||
user_id
|
user_id
|
||||||
|
@ -91,10 +63,10 @@ WHERE
|
||||||
)
|
)
|
||||||
LIMIT 3;
|
LIMIT 3;
|
||||||
ERROR: cannot pushdown the subquery
|
ERROR: cannot pushdown the subquery
|
||||||
DETAIL: There exist a reference table in the outer part of the outer join
|
DETAIL: Reference tables are not allowed in FROM clause when the query has subqueries in WHERE clause
|
||||||
-- subqueries in WHERE with IN operator without equality
|
-- subqueries in WHERE with IN operator without equality
|
||||||
SELECT
|
SELECT
|
||||||
user_id
|
users_table.user_id, count(*)
|
||||||
FROM
|
FROM
|
||||||
users_table
|
users_table
|
||||||
WHERE
|
WHERE
|
||||||
|
@ -106,59 +78,15 @@ WHERE
|
||||||
WHERE
|
WHERE
|
||||||
users_table.user_id > events_reference_table.user_id
|
users_table.user_id > events_reference_table.user_id
|
||||||
)
|
)
|
||||||
GROUP BY user_id
|
GROUP BY users_table.user_id
|
||||||
ORDER BY user_id
|
ORDER BY 2 DESC, 1 DESC
|
||||||
LIMIT 3;
|
LIMIT 3;
|
||||||
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
user_id | count
|
||||||
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
---------+-------
|
||||||
-- have reference table without any equality, should error out
|
87 | 117
|
||||||
SELECT
|
59 | 115
|
||||||
user_id
|
46 | 115
|
||||||
FROM
|
(3 rows)
|
||||||
users_table
|
|
||||||
WHERE
|
|
||||||
value_2 >
|
|
||||||
(SELECT
|
|
||||||
max(value_2)
|
|
||||||
FROM
|
|
||||||
events_reference_table
|
|
||||||
WHERE
|
|
||||||
event_type = 50
|
|
||||||
GROUP BY
|
|
||||||
user_id
|
|
||||||
)
|
|
||||||
GROUP BY user_id
|
|
||||||
HAVING count(*) > 66
|
|
||||||
ORDER BY user_id
|
|
||||||
LIMIT 5;
|
|
||||||
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
|
||||||
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
|
||||||
-- users that appeared more than 118 times, should run since the reference table
|
|
||||||
-- on the right side of the semi join
|
|
||||||
SELECT
|
|
||||||
user_id
|
|
||||||
FROM
|
|
||||||
users_table
|
|
||||||
WHERE 118 <=
|
|
||||||
(SELECT
|
|
||||||
count(*)
|
|
||||||
FROM
|
|
||||||
events_reference_table
|
|
||||||
WHERE
|
|
||||||
users_table.user_id = events_reference_table.user_id
|
|
||||||
GROUP BY
|
|
||||||
user_id)
|
|
||||||
GROUP BY
|
|
||||||
user_id
|
|
||||||
ORDER BY
|
|
||||||
user_id;
|
|
||||||
user_id
|
|
||||||
---------
|
|
||||||
13
|
|
||||||
17
|
|
||||||
23
|
|
||||||
25
|
|
||||||
(4 rows)
|
|
||||||
|
|
||||||
-- should error out since reference table exist on the left side
|
-- should error out since reference table exist on the left side
|
||||||
-- of the left lateral join
|
-- of the left lateral join
|
||||||
|
@ -224,3 +152,194 @@ SELECT user_id, value_2 FROM users_table WHERE
|
||||||
ORDER BY 1, 2;
|
ORDER BY 1, 2;
|
||||||
ERROR: cannot pushdown the subquery
|
ERROR: cannot pushdown the subquery
|
||||||
DETAIL: There exist a reference table in the outer part of the outer join
|
DETAIL: There exist a reference table in the outer part of the outer join
|
||||||
|
-- non-partition key equality with reference table
|
||||||
|
SELECT
|
||||||
|
user_id, count(*)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WHERE
|
||||||
|
value_3 =ANY(SELECT value_2 FROM users_reference_table WHERE value_1 >= 10 AND value_1 <= 20)
|
||||||
|
GROUP BY 1 ORDER BY 2 DESC, 1 DESC LIMIT 5;
|
||||||
|
user_id | count
|
||||||
|
---------+-------
|
||||||
|
48 | 18
|
||||||
|
26 | 18
|
||||||
|
15 | 17
|
||||||
|
54 | 16
|
||||||
|
35 | 15
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
-- non-partition key comparison with reference table
|
||||||
|
SELECT
|
||||||
|
user_id, count(*)
|
||||||
|
FROM
|
||||||
|
events_table as e1
|
||||||
|
WHERE
|
||||||
|
event_type IN
|
||||||
|
(SELECT
|
||||||
|
event_type
|
||||||
|
FROM
|
||||||
|
events_reference_table as e2
|
||||||
|
WHERE
|
||||||
|
value_2 = 15 AND
|
||||||
|
value_3 > 25 AND
|
||||||
|
e1.value_2 > e2.value_2
|
||||||
|
)
|
||||||
|
GROUP BY 1
|
||||||
|
ORDER BY 2 DESC, 1 DESC
|
||||||
|
LIMIT 5;
|
||||||
|
user_id | count
|
||||||
|
---------+-------
|
||||||
|
3 | 5
|
||||||
|
56 | 4
|
||||||
|
99 | 2
|
||||||
|
94 | 2
|
||||||
|
92 | 2
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
-- subqueries in both WHERE and FROM clauses
|
||||||
|
-- should work since reference table is on the
|
||||||
|
-- inner part of the join
|
||||||
|
SELECT user_id, value_2 FROM users_table WHERE
|
||||||
|
value_1 > 101 AND value_1 < 110
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND user_id IN
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
e1.user_id
|
||||||
|
FROM (
|
||||||
|
-- Get the first time each user viewed the homepage.
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
1 AS view_homepage,
|
||||||
|
min(time) AS view_homepage_time
|
||||||
|
FROM events_table
|
||||||
|
WHERE
|
||||||
|
event_type IN (10, 20, 30, 40, 50, 60, 70, 80, 90)
|
||||||
|
GROUP BY user_id
|
||||||
|
) e1 LEFT JOIN LATERAL (
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
1 AS use_demo,
|
||||||
|
time AS use_demo_time
|
||||||
|
FROM events_table
|
||||||
|
WHERE
|
||||||
|
user_id = e1.user_id AND
|
||||||
|
event_type IN (11, 21, 31, 41, 51, 61, 71, 81, 91)
|
||||||
|
ORDER BY time
|
||||||
|
) e2 ON true LEFT JOIN LATERAL (
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
1 AS enter_credit_card,
|
||||||
|
time AS enter_credit_card_time
|
||||||
|
FROM events_table
|
||||||
|
WHERE
|
||||||
|
user_id = e2.user_id AND
|
||||||
|
event_type IN (12, 22, 32, 42, 52, 62, 72, 82, 92)
|
||||||
|
ORDER BY time
|
||||||
|
) e3 ON true LEFT JOIN LATERAL (
|
||||||
|
SELECT
|
||||||
|
1 AS submit_card_info,
|
||||||
|
user_id,
|
||||||
|
time AS enter_credit_card_time
|
||||||
|
FROM events_table
|
||||||
|
WHERE
|
||||||
|
user_id = e3.user_id AND
|
||||||
|
event_type IN (13, 23, 33, 43, 53, 63, 73, 83, 93)
|
||||||
|
ORDER BY time
|
||||||
|
) e4 ON true LEFT JOIN LATERAL (
|
||||||
|
SELECT
|
||||||
|
1 AS see_bought_screen
|
||||||
|
FROM events_reference_table
|
||||||
|
WHERE
|
||||||
|
user_id = e4.user_id AND
|
||||||
|
event_type IN (14, 24, 34, 44, 54, 64, 74, 84, 94)
|
||||||
|
ORDER BY time
|
||||||
|
) e5 ON true
|
||||||
|
group by e1.user_id
|
||||||
|
HAVING sum(submit_card_info) > 0
|
||||||
|
)
|
||||||
|
ORDER BY 1, 2;
|
||||||
|
user_id | value_2
|
||||||
|
---------+---------
|
||||||
|
5 | 884
|
||||||
|
42 | 55
|
||||||
|
42 | 471
|
||||||
|
51 | 758
|
||||||
|
72 | 897
|
||||||
|
82 | 691
|
||||||
|
95 | 951
|
||||||
|
(7 rows)
|
||||||
|
|
||||||
|
-- reference tables are not allowed if there is sublink
|
||||||
|
SELECT
|
||||||
|
count(*)
|
||||||
|
FROM
|
||||||
|
users_reference_table
|
||||||
|
WHERE user_id
|
||||||
|
NOT IN
|
||||||
|
(SELECT users_table.value_2 FROM users_table JOIN users_reference_table as u2 ON users_table.value_2 = u2.value_2);
|
||||||
|
ERROR: cannot pushdown the subquery
|
||||||
|
DETAIL: Reference tables are not allowed in FROM clause when the query has subqueries in WHERE clause
|
||||||
|
-- reference tables are not allowed if there is sublink
|
||||||
|
SELECT count(*)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, random() FROM users_reference_table) AS vals
|
||||||
|
WHERE vals.user_id NOT IN
|
||||||
|
(SELECT users_table.value_2
|
||||||
|
FROM users_table
|
||||||
|
JOIN users_reference_table AS u2 ON users_table.value_2 = u2.value_2);
|
||||||
|
ERROR: cannot pushdown the subquery
|
||||||
|
DETAIL: Reference tables are not allowed in FROM clause when the query has subqueries in WHERE clause
|
||||||
|
-- reference tables are not allowed if there is sublink
|
||||||
|
SELECT user_id,
|
||||||
|
count(*)
|
||||||
|
FROM users_reference_table
|
||||||
|
WHERE value_2 > ALL
|
||||||
|
(SELECT min(value_2)
|
||||||
|
FROM events_table
|
||||||
|
WHERE event_type > 50 AND users_reference_table.user_id = events_table.user_id
|
||||||
|
GROUP BY user_id)
|
||||||
|
GROUP BY user_id
|
||||||
|
HAVING count(*) > 66
|
||||||
|
ORDER BY 2 DESC,
|
||||||
|
1 DESC
|
||||||
|
LIMIT 5;
|
||||||
|
ERROR: cannot pushdown the subquery
|
||||||
|
DETAIL: Reference tables are not allowed in FROM clause when the query has subqueries in WHERE clause
|
||||||
|
-- reference tables are not allowed if there is sublink
|
||||||
|
-- this time in the subquery
|
||||||
|
SELECT *
|
||||||
|
FROM users_table
|
||||||
|
WHERE user_id IN
|
||||||
|
(SELECT users_table.user_id
|
||||||
|
FROM users_table,
|
||||||
|
users_reference_table
|
||||||
|
WHERE users_reference_table.user_id NOT IN
|
||||||
|
(SELECT value_2
|
||||||
|
FROM users_reference_table AS u2));
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Reference tables are not allowed in FROM clause when the query has subqueries in WHERE clause
|
||||||
|
-- not supported since GROUP BY references to an upper level query
|
||||||
|
SELECT
|
||||||
|
user_id
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WHERE
|
||||||
|
value_2 >
|
||||||
|
(SELECT
|
||||||
|
max(value_2)
|
||||||
|
FROM
|
||||||
|
events_reference_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_reference_table.user_id AND event_type = 50
|
||||||
|
GROUP BY
|
||||||
|
users_table.user_id
|
||||||
|
)
|
||||||
|
GROUP BY user_id
|
||||||
|
HAVING count(*) > 66
|
||||||
|
ORDER BY user_id
|
||||||
|
LIMIT 5;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Group by list without partition column is currently unsupported
|
||||||
|
|
|
@ -356,6 +356,7 @@ SET citus.shard_max_size TO "1MB";
|
||||||
|
|
||||||
CREATE TABLE events_reference_table (like events_table including all);
|
CREATE TABLE events_reference_table (like events_table including all);
|
||||||
SELECT create_reference_table('events_reference_table');
|
SELECT create_reference_table('events_reference_table');
|
||||||
|
CREATE INDEX events_ref_val2 on events_reference_table(value_2);
|
||||||
INSERT INTO events_reference_table SELECT * FROM events_table;
|
INSERT INTO events_reference_table SELECT * FROM events_table;
|
||||||
|
|
||||||
CREATE TABLE users_reference_table (like users_table including all);
|
CREATE TABLE users_reference_table (like users_table including all);
|
||||||
|
|
|
@ -428,6 +428,7 @@ SELECT create_reference_table('events_reference_table');
|
||||||
|
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
CREATE INDEX events_ref_val2 on events_reference_table(value_2);
|
||||||
INSERT INTO events_reference_table SELECT * FROM events_table;
|
INSERT INTO events_reference_table SELECT * FROM events_table;
|
||||||
CREATE TABLE users_reference_table (like users_table including all);
|
CREATE TABLE users_reference_table (like users_table including all);
|
||||||
SELECT create_reference_table('users_reference_table');
|
SELECT create_reference_table('users_reference_table');
|
||||||
|
|
|
@ -707,7 +707,7 @@ FROM
|
||||||
WHERE
|
WHERE
|
||||||
colocated_table_test.value_1 = reference_table_test.value_1;
|
colocated_table_test.value_1 = reference_table_test.value_1;
|
||||||
|
|
||||||
-- not pushable due to lack of equality between partition column and column of reference table
|
-- safe to push down even lack of equality between partition column and column of reference table
|
||||||
INSERT INTO
|
INSERT INTO
|
||||||
colocated_table_test (value_1, value_2)
|
colocated_table_test (value_1, value_2)
|
||||||
SELECT
|
SELECT
|
||||||
|
@ -718,7 +718,7 @@ WHERE
|
||||||
colocated_table_test_2.value_4 = reference_table_test.value_4
|
colocated_table_test_2.value_4 = reference_table_test.value_4
|
||||||
RETURNING value_1, value_2;
|
RETURNING value_1, value_2;
|
||||||
|
|
||||||
-- some more complex queries (Note that there are more complex queries in multi_insert_select.sql)
|
-- similar query with the above, this time partition key but without equality
|
||||||
INSERT INTO
|
INSERT INTO
|
||||||
colocated_table_test (value_1, value_2)
|
colocated_table_test (value_1, value_2)
|
||||||
SELECT
|
SELECT
|
||||||
|
@ -726,7 +726,7 @@ SELECT
|
||||||
FROM
|
FROM
|
||||||
colocated_table_test_2, reference_table_test
|
colocated_table_test_2, reference_table_test
|
||||||
WHERE
|
WHERE
|
||||||
colocated_table_test_2.value_2 = reference_table_test.value_2
|
colocated_table_test_2.value_1 > reference_table_test.value_2
|
||||||
RETURNING value_1, value_2;
|
RETURNING value_1, value_2;
|
||||||
|
|
||||||
-- partition column value comes from reference table, goes via coordinator
|
-- partition column value comes from reference table, goes via coordinator
|
||||||
|
|
|
@ -42,15 +42,17 @@ SELECT count(*) FROM
|
||||||
(SELECT random(), k_no FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
(SELECT random(), k_no FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||||
ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1 WHERE k_no = 47;
|
ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1 WHERE k_no = 47;
|
||||||
|
|
||||||
-- Should not work, no equality between partition column and reference table
|
-- Should work, although no equality between partition column and reference table
|
||||||
SELECT * FROM
|
SELECT subquery_1.item_id FROM
|
||||||
(SELECT random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
(SELECT user_buy_test_table.item_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||||
ON user_buy_test_table.item_id = users_ref_test_table.id) subquery_1;
|
ON user_buy_test_table.item_id = users_ref_test_table.id) subquery_1
|
||||||
|
ORDER BY 1;
|
||||||
|
|
||||||
-- Should not work, no equality between partition column and reference table
|
-- Should work, although no equality between partition column and reference table
|
||||||
SELECT * FROM
|
SELECT subquery_1.user_id FROM
|
||||||
(SELECT random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
(SELECT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||||
ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_1;
|
ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_1
|
||||||
|
ORDER BY 1;
|
||||||
|
|
||||||
-- Shouldn't work, reference table at the outer side is not allowed
|
-- Shouldn't work, reference table at the outer side is not allowed
|
||||||
SELECT * FROM
|
SELECT * FROM
|
||||||
|
@ -67,6 +69,21 @@ SELECT * FROM
|
||||||
(SELECT random() FROM user_buy_test_table RIGHT JOIN users_ref_test_table
|
(SELECT random() FROM user_buy_test_table RIGHT JOIN users_ref_test_table
|
||||||
ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1;
|
ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1;
|
||||||
|
|
||||||
|
-- Equi join test with reference table on non-partition keys
|
||||||
|
SELECT count(*) FROM
|
||||||
|
(SELECT random() FROM user_buy_test_table JOIN users_ref_test_table
|
||||||
|
ON user_buy_test_table.item_id = users_ref_test_table.id) subquery_1;
|
||||||
|
|
||||||
|
-- Non-equi join test with reference table on non-partition keys
|
||||||
|
SELECT count(*) FROM
|
||||||
|
(SELECT random() FROM user_buy_test_table JOIN users_ref_test_table
|
||||||
|
ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1;
|
||||||
|
|
||||||
|
-- Non-equi left joins with reference tables on non-partition keys
|
||||||
|
SELECT count(*) FROM
|
||||||
|
(SELECT random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||||
|
ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1;
|
||||||
|
|
||||||
-- Should pass since reference table locates in the inner part of each left join
|
-- Should pass since reference table locates in the inner part of each left join
|
||||||
SELECT count(*) FROM
|
SELECT count(*) FROM
|
||||||
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
|
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
|
||||||
|
@ -75,13 +92,172 @@ SELECT count(*) FROM
|
||||||
(SELECT tt1.user_id, random() FROM user_buy_test_table as tt1 LEFT JOIN users_ref_test_table as ref
|
(SELECT tt1.user_id, random() FROM user_buy_test_table as tt1 LEFT JOIN users_ref_test_table as ref
|
||||||
ON tt1.user_id = ref.id) subquery_2 ON subquery_1.user_id = subquery_2.user_id;
|
ON tt1.user_id = ref.id) subquery_2 ON subquery_1.user_id = subquery_2.user_id;
|
||||||
|
|
||||||
-- Should not pass since reference table locates in the outer part of right join
|
-- two subqueries, each include joins with reference table
|
||||||
|
-- also, two hash distributed tables are joined on partition keys
|
||||||
|
SELECT count(*) FROM
|
||||||
|
(SELECT DISTINCT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||||
|
ON user_buy_test_table.item_id > users_ref_test_table.id AND users_ref_test_table.k_no > 88 AND user_buy_test_table.item_id < 88) subquery_1,
|
||||||
|
(SELECT DISTINCT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||||
|
ON user_buy_test_table.user_id > users_ref_test_table.id AND users_ref_test_table.k_no > 44 AND user_buy_test_table.user_id > 44) subquery_2
|
||||||
|
WHERE subquery_1.user_id = subquery_2.user_id ;
|
||||||
|
|
||||||
|
-- Should be able to push down since reference tables are inner joined
|
||||||
|
-- with hash distributed tables, the results of those joins are the parts of
|
||||||
|
-- an outer join
|
||||||
|
SELECT subquery_2.id FROM
|
||||||
|
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
|
||||||
|
ON tt1.user_id = tt2.user_id) subquery_1
|
||||||
|
RIGHT JOIN
|
||||||
|
(SELECT tt1.user_id, ref.id, random() FROM user_buy_test_table as tt1 JOIN users_ref_test_table as ref
|
||||||
|
ON tt1.user_id = ref.id) subquery_2 ON subquery_1.user_id = subquery_2.user_id ORDER BY 1 DESC LIMIT 5;
|
||||||
|
|
||||||
|
-- the same query as the above, but this Citus fails to pushdown the query
|
||||||
|
-- since the outer part of the right join doesn't include any joins
|
||||||
SELECT * FROM
|
SELECT * FROM
|
||||||
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
|
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
|
||||||
ON tt1.user_id = tt2.user_id) subquery_1
|
ON tt1.user_id = tt2.user_id) subquery_1
|
||||||
RIGHT JOIN
|
RIGHT JOIN
|
||||||
(SELECT tt1.user_id, random() FROM user_buy_test_table as tt1 JOIN users_ref_test_table as ref
|
(SELECT *, random() FROM (SELECT tt1.user_id, random() FROM user_buy_test_table as tt1 JOIN users_ref_test_table as ref
|
||||||
ON tt1.user_id = ref.id) subquery_2 ON subquery_1.user_id = subquery_2.user_id;
|
ON tt1.user_id = ref.id) subquery_2_inner) subquery_2 ON subquery_1.user_id = subquery_2.user_id;
|
||||||
|
|
||||||
|
-- should be able to pushdown since reference table is in the
|
||||||
|
-- inner part of the left join
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_1)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
users_table.user_id, users_table.value_1, random()
|
||||||
|
FROM
|
||||||
|
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
|
||||||
|
INNER JOIN events_reference_table ON (events_reference_table.value_2 = users_table.user_id)
|
||||||
|
) as foo
|
||||||
|
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||||
|
|
||||||
|
-- same query as above, reference table is wrapped into a subquery
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_1)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
users_table.user_id, users_table.value_1, random()
|
||||||
|
FROM
|
||||||
|
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
|
||||||
|
INNER JOIN (SELECT *, random() FROM events_reference_table) as ref_all ON (ref_all.value_2 = users_table.user_id)
|
||||||
|
) as foo
|
||||||
|
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||||
|
|
||||||
|
-- should be able to pushdown since reference table is in the
|
||||||
|
-- inner part of the left join
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_1)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
users_table.user_id, users_table.value_1, random()
|
||||||
|
FROM
|
||||||
|
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
|
||||||
|
LEFT JOIN events_reference_table ON (events_reference_table.value_2 = users_table.user_id)
|
||||||
|
) as foo
|
||||||
|
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||||
|
|
||||||
|
-- should not be able to pushdown since reference table is in the
|
||||||
|
-- direct outer part of the left join
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_1)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
users_table.user_id, users_table.value_1, random()
|
||||||
|
FROM
|
||||||
|
events_reference_table LEFT JOIN users_table ON (users_table.user_id = events_reference_table.value_2)
|
||||||
|
LEFT JOIN events_table ON (events_table.user_id = users_table.user_id)
|
||||||
|
) as foo
|
||||||
|
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||||
|
|
||||||
|
-- should not be able to pushdown since reference table is in the
|
||||||
|
-- direct outer part of the left join wrapped into a subquery
|
||||||
|
SELECT
|
||||||
|
*
|
||||||
|
FROM
|
||||||
|
(SELECT *, random() FROM events_reference_table) as ref_all LEFT JOIN users_table
|
||||||
|
ON (users_table.user_id = ref_all.value_2);
|
||||||
|
|
||||||
|
-- should not be able to pushdown since reference table is in the
|
||||||
|
-- outer part of the left join
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_1)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
users_table.user_id, users_table.value_1, random()
|
||||||
|
FROM
|
||||||
|
events_reference_table LEFT JOIN users_table ON (users_table.user_id = events_reference_table.value_2)
|
||||||
|
LEFT JOIN events_table ON (events_table.user_id = users_table.user_id)
|
||||||
|
) as foo
|
||||||
|
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||||
|
|
||||||
|
-- should be able to pushdown since reference table is in the
|
||||||
|
-- inner part of the left join
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT DISTINCT foo.user_id
|
||||||
|
FROM
|
||||||
|
((SELECT
|
||||||
|
"events"."time", "events"."user_id" as event_user_id, value_2 as event_val_2, random()
|
||||||
|
FROM
|
||||||
|
events_reference_table as "events"
|
||||||
|
WHERE
|
||||||
|
event_type > 80) as "temp_data_queries"
|
||||||
|
INNER JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id"
|
||||||
|
FROM
|
||||||
|
users_table as "users"
|
||||||
|
WHERE
|
||||||
|
user_id > 80 and value_2 = 5) as foo_in ON (event_val_2 = user_id)) as foo LEFT JOIN
|
||||||
|
(SELECT user_id as user_user_id FROM users_table) as fooo ON (user_id = user_user_id)) as bar;
|
||||||
|
|
||||||
|
-- the same query but this time reference table is in the outer part of the query
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT DISTINCT foo.user_id
|
||||||
|
FROM
|
||||||
|
((SELECT
|
||||||
|
"events"."time", "events"."user_id" as event_user_id, value_2 as event_val_2, random()
|
||||||
|
FROM
|
||||||
|
events_reference_table as "events"
|
||||||
|
WHERE
|
||||||
|
event_type > 80) as "temp_data_queries"
|
||||||
|
LEFT JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id"
|
||||||
|
FROM
|
||||||
|
users_table as "users"
|
||||||
|
WHERE
|
||||||
|
user_id > 80 and value_2 = 5) as foo_in ON (event_val_2 = user_id)) as foo LEFT JOIN
|
||||||
|
(SELECT user_id as user_user_id FROM users_table) as fooo ON (user_id = user_user_id)) as bar;
|
||||||
|
|
||||||
|
-- we could even suuport the following where the subquery
|
||||||
|
-- on the outer part of the left join contains a reference table
|
||||||
|
SELECT max(events_all.cnt), events_all.usr_id
|
||||||
|
FROM
|
||||||
|
(SELECT users_table.user_id as usr_id,
|
||||||
|
count(*) as cnt
|
||||||
|
FROM events_reference_table
|
||||||
|
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id) GROUP BY users_table.user_id) AS events_all
|
||||||
|
LEFT JOIN events_table ON (events_all.usr_id = events_table.user_id) GROUP BY 2 ORDER BY 1 DESC, 2 DESC LIMIT 5;
|
||||||
|
|
||||||
|
-- but, we fail to pushdown the following query where join that reference table appears
|
||||||
|
-- wrapped into a subquery
|
||||||
|
SELECT max(events_all.cnt),
|
||||||
|
events_all.usr_id
|
||||||
|
FROM(
|
||||||
|
SELECT *, random() FROM
|
||||||
|
(SELECT users_table.user_id AS usr_id, count(*) AS cnt
|
||||||
|
FROM events_reference_table
|
||||||
|
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
|
||||||
|
GROUP BY users_table.user_id) AS events_all_inner) AS events_all
|
||||||
|
LEFT JOIN events_table ON (events_all.usr_id = events_table.user_id)
|
||||||
|
GROUP BY 2
|
||||||
|
ORDER BY 1 DESC,
|
||||||
|
2 DESC
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
-- LATERAL JOINs used with INNER JOINs with reference tables
|
-- LATERAL JOINs used with INNER JOINs with reference tables
|
||||||
SET citus.subquery_pushdown to ON;
|
SET citus.subquery_pushdown to ON;
|
||||||
|
@ -149,10 +325,10 @@ SELECT
|
||||||
count(*) AS value, "generated_group_field"
|
count(*) AS value, "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
DISTINCT "pushedDownQuery"."real_user_id", "generated_group_field"
|
DISTINCT "pushedDownQuery"."user_id", "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"eventQuery"."real_user_id", "eventQuery"."time", random(), ("eventQuery"."value_2") AS "generated_group_field"
|
"eventQuery"."user_id", "eventQuery"."time", random(), ("eventQuery"."value_2") AS "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
*
|
*
|
||||||
|
@ -160,7 +336,7 @@ SELECT
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."time", "events"."user_id", "events"."value_2"
|
"events"."time", "events"."user_id", "events"."value_2"
|
||||||
FROM
|
FROM
|
||||||
events_reference_table as "events"
|
events_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 10 and user_id < 40 AND event_type IN (40, 41, 42, 43, 44, 45) ) "temp_data_queries"
|
user_id > 10 and user_id < 40 AND event_type IN (40, 41, 42, 43, 44, 45) ) "temp_data_queries"
|
||||||
INNER JOIN
|
INNER JOIN
|
||||||
|
@ -270,8 +446,8 @@ ORDER BY
|
||||||
LIMIT 10;
|
LIMIT 10;
|
||||||
SET citus.subquery_pushdown to OFF;
|
SET citus.subquery_pushdown to OFF;
|
||||||
|
|
||||||
-- LEFT JOINs used with INNER JOINs should error out since reference table exist in the
|
-- LEFT JOINs used with INNER JOINs should not error out since reference table joined
|
||||||
-- left side of the LEFT JOIN.
|
-- with hash table that Citus can push down
|
||||||
SELECT
|
SELECT
|
||||||
count(*) AS cnt, "generated_group_field"
|
count(*) AS cnt, "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
|
@ -348,6 +524,106 @@ count(*) AS cnt, "generated_group_field"
|
||||||
cnt DESC, generated_group_field ASC
|
cnt DESC, generated_group_field ASC
|
||||||
LIMIT 10;
|
LIMIT 10;
|
||||||
|
|
||||||
|
-- right join where the inner part of the join includes a reference table
|
||||||
|
-- joined with hash partitioned table using non-equi join
|
||||||
|
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
t1.user_id,
|
||||||
|
array_agg(event ORDER BY time) AS events_table,
|
||||||
|
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||||||
|
FROM (
|
||||||
|
(
|
||||||
|
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_reference_table AS e
|
||||||
|
WHERE u.user_id > e.user_id
|
||||||
|
AND u.user_id >= 10
|
||||||
|
AND u.user_id <= 25
|
||||||
|
AND e.event_type IN (100, 101, 102)
|
||||||
|
)
|
||||||
|
) t1 RIGHT JOIN (
|
||||||
|
SELECT DISTINCT user_id,
|
||||||
|
'Has done event'::TEXT AS hasdone_event
|
||||||
|
FROM events_table AS e
|
||||||
|
WHERE e.user_id >= 10
|
||||||
|
AND e.user_id <= 25
|
||||||
|
AND e.event_type IN (106, 107, 108)
|
||||||
|
) t2 ON (t1.user_id = t2.user_id)
|
||||||
|
GROUP BY t1.user_id, hasdone_event
|
||||||
|
) t GROUP BY user_id, hasdone_event
|
||||||
|
ORDER BY user_id;
|
||||||
|
|
||||||
|
-- a similar query as the above, with non-partition key comparison
|
||||||
|
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
t1.user_id,
|
||||||
|
array_agg(event ORDER BY time) AS events_table,
|
||||||
|
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||||||
|
FROM (
|
||||||
|
(
|
||||||
|
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_reference_table AS e
|
||||||
|
WHERE u.value_1 > e.user_id
|
||||||
|
AND u.user_id >= 10
|
||||||
|
AND u.user_id <= 25
|
||||||
|
AND e.event_type >= 125 AND e.event_type < 130
|
||||||
|
)
|
||||||
|
) t1 RIGHT JOIN (
|
||||||
|
SELECT DISTINCT user_id,
|
||||||
|
'Has done event'::TEXT AS hasdone_event
|
||||||
|
FROM events_table AS e
|
||||||
|
WHERE e.user_id >= 10
|
||||||
|
AND e.user_id <= 25
|
||||||
|
AND e.event_type >= 130 AND e.event_type < 135
|
||||||
|
) t2 ON (t1.user_id = t2.user_id)
|
||||||
|
GROUP BY t1.user_id, hasdone_event
|
||||||
|
) t GROUP BY user_id, hasdone_event
|
||||||
|
ORDER BY user_id;
|
||||||
|
|
||||||
|
|
||||||
|
-- LEFT JOINs used with INNER JOINs
|
||||||
|
-- events_table and users_reference_table joined
|
||||||
|
-- with event_table.non_part_key < reference_table.any_key
|
||||||
|
SELECT
|
||||||
|
count(*) AS cnt, "generated_group_field"
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"eventQuery"."user_id", random(), generated_group_field
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"multi_group_wrapper_1".*, generated_group_field, random()
|
||||||
|
FROM
|
||||||
|
(SELECT *
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"events"."time", "events"."user_id" as event_user_id
|
||||||
|
FROM
|
||||||
|
events_table as "events"
|
||||||
|
WHERE
|
||||||
|
user_id > 80) "temp_data_queries"
|
||||||
|
INNER JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id"
|
||||||
|
FROM
|
||||||
|
users_reference_table as "users"
|
||||||
|
WHERE
|
||||||
|
user_id > 80 and value_2 = 5) "user_filters_1"
|
||||||
|
ON ("temp_data_queries".event_user_id < "user_filters_1".user_id)) AS "multi_group_wrapper_1"
|
||||||
|
RIGHT JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
|
||||||
|
FROM
|
||||||
|
users_table as "users") "left_group_by_1"
|
||||||
|
ON ("left_group_by_1".user_id = "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
|
||||||
|
group BY
|
||||||
|
"generated_group_field"
|
||||||
|
ORDER BY
|
||||||
|
cnt DESC, generated_group_field ASC
|
||||||
|
LIMIT 10;
|
||||||
|
|
||||||
-- Outer subquery with reference table
|
-- Outer subquery with reference table
|
||||||
SELECT "some_users_data".user_id, lastseen
|
SELECT "some_users_data".user_id, lastseen
|
||||||
FROM
|
FROM
|
||||||
|
@ -380,9 +656,9 @@ ORDER BY
|
||||||
limit 50;
|
limit 50;
|
||||||
|
|
||||||
--
|
--
|
||||||
-- UNIONs and JOINs with reference tables, shoukld error out
|
-- UNIONs and JOINs with reference tables, should error out
|
||||||
--
|
--
|
||||||
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
SELECT ("final_query"."event_types") as types
|
||||||
FROM
|
FROM
|
||||||
( SELECT *, random()
|
( SELECT *, random()
|
||||||
FROM
|
FROM
|
||||||
|
@ -396,7 +672,7 @@ FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."user_id", "events"."time", 0 AS event
|
"events"."user_id", "events"."time", 0 AS event
|
||||||
FROM
|
FROM
|
||||||
events_reference_table as "events"
|
events_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1)
|
event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1)
|
||||||
UNION
|
UNION
|
||||||
|
@ -406,7 +682,7 @@ FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."user_id", "events"."time", 1 AS event
|
"events"."user_id", "events"."time", 1 AS event
|
||||||
FROM
|
FROM
|
||||||
events_table as "events"
|
events_reference_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
event_type IN (15, 16, 17, 18, 19) ) events_subquery_2)
|
event_type IN (15, 16, 17, 18, 19) ) events_subquery_2)
|
||||||
UNION
|
UNION
|
||||||
|
@ -438,8 +714,6 @@ INNER JOIN
|
||||||
WHERE
|
WHERE
|
||||||
value_1 > 50 and value_1 < 70) AS t
|
value_1 > 50 and value_1 < 70) AS t
|
||||||
ON (t.user_id = q.user_id)) as final_query
|
ON (t.user_id = q.user_id)) as final_query
|
||||||
GROUP BY
|
|
||||||
types
|
|
||||||
ORDER BY
|
ORDER BY
|
||||||
types;
|
types;
|
||||||
|
|
||||||
|
@ -471,15 +745,15 @@ FROM
|
||||||
SELECT * FROM
|
SELECT * FROM
|
||||||
(
|
(
|
||||||
SELECT
|
SELECT
|
||||||
max("events"."time"),
|
max("users"."time"),
|
||||||
0 AS event,
|
0 AS event,
|
||||||
"events"."user_id"
|
"users"."user_id"
|
||||||
FROM
|
FROM
|
||||||
events_reference_table as "events", users_table as "users"
|
events_reference_table as "events", users_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
events.user_id = users.user_id AND
|
events.user_id = users.user_id AND
|
||||||
event_type IN (10, 11, 12, 13, 14, 15)
|
event_type IN (10, 11, 12, 13, 14, 15)
|
||||||
GROUP BY "events"."user_id"
|
GROUP BY "users"."user_id"
|
||||||
) as events_subquery_5
|
) as events_subquery_5
|
||||||
) events_subquery_2)
|
) events_subquery_2)
|
||||||
UNION
|
UNION
|
||||||
|
@ -569,6 +843,107 @@ INNER JOIN
|
||||||
GROUP BY types
|
GROUP BY types
|
||||||
ORDER BY types;
|
ORDER BY types;
|
||||||
|
|
||||||
|
-- just a sanity check that we don't allow this if the reference table is on the
|
||||||
|
-- left part of the left join
|
||||||
|
SELECT count(*) FROM
|
||||||
|
(SELECT random() FROM users_ref_test_table LEFT JOIN user_buy_test_table
|
||||||
|
ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1;
|
||||||
|
|
||||||
|
-- we don't allow non equi join among hash partitioned tables
|
||||||
|
SELECT count(*) FROM
|
||||||
|
(SELECT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||||
|
ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1,
|
||||||
|
(SELECT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||||
|
ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_2
|
||||||
|
WHERE subquery_1.user_id != subquery_2.user_id ;
|
||||||
|
|
||||||
|
-- we cannot push this query since hash partitioned tables
|
||||||
|
-- are not joined on partition keys with equality
|
||||||
|
SELECT
|
||||||
|
count(*) AS cnt, "generated_group_field"
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"eventQuery"."user_id", random(), generated_group_field
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"multi_group_wrapper_1".*, generated_group_field, random()
|
||||||
|
FROM
|
||||||
|
(SELECT *
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"events"."time", "events"."user_id" as event_user_id
|
||||||
|
FROM
|
||||||
|
events_table as "events"
|
||||||
|
WHERE
|
||||||
|
user_id > 80) "temp_data_queries"
|
||||||
|
INNER JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id"
|
||||||
|
FROM
|
||||||
|
users_reference_table as "users"
|
||||||
|
WHERE
|
||||||
|
user_id > 80 and value_2 = 5) "user_filters_1"
|
||||||
|
ON ("temp_data_queries".event_user_id < "user_filters_1".user_id)) AS "multi_group_wrapper_1"
|
||||||
|
RIGHT JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
|
||||||
|
FROM
|
||||||
|
users_table as "users") "left_group_by_1"
|
||||||
|
ON ("left_group_by_1".user_id > "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
|
||||||
|
group BY
|
||||||
|
"generated_group_field"
|
||||||
|
ORDER BY
|
||||||
|
cnt DESC, generated_group_field ASC
|
||||||
|
LIMIT 10;
|
||||||
|
|
||||||
|
-- two hash partitioned relations are not joined
|
||||||
|
-- on partiton keys although reference table is fine
|
||||||
|
-- to push down
|
||||||
|
SELECT
|
||||||
|
u1.user_id, count(*)
|
||||||
|
FROM
|
||||||
|
events_table as e1, users_table as u1
|
||||||
|
WHERE
|
||||||
|
event_type IN
|
||||||
|
(SELECT
|
||||||
|
event_type
|
||||||
|
FROM
|
||||||
|
events_reference_table as e2
|
||||||
|
WHERE
|
||||||
|
value_2 = 15 AND
|
||||||
|
value_3 > 25 AND
|
||||||
|
e1.value_2 > e2.value_2
|
||||||
|
)
|
||||||
|
AND u1.user_id > e1.user_id
|
||||||
|
GROUP BY 1
|
||||||
|
ORDER BY 2 DESC, 1 DESC
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
SELECT foo.user_id FROM
|
||||||
|
(
|
||||||
|
SELECT m.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
|
||||||
|
WHERE event_type > 100000
|
||||||
|
) as foo;
|
||||||
|
|
||||||
|
-- not supported since group by is on the reference table column
|
||||||
|
SELECT foo.user_id FROM
|
||||||
|
(
|
||||||
|
SELECT r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
|
||||||
|
GROUP BY r.user_id
|
||||||
|
) as foo;
|
||||||
|
|
||||||
|
-- not supported since distinct is on the reference table column
|
||||||
|
SELECT foo.user_id FROM
|
||||||
|
(
|
||||||
|
SELECT DISTINCT r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
|
||||||
|
) as foo;
|
||||||
|
|
||||||
|
-- not supported since distinct on is on the reference table column
|
||||||
|
SELECT foo.user_id FROM
|
||||||
|
(
|
||||||
|
SELECT DISTINCT ON(r.user_id) r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
|
||||||
|
) as foo;
|
||||||
|
|
||||||
DROP TABLE user_buy_test_table;
|
DROP TABLE user_buy_test_table;
|
||||||
DROP TABLE users_ref_test_table;
|
DROP TABLE users_ref_test_table;
|
||||||
DROP TABLE users_return_test_table;
|
DROP TABLE users_return_test_table;
|
||||||
|
|
|
@ -1,27 +1,6 @@
|
||||||
--
|
--
|
||||||
-- queries to test the subquery pushdown on reference tables
|
-- queries to test the subquery pushdown on reference tables
|
||||||
|
|
||||||
-- subqueries in WHERE with greater operator
|
|
||||||
SELECT
|
|
||||||
user_id
|
|
||||||
FROM
|
|
||||||
users_table
|
|
||||||
WHERE
|
|
||||||
value_2 >
|
|
||||||
(SELECT
|
|
||||||
max(value_2)
|
|
||||||
FROM
|
|
||||||
events_reference_table
|
|
||||||
WHERE
|
|
||||||
users_table.user_id = events_reference_table.user_id AND event_type = 50
|
|
||||||
GROUP BY
|
|
||||||
user_id
|
|
||||||
)
|
|
||||||
GROUP BY user_id
|
|
||||||
HAVING count(*) > 66
|
|
||||||
ORDER BY user_id
|
|
||||||
LIMIT 5;
|
|
||||||
|
|
||||||
-- subqueries in WHERE with IN operator
|
-- subqueries in WHERE with IN operator
|
||||||
SELECT
|
SELECT
|
||||||
user_id
|
user_id
|
||||||
|
@ -78,7 +57,7 @@ LIMIT 3;
|
||||||
|
|
||||||
-- subqueries in WHERE with IN operator without equality
|
-- subqueries in WHERE with IN operator without equality
|
||||||
SELECT
|
SELECT
|
||||||
user_id
|
users_table.user_id, count(*)
|
||||||
FROM
|
FROM
|
||||||
users_table
|
users_table
|
||||||
WHERE
|
WHERE
|
||||||
|
@ -90,51 +69,10 @@ WHERE
|
||||||
WHERE
|
WHERE
|
||||||
users_table.user_id > events_reference_table.user_id
|
users_table.user_id > events_reference_table.user_id
|
||||||
)
|
)
|
||||||
GROUP BY user_id
|
GROUP BY users_table.user_id
|
||||||
ORDER BY user_id
|
ORDER BY 2 DESC, 1 DESC
|
||||||
LIMIT 3;
|
LIMIT 3;
|
||||||
|
|
||||||
-- have reference table without any equality, should error out
|
|
||||||
SELECT
|
|
||||||
user_id
|
|
||||||
FROM
|
|
||||||
users_table
|
|
||||||
WHERE
|
|
||||||
value_2 >
|
|
||||||
(SELECT
|
|
||||||
max(value_2)
|
|
||||||
FROM
|
|
||||||
events_reference_table
|
|
||||||
WHERE
|
|
||||||
event_type = 50
|
|
||||||
GROUP BY
|
|
||||||
user_id
|
|
||||||
)
|
|
||||||
GROUP BY user_id
|
|
||||||
HAVING count(*) > 66
|
|
||||||
ORDER BY user_id
|
|
||||||
LIMIT 5;
|
|
||||||
|
|
||||||
-- users that appeared more than 118 times, should run since the reference table
|
|
||||||
-- on the right side of the semi join
|
|
||||||
SELECT
|
|
||||||
user_id
|
|
||||||
FROM
|
|
||||||
users_table
|
|
||||||
WHERE 118 <=
|
|
||||||
(SELECT
|
|
||||||
count(*)
|
|
||||||
FROM
|
|
||||||
events_reference_table
|
|
||||||
WHERE
|
|
||||||
users_table.user_id = events_reference_table.user_id
|
|
||||||
GROUP BY
|
|
||||||
user_id)
|
|
||||||
GROUP BY
|
|
||||||
user_id
|
|
||||||
ORDER BY
|
|
||||||
user_id;
|
|
||||||
|
|
||||||
-- should error out since reference table exist on the left side
|
-- should error out since reference table exist on the left side
|
||||||
-- of the left lateral join
|
-- of the left lateral join
|
||||||
SELECT user_id, value_2 FROM users_table WHERE
|
SELECT user_id, value_2 FROM users_table WHERE
|
||||||
|
@ -197,3 +135,165 @@ SELECT user_id, value_2 FROM users_table WHERE
|
||||||
HAVING sum(submit_card_info) > 0
|
HAVING sum(submit_card_info) > 0
|
||||||
)
|
)
|
||||||
ORDER BY 1, 2;
|
ORDER BY 1, 2;
|
||||||
|
|
||||||
|
-- non-partition key equality with reference table
|
||||||
|
SELECT
|
||||||
|
user_id, count(*)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WHERE
|
||||||
|
value_3 =ANY(SELECT value_2 FROM users_reference_table WHERE value_1 >= 10 AND value_1 <= 20)
|
||||||
|
GROUP BY 1 ORDER BY 2 DESC, 1 DESC LIMIT 5;
|
||||||
|
|
||||||
|
|
||||||
|
-- non-partition key comparison with reference table
|
||||||
|
SELECT
|
||||||
|
user_id, count(*)
|
||||||
|
FROM
|
||||||
|
events_table as e1
|
||||||
|
WHERE
|
||||||
|
event_type IN
|
||||||
|
(SELECT
|
||||||
|
event_type
|
||||||
|
FROM
|
||||||
|
events_reference_table as e2
|
||||||
|
WHERE
|
||||||
|
value_2 = 15 AND
|
||||||
|
value_3 > 25 AND
|
||||||
|
e1.value_2 > e2.value_2
|
||||||
|
)
|
||||||
|
GROUP BY 1
|
||||||
|
ORDER BY 2 DESC, 1 DESC
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
-- subqueries in both WHERE and FROM clauses
|
||||||
|
-- should work since reference table is on the
|
||||||
|
-- inner part of the join
|
||||||
|
SELECT user_id, value_2 FROM users_table WHERE
|
||||||
|
value_1 > 101 AND value_1 < 110
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND user_id IN
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
e1.user_id
|
||||||
|
FROM (
|
||||||
|
-- Get the first time each user viewed the homepage.
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
1 AS view_homepage,
|
||||||
|
min(time) AS view_homepage_time
|
||||||
|
FROM events_table
|
||||||
|
WHERE
|
||||||
|
event_type IN (10, 20, 30, 40, 50, 60, 70, 80, 90)
|
||||||
|
GROUP BY user_id
|
||||||
|
) e1 LEFT JOIN LATERAL (
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
1 AS use_demo,
|
||||||
|
time AS use_demo_time
|
||||||
|
FROM events_table
|
||||||
|
WHERE
|
||||||
|
user_id = e1.user_id AND
|
||||||
|
event_type IN (11, 21, 31, 41, 51, 61, 71, 81, 91)
|
||||||
|
ORDER BY time
|
||||||
|
) e2 ON true LEFT JOIN LATERAL (
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
1 AS enter_credit_card,
|
||||||
|
time AS enter_credit_card_time
|
||||||
|
FROM events_table
|
||||||
|
WHERE
|
||||||
|
user_id = e2.user_id AND
|
||||||
|
event_type IN (12, 22, 32, 42, 52, 62, 72, 82, 92)
|
||||||
|
ORDER BY time
|
||||||
|
) e3 ON true LEFT JOIN LATERAL (
|
||||||
|
SELECT
|
||||||
|
1 AS submit_card_info,
|
||||||
|
user_id,
|
||||||
|
time AS enter_credit_card_time
|
||||||
|
FROM events_table
|
||||||
|
WHERE
|
||||||
|
user_id = e3.user_id AND
|
||||||
|
event_type IN (13, 23, 33, 43, 53, 63, 73, 83, 93)
|
||||||
|
ORDER BY time
|
||||||
|
) e4 ON true LEFT JOIN LATERAL (
|
||||||
|
SELECT
|
||||||
|
1 AS see_bought_screen
|
||||||
|
FROM events_reference_table
|
||||||
|
WHERE
|
||||||
|
user_id = e4.user_id AND
|
||||||
|
event_type IN (14, 24, 34, 44, 54, 64, 74, 84, 94)
|
||||||
|
ORDER BY time
|
||||||
|
) e5 ON true
|
||||||
|
group by e1.user_id
|
||||||
|
HAVING sum(submit_card_info) > 0
|
||||||
|
)
|
||||||
|
ORDER BY 1, 2;
|
||||||
|
|
||||||
|
-- reference tables are not allowed if there is sublink
|
||||||
|
SELECT
|
||||||
|
count(*)
|
||||||
|
FROM
|
||||||
|
users_reference_table
|
||||||
|
WHERE user_id
|
||||||
|
NOT IN
|
||||||
|
(SELECT users_table.value_2 FROM users_table JOIN users_reference_table as u2 ON users_table.value_2 = u2.value_2);
|
||||||
|
|
||||||
|
|
||||||
|
-- reference tables are not allowed if there is sublink
|
||||||
|
SELECT count(*)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, random() FROM users_reference_table) AS vals
|
||||||
|
WHERE vals.user_id NOT IN
|
||||||
|
(SELECT users_table.value_2
|
||||||
|
FROM users_table
|
||||||
|
JOIN users_reference_table AS u2 ON users_table.value_2 = u2.value_2);
|
||||||
|
|
||||||
|
-- reference tables are not allowed if there is sublink
|
||||||
|
SELECT user_id,
|
||||||
|
count(*)
|
||||||
|
FROM users_reference_table
|
||||||
|
WHERE value_2 > ALL
|
||||||
|
(SELECT min(value_2)
|
||||||
|
FROM events_table
|
||||||
|
WHERE event_type > 50 AND users_reference_table.user_id = events_table.user_id
|
||||||
|
GROUP BY user_id)
|
||||||
|
GROUP BY user_id
|
||||||
|
HAVING count(*) > 66
|
||||||
|
ORDER BY 2 DESC,
|
||||||
|
1 DESC
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
-- reference tables are not allowed if there is sublink
|
||||||
|
-- this time in the subquery
|
||||||
|
SELECT *
|
||||||
|
FROM users_table
|
||||||
|
WHERE user_id IN
|
||||||
|
(SELECT users_table.user_id
|
||||||
|
FROM users_table,
|
||||||
|
users_reference_table
|
||||||
|
WHERE users_reference_table.user_id NOT IN
|
||||||
|
(SELECT value_2
|
||||||
|
FROM users_reference_table AS u2));
|
||||||
|
|
||||||
|
-- not supported since GROUP BY references to an upper level query
|
||||||
|
SELECT
|
||||||
|
user_id
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WHERE
|
||||||
|
value_2 >
|
||||||
|
(SELECT
|
||||||
|
max(value_2)
|
||||||
|
FROM
|
||||||
|
events_reference_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_reference_table.user_id AND event_type = 50
|
||||||
|
GROUP BY
|
||||||
|
users_table.user_id
|
||||||
|
)
|
||||||
|
GROUP BY user_id
|
||||||
|
HAVING count(*) > 66
|
||||||
|
ORDER BY user_id
|
||||||
|
LIMIT 5;
|
||||||
|
|
Loading…
Reference in New Issue