mirror of https://github.com/citusdata/citus.git
Expand reference table support in subquery pushdown
With this commit, we relax the restrictions put on the reference tables with subquery pushdown. We did three notable improvements: 1) Relax equi-join restrictions Previously, we always expected that the non-reference tables are equi joined with reference tables on the partition key of the non-reference table. With this commit, we allow any column of non-reference tables joined using non-equi joins as well. 2) Relax OUTER JOIN restrictions Previously Citus errored out if any reference table exists at any point of the outer part of an outer join. For instance, See the below sketch where (h) denotes a hash distributed relation, (r) denotes a reference table, (L) denotes LEFT JOIN and (I) denotes INNER JOIN. (L) / \ (I) h / \ r h Before this commit Citus would error out since a reference table appears on the left most part of an left join. However, that was too restrictive so that we only error out if the reference table is directly below and in the outer part of an outer join. 3) Bug fixes We've done some minor bugfixes in the existing implementation.pull/1628/head
parent
18b9be3dfa
commit
a5b66912d4
|
@ -2880,6 +2880,17 @@ FindReferencedTableColumn(Expr *columnExpression, List *parentQueryList, Query *
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We currently don't support finding partition keys in the subqueries
|
||||||
|
* that references from outer subqueries. For example, in corrolated
|
||||||
|
* subqueries in WHERE clause, we don't support use of partition keys
|
||||||
|
* in the subquery that is referred from the outer query.
|
||||||
|
*/
|
||||||
|
if (candidateColumn->varlevelsup > 0)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
rangeTableEntryIndex = candidateColumn->varno - 1;
|
rangeTableEntryIndex = candidateColumn->varno - 1;
|
||||||
rangeTableEntry = list_nth(rangetableList, rangeTableEntryIndex);
|
rangeTableEntry = list_nth(rangetableList, rangeTableEntryIndex);
|
||||||
|
|
||||||
|
@ -3096,7 +3107,14 @@ PartitionColumnOpExpressionList(Query *query)
|
||||||
rangeTableEntryIndex = candidatePartitionColumn->varno - 1;
|
rangeTableEntryIndex = candidatePartitionColumn->varno - 1;
|
||||||
rangeTableEntry = list_nth(rangetableList, rangeTableEntryIndex);
|
rangeTableEntry = list_nth(rangetableList, rangeTableEntryIndex);
|
||||||
|
|
||||||
Assert(rangeTableEntry->rtekind == RTE_RELATION);
|
/*
|
||||||
|
* We currently don't support checking for equality when user refers
|
||||||
|
* to a column from the JOIN instead of the relation.
|
||||||
|
*/
|
||||||
|
if (rangeTableEntry->rtekind != RTE_RELATION)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
relationId = rangeTableEntry->relid;
|
relationId = rangeTableEntry->relid;
|
||||||
partitionColumn = DistPartitionKey(relationId);
|
partitionColumn = DistPartitionKey(relationId);
|
||||||
|
|
|
@ -31,6 +31,7 @@
|
||||||
#include "distributed/worker_protocol.h"
|
#include "distributed/worker_protocol.h"
|
||||||
#include "nodes/makefuncs.h"
|
#include "nodes/makefuncs.h"
|
||||||
#include "nodes/nodeFuncs.h"
|
#include "nodes/nodeFuncs.h"
|
||||||
|
#include "nodes/relation.h"
|
||||||
#include "optimizer/clauses.h"
|
#include "optimizer/clauses.h"
|
||||||
#include "optimizer/prep.h"
|
#include "optimizer/prep.h"
|
||||||
#include "optimizer/tlist.h"
|
#include "optimizer/tlist.h"
|
||||||
|
@ -69,6 +70,8 @@ static DeferredErrorMessage * DeferErrorIfUnsupportedSubqueryPushdown(Query *
|
||||||
PlannerRestrictionContext
|
PlannerRestrictionContext
|
||||||
*
|
*
|
||||||
plannerRestrictionContext);
|
plannerRestrictionContext);
|
||||||
|
static DeferredErrorMessage * DeferErrorIfUnsupportedSublinkAndReferenceTable(
|
||||||
|
Query *queryTree);
|
||||||
static DeferredErrorMessage * DeferErrorIfUnsupportedFilters(Query *subquery);
|
static DeferredErrorMessage * DeferErrorIfUnsupportedFilters(Query *subquery);
|
||||||
static bool EqualOpExpressionLists(List *firstOpExpressionList,
|
static bool EqualOpExpressionLists(List *firstOpExpressionList,
|
||||||
List *secondOpExpressionList);
|
List *secondOpExpressionList);
|
||||||
|
@ -87,6 +90,7 @@ static MultiNode * MultiPlanTree(Query *queryTree);
|
||||||
static void ErrorIfQueryNotSupported(Query *queryTree);
|
static void ErrorIfQueryNotSupported(Query *queryTree);
|
||||||
static bool HasUnsupportedReferenceTableJoin(
|
static bool HasUnsupportedReferenceTableJoin(
|
||||||
PlannerRestrictionContext *plannerRestrictionContext);
|
PlannerRestrictionContext *plannerRestrictionContext);
|
||||||
|
static bool ShouldRecurseForReferenceTableJoinChecks(RelOptInfo *relOptInfo);
|
||||||
static bool HasUnsupportedJoinWalker(Node *node, void *context);
|
static bool HasUnsupportedJoinWalker(Node *node, void *context);
|
||||||
static bool ErrorHintRequired(const char *errorHint, Query *queryTree);
|
static bool ErrorHintRequired(const char *errorHint, Query *queryTree);
|
||||||
static DeferredErrorMessage * DeferErrorIfUnsupportedSubqueryRepartition(Query *
|
static DeferredErrorMessage * DeferErrorIfUnsupportedSubqueryRepartition(Query *
|
||||||
|
@ -96,8 +100,8 @@ static bool HasOuterJoin(Query *queryTree);
|
||||||
static bool HasOuterJoinWalker(Node *node, void *maxJoinLevel);
|
static bool HasOuterJoinWalker(Node *node, void *maxJoinLevel);
|
||||||
static bool HasComplexJoinOrder(Query *queryTree);
|
static bool HasComplexJoinOrder(Query *queryTree);
|
||||||
static bool HasComplexRangeTableType(Query *queryTree);
|
static bool HasComplexRangeTableType(Query *queryTree);
|
||||||
static bool RelationInfoHasReferenceTable(PlannerInfo *plannerInfo,
|
static bool RelationInfoContainsReferenceTable(PlannerInfo *plannerInfo,
|
||||||
RelOptInfo *relationInfo);
|
RelOptInfo *relationInfo);
|
||||||
static void ValidateClauseList(List *clauseList);
|
static void ValidateClauseList(List *clauseList);
|
||||||
static void ValidateSubqueryPushdownClauseList(List *clauseList);
|
static void ValidateSubqueryPushdownClauseList(List *clauseList);
|
||||||
static bool ExtractFromExpressionWalker(Node *node,
|
static bool ExtractFromExpressionWalker(Node *node,
|
||||||
|
@ -542,7 +546,16 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery,
|
||||||
"one another relation using distribution keys and "
|
"one another relation using distribution keys and "
|
||||||
"equality operator.", NULL);
|
"equality operator.", NULL);
|
||||||
}
|
}
|
||||||
else if (HasUnsupportedReferenceTableJoin(plannerRestrictionContext))
|
|
||||||
|
/* we shouldn't allow reference tables in the FROM clause when the query has sublinks */
|
||||||
|
error = DeferErrorIfUnsupportedSublinkAndReferenceTable(originalQuery);
|
||||||
|
if (error)
|
||||||
|
{
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* we shouldn't allow reference tables in the outer part of outer joins */
|
||||||
|
if (HasUnsupportedReferenceTableJoin(plannerRestrictionContext))
|
||||||
{
|
{
|
||||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||||
"cannot pushdown the subquery",
|
"cannot pushdown the subquery",
|
||||||
|
@ -582,6 +595,43 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* DeferErrorIfUnsupportedSublinkAndReferenceTable returns a deferred error if the
|
||||||
|
* given query is not suitable for subquery pushdown.
|
||||||
|
*
|
||||||
|
* While planning sublinks, we rely on Postgres in the sense that it converts some of
|
||||||
|
* sublinks into joins.
|
||||||
|
*
|
||||||
|
* In some cases, sublinks are pulled up and converted into outer joins. Those cases
|
||||||
|
* are already handled with HasUnsupportedReferenceTableJoin().
|
||||||
|
*
|
||||||
|
* If the sublinks are not pulled up, we should still error out in if any reference table
|
||||||
|
* appears in the FROM clause of a subquery.
|
||||||
|
*
|
||||||
|
* Otherwise, the result would include duplicate rows.
|
||||||
|
*/
|
||||||
|
static DeferredErrorMessage *
|
||||||
|
DeferErrorIfUnsupportedSublinkAndReferenceTable(Query *queryTree)
|
||||||
|
{
|
||||||
|
if (!queryTree->hasSubLinks)
|
||||||
|
{
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (HasReferenceTable((Node *) queryTree->rtable))
|
||||||
|
{
|
||||||
|
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||||
|
"cannot pushdown the subquery",
|
||||||
|
"Reference tables are not allowed in FROM "
|
||||||
|
"clause when the query has subqueries in "
|
||||||
|
"WHERE clause",
|
||||||
|
NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* DeferErrorIfUnsupportedFilters checks if all leaf queries in the given query have
|
* DeferErrorIfUnsupportedFilters checks if all leaf queries in the given query have
|
||||||
* same filter on the partition column. Note that if there are queries without
|
* same filter on the partition column. Note that if there are queries without
|
||||||
|
@ -853,6 +903,14 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
deferredError = DeferErrorIfUnsupportedSublinkAndReferenceTable(subqueryTree);
|
||||||
|
if (deferredError)
|
||||||
|
{
|
||||||
|
preconditionsSatisfied = false;
|
||||||
|
errorDetail = (char *) deferredError->detail;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* finally check and return deferred if not satisfied */
|
/* finally check and return deferred if not satisfied */
|
||||||
if (!preconditionsSatisfied)
|
if (!preconditionsSatisfied)
|
||||||
{
|
{
|
||||||
|
@ -1027,7 +1085,7 @@ DeferErrorIfUnsupportedTableCombination(Query *queryTree)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* TargetListOnPartitionColumn checks if at least one target list entry is on
|
* TargetListOnPartitionColumn checks if at least one target list entry is on
|
||||||
* partition column or the table is a reference table.
|
* partition column.
|
||||||
*/
|
*/
|
||||||
static bool
|
static bool
|
||||||
TargetListOnPartitionColumn(Query *query, List *targetEntryList)
|
TargetListOnPartitionColumn(Query *query, List *targetEntryList)
|
||||||
|
@ -1047,15 +1105,11 @@ TargetListOnPartitionColumn(Query *query, List *targetEntryList)
|
||||||
|
|
||||||
FindReferencedTableColumn(targetExpression, NIL, query, &relationId, &column);
|
FindReferencedTableColumn(targetExpression, NIL, query, &relationId, &column);
|
||||||
|
|
||||||
/*
|
/* if the expression belongs to reference table directly returns false */
|
||||||
* If the expression belongs to reference table directly returns true.
|
|
||||||
* We can assume that target list entry always on partition column of
|
|
||||||
* reference tables.
|
|
||||||
*/
|
|
||||||
if (IsDistributedTable(relationId) && PartitionMethod(relationId) ==
|
if (IsDistributedTable(relationId) && PartitionMethod(relationId) ==
|
||||||
DISTRIBUTE_BY_NONE)
|
DISTRIBUTE_BY_NONE)
|
||||||
{
|
{
|
||||||
targetListOnPartitionColumn = true;
|
targetListOnPartitionColumn = false;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1459,15 +1513,18 @@ HasUnsupportedReferenceTableJoin(PlannerRestrictionContext *plannerRestrictionCo
|
||||||
|
|
||||||
if (joinType == JOIN_SEMI || joinType == JOIN_ANTI || joinType == JOIN_LEFT)
|
if (joinType == JOIN_SEMI || joinType == JOIN_ANTI || joinType == JOIN_LEFT)
|
||||||
{
|
{
|
||||||
if (RelationInfoHasReferenceTable(plannerInfo, outerrel))
|
if (ShouldRecurseForReferenceTableJoinChecks(outerrel) &&
|
||||||
|
RelationInfoContainsReferenceTable(plannerInfo, outerrel))
|
||||||
{
|
{
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (joinType == JOIN_FULL)
|
else if (joinType == JOIN_FULL)
|
||||||
{
|
{
|
||||||
if (RelationInfoHasReferenceTable(plannerInfo, innerrel) ||
|
if ((ShouldRecurseForReferenceTableJoinChecks(innerrel) &&
|
||||||
RelationInfoHasReferenceTable(plannerInfo, outerrel))
|
RelationInfoContainsReferenceTable(plannerInfo, innerrel)) ||
|
||||||
|
(ShouldRecurseForReferenceTableJoinChecks(outerrel) &&
|
||||||
|
RelationInfoContainsReferenceTable(plannerInfo, outerrel)))
|
||||||
{
|
{
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -1479,12 +1536,66 @@ HasUnsupportedReferenceTableJoin(PlannerRestrictionContext *plannerRestrictionCo
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* RelationInfoHasReferenceTable check whether the relationInfo has reference table.
|
* ShouldRecurseForReferenceTableJoinChecks is a helper function for deciding
|
||||||
* Since relation ids of relationInfo indexes to the range table entry list of
|
* on whether the input relOptInfo should be checked for unsupported reference
|
||||||
* planner info, planner info is also passed.
|
* tables.
|
||||||
*/
|
*/
|
||||||
static bool
|
static bool
|
||||||
RelationInfoHasReferenceTable(PlannerInfo *plannerInfo, RelOptInfo *relationInfo)
|
ShouldRecurseForReferenceTableJoinChecks(RelOptInfo *relOptInfo)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* We shouldn't recursively go down for joins since we're already
|
||||||
|
* going to process each join seperately. Otherwise we'd restrict
|
||||||
|
* the coverage. See the below sketch where (h) denotes a hash
|
||||||
|
* distributed relation, (r) denotes a reference table, (L) denotes
|
||||||
|
* LEFT JOIN and (I) denotes INNER JOIN. If we're to recurse into
|
||||||
|
* the inner join, we'd be preventing to push down the following
|
||||||
|
* join tree, which is actually safe to push down.
|
||||||
|
*
|
||||||
|
* (L)
|
||||||
|
* / \
|
||||||
|
* (I) h
|
||||||
|
* / \
|
||||||
|
* r h
|
||||||
|
*/
|
||||||
|
if (relOptInfo->reloptkind == RELOPT_JOINREL)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Note that we treat the same query where relations appear in subqueries
|
||||||
|
* differently. (i.e., use SELECT * FROM r; instead of r)
|
||||||
|
*
|
||||||
|
* In that case, to relax some restrictions, we do the following optimization:
|
||||||
|
* If the subplan (i.e., plannerInfo corresponding to the subquery) contains any
|
||||||
|
* joins, we skip reference table checks keeping in mind that the join is already
|
||||||
|
* going to be processed seperately. This optimization should suffice for many
|
||||||
|
* use cases.
|
||||||
|
*/
|
||||||
|
if (relOptInfo->reloptkind == RELOPT_BASEREL && relOptInfo->subroot != NULL)
|
||||||
|
{
|
||||||
|
PlannerInfo *subroot = relOptInfo->subroot;
|
||||||
|
|
||||||
|
if (list_length(subroot->join_rel_list) > 0)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* RelationInfoContainsReferenceTable checks whether the relationInfo
|
||||||
|
* contains any reference tables. If found, the function returns true.
|
||||||
|
*
|
||||||
|
* Note that since relation ids of relationInfo indexes to the range
|
||||||
|
* table entry list of planner info, planner info is also passed.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
RelationInfoContainsReferenceTable(PlannerInfo *plannerInfo, RelOptInfo *relationInfo)
|
||||||
{
|
{
|
||||||
Relids relids = bms_copy(relationInfo->relids);
|
Relids relids = bms_copy(relationInfo->relids);
|
||||||
int relationId = -1;
|
int relationId = -1;
|
||||||
|
@ -2927,9 +3038,17 @@ ExtractRangeTableRelationWalkerWithRTEExpand(Node *node, List **rangeTableRelati
|
||||||
rangeTableRelationList, 0);
|
rangeTableRelationList, 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (IsA(node, Query))
|
||||||
|
{
|
||||||
|
walkIsComplete = query_tree_walker((Query *) node,
|
||||||
|
ExtractRangeTableRelationWalkerWithRTEExpand,
|
||||||
|
rangeTableRelationList, QTW_EXAMINE_RTES);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
walkIsComplete = ExtractRangeTableRelationWalker(node, rangeTableRelationList);
|
walkIsComplete = expression_tree_walker(node,
|
||||||
|
ExtractRangeTableRelationWalkerWithRTEExpand,
|
||||||
|
rangeTableRelationList);
|
||||||
}
|
}
|
||||||
|
|
||||||
return walkIsComplete;
|
return walkIsComplete;
|
||||||
|
|
|
@ -62,6 +62,7 @@ typedef struct AttributeEquivalenceClassMember
|
||||||
} AttributeEquivalenceClassMember;
|
} AttributeEquivalenceClassMember;
|
||||||
|
|
||||||
|
|
||||||
|
static uint32 ReferenceRelationCount(RelationRestrictionContext *restrictionContext);
|
||||||
static Var * FindTranslatedVar(List *appendRelList, Oid relationOid,
|
static Var * FindTranslatedVar(List *appendRelList, Oid relationOid,
|
||||||
Index relationRteIndex, Index *partitionKeyIndex);
|
Index relationRteIndex, Index *partitionKeyIndex);
|
||||||
static bool EquivalenceListContainsRelationsEquality(List *attributeEquivalenceList,
|
static bool EquivalenceListContainsRelationsEquality(List *attributeEquivalenceList,
|
||||||
|
@ -329,9 +330,8 @@ FindTranslatedVar(List *appendRelList, Oid relationOid, Index relationRteIndex,
|
||||||
* joined on their partition keys.
|
* joined on their partition keys.
|
||||||
*
|
*
|
||||||
* The function returns true if all relations are joined on their partition keys.
|
* The function returns true if all relations are joined on their partition keys.
|
||||||
* Otherwise, the function returns false. In order to support reference tables
|
* Otherwise, the function returns false. We ignore reference tables at all since
|
||||||
* with subqueries, equality between attributes of reference tables and partition
|
* they don't have partition keys.
|
||||||
* key of distributed tables are also considered.
|
|
||||||
*
|
*
|
||||||
* In order to do that, we invented a new equivalence class namely:
|
* In order to do that, we invented a new equivalence class namely:
|
||||||
* AttributeEquivalenceClass. In very simple words, a AttributeEquivalenceClass is
|
* AttributeEquivalenceClass. In very simple words, a AttributeEquivalenceClass is
|
||||||
|
@ -365,14 +365,24 @@ RestrictionEquivalenceForPartitionKeys(PlannerRestrictionContext *
|
||||||
List *joinRestrictionAttributeEquivalenceList = NIL;
|
List *joinRestrictionAttributeEquivalenceList = NIL;
|
||||||
List *allAttributeEquivalenceList = NIL;
|
List *allAttributeEquivalenceList = NIL;
|
||||||
|
|
||||||
|
uint32 referenceRelationCount = ReferenceRelationCount(restrictionContext);
|
||||||
uint32 totalRelationCount = list_length(restrictionContext->relationRestrictionList);
|
uint32 totalRelationCount = list_length(restrictionContext->relationRestrictionList);
|
||||||
|
uint32 nonReferenceRelationCount = totalRelationCount - referenceRelationCount;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If the query includes only one relation, we should not check the partition
|
* If the query includes a single relation which is not a reference table,
|
||||||
* column equality. Single table should not need to fetch data from other nodes
|
* we should not check the partition column equality.
|
||||||
* except it's own node(s).
|
* Consider two example cases:
|
||||||
|
* (i) The query includes only a single colocated relation
|
||||||
|
* (ii) A colocated relation is joined with a (or multiple) reference
|
||||||
|
* table(s) where colocated relation is not joined on the partition key
|
||||||
|
*
|
||||||
|
* For the above two cases, we don't need to execute the partition column equality
|
||||||
|
* algorithm. The reason is that the essence of this function is to ensure that the
|
||||||
|
* tasks that are going to be created should not need data from other tasks. In both
|
||||||
|
* cases mentioned above, the necessary data per task would be on available.
|
||||||
*/
|
*/
|
||||||
if (totalRelationCount == 1)
|
if (nonReferenceRelationCount <= 1)
|
||||||
{
|
{
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -394,6 +404,31 @@ RestrictionEquivalenceForPartitionKeys(PlannerRestrictionContext *
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ReferenceRelationCount iterates over the relations and returns the reference table
|
||||||
|
* relation count.
|
||||||
|
*/
|
||||||
|
static uint32
|
||||||
|
ReferenceRelationCount(RelationRestrictionContext *restrictionContext)
|
||||||
|
{
|
||||||
|
ListCell *relationRestrictionCell = NULL;
|
||||||
|
uint32 referenceRelationCount = 0;
|
||||||
|
|
||||||
|
foreach(relationRestrictionCell, restrictionContext->relationRestrictionList)
|
||||||
|
{
|
||||||
|
RelationRestriction *relationRestriction =
|
||||||
|
(RelationRestriction *) lfirst(relationRestrictionCell);
|
||||||
|
|
||||||
|
if (PartitionMethod(relationRestriction->relationId) == DISTRIBUTE_BY_NONE)
|
||||||
|
{
|
||||||
|
referenceRelationCount++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return referenceRelationCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* EquivalenceListContainsRelationsEquality gets a list of attributed equivalence
|
* EquivalenceListContainsRelationsEquality gets a list of attributed equivalence
|
||||||
* list and a relation restriction context. The function first generates a common
|
* list and a relation restriction context. The function first generates a common
|
||||||
|
@ -434,6 +469,12 @@ EquivalenceListContainsRelationsEquality(List *attributeEquivalenceList,
|
||||||
(RelationRestriction *) lfirst(relationRestrictionCell);
|
(RelationRestriction *) lfirst(relationRestrictionCell);
|
||||||
int rteIdentity = GetRTEIdentity(relationRestriction->rte);
|
int rteIdentity = GetRTEIdentity(relationRestriction->rte);
|
||||||
|
|
||||||
|
/* we shouldn't check for the equality of reference tables */
|
||||||
|
if (PartitionMethod(relationRestriction->relationId) == DISTRIBUTE_BY_NONE)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if (!bms_is_member(rteIdentity, commonRteIdentities))
|
if (!bms_is_member(rteIdentity, commonRteIdentities))
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
|
@ -621,7 +662,7 @@ GetVarFromAssignedParam(List *parentPlannerParamList, Param *plannerParam)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* GenerateCommonEquivalence gets a list of unrelated AttributeEquiavalanceClass
|
* GenerateCommonEquivalence gets a list of unrelated AttributeEquiavalanceClass
|
||||||
* whose all members are partition keys or a column of reference table.
|
* whose all members are partition keys.
|
||||||
*
|
*
|
||||||
* With the equivalence classes, the function follows the algorithm
|
* With the equivalence classes, the function follows the algorithm
|
||||||
* outlined below:
|
* outlined below:
|
||||||
|
@ -1084,8 +1125,14 @@ AddRteRelationToAttributeEquivalenceClass(AttributeEquivalenceClass **
|
||||||
|
|
||||||
Assert(rangeTableEntry->rtekind == RTE_RELATION);
|
Assert(rangeTableEntry->rtekind == RTE_RELATION);
|
||||||
|
|
||||||
if (PartitionMethod(relationId) != DISTRIBUTE_BY_NONE &&
|
/* we don't need reference tables in the equality on columns */
|
||||||
relationPartitionKey->varattno != varToBeAdded->varattno)
|
if (relationPartitionKey == NULL)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* we're only interested in distribution columns */
|
||||||
|
if (relationPartitionKey->varattno != varToBeAdded->varattno)
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1122,7 +1122,7 @@ WHERE
|
||||||
colocated_table_test.value_1 = reference_table_test.value_1;
|
colocated_table_test.value_1 = reference_table_test.value_1;
|
||||||
DEBUG: only reference tables may be queried when targeting a reference table with distributed INSERT ... SELECT
|
DEBUG: only reference tables may be queried when targeting a reference table with distributed INSERT ... SELECT
|
||||||
DEBUG: Collecting INSERT ... SELECT results on coordinator
|
DEBUG: Collecting INSERT ... SELECT results on coordinator
|
||||||
-- not pushable due to lack of equality between partition column and column of reference table
|
-- safe to push down even lack of equality between partition column and column of reference table
|
||||||
INSERT INTO
|
INSERT INTO
|
||||||
colocated_table_test (value_1, value_2)
|
colocated_table_test (value_1, value_2)
|
||||||
SELECT
|
SELECT
|
||||||
|
@ -1132,9 +1132,13 @@ FROM
|
||||||
WHERE
|
WHERE
|
||||||
colocated_table_test_2.value_4 = reference_table_test.value_4
|
colocated_table_test_2.value_4 = reference_table_test.value_4
|
||||||
RETURNING value_1, value_2;
|
RETURNING value_1, value_2;
|
||||||
ERROR: cannot perform distributed planning for the given modification
|
value_1 | value_2
|
||||||
DETAIL: Select query cannot be pushed down to the worker.
|
---------+---------
|
||||||
-- some more complex queries (Note that there are more complex queries in multi_insert_select.sql)
|
1 | 1
|
||||||
|
2 | 2
|
||||||
|
(2 rows)
|
||||||
|
|
||||||
|
-- similar query with the above, this time partition key but without equality
|
||||||
INSERT INTO
|
INSERT INTO
|
||||||
colocated_table_test (value_1, value_2)
|
colocated_table_test (value_1, value_2)
|
||||||
SELECT
|
SELECT
|
||||||
|
@ -1142,10 +1146,13 @@ SELECT
|
||||||
FROM
|
FROM
|
||||||
colocated_table_test_2, reference_table_test
|
colocated_table_test_2, reference_table_test
|
||||||
WHERE
|
WHERE
|
||||||
colocated_table_test_2.value_2 = reference_table_test.value_2
|
colocated_table_test_2.value_1 > reference_table_test.value_2
|
||||||
RETURNING value_1, value_2;
|
RETURNING value_1, value_2;
|
||||||
ERROR: cannot perform distributed planning for the given modification
|
value_1 | value_2
|
||||||
DETAIL: Select query cannot be pushed down to the worker.
|
---------+---------
|
||||||
|
2 | 1
|
||||||
|
(1 row)
|
||||||
|
|
||||||
-- partition column value comes from reference table, goes via coordinator
|
-- partition column value comes from reference table, goes via coordinator
|
||||||
INSERT INTO
|
INSERT INTO
|
||||||
colocated_table_test (value_1, value_2)
|
colocated_table_test (value_1, value_2)
|
||||||
|
@ -1606,7 +1613,7 @@ INSERT INTO reference_table_test VALUES (2, 2.0, '2', '2016-12-02');
|
||||||
SELECT master_modify_multiple_shards('DELETE FROM colocated_table_test');
|
SELECT master_modify_multiple_shards('DELETE FROM colocated_table_test');
|
||||||
master_modify_multiple_shards
|
master_modify_multiple_shards
|
||||||
-------------------------------
|
-------------------------------
|
||||||
6
|
9
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
ROLLBACK;
|
ROLLBACK;
|
||||||
|
|
|
@ -60,18 +60,38 @@ SELECT count(*) FROM
|
||||||
1
|
1
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
-- Should not work, no equality between partition column and reference table
|
-- Should work, although no equality between partition column and reference table
|
||||||
SELECT * FROM
|
SELECT subquery_1.item_id FROM
|
||||||
(SELECT random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
(SELECT user_buy_test_table.item_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||||
ON user_buy_test_table.item_id = users_ref_test_table.id) subquery_1;
|
ON user_buy_test_table.item_id = users_ref_test_table.id) subquery_1
|
||||||
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
ORDER BY 1;
|
||||||
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
item_id
|
||||||
-- Should not work, no equality between partition column and reference table
|
---------
|
||||||
SELECT * FROM
|
2
|
||||||
(SELECT random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
3
|
||||||
ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_1;
|
4
|
||||||
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
5
|
||||||
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
(4 rows)
|
||||||
|
|
||||||
|
-- Should work, although no equality between partition column and reference table
|
||||||
|
SELECT subquery_1.user_id FROM
|
||||||
|
(SELECT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||||
|
ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_1
|
||||||
|
ORDER BY 1;
|
||||||
|
user_id
|
||||||
|
---------
|
||||||
|
1
|
||||||
|
2
|
||||||
|
3
|
||||||
|
3
|
||||||
|
7
|
||||||
|
7
|
||||||
|
7
|
||||||
|
7
|
||||||
|
7
|
||||||
|
7
|
||||||
|
(10 rows)
|
||||||
|
|
||||||
-- Shouldn't work, reference table at the outer side is not allowed
|
-- Shouldn't work, reference table at the outer side is not allowed
|
||||||
SELECT * FROM
|
SELECT * FROM
|
||||||
(SELECT random() FROM users_ref_test_table LEFT JOIN user_buy_test_table
|
(SELECT random() FROM users_ref_test_table LEFT JOIN user_buy_test_table
|
||||||
|
@ -93,6 +113,33 @@ SELECT * FROM
|
||||||
ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1;
|
ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1;
|
||||||
ERROR: cannot pushdown the subquery
|
ERROR: cannot pushdown the subquery
|
||||||
DETAIL: There exist a reference table in the outer part of the outer join
|
DETAIL: There exist a reference table in the outer part of the outer join
|
||||||
|
-- Equi join test with reference table on non-partition keys
|
||||||
|
SELECT count(*) FROM
|
||||||
|
(SELECT random() FROM user_buy_test_table JOIN users_ref_test_table
|
||||||
|
ON user_buy_test_table.item_id = users_ref_test_table.id) subquery_1;
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
4
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- Non-equi join test with reference table on non-partition keys
|
||||||
|
SELECT count(*) FROM
|
||||||
|
(SELECT random() FROM user_buy_test_table JOIN users_ref_test_table
|
||||||
|
ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1;
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
10
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- Non-equi left joins with reference tables on non-partition keys
|
||||||
|
SELECT count(*) FROM
|
||||||
|
(SELECT random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||||
|
ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1;
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
10
|
||||||
|
(1 row)
|
||||||
|
|
||||||
-- Should pass since reference table locates in the inner part of each left join
|
-- Should pass since reference table locates in the inner part of each left join
|
||||||
SELECT count(*) FROM
|
SELECT count(*) FROM
|
||||||
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
|
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
|
||||||
|
@ -105,13 +152,238 @@ SELECT count(*) FROM
|
||||||
2
|
2
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
-- Should not pass since reference table locates in the outer part of right join
|
-- two subqueries, each include joins with reference table
|
||||||
|
-- also, two hash distributed tables are joined on partition keys
|
||||||
|
SELECT count(*) FROM
|
||||||
|
(SELECT DISTINCT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||||
|
ON user_buy_test_table.item_id > users_ref_test_table.id AND users_ref_test_table.k_no > 88 AND user_buy_test_table.item_id < 88) subquery_1,
|
||||||
|
(SELECT DISTINCT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||||
|
ON user_buy_test_table.user_id > users_ref_test_table.id AND users_ref_test_table.k_no > 44 AND user_buy_test_table.user_id > 44) subquery_2
|
||||||
|
WHERE subquery_1.user_id = subquery_2.user_id ;
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
4
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- Should be able to push down since reference tables are inner joined
|
||||||
|
-- with hash distributed tables, the results of those joins are the parts of
|
||||||
|
-- an outer join
|
||||||
|
SELECT subquery_2.id FROM
|
||||||
|
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
|
||||||
|
ON tt1.user_id = tt2.user_id) subquery_1
|
||||||
|
RIGHT JOIN
|
||||||
|
(SELECT tt1.user_id, ref.id, random() FROM user_buy_test_table as tt1 JOIN users_ref_test_table as ref
|
||||||
|
ON tt1.user_id = ref.id) subquery_2 ON subquery_1.user_id = subquery_2.user_id ORDER BY 1 DESC LIMIT 5;
|
||||||
|
id
|
||||||
|
----
|
||||||
|
3
|
||||||
|
2
|
||||||
|
1
|
||||||
|
(3 rows)
|
||||||
|
|
||||||
|
-- the same query as the above, but this Citus fails to pushdown the query
|
||||||
|
-- since the outer part of the right join doesn't include any joins
|
||||||
SELECT * FROM
|
SELECT * FROM
|
||||||
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
|
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
|
||||||
ON tt1.user_id = tt2.user_id) subquery_1
|
ON tt1.user_id = tt2.user_id) subquery_1
|
||||||
RIGHT JOIN
|
RIGHT JOIN
|
||||||
(SELECT tt1.user_id, random() FROM user_buy_test_table as tt1 JOIN users_ref_test_table as ref
|
(SELECT *, random() FROM (SELECT tt1.user_id, random() FROM user_buy_test_table as tt1 JOIN users_ref_test_table as ref
|
||||||
ON tt1.user_id = ref.id) subquery_2 ON subquery_1.user_id = subquery_2.user_id;
|
ON tt1.user_id = ref.id) subquery_2_inner) subquery_2 ON subquery_1.user_id = subquery_2.user_id;
|
||||||
|
ERROR: cannot pushdown the subquery
|
||||||
|
DETAIL: There exist a reference table in the outer part of the outer join
|
||||||
|
-- should be able to pushdown since reference table is in the
|
||||||
|
-- inner part of the left join
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_1)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
users_table.user_id, users_table.value_1, random()
|
||||||
|
FROM
|
||||||
|
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
|
||||||
|
INNER JOIN events_reference_table ON (events_reference_table.value_2 = users_table.user_id)
|
||||||
|
) as foo
|
||||||
|
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||||
|
user_id | sum
|
||||||
|
---------+----------
|
||||||
|
12 | 92221920
|
||||||
|
17 | 89192642
|
||||||
|
96 | 85143744
|
||||||
|
45 | 84267456
|
||||||
|
90 | 84157047
|
||||||
|
43 | 82110240
|
||||||
|
1 | 81735612
|
||||||
|
72 | 78992640
|
||||||
|
67 | 72385516
|
||||||
|
97 | 71002659
|
||||||
|
(10 rows)
|
||||||
|
|
||||||
|
-- same query as above, reference table is wrapped into a subquery
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_1)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
users_table.user_id, users_table.value_1, random()
|
||||||
|
FROM
|
||||||
|
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
|
||||||
|
INNER JOIN (SELECT *, random() FROM events_reference_table) as ref_all ON (ref_all.value_2 = users_table.user_id)
|
||||||
|
) as foo
|
||||||
|
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||||
|
user_id | sum
|
||||||
|
---------+----------
|
||||||
|
12 | 92221920
|
||||||
|
17 | 89192642
|
||||||
|
96 | 85143744
|
||||||
|
45 | 84267456
|
||||||
|
90 | 84157047
|
||||||
|
43 | 82110240
|
||||||
|
1 | 81735612
|
||||||
|
72 | 78992640
|
||||||
|
67 | 72385516
|
||||||
|
97 | 71002659
|
||||||
|
(10 rows)
|
||||||
|
|
||||||
|
-- should be able to pushdown since reference table is in the
|
||||||
|
-- inner part of the left join
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_1)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
users_table.user_id, users_table.value_1, random()
|
||||||
|
FROM
|
||||||
|
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
|
||||||
|
LEFT JOIN events_reference_table ON (events_reference_table.value_2 = users_table.user_id)
|
||||||
|
) as foo
|
||||||
|
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||||
|
user_id | sum
|
||||||
|
---------+----------
|
||||||
|
12 | 92221920
|
||||||
|
17 | 89192642
|
||||||
|
96 | 85143744
|
||||||
|
45 | 84267456
|
||||||
|
90 | 84157047
|
||||||
|
43 | 82110240
|
||||||
|
1 | 81735612
|
||||||
|
72 | 78992640
|
||||||
|
67 | 72385516
|
||||||
|
97 | 71002659
|
||||||
|
(10 rows)
|
||||||
|
|
||||||
|
-- should not be able to pushdown since reference table is in the
|
||||||
|
-- direct outer part of the left join
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_1)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
users_table.user_id, users_table.value_1, random()
|
||||||
|
FROM
|
||||||
|
events_reference_table LEFT JOIN users_table ON (users_table.user_id = events_reference_table.value_2)
|
||||||
|
LEFT JOIN events_table ON (events_table.user_id = users_table.user_id)
|
||||||
|
) as foo
|
||||||
|
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||||
|
ERROR: cannot pushdown the subquery
|
||||||
|
DETAIL: There exist a reference table in the outer part of the outer join
|
||||||
|
-- should not be able to pushdown since reference table is in the
|
||||||
|
-- direct outer part of the left join wrapped into a subquery
|
||||||
|
SELECT
|
||||||
|
*
|
||||||
|
FROM
|
||||||
|
(SELECT *, random() FROM events_reference_table) as ref_all LEFT JOIN users_table
|
||||||
|
ON (users_table.user_id = ref_all.value_2);
|
||||||
|
ERROR: cannot pushdown the subquery
|
||||||
|
DETAIL: There exist a reference table in the outer part of the outer join
|
||||||
|
-- should not be able to pushdown since reference table is in the
|
||||||
|
-- outer part of the left join
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_1)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
users_table.user_id, users_table.value_1, random()
|
||||||
|
FROM
|
||||||
|
events_reference_table LEFT JOIN users_table ON (users_table.user_id = events_reference_table.value_2)
|
||||||
|
LEFT JOIN events_table ON (events_table.user_id = users_table.user_id)
|
||||||
|
) as foo
|
||||||
|
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||||
|
ERROR: cannot pushdown the subquery
|
||||||
|
DETAIL: There exist a reference table in the outer part of the outer join
|
||||||
|
-- should be able to pushdown since reference table is in the
|
||||||
|
-- inner part of the left join
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT DISTINCT foo.user_id
|
||||||
|
FROM
|
||||||
|
((SELECT
|
||||||
|
"events"."time", "events"."user_id" as event_user_id, value_2 as event_val_2, random()
|
||||||
|
FROM
|
||||||
|
events_reference_table as "events"
|
||||||
|
WHERE
|
||||||
|
event_type > 80) as "temp_data_queries"
|
||||||
|
INNER JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id"
|
||||||
|
FROM
|
||||||
|
users_table as "users"
|
||||||
|
WHERE
|
||||||
|
user_id > 80 and value_2 = 5) as foo_in ON (event_val_2 = user_id)) as foo LEFT JOIN
|
||||||
|
(SELECT user_id as user_user_id FROM users_table) as fooo ON (user_id = user_user_id)) as bar;
|
||||||
|
user_id
|
||||||
|
---------
|
||||||
|
89
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- the same query but this time reference table is in the outer part of the query
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT DISTINCT foo.user_id
|
||||||
|
FROM
|
||||||
|
((SELECT
|
||||||
|
"events"."time", "events"."user_id" as event_user_id, value_2 as event_val_2, random()
|
||||||
|
FROM
|
||||||
|
events_reference_table as "events"
|
||||||
|
WHERE
|
||||||
|
event_type > 80) as "temp_data_queries"
|
||||||
|
LEFT JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id"
|
||||||
|
FROM
|
||||||
|
users_table as "users"
|
||||||
|
WHERE
|
||||||
|
user_id > 80 and value_2 = 5) as foo_in ON (event_val_2 = user_id)) as foo LEFT JOIN
|
||||||
|
(SELECT user_id as user_user_id FROM users_table) as fooo ON (user_id = user_user_id)) as bar;
|
||||||
|
ERROR: cannot pushdown the subquery
|
||||||
|
DETAIL: There exist a reference table in the outer part of the outer join
|
||||||
|
-- we could even suuport the following where the subquery
|
||||||
|
-- on the outer part of the left join contains a reference table
|
||||||
|
SELECT max(events_all.cnt), events_all.usr_id
|
||||||
|
FROM
|
||||||
|
(SELECT users_table.user_id as usr_id,
|
||||||
|
count(*) as cnt
|
||||||
|
FROM events_reference_table
|
||||||
|
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id) GROUP BY users_table.user_id) AS events_all
|
||||||
|
LEFT JOIN events_table ON (events_all.usr_id = events_table.user_id) GROUP BY 2 ORDER BY 1 DESC, 2 DESC LIMIT 5;
|
||||||
|
max | usr_id
|
||||||
|
-------+--------
|
||||||
|
14605 | 23
|
||||||
|
13090 | 17
|
||||||
|
12915 | 25
|
||||||
|
12317 | 90
|
||||||
|
12285 | 87
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
-- but, we fail to pushdown the following query where join that reference table appears
|
||||||
|
-- wrapped into a subquery
|
||||||
|
SELECT max(events_all.cnt),
|
||||||
|
events_all.usr_id
|
||||||
|
FROM(
|
||||||
|
SELECT *, random() FROM
|
||||||
|
(SELECT users_table.user_id AS usr_id, count(*) AS cnt
|
||||||
|
FROM events_reference_table
|
||||||
|
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
|
||||||
|
GROUP BY users_table.user_id) AS events_all_inner) AS events_all
|
||||||
|
LEFT JOIN events_table ON (events_all.usr_id = events_table.user_id)
|
||||||
|
GROUP BY 2
|
||||||
|
ORDER BY 1 DESC,
|
||||||
|
2 DESC
|
||||||
|
LIMIT 5;
|
||||||
ERROR: cannot pushdown the subquery
|
ERROR: cannot pushdown the subquery
|
||||||
DETAIL: There exist a reference table in the outer part of the outer join
|
DETAIL: There exist a reference table in the outer part of the outer join
|
||||||
-- LATERAL JOINs used with INNER JOINs with reference tables
|
-- LATERAL JOINs used with INNER JOINs with reference tables
|
||||||
|
@ -193,10 +465,10 @@ SELECT
|
||||||
count(*) AS value, "generated_group_field"
|
count(*) AS value, "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
DISTINCT "pushedDownQuery"."real_user_id", "generated_group_field"
|
DISTINCT "pushedDownQuery"."user_id", "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"eventQuery"."real_user_id", "eventQuery"."time", random(), ("eventQuery"."value_2") AS "generated_group_field"
|
"eventQuery"."user_id", "eventQuery"."time", random(), ("eventQuery"."value_2") AS "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
*
|
*
|
||||||
|
@ -204,7 +476,7 @@ SELECT
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."time", "events"."user_id", "events"."value_2"
|
"events"."time", "events"."user_id", "events"."value_2"
|
||||||
FROM
|
FROM
|
||||||
events_reference_table as "events"
|
events_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 10 and user_id < 40 AND event_type IN (40, 41, 42, 43, 44, 45) ) "temp_data_queries"
|
user_id > 10 and user_id < 40 AND event_type IN (40, 41, 42, 43, 44, 45) ) "temp_data_queries"
|
||||||
INNER JOIN
|
INNER JOIN
|
||||||
|
@ -355,8 +627,8 @@ LIMIT 10;
|
||||||
(6 rows)
|
(6 rows)
|
||||||
|
|
||||||
SET citus.subquery_pushdown to OFF;
|
SET citus.subquery_pushdown to OFF;
|
||||||
-- LEFT JOINs used with INNER JOINs should error out since reference table exist in the
|
-- LEFT JOINs used with INNER JOINs should not error out since reference table joined
|
||||||
-- left side of the LEFT JOIN.
|
-- with hash table that Citus can push down
|
||||||
SELECT
|
SELECT
|
||||||
count(*) AS cnt, "generated_group_field"
|
count(*) AS cnt, "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
|
@ -393,8 +665,20 @@ count(*) AS cnt, "generated_group_field"
|
||||||
ORDER BY
|
ORDER BY
|
||||||
cnt DESC, generated_group_field ASC
|
cnt DESC, generated_group_field ASC
|
||||||
LIMIT 10;
|
LIMIT 10;
|
||||||
ERROR: cannot pushdown the subquery
|
cnt | generated_group_field
|
||||||
DETAIL: There exist a reference table in the outer part of the outer join
|
-----+-----------------------
|
||||||
|
176 | 551
|
||||||
|
176 | 569
|
||||||
|
176 | 645
|
||||||
|
176 | 713
|
||||||
|
176 | 734
|
||||||
|
88 | 3
|
||||||
|
88 | 5
|
||||||
|
88 | 15
|
||||||
|
88 | 32
|
||||||
|
88 | 68
|
||||||
|
(10 rows)
|
||||||
|
|
||||||
-- RIGHT JOINs used with INNER JOINs should error out since reference table exist in the
|
-- RIGHT JOINs used with INNER JOINs should error out since reference table exist in the
|
||||||
-- right side of the RIGHT JOIN.
|
-- right side of the RIGHT JOIN.
|
||||||
SELECT
|
SELECT
|
||||||
|
@ -435,6 +719,133 @@ count(*) AS cnt, "generated_group_field"
|
||||||
LIMIT 10;
|
LIMIT 10;
|
||||||
ERROR: cannot pushdown the subquery
|
ERROR: cannot pushdown the subquery
|
||||||
DETAIL: There exist a reference table in the outer part of the outer join
|
DETAIL: There exist a reference table in the outer part of the outer join
|
||||||
|
-- right join where the inner part of the join includes a reference table
|
||||||
|
-- joined with hash partitioned table using non-equi join
|
||||||
|
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
t1.user_id,
|
||||||
|
array_agg(event ORDER BY time) AS events_table,
|
||||||
|
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||||||
|
FROM (
|
||||||
|
(
|
||||||
|
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_reference_table AS e
|
||||||
|
WHERE u.user_id > e.user_id
|
||||||
|
AND u.user_id >= 10
|
||||||
|
AND u.user_id <= 25
|
||||||
|
AND e.event_type IN (100, 101, 102)
|
||||||
|
)
|
||||||
|
) t1 RIGHT JOIN (
|
||||||
|
SELECT DISTINCT user_id,
|
||||||
|
'Has done event'::TEXT AS hasdone_event
|
||||||
|
FROM events_table AS e
|
||||||
|
WHERE e.user_id >= 10
|
||||||
|
AND e.user_id <= 25
|
||||||
|
AND e.event_type IN (106, 107, 108)
|
||||||
|
) t2 ON (t1.user_id = t2.user_id)
|
||||||
|
GROUP BY t1.user_id, hasdone_event
|
||||||
|
) t GROUP BY user_id, hasdone_event
|
||||||
|
ORDER BY user_id;
|
||||||
|
user_id | sum | length | hasdone_event
|
||||||
|
---------+-----+--------+----------------
|
||||||
|
11 | 306 | 14 | Has done event
|
||||||
|
12 | 363 | 14 | Has done event
|
||||||
|
14 | 510 | 14 | Has done event
|
||||||
|
18 | 600 | 14 | Has done event
|
||||||
|
19 | 618 | 14 | Has done event
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
-- a similar query as the above, with non-partition key comparison
|
||||||
|
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
t1.user_id,
|
||||||
|
array_agg(event ORDER BY time) AS events_table,
|
||||||
|
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||||||
|
FROM (
|
||||||
|
(
|
||||||
|
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_reference_table AS e
|
||||||
|
WHERE u.value_1 > e.user_id
|
||||||
|
AND u.user_id >= 10
|
||||||
|
AND u.user_id <= 25
|
||||||
|
AND e.event_type >= 125 AND e.event_type < 130
|
||||||
|
)
|
||||||
|
) t1 RIGHT JOIN (
|
||||||
|
SELECT DISTINCT user_id,
|
||||||
|
'Has done event'::TEXT AS hasdone_event
|
||||||
|
FROM events_table AS e
|
||||||
|
WHERE e.user_id >= 10
|
||||||
|
AND e.user_id <= 25
|
||||||
|
AND e.event_type >= 130 AND e.event_type < 135
|
||||||
|
) t2 ON (t1.user_id = t2.user_id)
|
||||||
|
GROUP BY t1.user_id, hasdone_event
|
||||||
|
) t GROUP BY user_id, hasdone_event
|
||||||
|
ORDER BY user_id;
|
||||||
|
user_id | sum | length | hasdone_event
|
||||||
|
---------+------+--------+----------------
|
||||||
|
10 | 6018 | 14 | Has done event
|
||||||
|
16 | 5373 | 14 | Has done event
|
||||||
|
17 | 5683 | 14 | Has done event
|
||||||
|
18 | 5321 | 14 | Has done event
|
||||||
|
(4 rows)
|
||||||
|
|
||||||
|
-- LEFT JOINs used with INNER JOINs
|
||||||
|
-- events_table and users_reference_table joined
|
||||||
|
-- with event_table.non_part_key < reference_table.any_key
|
||||||
|
SELECT
|
||||||
|
count(*) AS cnt, "generated_group_field"
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"eventQuery"."user_id", random(), generated_group_field
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"multi_group_wrapper_1".*, generated_group_field, random()
|
||||||
|
FROM
|
||||||
|
(SELECT *
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"events"."time", "events"."user_id" as event_user_id
|
||||||
|
FROM
|
||||||
|
events_table as "events"
|
||||||
|
WHERE
|
||||||
|
user_id > 80) "temp_data_queries"
|
||||||
|
INNER JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id"
|
||||||
|
FROM
|
||||||
|
users_reference_table as "users"
|
||||||
|
WHERE
|
||||||
|
user_id > 80 and value_2 = 5) "user_filters_1"
|
||||||
|
ON ("temp_data_queries".event_user_id < "user_filters_1".user_id)) AS "multi_group_wrapper_1"
|
||||||
|
RIGHT JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
|
||||||
|
FROM
|
||||||
|
users_table as "users") "left_group_by_1"
|
||||||
|
ON ("left_group_by_1".user_id = "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
|
||||||
|
group BY
|
||||||
|
"generated_group_field"
|
||||||
|
ORDER BY
|
||||||
|
cnt DESC, generated_group_field ASC
|
||||||
|
LIMIT 10;
|
||||||
|
cnt | generated_group_field
|
||||||
|
-----+-----------------------
|
||||||
|
540 | 814
|
||||||
|
533 | 746
|
||||||
|
473 | 914
|
||||||
|
449 | 684
|
||||||
|
445 | 715
|
||||||
|
423 | 191
|
||||||
|
419 | 39
|
||||||
|
415 | 108
|
||||||
|
414 | 819
|
||||||
|
411 | 642
|
||||||
|
(10 rows)
|
||||||
|
|
||||||
-- Outer subquery with reference table
|
-- Outer subquery with reference table
|
||||||
SELECT "some_users_data".user_id, lastseen
|
SELECT "some_users_data".user_id, lastseen
|
||||||
FROM
|
FROM
|
||||||
|
@ -468,9 +879,9 @@ limit 50;
|
||||||
ERROR: cannot pushdown the subquery
|
ERROR: cannot pushdown the subquery
|
||||||
DETAIL: There exist a reference table in the outer part of the outer join
|
DETAIL: There exist a reference table in the outer part of the outer join
|
||||||
--
|
--
|
||||||
-- UNIONs and JOINs with reference tables, shoukld error out
|
-- UNIONs and JOINs with reference tables, should error out
|
||||||
--
|
--
|
||||||
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
SELECT ("final_query"."event_types") as types
|
||||||
FROM
|
FROM
|
||||||
( SELECT *, random()
|
( SELECT *, random()
|
||||||
FROM
|
FROM
|
||||||
|
@ -484,7 +895,7 @@ FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."user_id", "events"."time", 0 AS event
|
"events"."user_id", "events"."time", 0 AS event
|
||||||
FROM
|
FROM
|
||||||
events_reference_table as "events"
|
events_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1)
|
event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1)
|
||||||
UNION
|
UNION
|
||||||
|
@ -494,7 +905,7 @@ FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."user_id", "events"."time", 1 AS event
|
"events"."user_id", "events"."time", 1 AS event
|
||||||
FROM
|
FROM
|
||||||
events_table as "events"
|
events_reference_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
event_type IN (15, 16, 17, 18, 19) ) events_subquery_2)
|
event_type IN (15, 16, 17, 18, 19) ) events_subquery_2)
|
||||||
UNION
|
UNION
|
||||||
|
@ -526,11 +937,9 @@ INNER JOIN
|
||||||
WHERE
|
WHERE
|
||||||
value_1 > 50 and value_1 < 70) AS t
|
value_1 > 50 and value_1 < 70) AS t
|
||||||
ON (t.user_id = q.user_id)) as final_query
|
ON (t.user_id = q.user_id)) as final_query
|
||||||
GROUP BY
|
|
||||||
types
|
|
||||||
ORDER BY
|
ORDER BY
|
||||||
types;
|
types;
|
||||||
ERROR: cannot push down this subquery
|
ERROR: cannot push down this subquery
|
||||||
DETAIL: Reference tables are not supported with union operator
|
DETAIL: Reference tables are not supported with union operator
|
||||||
-- reference table exist in the subquery of union, should error out
|
-- reference table exist in the subquery of union, should error out
|
||||||
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
||||||
|
@ -560,15 +969,15 @@ FROM
|
||||||
SELECT * FROM
|
SELECT * FROM
|
||||||
(
|
(
|
||||||
SELECT
|
SELECT
|
||||||
max("events"."time"),
|
max("users"."time"),
|
||||||
0 AS event,
|
0 AS event,
|
||||||
"events"."user_id"
|
"users"."user_id"
|
||||||
FROM
|
FROM
|
||||||
events_reference_table as "events", users_table as "users"
|
events_reference_table as "events", users_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
events.user_id = users.user_id AND
|
events.user_id = users.user_id AND
|
||||||
event_type IN (10, 11, 12, 13, 14, 15)
|
event_type IN (10, 11, 12, 13, 14, 15)
|
||||||
GROUP BY "events"."user_id"
|
GROUP BY "users"."user_id"
|
||||||
) as events_subquery_5
|
) as events_subquery_5
|
||||||
) events_subquery_2)
|
) events_subquery_2)
|
||||||
UNION
|
UNION
|
||||||
|
@ -660,6 +1069,117 @@ GROUP BY types
|
||||||
ORDER BY types;
|
ORDER BY types;
|
||||||
ERROR: cannot push down this subquery
|
ERROR: cannot push down this subquery
|
||||||
DETAIL: Reference tables are not supported with union operator
|
DETAIL: Reference tables are not supported with union operator
|
||||||
|
-- just a sanity check that we don't allow this if the reference table is on the
|
||||||
|
-- left part of the left join
|
||||||
|
SELECT count(*) FROM
|
||||||
|
(SELECT random() FROM users_ref_test_table LEFT JOIN user_buy_test_table
|
||||||
|
ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1;
|
||||||
|
ERROR: cannot pushdown the subquery
|
||||||
|
DETAIL: There exist a reference table in the outer part of the outer join
|
||||||
|
-- we don't allow non equi join among hash partitioned tables
|
||||||
|
SELECT count(*) FROM
|
||||||
|
(SELECT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||||
|
ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1,
|
||||||
|
(SELECT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||||
|
ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_2
|
||||||
|
WHERE subquery_1.user_id != subquery_2.user_id ;
|
||||||
|
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
||||||
|
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
||||||
|
-- we cannot push this query since hash partitioned tables
|
||||||
|
-- are not joined on partition keys with equality
|
||||||
|
SELECT
|
||||||
|
count(*) AS cnt, "generated_group_field"
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"eventQuery"."user_id", random(), generated_group_field
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"multi_group_wrapper_1".*, generated_group_field, random()
|
||||||
|
FROM
|
||||||
|
(SELECT *
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"events"."time", "events"."user_id" as event_user_id
|
||||||
|
FROM
|
||||||
|
events_table as "events"
|
||||||
|
WHERE
|
||||||
|
user_id > 80) "temp_data_queries"
|
||||||
|
INNER JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id"
|
||||||
|
FROM
|
||||||
|
users_reference_table as "users"
|
||||||
|
WHERE
|
||||||
|
user_id > 80 and value_2 = 5) "user_filters_1"
|
||||||
|
ON ("temp_data_queries".event_user_id < "user_filters_1".user_id)) AS "multi_group_wrapper_1"
|
||||||
|
RIGHT JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
|
||||||
|
FROM
|
||||||
|
users_table as "users") "left_group_by_1"
|
||||||
|
ON ("left_group_by_1".user_id > "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
|
||||||
|
group BY
|
||||||
|
"generated_group_field"
|
||||||
|
ORDER BY
|
||||||
|
cnt DESC, generated_group_field ASC
|
||||||
|
LIMIT 10;
|
||||||
|
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
||||||
|
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
||||||
|
-- two hash partitioned relations are not joined
|
||||||
|
-- on partiton keys although reference table is fine
|
||||||
|
-- to push down
|
||||||
|
SELECT
|
||||||
|
u1.user_id, count(*)
|
||||||
|
FROM
|
||||||
|
events_table as e1, users_table as u1
|
||||||
|
WHERE
|
||||||
|
event_type IN
|
||||||
|
(SELECT
|
||||||
|
event_type
|
||||||
|
FROM
|
||||||
|
events_reference_table as e2
|
||||||
|
WHERE
|
||||||
|
value_2 = 15 AND
|
||||||
|
value_3 > 25 AND
|
||||||
|
e1.value_2 > e2.value_2
|
||||||
|
)
|
||||||
|
AND u1.user_id > e1.user_id
|
||||||
|
GROUP BY 1
|
||||||
|
ORDER BY 2 DESC, 1 DESC
|
||||||
|
LIMIT 5;
|
||||||
|
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
||||||
|
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
||||||
|
SELECT foo.user_id FROM
|
||||||
|
(
|
||||||
|
SELECT m.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
|
||||||
|
WHERE event_type > 100000
|
||||||
|
) as foo;
|
||||||
|
user_id
|
||||||
|
---------
|
||||||
|
(0 rows)
|
||||||
|
|
||||||
|
-- not supported since group by is on the reference table column
|
||||||
|
SELECT foo.user_id FROM
|
||||||
|
(
|
||||||
|
SELECT r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
|
||||||
|
GROUP BY r.user_id
|
||||||
|
) as foo;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Group by list without partition column is currently unsupported
|
||||||
|
-- not supported since distinct is on the reference table column
|
||||||
|
SELECT foo.user_id FROM
|
||||||
|
(
|
||||||
|
SELECT DISTINCT r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
|
||||||
|
) as foo;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Distinct on columns without partition column is currently unsupported
|
||||||
|
-- not supported since distinct on is on the reference table column
|
||||||
|
SELECT foo.user_id FROM
|
||||||
|
(
|
||||||
|
SELECT DISTINCT ON(r.user_id) r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
|
||||||
|
) as foo;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Distinct on columns without partition column is currently unsupported
|
||||||
DROP TABLE user_buy_test_table;
|
DROP TABLE user_buy_test_table;
|
||||||
DROP TABLE users_ref_test_table;
|
DROP TABLE users_ref_test_table;
|
||||||
DROP TABLE users_return_test_table;
|
DROP TABLE users_return_test_table;
|
||||||
|
|
|
@ -1,33 +1,5 @@
|
||||||
--
|
--
|
||||||
-- queries to test the subquery pushdown on reference tables
|
-- queries to test the subquery pushdown on reference tables
|
||||||
-- subqueries in WHERE with greater operator
|
|
||||||
SELECT
|
|
||||||
user_id
|
|
||||||
FROM
|
|
||||||
users_table
|
|
||||||
WHERE
|
|
||||||
value_2 >
|
|
||||||
(SELECT
|
|
||||||
max(value_2)
|
|
||||||
FROM
|
|
||||||
events_reference_table
|
|
||||||
WHERE
|
|
||||||
users_table.user_id = events_reference_table.user_id AND event_type = 50
|
|
||||||
GROUP BY
|
|
||||||
user_id
|
|
||||||
)
|
|
||||||
GROUP BY user_id
|
|
||||||
HAVING count(*) > 66
|
|
||||||
ORDER BY user_id
|
|
||||||
LIMIT 5;
|
|
||||||
user_id
|
|
||||||
---------
|
|
||||||
49
|
|
||||||
55
|
|
||||||
56
|
|
||||||
63
|
|
||||||
(4 rows)
|
|
||||||
|
|
||||||
-- subqueries in WHERE with IN operator
|
-- subqueries in WHERE with IN operator
|
||||||
SELECT
|
SELECT
|
||||||
user_id
|
user_id
|
||||||
|
@ -91,10 +63,10 @@ WHERE
|
||||||
)
|
)
|
||||||
LIMIT 3;
|
LIMIT 3;
|
||||||
ERROR: cannot pushdown the subquery
|
ERROR: cannot pushdown the subquery
|
||||||
DETAIL: There exist a reference table in the outer part of the outer join
|
DETAIL: Reference tables are not allowed in FROM clause when the query has subqueries in WHERE clause
|
||||||
-- subqueries in WHERE with IN operator without equality
|
-- subqueries in WHERE with IN operator without equality
|
||||||
SELECT
|
SELECT
|
||||||
user_id
|
users_table.user_id, count(*)
|
||||||
FROM
|
FROM
|
||||||
users_table
|
users_table
|
||||||
WHERE
|
WHERE
|
||||||
|
@ -106,59 +78,15 @@ WHERE
|
||||||
WHERE
|
WHERE
|
||||||
users_table.user_id > events_reference_table.user_id
|
users_table.user_id > events_reference_table.user_id
|
||||||
)
|
)
|
||||||
GROUP BY user_id
|
GROUP BY users_table.user_id
|
||||||
ORDER BY user_id
|
ORDER BY 2 DESC, 1 DESC
|
||||||
LIMIT 3;
|
LIMIT 3;
|
||||||
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
user_id | count
|
||||||
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
---------+-------
|
||||||
-- have reference table without any equality, should error out
|
87 | 117
|
||||||
SELECT
|
59 | 115
|
||||||
user_id
|
46 | 115
|
||||||
FROM
|
(3 rows)
|
||||||
users_table
|
|
||||||
WHERE
|
|
||||||
value_2 >
|
|
||||||
(SELECT
|
|
||||||
max(value_2)
|
|
||||||
FROM
|
|
||||||
events_reference_table
|
|
||||||
WHERE
|
|
||||||
event_type = 50
|
|
||||||
GROUP BY
|
|
||||||
user_id
|
|
||||||
)
|
|
||||||
GROUP BY user_id
|
|
||||||
HAVING count(*) > 66
|
|
||||||
ORDER BY user_id
|
|
||||||
LIMIT 5;
|
|
||||||
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
|
||||||
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
|
||||||
-- users that appeared more than 118 times, should run since the reference table
|
|
||||||
-- on the right side of the semi join
|
|
||||||
SELECT
|
|
||||||
user_id
|
|
||||||
FROM
|
|
||||||
users_table
|
|
||||||
WHERE 118 <=
|
|
||||||
(SELECT
|
|
||||||
count(*)
|
|
||||||
FROM
|
|
||||||
events_reference_table
|
|
||||||
WHERE
|
|
||||||
users_table.user_id = events_reference_table.user_id
|
|
||||||
GROUP BY
|
|
||||||
user_id)
|
|
||||||
GROUP BY
|
|
||||||
user_id
|
|
||||||
ORDER BY
|
|
||||||
user_id;
|
|
||||||
user_id
|
|
||||||
---------
|
|
||||||
13
|
|
||||||
17
|
|
||||||
23
|
|
||||||
25
|
|
||||||
(4 rows)
|
|
||||||
|
|
||||||
-- should error out since reference table exist on the left side
|
-- should error out since reference table exist on the left side
|
||||||
-- of the left lateral join
|
-- of the left lateral join
|
||||||
|
@ -224,3 +152,194 @@ SELECT user_id, value_2 FROM users_table WHERE
|
||||||
ORDER BY 1, 2;
|
ORDER BY 1, 2;
|
||||||
ERROR: cannot pushdown the subquery
|
ERROR: cannot pushdown the subquery
|
||||||
DETAIL: There exist a reference table in the outer part of the outer join
|
DETAIL: There exist a reference table in the outer part of the outer join
|
||||||
|
-- non-partition key equality with reference table
|
||||||
|
SELECT
|
||||||
|
user_id, count(*)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WHERE
|
||||||
|
value_3 =ANY(SELECT value_2 FROM users_reference_table WHERE value_1 >= 10 AND value_1 <= 20)
|
||||||
|
GROUP BY 1 ORDER BY 2 DESC, 1 DESC LIMIT 5;
|
||||||
|
user_id | count
|
||||||
|
---------+-------
|
||||||
|
48 | 18
|
||||||
|
26 | 18
|
||||||
|
15 | 17
|
||||||
|
54 | 16
|
||||||
|
35 | 15
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
-- non-partition key comparison with reference table
|
||||||
|
SELECT
|
||||||
|
user_id, count(*)
|
||||||
|
FROM
|
||||||
|
events_table as e1
|
||||||
|
WHERE
|
||||||
|
event_type IN
|
||||||
|
(SELECT
|
||||||
|
event_type
|
||||||
|
FROM
|
||||||
|
events_reference_table as e2
|
||||||
|
WHERE
|
||||||
|
value_2 = 15 AND
|
||||||
|
value_3 > 25 AND
|
||||||
|
e1.value_2 > e2.value_2
|
||||||
|
)
|
||||||
|
GROUP BY 1
|
||||||
|
ORDER BY 2 DESC, 1 DESC
|
||||||
|
LIMIT 5;
|
||||||
|
user_id | count
|
||||||
|
---------+-------
|
||||||
|
3 | 5
|
||||||
|
56 | 4
|
||||||
|
99 | 2
|
||||||
|
94 | 2
|
||||||
|
92 | 2
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
-- subqueries in both WHERE and FROM clauses
|
||||||
|
-- should work since reference table is on the
|
||||||
|
-- inner part of the join
|
||||||
|
SELECT user_id, value_2 FROM users_table WHERE
|
||||||
|
value_1 > 101 AND value_1 < 110
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND user_id IN
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
e1.user_id
|
||||||
|
FROM (
|
||||||
|
-- Get the first time each user viewed the homepage.
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
1 AS view_homepage,
|
||||||
|
min(time) AS view_homepage_time
|
||||||
|
FROM events_table
|
||||||
|
WHERE
|
||||||
|
event_type IN (10, 20, 30, 40, 50, 60, 70, 80, 90)
|
||||||
|
GROUP BY user_id
|
||||||
|
) e1 LEFT JOIN LATERAL (
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
1 AS use_demo,
|
||||||
|
time AS use_demo_time
|
||||||
|
FROM events_table
|
||||||
|
WHERE
|
||||||
|
user_id = e1.user_id AND
|
||||||
|
event_type IN (11, 21, 31, 41, 51, 61, 71, 81, 91)
|
||||||
|
ORDER BY time
|
||||||
|
) e2 ON true LEFT JOIN LATERAL (
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
1 AS enter_credit_card,
|
||||||
|
time AS enter_credit_card_time
|
||||||
|
FROM events_table
|
||||||
|
WHERE
|
||||||
|
user_id = e2.user_id AND
|
||||||
|
event_type IN (12, 22, 32, 42, 52, 62, 72, 82, 92)
|
||||||
|
ORDER BY time
|
||||||
|
) e3 ON true LEFT JOIN LATERAL (
|
||||||
|
SELECT
|
||||||
|
1 AS submit_card_info,
|
||||||
|
user_id,
|
||||||
|
time AS enter_credit_card_time
|
||||||
|
FROM events_table
|
||||||
|
WHERE
|
||||||
|
user_id = e3.user_id AND
|
||||||
|
event_type IN (13, 23, 33, 43, 53, 63, 73, 83, 93)
|
||||||
|
ORDER BY time
|
||||||
|
) e4 ON true LEFT JOIN LATERAL (
|
||||||
|
SELECT
|
||||||
|
1 AS see_bought_screen
|
||||||
|
FROM events_reference_table
|
||||||
|
WHERE
|
||||||
|
user_id = e4.user_id AND
|
||||||
|
event_type IN (14, 24, 34, 44, 54, 64, 74, 84, 94)
|
||||||
|
ORDER BY time
|
||||||
|
) e5 ON true
|
||||||
|
group by e1.user_id
|
||||||
|
HAVING sum(submit_card_info) > 0
|
||||||
|
)
|
||||||
|
ORDER BY 1, 2;
|
||||||
|
user_id | value_2
|
||||||
|
---------+---------
|
||||||
|
5 | 884
|
||||||
|
42 | 55
|
||||||
|
42 | 471
|
||||||
|
51 | 758
|
||||||
|
72 | 897
|
||||||
|
82 | 691
|
||||||
|
95 | 951
|
||||||
|
(7 rows)
|
||||||
|
|
||||||
|
-- reference tables are not allowed if there is sublink
|
||||||
|
SELECT
|
||||||
|
count(*)
|
||||||
|
FROM
|
||||||
|
users_reference_table
|
||||||
|
WHERE user_id
|
||||||
|
NOT IN
|
||||||
|
(SELECT users_table.value_2 FROM users_table JOIN users_reference_table as u2 ON users_table.value_2 = u2.value_2);
|
||||||
|
ERROR: cannot pushdown the subquery
|
||||||
|
DETAIL: Reference tables are not allowed in FROM clause when the query has subqueries in WHERE clause
|
||||||
|
-- reference tables are not allowed if there is sublink
|
||||||
|
SELECT count(*)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, random() FROM users_reference_table) AS vals
|
||||||
|
WHERE vals.user_id NOT IN
|
||||||
|
(SELECT users_table.value_2
|
||||||
|
FROM users_table
|
||||||
|
JOIN users_reference_table AS u2 ON users_table.value_2 = u2.value_2);
|
||||||
|
ERROR: cannot pushdown the subquery
|
||||||
|
DETAIL: Reference tables are not allowed in FROM clause when the query has subqueries in WHERE clause
|
||||||
|
-- reference tables are not allowed if there is sublink
|
||||||
|
SELECT user_id,
|
||||||
|
count(*)
|
||||||
|
FROM users_reference_table
|
||||||
|
WHERE value_2 > ALL
|
||||||
|
(SELECT min(value_2)
|
||||||
|
FROM events_table
|
||||||
|
WHERE event_type > 50 AND users_reference_table.user_id = events_table.user_id
|
||||||
|
GROUP BY user_id)
|
||||||
|
GROUP BY user_id
|
||||||
|
HAVING count(*) > 66
|
||||||
|
ORDER BY 2 DESC,
|
||||||
|
1 DESC
|
||||||
|
LIMIT 5;
|
||||||
|
ERROR: cannot pushdown the subquery
|
||||||
|
DETAIL: Reference tables are not allowed in FROM clause when the query has subqueries in WHERE clause
|
||||||
|
-- reference tables are not allowed if there is sublink
|
||||||
|
-- this time in the subquery
|
||||||
|
SELECT *
|
||||||
|
FROM users_table
|
||||||
|
WHERE user_id IN
|
||||||
|
(SELECT users_table.user_id
|
||||||
|
FROM users_table,
|
||||||
|
users_reference_table
|
||||||
|
WHERE users_reference_table.user_id NOT IN
|
||||||
|
(SELECT value_2
|
||||||
|
FROM users_reference_table AS u2));
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Reference tables are not allowed in FROM clause when the query has subqueries in WHERE clause
|
||||||
|
-- not supported since GROUP BY references to an upper level query
|
||||||
|
SELECT
|
||||||
|
user_id
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WHERE
|
||||||
|
value_2 >
|
||||||
|
(SELECT
|
||||||
|
max(value_2)
|
||||||
|
FROM
|
||||||
|
events_reference_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_reference_table.user_id AND event_type = 50
|
||||||
|
GROUP BY
|
||||||
|
users_table.user_id
|
||||||
|
)
|
||||||
|
GROUP BY user_id
|
||||||
|
HAVING count(*) > 66
|
||||||
|
ORDER BY user_id
|
||||||
|
LIMIT 5;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Group by list without partition column is currently unsupported
|
||||||
|
|
|
@ -356,6 +356,7 @@ SET citus.shard_max_size TO "1MB";
|
||||||
|
|
||||||
CREATE TABLE events_reference_table (like events_table including all);
|
CREATE TABLE events_reference_table (like events_table including all);
|
||||||
SELECT create_reference_table('events_reference_table');
|
SELECT create_reference_table('events_reference_table');
|
||||||
|
CREATE INDEX events_ref_val2 on events_reference_table(value_2);
|
||||||
INSERT INTO events_reference_table SELECT * FROM events_table;
|
INSERT INTO events_reference_table SELECT * FROM events_table;
|
||||||
|
|
||||||
CREATE TABLE users_reference_table (like users_table including all);
|
CREATE TABLE users_reference_table (like users_table including all);
|
||||||
|
|
|
@ -428,6 +428,7 @@ SELECT create_reference_table('events_reference_table');
|
||||||
|
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
CREATE INDEX events_ref_val2 on events_reference_table(value_2);
|
||||||
INSERT INTO events_reference_table SELECT * FROM events_table;
|
INSERT INTO events_reference_table SELECT * FROM events_table;
|
||||||
CREATE TABLE users_reference_table (like users_table including all);
|
CREATE TABLE users_reference_table (like users_table including all);
|
||||||
SELECT create_reference_table('users_reference_table');
|
SELECT create_reference_table('users_reference_table');
|
||||||
|
|
|
@ -707,7 +707,7 @@ FROM
|
||||||
WHERE
|
WHERE
|
||||||
colocated_table_test.value_1 = reference_table_test.value_1;
|
colocated_table_test.value_1 = reference_table_test.value_1;
|
||||||
|
|
||||||
-- not pushable due to lack of equality between partition column and column of reference table
|
-- safe to push down even lack of equality between partition column and column of reference table
|
||||||
INSERT INTO
|
INSERT INTO
|
||||||
colocated_table_test (value_1, value_2)
|
colocated_table_test (value_1, value_2)
|
||||||
SELECT
|
SELECT
|
||||||
|
@ -718,7 +718,7 @@ WHERE
|
||||||
colocated_table_test_2.value_4 = reference_table_test.value_4
|
colocated_table_test_2.value_4 = reference_table_test.value_4
|
||||||
RETURNING value_1, value_2;
|
RETURNING value_1, value_2;
|
||||||
|
|
||||||
-- some more complex queries (Note that there are more complex queries in multi_insert_select.sql)
|
-- similar query with the above, this time partition key but without equality
|
||||||
INSERT INTO
|
INSERT INTO
|
||||||
colocated_table_test (value_1, value_2)
|
colocated_table_test (value_1, value_2)
|
||||||
SELECT
|
SELECT
|
||||||
|
@ -726,7 +726,7 @@ SELECT
|
||||||
FROM
|
FROM
|
||||||
colocated_table_test_2, reference_table_test
|
colocated_table_test_2, reference_table_test
|
||||||
WHERE
|
WHERE
|
||||||
colocated_table_test_2.value_2 = reference_table_test.value_2
|
colocated_table_test_2.value_1 > reference_table_test.value_2
|
||||||
RETURNING value_1, value_2;
|
RETURNING value_1, value_2;
|
||||||
|
|
||||||
-- partition column value comes from reference table, goes via coordinator
|
-- partition column value comes from reference table, goes via coordinator
|
||||||
|
|
|
@ -42,15 +42,17 @@ SELECT count(*) FROM
|
||||||
(SELECT random(), k_no FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
(SELECT random(), k_no FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||||
ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1 WHERE k_no = 47;
|
ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1 WHERE k_no = 47;
|
||||||
|
|
||||||
-- Should not work, no equality between partition column and reference table
|
-- Should work, although no equality between partition column and reference table
|
||||||
SELECT * FROM
|
SELECT subquery_1.item_id FROM
|
||||||
(SELECT random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
(SELECT user_buy_test_table.item_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||||
ON user_buy_test_table.item_id = users_ref_test_table.id) subquery_1;
|
ON user_buy_test_table.item_id = users_ref_test_table.id) subquery_1
|
||||||
|
ORDER BY 1;
|
||||||
|
|
||||||
-- Should not work, no equality between partition column and reference table
|
-- Should work, although no equality between partition column and reference table
|
||||||
SELECT * FROM
|
SELECT subquery_1.user_id FROM
|
||||||
(SELECT random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
(SELECT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||||
ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_1;
|
ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_1
|
||||||
|
ORDER BY 1;
|
||||||
|
|
||||||
-- Shouldn't work, reference table at the outer side is not allowed
|
-- Shouldn't work, reference table at the outer side is not allowed
|
||||||
SELECT * FROM
|
SELECT * FROM
|
||||||
|
@ -67,6 +69,21 @@ SELECT * FROM
|
||||||
(SELECT random() FROM user_buy_test_table RIGHT JOIN users_ref_test_table
|
(SELECT random() FROM user_buy_test_table RIGHT JOIN users_ref_test_table
|
||||||
ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1;
|
ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1;
|
||||||
|
|
||||||
|
-- Equi join test with reference table on non-partition keys
|
||||||
|
SELECT count(*) FROM
|
||||||
|
(SELECT random() FROM user_buy_test_table JOIN users_ref_test_table
|
||||||
|
ON user_buy_test_table.item_id = users_ref_test_table.id) subquery_1;
|
||||||
|
|
||||||
|
-- Non-equi join test with reference table on non-partition keys
|
||||||
|
SELECT count(*) FROM
|
||||||
|
(SELECT random() FROM user_buy_test_table JOIN users_ref_test_table
|
||||||
|
ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1;
|
||||||
|
|
||||||
|
-- Non-equi left joins with reference tables on non-partition keys
|
||||||
|
SELECT count(*) FROM
|
||||||
|
(SELECT random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||||
|
ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1;
|
||||||
|
|
||||||
-- Should pass since reference table locates in the inner part of each left join
|
-- Should pass since reference table locates in the inner part of each left join
|
||||||
SELECT count(*) FROM
|
SELECT count(*) FROM
|
||||||
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
|
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
|
||||||
|
@ -75,13 +92,172 @@ SELECT count(*) FROM
|
||||||
(SELECT tt1.user_id, random() FROM user_buy_test_table as tt1 LEFT JOIN users_ref_test_table as ref
|
(SELECT tt1.user_id, random() FROM user_buy_test_table as tt1 LEFT JOIN users_ref_test_table as ref
|
||||||
ON tt1.user_id = ref.id) subquery_2 ON subquery_1.user_id = subquery_2.user_id;
|
ON tt1.user_id = ref.id) subquery_2 ON subquery_1.user_id = subquery_2.user_id;
|
||||||
|
|
||||||
-- Should not pass since reference table locates in the outer part of right join
|
-- two subqueries, each include joins with reference table
|
||||||
|
-- also, two hash distributed tables are joined on partition keys
|
||||||
|
SELECT count(*) FROM
|
||||||
|
(SELECT DISTINCT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||||
|
ON user_buy_test_table.item_id > users_ref_test_table.id AND users_ref_test_table.k_no > 88 AND user_buy_test_table.item_id < 88) subquery_1,
|
||||||
|
(SELECT DISTINCT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||||
|
ON user_buy_test_table.user_id > users_ref_test_table.id AND users_ref_test_table.k_no > 44 AND user_buy_test_table.user_id > 44) subquery_2
|
||||||
|
WHERE subquery_1.user_id = subquery_2.user_id ;
|
||||||
|
|
||||||
|
-- Should be able to push down since reference tables are inner joined
|
||||||
|
-- with hash distributed tables, the results of those joins are the parts of
|
||||||
|
-- an outer join
|
||||||
|
SELECT subquery_2.id FROM
|
||||||
|
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
|
||||||
|
ON tt1.user_id = tt2.user_id) subquery_1
|
||||||
|
RIGHT JOIN
|
||||||
|
(SELECT tt1.user_id, ref.id, random() FROM user_buy_test_table as tt1 JOIN users_ref_test_table as ref
|
||||||
|
ON tt1.user_id = ref.id) subquery_2 ON subquery_1.user_id = subquery_2.user_id ORDER BY 1 DESC LIMIT 5;
|
||||||
|
|
||||||
|
-- the same query as the above, but this Citus fails to pushdown the query
|
||||||
|
-- since the outer part of the right join doesn't include any joins
|
||||||
SELECT * FROM
|
SELECT * FROM
|
||||||
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
|
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
|
||||||
ON tt1.user_id = tt2.user_id) subquery_1
|
ON tt1.user_id = tt2.user_id) subquery_1
|
||||||
RIGHT JOIN
|
RIGHT JOIN
|
||||||
(SELECT tt1.user_id, random() FROM user_buy_test_table as tt1 JOIN users_ref_test_table as ref
|
(SELECT *, random() FROM (SELECT tt1.user_id, random() FROM user_buy_test_table as tt1 JOIN users_ref_test_table as ref
|
||||||
ON tt1.user_id = ref.id) subquery_2 ON subquery_1.user_id = subquery_2.user_id;
|
ON tt1.user_id = ref.id) subquery_2_inner) subquery_2 ON subquery_1.user_id = subquery_2.user_id;
|
||||||
|
|
||||||
|
-- should be able to pushdown since reference table is in the
|
||||||
|
-- inner part of the left join
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_1)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
users_table.user_id, users_table.value_1, random()
|
||||||
|
FROM
|
||||||
|
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
|
||||||
|
INNER JOIN events_reference_table ON (events_reference_table.value_2 = users_table.user_id)
|
||||||
|
) as foo
|
||||||
|
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||||
|
|
||||||
|
-- same query as above, reference table is wrapped into a subquery
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_1)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
users_table.user_id, users_table.value_1, random()
|
||||||
|
FROM
|
||||||
|
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
|
||||||
|
INNER JOIN (SELECT *, random() FROM events_reference_table) as ref_all ON (ref_all.value_2 = users_table.user_id)
|
||||||
|
) as foo
|
||||||
|
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||||
|
|
||||||
|
-- should be able to pushdown since reference table is in the
|
||||||
|
-- inner part of the left join
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_1)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
users_table.user_id, users_table.value_1, random()
|
||||||
|
FROM
|
||||||
|
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
|
||||||
|
LEFT JOIN events_reference_table ON (events_reference_table.value_2 = users_table.user_id)
|
||||||
|
) as foo
|
||||||
|
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||||
|
|
||||||
|
-- should not be able to pushdown since reference table is in the
|
||||||
|
-- direct outer part of the left join
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_1)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
users_table.user_id, users_table.value_1, random()
|
||||||
|
FROM
|
||||||
|
events_reference_table LEFT JOIN users_table ON (users_table.user_id = events_reference_table.value_2)
|
||||||
|
LEFT JOIN events_table ON (events_table.user_id = users_table.user_id)
|
||||||
|
) as foo
|
||||||
|
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||||
|
|
||||||
|
-- should not be able to pushdown since reference table is in the
|
||||||
|
-- direct outer part of the left join wrapped into a subquery
|
||||||
|
SELECT
|
||||||
|
*
|
||||||
|
FROM
|
||||||
|
(SELECT *, random() FROM events_reference_table) as ref_all LEFT JOIN users_table
|
||||||
|
ON (users_table.user_id = ref_all.value_2);
|
||||||
|
|
||||||
|
-- should not be able to pushdown since reference table is in the
|
||||||
|
-- outer part of the left join
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_1)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
users_table.user_id, users_table.value_1, random()
|
||||||
|
FROM
|
||||||
|
events_reference_table LEFT JOIN users_table ON (users_table.user_id = events_reference_table.value_2)
|
||||||
|
LEFT JOIN events_table ON (events_table.user_id = users_table.user_id)
|
||||||
|
) as foo
|
||||||
|
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||||
|
|
||||||
|
-- should be able to pushdown since reference table is in the
|
||||||
|
-- inner part of the left join
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT DISTINCT foo.user_id
|
||||||
|
FROM
|
||||||
|
((SELECT
|
||||||
|
"events"."time", "events"."user_id" as event_user_id, value_2 as event_val_2, random()
|
||||||
|
FROM
|
||||||
|
events_reference_table as "events"
|
||||||
|
WHERE
|
||||||
|
event_type > 80) as "temp_data_queries"
|
||||||
|
INNER JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id"
|
||||||
|
FROM
|
||||||
|
users_table as "users"
|
||||||
|
WHERE
|
||||||
|
user_id > 80 and value_2 = 5) as foo_in ON (event_val_2 = user_id)) as foo LEFT JOIN
|
||||||
|
(SELECT user_id as user_user_id FROM users_table) as fooo ON (user_id = user_user_id)) as bar;
|
||||||
|
|
||||||
|
-- the same query but this time reference table is in the outer part of the query
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT DISTINCT foo.user_id
|
||||||
|
FROM
|
||||||
|
((SELECT
|
||||||
|
"events"."time", "events"."user_id" as event_user_id, value_2 as event_val_2, random()
|
||||||
|
FROM
|
||||||
|
events_reference_table as "events"
|
||||||
|
WHERE
|
||||||
|
event_type > 80) as "temp_data_queries"
|
||||||
|
LEFT JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id"
|
||||||
|
FROM
|
||||||
|
users_table as "users"
|
||||||
|
WHERE
|
||||||
|
user_id > 80 and value_2 = 5) as foo_in ON (event_val_2 = user_id)) as foo LEFT JOIN
|
||||||
|
(SELECT user_id as user_user_id FROM users_table) as fooo ON (user_id = user_user_id)) as bar;
|
||||||
|
|
||||||
|
-- we could even suuport the following where the subquery
|
||||||
|
-- on the outer part of the left join contains a reference table
|
||||||
|
SELECT max(events_all.cnt), events_all.usr_id
|
||||||
|
FROM
|
||||||
|
(SELECT users_table.user_id as usr_id,
|
||||||
|
count(*) as cnt
|
||||||
|
FROM events_reference_table
|
||||||
|
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id) GROUP BY users_table.user_id) AS events_all
|
||||||
|
LEFT JOIN events_table ON (events_all.usr_id = events_table.user_id) GROUP BY 2 ORDER BY 1 DESC, 2 DESC LIMIT 5;
|
||||||
|
|
||||||
|
-- but, we fail to pushdown the following query where join that reference table appears
|
||||||
|
-- wrapped into a subquery
|
||||||
|
SELECT max(events_all.cnt),
|
||||||
|
events_all.usr_id
|
||||||
|
FROM(
|
||||||
|
SELECT *, random() FROM
|
||||||
|
(SELECT users_table.user_id AS usr_id, count(*) AS cnt
|
||||||
|
FROM events_reference_table
|
||||||
|
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
|
||||||
|
GROUP BY users_table.user_id) AS events_all_inner) AS events_all
|
||||||
|
LEFT JOIN events_table ON (events_all.usr_id = events_table.user_id)
|
||||||
|
GROUP BY 2
|
||||||
|
ORDER BY 1 DESC,
|
||||||
|
2 DESC
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
-- LATERAL JOINs used with INNER JOINs with reference tables
|
-- LATERAL JOINs used with INNER JOINs with reference tables
|
||||||
SET citus.subquery_pushdown to ON;
|
SET citus.subquery_pushdown to ON;
|
||||||
|
@ -149,10 +325,10 @@ SELECT
|
||||||
count(*) AS value, "generated_group_field"
|
count(*) AS value, "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
DISTINCT "pushedDownQuery"."real_user_id", "generated_group_field"
|
DISTINCT "pushedDownQuery"."user_id", "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"eventQuery"."real_user_id", "eventQuery"."time", random(), ("eventQuery"."value_2") AS "generated_group_field"
|
"eventQuery"."user_id", "eventQuery"."time", random(), ("eventQuery"."value_2") AS "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
*
|
*
|
||||||
|
@ -160,7 +336,7 @@ SELECT
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."time", "events"."user_id", "events"."value_2"
|
"events"."time", "events"."user_id", "events"."value_2"
|
||||||
FROM
|
FROM
|
||||||
events_reference_table as "events"
|
events_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 10 and user_id < 40 AND event_type IN (40, 41, 42, 43, 44, 45) ) "temp_data_queries"
|
user_id > 10 and user_id < 40 AND event_type IN (40, 41, 42, 43, 44, 45) ) "temp_data_queries"
|
||||||
INNER JOIN
|
INNER JOIN
|
||||||
|
@ -270,8 +446,8 @@ ORDER BY
|
||||||
LIMIT 10;
|
LIMIT 10;
|
||||||
SET citus.subquery_pushdown to OFF;
|
SET citus.subquery_pushdown to OFF;
|
||||||
|
|
||||||
-- LEFT JOINs used with INNER JOINs should error out since reference table exist in the
|
-- LEFT JOINs used with INNER JOINs should not error out since reference table joined
|
||||||
-- left side of the LEFT JOIN.
|
-- with hash table that Citus can push down
|
||||||
SELECT
|
SELECT
|
||||||
count(*) AS cnt, "generated_group_field"
|
count(*) AS cnt, "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
|
@ -348,6 +524,106 @@ count(*) AS cnt, "generated_group_field"
|
||||||
cnt DESC, generated_group_field ASC
|
cnt DESC, generated_group_field ASC
|
||||||
LIMIT 10;
|
LIMIT 10;
|
||||||
|
|
||||||
|
-- right join where the inner part of the join includes a reference table
|
||||||
|
-- joined with hash partitioned table using non-equi join
|
||||||
|
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
t1.user_id,
|
||||||
|
array_agg(event ORDER BY time) AS events_table,
|
||||||
|
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||||||
|
FROM (
|
||||||
|
(
|
||||||
|
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_reference_table AS e
|
||||||
|
WHERE u.user_id > e.user_id
|
||||||
|
AND u.user_id >= 10
|
||||||
|
AND u.user_id <= 25
|
||||||
|
AND e.event_type IN (100, 101, 102)
|
||||||
|
)
|
||||||
|
) t1 RIGHT JOIN (
|
||||||
|
SELECT DISTINCT user_id,
|
||||||
|
'Has done event'::TEXT AS hasdone_event
|
||||||
|
FROM events_table AS e
|
||||||
|
WHERE e.user_id >= 10
|
||||||
|
AND e.user_id <= 25
|
||||||
|
AND e.event_type IN (106, 107, 108)
|
||||||
|
) t2 ON (t1.user_id = t2.user_id)
|
||||||
|
GROUP BY t1.user_id, hasdone_event
|
||||||
|
) t GROUP BY user_id, hasdone_event
|
||||||
|
ORDER BY user_id;
|
||||||
|
|
||||||
|
-- a similar query as the above, with non-partition key comparison
|
||||||
|
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
t1.user_id,
|
||||||
|
array_agg(event ORDER BY time) AS events_table,
|
||||||
|
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||||||
|
FROM (
|
||||||
|
(
|
||||||
|
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_reference_table AS e
|
||||||
|
WHERE u.value_1 > e.user_id
|
||||||
|
AND u.user_id >= 10
|
||||||
|
AND u.user_id <= 25
|
||||||
|
AND e.event_type >= 125 AND e.event_type < 130
|
||||||
|
)
|
||||||
|
) t1 RIGHT JOIN (
|
||||||
|
SELECT DISTINCT user_id,
|
||||||
|
'Has done event'::TEXT AS hasdone_event
|
||||||
|
FROM events_table AS e
|
||||||
|
WHERE e.user_id >= 10
|
||||||
|
AND e.user_id <= 25
|
||||||
|
AND e.event_type >= 130 AND e.event_type < 135
|
||||||
|
) t2 ON (t1.user_id = t2.user_id)
|
||||||
|
GROUP BY t1.user_id, hasdone_event
|
||||||
|
) t GROUP BY user_id, hasdone_event
|
||||||
|
ORDER BY user_id;
|
||||||
|
|
||||||
|
|
||||||
|
-- LEFT JOINs used with INNER JOINs
|
||||||
|
-- events_table and users_reference_table joined
|
||||||
|
-- with event_table.non_part_key < reference_table.any_key
|
||||||
|
SELECT
|
||||||
|
count(*) AS cnt, "generated_group_field"
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"eventQuery"."user_id", random(), generated_group_field
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"multi_group_wrapper_1".*, generated_group_field, random()
|
||||||
|
FROM
|
||||||
|
(SELECT *
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"events"."time", "events"."user_id" as event_user_id
|
||||||
|
FROM
|
||||||
|
events_table as "events"
|
||||||
|
WHERE
|
||||||
|
user_id > 80) "temp_data_queries"
|
||||||
|
INNER JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id"
|
||||||
|
FROM
|
||||||
|
users_reference_table as "users"
|
||||||
|
WHERE
|
||||||
|
user_id > 80 and value_2 = 5) "user_filters_1"
|
||||||
|
ON ("temp_data_queries".event_user_id < "user_filters_1".user_id)) AS "multi_group_wrapper_1"
|
||||||
|
RIGHT JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
|
||||||
|
FROM
|
||||||
|
users_table as "users") "left_group_by_1"
|
||||||
|
ON ("left_group_by_1".user_id = "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
|
||||||
|
group BY
|
||||||
|
"generated_group_field"
|
||||||
|
ORDER BY
|
||||||
|
cnt DESC, generated_group_field ASC
|
||||||
|
LIMIT 10;
|
||||||
|
|
||||||
-- Outer subquery with reference table
|
-- Outer subquery with reference table
|
||||||
SELECT "some_users_data".user_id, lastseen
|
SELECT "some_users_data".user_id, lastseen
|
||||||
FROM
|
FROM
|
||||||
|
@ -380,9 +656,9 @@ ORDER BY
|
||||||
limit 50;
|
limit 50;
|
||||||
|
|
||||||
--
|
--
|
||||||
-- UNIONs and JOINs with reference tables, shoukld error out
|
-- UNIONs and JOINs with reference tables, should error out
|
||||||
--
|
--
|
||||||
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
SELECT ("final_query"."event_types") as types
|
||||||
FROM
|
FROM
|
||||||
( SELECT *, random()
|
( SELECT *, random()
|
||||||
FROM
|
FROM
|
||||||
|
@ -396,7 +672,7 @@ FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."user_id", "events"."time", 0 AS event
|
"events"."user_id", "events"."time", 0 AS event
|
||||||
FROM
|
FROM
|
||||||
events_reference_table as "events"
|
events_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1)
|
event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1)
|
||||||
UNION
|
UNION
|
||||||
|
@ -406,7 +682,7 @@ FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."user_id", "events"."time", 1 AS event
|
"events"."user_id", "events"."time", 1 AS event
|
||||||
FROM
|
FROM
|
||||||
events_table as "events"
|
events_reference_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
event_type IN (15, 16, 17, 18, 19) ) events_subquery_2)
|
event_type IN (15, 16, 17, 18, 19) ) events_subquery_2)
|
||||||
UNION
|
UNION
|
||||||
|
@ -438,8 +714,6 @@ INNER JOIN
|
||||||
WHERE
|
WHERE
|
||||||
value_1 > 50 and value_1 < 70) AS t
|
value_1 > 50 and value_1 < 70) AS t
|
||||||
ON (t.user_id = q.user_id)) as final_query
|
ON (t.user_id = q.user_id)) as final_query
|
||||||
GROUP BY
|
|
||||||
types
|
|
||||||
ORDER BY
|
ORDER BY
|
||||||
types;
|
types;
|
||||||
|
|
||||||
|
@ -471,15 +745,15 @@ FROM
|
||||||
SELECT * FROM
|
SELECT * FROM
|
||||||
(
|
(
|
||||||
SELECT
|
SELECT
|
||||||
max("events"."time"),
|
max("users"."time"),
|
||||||
0 AS event,
|
0 AS event,
|
||||||
"events"."user_id"
|
"users"."user_id"
|
||||||
FROM
|
FROM
|
||||||
events_reference_table as "events", users_table as "users"
|
events_reference_table as "events", users_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
events.user_id = users.user_id AND
|
events.user_id = users.user_id AND
|
||||||
event_type IN (10, 11, 12, 13, 14, 15)
|
event_type IN (10, 11, 12, 13, 14, 15)
|
||||||
GROUP BY "events"."user_id"
|
GROUP BY "users"."user_id"
|
||||||
) as events_subquery_5
|
) as events_subquery_5
|
||||||
) events_subquery_2)
|
) events_subquery_2)
|
||||||
UNION
|
UNION
|
||||||
|
@ -569,6 +843,107 @@ INNER JOIN
|
||||||
GROUP BY types
|
GROUP BY types
|
||||||
ORDER BY types;
|
ORDER BY types;
|
||||||
|
|
||||||
|
-- just a sanity check that we don't allow this if the reference table is on the
|
||||||
|
-- left part of the left join
|
||||||
|
SELECT count(*) FROM
|
||||||
|
(SELECT random() FROM users_ref_test_table LEFT JOIN user_buy_test_table
|
||||||
|
ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1;
|
||||||
|
|
||||||
|
-- we don't allow non equi join among hash partitioned tables
|
||||||
|
SELECT count(*) FROM
|
||||||
|
(SELECT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||||
|
ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1,
|
||||||
|
(SELECT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||||
|
ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_2
|
||||||
|
WHERE subquery_1.user_id != subquery_2.user_id ;
|
||||||
|
|
||||||
|
-- we cannot push this query since hash partitioned tables
|
||||||
|
-- are not joined on partition keys with equality
|
||||||
|
SELECT
|
||||||
|
count(*) AS cnt, "generated_group_field"
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"eventQuery"."user_id", random(), generated_group_field
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"multi_group_wrapper_1".*, generated_group_field, random()
|
||||||
|
FROM
|
||||||
|
(SELECT *
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
"events"."time", "events"."user_id" as event_user_id
|
||||||
|
FROM
|
||||||
|
events_table as "events"
|
||||||
|
WHERE
|
||||||
|
user_id > 80) "temp_data_queries"
|
||||||
|
INNER JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id"
|
||||||
|
FROM
|
||||||
|
users_reference_table as "users"
|
||||||
|
WHERE
|
||||||
|
user_id > 80 and value_2 = 5) "user_filters_1"
|
||||||
|
ON ("temp_data_queries".event_user_id < "user_filters_1".user_id)) AS "multi_group_wrapper_1"
|
||||||
|
RIGHT JOIN
|
||||||
|
(SELECT
|
||||||
|
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
|
||||||
|
FROM
|
||||||
|
users_table as "users") "left_group_by_1"
|
||||||
|
ON ("left_group_by_1".user_id > "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
|
||||||
|
group BY
|
||||||
|
"generated_group_field"
|
||||||
|
ORDER BY
|
||||||
|
cnt DESC, generated_group_field ASC
|
||||||
|
LIMIT 10;
|
||||||
|
|
||||||
|
-- two hash partitioned relations are not joined
|
||||||
|
-- on partiton keys although reference table is fine
|
||||||
|
-- to push down
|
||||||
|
SELECT
|
||||||
|
u1.user_id, count(*)
|
||||||
|
FROM
|
||||||
|
events_table as e1, users_table as u1
|
||||||
|
WHERE
|
||||||
|
event_type IN
|
||||||
|
(SELECT
|
||||||
|
event_type
|
||||||
|
FROM
|
||||||
|
events_reference_table as e2
|
||||||
|
WHERE
|
||||||
|
value_2 = 15 AND
|
||||||
|
value_3 > 25 AND
|
||||||
|
e1.value_2 > e2.value_2
|
||||||
|
)
|
||||||
|
AND u1.user_id > e1.user_id
|
||||||
|
GROUP BY 1
|
||||||
|
ORDER BY 2 DESC, 1 DESC
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
SELECT foo.user_id FROM
|
||||||
|
(
|
||||||
|
SELECT m.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
|
||||||
|
WHERE event_type > 100000
|
||||||
|
) as foo;
|
||||||
|
|
||||||
|
-- not supported since group by is on the reference table column
|
||||||
|
SELECT foo.user_id FROM
|
||||||
|
(
|
||||||
|
SELECT r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
|
||||||
|
GROUP BY r.user_id
|
||||||
|
) as foo;
|
||||||
|
|
||||||
|
-- not supported since distinct is on the reference table column
|
||||||
|
SELECT foo.user_id FROM
|
||||||
|
(
|
||||||
|
SELECT DISTINCT r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
|
||||||
|
) as foo;
|
||||||
|
|
||||||
|
-- not supported since distinct on is on the reference table column
|
||||||
|
SELECT foo.user_id FROM
|
||||||
|
(
|
||||||
|
SELECT DISTINCT ON(r.user_id) r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
|
||||||
|
) as foo;
|
||||||
|
|
||||||
DROP TABLE user_buy_test_table;
|
DROP TABLE user_buy_test_table;
|
||||||
DROP TABLE users_ref_test_table;
|
DROP TABLE users_ref_test_table;
|
||||||
DROP TABLE users_return_test_table;
|
DROP TABLE users_return_test_table;
|
||||||
|
|
|
@ -1,27 +1,6 @@
|
||||||
--
|
--
|
||||||
-- queries to test the subquery pushdown on reference tables
|
-- queries to test the subquery pushdown on reference tables
|
||||||
|
|
||||||
-- subqueries in WHERE with greater operator
|
|
||||||
SELECT
|
|
||||||
user_id
|
|
||||||
FROM
|
|
||||||
users_table
|
|
||||||
WHERE
|
|
||||||
value_2 >
|
|
||||||
(SELECT
|
|
||||||
max(value_2)
|
|
||||||
FROM
|
|
||||||
events_reference_table
|
|
||||||
WHERE
|
|
||||||
users_table.user_id = events_reference_table.user_id AND event_type = 50
|
|
||||||
GROUP BY
|
|
||||||
user_id
|
|
||||||
)
|
|
||||||
GROUP BY user_id
|
|
||||||
HAVING count(*) > 66
|
|
||||||
ORDER BY user_id
|
|
||||||
LIMIT 5;
|
|
||||||
|
|
||||||
-- subqueries in WHERE with IN operator
|
-- subqueries in WHERE with IN operator
|
||||||
SELECT
|
SELECT
|
||||||
user_id
|
user_id
|
||||||
|
@ -78,7 +57,7 @@ LIMIT 3;
|
||||||
|
|
||||||
-- subqueries in WHERE with IN operator without equality
|
-- subqueries in WHERE with IN operator without equality
|
||||||
SELECT
|
SELECT
|
||||||
user_id
|
users_table.user_id, count(*)
|
||||||
FROM
|
FROM
|
||||||
users_table
|
users_table
|
||||||
WHERE
|
WHERE
|
||||||
|
@ -90,51 +69,10 @@ WHERE
|
||||||
WHERE
|
WHERE
|
||||||
users_table.user_id > events_reference_table.user_id
|
users_table.user_id > events_reference_table.user_id
|
||||||
)
|
)
|
||||||
GROUP BY user_id
|
GROUP BY users_table.user_id
|
||||||
ORDER BY user_id
|
ORDER BY 2 DESC, 1 DESC
|
||||||
LIMIT 3;
|
LIMIT 3;
|
||||||
|
|
||||||
-- have reference table without any equality, should error out
|
|
||||||
SELECT
|
|
||||||
user_id
|
|
||||||
FROM
|
|
||||||
users_table
|
|
||||||
WHERE
|
|
||||||
value_2 >
|
|
||||||
(SELECT
|
|
||||||
max(value_2)
|
|
||||||
FROM
|
|
||||||
events_reference_table
|
|
||||||
WHERE
|
|
||||||
event_type = 50
|
|
||||||
GROUP BY
|
|
||||||
user_id
|
|
||||||
)
|
|
||||||
GROUP BY user_id
|
|
||||||
HAVING count(*) > 66
|
|
||||||
ORDER BY user_id
|
|
||||||
LIMIT 5;
|
|
||||||
|
|
||||||
-- users that appeared more than 118 times, should run since the reference table
|
|
||||||
-- on the right side of the semi join
|
|
||||||
SELECT
|
|
||||||
user_id
|
|
||||||
FROM
|
|
||||||
users_table
|
|
||||||
WHERE 118 <=
|
|
||||||
(SELECT
|
|
||||||
count(*)
|
|
||||||
FROM
|
|
||||||
events_reference_table
|
|
||||||
WHERE
|
|
||||||
users_table.user_id = events_reference_table.user_id
|
|
||||||
GROUP BY
|
|
||||||
user_id)
|
|
||||||
GROUP BY
|
|
||||||
user_id
|
|
||||||
ORDER BY
|
|
||||||
user_id;
|
|
||||||
|
|
||||||
-- should error out since reference table exist on the left side
|
-- should error out since reference table exist on the left side
|
||||||
-- of the left lateral join
|
-- of the left lateral join
|
||||||
SELECT user_id, value_2 FROM users_table WHERE
|
SELECT user_id, value_2 FROM users_table WHERE
|
||||||
|
@ -197,3 +135,165 @@ SELECT user_id, value_2 FROM users_table WHERE
|
||||||
HAVING sum(submit_card_info) > 0
|
HAVING sum(submit_card_info) > 0
|
||||||
)
|
)
|
||||||
ORDER BY 1, 2;
|
ORDER BY 1, 2;
|
||||||
|
|
||||||
|
-- non-partition key equality with reference table
|
||||||
|
SELECT
|
||||||
|
user_id, count(*)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WHERE
|
||||||
|
value_3 =ANY(SELECT value_2 FROM users_reference_table WHERE value_1 >= 10 AND value_1 <= 20)
|
||||||
|
GROUP BY 1 ORDER BY 2 DESC, 1 DESC LIMIT 5;
|
||||||
|
|
||||||
|
|
||||||
|
-- non-partition key comparison with reference table
|
||||||
|
SELECT
|
||||||
|
user_id, count(*)
|
||||||
|
FROM
|
||||||
|
events_table as e1
|
||||||
|
WHERE
|
||||||
|
event_type IN
|
||||||
|
(SELECT
|
||||||
|
event_type
|
||||||
|
FROM
|
||||||
|
events_reference_table as e2
|
||||||
|
WHERE
|
||||||
|
value_2 = 15 AND
|
||||||
|
value_3 > 25 AND
|
||||||
|
e1.value_2 > e2.value_2
|
||||||
|
)
|
||||||
|
GROUP BY 1
|
||||||
|
ORDER BY 2 DESC, 1 DESC
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
-- subqueries in both WHERE and FROM clauses
|
||||||
|
-- should work since reference table is on the
|
||||||
|
-- inner part of the join
|
||||||
|
SELECT user_id, value_2 FROM users_table WHERE
|
||||||
|
value_1 > 101 AND value_1 < 110
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND user_id IN
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
e1.user_id
|
||||||
|
FROM (
|
||||||
|
-- Get the first time each user viewed the homepage.
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
1 AS view_homepage,
|
||||||
|
min(time) AS view_homepage_time
|
||||||
|
FROM events_table
|
||||||
|
WHERE
|
||||||
|
event_type IN (10, 20, 30, 40, 50, 60, 70, 80, 90)
|
||||||
|
GROUP BY user_id
|
||||||
|
) e1 LEFT JOIN LATERAL (
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
1 AS use_demo,
|
||||||
|
time AS use_demo_time
|
||||||
|
FROM events_table
|
||||||
|
WHERE
|
||||||
|
user_id = e1.user_id AND
|
||||||
|
event_type IN (11, 21, 31, 41, 51, 61, 71, 81, 91)
|
||||||
|
ORDER BY time
|
||||||
|
) e2 ON true LEFT JOIN LATERAL (
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
1 AS enter_credit_card,
|
||||||
|
time AS enter_credit_card_time
|
||||||
|
FROM events_table
|
||||||
|
WHERE
|
||||||
|
user_id = e2.user_id AND
|
||||||
|
event_type IN (12, 22, 32, 42, 52, 62, 72, 82, 92)
|
||||||
|
ORDER BY time
|
||||||
|
) e3 ON true LEFT JOIN LATERAL (
|
||||||
|
SELECT
|
||||||
|
1 AS submit_card_info,
|
||||||
|
user_id,
|
||||||
|
time AS enter_credit_card_time
|
||||||
|
FROM events_table
|
||||||
|
WHERE
|
||||||
|
user_id = e3.user_id AND
|
||||||
|
event_type IN (13, 23, 33, 43, 53, 63, 73, 83, 93)
|
||||||
|
ORDER BY time
|
||||||
|
) e4 ON true LEFT JOIN LATERAL (
|
||||||
|
SELECT
|
||||||
|
1 AS see_bought_screen
|
||||||
|
FROM events_reference_table
|
||||||
|
WHERE
|
||||||
|
user_id = e4.user_id AND
|
||||||
|
event_type IN (14, 24, 34, 44, 54, 64, 74, 84, 94)
|
||||||
|
ORDER BY time
|
||||||
|
) e5 ON true
|
||||||
|
group by e1.user_id
|
||||||
|
HAVING sum(submit_card_info) > 0
|
||||||
|
)
|
||||||
|
ORDER BY 1, 2;
|
||||||
|
|
||||||
|
-- reference tables are not allowed if there is sublink
|
||||||
|
SELECT
|
||||||
|
count(*)
|
||||||
|
FROM
|
||||||
|
users_reference_table
|
||||||
|
WHERE user_id
|
||||||
|
NOT IN
|
||||||
|
(SELECT users_table.value_2 FROM users_table JOIN users_reference_table as u2 ON users_table.value_2 = u2.value_2);
|
||||||
|
|
||||||
|
|
||||||
|
-- reference tables are not allowed if there is sublink
|
||||||
|
SELECT count(*)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, random() FROM users_reference_table) AS vals
|
||||||
|
WHERE vals.user_id NOT IN
|
||||||
|
(SELECT users_table.value_2
|
||||||
|
FROM users_table
|
||||||
|
JOIN users_reference_table AS u2 ON users_table.value_2 = u2.value_2);
|
||||||
|
|
||||||
|
-- reference tables are not allowed if there is sublink
|
||||||
|
SELECT user_id,
|
||||||
|
count(*)
|
||||||
|
FROM users_reference_table
|
||||||
|
WHERE value_2 > ALL
|
||||||
|
(SELECT min(value_2)
|
||||||
|
FROM events_table
|
||||||
|
WHERE event_type > 50 AND users_reference_table.user_id = events_table.user_id
|
||||||
|
GROUP BY user_id)
|
||||||
|
GROUP BY user_id
|
||||||
|
HAVING count(*) > 66
|
||||||
|
ORDER BY 2 DESC,
|
||||||
|
1 DESC
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
-- reference tables are not allowed if there is sublink
|
||||||
|
-- this time in the subquery
|
||||||
|
SELECT *
|
||||||
|
FROM users_table
|
||||||
|
WHERE user_id IN
|
||||||
|
(SELECT users_table.user_id
|
||||||
|
FROM users_table,
|
||||||
|
users_reference_table
|
||||||
|
WHERE users_reference_table.user_id NOT IN
|
||||||
|
(SELECT value_2
|
||||||
|
FROM users_reference_table AS u2));
|
||||||
|
|
||||||
|
-- not supported since GROUP BY references to an upper level query
|
||||||
|
SELECT
|
||||||
|
user_id
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WHERE
|
||||||
|
value_2 >
|
||||||
|
(SELECT
|
||||||
|
max(value_2)
|
||||||
|
FROM
|
||||||
|
events_reference_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_reference_table.user_id AND event_type = 50
|
||||||
|
GROUP BY
|
||||||
|
users_table.user_id
|
||||||
|
)
|
||||||
|
GROUP BY user_id
|
||||||
|
HAVING count(*) > 66
|
||||||
|
ORDER BY user_id
|
||||||
|
LIMIT 5;
|
||||||
|
|
Loading…
Reference in New Issue