mirror of https://github.com/citusdata/citus.git
Merge pull request #1628 from citusdata/expand_subquery_reference_table
Expand subquery pushdown for reference tablespull/1653/head
commit
6c9dffccbf
|
@ -2880,6 +2880,17 @@ FindReferencedTableColumn(Expr *columnExpression, List *parentQueryList, Query *
|
|||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* We currently don't support finding partition keys in the subqueries
|
||||
* that references from outer subqueries. For example, in corrolated
|
||||
* subqueries in WHERE clause, we don't support use of partition keys
|
||||
* in the subquery that is referred from the outer query.
|
||||
*/
|
||||
if (candidateColumn->varlevelsup > 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
rangeTableEntryIndex = candidateColumn->varno - 1;
|
||||
rangeTableEntry = list_nth(rangetableList, rangeTableEntryIndex);
|
||||
|
||||
|
@ -3096,7 +3107,14 @@ PartitionColumnOpExpressionList(Query *query)
|
|||
rangeTableEntryIndex = candidatePartitionColumn->varno - 1;
|
||||
rangeTableEntry = list_nth(rangetableList, rangeTableEntryIndex);
|
||||
|
||||
Assert(rangeTableEntry->rtekind == RTE_RELATION);
|
||||
/*
|
||||
* We currently don't support checking for equality when user refers
|
||||
* to a column from the JOIN instead of the relation.
|
||||
*/
|
||||
if (rangeTableEntry->rtekind != RTE_RELATION)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
relationId = rangeTableEntry->relid;
|
||||
partitionColumn = DistPartitionKey(relationId);
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
#include "distributed/worker_protocol.h"
|
||||
#include "nodes/makefuncs.h"
|
||||
#include "nodes/nodeFuncs.h"
|
||||
#include "nodes/relation.h"
|
||||
#include "optimizer/clauses.h"
|
||||
#include "optimizer/prep.h"
|
||||
#include "optimizer/tlist.h"
|
||||
|
@ -69,6 +70,8 @@ static DeferredErrorMessage * DeferErrorIfUnsupportedSubqueryPushdown(Query *
|
|||
PlannerRestrictionContext
|
||||
*
|
||||
plannerRestrictionContext);
|
||||
static DeferredErrorMessage * DeferErrorIfUnsupportedSublinkAndReferenceTable(
|
||||
Query *queryTree);
|
||||
static DeferredErrorMessage * DeferErrorIfUnsupportedFilters(Query *subquery);
|
||||
static bool EqualOpExpressionLists(List *firstOpExpressionList,
|
||||
List *secondOpExpressionList);
|
||||
|
@ -87,6 +90,7 @@ static MultiNode * MultiPlanTree(Query *queryTree);
|
|||
static void ErrorIfQueryNotSupported(Query *queryTree);
|
||||
static bool HasUnsupportedReferenceTableJoin(
|
||||
PlannerRestrictionContext *plannerRestrictionContext);
|
||||
static bool ShouldRecurseForReferenceTableJoinChecks(RelOptInfo *relOptInfo);
|
||||
static bool HasUnsupportedJoinWalker(Node *node, void *context);
|
||||
static bool ErrorHintRequired(const char *errorHint, Query *queryTree);
|
||||
static DeferredErrorMessage * DeferErrorIfUnsupportedSubqueryRepartition(Query *
|
||||
|
@ -96,8 +100,8 @@ static bool HasOuterJoin(Query *queryTree);
|
|||
static bool HasOuterJoinWalker(Node *node, void *maxJoinLevel);
|
||||
static bool HasComplexJoinOrder(Query *queryTree);
|
||||
static bool HasComplexRangeTableType(Query *queryTree);
|
||||
static bool RelationInfoHasReferenceTable(PlannerInfo *plannerInfo,
|
||||
RelOptInfo *relationInfo);
|
||||
static bool RelationInfoContainsReferenceTable(PlannerInfo *plannerInfo,
|
||||
RelOptInfo *relationInfo);
|
||||
static void ValidateClauseList(List *clauseList);
|
||||
static void ValidateSubqueryPushdownClauseList(List *clauseList);
|
||||
static bool ExtractFromExpressionWalker(Node *node,
|
||||
|
@ -542,7 +546,16 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery,
|
|||
"one another relation using distribution keys and "
|
||||
"equality operator.", NULL);
|
||||
}
|
||||
else if (HasUnsupportedReferenceTableJoin(plannerRestrictionContext))
|
||||
|
||||
/* we shouldn't allow reference tables in the FROM clause when the query has sublinks */
|
||||
error = DeferErrorIfUnsupportedSublinkAndReferenceTable(originalQuery);
|
||||
if (error)
|
||||
{
|
||||
return error;
|
||||
}
|
||||
|
||||
/* we shouldn't allow reference tables in the outer part of outer joins */
|
||||
if (HasUnsupportedReferenceTableJoin(plannerRestrictionContext))
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"cannot pushdown the subquery",
|
||||
|
@ -582,6 +595,43 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery,
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* DeferErrorIfUnsupportedSublinkAndReferenceTable returns a deferred error if the
|
||||
* given query is not suitable for subquery pushdown.
|
||||
*
|
||||
* While planning sublinks, we rely on Postgres in the sense that it converts some of
|
||||
* sublinks into joins.
|
||||
*
|
||||
* In some cases, sublinks are pulled up and converted into outer joins. Those cases
|
||||
* are already handled with HasUnsupportedReferenceTableJoin().
|
||||
*
|
||||
* If the sublinks are not pulled up, we should still error out in if any reference table
|
||||
* appears in the FROM clause of a subquery.
|
||||
*
|
||||
* Otherwise, the result would include duplicate rows.
|
||||
*/
|
||||
static DeferredErrorMessage *
|
||||
DeferErrorIfUnsupportedSublinkAndReferenceTable(Query *queryTree)
|
||||
{
|
||||
if (!queryTree->hasSubLinks)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (HasReferenceTable((Node *) queryTree->rtable))
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"cannot pushdown the subquery",
|
||||
"Reference tables are not allowed in FROM "
|
||||
"clause when the query has subqueries in "
|
||||
"WHERE clause",
|
||||
NULL);
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* DeferErrorIfUnsupportedFilters checks if all leaf queries in the given query have
|
||||
* same filter on the partition column. Note that if there are queries without
|
||||
|
@ -853,6 +903,14 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi
|
|||
}
|
||||
}
|
||||
|
||||
deferredError = DeferErrorIfUnsupportedSublinkAndReferenceTable(subqueryTree);
|
||||
if (deferredError)
|
||||
{
|
||||
preconditionsSatisfied = false;
|
||||
errorDetail = (char *) deferredError->detail;
|
||||
}
|
||||
|
||||
|
||||
/* finally check and return deferred if not satisfied */
|
||||
if (!preconditionsSatisfied)
|
||||
{
|
||||
|
@ -1027,7 +1085,7 @@ DeferErrorIfUnsupportedTableCombination(Query *queryTree)
|
|||
|
||||
/*
|
||||
* TargetListOnPartitionColumn checks if at least one target list entry is on
|
||||
* partition column or the table is a reference table.
|
||||
* partition column.
|
||||
*/
|
||||
static bool
|
||||
TargetListOnPartitionColumn(Query *query, List *targetEntryList)
|
||||
|
@ -1047,15 +1105,11 @@ TargetListOnPartitionColumn(Query *query, List *targetEntryList)
|
|||
|
||||
FindReferencedTableColumn(targetExpression, NIL, query, &relationId, &column);
|
||||
|
||||
/*
|
||||
* If the expression belongs to reference table directly returns true.
|
||||
* We can assume that target list entry always on partition column of
|
||||
* reference tables.
|
||||
*/
|
||||
/* if the expression belongs to reference table directly returns false */
|
||||
if (IsDistributedTable(relationId) && PartitionMethod(relationId) ==
|
||||
DISTRIBUTE_BY_NONE)
|
||||
{
|
||||
targetListOnPartitionColumn = true;
|
||||
targetListOnPartitionColumn = false;
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1459,15 +1513,18 @@ HasUnsupportedReferenceTableJoin(PlannerRestrictionContext *plannerRestrictionCo
|
|||
|
||||
if (joinType == JOIN_SEMI || joinType == JOIN_ANTI || joinType == JOIN_LEFT)
|
||||
{
|
||||
if (RelationInfoHasReferenceTable(plannerInfo, outerrel))
|
||||
if (ShouldRecurseForReferenceTableJoinChecks(outerrel) &&
|
||||
RelationInfoContainsReferenceTable(plannerInfo, outerrel))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
else if (joinType == JOIN_FULL)
|
||||
{
|
||||
if (RelationInfoHasReferenceTable(plannerInfo, innerrel) ||
|
||||
RelationInfoHasReferenceTable(plannerInfo, outerrel))
|
||||
if ((ShouldRecurseForReferenceTableJoinChecks(innerrel) &&
|
||||
RelationInfoContainsReferenceTable(plannerInfo, innerrel)) ||
|
||||
(ShouldRecurseForReferenceTableJoinChecks(outerrel) &&
|
||||
RelationInfoContainsReferenceTable(plannerInfo, outerrel)))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
@ -1479,12 +1536,66 @@ HasUnsupportedReferenceTableJoin(PlannerRestrictionContext *plannerRestrictionCo
|
|||
|
||||
|
||||
/*
|
||||
* RelationInfoHasReferenceTable check whether the relationInfo has reference table.
|
||||
* Since relation ids of relationInfo indexes to the range table entry list of
|
||||
* planner info, planner info is also passed.
|
||||
* ShouldRecurseForReferenceTableJoinChecks is a helper function for deciding
|
||||
* on whether the input relOptInfo should be checked for unsupported reference
|
||||
* tables.
|
||||
*/
|
||||
static bool
|
||||
RelationInfoHasReferenceTable(PlannerInfo *plannerInfo, RelOptInfo *relationInfo)
|
||||
ShouldRecurseForReferenceTableJoinChecks(RelOptInfo *relOptInfo)
|
||||
{
|
||||
/*
|
||||
* We shouldn't recursively go down for joins since we're already
|
||||
* going to process each join seperately. Otherwise we'd restrict
|
||||
* the coverage. See the below sketch where (h) denotes a hash
|
||||
* distributed relation, (r) denotes a reference table, (L) denotes
|
||||
* LEFT JOIN and (I) denotes INNER JOIN. If we're to recurse into
|
||||
* the inner join, we'd be preventing to push down the following
|
||||
* join tree, which is actually safe to push down.
|
||||
*
|
||||
* (L)
|
||||
* / \
|
||||
* (I) h
|
||||
* / \
|
||||
* r h
|
||||
*/
|
||||
if (relOptInfo->reloptkind == RELOPT_JOINREL)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Note that we treat the same query where relations appear in subqueries
|
||||
* differently. (i.e., use SELECT * FROM r; instead of r)
|
||||
*
|
||||
* In that case, to relax some restrictions, we do the following optimization:
|
||||
* If the subplan (i.e., plannerInfo corresponding to the subquery) contains any
|
||||
* joins, we skip reference table checks keeping in mind that the join is already
|
||||
* going to be processed seperately. This optimization should suffice for many
|
||||
* use cases.
|
||||
*/
|
||||
if (relOptInfo->reloptkind == RELOPT_BASEREL && relOptInfo->subroot != NULL)
|
||||
{
|
||||
PlannerInfo *subroot = relOptInfo->subroot;
|
||||
|
||||
if (list_length(subroot->join_rel_list) > 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* RelationInfoContainsReferenceTable checks whether the relationInfo
|
||||
* contains any reference tables. If found, the function returns true.
|
||||
*
|
||||
* Note that since relation ids of relationInfo indexes to the range
|
||||
* table entry list of planner info, planner info is also passed.
|
||||
*/
|
||||
static bool
|
||||
RelationInfoContainsReferenceTable(PlannerInfo *plannerInfo, RelOptInfo *relationInfo)
|
||||
{
|
||||
Relids relids = bms_copy(relationInfo->relids);
|
||||
int relationId = -1;
|
||||
|
@ -2927,9 +3038,17 @@ ExtractRangeTableRelationWalkerWithRTEExpand(Node *node, List **rangeTableRelati
|
|||
rangeTableRelationList, 0);
|
||||
}
|
||||
}
|
||||
else if (IsA(node, Query))
|
||||
{
|
||||
walkIsComplete = query_tree_walker((Query *) node,
|
||||
ExtractRangeTableRelationWalkerWithRTEExpand,
|
||||
rangeTableRelationList, QTW_EXAMINE_RTES);
|
||||
}
|
||||
else
|
||||
{
|
||||
walkIsComplete = ExtractRangeTableRelationWalker(node, rangeTableRelationList);
|
||||
walkIsComplete = expression_tree_walker(node,
|
||||
ExtractRangeTableRelationWalkerWithRTEExpand,
|
||||
rangeTableRelationList);
|
||||
}
|
||||
|
||||
return walkIsComplete;
|
||||
|
|
|
@ -62,6 +62,7 @@ typedef struct AttributeEquivalenceClassMember
|
|||
} AttributeEquivalenceClassMember;
|
||||
|
||||
|
||||
static uint32 ReferenceRelationCount(RelationRestrictionContext *restrictionContext);
|
||||
static Var * FindTranslatedVar(List *appendRelList, Oid relationOid,
|
||||
Index relationRteIndex, Index *partitionKeyIndex);
|
||||
static bool EquivalenceListContainsRelationsEquality(List *attributeEquivalenceList,
|
||||
|
@ -329,9 +330,8 @@ FindTranslatedVar(List *appendRelList, Oid relationOid, Index relationRteIndex,
|
|||
* joined on their partition keys.
|
||||
*
|
||||
* The function returns true if all relations are joined on their partition keys.
|
||||
* Otherwise, the function returns false. In order to support reference tables
|
||||
* with subqueries, equality between attributes of reference tables and partition
|
||||
* key of distributed tables are also considered.
|
||||
* Otherwise, the function returns false. We ignore reference tables at all since
|
||||
* they don't have partition keys.
|
||||
*
|
||||
* In order to do that, we invented a new equivalence class namely:
|
||||
* AttributeEquivalenceClass. In very simple words, a AttributeEquivalenceClass is
|
||||
|
@ -365,14 +365,24 @@ RestrictionEquivalenceForPartitionKeys(PlannerRestrictionContext *
|
|||
List *joinRestrictionAttributeEquivalenceList = NIL;
|
||||
List *allAttributeEquivalenceList = NIL;
|
||||
|
||||
uint32 referenceRelationCount = ReferenceRelationCount(restrictionContext);
|
||||
uint32 totalRelationCount = list_length(restrictionContext->relationRestrictionList);
|
||||
uint32 nonReferenceRelationCount = totalRelationCount - referenceRelationCount;
|
||||
|
||||
/*
|
||||
* If the query includes only one relation, we should not check the partition
|
||||
* column equality. Single table should not need to fetch data from other nodes
|
||||
* except it's own node(s).
|
||||
* If the query includes a single relation which is not a reference table,
|
||||
* we should not check the partition column equality.
|
||||
* Consider two example cases:
|
||||
* (i) The query includes only a single colocated relation
|
||||
* (ii) A colocated relation is joined with a (or multiple) reference
|
||||
* table(s) where colocated relation is not joined on the partition key
|
||||
*
|
||||
* For the above two cases, we don't need to execute the partition column equality
|
||||
* algorithm. The reason is that the essence of this function is to ensure that the
|
||||
* tasks that are going to be created should not need data from other tasks. In both
|
||||
* cases mentioned above, the necessary data per task would be on available.
|
||||
*/
|
||||
if (totalRelationCount == 1)
|
||||
if (nonReferenceRelationCount <= 1)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
@ -394,6 +404,31 @@ RestrictionEquivalenceForPartitionKeys(PlannerRestrictionContext *
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* ReferenceRelationCount iterates over the relations and returns the reference table
|
||||
* relation count.
|
||||
*/
|
||||
static uint32
|
||||
ReferenceRelationCount(RelationRestrictionContext *restrictionContext)
|
||||
{
|
||||
ListCell *relationRestrictionCell = NULL;
|
||||
uint32 referenceRelationCount = 0;
|
||||
|
||||
foreach(relationRestrictionCell, restrictionContext->relationRestrictionList)
|
||||
{
|
||||
RelationRestriction *relationRestriction =
|
||||
(RelationRestriction *) lfirst(relationRestrictionCell);
|
||||
|
||||
if (PartitionMethod(relationRestriction->relationId) == DISTRIBUTE_BY_NONE)
|
||||
{
|
||||
referenceRelationCount++;
|
||||
}
|
||||
}
|
||||
|
||||
return referenceRelationCount;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* EquivalenceListContainsRelationsEquality gets a list of attributed equivalence
|
||||
* list and a relation restriction context. The function first generates a common
|
||||
|
@ -434,6 +469,12 @@ EquivalenceListContainsRelationsEquality(List *attributeEquivalenceList,
|
|||
(RelationRestriction *) lfirst(relationRestrictionCell);
|
||||
int rteIdentity = GetRTEIdentity(relationRestriction->rte);
|
||||
|
||||
/* we shouldn't check for the equality of reference tables */
|
||||
if (PartitionMethod(relationRestriction->relationId) == DISTRIBUTE_BY_NONE)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!bms_is_member(rteIdentity, commonRteIdentities))
|
||||
{
|
||||
return false;
|
||||
|
@ -621,7 +662,7 @@ GetVarFromAssignedParam(List *parentPlannerParamList, Param *plannerParam)
|
|||
|
||||
/*
|
||||
* GenerateCommonEquivalence gets a list of unrelated AttributeEquiavalanceClass
|
||||
* whose all members are partition keys or a column of reference table.
|
||||
* whose all members are partition keys.
|
||||
*
|
||||
* With the equivalence classes, the function follows the algorithm
|
||||
* outlined below:
|
||||
|
@ -1084,8 +1125,14 @@ AddRteRelationToAttributeEquivalenceClass(AttributeEquivalenceClass **
|
|||
|
||||
Assert(rangeTableEntry->rtekind == RTE_RELATION);
|
||||
|
||||
if (PartitionMethod(relationId) != DISTRIBUTE_BY_NONE &&
|
||||
relationPartitionKey->varattno != varToBeAdded->varattno)
|
||||
/* we don't need reference tables in the equality on columns */
|
||||
if (relationPartitionKey == NULL)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
/* we're only interested in distribution columns */
|
||||
if (relationPartitionKey->varattno != varToBeAdded->varattno)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -1122,7 +1122,7 @@ WHERE
|
|||
colocated_table_test.value_1 = reference_table_test.value_1;
|
||||
DEBUG: only reference tables may be queried when targeting a reference table with distributed INSERT ... SELECT
|
||||
DEBUG: Collecting INSERT ... SELECT results on coordinator
|
||||
-- not pushable due to lack of equality between partition column and column of reference table
|
||||
-- safe to push down even lack of equality between partition column and column of reference table
|
||||
INSERT INTO
|
||||
colocated_table_test (value_1, value_2)
|
||||
SELECT
|
||||
|
@ -1132,9 +1132,13 @@ FROM
|
|||
WHERE
|
||||
colocated_table_test_2.value_4 = reference_table_test.value_4
|
||||
RETURNING value_1, value_2;
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
-- some more complex queries (Note that there are more complex queries in multi_insert_select.sql)
|
||||
value_1 | value_2
|
||||
---------+---------
|
||||
1 | 1
|
||||
2 | 2
|
||||
(2 rows)
|
||||
|
||||
-- similar query with the above, this time partition key but without equality
|
||||
INSERT INTO
|
||||
colocated_table_test (value_1, value_2)
|
||||
SELECT
|
||||
|
@ -1142,10 +1146,13 @@ SELECT
|
|||
FROM
|
||||
colocated_table_test_2, reference_table_test
|
||||
WHERE
|
||||
colocated_table_test_2.value_2 = reference_table_test.value_2
|
||||
colocated_table_test_2.value_1 > reference_table_test.value_2
|
||||
RETURNING value_1, value_2;
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
value_1 | value_2
|
||||
---------+---------
|
||||
2 | 1
|
||||
(1 row)
|
||||
|
||||
-- partition column value comes from reference table, goes via coordinator
|
||||
INSERT INTO
|
||||
colocated_table_test (value_1, value_2)
|
||||
|
@ -1606,7 +1613,7 @@ INSERT INTO reference_table_test VALUES (2, 2.0, '2', '2016-12-02');
|
|||
SELECT master_modify_multiple_shards('DELETE FROM colocated_table_test');
|
||||
master_modify_multiple_shards
|
||||
-------------------------------
|
||||
6
|
||||
9
|
||||
(1 row)
|
||||
|
||||
ROLLBACK;
|
||||
|
|
|
@ -60,18 +60,38 @@ SELECT count(*) FROM
|
|||
1
|
||||
(1 row)
|
||||
|
||||
-- Should not work, no equality between partition column and reference table
|
||||
SELECT * FROM
|
||||
(SELECT random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||
ON user_buy_test_table.item_id = users_ref_test_table.id) subquery_1;
|
||||
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
||||
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
||||
-- Should not work, no equality between partition column and reference table
|
||||
SELECT * FROM
|
||||
(SELECT random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||
ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_1;
|
||||
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
||||
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
||||
-- Should work, although no equality between partition column and reference table
|
||||
SELECT subquery_1.item_id FROM
|
||||
(SELECT user_buy_test_table.item_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||
ON user_buy_test_table.item_id = users_ref_test_table.id) subquery_1
|
||||
ORDER BY 1;
|
||||
item_id
|
||||
---------
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
(4 rows)
|
||||
|
||||
-- Should work, although no equality between partition column and reference table
|
||||
SELECT subquery_1.user_id FROM
|
||||
(SELECT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||
ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_1
|
||||
ORDER BY 1;
|
||||
user_id
|
||||
---------
|
||||
1
|
||||
2
|
||||
3
|
||||
3
|
||||
7
|
||||
7
|
||||
7
|
||||
7
|
||||
7
|
||||
7
|
||||
(10 rows)
|
||||
|
||||
-- Shouldn't work, reference table at the outer side is not allowed
|
||||
SELECT * FROM
|
||||
(SELECT random() FROM users_ref_test_table LEFT JOIN user_buy_test_table
|
||||
|
@ -93,6 +113,33 @@ SELECT * FROM
|
|||
ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1;
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
-- Equi join test with reference table on non-partition keys
|
||||
SELECT count(*) FROM
|
||||
(SELECT random() FROM user_buy_test_table JOIN users_ref_test_table
|
||||
ON user_buy_test_table.item_id = users_ref_test_table.id) subquery_1;
|
||||
count
|
||||
-------
|
||||
4
|
||||
(1 row)
|
||||
|
||||
-- Non-equi join test with reference table on non-partition keys
|
||||
SELECT count(*) FROM
|
||||
(SELECT random() FROM user_buy_test_table JOIN users_ref_test_table
|
||||
ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1;
|
||||
count
|
||||
-------
|
||||
10
|
||||
(1 row)
|
||||
|
||||
-- Non-equi left joins with reference tables on non-partition keys
|
||||
SELECT count(*) FROM
|
||||
(SELECT random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||
ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1;
|
||||
count
|
||||
-------
|
||||
10
|
||||
(1 row)
|
||||
|
||||
-- Should pass since reference table locates in the inner part of each left join
|
||||
SELECT count(*) FROM
|
||||
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
|
||||
|
@ -105,13 +152,238 @@ SELECT count(*) FROM
|
|||
2
|
||||
(1 row)
|
||||
|
||||
-- Should not pass since reference table locates in the outer part of right join
|
||||
-- two subqueries, each include joins with reference table
|
||||
-- also, two hash distributed tables are joined on partition keys
|
||||
SELECT count(*) FROM
|
||||
(SELECT DISTINCT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||
ON user_buy_test_table.item_id > users_ref_test_table.id AND users_ref_test_table.k_no > 88 AND user_buy_test_table.item_id < 88) subquery_1,
|
||||
(SELECT DISTINCT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||
ON user_buy_test_table.user_id > users_ref_test_table.id AND users_ref_test_table.k_no > 44 AND user_buy_test_table.user_id > 44) subquery_2
|
||||
WHERE subquery_1.user_id = subquery_2.user_id ;
|
||||
count
|
||||
-------
|
||||
4
|
||||
(1 row)
|
||||
|
||||
-- Should be able to push down since reference tables are inner joined
|
||||
-- with hash distributed tables, the results of those joins are the parts of
|
||||
-- an outer join
|
||||
SELECT subquery_2.id FROM
|
||||
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
|
||||
ON tt1.user_id = tt2.user_id) subquery_1
|
||||
RIGHT JOIN
|
||||
(SELECT tt1.user_id, ref.id, random() FROM user_buy_test_table as tt1 JOIN users_ref_test_table as ref
|
||||
ON tt1.user_id = ref.id) subquery_2 ON subquery_1.user_id = subquery_2.user_id ORDER BY 1 DESC LIMIT 5;
|
||||
id
|
||||
----
|
||||
3
|
||||
2
|
||||
1
|
||||
(3 rows)
|
||||
|
||||
-- the same query as the above, but this Citus fails to pushdown the query
|
||||
-- since the outer part of the right join doesn't include any joins
|
||||
SELECT * FROM
|
||||
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
|
||||
ON tt1.user_id = tt2.user_id) subquery_1
|
||||
RIGHT JOIN
|
||||
(SELECT tt1.user_id, random() FROM user_buy_test_table as tt1 JOIN users_ref_test_table as ref
|
||||
ON tt1.user_id = ref.id) subquery_2 ON subquery_1.user_id = subquery_2.user_id;
|
||||
(SELECT *, random() FROM (SELECT tt1.user_id, random() FROM user_buy_test_table as tt1 JOIN users_ref_test_table as ref
|
||||
ON tt1.user_id = ref.id) subquery_2_inner) subquery_2 ON subquery_1.user_id = subquery_2.user_id;
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
-- should be able to pushdown since reference table is in the
|
||||
-- inner part of the left join
|
||||
SELECT
|
||||
user_id, sum(value_1)
|
||||
FROM
|
||||
(SELECT
|
||||
users_table.user_id, users_table.value_1, random()
|
||||
FROM
|
||||
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
|
||||
INNER JOIN events_reference_table ON (events_reference_table.value_2 = users_table.user_id)
|
||||
) as foo
|
||||
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||
user_id | sum
|
||||
---------+----------
|
||||
12 | 92221920
|
||||
17 | 89192642
|
||||
96 | 85143744
|
||||
45 | 84267456
|
||||
90 | 84157047
|
||||
43 | 82110240
|
||||
1 | 81735612
|
||||
72 | 78992640
|
||||
67 | 72385516
|
||||
97 | 71002659
|
||||
(10 rows)
|
||||
|
||||
-- same query as above, reference table is wrapped into a subquery
|
||||
SELECT
|
||||
user_id, sum(value_1)
|
||||
FROM
|
||||
(SELECT
|
||||
users_table.user_id, users_table.value_1, random()
|
||||
FROM
|
||||
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
|
||||
INNER JOIN (SELECT *, random() FROM events_reference_table) as ref_all ON (ref_all.value_2 = users_table.user_id)
|
||||
) as foo
|
||||
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||
user_id | sum
|
||||
---------+----------
|
||||
12 | 92221920
|
||||
17 | 89192642
|
||||
96 | 85143744
|
||||
45 | 84267456
|
||||
90 | 84157047
|
||||
43 | 82110240
|
||||
1 | 81735612
|
||||
72 | 78992640
|
||||
67 | 72385516
|
||||
97 | 71002659
|
||||
(10 rows)
|
||||
|
||||
-- should be able to pushdown since reference table is in the
|
||||
-- inner part of the left join
|
||||
SELECT
|
||||
user_id, sum(value_1)
|
||||
FROM
|
||||
(SELECT
|
||||
users_table.user_id, users_table.value_1, random()
|
||||
FROM
|
||||
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
|
||||
LEFT JOIN events_reference_table ON (events_reference_table.value_2 = users_table.user_id)
|
||||
) as foo
|
||||
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||
user_id | sum
|
||||
---------+----------
|
||||
12 | 92221920
|
||||
17 | 89192642
|
||||
96 | 85143744
|
||||
45 | 84267456
|
||||
90 | 84157047
|
||||
43 | 82110240
|
||||
1 | 81735612
|
||||
72 | 78992640
|
||||
67 | 72385516
|
||||
97 | 71002659
|
||||
(10 rows)
|
||||
|
||||
-- should not be able to pushdown since reference table is in the
|
||||
-- direct outer part of the left join
|
||||
SELECT
|
||||
user_id, sum(value_1)
|
||||
FROM
|
||||
(SELECT
|
||||
users_table.user_id, users_table.value_1, random()
|
||||
FROM
|
||||
events_reference_table LEFT JOIN users_table ON (users_table.user_id = events_reference_table.value_2)
|
||||
LEFT JOIN events_table ON (events_table.user_id = users_table.user_id)
|
||||
) as foo
|
||||
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
-- should not be able to pushdown since reference table is in the
|
||||
-- direct outer part of the left join wrapped into a subquery
|
||||
SELECT
|
||||
*
|
||||
FROM
|
||||
(SELECT *, random() FROM events_reference_table) as ref_all LEFT JOIN users_table
|
||||
ON (users_table.user_id = ref_all.value_2);
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
-- should not be able to pushdown since reference table is in the
|
||||
-- outer part of the left join
|
||||
SELECT
|
||||
user_id, sum(value_1)
|
||||
FROM
|
||||
(SELECT
|
||||
users_table.user_id, users_table.value_1, random()
|
||||
FROM
|
||||
events_reference_table LEFT JOIN users_table ON (users_table.user_id = events_reference_table.value_2)
|
||||
LEFT JOIN events_table ON (events_table.user_id = users_table.user_id)
|
||||
) as foo
|
||||
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
-- should be able to pushdown since reference table is in the
|
||||
-- inner part of the left join
|
||||
SELECT * FROM
|
||||
(
|
||||
SELECT DISTINCT foo.user_id
|
||||
FROM
|
||||
((SELECT
|
||||
"events"."time", "events"."user_id" as event_user_id, value_2 as event_val_2, random()
|
||||
FROM
|
||||
events_reference_table as "events"
|
||||
WHERE
|
||||
event_type > 80) as "temp_data_queries"
|
||||
INNER JOIN
|
||||
(SELECT
|
||||
"users"."user_id"
|
||||
FROM
|
||||
users_table as "users"
|
||||
WHERE
|
||||
user_id > 80 and value_2 = 5) as foo_in ON (event_val_2 = user_id)) as foo LEFT JOIN
|
||||
(SELECT user_id as user_user_id FROM users_table) as fooo ON (user_id = user_user_id)) as bar;
|
||||
user_id
|
||||
---------
|
||||
89
|
||||
(1 row)
|
||||
|
||||
-- the same query but this time reference table is in the outer part of the query
|
||||
SELECT * FROM
|
||||
(
|
||||
SELECT DISTINCT foo.user_id
|
||||
FROM
|
||||
((SELECT
|
||||
"events"."time", "events"."user_id" as event_user_id, value_2 as event_val_2, random()
|
||||
FROM
|
||||
events_reference_table as "events"
|
||||
WHERE
|
||||
event_type > 80) as "temp_data_queries"
|
||||
LEFT JOIN
|
||||
(SELECT
|
||||
"users"."user_id"
|
||||
FROM
|
||||
users_table as "users"
|
||||
WHERE
|
||||
user_id > 80 and value_2 = 5) as foo_in ON (event_val_2 = user_id)) as foo LEFT JOIN
|
||||
(SELECT user_id as user_user_id FROM users_table) as fooo ON (user_id = user_user_id)) as bar;
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
-- we could even suuport the following where the subquery
|
||||
-- on the outer part of the left join contains a reference table
|
||||
SELECT max(events_all.cnt), events_all.usr_id
|
||||
FROM
|
||||
(SELECT users_table.user_id as usr_id,
|
||||
count(*) as cnt
|
||||
FROM events_reference_table
|
||||
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id) GROUP BY users_table.user_id) AS events_all
|
||||
LEFT JOIN events_table ON (events_all.usr_id = events_table.user_id) GROUP BY 2 ORDER BY 1 DESC, 2 DESC LIMIT 5;
|
||||
max | usr_id
|
||||
-------+--------
|
||||
14605 | 23
|
||||
13090 | 17
|
||||
12915 | 25
|
||||
12317 | 90
|
||||
12285 | 87
|
||||
(5 rows)
|
||||
|
||||
-- but, we fail to pushdown the following query where join that reference table appears
|
||||
-- wrapped into a subquery
|
||||
SELECT max(events_all.cnt),
|
||||
events_all.usr_id
|
||||
FROM(
|
||||
SELECT *, random() FROM
|
||||
(SELECT users_table.user_id AS usr_id, count(*) AS cnt
|
||||
FROM events_reference_table
|
||||
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
|
||||
GROUP BY users_table.user_id) AS events_all_inner) AS events_all
|
||||
LEFT JOIN events_table ON (events_all.usr_id = events_table.user_id)
|
||||
GROUP BY 2
|
||||
ORDER BY 1 DESC,
|
||||
2 DESC
|
||||
LIMIT 5;
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
-- LATERAL JOINs used with INNER JOINs with reference tables
|
||||
|
@ -193,10 +465,10 @@ SELECT
|
|||
count(*) AS value, "generated_group_field"
|
||||
FROM
|
||||
(SELECT
|
||||
DISTINCT "pushedDownQuery"."real_user_id", "generated_group_field"
|
||||
DISTINCT "pushedDownQuery"."user_id", "generated_group_field"
|
||||
FROM
|
||||
(SELECT
|
||||
"eventQuery"."real_user_id", "eventQuery"."time", random(), ("eventQuery"."value_2") AS "generated_group_field"
|
||||
"eventQuery"."user_id", "eventQuery"."time", random(), ("eventQuery"."value_2") AS "generated_group_field"
|
||||
FROM
|
||||
(SELECT
|
||||
*
|
||||
|
@ -204,7 +476,7 @@ SELECT
|
|||
(SELECT
|
||||
"events"."time", "events"."user_id", "events"."value_2"
|
||||
FROM
|
||||
events_reference_table as "events"
|
||||
events_table as "events"
|
||||
WHERE
|
||||
user_id > 10 and user_id < 40 AND event_type IN (40, 41, 42, 43, 44, 45) ) "temp_data_queries"
|
||||
INNER JOIN
|
||||
|
@ -355,8 +627,8 @@ LIMIT 10;
|
|||
(6 rows)
|
||||
|
||||
SET citus.subquery_pushdown to OFF;
|
||||
-- LEFT JOINs used with INNER JOINs should error out since reference table exist in the
|
||||
-- left side of the LEFT JOIN.
|
||||
-- LEFT JOINs used with INNER JOINs should not error out since reference table joined
|
||||
-- with hash table that Citus can push down
|
||||
SELECT
|
||||
count(*) AS cnt, "generated_group_field"
|
||||
FROM
|
||||
|
@ -393,8 +665,20 @@ count(*) AS cnt, "generated_group_field"
|
|||
ORDER BY
|
||||
cnt DESC, generated_group_field ASC
|
||||
LIMIT 10;
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
cnt | generated_group_field
|
||||
-----+-----------------------
|
||||
176 | 551
|
||||
176 | 569
|
||||
176 | 645
|
||||
176 | 713
|
||||
176 | 734
|
||||
88 | 3
|
||||
88 | 5
|
||||
88 | 15
|
||||
88 | 32
|
||||
88 | 68
|
||||
(10 rows)
|
||||
|
||||
-- RIGHT JOINs used with INNER JOINs should error out since reference table exist in the
|
||||
-- right side of the RIGHT JOIN.
|
||||
SELECT
|
||||
|
@ -435,6 +719,133 @@ count(*) AS cnt, "generated_group_field"
|
|||
LIMIT 10;
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
-- right join where the inner part of the join includes a reference table
|
||||
-- joined with hash partitioned table using non-equi join
|
||||
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event
|
||||
FROM (
|
||||
SELECT
|
||||
t1.user_id,
|
||||
array_agg(event ORDER BY time) AS events_table,
|
||||
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||||
FROM (
|
||||
(
|
||||
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_reference_table AS e
|
||||
WHERE u.user_id > e.user_id
|
||||
AND u.user_id >= 10
|
||||
AND u.user_id <= 25
|
||||
AND e.event_type IN (100, 101, 102)
|
||||
)
|
||||
) t1 RIGHT JOIN (
|
||||
SELECT DISTINCT user_id,
|
||||
'Has done event'::TEXT AS hasdone_event
|
||||
FROM events_table AS e
|
||||
WHERE e.user_id >= 10
|
||||
AND e.user_id <= 25
|
||||
AND e.event_type IN (106, 107, 108)
|
||||
) t2 ON (t1.user_id = t2.user_id)
|
||||
GROUP BY t1.user_id, hasdone_event
|
||||
) t GROUP BY user_id, hasdone_event
|
||||
ORDER BY user_id;
|
||||
user_id | sum | length | hasdone_event
|
||||
---------+-----+--------+----------------
|
||||
11 | 306 | 14 | Has done event
|
||||
12 | 363 | 14 | Has done event
|
||||
14 | 510 | 14 | Has done event
|
||||
18 | 600 | 14 | Has done event
|
||||
19 | 618 | 14 | Has done event
|
||||
(5 rows)
|
||||
|
||||
-- a similar query as the above, with non-partition key comparison
|
||||
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event
|
||||
FROM (
|
||||
SELECT
|
||||
t1.user_id,
|
||||
array_agg(event ORDER BY time) AS events_table,
|
||||
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||||
FROM (
|
||||
(
|
||||
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_reference_table AS e
|
||||
WHERE u.value_1 > e.user_id
|
||||
AND u.user_id >= 10
|
||||
AND u.user_id <= 25
|
||||
AND e.event_type >= 125 AND e.event_type < 130
|
||||
)
|
||||
) t1 RIGHT JOIN (
|
||||
SELECT DISTINCT user_id,
|
||||
'Has done event'::TEXT AS hasdone_event
|
||||
FROM events_table AS e
|
||||
WHERE e.user_id >= 10
|
||||
AND e.user_id <= 25
|
||||
AND e.event_type >= 130 AND e.event_type < 135
|
||||
) t2 ON (t1.user_id = t2.user_id)
|
||||
GROUP BY t1.user_id, hasdone_event
|
||||
) t GROUP BY user_id, hasdone_event
|
||||
ORDER BY user_id;
|
||||
user_id | sum | length | hasdone_event
|
||||
---------+------+--------+----------------
|
||||
10 | 6018 | 14 | Has done event
|
||||
16 | 5373 | 14 | Has done event
|
||||
17 | 5683 | 14 | Has done event
|
||||
18 | 5321 | 14 | Has done event
|
||||
(4 rows)
|
||||
|
||||
-- LEFT JOINs used with INNER JOINs
|
||||
-- events_table and users_reference_table joined
|
||||
-- with event_table.non_part_key < reference_table.any_key
|
||||
SELECT
|
||||
count(*) AS cnt, "generated_group_field"
|
||||
FROM
|
||||
(SELECT
|
||||
"eventQuery"."user_id", random(), generated_group_field
|
||||
FROM
|
||||
(SELECT
|
||||
"multi_group_wrapper_1".*, generated_group_field, random()
|
||||
FROM
|
||||
(SELECT *
|
||||
FROM
|
||||
(SELECT
|
||||
"events"."time", "events"."user_id" as event_user_id
|
||||
FROM
|
||||
events_table as "events"
|
||||
WHERE
|
||||
user_id > 80) "temp_data_queries"
|
||||
INNER JOIN
|
||||
(SELECT
|
||||
"users"."user_id"
|
||||
FROM
|
||||
users_reference_table as "users"
|
||||
WHERE
|
||||
user_id > 80 and value_2 = 5) "user_filters_1"
|
||||
ON ("temp_data_queries".event_user_id < "user_filters_1".user_id)) AS "multi_group_wrapper_1"
|
||||
RIGHT JOIN
|
||||
(SELECT
|
||||
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
|
||||
FROM
|
||||
users_table as "users") "left_group_by_1"
|
||||
ON ("left_group_by_1".user_id = "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
|
||||
group BY
|
||||
"generated_group_field"
|
||||
ORDER BY
|
||||
cnt DESC, generated_group_field ASC
|
||||
LIMIT 10;
|
||||
cnt | generated_group_field
|
||||
-----+-----------------------
|
||||
540 | 814
|
||||
533 | 746
|
||||
473 | 914
|
||||
449 | 684
|
||||
445 | 715
|
||||
423 | 191
|
||||
419 | 39
|
||||
415 | 108
|
||||
414 | 819
|
||||
411 | 642
|
||||
(10 rows)
|
||||
|
||||
-- Outer subquery with reference table
|
||||
SELECT "some_users_data".user_id, lastseen
|
||||
FROM
|
||||
|
@ -468,9 +879,9 @@ limit 50;
|
|||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
--
|
||||
-- UNIONs and JOINs with reference tables, shoukld error out
|
||||
-- UNIONs and JOINs with reference tables, should error out
|
||||
--
|
||||
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
||||
SELECT ("final_query"."event_types") as types
|
||||
FROM
|
||||
( SELECT *, random()
|
||||
FROM
|
||||
|
@ -484,7 +895,7 @@ FROM
|
|||
(SELECT
|
||||
"events"."user_id", "events"."time", 0 AS event
|
||||
FROM
|
||||
events_reference_table as "events"
|
||||
events_table as "events"
|
||||
WHERE
|
||||
event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1)
|
||||
UNION
|
||||
|
@ -494,7 +905,7 @@ FROM
|
|||
(SELECT
|
||||
"events"."user_id", "events"."time", 1 AS event
|
||||
FROM
|
||||
events_table as "events"
|
||||
events_reference_table as "events"
|
||||
WHERE
|
||||
event_type IN (15, 16, 17, 18, 19) ) events_subquery_2)
|
||||
UNION
|
||||
|
@ -526,8 +937,6 @@ INNER JOIN
|
|||
WHERE
|
||||
value_1 > 50 and value_1 < 70) AS t
|
||||
ON (t.user_id = q.user_id)) as final_query
|
||||
GROUP BY
|
||||
types
|
||||
ORDER BY
|
||||
types;
|
||||
ERROR: cannot push down this subquery
|
||||
|
@ -560,15 +969,15 @@ FROM
|
|||
SELECT * FROM
|
||||
(
|
||||
SELECT
|
||||
max("events"."time"),
|
||||
max("users"."time"),
|
||||
0 AS event,
|
||||
"events"."user_id"
|
||||
"users"."user_id"
|
||||
FROM
|
||||
events_reference_table as "events", users_table as "users"
|
||||
WHERE
|
||||
events.user_id = users.user_id AND
|
||||
event_type IN (10, 11, 12, 13, 14, 15)
|
||||
GROUP BY "events"."user_id"
|
||||
GROUP BY "users"."user_id"
|
||||
) as events_subquery_5
|
||||
) events_subquery_2)
|
||||
UNION
|
||||
|
@ -660,6 +1069,117 @@ GROUP BY types
|
|||
ORDER BY types;
|
||||
ERROR: cannot push down this subquery
|
||||
DETAIL: Reference tables are not supported with union operator
|
||||
-- just a sanity check that we don't allow this if the reference table is on the
|
||||
-- left part of the left join
|
||||
SELECT count(*) FROM
|
||||
(SELECT random() FROM users_ref_test_table LEFT JOIN user_buy_test_table
|
||||
ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1;
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
-- we don't allow non equi join among hash partitioned tables
|
||||
SELECT count(*) FROM
|
||||
(SELECT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||
ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1,
|
||||
(SELECT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||
ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_2
|
||||
WHERE subquery_1.user_id != subquery_2.user_id ;
|
||||
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
||||
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
||||
-- we cannot push this query since hash partitioned tables
|
||||
-- are not joined on partition keys with equality
|
||||
SELECT
|
||||
count(*) AS cnt, "generated_group_field"
|
||||
FROM
|
||||
(SELECT
|
||||
"eventQuery"."user_id", random(), generated_group_field
|
||||
FROM
|
||||
(SELECT
|
||||
"multi_group_wrapper_1".*, generated_group_field, random()
|
||||
FROM
|
||||
(SELECT *
|
||||
FROM
|
||||
(SELECT
|
||||
"events"."time", "events"."user_id" as event_user_id
|
||||
FROM
|
||||
events_table as "events"
|
||||
WHERE
|
||||
user_id > 80) "temp_data_queries"
|
||||
INNER JOIN
|
||||
(SELECT
|
||||
"users"."user_id"
|
||||
FROM
|
||||
users_reference_table as "users"
|
||||
WHERE
|
||||
user_id > 80 and value_2 = 5) "user_filters_1"
|
||||
ON ("temp_data_queries".event_user_id < "user_filters_1".user_id)) AS "multi_group_wrapper_1"
|
||||
RIGHT JOIN
|
||||
(SELECT
|
||||
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
|
||||
FROM
|
||||
users_table as "users") "left_group_by_1"
|
||||
ON ("left_group_by_1".user_id > "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
|
||||
group BY
|
||||
"generated_group_field"
|
||||
ORDER BY
|
||||
cnt DESC, generated_group_field ASC
|
||||
LIMIT 10;
|
||||
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
||||
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
||||
-- two hash partitioned relations are not joined
|
||||
-- on partiton keys although reference table is fine
|
||||
-- to push down
|
||||
SELECT
|
||||
u1.user_id, count(*)
|
||||
FROM
|
||||
events_table as e1, users_table as u1
|
||||
WHERE
|
||||
event_type IN
|
||||
(SELECT
|
||||
event_type
|
||||
FROM
|
||||
events_reference_table as e2
|
||||
WHERE
|
||||
value_2 = 15 AND
|
||||
value_3 > 25 AND
|
||||
e1.value_2 > e2.value_2
|
||||
)
|
||||
AND u1.user_id > e1.user_id
|
||||
GROUP BY 1
|
||||
ORDER BY 2 DESC, 1 DESC
|
||||
LIMIT 5;
|
||||
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
||||
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
||||
SELECT foo.user_id FROM
|
||||
(
|
||||
SELECT m.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
|
||||
WHERE event_type > 100000
|
||||
) as foo;
|
||||
user_id
|
||||
---------
|
||||
(0 rows)
|
||||
|
||||
-- not supported since group by is on the reference table column
|
||||
SELECT foo.user_id FROM
|
||||
(
|
||||
SELECT r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
|
||||
GROUP BY r.user_id
|
||||
) as foo;
|
||||
ERROR: cannot push down this subquery
|
||||
DETAIL: Group by list without partition column is currently unsupported
|
||||
-- not supported since distinct is on the reference table column
|
||||
SELECT foo.user_id FROM
|
||||
(
|
||||
SELECT DISTINCT r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
|
||||
) as foo;
|
||||
ERROR: cannot push down this subquery
|
||||
DETAIL: Distinct on columns without partition column is currently unsupported
|
||||
-- not supported since distinct on is on the reference table column
|
||||
SELECT foo.user_id FROM
|
||||
(
|
||||
SELECT DISTINCT ON(r.user_id) r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
|
||||
) as foo;
|
||||
ERROR: cannot push down this subquery
|
||||
DETAIL: Distinct on columns without partition column is currently unsupported
|
||||
DROP TABLE user_buy_test_table;
|
||||
DROP TABLE users_ref_test_table;
|
||||
DROP TABLE users_return_test_table;
|
||||
|
|
|
@ -1,33 +1,5 @@
|
|||
--
|
||||
-- queries to test the subquery pushdown on reference tables
|
||||
-- subqueries in WHERE with greater operator
|
||||
SELECT
|
||||
user_id
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
value_2 >
|
||||
(SELECT
|
||||
max(value_2)
|
||||
FROM
|
||||
events_reference_table
|
||||
WHERE
|
||||
users_table.user_id = events_reference_table.user_id AND event_type = 50
|
||||
GROUP BY
|
||||
user_id
|
||||
)
|
||||
GROUP BY user_id
|
||||
HAVING count(*) > 66
|
||||
ORDER BY user_id
|
||||
LIMIT 5;
|
||||
user_id
|
||||
---------
|
||||
49
|
||||
55
|
||||
56
|
||||
63
|
||||
(4 rows)
|
||||
|
||||
-- subqueries in WHERE with IN operator
|
||||
SELECT
|
||||
user_id
|
||||
|
@ -91,10 +63,10 @@ WHERE
|
|||
)
|
||||
LIMIT 3;
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
DETAIL: Reference tables are not allowed in FROM clause when the query has subqueries in WHERE clause
|
||||
-- subqueries in WHERE with IN operator without equality
|
||||
SELECT
|
||||
user_id
|
||||
users_table.user_id, count(*)
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
|
@ -106,59 +78,15 @@ WHERE
|
|||
WHERE
|
||||
users_table.user_id > events_reference_table.user_id
|
||||
)
|
||||
GROUP BY user_id
|
||||
ORDER BY user_id
|
||||
GROUP BY users_table.user_id
|
||||
ORDER BY 2 DESC, 1 DESC
|
||||
LIMIT 3;
|
||||
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
||||
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
||||
-- have reference table without any equality, should error out
|
||||
SELECT
|
||||
user_id
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
value_2 >
|
||||
(SELECT
|
||||
max(value_2)
|
||||
FROM
|
||||
events_reference_table
|
||||
WHERE
|
||||
event_type = 50
|
||||
GROUP BY
|
||||
user_id
|
||||
)
|
||||
GROUP BY user_id
|
||||
HAVING count(*) > 66
|
||||
ORDER BY user_id
|
||||
LIMIT 5;
|
||||
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
||||
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
||||
-- users that appeared more than 118 times, should run since the reference table
|
||||
-- on the right side of the semi join
|
||||
SELECT
|
||||
user_id
|
||||
FROM
|
||||
users_table
|
||||
WHERE 118 <=
|
||||
(SELECT
|
||||
count(*)
|
||||
FROM
|
||||
events_reference_table
|
||||
WHERE
|
||||
users_table.user_id = events_reference_table.user_id
|
||||
GROUP BY
|
||||
user_id)
|
||||
GROUP BY
|
||||
user_id
|
||||
ORDER BY
|
||||
user_id;
|
||||
user_id
|
||||
---------
|
||||
13
|
||||
17
|
||||
23
|
||||
25
|
||||
(4 rows)
|
||||
user_id | count
|
||||
---------+-------
|
||||
87 | 117
|
||||
59 | 115
|
||||
46 | 115
|
||||
(3 rows)
|
||||
|
||||
-- should error out since reference table exist on the left side
|
||||
-- of the left lateral join
|
||||
|
@ -224,3 +152,194 @@ SELECT user_id, value_2 FROM users_table WHERE
|
|||
ORDER BY 1, 2;
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
-- non-partition key equality with reference table
|
||||
SELECT
|
||||
user_id, count(*)
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
value_3 =ANY(SELECT value_2 FROM users_reference_table WHERE value_1 >= 10 AND value_1 <= 20)
|
||||
GROUP BY 1 ORDER BY 2 DESC, 1 DESC LIMIT 5;
|
||||
user_id | count
|
||||
---------+-------
|
||||
48 | 18
|
||||
26 | 18
|
||||
15 | 17
|
||||
54 | 16
|
||||
35 | 15
|
||||
(5 rows)
|
||||
|
||||
-- non-partition key comparison with reference table
|
||||
SELECT
|
||||
user_id, count(*)
|
||||
FROM
|
||||
events_table as e1
|
||||
WHERE
|
||||
event_type IN
|
||||
(SELECT
|
||||
event_type
|
||||
FROM
|
||||
events_reference_table as e2
|
||||
WHERE
|
||||
value_2 = 15 AND
|
||||
value_3 > 25 AND
|
||||
e1.value_2 > e2.value_2
|
||||
)
|
||||
GROUP BY 1
|
||||
ORDER BY 2 DESC, 1 DESC
|
||||
LIMIT 5;
|
||||
user_id | count
|
||||
---------+-------
|
||||
3 | 5
|
||||
56 | 4
|
||||
99 | 2
|
||||
94 | 2
|
||||
92 | 2
|
||||
(5 rows)
|
||||
|
||||
-- subqueries in both WHERE and FROM clauses
|
||||
-- should work since reference table is on the
|
||||
-- inner part of the join
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_1 > 101 AND value_1 < 110
|
||||
AND value_2 >= 5
|
||||
AND user_id IN
|
||||
(
|
||||
SELECT
|
||||
e1.user_id
|
||||
FROM (
|
||||
-- Get the first time each user viewed the homepage.
|
||||
SELECT
|
||||
user_id,
|
||||
1 AS view_homepage,
|
||||
min(time) AS view_homepage_time
|
||||
FROM events_table
|
||||
WHERE
|
||||
event_type IN (10, 20, 30, 40, 50, 60, 70, 80, 90)
|
||||
GROUP BY user_id
|
||||
) e1 LEFT JOIN LATERAL (
|
||||
SELECT
|
||||
user_id,
|
||||
1 AS use_demo,
|
||||
time AS use_demo_time
|
||||
FROM events_table
|
||||
WHERE
|
||||
user_id = e1.user_id AND
|
||||
event_type IN (11, 21, 31, 41, 51, 61, 71, 81, 91)
|
||||
ORDER BY time
|
||||
) e2 ON true LEFT JOIN LATERAL (
|
||||
SELECT
|
||||
user_id,
|
||||
1 AS enter_credit_card,
|
||||
time AS enter_credit_card_time
|
||||
FROM events_table
|
||||
WHERE
|
||||
user_id = e2.user_id AND
|
||||
event_type IN (12, 22, 32, 42, 52, 62, 72, 82, 92)
|
||||
ORDER BY time
|
||||
) e3 ON true LEFT JOIN LATERAL (
|
||||
SELECT
|
||||
1 AS submit_card_info,
|
||||
user_id,
|
||||
time AS enter_credit_card_time
|
||||
FROM events_table
|
||||
WHERE
|
||||
user_id = e3.user_id AND
|
||||
event_type IN (13, 23, 33, 43, 53, 63, 73, 83, 93)
|
||||
ORDER BY time
|
||||
) e4 ON true LEFT JOIN LATERAL (
|
||||
SELECT
|
||||
1 AS see_bought_screen
|
||||
FROM events_reference_table
|
||||
WHERE
|
||||
user_id = e4.user_id AND
|
||||
event_type IN (14, 24, 34, 44, 54, 64, 74, 84, 94)
|
||||
ORDER BY time
|
||||
) e5 ON true
|
||||
group by e1.user_id
|
||||
HAVING sum(submit_card_info) > 0
|
||||
)
|
||||
ORDER BY 1, 2;
|
||||
user_id | value_2
|
||||
---------+---------
|
||||
5 | 884
|
||||
42 | 55
|
||||
42 | 471
|
||||
51 | 758
|
||||
72 | 897
|
||||
82 | 691
|
||||
95 | 951
|
||||
(7 rows)
|
||||
|
||||
-- reference tables are not allowed if there is sublink
|
||||
SELECT
|
||||
count(*)
|
||||
FROM
|
||||
users_reference_table
|
||||
WHERE user_id
|
||||
NOT IN
|
||||
(SELECT users_table.value_2 FROM users_table JOIN users_reference_table as u2 ON users_table.value_2 = u2.value_2);
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: Reference tables are not allowed in FROM clause when the query has subqueries in WHERE clause
|
||||
-- reference tables are not allowed if there is sublink
|
||||
SELECT count(*)
|
||||
FROM
|
||||
(SELECT
|
||||
user_id, random() FROM users_reference_table) AS vals
|
||||
WHERE vals.user_id NOT IN
|
||||
(SELECT users_table.value_2
|
||||
FROM users_table
|
||||
JOIN users_reference_table AS u2 ON users_table.value_2 = u2.value_2);
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: Reference tables are not allowed in FROM clause when the query has subqueries in WHERE clause
|
||||
-- reference tables are not allowed if there is sublink
|
||||
SELECT user_id,
|
||||
count(*)
|
||||
FROM users_reference_table
|
||||
WHERE value_2 > ALL
|
||||
(SELECT min(value_2)
|
||||
FROM events_table
|
||||
WHERE event_type > 50 AND users_reference_table.user_id = events_table.user_id
|
||||
GROUP BY user_id)
|
||||
GROUP BY user_id
|
||||
HAVING count(*) > 66
|
||||
ORDER BY 2 DESC,
|
||||
1 DESC
|
||||
LIMIT 5;
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: Reference tables are not allowed in FROM clause when the query has subqueries in WHERE clause
|
||||
-- reference tables are not allowed if there is sublink
|
||||
-- this time in the subquery
|
||||
SELECT *
|
||||
FROM users_table
|
||||
WHERE user_id IN
|
||||
(SELECT users_table.user_id
|
||||
FROM users_table,
|
||||
users_reference_table
|
||||
WHERE users_reference_table.user_id NOT IN
|
||||
(SELECT value_2
|
||||
FROM users_reference_table AS u2));
|
||||
ERROR: cannot push down this subquery
|
||||
DETAIL: Reference tables are not allowed in FROM clause when the query has subqueries in WHERE clause
|
||||
-- not supported since GROUP BY references to an upper level query
|
||||
SELECT
|
||||
user_id
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
value_2 >
|
||||
(SELECT
|
||||
max(value_2)
|
||||
FROM
|
||||
events_reference_table
|
||||
WHERE
|
||||
users_table.user_id = events_reference_table.user_id AND event_type = 50
|
||||
GROUP BY
|
||||
users_table.user_id
|
||||
)
|
||||
GROUP BY user_id
|
||||
HAVING count(*) > 66
|
||||
ORDER BY user_id
|
||||
LIMIT 5;
|
||||
ERROR: cannot push down this subquery
|
||||
DETAIL: Group by list without partition column is currently unsupported
|
||||
|
|
|
@ -356,6 +356,7 @@ SET citus.shard_max_size TO "1MB";
|
|||
|
||||
CREATE TABLE events_reference_table (like events_table including all);
|
||||
SELECT create_reference_table('events_reference_table');
|
||||
CREATE INDEX events_ref_val2 on events_reference_table(value_2);
|
||||
INSERT INTO events_reference_table SELECT * FROM events_table;
|
||||
|
||||
CREATE TABLE users_reference_table (like users_table including all);
|
||||
|
|
|
@ -428,6 +428,7 @@ SELECT create_reference_table('events_reference_table');
|
|||
|
||||
(1 row)
|
||||
|
||||
CREATE INDEX events_ref_val2 on events_reference_table(value_2);
|
||||
INSERT INTO events_reference_table SELECT * FROM events_table;
|
||||
CREATE TABLE users_reference_table (like users_table including all);
|
||||
SELECT create_reference_table('users_reference_table');
|
||||
|
|
|
@ -707,7 +707,7 @@ FROM
|
|||
WHERE
|
||||
colocated_table_test.value_1 = reference_table_test.value_1;
|
||||
|
||||
-- not pushable due to lack of equality between partition column and column of reference table
|
||||
-- safe to push down even lack of equality between partition column and column of reference table
|
||||
INSERT INTO
|
||||
colocated_table_test (value_1, value_2)
|
||||
SELECT
|
||||
|
@ -718,7 +718,7 @@ WHERE
|
|||
colocated_table_test_2.value_4 = reference_table_test.value_4
|
||||
RETURNING value_1, value_2;
|
||||
|
||||
-- some more complex queries (Note that there are more complex queries in multi_insert_select.sql)
|
||||
-- similar query with the above, this time partition key but without equality
|
||||
INSERT INTO
|
||||
colocated_table_test (value_1, value_2)
|
||||
SELECT
|
||||
|
@ -726,7 +726,7 @@ SELECT
|
|||
FROM
|
||||
colocated_table_test_2, reference_table_test
|
||||
WHERE
|
||||
colocated_table_test_2.value_2 = reference_table_test.value_2
|
||||
colocated_table_test_2.value_1 > reference_table_test.value_2
|
||||
RETURNING value_1, value_2;
|
||||
|
||||
-- partition column value comes from reference table, goes via coordinator
|
||||
|
|
|
@ -42,15 +42,17 @@ SELECT count(*) FROM
|
|||
(SELECT random(), k_no FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||
ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1 WHERE k_no = 47;
|
||||
|
||||
-- Should not work, no equality between partition column and reference table
|
||||
SELECT * FROM
|
||||
(SELECT random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||
ON user_buy_test_table.item_id = users_ref_test_table.id) subquery_1;
|
||||
-- Should work, although no equality between partition column and reference table
|
||||
SELECT subquery_1.item_id FROM
|
||||
(SELECT user_buy_test_table.item_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||
ON user_buy_test_table.item_id = users_ref_test_table.id) subquery_1
|
||||
ORDER BY 1;
|
||||
|
||||
-- Should not work, no equality between partition column and reference table
|
||||
SELECT * FROM
|
||||
(SELECT random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||
ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_1;
|
||||
-- Should work, although no equality between partition column and reference table
|
||||
SELECT subquery_1.user_id FROM
|
||||
(SELECT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||
ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_1
|
||||
ORDER BY 1;
|
||||
|
||||
-- Shouldn't work, reference table at the outer side is not allowed
|
||||
SELECT * FROM
|
||||
|
@ -67,6 +69,21 @@ SELECT * FROM
|
|||
(SELECT random() FROM user_buy_test_table RIGHT JOIN users_ref_test_table
|
||||
ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1;
|
||||
|
||||
-- Equi join test with reference table on non-partition keys
|
||||
SELECT count(*) FROM
|
||||
(SELECT random() FROM user_buy_test_table JOIN users_ref_test_table
|
||||
ON user_buy_test_table.item_id = users_ref_test_table.id) subquery_1;
|
||||
|
||||
-- Non-equi join test with reference table on non-partition keys
|
||||
SELECT count(*) FROM
|
||||
(SELECT random() FROM user_buy_test_table JOIN users_ref_test_table
|
||||
ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1;
|
||||
|
||||
-- Non-equi left joins with reference tables on non-partition keys
|
||||
SELECT count(*) FROM
|
||||
(SELECT random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||
ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1;
|
||||
|
||||
-- Should pass since reference table locates in the inner part of each left join
|
||||
SELECT count(*) FROM
|
||||
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
|
||||
|
@ -75,13 +92,172 @@ SELECT count(*) FROM
|
|||
(SELECT tt1.user_id, random() FROM user_buy_test_table as tt1 LEFT JOIN users_ref_test_table as ref
|
||||
ON tt1.user_id = ref.id) subquery_2 ON subquery_1.user_id = subquery_2.user_id;
|
||||
|
||||
-- Should not pass since reference table locates in the outer part of right join
|
||||
-- two subqueries, each include joins with reference table
|
||||
-- also, two hash distributed tables are joined on partition keys
|
||||
SELECT count(*) FROM
|
||||
(SELECT DISTINCT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||
ON user_buy_test_table.item_id > users_ref_test_table.id AND users_ref_test_table.k_no > 88 AND user_buy_test_table.item_id < 88) subquery_1,
|
||||
(SELECT DISTINCT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||
ON user_buy_test_table.user_id > users_ref_test_table.id AND users_ref_test_table.k_no > 44 AND user_buy_test_table.user_id > 44) subquery_2
|
||||
WHERE subquery_1.user_id = subquery_2.user_id ;
|
||||
|
||||
-- Should be able to push down since reference tables are inner joined
|
||||
-- with hash distributed tables, the results of those joins are the parts of
|
||||
-- an outer join
|
||||
SELECT subquery_2.id FROM
|
||||
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
|
||||
ON tt1.user_id = tt2.user_id) subquery_1
|
||||
RIGHT JOIN
|
||||
(SELECT tt1.user_id, ref.id, random() FROM user_buy_test_table as tt1 JOIN users_ref_test_table as ref
|
||||
ON tt1.user_id = ref.id) subquery_2 ON subquery_1.user_id = subquery_2.user_id ORDER BY 1 DESC LIMIT 5;
|
||||
|
||||
-- the same query as the above, but this Citus fails to pushdown the query
|
||||
-- since the outer part of the right join doesn't include any joins
|
||||
SELECT * FROM
|
||||
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
|
||||
ON tt1.user_id = tt2.user_id) subquery_1
|
||||
RIGHT JOIN
|
||||
(SELECT tt1.user_id, random() FROM user_buy_test_table as tt1 JOIN users_ref_test_table as ref
|
||||
ON tt1.user_id = ref.id) subquery_2 ON subquery_1.user_id = subquery_2.user_id;
|
||||
(SELECT *, random() FROM (SELECT tt1.user_id, random() FROM user_buy_test_table as tt1 JOIN users_ref_test_table as ref
|
||||
ON tt1.user_id = ref.id) subquery_2_inner) subquery_2 ON subquery_1.user_id = subquery_2.user_id;
|
||||
|
||||
-- should be able to pushdown since reference table is in the
|
||||
-- inner part of the left join
|
||||
SELECT
|
||||
user_id, sum(value_1)
|
||||
FROM
|
||||
(SELECT
|
||||
users_table.user_id, users_table.value_1, random()
|
||||
FROM
|
||||
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
|
||||
INNER JOIN events_reference_table ON (events_reference_table.value_2 = users_table.user_id)
|
||||
) as foo
|
||||
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||
|
||||
-- same query as above, reference table is wrapped into a subquery
|
||||
SELECT
|
||||
user_id, sum(value_1)
|
||||
FROM
|
||||
(SELECT
|
||||
users_table.user_id, users_table.value_1, random()
|
||||
FROM
|
||||
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
|
||||
INNER JOIN (SELECT *, random() FROM events_reference_table) as ref_all ON (ref_all.value_2 = users_table.user_id)
|
||||
) as foo
|
||||
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||
|
||||
-- should be able to pushdown since reference table is in the
|
||||
-- inner part of the left join
|
||||
SELECT
|
||||
user_id, sum(value_1)
|
||||
FROM
|
||||
(SELECT
|
||||
users_table.user_id, users_table.value_1, random()
|
||||
FROM
|
||||
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
|
||||
LEFT JOIN events_reference_table ON (events_reference_table.value_2 = users_table.user_id)
|
||||
) as foo
|
||||
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||
|
||||
-- should not be able to pushdown since reference table is in the
|
||||
-- direct outer part of the left join
|
||||
SELECT
|
||||
user_id, sum(value_1)
|
||||
FROM
|
||||
(SELECT
|
||||
users_table.user_id, users_table.value_1, random()
|
||||
FROM
|
||||
events_reference_table LEFT JOIN users_table ON (users_table.user_id = events_reference_table.value_2)
|
||||
LEFT JOIN events_table ON (events_table.user_id = users_table.user_id)
|
||||
) as foo
|
||||
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||
|
||||
-- should not be able to pushdown since reference table is in the
|
||||
-- direct outer part of the left join wrapped into a subquery
|
||||
SELECT
|
||||
*
|
||||
FROM
|
||||
(SELECT *, random() FROM events_reference_table) as ref_all LEFT JOIN users_table
|
||||
ON (users_table.user_id = ref_all.value_2);
|
||||
|
||||
-- should not be able to pushdown since reference table is in the
|
||||
-- outer part of the left join
|
||||
SELECT
|
||||
user_id, sum(value_1)
|
||||
FROM
|
||||
(SELECT
|
||||
users_table.user_id, users_table.value_1, random()
|
||||
FROM
|
||||
events_reference_table LEFT JOIN users_table ON (users_table.user_id = events_reference_table.value_2)
|
||||
LEFT JOIN events_table ON (events_table.user_id = users_table.user_id)
|
||||
) as foo
|
||||
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||
|
||||
-- should be able to pushdown since reference table is in the
|
||||
-- inner part of the left join
|
||||
SELECT * FROM
|
||||
(
|
||||
SELECT DISTINCT foo.user_id
|
||||
FROM
|
||||
((SELECT
|
||||
"events"."time", "events"."user_id" as event_user_id, value_2 as event_val_2, random()
|
||||
FROM
|
||||
events_reference_table as "events"
|
||||
WHERE
|
||||
event_type > 80) as "temp_data_queries"
|
||||
INNER JOIN
|
||||
(SELECT
|
||||
"users"."user_id"
|
||||
FROM
|
||||
users_table as "users"
|
||||
WHERE
|
||||
user_id > 80 and value_2 = 5) as foo_in ON (event_val_2 = user_id)) as foo LEFT JOIN
|
||||
(SELECT user_id as user_user_id FROM users_table) as fooo ON (user_id = user_user_id)) as bar;
|
||||
|
||||
-- the same query but this time reference table is in the outer part of the query
|
||||
SELECT * FROM
|
||||
(
|
||||
SELECT DISTINCT foo.user_id
|
||||
FROM
|
||||
((SELECT
|
||||
"events"."time", "events"."user_id" as event_user_id, value_2 as event_val_2, random()
|
||||
FROM
|
||||
events_reference_table as "events"
|
||||
WHERE
|
||||
event_type > 80) as "temp_data_queries"
|
||||
LEFT JOIN
|
||||
(SELECT
|
||||
"users"."user_id"
|
||||
FROM
|
||||
users_table as "users"
|
||||
WHERE
|
||||
user_id > 80 and value_2 = 5) as foo_in ON (event_val_2 = user_id)) as foo LEFT JOIN
|
||||
(SELECT user_id as user_user_id FROM users_table) as fooo ON (user_id = user_user_id)) as bar;
|
||||
|
||||
-- we could even suuport the following where the subquery
|
||||
-- on the outer part of the left join contains a reference table
|
||||
SELECT max(events_all.cnt), events_all.usr_id
|
||||
FROM
|
||||
(SELECT users_table.user_id as usr_id,
|
||||
count(*) as cnt
|
||||
FROM events_reference_table
|
||||
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id) GROUP BY users_table.user_id) AS events_all
|
||||
LEFT JOIN events_table ON (events_all.usr_id = events_table.user_id) GROUP BY 2 ORDER BY 1 DESC, 2 DESC LIMIT 5;
|
||||
|
||||
-- but, we fail to pushdown the following query where join that reference table appears
|
||||
-- wrapped into a subquery
|
||||
SELECT max(events_all.cnt),
|
||||
events_all.usr_id
|
||||
FROM(
|
||||
SELECT *, random() FROM
|
||||
(SELECT users_table.user_id AS usr_id, count(*) AS cnt
|
||||
FROM events_reference_table
|
||||
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
|
||||
GROUP BY users_table.user_id) AS events_all_inner) AS events_all
|
||||
LEFT JOIN events_table ON (events_all.usr_id = events_table.user_id)
|
||||
GROUP BY 2
|
||||
ORDER BY 1 DESC,
|
||||
2 DESC
|
||||
LIMIT 5;
|
||||
|
||||
-- LATERAL JOINs used with INNER JOINs with reference tables
|
||||
SET citus.subquery_pushdown to ON;
|
||||
|
@ -149,10 +325,10 @@ SELECT
|
|||
count(*) AS value, "generated_group_field"
|
||||
FROM
|
||||
(SELECT
|
||||
DISTINCT "pushedDownQuery"."real_user_id", "generated_group_field"
|
||||
DISTINCT "pushedDownQuery"."user_id", "generated_group_field"
|
||||
FROM
|
||||
(SELECT
|
||||
"eventQuery"."real_user_id", "eventQuery"."time", random(), ("eventQuery"."value_2") AS "generated_group_field"
|
||||
"eventQuery"."user_id", "eventQuery"."time", random(), ("eventQuery"."value_2") AS "generated_group_field"
|
||||
FROM
|
||||
(SELECT
|
||||
*
|
||||
|
@ -160,7 +336,7 @@ SELECT
|
|||
(SELECT
|
||||
"events"."time", "events"."user_id", "events"."value_2"
|
||||
FROM
|
||||
events_reference_table as "events"
|
||||
events_table as "events"
|
||||
WHERE
|
||||
user_id > 10 and user_id < 40 AND event_type IN (40, 41, 42, 43, 44, 45) ) "temp_data_queries"
|
||||
INNER JOIN
|
||||
|
@ -270,8 +446,8 @@ ORDER BY
|
|||
LIMIT 10;
|
||||
SET citus.subquery_pushdown to OFF;
|
||||
|
||||
-- LEFT JOINs used with INNER JOINs should error out since reference table exist in the
|
||||
-- left side of the LEFT JOIN.
|
||||
-- LEFT JOINs used with INNER JOINs should not error out since reference table joined
|
||||
-- with hash table that Citus can push down
|
||||
SELECT
|
||||
count(*) AS cnt, "generated_group_field"
|
||||
FROM
|
||||
|
@ -348,6 +524,106 @@ count(*) AS cnt, "generated_group_field"
|
|||
cnt DESC, generated_group_field ASC
|
||||
LIMIT 10;
|
||||
|
||||
-- right join where the inner part of the join includes a reference table
|
||||
-- joined with hash partitioned table using non-equi join
|
||||
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event
|
||||
FROM (
|
||||
SELECT
|
||||
t1.user_id,
|
||||
array_agg(event ORDER BY time) AS events_table,
|
||||
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||||
FROM (
|
||||
(
|
||||
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_reference_table AS e
|
||||
WHERE u.user_id > e.user_id
|
||||
AND u.user_id >= 10
|
||||
AND u.user_id <= 25
|
||||
AND e.event_type IN (100, 101, 102)
|
||||
)
|
||||
) t1 RIGHT JOIN (
|
||||
SELECT DISTINCT user_id,
|
||||
'Has done event'::TEXT AS hasdone_event
|
||||
FROM events_table AS e
|
||||
WHERE e.user_id >= 10
|
||||
AND e.user_id <= 25
|
||||
AND e.event_type IN (106, 107, 108)
|
||||
) t2 ON (t1.user_id = t2.user_id)
|
||||
GROUP BY t1.user_id, hasdone_event
|
||||
) t GROUP BY user_id, hasdone_event
|
||||
ORDER BY user_id;
|
||||
|
||||
-- a similar query as the above, with non-partition key comparison
|
||||
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event
|
||||
FROM (
|
||||
SELECT
|
||||
t1.user_id,
|
||||
array_agg(event ORDER BY time) AS events_table,
|
||||
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||||
FROM (
|
||||
(
|
||||
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_reference_table AS e
|
||||
WHERE u.value_1 > e.user_id
|
||||
AND u.user_id >= 10
|
||||
AND u.user_id <= 25
|
||||
AND e.event_type >= 125 AND e.event_type < 130
|
||||
)
|
||||
) t1 RIGHT JOIN (
|
||||
SELECT DISTINCT user_id,
|
||||
'Has done event'::TEXT AS hasdone_event
|
||||
FROM events_table AS e
|
||||
WHERE e.user_id >= 10
|
||||
AND e.user_id <= 25
|
||||
AND e.event_type >= 130 AND e.event_type < 135
|
||||
) t2 ON (t1.user_id = t2.user_id)
|
||||
GROUP BY t1.user_id, hasdone_event
|
||||
) t GROUP BY user_id, hasdone_event
|
||||
ORDER BY user_id;
|
||||
|
||||
|
||||
-- LEFT JOINs used with INNER JOINs
|
||||
-- events_table and users_reference_table joined
|
||||
-- with event_table.non_part_key < reference_table.any_key
|
||||
SELECT
|
||||
count(*) AS cnt, "generated_group_field"
|
||||
FROM
|
||||
(SELECT
|
||||
"eventQuery"."user_id", random(), generated_group_field
|
||||
FROM
|
||||
(SELECT
|
||||
"multi_group_wrapper_1".*, generated_group_field, random()
|
||||
FROM
|
||||
(SELECT *
|
||||
FROM
|
||||
(SELECT
|
||||
"events"."time", "events"."user_id" as event_user_id
|
||||
FROM
|
||||
events_table as "events"
|
||||
WHERE
|
||||
user_id > 80) "temp_data_queries"
|
||||
INNER JOIN
|
||||
(SELECT
|
||||
"users"."user_id"
|
||||
FROM
|
||||
users_reference_table as "users"
|
||||
WHERE
|
||||
user_id > 80 and value_2 = 5) "user_filters_1"
|
||||
ON ("temp_data_queries".event_user_id < "user_filters_1".user_id)) AS "multi_group_wrapper_1"
|
||||
RIGHT JOIN
|
||||
(SELECT
|
||||
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
|
||||
FROM
|
||||
users_table as "users") "left_group_by_1"
|
||||
ON ("left_group_by_1".user_id = "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
|
||||
group BY
|
||||
"generated_group_field"
|
||||
ORDER BY
|
||||
cnt DESC, generated_group_field ASC
|
||||
LIMIT 10;
|
||||
|
||||
-- Outer subquery with reference table
|
||||
SELECT "some_users_data".user_id, lastseen
|
||||
FROM
|
||||
|
@ -380,9 +656,9 @@ ORDER BY
|
|||
limit 50;
|
||||
|
||||
--
|
||||
-- UNIONs and JOINs with reference tables, shoukld error out
|
||||
-- UNIONs and JOINs with reference tables, should error out
|
||||
--
|
||||
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
||||
SELECT ("final_query"."event_types") as types
|
||||
FROM
|
||||
( SELECT *, random()
|
||||
FROM
|
||||
|
@ -396,7 +672,7 @@ FROM
|
|||
(SELECT
|
||||
"events"."user_id", "events"."time", 0 AS event
|
||||
FROM
|
||||
events_reference_table as "events"
|
||||
events_table as "events"
|
||||
WHERE
|
||||
event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1)
|
||||
UNION
|
||||
|
@ -406,7 +682,7 @@ FROM
|
|||
(SELECT
|
||||
"events"."user_id", "events"."time", 1 AS event
|
||||
FROM
|
||||
events_table as "events"
|
||||
events_reference_table as "events"
|
||||
WHERE
|
||||
event_type IN (15, 16, 17, 18, 19) ) events_subquery_2)
|
||||
UNION
|
||||
|
@ -438,8 +714,6 @@ INNER JOIN
|
|||
WHERE
|
||||
value_1 > 50 and value_1 < 70) AS t
|
||||
ON (t.user_id = q.user_id)) as final_query
|
||||
GROUP BY
|
||||
types
|
||||
ORDER BY
|
||||
types;
|
||||
|
||||
|
@ -471,15 +745,15 @@ FROM
|
|||
SELECT * FROM
|
||||
(
|
||||
SELECT
|
||||
max("events"."time"),
|
||||
max("users"."time"),
|
||||
0 AS event,
|
||||
"events"."user_id"
|
||||
"users"."user_id"
|
||||
FROM
|
||||
events_reference_table as "events", users_table as "users"
|
||||
WHERE
|
||||
events.user_id = users.user_id AND
|
||||
event_type IN (10, 11, 12, 13, 14, 15)
|
||||
GROUP BY "events"."user_id"
|
||||
GROUP BY "users"."user_id"
|
||||
) as events_subquery_5
|
||||
) events_subquery_2)
|
||||
UNION
|
||||
|
@ -569,6 +843,107 @@ INNER JOIN
|
|||
GROUP BY types
|
||||
ORDER BY types;
|
||||
|
||||
-- just a sanity check that we don't allow this if the reference table is on the
|
||||
-- left part of the left join
|
||||
SELECT count(*) FROM
|
||||
(SELECT random() FROM users_ref_test_table LEFT JOIN user_buy_test_table
|
||||
ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1;
|
||||
|
||||
-- we don't allow non equi join among hash partitioned tables
|
||||
SELECT count(*) FROM
|
||||
(SELECT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||
ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1,
|
||||
(SELECT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
|
||||
ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_2
|
||||
WHERE subquery_1.user_id != subquery_2.user_id ;
|
||||
|
||||
-- we cannot push this query since hash partitioned tables
|
||||
-- are not joined on partition keys with equality
|
||||
SELECT
|
||||
count(*) AS cnt, "generated_group_field"
|
||||
FROM
|
||||
(SELECT
|
||||
"eventQuery"."user_id", random(), generated_group_field
|
||||
FROM
|
||||
(SELECT
|
||||
"multi_group_wrapper_1".*, generated_group_field, random()
|
||||
FROM
|
||||
(SELECT *
|
||||
FROM
|
||||
(SELECT
|
||||
"events"."time", "events"."user_id" as event_user_id
|
||||
FROM
|
||||
events_table as "events"
|
||||
WHERE
|
||||
user_id > 80) "temp_data_queries"
|
||||
INNER JOIN
|
||||
(SELECT
|
||||
"users"."user_id"
|
||||
FROM
|
||||
users_reference_table as "users"
|
||||
WHERE
|
||||
user_id > 80 and value_2 = 5) "user_filters_1"
|
||||
ON ("temp_data_queries".event_user_id < "user_filters_1".user_id)) AS "multi_group_wrapper_1"
|
||||
RIGHT JOIN
|
||||
(SELECT
|
||||
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
|
||||
FROM
|
||||
users_table as "users") "left_group_by_1"
|
||||
ON ("left_group_by_1".user_id > "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
|
||||
group BY
|
||||
"generated_group_field"
|
||||
ORDER BY
|
||||
cnt DESC, generated_group_field ASC
|
||||
LIMIT 10;
|
||||
|
||||
-- two hash partitioned relations are not joined
|
||||
-- on partiton keys although reference table is fine
|
||||
-- to push down
|
||||
SELECT
|
||||
u1.user_id, count(*)
|
||||
FROM
|
||||
events_table as e1, users_table as u1
|
||||
WHERE
|
||||
event_type IN
|
||||
(SELECT
|
||||
event_type
|
||||
FROM
|
||||
events_reference_table as e2
|
||||
WHERE
|
||||
value_2 = 15 AND
|
||||
value_3 > 25 AND
|
||||
e1.value_2 > e2.value_2
|
||||
)
|
||||
AND u1.user_id > e1.user_id
|
||||
GROUP BY 1
|
||||
ORDER BY 2 DESC, 1 DESC
|
||||
LIMIT 5;
|
||||
|
||||
SELECT foo.user_id FROM
|
||||
(
|
||||
SELECT m.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
|
||||
WHERE event_type > 100000
|
||||
) as foo;
|
||||
|
||||
-- not supported since group by is on the reference table column
|
||||
SELECT foo.user_id FROM
|
||||
(
|
||||
SELECT r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
|
||||
GROUP BY r.user_id
|
||||
) as foo;
|
||||
|
||||
-- not supported since distinct is on the reference table column
|
||||
SELECT foo.user_id FROM
|
||||
(
|
||||
SELECT DISTINCT r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
|
||||
) as foo;
|
||||
|
||||
-- not supported since distinct on is on the reference table column
|
||||
SELECT foo.user_id FROM
|
||||
(
|
||||
SELECT DISTINCT ON(r.user_id) r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
|
||||
) as foo;
|
||||
|
||||
DROP TABLE user_buy_test_table;
|
||||
DROP TABLE users_ref_test_table;
|
||||
DROP TABLE users_return_test_table;
|
||||
|
|
|
@ -1,27 +1,6 @@
|
|||
--
|
||||
-- queries to test the subquery pushdown on reference tables
|
||||
|
||||
-- subqueries in WHERE with greater operator
|
||||
SELECT
|
||||
user_id
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
value_2 >
|
||||
(SELECT
|
||||
max(value_2)
|
||||
FROM
|
||||
events_reference_table
|
||||
WHERE
|
||||
users_table.user_id = events_reference_table.user_id AND event_type = 50
|
||||
GROUP BY
|
||||
user_id
|
||||
)
|
||||
GROUP BY user_id
|
||||
HAVING count(*) > 66
|
||||
ORDER BY user_id
|
||||
LIMIT 5;
|
||||
|
||||
-- subqueries in WHERE with IN operator
|
||||
SELECT
|
||||
user_id
|
||||
|
@ -78,7 +57,7 @@ LIMIT 3;
|
|||
|
||||
-- subqueries in WHERE with IN operator without equality
|
||||
SELECT
|
||||
user_id
|
||||
users_table.user_id, count(*)
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
|
@ -90,51 +69,10 @@ WHERE
|
|||
WHERE
|
||||
users_table.user_id > events_reference_table.user_id
|
||||
)
|
||||
GROUP BY user_id
|
||||
ORDER BY user_id
|
||||
GROUP BY users_table.user_id
|
||||
ORDER BY 2 DESC, 1 DESC
|
||||
LIMIT 3;
|
||||
|
||||
-- have reference table without any equality, should error out
|
||||
SELECT
|
||||
user_id
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
value_2 >
|
||||
(SELECT
|
||||
max(value_2)
|
||||
FROM
|
||||
events_reference_table
|
||||
WHERE
|
||||
event_type = 50
|
||||
GROUP BY
|
||||
user_id
|
||||
)
|
||||
GROUP BY user_id
|
||||
HAVING count(*) > 66
|
||||
ORDER BY user_id
|
||||
LIMIT 5;
|
||||
|
||||
-- users that appeared more than 118 times, should run since the reference table
|
||||
-- on the right side of the semi join
|
||||
SELECT
|
||||
user_id
|
||||
FROM
|
||||
users_table
|
||||
WHERE 118 <=
|
||||
(SELECT
|
||||
count(*)
|
||||
FROM
|
||||
events_reference_table
|
||||
WHERE
|
||||
users_table.user_id = events_reference_table.user_id
|
||||
GROUP BY
|
||||
user_id)
|
||||
GROUP BY
|
||||
user_id
|
||||
ORDER BY
|
||||
user_id;
|
||||
|
||||
-- should error out since reference table exist on the left side
|
||||
-- of the left lateral join
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
|
@ -197,3 +135,165 @@ SELECT user_id, value_2 FROM users_table WHERE
|
|||
HAVING sum(submit_card_info) > 0
|
||||
)
|
||||
ORDER BY 1, 2;
|
||||
|
||||
-- non-partition key equality with reference table
|
||||
SELECT
|
||||
user_id, count(*)
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
value_3 =ANY(SELECT value_2 FROM users_reference_table WHERE value_1 >= 10 AND value_1 <= 20)
|
||||
GROUP BY 1 ORDER BY 2 DESC, 1 DESC LIMIT 5;
|
||||
|
||||
|
||||
-- non-partition key comparison with reference table
|
||||
SELECT
|
||||
user_id, count(*)
|
||||
FROM
|
||||
events_table as e1
|
||||
WHERE
|
||||
event_type IN
|
||||
(SELECT
|
||||
event_type
|
||||
FROM
|
||||
events_reference_table as e2
|
||||
WHERE
|
||||
value_2 = 15 AND
|
||||
value_3 > 25 AND
|
||||
e1.value_2 > e2.value_2
|
||||
)
|
||||
GROUP BY 1
|
||||
ORDER BY 2 DESC, 1 DESC
|
||||
LIMIT 5;
|
||||
|
||||
-- subqueries in both WHERE and FROM clauses
|
||||
-- should work since reference table is on the
|
||||
-- inner part of the join
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_1 > 101 AND value_1 < 110
|
||||
AND value_2 >= 5
|
||||
AND user_id IN
|
||||
(
|
||||
SELECT
|
||||
e1.user_id
|
||||
FROM (
|
||||
-- Get the first time each user viewed the homepage.
|
||||
SELECT
|
||||
user_id,
|
||||
1 AS view_homepage,
|
||||
min(time) AS view_homepage_time
|
||||
FROM events_table
|
||||
WHERE
|
||||
event_type IN (10, 20, 30, 40, 50, 60, 70, 80, 90)
|
||||
GROUP BY user_id
|
||||
) e1 LEFT JOIN LATERAL (
|
||||
SELECT
|
||||
user_id,
|
||||
1 AS use_demo,
|
||||
time AS use_demo_time
|
||||
FROM events_table
|
||||
WHERE
|
||||
user_id = e1.user_id AND
|
||||
event_type IN (11, 21, 31, 41, 51, 61, 71, 81, 91)
|
||||
ORDER BY time
|
||||
) e2 ON true LEFT JOIN LATERAL (
|
||||
SELECT
|
||||
user_id,
|
||||
1 AS enter_credit_card,
|
||||
time AS enter_credit_card_time
|
||||
FROM events_table
|
||||
WHERE
|
||||
user_id = e2.user_id AND
|
||||
event_type IN (12, 22, 32, 42, 52, 62, 72, 82, 92)
|
||||
ORDER BY time
|
||||
) e3 ON true LEFT JOIN LATERAL (
|
||||
SELECT
|
||||
1 AS submit_card_info,
|
||||
user_id,
|
||||
time AS enter_credit_card_time
|
||||
FROM events_table
|
||||
WHERE
|
||||
user_id = e3.user_id AND
|
||||
event_type IN (13, 23, 33, 43, 53, 63, 73, 83, 93)
|
||||
ORDER BY time
|
||||
) e4 ON true LEFT JOIN LATERAL (
|
||||
SELECT
|
||||
1 AS see_bought_screen
|
||||
FROM events_reference_table
|
||||
WHERE
|
||||
user_id = e4.user_id AND
|
||||
event_type IN (14, 24, 34, 44, 54, 64, 74, 84, 94)
|
||||
ORDER BY time
|
||||
) e5 ON true
|
||||
group by e1.user_id
|
||||
HAVING sum(submit_card_info) > 0
|
||||
)
|
||||
ORDER BY 1, 2;
|
||||
|
||||
-- reference tables are not allowed if there is sublink
|
||||
SELECT
|
||||
count(*)
|
||||
FROM
|
||||
users_reference_table
|
||||
WHERE user_id
|
||||
NOT IN
|
||||
(SELECT users_table.value_2 FROM users_table JOIN users_reference_table as u2 ON users_table.value_2 = u2.value_2);
|
||||
|
||||
|
||||
-- reference tables are not allowed if there is sublink
|
||||
SELECT count(*)
|
||||
FROM
|
||||
(SELECT
|
||||
user_id, random() FROM users_reference_table) AS vals
|
||||
WHERE vals.user_id NOT IN
|
||||
(SELECT users_table.value_2
|
||||
FROM users_table
|
||||
JOIN users_reference_table AS u2 ON users_table.value_2 = u2.value_2);
|
||||
|
||||
-- reference tables are not allowed if there is sublink
|
||||
SELECT user_id,
|
||||
count(*)
|
||||
FROM users_reference_table
|
||||
WHERE value_2 > ALL
|
||||
(SELECT min(value_2)
|
||||
FROM events_table
|
||||
WHERE event_type > 50 AND users_reference_table.user_id = events_table.user_id
|
||||
GROUP BY user_id)
|
||||
GROUP BY user_id
|
||||
HAVING count(*) > 66
|
||||
ORDER BY 2 DESC,
|
||||
1 DESC
|
||||
LIMIT 5;
|
||||
|
||||
-- reference tables are not allowed if there is sublink
|
||||
-- this time in the subquery
|
||||
SELECT *
|
||||
FROM users_table
|
||||
WHERE user_id IN
|
||||
(SELECT users_table.user_id
|
||||
FROM users_table,
|
||||
users_reference_table
|
||||
WHERE users_reference_table.user_id NOT IN
|
||||
(SELECT value_2
|
||||
FROM users_reference_table AS u2));
|
||||
|
||||
-- not supported since GROUP BY references to an upper level query
|
||||
SELECT
|
||||
user_id
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
value_2 >
|
||||
(SELECT
|
||||
max(value_2)
|
||||
FROM
|
||||
events_reference_table
|
||||
WHERE
|
||||
users_table.user_id = events_reference_table.user_id AND event_type = 50
|
||||
GROUP BY
|
||||
users_table.user_id
|
||||
)
|
||||
GROUP BY user_id
|
||||
HAVING count(*) > 66
|
||||
ORDER BY user_id
|
||||
LIMIT 5;
|
||||
|
|
Loading…
Reference in New Issue