Merge pull request #1628 from citusdata/expand_subquery_reference_table

Expand subquery pushdown for reference tables
pull/1653/head
Önder Kalacı 2017-09-15 00:04:11 +03:00 committed by GitHub
commit 6c9dffccbf
11 changed files with 1551 additions and 244 deletions

View File

@ -2880,6 +2880,17 @@ FindReferencedTableColumn(Expr *columnExpression, List *parentQueryList, Query *
return;
}
/*
* We currently don't support finding partition keys in the subqueries
* that references from outer subqueries. For example, in corrolated
* subqueries in WHERE clause, we don't support use of partition keys
* in the subquery that is referred from the outer query.
*/
if (candidateColumn->varlevelsup > 0)
{
return;
}
rangeTableEntryIndex = candidateColumn->varno - 1;
rangeTableEntry = list_nth(rangetableList, rangeTableEntryIndex);
@ -3096,7 +3107,14 @@ PartitionColumnOpExpressionList(Query *query)
rangeTableEntryIndex = candidatePartitionColumn->varno - 1;
rangeTableEntry = list_nth(rangetableList, rangeTableEntryIndex);
Assert(rangeTableEntry->rtekind == RTE_RELATION);
/*
* We currently don't support checking for equality when user refers
* to a column from the JOIN instead of the relation.
*/
if (rangeTableEntry->rtekind != RTE_RELATION)
{
continue;
}
relationId = rangeTableEntry->relid;
partitionColumn = DistPartitionKey(relationId);

View File

@ -31,6 +31,7 @@
#include "distributed/worker_protocol.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "nodes/relation.h"
#include "optimizer/clauses.h"
#include "optimizer/prep.h"
#include "optimizer/tlist.h"
@ -69,6 +70,8 @@ static DeferredErrorMessage * DeferErrorIfUnsupportedSubqueryPushdown(Query *
PlannerRestrictionContext
*
plannerRestrictionContext);
static DeferredErrorMessage * DeferErrorIfUnsupportedSublinkAndReferenceTable(
Query *queryTree);
static DeferredErrorMessage * DeferErrorIfUnsupportedFilters(Query *subquery);
static bool EqualOpExpressionLists(List *firstOpExpressionList,
List *secondOpExpressionList);
@ -87,6 +90,7 @@ static MultiNode * MultiPlanTree(Query *queryTree);
static void ErrorIfQueryNotSupported(Query *queryTree);
static bool HasUnsupportedReferenceTableJoin(
PlannerRestrictionContext *plannerRestrictionContext);
static bool ShouldRecurseForReferenceTableJoinChecks(RelOptInfo *relOptInfo);
static bool HasUnsupportedJoinWalker(Node *node, void *context);
static bool ErrorHintRequired(const char *errorHint, Query *queryTree);
static DeferredErrorMessage * DeferErrorIfUnsupportedSubqueryRepartition(Query *
@ -96,8 +100,8 @@ static bool HasOuterJoin(Query *queryTree);
static bool HasOuterJoinWalker(Node *node, void *maxJoinLevel);
static bool HasComplexJoinOrder(Query *queryTree);
static bool HasComplexRangeTableType(Query *queryTree);
static bool RelationInfoHasReferenceTable(PlannerInfo *plannerInfo,
RelOptInfo *relationInfo);
static bool RelationInfoContainsReferenceTable(PlannerInfo *plannerInfo,
RelOptInfo *relationInfo);
static void ValidateClauseList(List *clauseList);
static void ValidateSubqueryPushdownClauseList(List *clauseList);
static bool ExtractFromExpressionWalker(Node *node,
@ -542,7 +546,16 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery,
"one another relation using distribution keys and "
"equality operator.", NULL);
}
else if (HasUnsupportedReferenceTableJoin(plannerRestrictionContext))
/* we shouldn't allow reference tables in the FROM clause when the query has sublinks */
error = DeferErrorIfUnsupportedSublinkAndReferenceTable(originalQuery);
if (error)
{
return error;
}
/* we shouldn't allow reference tables in the outer part of outer joins */
if (HasUnsupportedReferenceTableJoin(plannerRestrictionContext))
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"cannot pushdown the subquery",
@ -582,6 +595,43 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery,
}
/*
* DeferErrorIfUnsupportedSublinkAndReferenceTable returns a deferred error if the
* given query is not suitable for subquery pushdown.
*
* While planning sublinks, we rely on Postgres in the sense that it converts some of
* sublinks into joins.
*
* In some cases, sublinks are pulled up and converted into outer joins. Those cases
* are already handled with HasUnsupportedReferenceTableJoin().
*
* If the sublinks are not pulled up, we should still error out in if any reference table
* appears in the FROM clause of a subquery.
*
* Otherwise, the result would include duplicate rows.
*/
static DeferredErrorMessage *
DeferErrorIfUnsupportedSublinkAndReferenceTable(Query *queryTree)
{
if (!queryTree->hasSubLinks)
{
return NULL;
}
if (HasReferenceTable((Node *) queryTree->rtable))
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"cannot pushdown the subquery",
"Reference tables are not allowed in FROM "
"clause when the query has subqueries in "
"WHERE clause",
NULL);
}
return NULL;
}
/*
* DeferErrorIfUnsupportedFilters checks if all leaf queries in the given query have
* same filter on the partition column. Note that if there are queries without
@ -853,6 +903,14 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi
}
}
deferredError = DeferErrorIfUnsupportedSublinkAndReferenceTable(subqueryTree);
if (deferredError)
{
preconditionsSatisfied = false;
errorDetail = (char *) deferredError->detail;
}
/* finally check and return deferred if not satisfied */
if (!preconditionsSatisfied)
{
@ -1027,7 +1085,7 @@ DeferErrorIfUnsupportedTableCombination(Query *queryTree)
/*
* TargetListOnPartitionColumn checks if at least one target list entry is on
* partition column or the table is a reference table.
* partition column.
*/
static bool
TargetListOnPartitionColumn(Query *query, List *targetEntryList)
@ -1047,15 +1105,11 @@ TargetListOnPartitionColumn(Query *query, List *targetEntryList)
FindReferencedTableColumn(targetExpression, NIL, query, &relationId, &column);
/*
* If the expression belongs to reference table directly returns true.
* We can assume that target list entry always on partition column of
* reference tables.
*/
/* if the expression belongs to reference table directly returns false */
if (IsDistributedTable(relationId) && PartitionMethod(relationId) ==
DISTRIBUTE_BY_NONE)
{
targetListOnPartitionColumn = true;
targetListOnPartitionColumn = false;
break;
}
@ -1459,15 +1513,18 @@ HasUnsupportedReferenceTableJoin(PlannerRestrictionContext *plannerRestrictionCo
if (joinType == JOIN_SEMI || joinType == JOIN_ANTI || joinType == JOIN_LEFT)
{
if (RelationInfoHasReferenceTable(plannerInfo, outerrel))
if (ShouldRecurseForReferenceTableJoinChecks(outerrel) &&
RelationInfoContainsReferenceTable(plannerInfo, outerrel))
{
return true;
}
}
else if (joinType == JOIN_FULL)
{
if (RelationInfoHasReferenceTable(plannerInfo, innerrel) ||
RelationInfoHasReferenceTable(plannerInfo, outerrel))
if ((ShouldRecurseForReferenceTableJoinChecks(innerrel) &&
RelationInfoContainsReferenceTable(plannerInfo, innerrel)) ||
(ShouldRecurseForReferenceTableJoinChecks(outerrel) &&
RelationInfoContainsReferenceTable(plannerInfo, outerrel)))
{
return true;
}
@ -1479,12 +1536,66 @@ HasUnsupportedReferenceTableJoin(PlannerRestrictionContext *plannerRestrictionCo
/*
* RelationInfoHasReferenceTable check whether the relationInfo has reference table.
* Since relation ids of relationInfo indexes to the range table entry list of
* planner info, planner info is also passed.
* ShouldRecurseForReferenceTableJoinChecks is a helper function for deciding
* on whether the input relOptInfo should be checked for unsupported reference
* tables.
*/
static bool
RelationInfoHasReferenceTable(PlannerInfo *plannerInfo, RelOptInfo *relationInfo)
ShouldRecurseForReferenceTableJoinChecks(RelOptInfo *relOptInfo)
{
/*
* We shouldn't recursively go down for joins since we're already
* going to process each join seperately. Otherwise we'd restrict
* the coverage. See the below sketch where (h) denotes a hash
* distributed relation, (r) denotes a reference table, (L) denotes
* LEFT JOIN and (I) denotes INNER JOIN. If we're to recurse into
* the inner join, we'd be preventing to push down the following
* join tree, which is actually safe to push down.
*
* (L)
* / \
* (I) h
* / \
* r h
*/
if (relOptInfo->reloptkind == RELOPT_JOINREL)
{
return false;
}
/*
* Note that we treat the same query where relations appear in subqueries
* differently. (i.e., use SELECT * FROM r; instead of r)
*
* In that case, to relax some restrictions, we do the following optimization:
* If the subplan (i.e., plannerInfo corresponding to the subquery) contains any
* joins, we skip reference table checks keeping in mind that the join is already
* going to be processed seperately. This optimization should suffice for many
* use cases.
*/
if (relOptInfo->reloptkind == RELOPT_BASEREL && relOptInfo->subroot != NULL)
{
PlannerInfo *subroot = relOptInfo->subroot;
if (list_length(subroot->join_rel_list) > 0)
{
return false;
}
}
return true;
}
/*
* RelationInfoContainsReferenceTable checks whether the relationInfo
* contains any reference tables. If found, the function returns true.
*
* Note that since relation ids of relationInfo indexes to the range
* table entry list of planner info, planner info is also passed.
*/
static bool
RelationInfoContainsReferenceTable(PlannerInfo *plannerInfo, RelOptInfo *relationInfo)
{
Relids relids = bms_copy(relationInfo->relids);
int relationId = -1;
@ -2927,9 +3038,17 @@ ExtractRangeTableRelationWalkerWithRTEExpand(Node *node, List **rangeTableRelati
rangeTableRelationList, 0);
}
}
else if (IsA(node, Query))
{
walkIsComplete = query_tree_walker((Query *) node,
ExtractRangeTableRelationWalkerWithRTEExpand,
rangeTableRelationList, QTW_EXAMINE_RTES);
}
else
{
walkIsComplete = ExtractRangeTableRelationWalker(node, rangeTableRelationList);
walkIsComplete = expression_tree_walker(node,
ExtractRangeTableRelationWalkerWithRTEExpand,
rangeTableRelationList);
}
return walkIsComplete;

View File

@ -62,6 +62,7 @@ typedef struct AttributeEquivalenceClassMember
} AttributeEquivalenceClassMember;
static uint32 ReferenceRelationCount(RelationRestrictionContext *restrictionContext);
static Var * FindTranslatedVar(List *appendRelList, Oid relationOid,
Index relationRteIndex, Index *partitionKeyIndex);
static bool EquivalenceListContainsRelationsEquality(List *attributeEquivalenceList,
@ -329,9 +330,8 @@ FindTranslatedVar(List *appendRelList, Oid relationOid, Index relationRteIndex,
* joined on their partition keys.
*
* The function returns true if all relations are joined on their partition keys.
* Otherwise, the function returns false. In order to support reference tables
* with subqueries, equality between attributes of reference tables and partition
* key of distributed tables are also considered.
* Otherwise, the function returns false. We ignore reference tables at all since
* they don't have partition keys.
*
* In order to do that, we invented a new equivalence class namely:
* AttributeEquivalenceClass. In very simple words, a AttributeEquivalenceClass is
@ -365,14 +365,24 @@ RestrictionEquivalenceForPartitionKeys(PlannerRestrictionContext *
List *joinRestrictionAttributeEquivalenceList = NIL;
List *allAttributeEquivalenceList = NIL;
uint32 referenceRelationCount = ReferenceRelationCount(restrictionContext);
uint32 totalRelationCount = list_length(restrictionContext->relationRestrictionList);
uint32 nonReferenceRelationCount = totalRelationCount - referenceRelationCount;
/*
* If the query includes only one relation, we should not check the partition
* column equality. Single table should not need to fetch data from other nodes
* except it's own node(s).
* If the query includes a single relation which is not a reference table,
* we should not check the partition column equality.
* Consider two example cases:
* (i) The query includes only a single colocated relation
* (ii) A colocated relation is joined with a (or multiple) reference
* table(s) where colocated relation is not joined on the partition key
*
* For the above two cases, we don't need to execute the partition column equality
* algorithm. The reason is that the essence of this function is to ensure that the
* tasks that are going to be created should not need data from other tasks. In both
* cases mentioned above, the necessary data per task would be on available.
*/
if (totalRelationCount == 1)
if (nonReferenceRelationCount <= 1)
{
return true;
}
@ -394,6 +404,31 @@ RestrictionEquivalenceForPartitionKeys(PlannerRestrictionContext *
}
/*
* ReferenceRelationCount iterates over the relations and returns the reference table
* relation count.
*/
static uint32
ReferenceRelationCount(RelationRestrictionContext *restrictionContext)
{
ListCell *relationRestrictionCell = NULL;
uint32 referenceRelationCount = 0;
foreach(relationRestrictionCell, restrictionContext->relationRestrictionList)
{
RelationRestriction *relationRestriction =
(RelationRestriction *) lfirst(relationRestrictionCell);
if (PartitionMethod(relationRestriction->relationId) == DISTRIBUTE_BY_NONE)
{
referenceRelationCount++;
}
}
return referenceRelationCount;
}
/*
* EquivalenceListContainsRelationsEquality gets a list of attributed equivalence
* list and a relation restriction context. The function first generates a common
@ -434,6 +469,12 @@ EquivalenceListContainsRelationsEquality(List *attributeEquivalenceList,
(RelationRestriction *) lfirst(relationRestrictionCell);
int rteIdentity = GetRTEIdentity(relationRestriction->rte);
/* we shouldn't check for the equality of reference tables */
if (PartitionMethod(relationRestriction->relationId) == DISTRIBUTE_BY_NONE)
{
continue;
}
if (!bms_is_member(rteIdentity, commonRteIdentities))
{
return false;
@ -621,7 +662,7 @@ GetVarFromAssignedParam(List *parentPlannerParamList, Param *plannerParam)
/*
* GenerateCommonEquivalence gets a list of unrelated AttributeEquiavalanceClass
* whose all members are partition keys or a column of reference table.
* whose all members are partition keys.
*
* With the equivalence classes, the function follows the algorithm
* outlined below:
@ -1084,8 +1125,14 @@ AddRteRelationToAttributeEquivalenceClass(AttributeEquivalenceClass **
Assert(rangeTableEntry->rtekind == RTE_RELATION);
if (PartitionMethod(relationId) != DISTRIBUTE_BY_NONE &&
relationPartitionKey->varattno != varToBeAdded->varattno)
/* we don't need reference tables in the equality on columns */
if (relationPartitionKey == NULL)
{
return;
}
/* we're only interested in distribution columns */
if (relationPartitionKey->varattno != varToBeAdded->varattno)
{
return;
}

View File

@ -1122,7 +1122,7 @@ WHERE
colocated_table_test.value_1 = reference_table_test.value_1;
DEBUG: only reference tables may be queried when targeting a reference table with distributed INSERT ... SELECT
DEBUG: Collecting INSERT ... SELECT results on coordinator
-- not pushable due to lack of equality between partition column and column of reference table
-- safe to push down even lack of equality between partition column and column of reference table
INSERT INTO
colocated_table_test (value_1, value_2)
SELECT
@ -1132,9 +1132,13 @@ FROM
WHERE
colocated_table_test_2.value_4 = reference_table_test.value_4
RETURNING value_1, value_2;
ERROR: cannot perform distributed planning for the given modification
DETAIL: Select query cannot be pushed down to the worker.
-- some more complex queries (Note that there are more complex queries in multi_insert_select.sql)
value_1 | value_2
---------+---------
1 | 1
2 | 2
(2 rows)
-- similar query with the above, this time partition key but without equality
INSERT INTO
colocated_table_test (value_1, value_2)
SELECT
@ -1142,10 +1146,13 @@ SELECT
FROM
colocated_table_test_2, reference_table_test
WHERE
colocated_table_test_2.value_2 = reference_table_test.value_2
colocated_table_test_2.value_1 > reference_table_test.value_2
RETURNING value_1, value_2;
ERROR: cannot perform distributed planning for the given modification
DETAIL: Select query cannot be pushed down to the worker.
value_1 | value_2
---------+---------
2 | 1
(1 row)
-- partition column value comes from reference table, goes via coordinator
INSERT INTO
colocated_table_test (value_1, value_2)
@ -1606,7 +1613,7 @@ INSERT INTO reference_table_test VALUES (2, 2.0, '2', '2016-12-02');
SELECT master_modify_multiple_shards('DELETE FROM colocated_table_test');
master_modify_multiple_shards
-------------------------------
6
9
(1 row)
ROLLBACK;

View File

@ -60,18 +60,38 @@ SELECT count(*) FROM
1
(1 row)
-- Should not work, no equality between partition column and reference table
SELECT * FROM
(SELECT random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
ON user_buy_test_table.item_id = users_ref_test_table.id) subquery_1;
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
-- Should not work, no equality between partition column and reference table
SELECT * FROM
(SELECT random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_1;
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
-- Should work, although no equality between partition column and reference table
SELECT subquery_1.item_id FROM
(SELECT user_buy_test_table.item_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
ON user_buy_test_table.item_id = users_ref_test_table.id) subquery_1
ORDER BY 1;
item_id
---------
2
3
4
5
(4 rows)
-- Should work, although no equality between partition column and reference table
SELECT subquery_1.user_id FROM
(SELECT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_1
ORDER BY 1;
user_id
---------
1
2
3
3
7
7
7
7
7
7
(10 rows)
-- Shouldn't work, reference table at the outer side is not allowed
SELECT * FROM
(SELECT random() FROM users_ref_test_table LEFT JOIN user_buy_test_table
@ -93,6 +113,33 @@ SELECT * FROM
ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- Equi join test with reference table on non-partition keys
SELECT count(*) FROM
(SELECT random() FROM user_buy_test_table JOIN users_ref_test_table
ON user_buy_test_table.item_id = users_ref_test_table.id) subquery_1;
count
-------
4
(1 row)
-- Non-equi join test with reference table on non-partition keys
SELECT count(*) FROM
(SELECT random() FROM user_buy_test_table JOIN users_ref_test_table
ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1;
count
-------
10
(1 row)
-- Non-equi left joins with reference tables on non-partition keys
SELECT count(*) FROM
(SELECT random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1;
count
-------
10
(1 row)
-- Should pass since reference table locates in the inner part of each left join
SELECT count(*) FROM
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
@ -105,13 +152,238 @@ SELECT count(*) FROM
2
(1 row)
-- Should not pass since reference table locates in the outer part of right join
-- two subqueries, each include joins with reference table
-- also, two hash distributed tables are joined on partition keys
SELECT count(*) FROM
(SELECT DISTINCT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
ON user_buy_test_table.item_id > users_ref_test_table.id AND users_ref_test_table.k_no > 88 AND user_buy_test_table.item_id < 88) subquery_1,
(SELECT DISTINCT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
ON user_buy_test_table.user_id > users_ref_test_table.id AND users_ref_test_table.k_no > 44 AND user_buy_test_table.user_id > 44) subquery_2
WHERE subquery_1.user_id = subquery_2.user_id ;
count
-------
4
(1 row)
-- Should be able to push down since reference tables are inner joined
-- with hash distributed tables, the results of those joins are the parts of
-- an outer join
SELECT subquery_2.id FROM
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
ON tt1.user_id = tt2.user_id) subquery_1
RIGHT JOIN
(SELECT tt1.user_id, ref.id, random() FROM user_buy_test_table as tt1 JOIN users_ref_test_table as ref
ON tt1.user_id = ref.id) subquery_2 ON subquery_1.user_id = subquery_2.user_id ORDER BY 1 DESC LIMIT 5;
id
----
3
2
1
(3 rows)
-- the same query as the above, but this Citus fails to pushdown the query
-- since the outer part of the right join doesn't include any joins
SELECT * FROM
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
ON tt1.user_id = tt2.user_id) subquery_1
RIGHT JOIN
(SELECT tt1.user_id, random() FROM user_buy_test_table as tt1 JOIN users_ref_test_table as ref
ON tt1.user_id = ref.id) subquery_2 ON subquery_1.user_id = subquery_2.user_id;
(SELECT *, random() FROM (SELECT tt1.user_id, random() FROM user_buy_test_table as tt1 JOIN users_ref_test_table as ref
ON tt1.user_id = ref.id) subquery_2_inner) subquery_2 ON subquery_1.user_id = subquery_2.user_id;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- should be able to pushdown since reference table is in the
-- inner part of the left join
SELECT
user_id, sum(value_1)
FROM
(SELECT
users_table.user_id, users_table.value_1, random()
FROM
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
INNER JOIN events_reference_table ON (events_reference_table.value_2 = users_table.user_id)
) as foo
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
user_id | sum
---------+----------
12 | 92221920
17 | 89192642
96 | 85143744
45 | 84267456
90 | 84157047
43 | 82110240
1 | 81735612
72 | 78992640
67 | 72385516
97 | 71002659
(10 rows)
-- same query as above, reference table is wrapped into a subquery
SELECT
user_id, sum(value_1)
FROM
(SELECT
users_table.user_id, users_table.value_1, random()
FROM
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
INNER JOIN (SELECT *, random() FROM events_reference_table) as ref_all ON (ref_all.value_2 = users_table.user_id)
) as foo
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
user_id | sum
---------+----------
12 | 92221920
17 | 89192642
96 | 85143744
45 | 84267456
90 | 84157047
43 | 82110240
1 | 81735612
72 | 78992640
67 | 72385516
97 | 71002659
(10 rows)
-- should be able to pushdown since reference table is in the
-- inner part of the left join
SELECT
user_id, sum(value_1)
FROM
(SELECT
users_table.user_id, users_table.value_1, random()
FROM
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
LEFT JOIN events_reference_table ON (events_reference_table.value_2 = users_table.user_id)
) as foo
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
user_id | sum
---------+----------
12 | 92221920
17 | 89192642
96 | 85143744
45 | 84267456
90 | 84157047
43 | 82110240
1 | 81735612
72 | 78992640
67 | 72385516
97 | 71002659
(10 rows)
-- should not be able to pushdown since reference table is in the
-- direct outer part of the left join
SELECT
user_id, sum(value_1)
FROM
(SELECT
users_table.user_id, users_table.value_1, random()
FROM
events_reference_table LEFT JOIN users_table ON (users_table.user_id = events_reference_table.value_2)
LEFT JOIN events_table ON (events_table.user_id = users_table.user_id)
) as foo
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- should not be able to pushdown since reference table is in the
-- direct outer part of the left join wrapped into a subquery
SELECT
*
FROM
(SELECT *, random() FROM events_reference_table) as ref_all LEFT JOIN users_table
ON (users_table.user_id = ref_all.value_2);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- should not be able to pushdown since reference table is in the
-- outer part of the left join
SELECT
user_id, sum(value_1)
FROM
(SELECT
users_table.user_id, users_table.value_1, random()
FROM
events_reference_table LEFT JOIN users_table ON (users_table.user_id = events_reference_table.value_2)
LEFT JOIN events_table ON (events_table.user_id = users_table.user_id)
) as foo
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- should be able to pushdown since reference table is in the
-- inner part of the left join
SELECT * FROM
(
SELECT DISTINCT foo.user_id
FROM
((SELECT
"events"."time", "events"."user_id" as event_user_id, value_2 as event_val_2, random()
FROM
events_reference_table as "events"
WHERE
event_type > 80) as "temp_data_queries"
INNER JOIN
(SELECT
"users"."user_id"
FROM
users_table as "users"
WHERE
user_id > 80 and value_2 = 5) as foo_in ON (event_val_2 = user_id)) as foo LEFT JOIN
(SELECT user_id as user_user_id FROM users_table) as fooo ON (user_id = user_user_id)) as bar;
user_id
---------
89
(1 row)
-- the same query but this time reference table is in the outer part of the query
SELECT * FROM
(
SELECT DISTINCT foo.user_id
FROM
((SELECT
"events"."time", "events"."user_id" as event_user_id, value_2 as event_val_2, random()
FROM
events_reference_table as "events"
WHERE
event_type > 80) as "temp_data_queries"
LEFT JOIN
(SELECT
"users"."user_id"
FROM
users_table as "users"
WHERE
user_id > 80 and value_2 = 5) as foo_in ON (event_val_2 = user_id)) as foo LEFT JOIN
(SELECT user_id as user_user_id FROM users_table) as fooo ON (user_id = user_user_id)) as bar;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- we could even suuport the following where the subquery
-- on the outer part of the left join contains a reference table
SELECT max(events_all.cnt), events_all.usr_id
FROM
(SELECT users_table.user_id as usr_id,
count(*) as cnt
FROM events_reference_table
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id) GROUP BY users_table.user_id) AS events_all
LEFT JOIN events_table ON (events_all.usr_id = events_table.user_id) GROUP BY 2 ORDER BY 1 DESC, 2 DESC LIMIT 5;
max | usr_id
-------+--------
14605 | 23
13090 | 17
12915 | 25
12317 | 90
12285 | 87
(5 rows)
-- but, we fail to pushdown the following query where join that reference table appears
-- wrapped into a subquery
SELECT max(events_all.cnt),
events_all.usr_id
FROM(
SELECT *, random() FROM
(SELECT users_table.user_id AS usr_id, count(*) AS cnt
FROM events_reference_table
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
GROUP BY users_table.user_id) AS events_all_inner) AS events_all
LEFT JOIN events_table ON (events_all.usr_id = events_table.user_id)
GROUP BY 2
ORDER BY 1 DESC,
2 DESC
LIMIT 5;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- LATERAL JOINs used with INNER JOINs with reference tables
@ -193,10 +465,10 @@ SELECT
count(*) AS value, "generated_group_field"
FROM
(SELECT
DISTINCT "pushedDownQuery"."real_user_id", "generated_group_field"
DISTINCT "pushedDownQuery"."user_id", "generated_group_field"
FROM
(SELECT
"eventQuery"."real_user_id", "eventQuery"."time", random(), ("eventQuery"."value_2") AS "generated_group_field"
"eventQuery"."user_id", "eventQuery"."time", random(), ("eventQuery"."value_2") AS "generated_group_field"
FROM
(SELECT
*
@ -204,7 +476,7 @@ SELECT
(SELECT
"events"."time", "events"."user_id", "events"."value_2"
FROM
events_reference_table as "events"
events_table as "events"
WHERE
user_id > 10 and user_id < 40 AND event_type IN (40, 41, 42, 43, 44, 45) ) "temp_data_queries"
INNER JOIN
@ -355,8 +627,8 @@ LIMIT 10;
(6 rows)
SET citus.subquery_pushdown to OFF;
-- LEFT JOINs used with INNER JOINs should error out since reference table exist in the
-- left side of the LEFT JOIN.
-- LEFT JOINs used with INNER JOINs should not error out since reference table joined
-- with hash table that Citus can push down
SELECT
count(*) AS cnt, "generated_group_field"
FROM
@ -393,8 +665,20 @@ count(*) AS cnt, "generated_group_field"
ORDER BY
cnt DESC, generated_group_field ASC
LIMIT 10;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
cnt | generated_group_field
-----+-----------------------
176 | 551
176 | 569
176 | 645
176 | 713
176 | 734
88 | 3
88 | 5
88 | 15
88 | 32
88 | 68
(10 rows)
-- RIGHT JOINs used with INNER JOINs should error out since reference table exist in the
-- right side of the RIGHT JOIN.
SELECT
@ -435,6 +719,133 @@ count(*) AS cnt, "generated_group_field"
LIMIT 10;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- right join where the inner part of the join includes a reference table
-- joined with hash partitioned table using non-equi join
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event
FROM (
SELECT
t1.user_id,
array_agg(event ORDER BY time) AS events_table,
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
FROM (
(
SELECT u.user_id, 'step=>1'::text AS event, e.time
FROM users_table AS u,
events_reference_table AS e
WHERE u.user_id > e.user_id
AND u.user_id >= 10
AND u.user_id <= 25
AND e.event_type IN (100, 101, 102)
)
) t1 RIGHT JOIN (
SELECT DISTINCT user_id,
'Has done event'::TEXT AS hasdone_event
FROM events_table AS e
WHERE e.user_id >= 10
AND e.user_id <= 25
AND e.event_type IN (106, 107, 108)
) t2 ON (t1.user_id = t2.user_id)
GROUP BY t1.user_id, hasdone_event
) t GROUP BY user_id, hasdone_event
ORDER BY user_id;
user_id | sum | length | hasdone_event
---------+-----+--------+----------------
11 | 306 | 14 | Has done event
12 | 363 | 14 | Has done event
14 | 510 | 14 | Has done event
18 | 600 | 14 | Has done event
19 | 618 | 14 | Has done event
(5 rows)
-- a similar query as the above, with non-partition key comparison
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event
FROM (
SELECT
t1.user_id,
array_agg(event ORDER BY time) AS events_table,
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
FROM (
(
SELECT u.user_id, 'step=>1'::text AS event, e.time
FROM users_table AS u,
events_reference_table AS e
WHERE u.value_1 > e.user_id
AND u.user_id >= 10
AND u.user_id <= 25
AND e.event_type >= 125 AND e.event_type < 130
)
) t1 RIGHT JOIN (
SELECT DISTINCT user_id,
'Has done event'::TEXT AS hasdone_event
FROM events_table AS e
WHERE e.user_id >= 10
AND e.user_id <= 25
AND e.event_type >= 130 AND e.event_type < 135
) t2 ON (t1.user_id = t2.user_id)
GROUP BY t1.user_id, hasdone_event
) t GROUP BY user_id, hasdone_event
ORDER BY user_id;
user_id | sum | length | hasdone_event
---------+------+--------+----------------
10 | 6018 | 14 | Has done event
16 | 5373 | 14 | Has done event
17 | 5683 | 14 | Has done event
18 | 5321 | 14 | Has done event
(4 rows)
-- LEFT JOINs used with INNER JOINs
-- events_table and users_reference_table joined
-- with event_table.non_part_key < reference_table.any_key
SELECT
count(*) AS cnt, "generated_group_field"
FROM
(SELECT
"eventQuery"."user_id", random(), generated_group_field
FROM
(SELECT
"multi_group_wrapper_1".*, generated_group_field, random()
FROM
(SELECT *
FROM
(SELECT
"events"."time", "events"."user_id" as event_user_id
FROM
events_table as "events"
WHERE
user_id > 80) "temp_data_queries"
INNER JOIN
(SELECT
"users"."user_id"
FROM
users_reference_table as "users"
WHERE
user_id > 80 and value_2 = 5) "user_filters_1"
ON ("temp_data_queries".event_user_id < "user_filters_1".user_id)) AS "multi_group_wrapper_1"
RIGHT JOIN
(SELECT
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
FROM
users_table as "users") "left_group_by_1"
ON ("left_group_by_1".user_id = "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
group BY
"generated_group_field"
ORDER BY
cnt DESC, generated_group_field ASC
LIMIT 10;
cnt | generated_group_field
-----+-----------------------
540 | 814
533 | 746
473 | 914
449 | 684
445 | 715
423 | 191
419 | 39
415 | 108
414 | 819
411 | 642
(10 rows)
-- Outer subquery with reference table
SELECT "some_users_data".user_id, lastseen
FROM
@ -468,9 +879,9 @@ limit 50;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
--
-- UNIONs and JOINs with reference tables, shoukld error out
-- UNIONs and JOINs with reference tables, should error out
--
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
SELECT ("final_query"."event_types") as types
FROM
( SELECT *, random()
FROM
@ -484,7 +895,7 @@ FROM
(SELECT
"events"."user_id", "events"."time", 0 AS event
FROM
events_reference_table as "events"
events_table as "events"
WHERE
event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1)
UNION
@ -494,7 +905,7 @@ FROM
(SELECT
"events"."user_id", "events"."time", 1 AS event
FROM
events_table as "events"
events_reference_table as "events"
WHERE
event_type IN (15, 16, 17, 18, 19) ) events_subquery_2)
UNION
@ -526,11 +937,9 @@ INNER JOIN
WHERE
value_1 > 50 and value_1 < 70) AS t
ON (t.user_id = q.user_id)) as final_query
GROUP BY
types
ORDER BY
types;
ERROR: cannot push down this subquery
ERROR: cannot push down this subquery
DETAIL: Reference tables are not supported with union operator
-- reference table exist in the subquery of union, should error out
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
@ -560,15 +969,15 @@ FROM
SELECT * FROM
(
SELECT
max("events"."time"),
max("users"."time"),
0 AS event,
"events"."user_id"
"users"."user_id"
FROM
events_reference_table as "events", users_table as "users"
WHERE
events.user_id = users.user_id AND
event_type IN (10, 11, 12, 13, 14, 15)
GROUP BY "events"."user_id"
GROUP BY "users"."user_id"
) as events_subquery_5
) events_subquery_2)
UNION
@ -660,6 +1069,117 @@ GROUP BY types
ORDER BY types;
ERROR: cannot push down this subquery
DETAIL: Reference tables are not supported with union operator
-- just a sanity check that we don't allow this if the reference table is on the
-- left part of the left join
SELECT count(*) FROM
(SELECT random() FROM users_ref_test_table LEFT JOIN user_buy_test_table
ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- we don't allow non equi join among hash partitioned tables
SELECT count(*) FROM
(SELECT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1,
(SELECT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_2
WHERE subquery_1.user_id != subquery_2.user_id ;
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
-- we cannot push this query since hash partitioned tables
-- are not joined on partition keys with equality
SELECT
count(*) AS cnt, "generated_group_field"
FROM
(SELECT
"eventQuery"."user_id", random(), generated_group_field
FROM
(SELECT
"multi_group_wrapper_1".*, generated_group_field, random()
FROM
(SELECT *
FROM
(SELECT
"events"."time", "events"."user_id" as event_user_id
FROM
events_table as "events"
WHERE
user_id > 80) "temp_data_queries"
INNER JOIN
(SELECT
"users"."user_id"
FROM
users_reference_table as "users"
WHERE
user_id > 80 and value_2 = 5) "user_filters_1"
ON ("temp_data_queries".event_user_id < "user_filters_1".user_id)) AS "multi_group_wrapper_1"
RIGHT JOIN
(SELECT
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
FROM
users_table as "users") "left_group_by_1"
ON ("left_group_by_1".user_id > "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
group BY
"generated_group_field"
ORDER BY
cnt DESC, generated_group_field ASC
LIMIT 10;
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
-- two hash partitioned relations are not joined
-- on partiton keys although reference table is fine
-- to push down
SELECT
u1.user_id, count(*)
FROM
events_table as e1, users_table as u1
WHERE
event_type IN
(SELECT
event_type
FROM
events_reference_table as e2
WHERE
value_2 = 15 AND
value_3 > 25 AND
e1.value_2 > e2.value_2
)
AND u1.user_id > e1.user_id
GROUP BY 1
ORDER BY 2 DESC, 1 DESC
LIMIT 5;
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
SELECT foo.user_id FROM
(
SELECT m.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
WHERE event_type > 100000
) as foo;
user_id
---------
(0 rows)
-- not supported since group by is on the reference table column
SELECT foo.user_id FROM
(
SELECT r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
GROUP BY r.user_id
) as foo;
ERROR: cannot push down this subquery
DETAIL: Group by list without partition column is currently unsupported
-- not supported since distinct is on the reference table column
SELECT foo.user_id FROM
(
SELECT DISTINCT r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
) as foo;
ERROR: cannot push down this subquery
DETAIL: Distinct on columns without partition column is currently unsupported
-- not supported since distinct on is on the reference table column
SELECT foo.user_id FROM
(
SELECT DISTINCT ON(r.user_id) r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
) as foo;
ERROR: cannot push down this subquery
DETAIL: Distinct on columns without partition column is currently unsupported
DROP TABLE user_buy_test_table;
DROP TABLE users_ref_test_table;
DROP TABLE users_return_test_table;

View File

@ -1,33 +1,5 @@
--
-- queries to test the subquery pushdown on reference tables
-- subqueries in WHERE with greater operator
SELECT
user_id
FROM
users_table
WHERE
value_2 >
(SELECT
max(value_2)
FROM
events_reference_table
WHERE
users_table.user_id = events_reference_table.user_id AND event_type = 50
GROUP BY
user_id
)
GROUP BY user_id
HAVING count(*) > 66
ORDER BY user_id
LIMIT 5;
user_id
---------
49
55
56
63
(4 rows)
-- subqueries in WHERE with IN operator
SELECT
user_id
@ -91,10 +63,10 @@ WHERE
)
LIMIT 3;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
DETAIL: Reference tables are not allowed in FROM clause when the query has subqueries in WHERE clause
-- subqueries in WHERE with IN operator without equality
SELECT
user_id
users_table.user_id, count(*)
FROM
users_table
WHERE
@ -106,59 +78,15 @@ WHERE
WHERE
users_table.user_id > events_reference_table.user_id
)
GROUP BY user_id
ORDER BY user_id
GROUP BY users_table.user_id
ORDER BY 2 DESC, 1 DESC
LIMIT 3;
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
-- have reference table without any equality, should error out
SELECT
user_id
FROM
users_table
WHERE
value_2 >
(SELECT
max(value_2)
FROM
events_reference_table
WHERE
event_type = 50
GROUP BY
user_id
)
GROUP BY user_id
HAVING count(*) > 66
ORDER BY user_id
LIMIT 5;
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
-- users that appeared more than 118 times, should run since the reference table
-- on the right side of the semi join
SELECT
user_id
FROM
users_table
WHERE 118 <=
(SELECT
count(*)
FROM
events_reference_table
WHERE
users_table.user_id = events_reference_table.user_id
GROUP BY
user_id)
GROUP BY
user_id
ORDER BY
user_id;
user_id
---------
13
17
23
25
(4 rows)
user_id | count
---------+-------
87 | 117
59 | 115
46 | 115
(3 rows)
-- should error out since reference table exist on the left side
-- of the left lateral join
@ -224,3 +152,194 @@ SELECT user_id, value_2 FROM users_table WHERE
ORDER BY 1, 2;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- non-partition key equality with reference table
SELECT
user_id, count(*)
FROM
users_table
WHERE
value_3 =ANY(SELECT value_2 FROM users_reference_table WHERE value_1 >= 10 AND value_1 <= 20)
GROUP BY 1 ORDER BY 2 DESC, 1 DESC LIMIT 5;
user_id | count
---------+-------
48 | 18
26 | 18
15 | 17
54 | 16
35 | 15
(5 rows)
-- non-partition key comparison with reference table
SELECT
user_id, count(*)
FROM
events_table as e1
WHERE
event_type IN
(SELECT
event_type
FROM
events_reference_table as e2
WHERE
value_2 = 15 AND
value_3 > 25 AND
e1.value_2 > e2.value_2
)
GROUP BY 1
ORDER BY 2 DESC, 1 DESC
LIMIT 5;
user_id | count
---------+-------
3 | 5
56 | 4
99 | 2
94 | 2
92 | 2
(5 rows)
-- subqueries in both WHERE and FROM clauses
-- should work since reference table is on the
-- inner part of the join
SELECT user_id, value_2 FROM users_table WHERE
value_1 > 101 AND value_1 < 110
AND value_2 >= 5
AND user_id IN
(
SELECT
e1.user_id
FROM (
-- Get the first time each user viewed the homepage.
SELECT
user_id,
1 AS view_homepage,
min(time) AS view_homepage_time
FROM events_table
WHERE
event_type IN (10, 20, 30, 40, 50, 60, 70, 80, 90)
GROUP BY user_id
) e1 LEFT JOIN LATERAL (
SELECT
user_id,
1 AS use_demo,
time AS use_demo_time
FROM events_table
WHERE
user_id = e1.user_id AND
event_type IN (11, 21, 31, 41, 51, 61, 71, 81, 91)
ORDER BY time
) e2 ON true LEFT JOIN LATERAL (
SELECT
user_id,
1 AS enter_credit_card,
time AS enter_credit_card_time
FROM events_table
WHERE
user_id = e2.user_id AND
event_type IN (12, 22, 32, 42, 52, 62, 72, 82, 92)
ORDER BY time
) e3 ON true LEFT JOIN LATERAL (
SELECT
1 AS submit_card_info,
user_id,
time AS enter_credit_card_time
FROM events_table
WHERE
user_id = e3.user_id AND
event_type IN (13, 23, 33, 43, 53, 63, 73, 83, 93)
ORDER BY time
) e4 ON true LEFT JOIN LATERAL (
SELECT
1 AS see_bought_screen
FROM events_reference_table
WHERE
user_id = e4.user_id AND
event_type IN (14, 24, 34, 44, 54, 64, 74, 84, 94)
ORDER BY time
) e5 ON true
group by e1.user_id
HAVING sum(submit_card_info) > 0
)
ORDER BY 1, 2;
user_id | value_2
---------+---------
5 | 884
42 | 55
42 | 471
51 | 758
72 | 897
82 | 691
95 | 951
(7 rows)
-- reference tables are not allowed if there is sublink
SELECT
count(*)
FROM
users_reference_table
WHERE user_id
NOT IN
(SELECT users_table.value_2 FROM users_table JOIN users_reference_table as u2 ON users_table.value_2 = u2.value_2);
ERROR: cannot pushdown the subquery
DETAIL: Reference tables are not allowed in FROM clause when the query has subqueries in WHERE clause
-- reference tables are not allowed if there is sublink
SELECT count(*)
FROM
(SELECT
user_id, random() FROM users_reference_table) AS vals
WHERE vals.user_id NOT IN
(SELECT users_table.value_2
FROM users_table
JOIN users_reference_table AS u2 ON users_table.value_2 = u2.value_2);
ERROR: cannot pushdown the subquery
DETAIL: Reference tables are not allowed in FROM clause when the query has subqueries in WHERE clause
-- reference tables are not allowed if there is sublink
SELECT user_id,
count(*)
FROM users_reference_table
WHERE value_2 > ALL
(SELECT min(value_2)
FROM events_table
WHERE event_type > 50 AND users_reference_table.user_id = events_table.user_id
GROUP BY user_id)
GROUP BY user_id
HAVING count(*) > 66
ORDER BY 2 DESC,
1 DESC
LIMIT 5;
ERROR: cannot pushdown the subquery
DETAIL: Reference tables are not allowed in FROM clause when the query has subqueries in WHERE clause
-- reference tables are not allowed if there is sublink
-- this time in the subquery
SELECT *
FROM users_table
WHERE user_id IN
(SELECT users_table.user_id
FROM users_table,
users_reference_table
WHERE users_reference_table.user_id NOT IN
(SELECT value_2
FROM users_reference_table AS u2));
ERROR: cannot push down this subquery
DETAIL: Reference tables are not allowed in FROM clause when the query has subqueries in WHERE clause
-- not supported since GROUP BY references to an upper level query
SELECT
user_id
FROM
users_table
WHERE
value_2 >
(SELECT
max(value_2)
FROM
events_reference_table
WHERE
users_table.user_id = events_reference_table.user_id AND event_type = 50
GROUP BY
users_table.user_id
)
GROUP BY user_id
HAVING count(*) > 66
ORDER BY user_id
LIMIT 5;
ERROR: cannot push down this subquery
DETAIL: Group by list without partition column is currently unsupported

View File

@ -356,6 +356,7 @@ SET citus.shard_max_size TO "1MB";
CREATE TABLE events_reference_table (like events_table including all);
SELECT create_reference_table('events_reference_table');
CREATE INDEX events_ref_val2 on events_reference_table(value_2);
INSERT INTO events_reference_table SELECT * FROM events_table;
CREATE TABLE users_reference_table (like users_table including all);

View File

@ -428,6 +428,7 @@ SELECT create_reference_table('events_reference_table');
(1 row)
CREATE INDEX events_ref_val2 on events_reference_table(value_2);
INSERT INTO events_reference_table SELECT * FROM events_table;
CREATE TABLE users_reference_table (like users_table including all);
SELECT create_reference_table('users_reference_table');

View File

@ -707,7 +707,7 @@ FROM
WHERE
colocated_table_test.value_1 = reference_table_test.value_1;
-- not pushable due to lack of equality between partition column and column of reference table
-- safe to push down even lack of equality between partition column and column of reference table
INSERT INTO
colocated_table_test (value_1, value_2)
SELECT
@ -718,7 +718,7 @@ WHERE
colocated_table_test_2.value_4 = reference_table_test.value_4
RETURNING value_1, value_2;
-- some more complex queries (Note that there are more complex queries in multi_insert_select.sql)
-- similar query with the above, this time partition key but without equality
INSERT INTO
colocated_table_test (value_1, value_2)
SELECT
@ -726,7 +726,7 @@ SELECT
FROM
colocated_table_test_2, reference_table_test
WHERE
colocated_table_test_2.value_2 = reference_table_test.value_2
colocated_table_test_2.value_1 > reference_table_test.value_2
RETURNING value_1, value_2;
-- partition column value comes from reference table, goes via coordinator

View File

@ -42,15 +42,17 @@ SELECT count(*) FROM
(SELECT random(), k_no FROM user_buy_test_table LEFT JOIN users_ref_test_table
ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1 WHERE k_no = 47;
-- Should not work, no equality between partition column and reference table
SELECT * FROM
(SELECT random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
ON user_buy_test_table.item_id = users_ref_test_table.id) subquery_1;
-- Should work, although no equality between partition column and reference table
SELECT subquery_1.item_id FROM
(SELECT user_buy_test_table.item_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
ON user_buy_test_table.item_id = users_ref_test_table.id) subquery_1
ORDER BY 1;
-- Should not work, no equality between partition column and reference table
SELECT * FROM
(SELECT random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_1;
-- Should work, although no equality between partition column and reference table
SELECT subquery_1.user_id FROM
(SELECT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_1
ORDER BY 1;
-- Shouldn't work, reference table at the outer side is not allowed
SELECT * FROM
@ -67,6 +69,21 @@ SELECT * FROM
(SELECT random() FROM user_buy_test_table RIGHT JOIN users_ref_test_table
ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1;
-- Equi join test with reference table on non-partition keys
SELECT count(*) FROM
(SELECT random() FROM user_buy_test_table JOIN users_ref_test_table
ON user_buy_test_table.item_id = users_ref_test_table.id) subquery_1;
-- Non-equi join test with reference table on non-partition keys
SELECT count(*) FROM
(SELECT random() FROM user_buy_test_table JOIN users_ref_test_table
ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1;
-- Non-equi left joins with reference tables on non-partition keys
SELECT count(*) FROM
(SELECT random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1;
-- Should pass since reference table locates in the inner part of each left join
SELECT count(*) FROM
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
@ -75,13 +92,172 @@ SELECT count(*) FROM
(SELECT tt1.user_id, random() FROM user_buy_test_table as tt1 LEFT JOIN users_ref_test_table as ref
ON tt1.user_id = ref.id) subquery_2 ON subquery_1.user_id = subquery_2.user_id;
-- Should not pass since reference table locates in the outer part of right join
-- two subqueries, each include joins with reference table
-- also, two hash distributed tables are joined on partition keys
SELECT count(*) FROM
(SELECT DISTINCT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
ON user_buy_test_table.item_id > users_ref_test_table.id AND users_ref_test_table.k_no > 88 AND user_buy_test_table.item_id < 88) subquery_1,
(SELECT DISTINCT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
ON user_buy_test_table.user_id > users_ref_test_table.id AND users_ref_test_table.k_no > 44 AND user_buy_test_table.user_id > 44) subquery_2
WHERE subquery_1.user_id = subquery_2.user_id ;
-- Should be able to push down since reference tables are inner joined
-- with hash distributed tables, the results of those joins are the parts of
-- an outer join
SELECT subquery_2.id FROM
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
ON tt1.user_id = tt2.user_id) subquery_1
RIGHT JOIN
(SELECT tt1.user_id, ref.id, random() FROM user_buy_test_table as tt1 JOIN users_ref_test_table as ref
ON tt1.user_id = ref.id) subquery_2 ON subquery_1.user_id = subquery_2.user_id ORDER BY 1 DESC LIMIT 5;
-- the same query as the above, but this Citus fails to pushdown the query
-- since the outer part of the right join doesn't include any joins
SELECT * FROM
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
ON tt1.user_id = tt2.user_id) subquery_1
RIGHT JOIN
(SELECT tt1.user_id, random() FROM user_buy_test_table as tt1 JOIN users_ref_test_table as ref
ON tt1.user_id = ref.id) subquery_2 ON subquery_1.user_id = subquery_2.user_id;
(SELECT *, random() FROM (SELECT tt1.user_id, random() FROM user_buy_test_table as tt1 JOIN users_ref_test_table as ref
ON tt1.user_id = ref.id) subquery_2_inner) subquery_2 ON subquery_1.user_id = subquery_2.user_id;
-- should be able to pushdown since reference table is in the
-- inner part of the left join
SELECT
user_id, sum(value_1)
FROM
(SELECT
users_table.user_id, users_table.value_1, random()
FROM
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
INNER JOIN events_reference_table ON (events_reference_table.value_2 = users_table.user_id)
) as foo
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
-- same query as above, reference table is wrapped into a subquery
SELECT
user_id, sum(value_1)
FROM
(SELECT
users_table.user_id, users_table.value_1, random()
FROM
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
INNER JOIN (SELECT *, random() FROM events_reference_table) as ref_all ON (ref_all.value_2 = users_table.user_id)
) as foo
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
-- should be able to pushdown since reference table is in the
-- inner part of the left join
SELECT
user_id, sum(value_1)
FROM
(SELECT
users_table.user_id, users_table.value_1, random()
FROM
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
LEFT JOIN events_reference_table ON (events_reference_table.value_2 = users_table.user_id)
) as foo
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
-- should not be able to pushdown since reference table is in the
-- direct outer part of the left join
SELECT
user_id, sum(value_1)
FROM
(SELECT
users_table.user_id, users_table.value_1, random()
FROM
events_reference_table LEFT JOIN users_table ON (users_table.user_id = events_reference_table.value_2)
LEFT JOIN events_table ON (events_table.user_id = users_table.user_id)
) as foo
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
-- should not be able to pushdown since reference table is in the
-- direct outer part of the left join wrapped into a subquery
SELECT
*
FROM
(SELECT *, random() FROM events_reference_table) as ref_all LEFT JOIN users_table
ON (users_table.user_id = ref_all.value_2);
-- should not be able to pushdown since reference table is in the
-- outer part of the left join
SELECT
user_id, sum(value_1)
FROM
(SELECT
users_table.user_id, users_table.value_1, random()
FROM
events_reference_table LEFT JOIN users_table ON (users_table.user_id = events_reference_table.value_2)
LEFT JOIN events_table ON (events_table.user_id = users_table.user_id)
) as foo
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
-- should be able to pushdown since reference table is in the
-- inner part of the left join
SELECT * FROM
(
SELECT DISTINCT foo.user_id
FROM
((SELECT
"events"."time", "events"."user_id" as event_user_id, value_2 as event_val_2, random()
FROM
events_reference_table as "events"
WHERE
event_type > 80) as "temp_data_queries"
INNER JOIN
(SELECT
"users"."user_id"
FROM
users_table as "users"
WHERE
user_id > 80 and value_2 = 5) as foo_in ON (event_val_2 = user_id)) as foo LEFT JOIN
(SELECT user_id as user_user_id FROM users_table) as fooo ON (user_id = user_user_id)) as bar;
-- the same query but this time reference table is in the outer part of the query
SELECT * FROM
(
SELECT DISTINCT foo.user_id
FROM
((SELECT
"events"."time", "events"."user_id" as event_user_id, value_2 as event_val_2, random()
FROM
events_reference_table as "events"
WHERE
event_type > 80) as "temp_data_queries"
LEFT JOIN
(SELECT
"users"."user_id"
FROM
users_table as "users"
WHERE
user_id > 80 and value_2 = 5) as foo_in ON (event_val_2 = user_id)) as foo LEFT JOIN
(SELECT user_id as user_user_id FROM users_table) as fooo ON (user_id = user_user_id)) as bar;
-- we could even suuport the following where the subquery
-- on the outer part of the left join contains a reference table
SELECT max(events_all.cnt), events_all.usr_id
FROM
(SELECT users_table.user_id as usr_id,
count(*) as cnt
FROM events_reference_table
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id) GROUP BY users_table.user_id) AS events_all
LEFT JOIN events_table ON (events_all.usr_id = events_table.user_id) GROUP BY 2 ORDER BY 1 DESC, 2 DESC LIMIT 5;
-- but, we fail to pushdown the following query where join that reference table appears
-- wrapped into a subquery
SELECT max(events_all.cnt),
events_all.usr_id
FROM(
SELECT *, random() FROM
(SELECT users_table.user_id AS usr_id, count(*) AS cnt
FROM events_reference_table
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
GROUP BY users_table.user_id) AS events_all_inner) AS events_all
LEFT JOIN events_table ON (events_all.usr_id = events_table.user_id)
GROUP BY 2
ORDER BY 1 DESC,
2 DESC
LIMIT 5;
-- LATERAL JOINs used with INNER JOINs with reference tables
SET citus.subquery_pushdown to ON;
@ -149,10 +325,10 @@ SELECT
count(*) AS value, "generated_group_field"
FROM
(SELECT
DISTINCT "pushedDownQuery"."real_user_id", "generated_group_field"
DISTINCT "pushedDownQuery"."user_id", "generated_group_field"
FROM
(SELECT
"eventQuery"."real_user_id", "eventQuery"."time", random(), ("eventQuery"."value_2") AS "generated_group_field"
"eventQuery"."user_id", "eventQuery"."time", random(), ("eventQuery"."value_2") AS "generated_group_field"
FROM
(SELECT
*
@ -160,7 +336,7 @@ SELECT
(SELECT
"events"."time", "events"."user_id", "events"."value_2"
FROM
events_reference_table as "events"
events_table as "events"
WHERE
user_id > 10 and user_id < 40 AND event_type IN (40, 41, 42, 43, 44, 45) ) "temp_data_queries"
INNER JOIN
@ -270,8 +446,8 @@ ORDER BY
LIMIT 10;
SET citus.subquery_pushdown to OFF;
-- LEFT JOINs used with INNER JOINs should error out since reference table exist in the
-- left side of the LEFT JOIN.
-- LEFT JOINs used with INNER JOINs should not error out since reference table joined
-- with hash table that Citus can push down
SELECT
count(*) AS cnt, "generated_group_field"
FROM
@ -348,6 +524,106 @@ count(*) AS cnt, "generated_group_field"
cnt DESC, generated_group_field ASC
LIMIT 10;
-- right join where the inner part of the join includes a reference table
-- joined with hash partitioned table using non-equi join
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event
FROM (
SELECT
t1.user_id,
array_agg(event ORDER BY time) AS events_table,
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
FROM (
(
SELECT u.user_id, 'step=>1'::text AS event, e.time
FROM users_table AS u,
events_reference_table AS e
WHERE u.user_id > e.user_id
AND u.user_id >= 10
AND u.user_id <= 25
AND e.event_type IN (100, 101, 102)
)
) t1 RIGHT JOIN (
SELECT DISTINCT user_id,
'Has done event'::TEXT AS hasdone_event
FROM events_table AS e
WHERE e.user_id >= 10
AND e.user_id <= 25
AND e.event_type IN (106, 107, 108)
) t2 ON (t1.user_id = t2.user_id)
GROUP BY t1.user_id, hasdone_event
) t GROUP BY user_id, hasdone_event
ORDER BY user_id;
-- a similar query as the above, with non-partition key comparison
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event
FROM (
SELECT
t1.user_id,
array_agg(event ORDER BY time) AS events_table,
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
FROM (
(
SELECT u.user_id, 'step=>1'::text AS event, e.time
FROM users_table AS u,
events_reference_table AS e
WHERE u.value_1 > e.user_id
AND u.user_id >= 10
AND u.user_id <= 25
AND e.event_type >= 125 AND e.event_type < 130
)
) t1 RIGHT JOIN (
SELECT DISTINCT user_id,
'Has done event'::TEXT AS hasdone_event
FROM events_table AS e
WHERE e.user_id >= 10
AND e.user_id <= 25
AND e.event_type >= 130 AND e.event_type < 135
) t2 ON (t1.user_id = t2.user_id)
GROUP BY t1.user_id, hasdone_event
) t GROUP BY user_id, hasdone_event
ORDER BY user_id;
-- LEFT JOINs used with INNER JOINs
-- events_table and users_reference_table joined
-- with event_table.non_part_key < reference_table.any_key
SELECT
count(*) AS cnt, "generated_group_field"
FROM
(SELECT
"eventQuery"."user_id", random(), generated_group_field
FROM
(SELECT
"multi_group_wrapper_1".*, generated_group_field, random()
FROM
(SELECT *
FROM
(SELECT
"events"."time", "events"."user_id" as event_user_id
FROM
events_table as "events"
WHERE
user_id > 80) "temp_data_queries"
INNER JOIN
(SELECT
"users"."user_id"
FROM
users_reference_table as "users"
WHERE
user_id > 80 and value_2 = 5) "user_filters_1"
ON ("temp_data_queries".event_user_id < "user_filters_1".user_id)) AS "multi_group_wrapper_1"
RIGHT JOIN
(SELECT
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
FROM
users_table as "users") "left_group_by_1"
ON ("left_group_by_1".user_id = "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
group BY
"generated_group_field"
ORDER BY
cnt DESC, generated_group_field ASC
LIMIT 10;
-- Outer subquery with reference table
SELECT "some_users_data".user_id, lastseen
FROM
@ -380,9 +656,9 @@ ORDER BY
limit 50;
--
-- UNIONs and JOINs with reference tables, shoukld error out
-- UNIONs and JOINs with reference tables, should error out
--
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
SELECT ("final_query"."event_types") as types
FROM
( SELECT *, random()
FROM
@ -396,7 +672,7 @@ FROM
(SELECT
"events"."user_id", "events"."time", 0 AS event
FROM
events_reference_table as "events"
events_table as "events"
WHERE
event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1)
UNION
@ -406,7 +682,7 @@ FROM
(SELECT
"events"."user_id", "events"."time", 1 AS event
FROM
events_table as "events"
events_reference_table as "events"
WHERE
event_type IN (15, 16, 17, 18, 19) ) events_subquery_2)
UNION
@ -438,8 +714,6 @@ INNER JOIN
WHERE
value_1 > 50 and value_1 < 70) AS t
ON (t.user_id = q.user_id)) as final_query
GROUP BY
types
ORDER BY
types;
@ -471,15 +745,15 @@ FROM
SELECT * FROM
(
SELECT
max("events"."time"),
max("users"."time"),
0 AS event,
"events"."user_id"
"users"."user_id"
FROM
events_reference_table as "events", users_table as "users"
WHERE
events.user_id = users.user_id AND
event_type IN (10, 11, 12, 13, 14, 15)
GROUP BY "events"."user_id"
GROUP BY "users"."user_id"
) as events_subquery_5
) events_subquery_2)
UNION
@ -569,6 +843,107 @@ INNER JOIN
GROUP BY types
ORDER BY types;
-- just a sanity check that we don't allow this if the reference table is on the
-- left part of the left join
SELECT count(*) FROM
(SELECT random() FROM users_ref_test_table LEFT JOIN user_buy_test_table
ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1;
-- we don't allow non equi join among hash partitioned tables
SELECT count(*) FROM
(SELECT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1,
(SELECT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_2
WHERE subquery_1.user_id != subquery_2.user_id ;
-- we cannot push this query since hash partitioned tables
-- are not joined on partition keys with equality
SELECT
count(*) AS cnt, "generated_group_field"
FROM
(SELECT
"eventQuery"."user_id", random(), generated_group_field
FROM
(SELECT
"multi_group_wrapper_1".*, generated_group_field, random()
FROM
(SELECT *
FROM
(SELECT
"events"."time", "events"."user_id" as event_user_id
FROM
events_table as "events"
WHERE
user_id > 80) "temp_data_queries"
INNER JOIN
(SELECT
"users"."user_id"
FROM
users_reference_table as "users"
WHERE
user_id > 80 and value_2 = 5) "user_filters_1"
ON ("temp_data_queries".event_user_id < "user_filters_1".user_id)) AS "multi_group_wrapper_1"
RIGHT JOIN
(SELECT
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
FROM
users_table as "users") "left_group_by_1"
ON ("left_group_by_1".user_id > "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
group BY
"generated_group_field"
ORDER BY
cnt DESC, generated_group_field ASC
LIMIT 10;
-- two hash partitioned relations are not joined
-- on partiton keys although reference table is fine
-- to push down
SELECT
u1.user_id, count(*)
FROM
events_table as e1, users_table as u1
WHERE
event_type IN
(SELECT
event_type
FROM
events_reference_table as e2
WHERE
value_2 = 15 AND
value_3 > 25 AND
e1.value_2 > e2.value_2
)
AND u1.user_id > e1.user_id
GROUP BY 1
ORDER BY 2 DESC, 1 DESC
LIMIT 5;
SELECT foo.user_id FROM
(
SELECT m.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
WHERE event_type > 100000
) as foo;
-- not supported since group by is on the reference table column
SELECT foo.user_id FROM
(
SELECT r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
GROUP BY r.user_id
) as foo;
-- not supported since distinct is on the reference table column
SELECT foo.user_id FROM
(
SELECT DISTINCT r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
) as foo;
-- not supported since distinct on is on the reference table column
SELECT foo.user_id FROM
(
SELECT DISTINCT ON(r.user_id) r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
) as foo;
DROP TABLE user_buy_test_table;
DROP TABLE users_ref_test_table;
DROP TABLE users_return_test_table;

View File

@ -1,27 +1,6 @@
--
-- queries to test the subquery pushdown on reference tables
-- subqueries in WHERE with greater operator
SELECT
user_id
FROM
users_table
WHERE
value_2 >
(SELECT
max(value_2)
FROM
events_reference_table
WHERE
users_table.user_id = events_reference_table.user_id AND event_type = 50
GROUP BY
user_id
)
GROUP BY user_id
HAVING count(*) > 66
ORDER BY user_id
LIMIT 5;
-- subqueries in WHERE with IN operator
SELECT
user_id
@ -78,7 +57,7 @@ LIMIT 3;
-- subqueries in WHERE with IN operator without equality
SELECT
user_id
users_table.user_id, count(*)
FROM
users_table
WHERE
@ -90,51 +69,10 @@ WHERE
WHERE
users_table.user_id > events_reference_table.user_id
)
GROUP BY user_id
ORDER BY user_id
GROUP BY users_table.user_id
ORDER BY 2 DESC, 1 DESC
LIMIT 3;
-- have reference table without any equality, should error out
SELECT
user_id
FROM
users_table
WHERE
value_2 >
(SELECT
max(value_2)
FROM
events_reference_table
WHERE
event_type = 50
GROUP BY
user_id
)
GROUP BY user_id
HAVING count(*) > 66
ORDER BY user_id
LIMIT 5;
-- users that appeared more than 118 times, should run since the reference table
-- on the right side of the semi join
SELECT
user_id
FROM
users_table
WHERE 118 <=
(SELECT
count(*)
FROM
events_reference_table
WHERE
users_table.user_id = events_reference_table.user_id
GROUP BY
user_id)
GROUP BY
user_id
ORDER BY
user_id;
-- should error out since reference table exist on the left side
-- of the left lateral join
SELECT user_id, value_2 FROM users_table WHERE
@ -197,3 +135,165 @@ SELECT user_id, value_2 FROM users_table WHERE
HAVING sum(submit_card_info) > 0
)
ORDER BY 1, 2;
-- non-partition key equality with reference table
SELECT
user_id, count(*)
FROM
users_table
WHERE
value_3 =ANY(SELECT value_2 FROM users_reference_table WHERE value_1 >= 10 AND value_1 <= 20)
GROUP BY 1 ORDER BY 2 DESC, 1 DESC LIMIT 5;
-- non-partition key comparison with reference table
SELECT
user_id, count(*)
FROM
events_table as e1
WHERE
event_type IN
(SELECT
event_type
FROM
events_reference_table as e2
WHERE
value_2 = 15 AND
value_3 > 25 AND
e1.value_2 > e2.value_2
)
GROUP BY 1
ORDER BY 2 DESC, 1 DESC
LIMIT 5;
-- subqueries in both WHERE and FROM clauses
-- should work since reference table is on the
-- inner part of the join
SELECT user_id, value_2 FROM users_table WHERE
value_1 > 101 AND value_1 < 110
AND value_2 >= 5
AND user_id IN
(
SELECT
e1.user_id
FROM (
-- Get the first time each user viewed the homepage.
SELECT
user_id,
1 AS view_homepage,
min(time) AS view_homepage_time
FROM events_table
WHERE
event_type IN (10, 20, 30, 40, 50, 60, 70, 80, 90)
GROUP BY user_id
) e1 LEFT JOIN LATERAL (
SELECT
user_id,
1 AS use_demo,
time AS use_demo_time
FROM events_table
WHERE
user_id = e1.user_id AND
event_type IN (11, 21, 31, 41, 51, 61, 71, 81, 91)
ORDER BY time
) e2 ON true LEFT JOIN LATERAL (
SELECT
user_id,
1 AS enter_credit_card,
time AS enter_credit_card_time
FROM events_table
WHERE
user_id = e2.user_id AND
event_type IN (12, 22, 32, 42, 52, 62, 72, 82, 92)
ORDER BY time
) e3 ON true LEFT JOIN LATERAL (
SELECT
1 AS submit_card_info,
user_id,
time AS enter_credit_card_time
FROM events_table
WHERE
user_id = e3.user_id AND
event_type IN (13, 23, 33, 43, 53, 63, 73, 83, 93)
ORDER BY time
) e4 ON true LEFT JOIN LATERAL (
SELECT
1 AS see_bought_screen
FROM events_reference_table
WHERE
user_id = e4.user_id AND
event_type IN (14, 24, 34, 44, 54, 64, 74, 84, 94)
ORDER BY time
) e5 ON true
group by e1.user_id
HAVING sum(submit_card_info) > 0
)
ORDER BY 1, 2;
-- reference tables are not allowed if there is sublink
SELECT
count(*)
FROM
users_reference_table
WHERE user_id
NOT IN
(SELECT users_table.value_2 FROM users_table JOIN users_reference_table as u2 ON users_table.value_2 = u2.value_2);
-- reference tables are not allowed if there is sublink
SELECT count(*)
FROM
(SELECT
user_id, random() FROM users_reference_table) AS vals
WHERE vals.user_id NOT IN
(SELECT users_table.value_2
FROM users_table
JOIN users_reference_table AS u2 ON users_table.value_2 = u2.value_2);
-- reference tables are not allowed if there is sublink
SELECT user_id,
count(*)
FROM users_reference_table
WHERE value_2 > ALL
(SELECT min(value_2)
FROM events_table
WHERE event_type > 50 AND users_reference_table.user_id = events_table.user_id
GROUP BY user_id)
GROUP BY user_id
HAVING count(*) > 66
ORDER BY 2 DESC,
1 DESC
LIMIT 5;
-- reference tables are not allowed if there is sublink
-- this time in the subquery
SELECT *
FROM users_table
WHERE user_id IN
(SELECT users_table.user_id
FROM users_table,
users_reference_table
WHERE users_reference_table.user_id NOT IN
(SELECT value_2
FROM users_reference_table AS u2));
-- not supported since GROUP BY references to an upper level query
SELECT
user_id
FROM
users_table
WHERE
value_2 >
(SELECT
max(value_2)
FROM
events_reference_table
WHERE
users_table.user_id = events_reference_table.user_id AND event_type = 50
GROUP BY
users_table.user_id
)
GROUP BY user_id
HAVING count(*) > 66
ORDER BY user_id
LIMIT 5;