mirror of https://github.com/citusdata/citus.git
Support for subqueries in WHERE clause
This commit enables subqueries in WHERE clause to be pushed down
by the subquery pushdown logic.
The support covers:
- Correlated subqueries with IN, NOT IN, EXISTS, NOT EXISTS,
operator expressions such as (>, <, =, ALL, ANY etc.)
- Non-correlated subqueries with (partition_key) IN (SELECT partition_key ..)
(partition_key) =ANY (SELECT partition_key ...)
Note that this commit heavily utilizes the attribute equivalence logic introduced
in the 1cb6a34ba8
. In general, this commit mostly
adjusts the logical planner not to error out on the subqueries in WHERE clause.
pull/1342/head
parent
6b1de6914e
commit
6f4ee50324
|
@ -93,6 +93,7 @@ static bool HasOuterJoinWalker(Node *node, void *maxJoinLevel);
|
|||
static bool HasComplexJoinOrder(Query *queryTree);
|
||||
static bool HasComplexRangeTableType(Query *queryTree);
|
||||
static void ValidateClauseList(List *clauseList);
|
||||
static void ValidateSubqueryPushdownClauseList(List *clauseList);
|
||||
static bool ExtractFromExpressionWalker(Node *node,
|
||||
QualifierWalkerContext *walkerContext);
|
||||
static List * MultiTableNodeList(List *tableEntryList, List *rangeTableList);
|
||||
|
@ -102,6 +103,8 @@ static MultiNode * MultiJoinTree(List *joinOrderList, List *collectTableList,
|
|||
static MultiCollect * CollectNodeForTable(List *collectTableList, uint32 rangeTableId);
|
||||
static MultiSelect * MultiSelectNode(List *whereClauseList);
|
||||
static bool IsSelectClause(Node *clause);
|
||||
static void ErrorIfSublink(Node *clause);
|
||||
static bool IsSublinkClause(Node *clause);
|
||||
static MultiProject * MultiProjectNode(List *targetEntryList);
|
||||
static MultiExtendedOp * MultiExtendedOpNode(Query *queryTree);
|
||||
|
||||
|
@ -134,6 +137,8 @@ static MultiNode * MultiSubqueryPlanTree(Query *originalQuery,
|
|||
Query *queryTree,
|
||||
PlannerRestrictionContext *
|
||||
plannerRestrictionContext);
|
||||
static List * SublinkList(Query *originalQuery);
|
||||
static bool ExtractSublinkWalker(Node *node, List **sublinkList);
|
||||
static MultiNode * SubqueryPushdownMultiPlanTree(Query *queryTree);
|
||||
|
||||
static List * CreateSubqueryTargetEntryList(List *columnList);
|
||||
|
@ -159,15 +164,18 @@ MultiLogicalPlanCreate(Query *originalQuery, Query *queryTree,
|
|||
{
|
||||
MultiNode *multiQueryNode = NULL;
|
||||
MultiTreeRoot *rootNode = NULL;
|
||||
List *subqueryEntryList = NULL;
|
||||
|
||||
/*
|
||||
* We check the existence of subqueries in the modified query given that
|
||||
* if postgres already flattened the subqueries, MultiPlanTree() can plan
|
||||
* corresponding distributed plan.
|
||||
* We check the existence of subqueries in FROM clause on the modified query
|
||||
* given that if postgres already flattened the subqueries, MultiPlanTree()
|
||||
* can plan corresponding distributed plan.
|
||||
*
|
||||
* We also check the existence of subqueries in WHERE clause. Note that
|
||||
* this check needs to be done on the original query given that
|
||||
* standard_planner() may replace the sublinks with anti/semi joins and
|
||||
* MultiPlanTree() cannot plan such queries.
|
||||
*/
|
||||
subqueryEntryList = SubqueryEntryList(queryTree);
|
||||
if (subqueryEntryList != NIL)
|
||||
if (SubqueryEntryList(queryTree) != NIL || SublinkList(originalQuery) != NIL)
|
||||
{
|
||||
multiQueryNode = MultiSubqueryPlanTree(originalQuery, queryTree,
|
||||
plannerRestrictionContext);
|
||||
|
@ -185,6 +193,57 @@ MultiLogicalPlanCreate(Query *originalQuery, Query *queryTree,
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* SublinkList finds the subquery nodes in the where clause of the given query. Note
|
||||
* that the function should be called on the orignal query given that postgres
|
||||
* standard_planner() may convert the subqueries in WHERE clause to joins.
|
||||
*/
|
||||
static List *
|
||||
SublinkList(Query *originalQuery)
|
||||
{
|
||||
FromExpr *joinTree = originalQuery->jointree;
|
||||
Node *queryQuals = NULL;
|
||||
List *sublinkList = NIL;
|
||||
|
||||
if (!joinTree)
|
||||
{
|
||||
return NIL;
|
||||
}
|
||||
|
||||
queryQuals = joinTree->quals;
|
||||
ExtractSublinkWalker(queryQuals, &sublinkList);
|
||||
|
||||
return sublinkList;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ExtractSublinkWalker walks over a quals node, and finds all sublinks
|
||||
* in that node.
|
||||
*/
|
||||
static bool
|
||||
ExtractSublinkWalker(Node *node, List **sublinkList)
|
||||
{
|
||||
bool walkerResult = false;
|
||||
if (node == NULL)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (IsA(node, SubLink))
|
||||
{
|
||||
(*sublinkList) = lappend(*sublinkList, node);
|
||||
}
|
||||
else
|
||||
{
|
||||
walkerResult = expression_tree_walker(node, ExtractSublinkWalker,
|
||||
sublinkList);
|
||||
}
|
||||
|
||||
return walkerResult;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* MultiSubqueryPlanTree gets the query objects and returns logical plan
|
||||
* for subqueries.
|
||||
|
@ -336,8 +395,10 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery,
|
|||
plannerRestrictionContext)
|
||||
{
|
||||
ListCell *rangeTableEntryCell = NULL;
|
||||
ListCell *sublinkCell = NULL;
|
||||
List *subqueryEntryList = NIL;
|
||||
bool outerMostQueryHasLimit = false;
|
||||
List *sublinkList = NIL;
|
||||
DeferredErrorMessage *error = NULL;
|
||||
RelationRestrictionContext *relationRestrictionContext =
|
||||
plannerRestrictionContext->relationRestrictionContext;
|
||||
|
@ -395,6 +456,23 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery,
|
|||
}
|
||||
}
|
||||
|
||||
sublinkList = SublinkList(originalQuery);
|
||||
foreach(sublinkCell, sublinkList)
|
||||
{
|
||||
SubLink *sublink = (SubLink *) lfirst(sublinkCell);
|
||||
Node *subselect = sublink->subselect;
|
||||
|
||||
if (subselect && IsA(subselect, Query))
|
||||
{
|
||||
error = DeferErrorIfCannotPushdownSubquery((Query *) subselect,
|
||||
outerMostQueryHasLimit);
|
||||
if (error)
|
||||
{
|
||||
return error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -549,12 +627,6 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi
|
|||
return deferredError;
|
||||
}
|
||||
|
||||
if (subqueryTree->hasSubLinks)
|
||||
{
|
||||
preconditionsSatisfied = false;
|
||||
errorDetail = "Subqueries other than from-clause subqueries are unsupported";
|
||||
}
|
||||
|
||||
if (subqueryTree->rtable == NIL)
|
||||
{
|
||||
preconditionsSatisfied = false;
|
||||
|
@ -1248,11 +1320,11 @@ ErrorIfQueryNotSupported(Query *queryTree)
|
|||
const char *filterHint = "Consider using an equality filter on the distributed "
|
||||
"table's partition column.";
|
||||
|
||||
if (queryTree->hasSubLinks)
|
||||
if (queryTree->hasSubLinks && SublinkList(queryTree) == NIL)
|
||||
{
|
||||
preconditionsSatisfied = false;
|
||||
errorMessage = "could not run distributed query with subquery outside the "
|
||||
"FROM clause";
|
||||
"FROM and WHERE clauses";
|
||||
errorHint = filterHint;
|
||||
}
|
||||
|
||||
|
@ -1506,6 +1578,12 @@ DeferErrorIfUnsupportedSubqueryRepartition(Query *subqueryTree)
|
|||
errorDetail = "Subqueries with offset are not supported yet";
|
||||
}
|
||||
|
||||
if (subqueryTree->hasSubLinks)
|
||||
{
|
||||
preconditionsSatisfied = false;
|
||||
errorDetail = "Subqueries other than from-clause subqueries are unsupported";
|
||||
}
|
||||
|
||||
/* finally check and return error if conditions are not satisfied */
|
||||
if (!preconditionsSatisfied)
|
||||
{
|
||||
|
@ -1752,11 +1830,37 @@ ValidateClauseList(List *clauseList)
|
|||
{
|
||||
Node *clause = (Node *) lfirst(clauseCell);
|
||||
|
||||
bool selectClause = IsSelectClause(clause);
|
||||
bool joinClause = IsJoinClause(clause);
|
||||
bool orClause = or_clause(clause);
|
||||
/* produce a better error message for sublinks */
|
||||
ErrorIfSublink(clause);
|
||||
|
||||
if (!(selectClause || joinClause || orClause))
|
||||
if (!(IsSelectClause(clause) || IsJoinClause(clause) || or_clause(clause)))
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("unsupported clause type")));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ValidateSubqueryPushdownClauseList walks over the given list of clauses,
|
||||
* and checks that we can recognize all the clauses. This function ensures
|
||||
* that we do not drop an unsupported clause type on the floor, and thus
|
||||
* prevents erroneous results.
|
||||
*
|
||||
* Note that this function is slightly different than ValidateClauseList(),
|
||||
* additionally allowing subkinks.
|
||||
*/
|
||||
static void
|
||||
ValidateSubqueryPushdownClauseList(List *clauseList)
|
||||
{
|
||||
ListCell *clauseCell = NULL;
|
||||
foreach(clauseCell, clauseList)
|
||||
{
|
||||
Node *clause = (Node *) lfirst(clauseCell);
|
||||
|
||||
if (!(IsSublinkClause(clause) || IsSelectClause(clause) ||
|
||||
IsJoinClause(clause) || or_clause(clause)))
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("unsupported clause type")));
|
||||
|
@ -1810,6 +1914,10 @@ JoinClauseList(List *whereClauseList)
|
|||
* subqueries but differently from the outermost query, they are run on a copy
|
||||
* of parse tree and changes do not get persisted as modifications to the original
|
||||
* query tree.
|
||||
*
|
||||
* Also this function adds SubLinks to the baseQualifierList when they appear on
|
||||
* the query's WHERE clause. The callers of the function should consider processing
|
||||
* Sublinks as well.
|
||||
*/
|
||||
static bool
|
||||
ExtractFromExpressionWalker(Node *node, QualifierWalkerContext *walkerContext)
|
||||
|
@ -2203,7 +2311,8 @@ MultiSelectNode(List *whereClauseList)
|
|||
/*
|
||||
* IsSelectClause determines if the given node is a select clause according to
|
||||
* our criteria. Our criteria defines a select clause as an expression that has
|
||||
* zero or more columns belonging to only one table.
|
||||
* zero or more columns belonging to only one table. The function assumes that
|
||||
* no sublinks exists in the clause.
|
||||
*/
|
||||
static bool
|
||||
IsSelectClause(Node *clause)
|
||||
|
@ -2213,16 +2322,6 @@ IsSelectClause(Node *clause)
|
|||
Var *firstColumn = NULL;
|
||||
Index firstColumnTableId = 0;
|
||||
bool isSelectClause = true;
|
||||
NodeTag nodeTag = nodeTag(clause);
|
||||
|
||||
/* error out for subqueries in WHERE clause */
|
||||
if (nodeTag == T_SubLink || nodeTag == T_SubPlan)
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("cannot perform distributed planning on this query"),
|
||||
errdetail("Subqueries other than in from-clause are currently "
|
||||
"unsupported")));
|
||||
}
|
||||
|
||||
/* extract columns from the clause */
|
||||
columnList = pull_var_clause_default(clause);
|
||||
|
@ -2249,6 +2348,42 @@ IsSelectClause(Node *clause)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* ErrorIfSublink errors out if the input claise is either sublink or subplan.
|
||||
*/
|
||||
static void
|
||||
ErrorIfSublink(Node *clause)
|
||||
{
|
||||
NodeTag nodeTag = nodeTag(clause);
|
||||
|
||||
/* error out for subqueries in WHERE clause */
|
||||
if (nodeTag == T_SubLink || nodeTag == T_SubPlan)
|
||||
{
|
||||
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("cannot perform distributed planning on this query"),
|
||||
errdetail("Subqueries other than in from-clause are currently "
|
||||
"unsupported")));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* IsSublinkClause determines if the given node is a sublink or subplan.
|
||||
*/
|
||||
static bool
|
||||
IsSublinkClause(Node *clause)
|
||||
{
|
||||
NodeTag nodeTag = nodeTag(clause);
|
||||
|
||||
if (nodeTag == T_SubLink || nodeTag == T_SubPlan)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* MultiProjectNode builds the project node using the target entry information
|
||||
* from the query tree. The project node only encapsulates projected columns,
|
||||
|
@ -2904,9 +3039,13 @@ SubqueryPushdownMultiPlanTree(Query *queryTree)
|
|||
/* verify we can perform distributed planning on this query */
|
||||
ErrorIfQueryNotSupported(queryTree);
|
||||
|
||||
/* extract qualifiers and verify we can plan for them */
|
||||
/*
|
||||
* Extract qualifiers and verify we can plan for them. Note that since
|
||||
* subquery pushdown join planning is based on restriction equivalence,
|
||||
* checking for these qualifiers may not be necessary.
|
||||
*/
|
||||
qualifierList = QualifierList(queryTree->jointree);
|
||||
ValidateClauseList(qualifierList);
|
||||
ValidateSubqueryPushdownClauseList(qualifierList);
|
||||
|
||||
/*
|
||||
* We would be creating a new Query and pushing down top level query's
|
||||
|
|
|
@ -422,15 +422,6 @@ DEBUG: Plan is router executable
|
|||
SELECT * FROM articles_hash_mx, position('om' in 'Thomas') WHERE author_id = 1 or author_id = 2;
|
||||
ERROR: could not run distributed query with complex table expressions
|
||||
HINT: Consider using an equality filter on the distributed table's partition column.
|
||||
-- subqueries are not supported in WHERE clause in Citus
|
||||
SELECT * FROM articles_hash_mx WHERE author_id IN (SELECT id FROM authors_hash_mx WHERE name LIKE '%a');
|
||||
ERROR: cannot plan queries that include both regular and partitioned relations
|
||||
SELECT * FROM articles_hash_mx WHERE author_id IN (SELECT author_id FROM articles_hash_mx WHERE author_id = 1 or author_id = 3);
|
||||
ERROR: could not run distributed query with join types other than INNER or OUTER JOINS
|
||||
HINT: Consider joining tables on partition column and have equal filter on joining columns.
|
||||
SELECT * FROM articles_hash_mx WHERE author_id = (SELECT 1);
|
||||
ERROR: could not run distributed query with subquery outside the FROM clause
|
||||
HINT: Consider using an equality filter on the distributed table's partition column.
|
||||
-- subqueries are supported in FROM clause but they are not router plannable
|
||||
SELECT articles_hash_mx.id,test.word_count
|
||||
FROM articles_hash_mx, (SELECT id, word_count FROM articles_hash_mx) AS test WHERE test.id = articles_hash_mx.id
|
||||
|
@ -502,7 +493,7 @@ HINT: Set citus.task_executor_type to "task-tracker".
|
|||
-- subqueries are not supported in SELECT clause
|
||||
SELECT a.title AS name, (SELECT a2.id FROM articles_single_shard_hash_mx a2 WHERE a.id = a2.id LIMIT 1)
|
||||
AS special_price FROM articles_hash_mx a;
|
||||
ERROR: could not run distributed query with subquery outside the FROM clause
|
||||
ERROR: could not run distributed query with subquery outside the FROM and WHERE clauses
|
||||
HINT: Consider using an equality filter on the distributed table's partition column.
|
||||
-- simple lookup query
|
||||
SELECT *
|
||||
|
|
|
@ -520,15 +520,6 @@ DEBUG: Plan is router executable
|
|||
SELECT * FROM articles_hash, position('om' in 'Thomas') WHERE author_id = 1 or author_id = 2;
|
||||
ERROR: could not run distributed query with complex table expressions
|
||||
HINT: Consider using an equality filter on the distributed table's partition column.
|
||||
-- subqueries are not supported in WHERE clause in Citus
|
||||
SELECT * FROM articles_hash WHERE author_id IN (SELECT id FROM authors_hash WHERE name LIKE '%a');
|
||||
ERROR: cannot plan queries that include both regular and partitioned relations
|
||||
SELECT * FROM articles_hash WHERE author_id IN (SELECT author_id FROM articles_hash WHERE author_id = 1 or author_id = 3);
|
||||
ERROR: could not run distributed query with join types other than INNER or OUTER JOINS
|
||||
HINT: Consider joining tables on partition column and have equal filter on joining columns.
|
||||
SELECT * FROM articles_hash WHERE author_id = (SELECT 1);
|
||||
ERROR: could not run distributed query with subquery outside the FROM clause
|
||||
HINT: Consider using an equality filter on the distributed table's partition column.
|
||||
-- unless the query can be transformed into a join
|
||||
SELECT * FROM articles_hash
|
||||
WHERE author_id IN (SELECT author_id FROM articles_hash WHERE author_id = 2)
|
||||
|
@ -615,7 +606,7 @@ HINT: Set citus.task_executor_type to "task-tracker".
|
|||
-- subqueries are not supported in SELECT clause
|
||||
SELECT a.title AS name, (SELECT a2.id FROM articles_single_shard_hash a2 WHERE a.id = a2.id LIMIT 1)
|
||||
AS special_price FROM articles_hash a;
|
||||
ERROR: could not run distributed query with subquery outside the FROM clause
|
||||
ERROR: could not run distributed query with subquery outside the FROM and WHERE clauses
|
||||
HINT: Consider using an equality filter on the distributed table's partition column.
|
||||
-- simple lookup query
|
||||
SELECT *
|
||||
|
|
|
@ -247,7 +247,7 @@ ORDER BY articles.id;
|
|||
-- subqueries are not supported in SELECT clause
|
||||
SELECT a.title AS name, (SELECT a2.id FROM articles_single_shard a2 WHERE a.id = a2.id LIMIT 1)
|
||||
AS special_price FROM articles a;
|
||||
ERROR: could not run distributed query with subquery outside the FROM clause
|
||||
ERROR: could not run distributed query with subquery outside the FROM and WHERE clauses
|
||||
HINT: Consider using an equality filter on the distributed table's partition column.
|
||||
-- joins are not supported between local and distributed tables
|
||||
SELECT title, authors.name FROM authors, articles WHERE authors.id = articles.author_id;
|
||||
|
|
|
@ -546,62 +546,103 @@ ORDER BY user_lastseen DESC, user_id;
|
|||
|
||||
------------------------------------
|
||||
-- Count the number of distinct users_table who are in segment X and Y and Z
|
||||
-- This query will be supported when we have subqueries in where clauses.
|
||||
------------------------------------
|
||||
SELECT DISTINCT user_id
|
||||
SELECT user_id
|
||||
FROM users_table
|
||||
WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20)
|
||||
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 30 AND value_1 <= 40)
|
||||
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60);
|
||||
ERROR: could not run distributed query with join types other than INNER or OUTER JOINS
|
||||
HINT: Consider joining tables on partition column and have equal filter on joining columns.
|
||||
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60)
|
||||
GROUP BY
|
||||
user_id
|
||||
ORDER BY
|
||||
user_id DESC
|
||||
LIMIT 5;
|
||||
user_id
|
||||
---------
|
||||
93
|
||||
90
|
||||
88
|
||||
87
|
||||
84
|
||||
(5 rows)
|
||||
|
||||
------------------------------------
|
||||
-- Find customers who have done X, and satisfy other customer specific criteria
|
||||
-- This query will be supported when we have subqueries in where clauses.
|
||||
------------------------------------
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_1 > 101 AND value_1 < 110
|
||||
AND value_2 >= 5
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 101 AND event_type < 110 AND value_3 > 100 AND user_id = users_table.user_id);
|
||||
ERROR: could not run distributed query with join types other than INNER or OUTER JOINS
|
||||
HINT: Consider joining tables on partition column and have equal filter on joining columns.
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 101 AND event_type < 110 AND value_3 > 100 AND user_id = users_table.user_id)
|
||||
ORDER BY 2 DESC, 1 DESC
|
||||
LIMIT 5;
|
||||
user_id | value_2
|
||||
---------+---------
|
||||
95 | 951
|
||||
4 | 934
|
||||
2 | 908
|
||||
90 | 900
|
||||
49 | 847
|
||||
(5 rows)
|
||||
|
||||
------------------------------------
|
||||
-- Customers who haven’t done X, and satisfy other customer specific criteria
|
||||
-- This query will be supported when we have subqueries in where clauses.
|
||||
------------------------------------
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_1 = 101
|
||||
AND value_2 >= 5
|
||||
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND user_id = users_table.user_id);
|
||||
ERROR: could not run distributed query with subquery outside the FROM clause
|
||||
HINT: Consider using an equality filter on the distributed table's partition column.
|
||||
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND user_id = users_table.user_id)
|
||||
ORDER BY 1 DESC, 2 DESC
|
||||
LIMIT 3;
|
||||
user_id | value_2
|
||||
---------+---------
|
||||
58 | 585
|
||||
51 | 1000
|
||||
48 | 861
|
||||
(3 rows)
|
||||
|
||||
------------------------------------
|
||||
-- Customers who have done X and Y, and satisfy other customer specific criteria
|
||||
-- This query will be supported when we have subqueries in where clauses.
|
||||
------------------------------------
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
SELECT user_id, sum(value_2) as cnt FROM users_table WHERE
|
||||
value_1 > 100
|
||||
AND value_2 >= 5
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type != 100 AND value_3 > 100 AND user_id = users_table.user_id)
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type = 101 AND value_3 > 100 AND user_id = users_table.user_id);
|
||||
ERROR: could not run distributed query with join types other than INNER or OUTER JOINS
|
||||
HINT: Consider joining tables on partition column and have equal filter on joining columns.
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type = 101 AND value_3 > 100 AND user_id = users_table.user_id)
|
||||
GROUP BY
|
||||
user_id
|
||||
ORDER BY cnt DESC, user_id DESC
|
||||
LIMIT 5;
|
||||
user_id | cnt
|
||||
---------+-------
|
||||
49 | 48606
|
||||
69 | 46524
|
||||
86 | 46163
|
||||
80 | 45995
|
||||
35 | 45437
|
||||
(5 rows)
|
||||
|
||||
------------------------------------
|
||||
-- Customers who have done X and haven’t done Y, and satisfy other customer specific criteria
|
||||
-- This query will be supported when we have subqueries in where clauses.
|
||||
------------------------------------
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_2 >= 5
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 100 AND event_type <= 300 AND value_3 > 100 AND user_id = users_table.user_id)
|
||||
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 300 AND event_type <= 350 AND value_3 > 100 AND user_id = users_table.user_id);
|
||||
ERROR: could not run distributed query with join types other than INNER or OUTER JOINS
|
||||
HINT: Consider joining tables on partition column and have equal filter on joining columns.
|
||||
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 300 AND event_type <= 350 AND value_3 > 100 AND user_id = users_table.user_id)
|
||||
ORDER BY 2 DESC, 1 DESC
|
||||
LIMIT 4;
|
||||
user_id | value_2
|
||||
---------+---------
|
||||
8 | 996
|
||||
96 | 995
|
||||
8 | 995
|
||||
96 | 989
|
||||
(4 rows)
|
||||
|
||||
------------------------------------
|
||||
-- Customers who have done X more than 2 times, and satisfy other customer specific criteria
|
||||
-- This query will be supported when we have subqueries in where clauses.
|
||||
------------------------------------
|
||||
SELECT user_id,
|
||||
value_2
|
||||
avg(value_2)
|
||||
FROM users_table
|
||||
WHERE value_1 > 100
|
||||
AND value_1 < 124
|
||||
|
@ -613,9 +654,21 @@ SELECT user_id,
|
|||
AND value_3 > 100
|
||||
AND user_id = users_table.user_id
|
||||
GROUP BY user_id
|
||||
HAVING Count(*) > 2);
|
||||
ERROR: could not run distributed query with subquery outside the FROM clause
|
||||
HINT: Consider using an equality filter on the distributed table's partition column.
|
||||
HAVING Count(*) > 2)
|
||||
GROUP BY
|
||||
user_id
|
||||
ORDER BY
|
||||
1 DESC, 2 DESC
|
||||
LIMIT 5;
|
||||
user_id | avg
|
||||
---------+----------------------
|
||||
99 | 571.6666666666666667
|
||||
98 | 758.0000000000000000
|
||||
96 | 459.6666666666666667
|
||||
90 | 453.3333333333333333
|
||||
89 | 215.0000000000000000
|
||||
(5 rows)
|
||||
|
||||
------------------------------------
|
||||
-- Find me all users_table who logged in more than once
|
||||
------------------------------------
|
||||
|
@ -671,7 +724,6 @@ ORDER BY
|
|||
|
||||
------------------------------------
|
||||
-- Find me all users_table who has done some event and has filters
|
||||
-- This query will be supported when we have subqueries in where clauses.
|
||||
------------------------------------
|
||||
SELECT user_id
|
||||
FROM events_table
|
||||
|
@ -684,18 +736,32 @@ WHERE
|
|||
users_table
|
||||
WHERE
|
||||
value_1 = 15 AND value_2 > 25
|
||||
);
|
||||
ERROR: could not run distributed query with join types other than INNER or OUTER JOINS
|
||||
HINT: Consider joining tables on partition column and have equal filter on joining columns.
|
||||
)
|
||||
ORDER BY 1;
|
||||
user_id
|
||||
---------
|
||||
7
|
||||
53
|
||||
(2 rows)
|
||||
|
||||
------------------------------------
|
||||
-- Which events_table did people who has done some specific events_table
|
||||
-- This query will be supported when we have subqueries in where clauses.
|
||||
------------------------------------
|
||||
SELECT user_id, event_type FROM events_table
|
||||
WHERE user_id in (SELECT user_id from events_table WHERE event_type > 500 and event_type < 505)
|
||||
GROUP BY user_id, event_type;
|
||||
ERROR: could not run distributed query with join types other than INNER or OUTER JOINS
|
||||
HINT: Consider joining tables on partition column and have equal filter on joining columns.
|
||||
SELECT
|
||||
user_id, event_type FROM events_table
|
||||
WHERE
|
||||
user_id in (SELECT user_id from events_table WHERE event_type > 500 and event_type < 505)
|
||||
GROUP BY
|
||||
user_id, event_type
|
||||
ORDER BY 2 DESC, 1
|
||||
LIMIT 3;
|
||||
user_id | event_type
|
||||
---------+------------
|
||||
18 | 999
|
||||
23 | 999
|
||||
26 | 999
|
||||
(3 rows)
|
||||
|
||||
------------------------------------
|
||||
-- Find me all the users_table who has done some event more than three times
|
||||
------------------------------------
|
||||
|
@ -1950,5 +2016,45 @@ FROM (
|
|||
ERROR: no value found for parameter 1
|
||||
SET client_min_messages TO DEFAULT;
|
||||
DROP FUNCTION volatile_func_test();
|
||||
SET citus.subquery_pushdown to OFF;
|
||||
CREATE FUNCTION test_join_function_2(integer, integer) RETURNS bool
|
||||
AS 'select $1 > $2;'
|
||||
LANGUAGE SQL
|
||||
IMMUTABLE
|
||||
RETURNS NULL ON NULL INPUT;
|
||||
-- we don't support joins via functions
|
||||
SELECT user_id, array_length(events_table, 1)
|
||||
FROM (
|
||||
SELECT user_id, array_agg(event ORDER BY time) AS events_table
|
||||
FROM (
|
||||
SELECT u.user_id, e.event_type::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_table AS e
|
||||
WHERE test_join_function_2(u.user_id, e.user_id)
|
||||
) t
|
||||
GROUP BY user_id
|
||||
) q
|
||||
ORDER BY 2 DESC, 1;
|
||||
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
||||
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
||||
-- note that the following query has joins on the partition keys
|
||||
-- however we fail to push down it due to the function call on the
|
||||
-- where clause. We probably need to relax that check
|
||||
SELECT
|
||||
users_table.user_id, users_table.value_1, prob
|
||||
FROM
|
||||
users_table
|
||||
JOIN
|
||||
(SELECT
|
||||
ma.user_id, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob
|
||||
FROM
|
||||
users_table AS ma, events_table as short_list
|
||||
WHERE
|
||||
short_list.user_id = ma.user_id and ma.value_1 < 50 and short_list.event_type < 50
|
||||
) temp
|
||||
ON users_table.user_id = temp.user_id
|
||||
WHERE
|
||||
users_table.value_1 < 50 AND test_join_function_2(users_table.user_id, temp.user_id);
|
||||
ERROR: unsupported clause type
|
||||
DROP FUNCTION test_join_function_2(integer, integer);
|
||||
SET citus.enable_router_execution TO TRUE;
|
||||
SET citus.subquery_pushdown to OFF;
|
||||
|
|
|
@ -0,0 +1,555 @@
|
|||
--
|
||||
-- multi subquery in where queries aims to expand existing subquery pushdown
|
||||
-- regression tests to cover more cases specifically subqueries in WHERE clause
|
||||
-- the tables that are used depends to multi_insert_select_behavioral_analytics_create_table.sql
|
||||
--
|
||||
-- We don't need shard id sequence here, so commented out to prevent conflicts with concurrent tests
|
||||
-- ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1430000;
|
||||
-- ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 1430000;
|
||||
-- subqueries in WHERE with greater operator
|
||||
SELECT
|
||||
user_id
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
value_2 >
|
||||
(SELECT
|
||||
max(value_2)
|
||||
FROM
|
||||
events_table
|
||||
WHERE
|
||||
users_table.user_id = events_table.user_id AND event_type = 50
|
||||
GROUP BY
|
||||
user_id
|
||||
)
|
||||
GROUP BY user_id
|
||||
HAVING count(*) > 66
|
||||
ORDER BY user_id
|
||||
LIMIT 5;
|
||||
user_id
|
||||
---------
|
||||
49
|
||||
55
|
||||
56
|
||||
63
|
||||
(4 rows)
|
||||
|
||||
-- subqueries in where with ALL operator
|
||||
SELECT
|
||||
user_id
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
value_2 > 545 AND
|
||||
value_2 < ALL (SELECT avg(value_3) FROM events_table WHERE users_table.user_id = events_table.user_id GROUP BY user_id)
|
||||
GROUP BY
|
||||
1
|
||||
ORDER BY
|
||||
1 DESC
|
||||
LIMIT 3;
|
||||
user_id
|
||||
---------
|
||||
69
|
||||
52
|
||||
12
|
||||
(3 rows)
|
||||
|
||||
-- IN operator on non-partition key
|
||||
SELECT
|
||||
user_id
|
||||
FROM
|
||||
events_table as e1
|
||||
WHERE
|
||||
event_type IN
|
||||
(SELECT
|
||||
event_type
|
||||
FROM
|
||||
events_table as e2
|
||||
WHERE
|
||||
value_2 = 15 AND value_3 > 25 AND
|
||||
e1.user_id = e2.user_id
|
||||
)
|
||||
ORDER BY 1;
|
||||
user_id
|
||||
---------
|
||||
8
|
||||
17
|
||||
33
|
||||
47
|
||||
54
|
||||
54
|
||||
56
|
||||
71
|
||||
79
|
||||
86
|
||||
(10 rows)
|
||||
|
||||
-- NOT IN on non-partition key
|
||||
SELECT
|
||||
user_id
|
||||
FROM
|
||||
events_table as e1
|
||||
WHERE
|
||||
event_type NOT IN
|
||||
(SELECT
|
||||
event_type
|
||||
FROM
|
||||
events_table as e2
|
||||
WHERE
|
||||
value_2 = 15 AND value_3 > 25 AND
|
||||
e1.user_id = e2.user_id
|
||||
)
|
||||
GROUP BY 1
|
||||
HAVING count(*) > 122
|
||||
ORDER BY 1;
|
||||
user_id
|
||||
---------
|
||||
23
|
||||
25
|
||||
(2 rows)
|
||||
|
||||
-- non-correlated query with =ANY on partition keys
|
||||
SELECT
|
||||
user_id, count(*)
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
user_id =ANY(SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20) GROUP BY 1 ORDER BY 2 DESC LIMIT 5;
|
||||
user_id | count
|
||||
---------+-------
|
||||
12 | 121
|
||||
87 | 117
|
||||
37 | 115
|
||||
23 | 115
|
||||
46 | 115
|
||||
(5 rows)
|
||||
|
||||
-- users that appeared more than 118 times
|
||||
SELECT
|
||||
user_id
|
||||
FROM
|
||||
users_table
|
||||
WHERE 118 <=
|
||||
(SELECT
|
||||
count(*)
|
||||
FROM
|
||||
events_table
|
||||
WHERE
|
||||
users_table.user_id = events_table.user_id
|
||||
GROUP BY
|
||||
user_id)
|
||||
GROUP BY
|
||||
user_id
|
||||
ORDER BY
|
||||
user_id;
|
||||
user_id
|
||||
---------
|
||||
13
|
||||
17
|
||||
23
|
||||
25
|
||||
(4 rows)
|
||||
|
||||
-- the following query doesn't have a meaningul result
|
||||
-- but it is a valid query with an arbitrary subquery in
|
||||
-- WHERE clause
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_1 > 101 AND value_1 < 110
|
||||
AND value_2 >= 5
|
||||
AND user_id IN
|
||||
(
|
||||
SELECT
|
||||
e1.user_id
|
||||
FROM (
|
||||
-- Get the first time each user viewed the homepage.
|
||||
SELECT
|
||||
user_id,
|
||||
1 AS view_homepage,
|
||||
min(time) AS view_homepage_time
|
||||
FROM events_table
|
||||
WHERE
|
||||
event_type IN (10, 20, 30, 40, 50, 60, 70, 80, 90)
|
||||
GROUP BY user_id
|
||||
) e1 LEFT JOIN LATERAL (
|
||||
SELECT
|
||||
user_id,
|
||||
1 AS use_demo,
|
||||
time AS use_demo_time
|
||||
FROM events_table
|
||||
WHERE
|
||||
user_id = e1.user_id AND
|
||||
event_type IN (11, 21, 31, 41, 51, 61, 71, 81, 91)
|
||||
ORDER BY time
|
||||
) e2 ON true LEFT JOIN LATERAL (
|
||||
SELECT
|
||||
user_id,
|
||||
1 AS enter_credit_card,
|
||||
time AS enter_credit_card_time
|
||||
FROM events_table
|
||||
WHERE
|
||||
user_id = e2.user_id AND
|
||||
event_type IN (12, 22, 32, 42, 52, 62, 72, 82, 92)
|
||||
ORDER BY time
|
||||
) e3 ON true LEFT JOIN LATERAL (
|
||||
SELECT
|
||||
1 AS submit_card_info,
|
||||
user_id,
|
||||
time AS enter_credit_card_time
|
||||
FROM events_table
|
||||
WHERE
|
||||
user_id = e3.user_id AND
|
||||
event_type IN (13, 23, 33, 43, 53, 63, 73, 83, 93)
|
||||
ORDER BY time
|
||||
) e4 ON true LEFT JOIN LATERAL (
|
||||
SELECT
|
||||
1 AS see_bought_screen
|
||||
FROM events_table
|
||||
WHERE
|
||||
user_id = e4.user_id AND
|
||||
event_type IN (14, 24, 34, 44, 54, 64, 74, 84, 94)
|
||||
ORDER BY time
|
||||
) e5 ON true
|
||||
group by e1.user_id
|
||||
HAVING sum(submit_card_info) > 0
|
||||
)
|
||||
ORDER BY 1, 2;
|
||||
user_id | value_2
|
||||
---------+---------
|
||||
5 | 884
|
||||
42 | 55
|
||||
42 | 471
|
||||
51 | 758
|
||||
72 | 897
|
||||
82 | 691
|
||||
95 | 951
|
||||
(7 rows)
|
||||
|
||||
-- similar to the above query
|
||||
-- the following query doesn't have a meaningul result
|
||||
-- but it is a valid query with an arbitrary subquery in
|
||||
-- WHERE clause
|
||||
SELECT
|
||||
user_id
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
user_id IN
|
||||
(
|
||||
SELECT
|
||||
user_id
|
||||
FROM (
|
||||
SELECT
|
||||
subquery_1.user_id, count_pay
|
||||
FROM
|
||||
(
|
||||
(SELECT
|
||||
users_table.user_id,
|
||||
'action=>1'AS event,
|
||||
events_table.time
|
||||
FROM
|
||||
users_table,
|
||||
events_table
|
||||
WHERE
|
||||
users_table.user_id = events_table.user_id AND
|
||||
users_table.user_id >= 10 AND
|
||||
users_table.user_id <= 70 AND
|
||||
events_table.event_type > 10 AND events_table.event_type < 12
|
||||
)
|
||||
UNION
|
||||
(SELECT
|
||||
users_table.user_id,
|
||||
'action=>2'AS event,
|
||||
events_table.time
|
||||
FROM
|
||||
users_table,
|
||||
events_table
|
||||
WHERE
|
||||
users_table.user_id = events_table.user_id AND
|
||||
users_table.user_id >= 10 AND
|
||||
users_table.user_id <= 70 AND
|
||||
events_table.event_type > 12 AND events_table.event_type < 14
|
||||
)
|
||||
) AS subquery_1
|
||||
LEFT JOIN
|
||||
(SELECT
|
||||
user_id,
|
||||
COUNT(*) AS count_pay
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
user_id >= 10 AND
|
||||
user_id <= 70 AND
|
||||
users_table.value_1 > 15 AND users_table.value_1 < 17
|
||||
GROUP BY
|
||||
user_id
|
||||
HAVING
|
||||
COUNT(*) > 1) AS subquery_2
|
||||
ON
|
||||
subquery_1.user_id = subquery_2.user_id
|
||||
GROUP BY
|
||||
subquery_1.user_id,
|
||||
count_pay) AS subquery_top
|
||||
GROUP BY
|
||||
count_pay, user_id
|
||||
)
|
||||
GROUP BY user_id
|
||||
HAVING count(*) > 3 AND sum(value_2) > 49000
|
||||
ORDER BY 1;
|
||||
user_id
|
||||
---------
|
||||
18
|
||||
29
|
||||
40
|
||||
49
|
||||
58
|
||||
69
|
||||
(6 rows)
|
||||
|
||||
-- the following query doesn't have a meaningul result
|
||||
-- but it is a valid query with an arbitrary subquery in
|
||||
-- FROM clause involving a complex query in WHERE clause
|
||||
SELECT user_id, array_length(events_table, 1)
|
||||
FROM (
|
||||
SELECT user_id, array_agg(event ORDER BY time) AS events_table
|
||||
FROM (
|
||||
SELECT
|
||||
u.user_id, e.event_type::text AS event, e.time
|
||||
FROM
|
||||
users_table AS u,
|
||||
events_table AS e
|
||||
WHERE u.user_id = e.user_id AND
|
||||
u.user_id IN
|
||||
(
|
||||
SELECT
|
||||
user_id
|
||||
FROM
|
||||
users_table
|
||||
WHERE value_2 >= 5
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 100 AND event_type <= 300 AND value_3 > 100 AND user_id = users_table.user_id)
|
||||
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 300 AND event_type <= 350 AND value_3 > 100 AND user_id = users_table.user_id)
|
||||
)
|
||||
) t
|
||||
GROUP BY user_id
|
||||
) q
|
||||
ORDER BY 2 DESC, 1;
|
||||
user_id | array_length
|
||||
---------+--------------
|
||||
96 | 12204
|
||||
8 | 8170
|
||||
(2 rows)
|
||||
|
||||
--
|
||||
-- below tests only aims for cases where all relations
|
||||
-- are not joined on partition key
|
||||
--
|
||||
-- e4 is not joined on the partition key
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_1 > 101 AND value_1 < 110
|
||||
AND value_2 >= 5
|
||||
AND user_id IN
|
||||
(
|
||||
SELECT
|
||||
e1.user_id
|
||||
FROM (
|
||||
-- Get the first time each user viewed the homepage.
|
||||
SELECT
|
||||
user_id,
|
||||
1 AS view_homepage,
|
||||
min(time) AS view_homepage_time
|
||||
FROM events_table
|
||||
WHERE
|
||||
event_type IN (10, 20, 30, 40, 50, 60, 70, 80, 90)
|
||||
GROUP BY user_id
|
||||
) e1 LEFT JOIN LATERAL (
|
||||
SELECT
|
||||
user_id,
|
||||
1 AS use_demo,
|
||||
time AS use_demo_time
|
||||
FROM events_table
|
||||
WHERE
|
||||
user_id = e1.user_id AND
|
||||
event_type IN (11, 21, 31, 41, 51, 61, 71, 81, 91)
|
||||
ORDER BY time
|
||||
) e2 ON true LEFT JOIN LATERAL (
|
||||
SELECT
|
||||
user_id,
|
||||
1 AS enter_credit_card,
|
||||
time AS enter_credit_card_time
|
||||
FROM events_table
|
||||
WHERE
|
||||
user_id = e2.user_id AND
|
||||
event_type IN (12, 22, 32, 42, 52, 62, 72, 82, 92)
|
||||
ORDER BY time
|
||||
) e3 ON true LEFT JOIN LATERAL (
|
||||
SELECT
|
||||
1 AS submit_card_info,
|
||||
user_id,
|
||||
time AS enter_credit_card_time
|
||||
FROM events_table
|
||||
WHERE
|
||||
value_2 = e3.user_id AND
|
||||
event_type IN (13, 23, 33, 43, 53, 63, 73, 83, 93)
|
||||
ORDER BY time
|
||||
) e4 ON true LEFT JOIN LATERAL (
|
||||
SELECT
|
||||
1 AS see_bought_screen
|
||||
FROM events_table
|
||||
WHERE
|
||||
user_id = e4.user_id AND
|
||||
event_type IN (14, 24, 34, 44, 54, 64, 74, 84, 94)
|
||||
ORDER BY time
|
||||
) e5 ON true
|
||||
group by e1.user_id
|
||||
HAVING sum(submit_card_info) > 0
|
||||
);
|
||||
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
||||
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
||||
-- left leaf query does not return partition key
|
||||
SELECT
|
||||
user_id
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
user_id IN
|
||||
(
|
||||
SELECT
|
||||
user_id
|
||||
FROM (
|
||||
SELECT
|
||||
subquery_1.user_id, count_pay
|
||||
FROM
|
||||
(
|
||||
(SELECT
|
||||
2 * users_table.user_id as user_id,
|
||||
'action=>1'AS event,
|
||||
events_table.time
|
||||
FROM
|
||||
users_table,
|
||||
events_table
|
||||
WHERE
|
||||
users_table.user_id = events_table.user_id AND
|
||||
users_table.user_id >= 10 AND
|
||||
users_table.user_id <= 70 AND
|
||||
events_table.event_type > 10 AND events_table.event_type < 12
|
||||
)
|
||||
UNION
|
||||
(SELECT
|
||||
users_table.user_id,
|
||||
'action=>2'AS event,
|
||||
events_table.time
|
||||
FROM
|
||||
users_table,
|
||||
events_table
|
||||
WHERE
|
||||
users_table.user_id = events_table.user_id AND
|
||||
users_table.user_id >= 10 AND
|
||||
users_table.user_id <= 70 AND
|
||||
events_table.event_type > 12 AND events_table.event_type < 14
|
||||
)
|
||||
) AS subquery_1
|
||||
LEFT JOIN
|
||||
(SELECT
|
||||
user_id,
|
||||
COUNT(*) AS count_pay
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
user_id >= 10 AND
|
||||
user_id <= 70 AND
|
||||
users_table.value_1 > 15 AND users_table.value_1 < 17
|
||||
GROUP BY
|
||||
user_id
|
||||
HAVING
|
||||
COUNT(*) > 1) AS subquery_2
|
||||
ON
|
||||
subquery_1.user_id = subquery_2.user_id
|
||||
GROUP BY
|
||||
subquery_1.user_id,
|
||||
count_pay) AS subquery_top
|
||||
GROUP BY
|
||||
count_pay, user_id
|
||||
)
|
||||
GROUP BY user_id
|
||||
HAVING count(*) > 3 AND sum(value_2) > 49000
|
||||
ORDER BY 1;
|
||||
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
||||
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
||||
-- NOT EXISTS query has non-equi join
|
||||
SELECT user_id, array_length(events_table, 1)
|
||||
FROM (
|
||||
SELECT user_id, array_agg(event ORDER BY time) AS events_table
|
||||
FROM (
|
||||
SELECT
|
||||
u.user_id, e.event_type::text AS event, e.time
|
||||
FROM
|
||||
users_table AS u,
|
||||
events_table AS e
|
||||
WHERE u.user_id = e.user_id AND
|
||||
u.user_id IN
|
||||
(
|
||||
SELECT
|
||||
user_id
|
||||
FROM
|
||||
users_table
|
||||
WHERE value_2 >= 5
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 100 AND event_type <= 300 AND value_3 > 100 AND user_id = users_table.user_id)
|
||||
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 300 AND event_type <= 350 AND value_3 > 100 AND user_id != users_table.user_id)
|
||||
)
|
||||
) t
|
||||
GROUP BY user_id
|
||||
) q
|
||||
ORDER BY 2 DESC, 1;
|
||||
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
||||
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
||||
-- subquery in where clause doesn't have a relation
|
||||
SELECT
|
||||
user_id
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
value_2 >
|
||||
(SELECT 1);
|
||||
ERROR: cannot push down this subquery
|
||||
DETAIL: Subqueries without relations are unsupported
|
||||
-- OFFSET is not supported in the subquey
|
||||
SELECT
|
||||
user_id
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
value_2 >
|
||||
(SELECT
|
||||
max(value_2)
|
||||
FROM
|
||||
events_table
|
||||
WHERE
|
||||
users_table.user_id = events_table.user_id AND event_type = 50
|
||||
GROUP BY
|
||||
user_id
|
||||
OFFSET 3
|
||||
);
|
||||
ERROR: cannot push down this subquery
|
||||
DETAIL: Offset clause is currently unsupported
|
||||
-- semi join is not on the partition key for the third subquery
|
||||
SELECT user_id
|
||||
FROM users_table
|
||||
WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20)
|
||||
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 30 AND value_1 <= 40)
|
||||
AND value_2 IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60);
|
||||
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
||||
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
||||
CREATE FUNCTION test_join_function(integer, integer) RETURNS bool
|
||||
AS 'select $1 > $2;'
|
||||
LANGUAGE SQL
|
||||
IMMUTABLE
|
||||
RETURNS NULL ON NULL INPUT;
|
||||
-- we disallow JOINs via functions
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_1 = 101
|
||||
AND value_2 >= 5
|
||||
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND test_join_function(events_table.user_id, users_table.user_id))
|
||||
ORDER BY 1 DESC, 2 DESC
|
||||
LIMIT 3;
|
||||
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
||||
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
||||
DROP FUNCTION test_join_function(int,int);
|
|
@ -1,18 +0,0 @@
|
|||
--
|
||||
-- MULTI_VERIFY_NO_SUBQUERY
|
||||
--
|
||||
-- This test checks that we simply emit an error message instead of trying to
|
||||
-- process a distributed unsupported SQL subquery.
|
||||
ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1030000;
|
||||
SELECT * FROM lineitem WHERE l_orderkey IN
|
||||
(SELECT l_orderkey FROM lineitem WHERE l_quantity > 0);
|
||||
ERROR: could not run distributed query with join types other than INNER or OUTER JOINS
|
||||
SELECT l_quantity FROM lineitem WHERE EXISTS
|
||||
(SELECT 1 FROM orders WHERE o_orderkey = l_orderkey);
|
||||
ERROR: could not run distributed query with join types other than INNER or OUTER JOINS
|
||||
SELECT l_quantity FROM lineitem WHERE l_orderkey IN (SELECT o_orderkey FROM orders);
|
||||
ERROR: could not run distributed query with join types other than INNER or OUTER JOINS
|
||||
SELECT l_orderkey FROM lineitem WHERE l_quantity > ALL(SELECT o_orderkey FROM orders);
|
||||
ERROR: could not run distributed query with subquery outside the FROM clause
|
||||
SELECT l_quantity FROM lineitem WHERE l_orderkey = (SELECT min(o_orderkey) FROM orders);
|
||||
ERROR: could not run distributed query with subquery outside the FROM clause
|
|
@ -21,7 +21,7 @@ test: multi_table_ddl
|
|||
test: multi_create_table
|
||||
test: multi_load_data
|
||||
|
||||
test: multi_basic_queries multi_complex_expressions multi_verify_no_subquery
|
||||
test: multi_basic_queries multi_complex_expressions
|
||||
test: multi_single_relation_subquery
|
||||
test: multi_binary_master_copy_format
|
||||
|
||||
|
|
|
@ -38,10 +38,10 @@ test: multi_insert_select
|
|||
# Miscellaneous tests to check our query planning behavior
|
||||
# ----------
|
||||
test: multi_deparse_shard_query
|
||||
test: multi_basic_queries multi_complex_expressions multi_verify_no_subquery
|
||||
test: multi_basic_queries multi_complex_expressions
|
||||
test: multi_explain
|
||||
test: multi_subquery multi_subquery_complex_queries multi_subquery_behavioral_analytics
|
||||
test: multi_subquery_union
|
||||
test: multi_subquery_union multi_subquery_in_where_clause
|
||||
test: multi_reference_table
|
||||
test: multi_outer_join_reference
|
||||
test: multi_single_relation_subquery
|
||||
|
|
|
@ -28,7 +28,7 @@ test: multi_load_data
|
|||
# ----------
|
||||
# Miscellaneous tests to check our query planning behavior
|
||||
# ----------
|
||||
test: multi_basic_queries multi_complex_expressions multi_verify_no_subquery
|
||||
test: multi_basic_queries multi_complex_expressions
|
||||
test: multi_agg_distinct multi_limit_clause multi_limit_clause_approximate
|
||||
test: multi_average_expression multi_working_columns
|
||||
test: multi_array_agg
|
||||
|
|
|
@ -219,14 +219,6 @@ SELECT * FROM articles_hash_mx, position('om' in 'Thomas') WHERE author_id = 1 o
|
|||
-- they are not supported if multiple workers are involved
|
||||
SELECT * FROM articles_hash_mx, position('om' in 'Thomas') WHERE author_id = 1 or author_id = 2;
|
||||
|
||||
-- subqueries are not supported in WHERE clause in Citus
|
||||
SELECT * FROM articles_hash_mx WHERE author_id IN (SELECT id FROM authors_hash_mx WHERE name LIKE '%a');
|
||||
|
||||
SELECT * FROM articles_hash_mx WHERE author_id IN (SELECT author_id FROM articles_hash_mx WHERE author_id = 1 or author_id = 3);
|
||||
|
||||
SELECT * FROM articles_hash_mx WHERE author_id = (SELECT 1);
|
||||
|
||||
|
||||
-- subqueries are supported in FROM clause but they are not router plannable
|
||||
SELECT articles_hash_mx.id,test.word_count
|
||||
FROM articles_hash_mx, (SELECT id, word_count FROM articles_hash_mx) AS test WHERE test.id = articles_hash_mx.id
|
||||
|
|
|
@ -281,13 +281,6 @@ SELECT * FROM articles_hash, position('om' in 'Thomas') WHERE author_id = 1 or a
|
|||
-- they are not supported if multiple workers are involved
|
||||
SELECT * FROM articles_hash, position('om' in 'Thomas') WHERE author_id = 1 or author_id = 2;
|
||||
|
||||
-- subqueries are not supported in WHERE clause in Citus
|
||||
SELECT * FROM articles_hash WHERE author_id IN (SELECT id FROM authors_hash WHERE name LIKE '%a');
|
||||
|
||||
SELECT * FROM articles_hash WHERE author_id IN (SELECT author_id FROM articles_hash WHERE author_id = 1 or author_id = 3);
|
||||
|
||||
SELECT * FROM articles_hash WHERE author_id = (SELECT 1);
|
||||
|
||||
-- unless the query can be transformed into a join
|
||||
SELECT * FROM articles_hash
|
||||
WHERE author_id IN (SELECT author_id FROM articles_hash WHERE author_id = 2)
|
||||
|
|
|
@ -458,57 +458,66 @@ ORDER BY user_lastseen DESC, user_id;
|
|||
|
||||
------------------------------------
|
||||
-- Count the number of distinct users_table who are in segment X and Y and Z
|
||||
-- This query will be supported when we have subqueries in where clauses.
|
||||
------------------------------------
|
||||
SELECT DISTINCT user_id
|
||||
SELECT user_id
|
||||
FROM users_table
|
||||
WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20)
|
||||
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 30 AND value_1 <= 40)
|
||||
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60);
|
||||
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60)
|
||||
GROUP BY
|
||||
user_id
|
||||
ORDER BY
|
||||
user_id DESC
|
||||
LIMIT 5;
|
||||
|
||||
------------------------------------
|
||||
-- Find customers who have done X, and satisfy other customer specific criteria
|
||||
-- This query will be supported when we have subqueries in where clauses.
|
||||
------------------------------------
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_1 > 101 AND value_1 < 110
|
||||
AND value_2 >= 5
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 101 AND event_type < 110 AND value_3 > 100 AND user_id = users_table.user_id);
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 101 AND event_type < 110 AND value_3 > 100 AND user_id = users_table.user_id)
|
||||
ORDER BY 2 DESC, 1 DESC
|
||||
LIMIT 5;
|
||||
|
||||
------------------------------------
|
||||
-- Customers who haven’t done X, and satisfy other customer specific criteria
|
||||
-- This query will be supported when we have subqueries in where clauses.
|
||||
------------------------------------
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_1 = 101
|
||||
AND value_2 >= 5
|
||||
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND user_id = users_table.user_id);
|
||||
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND user_id = users_table.user_id)
|
||||
ORDER BY 1 DESC, 2 DESC
|
||||
LIMIT 3;
|
||||
|
||||
------------------------------------
|
||||
-- Customers who have done X and Y, and satisfy other customer specific criteria
|
||||
-- This query will be supported when we have subqueries in where clauses.
|
||||
------------------------------------
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
SELECT user_id, sum(value_2) as cnt FROM users_table WHERE
|
||||
value_1 > 100
|
||||
AND value_2 >= 5
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type != 100 AND value_3 > 100 AND user_id = users_table.user_id)
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type = 101 AND value_3 > 100 AND user_id = users_table.user_id);
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type = 101 AND value_3 > 100 AND user_id = users_table.user_id)
|
||||
GROUP BY
|
||||
user_id
|
||||
ORDER BY cnt DESC, user_id DESC
|
||||
LIMIT 5;
|
||||
|
||||
------------------------------------
|
||||
-- Customers who have done X and haven’t done Y, and satisfy other customer specific criteria
|
||||
-- This query will be supported when we have subqueries in where clauses.
|
||||
------------------------------------
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_2 >= 5
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 100 AND event_type <= 300 AND value_3 > 100 AND user_id = users_table.user_id)
|
||||
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 300 AND event_type <= 350 AND value_3 > 100 AND user_id = users_table.user_id);
|
||||
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 300 AND event_type <= 350 AND value_3 > 100 AND user_id = users_table.user_id)
|
||||
ORDER BY 2 DESC, 1 DESC
|
||||
LIMIT 4;
|
||||
|
||||
------------------------------------
|
||||
-- Customers who have done X more than 2 times, and satisfy other customer specific criteria
|
||||
-- This query will be supported when we have subqueries in where clauses.
|
||||
------------------------------------
|
||||
SELECT user_id,
|
||||
value_2
|
||||
avg(value_2)
|
||||
FROM users_table
|
||||
WHERE value_1 > 100
|
||||
AND value_1 < 124
|
||||
|
@ -520,7 +529,12 @@ SELECT user_id,
|
|||
AND value_3 > 100
|
||||
AND user_id = users_table.user_id
|
||||
GROUP BY user_id
|
||||
HAVING Count(*) > 2);
|
||||
HAVING Count(*) > 2)
|
||||
GROUP BY
|
||||
user_id
|
||||
ORDER BY
|
||||
1 DESC, 2 DESC
|
||||
LIMIT 5;
|
||||
|
||||
------------------------------------
|
||||
-- Find me all users_table who logged in more than once
|
||||
|
@ -555,7 +569,6 @@ ORDER BY
|
|||
|
||||
------------------------------------
|
||||
-- Find me all users_table who has done some event and has filters
|
||||
-- This query will be supported when we have subqueries in where clauses.
|
||||
------------------------------------
|
||||
SELECT user_id
|
||||
FROM events_table
|
||||
|
@ -568,15 +581,20 @@ WHERE
|
|||
users_table
|
||||
WHERE
|
||||
value_1 = 15 AND value_2 > 25
|
||||
);
|
||||
)
|
||||
ORDER BY 1;
|
||||
|
||||
------------------------------------
|
||||
-- Which events_table did people who has done some specific events_table
|
||||
-- This query will be supported when we have subqueries in where clauses.
|
||||
------------------------------------
|
||||
SELECT user_id, event_type FROM events_table
|
||||
WHERE user_id in (SELECT user_id from events_table WHERE event_type > 500 and event_type < 505)
|
||||
GROUP BY user_id, event_type;
|
||||
SELECT
|
||||
user_id, event_type FROM events_table
|
||||
WHERE
|
||||
user_id in (SELECT user_id from events_table WHERE event_type > 500 and event_type < 505)
|
||||
GROUP BY
|
||||
user_id, event_type
|
||||
ORDER BY 2 DESC, 1
|
||||
LIMIT 3;
|
||||
|
||||
------------------------------------
|
||||
-- Find me all the users_table who has done some event more than three times
|
||||
|
@ -1600,5 +1618,47 @@ FROM (
|
|||
SET client_min_messages TO DEFAULT;
|
||||
DROP FUNCTION volatile_func_test();
|
||||
|
||||
SET citus.subquery_pushdown to OFF;
|
||||
CREATE FUNCTION test_join_function_2(integer, integer) RETURNS bool
|
||||
AS 'select $1 > $2;'
|
||||
LANGUAGE SQL
|
||||
IMMUTABLE
|
||||
RETURNS NULL ON NULL INPUT;
|
||||
|
||||
-- we don't support joins via functions
|
||||
SELECT user_id, array_length(events_table, 1)
|
||||
FROM (
|
||||
SELECT user_id, array_agg(event ORDER BY time) AS events_table
|
||||
FROM (
|
||||
SELECT u.user_id, e.event_type::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_table AS e
|
||||
WHERE test_join_function_2(u.user_id, e.user_id)
|
||||
) t
|
||||
GROUP BY user_id
|
||||
) q
|
||||
ORDER BY 2 DESC, 1;
|
||||
|
||||
-- note that the following query has joins on the partition keys
|
||||
-- however we fail to push down it due to the function call on the
|
||||
-- where clause. We probably need to relax that check
|
||||
SELECT
|
||||
users_table.user_id, users_table.value_1, prob
|
||||
FROM
|
||||
users_table
|
||||
JOIN
|
||||
(SELECT
|
||||
ma.user_id, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob
|
||||
FROM
|
||||
users_table AS ma, events_table as short_list
|
||||
WHERE
|
||||
short_list.user_id = ma.user_id and ma.value_1 < 50 and short_list.event_type < 50
|
||||
) temp
|
||||
ON users_table.user_id = temp.user_id
|
||||
WHERE
|
||||
users_table.value_1 < 50 AND test_join_function_2(users_table.user_id, temp.user_id);
|
||||
|
||||
DROP FUNCTION test_join_function_2(integer, integer);
|
||||
|
||||
SET citus.enable_router_execution TO TRUE;
|
||||
SET citus.subquery_pushdown to OFF;
|
||||
|
||||
|
|
|
@ -0,0 +1,486 @@
|
|||
--
|
||||
-- multi subquery in where queries aims to expand existing subquery pushdown
|
||||
-- regression tests to cover more cases specifically subqueries in WHERE clause
|
||||
-- the tables that are used depends to multi_insert_select_behavioral_analytics_create_table.sql
|
||||
--
|
||||
-- We don't need shard id sequence here, so commented out to prevent conflicts with concurrent tests
|
||||
-- ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1430000;
|
||||
-- ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 1430000;
|
||||
|
||||
-- subqueries in WHERE with greater operator
|
||||
SELECT
|
||||
user_id
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
value_2 >
|
||||
(SELECT
|
||||
max(value_2)
|
||||
FROM
|
||||
events_table
|
||||
WHERE
|
||||
users_table.user_id = events_table.user_id AND event_type = 50
|
||||
GROUP BY
|
||||
user_id
|
||||
)
|
||||
GROUP BY user_id
|
||||
HAVING count(*) > 66
|
||||
ORDER BY user_id
|
||||
LIMIT 5;
|
||||
|
||||
-- subqueries in where with ALL operator
|
||||
SELECT
|
||||
user_id
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
value_2 > 545 AND
|
||||
value_2 < ALL (SELECT avg(value_3) FROM events_table WHERE users_table.user_id = events_table.user_id GROUP BY user_id)
|
||||
GROUP BY
|
||||
1
|
||||
ORDER BY
|
||||
1 DESC
|
||||
LIMIT 3;
|
||||
|
||||
-- IN operator on non-partition key
|
||||
SELECT
|
||||
user_id
|
||||
FROM
|
||||
events_table as e1
|
||||
WHERE
|
||||
event_type IN
|
||||
(SELECT
|
||||
event_type
|
||||
FROM
|
||||
events_table as e2
|
||||
WHERE
|
||||
value_2 = 15 AND value_3 > 25 AND
|
||||
e1.user_id = e2.user_id
|
||||
)
|
||||
ORDER BY 1;
|
||||
|
||||
-- NOT IN on non-partition key
|
||||
SELECT
|
||||
user_id
|
||||
FROM
|
||||
events_table as e1
|
||||
WHERE
|
||||
event_type NOT IN
|
||||
(SELECT
|
||||
event_type
|
||||
FROM
|
||||
events_table as e2
|
||||
WHERE
|
||||
value_2 = 15 AND value_3 > 25 AND
|
||||
e1.user_id = e2.user_id
|
||||
)
|
||||
GROUP BY 1
|
||||
|
||||
HAVING count(*) > 122
|
||||
ORDER BY 1;
|
||||
|
||||
-- non-correlated query with =ANY on partition keys
|
||||
SELECT
|
||||
user_id, count(*)
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
user_id =ANY(SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20) GROUP BY 1 ORDER BY 2 DESC LIMIT 5;
|
||||
|
||||
-- users that appeared more than 118 times
|
||||
SELECT
|
||||
user_id
|
||||
FROM
|
||||
users_table
|
||||
WHERE 118 <=
|
||||
(SELECT
|
||||
count(*)
|
||||
FROM
|
||||
events_table
|
||||
WHERE
|
||||
users_table.user_id = events_table.user_id
|
||||
GROUP BY
|
||||
user_id)
|
||||
GROUP BY
|
||||
user_id
|
||||
ORDER BY
|
||||
user_id;
|
||||
|
||||
|
||||
-- the following query doesn't have a meaningul result
|
||||
-- but it is a valid query with an arbitrary subquery in
|
||||
-- WHERE clause
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_1 > 101 AND value_1 < 110
|
||||
AND value_2 >= 5
|
||||
AND user_id IN
|
||||
(
|
||||
SELECT
|
||||
e1.user_id
|
||||
FROM (
|
||||
-- Get the first time each user viewed the homepage.
|
||||
SELECT
|
||||
user_id,
|
||||
1 AS view_homepage,
|
||||
min(time) AS view_homepage_time
|
||||
FROM events_table
|
||||
WHERE
|
||||
event_type IN (10, 20, 30, 40, 50, 60, 70, 80, 90)
|
||||
GROUP BY user_id
|
||||
) e1 LEFT JOIN LATERAL (
|
||||
SELECT
|
||||
user_id,
|
||||
1 AS use_demo,
|
||||
time AS use_demo_time
|
||||
FROM events_table
|
||||
WHERE
|
||||
user_id = e1.user_id AND
|
||||
event_type IN (11, 21, 31, 41, 51, 61, 71, 81, 91)
|
||||
ORDER BY time
|
||||
) e2 ON true LEFT JOIN LATERAL (
|
||||
SELECT
|
||||
user_id,
|
||||
1 AS enter_credit_card,
|
||||
time AS enter_credit_card_time
|
||||
FROM events_table
|
||||
WHERE
|
||||
user_id = e2.user_id AND
|
||||
event_type IN (12, 22, 32, 42, 52, 62, 72, 82, 92)
|
||||
ORDER BY time
|
||||
) e3 ON true LEFT JOIN LATERAL (
|
||||
SELECT
|
||||
1 AS submit_card_info,
|
||||
user_id,
|
||||
time AS enter_credit_card_time
|
||||
FROM events_table
|
||||
WHERE
|
||||
user_id = e3.user_id AND
|
||||
event_type IN (13, 23, 33, 43, 53, 63, 73, 83, 93)
|
||||
ORDER BY time
|
||||
) e4 ON true LEFT JOIN LATERAL (
|
||||
SELECT
|
||||
1 AS see_bought_screen
|
||||
FROM events_table
|
||||
WHERE
|
||||
user_id = e4.user_id AND
|
||||
event_type IN (14, 24, 34, 44, 54, 64, 74, 84, 94)
|
||||
ORDER BY time
|
||||
) e5 ON true
|
||||
group by e1.user_id
|
||||
HAVING sum(submit_card_info) > 0
|
||||
)
|
||||
ORDER BY 1, 2;
|
||||
|
||||
|
||||
-- similar to the above query
|
||||
-- the following query doesn't have a meaningul result
|
||||
-- but it is a valid query with an arbitrary subquery in
|
||||
-- WHERE clause
|
||||
SELECT
|
||||
user_id
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
user_id IN
|
||||
(
|
||||
SELECT
|
||||
user_id
|
||||
FROM (
|
||||
SELECT
|
||||
subquery_1.user_id, count_pay
|
||||
FROM
|
||||
(
|
||||
(SELECT
|
||||
users_table.user_id,
|
||||
'action=>1'AS event,
|
||||
events_table.time
|
||||
FROM
|
||||
users_table,
|
||||
events_table
|
||||
WHERE
|
||||
users_table.user_id = events_table.user_id AND
|
||||
users_table.user_id >= 10 AND
|
||||
users_table.user_id <= 70 AND
|
||||
events_table.event_type > 10 AND events_table.event_type < 12
|
||||
)
|
||||
UNION
|
||||
(SELECT
|
||||
users_table.user_id,
|
||||
'action=>2'AS event,
|
||||
events_table.time
|
||||
FROM
|
||||
users_table,
|
||||
events_table
|
||||
WHERE
|
||||
users_table.user_id = events_table.user_id AND
|
||||
users_table.user_id >= 10 AND
|
||||
users_table.user_id <= 70 AND
|
||||
events_table.event_type > 12 AND events_table.event_type < 14
|
||||
)
|
||||
) AS subquery_1
|
||||
LEFT JOIN
|
||||
(SELECT
|
||||
user_id,
|
||||
COUNT(*) AS count_pay
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
user_id >= 10 AND
|
||||
user_id <= 70 AND
|
||||
users_table.value_1 > 15 AND users_table.value_1 < 17
|
||||
GROUP BY
|
||||
user_id
|
||||
HAVING
|
||||
COUNT(*) > 1) AS subquery_2
|
||||
ON
|
||||
subquery_1.user_id = subquery_2.user_id
|
||||
GROUP BY
|
||||
subquery_1.user_id,
|
||||
count_pay) AS subquery_top
|
||||
GROUP BY
|
||||
count_pay, user_id
|
||||
)
|
||||
GROUP BY user_id
|
||||
HAVING count(*) > 3 AND sum(value_2) > 49000
|
||||
ORDER BY 1;
|
||||
|
||||
|
||||
-- the following query doesn't have a meaningul result
|
||||
-- but it is a valid query with an arbitrary subquery in
|
||||
-- FROM clause involving a complex query in WHERE clause
|
||||
SELECT user_id, array_length(events_table, 1)
|
||||
FROM (
|
||||
SELECT user_id, array_agg(event ORDER BY time) AS events_table
|
||||
FROM (
|
||||
SELECT
|
||||
u.user_id, e.event_type::text AS event, e.time
|
||||
FROM
|
||||
users_table AS u,
|
||||
events_table AS e
|
||||
WHERE u.user_id = e.user_id AND
|
||||
u.user_id IN
|
||||
(
|
||||
SELECT
|
||||
user_id
|
||||
FROM
|
||||
users_table
|
||||
WHERE value_2 >= 5
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 100 AND event_type <= 300 AND value_3 > 100 AND user_id = users_table.user_id)
|
||||
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 300 AND event_type <= 350 AND value_3 > 100 AND user_id = users_table.user_id)
|
||||
)
|
||||
) t
|
||||
GROUP BY user_id
|
||||
) q
|
||||
ORDER BY 2 DESC, 1;
|
||||
|
||||
--
|
||||
-- below tests only aims for cases where all relations
|
||||
-- are not joined on partition key
|
||||
--
|
||||
|
||||
-- e4 is not joined on the partition key
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_1 > 101 AND value_1 < 110
|
||||
AND value_2 >= 5
|
||||
AND user_id IN
|
||||
(
|
||||
SELECT
|
||||
e1.user_id
|
||||
FROM (
|
||||
-- Get the first time each user viewed the homepage.
|
||||
SELECT
|
||||
user_id,
|
||||
1 AS view_homepage,
|
||||
min(time) AS view_homepage_time
|
||||
FROM events_table
|
||||
WHERE
|
||||
event_type IN (10, 20, 30, 40, 50, 60, 70, 80, 90)
|
||||
GROUP BY user_id
|
||||
) e1 LEFT JOIN LATERAL (
|
||||
SELECT
|
||||
user_id,
|
||||
1 AS use_demo,
|
||||
time AS use_demo_time
|
||||
FROM events_table
|
||||
WHERE
|
||||
user_id = e1.user_id AND
|
||||
event_type IN (11, 21, 31, 41, 51, 61, 71, 81, 91)
|
||||
ORDER BY time
|
||||
) e2 ON true LEFT JOIN LATERAL (
|
||||
SELECT
|
||||
user_id,
|
||||
1 AS enter_credit_card,
|
||||
time AS enter_credit_card_time
|
||||
FROM events_table
|
||||
WHERE
|
||||
user_id = e2.user_id AND
|
||||
event_type IN (12, 22, 32, 42, 52, 62, 72, 82, 92)
|
||||
ORDER BY time
|
||||
) e3 ON true LEFT JOIN LATERAL (
|
||||
SELECT
|
||||
1 AS submit_card_info,
|
||||
user_id,
|
||||
time AS enter_credit_card_time
|
||||
FROM events_table
|
||||
WHERE
|
||||
value_2 = e3.user_id AND
|
||||
event_type IN (13, 23, 33, 43, 53, 63, 73, 83, 93)
|
||||
ORDER BY time
|
||||
) e4 ON true LEFT JOIN LATERAL (
|
||||
SELECT
|
||||
1 AS see_bought_screen
|
||||
FROM events_table
|
||||
WHERE
|
||||
user_id = e4.user_id AND
|
||||
event_type IN (14, 24, 34, 44, 54, 64, 74, 84, 94)
|
||||
ORDER BY time
|
||||
) e5 ON true
|
||||
group by e1.user_id
|
||||
HAVING sum(submit_card_info) > 0
|
||||
);
|
||||
|
||||
|
||||
-- left leaf query does not return partition key
|
||||
SELECT
|
||||
user_id
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
user_id IN
|
||||
(
|
||||
SELECT
|
||||
user_id
|
||||
FROM (
|
||||
SELECT
|
||||
subquery_1.user_id, count_pay
|
||||
FROM
|
||||
(
|
||||
(SELECT
|
||||
2 * users_table.user_id as user_id,
|
||||
'action=>1'AS event,
|
||||
events_table.time
|
||||
FROM
|
||||
users_table,
|
||||
events_table
|
||||
WHERE
|
||||
users_table.user_id = events_table.user_id AND
|
||||
users_table.user_id >= 10 AND
|
||||
users_table.user_id <= 70 AND
|
||||
events_table.event_type > 10 AND events_table.event_type < 12
|
||||
)
|
||||
UNION
|
||||
(SELECT
|
||||
users_table.user_id,
|
||||
'action=>2'AS event,
|
||||
events_table.time
|
||||
FROM
|
||||
users_table,
|
||||
events_table
|
||||
WHERE
|
||||
users_table.user_id = events_table.user_id AND
|
||||
users_table.user_id >= 10 AND
|
||||
users_table.user_id <= 70 AND
|
||||
events_table.event_type > 12 AND events_table.event_type < 14
|
||||
)
|
||||
) AS subquery_1
|
||||
LEFT JOIN
|
||||
(SELECT
|
||||
user_id,
|
||||
COUNT(*) AS count_pay
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
user_id >= 10 AND
|
||||
user_id <= 70 AND
|
||||
users_table.value_1 > 15 AND users_table.value_1 < 17
|
||||
GROUP BY
|
||||
user_id
|
||||
HAVING
|
||||
COUNT(*) > 1) AS subquery_2
|
||||
ON
|
||||
subquery_1.user_id = subquery_2.user_id
|
||||
GROUP BY
|
||||
subquery_1.user_id,
|
||||
count_pay) AS subquery_top
|
||||
GROUP BY
|
||||
count_pay, user_id
|
||||
)
|
||||
GROUP BY user_id
|
||||
HAVING count(*) > 3 AND sum(value_2) > 49000
|
||||
ORDER BY 1;
|
||||
|
||||
-- NOT EXISTS query has non-equi join
|
||||
SELECT user_id, array_length(events_table, 1)
|
||||
FROM (
|
||||
SELECT user_id, array_agg(event ORDER BY time) AS events_table
|
||||
FROM (
|
||||
SELECT
|
||||
u.user_id, e.event_type::text AS event, e.time
|
||||
FROM
|
||||
users_table AS u,
|
||||
events_table AS e
|
||||
WHERE u.user_id = e.user_id AND
|
||||
u.user_id IN
|
||||
(
|
||||
SELECT
|
||||
user_id
|
||||
FROM
|
||||
users_table
|
||||
WHERE value_2 >= 5
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 100 AND event_type <= 300 AND value_3 > 100 AND user_id = users_table.user_id)
|
||||
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 300 AND event_type <= 350 AND value_3 > 100 AND user_id != users_table.user_id)
|
||||
)
|
||||
) t
|
||||
GROUP BY user_id
|
||||
) q
|
||||
ORDER BY 2 DESC, 1;
|
||||
|
||||
-- subquery in where clause doesn't have a relation
|
||||
SELECT
|
||||
user_id
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
value_2 >
|
||||
(SELECT 1);
|
||||
|
||||
-- OFFSET is not supported in the subquey
|
||||
SELECT
|
||||
user_id
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
value_2 >
|
||||
(SELECT
|
||||
max(value_2)
|
||||
FROM
|
||||
events_table
|
||||
WHERE
|
||||
users_table.user_id = events_table.user_id AND event_type = 50
|
||||
GROUP BY
|
||||
user_id
|
||||
OFFSET 3
|
||||
);
|
||||
|
||||
-- semi join is not on the partition key for the third subquery
|
||||
SELECT user_id
|
||||
FROM users_table
|
||||
WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20)
|
||||
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 30 AND value_1 <= 40)
|
||||
AND value_2 IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60);
|
||||
|
||||
CREATE FUNCTION test_join_function(integer, integer) RETURNS bool
|
||||
AS 'select $1 > $2;'
|
||||
LANGUAGE SQL
|
||||
IMMUTABLE
|
||||
RETURNS NULL ON NULL INPUT;
|
||||
|
||||
-- we disallow JOINs via functions
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_1 = 101
|
||||
AND value_2 >= 5
|
||||
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND test_join_function(events_table.user_id, users_table.user_id))
|
||||
ORDER BY 1 DESC, 2 DESC
|
||||
LIMIT 3;
|
||||
|
||||
DROP FUNCTION test_join_function(int,int);
|
|
@ -1,22 +0,0 @@
|
|||
--
|
||||
-- MULTI_VERIFY_NO_SUBQUERY
|
||||
--
|
||||
|
||||
-- This test checks that we simply emit an error message instead of trying to
|
||||
-- process a distributed unsupported SQL subquery.
|
||||
|
||||
|
||||
ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1030000;
|
||||
|
||||
|
||||
SELECT * FROM lineitem WHERE l_orderkey IN
|
||||
(SELECT l_orderkey FROM lineitem WHERE l_quantity > 0);
|
||||
|
||||
SELECT l_quantity FROM lineitem WHERE EXISTS
|
||||
(SELECT 1 FROM orders WHERE o_orderkey = l_orderkey);
|
||||
|
||||
SELECT l_quantity FROM lineitem WHERE l_orderkey IN (SELECT o_orderkey FROM orders);
|
||||
|
||||
SELECT l_orderkey FROM lineitem WHERE l_quantity > ALL(SELECT o_orderkey FROM orders);
|
||||
|
||||
SELECT l_quantity FROM lineitem WHERE l_orderkey = (SELECT min(o_orderkey) FROM orders);
|
Loading…
Reference in New Issue