Support for subqueries in WHERE clause

This commit enables subqueries in WHERE clause to be pushed down
by the subquery pushdown logic.

The support covers:
  - Correlated subqueries with IN, NOT IN, EXISTS, NOT EXISTS,
    operator expressions such as (>, <, =, ALL, ANY etc.)
  - Non-correlated subqueries with (partition_key) IN (SELECT partition_key ..)
    (partition_key) =ANY (SELECT partition_key ...)

Note that this commit heavily utilizes the attribute equivalence logic introduced
in the 1cb6a34ba8. In general, this commit mostly
adjusts the logical planner not to error out on the subqueries in WHERE clause.
pull/1372/head
Onder Kalaci 2017-04-19 18:20:38 +03:00
parent 8dab40da69
commit a4afe24ccf
15 changed files with 1444 additions and 171 deletions

View File

@ -93,6 +93,7 @@ static bool HasOuterJoinWalker(Node *node, void *maxJoinLevel);
static bool HasComplexJoinOrder(Query *queryTree);
static bool HasComplexRangeTableType(Query *queryTree);
static void ValidateClauseList(List *clauseList);
static void ValidateSubqueryPushdownClauseList(List *clauseList);
static bool ExtractFromExpressionWalker(Node *node,
QualifierWalkerContext *walkerContext);
static List * MultiTableNodeList(List *tableEntryList, List *rangeTableList);
@ -102,6 +103,8 @@ static MultiNode * MultiJoinTree(List *joinOrderList, List *collectTableList,
static MultiCollect * CollectNodeForTable(List *collectTableList, uint32 rangeTableId);
static MultiSelect * MultiSelectNode(List *whereClauseList);
static bool IsSelectClause(Node *clause);
static void ErrorIfSublink(Node *clause);
static bool IsSublinkClause(Node *clause);
static MultiProject * MultiProjectNode(List *targetEntryList);
static MultiExtendedOp * MultiExtendedOpNode(Query *queryTree);
@ -134,6 +137,8 @@ static MultiNode * MultiSubqueryPlanTree(Query *originalQuery,
Query *queryTree,
PlannerRestrictionContext *
plannerRestrictionContext);
static List * SublinkList(Query *originalQuery);
static bool ExtractSublinkWalker(Node *node, List **sublinkList);
static MultiNode * SubqueryPushdownMultiPlanTree(Query *queryTree);
static List * CreateSubqueryTargetEntryList(List *columnList);
@ -159,15 +164,18 @@ MultiLogicalPlanCreate(Query *originalQuery, Query *queryTree,
{
MultiNode *multiQueryNode = NULL;
MultiTreeRoot *rootNode = NULL;
List *subqueryEntryList = NULL;
/*
* We check the existence of subqueries in the modified query given that
* if postgres already flattened the subqueries, MultiPlanTree() can plan
* corresponding distributed plan.
* We check the existence of subqueries in FROM clause on the modified query
* given that if postgres already flattened the subqueries, MultiPlanTree()
* can plan corresponding distributed plan.
*
* We also check the existence of subqueries in WHERE clause. Note that
* this check needs to be done on the original query given that
* standard_planner() may replace the sublinks with anti/semi joins and
* MultiPlanTree() cannot plan such queries.
*/
subqueryEntryList = SubqueryEntryList(queryTree);
if (subqueryEntryList != NIL)
if (SubqueryEntryList(queryTree) != NIL || SublinkList(originalQuery) != NIL)
{
multiQueryNode = MultiSubqueryPlanTree(originalQuery, queryTree,
plannerRestrictionContext);
@ -185,6 +193,57 @@ MultiLogicalPlanCreate(Query *originalQuery, Query *queryTree,
}
/*
* SublinkList finds the subquery nodes in the where clause of the given query. Note
* that the function should be called on the orignal query given that postgres
* standard_planner() may convert the subqueries in WHERE clause to joins.
*/
static List *
SublinkList(Query *originalQuery)
{
FromExpr *joinTree = originalQuery->jointree;
Node *queryQuals = NULL;
List *sublinkList = NIL;
if (!joinTree)
{
return NIL;
}
queryQuals = joinTree->quals;
ExtractSublinkWalker(queryQuals, &sublinkList);
return sublinkList;
}
/*
* ExtractSublinkWalker walks over a quals node, and finds all sublinks
* in that node.
*/
static bool
ExtractSublinkWalker(Node *node, List **sublinkList)
{
bool walkerResult = false;
if (node == NULL)
{
return false;
}
if (IsA(node, SubLink))
{
(*sublinkList) = lappend(*sublinkList, node);
}
else
{
walkerResult = expression_tree_walker(node, ExtractSublinkWalker,
sublinkList);
}
return walkerResult;
}
/*
* MultiSubqueryPlanTree gets the query objects and returns logical plan
* for subqueries.
@ -336,8 +395,10 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery,
plannerRestrictionContext)
{
ListCell *rangeTableEntryCell = NULL;
ListCell *sublinkCell = NULL;
List *subqueryEntryList = NIL;
bool outerMostQueryHasLimit = false;
List *sublinkList = NIL;
DeferredErrorMessage *error = NULL;
RelationRestrictionContext *relationRestrictionContext =
plannerRestrictionContext->relationRestrictionContext;
@ -395,6 +456,23 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery,
}
}
sublinkList = SublinkList(originalQuery);
foreach(sublinkCell, sublinkList)
{
SubLink *sublink = (SubLink *) lfirst(sublinkCell);
Node *subselect = sublink->subselect;
if (subselect && IsA(subselect, Query))
{
error = DeferErrorIfCannotPushdownSubquery((Query *) subselect,
outerMostQueryHasLimit);
if (error)
{
return error;
}
}
}
return NULL;
}
@ -549,12 +627,6 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi
return deferredError;
}
if (subqueryTree->hasSubLinks)
{
preconditionsSatisfied = false;
errorDetail = "Subqueries other than from-clause subqueries are unsupported";
}
if (subqueryTree->rtable == NIL)
{
preconditionsSatisfied = false;
@ -1248,11 +1320,11 @@ ErrorIfQueryNotSupported(Query *queryTree)
const char *filterHint = "Consider using an equality filter on the distributed "
"table's partition column.";
if (queryTree->hasSubLinks)
if (queryTree->hasSubLinks && SublinkList(queryTree) == NIL)
{
preconditionsSatisfied = false;
errorMessage = "could not run distributed query with subquery outside the "
"FROM clause";
"FROM and WHERE clauses";
errorHint = filterHint;
}
@ -1506,6 +1578,12 @@ DeferErrorIfUnsupportedSubqueryRepartition(Query *subqueryTree)
errorDetail = "Subqueries with offset are not supported yet";
}
if (subqueryTree->hasSubLinks)
{
preconditionsSatisfied = false;
errorDetail = "Subqueries other than from-clause subqueries are unsupported";
}
/* finally check and return error if conditions are not satisfied */
if (!preconditionsSatisfied)
{
@ -1752,11 +1830,37 @@ ValidateClauseList(List *clauseList)
{
Node *clause = (Node *) lfirst(clauseCell);
bool selectClause = IsSelectClause(clause);
bool joinClause = IsJoinClause(clause);
bool orClause = or_clause(clause);
/* produce a better error message for sublinks */
ErrorIfSublink(clause);
if (!(selectClause || joinClause || orClause))
if (!(IsSelectClause(clause) || IsJoinClause(clause) || or_clause(clause)))
{
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("unsupported clause type")));
}
}
}
/*
* ValidateSubqueryPushdownClauseList walks over the given list of clauses,
* and checks that we can recognize all the clauses. This function ensures
* that we do not drop an unsupported clause type on the floor, and thus
* prevents erroneous results.
*
* Note that this function is slightly different than ValidateClauseList(),
* additionally allowing subkinks.
*/
static void
ValidateSubqueryPushdownClauseList(List *clauseList)
{
ListCell *clauseCell = NULL;
foreach(clauseCell, clauseList)
{
Node *clause = (Node *) lfirst(clauseCell);
if (!(IsSublinkClause(clause) || IsSelectClause(clause) ||
IsJoinClause(clause) || or_clause(clause)))
{
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("unsupported clause type")));
@ -1810,6 +1914,10 @@ JoinClauseList(List *whereClauseList)
* subqueries but differently from the outermost query, they are run on a copy
* of parse tree and changes do not get persisted as modifications to the original
* query tree.
*
* Also this function adds SubLinks to the baseQualifierList when they appear on
* the query's WHERE clause. The callers of the function should consider processing
* Sublinks as well.
*/
static bool
ExtractFromExpressionWalker(Node *node, QualifierWalkerContext *walkerContext)
@ -2203,7 +2311,8 @@ MultiSelectNode(List *whereClauseList)
/*
* IsSelectClause determines if the given node is a select clause according to
* our criteria. Our criteria defines a select clause as an expression that has
* zero or more columns belonging to only one table.
* zero or more columns belonging to only one table. The function assumes that
* no sublinks exists in the clause.
*/
static bool
IsSelectClause(Node *clause)
@ -2213,16 +2322,6 @@ IsSelectClause(Node *clause)
Var *firstColumn = NULL;
Index firstColumnTableId = 0;
bool isSelectClause = true;
NodeTag nodeTag = nodeTag(clause);
/* error out for subqueries in WHERE clause */
if (nodeTag == T_SubLink || nodeTag == T_SubPlan)
{
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot perform distributed planning on this query"),
errdetail("Subqueries other than in from-clause are currently "
"unsupported")));
}
/* extract columns from the clause */
columnList = pull_var_clause_default(clause);
@ -2249,6 +2348,42 @@ IsSelectClause(Node *clause)
}
/*
* ErrorIfSublink errors out if the input claise is either sublink or subplan.
*/
static void
ErrorIfSublink(Node *clause)
{
NodeTag nodeTag = nodeTag(clause);
/* error out for subqueries in WHERE clause */
if (nodeTag == T_SubLink || nodeTag == T_SubPlan)
{
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot perform distributed planning on this query"),
errdetail("Subqueries other than in from-clause are currently "
"unsupported")));
}
}
/*
* IsSublinkClause determines if the given node is a sublink or subplan.
*/
static bool
IsSublinkClause(Node *clause)
{
NodeTag nodeTag = nodeTag(clause);
if (nodeTag == T_SubLink || nodeTag == T_SubPlan)
{
return true;
}
return false;
}
/*
* MultiProjectNode builds the project node using the target entry information
* from the query tree. The project node only encapsulates projected columns,
@ -2904,9 +3039,13 @@ SubqueryPushdownMultiPlanTree(Query *queryTree)
/* verify we can perform distributed planning on this query */
ErrorIfQueryNotSupported(queryTree);
/* extract qualifiers and verify we can plan for them */
/*
* Extract qualifiers and verify we can plan for them. Note that since
* subquery pushdown join planning is based on restriction equivalence,
* checking for these qualifiers may not be necessary.
*/
qualifierList = QualifierList(queryTree->jointree);
ValidateClauseList(qualifierList);
ValidateSubqueryPushdownClauseList(qualifierList);
/*
* We would be creating a new Query and pushing down top level query's

View File

@ -422,15 +422,6 @@ DEBUG: Plan is router executable
SELECT * FROM articles_hash_mx, position('om' in 'Thomas') WHERE author_id = 1 or author_id = 2;
ERROR: could not run distributed query with complex table expressions
HINT: Consider using an equality filter on the distributed table's partition column.
-- subqueries are not supported in WHERE clause in Citus
SELECT * FROM articles_hash_mx WHERE author_id IN (SELECT id FROM authors_hash_mx WHERE name LIKE '%a');
ERROR: cannot plan queries that include both regular and partitioned relations
SELECT * FROM articles_hash_mx WHERE author_id IN (SELECT author_id FROM articles_hash_mx WHERE author_id = 1 or author_id = 3);
ERROR: could not run distributed query with join types other than INNER or OUTER JOINS
HINT: Consider joining tables on partition column and have equal filter on joining columns.
SELECT * FROM articles_hash_mx WHERE author_id = (SELECT 1);
ERROR: could not run distributed query with subquery outside the FROM clause
HINT: Consider using an equality filter on the distributed table's partition column.
-- subqueries are supported in FROM clause but they are not router plannable
SELECT articles_hash_mx.id,test.word_count
FROM articles_hash_mx, (SELECT id, word_count FROM articles_hash_mx) AS test WHERE test.id = articles_hash_mx.id
@ -502,7 +493,7 @@ HINT: Set citus.task_executor_type to "task-tracker".
-- subqueries are not supported in SELECT clause
SELECT a.title AS name, (SELECT a2.id FROM articles_single_shard_hash_mx a2 WHERE a.id = a2.id LIMIT 1)
AS special_price FROM articles_hash_mx a;
ERROR: could not run distributed query with subquery outside the FROM clause
ERROR: could not run distributed query with subquery outside the FROM and WHERE clauses
HINT: Consider using an equality filter on the distributed table's partition column.
-- simple lookup query
SELECT *

View File

@ -520,15 +520,6 @@ DEBUG: Plan is router executable
SELECT * FROM articles_hash, position('om' in 'Thomas') WHERE author_id = 1 or author_id = 2;
ERROR: could not run distributed query with complex table expressions
HINT: Consider using an equality filter on the distributed table's partition column.
-- subqueries are not supported in WHERE clause in Citus
SELECT * FROM articles_hash WHERE author_id IN (SELECT id FROM authors_hash WHERE name LIKE '%a');
ERROR: cannot plan queries that include both regular and partitioned relations
SELECT * FROM articles_hash WHERE author_id IN (SELECT author_id FROM articles_hash WHERE author_id = 1 or author_id = 3);
ERROR: could not run distributed query with join types other than INNER or OUTER JOINS
HINT: Consider joining tables on partition column and have equal filter on joining columns.
SELECT * FROM articles_hash WHERE author_id = (SELECT 1);
ERROR: could not run distributed query with subquery outside the FROM clause
HINT: Consider using an equality filter on the distributed table's partition column.
-- unless the query can be transformed into a join
SELECT * FROM articles_hash
WHERE author_id IN (SELECT author_id FROM articles_hash WHERE author_id = 2)
@ -615,7 +606,7 @@ HINT: Set citus.task_executor_type to "task-tracker".
-- subqueries are not supported in SELECT clause
SELECT a.title AS name, (SELECT a2.id FROM articles_single_shard_hash a2 WHERE a.id = a2.id LIMIT 1)
AS special_price FROM articles_hash a;
ERROR: could not run distributed query with subquery outside the FROM clause
ERROR: could not run distributed query with subquery outside the FROM and WHERE clauses
HINT: Consider using an equality filter on the distributed table's partition column.
-- simple lookup query
SELECT *

View File

@ -247,7 +247,7 @@ ORDER BY articles.id;
-- subqueries are not supported in SELECT clause
SELECT a.title AS name, (SELECT a2.id FROM articles_single_shard a2 WHERE a.id = a2.id LIMIT 1)
AS special_price FROM articles a;
ERROR: could not run distributed query with subquery outside the FROM clause
ERROR: could not run distributed query with subquery outside the FROM and WHERE clauses
HINT: Consider using an equality filter on the distributed table's partition column.
-- joins are not supported between local and distributed tables
SELECT title, authors.name FROM authors, articles WHERE authors.id = articles.author_id;

View File

@ -546,62 +546,103 @@ ORDER BY user_lastseen DESC, user_id;
------------------------------------
-- Count the number of distinct users_table who are in segment X and Y and Z
-- This query will be supported when we have subqueries in where clauses.
------------------------------------
SELECT DISTINCT user_id
SELECT user_id
FROM users_table
WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20)
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 30 AND value_1 <= 40)
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60);
ERROR: could not run distributed query with join types other than INNER or OUTER JOINS
HINT: Consider joining tables on partition column and have equal filter on joining columns.
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60)
GROUP BY
user_id
ORDER BY
user_id DESC
LIMIT 5;
user_id
---------
93
90
88
87
84
(5 rows)
------------------------------------
-- Find customers who have done X, and satisfy other customer specific criteria
-- This query will be supported when we have subqueries in where clauses.
------------------------------------
SELECT user_id, value_2 FROM users_table WHERE
value_1 > 101 AND value_1 < 110
AND value_2 >= 5
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 101 AND event_type < 110 AND value_3 > 100 AND user_id = users_table.user_id);
ERROR: could not run distributed query with join types other than INNER or OUTER JOINS
HINT: Consider joining tables on partition column and have equal filter on joining columns.
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 101 AND event_type < 110 AND value_3 > 100 AND user_id = users_table.user_id)
ORDER BY 2 DESC, 1 DESC
LIMIT 5;
user_id | value_2
---------+---------
95 | 951
4 | 934
2 | 908
90 | 900
49 | 847
(5 rows)
------------------------------------
-- Customers who havent done X, and satisfy other customer specific criteria
-- This query will be supported when we have subqueries in where clauses.
------------------------------------
SELECT user_id, value_2 FROM users_table WHERE
value_1 = 101
AND value_2 >= 5
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND user_id = users_table.user_id);
ERROR: could not run distributed query with subquery outside the FROM clause
HINT: Consider using an equality filter on the distributed table's partition column.
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND user_id = users_table.user_id)
ORDER BY 1 DESC, 2 DESC
LIMIT 3;
user_id | value_2
---------+---------
58 | 585
51 | 1000
48 | 861
(3 rows)
------------------------------------
-- Customers who have done X and Y, and satisfy other customer specific criteria
-- This query will be supported when we have subqueries in where clauses.
------------------------------------
SELECT user_id, value_2 FROM users_table WHERE
SELECT user_id, sum(value_2) as cnt FROM users_table WHERE
value_1 > 100
AND value_2 >= 5
AND EXISTS (SELECT user_id FROM events_table WHERE event_type != 100 AND value_3 > 100 AND user_id = users_table.user_id)
AND EXISTS (SELECT user_id FROM events_table WHERE event_type = 101 AND value_3 > 100 AND user_id = users_table.user_id);
ERROR: could not run distributed query with join types other than INNER or OUTER JOINS
HINT: Consider joining tables on partition column and have equal filter on joining columns.
AND EXISTS (SELECT user_id FROM events_table WHERE event_type = 101 AND value_3 > 100 AND user_id = users_table.user_id)
GROUP BY
user_id
ORDER BY cnt DESC, user_id DESC
LIMIT 5;
user_id | cnt
---------+-------
49 | 48606
69 | 46524
86 | 46163
80 | 45995
35 | 45437
(5 rows)
------------------------------------
-- Customers who have done X and havent done Y, and satisfy other customer specific criteria
-- This query will be supported when we have subqueries in where clauses.
------------------------------------
SELECT user_id, value_2 FROM users_table WHERE
value_2 >= 5
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 100 AND event_type <= 300 AND value_3 > 100 AND user_id = users_table.user_id)
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 300 AND event_type <= 350 AND value_3 > 100 AND user_id = users_table.user_id);
ERROR: could not run distributed query with join types other than INNER or OUTER JOINS
HINT: Consider joining tables on partition column and have equal filter on joining columns.
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 300 AND event_type <= 350 AND value_3 > 100 AND user_id = users_table.user_id)
ORDER BY 2 DESC, 1 DESC
LIMIT 4;
user_id | value_2
---------+---------
8 | 996
96 | 995
8 | 995
96 | 989
(4 rows)
------------------------------------
-- Customers who have done X more than 2 times, and satisfy other customer specific criteria
-- This query will be supported when we have subqueries in where clauses.
------------------------------------
SELECT user_id,
value_2
avg(value_2)
FROM users_table
WHERE value_1 > 100
AND value_1 < 124
@ -613,9 +654,21 @@ SELECT user_id,
AND value_3 > 100
AND user_id = users_table.user_id
GROUP BY user_id
HAVING Count(*) > 2);
ERROR: could not run distributed query with subquery outside the FROM clause
HINT: Consider using an equality filter on the distributed table's partition column.
HAVING Count(*) > 2)
GROUP BY
user_id
ORDER BY
1 DESC, 2 DESC
LIMIT 5;
user_id | avg
---------+----------------------
99 | 571.6666666666666667
98 | 758.0000000000000000
96 | 459.6666666666666667
90 | 453.3333333333333333
89 | 215.0000000000000000
(5 rows)
------------------------------------
-- Find me all users_table who logged in more than once
------------------------------------
@ -671,7 +724,6 @@ ORDER BY
------------------------------------
-- Find me all users_table who has done some event and has filters
-- This query will be supported when we have subqueries in where clauses.
------------------------------------
SELECT user_id
FROM events_table
@ -684,18 +736,32 @@ WHERE
users_table
WHERE
value_1 = 15 AND value_2 > 25
);
ERROR: could not run distributed query with join types other than INNER or OUTER JOINS
HINT: Consider joining tables on partition column and have equal filter on joining columns.
)
ORDER BY 1;
user_id
---------
7
53
(2 rows)
------------------------------------
-- Which events_table did people who has done some specific events_table
-- This query will be supported when we have subqueries in where clauses.
------------------------------------
SELECT user_id, event_type FROM events_table
WHERE user_id in (SELECT user_id from events_table WHERE event_type > 500 and event_type < 505)
GROUP BY user_id, event_type;
ERROR: could not run distributed query with join types other than INNER or OUTER JOINS
HINT: Consider joining tables on partition column and have equal filter on joining columns.
SELECT
user_id, event_type FROM events_table
WHERE
user_id in (SELECT user_id from events_table WHERE event_type > 500 and event_type < 505)
GROUP BY
user_id, event_type
ORDER BY 2 DESC, 1
LIMIT 3;
user_id | event_type
---------+------------
18 | 999
23 | 999
26 | 999
(3 rows)
------------------------------------
-- Find me all the users_table who has done some event more than three times
------------------------------------
@ -1950,5 +2016,45 @@ FROM (
ERROR: no value found for parameter 1
SET client_min_messages TO DEFAULT;
DROP FUNCTION volatile_func_test();
SET citus.subquery_pushdown to OFF;
CREATE FUNCTION test_join_function_2(integer, integer) RETURNS bool
AS 'select $1 > $2;'
LANGUAGE SQL
IMMUTABLE
RETURNS NULL ON NULL INPUT;
-- we don't support joins via functions
SELECT user_id, array_length(events_table, 1)
FROM (
SELECT user_id, array_agg(event ORDER BY time) AS events_table
FROM (
SELECT u.user_id, e.event_type::text AS event, e.time
FROM users_table AS u,
events_table AS e
WHERE test_join_function_2(u.user_id, e.user_id)
) t
GROUP BY user_id
) q
ORDER BY 2 DESC, 1;
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
-- note that the following query has joins on the partition keys
-- however we fail to push down it due to the function call on the
-- where clause. We probably need to relax that check
SELECT
users_table.user_id, users_table.value_1, prob
FROM
users_table
JOIN
(SELECT
ma.user_id, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob
FROM
users_table AS ma, events_table as short_list
WHERE
short_list.user_id = ma.user_id and ma.value_1 < 50 and short_list.event_type < 50
) temp
ON users_table.user_id = temp.user_id
WHERE
users_table.value_1 < 50 AND test_join_function_2(users_table.user_id, temp.user_id);
ERROR: unsupported clause type
DROP FUNCTION test_join_function_2(integer, integer);
SET citus.enable_router_execution TO TRUE;
SET citus.subquery_pushdown to OFF;

View File

@ -0,0 +1,555 @@
--
-- multi subquery in where queries aims to expand existing subquery pushdown
-- regression tests to cover more cases specifically subqueries in WHERE clause
-- the tables that are used depends to multi_insert_select_behavioral_analytics_create_table.sql
--
-- We don't need shard id sequence here, so commented out to prevent conflicts with concurrent tests
-- ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1430000;
-- ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 1430000;
-- subqueries in WHERE with greater operator
SELECT
user_id
FROM
users_table
WHERE
value_2 >
(SELECT
max(value_2)
FROM
events_table
WHERE
users_table.user_id = events_table.user_id AND event_type = 50
GROUP BY
user_id
)
GROUP BY user_id
HAVING count(*) > 66
ORDER BY user_id
LIMIT 5;
user_id
---------
49
55
56
63
(4 rows)
-- subqueries in where with ALL operator
SELECT
user_id
FROM
users_table
WHERE
value_2 > 545 AND
value_2 < ALL (SELECT avg(value_3) FROM events_table WHERE users_table.user_id = events_table.user_id GROUP BY user_id)
GROUP BY
1
ORDER BY
1 DESC
LIMIT 3;
user_id
---------
69
52
12
(3 rows)
-- IN operator on non-partition key
SELECT
user_id
FROM
events_table as e1
WHERE
event_type IN
(SELECT
event_type
FROM
events_table as e2
WHERE
value_2 = 15 AND value_3 > 25 AND
e1.user_id = e2.user_id
)
ORDER BY 1;
user_id
---------
8
17
33
47
54
54
56
71
79
86
(10 rows)
-- NOT IN on non-partition key
SELECT
user_id
FROM
events_table as e1
WHERE
event_type NOT IN
(SELECT
event_type
FROM
events_table as e2
WHERE
value_2 = 15 AND value_3 > 25 AND
e1.user_id = e2.user_id
)
GROUP BY 1
HAVING count(*) > 122
ORDER BY 1;
user_id
---------
23
25
(2 rows)
-- non-correlated query with =ANY on partition keys
SELECT
user_id, count(*)
FROM
users_table
WHERE
user_id =ANY(SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20) GROUP BY 1 ORDER BY 2 DESC LIMIT 5;
user_id | count
---------+-------
12 | 121
87 | 117
37 | 115
23 | 115
46 | 115
(5 rows)
-- users that appeared more than 118 times
SELECT
user_id
FROM
users_table
WHERE 118 <=
(SELECT
count(*)
FROM
events_table
WHERE
users_table.user_id = events_table.user_id
GROUP BY
user_id)
GROUP BY
user_id
ORDER BY
user_id;
user_id
---------
13
17
23
25
(4 rows)
-- the following query doesn't have a meaningul result
-- but it is a valid query with an arbitrary subquery in
-- WHERE clause
SELECT user_id, value_2 FROM users_table WHERE
value_1 > 101 AND value_1 < 110
AND value_2 >= 5
AND user_id IN
(
SELECT
e1.user_id
FROM (
-- Get the first time each user viewed the homepage.
SELECT
user_id,
1 AS view_homepage,
min(time) AS view_homepage_time
FROM events_table
WHERE
event_type IN (10, 20, 30, 40, 50, 60, 70, 80, 90)
GROUP BY user_id
) e1 LEFT JOIN LATERAL (
SELECT
user_id,
1 AS use_demo,
time AS use_demo_time
FROM events_table
WHERE
user_id = e1.user_id AND
event_type IN (11, 21, 31, 41, 51, 61, 71, 81, 91)
ORDER BY time
) e2 ON true LEFT JOIN LATERAL (
SELECT
user_id,
1 AS enter_credit_card,
time AS enter_credit_card_time
FROM events_table
WHERE
user_id = e2.user_id AND
event_type IN (12, 22, 32, 42, 52, 62, 72, 82, 92)
ORDER BY time
) e3 ON true LEFT JOIN LATERAL (
SELECT
1 AS submit_card_info,
user_id,
time AS enter_credit_card_time
FROM events_table
WHERE
user_id = e3.user_id AND
event_type IN (13, 23, 33, 43, 53, 63, 73, 83, 93)
ORDER BY time
) e4 ON true LEFT JOIN LATERAL (
SELECT
1 AS see_bought_screen
FROM events_table
WHERE
user_id = e4.user_id AND
event_type IN (14, 24, 34, 44, 54, 64, 74, 84, 94)
ORDER BY time
) e5 ON true
group by e1.user_id
HAVING sum(submit_card_info) > 0
)
ORDER BY 1, 2;
user_id | value_2
---------+---------
5 | 884
42 | 55
42 | 471
51 | 758
72 | 897
82 | 691
95 | 951
(7 rows)
-- similar to the above query
-- the following query doesn't have a meaningul result
-- but it is a valid query with an arbitrary subquery in
-- WHERE clause
SELECT
user_id
FROM
users_table
WHERE
user_id IN
(
SELECT
user_id
FROM (
SELECT
subquery_1.user_id, count_pay
FROM
(
(SELECT
users_table.user_id,
'action=>1'AS event,
events_table.time
FROM
users_table,
events_table
WHERE
users_table.user_id = events_table.user_id AND
users_table.user_id >= 10 AND
users_table.user_id <= 70 AND
events_table.event_type > 10 AND events_table.event_type < 12
)
UNION
(SELECT
users_table.user_id,
'action=>2'AS event,
events_table.time
FROM
users_table,
events_table
WHERE
users_table.user_id = events_table.user_id AND
users_table.user_id >= 10 AND
users_table.user_id <= 70 AND
events_table.event_type > 12 AND events_table.event_type < 14
)
) AS subquery_1
LEFT JOIN
(SELECT
user_id,
COUNT(*) AS count_pay
FROM
users_table
WHERE
user_id >= 10 AND
user_id <= 70 AND
users_table.value_1 > 15 AND users_table.value_1 < 17
GROUP BY
user_id
HAVING
COUNT(*) > 1) AS subquery_2
ON
subquery_1.user_id = subquery_2.user_id
GROUP BY
subquery_1.user_id,
count_pay) AS subquery_top
GROUP BY
count_pay, user_id
)
GROUP BY user_id
HAVING count(*) > 3 AND sum(value_2) > 49000
ORDER BY 1;
user_id
---------
18
29
40
49
58
69
(6 rows)
-- the following query doesn't have a meaningul result
-- but it is a valid query with an arbitrary subquery in
-- FROM clause involving a complex query in WHERE clause
SELECT user_id, array_length(events_table, 1)
FROM (
SELECT user_id, array_agg(event ORDER BY time) AS events_table
FROM (
SELECT
u.user_id, e.event_type::text AS event, e.time
FROM
users_table AS u,
events_table AS e
WHERE u.user_id = e.user_id AND
u.user_id IN
(
SELECT
user_id
FROM
users_table
WHERE value_2 >= 5
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 100 AND event_type <= 300 AND value_3 > 100 AND user_id = users_table.user_id)
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 300 AND event_type <= 350 AND value_3 > 100 AND user_id = users_table.user_id)
)
) t
GROUP BY user_id
) q
ORDER BY 2 DESC, 1;
user_id | array_length
---------+--------------
96 | 12204
8 | 8170
(2 rows)
--
-- below tests only aims for cases where all relations
-- are not joined on partition key
--
-- e4 is not joined on the partition key
SELECT user_id, value_2 FROM users_table WHERE
value_1 > 101 AND value_1 < 110
AND value_2 >= 5
AND user_id IN
(
SELECT
e1.user_id
FROM (
-- Get the first time each user viewed the homepage.
SELECT
user_id,
1 AS view_homepage,
min(time) AS view_homepage_time
FROM events_table
WHERE
event_type IN (10, 20, 30, 40, 50, 60, 70, 80, 90)
GROUP BY user_id
) e1 LEFT JOIN LATERAL (
SELECT
user_id,
1 AS use_demo,
time AS use_demo_time
FROM events_table
WHERE
user_id = e1.user_id AND
event_type IN (11, 21, 31, 41, 51, 61, 71, 81, 91)
ORDER BY time
) e2 ON true LEFT JOIN LATERAL (
SELECT
user_id,
1 AS enter_credit_card,
time AS enter_credit_card_time
FROM events_table
WHERE
user_id = e2.user_id AND
event_type IN (12, 22, 32, 42, 52, 62, 72, 82, 92)
ORDER BY time
) e3 ON true LEFT JOIN LATERAL (
SELECT
1 AS submit_card_info,
user_id,
time AS enter_credit_card_time
FROM events_table
WHERE
value_2 = e3.user_id AND
event_type IN (13, 23, 33, 43, 53, 63, 73, 83, 93)
ORDER BY time
) e4 ON true LEFT JOIN LATERAL (
SELECT
1 AS see_bought_screen
FROM events_table
WHERE
user_id = e4.user_id AND
event_type IN (14, 24, 34, 44, 54, 64, 74, 84, 94)
ORDER BY time
) e5 ON true
group by e1.user_id
HAVING sum(submit_card_info) > 0
);
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
-- left leaf query does not return partition key
SELECT
user_id
FROM
users_table
WHERE
user_id IN
(
SELECT
user_id
FROM (
SELECT
subquery_1.user_id, count_pay
FROM
(
(SELECT
2 * users_table.user_id as user_id,
'action=>1'AS event,
events_table.time
FROM
users_table,
events_table
WHERE
users_table.user_id = events_table.user_id AND
users_table.user_id >= 10 AND
users_table.user_id <= 70 AND
events_table.event_type > 10 AND events_table.event_type < 12
)
UNION
(SELECT
users_table.user_id,
'action=>2'AS event,
events_table.time
FROM
users_table,
events_table
WHERE
users_table.user_id = events_table.user_id AND
users_table.user_id >= 10 AND
users_table.user_id <= 70 AND
events_table.event_type > 12 AND events_table.event_type < 14
)
) AS subquery_1
LEFT JOIN
(SELECT
user_id,
COUNT(*) AS count_pay
FROM
users_table
WHERE
user_id >= 10 AND
user_id <= 70 AND
users_table.value_1 > 15 AND users_table.value_1 < 17
GROUP BY
user_id
HAVING
COUNT(*) > 1) AS subquery_2
ON
subquery_1.user_id = subquery_2.user_id
GROUP BY
subquery_1.user_id,
count_pay) AS subquery_top
GROUP BY
count_pay, user_id
)
GROUP BY user_id
HAVING count(*) > 3 AND sum(value_2) > 49000
ORDER BY 1;
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
-- NOT EXISTS query has non-equi join
SELECT user_id, array_length(events_table, 1)
FROM (
SELECT user_id, array_agg(event ORDER BY time) AS events_table
FROM (
SELECT
u.user_id, e.event_type::text AS event, e.time
FROM
users_table AS u,
events_table AS e
WHERE u.user_id = e.user_id AND
u.user_id IN
(
SELECT
user_id
FROM
users_table
WHERE value_2 >= 5
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 100 AND event_type <= 300 AND value_3 > 100 AND user_id = users_table.user_id)
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 300 AND event_type <= 350 AND value_3 > 100 AND user_id != users_table.user_id)
)
) t
GROUP BY user_id
) q
ORDER BY 2 DESC, 1;
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
-- subquery in where clause doesn't have a relation
SELECT
user_id
FROM
users_table
WHERE
value_2 >
(SELECT 1);
ERROR: cannot push down this subquery
DETAIL: Subqueries without relations are unsupported
-- OFFSET is not supported in the subquey
SELECT
user_id
FROM
users_table
WHERE
value_2 >
(SELECT
max(value_2)
FROM
events_table
WHERE
users_table.user_id = events_table.user_id AND event_type = 50
GROUP BY
user_id
OFFSET 3
);
ERROR: cannot push down this subquery
DETAIL: Offset clause is currently unsupported
-- semi join is not on the partition key for the third subquery
SELECT user_id
FROM users_table
WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20)
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 30 AND value_1 <= 40)
AND value_2 IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60);
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
CREATE FUNCTION test_join_function(integer, integer) RETURNS bool
AS 'select $1 > $2;'
LANGUAGE SQL
IMMUTABLE
RETURNS NULL ON NULL INPUT;
-- we disallow JOINs via functions
SELECT user_id, value_2 FROM users_table WHERE
value_1 = 101
AND value_2 >= 5
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND test_join_function(events_table.user_id, users_table.user_id))
ORDER BY 1 DESC, 2 DESC
LIMIT 3;
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
DROP FUNCTION test_join_function(int,int);

View File

@ -1,18 +0,0 @@
--
-- MULTI_VERIFY_NO_SUBQUERY
--
-- This test checks that we simply emit an error message instead of trying to
-- process a distributed unsupported SQL subquery.
ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1030000;
SELECT * FROM lineitem WHERE l_orderkey IN
(SELECT l_orderkey FROM lineitem WHERE l_quantity > 0);
ERROR: could not run distributed query with join types other than INNER or OUTER JOINS
SELECT l_quantity FROM lineitem WHERE EXISTS
(SELECT 1 FROM orders WHERE o_orderkey = l_orderkey);
ERROR: could not run distributed query with join types other than INNER or OUTER JOINS
SELECT l_quantity FROM lineitem WHERE l_orderkey IN (SELECT o_orderkey FROM orders);
ERROR: could not run distributed query with join types other than INNER or OUTER JOINS
SELECT l_orderkey FROM lineitem WHERE l_quantity > ALL(SELECT o_orderkey FROM orders);
ERROR: could not run distributed query with subquery outside the FROM clause
SELECT l_quantity FROM lineitem WHERE l_orderkey = (SELECT min(o_orderkey) FROM orders);
ERROR: could not run distributed query with subquery outside the FROM clause

View File

@ -21,7 +21,7 @@ test: multi_table_ddl
test: multi_create_table
test: multi_load_data
test: multi_basic_queries multi_complex_expressions multi_verify_no_subquery
test: multi_basic_queries multi_complex_expressions
test: multi_single_relation_subquery
test: multi_binary_master_copy_format

View File

@ -38,10 +38,10 @@ test: multi_insert_select
# Miscellaneous tests to check our query planning behavior
# ----------
test: multi_deparse_shard_query
test: multi_basic_queries multi_complex_expressions multi_verify_no_subquery
test: multi_basic_queries multi_complex_expressions
test: multi_explain
test: multi_subquery multi_subquery_complex_queries multi_subquery_behavioral_analytics
test: multi_subquery_union
test: multi_subquery_union multi_subquery_in_where_clause
test: multi_reference_table
test: multi_outer_join_reference
test: multi_single_relation_subquery

View File

@ -28,7 +28,7 @@ test: multi_load_data
# ----------
# Miscellaneous tests to check our query planning behavior
# ----------
test: multi_basic_queries multi_complex_expressions multi_verify_no_subquery
test: multi_basic_queries multi_complex_expressions
test: multi_agg_distinct multi_limit_clause multi_limit_clause_approximate
test: multi_average_expression multi_working_columns
test: multi_array_agg

View File

@ -219,14 +219,6 @@ SELECT * FROM articles_hash_mx, position('om' in 'Thomas') WHERE author_id = 1 o
-- they are not supported if multiple workers are involved
SELECT * FROM articles_hash_mx, position('om' in 'Thomas') WHERE author_id = 1 or author_id = 2;
-- subqueries are not supported in WHERE clause in Citus
SELECT * FROM articles_hash_mx WHERE author_id IN (SELECT id FROM authors_hash_mx WHERE name LIKE '%a');
SELECT * FROM articles_hash_mx WHERE author_id IN (SELECT author_id FROM articles_hash_mx WHERE author_id = 1 or author_id = 3);
SELECT * FROM articles_hash_mx WHERE author_id = (SELECT 1);
-- subqueries are supported in FROM clause but they are not router plannable
SELECT articles_hash_mx.id,test.word_count
FROM articles_hash_mx, (SELECT id, word_count FROM articles_hash_mx) AS test WHERE test.id = articles_hash_mx.id

View File

@ -281,13 +281,6 @@ SELECT * FROM articles_hash, position('om' in 'Thomas') WHERE author_id = 1 or a
-- they are not supported if multiple workers are involved
SELECT * FROM articles_hash, position('om' in 'Thomas') WHERE author_id = 1 or author_id = 2;
-- subqueries are not supported in WHERE clause in Citus
SELECT * FROM articles_hash WHERE author_id IN (SELECT id FROM authors_hash WHERE name LIKE '%a');
SELECT * FROM articles_hash WHERE author_id IN (SELECT author_id FROM articles_hash WHERE author_id = 1 or author_id = 3);
SELECT * FROM articles_hash WHERE author_id = (SELECT 1);
-- unless the query can be transformed into a join
SELECT * FROM articles_hash
WHERE author_id IN (SELECT author_id FROM articles_hash WHERE author_id = 2)

View File

@ -458,57 +458,66 @@ ORDER BY user_lastseen DESC, user_id;
------------------------------------
-- Count the number of distinct users_table who are in segment X and Y and Z
-- This query will be supported when we have subqueries in where clauses.
------------------------------------
SELECT DISTINCT user_id
SELECT user_id
FROM users_table
WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20)
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 30 AND value_1 <= 40)
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60);
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60)
GROUP BY
user_id
ORDER BY
user_id DESC
LIMIT 5;
------------------------------------
-- Find customers who have done X, and satisfy other customer specific criteria
-- This query will be supported when we have subqueries in where clauses.
------------------------------------
SELECT user_id, value_2 FROM users_table WHERE
value_1 > 101 AND value_1 < 110
AND value_2 >= 5
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 101 AND event_type < 110 AND value_3 > 100 AND user_id = users_table.user_id);
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 101 AND event_type < 110 AND value_3 > 100 AND user_id = users_table.user_id)
ORDER BY 2 DESC, 1 DESC
LIMIT 5;
------------------------------------
-- Customers who havent done X, and satisfy other customer specific criteria
-- This query will be supported when we have subqueries in where clauses.
------------------------------------
SELECT user_id, value_2 FROM users_table WHERE
value_1 = 101
AND value_2 >= 5
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND user_id = users_table.user_id);
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND user_id = users_table.user_id)
ORDER BY 1 DESC, 2 DESC
LIMIT 3;
------------------------------------
-- Customers who have done X and Y, and satisfy other customer specific criteria
-- This query will be supported when we have subqueries in where clauses.
------------------------------------
SELECT user_id, value_2 FROM users_table WHERE
SELECT user_id, sum(value_2) as cnt FROM users_table WHERE
value_1 > 100
AND value_2 >= 5
AND EXISTS (SELECT user_id FROM events_table WHERE event_type != 100 AND value_3 > 100 AND user_id = users_table.user_id)
AND EXISTS (SELECT user_id FROM events_table WHERE event_type = 101 AND value_3 > 100 AND user_id = users_table.user_id);
AND EXISTS (SELECT user_id FROM events_table WHERE event_type = 101 AND value_3 > 100 AND user_id = users_table.user_id)
GROUP BY
user_id
ORDER BY cnt DESC, user_id DESC
LIMIT 5;
------------------------------------
-- Customers who have done X and havent done Y, and satisfy other customer specific criteria
-- This query will be supported when we have subqueries in where clauses.
------------------------------------
SELECT user_id, value_2 FROM users_table WHERE
value_2 >= 5
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 100 AND event_type <= 300 AND value_3 > 100 AND user_id = users_table.user_id)
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 300 AND event_type <= 350 AND value_3 > 100 AND user_id = users_table.user_id);
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 300 AND event_type <= 350 AND value_3 > 100 AND user_id = users_table.user_id)
ORDER BY 2 DESC, 1 DESC
LIMIT 4;
------------------------------------
-- Customers who have done X more than 2 times, and satisfy other customer specific criteria
-- This query will be supported when we have subqueries in where clauses.
------------------------------------
SELECT user_id,
value_2
avg(value_2)
FROM users_table
WHERE value_1 > 100
AND value_1 < 124
@ -520,7 +529,12 @@ SELECT user_id,
AND value_3 > 100
AND user_id = users_table.user_id
GROUP BY user_id
HAVING Count(*) > 2);
HAVING Count(*) > 2)
GROUP BY
user_id
ORDER BY
1 DESC, 2 DESC
LIMIT 5;
------------------------------------
-- Find me all users_table who logged in more than once
@ -555,7 +569,6 @@ ORDER BY
------------------------------------
-- Find me all users_table who has done some event and has filters
-- This query will be supported when we have subqueries in where clauses.
------------------------------------
SELECT user_id
FROM events_table
@ -568,15 +581,20 @@ WHERE
users_table
WHERE
value_1 = 15 AND value_2 > 25
);
)
ORDER BY 1;
------------------------------------
-- Which events_table did people who has done some specific events_table
-- This query will be supported when we have subqueries in where clauses.
------------------------------------
SELECT user_id, event_type FROM events_table
WHERE user_id in (SELECT user_id from events_table WHERE event_type > 500 and event_type < 505)
GROUP BY user_id, event_type;
SELECT
user_id, event_type FROM events_table
WHERE
user_id in (SELECT user_id from events_table WHERE event_type > 500 and event_type < 505)
GROUP BY
user_id, event_type
ORDER BY 2 DESC, 1
LIMIT 3;
------------------------------------
-- Find me all the users_table who has done some event more than three times
@ -1600,5 +1618,47 @@ FROM (
SET client_min_messages TO DEFAULT;
DROP FUNCTION volatile_func_test();
SET citus.subquery_pushdown to OFF;
CREATE FUNCTION test_join_function_2(integer, integer) RETURNS bool
AS 'select $1 > $2;'
LANGUAGE SQL
IMMUTABLE
RETURNS NULL ON NULL INPUT;
-- we don't support joins via functions
SELECT user_id, array_length(events_table, 1)
FROM (
SELECT user_id, array_agg(event ORDER BY time) AS events_table
FROM (
SELECT u.user_id, e.event_type::text AS event, e.time
FROM users_table AS u,
events_table AS e
WHERE test_join_function_2(u.user_id, e.user_id)
) t
GROUP BY user_id
) q
ORDER BY 2 DESC, 1;
-- note that the following query has joins on the partition keys
-- however we fail to push down it due to the function call on the
-- where clause. We probably need to relax that check
SELECT
users_table.user_id, users_table.value_1, prob
FROM
users_table
JOIN
(SELECT
ma.user_id, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob
FROM
users_table AS ma, events_table as short_list
WHERE
short_list.user_id = ma.user_id and ma.value_1 < 50 and short_list.event_type < 50
) temp
ON users_table.user_id = temp.user_id
WHERE
users_table.value_1 < 50 AND test_join_function_2(users_table.user_id, temp.user_id);
DROP FUNCTION test_join_function_2(integer, integer);
SET citus.enable_router_execution TO TRUE;
SET citus.subquery_pushdown to OFF;

View File

@ -0,0 +1,486 @@
--
-- multi subquery in where queries aims to expand existing subquery pushdown
-- regression tests to cover more cases specifically subqueries in WHERE clause
-- the tables that are used depends to multi_insert_select_behavioral_analytics_create_table.sql
--
-- We don't need shard id sequence here, so commented out to prevent conflicts with concurrent tests
-- ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1430000;
-- ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 1430000;
-- subqueries in WHERE with greater operator
SELECT
user_id
FROM
users_table
WHERE
value_2 >
(SELECT
max(value_2)
FROM
events_table
WHERE
users_table.user_id = events_table.user_id AND event_type = 50
GROUP BY
user_id
)
GROUP BY user_id
HAVING count(*) > 66
ORDER BY user_id
LIMIT 5;
-- subqueries in where with ALL operator
SELECT
user_id
FROM
users_table
WHERE
value_2 > 545 AND
value_2 < ALL (SELECT avg(value_3) FROM events_table WHERE users_table.user_id = events_table.user_id GROUP BY user_id)
GROUP BY
1
ORDER BY
1 DESC
LIMIT 3;
-- IN operator on non-partition key
SELECT
user_id
FROM
events_table as e1
WHERE
event_type IN
(SELECT
event_type
FROM
events_table as e2
WHERE
value_2 = 15 AND value_3 > 25 AND
e1.user_id = e2.user_id
)
ORDER BY 1;
-- NOT IN on non-partition key
SELECT
user_id
FROM
events_table as e1
WHERE
event_type NOT IN
(SELECT
event_type
FROM
events_table as e2
WHERE
value_2 = 15 AND value_3 > 25 AND
e1.user_id = e2.user_id
)
GROUP BY 1
HAVING count(*) > 122
ORDER BY 1;
-- non-correlated query with =ANY on partition keys
SELECT
user_id, count(*)
FROM
users_table
WHERE
user_id =ANY(SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20) GROUP BY 1 ORDER BY 2 DESC LIMIT 5;
-- users that appeared more than 118 times
SELECT
user_id
FROM
users_table
WHERE 118 <=
(SELECT
count(*)
FROM
events_table
WHERE
users_table.user_id = events_table.user_id
GROUP BY
user_id)
GROUP BY
user_id
ORDER BY
user_id;
-- the following query doesn't have a meaningul result
-- but it is a valid query with an arbitrary subquery in
-- WHERE clause
SELECT user_id, value_2 FROM users_table WHERE
value_1 > 101 AND value_1 < 110
AND value_2 >= 5
AND user_id IN
(
SELECT
e1.user_id
FROM (
-- Get the first time each user viewed the homepage.
SELECT
user_id,
1 AS view_homepage,
min(time) AS view_homepage_time
FROM events_table
WHERE
event_type IN (10, 20, 30, 40, 50, 60, 70, 80, 90)
GROUP BY user_id
) e1 LEFT JOIN LATERAL (
SELECT
user_id,
1 AS use_demo,
time AS use_demo_time
FROM events_table
WHERE
user_id = e1.user_id AND
event_type IN (11, 21, 31, 41, 51, 61, 71, 81, 91)
ORDER BY time
) e2 ON true LEFT JOIN LATERAL (
SELECT
user_id,
1 AS enter_credit_card,
time AS enter_credit_card_time
FROM events_table
WHERE
user_id = e2.user_id AND
event_type IN (12, 22, 32, 42, 52, 62, 72, 82, 92)
ORDER BY time
) e3 ON true LEFT JOIN LATERAL (
SELECT
1 AS submit_card_info,
user_id,
time AS enter_credit_card_time
FROM events_table
WHERE
user_id = e3.user_id AND
event_type IN (13, 23, 33, 43, 53, 63, 73, 83, 93)
ORDER BY time
) e4 ON true LEFT JOIN LATERAL (
SELECT
1 AS see_bought_screen
FROM events_table
WHERE
user_id = e4.user_id AND
event_type IN (14, 24, 34, 44, 54, 64, 74, 84, 94)
ORDER BY time
) e5 ON true
group by e1.user_id
HAVING sum(submit_card_info) > 0
)
ORDER BY 1, 2;
-- similar to the above query
-- the following query doesn't have a meaningul result
-- but it is a valid query with an arbitrary subquery in
-- WHERE clause
SELECT
user_id
FROM
users_table
WHERE
user_id IN
(
SELECT
user_id
FROM (
SELECT
subquery_1.user_id, count_pay
FROM
(
(SELECT
users_table.user_id,
'action=>1'AS event,
events_table.time
FROM
users_table,
events_table
WHERE
users_table.user_id = events_table.user_id AND
users_table.user_id >= 10 AND
users_table.user_id <= 70 AND
events_table.event_type > 10 AND events_table.event_type < 12
)
UNION
(SELECT
users_table.user_id,
'action=>2'AS event,
events_table.time
FROM
users_table,
events_table
WHERE
users_table.user_id = events_table.user_id AND
users_table.user_id >= 10 AND
users_table.user_id <= 70 AND
events_table.event_type > 12 AND events_table.event_type < 14
)
) AS subquery_1
LEFT JOIN
(SELECT
user_id,
COUNT(*) AS count_pay
FROM
users_table
WHERE
user_id >= 10 AND
user_id <= 70 AND
users_table.value_1 > 15 AND users_table.value_1 < 17
GROUP BY
user_id
HAVING
COUNT(*) > 1) AS subquery_2
ON
subquery_1.user_id = subquery_2.user_id
GROUP BY
subquery_1.user_id,
count_pay) AS subquery_top
GROUP BY
count_pay, user_id
)
GROUP BY user_id
HAVING count(*) > 3 AND sum(value_2) > 49000
ORDER BY 1;
-- the following query doesn't have a meaningul result
-- but it is a valid query with an arbitrary subquery in
-- FROM clause involving a complex query in WHERE clause
SELECT user_id, array_length(events_table, 1)
FROM (
SELECT user_id, array_agg(event ORDER BY time) AS events_table
FROM (
SELECT
u.user_id, e.event_type::text AS event, e.time
FROM
users_table AS u,
events_table AS e
WHERE u.user_id = e.user_id AND
u.user_id IN
(
SELECT
user_id
FROM
users_table
WHERE value_2 >= 5
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 100 AND event_type <= 300 AND value_3 > 100 AND user_id = users_table.user_id)
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 300 AND event_type <= 350 AND value_3 > 100 AND user_id = users_table.user_id)
)
) t
GROUP BY user_id
) q
ORDER BY 2 DESC, 1;
--
-- below tests only aims for cases where all relations
-- are not joined on partition key
--
-- e4 is not joined on the partition key
SELECT user_id, value_2 FROM users_table WHERE
value_1 > 101 AND value_1 < 110
AND value_2 >= 5
AND user_id IN
(
SELECT
e1.user_id
FROM (
-- Get the first time each user viewed the homepage.
SELECT
user_id,
1 AS view_homepage,
min(time) AS view_homepage_time
FROM events_table
WHERE
event_type IN (10, 20, 30, 40, 50, 60, 70, 80, 90)
GROUP BY user_id
) e1 LEFT JOIN LATERAL (
SELECT
user_id,
1 AS use_demo,
time AS use_demo_time
FROM events_table
WHERE
user_id = e1.user_id AND
event_type IN (11, 21, 31, 41, 51, 61, 71, 81, 91)
ORDER BY time
) e2 ON true LEFT JOIN LATERAL (
SELECT
user_id,
1 AS enter_credit_card,
time AS enter_credit_card_time
FROM events_table
WHERE
user_id = e2.user_id AND
event_type IN (12, 22, 32, 42, 52, 62, 72, 82, 92)
ORDER BY time
) e3 ON true LEFT JOIN LATERAL (
SELECT
1 AS submit_card_info,
user_id,
time AS enter_credit_card_time
FROM events_table
WHERE
value_2 = e3.user_id AND
event_type IN (13, 23, 33, 43, 53, 63, 73, 83, 93)
ORDER BY time
) e4 ON true LEFT JOIN LATERAL (
SELECT
1 AS see_bought_screen
FROM events_table
WHERE
user_id = e4.user_id AND
event_type IN (14, 24, 34, 44, 54, 64, 74, 84, 94)
ORDER BY time
) e5 ON true
group by e1.user_id
HAVING sum(submit_card_info) > 0
);
-- left leaf query does not return partition key
SELECT
user_id
FROM
users_table
WHERE
user_id IN
(
SELECT
user_id
FROM (
SELECT
subquery_1.user_id, count_pay
FROM
(
(SELECT
2 * users_table.user_id as user_id,
'action=>1'AS event,
events_table.time
FROM
users_table,
events_table
WHERE
users_table.user_id = events_table.user_id AND
users_table.user_id >= 10 AND
users_table.user_id <= 70 AND
events_table.event_type > 10 AND events_table.event_type < 12
)
UNION
(SELECT
users_table.user_id,
'action=>2'AS event,
events_table.time
FROM
users_table,
events_table
WHERE
users_table.user_id = events_table.user_id AND
users_table.user_id >= 10 AND
users_table.user_id <= 70 AND
events_table.event_type > 12 AND events_table.event_type < 14
)
) AS subquery_1
LEFT JOIN
(SELECT
user_id,
COUNT(*) AS count_pay
FROM
users_table
WHERE
user_id >= 10 AND
user_id <= 70 AND
users_table.value_1 > 15 AND users_table.value_1 < 17
GROUP BY
user_id
HAVING
COUNT(*) > 1) AS subquery_2
ON
subquery_1.user_id = subquery_2.user_id
GROUP BY
subquery_1.user_id,
count_pay) AS subquery_top
GROUP BY
count_pay, user_id
)
GROUP BY user_id
HAVING count(*) > 3 AND sum(value_2) > 49000
ORDER BY 1;
-- NOT EXISTS query has non-equi join
SELECT user_id, array_length(events_table, 1)
FROM (
SELECT user_id, array_agg(event ORDER BY time) AS events_table
FROM (
SELECT
u.user_id, e.event_type::text AS event, e.time
FROM
users_table AS u,
events_table AS e
WHERE u.user_id = e.user_id AND
u.user_id IN
(
SELECT
user_id
FROM
users_table
WHERE value_2 >= 5
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 100 AND event_type <= 300 AND value_3 > 100 AND user_id = users_table.user_id)
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 300 AND event_type <= 350 AND value_3 > 100 AND user_id != users_table.user_id)
)
) t
GROUP BY user_id
) q
ORDER BY 2 DESC, 1;
-- subquery in where clause doesn't have a relation
SELECT
user_id
FROM
users_table
WHERE
value_2 >
(SELECT 1);
-- OFFSET is not supported in the subquey
SELECT
user_id
FROM
users_table
WHERE
value_2 >
(SELECT
max(value_2)
FROM
events_table
WHERE
users_table.user_id = events_table.user_id AND event_type = 50
GROUP BY
user_id
OFFSET 3
);
-- semi join is not on the partition key for the third subquery
SELECT user_id
FROM users_table
WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20)
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 30 AND value_1 <= 40)
AND value_2 IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60);
CREATE FUNCTION test_join_function(integer, integer) RETURNS bool
AS 'select $1 > $2;'
LANGUAGE SQL
IMMUTABLE
RETURNS NULL ON NULL INPUT;
-- we disallow JOINs via functions
SELECT user_id, value_2 FROM users_table WHERE
value_1 = 101
AND value_2 >= 5
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND test_join_function(events_table.user_id, users_table.user_id))
ORDER BY 1 DESC, 2 DESC
LIMIT 3;
DROP FUNCTION test_join_function(int,int);

View File

@ -1,22 +0,0 @@
--
-- MULTI_VERIFY_NO_SUBQUERY
--
-- This test checks that we simply emit an error message instead of trying to
-- process a distributed unsupported SQL subquery.
ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1030000;
SELECT * FROM lineitem WHERE l_orderkey IN
(SELECT l_orderkey FROM lineitem WHERE l_quantity > 0);
SELECT l_quantity FROM lineitem WHERE EXISTS
(SELECT 1 FROM orders WHERE o_orderkey = l_orderkey);
SELECT l_quantity FROM lineitem WHERE l_orderkey IN (SELECT o_orderkey FROM orders);
SELECT l_orderkey FROM lineitem WHERE l_quantity > ALL(SELECT o_orderkey FROM orders);
SELECT l_quantity FROM lineitem WHERE l_orderkey = (SELECT min(o_orderkey) FROM orders);