Address review feedback

pull/4385/head
Marco Slot 2020-12-15 15:23:38 +01:00
parent 23dccd8941
commit 100e5d3196
9 changed files with 458 additions and 126 deletions

View File

@ -34,6 +34,7 @@
#include "distributed/pg_dist_partition.h"
#include "distributed/query_utils.h"
#include "distributed/query_pushdown_planning.h"
#include "distributed/recursive_planning.h"
#include "distributed/relation_restriction_equivalence.h"
#include "distributed/version_compat.h"
#include "nodes/nodeFuncs.h"
@ -78,6 +79,7 @@ static RecurringTuplesType FromClauseRecurringTupleType(Query *queryTree);
static DeferredErrorMessage * DeferredErrorIfUnsupportedRecurringTuplesJoin(
PlannerRestrictionContext *plannerRestrictionContext);
static DeferredErrorMessage * DeferErrorIfUnsupportedTableCombination(Query *queryTree);
static DeferredErrorMessage * DeferErrorIfSubqueryRequiresMerge(Query *subqueryTree);
static bool ExtractSetOperationStatmentWalker(Node *node, List **setOperationList);
static RecurringTuplesType FetchFirstRecurType(PlannerInfo *plannerInfo,
Relids relids);
@ -911,7 +913,6 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi
{
bool preconditionsSatisfied = true;
char *errorDetail = NULL;
StringInfo errorInfo = NULL;
DeferredErrorMessage *deferredError = DeferErrorIfUnsupportedTableCombination(
subqueryTree);
@ -934,84 +935,12 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi
* push down SQL features within such a function, as long as co-located join
* checks are applied.
*/
if (!contain_vars_of_level((Node *) subqueryTree, 1))
if (!ContainsReferencesToOuterQuery(subqueryTree))
{
if (subqueryTree->limitOffset)
deferredError = DeferErrorIfSubqueryRequiresMerge(subqueryTree);
if (deferredError)
{
preconditionsSatisfied = false;
errorDetail = "Offset clause is currently unsupported when a subquery "
"references a column from another query";
}
/* limit is not supported when SubqueryPushdown is not set */
if (subqueryTree->limitCount && !SubqueryPushdown)
{
preconditionsSatisfied = false;
errorDetail = "Limit in subquery is currently unsupported when a "
"subquery references a column from another query";
}
/* group clause list must include partition column */
if (subqueryTree->groupClause)
{
List *groupClauseList = subqueryTree->groupClause;
List *targetEntryList = subqueryTree->targetList;
List *groupTargetEntryList = GroupTargetEntryList(groupClauseList,
targetEntryList);
bool groupOnPartitionColumn =
TargetListOnPartitionColumn(subqueryTree, groupTargetEntryList);
if (!groupOnPartitionColumn)
{
preconditionsSatisfied = false;
errorDetail = "Group by list without partition column is currently "
"unsupported when a subquery references a column "
"from another query";
}
}
/* we don't support aggregates without group by */
if (subqueryTree->hasAggs && (subqueryTree->groupClause == NULL))
{
preconditionsSatisfied = false;
errorDetail = "Aggregates without group by are currently unsupported "
"when a subquery references a column from another query";
}
/* having clause without group by on partition column is not supported */
if (subqueryTree->havingQual && (subqueryTree->groupClause == NULL))
{
preconditionsSatisfied = false;
errorDetail = "Having qual without group by on partition column is "
"currently unsupported when a subquery references "
"a column from another query";
}
/*
* We support window functions when the window function
* is partitioned on distribution column.
*/
if (subqueryTree->hasWindowFuncs && !SafeToPushdownWindowFunction(subqueryTree,
&errorInfo))
{
errorDetail = (char *) errorInfo->data;
preconditionsSatisfied = false;
}
/* distinct clause list must include partition column */
if (subqueryTree->distinctClause)
{
List *distinctClauseList = subqueryTree->distinctClause;
List *targetEntryList = subqueryTree->targetList;
List *distinctTargetEntryList = GroupTargetEntryList(distinctClauseList,
targetEntryList);
bool distinctOnPartitionColumn =
TargetListOnPartitionColumn(subqueryTree, distinctTargetEntryList);
if (!distinctOnPartitionColumn)
{
preconditionsSatisfied = false;
errorDetail = "Distinct on columns without partition column is "
"currently unsupported";
}
return deferredError;
}
}
@ -1080,6 +1009,108 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi
}
/*
* DeferErrorIfSubqueryRequiresMerge returns a deferred error if the subquery
* requires a merge step on the coordinator (e.g. limit, group by non-distribution
* column, etc.).
*/
static DeferredErrorMessage *
DeferErrorIfSubqueryRequiresMerge(Query *subqueryTree)
{
bool preconditionsSatisfied = true;
char *errorDetail = NULL;
if (subqueryTree->limitOffset)
{
preconditionsSatisfied = false;
errorDetail = "Offset clause is currently unsupported when a subquery "
"references a column from another query";
}
/* limit is not supported when SubqueryPushdown is not set */
if (subqueryTree->limitCount && !SubqueryPushdown)
{
preconditionsSatisfied = false;
errorDetail = "Limit in subquery is currently unsupported when a "
"subquery references a column from another query";
}
/* group clause list must include partition column */
if (subqueryTree->groupClause)
{
List *groupClauseList = subqueryTree->groupClause;
List *targetEntryList = subqueryTree->targetList;
List *groupTargetEntryList = GroupTargetEntryList(groupClauseList,
targetEntryList);
bool groupOnPartitionColumn =
TargetListOnPartitionColumn(subqueryTree, groupTargetEntryList);
if (!groupOnPartitionColumn)
{
preconditionsSatisfied = false;
errorDetail = "Group by list without partition column is currently "
"unsupported when a subquery references a column "
"from another query";
}
}
/* we don't support aggregates without group by */
if (subqueryTree->hasAggs && (subqueryTree->groupClause == NULL))
{
preconditionsSatisfied = false;
errorDetail = "Aggregates without group by are currently unsupported "
"when a subquery references a column from another query";
}
/* having clause without group by on partition column is not supported */
if (subqueryTree->havingQual && (subqueryTree->groupClause == NULL))
{
preconditionsSatisfied = false;
errorDetail = "Having qual without group by on partition column is "
"currently unsupported when a subquery references "
"a column from another query";
}
/*
* We support window functions when the window function
* is partitioned on distribution column.
*/
StringInfo errorInfo = NULL;
if (subqueryTree->hasWindowFuncs && !SafeToPushdownWindowFunction(subqueryTree,
&errorInfo))
{
errorDetail = (char *) errorInfo->data;
preconditionsSatisfied = false;
}
/* distinct clause list must include partition column */
if (subqueryTree->distinctClause)
{
List *distinctClauseList = subqueryTree->distinctClause;
List *targetEntryList = subqueryTree->targetList;
List *distinctTargetEntryList = GroupTargetEntryList(distinctClauseList,
targetEntryList);
bool distinctOnPartitionColumn =
TargetListOnPartitionColumn(subqueryTree, distinctTargetEntryList);
if (!distinctOnPartitionColumn)
{
preconditionsSatisfied = false;
errorDetail = "Distinct on columns without partition column is "
"currently unsupported";
}
}
/* finally check and return deferred if not satisfied */
if (!preconditionsSatisfied)
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"cannot push down this subquery",
errorDetail, NULL);
}
return NULL;
}
/*
* DeferErrorIfUnsupportedTableCombination checks if the given query tree contains any
* unsupported range table combinations. For this, the function walks over all

View File

@ -180,7 +180,6 @@ static bool IsLocalTableRteOrMatView(Node *node);
static DistributedSubPlan * CreateDistributedSubPlan(uint32 subPlanId,
Query *subPlanQuery);
static bool CteReferenceListWalker(Node *node, CteReferenceWalkerContext *context);
static bool ContainsReferencesToOuterQuery(Query *query);
static bool ContainsReferencesToOuterQueryWalker(Node *node,
VarLevelsUpWalkerContext *context);
static bool NodeContainsSubqueryReferencingOuterQuery(Node *node);
@ -1288,7 +1287,7 @@ CteReferenceListWalker(Node *node, CteReferenceWalkerContext *context)
* anything that points outside of the query itself. Such queries cannot be
* planned recursively.
*/
static bool
bool
ContainsReferencesToOuterQuery(Query *query)
{
VarLevelsUpWalkerContext context = { 0 };

View File

@ -49,4 +49,7 @@ extern void ReplaceRTERelationWithRteSubquery(RangeTblEntry *rangeTableEntry,
RecursivePlanningContext *context);
extern bool IsRecursivelyPlannableRelation(RangeTblEntry *rangeTableEntry);
extern bool IsRelationLocalTableOrMatView(Oid relationId);
extern bool ContainsReferencesToOuterQuery(Query *query);
#endif /* RECURSIVE_PLANNING_H */

View File

@ -484,7 +484,7 @@ DEBUG: query has a single distribution column value: 1
41 | 11814
(5 rows)
-- subqueries are not supported in SELECT clause
-- subqueries in SELECT clause
SELECT a.title AS name, (SELECT a2.id FROM articles_single_shard_hash_mx a2 WHERE a.id = a2.id LIMIT 1)
AS special_price FROM articles_hash_mx a;
DEBUG: Router planner cannot handle multi-shard select queries

View File

@ -1227,7 +1227,7 @@ FROM
WHERE
"users"."user_id" = "some_recent_users"."user_id" AND
users.value_2 > 1 and users.value_2 < 3
LIMIT 1) "some_users_data"
ORDER BY 1 LIMIT 1) "some_users_data"
ON TRUE
ORDER BY
user_id
@ -1240,9 +1240,7 @@ limit 50;
-- reset subquery_pushdown
SET citus.subquery_pushdown to OFF;
-- we recursively plan recent_events_1
-- but not some_users_data since it has a reference
-- from an outer query which is not recursively planned
-- mixture of recursively planned subqueries and correlated subqueries
SELECT "some_users_data".user_id, lastseen
FROM
(SELECT user_id, max(time) AS lastseen
@ -1270,24 +1268,21 @@ FROM
WHERE
"users"."value_1" = "some_recent_users"."user_id" AND
users.value_2 > 1 and users.value_2 < 3
LIMIT 1) "some_users_data"
ORDER BY 1 LIMIT 1) "some_users_data"
ON TRUE
ORDER BY
user_id
limit 50;
user_id | lastseen
---------------------------------------------------------------------
1 | Thu Nov 23 18:08:26.550729 2017
2 | Thu Nov 23 17:26:14.563216 2017
3 | Thu Nov 23 18:08:26.550729 2017
3 | Thu Nov 23 17:26:14.563216 2017
5 | Thu Nov 23 18:08:26.550729 2017
5 | Thu Nov 23 17:26:14.563216 2017
6 | Thu Nov 23 18:08:26.550729 2017
(6 rows)
-- we recursively plan some queries but fail in the end
-- since some_users_data since it has a reference
-- from an outer query which is not recursively planned
SELECT "some_users_data".user_id, lastseen
FROM
(SELECT 2 * user_id as user_id, max(time) AS lastseen
@ -1315,7 +1310,7 @@ FROM
WHERE
"users"."user_id" = "some_recent_users"."user_id" AND
users.value_2 > 1 and users.value_2 < 3
LIMIT 1) "some_users_data"
ORDER BY 1 LIMIT 1) "some_users_data"
ON TRUE
ORDER BY
user_id
@ -1379,7 +1374,7 @@ FROM
WHERE
"users"."user_id" = "some_recent_users"."user_id" AND
"users"."value_2" > 4
LIMIT 1) "some_users_data"
ORDER BY 1 LIMIT 1) "some_users_data"
ON TRUE
ORDER BY
lastseen DESC
@ -1447,7 +1442,7 @@ SELECT "some_users_data".user_id, MAX(lastseen), count(*)
WHERE
"users"."user_id" = "some_recent_users"."user_id" AND
"users"."value_2" > 4
LIMIT 1) "some_users_data" ON true
ORDER BY 1 LIMIT 1) "some_users_data" ON true
GROUP BY 1
ORDER BY 2, 1 DESC
LIMIT 10;
@ -1504,7 +1499,7 @@ FROM
WHERE
"users"."user_id" = "some_recent_users"."user_id" AND
"users"."value_2" > 4
LIMIT 1) "some_users_data" ON true
ORDER BY 1 LIMIT 1) "some_users_data" ON true
ORDER BY
lastseen DESC
LIMIT 10) "some_users"
@ -1519,7 +1514,7 @@ DEBUG: push down of limit count: 10
DEBUG: generating subplan XXX_2 for subquery SELECT filter_users_1.user_id, last_events_1."time" AS lastseen FROM ((SELECT user_where_1_1.user_id FROM ((SELECT users.user_id FROM public.users_table users WHERE ((users.user_id OPERATOR(pg_catalog.>) 1) AND (users.user_id OPERATOR(pg_catalog.<) 4) AND (users.value_1 OPERATOR(pg_catalog.>) 2))) user_where_1_1 JOIN (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) user_where_1_join_1 ON ((user_where_1_1.user_id OPERATOR(pg_catalog.<>) user_where_1_join_1.user_id)))) filter_users_1 JOIN LATERAL (SELECT events.user_id, events."time" FROM public.events_table events WHERE ((events.user_id OPERATOR(pg_catalog.>) 1) AND (events.user_id OPERATOR(pg_catalog.<) 4) AND (events.user_id OPERATOR(pg_catalog.=) filter_users_1.user_id)) ORDER BY events."time" DESC LIMIT 1) last_events_1 ON (true)) ORDER BY last_events_1."time" DESC LIMIT 10
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: push down of limit count: 10
DEBUG: generating subplan XXX_3 for subquery SELECT some_users_data.user_id, some_recent_users.lastseen FROM ((SELECT intermediate_result.user_id, intermediate_result.lastseen FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, lastseen timestamp without time zone)) some_recent_users JOIN LATERAL (SELECT users.user_id FROM public.users_table users WHERE ((users.user_id OPERATOR(pg_catalog.=) some_recent_users.user_id) AND (users.value_2 OPERATOR(pg_catalog.>) 4)) LIMIT 1) some_users_data ON (true)) ORDER BY some_recent_users.lastseen DESC LIMIT 10
DEBUG: generating subplan XXX_3 for subquery SELECT some_users_data.user_id, some_recent_users.lastseen FROM ((SELECT intermediate_result.user_id, intermediate_result.lastseen FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, lastseen timestamp without time zone)) some_recent_users JOIN LATERAL (SELECT users.user_id FROM public.users_table users WHERE ((users.user_id OPERATOR(pg_catalog.=) some_recent_users.user_id) AND (users.value_2 OPERATOR(pg_catalog.>) 4)) ORDER BY users.user_id LIMIT 1) some_users_data ON (true)) ORDER BY some_recent_users.lastseen DESC LIMIT 10
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, lastseen FROM (SELECT intermediate_result.user_id, intermediate_result.lastseen FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, lastseen timestamp without time zone)) some_users ORDER BY user_id DESC, lastseen DESC LIMIT 10
DEBUG: Creating router plan
user_id | lastseen
@ -1584,7 +1579,7 @@ FROM
WHERE
"users"."user_id" = "some_recent_users"."user_id" AND
"users"."value_2" > 4
LIMIT 1) "some_users_data" ON true
ORDER BY 1 LIMIT 1) "some_users_data" ON true
ORDER BY
lastseen DESC
LIMIT 10) "some_users"
@ -1595,7 +1590,7 @@ DEBUG: generating subplan XXX_1 for subquery SELECT user_id, value_1 FROM publi
DEBUG: push down of limit count: 10
DEBUG: generating subplan XXX_2 for subquery SELECT filter_users_1.user_id, last_events_1."time" AS lastseen FROM ((SELECT user_where_1_1.user_id FROM ((SELECT users.user_id FROM public.users_table users WHERE ((users.user_id OPERATOR(pg_catalog.>) 1) AND (users.user_id OPERATOR(pg_catalog.<) 4) AND (users.value_1 OPERATOR(pg_catalog.>) 2))) user_where_1_1 JOIN (SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer)) user_where_1_join_1 ON ((user_where_1_1.user_id OPERATOR(pg_catalog.=) user_where_1_join_1.value_1)))) filter_users_1 JOIN LATERAL (SELECT events.user_id, events."time" FROM public.events_table events WHERE ((events.user_id OPERATOR(pg_catalog.>) 1) AND (events.user_id OPERATOR(pg_catalog.<) 4) AND (events.user_id OPERATOR(pg_catalog.=) filter_users_1.user_id)) ORDER BY events."time" DESC LIMIT 1) last_events_1 ON (true)) ORDER BY last_events_1."time" DESC LIMIT 10
DEBUG: push down of limit count: 10
DEBUG: generating subplan XXX_3 for subquery SELECT some_users_data.user_id, some_recent_users.lastseen FROM ((SELECT intermediate_result.user_id, intermediate_result.lastseen FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, lastseen timestamp without time zone)) some_recent_users JOIN LATERAL (SELECT users.user_id FROM public.users_table users WHERE ((users.user_id OPERATOR(pg_catalog.=) some_recent_users.user_id) AND (users.value_2 OPERATOR(pg_catalog.>) 4)) LIMIT 1) some_users_data ON (true)) ORDER BY some_recent_users.lastseen DESC LIMIT 10
DEBUG: generating subplan XXX_3 for subquery SELECT some_users_data.user_id, some_recent_users.lastseen FROM ((SELECT intermediate_result.user_id, intermediate_result.lastseen FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, lastseen timestamp without time zone)) some_recent_users JOIN LATERAL (SELECT users.user_id FROM public.users_table users WHERE ((users.user_id OPERATOR(pg_catalog.=) some_recent_users.user_id) AND (users.value_2 OPERATOR(pg_catalog.>) 4)) ORDER BY users.user_id LIMIT 1) some_users_data ON (true)) ORDER BY some_recent_users.lastseen DESC LIMIT 10
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, lastseen FROM (SELECT intermediate_result.user_id, intermediate_result.lastseen FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, lastseen timestamp without time zone)) some_users ORDER BY user_id DESC, lastseen DESC LIMIT 10
user_id | lastseen
---------------------------------------------------------------------
@ -1667,9 +1662,7 @@ ORDER BY
user_id DESC, lastseen DESC
LIMIT 10;
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- not pushdownable since lower LATERAL JOIN is not on the partition key
-- not recursively plannable due to LATERAL join where there is a reference
-- from an outer query
-- complex lateral join between inner join and correlated subquery
SELECT user_id, lastseen
FROM
(SELECT
@ -1715,7 +1708,7 @@ FROM
WHERE
"users"."value_1" = "some_recent_users"."user_id" AND
"users"."value_2" > 4
LIMIT 1) "some_users_data" ON true
ORDER BY 1 LIMIT 1) "some_users_data" ON true
ORDER BY
lastseen DESC
LIMIT 10) "some_users"
@ -2339,9 +2332,7 @@ LIMIT 10;
(1 row)
SET citus.subquery_pushdown to OFF;
-- not pushdownable since lower LATERAL JOIN is not on the partition key
-- not recursively plannable due to LATERAL join where there is a reference
-- from an outer query
-- on side of the lateral join can be recursively plannen, then pushed down
SELECT *
FROM
(SELECT
@ -2372,7 +2363,7 @@ FROM
WHERE
"users"."value_2" = "some_recent_users"."user_id" AND
value_2 > 4
LIMIT 1) "some_users_data" ON true
ORDER BY 1 LIMIT 1) "some_users_data" ON true
ORDER BY
value_2 DESC
LIMIT 10) "some_users"

View File

@ -967,6 +967,186 @@ DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS c
24
(1 row)
-- two levels of correlation should also allow
-- merge step in the subquery
SELECT sum(value_1)
FROM users_table u
WHERE EXISTS
(SELECT 1
FROM events_table e
WHERE u.user_id = e.user_id AND
EXISTS
(SELECT 1
FROM users_table u2
WHERE u2.user_id = u.user_id AND u2.value_1 = 5
LIMIT 1));
sum
---------------------------------------------------------------------
216
(1 row)
-- correlated subquery in WHERE, with a slightly
-- different syntax that the result of the subquery
-- is compared with a constant
SELECT sum(value_1)
FROM users_table u1
WHERE (SELECT COUNT(DISTINCT e1.value_2)
FROM events_table e1
WHERE e1.user_id = u1.user_id
) > 115;
sum
---------------------------------------------------------------------
(1 row)
-- a correlated subquery which requires merge step
-- can be pushed down on UPDATE/DELETE queries as well
-- rollback to keep the rest of the tests unchanged
BEGIN;
UPDATE users_table u1
SET value_1 = (SELECT count(DISTINCT value_2)
FROM events_table e1
WHERE e1.user_id = u1.user_id);
DELETE FROM users_table u1 WHERE (SELECT count(DISTINCT value_2)
FROM events_table e1
WHERE e1.user_id = u1.user_id) > 10;
ROLLBACK;
-- a correlated anti-join can also be pushed down even if the subquery
-- has a LIMIT
SELECT avg(value_1)
FROM users_table u
WHERE NOT EXISTS
(SELECT 'XXX'
FROM events_table e
WHERE u.user_id = e.user_id and e.value_2 > 10000 LIMIT 1);
avg
---------------------------------------------------------------------
2.5544554455445545
(1 row)
-- a [correlated] lateral join can also be pushed down even if the subquery
-- has an aggregate wout a GROUP BY
SELECT
max(min_of_val_2), max(u1.value_1)
FROM
users_table u1
LEFT JOIN LATERAL
(SELECT min(e1.value_2) as min_of_val_2 FROM events_table e1 WHERE e1.user_id = u1.user_id) as foo ON (true);
max | max
---------------------------------------------------------------------
1 | 5
(1 row)
-- a self join is followed by a correlated subquery
EXPLAIN (COSTS OFF)
SELECT
*
FROM
users_table u1 JOIN users_table u2 USING (user_id)
WHERE
u1.value_1 < u2.value_1 AND
(SELECT
count(*)
FROM
events_table e1
WHERE
e1.user_id = u2.user_id) > 10;
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Hash Join
Hash Cond: (u2.user_id = u1.user_id)
Join Filter: (u1.value_1 < u2.value_1)
-> Seq Scan on users_table_1400256 u2
Filter: ((SubPlan 1) > 10)
SubPlan 1
-> Aggregate
-> Seq Scan on events_table_1400260 e1
Filter: (user_id = u2.user_id)
-> Hash
-> Seq Scan on users_table_1400256 u1
(16 rows)
-- when the colocated join of the FROM clause
-- entries happen on WHERE clause, Citus cannot
-- pushdown
-- Likely that the colocation checks should be
-- improved
SELECT
u1.user_id, u2.user_id
FROM
users_table u1, users_table u2
WHERE
u1.value_1 < u2.value_1 AND
(SELECT
count(*)
FROM
events_table e1
WHERE
e1.user_id = u2.user_id AND
u1.user_id = u2.user_id) > 10
ORDER BY 1,2;
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- create a view that contains correlated subquery
CREATE TEMPORARY VIEW correlated_subquery_view AS
SELECT u1.user_id
FROM users_table u1
WHERE (SELECT COUNT(DISTINCT e1.value_2)
FROM events_table e1
WHERE e1.user_id = u1.user_id
) > 0;
SELECT sum(user_id) FROM correlated_subquery_view;
sum
---------------------------------------------------------------------
376
(1 row)
-- now, join the view with another correlated subquery
SELECT
sum(mx)
FROM
correlated_subquery_view
LEFT JOIN LATERAL
(SELECT max(value_2) as mx FROM events_table WHERE correlated_subquery_view.user_id = events_table.user_id) as foo ON (true);
sum
---------------------------------------------------------------------
459
(1 row)
-- as an edge case, JOIN is on false
SELECT
sum(mx)
FROM
correlated_subquery_view
LEFT JOIN LATERAL
(SELECT max(value_2) as mx FROM events_table WHERE correlated_subquery_view.user_id = events_table.user_id) as foo ON (false);
sum
---------------------------------------------------------------------
(1 row)
SELECT sum(value_1)
FROM users_table u1
WHERE (SELECT COUNT(DISTINCT e1.value_2)
FROM events_table e1
WHERE e1.user_id = u1.user_id AND false
) > 115;
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
SELECT sum(value_1)
FROM users_table u1
WHERE (SELECT COUNT(DISTINCT e1.value_2)
FROM events_table e1
WHERE e1.user_id = u1.user_id
) > 115 AND false;
sum
---------------------------------------------------------------------
(1 row)
SET client_min_messages TO DEFAULT;
DROP TABLE local_table;
DROP SCHEMA subquery_in_where CASCADE;

View File

@ -235,7 +235,7 @@ FROM articles_hash_mx, (SELECT id, word_count FROM articles_hash_mx) AS test
WHERE test.id = articles_hash_mx.id and articles_hash_mx.author_id = 1
ORDER BY articles_hash_mx.id;
-- subqueries are not supported in SELECT clause
-- subqueries in SELECT clause
SELECT a.title AS name, (SELECT a2.id FROM articles_single_shard_hash_mx a2 WHERE a.id = a2.id LIMIT 1)
AS special_price FROM articles_hash_mx a;

View File

@ -1119,7 +1119,7 @@ FROM
WHERE
"users"."user_id" = "some_recent_users"."user_id" AND
users.value_2 > 1 and users.value_2 < 3
LIMIT 1) "some_users_data"
ORDER BY 1 LIMIT 1) "some_users_data"
ON TRUE
ORDER BY
user_id
@ -1128,9 +1128,7 @@ limit 50;
-- reset subquery_pushdown
SET citus.subquery_pushdown to OFF;
-- we recursively plan recent_events_1
-- but not some_users_data since it has a reference
-- from an outer query which is not recursively planned
-- mixture of recursively planned subqueries and correlated subqueries
SELECT "some_users_data".user_id, lastseen
FROM
(SELECT user_id, max(time) AS lastseen
@ -1158,15 +1156,12 @@ FROM
WHERE
"users"."value_1" = "some_recent_users"."user_id" AND
users.value_2 > 1 and users.value_2 < 3
LIMIT 1) "some_users_data"
ORDER BY 1 LIMIT 1) "some_users_data"
ON TRUE
ORDER BY
user_id
limit 50;
-- we recursively plan some queries but fail in the end
-- since some_users_data since it has a reference
-- from an outer query which is not recursively planned
SELECT "some_users_data".user_id, lastseen
FROM
(SELECT 2 * user_id as user_id, max(time) AS lastseen
@ -1194,7 +1189,7 @@ FROM
WHERE
"users"."user_id" = "some_recent_users"."user_id" AND
users.value_2 > 1 and users.value_2 < 3
LIMIT 1) "some_users_data"
ORDER BY 1 LIMIT 1) "some_users_data"
ON TRUE
ORDER BY
user_id
@ -1251,7 +1246,7 @@ FROM
WHERE
"users"."user_id" = "some_recent_users"."user_id" AND
"users"."value_2" > 4
LIMIT 1) "some_users_data"
ORDER BY 1 LIMIT 1) "some_users_data"
ON TRUE
ORDER BY
lastseen DESC
@ -1306,7 +1301,7 @@ SELECT "some_users_data".user_id, MAX(lastseen), count(*)
WHERE
"users"."user_id" = "some_recent_users"."user_id" AND
"users"."value_2" > 4
LIMIT 1) "some_users_data" ON true
ORDER BY 1 LIMIT 1) "some_users_data" ON true
GROUP BY 1
ORDER BY 2, 1 DESC
LIMIT 10;
@ -1360,7 +1355,7 @@ FROM
WHERE
"users"."user_id" = "some_recent_users"."user_id" AND
"users"."value_2" > 4
LIMIT 1) "some_users_data" ON true
ORDER BY 1 LIMIT 1) "some_users_data" ON true
ORDER BY
lastseen DESC
LIMIT 10) "some_users"
@ -1418,7 +1413,7 @@ FROM
WHERE
"users"."user_id" = "some_recent_users"."user_id" AND
"users"."value_2" > 4
LIMIT 1) "some_users_data" ON true
ORDER BY 1 LIMIT 1) "some_users_data" ON true
ORDER BY
lastseen DESC
LIMIT 10) "some_users"
@ -1484,9 +1479,7 @@ ORDER BY
user_id DESC, lastseen DESC
LIMIT 10;
-- not pushdownable since lower LATERAL JOIN is not on the partition key
-- not recursively plannable due to LATERAL join where there is a reference
-- from an outer query
-- complex lateral join between inner join and correlated subquery
SELECT user_id, lastseen
FROM
(SELECT
@ -1532,7 +1525,7 @@ FROM
WHERE
"users"."value_1" = "some_recent_users"."user_id" AND
"users"."value_2" > 4
LIMIT 1) "some_users_data" ON true
ORDER BY 1 LIMIT 1) "some_users_data" ON true
ORDER BY
lastseen DESC
LIMIT 10) "some_users"
@ -2033,9 +2026,7 @@ ORDER BY
LIMIT 10;
SET citus.subquery_pushdown to OFF;
-- not pushdownable since lower LATERAL JOIN is not on the partition key
-- not recursively plannable due to LATERAL join where there is a reference
-- from an outer query
-- on side of the lateral join can be recursively plannen, then pushed down
SELECT *
FROM
(SELECT
@ -2066,7 +2057,7 @@ FROM
WHERE
"users"."value_2" = "some_recent_users"."user_id" AND
value_2 > 4
LIMIT 1) "some_users_data" ON true
ORDER BY 1 LIMIT 1) "some_users_data" ON true
ORDER BY
value_2 DESC
LIMIT 10) "some_users"

View File

@ -713,6 +713,143 @@ WHERE
WHERE u.value_2 > 3
GROUP BY r.value_2 HAVING min(r.value_3) > 0);
-- two levels of correlation should also allow
-- merge step in the subquery
SELECT sum(value_1)
FROM users_table u
WHERE EXISTS
(SELECT 1
FROM events_table e
WHERE u.user_id = e.user_id AND
EXISTS
(SELECT 1
FROM users_table u2
WHERE u2.user_id = u.user_id AND u2.value_1 = 5
LIMIT 1));
-- correlated subquery in WHERE, with a slightly
-- different syntax that the result of the subquery
-- is compared with a constant
SELECT sum(value_1)
FROM users_table u1
WHERE (SELECT COUNT(DISTINCT e1.value_2)
FROM events_table e1
WHERE e1.user_id = u1.user_id
) > 115;
-- a correlated subquery which requires merge step
-- can be pushed down on UPDATE/DELETE queries as well
-- rollback to keep the rest of the tests unchanged
BEGIN;
UPDATE users_table u1
SET value_1 = (SELECT count(DISTINCT value_2)
FROM events_table e1
WHERE e1.user_id = u1.user_id);
DELETE FROM users_table u1 WHERE (SELECT count(DISTINCT value_2)
FROM events_table e1
WHERE e1.user_id = u1.user_id) > 10;
ROLLBACK;
-- a correlated anti-join can also be pushed down even if the subquery
-- has a LIMIT
SELECT avg(value_1)
FROM users_table u
WHERE NOT EXISTS
(SELECT 'XXX'
FROM events_table e
WHERE u.user_id = e.user_id and e.value_2 > 10000 LIMIT 1);
-- a [correlated] lateral join can also be pushed down even if the subquery
-- has an aggregate wout a GROUP BY
SELECT
max(min_of_val_2), max(u1.value_1)
FROM
users_table u1
LEFT JOIN LATERAL
(SELECT min(e1.value_2) as min_of_val_2 FROM events_table e1 WHERE e1.user_id = u1.user_id) as foo ON (true);
-- a self join is followed by a correlated subquery
EXPLAIN (COSTS OFF)
SELECT
*
FROM
users_table u1 JOIN users_table u2 USING (user_id)
WHERE
u1.value_1 < u2.value_1 AND
(SELECT
count(*)
FROM
events_table e1
WHERE
e1.user_id = u2.user_id) > 10;
-- when the colocated join of the FROM clause
-- entries happen on WHERE clause, Citus cannot
-- pushdown
-- Likely that the colocation checks should be
-- improved
SELECT
u1.user_id, u2.user_id
FROM
users_table u1, users_table u2
WHERE
u1.value_1 < u2.value_1 AND
(SELECT
count(*)
FROM
events_table e1
WHERE
e1.user_id = u2.user_id AND
u1.user_id = u2.user_id) > 10
ORDER BY 1,2;
-- create a view that contains correlated subquery
CREATE TEMPORARY VIEW correlated_subquery_view AS
SELECT u1.user_id
FROM users_table u1
WHERE (SELECT COUNT(DISTINCT e1.value_2)
FROM events_table e1
WHERE e1.user_id = u1.user_id
) > 0;
SELECT sum(user_id) FROM correlated_subquery_view;
-- now, join the view with another correlated subquery
SELECT
sum(mx)
FROM
correlated_subquery_view
LEFT JOIN LATERAL
(SELECT max(value_2) as mx FROM events_table WHERE correlated_subquery_view.user_id = events_table.user_id) as foo ON (true);
-- as an edge case, JOIN is on false
SELECT
sum(mx)
FROM
correlated_subquery_view
LEFT JOIN LATERAL
(SELECT max(value_2) as mx FROM events_table WHERE correlated_subquery_view.user_id = events_table.user_id) as foo ON (false);
SELECT sum(value_1)
FROM users_table u1
WHERE (SELECT COUNT(DISTINCT e1.value_2)
FROM events_table e1
WHERE e1.user_id = u1.user_id AND false
) > 115;
SELECT sum(value_1)
FROM users_table u1
WHERE (SELECT COUNT(DISTINCT e1.value_2)
FROM events_table e1
WHERE e1.user_id = u1.user_id
) > 115 AND false;
SET client_min_messages TO DEFAULT;
DROP TABLE local_table;