From f51157c4bb89cb88a6863440b474c1b22f5d2716 Mon Sep 17 00:00:00 2001 From: Onder Kalaci Date: Tue, 4 Apr 2017 09:27:08 +0300 Subject: [PATCH] Disallow subqueries without a relation in the range table list for subquery pushdown This commit disallows subqueries without relation in the range table list. This commit is only applied for subquery pushdown. In other words, we do not add this limitation for single table re-partition subqueries. The reasoning behind this limitation is that if we allow pushing down such queries, the result would include (shardCount * expectedResults) where in a non distributed world the result would be (expectedResult) only. --- .../planner/multi_logical_optimizer.c | 6 ++ .../multi_subquery_behavioral_analytics.out | 64 ++++++++++++++ .../multi_subquery_complex_queries.out | 86 +++++++++++++++++++ .../regress/expected/multi_subquery_union.out | 72 ++++++++++++++++ .../multi_subquery_behavioral_analytics.sql | 60 +++++++++++++ .../sql/multi_subquery_complex_queries.sql | 84 ++++++++++++++++++ src/test/regress/sql/multi_subquery_union.sql | 68 +++++++++++++++ 7 files changed, 440 insertions(+) diff --git a/src/backend/distributed/planner/multi_logical_optimizer.c b/src/backend/distributed/planner/multi_logical_optimizer.c index f4a30ebfd..bfe65667d 100644 --- a/src/backend/distributed/planner/multi_logical_optimizer.c +++ b/src/backend/distributed/planner/multi_logical_optimizer.c @@ -2949,6 +2949,12 @@ ErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerQueryHasLimit) errorDetail = "Subqueries other than from-clause subqueries are unsupported"; } + if (subqueryTree->rtable == NIL) + { + preconditionsSatisfied = false; + errorDetail = "Subqueries without relations are unsupported"; + } + if (subqueryTree->hasWindowFuncs) { preconditionsSatisfied = false; diff --git a/src/test/regress/expected/multi_subquery_behavioral_analytics.out b/src/test/regress/expected/multi_subquery_behavioral_analytics.out index df832e96d..fa49997d5 100644 --- a/src/test/regress/expected/multi_subquery_behavioral_analytics.out +++ b/src/test/regress/expected/multi_subquery_behavioral_analytics.out @@ -1609,5 +1609,69 @@ ORDER BY 1,2; (2 rows) DROP FUNCTION array_index(ANYARRAY, ANYELEMENT); +-- a not supported query due to constant range table entry +SELECT count(*) as subquery_count +FROM ( + SELECT + user_id + FROM + users_table + WHERE + (value_1 = '5' OR value_1 = '13') + GROUP BY user_id + HAVING count(distinct value_1) = 2 + ) as a + LEFT JOIN ( + SELECT + 1 as user_id + ) AS b + ON a.user_id = b.user_id +WHERE b.user_id IS NULL +GROUP BY a.user_id; +ERROR: cannot push down this subquery +DETAIL: Subqueries without relations are unsupported +-- same with INNER JOIN +SELECT count(*) as subquery_count +FROM ( + SELECT + user_id + FROM + users_table + WHERE + (value_1 = '5' OR value_1 = '13') + GROUP BY user_id + HAVING count(distinct value_1) = 2 + ) as a + INNER JOIN ( + SELECT + 1 as user_id + ) AS b + ON a.user_id = b.user_id +WHERE b.user_id IS NULL +GROUP BY a.user_id; +ERROR: cannot push down this subquery +DETAIL: Subqueries without relations are unsupported +-- this is slightly different, we use RTE_VALUEs here +SELECT Count(*) AS subquery_count +FROM (SELECT + user_id + FROM + users_table + WHERE + (value_1 = '5' OR value_1 = '13' ) + GROUP BY + user_id + HAVING + Count(DISTINCT value_1) = 2) AS a + INNER JOIN + (SELECT + * + FROM + (VALUES (1, 'one'), (2, 'two'), (3, 'three')) AS t (user_id, letter)) AS b + ON a.user_id = b.user_id +WHERE b.user_id IS NULL +GROUP BY a.user_id; +ERROR: cannot push down this subquery +DETAIL: Table expressions other than simple relations and subqueries are currently unsupported SET citus.subquery_pushdown TO FALSE; SET citus.enable_router_execution TO TRUE; diff --git a/src/test/regress/expected/multi_subquery_complex_queries.out b/src/test/regress/expected/multi_subquery_complex_queries.out index 6c8c70f1b..294327710 100644 --- a/src/test/regress/expected/multi_subquery_complex_queries.out +++ b/src/test/regress/expected/multi_subquery_complex_queries.out @@ -2027,5 +2027,91 @@ ORDER BY 1, 2 limit 10; ERROR: cannot push down this subquery DETAIL: Window functions are currently unsupported +-- not supported due to non relation rte +SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT *, random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") ORDER BY time ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 0 AS event + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 1 AS event + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 2 AS event + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION + (SELECT * + FROM + (SELECT 1, now(), 3 AS event) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "q" +INNER JOIN + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id = q.user_id)) as final_query +GROUP BY types +ORDER BY types; +ERROR: cannot push down this subquery +DETAIL: Subqueries without relations are unsupported +-- similar to the above, but constant rte is on the right side of the query +SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT *, random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") ORDER BY time ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 0 AS event + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION ALL + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 1 AS event + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION ALL + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 2 AS event + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION ALL + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 3 AS event + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "q" +INNER JOIN + (SELECT random()::int as user_id) AS t ON (t.user_id = q.user_id)) as final_query +GROUP BY types +ORDER BY types; +ERROR: cannot push down this subquery +DETAIL: Subqueries without relations are unsupported SET citus.subquery_pushdown TO FALSE; SET citus.enable_router_execution TO TRUE; diff --git a/src/test/regress/expected/multi_subquery_union.out b/src/test/regress/expected/multi_subquery_union.out index ee5d58e11..3b2be3465 100644 --- a/src/test/regress/expected/multi_subquery_union.out +++ b/src/test/regress/expected/multi_subquery_union.out @@ -773,5 +773,77 @@ FROM ) b; ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list. +-- we don't support subqueries without relations +SELECT + count(*) +FROM +( + (SELECT user_id FROM users_table) + UNION ALL + (SELECT 1) +) b; +ERROR: cannot push down this subquery +DETAIL: Subqueries without relations are unsupported +-- we don't support subqueries without relations +SELECT + * +FROM +( + (SELECT user_id FROM users_table) + UNION ALL + (SELECT (random() * 100)::int) +) b; +ERROR: cannot push down this subquery +DETAIL: Subqueries without relations are unsupported +-- we don't support subqueries without relations +SELECT + user_id, value_3 +FROM +( + (SELECT value_3, user_id FROM events_table where event_type IN (1, 2, 3, 4, 5)) + UNION ALL + (SELECT value_3, user_id FROM events_table where event_type IN (6, 7, 8, 9, 10)) + UNION ALL + (SELECT value_3, user_id FROM events_table where event_type IN (11, 12, 13, 14, 15)) + UNION ALL + (SELECT value_3, user_id FROM events_table where event_type IN (16, 17, 18, 19, 20)) + UNION ALL + (SELECT value_3, user_id FROM events_table where event_type IN (21, 22, 23, 24, 25)) + UNION ALL + (SELECT 1, 2) +) b +ORDER BY 1 DESC, 2 DESC +LIMIT 5; +ERROR: cannot push down this subquery +DETAIL: Subqueries without relations are unsupported +SELECT ("q"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") ORDER BY time ASC, event DESC) AS collected_events + FROM ( + (SELECT "events"."user_id", "events"."time", 0 AS event + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) + ) + UNION + (SELECT "events"."user_id", "events"."time", 1 AS event + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) + UNION + (SELECT "events"."user_id", "events"."time", 2 AS event + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) + UNION + (SELECT 1, now(), 3 AS event)) t1 + GROUP BY "t1"."user_id") AS t) "q" +GROUP BY types +ORDER BY types; +ERROR: cannot push down this subquery +DETAIL: Subqueries without relations are unsupported SET citus.subquery_pushdown TO false; SET citus.enable_router_execution TO true; diff --git a/src/test/regress/sql/multi_subquery_behavioral_analytics.sql b/src/test/regress/sql/multi_subquery_behavioral_analytics.sql index 8607f347a..ee86bf1f1 100644 --- a/src/test/regress/sql/multi_subquery_behavioral_analytics.sql +++ b/src/test/regress/sql/multi_subquery_behavioral_analytics.sql @@ -1285,6 +1285,66 @@ SELECT * FROM run_command_on_workers('DROP FUNCTION array_index(ANYARRAY, ANYELE ORDER BY 1,2; DROP FUNCTION array_index(ANYARRAY, ANYELEMENT); +-- a not supported query due to constant range table entry +SELECT count(*) as subquery_count +FROM ( + SELECT + user_id + FROM + users_table + WHERE + (value_1 = '5' OR value_1 = '13') + GROUP BY user_id + HAVING count(distinct value_1) = 2 + ) as a + LEFT JOIN ( + SELECT + 1 as user_id + ) AS b + ON a.user_id = b.user_id +WHERE b.user_id IS NULL +GROUP BY a.user_id; + +-- same with INNER JOIN +SELECT count(*) as subquery_count +FROM ( + SELECT + user_id + FROM + users_table + WHERE + (value_1 = '5' OR value_1 = '13') + GROUP BY user_id + HAVING count(distinct value_1) = 2 + ) as a + INNER JOIN ( + SELECT + 1 as user_id + ) AS b + ON a.user_id = b.user_id +WHERE b.user_id IS NULL +GROUP BY a.user_id; + +-- this is slightly different, we use RTE_VALUEs here +SELECT Count(*) AS subquery_count +FROM (SELECT + user_id + FROM + users_table + WHERE + (value_1 = '5' OR value_1 = '13' ) + GROUP BY + user_id + HAVING + Count(DISTINCT value_1) = 2) AS a + INNER JOIN + (SELECT + * + FROM + (VALUES (1, 'one'), (2, 'two'), (3, 'three')) AS t (user_id, letter)) AS b + ON a.user_id = b.user_id +WHERE b.user_id IS NULL +GROUP BY a.user_id; SET citus.subquery_pushdown TO FALSE; SET citus.enable_router_execution TO TRUE; diff --git a/src/test/regress/sql/multi_subquery_complex_queries.sql b/src/test/regress/sql/multi_subquery_complex_queries.sql index f7281f75c..018758b8e 100644 --- a/src/test/regress/sql/multi_subquery_complex_queries.sql +++ b/src/test/regress/sql/multi_subquery_complex_queries.sql @@ -1786,5 +1786,89 @@ FROM ( ORDER BY 1, 2 limit 10; +-- not supported due to non relation rte +SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT *, random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") ORDER BY time ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 0 AS event + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 1 AS event + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 2 AS event + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION + (SELECT * + FROM + (SELECT 1, now(), 3 AS event) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "q" +INNER JOIN + (SELECT "users"."user_id" + FROM users_table as "users" + WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id = q.user_id)) as final_query +GROUP BY types +ORDER BY types; + +-- similar to the above, but constant rte is on the right side of the query +SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT *, random() + FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") ORDER BY time ASC, event DESC) AS collected_events + FROM ( + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 0 AS event + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1) + UNION ALL + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 1 AS event + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) events_subquery_2) + UNION ALL + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 2 AS event + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3) + UNION ALL + (SELECT * + FROM + (SELECT "events"."user_id", "events"."time", 3 AS event + FROM events_table as "events" + WHERE event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1 + GROUP BY "t1"."user_id") AS t) "q" +INNER JOIN + (SELECT random()::int as user_id) AS t ON (t.user_id = q.user_id)) as final_query +GROUP BY types +ORDER BY types; + SET citus.subquery_pushdown TO FALSE; SET citus.enable_router_execution TO TRUE; diff --git a/src/test/regress/sql/multi_subquery_union.sql b/src/test/regress/sql/multi_subquery_union.sql index f459abed9..4daac4cce 100644 --- a/src/test/regress/sql/multi_subquery_union.sql +++ b/src/test/regress/sql/multi_subquery_union.sql @@ -605,5 +605,73 @@ FROM (SELECT users_table.user_id FROM events_table, users_table WHERE events_table.user_id = users_table.user_id) ) b; +-- we don't support subqueries without relations +SELECT + count(*) +FROM +( + (SELECT user_id FROM users_table) + UNION ALL + (SELECT 1) +) b; + +-- we don't support subqueries without relations +SELECT + * +FROM +( + (SELECT user_id FROM users_table) + UNION ALL + (SELECT (random() * 100)::int) +) b; + +-- we don't support subqueries without relations +SELECT + user_id, value_3 +FROM +( + (SELECT value_3, user_id FROM events_table where event_type IN (1, 2, 3, 4, 5)) + UNION ALL + (SELECT value_3, user_id FROM events_table where event_type IN (6, 7, 8, 9, 10)) + UNION ALL + (SELECT value_3, user_id FROM events_table where event_type IN (11, 12, 13, 14, 15)) + UNION ALL + (SELECT value_3, user_id FROM events_table where event_type IN (16, 17, 18, 19, 20)) + UNION ALL + (SELECT value_3, user_id FROM events_table where event_type IN (21, 22, 23, 24, 25)) + UNION ALL + (SELECT 1, 2) +) b +ORDER BY 1 DESC, 2 DESC +LIMIT 5; + +SELECT ("q"."event_types") as types, count(*) AS sumOfEventType +FROM + ( SELECT "t"."user_id", + "t"."time", + unnest("t"."collected_events") AS "event_types" + FROM + ( SELECT "t1"."user_id", + min("t1"."time") AS "time", + array_agg(("t1"."event") ORDER BY time ASC, event DESC) AS collected_events + FROM ( + (SELECT "events"."user_id", "events"."time", 0 AS event + FROM events_table as "events" + WHERE event_type IN (10, 11, 12, 13, 14, 15) + ) + UNION + (SELECT "events"."user_id", "events"."time", 1 AS event + FROM events_table as "events" + WHERE event_type IN (15, 16, 17, 18, 19) ) + UNION + (SELECT "events"."user_id", "events"."time", 2 AS event + FROM events_table as "events" + WHERE event_type IN (20, 21, 22, 23, 24, 25) ) + UNION + (SELECT 1, now(), 3 AS event)) t1 + GROUP BY "t1"."user_id") AS t) "q" +GROUP BY types +ORDER BY types; + SET citus.subquery_pushdown TO false; SET citus.enable_router_execution TO true;