diff --git a/src/backend/distributed/planner/query_pushdown_planning.c b/src/backend/distributed/planner/query_pushdown_planning.c index 13d511b17..c70f1bb50 100644 --- a/src/backend/distributed/planner/query_pushdown_planning.c +++ b/src/backend/distributed/planner/query_pushdown_planning.c @@ -596,7 +596,7 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery, { return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, "complex joins are only supported when all distributed tables are " - "joined on their distribution columns with equal operator", + "co-located and joined on their distribution columns", NULL, NULL); } diff --git a/src/backend/distributed/planner/relation_restriction_equivalence.c b/src/backend/distributed/planner/relation_restriction_equivalence.c index 6ab91ec2e..63a55692e 100644 --- a/src/backend/distributed/planner/relation_restriction_equivalence.c +++ b/src/backend/distributed/planner/relation_restriction_equivalence.c @@ -10,7 +10,9 @@ */ #include "postgres.h" +#include "distributed/colocation_utils.h" #include "distributed/distributed_planner.h" +#include "distributed/listutils.h" #include "distributed/metadata_cache.h" #include "distributed/multi_logical_planner.h" #include "distributed/multi_logical_optimizer.h" @@ -133,6 +135,8 @@ static void ListConcatUniqueAttributeClassMemberLists(AttributeEquivalenceClass secondClass); static Index RelationRestrictionPartitionKeyIndex(RelationRestriction * relationRestriction); +static bool AllRelationsInRestrictionContextColocated(RelationRestrictionContext * + restrictionContext); static RelationRestrictionContext * FilterRelationRestrictionContext( RelationRestrictionContext *relationRestrictionContext, Relids @@ -345,8 +349,20 @@ SafeToPushdownUnionSubquery(PlannerRestrictionContext *plannerRestrictionContext allAttributeEquivalenceList = lappend(allAttributeEquivalenceList, attributeEquivalance); - return EquivalenceListContainsRelationsEquality(allAttributeEquivalenceList, - 
restrictionContext); + if (!EquivalenceListContainsRelationsEquality(allAttributeEquivalenceList, + restrictionContext)) + { + /* cannot confirm equality for all distribution columns */ + return false; + } + + if (!AllRelationsInRestrictionContextColocated(restrictionContext)) + { + /* distribution columns are equal, but tables are not co-located */ + return false; + } + + return true; } @@ -1650,6 +1666,42 @@ RelationRestrictionPartitionKeyIndex(RelationRestriction *relationRestriction) } +/* + * AllRelationsInRestrictionContextColocated determines whether all of the relations in the + * given relation restrictions list are co-located. + */ +static bool +AllRelationsInRestrictionContextColocated(RelationRestrictionContext *restrictionContext) +{ + RelationRestriction *relationRestriction = NULL; + int initialColocationId = INVALID_COLOCATION_ID; + + /* check that all relations, except DISTRIBUTE_BY_NONE ones, share a colocation id */ + foreach_ptr(relationRestriction, restrictionContext->relationRestrictionList) + { + Oid relationId = relationRestriction->relationId; + + if (PartitionMethod(relationId) == DISTRIBUTE_BY_NONE) + { + continue; + } + + int colocationId = TableColocationId(relationId); + + if (initialColocationId == INVALID_COLOCATION_ID) + { + initialColocationId = colocationId; + } + else if (colocationId != initialColocationId) + { + return false; + } + } + + return true; +} + + /* * RelationIdList returns list of unique relation ids in query tree. 
*/ diff --git a/src/test/regress/expected/ch_bench_having.out b/src/test/regress/expected/ch_bench_having.out index e9638b93d..560948958 100644 --- a/src/test/regress/expected/ch_bench_having.out +++ b/src/test/regress/expected/ch_bench_having.out @@ -274,14 +274,14 @@ where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock) group by s_i_id having (select max(s_order_cnt) > 2 as having_query from stock where s_i_id = s.s_i_id) order by s_i_id; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -- We don't support correlated subqueries in having select s_i_id, sum(s_order_cnt) as ordercount from stock s group by s_i_id having (select max(s_order_cnt) > 2 as having_query from stock where s_i_id = s.s_i_id) order by s_i_id; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DROP TABLE stock; CREATE TABLE stock ( s_w_id int NOT NULL, diff --git a/src/test/regress/expected/ch_bench_having_mx.out b/src/test/regress/expected/ch_bench_having_mx.out index 207a4d017..506d2057c 100644 --- a/src/test/regress/expected/ch_bench_having_mx.out +++ b/src/test/regress/expected/ch_bench_having_mx.out @@ -279,14 +279,14 @@ where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock) group by s_i_id having (select max(s_order_cnt) > 2 as having_query from stock where s_i_id = s.s_i_id) order by s_i_id; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution 
columns -- We don't support correlated subqueries in having select s_i_id, sum(s_order_cnt) as ordercount from stock s group by s_i_id having (select max(s_order_cnt) > 2 as having_query from stock where s_i_id = s.s_i_id) order by s_i_id; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns \c - - - :master_port SET citus.replication_model TO streaming; SET citus.shard_replication_factor to 1; diff --git a/src/test/regress/expected/ch_bench_subquery_repartition.out b/src/test/regress/expected/ch_bench_subquery_repartition.out index 68ad46d42..f2b518dd8 100644 --- a/src/test/regress/expected/ch_bench_subquery_repartition.out +++ b/src/test/regress/expected/ch_bench_subquery_repartition.out @@ -116,7 +116,7 @@ select s_i_id where s_i_id in (select i_im_id from item) AND s_i_id = ol_i_id; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -- Subquery + repartion is supported when it is a NOT IN query where the subquery -- returns unique results select s_i_id @@ -124,7 +124,7 @@ select s_i_id where s_i_id not in (select i_id from item) AND s_i_id = ol_i_id; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -- Subquery + repartion is not supported when it is a NOT IN where the subquery -- doesn't return unique results select s_i_id @@ -132,7 +132,7 @@ select s_i_id where s_i_id not in (select i_im_id from item) AND s_i_id = ol_i_id; -ERROR: complex joins are only 
supported when all distributed tables are joined on their distribution columns with equal operator +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -- Actual CHbenCHmark query is supported select su_name, su_address from supplier, nation diff --git a/src/test/regress/expected/dml_recursive.out b/src/test/regress/expected/dml_recursive.out index 1116bb750..3b0b322a4 100644 --- a/src/test/regress/expected/dml_recursive.out +++ b/src/test/regress/expected/dml_recursive.out @@ -262,7 +262,7 @@ FROM ) as foo RETURNING *; DEBUG: generating subplan 15_1 for subquery SELECT dept FROM recursive_dml_queries.second_distributed_table -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -- again a corrolated subquery -- this time distribution key eq. 
exists -- however recursive planning is prevented due to correlated subqueries @@ -292,7 +292,7 @@ FROM ) as baz ) as foo WHERE second_distributed_table.tenant_id = foo.tenant_id RETURNING *; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -- we don't support subqueries/CTEs inside VALUES INSERT INTO second_distributed_table (tenant_id, dept) diff --git a/src/test/regress/expected/full_join.out b/src/test/regress/expected/full_join.out index 522376d78..9cfbdcd82 100644 --- a/src/test/regress/expected/full_join.out +++ b/src/test/regress/expected/full_join.out @@ -1,5 +1,5 @@ -- --- Full join with subquery pushdown support +-- Full join with subquery pushdown support -- SET citus.next_shard_id TO 9000000; CREATE SCHEMA full_join; @@ -49,7 +49,7 @@ SELECT * FROM test_table_1 FULL JOIN test_table_3 using(id) ORDER BY 1; (4 rows) -- Join subqueries using single column -SELECT * FROM +SELECT * FROM (SELECT test_table_1.id FROM test_table_1 FULL JOIN test_table_3 using(id)) as j1 FULL JOIN (SELECT test_table_1.id FROM test_table_1 FULL JOIN test_table_3 using(id)) as j2 @@ -65,7 +65,7 @@ SELECT * FROM (5 rows) -- Join subqueries using multiple columns -SELECT * FROM +SELECT * FROM (SELECT test_table_1.id, test_table_1.val1 FROM test_table_1 FULL JOIN test_table_3 using(id)) as j1 FULL JOIN (SELECT test_table_1.id, test_table_1.val1 FROM test_table_1 FULL JOIN test_table_3 using(id)) as j2 @@ -91,7 +91,7 @@ SELECT * FROM test_table_1 FULL JOIN test_table_3 USING(id, val1) ORDER BY 1; (4 rows) -- Full join with complicated target lists -SELECT count(DISTINCT id), (avg(test_table_1.val1) + id * id)::integer as avg_value, id::numeric IS NOT NULL as not_null +SELECT count(DISTINCT id), (avg(test_table_1.val1) + id * id)::integer as avg_value, id::numeric IS NOT NULL as 
not_null FROM test_table_1 FULL JOIN test_table_3 using(id) WHERE id::bigint < 55 GROUP BY id @@ -214,7 +214,7 @@ SELECT * FROM test_table_1 FULL JOIN test_table_2 using(id) ORDER BY 1; (5 rows) -- Join subqueries using multiple columns -SELECT * FROM +SELECT * FROM (SELECT test_table_1.id, test_table_1.val1 FROM test_table_1 FULL JOIN test_table_2 using(id)) as j1 FULL JOIN (SELECT test_table_2.id, test_table_2.val1 FROM test_table_1 FULL JOIN test_table_2 using(id)) as j2 diff --git a/src/test/regress/expected/multi_dropped_column_aliases.out b/src/test/regress/expected/multi_dropped_column_aliases.out index 6dc77a058..3744d71db 100644 --- a/src/test/regress/expected/multi_dropped_column_aliases.out +++ b/src/test/regress/expected/multi_dropped_column_aliases.out @@ -59,4 +59,4 @@ FROM (customer LEFT OUTER JOIN orders ON (c_custkey = o_custkey)) AS test(c_custkey, c_nationkey) INNER JOIN lineitem ON (test.c_custkey = l_orderkey) LIMIT 10; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns diff --git a/src/test/regress/expected/multi_shard_modify.out b/src/test/regress/expected/multi_shard_modify.out index acef48a1f..792fb80ac 100644 --- a/src/test/regress/expected/multi_shard_modify.out +++ b/src/test/regress/expected/multi_shard_modify.out @@ -83,7 +83,7 @@ SELECT create_distributed_table('temp_nations', 'name', 'hash'); SELECT master_modify_multiple_shards('DELETE FROM multi_shard_modify_test USING temp_nations WHERE multi_shard_modify_test.t_value = temp_nations.key AND temp_nations.name = ''foobar'' '); WARNING: master_modify_multiple_shards is deprecated and will be removed in a future release. 
HINT: Run the command directly -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -- commands with a RETURNING clause are unsupported SELECT master_modify_multiple_shards('DELETE FROM multi_shard_modify_test WHERE t_key = 3 RETURNING *'); WARNING: master_modify_multiple_shards is deprecated and will be removed in a future release. diff --git a/src/test/regress/expected/multi_shard_update_delete.out b/src/test/regress/expected/multi_shard_update_delete.out index 48c4d3ee5..7f6715cb1 100644 --- a/src/test/regress/expected/multi_shard_update_delete.out +++ b/src/test/regress/expected/multi_shard_update_delete.out @@ -656,7 +656,7 @@ WHERE user_id IN (SELECT user_id UPDATE users_test_table SET value_2 = (SELECT value_3 FROM users_test_table); -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns UPDATE users_test_table SET value_2 = 2 WHERE @@ -671,7 +671,7 @@ WHERE GROUP BY user_id ); -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns UPDATE users_test_table SET (value_1, value_2) = (2,1) WHERE user_id IN diff --git a/src/test/regress/expected/multi_subquery.out b/src/test/regress/expected/multi_subquery.out index d7a9c8291..08c35700d 100644 --- a/src/test/regress/expected/multi_subquery.out +++ b/src/test/regress/expected/multi_subquery.out @@ -426,7 +426,7 @@ FROM events_table t1 LEFT JOIN users_table t2 ON t1.user_id > t2.user_id ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 
DESC LIMIT 5; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -- outer joins on reference tables with expressions should work SELECT DISTINCT ON (t1.user_id) t1.user_id, t2.value_1, t2.value_2, t2.value_3 FROM events_table t1 @@ -467,7 +467,7 @@ SELECT DISTINCT ON (t1.user_id) t1.user_id, t2.value_1, t2.value_2, t2.value_3 LEFT JOIN users_reference_table t2 ON t1.user_id = trunc(t2.user_id) ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC LIMIT 5; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -- outer joins as subqueries should work -- https://github.com/citusdata/citus/issues/2739 SELECT user_id, value_1, event_type diff --git a/src/test/regress/expected/multi_subquery_behavioral_analytics.out b/src/test/regress/expected/multi_subquery_behavioral_analytics.out index fb7da43f2..4b9fdf483 100644 --- a/src/test/regress/expected/multi_subquery_behavioral_analytics.out +++ b/src/test/regress/expected/multi_subquery_behavioral_analytics.out @@ -362,11 +362,11 @@ ORDER BY ( SELECT users_table.user_id, - CASE - WHEN + CASE + WHEN events_table.event_type > 1 AND events_table.event_type < 3 - THEN 'action=>1' - ELSE 'action=>2' + THEN 'action=>1' + ELSE 'action=>2' END AS event, events_table.time FROM @@ -518,7 +518,7 @@ FROM users_table WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 1 AND value_1 <= 2) AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 3 AND value_1 <= 4) AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 5 AND value_1 <= 6) -GROUP BY +GROUP BY user_id ORDER BY user_id DESC @@ -621,9 +621,9 @@ SELECT user_id, 
AND user_id = users_table.user_id GROUP BY user_id HAVING Count(*) > 2) -GROUP BY +GROUP BY user_id -ORDER BY +ORDER BY 1 DESC, 2 DESC LIMIT 5; user_id | avg @@ -637,16 +637,16 @@ LIMIT 5; ------------------------------------ SELECT user_id, value_1 from ( - SELECT + SELECT user_id, value_1 From users_table - WHERE - value_2 > 1 and user_id = 2 - GROUP BY - value_1, user_id - HAVING + WHERE + value_2 > 1 and user_id = 2 + GROUP BY + value_1, user_id + HAVING count(*) > 1 ) AS a -ORDER BY +ORDER BY user_id ASC, value_1 ASC; user_id | value_1 ---------+--------- @@ -659,15 +659,15 @@ ORDER BY -- same query with additional filter to make it not router plannable SELECT user_id, value_1 from ( - SELECT + SELECT user_id, value_1 From users_table - WHERE - value_2 > 1 and (user_id = 2 OR user_id = 3) - GROUP BY - value_1, user_id + WHERE + value_2 > 1 and (user_id = 2 OR user_id = 3) + GROUP BY + value_1, user_id HAVING count(*) > 1 ) AS a -ORDER BY +ORDER BY user_id ASC, value_1 ASC; user_id | value_1 ---------+--------- @@ -687,11 +687,11 @@ ORDER BY SELECT user_id FROM events_table WHERE - event_type = 3 AND value_2 > 2 AND + event_type = 3 AND value_2 > 2 AND user_id IN - (SELECT + (SELECT user_id - FROM + FROM users_table WHERE value_1 = 1 AND value_2 > 2 @@ -709,13 +709,13 @@ ORDER BY 1; ------------------------------------ -- Which events_table did people who has done some specific events_table ------------------------------------ -SELECT +SELECT user_id, event_type FROM events_table -WHERE +WHERE user_id in (SELECT user_id from events_table WHERE event_type > 3 and event_type < 5) -GROUP BY +GROUP BY user_id, event_type -ORDER BY 2 DESC, 1 +ORDER BY 2 DESC, 1 LIMIT 3; user_id | event_type ---------+------------ @@ -733,14 +733,14 @@ SELECT user_id FROM user_id FROM events_table - WHERE + WHERE event_type = 2 - GROUP BY - user_id - HAVING + GROUP BY + user_id + HAVING count(*) > 1 ) AS a -ORDER BY +ORDER BY user_id; user_id --------- @@ -768,7 +768,7 @@ FROM 
short_list.user_id = ma.user_id and ma.value_1 < 2 and short_list.event_type < 2 ) temp ON users_table.user_id = temp.user_id - WHERE + WHERE users_table.value_1 < 2; -- get some statistics from the aggregated results to ensure the results are correct SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM assets; @@ -783,16 +783,16 @@ DROP TABLE assets; SET client_min_messages TO DEBUG1; SELECT count(*) FROM ( - SELECT + SELECT user_id - FROM + FROM users_table - WHERE - (value_1 = '1' OR value_1 = '3') AND + WHERE + (value_1 = '1' OR value_1 = '3') AND user_id NOT IN (select user_id from users_table where value_1 = '4') - GROUP BY + GROUP BY user_id - HAVING + HAVING count(distinct value_1) = 2 ) as foo; DEBUG: generating subplan 23_1 for subquery SELECT user_id FROM public.users_table WHERE (value_1 OPERATOR(pg_catalog.=) 4) @@ -812,9 +812,9 @@ SELECT subquery_count FROM users_table WHERE (value_1 = '1' OR value_1 = '3') - GROUP BY + GROUP BY user_id - HAVING + HAVING count(distinct value_1) = 2) as a LEFT JOIN (SELECT @@ -823,12 +823,12 @@ SELECT subquery_count FROM users_table WHERE (value_1 = '2') - GROUP BY - user_id) as b - ON a.user_id = b.user_id - WHERE + GROUP BY + user_id) as b + ON a.user_id = b.user_id + WHERE b.user_id IS NULL - GROUP BY + GROUP BY a.user_id ) AS inner_subquery; subquery_count @@ -845,9 +845,9 @@ FROM ( users_table WHERE (value_1 = '1' OR value_1 = '3') - GROUP BY + GROUP BY user_id - HAVING + HAVING count(distinct value_1) = 2 ) as a LEFT JOIN ( @@ -857,12 +857,12 @@ FROM ( users_table WHERE (value_1 = '2') - GROUP BY + GROUP BY user_id) AS b ON a.user_id = b.user_id -WHERE +WHERE b.user_id IS NULL -GROUP BY +GROUP BY a.user_id; subquery_count ---------------- @@ -935,9 +935,9 @@ FROM ( ORDER BY time LIMIT 1 ) e5 ON true -WHERE +WHERE e1.user_id = 1 -GROUP BY +GROUP BY e1.user_id LIMIT 1; user_id | viewed_homepage | use_demo | entered_credit_card | submit_card_info | see_bought_screen @@ -1096,9 +1096,9 @@ FROM ( users_table 
WHERE (value_1 > 2) - GROUP BY + GROUP BY user_id - HAVING + HAVING count(distinct value_1) > 2 ) as a LEFT JOIN ( @@ -1109,11 +1109,11 @@ FROM ( WHERE (value_1 > 3)) AS b ON a.user_id = b.user_id -WHERE +WHERE b.user_id IS NOT NULL -GROUP BY +GROUP BY a.user_id -ORDER BY +ORDER BY avg(b.value_3), 2, 1 LIMIT 5; user_id | subquery_avg @@ -1144,13 +1144,13 @@ FROM ( WHERE (value_1 > 3)) AS b ON a.user_id = b.user_id -WHERE +WHERE b.user_id IS NOT NULL -GROUP BY +GROUP BY a.user_id -HAVING +HAVING sum(b.value_3) > 5 -ORDER BY +ORDER BY avg(b.value_3), 2, 1 LIMIT 5; user_id | subquery_avg @@ -1164,32 +1164,32 @@ LIMIT 5; -- avg on the value_3 is not a resjunk SELECT a.user_id, avg(b.value_2) as subquery_avg, avg(b.value_3) FROM - (SELECT + (SELECT user_id - FROM + FROM users_table - WHERE + WHERE (value_1 > 2) - GROUP BY + GROUP BY user_id - HAVING + HAVING count(distinct value_1) > 2 ) as a LEFT JOIN ( - SELECT + SELECT user_id, value_2, value_3 - FROM + FROM users_table - WHERE + WHERE (value_1 > 3) ) AS b ON a.user_id = b.user_id -WHERE +WHERE b.user_id IS NOT NULL -GROUP BY +GROUP BY a.user_id -ORDER BY +ORDER BY avg(b.value_3) DESC, 2, 1 LIMIT 5; user_id | subquery_avg | avg @@ -1206,24 +1206,24 @@ SELECT u.user_id, sub.value_2, sub.value_3, COUNT(e2.user_id) counts FROM users_table u LEFT OUTER JOIN LATERAL - (SELECT + (SELECT * - FROM + FROM events_table e1 - WHERE + WHERE e1.user_id = u.user_id - ORDER BY + ORDER BY e1.value_3 DESC LIMIT 1 ) sub ON true LEFT OUTER JOIN events_table e2 ON e2.user_id = sub.user_id -WHERE +WHERE e2.value_2 > 1 AND e2.value_2 < 5 AND u.value_2 > 1 AND u.value_2 < 5 -GROUP BY +GROUP BY u.user_id, sub.value_2, sub.value_3 -ORDER BY +ORDER BY 4 DESC, 1 DESC, 2 ASC, 3 ASC LIMIT 10; user_id | value_2 | value_3 | counts @@ -1242,15 +1242,15 @@ SELECT count(*) as users_count FROM events_table JOIN - (SELECT + (SELECT DISTINCT user_id - FROM + FROM users_table ) as distinct_users ON distinct_users.user_id = events_table.user_id -GROUP BY 
+GROUP BY distinct_users.user_id -ORDER BY +ORDER BY users_count desc, avg_type DESC LIMIT 5; avg_type | users_count @@ -1270,16 +1270,16 @@ FROM events_table JOIN (SELECT distinct_users.user_id, count(1) as ct FROM - (SELECT + (SELECT user_id - FROM + FROM users_table ) as distinct_users - GROUP BY + GROUP BY distinct_users.user_id ) as users_count ON users_count.user_id = events_table.user_id -ORDER BY +ORDER BY users_count.ct desc, event_type DESC LIMIT 5; event_type | ct @@ -1294,24 +1294,24 @@ LIMIT 5; --- now, test (subquery JOIN subquery) SELECT n1.user_id, count_1, total_count FROM - (SELECT + (SELECT user_id, count(1) as count_1 - FROM + FROM users_table - GROUP BY + GROUP BY user_id ) n1 INNER JOIN ( - SELECT + SELECT user_id, count(1) as total_count - FROM + FROM events_table - GROUP BY + GROUP BY user_id, event_type ) n2 ON (n2.user_id = n1.user_id) -ORDER BY +ORDER BY total_count DESC, count_1 DESC, 1 DESC LIMIT 10; user_id | count_1 | total_count @@ -1330,25 +1330,25 @@ LIMIT 10; SELECT a.user_id, avg(b.value_2) as subquery_avg FROM - (SELECT + (SELECT user_id - FROM + FROM users_table - WHERE + WHERE (value_1 > 2) - GROUP BY + GROUP BY user_id - HAVING + HAVING count(distinct value_1) > 2 ) as a LEFT JOIN - (SELECT + (SELECT DISTINCT ON (user_id) user_id, value_2, value_3 - FROM + FROM users_table - WHERE + WHERE (value_1 > 3) - ORDER BY + ORDER BY 1,2,3 ) AS b ON a.user_id = b.user_id @@ -1368,25 +1368,25 @@ LIMIT 5; -- when used in target list SELECT a.user_id, avg(b.value_2) as subquery_avg FROM - (SELECT + (SELECT user_id - FROM + FROM users_table - WHERE + WHERE (value_1 > 2) - GROUP BY + GROUP BY user_id - HAVING + HAVING count(distinct value_1) > 2 ) as a LEFT JOIN - (SELECT + (SELECT DISTINCT ON (value_2) value_2 , user_id, value_3 - FROM + FROM users_table - WHERE + WHERE (value_1 > 3) - ORDER BY + ORDER BY 1,2,3 ) AS b USING (user_id) @@ -1395,33 +1395,33 @@ ERROR: cannot push down this subquery DETAIL: Distinct on columns without partition 
column is currently unsupported SELECT a.user_id, avg(b.value_2) as subquery_avg FROM - (SELECT + (SELECT user_id - FROM + FROM users_table - WHERE + WHERE (value_1 > 2) - GROUP BY + GROUP BY user_id - HAVING + HAVING count(distinct value_1) > 2 ) as a LEFT JOIN - (SELECT + (SELECT DISTINCT ON (value_2, user_id) value_2 , user_id, value_3 - FROM + FROM users_table - WHERE + WHERE (value_1 > 3) - ORDER BY + ORDER BY 1,2,3 ) AS b ON a.user_id = b.user_id -WHERE +WHERE b.user_id IS NOT NULL -GROUP BY +GROUP BY a.user_id -ORDER BY +ORDER BY avg(b.value_3), 2, 1 LIMIT 5; user_id | subquery_avg @@ -1433,33 +1433,33 @@ LIMIT 5; (4 rows) SELECT user_id, event_type -FROM +FROM (SELECT * FROM ( - (SELECT - event_type, user_id as a_user_id - FROM + (SELECT + event_type, user_id as a_user_id + FROM events_table) AS a JOIN (SELECT ma.user_id AS user_id, ma.value_2 AS value_2, (GREATEST(coalesce((ma.value_3 * ma.value_2) / 20, 0.0) + GREATEST(1.0))) / 2 AS prob - FROM + FROM users_table AS ma - WHERE + WHERE (ma.value_2 > 1) - ORDER BY + ORDER BY prob DESC, value_2 DESC, user_id DESC LIMIT 10 ) AS ma ON (a.a_user_id = ma.user_id) ) AS inner_sub - ORDER BY + ORDER BY prob DESC, value_2 DESC, user_id DESC, event_type DESC LIMIT 10 ) AS outer_sub -ORDER BY +ORDER BY prob DESC, value_2 DESC, user_id DESC, event_type DESC LIMIT 10; user_id | event_type @@ -1480,24 +1480,24 @@ LIMIT 10; -- ordering difference in the previous one's inner query SELECT user_id, event_type FROM - (SELECT - event_type, user_id as a_user_id - FROM + (SELECT + event_type, user_id as a_user_id + FROM events_table) AS a JOIN (SELECT ma.user_id AS user_id, ma.value_2 AS value_2, (GREATEST(coalesce((ma.value_3 * ma.value_2) / 20, 0.0) + GREATEST(1.0))) / 2 AS prob - FROM + FROM users_table AS ma - WHERE + WHERE (ma.value_2 > 1) - ORDER BY + ORDER BY prob DESC, user_id DESC LIMIT 10 ) AS ma ON (a.a_user_id = ma.user_id) -ORDER BY +ORDER BY prob DESC, event_type DESC, user_id DESC LIMIT 10; user_id | event_type @@ 
-1516,34 +1516,34 @@ LIMIT 10; -- now they produce the same result when ordering fixed in 'outer_sub' SELECT user_id, event_type -FROM +FROM (SELECT * FROM ( - (SELECT - event_type, user_id as a_user_id - FROM + (SELECT + event_type, user_id as a_user_id + FROM events_table ) AS a JOIN (SELECT ma.user_id AS user_id, ma.value_2 AS value_2, (GREATEST(coalesce((ma.value_3 * ma.value_2) / 20, 0.0) + GREATEST(1.0))) / 2 AS prob - FROM + FROM users_table AS ma - WHERE + WHERE (ma.value_2 > 1) - ORDER BY + ORDER BY prob DESC, user_id DESC LIMIT 10 ) AS ma ON (a.a_user_id = ma.user_id) ) AS inner_sub - ORDER BY + ORDER BY prob DESC, event_type DESC, user_id DESC LIMIT 10 ) AS outer_sub -ORDER BY +ORDER BY prob DESC, event_type DESC, user_id DESC LIMIT 10; user_id | event_type @@ -1602,47 +1602,47 @@ FROM FROM (SELECT * FROM ( - (SELECT + (SELECT user_id AS user_id_p - FROM + FROM events_table - WHERE + WHERE (event_type IN (1,2,3,4,5)) ) AS ma_p JOIN - (SELECT + (SELECT user_id AS user_id_a - FROM + FROM users_table - WHERE - (value_2 % 5 = 1) ) AS a + WHERE + (value_2 % 5 = 1) ) AS a ON (a.user_id_a = ma_p.user_id_p) ) ) AS a_ma_p ) AS inner_filter_q JOIN - (SELECT + (SELECT value_2, value_3, user_id AS user_id_ck - FROM + FROM events_table - WHERE + WHERE event_type = ANY(ARRAY [4, 5, 6]) - ORDER BY + ORDER BY value_3 ASC, user_id_ck DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC - LIMIT 10 ) - AS ma_ck ON (ma_ck.user_id_ck = inner_filter_q.user_id) ) + LIMIT 10 ) + AS ma_ck ON (ma_ck.user_id_ck = inner_filter_q.user_id) ) AS inner_sub_q - ORDER BY + ORDER BY value_3 ASC, user_id_ck DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC - LIMIT 10 ) + LIMIT 10 ) AS outer_sub_q - ORDER BY + ORDER BY value_3 ASC, user_id DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC - LIMIT 10) - AS inner_search_q - ON (ma_e.user_id_e = inner_search_q.user_id) ) + LIMIT 10) + AS inner_search_q + ON (ma_e.user_id_e = inner_search_q.user_id) ) AS outer_inner_sub_q - ORDER BY + 
ORDER BY value_3 ASC, user_id DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC, event_type_e DESC - LIMIT 10) + LIMIT 10) AS outer_outer_sub_q -ORDER BY +ORDER BY value_3 ASC, user_id DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC, event_type_e DESC LIMIT 10; user_id_e | event_type_e | value_2 | value_3 | user_id @@ -1659,10 +1659,10 @@ LIMIT 10; 5 | 5 | 2 | 0 | 5 (10 rows) --- top level select * is removed now there is +-- top level select * is removed now there is -- a join at top level. SELECT * -FROM +FROM ( (SELECT user_id AS user_id_e, event_type as event_type_e @@ -1673,30 +1673,30 @@ FROM (SELECT value_2, value_3, user_id FROM - (SELECT + (SELECT * FROM ( - (SELECT + (SELECT user_id_p AS user_id FROM - (SELECT + (SELECT * FROM ( - (SELECT + (SELECT user_id AS user_id_p - FROM + FROM events_table - WHERE + WHERE (event_type IN (1, 2, 3, 4, 5)) ) AS ma_p JOIN - (SELECT + (SELECT user_id AS user_id_a - FROM + FROM users_table - WHERE + WHERE (value_2 % 5 = 1) ) AS a ON (a.user_id_a = ma_p.user_id_p) @@ -1706,9 +1706,9 @@ FROM JOIN (SELECT value_2, value_3, user_id AS user_id_ck - FROM + FROM events_table - WHERE + WHERE event_type = ANY(ARRAY [4, 5, 6]) ORDER BY value_3 ASC, user_id_ck DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC @@ -1755,20 +1755,20 @@ DROP FUNCTION array_index(ANYARRAY, ANYELEMENT); -- a query with a constant subquery SELECT count(*) as subquery_count FROM ( - SELECT + SELECT user_id FROM users_table WHERE (value_1 = '1' OR value_1 = '3') - GROUP BY user_id + GROUP BY user_id HAVING count(distinct value_1) = 2 ) as a LEFT JOIN ( SELECT 1 as user_id - ) AS b - ON a.user_id = b.user_id + ) AS b + ON a.user_id = b.user_id WHERE b.user_id IS NULL GROUP BY a.user_id; subquery_count @@ -1783,44 +1783,44 @@ GROUP BY a.user_id; -- volatile function in the subquery SELECT count(*) as subquery_count FROM ( - SELECT + SELECT user_id FROM users_table WHERE (value_1 = '1' OR value_1 = '3') - GROUP BY user_id + GROUP BY user_id HAVING 
count(distinct value_1) = 2 ) as a INNER JOIN ( SELECT random()::int as user_id - ) AS b - ON a.user_id = b.user_id + ) AS b + ON a.user_id = b.user_id WHERE b.user_id IS NULL GROUP BY a.user_id; ERROR: cannot push down this subquery DETAIL: Subqueries without a FROM clause can only contain immutable functions -- this is slightly different, we use RTE_VALUEs here -SELECT Count(*) AS subquery_count -FROM (SELECT - user_id - FROM - users_table - WHERE - (value_1 = '1' OR value_1 = '3' ) - GROUP BY - user_id - HAVING - Count(DISTINCT value_1) = 2) AS a - INNER JOIN - (SELECT - * - FROM - (VALUES (1, 'one'), (2, 'two'), (3, 'three')) AS t (user_id, letter)) AS b - ON a.user_id = b.user_id -WHERE b.user_id IS NULL -GROUP BY a.user_id; +SELECT Count(*) AS subquery_count +FROM (SELECT + user_id + FROM + users_table + WHERE + (value_1 = '1' OR value_1 = '3' ) + GROUP BY + user_id + HAVING + Count(DISTINCT value_1) = 2) AS a + INNER JOIN + (SELECT + * + FROM + (VALUES (1, 'one'), (2, 'two'), (3, 'three')) AS t (user_id, letter)) AS b + ON a.user_id = b.user_id +WHERE b.user_id IS NULL +GROUP BY a.user_id; ERROR: cannot push down this subquery DETAIL: VALUES in multi-shard queries is currently unsupported -- same query without LIMIT/OFFSET returns 30 rows @@ -1830,12 +1830,12 @@ SELECT user_id, array_length(events_table, 1) FROM ( SELECT user_id, array_agg(event ORDER BY time) AS events_table FROM ( - SELECT + SELECT u.user_id, e.event_type::text AS event, e.time - FROM + FROM users_table AS u, events_table AS e - WHERE + WHERE u.user_id = e.user_id AND e.event_type IN (1, 2) ) t GROUP BY user_id @@ -1854,12 +1854,12 @@ SELECT user_id, array_length(events_table, 1) FROM ( SELECT user_id, array_agg(event ORDER BY time) AS events_table FROM ( - SELECT + SELECT u.user_id, e.event_type::text AS event, e.time - FROM + FROM users_table AS u, events_table AS e - WHERE + WHERE u.user_id = e.user_id AND e.event_type IN (1, 2) ) t GROUP BY user_id @@ -1884,12 +1884,12 @@ SELECT 
user_id, array_length(events_table, 1) FROM ( SELECT user_id, array_agg(event ORDER BY time) AS events_table FROM ( - SELECT + SELECT u.user_id, e.event_type::text AS event, e.time - FROM + FROM users_table AS u, events_table AS e - WHERE + WHERE u.user_id = e.user_id AND e.event_type IN (1, 2, 3, 4) ) t GROUP BY user_id @@ -1908,12 +1908,12 @@ SELECT user_id, array_length(events_table, 1) FROM ( SELECT user_id, array_agg(event ORDER BY time) AS events_table FROM ( - SELECT + SELECT u.user_id, e.event_type::text AS event, e.time - FROM + FROM users_table AS u, events_table AS e - WHERE + WHERE u.user_id = e.user_id AND e.event_type IN (1, 2) ) t GROUP BY user_id @@ -1998,7 +1998,7 @@ $f$); (localhost,57638,t,"CREATE FUNCTION") (2 rows) --- we don't support joins via functions +-- we don't support joins via functions SELECT user_id, array_length(events_table, 1) FROM ( SELECT user_id, array_agg(event ORDER BY time) AS events_table @@ -2011,7 +2011,7 @@ FROM ( GROUP BY user_id ) q ORDER BY 2 DESC, 1; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -- note that the following query has both equi-joins on the partition keys -- and non-equi-joins on other columns. We now support query filters -- having non-equi-joins as long as they have equi-joins on partition keys. 
@@ -2028,7 +2028,7 @@ FROM short_list.user_id = ma.user_id and ma.value_1 < 3 and short_list.event_type < 3 ) temp ON users_table.user_id = temp.user_id - WHERE + WHERE users_table.value_1 < 3 AND test_join_function_2(users_table.user_id, temp.user_id); user_id | value_1 | prob ---------+---------+------ @@ -2050,7 +2050,7 @@ FROM test_join_function_2(ma.value_1, short_list.value_2) ) temp ON users_table.user_id = temp.user_id - WHERE + WHERE users_table.value_1 < 3 ORDER BY 2 DESC, 1 DESC LIMIT 10; @@ -2073,13 +2073,13 @@ FROM SELECT count(*) FROM - (SELECT - event_type, random() - FROM - events_table, users_table - WHERE - events_table.user_id = users_table.user_id AND - events_table.time > users_table.time AND + (SELECT + event_type, random() + FROM + events_table, users_table + WHERE + events_table.user_id = users_table.user_id AND + events_table.time > users_table.time AND events_table.value_2 IN (0, 4) ) as foo; count @@ -2091,36 +2091,36 @@ FROM SELECT count(*) FROM - (SELECT - event_type, random() - FROM - events_table, users_table - WHERE - events_table.user_id > users_table.user_id AND - events_table.time = users_table.time AND + (SELECT + event_type, random() + FROM + events_table, users_table + WHERE + events_table.user_id > users_table.user_id AND + events_table.time = users_table.time AND events_table.value_2 IN (0, 4) ) as foo; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -- we can even allow that on top level joins SELECT count(*) FROM - (SELECT - event_type, random(), events_table.user_id - FROM - events_table, users_table - WHERE - events_table.user_id = users_table.user_id AND + (SELECT + event_type, random(), events_table.user_id + FROM + events_table, users_table + WHERE + events_table.user_id = users_table.user_id AND 
events_table.value_2 IN (0, 4) ) as foo, -(SELECT - event_type, random(), events_table.user_id - FROM - events_table, users_table - WHERE - events_table.user_id = users_table.user_id AND +(SELECT + event_type, random(), events_table.user_id + FROM + events_table, users_table + WHERE + events_table.user_id = users_table.user_id AND events_table.value_2 IN (1, 5) - ) as bar + ) as bar WHERE foo.event_type > bar.event_type AND foo.user_id = bar.user_id; count @@ -2133,38 +2133,38 @@ AND foo.user_id = bar.user_id; SELECT count(*) FROM - (SELECT - event_type, random() - FROM - events_table, users_table - WHERE - events_table.user_id = users_table.user_id AND + (SELECT + event_type, random() + FROM + events_table, users_table + WHERE + events_table.user_id = users_table.user_id AND events_table.value_2 IN (0, 4) ) as foo, -(SELECT - event_type, random() - FROM - events_table, users_table - WHERE - events_table.user_id = users_table.user_id AND +(SELECT + event_type, random() + FROM + events_table, users_table + WHERE + events_table.user_id = users_table.user_id AND events_table.value_2 IN (1, 5) - ) as bar + ) as bar WHERE foo.event_type = bar.event_type; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -- DISTINCT in the outer query and DISTINCT in the subquery SELECT DISTINCT users_ids.user_id -FROM +FROM (SELECT DISTINCT user_id FROM users_table) as users_ids - JOIN - (SELECT + JOIN + (SELECT ma.user_id, ma.value_1, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob - FROM + FROM users_table AS ma, events_table as short_list - WHERE + WHERE short_list.user_id = ma.user_id and ma.value_1 < 3 and short_list.event_type < 3 - ) temp - ON users_ids.user_id = temp.user_id + ) temp + ON users_ids.user_id = temp.user_id WHERE temp.value_1 < 3 ORDER BY 1 
LIMIT 5; @@ -2180,17 +2180,17 @@ FROM -- DISTINCT ON in the outer query and DISTINCT in the subquery SELECT DISTINCT ON (users_ids.user_id) users_ids.user_id, temp.value_1, prob -FROM +FROM (SELECT DISTINCT user_id FROM users_table) as users_ids - JOIN - (SELECT + JOIN + (SELECT ma.user_id, ma.value_1, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob - FROM + FROM users_table AS ma, events_table as short_list - WHERE + WHERE short_list.user_id = ma.user_id and ma.value_1 < 3 and short_list.event_type < 2 - ) temp - ON users_ids.user_id = temp.user_id + ) temp + ON users_ids.user_id = temp.user_id WHERE temp.value_1 < 3 ORDER BY 1, 2 LIMIT 5; @@ -2206,17 +2206,17 @@ FROM -- DISTINCT ON in the outer query and DISTINCT ON in the subquery SELECT DISTINCT ON (users_ids.user_id) users_ids.user_id, temp.value_1, prob -FROM +FROM (SELECT DISTINCT ON (user_id) user_id, value_1 FROM users_table ORDER BY 1,2) as users_ids - JOIN - (SELECT + JOIN + (SELECT ma.user_id, ma.value_1, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob - FROM + FROM users_table AS ma, events_table as short_list - WHERE + WHERE short_list.user_id = ma.user_id and ma.value_1 < 2 and short_list.event_type < 3 - ) temp - ON users_ids.user_id = temp.user_id + ) temp + ON users_ids.user_id = temp.user_id ORDER BY 1,2 LIMIT 5; user_id | value_1 | prob diff --git a/src/test/regress/expected/multi_subquery_complex_queries.out b/src/test/regress/expected/multi_subquery_complex_queries.out index 2245d5e6b..ce61f1df1 100644 --- a/src/test/regress/expected/multi_subquery_complex_queries.out +++ b/src/test/regress/expected/multi_subquery_complex_queries.out @@ -1616,7 +1616,7 @@ FROM ORDER BY user_id DESC, lastseen DESC LIMIT 10; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their 
distribution columns -- not pushdownable since lower LATERAL JOIN is not on the partition key -- not recursively plannable due to LATERAL join where there is a reference -- from an outer query diff --git a/src/test/regress/expected/multi_subquery_complex_reference_clause.out b/src/test/regress/expected/multi_subquery_complex_reference_clause.out index d805e5932..e1977446d 100644 --- a/src/test/regress/expected/multi_subquery_complex_reference_clause.out +++ b/src/test/regress/expected/multi_subquery_complex_reference_clause.out @@ -1420,7 +1420,7 @@ WHERE GROUP BY 1 ORDER BY 2 DESC, 1 DESC LIMIT 5; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns SELECT foo.user_id FROM ( SELECT m.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id) diff --git a/src/test/regress/expected/multi_subquery_in_where_clause.out b/src/test/regress/expected/multi_subquery_in_where_clause.out index 07fc69993..f72b7adf0 100644 --- a/src/test/regress/expected/multi_subquery_in_where_clause.out +++ b/src/test/regress/expected/multi_subquery_in_where_clause.out @@ -5,17 +5,17 @@ -- -- We don't need shard id sequence here, so commented out to prevent conflicts with concurrent tests -- subqueries in WHERE with greater operator -SELECT +SELECT user_id -FROM +FROM users_table -WHERE - value_2 > - (SELECT - max(value_2) - FROM - events_table - WHERE +WHERE + value_2 > + (SELECT + max(value_2) + FROM + events_table + WHERE users_table.user_id = events_table.user_id AND event_type = 1 GROUP BY user_id @@ -32,17 +32,17 @@ LIMIT 5; (3 rows) -- same query with one additional join on non distribution column -SELECT +SELECT user_id -FROM +FROM users_table -WHERE - value_2 > - (SELECT - max(value_2) - FROM - events_table - WHERE +WHERE + value_2 > + (SELECT + 
max(value_2) + FROM + events_table + WHERE users_table.user_id = events_table.user_id AND event_type = 1 AND users_table.time > events_table.time GROUP BY @@ -51,7 +51,7 @@ WHERE GROUP BY user_id HAVING count(*) > 1 ORDER BY user_id -LIMIT 5; +LIMIT 5; user_id --------- 1 @@ -60,17 +60,17 @@ LIMIT 5; (3 rows) -- the other way around is not supported -SELECT +SELECT user_id -FROM +FROM users_table -WHERE - value_2 > - (SELECT - max(value_2) - FROM - events_table - WHERE +WHERE + value_2 > + (SELECT + max(value_2) + FROM + events_table + WHERE users_table.user_id > events_table.user_id AND event_type = 1 AND users_table.time = events_table.time GROUP BY @@ -79,21 +79,21 @@ WHERE GROUP BY user_id HAVING count(*) > 1 ORDER BY user_id -LIMIT 5; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +LIMIT 5; +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -- subqueries in where with ALL operator -SELECT +SELECT user_id -FROM - users_table -WHERE +FROM + users_table +WHERE value_2 > 1 AND value_2 < ALL (SELECT avg(value_3) FROM events_table WHERE users_table.user_id = events_table.user_id GROUP BY user_id) -GROUP BY +GROUP BY 1 -ORDER BY +ORDER BY 1 DESC -LIMIT 3; +LIMIT 3; user_id --------- 4 @@ -102,15 +102,15 @@ LIMIT 3; (3 rows) -- IN operator on non-partition key -SELECT +SELECT user_id -FROM +FROM events_table as e1 WHERE event_type IN - (SELECT + (SELECT event_type - FROM + FROM events_table as e2 WHERE value_2 = 1 AND value_3 > 3 AND @@ -139,15 +139,15 @@ ORDER BY 1; (17 rows) -- NOT IN on non-partition key -SELECT +SELECT user_id -FROM +FROM events_table as e1 WHERE event_type NOT IN - (SELECT + (SELECT event_type - FROM + FROM events_table as e2 WHERE value_2 = 1 AND value_3 > 3 AND @@ -167,11 +167,11 @@ ORDER BY 1; (6 rows) -- non-correlated query with =ANY on partition keys - SELECT - user_id, count(*) -FROM - 
users_table -WHERE + SELECT + user_id, count(*) +FROM + users_table +WHERE user_id =ANY(SELECT user_id FROM users_table WHERE value_1 >= 1 AND value_1 <= 2) GROUP BY 1 ORDER BY 2 DESC LIMIT 5; user_id | count ---------+------- @@ -183,20 +183,20 @@ WHERE (5 rows) -- users that appeared more than 118 times -SELECT +SELECT user_id -FROM +FROM users_table WHERE 2 <= - (SELECT - count(*) - FROM - events_table - WHERE - users_table.user_id = events_table.user_id - GROUP BY + (SELECT + count(*) + FROM + events_table + WHERE + users_table.user_id = events_table.user_id + GROUP BY user_id) -GROUP BY +GROUP BY user_id ORDER BY user_id; @@ -296,10 +296,10 @@ ORDER BY 1, 2; -- the following query doesn't have a meaningful result -- but it is a valid query with an arbitrary subquery in -- WHERE clause -SELECT - user_id -FROM - users_table +SELECT + user_id +FROM + users_table WHERE user_id IN ( @@ -376,18 +376,18 @@ SELECT user_id, array_length(events_table, 1) FROM ( SELECT user_id, array_agg(event ORDER BY time) AS events_table FROM ( - SELECT + SELECT u.user_id, e.event_type::text AS event, e.time - FROM + FROM users_table AS u, events_table AS e - WHERE u.user_id = e.user_id AND - u.user_id IN + WHERE u.user_id = e.user_id AND + u.user_id IN ( - SELECT - user_id - FROM - users_table + SELECT + user_id + FROM + users_table WHERE value_2 >= 1 AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 1 AND event_type <= 3 AND value_3 > 1 AND user_id = users_table.user_id) AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 3 AND event_type <= 4 AND value_3 > 1 AND user_id = users_table.user_id) @@ -401,8 +401,8 @@ ORDER BY 2 DESC, 1; 5 | 364 (1 row) --- --- below tests only aims for cases where all relations +-- +-- below tests only aims for cases where all relations -- are not joined on partition key -- -- e4 is not joined on the partition key @@ -465,12 +465,12 @@ SELECT user_id, value_2 FROM users_table WHERE group by e1.user_id HAVING 
sum(submit_card_info) > 0 ); -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -- left leaf query does not return partition key -SELECT - user_id -FROM - users_table +SELECT + user_id +FROM + users_table WHERE user_id IN ( @@ -541,18 +541,18 @@ SELECT user_id, array_length(events_table, 1) FROM ( SELECT user_id, array_agg(event ORDER BY time) AS events_table FROM ( - SELECT + SELECT u.user_id, e.event_type::text AS event, e.time - FROM + FROM users_table AS u, events_table AS e - WHERE u.user_id = e.user_id AND - u.user_id IN + WHERE u.user_id = e.user_id AND + u.user_id IN ( - SELECT - user_id - FROM - users_table + SELECT + user_id + FROM + users_table WHERE value_2 >= 5 AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 1 AND event_type <= 3 AND value_3 > 1 AND user_id = users_table.user_id) AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 3 AND event_type <= 4 AND value_3 > 1 AND user_id != users_table.user_id) @@ -561,7 +561,7 @@ FROM ( GROUP BY user_id ) q ORDER BY 2 DESC, 1; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -- subquery in where clause doesn't have a relation, but is constant SELECT user_id @@ -594,17 +594,17 @@ LIMIT 2; (0 rows) -- OFFSET is not supported in the subquey -SELECT +SELECT user_id -FROM +FROM users_table -WHERE - value_2 > - (SELECT - max(value_2) - FROM - events_table - WHERE +WHERE + value_2 > + (SELECT + max(value_2) + FROM + events_table + WHERE users_table.user_id = events_table.user_id AND event_type = 2 GROUP BY user_id @@ -615,26 +615,26 @@ DETAIL: Offset clause is currently unsupported 
when a subquery references a col -- we can detect unsupported subqueries even if they appear -- in WHERE subquery -> FROM subquery -> WHERE subquery -- but we can recursively plan that anyway -SELECT DISTINCT user_id -FROM users_table -WHERE user_id - IN (SELECT - f_inner.user_id - FROM +SELECT DISTINCT user_id +FROM users_table +WHERE user_id + IN (SELECT + f_inner.user_id + FROM ( - SELECT - e1.user_id - FROM - users_table u1, events_table e1 - WHERE + SELECT + e1.user_id + FROM + users_table u1, events_table e1 + WHERE e1.user_id = u1.user_id ) as f_inner, ( - SELECT - e1.user_id - FROM - users_table u1, events_table e1 - WHERE + SELECT + e1.user_id + FROM + users_table u1, events_table e1 + WHERE e1.user_id = u1.user_id AND e1.user_id IN (SELECT user_id FROM users_table ORDER BY user_id LIMIT 3) ) as f_outer @@ -675,5 +675,5 @@ SELECT user_id, value_2 FROM users_table WHERE AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=1 AND value_3 > 1 AND test_join_function(events_table.user_id, users_table.user_id)) ORDER BY 1 DESC, 2 DESC LIMIT 3; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DROP FUNCTION test_join_function(int,int); diff --git a/src/test/regress/expected/multi_subquery_union.out b/src/test/regress/expected/multi_subquery_union.out index d21faa287..78f69cd2b 100644 --- a/src/test/regress/expected/multi_subquery_union.out +++ b/src/test/regress/expected/multi_subquery_union.out @@ -7,10 +7,10 @@ -- a very simple union query SELECT user_id, counter FROM ( - SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (1, 2) - UNION - SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (5, 6) -) user_id + SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (1, 2) + 
UNION + SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (5, 6) +) user_id ORDER BY 2 DESC,1 LIMIT 5; user_id | counter @@ -40,10 +40,10 @@ LIMIT 5; -- a very simple union query with reference table SELECT user_id, counter FROM ( - SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (1, 2) - UNION - SELECT user_id, value_2 % 10 AS counter FROM events_reference_table WHERE event_type IN (5, 6) -) user_id + SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (1, 2) + UNION + SELECT user_id, value_2 % 10 AS counter FROM events_reference_table WHERE event_type IN (5, 6) +) user_id ORDER BY 2 DESC,1 LIMIT 5; user_id | counter @@ -58,10 +58,10 @@ LIMIT 5; -- the same query with union all SELECT user_id, counter FROM ( - SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (1, 2) + SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (1, 2) UNION ALL - SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (5, 6) -) user_id + SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (5, 6) +) user_id ORDER BY 2 DESC,1 LIMIT 5; user_id | counter @@ -76,10 +76,10 @@ LIMIT 5; -- the same query with union all and reference table SELECT user_id, counter FROM ( - SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (1, 2) + SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (1, 2) UNION ALL - SELECT user_id, value_2 % 10 AS counter FROM events_reference_table WHERE event_type IN (5, 6) -) user_id + SELECT user_id, value_2 % 10 AS counter FROM events_reference_table WHERE event_type IN (5, 6) +) user_id ORDER BY 2 DESC,1 LIMIT 5; user_id | counter @@ -92,12 +92,12 @@ LIMIT 5; (5 rows) -- the same query with group by -SELECT user_id, sum(counter) +SELECT user_id, sum(counter) FROM ( - SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type 
IN (1, 2) - UNION - SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (5, 6) -) user_id + SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (1, 2) + UNION + SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (5, 6) +) user_id GROUP BY 1 ORDER BY 2 DESC,1 LIMIT 5; @@ -111,12 +111,12 @@ LIMIT 5; (5 rows) -- the same query with UNION ALL clause -SELECT user_id, sum(counter) +SELECT user_id, sum(counter) FROM ( - SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (1, 2) + SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (1, 2) UNION ALL - SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (5, 6) -) user_id + SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (5, 6) +) user_id GROUP BY 1 ORDER BY 2 DESC,1 LIMIT 5; @@ -130,12 +130,12 @@ LIMIT 5; (5 rows) -- the same query target list entries shuffled -SELECT user_id, sum(counter) +SELECT user_id, sum(counter) FROM ( - SELECT value_2 % 10 AS counter, user_id FROM events_table WHERE event_type IN (1, 2) - UNION - SELECT value_2 % 10 AS counter, user_id FROM events_table WHERE event_type IN (5, 6) -) user_id + SELECT value_2 % 10 AS counter, user_id FROM events_table WHERE event_type IN (1, 2) + UNION + SELECT value_2 % 10 AS counter, user_id FROM events_table WHERE event_type IN (5, 6) +) user_id GROUP BY 1 ORDER BY 2 DESC,1 LIMIT 5; @@ -149,15 +149,15 @@ LIMIT 5; (5 rows) -- same query with GROUP BY -SELECT user_id, sum(counter) +SELECT user_id, sum(counter) FROM ( - SELECT user_id, value_2 AS counter FROM events_table WHERE event_type IN (1, 2) - UNION - SELECT user_id, value_2 AS counter FROM events_table WHERE event_type IN (5, 6) -) user_id -GROUP BY - user_id ---HAVING sum(counter) > 900 + SELECT user_id, value_2 AS counter FROM events_table WHERE event_type IN (1, 2) + UNION + SELECT user_id, value_2 AS counter FROM 
events_table WHERE event_type IN (5, 6) +) user_id +GROUP BY + user_id +--HAVING sum(counter) > 900 ORDER BY 1,2 DESC LIMIT 5; user_id | sum ---------+----- @@ -170,15 +170,15 @@ ORDER BY 1,2 DESC LIMIT 5; -- the same query target list entries shuffled but this time the subqueries target list -- is shuffled -SELECT user_id, sum(counter) +SELECT user_id, sum(counter) FROM ( - SELECT value_2 AS counter, user_id FROM events_table WHERE event_type IN (1, 2) - UNION - SELECT value_2 AS counter, user_id FROM events_table WHERE event_type IN (5, 6) -) user_id -GROUP BY - user_id ---HAVING sum(counter) > 900 + SELECT value_2 AS counter, user_id FROM events_table WHERE event_type IN (1, 2) + UNION + SELECT value_2 AS counter, user_id FROM events_table WHERE event_type IN (5, 6) +) user_id +GROUP BY + user_id +--HAVING sum(counter) > 900 ORDER BY 1,2 DESC LIMIT 5; user_id | sum ---------+----- @@ -190,10 +190,10 @@ ORDER BY 1,2 DESC LIMIT 5; (5 rows) -- similar query this time more subqueries and target list contains a resjunk entry -SELECT sum(counter) +SELECT sum(counter) FROM ( SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 1 GROUP BY user_id HAVING sum(value_2) > 5 - UNION + UNION SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 2 and value_1 < 3 GROUP BY user_id HAVING sum(value_2) > 25 UNION SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 3 and value_1 < 4 GROUP BY user_id HAVING sum(value_2) > 25 @@ -201,7 +201,7 @@ FROM ( SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 4 and value_1 < 5 GROUP BY user_id HAVING sum(value_2) > 25 UNION SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 5 and value_1 < 6 GROUP BY user_id HAVING sum(value_2) > 25 -) user_id +) user_id GROUP BY user_id ORDER BY 1 DESC LIMIT 5; sum ----- @@ -212,10 +212,10 @@ GROUP BY user_id ORDER BY 1 DESC LIMIT 5; (4 rows) -- similar query this time more subqueries with reference 
table and target list contains a resjunk entry -SELECT sum(counter) +SELECT sum(counter) FROM ( SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 1 GROUP BY user_id HAVING sum(value_2) > 25 - UNION + UNION SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 2 and value_1 < 3 GROUP BY user_id HAVING sum(value_2) > 25 UNION SELECT user_id, sum(value_2) AS counter FROM users_reference_table where value_1 < 3 and value_1 < 4 GROUP BY user_id HAVING sum(value_2) > 25 @@ -223,7 +223,7 @@ FROM ( SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 4 and value_1 < 5 GROUP BY user_id HAVING sum(value_2) > 25 UNION SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 5 and value_1 < 6 GROUP BY user_id HAVING sum(value_2) > 25 -) user_id +) user_id GROUP BY user_id ORDER BY 1 DESC LIMIT 5; sum ----- @@ -234,7 +234,7 @@ GROUP BY user_id ORDER BY 1 DESC LIMIT 5; (4 rows) -- similar query as above, with UNION ALL -SELECT sum(counter) +SELECT sum(counter) FROM ( SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 1 GROUP BY user_id HAVING sum(value_2) > 250 UNION ALL @@ -245,7 +245,7 @@ FROM ( SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 4 and value_1 < 5 GROUP BY user_id HAVING sum(value_2) > 25 UNION ALL SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 5 and value_1 < 6 GROUP BY user_id HAVING sum(value_2) > 25 -) user_id +) user_id GROUP BY user_id ORDER BY 1 DESC LIMIT 5; sum ----- @@ -261,41 +261,41 @@ FROM ( ( SELECT user_id, sum(counter) FROM - (SELECT + (SELECT user_id, sum(value_2) AS counter - FROM + FROM users_table - GROUP BY + GROUP BY user_id - UNION - SELECT + UNION + SELECT user_id, sum(value_2) AS counter - FROM + FROM events_table - GROUP BY + GROUP BY user_id) user_id_1 - GROUP BY + GROUP BY user_id) UNION - (SELECT + (SELECT user_id, sum(counter) FROM - (SELECT + (SELECT user_id, sum(value_2) AS counter - 
FROM + FROM users_table - GROUP BY + GROUP BY user_id - UNION - SELECT - user_id, sum(value_2) AS counter - FROM + UNION + SELECT + user_id, sum(value_2) AS counter + FROM events_table - GROUP BY + GROUP BY user_id) user_id_2 - GROUP BY - user_id)) AS ftop -ORDER BY 2 DESC, 1 DESC + GROUP BY + user_id)) AS ftop +ORDER BY 2 DESC, 1 DESC LIMIT 5; user_id | sum ---------+----- @@ -312,41 +312,41 @@ FROM ( ( SELECT user_id, sum(counter) FROM - (SELECT + (SELECT user_id, sum(value_2) AS counter - FROM + FROM users_table - GROUP BY + GROUP BY user_id - UNION - SELECT + UNION + SELECT user_id, sum(value_2) AS counter - FROM + FROM events_reference_table - GROUP BY + GROUP BY user_id) user_id_1 - GROUP BY + GROUP BY user_id) UNION - (SELECT + (SELECT user_id, sum(counter) FROM - (SELECT + (SELECT user_id, sum(value_2) AS counter - FROM + FROM users_table - GROUP BY + GROUP BY user_id - UNION - SELECT - user_id, sum(value_2) AS counter - FROM + UNION + SELECT + user_id, sum(value_2) AS counter + FROM events_table - GROUP BY + GROUP BY user_id) user_id_2 - GROUP BY - user_id)) AS ftop -ORDER BY 2 DESC, 1 DESC + GROUP BY + user_id)) AS ftop +ORDER BY 2 DESC, 1 DESC LIMIT 5; user_id | sum ---------+----- @@ -368,40 +368,40 @@ FROM FROM ( (SELECT * FROM - (SELECT + (SELECT "events"."user_id", "events"."time", 0 AS event - FROM + FROM events_table as "events" - WHERE - event_type IN (1, 2)) events_subquery_1) - UNION + WHERE + event_type IN (1, 2)) events_subquery_1) + UNION (SELECT * FROM - (SELECT + (SELECT "events"."user_id", "events"."time", 1 AS event - FROM + FROM events_table as "events" - WHERE + WHERE event_type IN (2, 3) ) events_subquery_2) - UNION + UNION (SELECT * FROM - (SELECT + (SELECT "events"."user_id", "events"."time", 2 AS event - FROM + FROM events_table as "events" - WHERE + WHERE event_type IN (4, 5) ) events_subquery_3) - UNION + UNION (SELECT * FROM - (SELECT + (SELECT "events"."user_id", "events"."time", 3 AS event - FROM + FROM events_table as "events" 
- WHERE + WHERE event_type IN (6, 1)) events_subquery_4)) t1 - GROUP BY "t1"."user_id") AS t) "q" + GROUP BY "t1"."user_id") AS t) "q" ) as final_query GROUP BY types ORDER BY types; @@ -419,40 +419,40 @@ SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM (SELECT *, random() FROM - (SELECT + (SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types" FROM - (SELECT + (SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events FROM( - (SELECT + (SELECT "events"."user_id", "events"."time", 0 AS event - FROM + FROM events_table as "events" - WHERE + WHERE event_type IN (1, 2)) - UNION - (SELECT + UNION + (SELECT "events"."user_id", "events"."time", 1 AS event - FROM + FROM events_table as "events" - WHERE + WHERE event_type IN (2, 3) ) - UNION - (SELECT + UNION + (SELECT "events"."user_id", "events"."time", 2 AS event - FROM + FROM events_table as "events" - WHERE + WHERE event_type IN (4, 5) ) - UNION - (SELECT + UNION + (SELECT "events"."user_id", "events"."time", 3 AS event - FROM + FROM events_table as "events" - WHERE + WHERE event_type IN (6, 1))) t1 - GROUP BY "t1"."user_id") AS t) "q" + GROUP BY "t1"."user_id") AS t) "q" ) as final_query GROUP BY types ORDER BY types; @@ -471,34 +471,34 @@ FROM FROM ( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events FROM ( - (SELECT + (SELECT "events"."user_id", "events"."time", 0 AS event - FROM + FROM events_table as "events" - WHERE + WHERE event_type IN (1, 2)) - UNION - (SELECT + UNION + (SELECT "events"."user_id", "events"."time", 1 AS event - FROM + FROM events_table as "events" - WHERE + WHERE event_type IN (2, 3) ) - UNION - (SELECT + UNION + (SELECT "events"."user_id", "events"."time", 2 AS event - FROM + FROM events_table as "events" - WHERE + WHERE event_type IN (4, 5) ) - UNION - (SELECT + UNION + (SELECT 
"events"."user_id", "events"."time", 3 AS event - FROM + FROM events_table as "events" - WHERE + WHERE event_type IN (6, 1))) t1 - GROUP BY "t1"."user_id") AS t) "q" + GROUP BY "t1"."user_id") AS t) "q" GROUP BY types ORDER BY types; types | sumofeventtype @@ -512,39 +512,39 @@ ORDER BY types; -- again same query but with only two top level empty queries (i.e., no group bys) SELECT * FROM - ( SELECT * + ( SELECT * FROM ( SELECT "t1"."user_id" FROM ( - (SELECT + (SELECT "events"."user_id", "events"."time", 0 AS event - FROM + FROM events_table as "events" - WHERE + WHERE event_type IN (1, 2)) - UNION - (SELECT + UNION + (SELECT "events"."user_id", "events"."time", 1 AS event - FROM + FROM events_table as "events" - WHERE + WHERE event_type IN (2, 3) ) - UNION - (SELECT + UNION + (SELECT "events"."user_id", "events"."time", 2 AS event - FROM + FROM events_table as "events" - WHERE + WHERE event_type IN (4, 5) ) - UNION - (SELECT + UNION + (SELECT "events"."user_id", "events"."time", 3 AS event - FROM + FROM events_table as "events" - WHERE + WHERE event_type IN (6, 1))) t1 - ) AS t) "q" -ORDER BY 1 + ) AS t) "q" +ORDER BY 1 LIMIT 5; user_id --------- @@ -562,34 +562,34 @@ FROM FROM ( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events FROM ( - (SELECT + (SELECT "events"."user_id", "events"."time", 0 AS event - FROM + FROM events_table as "events" - WHERE + WHERE event_type IN (1, 2)) UNION ALL - (SELECT + (SELECT "events"."user_id", "events"."time", 1 AS event - FROM + FROM events_table as "events" - WHERE + WHERE event_type IN (2, 3) ) UNION ALL - (SELECT + (SELECT "events"."user_id", "events"."time", 2 AS event - FROM + FROM events_table as "events" - WHERE + WHERE event_type IN (4, 5) ) UNION ALL - (SELECT + (SELECT "events"."user_id", "events"."time", 3 AS event - FROM + FROM events_table as "events" - WHERE + WHERE event_type IN (6, 1))) t1 - GROUP BY "t1"."user_id") AS t) "q" + GROUP BY 
"t1"."user_id") AS t) "q" GROUP BY types ORDER BY types; types | sumofeventtype @@ -601,9 +601,9 @@ ORDER BY types; (4 rows) -- some UNION ALL queries that are going to be pulled up -SELECT +SELECT count(*) -FROM +FROM ( (SELECT user_id FROM users_table) UNION ALL @@ -615,9 +615,9 @@ FROM (1 row) -- some UNION ALL queries that are going to be pulled up with reference table -SELECT +SELECT count(*) -FROM +FROM ( (SELECT user_id FROM users_table) UNION ALL @@ -629,9 +629,9 @@ FROM (1 row) -- similar query without top level agg -SELECT +SELECT user_id -FROM +FROM ( (SELECT user_id FROM users_table) UNION ALL @@ -649,9 +649,9 @@ LIMIT 5; (5 rows) -- similar query with multiple target list entries -SELECT +SELECT user_id, value_3 -FROM +FROM ( (SELECT value_3, user_id FROM users_table) UNION ALL @@ -669,9 +669,9 @@ LIMIT 5; (5 rows) -- similar query group by inside the subqueries -SELECT +SELECT user_id, value_3_sum -FROM +FROM ( (SELECT sum(value_3) as value_3_sum, user_id FROM users_table GROUP BY user_id) UNION ALL @@ -689,9 +689,9 @@ LIMIT 5; (5 rows) -- similar query top level group by -SELECT +SELECT user_id, sum(value_3) -FROM +FROM ( (SELECT value_3, user_id FROM users_table) UNION ALL @@ -710,9 +710,9 @@ LIMIT 5; (5 rows) -- a long set operation list -SELECT +SELECT user_id, value_3 -FROM +FROM ( (SELECT value_3, user_id FROM events_table where event_type IN (1, 2)) UNION ALL @@ -738,9 +738,9 @@ LIMIT 5; (5 rows) -- no partition key on the top -SELECT +SELECT max(value_3) -FROM +FROM ( (SELECT value_3, user_id FROM events_table where event_type IN (1, 2)) UNION ALL @@ -768,12 +768,12 @@ LIMIT 5; -- now lets also have some unsupported queries -- group by is not on the partition key, supported through recursive planning -SELECT user_id, sum(counter) +SELECT user_id, sum(counter) FROM ( SELECT user_id, sum(value_2) AS counter FROM events_table GROUP BY user_id UNION SELECT value_1 as user_id, sum(value_2) AS counter FROM users_table GROUP BY value_1 -) user_id +) 
user_id GROUP BY user_id ORDER BY 1,2; user_id | sum @@ -788,10 +788,10 @@ ORDER BY 1,2; (7 rows) -- partition key is not selected, supported through recursive planning -SELECT sum(counter) +SELECT sum(counter) FROM ( SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 1 GROUP BY user_id HAVING sum(value_2) > 25 - UNION + UNION SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 2 and value_1 < 3 GROUP BY user_id HAVING sum(value_2) > 25 UNION SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 3 and value_1 < 4 GROUP BY user_id HAVING sum(value_2) > 25 @@ -799,7 +799,7 @@ FROM ( SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 4 and value_1 < 5 GROUP BY user_id HAVING sum(value_2) > 25 UNION SELECT 2 * user_id, sum(value_2) AS counter FROM users_table where value_1 < 5 and value_1 < 6 GROUP BY user_id HAVING sum(value_2) > 25 -) user_id +) user_id GROUP BY user_id ORDER BY 1 DESC LIMIT 5; sum ----- @@ -814,20 +814,20 @@ GROUP BY user_id ORDER BY 1 DESC LIMIT 5; SELECT * FROM ( ( - SELECT user_id, sum(counter) + SELECT user_id, sum(counter) FROM ( SELECT user_id, sum(value_2) AS counter FROM users_table GROUP BY user_id - UNION + UNION SELECT user_id, sum(value_2) AS counter FROM events_table GROUP BY user_id ) user_id_1 GROUP BY user_id -) +) UNION ( - SELECT user_id, sum(counter) + SELECT user_id, sum(counter) FROM ( SELECT user_id, sum(value_2) AS counter FROM users_table GROUP BY user_id - EXCEPT + EXCEPT SELECT user_id, sum(value_2) AS counter FROM events_table GROUP BY user_id ) user_id_2 GROUP BY user_id) @@ -850,12 +850,12 @@ ORDER BY 1,2; (12 rows) -- non-equi join are not supported since there is no equivalence between the partition column -SELECT user_id, sum(counter) +SELECT user_id, sum(counter) FROM ( SELECT user_id, sum(value_2) AS counter FROM users_table GROUP BY user_id - UNION + UNION SELECT events_table.user_id, sum(events_table.value_2) AS counter FROM 
events_table, users_table WHERE users_table.user_id > events_table.user_id GROUP BY 1 -) user_id +) user_id GROUP BY user_id; ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- non-equi join also not supported for UNION ALL @@ -864,27 +864,27 @@ FROM ( SELECT user_id, sum(value_2) AS counter FROM users_table GROUP BY user_id UNION ALL SELECT events_table.user_id, sum(events_table.value_2) AS counter FROM events_table, users_table WHERE users_table.user_id > events_table.user_id GROUP BY 1 -) user_id +) user_id GROUP BY user_id; ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- joins inside unions are supported -- slightly more comlex than the above SELECT * FROM ( ( - SELECT user_id, sum(counter) + SELECT user_id, sum(counter) FROM ( SELECT user_id, sum(value_2) AS counter FROM users_table GROUP BY user_id - UNION + UNION SELECT user_id, sum(value_2) AS counter FROM events_table GROUP BY user_id ) user_id_1 GROUP BY user_id -) +) UNION ( - SELECT user_id, sum(counter) + SELECT user_id, sum(counter) FROM ( SELECT user_id, sum(value_2) AS counter FROM users_table GROUP BY user_id - UNION + UNION SELECT events_table.user_id, sum(events_table.value_2) AS counter FROM events_table, users_table WHERE (events_table.user_id = users_table.user_id) GROUP BY events_table.user_id ) user_id_2 GROUP BY user_id) @@ -995,12 +995,12 @@ LIMIT 10; (10 rows) -- offset inside the union -SELECT user_id, sum(counter) +SELECT user_id, sum(counter) FROM ( SELECT user_id, sum(value_2) AS counter FROM events_table GROUP BY user_id UNION SELECT user_id, sum(value_2) AS counter FROM users_table GROUP BY user_id ORDER BY user_id OFFSET 4 -) user_id +) user_id GROUP BY user_id ORDER BY 1,2; user_id | sum @@ -1017,39 +1017,39 @@ FROM ( ( SELECT user_id, sum(counter) FROM - (SELECT + (SELECT user_id, sum(value_2) AS counter - FROM + FROM 
users_table - GROUP BY + GROUP BY user_id - UNION - SELECT + UNION + SELECT user_id, sum(value_2) AS counter - FROM + FROM events_table - GROUP BY + GROUP BY user_id) user_id_1 - GROUP BY + GROUP BY user_id) UNION - (SELECT + (SELECT user_id, sum(counter) FROM - (SELECT + (SELECT sum(value_2) AS counter, user_id - FROM + FROM users_table - GROUP BY + GROUP BY user_id - UNION - SELECT - user_id, sum(value_2) AS counter - FROM + UNION + SELECT + user_id, sum(value_2) AS counter + FROM events_table - GROUP BY + GROUP BY user_id) user_id_2 - GROUP BY + GROUP BY user_id)) AS ftop ORDER BY 1,2; user_id | sum @@ -1075,9 +1075,9 @@ ORDER BY 1,2; (18 rows) -- some UNION all queries that are going to be pulled up -SELECT +SELECT count(*) -FROM +FROM ( (SELECT user_id FROM users_table) UNION ALL @@ -1089,9 +1089,9 @@ FROM (1 row) -- last query does not have partition key -SELECT +SELECT user_id, value_3 -FROM +FROM ( (SELECT value_3, user_id FROM events_table where event_type IN (1, 2)) UNION ALL @@ -1117,9 +1117,9 @@ LIMIT 5; (5 rows) -- we allow joins within unions -SELECT +SELECT count(*) -FROM +FROM ( (SELECT user_id FROM users_table) UNION ALL @@ -1131,9 +1131,9 @@ FROM (1 row) -- we support unions on subqueries without relations through recursive planning -SELECT +SELECT count(*) -FROM +FROM ( (SELECT user_id FROM users_table) UNION ALL @@ -1145,9 +1145,9 @@ FROM (1 row) -- we support pushing down subqueries without relations through recursive planning -SELECT +SELECT count(*) -FROM +FROM ( (SELECT user_id FROM users_table) UNION ALL @@ -1159,9 +1159,9 @@ FROM (1 row) -- we support subqueries without relations within a union -SELECT +SELECT user_id, value_3 -FROM +FROM ( (SELECT value_3, user_id FROM events_table where event_type IN (1, 2)) UNION ALL @@ -1197,35 +1197,35 @@ FROM FROM ( (SELECT * FROM - (SELECT + (SELECT "events"."user_id", "events"."time", 0 AS event - FROM + FROM events_table as "events" - WHERE - event_type IN (1, 2)) events_subquery_1) - UNION + 
WHERE + event_type IN (1, 2)) events_subquery_1) + UNION (SELECT * FROM - (SELECT + (SELECT "events"."user_id", "events"."time", 1 AS event - FROM + FROM events_table as "events" - WHERE + WHERE event_type IN (2, 3) ) events_subquery_2) - UNION + UNION (SELECT * FROM - (SELECT + (SELECT "events"."user_id", "events"."time", 2 AS event - FROM + FROM events_table as "events" - WHERE + WHERE event_type IN (4, 5) ) events_subquery_3) - UNION + UNION (SELECT * FROM (SELECT 1, now(), 3 AS event) events_subquery_4)) t1 - GROUP BY "t1"."user_id") AS t) "q" + GROUP BY "t1"."user_id") AS t) "q" ) as final_query GROUP BY types ORDER BY types; diff --git a/src/test/regress/expected/non_colocated_subquery_joins.out b/src/test/regress/expected/non_colocated_subquery_joins.out index 486d8bb17..15fdc52ac 100644 --- a/src/test/regress/expected/non_colocated_subquery_joins.out +++ b/src/test/regress/expected/non_colocated_subquery_joins.out @@ -209,7 +209,7 @@ SELECT true AS valid FROM explain_json_2($$ (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as foo3, (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (13,14,15,16)) as foo4, (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (17,18,19,20)) as foo5 - + WHERE foo1.user_id = foo4.user_id AND @@ -241,7 +241,7 @@ SELECT true AS valid FROM explain_json_2($$ (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as foo3, (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (13,14,15,16)) as foo4, (SELECT users_table.user_id, 
users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (17,18,19,20)) as foo5 - + WHERE foo1.user_id = foo4.user_id AND @@ -258,7 +258,7 @@ DEBUG: Plan 26 query after replacing subqueries and CTEs: SELECT user_id, rando t (1 row) --- There are two non colocated joins, one is in the one of the leaf queries, +-- There are two non colocated joins, one is in the one of the leaf queries, -- the other is on the top-level subquery SELECT true AS valid FROM explain_json_2($$ @@ -271,7 +271,7 @@ SELECT true AS valid FROM explain_json_2($$ (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (5,6,7,8)) as foo2, (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as foo3, (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (13,14,15,16)) as foo4, - (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (17,18,19,20)) as foo5 + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (17,18,19,20)) as foo5 WHERE foo1.user_id = foo4.user_id AND foo1.user_id = foo2.user_id AND @@ -302,7 +302,7 @@ SELECT true AS valid FROM explain_json_2($$ (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (5,6,7,8)) as foo2, (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as foo3, (SELECT users_table.user_id, users_table.value_1 FROM users_table, 
events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (13,14,15,16)) as foo4, - (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (17,18,19,20)) as foo5 + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (17,18,19,20)) as foo5 WHERE foo1.user_id = foo4.user_id AND foo1.user_id = foo2.user_id AND @@ -322,8 +322,8 @@ DEBUG: Plan 31 query after replacing subqueries and CTEs: SELECT user_id, rando -- Deeper subqueries are non-colocated SELECT true AS valid FROM explain_json_2($$ - SELECT - count(*) + SELECT + count(*) FROM ( SELECT @@ -332,7 +332,7 @@ SELECT true AS valid FROM explain_json_2($$ (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) as foo, (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar WHERE - foo.user_id = bar.user_id) as foo_top JOIN + foo.user_id = bar.user_id) as foo_top JOIN ( SELECT @@ -341,7 +341,7 @@ SELECT true AS valid FROM explain_json_2($$ (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) as foo, (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar WHERE - foo.user_id = bar.user_id) as bar_top + foo.user_id = bar.user_id) as bar_top ON (foo_top.user_id = bar_top.user_id); $$); DEBUG: generating subplan 34_1 for subquery SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4]))) @@ -355,8 +355,8 @@ DEBUG: Plan 34 
query after replacing subqueries and CTEs: SELECT count(*) AS co -- Top level Subquery is not colocated SELECT true AS valid FROM explain_json_2($$ - SELECT - count(*) + SELECT + count(*) FROM ( SELECT @@ -365,7 +365,7 @@ SELECT true AS valid FROM explain_json_2($$ (SELECT DISTINCT users_table.user_id, users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo, (SELECT DISTINCT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar WHERE - foo.user_id = bar.user_id) as foo_top JOIN + foo.user_id = bar.user_id) as foo_top JOIN ( SELECT @@ -374,8 +374,8 @@ SELECT true AS valid FROM explain_json_2($$ (SELECT DISTINCT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as foo, (SELECT DISTINCT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (13,14,15,16)) as bar WHERE - foo.user_id = bar.user_id) as bar_top - ON (foo_top.value_2 = bar_top.user_id); + foo.user_id = bar.user_id) as bar_top + ON (foo_top.value_2 = bar_top.user_id); $$); DEBUG: generating subplan 37_1 for subquery SELECT foo.user_id FROM (SELECT DISTINCT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[9, 10, 11, 12])))) foo, (SELECT DISTINCT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[13, 14, 15, 16])))) bar WHERE (foo.user_id OPERATOR(pg_catalog.=) bar.user_id) @@ -388,8 +388,8 @@ DEBUG: Plan 37 query after replacing subqueries and CTEs: SELECT count(*) AS co -- Top level Subquery is not colocated 
as the above SELECT true AS valid FROM explain_json_2($$ - SELECT - count(*) + SELECT + count(*) FROM ( SELECT @@ -398,7 +398,7 @@ SELECT true AS valid FROM explain_json_2($$ (SELECT DISTINCT users_table.user_id, users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo, (SELECT DISTINCT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar WHERE - foo.user_id = bar.user_id) as foo_top JOIN + foo.user_id = bar.user_id) as foo_top JOIN ( SELECT foo.user_id @@ -406,7 +406,7 @@ SELECT true AS valid FROM explain_json_2($$ (SELECT DISTINCT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as foo, (SELECT DISTINCT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (13,14,15,16)) as bar WHERE - foo.user_id = bar.user_id) as bar_top + foo.user_id = bar.user_id) as bar_top ON (foo_top.value_2 = bar_top.user_id); $$); DEBUG: generating subplan 39_1 for subquery SELECT DISTINCT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[13, 14, 15, 16]))) @@ -420,13 +420,13 @@ DEBUG: Plan 39 query after replacing subqueries and CTEs: SELECT count(*) AS co -- non colocated joins are deep inside the query SELECT true AS valid FROM explain_json_2($$ - SELECT + SELECT count(*) FROM ( - SELECT * FROM - (SELECT DISTINCT users_table.user_id FROM users_table, - (SELECT events_table.user_id as my_users FROM events_table, users_table WHERE events_table.event_type = users_table.user_id) as foo + SELECT * FROM + (SELECT DISTINCT users_table.user_id FROM users_table, + (SELECT events_table.user_id as my_users FROM events_table, users_table WHERE 
events_table.event_type = users_table.user_id) as foo WHERE foo.my_users = users_table.user_id) as mid_level_query ) as bar; $$); @@ -443,9 +443,9 @@ DEBUG: Plan 42 query after replacing subqueries and CTEs: SELECT count(*) AS co -- via regular repartitioning since PostgreSQL would pull the query up SELECT true AS valid FROM explain_json_2($$ - SELECT count(*) FROM ( SELECT * FROM - (SELECT DISTINCT users_table.user_id FROM users_table, - (SELECT events_table.event_type as my_users, random() FROM events_table, users_table WHERE events_table.user_id = users_table.user_id) as foo + SELECT count(*) FROM ( SELECT * FROM + (SELECT DISTINCT users_table.user_id FROM users_table, + (SELECT events_table.event_type as my_users, random() FROM events_table, users_table WHERE events_table.user_id = users_table.user_id) as foo WHERE foo.my_users = users_table.user_id) as mid_level_query ) as bar; $$); @@ -458,17 +458,17 @@ DEBUG: Plan 44 query after replacing subqueries and CTEs: SELECT count(*) AS co -- same as the above query, but, one level deeper subquery SELECT true AS valid FROM explain_json_2($$ - - SELECT + + SELECT count(*) FROM ( - SELECT * FROM - (SELECT DISTINCT users_table.user_id FROM users_table, - (SELECT events_table.user_id as my_users FROM events_table, + SELECT * FROM + (SELECT DISTINCT users_table.user_id FROM users_table, + (SELECT events_table.user_id as my_users FROM events_table, (SELECT events_table.user_id, random() FROM users_table, events_table WHERE users_table.user_id = events_table.user_id) as selected_users - WHERE events_table.event_type = selected_users.user_id) as foo - + WHERE events_table.event_type = selected_users.user_id) as foo + WHERE foo.my_users = users_table.user_id) as mid_level_query ) as bar; $$); @@ -484,21 +484,21 @@ DEBUG: Plan 46 query after replacing subqueries and CTEs: SELECT count(*) AS co -- the subquery on the distribution key SELECT true AS valid FROM explain_json_2($$ - SELECT + SELECT count(*) FROM ( - SELECT * FROM - 
(SELECT DISTINCT users_table.user_id FROM users_table, - + SELECT * FROM + (SELECT DISTINCT users_table.user_id FROM users_table, - (SELECT events_table.user_id as my_users FROM events_table, - (SELECT events_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND + + (SELECT events_table.user_id as my_users FROM events_table, + (SELECT events_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND users_table.user_id IN (SELECT value_2 FROM events_table) ) as selected_users - WHERE events_table.user_id = selected_users.user_id) as foo + WHERE events_table.user_id = selected_users.user_id) as foo WHERE foo.my_users = users_table.user_id) as mid_level_query @@ -518,13 +518,13 @@ SELECT true AS valid FROM explain_json_2($$SELECT FROM users_table WHERE - value_1 + value_1 IN - (SELECT - users_table.user_id - FROM - users_table, events_table - WHERE + (SELECT + users_table.user_id + FROM + users_table, events_table + WHERE users_table.user_id = events_table.value_2 AND event_type IN (5,6));$$); DEBUG: generating subplan 50_1 for subquery SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6]))) DEBUG: Plan 50 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM public.users_table WHERE (value_1 OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.user_id FROM read_intermediate_result('50_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer))) @@ -535,12 +535,12 @@ DEBUG: Plan 50 query after replacing subqueries and CTEs: SELECT count(*) AS co -- leaf subquery repartitioning should work fine when used with CTEs SELECT true AS valid FROM explain_json_2($$ - WITH q1 AS (SELECT user_id FROM users_table) -SELECT count(*) FROM q1, (SELECT - users_table.user_id, random() - FROM - users_table, 
events_table - WHERE + WITH q1 AS (SELECT user_id FROM users_table) +SELECT count(*) FROM q1, (SELECT + users_table.user_id, random() + FROM + users_table, events_table + WHERE users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) as bar WHERE bar.user_id = q1.user_id ;$$); DEBUG: generating subplan 52_1 for CTE q1: SELECT user_id FROM public.users_table DEBUG: generating subplan 52_2 for subquery SELECT users_table.user_id, random() AS random FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4]))) @@ -552,12 +552,12 @@ DEBUG: Plan 52 query after replacing subqueries and CTEs: SELECT count(*) AS co -- subquery joins should work fine when used with CTEs SELECT true AS valid FROM explain_json_2($$ - WITH q1 AS (SELECT user_id FROM users_table) - SELECT count(*) FROM q1, (SELECT - users_table.user_id, random() - FROM - users_table, events_table - WHERE + WITH q1 AS (SELECT user_id FROM users_table) + SELECT count(*) FROM q1, (SELECT + users_table.user_id, random() + FROM + users_table, events_table + WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as bar WHERE bar.user_id = q1.user_id ;$$); DEBUG: generating subplan 55_1 for CTE q1: SELECT user_id FROM public.users_table DEBUG: Plan 55 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('55_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) q1, (SELECT users_table.user_id, random() AS random FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))) bar WHERE (bar.user_id OPERATOR(pg_catalog.=) q1.user_id) @@ -584,19 +584,19 @@ SELECT event, array_length(events_table, 1) FROM ( SELECT 
event, array_agg(t.user_id) AS events_table FROM ( - SELECT + SELECT DISTINCT ON(e.event_type::text) e.event_type::text as event, e.time, e.user_id - FROM + FROM users_table AS u, events_table AS e, (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (5,6,7,8)) as bar - WHERE u.user_id = e.user_id AND - u.user_id IN + WHERE u.user_id = e.user_id AND + u.user_id IN ( - SELECT - user_id - FROM - users_table + SELECT + user_id + FROM + users_table WHERE value_2 >= 5 AND EXISTS (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) LIMIT 5 @@ -622,22 +622,22 @@ DEBUG: Plan 60 query after replacing subqueries and CTEs: SELECT event, array_l -- the relations are joined under a join tree with an alias SELECT true AS valid FROM explain_json_2($$ - SELECT - count(*) + SELECT + count(*) FROM - (users_table u1 JOIN users_table u2 using(value_1)) a JOIN (SELECT value_1, random() FROM users_table) as u3 USING (value_1); + (users_table u1 JOIN users_table u2 using(value_1)) a JOIN (SELECT value_1, random() FROM users_table) as u3 USING (value_1); $$); DEBUG: generating subplan 66_1 for subquery SELECT value_1, random() AS random FROM public.users_table DEBUG: Plan 66 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN public.users_table u2 USING (value_1)) a(value_1, user_id, "time", value_2, value_3, value_4, user_id_1, time_1, value_2_1, value_3_1, value_4_1) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('66_1'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +ERROR: complex joins are only supported when all distributed tables 
are co-located and joined on their distribution columns -- a very similar query to the above -- however, this time we users a subquery instead of join alias, and it works SELECT true AS valid FROM explain_json_2($$ - SELECT - count(*) + SELECT + count(*) FROM - (SELECT * FROM users_table u1 JOIN users_table u2 using(value_1)) a JOIN (SELECT value_1, random() FROM users_table) as u3 USING (value_1); + (SELECT * FROM users_table u1 JOIN users_table u2 using(value_1)) a JOIN (SELECT value_1, random() FROM users_table) as u3 USING (value_1); $$); DEBUG: generating subplan 68_1 for subquery SELECT u1.value_1, u1.user_id, u1."time", u1.value_2, u1.value_3, u1.value_4, u2.user_id, u2."time", u2.value_2, u2.value_3, u2.value_4 FROM (public.users_table u1 JOIN public.users_table u2 USING (value_1)) DEBUG: Plan 68 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.value_1, intermediate_result.user_id, intermediate_result."time", intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4, intermediate_result.user_id_1 AS user_id, intermediate_result.time_1 AS "time", intermediate_result.value_2_1 AS value_2, intermediate_result.value_3_1 AS value_3, intermediate_result.value_4_1 AS value_4 FROM read_intermediate_result('68_1'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, user_id integer, "time" timestamp without time zone, value_2 integer, value_3 double precision, value_4 bigint, user_id_1 integer, time_1 timestamp without time zone, value_2_1 integer, value_3_1 double precision, value_4_1 bigint)) a(value_1, user_id, "time", value_2, value_3, value_4, user_id_1, time_1, value_2_1, value_3_1, value_4_1) JOIN (SELECT users_table.value_1, random() AS random FROM public.users_table) u3 USING (value_1)) @@ -710,9 +710,9 @@ SELECT true AS valid FROM explain_json_2($$ SELECT user_id FROM users_table UNION SELECT user_id FROM users_table - ) a + ) a JOIN - (SELECT value_1 FROM 
users_table) as foo ON (a.user_id = foo.value_1) + (SELECT value_1 FROM users_table) as foo ON (a.user_id = foo.value_1) ); $$); DEBUG: generating subplan 77_1 for subquery SELECT user_id FROM public.users_table @@ -734,9 +734,9 @@ SELECT true AS valid FROM explain_json_2($$ SELECT user_id FROM users_table UNION SELECT user_id FROM users_table - ) a + ) a JOIN - users_table as foo ON (a.user_id = foo.value_1) + users_table as foo ON (a.user_id = foo.value_1) ); $$); DEBUG: generating subplan 81_1 for subquery SELECT user_id FROM public.users_table @@ -755,21 +755,21 @@ DEBUG: Plan 80 query after replacing subqueries and CTEs: SELECT a.user_id, foo SELECT true AS valid FROM explain_json_2($$ SELECT * FROM - ( - (SELECT user_id FROM users_table) as foo + ( + (SELECT user_id FROM users_table) as foo JOIN ( SELECT user_id FROM users_table WHERE user_id IN (1,2,3,4) UNION SELECT user_id FROM users_table WHERE user_id IN (5,6,7,8) - ) a + ) a - ON (a.user_id = foo.user_id) + ON (a.user_id = foo.user_id) JOIN (SELECT value_1 FROM users_table) as bar - - ON(foo.user_id = bar.value_1) + + ON(foo.user_id = bar.value_1) ); $$); DEBUG: generating subplan 84_1 for subquery SELECT value_1 FROM public.users_table @@ -783,7 +783,7 @@ DEBUG: Plan 84 query after replacing subqueries and CTEs: SELECT foo.user_id, a -- inside a CTE SELECT true AS valid FROM explain_json_2($$ - WITH non_colocated_subquery AS + WITH non_colocated_subquery AS ( SELECT foo.value_2 @@ -793,7 +793,7 @@ SELECT true AS valid FROM explain_json_2($$ WHERE foo.value_2 = bar.value_2 ), - non_colocated_subquery_2 AS + non_colocated_subquery_2 AS ( SELECT count(*) as cnt @@ -804,11 +804,11 @@ SELECT true AS valid FROM explain_json_2($$ IN (SELECT event_type FROM events_table WHERE user_id < 4) ) - SELECT - * - FROM - non_colocated_subquery, non_colocated_subquery_2 - WHERE + SELECT + * + FROM + non_colocated_subquery, non_colocated_subquery_2 + WHERE non_colocated_subquery.value_2 != non_colocated_subquery_2.cnt 
$$); DEBUG: generating subplan 86_1 for CTE non_colocated_subquery: SELECT foo.value_2 FROM (SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))) foo, (SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))) bar WHERE (foo.value_2 OPERATOR(pg_catalog.=) bar.value_2) @@ -832,8 +832,8 @@ SELECT true AS valid FROM explain_json_2($$ (SELECT users_table_local.value_2 FROM users_table_local, events_table_local WHERE users_table_local.user_id = events_table_local.user_id AND event_type IN (5,6,7,8)) as bar, (SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as baz WHERE - foo.value_2 = bar.value_2 - AND + foo.value_2 = bar.value_2 + AND foo.value_2 = baz.value_2 $$); DEBUG: generating subplan 91_1 for subquery SELECT users_table_local.value_2 FROM non_colocated_subquery.users_table_local, non_colocated_subquery.events_table_local WHERE ((users_table_local.user_id OPERATOR(pg_catalog.=) events_table_local.user_id) AND (events_table_local.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8]))) @@ -850,20 +850,20 @@ SELECT true AS valid FROM explain_json_2($$ SELECT count(*) FROM - (SELECT user_id FROM users_table) as foo + (SELECT user_id FROM users_table) as foo JOIN ( SELECT user_id FROM users_table WHERE user_id IN (1,2,3,4) UNION SELECT user_id FROM users_table WHERE user_id IN (5,6,7,8) - ) a + ) a - ON (a.user_id = foo.user_id) + ON (a.user_id = foo.user_id) JOIN (SELECT value_1, value_2 FROM users_table) as bar - ON(foo.user_id = bar.value_1) + ON(foo.user_id = bar.value_1) WHERE value_2 IN (SELECT value_1 FROM users_table WHERE value_2 < 1) 
AND @@ -880,16 +880,16 @@ DEBUG: Plan 93 query after replacing subqueries and CTEs: SELECT count(*) AS co t (1 row) --- make sure that we don't pick the refeence table as +-- make sure that we don't pick the refeence table as -- the anchor SELECT true AS valid FROM explain_json_2($$ SELECT count(*) - FROM + FROM users_reference_table AS users_table_ref, (SELECT user_id FROM users_Table) AS foo, (SELECT user_id, value_2 FROM events_Table) AS bar - WHERE + WHERE users_table_ref.user_id = foo.user_id AND foo.user_id = bar.value_2; $$); @@ -926,7 +926,7 @@ DEBUG: Router planner cannot handle multi-shard select queries DEBUG: skipping recursive planning for the subquery since it contains references to outer queries DEBUG: skipping recursive planning for the subquery since it contains references to outer queries DEBUG: skipping recursive planning for the subquery since it contains references to outer queries -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -- similar to the above, make sure that we skip recursive planning when -- the subquery doesn't have any tables SELECT true AS valid FROM explain_json_2($$ @@ -945,7 +945,7 @@ DEBUG: Router planner cannot handle multi-shard select queries DEBUG: skipping recursive planning for the subquery since it contains references to outer queries DEBUG: skipping recursive planning for the subquery since it contains references to outer queries DEBUG: skipping recursive planning for the subquery since it contains references to outer queries -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -- similar to the above, make sure that 
we skip recursive planning when -- the subquery contains only intermediate results SELECT * @@ -964,7 +964,7 @@ JOIN LATERAL (SELECT * FROM (SELECT * - FROM + FROM (SELECT * FROM events_table WHERE value_3 > 4 INTERSECT @@ -1010,7 +1010,7 @@ SELECT count(*) FROM events_table WHERE user_id NOT IN (SELECT * FROM (SELECT * - FROM + FROM (SELECT * FROM events_table WHERE value_3 > 4 INTERSECT @@ -1061,14 +1061,14 @@ SELECT create_distributed_table('table1','tenant_id'); -- all of the above queries are non-colocated subquery joins -- because the views are replaced with subqueries UPDATE table2 SET id=20 FROM table1_view WHERE table1_view.id=table2.id; -DEBUG: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: Router planner cannot handle multi-shard select queries DEBUG: generating subplan 117_1 for subquery SELECT table1.id, table1.tenant_id FROM non_colocated_subquery.table1 WHERE (table1.id OPERATOR(pg_catalog.<) 100) DEBUG: Plan 117 query after replacing subqueries and CTEs: UPDATE non_colocated_subquery.table2 SET id = 20 FROM (SELECT intermediate_result.id, intermediate_result.tenant_id FROM read_intermediate_result('117_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, tenant_id integer)) table1_view WHERE (table1_view.id OPERATOR(pg_catalog.=) table2.id) DEBUG: Creating router plan DEBUG: Plan is router executable UPDATE table2_p1 SET id=20 FROM table1_view WHERE table1_view.id=table2_p1.id; -DEBUG: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: Router planner cannot handle multi-shard select queries DEBUG: generating subplan 119_1 for 
subquery SELECT table1.id, table1.tenant_id FROM non_colocated_subquery.table1 WHERE (table1.id OPERATOR(pg_catalog.<) 100) DEBUG: Plan 119 query after replacing subqueries and CTEs: UPDATE non_colocated_subquery.table2_p1 SET id = 20 FROM (SELECT intermediate_result.id, intermediate_result.tenant_id FROM read_intermediate_result('119_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, tenant_id integer)) table1_view WHERE (table1_view.id OPERATOR(pg_catalog.=) table2_p1.id) diff --git a/src/test/regress/expected/set_operations.out b/src/test/regress/expected/set_operations.out index 7f7f0015c..9958d2426 100644 --- a/src/test/regress/expected/set_operations.out +++ b/src/test/regress/expected/set_operations.out @@ -14,6 +14,13 @@ SELECT create_reference_table('ref'); (1 row) +CREATE TABLE test_not_colocated (LIKE test); +SELECT create_distributed_table('test_not_colocated', 'x', colocate_with := 'none'); + create_distributed_table +-------------------------- + +(1 row) + INSERT INTO test VALUES (1,1), (2,2); INSERT INTO ref VALUES (2,2), (3,3); -- top-level set operations are supported through recursive planning @@ -718,7 +725,7 @@ DEBUG: Plan 126 query after replacing subqueries and CTEs: SELECT u.x, u.y, tes DEBUG: Router planner cannot handle multi-shard select queries ERROR: cannot pushdown the subquery DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join --- distributed table in WHERE clause is recursively planned +-- distributed table in WHERE clause is recursively planned SELECT * FROM ((SELECT * FROM test) UNION (SELECT * FROM ref WHERE a IN (SELECT x FROM test))) u ORDER BY 1,2; DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries @@ -854,7 +861,7 @@ DEBUG: Plan is router executable 1 (1 row) --- other agg. distincts are also supported when group by includes partition key +-- other agg. 
distincts are also supported when group by includes partition key select avg(DISTINCT t.x) FROM ((SELECT avg(DISTINCT y) FROM test GROUP BY x) UNION (SELECT avg(DISTINCT y) FROM test GROUP BY x)) as t(x) ORDER BY 1; DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries @@ -872,7 +879,7 @@ DEBUG: Plan is router executable 1.50000000000000000000 (1 row) --- other agg. distincts are not supported when group by doesn't include partition key +-- other agg. distincts are not supported when group by doesn't include partition key select count(DISTINCT t.x) FROM ((SELECT avg(DISTINCT y) FROM test GROUP BY y) UNION (SELECT avg(DISTINCT y) FROM test GROUP BY y)) as t(x) ORDER BY 1; DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries @@ -881,7 +888,7 @@ DETAIL: table partitioning is unsuitable for aggregate (distinct) -- one of the leaves is a repartition join SET citus.enable_repartition_joins TO ON; -- repartition is recursively planned before the set operation -(SELECT x FROM test) INTERSECT (SELECT t1.x FROM test as t1, test as t2 WHERE t1.x = t2.y LIMIT 0) ORDER BY 1 DESC; +(SELECT x FROM test) INTERSECT (SELECT t1.x FROM test as t1, test as t2 WHERE t1.x = t2.y LIMIT 0) ORDER BY 1 DESC; DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries DEBUG: push down of limit count: 0 @@ -1017,7 +1024,7 @@ DEBUG: Router planner cannot handle multi-shard select queries 2 | 2 (2 rows) --- this should create lots of recursive calls since both views and set operations lead to recursive plans :) +-- this should create lots of recursive calls since both views and set operations lead to recursive plans :) ((SELECT x FROM set_view_recursive_second) INTERSECT (SELECT * FROM set_view_recursive)) EXCEPT (SELECT * FROM set_view_pushdown); DEBUG: Router planner cannot handle 
multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries @@ -1045,11 +1052,49 @@ DEBUG: Plan is router executable --- (0 rows) +-- queries on non-colocated tables that would push down if they were colocated are recursively planned +SELECT * FROM (SELECT * FROM test UNION SELECT * FROM test_not_colocated) u ORDER BY 1,2; +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan 188_1 for subquery SELECT x, y FROM recursive_union.test +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan 188_2 for subquery SELECT x, y FROM recursive_union.test_not_colocated +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 188_3 for subquery SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('188_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) UNION SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('188_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) +DEBUG: Plan 188 query after replacing subqueries and CTEs: SELECT x, y FROM (SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('188_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) u ORDER BY x, y +DEBUG: Creating router plan +DEBUG: Plan is router executable + x | y +---+--- + 1 | 1 + 2 | 2 +(2 rows) + +SELECT * FROM (SELECT * FROM test UNION ALL SELECT * FROM test_not_colocated) u ORDER BY 1,2; +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan 192_1 for subquery SELECT x, y FROM recursive_union.test +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan 192_2 for subquery SELECT x, y FROM
recursive_union.test_not_colocated +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 192_3 for subquery SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('192_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) UNION ALL SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('192_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) +DEBUG: Plan 192 query after replacing subqueries and CTEs: SELECT x, y FROM (SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('192_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) u ORDER BY x, y +DEBUG: Creating router plan +DEBUG: Plan is router executable + x | y +---+--- + 1 | 1 + 2 | 2 +(2 rows) + RESET client_min_messages; DROP SCHEMA recursive_union CASCADE; -NOTICE: drop cascades to 5 other objects +NOTICE: drop cascades to 6 other objects DETAIL: drop cascades to table test drop cascades to table ref +drop cascades to table test_not_colocated drop cascades to view set_view_recursive drop cascades to view set_view_pushdown drop cascades to view set_view_recursive_second diff --git a/src/test/regress/output/multi_outer_join.source b/src/test/regress/output/multi_outer_join.source index 25b9d844a..e4b43ec0d 100644 --- a/src/test/regress/output/multi_outer_join.source +++ b/src/test/regress/output/multi_outer_join.source @@ -272,7 +272,7 @@ SELECT count(*) FROM multi_outer_join_left a LEFT JOIN multi_outer_join_right b ON (l_nationkey = r_nationkey); -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -- Anti-join should return customers for which there is no row in the right table SELECT min(l_custkey),
max(l_custkey) diff --git a/src/test/regress/output/multi_outer_join_reference.source b/src/test/regress/output/multi_outer_join_reference.source index cd39421ba..5a650b85c 100644 --- a/src/test/regress/output/multi_outer_join_reference.source +++ b/src/test/regress/output/multi_outer_join_reference.source @@ -252,7 +252,7 @@ SELECT count(*) FROM multi_outer_join_left_hash a LEFT JOIN multi_outer_join_right_hash b ON (l_nationkey = r_nationkey); -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -- Anti-join should return customers for which there is no row in the right table SELECT min(l_custkey), max(l_custkey) @@ -326,7 +326,7 @@ FROM LEFT JOIN multi_outer_join_right_reference r1 ON (l1.l_custkey = r1.r_custkey) LEFT JOIN multi_outer_join_right_reference r2 ON (l1.l_custkey = r2.r_custkey) RIGHT JOIN multi_outer_join_left_hash l2 ON (r2.r_custkey = l2.l_custkey); -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -- add an anti-join, this should also error out SELECT * @@ -337,7 +337,7 @@ FROM RIGHT JOIN multi_outer_join_left_hash l2 ON (r2.r_custkey = l2.l_custkey) WHERE r1.r_custkey is NULL; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -- Three way join 2-1-1 (broadcast + broadcast join) should work SELECT l_custkey, r_custkey, t_custkey diff --git a/src/test/regress/sql/set_operations.sql b/src/test/regress/sql/set_operations.sql index
fd662fe74..99adf3d64 100644 --- a/src/test/regress/sql/set_operations.sql +++ b/src/test/regress/sql/set_operations.sql @@ -7,6 +7,9 @@ SELECT create_distributed_table('test', 'x'); CREATE TABLE recursive_union.ref (a int, b int); SELECT create_reference_table('ref'); +CREATE TABLE test_not_colocated (LIKE test); +SELECT create_distributed_table('test_not_colocated', 'x', colocate_with := 'none'); + INSERT INTO test VALUES (1,1), (2,2); INSERT INTO ref VALUES (2,2), (3,3); @@ -169,5 +172,9 @@ SELECT * FROM set_view_recursive_second ORDER BY 1,2; -- this should create lots of recursive calls since both views and set operations lead to recursive plans :) ((SELECT x FROM set_view_recursive_second) INTERSECT (SELECT * FROM set_view_recursive)) EXCEPT (SELECT * FROM set_view_pushdown); +-- queries on non-colocated tables that would push down if they were colocated are recursively planned +SELECT * FROM (SELECT * FROM test UNION SELECT * FROM test_not_colocated) u ORDER BY 1,2; +SELECT * FROM (SELECT * FROM test UNION ALL SELECT * FROM test_not_colocated) u ORDER BY 1,2; + RESET client_min_messages; DROP SCHEMA recursive_union CASCADE;