Fix incorrect union all pushdown issue

pull/3306/head
Marco Slot 2019-12-14 05:42:25 +01:00
parent 7a909fc807
commit ba39d72fe1
21 changed files with 903 additions and 799 deletions

View File

@ -596,7 +596,7 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery,
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"complex joins are only supported when all distributed tables are "
"joined on their distribution columns with equal operator",
"co-located and joined on their distribution columns",
NULL, NULL);
}

View File

@ -10,7 +10,9 @@
*/
#include "postgres.h"
#include "distributed/colocation_utils.h"
#include "distributed/distributed_planner.h"
#include "distributed/listutils.h"
#include "distributed/metadata_cache.h"
#include "distributed/multi_logical_planner.h"
#include "distributed/multi_logical_optimizer.h"
@ -133,6 +135,8 @@ static void ListConcatUniqueAttributeClassMemberLists(AttributeEquivalenceClass
secondClass);
static Index RelationRestrictionPartitionKeyIndex(RelationRestriction *
relationRestriction);
static bool AllRelationsInRestrictionContextColocated(RelationRestrictionContext *
restrictionContext);
static RelationRestrictionContext * FilterRelationRestrictionContext(
RelationRestrictionContext *relationRestrictionContext,
Relids
@ -345,8 +349,20 @@ SafeToPushdownUnionSubquery(PlannerRestrictionContext *plannerRestrictionContext
allAttributeEquivalenceList = lappend(allAttributeEquivalenceList,
attributeEquivalance);
return EquivalenceListContainsRelationsEquality(allAttributeEquivalenceList,
restrictionContext);
if (!EquivalenceListContainsRelationsEquality(allAttributeEquivalenceList,
restrictionContext))
{
/* cannot confirm equality for all distribution columns */
return false;
}
if (!AllRelationsInRestrictionContextColocated(restrictionContext))
{
/* distribution columns are equal, but tables are not co-located */
return false;
}
return true;
}
@ -1650,6 +1666,42 @@ RelationRestrictionPartitionKeyIndex(RelationRestriction *relationRestriction)
}
/*
 * AllRelationsInRestrictionContextColocated determines whether all of the
 * relations in the given relation restriction context report the same
 * co-location id.
 *
 * Relations whose partition method is DISTRIBUTE_BY_NONE are skipped and do
 * not influence the result. The function returns true when no relation is
 * left to compare after skipping, and returns false as soon as a relation
 * reports a co-location id that differs from the first one that was seen.
 */
static bool
AllRelationsInRestrictionContextColocated(RelationRestrictionContext *restrictionContext)
{
	RelationRestriction *relationRestriction = NULL;
	int initialColocationId = INVALID_COLOCATION_ID;
	bool initialColocationIdSet = false;

	/* compare the co-location id of every relation with the first one seen */
	foreach_ptr(relationRestriction, restrictionContext->relationRestrictionList)
	{
		Oid relationId = relationRestriction->relationId;

		if (PartitionMethod(relationId) == DISTRIBUTE_BY_NONE)
		{
			/* not compared against the other relations */
			continue;
		}

		int colocationId = TableColocationId(relationId);

		/*
		 * Remember whether a first relation was seen with an explicit flag
		 * rather than by comparing against INVALID_COLOCATION_ID. If
		 * TableColocationId() returns INVALID_COLOCATION_ID for a relation
		 * (NOTE(review): presumably a table outside any co-location group;
		 * confirm), using that same value as the "unset" marker would let a
		 * later relation silently overwrite it, making the outcome depend on
		 * the order of the restriction list. Relations that all report
		 * INVALID_COLOCATION_ID still compare equal, as they did before.
		 */
		if (!initialColocationIdSet)
		{
			initialColocationId = colocationId;
			initialColocationIdSet = true;
		}
		else if (colocationId != initialColocationId)
		{
			return false;
		}
	}

	return true;
}
/*
* RelationIdList returns list of unique relation ids in query tree.
*/

View File

@ -274,14 +274,14 @@ where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock)
group by s_i_id
having (select max(s_order_cnt) > 2 as having_query from stock where s_i_id = s.s_i_id)
order by s_i_id;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- We don't support correlated subqueries in having
select s_i_id, sum(s_order_cnt) as ordercount
from stock s
group by s_i_id
having (select max(s_order_cnt) > 2 as having_query from stock where s_i_id = s.s_i_id)
order by s_i_id;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DROP TABLE stock;
CREATE TABLE stock (
s_w_id int NOT NULL,

View File

@ -279,14 +279,14 @@ where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock)
group by s_i_id
having (select max(s_order_cnt) > 2 as having_query from stock where s_i_id = s.s_i_id)
order by s_i_id;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- We don't support correlated subqueries in having
select s_i_id, sum(s_order_cnt) as ordercount
from stock s
group by s_i_id
having (select max(s_order_cnt) > 2 as having_query from stock where s_i_id = s.s_i_id)
order by s_i_id;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
\c - - - :master_port
SET citus.replication_model TO streaming;
SET citus.shard_replication_factor to 1;

View File

@ -116,7 +116,7 @@ select s_i_id
where
s_i_id in (select i_im_id from item)
AND s_i_id = ol_i_id;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- Subquery + repartion is supported when it is a NOT IN query where the subquery
-- returns unique results
select s_i_id
@ -124,7 +124,7 @@ select s_i_id
where
s_i_id not in (select i_id from item)
AND s_i_id = ol_i_id;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- Subquery + repartion is not supported when it is a NOT IN where the subquery
-- doesn't return unique results
select s_i_id
@ -132,7 +132,7 @@ select s_i_id
where
s_i_id not in (select i_im_id from item)
AND s_i_id = ol_i_id;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- Actual CHbenCHmark query is supported
select su_name, su_address
from supplier, nation

View File

@ -262,7 +262,7 @@ FROM
) as foo
RETURNING *;
DEBUG: generating subplan 15_1 for subquery SELECT dept FROM recursive_dml_queries.second_distributed_table
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- again a corrolated subquery
-- this time distribution key eq. exists
-- however recursive planning is prevented due to correlated subqueries
@ -292,7 +292,7 @@ FROM
) as baz
) as foo WHERE second_distributed_table.tenant_id = foo.tenant_id
RETURNING *;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- we don't support subqueries/CTEs inside VALUES
INSERT INTO
second_distributed_table (tenant_id, dept)

View File

@ -1,5 +1,5 @@
--
-- Full join with subquery pushdown support
-- Full join with subquery pushdown support
--
SET citus.next_shard_id TO 9000000;
CREATE SCHEMA full_join;
@ -49,7 +49,7 @@ SELECT * FROM test_table_1 FULL JOIN test_table_3 using(id) ORDER BY 1;
(4 rows)
-- Join subqueries using single column
SELECT * FROM
SELECT * FROM
(SELECT test_table_1.id FROM test_table_1 FULL JOIN test_table_3 using(id)) as j1
FULL JOIN
(SELECT test_table_1.id FROM test_table_1 FULL JOIN test_table_3 using(id)) as j2
@ -65,7 +65,7 @@ SELECT * FROM
(5 rows)
-- Join subqueries using multiple columns
SELECT * FROM
SELECT * FROM
(SELECT test_table_1.id, test_table_1.val1 FROM test_table_1 FULL JOIN test_table_3 using(id)) as j1
FULL JOIN
(SELECT test_table_1.id, test_table_1.val1 FROM test_table_1 FULL JOIN test_table_3 using(id)) as j2
@ -91,7 +91,7 @@ SELECT * FROM test_table_1 FULL JOIN test_table_3 USING(id, val1) ORDER BY 1;
(4 rows)
-- Full join with complicated target lists
SELECT count(DISTINCT id), (avg(test_table_1.val1) + id * id)::integer as avg_value, id::numeric IS NOT NULL as not_null
SELECT count(DISTINCT id), (avg(test_table_1.val1) + id * id)::integer as avg_value, id::numeric IS NOT NULL as not_null
FROM test_table_1 FULL JOIN test_table_3 using(id)
WHERE id::bigint < 55
GROUP BY id
@ -214,7 +214,7 @@ SELECT * FROM test_table_1 FULL JOIN test_table_2 using(id) ORDER BY 1;
(5 rows)
-- Join subqueries using multiple columns
SELECT * FROM
SELECT * FROM
(SELECT test_table_1.id, test_table_1.val1 FROM test_table_1 FULL JOIN test_table_2 using(id)) as j1
FULL JOIN
(SELECT test_table_2.id, test_table_2.val1 FROM test_table_1 FULL JOIN test_table_2 using(id)) as j2

View File

@ -59,4 +59,4 @@ FROM (customer LEFT OUTER JOIN orders ON (c_custkey = o_custkey)) AS
test(c_custkey, c_nationkey)
INNER JOIN lineitem ON (test.c_custkey = l_orderkey)
LIMIT 10;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns

View File

@ -83,7 +83,7 @@ SELECT create_distributed_table('temp_nations', 'name', 'hash');
SELECT master_modify_multiple_shards('DELETE FROM multi_shard_modify_test USING temp_nations WHERE multi_shard_modify_test.t_value = temp_nations.key AND temp_nations.name = ''foobar'' ');
WARNING: master_modify_multiple_shards is deprecated and will be removed in a future release.
HINT: Run the command directly
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- commands with a RETURNING clause are unsupported
SELECT master_modify_multiple_shards('DELETE FROM multi_shard_modify_test WHERE t_key = 3 RETURNING *');
WARNING: master_modify_multiple_shards is deprecated and will be removed in a future release.

View File

@ -656,7 +656,7 @@ WHERE user_id IN (SELECT user_id
UPDATE users_test_table
SET value_2 = (SELECT value_3
FROM users_test_table);
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
UPDATE users_test_table
SET value_2 = 2
WHERE
@ -671,7 +671,7 @@ WHERE
GROUP BY
user_id
);
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
UPDATE users_test_table
SET (value_1, value_2) = (2,1)
WHERE user_id IN

View File

@ -426,7 +426,7 @@ FROM events_table t1
LEFT JOIN users_table t2 ON t1.user_id > t2.user_id
ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC
LIMIT 5;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- outer joins on reference tables with expressions should work
SELECT DISTINCT ON (t1.user_id) t1.user_id, t2.value_1, t2.value_2, t2.value_3
FROM events_table t1
@ -467,7 +467,7 @@ SELECT DISTINCT ON (t1.user_id) t1.user_id, t2.value_1, t2.value_2, t2.value_3
LEFT JOIN users_reference_table t2 ON t1.user_id = trunc(t2.user_id)
ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC
LIMIT 5;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- outer joins as subqueries should work
-- https://github.com/citusdata/citus/issues/2739
SELECT user_id, value_1, event_type

View File

@ -1616,7 +1616,7 @@ FROM
ORDER BY
user_id DESC, lastseen DESC
LIMIT 10;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- not pushdownable since lower LATERAL JOIN is not on the partition key
-- not recursively plannable due to LATERAL join where there is a reference
-- from an outer query

View File

@ -1420,7 +1420,7 @@ WHERE
GROUP BY 1
ORDER BY 2 DESC, 1 DESC
LIMIT 5;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
SELECT foo.user_id FROM
(
SELECT m.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)

View File

@ -5,17 +5,17 @@
--
-- We don't need shard id sequence here, so commented out to prevent conflicts with concurrent tests
-- subqueries in WHERE with greater operator
SELECT
SELECT
user_id
FROM
FROM
users_table
WHERE
value_2 >
(SELECT
max(value_2)
FROM
events_table
WHERE
WHERE
value_2 >
(SELECT
max(value_2)
FROM
events_table
WHERE
users_table.user_id = events_table.user_id AND event_type = 1
GROUP BY
user_id
@ -32,17 +32,17 @@ LIMIT 5;
(3 rows)
-- same query with one additional join on non distribution column
SELECT
SELECT
user_id
FROM
FROM
users_table
WHERE
value_2 >
(SELECT
max(value_2)
FROM
events_table
WHERE
WHERE
value_2 >
(SELECT
max(value_2)
FROM
events_table
WHERE
users_table.user_id = events_table.user_id AND event_type = 1 AND
users_table.time > events_table.time
GROUP BY
@ -51,7 +51,7 @@ WHERE
GROUP BY user_id
HAVING count(*) > 1
ORDER BY user_id
LIMIT 5;
LIMIT 5;
user_id
---------
1
@ -60,17 +60,17 @@ LIMIT 5;
(3 rows)
-- the other way around is not supported
SELECT
SELECT
user_id
FROM
FROM
users_table
WHERE
value_2 >
(SELECT
max(value_2)
FROM
events_table
WHERE
WHERE
value_2 >
(SELECT
max(value_2)
FROM
events_table
WHERE
users_table.user_id > events_table.user_id AND event_type = 1 AND
users_table.time = events_table.time
GROUP BY
@ -79,21 +79,21 @@ WHERE
GROUP BY user_id
HAVING count(*) > 1
ORDER BY user_id
LIMIT 5;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
LIMIT 5;
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- subqueries in where with ALL operator
SELECT
SELECT
user_id
FROM
users_table
WHERE
FROM
users_table
WHERE
value_2 > 1 AND
value_2 < ALL (SELECT avg(value_3) FROM events_table WHERE users_table.user_id = events_table.user_id GROUP BY user_id)
GROUP BY
GROUP BY
1
ORDER BY
ORDER BY
1 DESC
LIMIT 3;
LIMIT 3;
user_id
---------
4
@ -102,15 +102,15 @@ LIMIT 3;
(3 rows)
-- IN operator on non-partition key
SELECT
SELECT
user_id
FROM
FROM
events_table as e1
WHERE
event_type IN
(SELECT
(SELECT
event_type
FROM
FROM
events_table as e2
WHERE
value_2 = 1 AND value_3 > 3 AND
@ -139,15 +139,15 @@ ORDER BY 1;
(17 rows)
-- NOT IN on non-partition key
SELECT
SELECT
user_id
FROM
FROM
events_table as e1
WHERE
event_type NOT IN
(SELECT
(SELECT
event_type
FROM
FROM
events_table as e2
WHERE
value_2 = 1 AND value_3 > 3 AND
@ -167,11 +167,11 @@ ORDER BY 1;
(6 rows)
-- non-correlated query with =ANY on partition keys
SELECT
user_id, count(*)
FROM
users_table
WHERE
SELECT
user_id, count(*)
FROM
users_table
WHERE
user_id =ANY(SELECT user_id FROM users_table WHERE value_1 >= 1 AND value_1 <= 2) GROUP BY 1 ORDER BY 2 DESC LIMIT 5;
user_id | count
---------+-------
@ -183,20 +183,20 @@ WHERE
(5 rows)
-- users that appeared more than 118 times
SELECT
SELECT
user_id
FROM
FROM
users_table
WHERE 2 <=
(SELECT
count(*)
FROM
events_table
WHERE
users_table.user_id = events_table.user_id
GROUP BY
(SELECT
count(*)
FROM
events_table
WHERE
users_table.user_id = events_table.user_id
GROUP BY
user_id)
GROUP BY
GROUP BY
user_id
ORDER BY
user_id;
@ -296,10 +296,10 @@ ORDER BY 1, 2;
-- the following query doesn't have a meaningful result
-- but it is a valid query with an arbitrary subquery in
-- WHERE clause
SELECT
user_id
FROM
users_table
SELECT
user_id
FROM
users_table
WHERE
user_id IN
(
@ -376,18 +376,18 @@ SELECT user_id, array_length(events_table, 1)
FROM (
SELECT user_id, array_agg(event ORDER BY time) AS events_table
FROM (
SELECT
SELECT
u.user_id, e.event_type::text AS event, e.time
FROM
FROM
users_table AS u,
events_table AS e
WHERE u.user_id = e.user_id AND
u.user_id IN
WHERE u.user_id = e.user_id AND
u.user_id IN
(
SELECT
user_id
FROM
users_table
SELECT
user_id
FROM
users_table
WHERE value_2 >= 1
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 1 AND event_type <= 3 AND value_3 > 1 AND user_id = users_table.user_id)
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 3 AND event_type <= 4 AND value_3 > 1 AND user_id = users_table.user_id)
@ -401,8 +401,8 @@ ORDER BY 2 DESC, 1;
5 | 364
(1 row)
--
-- below tests only aims for cases where all relations
--
-- below tests only aims for cases where all relations
-- are not joined on partition key
--
-- e4 is not joined on the partition key
@ -465,12 +465,12 @@ SELECT user_id, value_2 FROM users_table WHERE
group by e1.user_id
HAVING sum(submit_card_info) > 0
);
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- left leaf query does not return partition key
SELECT
user_id
FROM
users_table
SELECT
user_id
FROM
users_table
WHERE
user_id IN
(
@ -541,18 +541,18 @@ SELECT user_id, array_length(events_table, 1)
FROM (
SELECT user_id, array_agg(event ORDER BY time) AS events_table
FROM (
SELECT
SELECT
u.user_id, e.event_type::text AS event, e.time
FROM
FROM
users_table AS u,
events_table AS e
WHERE u.user_id = e.user_id AND
u.user_id IN
WHERE u.user_id = e.user_id AND
u.user_id IN
(
SELECT
user_id
FROM
users_table
SELECT
user_id
FROM
users_table
WHERE value_2 >= 5
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 1 AND event_type <= 3 AND value_3 > 1 AND user_id = users_table.user_id)
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 3 AND event_type <= 4 AND value_3 > 1 AND user_id != users_table.user_id)
@ -561,7 +561,7 @@ FROM (
GROUP BY user_id
) q
ORDER BY 2 DESC, 1;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- subquery in where clause doesn't have a relation, but is constant
SELECT
user_id
@ -594,17 +594,17 @@ LIMIT 2;
(0 rows)
-- OFFSET is not supported in the subquey
SELECT
SELECT
user_id
FROM
FROM
users_table
WHERE
value_2 >
(SELECT
max(value_2)
FROM
events_table
WHERE
WHERE
value_2 >
(SELECT
max(value_2)
FROM
events_table
WHERE
users_table.user_id = events_table.user_id AND event_type = 2
GROUP BY
user_id
@ -615,26 +615,26 @@ DETAIL: Offset clause is currently unsupported when a subquery references a col
-- we can detect unsupported subqueries even if they appear
-- in WHERE subquery -> FROM subquery -> WHERE subquery
-- but we can recursively plan that anyway
SELECT DISTINCT user_id
FROM users_table
WHERE user_id
IN (SELECT
f_inner.user_id
FROM
SELECT DISTINCT user_id
FROM users_table
WHERE user_id
IN (SELECT
f_inner.user_id
FROM
(
SELECT
e1.user_id
FROM
users_table u1, events_table e1
WHERE
SELECT
e1.user_id
FROM
users_table u1, events_table e1
WHERE
e1.user_id = u1.user_id
) as f_inner,
(
SELECT
e1.user_id
FROM
users_table u1, events_table e1
WHERE
SELECT
e1.user_id
FROM
users_table u1, events_table e1
WHERE
e1.user_id = u1.user_id
AND e1.user_id IN (SELECT user_id FROM users_table ORDER BY user_id LIMIT 3)
) as f_outer
@ -675,5 +675,5 @@ SELECT user_id, value_2 FROM users_table WHERE
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=1 AND value_3 > 1 AND test_join_function(events_table.user_id, users_table.user_id))
ORDER BY 1 DESC, 2 DESC
LIMIT 3;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DROP FUNCTION test_join_function(int,int);

View File

@ -7,10 +7,10 @@
-- a very simple union query
SELECT user_id, counter
FROM (
SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (1, 2)
UNION
SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (5, 6)
) user_id
SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (1, 2)
UNION
SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (5, 6)
) user_id
ORDER BY 2 DESC,1
LIMIT 5;
user_id | counter
@ -40,10 +40,10 @@ LIMIT 5;
-- a very simple union query with reference table
SELECT user_id, counter
FROM (
SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (1, 2)
UNION
SELECT user_id, value_2 % 10 AS counter FROM events_reference_table WHERE event_type IN (5, 6)
) user_id
SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (1, 2)
UNION
SELECT user_id, value_2 % 10 AS counter FROM events_reference_table WHERE event_type IN (5, 6)
) user_id
ORDER BY 2 DESC,1
LIMIT 5;
user_id | counter
@ -58,10 +58,10 @@ LIMIT 5;
-- the same query with union all
SELECT user_id, counter
FROM (
SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (1, 2)
SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (1, 2)
UNION ALL
SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (5, 6)
) user_id
SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (5, 6)
) user_id
ORDER BY 2 DESC,1
LIMIT 5;
user_id | counter
@ -76,10 +76,10 @@ LIMIT 5;
-- the same query with union all and reference table
SELECT user_id, counter
FROM (
SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (1, 2)
SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (1, 2)
UNION ALL
SELECT user_id, value_2 % 10 AS counter FROM events_reference_table WHERE event_type IN (5, 6)
) user_id
SELECT user_id, value_2 % 10 AS counter FROM events_reference_table WHERE event_type IN (5, 6)
) user_id
ORDER BY 2 DESC,1
LIMIT 5;
user_id | counter
@ -92,12 +92,12 @@ LIMIT 5;
(5 rows)
-- the same query with group by
SELECT user_id, sum(counter)
SELECT user_id, sum(counter)
FROM (
SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (1, 2)
UNION
SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (5, 6)
) user_id
SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (1, 2)
UNION
SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (5, 6)
) user_id
GROUP BY 1
ORDER BY 2 DESC,1
LIMIT 5;
@ -111,12 +111,12 @@ LIMIT 5;
(5 rows)
-- the same query with UNION ALL clause
SELECT user_id, sum(counter)
SELECT user_id, sum(counter)
FROM (
SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (1, 2)
SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (1, 2)
UNION ALL
SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (5, 6)
) user_id
SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (5, 6)
) user_id
GROUP BY 1
ORDER BY 2 DESC,1
LIMIT 5;
@ -130,12 +130,12 @@ LIMIT 5;
(5 rows)
-- the same query target list entries shuffled
SELECT user_id, sum(counter)
SELECT user_id, sum(counter)
FROM (
SELECT value_2 % 10 AS counter, user_id FROM events_table WHERE event_type IN (1, 2)
UNION
SELECT value_2 % 10 AS counter, user_id FROM events_table WHERE event_type IN (5, 6)
) user_id
SELECT value_2 % 10 AS counter, user_id FROM events_table WHERE event_type IN (1, 2)
UNION
SELECT value_2 % 10 AS counter, user_id FROM events_table WHERE event_type IN (5, 6)
) user_id
GROUP BY 1
ORDER BY 2 DESC,1
LIMIT 5;
@ -149,15 +149,15 @@ LIMIT 5;
(5 rows)
-- same query with GROUP BY
SELECT user_id, sum(counter)
SELECT user_id, sum(counter)
FROM (
SELECT user_id, value_2 AS counter FROM events_table WHERE event_type IN (1, 2)
UNION
SELECT user_id, value_2 AS counter FROM events_table WHERE event_type IN (5, 6)
) user_id
GROUP BY
user_id
--HAVING sum(counter) > 900
SELECT user_id, value_2 AS counter FROM events_table WHERE event_type IN (1, 2)
UNION
SELECT user_id, value_2 AS counter FROM events_table WHERE event_type IN (5, 6)
) user_id
GROUP BY
user_id
--HAVING sum(counter) > 900
ORDER BY 1,2 DESC LIMIT 5;
user_id | sum
---------+-----
@ -170,15 +170,15 @@ ORDER BY 1,2 DESC LIMIT 5;
-- the same query target list entries shuffled but this time the subqueries target list
-- is shuffled
SELECT user_id, sum(counter)
SELECT user_id, sum(counter)
FROM (
SELECT value_2 AS counter, user_id FROM events_table WHERE event_type IN (1, 2)
UNION
SELECT value_2 AS counter, user_id FROM events_table WHERE event_type IN (5, 6)
) user_id
GROUP BY
user_id
--HAVING sum(counter) > 900
SELECT value_2 AS counter, user_id FROM events_table WHERE event_type IN (1, 2)
UNION
SELECT value_2 AS counter, user_id FROM events_table WHERE event_type IN (5, 6)
) user_id
GROUP BY
user_id
--HAVING sum(counter) > 900
ORDER BY 1,2 DESC LIMIT 5;
user_id | sum
---------+-----
@ -190,10 +190,10 @@ ORDER BY 1,2 DESC LIMIT 5;
(5 rows)
-- similar query this time more subqueries and target list contains a resjunk entry
SELECT sum(counter)
SELECT sum(counter)
FROM (
SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 1 GROUP BY user_id HAVING sum(value_2) > 5
UNION
UNION
SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 2 and value_1 < 3 GROUP BY user_id HAVING sum(value_2) > 25
UNION
SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 3 and value_1 < 4 GROUP BY user_id HAVING sum(value_2) > 25
@ -201,7 +201,7 @@ FROM (
SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 4 and value_1 < 5 GROUP BY user_id HAVING sum(value_2) > 25
UNION
SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 5 and value_1 < 6 GROUP BY user_id HAVING sum(value_2) > 25
) user_id
) user_id
GROUP BY user_id ORDER BY 1 DESC LIMIT 5;
sum
-----
@ -212,10 +212,10 @@ GROUP BY user_id ORDER BY 1 DESC LIMIT 5;
(4 rows)
-- similar query this time more subqueries with reference table and target list contains a resjunk entry
SELECT sum(counter)
SELECT sum(counter)
FROM (
SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 1 GROUP BY user_id HAVING sum(value_2) > 25
UNION
UNION
SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 2 and value_1 < 3 GROUP BY user_id HAVING sum(value_2) > 25
UNION
SELECT user_id, sum(value_2) AS counter FROM users_reference_table where value_1 < 3 and value_1 < 4 GROUP BY user_id HAVING sum(value_2) > 25
@ -223,7 +223,7 @@ FROM (
SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 4 and value_1 < 5 GROUP BY user_id HAVING sum(value_2) > 25
UNION
SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 5 and value_1 < 6 GROUP BY user_id HAVING sum(value_2) > 25
) user_id
) user_id
GROUP BY user_id ORDER BY 1 DESC LIMIT 5;
sum
-----
@ -234,7 +234,7 @@ GROUP BY user_id ORDER BY 1 DESC LIMIT 5;
(4 rows)
-- similar query as above, with UNION ALL
SELECT sum(counter)
SELECT sum(counter)
FROM (
SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 1 GROUP BY user_id HAVING sum(value_2) > 250
UNION ALL
@ -245,7 +245,7 @@ FROM (
SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 4 and value_1 < 5 GROUP BY user_id HAVING sum(value_2) > 25
UNION ALL
SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 5 and value_1 < 6 GROUP BY user_id HAVING sum(value_2) > 25
) user_id
) user_id
GROUP BY user_id ORDER BY 1 DESC LIMIT 5;
sum
-----
@ -261,41 +261,41 @@ FROM (
( SELECT user_id,
sum(counter)
FROM
(SELECT
(SELECT
user_id, sum(value_2) AS counter
FROM
FROM
users_table
GROUP BY
GROUP BY
user_id
UNION
SELECT
UNION
SELECT
user_id, sum(value_2) AS counter
FROM
FROM
events_table
GROUP BY
GROUP BY
user_id) user_id_1
GROUP BY
GROUP BY
user_id)
UNION
(SELECT
(SELECT
user_id, sum(counter)
FROM
(SELECT
(SELECT
user_id, sum(value_2) AS counter
FROM
FROM
users_table
GROUP BY
GROUP BY
user_id
UNION
SELECT
user_id, sum(value_2) AS counter
FROM
UNION
SELECT
user_id, sum(value_2) AS counter
FROM
events_table
GROUP BY
GROUP BY
user_id) user_id_2
GROUP BY
user_id)) AS ftop
ORDER BY 2 DESC, 1 DESC
GROUP BY
user_id)) AS ftop
ORDER BY 2 DESC, 1 DESC
LIMIT 5;
user_id | sum
---------+-----
@ -312,41 +312,41 @@ FROM (
( SELECT user_id,
sum(counter)
FROM
(SELECT
(SELECT
user_id, sum(value_2) AS counter
FROM
FROM
users_table
GROUP BY
GROUP BY
user_id
UNION
SELECT
UNION
SELECT
user_id, sum(value_2) AS counter
FROM
FROM
events_reference_table
GROUP BY
GROUP BY
user_id) user_id_1
GROUP BY
GROUP BY
user_id)
UNION
(SELECT
(SELECT
user_id, sum(counter)
FROM
(SELECT
(SELECT
user_id, sum(value_2) AS counter
FROM
FROM
users_table
GROUP BY
GROUP BY
user_id
UNION
SELECT
user_id, sum(value_2) AS counter
FROM
UNION
SELECT
user_id, sum(value_2) AS counter
FROM
events_table
GROUP BY
GROUP BY
user_id) user_id_2
GROUP BY
user_id)) AS ftop
ORDER BY 2 DESC, 1 DESC
GROUP BY
user_id)) AS ftop
ORDER BY 2 DESC, 1 DESC
LIMIT 5;
user_id | sum
---------+-----
@ -368,40 +368,40 @@ FROM
FROM (
(SELECT *
FROM
(SELECT
(SELECT
"events"."user_id", "events"."time", 0 AS event
FROM
FROM
events_table as "events"
WHERE
event_type IN (1, 2)) events_subquery_1)
UNION
WHERE
event_type IN (1, 2)) events_subquery_1)
UNION
(SELECT *
FROM
(SELECT
(SELECT
"events"."user_id", "events"."time", 1 AS event
FROM
FROM
events_table as "events"
WHERE
WHERE
event_type IN (2, 3) ) events_subquery_2)
UNION
UNION
(SELECT *
FROM
(SELECT
(SELECT
"events"."user_id", "events"."time", 2 AS event
FROM
FROM
events_table as "events"
WHERE
WHERE
event_type IN (4, 5) ) events_subquery_3)
UNION
UNION
(SELECT *
FROM
(SELECT
(SELECT
"events"."user_id", "events"."time", 3 AS event
FROM
FROM
events_table as "events"
WHERE
WHERE
event_type IN (6, 1)) events_subquery_4)) t1
GROUP BY "t1"."user_id") AS t) "q"
GROUP BY "t1"."user_id") AS t) "q"
) as final_query
GROUP BY types
ORDER BY types;
@ -419,40 +419,40 @@ SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
FROM
(SELECT *, random()
FROM
(SELECT
(SELECT
"t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
FROM
(SELECT
(SELECT
"t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
FROM(
(SELECT
(SELECT
"events"."user_id", "events"."time", 0 AS event
FROM
FROM
events_table as "events"
WHERE
WHERE
event_type IN (1, 2))
UNION
(SELECT
UNION
(SELECT
"events"."user_id", "events"."time", 1 AS event
FROM
FROM
events_table as "events"
WHERE
WHERE
event_type IN (2, 3) )
UNION
(SELECT
UNION
(SELECT
"events"."user_id", "events"."time", 2 AS event
FROM
FROM
events_table as "events"
WHERE
WHERE
event_type IN (4, 5) )
UNION
(SELECT
UNION
(SELECT
"events"."user_id", "events"."time", 3 AS event
FROM
FROM
events_table as "events"
WHERE
WHERE
event_type IN (6, 1))) t1
GROUP BY "t1"."user_id") AS t) "q"
GROUP BY "t1"."user_id") AS t) "q"
) as final_query
GROUP BY types
ORDER BY types;
@ -471,34 +471,34 @@ FROM
FROM
( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
FROM (
(SELECT
(SELECT
"events"."user_id", "events"."time", 0 AS event
FROM
FROM
events_table as "events"
WHERE
WHERE
event_type IN (1, 2))
UNION
(SELECT
UNION
(SELECT
"events"."user_id", "events"."time", 1 AS event
FROM
FROM
events_table as "events"
WHERE
WHERE
event_type IN (2, 3) )
UNION
(SELECT
UNION
(SELECT
"events"."user_id", "events"."time", 2 AS event
FROM
FROM
events_table as "events"
WHERE
WHERE
event_type IN (4, 5) )
UNION
(SELECT
UNION
(SELECT
"events"."user_id", "events"."time", 3 AS event
FROM
FROM
events_table as "events"
WHERE
WHERE
event_type IN (6, 1))) t1
GROUP BY "t1"."user_id") AS t) "q"
GROUP BY "t1"."user_id") AS t) "q"
GROUP BY types
ORDER BY types;
types | sumofeventtype
@ -512,39 +512,39 @@ ORDER BY types;
-- again same query but with only two top level empty queries (i.e., no group bys)
SELECT *
FROM
( SELECT *
( SELECT *
FROM
( SELECT "t1"."user_id"
FROM (
(SELECT
(SELECT
"events"."user_id", "events"."time", 0 AS event
FROM
FROM
events_table as "events"
WHERE
WHERE
event_type IN (1, 2))
UNION
(SELECT
UNION
(SELECT
"events"."user_id", "events"."time", 1 AS event
FROM
FROM
events_table as "events"
WHERE
WHERE
event_type IN (2, 3) )
UNION
(SELECT
UNION
(SELECT
"events"."user_id", "events"."time", 2 AS event
FROM
FROM
events_table as "events"
WHERE
WHERE
event_type IN (4, 5) )
UNION
(SELECT
UNION
(SELECT
"events"."user_id", "events"."time", 3 AS event
FROM
FROM
events_table as "events"
WHERE
WHERE
event_type IN (6, 1))) t1
) AS t) "q"
ORDER BY 1
) AS t) "q"
ORDER BY 1
LIMIT 5;
user_id
---------
@ -562,34 +562,34 @@ FROM
FROM
( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
FROM (
(SELECT
(SELECT
"events"."user_id", "events"."time", 0 AS event
FROM
FROM
events_table as "events"
WHERE
WHERE
event_type IN (1, 2))
UNION ALL
(SELECT
(SELECT
"events"."user_id", "events"."time", 1 AS event
FROM
FROM
events_table as "events"
WHERE
WHERE
event_type IN (2, 3) )
UNION ALL
(SELECT
(SELECT
"events"."user_id", "events"."time", 2 AS event
FROM
FROM
events_table as "events"
WHERE
WHERE
event_type IN (4, 5) )
UNION ALL
(SELECT
(SELECT
"events"."user_id", "events"."time", 3 AS event
FROM
FROM
events_table as "events"
WHERE
WHERE
event_type IN (6, 1))) t1
GROUP BY "t1"."user_id") AS t) "q"
GROUP BY "t1"."user_id") AS t) "q"
GROUP BY types
ORDER BY types;
types | sumofeventtype
@ -601,9 +601,9 @@ ORDER BY types;
(4 rows)
-- some UNION ALL queries that are going to be pulled up
SELECT
SELECT
count(*)
FROM
FROM
(
(SELECT user_id FROM users_table)
UNION ALL
@ -615,9 +615,9 @@ FROM
(1 row)
-- some UNION ALL queries that are going to be pulled up with reference table
SELECT
SELECT
count(*)
FROM
FROM
(
(SELECT user_id FROM users_table)
UNION ALL
@ -629,9 +629,9 @@ FROM
(1 row)
-- similar query without top level agg
SELECT
SELECT
user_id
FROM
FROM
(
(SELECT user_id FROM users_table)
UNION ALL
@ -649,9 +649,9 @@ LIMIT 5;
(5 rows)
-- similar query with multiple target list entries
SELECT
SELECT
user_id, value_3
FROM
FROM
(
(SELECT value_3, user_id FROM users_table)
UNION ALL
@ -669,9 +669,9 @@ LIMIT 5;
(5 rows)
-- similar query group by inside the subqueries
SELECT
SELECT
user_id, value_3_sum
FROM
FROM
(
(SELECT sum(value_3) as value_3_sum, user_id FROM users_table GROUP BY user_id)
UNION ALL
@ -689,9 +689,9 @@ LIMIT 5;
(5 rows)
-- similar query top level group by
SELECT
SELECT
user_id, sum(value_3)
FROM
FROM
(
(SELECT value_3, user_id FROM users_table)
UNION ALL
@ -710,9 +710,9 @@ LIMIT 5;
(5 rows)
-- a long set operation list
SELECT
SELECT
user_id, value_3
FROM
FROM
(
(SELECT value_3, user_id FROM events_table where event_type IN (1, 2))
UNION ALL
@ -738,9 +738,9 @@ LIMIT 5;
(5 rows)
-- no partition key on the top
SELECT
SELECT
max(value_3)
FROM
FROM
(
(SELECT value_3, user_id FROM events_table where event_type IN (1, 2))
UNION ALL
@ -768,12 +768,12 @@ LIMIT 5;
-- now lets also have some unsupported queries
-- group by is not on the partition key, supported through recursive planning
SELECT user_id, sum(counter)
SELECT user_id, sum(counter)
FROM (
SELECT user_id, sum(value_2) AS counter FROM events_table GROUP BY user_id
UNION
SELECT value_1 as user_id, sum(value_2) AS counter FROM users_table GROUP BY value_1
) user_id
) user_id
GROUP BY user_id
ORDER BY 1,2;
user_id | sum
@ -788,10 +788,10 @@ ORDER BY 1,2;
(7 rows)
-- partition key is not selected, supported through recursive planning
SELECT sum(counter)
SELECT sum(counter)
FROM (
SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 1 GROUP BY user_id HAVING sum(value_2) > 25
UNION
UNION
SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 2 and value_1 < 3 GROUP BY user_id HAVING sum(value_2) > 25
UNION
SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 3 and value_1 < 4 GROUP BY user_id HAVING sum(value_2) > 25
@ -799,7 +799,7 @@ FROM (
SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 4 and value_1 < 5 GROUP BY user_id HAVING sum(value_2) > 25
UNION
SELECT 2 * user_id, sum(value_2) AS counter FROM users_table where value_1 < 5 and value_1 < 6 GROUP BY user_id HAVING sum(value_2) > 25
) user_id
) user_id
GROUP BY user_id ORDER BY 1 DESC LIMIT 5;
sum
-----
@ -814,20 +814,20 @@ GROUP BY user_id ORDER BY 1 DESC LIMIT 5;
SELECT * FROM
(
(
SELECT user_id, sum(counter)
SELECT user_id, sum(counter)
FROM (
SELECT user_id, sum(value_2) AS counter FROM users_table GROUP BY user_id
UNION
UNION
SELECT user_id, sum(value_2) AS counter FROM events_table GROUP BY user_id
) user_id_1
GROUP BY user_id
)
)
UNION
(
SELECT user_id, sum(counter)
SELECT user_id, sum(counter)
FROM (
SELECT user_id, sum(value_2) AS counter FROM users_table GROUP BY user_id
EXCEPT
EXCEPT
SELECT user_id, sum(value_2) AS counter FROM events_table GROUP BY user_id
) user_id_2
GROUP BY user_id)
@ -850,12 +850,12 @@ ORDER BY 1,2;
(12 rows)
-- non-equi joins are not supported since there is no equivalence between the partition columns
SELECT user_id, sum(counter)
SELECT user_id, sum(counter)
FROM (
SELECT user_id, sum(value_2) AS counter FROM users_table GROUP BY user_id
UNION
UNION
SELECT events_table.user_id, sum(events_table.value_2) AS counter FROM events_table, users_table WHERE users_table.user_id > events_table.user_id GROUP BY 1
) user_id
) user_id
GROUP BY user_id;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
-- non-equi join also not supported for UNION ALL
@ -864,27 +864,27 @@ FROM (
SELECT user_id, sum(value_2) AS counter FROM users_table GROUP BY user_id
UNION ALL
SELECT events_table.user_id, sum(events_table.value_2) AS counter FROM events_table, users_table WHERE users_table.user_id > events_table.user_id GROUP BY 1
) user_id
) user_id
GROUP BY user_id;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
-- joins inside unions are supported -- slightly more complex than the above
SELECT * FROM
(
(
SELECT user_id, sum(counter)
SELECT user_id, sum(counter)
FROM (
SELECT user_id, sum(value_2) AS counter FROM users_table GROUP BY user_id
UNION
UNION
SELECT user_id, sum(value_2) AS counter FROM events_table GROUP BY user_id
) user_id_1
GROUP BY user_id
)
)
UNION
(
SELECT user_id, sum(counter)
SELECT user_id, sum(counter)
FROM (
SELECT user_id, sum(value_2) AS counter FROM users_table GROUP BY user_id
UNION
UNION
SELECT events_table.user_id, sum(events_table.value_2) AS counter FROM events_table, users_table WHERE (events_table.user_id = users_table.user_id) GROUP BY events_table.user_id
) user_id_2
GROUP BY user_id)
@ -995,12 +995,12 @@ LIMIT 10;
(10 rows)
-- offset inside the union
SELECT user_id, sum(counter)
SELECT user_id, sum(counter)
FROM (
SELECT user_id, sum(value_2) AS counter FROM events_table GROUP BY user_id
UNION
SELECT user_id, sum(value_2) AS counter FROM users_table GROUP BY user_id ORDER BY user_id OFFSET 4
) user_id
) user_id
GROUP BY user_id
ORDER BY 1,2;
user_id | sum
@ -1017,39 +1017,39 @@ FROM (
( SELECT user_id,
sum(counter)
FROM
(SELECT
(SELECT
user_id, sum(value_2) AS counter
FROM
FROM
users_table
GROUP BY
GROUP BY
user_id
UNION
SELECT
UNION
SELECT
user_id, sum(value_2) AS counter
FROM
FROM
events_table
GROUP BY
GROUP BY
user_id) user_id_1
GROUP BY
GROUP BY
user_id)
UNION
(SELECT
(SELECT
user_id, sum(counter)
FROM
(SELECT
(SELECT
sum(value_2) AS counter, user_id
FROM
FROM
users_table
GROUP BY
GROUP BY
user_id
UNION
SELECT
user_id, sum(value_2) AS counter
FROM
UNION
SELECT
user_id, sum(value_2) AS counter
FROM
events_table
GROUP BY
GROUP BY
user_id) user_id_2
GROUP BY
GROUP BY
user_id)) AS ftop
ORDER BY 1,2;
user_id | sum
@ -1075,9 +1075,9 @@ ORDER BY 1,2;
(18 rows)
-- some UNION all queries that are going to be pulled up
SELECT
SELECT
count(*)
FROM
FROM
(
(SELECT user_id FROM users_table)
UNION ALL
@ -1089,9 +1089,9 @@ FROM
(1 row)
-- last query does not have partition key
SELECT
SELECT
user_id, value_3
FROM
FROM
(
(SELECT value_3, user_id FROM events_table where event_type IN (1, 2))
UNION ALL
@ -1117,9 +1117,9 @@ LIMIT 5;
(5 rows)
-- we allow joins within unions
SELECT
SELECT
count(*)
FROM
FROM
(
(SELECT user_id FROM users_table)
UNION ALL
@ -1131,9 +1131,9 @@ FROM
(1 row)
-- we support unions on subqueries without relations through recursive planning
SELECT
SELECT
count(*)
FROM
FROM
(
(SELECT user_id FROM users_table)
UNION ALL
@ -1145,9 +1145,9 @@ FROM
(1 row)
-- we support pushing down subqueries without relations through recursive planning
SELECT
SELECT
count(*)
FROM
FROM
(
(SELECT user_id FROM users_table)
UNION ALL
@ -1159,9 +1159,9 @@ FROM
(1 row)
-- we support subqueries without relations within a union
SELECT
SELECT
user_id, value_3
FROM
FROM
(
(SELECT value_3, user_id FROM events_table where event_type IN (1, 2))
UNION ALL
@ -1197,35 +1197,35 @@ FROM
FROM (
(SELECT *
FROM
(SELECT
(SELECT
"events"."user_id", "events"."time", 0 AS event
FROM
FROM
events_table as "events"
WHERE
event_type IN (1, 2)) events_subquery_1)
UNION
WHERE
event_type IN (1, 2)) events_subquery_1)
UNION
(SELECT *
FROM
(SELECT
(SELECT
"events"."user_id", "events"."time", 1 AS event
FROM
FROM
events_table as "events"
WHERE
WHERE
event_type IN (2, 3) ) events_subquery_2)
UNION
UNION
(SELECT *
FROM
(SELECT
(SELECT
"events"."user_id", "events"."time", 2 AS event
FROM
FROM
events_table as "events"
WHERE
WHERE
event_type IN (4, 5) ) events_subquery_3)
UNION
UNION
(SELECT *
FROM
(SELECT 1, now(), 3 AS event) events_subquery_4)) t1
GROUP BY "t1"."user_id") AS t) "q"
GROUP BY "t1"."user_id") AS t) "q"
) as final_query
GROUP BY types
ORDER BY types;

View File

@ -209,7 +209,7 @@ SELECT true AS valid FROM explain_json_2($$
(SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as foo3,
(SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (13,14,15,16)) as foo4,
(SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (17,18,19,20)) as foo5
WHERE
foo1.user_id = foo4.user_id AND
@ -241,7 +241,7 @@ SELECT true AS valid FROM explain_json_2($$
(SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as foo3,
(SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (13,14,15,16)) as foo4,
(SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (17,18,19,20)) as foo5
WHERE
foo1.user_id = foo4.user_id AND
@ -258,7 +258,7 @@ DEBUG: Plan 26 query after replacing subqueries and CTEs: SELECT user_id, rando
t
(1 row)
-- There are two non colocated joins, one is in one of the leaf queries,
-- There are two non colocated joins, one is in one of the leaf queries,
-- the other is on the top-level subquery
SELECT true AS valid FROM explain_json_2($$
@ -271,7 +271,7 @@ SELECT true AS valid FROM explain_json_2($$
(SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (5,6,7,8)) as foo2,
(SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as foo3,
(SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (13,14,15,16)) as foo4,
(SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (17,18,19,20)) as foo5
(SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (17,18,19,20)) as foo5
WHERE
foo1.user_id = foo4.user_id AND
foo1.user_id = foo2.user_id AND
@ -302,7 +302,7 @@ SELECT true AS valid FROM explain_json_2($$
(SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (5,6,7,8)) as foo2,
(SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as foo3,
(SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (13,14,15,16)) as foo4,
(SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (17,18,19,20)) as foo5
(SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (17,18,19,20)) as foo5
WHERE
foo1.user_id = foo4.user_id AND
foo1.user_id = foo2.user_id AND
@ -322,8 +322,8 @@ DEBUG: Plan 31 query after replacing subqueries and CTEs: SELECT user_id, rando
-- Deeper subqueries are non-colocated
SELECT true AS valid FROM explain_json_2($$
SELECT
count(*)
SELECT
count(*)
FROM
(
SELECT
@ -332,7 +332,7 @@ SELECT true AS valid FROM explain_json_2($$
(SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) as foo,
(SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar
WHERE
foo.user_id = bar.user_id) as foo_top JOIN
foo.user_id = bar.user_id) as foo_top JOIN
(
SELECT
@ -341,7 +341,7 @@ SELECT true AS valid FROM explain_json_2($$
(SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) as foo,
(SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar
WHERE
foo.user_id = bar.user_id) as bar_top
foo.user_id = bar.user_id) as bar_top
ON (foo_top.user_id = bar_top.user_id);
$$);
DEBUG: generating subplan 34_1 for subquery SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))
@ -355,8 +355,8 @@ DEBUG: Plan 34 query after replacing subqueries and CTEs: SELECT count(*) AS co
-- Top level Subquery is not colocated
SELECT true AS valid FROM explain_json_2($$
SELECT
count(*)
SELECT
count(*)
FROM
(
SELECT
@ -365,7 +365,7 @@ SELECT true AS valid FROM explain_json_2($$
(SELECT DISTINCT users_table.user_id, users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo,
(SELECT DISTINCT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar
WHERE
foo.user_id = bar.user_id) as foo_top JOIN
foo.user_id = bar.user_id) as foo_top JOIN
(
SELECT
@ -374,8 +374,8 @@ SELECT true AS valid FROM explain_json_2($$
(SELECT DISTINCT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as foo,
(SELECT DISTINCT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (13,14,15,16)) as bar
WHERE
foo.user_id = bar.user_id) as bar_top
ON (foo_top.value_2 = bar_top.user_id);
foo.user_id = bar.user_id) as bar_top
ON (foo_top.value_2 = bar_top.user_id);
$$);
DEBUG: generating subplan 37_1 for subquery SELECT foo.user_id FROM (SELECT DISTINCT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[9, 10, 11, 12])))) foo, (SELECT DISTINCT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[13, 14, 15, 16])))) bar WHERE (foo.user_id OPERATOR(pg_catalog.=) bar.user_id)
@ -388,8 +388,8 @@ DEBUG: Plan 37 query after replacing subqueries and CTEs: SELECT count(*) AS co
-- Top level Subquery is not colocated as the above
SELECT true AS valid FROM explain_json_2($$
SELECT
count(*)
SELECT
count(*)
FROM
(
SELECT
@ -398,7 +398,7 @@ SELECT true AS valid FROM explain_json_2($$
(SELECT DISTINCT users_table.user_id, users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo,
(SELECT DISTINCT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar
WHERE
foo.user_id = bar.user_id) as foo_top JOIN
foo.user_id = bar.user_id) as foo_top JOIN
(
SELECT
foo.user_id
@ -406,7 +406,7 @@ SELECT true AS valid FROM explain_json_2($$
(SELECT DISTINCT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as foo,
(SELECT DISTINCT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (13,14,15,16)) as bar
WHERE
foo.user_id = bar.user_id) as bar_top
foo.user_id = bar.user_id) as bar_top
ON (foo_top.value_2 = bar_top.user_id);
$$);
DEBUG: generating subplan 39_1 for subquery SELECT DISTINCT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[13, 14, 15, 16])))
@ -420,13 +420,13 @@ DEBUG: Plan 39 query after replacing subqueries and CTEs: SELECT count(*) AS co
-- non colocated joins are deep inside the query
SELECT true AS valid FROM explain_json_2($$
SELECT
SELECT
count(*)
FROM
(
SELECT * FROM
(SELECT DISTINCT users_table.user_id FROM users_table,
(SELECT events_table.user_id as my_users FROM events_table, users_table WHERE events_table.event_type = users_table.user_id) as foo
SELECT * FROM
(SELECT DISTINCT users_table.user_id FROM users_table,
(SELECT events_table.user_id as my_users FROM events_table, users_table WHERE events_table.event_type = users_table.user_id) as foo
WHERE foo.my_users = users_table.user_id) as mid_level_query
) as bar;
$$);
@ -443,9 +443,9 @@ DEBUG: Plan 42 query after replacing subqueries and CTEs: SELECT count(*) AS co
-- via regular repartitioning since PostgreSQL would pull the query up
SELECT true AS valid FROM explain_json_2($$
SELECT count(*) FROM ( SELECT * FROM
(SELECT DISTINCT users_table.user_id FROM users_table,
(SELECT events_table.event_type as my_users, random() FROM events_table, users_table WHERE events_table.user_id = users_table.user_id) as foo
SELECT count(*) FROM ( SELECT * FROM
(SELECT DISTINCT users_table.user_id FROM users_table,
(SELECT events_table.event_type as my_users, random() FROM events_table, users_table WHERE events_table.user_id = users_table.user_id) as foo
WHERE foo.my_users = users_table.user_id) as mid_level_query ) as bar;
$$);
@ -458,17 +458,17 @@ DEBUG: Plan 44 query after replacing subqueries and CTEs: SELECT count(*) AS co
-- same as the above query, but, one level deeper subquery
SELECT true AS valid FROM explain_json_2($$
SELECT
SELECT
count(*)
FROM
(
SELECT * FROM
(SELECT DISTINCT users_table.user_id FROM users_table,
(SELECT events_table.user_id as my_users FROM events_table,
SELECT * FROM
(SELECT DISTINCT users_table.user_id FROM users_table,
(SELECT events_table.user_id as my_users FROM events_table,
(SELECT events_table.user_id, random() FROM users_table, events_table WHERE users_table.user_id = events_table.user_id) as selected_users
WHERE events_table.event_type = selected_users.user_id) as foo
WHERE events_table.event_type = selected_users.user_id) as foo
WHERE foo.my_users = users_table.user_id) as mid_level_query
) as bar;
$$);
@ -484,21 +484,21 @@ DEBUG: Plan 46 query after replacing subqueries and CTEs: SELECT count(*) AS co
-- the subquery on the distribution key
SELECT true AS valid FROM explain_json_2($$
SELECT
SELECT
count(*)
FROM
(
SELECT * FROM
(SELECT DISTINCT users_table.user_id FROM users_table,
SELECT * FROM
(SELECT DISTINCT users_table.user_id FROM users_table,
(SELECT events_table.user_id as my_users FROM events_table,
(SELECT events_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND
(SELECT events_table.user_id as my_users FROM events_table,
(SELECT events_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND
users_table.user_id IN (SELECT value_2 FROM events_table)
) as selected_users
WHERE events_table.user_id = selected_users.user_id) as foo
WHERE events_table.user_id = selected_users.user_id) as foo
WHERE foo.my_users = users_table.user_id) as mid_level_query
@ -518,13 +518,13 @@ SELECT true AS valid FROM explain_json_2($$SELECT
FROM
users_table
WHERE
value_1
value_1
IN
(SELECT
users_table.user_id
FROM
users_table, events_table
WHERE
(SELECT
users_table.user_id
FROM
users_table, events_table
WHERE
users_table.user_id = events_table.value_2 AND event_type IN (5,6));$$);
DEBUG: generating subplan 50_1 for subquery SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6])))
DEBUG: Plan 50 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM public.users_table WHERE (value_1 OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.user_id FROM read_intermediate_result('50_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)))
@ -535,12 +535,12 @@ DEBUG: Plan 50 query after replacing subqueries and CTEs: SELECT count(*) AS co
-- leaf subquery repartitioning should work fine when used with CTEs
SELECT true AS valid FROM explain_json_2($$
WITH q1 AS (SELECT user_id FROM users_table)
SELECT count(*) FROM q1, (SELECT
users_table.user_id, random()
FROM
users_table, events_table
WHERE
WITH q1 AS (SELECT user_id FROM users_table)
SELECT count(*) FROM q1, (SELECT
users_table.user_id, random()
FROM
users_table, events_table
WHERE
users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) as bar WHERE bar.user_id = q1.user_id ;$$);
DEBUG: generating subplan 52_1 for CTE q1: SELECT user_id FROM public.users_table
DEBUG: generating subplan 52_2 for subquery SELECT users_table.user_id, random() AS random FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))
@ -552,12 +552,12 @@ DEBUG: Plan 52 query after replacing subqueries and CTEs: SELECT count(*) AS co
-- subquery joins should work fine when used with CTEs
SELECT true AS valid FROM explain_json_2($$
WITH q1 AS (SELECT user_id FROM users_table)
SELECT count(*) FROM q1, (SELECT
users_table.user_id, random()
FROM
users_table, events_table
WHERE
WITH q1 AS (SELECT user_id FROM users_table)
SELECT count(*) FROM q1, (SELECT
users_table.user_id, random()
FROM
users_table, events_table
WHERE
users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as bar WHERE bar.user_id = q1.user_id ;$$);
DEBUG: generating subplan 55_1 for CTE q1: SELECT user_id FROM public.users_table
DEBUG: Plan 55 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('55_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) q1, (SELECT users_table.user_id, random() AS random FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))) bar WHERE (bar.user_id OPERATOR(pg_catalog.=) q1.user_id)
@ -584,19 +584,19 @@ SELECT event, array_length(events_table, 1)
FROM (
SELECT event, array_agg(t.user_id) AS events_table
FROM (
SELECT
SELECT
DISTINCT ON(e.event_type::text) e.event_type::text as event, e.time, e.user_id
FROM
FROM
users_table AS u,
events_table AS e,
(SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (5,6,7,8)) as bar
WHERE u.user_id = e.user_id AND
u.user_id IN
WHERE u.user_id = e.user_id AND
u.user_id IN
(
SELECT
user_id
FROM
users_table
SELECT
user_id
FROM
users_table
WHERE value_2 >= 5
AND EXISTS (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4))
LIMIT 5
@ -622,22 +622,22 @@ DEBUG: Plan 60 query after replacing subqueries and CTEs: SELECT event, array_l
-- the relations are joined under a join tree with an alias
SELECT true AS valid FROM explain_json_2($$
SELECT
count(*)
SELECT
count(*)
FROM
(users_table u1 JOIN users_table u2 using(value_1)) a JOIN (SELECT value_1, random() FROM users_table) as u3 USING (value_1);
(users_table u1 JOIN users_table u2 using(value_1)) a JOIN (SELECT value_1, random() FROM users_table) as u3 USING (value_1);
$$);
DEBUG: generating subplan 66_1 for subquery SELECT value_1, random() AS random FROM public.users_table
DEBUG: Plan 66 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN public.users_table u2 USING (value_1)) a(value_1, user_id, "time", value_2, value_3, value_4, user_id_1, time_1, value_2_1, value_3_1, value_4_1) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('66_1'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1))
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- a very similar query to the above
-- however, this time we use a subquery instead of join alias, and it works
SELECT true AS valid FROM explain_json_2($$
SELECT
count(*)
SELECT
count(*)
FROM
(SELECT * FROM users_table u1 JOIN users_table u2 using(value_1)) a JOIN (SELECT value_1, random() FROM users_table) as u3 USING (value_1);
(SELECT * FROM users_table u1 JOIN users_table u2 using(value_1)) a JOIN (SELECT value_1, random() FROM users_table) as u3 USING (value_1);
$$);
DEBUG: generating subplan 68_1 for subquery SELECT u1.value_1, u1.user_id, u1."time", u1.value_2, u1.value_3, u1.value_4, u2.user_id, u2."time", u2.value_2, u2.value_3, u2.value_4 FROM (public.users_table u1 JOIN public.users_table u2 USING (value_1))
DEBUG: Plan 68 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.value_1, intermediate_result.user_id, intermediate_result."time", intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4, intermediate_result.user_id_1 AS user_id, intermediate_result.time_1 AS "time", intermediate_result.value_2_1 AS value_2, intermediate_result.value_3_1 AS value_3, intermediate_result.value_4_1 AS value_4 FROM read_intermediate_result('68_1'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, user_id integer, "time" timestamp without time zone, value_2 integer, value_3 double precision, value_4 bigint, user_id_1 integer, time_1 timestamp without time zone, value_2_1 integer, value_3_1 double precision, value_4_1 bigint)) a(value_1, user_id, "time", value_2, value_3, value_4, user_id_1, time_1, value_2_1, value_3_1, value_4_1) JOIN (SELECT users_table.value_1, random() AS random FROM public.users_table) u3 USING (value_1))
@ -710,9 +710,9 @@ SELECT true AS valid FROM explain_json_2($$
SELECT user_id FROM users_table
UNION
SELECT user_id FROM users_table
) a
) a
JOIN
(SELECT value_1 FROM users_table) as foo ON (a.user_id = foo.value_1)
(SELECT value_1 FROM users_table) as foo ON (a.user_id = foo.value_1)
);
$$);
DEBUG: generating subplan 77_1 for subquery SELECT user_id FROM public.users_table
@ -734,9 +734,9 @@ SELECT true AS valid FROM explain_json_2($$
SELECT user_id FROM users_table
UNION
SELECT user_id FROM users_table
) a
) a
JOIN
users_table as foo ON (a.user_id = foo.value_1)
users_table as foo ON (a.user_id = foo.value_1)
);
$$);
DEBUG: generating subplan 81_1 for subquery SELECT user_id FROM public.users_table
@ -755,21 +755,21 @@ DEBUG: Plan 80 query after replacing subqueries and CTEs: SELECT a.user_id, foo
SELECT true AS valid FROM explain_json_2($$
SELECT * FROM
(
(SELECT user_id FROM users_table) as foo
(
(SELECT user_id FROM users_table) as foo
JOIN
(
SELECT user_id FROM users_table WHERE user_id IN (1,2,3,4)
UNION
SELECT user_id FROM users_table WHERE user_id IN (5,6,7,8)
) a
) a
ON (a.user_id = foo.user_id)
ON (a.user_id = foo.user_id)
JOIN
(SELECT value_1 FROM users_table) as bar
ON(foo.user_id = bar.value_1)
ON(foo.user_id = bar.value_1)
);
$$);
DEBUG: generating subplan 84_1 for subquery SELECT value_1 FROM public.users_table
@ -783,7 +783,7 @@ DEBUG: Plan 84 query after replacing subqueries and CTEs: SELECT foo.user_id, a
-- inside a CTE
SELECT true AS valid FROM explain_json_2($$
WITH non_colocated_subquery AS
WITH non_colocated_subquery AS
(
SELECT
foo.value_2
@ -793,7 +793,7 @@ SELECT true AS valid FROM explain_json_2($$
WHERE
foo.value_2 = bar.value_2
),
non_colocated_subquery_2 AS
non_colocated_subquery_2 AS
(
SELECT
count(*) as cnt
@ -804,11 +804,11 @@ SELECT true AS valid FROM explain_json_2($$
IN
(SELECT event_type FROM events_table WHERE user_id < 4)
)
SELECT
*
FROM
non_colocated_subquery, non_colocated_subquery_2
WHERE
SELECT
*
FROM
non_colocated_subquery, non_colocated_subquery_2
WHERE
non_colocated_subquery.value_2 != non_colocated_subquery_2.cnt
$$);
DEBUG: generating subplan 86_1 for CTE non_colocated_subquery: SELECT foo.value_2 FROM (SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))) foo, (SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))) bar WHERE (foo.value_2 OPERATOR(pg_catalog.=) bar.value_2)
@ -832,8 +832,8 @@ SELECT true AS valid FROM explain_json_2($$
(SELECT users_table_local.value_2 FROM users_table_local, events_table_local WHERE users_table_local.user_id = events_table_local.user_id AND event_type IN (5,6,7,8)) as bar,
(SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as baz
WHERE
foo.value_2 = bar.value_2
AND
foo.value_2 = bar.value_2
AND
foo.value_2 = baz.value_2
$$);
DEBUG: generating subplan 91_1 for subquery SELECT users_table_local.value_2 FROM non_colocated_subquery.users_table_local, non_colocated_subquery.events_table_local WHERE ((users_table_local.user_id OPERATOR(pg_catalog.=) events_table_local.user_id) AND (events_table_local.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))
@ -850,20 +850,20 @@ SELECT true AS valid FROM explain_json_2($$
SELECT
count(*)
FROM
(SELECT user_id FROM users_table) as foo
(SELECT user_id FROM users_table) as foo
JOIN
(
SELECT user_id FROM users_table WHERE user_id IN (1,2,3,4)
UNION
SELECT user_id FROM users_table WHERE user_id IN (5,6,7,8)
) a
) a
ON (a.user_id = foo.user_id)
ON (a.user_id = foo.user_id)
JOIN
(SELECT value_1, value_2 FROM users_table) as bar
ON(foo.user_id = bar.value_1)
ON(foo.user_id = bar.value_1)
WHERE
value_2 IN (SELECT value_1 FROM users_table WHERE value_2 < 1)
AND
@ -880,16 +880,16 @@ DEBUG: Plan 93 query after replacing subqueries and CTEs: SELECT count(*) AS co
t
(1 row)
-- make sure that we don't pick the reference table as
-- make sure that we don't pick the reference table as
-- the anchor
SELECT true AS valid FROM explain_json_2($$
SELECT count(*)
FROM
FROM
users_reference_table AS users_table_ref,
(SELECT user_id FROM users_Table) AS foo,
(SELECT user_id, value_2 FROM events_Table) AS bar
WHERE
WHERE
users_table_ref.user_id = foo.user_id
AND foo.user_id = bar.value_2;
$$);
@ -926,7 +926,7 @@ DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: skipping recursive planning for the subquery since it contains references to outer queries
DEBUG: skipping recursive planning for the subquery since it contains references to outer queries
DEBUG: skipping recursive planning for the subquery since it contains references to outer queries
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- similar to the above, make sure that we skip recursive planning when
-- the subquery doesn't have any tables
SELECT true AS valid FROM explain_json_2($$
@ -945,7 +945,7 @@ DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: skipping recursive planning for the subquery since it contains references to outer queries
DEBUG: skipping recursive planning for the subquery since it contains references to outer queries
DEBUG: skipping recursive planning for the subquery since it contains references to outer queries
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- similar to the above, make sure that we skip recursive planning when
-- the subquery contains only intermediate results
SELECT *
@ -964,7 +964,7 @@ JOIN LATERAL
(SELECT *
FROM
(SELECT *
FROM
FROM
(SELECT *
FROM events_table WHERE value_3 > 4
INTERSECT
@ -1010,7 +1010,7 @@ SELECT count(*) FROM events_table WHERE user_id NOT IN
(SELECT *
FROM
(SELECT *
FROM
FROM
(SELECT *
FROM events_table WHERE value_3 > 4
INTERSECT
@ -1061,14 +1061,14 @@ SELECT create_distributed_table('table1','tenant_id');
-- all of the above queries are non-colocated subquery joins
-- because the views are replaced with subqueries
UPDATE table2 SET id=20 FROM table1_view WHERE table1_view.id=table2.id;
DEBUG: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: generating subplan 117_1 for subquery SELECT table1.id, table1.tenant_id FROM non_colocated_subquery.table1 WHERE (table1.id OPERATOR(pg_catalog.<) 100)
DEBUG: Plan 117 query after replacing subqueries and CTEs: UPDATE non_colocated_subquery.table2 SET id = 20 FROM (SELECT intermediate_result.id, intermediate_result.tenant_id FROM read_intermediate_result('117_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, tenant_id integer)) table1_view WHERE (table1_view.id OPERATOR(pg_catalog.=) table2.id)
DEBUG: Creating router plan
DEBUG: Plan is router executable
UPDATE table2_p1 SET id=20 FROM table1_view WHERE table1_view.id=table2_p1.id;
DEBUG: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: generating subplan 119_1 for subquery SELECT table1.id, table1.tenant_id FROM non_colocated_subquery.table1 WHERE (table1.id OPERATOR(pg_catalog.<) 100)
DEBUG: Plan 119 query after replacing subqueries and CTEs: UPDATE non_colocated_subquery.table2_p1 SET id = 20 FROM (SELECT intermediate_result.id, intermediate_result.tenant_id FROM read_intermediate_result('119_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, tenant_id integer)) table1_view WHERE (table1_view.id OPERATOR(pg_catalog.=) table2_p1.id)

View File

@ -14,6 +14,13 @@ SELECT create_reference_table('ref');
(1 row)
CREATE TABLE test_not_colocated (LIKE test);
SELECT create_distributed_table('test_not_colocated', 'x', colocate_with := 'none');
create_distributed_table
--------------------------
(1 row)
INSERT INTO test VALUES (1,1), (2,2);
INSERT INTO ref VALUES (2,2), (3,3);
-- top-level set operations are supported through recursive planning
@ -718,7 +725,7 @@ DEBUG: Plan 126 query after replacing subqueries and CTEs: SELECT u.x, u.y, tes
DEBUG: Router planner cannot handle multi-shard select queries
ERROR: cannot pushdown the subquery
DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join
-- distributed table in WHERE clause is recursively planned
-- distributed table in WHERE clause is recursively planned
SELECT * FROM ((SELECT * FROM test) UNION (SELECT * FROM ref WHERE a IN (SELECT x FROM test))) u ORDER BY 1,2;
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: Router planner cannot handle multi-shard select queries
@ -854,7 +861,7 @@ DEBUG: Plan is router executable
1
(1 row)
-- other agg. distincts are also supported when group by includes partition key
-- other agg. distincts are also supported when group by includes partition key
select avg(DISTINCT t.x) FROM ((SELECT avg(DISTINCT y) FROM test GROUP BY x) UNION (SELECT avg(DISTINCT y) FROM test GROUP BY x)) as t(x) ORDER BY 1;
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: Router planner cannot handle multi-shard select queries
@ -872,7 +879,7 @@ DEBUG: Plan is router executable
1.50000000000000000000
(1 row)
-- other agg. distincts are not supported when group by doesn't include partition key
-- other agg. distincts are not supported when group by doesn't include partition key
select count(DISTINCT t.x) FROM ((SELECT avg(DISTINCT y) FROM test GROUP BY y) UNION (SELECT avg(DISTINCT y) FROM test GROUP BY y)) as t(x) ORDER BY 1;
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: Router planner cannot handle multi-shard select queries
@ -881,7 +888,7 @@ DETAIL: table partitioning is unsuitable for aggregate (distinct)
-- one of the leaves is a repartition join
SET citus.enable_repartition_joins TO ON;
-- repartition is recursively planned before the set operation
(SELECT x FROM test) INTERSECT (SELECT t1.x FROM test as t1, test as t2 WHERE t1.x = t2.y LIMIT 0) ORDER BY 1 DESC;
(SELECT x FROM test) INTERSECT (SELECT t1.x FROM test as t1, test as t2 WHERE t1.x = t2.y LIMIT 0) ORDER BY 1 DESC;
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: push down of limit count: 0
@ -1017,7 +1024,7 @@ DEBUG: Router planner cannot handle multi-shard select queries
2 | 2
(2 rows)
-- this should create lots of recursive calls since both views and set operations lead to recursive plans :)
-- this should create lots of recursive calls since both views and set operations lead to recursive plans :)
((SELECT x FROM set_view_recursive_second) INTERSECT (SELECT * FROM set_view_recursive)) EXCEPT (SELECT * FROM set_view_pushdown);
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: Router planner cannot handle multi-shard select queries
@ -1045,11 +1052,49 @@ DEBUG: Plan is router executable
---
(0 rows)
-- queries on non-colocated tables that would push down if they were colocated are recursively planned
SELECT * FROM (SELECT * FROM test UNION SELECT * FROM test_not_colocated) u ORDER BY 1,2;
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: generating subplan 188_1 for subquery SELECT x, y FROM recursive_union.test
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: generating subplan 188_2 for subquery SELECT x, y FROM recursive_union.test_not_colocated
DEBUG: Creating router plan
DEBUG: Plan is router executable
DEBUG: generating subplan 188_3 for subquery SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('188_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) UNION SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('188_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)
DEBUG: Plan 188 query after replacing subqueries and CTEs: SELECT x, y FROM (SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('188_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) u ORDER BY x, y
DEBUG: Creating router plan
DEBUG: Plan is router executable
x | y
---+---
1 | 1
2 | 2
(2 rows)
SELECT * FROM (SELECT * FROM test UNION ALL SELECT * FROM test_not_colocated) u ORDER BY 1,2;
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: generating subplan 192_1 for subquery SELECT x, y FROM recursive_union.test
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: generating subplan 192_2 for subquery SELECT x, y FROM recursive_union.test_not_colocated
DEBUG: Creating router plan
DEBUG: Plan is router executable
DEBUG: generating subplan 192_3 for subquery SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('192_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) UNION ALL SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('192_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)
DEBUG: Plan 192 query after replacing subqueries and CTEs: SELECT x, y FROM (SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('192_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) u ORDER BY x, y
DEBUG: Creating router plan
DEBUG: Plan is router executable
x | y
---+---
1 | 1
2 | 2
(2 rows)
RESET client_min_messages;
DROP SCHEMA recursive_union CASCADE;
NOTICE: drop cascades to 5 other objects
NOTICE: drop cascades to 6 other objects
DETAIL: drop cascades to table test
drop cascades to table ref
drop cascades to table test_not_colocated
drop cascades to view set_view_recursive
drop cascades to view set_view_pushdown
drop cascades to view set_view_recursive_second

View File

@ -272,7 +272,7 @@ SELECT
count(*)
FROM
multi_outer_join_left a LEFT JOIN multi_outer_join_right b ON (l_nationkey = r_nationkey);
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- Anti-join should return customers for which there is no row in the right table
SELECT
min(l_custkey), max(l_custkey)

View File

@ -252,7 +252,7 @@ SELECT
count(*)
FROM
multi_outer_join_left_hash a LEFT JOIN multi_outer_join_right_hash b ON (l_nationkey = r_nationkey);
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- Anti-join should return customers for which there is no row in the right table
SELECT
min(l_custkey), max(l_custkey)
@ -326,7 +326,7 @@ FROM
LEFT JOIN multi_outer_join_right_reference r1 ON (l1.l_custkey = r1.r_custkey)
LEFT JOIN multi_outer_join_right_reference r2 ON (l1.l_custkey = r2.r_custkey)
RIGHT JOIN multi_outer_join_left_hash l2 ON (r2.r_custkey = l2.l_custkey);
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- add an anti-join, this should also error out
SELECT
*
@ -337,7 +337,7 @@ FROM
RIGHT JOIN multi_outer_join_left_hash l2 ON (r2.r_custkey = l2.l_custkey)
WHERE
r1.r_custkey is NULL;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- Three way join 2-1-1 (broadcast + broadcast join) should work
SELECT
l_custkey, r_custkey, t_custkey

View File

@ -7,6 +7,9 @@ SELECT create_distributed_table('test', 'x');
CREATE TABLE recursive_union.ref (a int, b int);
SELECT create_reference_table('ref');
CREATE TABLE test_not_colocated (LIKE test);
SELECT create_distributed_table('test_not_colocated', 'x', colocate_with := 'none');
INSERT INTO test VALUES (1,1), (2,2);
INSERT INTO ref VALUES (2,2), (3,3);
@ -169,5 +172,9 @@ SELECT * FROM set_view_recursive_second ORDER BY 1,2;
-- this should create lots of recursive calls since both views and set operations lead to recursive plans :)
((SELECT x FROM set_view_recursive_second) INTERSECT (SELECT * FROM set_view_recursive)) EXCEPT (SELECT * FROM set_view_pushdown);
-- queries on non-colocated tables that would push down if they were colocated are recursively planned
SELECT * FROM (SELECT * FROM test UNION SELECT * FROM test_not_colocated) u ORDER BY 1,2;
SELECT * FROM (SELECT * FROM test UNION ALL SELECT * FROM test_not_colocated) u ORDER BY 1,2;
RESET client_min_messages;
DROP SCHEMA recursive_union CASCADE;