citus/src/test/regress/expected/multi_subquery_complex_refe...

1213 lines
41 KiB
Plaintext

--
-- multi subquery complex queries aims to expand existing subquery pushdown
-- regression tests to cover more caeses
-- the tables that are used depends to multi_insert_select_behavioral_analytics_create_table.sql
--
-- We don't need shard id sequence here, so commented out to prevent conflicts with concurrent tests
-- ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1400000;
ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 1400000;
SET citus.enable_router_execution TO FALSE;
CREATE TABLE user_buy_test_table(user_id int, item_id int, buy_count int);
SELECT create_distributed_table('user_buy_test_table', 'user_id');
create_distributed_table
--------------------------
(1 row)
INSERT INTO user_buy_test_table VALUES(1,2,1);
INSERT INTO user_buy_test_table VALUES(2,3,4);
INSERT INTO user_buy_test_table VALUES(3,4,2);
INSERT INTO user_buy_test_table VALUES(7,5,2);
CREATE TABLE users_return_test_table(user_id int, item_id int, buy_count int);
SELECT create_distributed_table('users_return_test_table', 'user_id');
create_distributed_table
--------------------------
(1 row)
INSERT INTO users_return_test_table VALUES(4,1,1);
INSERT INTO users_return_test_table VALUES(1,3,1);
INSERT INTO users_return_test_table VALUES(3,2,2);
CREATE TABLE users_ref_test_table(id int, it_name varchar(25), k_no int);
SELECT create_reference_table('users_ref_test_table');
create_reference_table
------------------------
(1 row)
INSERT INTO users_ref_test_table VALUES(1,'User_1',45);
INSERT INTO users_ref_test_table VALUES(2,'User_2',46);
INSERT INTO users_ref_test_table VALUES(3,'User_3',47);
INSERT INTO users_ref_test_table VALUES(4,'User_4',48);
INSERT INTO users_ref_test_table VALUES(5,'User_5',49);
INSERT INTO users_ref_test_table VALUES(6,'User_6',50);
-- Simple Join test with reference table
SELECT count(*) FROM
(SELECT random() FROM user_buy_test_table JOIN users_ref_test_table
ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1;
count
-------
3
(1 row)
-- Should work, reference table at the inner side is allowed
SELECT count(*) FROM
(SELECT random(), k_no FROM user_buy_test_table LEFT JOIN users_ref_test_table
ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1 WHERE k_no = 47;
count
-------
1
(1 row)
-- Should work, although no equality between partition column and reference table
SELECT subquery_1.item_id FROM
(SELECT user_buy_test_table.item_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
ON user_buy_test_table.item_id = users_ref_test_table.id) subquery_1
ORDER BY 1;
item_id
---------
2
3
4
5
(4 rows)
-- Should work, although no equality between partition column and reference table
SELECT subquery_1.user_id FROM
(SELECT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_1
ORDER BY 1;
user_id
---------
1
2
3
3
7
7
7
7
7
7
(10 rows)
-- Shouldn't work, reference table at the outer side is not allowed
SELECT * FROM
(SELECT random() FROM users_ref_test_table LEFT JOIN user_buy_test_table
ON users_ref_test_table.id = user_buy_test_table.user_id) subquery_1;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- Should work, reference table at the inner side is allowed
SELECT count(*) FROM
(SELECT random() FROM users_ref_test_table RIGHT JOIN user_buy_test_table
ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1;
count
-------
4
(1 row)
-- Shouldn't work, reference table at the outer side is not allowed
SELECT * FROM
(SELECT random() FROM user_buy_test_table RIGHT JOIN users_ref_test_table
ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- Equi join test with reference table on non-partition keys
SELECT count(*) FROM
(SELECT random() FROM user_buy_test_table JOIN users_ref_test_table
ON user_buy_test_table.item_id = users_ref_test_table.id) subquery_1;
count
-------
4
(1 row)
-- Non-equi join test with reference table on non-partition keys
SELECT count(*) FROM
(SELECT random() FROM user_buy_test_table JOIN users_ref_test_table
ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1;
count
-------
10
(1 row)
-- Non-equi left joins with reference tables on non-partition keys
SELECT count(*) FROM
(SELECT random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1;
count
-------
10
(1 row)
-- Should pass since reference table locates in the inner part of each left join
SELECT count(*) FROM
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
ON tt1.user_id = tt2.user_id) subquery_1
LEFT JOIN
(SELECT tt1.user_id, random() FROM user_buy_test_table as tt1 LEFT JOIN users_ref_test_table as ref
ON tt1.user_id = ref.id) subquery_2 ON subquery_1.user_id = subquery_2.user_id;
count
-------
2
(1 row)
-- two subqueries, each include joins with reference table
-- also, two hash distributed tables are joined on partition keys
SELECT count(*) FROM
(SELECT DISTINCT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
ON user_buy_test_table.item_id > users_ref_test_table.id AND users_ref_test_table.k_no > 88 AND user_buy_test_table.item_id < 88) subquery_1,
(SELECT DISTINCT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
ON user_buy_test_table.user_id > users_ref_test_table.id AND users_ref_test_table.k_no > 44 AND user_buy_test_table.user_id > 44) subquery_2
WHERE subquery_1.user_id = subquery_2.user_id ;
count
-------
4
(1 row)
-- Should be able to push down since reference tables are inner joined
-- with hash distributed tables, the results of those joins are the parts of
-- an outer join
SELECT subquery_2.id FROM
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
ON tt1.user_id = tt2.user_id) subquery_1
RIGHT JOIN
(SELECT tt1.user_id, ref.id, random() FROM user_buy_test_table as tt1 JOIN users_ref_test_table as ref
ON tt1.user_id = ref.id) subquery_2 ON subquery_1.user_id = subquery_2.user_id ORDER BY 1 DESC LIMIT 5;
id
----
3
2
1
(3 rows)
-- the same query as the above, but this Citus fails to pushdown the query
-- since the outer part of the right join doesn't include any joins
SELECT * FROM
(SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2
ON tt1.user_id = tt2.user_id) subquery_1
RIGHT JOIN
(SELECT *, random() FROM (SELECT tt1.user_id, random() FROM user_buy_test_table as tt1 JOIN users_ref_test_table as ref
ON tt1.user_id = ref.id) subquery_2_inner) subquery_2 ON subquery_1.user_id = subquery_2.user_id;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- should be able to pushdown since reference table is in the
-- inner part of the left join
SELECT
user_id, sum(value_1)
FROM
(SELECT
users_table.user_id, users_table.value_1, random()
FROM
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
INNER JOIN events_reference_table ON (events_reference_table.value_2 = users_table.user_id)
) as foo
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
user_id | sum
---------+----------
12 | 92221920
17 | 89192642
96 | 85143744
45 | 84267456
90 | 84157047
43 | 82110240
1 | 81735612
72 | 78992640
67 | 72385516
97 | 71002659
(10 rows)
-- same query as above, reference table is wrapped into a subquery
SELECT
user_id, sum(value_1)
FROM
(SELECT
users_table.user_id, users_table.value_1, random()
FROM
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
INNER JOIN (SELECT *, random() FROM events_reference_table) as ref_all ON (ref_all.value_2 = users_table.user_id)
) as foo
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
user_id | sum
---------+----------
12 | 92221920
17 | 89192642
96 | 85143744
45 | 84267456
90 | 84157047
43 | 82110240
1 | 81735612
72 | 78992640
67 | 72385516
97 | 71002659
(10 rows)
-- should be able to pushdown since reference table is in the
-- inner part of the left join
SELECT
user_id, sum(value_1)
FROM
(SELECT
users_table.user_id, users_table.value_1, random()
FROM
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
LEFT JOIN events_reference_table ON (events_reference_table.value_2 = users_table.user_id)
) as foo
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
user_id | sum
---------+----------
12 | 92221920
17 | 89192642
96 | 85143744
45 | 84267456
90 | 84157047
43 | 82110240
1 | 81735612
72 | 78992640
67 | 72385516
97 | 71002659
(10 rows)
-- should not be able to pushdown since reference table is in the
-- direct outer part of the left join
SELECT
user_id, sum(value_1)
FROM
(SELECT
users_table.user_id, users_table.value_1, random()
FROM
events_reference_table LEFT JOIN users_table ON (users_table.user_id = events_reference_table.value_2)
LEFT JOIN events_table ON (events_table.user_id = users_table.user_id)
) as foo
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- should not be able to pushdown since reference table is in the
-- direct outer part of the left join wrapped into a subquery
SELECT
*
FROM
(SELECT *, random() FROM events_reference_table) as ref_all LEFT JOIN users_table
ON (users_table.user_id = ref_all.value_2);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- should not be able to pushdown since reference table is in the
-- outer part of the left join
SELECT
user_id, sum(value_1)
FROM
(SELECT
users_table.user_id, users_table.value_1, random()
FROM
events_reference_table LEFT JOIN users_table ON (users_table.user_id = events_reference_table.value_2)
LEFT JOIN events_table ON (events_table.user_id = users_table.user_id)
) as foo
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- should be able to pushdown since reference table is in the
-- inner part of the left join
SELECT * FROM
(
SELECT DISTINCT foo.user_id
FROM
((SELECT
"events"."time", "events"."user_id" as event_user_id, value_2 as event_val_2, random()
FROM
events_reference_table as "events"
WHERE
event_type > 80) as "temp_data_queries"
INNER JOIN
(SELECT
"users"."user_id"
FROM
users_table as "users"
WHERE
user_id > 80 and value_2 = 5) as foo_in ON (event_val_2 = user_id)) as foo LEFT JOIN
(SELECT user_id as user_user_id FROM users_table) as fooo ON (user_id = user_user_id)) as bar;
user_id
---------
89
(1 row)
-- the same query but this time reference table is in the outer part of the query
SELECT * FROM
(
SELECT DISTINCT foo.user_id
FROM
((SELECT
"events"."time", "events"."user_id" as event_user_id, value_2 as event_val_2, random()
FROM
events_reference_table as "events"
WHERE
event_type > 80) as "temp_data_queries"
LEFT JOIN
(SELECT
"users"."user_id"
FROM
users_table as "users"
WHERE
user_id > 80 and value_2 = 5) as foo_in ON (event_val_2 = user_id)) as foo LEFT JOIN
(SELECT user_id as user_user_id FROM users_table) as fooo ON (user_id = user_user_id)) as bar;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- we could even suuport the following where the subquery
-- on the outer part of the left join contains a reference table
SELECT max(events_all.cnt), events_all.usr_id
FROM
(SELECT users_table.user_id as usr_id,
count(*) as cnt
FROM events_reference_table
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id) GROUP BY users_table.user_id) AS events_all
LEFT JOIN events_table ON (events_all.usr_id = events_table.user_id) GROUP BY 2 ORDER BY 1 DESC, 2 DESC LIMIT 5;
max | usr_id
-------+--------
14605 | 23
13090 | 17
12915 | 25
12317 | 90
12285 | 87
(5 rows)
-- but, we fail to pushdown the following query where join that reference table appears
-- wrapped into a subquery
SELECT max(events_all.cnt),
events_all.usr_id
FROM(
SELECT *, random() FROM
(SELECT users_table.user_id AS usr_id, count(*) AS cnt
FROM events_reference_table
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
GROUP BY users_table.user_id) AS events_all_inner) AS events_all
LEFT JOIN events_table ON (events_all.usr_id = events_table.user_id)
GROUP BY 2
ORDER BY 1 DESC,
2 DESC
LIMIT 5;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- LATERAL JOINs used with INNER JOINs with reference tables
SET citus.subquery_pushdown to ON;
SELECT user_id, lastseen
FROM
(SELECT
"some_users_data".user_id, lastseen
FROM
(SELECT
filter_users_1.user_id, time AS lastseen
FROM
(SELECT
user_where_1_1.user_id
FROM
(SELECT
"users"."user_id"
FROM
users_reference_table as "users"
WHERE
user_id > 12 and user_id < 16 and value_1 > 20) user_where_1_1
INNER JOIN
(SELECT
"users"."user_id"
FROM
users_reference_table as "users"
WHERE
user_id > 12 and user_id < 16 and value_2 > 60) user_where_1_join_1
ON ("user_where_1_1".user_id = "user_where_1_join_1".user_id))
filter_users_1
JOIN LATERAL
(SELECT
user_id, time
FROM
events_reference_table as "events"
WHERE
user_id > 12 and user_id < 16 AND
user_id = filter_users_1.user_id
ORDER BY
time DESC
LIMIT 1) "last_events_1"
ON TRUE
ORDER BY
time DESC
LIMIT 10) "some_recent_users"
JOIN LATERAL
(SELECT
"users".user_id
FROM
users_reference_table as "users"
WHERE
"users"."user_id" = "some_recent_users"."user_id" AND
"users"."value_2" > 70
LIMIT 1) "some_users_data"
ON TRUE
ORDER BY
lastseen DESC
LIMIT 10) "some_users"
ORDER BY
user_id DESC
LIMIT 10;
user_id | lastseen
---------+---------------------------------
14 | Tue Jan 21 05:46:51.286381 2014
14 | Tue Jan 21 05:46:51.286381 2014
14 | Tue Jan 21 05:46:51.286381 2014
14 | Tue Jan 21 05:46:51.286381 2014
14 | Tue Jan 21 05:46:51.286381 2014
14 | Tue Jan 21 05:46:51.286381 2014
14 | Tue Jan 21 05:46:51.286381 2014
14 | Tue Jan 21 05:46:51.286381 2014
14 | Tue Jan 21 05:46:51.286381 2014
14 | Tue Jan 21 05:46:51.286381 2014
(10 rows)
SET citus.subquery_pushdown to OFF;
-- NESTED INNER JOINs with reference tables
SELECT
count(*) AS value, "generated_group_field"
FROM
(SELECT
DISTINCT "pushedDownQuery"."user_id", "generated_group_field"
FROM
(SELECT
"eventQuery"."user_id", "eventQuery"."time", random(), ("eventQuery"."value_2") AS "generated_group_field"
FROM
(SELECT
*
FROM
(SELECT
"events"."time", "events"."user_id", "events"."value_2"
FROM
events_table as "events"
WHERE
user_id > 10 and user_id < 40 AND event_type IN (40, 41, 42, 43, 44, 45) ) "temp_data_queries"
INNER JOIN
(SELECT
user_where_1_1.real_user_id
FROM
(SELECT
"users"."user_id" as real_user_id
FROM
users_reference_table as "users"
WHERE
user_id > 10 and user_id < 40 and value_2 > 50 ) user_where_1_1
INNER JOIN
(SELECT
"users"."user_id"
FROM
users_reference_table as "users"
WHERE
user_id > 10 and user_id < 40 and value_3 > 50 ) user_where_1_join_1
ON ("user_where_1_1".real_user_id = "user_where_1_join_1".user_id)) "user_filters_1"
ON ("temp_data_queries".user_id = "user_filters_1".real_user_id)) "eventQuery") "pushedDownQuery") "pushedDownQuery"
GROUP BY
"generated_group_field"
ORDER BY
generated_group_field DESC, value DESC;
value | generated_group_field
-------+-----------------------
1 | 966
1 | 917
1 | 905
1 | 868
1 | 836
1 | 791
1 | 671
1 | 642
1 | 358
1 | 317
1 | 307
1 | 302
1 | 214
1 | 166
1 | 116
1 | 1
(16 rows)
-- single level inner joins with reference tables
SELECT
"value_3", count(*) AS cnt
FROM
(SELECT
"value_3", "user_id", random()
FROM
(SELECT
users_in_segment_1.user_id, value_3
FROM
(SELECT
user_id, value_3 * 2 as value_3
FROM
(SELECT
user_id, value_3
FROM
(SELECT
"users"."user_id", value_3
FROM
users_reference_table as "users"
WHERE
user_id > 10 and user_id < 40 and value_2 > 30
) simple_user_where_1
) all_buckets_1
) users_in_segment_1
JOIN
(SELECT
"users"."user_id"
FROM
users_reference_table as "users"
WHERE
user_id > 10 and user_id < 40 and value_2 > 60
) some_users_data
ON ("users_in_segment_1".user_id = "some_users_data".user_id)
) segmentalias_1) "tempQuery"
GROUP BY "value_3"
ORDER BY cnt, value_3 DESC LIMIT 10;
value_3 | cnt
---------+-----
556 | 75
228 | 75
146 | 75
70 | 75
1442 | 79
1232 | 79
1090 | 79
1012 | 79
886 | 79
674 | 79
(10 rows)
-- nested LATERAL JOINs with reference tables
SET citus.subquery_pushdown to ON;
SELECT *
FROM
(SELECT "some_users_data".user_id, "some_recent_users".value_3
FROM
(SELECT
filter_users_1.user_id, value_3
FROM
(SELECT
"users"."user_id"
FROM
users_reference_table as "users"
WHERE
user_id > 20 and user_id < 70 and users.value_2 = 200) filter_users_1
JOIN LATERAL
(SELECT
user_id, value_3
FROM
events_reference_table as "events"
WHERE
user_id > 20 and user_id < 70 AND
("events".user_id = "filter_users_1".user_id)
ORDER BY
value_3 DESC
LIMIT 1) "last_events_1" ON true
ORDER BY value_3 DESC
LIMIT 10) "some_recent_users"
JOIN LATERAL
(SELECT
"users".user_id
FROM
users_reference_table as "users"
WHERE
"users"."user_id" = "some_recent_users"."user_id" AND
users.value_2 > 200
LIMIT 1) "some_users_data" ON true
ORDER BY
value_3 DESC
LIMIT 10) "some_users"
ORDER BY
value_3 DESC
LIMIT 10;
user_id | value_3
---------+---------
44 | 998
65 | 996
66 | 996
37 | 995
57 | 989
21 | 985
(6 rows)
SET citus.subquery_pushdown to OFF;
-- LEFT JOINs used with INNER JOINs should not error out since reference table joined
-- with hash table that Citus can push down
SELECT
count(*) AS cnt, "generated_group_field"
FROM
(SELECT
"eventQuery"."user_id", random(), generated_group_field
FROM
(SELECT
"multi_group_wrapper_1".*, generated_group_field, random()
FROM
(SELECT *
FROM
(SELECT
"events"."time", "events"."user_id" as event_user_id
FROM
events_table as "events"
WHERE
user_id > 80) "temp_data_queries"
INNER JOIN
(SELECT
"users"."user_id"
FROM
users_reference_table as "users"
WHERE
user_id > 80 and value_2 = 5) "user_filters_1"
ON ("temp_data_queries".event_user_id = "user_filters_1".user_id)) AS "multi_group_wrapper_1"
LEFT JOIN
(SELECT
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
FROM
users_table as "users") "left_group_by_1"
ON ("left_group_by_1".user_id = "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
group BY
"generated_group_field"
ORDER BY
cnt DESC, generated_group_field ASC
LIMIT 10;
cnt | generated_group_field
-----+-----------------------
176 | 551
176 | 569
176 | 645
176 | 713
176 | 734
88 | 3
88 | 5
88 | 15
88 | 32
88 | 68
(10 rows)
-- RIGHT JOINs used with INNER JOINs should error out since reference table exist in the
-- right side of the RIGHT JOIN.
SELECT
count(*) AS cnt, "generated_group_field"
FROM
(SELECT
"eventQuery"."user_id", random(), generated_group_field
FROM
(SELECT
"multi_group_wrapper_1".*, generated_group_field, random()
FROM
(SELECT *
FROM
(SELECT
"events"."time", "events"."user_id" as event_user_id
FROM
events_table as "events"
WHERE
user_id > 80) "temp_data_queries"
INNER JOIN
(SELECT
"users"."user_id"
FROM
users_table as "users"
WHERE
user_id > 80 and value_2 = 5) "user_filters_1"
ON ("temp_data_queries".event_user_id = "user_filters_1".user_id)) AS "multi_group_wrapper_1"
RIGHT JOIN
(SELECT
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
FROM
users_reference_table as "users") "right_group_by_1"
ON ("right_group_by_1".user_id = "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
group BY
"generated_group_field"
ORDER BY
cnt DESC, generated_group_field ASC
LIMIT 10;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- right join where the inner part of the join includes a reference table
-- joined with hash partitioned table using non-equi join
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event
FROM (
SELECT
t1.user_id,
array_agg(event ORDER BY time) AS events_table,
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
FROM (
(
SELECT u.user_id, 'step=>1'::text AS event, e.time
FROM users_table AS u,
events_reference_table AS e
WHERE u.user_id > e.user_id
AND u.user_id >= 10
AND u.user_id <= 25
AND e.event_type IN (100, 101, 102)
)
) t1 RIGHT JOIN (
SELECT DISTINCT user_id,
'Has done event'::TEXT AS hasdone_event
FROM events_table AS e
WHERE e.user_id >= 10
AND e.user_id <= 25
AND e.event_type IN (106, 107, 108)
) t2 ON (t1.user_id = t2.user_id)
GROUP BY t1.user_id, hasdone_event
) t GROUP BY user_id, hasdone_event
ORDER BY user_id;
user_id | sum | length | hasdone_event
---------+-----+--------+----------------
11 | 306 | 14 | Has done event
12 | 363 | 14 | Has done event
14 | 510 | 14 | Has done event
18 | 600 | 14 | Has done event
19 | 618 | 14 | Has done event
(5 rows)
-- a similar query as the above, with non-partition key comparison
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event
FROM (
SELECT
t1.user_id,
array_agg(event ORDER BY time) AS events_table,
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
FROM (
(
SELECT u.user_id, 'step=>1'::text AS event, e.time
FROM users_table AS u,
events_reference_table AS e
WHERE u.value_1 > e.user_id
AND u.user_id >= 10
AND u.user_id <= 25
AND e.event_type >= 125 AND e.event_type < 130
)
) t1 RIGHT JOIN (
SELECT DISTINCT user_id,
'Has done event'::TEXT AS hasdone_event
FROM events_table AS e
WHERE e.user_id >= 10
AND e.user_id <= 25
AND e.event_type >= 130 AND e.event_type < 135
) t2 ON (t1.user_id = t2.user_id)
GROUP BY t1.user_id, hasdone_event
) t GROUP BY user_id, hasdone_event
ORDER BY user_id;
user_id | sum | length | hasdone_event
---------+------+--------+----------------
10 | 6018 | 14 | Has done event
16 | 5373 | 14 | Has done event
17 | 5683 | 14 | Has done event
18 | 5321 | 14 | Has done event
(4 rows)
-- LEFT JOINs used with INNER JOINs
-- events_table and users_reference_table joined
-- with event_table.non_part_key < reference_table.any_key
SELECT
count(*) AS cnt, "generated_group_field"
FROM
(SELECT
"eventQuery"."user_id", random(), generated_group_field
FROM
(SELECT
"multi_group_wrapper_1".*, generated_group_field, random()
FROM
(SELECT *
FROM
(SELECT
"events"."time", "events"."user_id" as event_user_id
FROM
events_table as "events"
WHERE
user_id > 80) "temp_data_queries"
INNER JOIN
(SELECT
"users"."user_id"
FROM
users_reference_table as "users"
WHERE
user_id > 80 and value_2 = 5) "user_filters_1"
ON ("temp_data_queries".event_user_id < "user_filters_1".user_id)) AS "multi_group_wrapper_1"
RIGHT JOIN
(SELECT
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
FROM
users_table as "users") "left_group_by_1"
ON ("left_group_by_1".user_id = "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
group BY
"generated_group_field"
ORDER BY
cnt DESC, generated_group_field ASC
LIMIT 10;
cnt | generated_group_field
-----+-----------------------
540 | 814
533 | 746
473 | 914
449 | 684
445 | 715
423 | 191
419 | 39
415 | 108
414 | 819
411 | 642
(10 rows)
-- Outer subquery with reference table
SELECT "some_users_data".user_id, lastseen
FROM
(SELECT user_id, max(time) AS lastseen
FROM
(SELECT user_id, time
FROM
(SELECT
user_id, time
FROM
events_reference_table as "events"
WHERE
user_id > 10 and user_id < 40) "events_1"
ORDER BY
time DESC) "recent_events_1"
GROUP BY
user_id
ORDER BY
max(TIME) DESC) "some_recent_users"
FULL JOIN
(SELECT
"users".user_id
FROM
users_table as "users"
WHERE
users.value_2 > 50 and users.value_2 < 55) "some_users_data"
ON "some_users_data"."user_id" = "some_recent_users"."user_id"
ORDER BY
user_id
limit 50;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
--
-- UNIONs and JOINs with reference tables, should error out
--
SELECT ("final_query"."event_types") as types
FROM
( SELECT *, random()
FROM
( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
FROM
( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
FROM (
(SELECT
*
FROM
(SELECT
"events"."user_id", "events"."time", 0 AS event
FROM
events_table as "events"
WHERE
event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1)
UNION
(SELECT
*
FROM
(SELECT
"events"."user_id", "events"."time", 1 AS event
FROM
events_reference_table as "events"
WHERE
event_type IN (15, 16, 17, 18, 19) ) events_subquery_2)
UNION
(SELECT
*
FROM
(SELECT
"events"."user_id", "events"."time", 2 AS event
FROM
events_table as "events"
WHERE
event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3)
UNION
(SELECT
*
FROM
(SELECT
"events"."user_id", "events"."time", 3 AS event
FROM
events_table as "events"
WHERE
event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1
GROUP BY "t1"."user_id") AS t) "q"
INNER JOIN
(SELECT
"users"."user_id"
FROM
users_table as "users"
WHERE
value_1 > 50 and value_1 < 70) AS t
ON (t.user_id = q.user_id)) as final_query
ORDER BY
types;
ERROR: cannot push down this subquery
DETAIL: Reference tables are not supported with union operator
-- reference table exist in the subquery of union, should error out
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
FROM
( SELECT
*, random()
FROM
(SELECT
"t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
FROM
( SELECT
"t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
FROM (
(SELECT
*
FROM
(SELECT
"events"."time", 0 AS event, "events"."user_id"
FROM
events_table as "events"
WHERE
event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1)
UNION
(SELECT *
FROM
(
SELECT * FROM
(
SELECT
max("users"."time"),
0 AS event,
"users"."user_id"
FROM
events_reference_table as "events", users_table as "users"
WHERE
events.user_id = users.user_id AND
event_type IN (10, 11, 12, 13, 14, 15)
GROUP BY "users"."user_id"
) as events_subquery_5
) events_subquery_2)
UNION
(SELECT *
FROM
(SELECT
"events"."time", 2 AS event, "events"."user_id"
FROM
events_table as "events"
WHERE
event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3)
UNION
(SELECT *
FROM
(SELECT
"events"."time", 3 AS event, "events"."user_id"
FROM
events_table as "events"
WHERE
event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)
) t1
GROUP BY "t1"."user_id") AS t) "q"
INNER JOIN
(SELECT
"users"."user_id"
FROM
users_table as "users"
WHERE
value_1 > 50 and value_1 < 70) AS t
ON (t.user_id = q.user_id)) as final_query
GROUP BY
types
ORDER BY
types;
ERROR: cannot push down this subquery
DETAIL: Reference tables are not supported with union operator
--
-- Should error out with UNION ALL Queries on reference tables
--
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
FROM
( SELECT *, random()
FROM
( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
FROM
( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
FROM (
(SELECT *
FROM
(SELECT
"events"."user_id", "events"."time", 0 AS event
FROM
events_table as "events"
WHERE
event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1)
UNION ALL
(SELECT *
FROM
(SELECT
"events"."user_id", "events"."time", 1 AS event
FROM
events_table as "events"
WHERE
event_type IN (15, 16, 17, 18, 19) ) events_subquery_2)
UNION ALL
(SELECT *
FROM
(SELECT
"events"."user_id", "events"."time", 2 AS event
FROM
events_reference_table as "events"
WHERE
event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3)
UNION ALL
(SELECT *
FROM
(SELECT
"events"."user_id", "events"."time", 3 AS event
FROM
events_table as "events"
WHERE
event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1
GROUP BY "t1"."user_id") AS t) "q"
INNER JOIN
(SELECT "users"."user_id"
FROM users_table as "users"
WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id = q.user_id)) as final_query
GROUP BY types
ORDER BY types;
ERROR: cannot push down this subquery
DETAIL: Reference tables are not supported with union operator
-- just a sanity check that we don't allow this if the reference table is on the
-- left part of the left join
SELECT count(*) FROM
(SELECT random() FROM users_ref_test_table LEFT JOIN user_buy_test_table
ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- we don't allow non equi join among hash partitioned tables
SELECT count(*) FROM
(SELECT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1,
(SELECT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table
ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_2
WHERE subquery_1.user_id != subquery_2.user_id ;
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
-- we cannot push this query since hash partitioned tables
-- are not joined on partition keys with equality
SELECT
count(*) AS cnt, "generated_group_field"
FROM
(SELECT
"eventQuery"."user_id", random(), generated_group_field
FROM
(SELECT
"multi_group_wrapper_1".*, generated_group_field, random()
FROM
(SELECT *
FROM
(SELECT
"events"."time", "events"."user_id" as event_user_id
FROM
events_table as "events"
WHERE
user_id > 80) "temp_data_queries"
INNER JOIN
(SELECT
"users"."user_id"
FROM
users_reference_table as "users"
WHERE
user_id > 80 and value_2 = 5) "user_filters_1"
ON ("temp_data_queries".event_user_id < "user_filters_1".user_id)) AS "multi_group_wrapper_1"
RIGHT JOIN
(SELECT
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
FROM
users_table as "users") "left_group_by_1"
ON ("left_group_by_1".user_id > "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
group BY
"generated_group_field"
ORDER BY
cnt DESC, generated_group_field ASC
LIMIT 10;
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
-- two hash partitioned relations are not joined
-- on partiton keys although reference table is fine
-- to push down
SELECT
u1.user_id, count(*)
FROM
events_table as e1, users_table as u1
WHERE
event_type IN
(SELECT
event_type
FROM
events_reference_table as e2
WHERE
value_2 = 15 AND
value_3 > 25 AND
e1.value_2 > e2.value_2
)
AND u1.user_id > e1.user_id
GROUP BY 1
ORDER BY 2 DESC, 1 DESC
LIMIT 5;
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
SELECT foo.user_id FROM
(
SELECT m.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
WHERE event_type > 100000
) as foo;
user_id
---------
(0 rows)
-- not supported since group by is on the reference table column
SELECT foo.user_id FROM
(
SELECT r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
GROUP BY r.user_id
) as foo;
ERROR: cannot push down this subquery
DETAIL: Group by list without partition column is currently unsupported
-- supported since the group by contains at least one distributed table
SELECT foo.user_id FROM
(
SELECT r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
GROUP BY r.user_id, m.user_id
) as foo
ORDER BY 1 LIMIT 3;
user_id
---------
0
1
2
(3 rows)
-- not supported since distinct is on the reference table column
SELECT foo.user_id FROM
(
SELECT DISTINCT r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
) as foo;
ERROR: cannot push down this subquery
DETAIL: Distinct on columns without partition column is currently unsupported
-- not supported since distinct on is on the reference table column
SELECT foo.user_id FROM
(
SELECT DISTINCT ON(r.user_id) r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
) as foo;
ERROR: cannot push down this subquery
DETAIL: Distinct on columns without partition column is currently unsupported
-- supported since the distinct on contains at least one distributed table
SELECT foo.user_id FROM
(
SELECT DISTINCT ON(r.user_id, m.user_id) r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id)
) as foo
ORDER BY 1 LIMIT 3;
user_id
---------
0
1
2
(3 rows)
DROP TABLE user_buy_test_table;
DROP TABLE users_ref_test_table;
DROP TABLE users_return_test_table;