mirror of https://github.com/citusdata/citus.git
2255 lines
64 KiB
Plaintext
2255 lines
64 KiB
Plaintext
--
|
||
-- multi subquery behavioral analytics queries aims to expand existing subquery pushdown
|
||
-- regression tests to cover more cases
|
||
-- the tables that are used depends to multi_insert_select_behavioral_analytics_create_table.sql
|
||
--
|
||
--- We don't need shard id sequence here given that we're not creating any shards, so not writing it at all
|
||
-- The following line is intended to force Citus to NOT use router planner for the tests in this
|
||
-- file. The motivation for doing this is to make sure that single-task queries can be planned
|
||
-- by non-router code-paths. Thus, this flag should NOT be used in production. Otherwise, the actual
|
||
-- router queries would fail.
|
||
SET citus.enable_router_execution TO FALSE;
|
||
---------------------------------------------------------------------
|
||
-- Vanilla funnel query
|
||
---------------------------------------------------------------------
|
||
SELECT user_id, array_length(events_table, 1)
|
||
FROM (
|
||
SELECT user_id, array_agg(event ORDER BY time) AS events_table
|
||
FROM (
|
||
SELECT u.user_id, e.event_type::text AS event, e.time
|
||
FROM users_table AS u,
|
||
events_table AS e
|
||
WHERE u.user_id = e.user_id
|
||
AND u.user_id >= 1
|
||
AND u.user_id <= 3
|
||
AND e.event_type IN (1, 2)
|
||
) t
|
||
GROUP BY user_id
|
||
) q
|
||
ORDER BY 2 DESC, 1;
|
||
user_id | array_length
|
||
---------------------------------------------------------------------
|
||
3 | 187
|
||
2 | 180
|
||
1 | 28
|
||
(3 rows)
|
||
|
||
---------------------------------------------------------------------
|
||
-- Funnel grouped by whether or not a user has done an event
|
||
-- This has multiple subqueries joinin at the top level
|
||
---------------------------------------------------------------------
|
||
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event
|
||
FROM (
|
||
SELECT
|
||
t1.user_id,
|
||
array_agg(event ORDER BY time) AS events_table,
|
||
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||
FROM (
|
||
(
|
||
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||
FROM users_table AS u,
|
||
events_table AS e
|
||
WHERE u.user_id = e.user_id
|
||
AND u.user_id >= 1
|
||
AND u.user_id <= 3
|
||
AND e.event_type IN (1, 2)
|
||
)
|
||
UNION
|
||
(
|
||
SELECT u.user_id, 'step=>2'::text AS event, e.time
|
||
FROM users_table AS u,
|
||
events_table AS e
|
||
WHERE u.user_id = e.user_id
|
||
AND u.user_id >= 1
|
||
AND u.user_id <= 3
|
||
AND e.event_type IN (3, 4)
|
||
)
|
||
) t1 LEFT JOIN (
|
||
SELECT DISTINCT user_id,
|
||
'Has done event'::TEXT AS hasdone_event
|
||
FROM events_table AS e
|
||
WHERE e.user_id >= 1
|
||
AND e.user_id <= 3
|
||
AND e.event_type IN (5, 6)
|
||
) t2 ON (t1.user_id = t2.user_id)
|
||
GROUP BY t1.user_id, hasdone_event
|
||
) t GROUP BY user_id, hasdone_event
|
||
ORDER BY user_id;
|
||
user_id | sum | length | hasdone_event
|
||
---------------------------------------------------------------------
|
||
1 | 12 | 14 | Has done event
|
||
2 | 20 | 14 | Has done event
|
||
3 | 20 | 14 | Has done event
|
||
(3 rows)
|
||
|
||
-- same query but multiple joins are one level below, returns count of row instead of actual rows
|
||
SELECT count(*)
|
||
FROM (
|
||
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event
|
||
FROM (
|
||
SELECT
|
||
t1.user_id,
|
||
array_agg(event ORDER BY time) AS events_table,
|
||
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||
FROM (
|
||
(
|
||
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||
FROM users_table AS u,
|
||
events_table AS e
|
||
WHERE u.user_id = e.user_id
|
||
AND u.user_id >= 1
|
||
AND u.user_id <= 3
|
||
AND e.event_type IN (1, 2)
|
||
)
|
||
UNION
|
||
(
|
||
SELECT u.user_id, 'step=>2'::text AS event, e.time
|
||
FROM users_table AS u,
|
||
events_table AS e
|
||
WHERE u.user_id = e.user_id
|
||
AND u.user_id >= 1
|
||
AND u.user_id <= 3
|
||
AND e.event_type IN (3, 4)
|
||
)
|
||
) t1 LEFT JOIN (
|
||
SELECT DISTINCT user_id,
|
||
'Has done event'::TEXT AS hasdone_event
|
||
FROM events_table AS e
|
||
WHERE e.user_id >= 1
|
||
AND e.user_id <= 3
|
||
AND e.event_type IN (5, 6)
|
||
) t2 ON (t1.user_id = t2.user_id)
|
||
GROUP BY t1.user_id, hasdone_event
|
||
) t GROUP BY user_id, hasdone_event
|
||
ORDER BY user_id) u;
|
||
count
|
||
---------------------------------------------------------------------
|
||
3
|
||
(1 row)
|
||
|
||
-- Same queries written without unions
|
||
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event
|
||
FROM (
|
||
SELECT
|
||
t1.user_id,
|
||
array_agg(event ORDER BY time) AS events_table,
|
||
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||
FROM (
|
||
SELECT
|
||
u.user_id,
|
||
CASE WHEN e.event_type IN (1, 2) THEN 'step=>1'::text else 'step==>2'::text END AS event,
|
||
e.time
|
||
FROM users_table AS u,
|
||
events_table AS e
|
||
WHERE u.user_id = e.user_id
|
||
AND u.user_id >= 1
|
||
AND u.user_id <= 3
|
||
AND e.event_type IN (1, 2, 3, 4)
|
||
GROUP BY 1,2,3
|
||
) t1 LEFT JOIN (
|
||
SELECT DISTINCT user_id,
|
||
'Has done event'::TEXT AS hasdone_event
|
||
FROM events_table AS e
|
||
WHERE e.user_id >= 1
|
||
AND e.user_id <= 3
|
||
AND e.event_type IN (5, 6)
|
||
) t2 ON (t1.user_id = t2.user_id)
|
||
GROUP BY t1.user_id, hasdone_event
|
||
) t GROUP BY user_id, hasdone_event
|
||
ORDER BY user_id;
|
||
user_id | sum | length | hasdone_event
|
||
---------------------------------------------------------------------
|
||
1 | 12 | 14 | Has done event
|
||
2 | 20 | 14 | Has done event
|
||
3 | 20 | 14 | Has done event
|
||
(3 rows)
|
||
|
||
-- same query but multiple joins are one level below, returns count of row instead of actual rows
|
||
SELECT count(*)
|
||
FROM (
|
||
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event
|
||
FROM (
|
||
SELECT
|
||
t1.user_id,
|
||
array_agg(event ORDER BY time) AS events_table,
|
||
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||
FROM (
|
||
SELECT
|
||
u.user_id,
|
||
CASE WHEN e.event_type in (1, 2) then 'step=>1'::text else 'step==>2'::text END AS event,
|
||
e.time
|
||
FROM users_table AS u,
|
||
events_table AS e
|
||
WHERE u.user_id = e.user_id
|
||
AND u.user_id >= 1
|
||
AND u.user_id <= 3
|
||
AND e.event_type IN (1, 2, 3, 4)
|
||
GROUP BY 1,2,3
|
||
) t1 LEFT JOIN (
|
||
SELECT DISTINCT user_id,
|
||
'Has done event'::TEXT AS hasdone_event
|
||
FROM events_table AS e
|
||
WHERE e.user_id >= 1
|
||
AND e.user_id <= 3
|
||
AND e.event_type IN (5, 6)
|
||
) t2 ON (t1.user_id = t2.user_id)
|
||
GROUP BY t1.user_id, hasdone_event
|
||
) t GROUP BY user_id, hasdone_event
|
||
ORDER BY user_id) u;
|
||
count
|
||
---------------------------------------------------------------------
|
||
3
|
||
(1 row)
|
||
|
||
---------------------------------------------------------------------
|
||
-- Funnel, grouped by the number of times a user has done an event
|
||
---------------------------------------------------------------------
|
||
SELECT
|
||
user_id,
|
||
avg(array_length(events_table, 1)) AS event_average,
|
||
count_pay
|
||
FROM (
|
||
SELECT
|
||
subquery_1.user_id,
|
||
array_agg(event ORDER BY time) AS events_table,
|
||
COALESCE(count_pay, 0) AS count_pay
|
||
FROM
|
||
(
|
||
(SELECT
|
||
users_table.user_id,
|
||
'action=>1'AS event,
|
||
events_table.time
|
||
FROM
|
||
users_table,
|
||
events_table
|
||
WHERE
|
||
users_table.user_id = events_table.user_id AND
|
||
users_table.user_id >= 1 AND
|
||
users_table.user_id <= 3 AND
|
||
events_table.event_type > 1 AND events_table.event_type < 3
|
||
)
|
||
UNION
|
||
(SELECT
|
||
users_table.user_id,
|
||
'action=>2'AS event,
|
||
events_table.time
|
||
FROM
|
||
users_table,
|
||
events_table
|
||
WHERE
|
||
users_table.user_id = events_table.user_id AND
|
||
users_table.user_id >= 1 AND
|
||
users_table.user_id <= 3 AND
|
||
events_table.event_type > 1 AND events_table.event_type < 4
|
||
)
|
||
) AS subquery_1
|
||
LEFT JOIN
|
||
(SELECT
|
||
user_id,
|
||
COUNT(*) AS count_pay
|
||
FROM
|
||
users_table
|
||
WHERE
|
||
user_id >= 1 AND
|
||
user_id <= 3 AND
|
||
users_table.value_1 > 2 AND users_table.value_1 < 5
|
||
GROUP BY
|
||
user_id
|
||
HAVING
|
||
COUNT(*) > 1) AS subquery_2
|
||
ON
|
||
subquery_1.user_id = subquery_2.user_id
|
||
GROUP BY
|
||
subquery_1.user_id,
|
||
count_pay) AS subquery_top
|
||
WHERE
|
||
array_ndims(events_table) > 0
|
||
GROUP BY
|
||
count_pay, user_id
|
||
ORDER BY
|
||
event_average DESC, count_pay DESC, user_id DESC;
|
||
user_id | event_average | count_pay
|
||
---------------------------------------------------------------------
|
||
3 | 19.0000000000000000 | 7
|
||
2 | 12.0000000000000000 | 9
|
||
1 | 7.0000000000000000 | 5
|
||
(3 rows)
|
||
|
||
SELECT
|
||
user_id,
|
||
avg(array_length(events_table, 1)) AS event_average,
|
||
count_pay
|
||
FROM (
|
||
SELECT
|
||
subquery_1.user_id,
|
||
array_agg(event ORDER BY time) AS events_table,
|
||
COALESCE(count_pay, 0) AS count_pay
|
||
FROM
|
||
(
|
||
(SELECT
|
||
users_table.user_id,
|
||
'action=>1'AS event,
|
||
events_table.time
|
||
FROM
|
||
users_table,
|
||
events_table
|
||
WHERE
|
||
users_table.user_id = events_table.user_id AND
|
||
users_table.user_id >= 1 AND
|
||
users_table.user_id <= 3 AND
|
||
events_table.event_type > 1 AND events_table.event_type < 3
|
||
)
|
||
UNION
|
||
(SELECT
|
||
users_table.user_id,
|
||
'action=>2'AS event,
|
||
events_table.time
|
||
FROM
|
||
users_table,
|
||
events_table
|
||
WHERE
|
||
users_table.user_id = events_table.user_id AND
|
||
users_table.user_id >= 1 AND
|
||
users_table.user_id <= 3 AND
|
||
events_table.event_type > 1 AND events_table.event_type < 4
|
||
)
|
||
) AS subquery_1
|
||
LEFT JOIN
|
||
(SELECT
|
||
user_id,
|
||
COUNT(*) AS count_pay
|
||
FROM
|
||
users_table
|
||
WHERE
|
||
user_id >= 1 AND
|
||
user_id <= 3 AND
|
||
users_table.value_1 > 2 AND users_table.value_1 < 4
|
||
GROUP BY
|
||
user_id
|
||
HAVING
|
||
COUNT(*) > 1) AS subquery_2
|
||
ON
|
||
subquery_1.user_id = subquery_2.user_id
|
||
GROUP BY
|
||
subquery_1.user_id,
|
||
count_pay) AS subquery_top
|
||
WHERE
|
||
array_ndims(events_table) > 0
|
||
GROUP BY
|
||
count_pay, user_id
|
||
HAVING
|
||
avg(array_length(events_table, 1)) > 0
|
||
ORDER BY
|
||
event_average DESC, count_pay DESC, user_id DESC;
|
||
user_id | event_average | count_pay
|
||
---------------------------------------------------------------------
|
||
3 | 19.0000000000000000 | 3
|
||
2 | 12.0000000000000000 | 4
|
||
1 | 7.0000000000000000 | 3
|
||
(3 rows)
|
||
|
||
-- Same queries rewritten without using unions
|
||
SELECT
|
||
user_id,
|
||
avg(array_length(events_table, 1)) AS event_average,
|
||
count_pay
|
||
FROM (
|
||
SELECT
|
||
subquery_1.user_id,
|
||
array_agg(event ORDER BY time) AS events_table,
|
||
COALESCE(count_pay, 0) AS count_pay
|
||
FROM
|
||
(
|
||
SELECT
|
||
users_table.user_id,
|
||
CASE
|
||
WHEN
|
||
events_table.event_type > 1 AND events_table.event_type < 3
|
||
THEN 'action=>1'
|
||
ELSE 'action=>2'
|
||
END AS event,
|
||
events_table.time
|
||
FROM
|
||
users_table,
|
||
events_table
|
||
WHERE
|
||
users_table.user_id = events_table.user_id AND
|
||
users_table.user_id >= 1 AND
|
||
users_table.user_id <= 3 AND
|
||
(events_table.event_type > 1 AND events_table.event_type < 3
|
||
OR
|
||
events_table.event_type > 2 AND events_table.event_type < 4)
|
||
GROUP BY 1, 2, 3
|
||
) AS subquery_1
|
||
LEFT JOIN
|
||
(SELECT
|
||
user_id,
|
||
COUNT(*) AS count_pay
|
||
FROM
|
||
users_table
|
||
WHERE
|
||
user_id >= 1 AND
|
||
user_id <= 3 AND
|
||
users_table.value_1 > 3 AND users_table.value_1 < 5
|
||
GROUP BY
|
||
user_id
|
||
HAVING
|
||
COUNT(*) > 1) AS subquery_2
|
||
ON
|
||
subquery_1.user_id = subquery_2.user_id
|
||
GROUP BY
|
||
subquery_1.user_id,
|
||
count_pay) AS subquery_top
|
||
WHERE
|
||
array_ndims(events_table) > 0
|
||
GROUP BY
|
||
count_pay, user_id
|
||
ORDER BY
|
||
event_average DESC, count_pay DESC, user_id DESC;
|
||
user_id | event_average | count_pay
|
||
---------------------------------------------------------------------
|
||
3 | 12.0000000000000000 | 4
|
||
2 | 9.0000000000000000 | 5
|
||
1 | 5.0000000000000000 | 2
|
||
(3 rows)
|
||
|
||
SELECT
|
||
user_id,
|
||
avg(array_length(events_table, 1)) AS event_average,
|
||
count_pay
|
||
FROM (
|
||
SELECT
|
||
subquery_1.user_id,
|
||
array_agg(event ORDER BY time) AS events_table,
|
||
COALESCE(count_pay, 0) AS count_pay
|
||
FROM
|
||
(
|
||
SELECT
|
||
users_table.user_id,
|
||
CASE WHEN events_table.event_type > 1 AND events_table.event_type < 3 THEN 'action=>1' ELSE 'action=>2' END AS event,
|
||
events_table.time
|
||
FROM
|
||
users_table,
|
||
events_table
|
||
WHERE
|
||
users_table.user_id = events_table.user_id AND
|
||
users_table.user_id >= 1 AND
|
||
users_table.user_id <= 3 AND
|
||
(events_table.event_type > 1 AND events_table.event_type < 3
|
||
OR
|
||
events_table.event_type > 2 AND events_table.event_type < 4)
|
||
GROUP BY 1, 2, 3
|
||
) AS subquery_1
|
||
LEFT JOIN
|
||
(SELECT
|
||
user_id,
|
||
COUNT(*) AS count_pay
|
||
FROM
|
||
users_table
|
||
WHERE
|
||
user_id >= 1 AND
|
||
user_id <= 3 AND
|
||
users_table.value_1 > 3 AND users_table.value_1 < 5
|
||
GROUP BY
|
||
user_id
|
||
HAVING
|
||
COUNT(*) > 1) AS subquery_2
|
||
ON
|
||
subquery_1.user_id = subquery_2.user_id
|
||
GROUP BY
|
||
subquery_1.user_id,
|
||
count_pay) AS subquery_top
|
||
WHERE
|
||
array_ndims(events_table) > 0
|
||
GROUP BY
|
||
count_pay, user_id
|
||
HAVING
|
||
avg(array_length(events_table, 1)) > 0
|
||
ORDER BY
|
||
event_average DESC, count_pay DESC, user_id DESC;
|
||
user_id | event_average | count_pay
|
||
---------------------------------------------------------------------
|
||
3 | 12.0000000000000000 | 4
|
||
2 | 9.0000000000000000 | 5
|
||
1 | 5.0000000000000000 | 2
|
||
(3 rows)
|
||
|
||
---------------------------------------------------------------------
|
||
-- Most recently seen users_table events_table
|
||
---------------------------------------------------------------------
|
||
-- Note that we don't use ORDER BY/LIMIT yet
|
||
---------------------------------------------------------------------
|
||
SELECT
|
||
user_id,
|
||
user_lastseen,
|
||
array_length(event_array, 1)
|
||
FROM (
|
||
SELECT
|
||
user_id,
|
||
max(u.time) as user_lastseen,
|
||
array_agg(event_type ORDER BY u.time) AS event_array
|
||
FROM (
|
||
SELECT user_id, time
|
||
FROM users_table
|
||
WHERE
|
||
user_id >= 1 AND
|
||
user_id <= 3 AND
|
||
users_table.value_1 > 1 AND users_table.value_1 < 3
|
||
) u LEFT JOIN LATERAL (
|
||
SELECT event_type, time
|
||
FROM events_table
|
||
WHERE user_id = u.user_id AND
|
||
events_table.event_type > 1 AND events_table.event_type < 3
|
||
) t ON true
|
||
GROUP BY user_id
|
||
) AS shard_union
|
||
ORDER BY user_lastseen DESC, user_id;
|
||
user_id | user_lastseen | array_length
|
||
---------------------------------------------------------------------
|
||
2 | Thu Nov 23 11:47:26.900284 2017 | 12
|
||
3 | Thu Nov 23 11:18:53.114408 2017 | 14
|
||
(2 rows)
|
||
|
||
---------------------------------------------------------------------
|
||
-- Count the number of distinct users_table who are in segment X and Y and Z
|
||
---------------------------------------------------------------------
|
||
SELECT user_id
|
||
FROM users_table
|
||
WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 1 AND value_1 <= 2)
|
||
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 3 AND value_1 <= 4)
|
||
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 5 AND value_1 <= 6)
|
||
GROUP BY
|
||
user_id
|
||
ORDER BY
|
||
user_id DESC
|
||
LIMIT 5;
|
||
user_id
|
||
---------------------------------------------------------------------
|
||
6
|
||
5
|
||
4
|
||
3
|
||
1
|
||
(5 rows)
|
||
|
||
---------------------------------------------------------------------
|
||
-- Find customers who have done X, and satisfy other customer specific criteria
|
||
---------------------------------------------------------------------
|
||
SELECT user_id, value_2 FROM users_table WHERE
|
||
value_1 > 1 AND value_1 < 3
|
||
AND value_2 >= 1
|
||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 1 AND event_type < 3 AND value_3 > 1 AND user_id = users_table.user_id)
|
||
ORDER BY 2 DESC, 1 DESC
|
||
LIMIT 5;
|
||
user_id | value_2
|
||
---------------------------------------------------------------------
|
||
6 | 4
|
||
6 | 4
|
||
2 | 4
|
||
6 | 3
|
||
4 | 3
|
||
(5 rows)
|
||
|
||
---------------------------------------------------------------------
|
||
-- Customers who haven’t done X, and satisfy other customer specific criteria
|
||
---------------------------------------------------------------------
|
||
SELECT user_id, value_2 FROM users_table WHERE
|
||
value_1 = 2
|
||
AND value_2 >= 1
|
||
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=2 AND value_3 > 1 AND user_id = users_table.user_id)
|
||
ORDER BY 1 DESC, 2 DESC
|
||
LIMIT 3;
|
||
user_id | value_2
|
||
---------------------------------------------------------------------
|
||
5 | 5
|
||
5 | 5
|
||
5 | 2
|
||
(3 rows)
|
||
|
||
---------------------------------------------------------------------
|
||
-- Customers who have done X and Y, and satisfy other customer specific criteria
|
||
---------------------------------------------------------------------
|
||
SELECT user_id, sum(value_2) as cnt FROM users_table WHERE
|
||
value_1 > 1
|
||
AND value_2 >= 1
|
||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type != 1 AND value_3 > 1 AND user_id = users_table.user_id)
|
||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type = 2 AND value_3 > 1 AND user_id = users_table.user_id)
|
||
GROUP BY
|
||
user_id
|
||
ORDER BY cnt DESC, user_id DESC
|
||
LIMIT 5;
|
||
user_id | cnt
|
||
---------------------------------------------------------------------
|
||
4 | 43
|
||
2 | 37
|
||
3 | 34
|
||
1 | 17
|
||
6 | 15
|
||
(5 rows)
|
||
|
||
---------------------------------------------------------------------
|
||
-- Customers who have done X and haven’t done Y, and satisfy other customer specific criteria
|
||
---------------------------------------------------------------------
|
||
SELECT user_id, value_2 FROM users_table WHERE
|
||
value_2 >= 1
|
||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 1 AND event_type <= 3 AND value_3 > 1 AND user_id = users_table.user_id)
|
||
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 3 AND event_type <= 4 AND value_3 > 1 AND user_id = users_table.user_id)
|
||
ORDER BY 2 DESC, 1 DESC
|
||
LIMIT 4;
|
||
user_id | value_2
|
||
---------------------------------------------------------------------
|
||
5 | 5
|
||
5 | 5
|
||
5 | 5
|
||
5 | 4
|
||
(4 rows)
|
||
|
||
---------------------------------------------------------------------
|
||
-- Customers who have done X more than 2 times, and satisfy other customer specific criteria
|
||
---------------------------------------------------------------------
|
||
SELECT user_id,
|
||
avg(value_2)
|
||
FROM users_table
|
||
WHERE value_1 > 1
|
||
AND value_1 < 3
|
||
AND value_2 >= 1
|
||
AND EXISTS (SELECT user_id
|
||
FROM events_table
|
||
WHERE event_type > 1
|
||
AND event_type < 3
|
||
AND value_3 > 1
|
||
AND user_id = users_table.user_id
|
||
GROUP BY user_id
|
||
HAVING Count(*) > 2)
|
||
GROUP BY
|
||
user_id
|
||
ORDER BY
|
||
1 DESC, 2 DESC
|
||
LIMIT 5;
|
||
user_id | avg
|
||
---------------------------------------------------------------------
|
||
4 | 2.0000000000000000
|
||
3 | 2.0000000000000000
|
||
(2 rows)
|
||
|
||
---------------------------------------------------------------------
|
||
-- Find me all users_table who logged in more than once
|
||
---------------------------------------------------------------------
|
||
SELECT user_id, value_1 from
|
||
(
|
||
SELECT
|
||
user_id, value_1 From users_table
|
||
WHERE
|
||
value_2 > 1 and user_id = 2
|
||
GROUP BY
|
||
value_1, user_id
|
||
HAVING
|
||
count(*) > 1
|
||
) AS a
|
||
ORDER BY
|
||
user_id ASC, value_1 ASC;
|
||
user_id | value_1
|
||
---------------------------------------------------------------------
|
||
2 | 0
|
||
2 | 2
|
||
2 | 3
|
||
2 | 4
|
||
(4 rows)
|
||
|
||
-- same query with additional filter to make it not router plannable
|
||
SELECT user_id, value_1 from
|
||
(
|
||
SELECT
|
||
user_id, value_1 From users_table
|
||
WHERE
|
||
value_2 > 1 and (user_id = 2 OR user_id = 3)
|
||
GROUP BY
|
||
value_1, user_id
|
||
HAVING count(*) > 1
|
||
) AS a
|
||
ORDER BY
|
||
user_id ASC, value_1 ASC;
|
||
user_id | value_1
|
||
---------------------------------------------------------------------
|
||
2 | 0
|
||
2 | 2
|
||
2 | 3
|
||
2 | 4
|
||
3 | 1
|
||
3 | 2
|
||
3 | 3
|
||
3 | 4
|
||
(8 rows)
|
||
|
||
---------------------------------------------------------------------
|
||
-- Find me all users_table who has done some event and has filters
|
||
---------------------------------------------------------------------
|
||
SELECT user_id
|
||
FROM events_table
|
||
WHERE
|
||
event_type = 3 AND value_2 > 2 AND
|
||
user_id IN
|
||
(SELECT
|
||
user_id
|
||
FROM
|
||
users_table
|
||
WHERE
|
||
value_1 = 1 AND value_2 > 2
|
||
)
|
||
ORDER BY 1;
|
||
user_id
|
||
---------------------------------------------------------------------
|
||
1
|
||
2
|
||
2
|
||
3
|
||
5
|
||
(5 rows)
|
||
|
||
---------------------------------------------------------------------
|
||
-- Which events_table did people who has done some specific events_table
|
||
---------------------------------------------------------------------
|
||
SELECT
|
||
user_id, event_type FROM events_table
|
||
WHERE
|
||
user_id in (SELECT user_id from events_table WHERE event_type > 3 and event_type < 5)
|
||
GROUP BY
|
||
user_id, event_type
|
||
ORDER BY 2 DESC, 1
|
||
LIMIT 3;
|
||
user_id | event_type
|
||
---------------------------------------------------------------------
|
||
1 | 6
|
||
2 | 5
|
||
3 | 5
|
||
(3 rows)
|
||
|
||
---------------------------------------------------------------------
|
||
-- Find me all the users_table who has done some event more than three times
|
||
---------------------------------------------------------------------
|
||
SELECT user_id FROM
|
||
(
|
||
SELECT
|
||
user_id
|
||
FROM
|
||
events_table
|
||
WHERE
|
||
event_type = 2
|
||
GROUP BY
|
||
user_id
|
||
HAVING
|
||
count(*) > 1
|
||
) AS a
|
||
ORDER BY
|
||
user_id;
|
||
user_id
|
||
---------------------------------------------------------------------
|
||
1
|
||
2
|
||
3
|
||
4
|
||
6
|
||
(5 rows)
|
||
|
||
---------------------------------------------------------------------
|
||
-- Find my assets that have the highest probability and fetch their metadata
|
||
---------------------------------------------------------------------
|
||
CREATE TEMP TABLE assets AS
|
||
SELECT
|
||
users_table.user_id, users_table.value_1, prob
|
||
FROM
|
||
users_table
|
||
JOIN
|
||
(SELECT
|
||
ma.user_id, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob
|
||
FROM
|
||
users_table AS ma, events_table as short_list
|
||
WHERE
|
||
short_list.user_id = ma.user_id and ma.value_1 < 2 and short_list.event_type < 2
|
||
) temp
|
||
ON users_table.user_id = temp.user_id
|
||
WHERE
|
||
users_table.value_1 < 2;
|
||
-- get some statistics from the aggregated results to ensure the results are correct
|
||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM assets;
|
||
count | count | avg
|
||
---------------------------------------------------------------------
|
||
732 | 6 | 3.3934426229508197
|
||
(1 row)
|
||
|
||
DROP TABLE assets;
|
||
-- count number of distinct users who have value_1 equal to 5 or 13 but not 3
|
||
-- is recusrively planned
|
||
SET client_min_messages TO DEBUG1;
|
||
SELECT count(*) FROM
|
||
(
|
||
SELECT
|
||
user_id
|
||
FROM
|
||
users_table
|
||
WHERE
|
||
(value_1 = '1' OR value_1 = '3') AND
|
||
user_id NOT IN (select user_id from users_table where value_1 = '4')
|
||
GROUP BY
|
||
user_id
|
||
HAVING
|
||
count(distinct value_1) = 2
|
||
) as foo;
|
||
DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM public.users_table WHERE (value_1 OPERATOR(pg_catalog.=) 4)
|
||
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT users_table.user_id FROM public.users_table WHERE (((users_table.value_1 OPERATOR(pg_catalog.=) 1) OR (users_table.value_1 OPERATOR(pg_catalog.=) 3)) AND (NOT (users_table.user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer))))) GROUP BY users_table.user_id HAVING (count(DISTINCT users_table.value_1) OPERATOR(pg_catalog.=) 2)) foo
|
||
count
|
||
---------------------------------------------------------------------
|
||
1
|
||
(1 row)
|
||
|
||
RESET client_min_messages;
|
||
-- previous push down query
|
||
SELECT subquery_count FROM
|
||
(SELECT count(*) as subquery_count FROM
|
||
(SELECT
|
||
user_id
|
||
FROM
|
||
users_table
|
||
WHERE
|
||
(value_1 = '1' OR value_1 = '3')
|
||
GROUP BY
|
||
user_id
|
||
HAVING
|
||
count(distinct value_1) = 2) as a
|
||
LEFT JOIN
|
||
(SELECT
|
||
user_id
|
||
FROM
|
||
users_table
|
||
WHERE
|
||
(value_1 = '2')
|
||
GROUP BY
|
||
user_id) as b
|
||
ON a.user_id = b.user_id
|
||
WHERE
|
||
b.user_id IS NULL
|
||
GROUP BY
|
||
a.user_id
|
||
) AS inner_subquery;
|
||
subquery_count
|
||
---------------------------------------------------------------------
|
||
1
|
||
(1 row)
|
||
|
||
-- new pushdown query without single range table entry at top requirement
|
||
SELECT count(*) as subquery_count
|
||
FROM (
|
||
SELECT
|
||
user_id
|
||
FROM
|
||
users_table
|
||
WHERE
|
||
(value_1 = '1' OR value_1 = '3')
|
||
GROUP BY
|
||
user_id
|
||
HAVING
|
||
count(distinct value_1) = 2
|
||
) as a
|
||
LEFT JOIN (
|
||
SELECT
|
||
user_id
|
||
FROM
|
||
users_table
|
||
WHERE
|
||
(value_1 = '2')
|
||
GROUP BY
|
||
user_id) AS b
|
||
ON a.user_id = b.user_id
|
||
WHERE
|
||
b.user_id IS NULL
|
||
GROUP BY
|
||
a.user_id;
|
||
subquery_count
|
||
---------------------------------------------------------------------
|
||
1
|
||
(1 row)
|
||
|
||
-- most queries below has limit clause
|
||
-- therefore setting subquery_pushdown flag for all
|
||
SET citus.subquery_pushdown to ON;
|
||
NOTICE: Setting citus.subquery_pushdown flag is discouraged becuase it forces the planner to pushdown certain queries, skipping relevant correctness checks.
|
||
DETAIL: When enabled, the planner skips many correctness checks for subqueries and pushes down the queries to shards as-is. It means that the queries are likely to return wrong results unless the user is absolutely sure that pushing down the subquery is safe. This GUC is maintained only for backward compatibility, no new users are supposed to use it. The planner is capable of pushing down as much computation as possible to the shards depending on the query.
|
||
-- multi-subquery-join
|
||
-- The first query has filters on partition column to make it router plannable
|
||
-- but it is processed by logical planner since we disabled router execution
|
||
SELECT
|
||
e1.user_id,
|
||
sum(view_homepage) AS viewed_homepage,
|
||
sum(use_demo) AS use_demo,
|
||
sum(enter_credit_card) AS entered_credit_card,
|
||
sum(submit_card_info) as submit_card_info,
|
||
sum(see_bought_screen) as see_bought_screen
|
||
FROM (
|
||
-- Get the first time each user viewed the homepage.
|
||
SELECT
|
||
user_id,
|
||
1 AS view_homepage,
|
||
min(time) AS view_homepage_time
|
||
FROM events_table
|
||
WHERE user_id = 1 and
|
||
event_type IN (1, 2)
|
||
GROUP BY user_id
|
||
) e1 LEFT JOIN LATERAL (
|
||
SELECT
|
||
user_id,
|
||
1 AS use_demo,
|
||
time AS use_demo_time
|
||
FROM events_table
|
||
WHERE
|
||
user_id = e1.user_id AND user_id = 1 and
|
||
event_type IN (2, 3)
|
||
ORDER BY time
|
||
LIMIT 1
|
||
) e2 ON true LEFT JOIN LATERAL (
|
||
SELECT
|
||
user_id,
|
||
1 AS enter_credit_card,
|
||
time AS enter_credit_card_time
|
||
FROM events_table
|
||
WHERE
|
||
user_id = e2.user_id AND user_id = 1 and
|
||
event_type IN (3, 4)
|
||
ORDER BY time
|
||
LIMIT 1
|
||
) e3 ON true LEFT JOIN LATERAL (
|
||
SELECT
|
||
1 AS submit_card_info,
|
||
user_id,
|
||
time AS enter_credit_card_time
|
||
FROM events_table
|
||
WHERE
|
||
user_id = e3.user_id AND user_id = 1 and
|
||
event_type IN (4, 5)
|
||
ORDER BY time
|
||
LIMIT 1
|
||
) e4 ON true LEFT JOIN LATERAL (
|
||
SELECT
|
||
1 AS see_bought_screen
|
||
FROM events_table
|
||
WHERE
|
||
user_id = e4.user_id AND user_id = 1 and
|
||
event_type IN (5, 6)
|
||
ORDER BY time
|
||
LIMIT 1
|
||
) e5 ON true
|
||
WHERE
|
||
e1.user_id = 1
|
||
GROUP BY
|
||
e1.user_id
|
||
LIMIT 1;
|
||
user_id | viewed_homepage | use_demo | entered_credit_card | submit_card_info | see_bought_screen
|
||
---------------------------------------------------------------------
|
||
1 | 1 | 1 | 1 | 1 | 1
|
||
(1 row)
|
||
|
||
-- Same query without all limitations
|
||
SELECT
|
||
e1.user_id,
|
||
sum(view_homepage) AS viewed_homepage,
|
||
sum(use_demo) AS use_demo,
|
||
sum(enter_credit_card) AS entered_credit_card,
|
||
sum(submit_card_info) as submit_card_info,
|
||
sum(see_bought_screen) as see_bought_screen
|
||
FROM (
|
||
-- Get the first time each user viewed the homepage.
|
||
SELECT
|
||
user_id,
|
||
1 AS view_homepage,
|
||
min(time) AS view_homepage_time
|
||
FROM events_table
|
||
WHERE
|
||
event_type IN (1, 2)
|
||
GROUP BY user_id
|
||
) e1 LEFT JOIN LATERAL (
|
||
SELECT
|
||
user_id,
|
||
1 AS use_demo,
|
||
time AS use_demo_time
|
||
FROM events_table
|
||
WHERE
|
||
user_id = e1.user_id AND
|
||
event_type IN (2, 3)
|
||
ORDER BY time
|
||
) e2 ON true LEFT JOIN LATERAL (
|
||
SELECT
|
||
user_id,
|
||
1 AS enter_credit_card,
|
||
time AS enter_credit_card_time
|
||
FROM events_table
|
||
WHERE
|
||
user_id = e2.user_id AND
|
||
event_type IN (3, 4)
|
||
ORDER BY time
|
||
) e3 ON true LEFT JOIN LATERAL (
|
||
SELECT
|
||
1 AS submit_card_info,
|
||
user_id,
|
||
time AS enter_credit_card_time
|
||
FROM events_table
|
||
WHERE
|
||
user_id = e3.user_id AND
|
||
event_type IN (4, 5)
|
||
ORDER BY time
|
||
) e4 ON true LEFT JOIN LATERAL (
|
||
SELECT
|
||
1 AS see_bought_screen
|
||
FROM events_table
|
||
WHERE
|
||
user_id = e4.user_id AND
|
||
event_type IN (5, 6)
|
||
ORDER BY time
|
||
) e5 ON true
|
||
GROUP BY e1.user_id
|
||
ORDER BY 6 DESC NULLS LAST, 5 DESC NULLS LAST, 4 DESC NULLS LAST, 3 DESC NULLS LAST, 2 DESC NULLS LAST, 1
|
||
LIMIT 15;
|
||
user_id | viewed_homepage | use_demo | entered_credit_card | submit_card_info | see_bought_screen
|
||
---------------------------------------------------------------------
|
||
2 | 1080 | 1080 | 1080 | 1080 | 1080
|
||
3 | 540 | 540 | 540 | 540 | 540
|
||
4 | 252 | 252 | 252 | 252 | 252
|
||
1 | 200 | 200 | 200 | 200 | 200
|
||
6 | 128 | 128 | 128 | 128 | 128
|
||
5 | 72 | 72 | 72 | 72 | 72
|
||
(6 rows)
|
||
|
||
-- Same query without all limitations but uses having() to show only those submitted their credit card info
|
||
SELECT
|
||
e1.user_id,
|
||
sum(view_homepage) AS viewed_homepage,
|
||
sum(use_demo) AS use_demo,
|
||
sum(enter_credit_card) AS entered_credit_card,
|
||
sum(submit_card_info) as submit_card_info,
|
||
sum(see_bought_screen) as see_bought_screen
|
||
FROM (
|
||
-- Get the first time each user viewed the homepage.
|
||
SELECT
|
||
user_id,
|
||
1 AS view_homepage,
|
||
min(time) AS view_homepage_time
|
||
FROM events_table
|
||
WHERE
|
||
event_type IN (1, 2)
|
||
GROUP BY user_id
|
||
) e1 LEFT JOIN LATERAL (
|
||
SELECT
|
||
user_id,
|
||
1 AS use_demo,
|
||
time AS use_demo_time
|
||
FROM events_table
|
||
WHERE
|
||
user_id = e1.user_id AND
|
||
event_type IN (2, 3)
|
||
ORDER BY time
|
||
) e2 ON true LEFT JOIN LATERAL (
|
||
SELECT
|
||
user_id,
|
||
1 AS enter_credit_card,
|
||
time AS enter_credit_card_time
|
||
FROM events_table
|
||
WHERE
|
||
user_id = e2.user_id AND
|
||
event_type IN (3, 4)
|
||
ORDER BY time
|
||
) e3 ON true LEFT JOIN LATERAL (
|
||
SELECT
|
||
1 AS submit_card_info,
|
||
user_id,
|
||
time AS enter_credit_card_time
|
||
FROM events_table
|
||
WHERE
|
||
user_id = e3.user_id AND
|
||
event_type IN (4, 5)
|
||
ORDER BY time
|
||
) e4 ON true LEFT JOIN LATERAL (
|
||
SELECT
|
||
1 AS see_bought_screen
|
||
FROM events_table
|
||
WHERE
|
||
user_id = e4.user_id AND
|
||
event_type IN (5, 6)
|
||
ORDER BY time
|
||
) e5 ON true
|
||
group by e1.user_id
|
||
HAVING sum(submit_card_info) > 0
|
||
ORDER BY 6 DESC NULLS LAST, 5 DESC NULLS LAST, 4 DESC NULLS LAST, 3 DESC NULLS LAST, 2 DESC NULLS LAST, 1
|
||
LIMIT 15;
|
||
user_id | viewed_homepage | use_demo | entered_credit_card | submit_card_info | see_bought_screen
|
||
---------------------------------------------------------------------
|
||
2 | 1080 | 1080 | 1080 | 1080 | 1080
|
||
3 | 540 | 540 | 540 | 540 | 540
|
||
4 | 252 | 252 | 252 | 252 | 252
|
||
1 | 200 | 200 | 200 | 200 | 200
|
||
6 | 128 | 128 | 128 | 128 | 128
|
||
5 | 72 | 72 | 72 | 72 | 72
|
||
(6 rows)
|
||
|
||
-- Explain analyze on this query fails due to #756
|
||
-- avg expression used on order by
|
||
SELECT a.user_id, avg(b.value_2) as subquery_avg
|
||
FROM (
|
||
SELECT
|
||
user_id
|
||
FROM
|
||
users_table
|
||
WHERE
|
||
(value_1 > 2)
|
||
GROUP BY
|
||
user_id
|
||
HAVING
|
||
count(distinct value_1) > 2
|
||
) as a
|
||
LEFT JOIN (
|
||
SELECT
|
||
user_id, value_2, value_3
|
||
FROM
|
||
users_table
|
||
WHERE
|
||
(value_1 > 3)) AS b
|
||
ON a.user_id = b.user_id
|
||
WHERE
|
||
b.user_id IS NOT NULL
|
||
GROUP BY
|
||
a.user_id
|
||
ORDER BY
|
||
avg(b.value_3), 2, 1
|
||
LIMIT 5;
|
||
user_id | subquery_avg
|
||
---------------------------------------------------------------------
|
||
3 | 3.6000000000000000
|
||
5 | 2.1666666666666667
|
||
4 | 2.6666666666666667
|
||
1 | 2.3333333333333333
|
||
(4 rows)
|
||
|
||
-- add having to the same query
|
||
SELECT a.user_id, avg(b.value_2) as subquery_avg
|
||
FROM (
|
||
SELECT
|
||
user_id
|
||
FROM
|
||
users_table
|
||
WHERE
|
||
(value_1 > 2)
|
||
GROUP BY user_id
|
||
HAVING count(distinct value_1) > 2
|
||
) as a
|
||
LEFT JOIN (
|
||
SELECT
|
||
user_id, value_2, value_3
|
||
FROM
|
||
users_table
|
||
WHERE
|
||
(value_1 > 3)) AS b
|
||
ON a.user_id = b.user_id
|
||
WHERE
|
||
b.user_id IS NOT NULL
|
||
GROUP BY
|
||
a.user_id
|
||
HAVING
|
||
sum(b.value_3) > 5
|
||
ORDER BY
|
||
avg(b.value_3), 2, 1
|
||
LIMIT 5;
|
||
user_id | subquery_avg
|
||
---------------------------------------------------------------------
|
||
3 | 3.6000000000000000
|
||
5 | 2.1666666666666667
|
||
4 | 2.6666666666666667
|
||
1 | 2.3333333333333333
|
||
(4 rows)
|
||
|
||
-- avg on the value_3 is not a resjunk
|
||
SELECT a.user_id, avg(b.value_2) as subquery_avg, avg(b.value_3)
|
||
FROM
|
||
(SELECT
|
||
user_id
|
||
FROM
|
||
users_table
|
||
WHERE
|
||
(value_1 > 2)
|
||
GROUP BY
|
||
user_id
|
||
HAVING
|
||
count(distinct value_1) > 2
|
||
) as a
|
||
LEFT JOIN
|
||
(
|
||
SELECT
|
||
user_id, value_2, value_3
|
||
FROM
|
||
users_table
|
||
WHERE
|
||
(value_1 > 3)
|
||
) AS b
|
||
ON a.user_id = b.user_id
|
||
WHERE
|
||
b.user_id IS NOT NULL
|
||
GROUP BY
|
||
a.user_id
|
||
ORDER BY
|
||
avg(b.value_3) DESC, 2, 1
|
||
LIMIT 5;
|
||
user_id | subquery_avg | avg
|
||
---------------------------------------------------------------------
|
||
1 | 2.3333333333333333 | 3.33333333333333
|
||
4 | 2.6666666666666667 | 2.55555555555556
|
||
5 | 2.1666666666666667 | 2.16666666666667
|
||
3 | 3.6000000000000000 | 1.6
|
||
(4 rows)
|
||
|
||
-- a powerful query structure that analyzes users/events
|
||
-- using (relation JOIN subquery JOIN relation)
|
||
SELECT u.user_id, sub.value_2, sub.value_3, COUNT(e2.user_id) counts
|
||
FROM
|
||
users_table u
|
||
LEFT OUTER JOIN LATERAL
|
||
(SELECT
|
||
*
|
||
FROM
|
||
events_table e1
|
||
WHERE
|
||
e1.user_id = u.user_id
|
||
ORDER BY
|
||
e1.value_3 DESC
|
||
LIMIT 1
|
||
) sub
|
||
ON true
|
||
LEFT OUTER JOIN events_table e2
|
||
ON e2.user_id = sub.user_id
|
||
WHERE
|
||
e2.value_2 > 1 AND e2.value_2 < 5 AND u.value_2 > 1 AND u.value_2 < 5
|
||
GROUP BY
|
||
u.user_id, sub.value_2, sub.value_3
|
||
ORDER BY
|
||
4 DESC, 1 DESC, 2 ASC, 3 ASC
|
||
LIMIT 10;
|
||
user_id | value_2 | value_3 | counts
|
||
---------------------------------------------------------------------
|
||
5 | 3 | 4 | 160
|
||
2 | 3 | 5 | 156
|
||
3 | 2 | 5 | 108
|
||
4 | 2 | 4 | 90
|
||
1 | 2 | 5 | 60
|
||
6 | 2 | 5 | 48
|
||
(6 rows)
|
||
|
||
-- distinct users joined with events
|
||
SELECT
|
||
avg(events_table.event_type) as avg_type,
|
||
count(*) as users_count
|
||
FROM events_table
|
||
JOIN
|
||
(SELECT
|
||
DISTINCT user_id
|
||
FROM
|
||
users_table
|
||
) as distinct_users
|
||
ON distinct_users.user_id = events_table.user_id
|
||
GROUP BY
|
||
distinct_users.user_id
|
||
ORDER BY
|
||
users_count desc, avg_type DESC
|
||
LIMIT 5;
|
||
avg_type | users_count
|
||
---------------------------------------------------------------------
|
||
2.3750000000000000 | 24
|
||
2.5714285714285714 | 21
|
||
2.5294117647058824 | 17
|
||
2.7333333333333333 | 15
|
||
2.2142857142857143 | 14
|
||
(5 rows)
|
||
|
||
-- reduce the data set, aggregate and join
|
||
SELECT
|
||
events_table.event_type,
|
||
users_count.ct
|
||
FROM events_table
|
||
JOIN
|
||
(SELECT distinct_users.user_id, count(1) as ct
|
||
FROM
|
||
(SELECT
|
||
user_id
|
||
FROM
|
||
users_table
|
||
) as distinct_users
|
||
GROUP BY
|
||
distinct_users.user_id
|
||
) as users_count
|
||
ON users_count.user_id = events_table.user_id
|
||
ORDER BY
|
||
users_count.ct desc, event_type DESC
|
||
LIMIT 5;
|
||
event_type | ct
|
||
---------------------------------------------------------------------
|
||
5 | 26
|
||
4 | 26
|
||
3 | 26
|
||
3 | 26
|
||
3 | 26
|
||
(5 rows)
|
||
|
||
--- now, test (subquery JOIN subquery)
|
||
SELECT n1.user_id, count_1, total_count
|
||
FROM
|
||
(SELECT
|
||
user_id, count(1) as count_1
|
||
FROM
|
||
users_table
|
||
GROUP BY
|
||
user_id
|
||
) n1
|
||
INNER JOIN
|
||
(
|
||
SELECT
|
||
user_id, count(1) as total_count
|
||
FROM
|
||
events_table
|
||
GROUP BY
|
||
user_id, event_type
|
||
) n2
|
||
ON (n2.user_id = n1.user_id)
|
||
ORDER BY
|
||
total_count DESC, count_1 DESC, 1 DESC
|
||
LIMIT 10;
|
||
user_id | count_1 | total_count
|
||
---------------------------------------------------------------------
|
||
2 | 18 | 7
|
||
3 | 17 | 7
|
||
2 | 18 | 6
|
||
5 | 26 | 5
|
||
5 | 26 | 5
|
||
4 | 23 | 5
|
||
3 | 17 | 5
|
||
1 | 7 | 5
|
||
4 | 23 | 4
|
||
4 | 23 | 4
|
||
(10 rows)
|
||
|
||
SELECT a.user_id, avg(b.value_2) as subquery_avg
|
||
FROM
|
||
(SELECT
|
||
user_id
|
||
FROM
|
||
users_table
|
||
WHERE
|
||
(value_1 > 2)
|
||
GROUP BY
|
||
user_id
|
||
HAVING
|
||
count(distinct value_1) > 2
|
||
) as a
|
||
LEFT JOIN
|
||
(SELECT
|
||
DISTINCT ON (user_id) user_id, value_2, value_3
|
||
FROM
|
||
users_table
|
||
WHERE
|
||
(value_1 > 3)
|
||
ORDER BY
|
||
1,2,3
|
||
) AS b
|
||
ON a.user_id = b.user_id
|
||
WHERE b.user_id IS NOT NULL
|
||
GROUP BY a.user_id
|
||
ORDER BY avg(b.value_3), 2, 1
|
||
LIMIT 5;
|
||
user_id | subquery_avg
|
||
---------------------------------------------------------------------
|
||
5 | 0.00000000000000000000
|
||
3 | 2.0000000000000000
|
||
4 | 1.00000000000000000000
|
||
1 | 0.00000000000000000000
|
||
(4 rows)
|
||
|
||
-- distinct clause must include partition column
|
||
-- when used in target list
|
||
SELECT a.user_id, avg(b.value_2) as subquery_avg
|
||
FROM
|
||
(SELECT
|
||
user_id
|
||
FROM
|
||
users_table
|
||
WHERE
|
||
(value_1 > 2)
|
||
GROUP BY
|
||
user_id
|
||
HAVING
|
||
count(distinct value_1) > 2
|
||
) as a
|
||
LEFT JOIN
|
||
(SELECT
|
||
DISTINCT ON (value_2) value_2 , user_id, value_3
|
||
FROM
|
||
users_table
|
||
WHERE
|
||
(value_1 > 3)
|
||
ORDER BY
|
||
1,2,3
|
||
) AS b
|
||
USING (user_id)
|
||
GROUP BY user_id;
|
||
ERROR: cannot push down this subquery
|
||
DETAIL: Distinct on columns without partition column is currently unsupported
|
||
SELECT a.user_id, avg(b.value_2) as subquery_avg
|
||
FROM
|
||
(SELECT
|
||
user_id
|
||
FROM
|
||
users_table
|
||
WHERE
|
||
(value_1 > 2)
|
||
GROUP BY
|
||
user_id
|
||
HAVING
|
||
count(distinct value_1) > 2
|
||
) as a
|
||
LEFT JOIN
|
||
(SELECT
|
||
DISTINCT ON (value_2, user_id) value_2 , user_id, value_3
|
||
FROM
|
||
users_table
|
||
WHERE
|
||
(value_1 > 3)
|
||
ORDER BY
|
||
1,2,3
|
||
) AS b
|
||
ON a.user_id = b.user_id
|
||
WHERE
|
||
b.user_id IS NOT NULL
|
||
GROUP BY
|
||
a.user_id
|
||
ORDER BY
|
||
avg(b.value_3), 2, 1
|
||
LIMIT 5;
|
||
user_id | subquery_avg
|
||
---------------------------------------------------------------------
|
||
3 | 3.3333333333333333
|
||
5 | 2.2000000000000000
|
||
4 | 3.2500000000000000
|
||
1 | 2.3333333333333333
|
||
(4 rows)
|
||
|
||
SELECT user_id, event_type
|
||
FROM
|
||
(SELECT *
|
||
FROM
|
||
(
|
||
(SELECT
|
||
event_type, user_id as a_user_id
|
||
FROM
|
||
events_table) AS a
|
||
JOIN
|
||
(SELECT
|
||
ma.user_id AS user_id, ma.value_2 AS value_2,
|
||
(GREATEST(coalesce((ma.value_3 * ma.value_2) / 20, 0.0) + GREATEST(1.0))) / 2 AS prob
|
||
FROM
|
||
users_table AS ma
|
||
WHERE
|
||
(ma.value_2 > 1)
|
||
ORDER BY
|
||
prob DESC, value_2 DESC, user_id DESC
|
||
LIMIT 10
|
||
) AS ma
|
||
ON (a.a_user_id = ma.user_id)
|
||
) AS inner_sub
|
||
ORDER BY
|
||
prob DESC, value_2 DESC, user_id DESC, event_type DESC
|
||
LIMIT 10
|
||
) AS outer_sub
|
||
ORDER BY
|
||
prob DESC, value_2 DESC, user_id DESC, event_type DESC
|
||
LIMIT 10;
|
||
user_id | event_type
|
||
---------------------------------------------------------------------
|
||
3 | 5
|
||
3 | 4
|
||
3 | 4
|
||
3 | 4
|
||
3 | 4
|
||
3 | 3
|
||
3 | 3
|
||
3 | 3
|
||
3 | 3
|
||
3 | 3
|
||
(10 rows)
|
||
|
||
-- very similar query but produces different result due to
|
||
-- ordering difference in the previous one's inner query
|
||
SELECT user_id, event_type
|
||
FROM
|
||
(SELECT
|
||
event_type, user_id as a_user_id
|
||
FROM
|
||
events_table) AS a
|
||
JOIN
|
||
(SELECT
|
||
ma.user_id AS user_id, ma.value_2 AS value_2,
|
||
(GREATEST(coalesce((ma.value_3 * ma.value_2) / 20, 0.0) + GREATEST(1.0))) / 2 AS prob
|
||
FROM
|
||
users_table AS ma
|
||
WHERE
|
||
(ma.value_2 > 1)
|
||
ORDER BY
|
||
prob DESC, user_id DESC
|
||
LIMIT 10
|
||
) AS ma
|
||
ON (a.a_user_id = ma.user_id)
|
||
ORDER BY
|
||
prob DESC, event_type DESC, user_id DESC
|
||
LIMIT 10;
|
||
user_id | event_type
|
||
---------------------------------------------------------------------
|
||
3 | 5
|
||
2 | 5
|
||
2 | 5
|
||
3 | 4
|
||
3 | 4
|
||
3 | 4
|
||
3 | 4
|
||
2 | 4
|
||
2 | 4
|
||
2 | 4
|
||
(10 rows)
|
||
|
||
-- now they produce the same result when ordering fixed in 'outer_sub'
|
||
SELECT user_id, event_type
|
||
FROM
|
||
(SELECT *
|
||
FROM
|
||
(
|
||
(SELECT
|
||
event_type, user_id as a_user_id
|
||
FROM
|
||
events_table
|
||
) AS a
|
||
JOIN
|
||
(SELECT
|
||
ma.user_id AS user_id, ma.value_2 AS value_2,
|
||
(GREATEST(coalesce((ma.value_3 * ma.value_2) / 20, 0.0) + GREATEST(1.0))) / 2 AS prob
|
||
FROM
|
||
users_table AS ma
|
||
WHERE
|
||
(ma.value_2 > 1)
|
||
ORDER BY
|
||
prob DESC, user_id DESC
|
||
LIMIT 10
|
||
) AS ma
|
||
ON (a.a_user_id = ma.user_id)
|
||
) AS inner_sub
|
||
ORDER BY
|
||
prob DESC, event_type DESC, user_id DESC
|
||
LIMIT 10
|
||
) AS outer_sub
|
||
ORDER BY
|
||
prob DESC, event_type DESC, user_id DESC
|
||
LIMIT 10;
|
||
user_id | event_type
|
||
---------------------------------------------------------------------
|
||
3 | 5
|
||
2 | 5
|
||
2 | 5
|
||
3 | 4
|
||
3 | 4
|
||
3 | 4
|
||
3 | 4
|
||
2 | 4
|
||
2 | 4
|
||
2 | 4
|
||
(10 rows)
|
||
|
||
-- this is one complex join query derived from a user's production query
|
||
-- declare the function on workers and master
|
||
-- With array_index:
|
||
CREATE OR REPLACE FUNCTION array_index(ANYARRAY, ANYELEMENT)
|
||
RETURNS INT AS $$
|
||
SELECT i
|
||
FROM (SELECT generate_series(array_lower($1, 1), array_upper($1, 1))) g(i)
|
||
WHERE $1 [i] = $2
|
||
LIMIT 1;
|
||
$$ LANGUAGE sql;
|
||
SELECT *
|
||
FROM
|
||
(SELECT *
|
||
FROM (
|
||
(SELECT user_id AS user_id_e,
|
||
event_type AS event_type_e
|
||
FROM events_table ) AS ma_e
|
||
JOIN
|
||
(SELECT value_2,
|
||
value_3,
|
||
user_id
|
||
FROM
|
||
(SELECT *
|
||
FROM (
|
||
(SELECT user_id_p AS user_id
|
||
FROM
|
||
(SELECT *
|
||
FROM (
|
||
(SELECT
|
||
user_id AS user_id_p
|
||
FROM
|
||
events_table
|
||
WHERE
|
||
(event_type IN (1,2,3,4,5)) ) AS ma_p
|
||
JOIN
|
||
(SELECT
|
||
user_id AS user_id_a
|
||
FROM
|
||
users_table
|
||
WHERE
|
||
(value_2 % 5 = 1) ) AS a
|
||
ON (a.user_id_a = ma_p.user_id_p) ) ) AS a_ma_p ) AS inner_filter_q
|
||
JOIN
|
||
(SELECT
|
||
value_2, value_3, user_id AS user_id_ck
|
||
FROM
|
||
events_table
|
||
WHERE
|
||
event_type = ANY(ARRAY [4, 5, 6])
|
||
ORDER BY
|
||
value_3 ASC, user_id_ck DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC
|
||
LIMIT 10 )
|
||
AS ma_ck ON (ma_ck.user_id_ck = inner_filter_q.user_id) )
|
||
AS inner_sub_q
|
||
ORDER BY
|
||
value_3 ASC, user_id_ck DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC
|
||
LIMIT 10 )
|
||
AS outer_sub_q
|
||
ORDER BY
|
||
value_3 ASC, user_id DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC
|
||
LIMIT 10)
|
||
AS inner_search_q
|
||
ON (ma_e.user_id_e = inner_search_q.user_id) )
|
||
AS outer_inner_sub_q
|
||
ORDER BY
|
||
value_3 ASC, user_id DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC, event_type_e DESC
|
||
LIMIT 10)
|
||
AS outer_outer_sub_q
|
||
ORDER BY
|
||
value_3 ASC, user_id DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC, event_type_e DESC
|
||
LIMIT 10;
|
||
user_id_e | event_type_e | value_2 | value_3 | user_id
|
||
---------------------------------------------------------------------
|
||
5 | 5 | 2 | 0 | 5
|
||
5 | 5 | 2 | 0 | 5
|
||
5 | 5 | 2 | 0 | 5
|
||
5 | 5 | 2 | 0 | 5
|
||
5 | 5 | 2 | 0 | 5
|
||
5 | 5 | 2 | 0 | 5
|
||
5 | 5 | 2 | 0 | 5
|
||
5 | 5 | 2 | 0 | 5
|
||
5 | 5 | 2 | 0 | 5
|
||
5 | 5 | 2 | 0 | 5
|
||
(10 rows)
|
||
|
||
-- top level select * is removed now there is
|
||
-- a join at top level.
|
||
SELECT *
|
||
FROM
|
||
(
|
||
(SELECT
|
||
user_id AS user_id_e, event_type as event_type_e
|
||
FROM
|
||
events_table
|
||
) AS ma_e
|
||
JOIN
|
||
(SELECT
|
||
value_2, value_3, user_id
|
||
FROM
|
||
(SELECT
|
||
*
|
||
FROM
|
||
(
|
||
(SELECT
|
||
user_id_p AS user_id
|
||
FROM
|
||
(SELECT
|
||
*
|
||
FROM
|
||
(
|
||
(SELECT
|
||
user_id AS user_id_p
|
||
FROM
|
||
events_table
|
||
WHERE
|
||
(event_type IN (1, 2, 3, 4, 5))
|
||
) AS ma_p
|
||
JOIN
|
||
(SELECT
|
||
user_id AS user_id_a
|
||
FROM
|
||
users_table
|
||
WHERE
|
||
(value_2 % 5 = 1)
|
||
) AS a
|
||
ON (a.user_id_a = ma_p.user_id_p)
|
||
)
|
||
) AS a_ma_p
|
||
) AS inner_filter_q
|
||
JOIN
|
||
(SELECT
|
||
value_2, value_3, user_id AS user_id_ck
|
||
FROM
|
||
events_table
|
||
WHERE
|
||
event_type = ANY(ARRAY [4, 5, 6])
|
||
ORDER BY
|
||
value_3 ASC, user_id_ck DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC
|
||
LIMIT 10
|
||
) AS ma_ck
|
||
ON (ma_ck.user_id_ck = inner_filter_q.user_id)
|
||
) AS inner_sub_q
|
||
ORDER BY
|
||
value_3 ASC, user_id_ck DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC
|
||
LIMIT 10
|
||
) AS outer_sub_q
|
||
ORDER BY
|
||
value_3 ASC, user_id DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC
|
||
LIMIT 10) AS inner_search_q
|
||
ON (ma_e.user_id_e = inner_search_q.user_id)
|
||
) AS outer_inner_sub_q
|
||
ORDER BY
|
||
value_3 ASC, user_id DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC, event_type_e DESC
|
||
LIMIT 10;
|
||
user_id_e | event_type_e | value_2 | value_3 | user_id
|
||
---------------------------------------------------------------------
|
||
5 | 5 | 2 | 0 | 5
|
||
5 | 5 | 2 | 0 | 5
|
||
5 | 5 | 2 | 0 | 5
|
||
5 | 5 | 2 | 0 | 5
|
||
5 | 5 | 2 | 0 | 5
|
||
5 | 5 | 2 | 0 | 5
|
||
5 | 5 | 2 | 0 | 5
|
||
5 | 5 | 2 | 0 | 5
|
||
5 | 5 | 2 | 0 | 5
|
||
5 | 5 | 2 | 0 | 5
|
||
(10 rows)
|
||
|
||
-- drop created functions
|
||
DROP FUNCTION array_index(ANYARRAY, ANYELEMENT);
|
||
-- a query with a constant subquery
|
||
SELECT count(*) as subquery_count
|
||
FROM (
|
||
SELECT
|
||
user_id
|
||
FROM
|
||
users_table
|
||
WHERE
|
||
(value_1 = '1' OR value_1 = '3')
|
||
GROUP BY user_id
|
||
HAVING count(distinct value_1) = 2
|
||
) as a
|
||
LEFT JOIN (
|
||
SELECT
|
||
1 as user_id
|
||
) AS b
|
||
ON a.user_id = b.user_id
|
||
WHERE b.user_id IS NULL
|
||
GROUP BY a.user_id;
|
||
subquery_count
|
||
---------------------------------------------------------------------
|
||
1
|
||
1
|
||
1
|
||
1
|
||
1
|
||
(5 rows)
|
||
|
||
-- volatile function in the subquery
|
||
SELECT count(*) as subquery_count
|
||
FROM (
|
||
SELECT
|
||
user_id
|
||
FROM
|
||
users_table
|
||
WHERE
|
||
(value_1 = '1' OR value_1 = '3')
|
||
GROUP BY user_id
|
||
HAVING count(distinct value_1) = 2
|
||
) as a
|
||
INNER JOIN (
|
||
SELECT
|
||
random()::int as user_id
|
||
) AS b
|
||
ON a.user_id = b.user_id
|
||
WHERE b.user_id IS NULL
|
||
GROUP BY a.user_id;
|
||
ERROR: cannot push down this subquery
|
||
DETAIL: Subqueries without a FROM clause can only contain immutable functions
|
||
-- this is slightly different, we use RTE_VALUEs here
|
||
SELECT Count(*) AS subquery_count
|
||
FROM (SELECT
|
||
user_id
|
||
FROM
|
||
users_table
|
||
WHERE
|
||
(value_1 = '1' OR value_1 = '3' )
|
||
GROUP BY
|
||
user_id
|
||
HAVING
|
||
Count(DISTINCT value_1) = 2) AS a
|
||
INNER JOIN
|
||
(SELECT
|
||
*
|
||
FROM
|
||
(VALUES (1, 'one'), (2, 'two'), (3, 'three')) AS t (user_id, letter)) AS b
|
||
ON a.user_id = b.user_id
|
||
WHERE b.user_id IS NULL
|
||
GROUP BY a.user_id;
|
||
subquery_count
|
||
---------------------------------------------------------------------
|
||
(0 rows)
|
||
|
||
-- same query without LIMIT/OFFSET returns 30 rows
|
||
SET client_min_messages TO DEBUG1;
|
||
-- now, lets use a simple expression on the LIMIT and explicit coercion on the OFFSET
|
||
SELECT user_id, array_length(events_table, 1)
|
||
FROM (
|
||
SELECT user_id, array_agg(event ORDER BY time) AS events_table
|
||
FROM (
|
||
SELECT
|
||
u.user_id, e.event_type::text AS event, e.time
|
||
FROM
|
||
users_table AS u,
|
||
events_table AS e
|
||
WHERE
|
||
u.user_id = e.user_id AND e.event_type IN (1, 2)
|
||
) t
|
||
GROUP BY user_id
|
||
) q
|
||
ORDER BY 2 DESC, 1
|
||
LIMIT 1+1 OFFSET 1::smallint;
|
||
DEBUG: push down of limit count: 3
|
||
user_id | array_length
|
||
---------------------------------------------------------------------
|
||
4 | 184
|
||
2 | 180
|
||
(2 rows)
|
||
|
||
-- now, lets use implicit coersion in LIMIT and a simple expressions on OFFSET
|
||
SELECT user_id, array_length(events_table, 1)
|
||
FROM (
|
||
SELECT user_id, array_agg(event ORDER BY time) AS events_table
|
||
FROM (
|
||
SELECT
|
||
u.user_id, e.event_type::text AS event, e.time
|
||
FROM
|
||
users_table AS u,
|
||
events_table AS e
|
||
WHERE
|
||
u.user_id = e.user_id AND e.event_type IN (1, 2)
|
||
) t
|
||
GROUP BY user_id
|
||
) q
|
||
ORDER BY 2 DESC, 1
|
||
LIMIT '3' OFFSET 2+1;
|
||
DEBUG: push down of limit count: 6
|
||
user_id | array_length
|
||
---------------------------------------------------------------------
|
||
5 | 156
|
||
6 | 40
|
||
1 | 28
|
||
(3 rows)
|
||
|
||
-- create a test function which is marked as volatile
|
||
CREATE OR REPLACE FUNCTION volatile_func_test()
|
||
RETURNS INT AS $$
|
||
SELECT 1;
|
||
$$ LANGUAGE sql VOLATILE;
|
||
DEBUG: switching to sequential query execution mode
|
||
DETAIL: A command for a distributed function is run. To make sure subsequent commands see the function correctly we need to make sure to use only one connection for all future commands
|
||
-- Citus should be able to evalute functions/row comparisons on the LIMIT/OFFSET
|
||
SELECT user_id, array_length(events_table, 1)
|
||
FROM (
|
||
SELECT user_id, array_agg(event ORDER BY time) AS events_table
|
||
FROM (
|
||
SELECT
|
||
u.user_id, e.event_type::text AS event, e.time
|
||
FROM
|
||
users_table AS u,
|
||
events_table AS e
|
||
WHERE
|
||
u.user_id = e.user_id AND e.event_type IN (1, 2, 3, 4)
|
||
) t
|
||
GROUP BY user_id
|
||
) q
|
||
ORDER BY 2 DESC, 1
|
||
LIMIT volatile_func_test() + (ROW(1,2,NULL) < ROW(1,3,0))::int OFFSET volatile_func_test() + volatile_func_test();
|
||
DEBUG: push down of limit count: 4
|
||
user_id | array_length
|
||
---------------------------------------------------------------------
|
||
3 | 340
|
||
5 | 312
|
||
(2 rows)
|
||
|
||
-- now, lets use expressions on both the LIMIT and OFFSET
|
||
SELECT user_id, array_length(events_table, 1)
|
||
FROM (
|
||
SELECT user_id, array_agg(event ORDER BY time) AS events_table
|
||
FROM (
|
||
SELECT
|
||
u.user_id, e.event_type::text AS event, e.time
|
||
FROM
|
||
users_table AS u,
|
||
events_table AS e
|
||
WHERE
|
||
u.user_id = e.user_id AND e.event_type IN (1, 2)
|
||
) t
|
||
GROUP BY user_id
|
||
) q
|
||
ORDER BY 2 DESC, 1
|
||
LIMIT (5 > 4)::int OFFSET
|
||
CASE
|
||
WHEN 5 != 5 THEN 27
|
||
WHEN 1 > 5 THEN 28
|
||
ELSE 2
|
||
END;
|
||
DEBUG: push down of limit count: 3
|
||
user_id | array_length
|
||
---------------------------------------------------------------------
|
||
2 | 180
|
||
(1 row)
|
||
|
||
-- we don't allow parameters on the LIMIT/OFFSET clauses
|
||
PREPARE parametrized_limit AS
|
||
SELECT user_id, array_length(events_table, 1)
|
||
FROM (
|
||
SELECT user_id, array_agg(event ORDER BY time) AS events_table
|
||
FROM (
|
||
SELECT u.user_id, e.event_type::text AS event, e.time
|
||
FROM users_table AS u,
|
||
events_table AS e
|
||
WHERE u.user_id = e.user_id
|
||
AND e.event_type IN (1, 2)
|
||
) t
|
||
GROUP BY user_id
|
||
) q
|
||
ORDER BY 2 DESC, 1
|
||
LIMIT $1 OFFSET $2;
|
||
EXECUTE parametrized_limit(1,1);
|
||
DEBUG: push down of limit count: 2
|
||
user_id | array_length
|
||
---------------------------------------------------------------------
|
||
4 | 184
|
||
(1 row)
|
||
|
||
PREPARE parametrized_offset AS
|
||
SELECT user_id, array_length(events_table, 1)
|
||
FROM (
|
||
SELECT user_id, array_agg(event ORDER BY time) AS events_table
|
||
FROM (
|
||
SELECT u.user_id, e.event_type::text AS event, e.time
|
||
FROM users_table AS u,
|
||
events_table AS e
|
||
WHERE u.user_id = e.user_id
|
||
AND e.event_type IN (1, 2)
|
||
) t
|
||
GROUP BY user_id
|
||
) q
|
||
ORDER BY 2 DESC, 1
|
||
LIMIT 1 OFFSET $1;
|
||
EXECUTE parametrized_offset(1);
|
||
DEBUG: push down of limit count: 2
|
||
user_id | array_length
|
||
---------------------------------------------------------------------
|
||
4 | 184
|
||
(1 row)
|
||
|
||
SET client_min_messages TO DEFAULT;
|
||
DROP FUNCTION volatile_func_test();
|
||
CREATE FUNCTION test_join_function_2(integer, integer) RETURNS bool
|
||
AS 'select $1 > $2;'
|
||
LANGUAGE SQL
|
||
IMMUTABLE
|
||
RETURNS NULL ON NULL INPUT;
|
||
-- we don't support joins via functions
|
||
SELECT user_id, array_length(events_table, 1)
|
||
FROM (
|
||
SELECT user_id, array_agg(event ORDER BY time) AS events_table
|
||
FROM (
|
||
SELECT u.user_id, e.event_type::text AS event, e.time
|
||
FROM users_table AS u,
|
||
events_table AS e
|
||
WHERE test_join_function_2(u.user_id, e.user_id)
|
||
) t
|
||
GROUP BY user_id
|
||
) q
|
||
ORDER BY 2 DESC, 1;
|
||
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
|
||
-- note that the following query has both equi-joins on the partition keys
|
||
-- and non-equi-joins on other columns. We now support query filters
|
||
-- having non-equi-joins as long as they have equi-joins on partition keys.
|
||
SELECT
|
||
users_table.user_id, users_table.value_1, prob
|
||
FROM
|
||
users_table
|
||
JOIN
|
||
(SELECT
|
||
ma.user_id, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob
|
||
FROM
|
||
users_table AS ma, events_table as short_list
|
||
WHERE
|
||
short_list.user_id = ma.user_id and ma.value_1 < 3 and short_list.event_type < 3
|
||
) temp
|
||
ON users_table.user_id = temp.user_id
|
||
WHERE
|
||
users_table.value_1 < 3 AND test_join_function_2(users_table.user_id, temp.user_id);
|
||
user_id | value_1 | prob
|
||
---------------------------------------------------------------------
|
||
(0 rows)
|
||
|
||
-- we do support the following since there is already an equality on the partition
|
||
-- key and we have an additional join via a function
|
||
SELECT
|
||
temp.user_id, users_table.value_1, prob
|
||
FROM
|
||
users_table
|
||
JOIN
|
||
(SELECT
|
||
ma.user_id, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob, random()
|
||
FROM
|
||
users_table AS ma, events_table as short_list
|
||
WHERE
|
||
short_list.user_id = ma.user_id and ma.value_1 < 3 and short_list.event_type < 4 AND
|
||
test_join_function_2(ma.value_1, short_list.value_2)
|
||
) temp
|
||
ON users_table.user_id = temp.user_id
|
||
WHERE
|
||
users_table.value_1 < 3
|
||
ORDER BY 2 DESC, 1 DESC
|
||
LIMIT 10;
|
||
user_id | value_1 | prob
|
||
---------------------------------------------------------------------
|
||
6 | 2 | 0.50000000000000000000
|
||
6 | 2 | 0.50000000000000000000
|
||
6 | 2 | 0.50000000000000000000
|
||
6 | 2 | 0.50000000000000000000
|
||
6 | 2 | 0.50000000000000000000
|
||
6 | 2 | 0.50000000000000000000
|
||
6 | 2 | 0.50000000000000000000
|
||
6 | 2 | 0.50000000000000000000
|
||
6 | 2 | 0.50000000000000000000
|
||
5 | 2 | 0.50000000000000000000
|
||
(10 rows)
|
||
|
||
-- similarly we do support non equi joins on columns if there is aready
|
||
-- an equality join
|
||
SELECT
|
||
count(*)
|
||
FROM
|
||
(SELECT
|
||
event_type, random()
|
||
FROM
|
||
events_table, users_table
|
||
WHERE
|
||
events_table.user_id = users_table.user_id AND
|
||
events_table.time > users_table.time AND
|
||
events_table.value_2 IN (0, 4)
|
||
) as foo;
|
||
count
|
||
---------------------------------------------------------------------
|
||
180
|
||
(1 row)
|
||
|
||
-- the other way around is not supported
|
||
SELECT
|
||
count(*)
|
||
FROM
|
||
(SELECT
|
||
event_type, random()
|
||
FROM
|
||
events_table, users_table
|
||
WHERE
|
||
events_table.user_id > users_table.user_id AND
|
||
events_table.time = users_table.time AND
|
||
events_table.value_2 IN (0, 4)
|
||
) as foo;
|
||
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
|
||
-- we can even allow that on top level joins
|
||
SELECT
|
||
count(*)
|
||
FROM
|
||
(SELECT
|
||
event_type, random(), events_table.user_id
|
||
FROM
|
||
events_table, users_table
|
||
WHERE
|
||
events_table.user_id = users_table.user_id AND
|
||
events_table.value_2 IN (0, 4)
|
||
) as foo,
|
||
(SELECT
|
||
event_type, random(), events_table.user_id
|
||
FROM
|
||
events_table, users_table
|
||
WHERE
|
||
events_table.user_id = users_table.user_id AND
|
||
events_table.value_2 IN (1, 5)
|
||
) as bar
|
||
WHERE foo.event_type > bar.event_type
|
||
AND foo.user_id = bar.user_id;
|
||
count
|
||
---------------------------------------------------------------------
|
||
11971
|
||
(1 row)
|
||
|
||
-- note that the following is not supported
|
||
-- since the top level join is not on the distribution key
|
||
SELECT
|
||
count(*)
|
||
FROM
|
||
(SELECT
|
||
event_type, random()
|
||
FROM
|
||
events_table, users_table
|
||
WHERE
|
||
events_table.user_id = users_table.user_id AND
|
||
events_table.value_2 IN (0, 4)
|
||
) as foo,
|
||
(SELECT
|
||
event_type, random()
|
||
FROM
|
||
events_table, users_table
|
||
WHERE
|
||
events_table.user_id = users_table.user_id AND
|
||
events_table.value_2 IN (1, 5)
|
||
) as bar
|
||
WHERE foo.event_type = bar.event_type;
|
||
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
|
||
-- DISTINCT in the outer query and DISTINCT in the subquery
|
||
SELECT
|
||
DISTINCT users_ids.user_id
|
||
FROM
|
||
(SELECT DISTINCT user_id FROM users_table) as users_ids
|
||
JOIN
|
||
(SELECT
|
||
ma.user_id, ma.value_1, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob
|
||
FROM
|
||
users_table AS ma, events_table as short_list
|
||
WHERE
|
||
short_list.user_id = ma.user_id and ma.value_1 < 3 and short_list.event_type < 3
|
||
) temp
|
||
ON users_ids.user_id = temp.user_id
|
||
WHERE temp.value_1 < 3
|
||
ORDER BY 1
|
||
LIMIT 5;
|
||
user_id
|
||
---------------------------------------------------------------------
|
||
1
|
||
2
|
||
3
|
||
4
|
||
5
|
||
(5 rows)
|
||
|
||
-- DISTINCT ON in the outer query and DISTINCT in the subquery
|
||
SELECT
|
||
DISTINCT ON (users_ids.user_id) users_ids.user_id, temp.value_1, prob
|
||
FROM
|
||
(SELECT DISTINCT user_id FROM users_table) as users_ids
|
||
JOIN
|
||
(SELECT
|
||
ma.user_id, ma.value_1, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob
|
||
FROM
|
||
users_table AS ma, events_table as short_list
|
||
WHERE
|
||
short_list.user_id = ma.user_id and ma.value_1 < 3 and short_list.event_type < 2
|
||
) temp
|
||
ON users_ids.user_id = temp.user_id
|
||
WHERE temp.value_1 < 3
|
||
ORDER BY 1, 2
|
||
LIMIT 5;
|
||
user_id | value_1 | prob
|
||
---------------------------------------------------------------------
|
||
1 | 1 | 0.50000000000000000000
|
||
2 | 0 | 0.50000000000000000000
|
||
3 | 0 | 0.50000000000000000000
|
||
4 | 0 | 0.50000000000000000000
|
||
5 | 0 | 0.50000000000000000000
|
||
(5 rows)
|
||
|
||
-- DISTINCT ON in the outer query and DISTINCT ON in the subquery
|
||
SELECT
|
||
DISTINCT ON (users_ids.user_id) users_ids.user_id, temp.value_1, prob
|
||
FROM
|
||
(SELECT DISTINCT ON (user_id) user_id, value_1 FROM users_table ORDER BY 1,2) as users_ids
|
||
JOIN
|
||
(SELECT
|
||
ma.user_id, ma.value_1, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob
|
||
FROM
|
||
users_table AS ma, events_table as short_list
|
||
WHERE
|
||
short_list.user_id = ma.user_id and ma.value_1 < 2 and short_list.event_type < 3
|
||
) temp
|
||
ON users_ids.user_id = temp.user_id
|
||
ORDER BY 1,2
|
||
LIMIT 5;
|
||
user_id | value_1 | prob
|
||
---------------------------------------------------------------------
|
||
1 | 1 | 0.50000000000000000000
|
||
2 | 0 | 0.50000000000000000000
|
||
3 | 0 | 0.50000000000000000000
|
||
4 | 0 | 0.50000000000000000000
|
||
5 | 0 | 0.50000000000000000000
|
||
(5 rows)
|
||
|
||
-- Getting aggregation of value which is created by aggregation in subquery
|
||
SELECT
|
||
count(1),
|
||
avg(agg_value)
|
||
FROM
|
||
(SELECT
|
||
users_table.user_id,
|
||
avg(users_table.value_1 / events_table.value_4) AS agg_value
|
||
FROM
|
||
users_table,
|
||
events_table
|
||
WHERE
|
||
users_table.user_id = events_table.user_id
|
||
GROUP BY
|
||
1
|
||
) AS temp;
|
||
count | avg
|
||
---------------------------------------------------------------------
|
||
6 |
|
||
(1 row)
|
||
|
||
-- Test the case when a subquery has a lateral reference to two levels upper
|
||
SELECT
|
||
b.user_id, b.value_2, b.cnt
|
||
FROM (
|
||
SELECT
|
||
user_id,
|
||
value_2
|
||
FROM events_table
|
||
WHERE events_table.user_id BETWEEN 2 AND 5
|
||
) a,
|
||
LATERAL (
|
||
SELECT
|
||
user_id, value_2, count(*) as cnt
|
||
FROM (
|
||
SELECT
|
||
value_2, time, user_id
|
||
FROM events_table
|
||
WHERE user_id BETWEEN 2 AND 5
|
||
AND events_table.user_id = a.user_id
|
||
AND events_table.value_2 = a.value_2
|
||
ORDER BY time DESC
|
||
) events
|
||
GROUP BY user_id, value_2
|
||
) b
|
||
ORDER BY user_id, value_2, cnt
|
||
LIMIT 1;
|
||
user_id | value_2 | cnt
|
||
---------------------------------------------------------------------
|
||
2 | 0 | 1
|
||
(1 row)
|
||
|
||
DROP FUNCTION test_join_function_2(integer, integer);
|
||
SET citus.enable_router_execution TO TRUE;
|
||
SET citus.subquery_pushdown to OFF;
|