citus/src/test/regress/sql/multi_subquery_behavioral_a...

1880 lines
47 KiB
PL/PgSQL
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

--
-- multi subquery behavioral analytics queries aims to expand existing subquery pushdown
-- regression tests to cover more cases
-- the tables that are used depends to multi_insert_select_behavioral_analytics_create_table.sql
--
--- We don't need shard id sequence here given that we're not creating any shards, so not writing it at all
-- The following line is intended to force Citus to NOT use router planner for the tests in this
-- file. The motivation for doing this is to make sure that single-task queries can be planned
-- by non-router code-paths. Thus, this flag should NOT be used in production. Otherwise, the actual
-- router queries would fail.
SET citus.enable_router_execution TO FALSE;
------------------------------------
-- Vanilla funnel query
------------------------------------
SELECT user_id, array_length(events_table, 1)
FROM (
SELECT user_id, array_agg(event ORDER BY time) AS events_table
FROM (
SELECT u.user_id, e.event_type::text AS event, e.time
FROM users_table AS u,
events_table AS e
WHERE u.user_id = e.user_id
AND u.user_id >= 1
AND u.user_id <= 3
AND e.event_type IN (1, 2)
) t
GROUP BY user_id
) q
ORDER BY 2 DESC, 1;
------------------------------------
-- Funnel grouped by whether or not a user has done an event
-- This has multiple subqueries joinin at the top level
------------------------------------
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event
FROM (
SELECT
t1.user_id,
array_agg(event ORDER BY time) AS events_table,
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
FROM (
(
SELECT u.user_id, 'step=>1'::text AS event, e.time
FROM users_table AS u,
events_table AS e
WHERE u.user_id = e.user_id
AND u.user_id >= 1
AND u.user_id <= 3
AND e.event_type IN (1, 2)
)
UNION
(
SELECT u.user_id, 'step=>2'::text AS event, e.time
FROM users_table AS u,
events_table AS e
WHERE u.user_id = e.user_id
AND u.user_id >= 1
AND u.user_id <= 3
AND e.event_type IN (3, 4)
)
) t1 LEFT JOIN (
SELECT DISTINCT user_id,
'Has done event'::TEXT AS hasdone_event
FROM events_table AS e
WHERE e.user_id >= 1
AND e.user_id <= 3
AND e.event_type IN (5, 6)
) t2 ON (t1.user_id = t2.user_id)
GROUP BY t1.user_id, hasdone_event
) t GROUP BY user_id, hasdone_event
ORDER BY user_id;
-- same query but multiple joins are one level below, returns count of row instead of actual rows
SELECT count(*)
FROM (
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event
FROM (
SELECT
t1.user_id,
array_agg(event ORDER BY time) AS events_table,
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
FROM (
(
SELECT u.user_id, 'step=>1'::text AS event, e.time
FROM users_table AS u,
events_table AS e
WHERE u.user_id = e.user_id
AND u.user_id >= 1
AND u.user_id <= 3
AND e.event_type IN (1, 2)
)
UNION
(
SELECT u.user_id, 'step=>2'::text AS event, e.time
FROM users_table AS u,
events_table AS e
WHERE u.user_id = e.user_id
AND u.user_id >= 1
AND u.user_id <= 3
AND e.event_type IN (3, 4)
)
) t1 LEFT JOIN (
SELECT DISTINCT user_id,
'Has done event'::TEXT AS hasdone_event
FROM events_table AS e
WHERE e.user_id >= 1
AND e.user_id <= 3
AND e.event_type IN (5, 6)
) t2 ON (t1.user_id = t2.user_id)
GROUP BY t1.user_id, hasdone_event
) t GROUP BY user_id, hasdone_event
ORDER BY user_id) u;
-- Same queries written without unions
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event
FROM (
SELECT
t1.user_id,
array_agg(event ORDER BY time) AS events_table,
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
FROM (
SELECT
u.user_id,
CASE WHEN e.event_type IN (1, 2) THEN 'step=>1'::text else 'step==>2'::text END AS event,
e.time
FROM users_table AS u,
events_table AS e
WHERE u.user_id = e.user_id
AND u.user_id >= 1
AND u.user_id <= 3
AND e.event_type IN (1, 2, 3, 4)
GROUP BY 1,2,3
) t1 LEFT JOIN (
SELECT DISTINCT user_id,
'Has done event'::TEXT AS hasdone_event
FROM events_table AS e
WHERE e.user_id >= 1
AND e.user_id <= 3
AND e.event_type IN (5, 6)
) t2 ON (t1.user_id = t2.user_id)
GROUP BY t1.user_id, hasdone_event
) t GROUP BY user_id, hasdone_event
ORDER BY user_id;
-- same query but multiple joins are one level below, returns count of row instead of actual rows
SELECT count(*)
FROM (
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event
FROM (
SELECT
t1.user_id,
array_agg(event ORDER BY time) AS events_table,
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
FROM (
SELECT
u.user_id,
CASE WHEN e.event_type in (1, 2) then 'step=>1'::text else 'step==>2'::text END AS event,
e.time
FROM users_table AS u,
events_table AS e
WHERE u.user_id = e.user_id
AND u.user_id >= 1
AND u.user_id <= 3
AND e.event_type IN (1, 2, 3, 4)
GROUP BY 1,2,3
) t1 LEFT JOIN (
SELECT DISTINCT user_id,
'Has done event'::TEXT AS hasdone_event
FROM events_table AS e
WHERE e.user_id >= 1
AND e.user_id <= 3
AND e.event_type IN (5, 6)
) t2 ON (t1.user_id = t2.user_id)
GROUP BY t1.user_id, hasdone_event
) t GROUP BY user_id, hasdone_event
ORDER BY user_id) u;
------------------------------------
-- Funnel, grouped by the number of times a user has done an event
------------------------------------
SELECT
user_id,
avg(array_length(events_table, 1)) AS event_average,
count_pay
FROM (
SELECT
subquery_1.user_id,
array_agg(event ORDER BY time) AS events_table,
COALESCE(count_pay, 0) AS count_pay
FROM
(
(SELECT
users_table.user_id,
'action=>1'AS event,
events_table.time
FROM
users_table,
events_table
WHERE
users_table.user_id = events_table.user_id AND
users_table.user_id >= 1 AND
users_table.user_id <= 3 AND
events_table.event_type > 1 AND events_table.event_type < 3
)
UNION
(SELECT
users_table.user_id,
'action=>2'AS event,
events_table.time
FROM
users_table,
events_table
WHERE
users_table.user_id = events_table.user_id AND
users_table.user_id >= 1 AND
users_table.user_id <= 3 AND
events_table.event_type > 1 AND events_table.event_type < 4
)
) AS subquery_1
LEFT JOIN
(SELECT
user_id,
COUNT(*) AS count_pay
FROM
users_table
WHERE
user_id >= 1 AND
user_id <= 3 AND
users_table.value_1 > 2 AND users_table.value_1 < 5
GROUP BY
user_id
HAVING
COUNT(*) > 1) AS subquery_2
ON
subquery_1.user_id = subquery_2.user_id
GROUP BY
subquery_1.user_id,
count_pay) AS subquery_top
WHERE
array_ndims(events_table) > 0
GROUP BY
count_pay, user_id
ORDER BY
event_average DESC, count_pay DESC, user_id DESC;
SELECT
user_id,
avg(array_length(events_table, 1)) AS event_average,
count_pay
FROM (
SELECT
subquery_1.user_id,
array_agg(event ORDER BY time) AS events_table,
COALESCE(count_pay, 0) AS count_pay
FROM
(
(SELECT
users_table.user_id,
'action=>1'AS event,
events_table.time
FROM
users_table,
events_table
WHERE
users_table.user_id = events_table.user_id AND
users_table.user_id >= 1 AND
users_table.user_id <= 3 AND
events_table.event_type > 1 AND events_table.event_type < 3
)
UNION
(SELECT
users_table.user_id,
'action=>2'AS event,
events_table.time
FROM
users_table,
events_table
WHERE
users_table.user_id = events_table.user_id AND
users_table.user_id >= 1 AND
users_table.user_id <= 3 AND
events_table.event_type > 1 AND events_table.event_type < 4
)
) AS subquery_1
LEFT JOIN
(SELECT
user_id,
COUNT(*) AS count_pay
FROM
users_table
WHERE
user_id >= 1 AND
user_id <= 3 AND
users_table.value_1 > 2 AND users_table.value_1 < 4
GROUP BY
user_id
HAVING
COUNT(*) > 1) AS subquery_2
ON
subquery_1.user_id = subquery_2.user_id
GROUP BY
subquery_1.user_id,
count_pay) AS subquery_top
WHERE
array_ndims(events_table) > 0
GROUP BY
count_pay, user_id
HAVING
avg(array_length(events_table, 1)) > 0
ORDER BY
event_average DESC, count_pay DESC, user_id DESC;
-- Same queries rewritten without using unions
SELECT
user_id,
avg(array_length(events_table, 1)) AS event_average,
count_pay
FROM (
SELECT
subquery_1.user_id,
array_agg(event ORDER BY time) AS events_table,
COALESCE(count_pay, 0) AS count_pay
FROM
(
SELECT
users_table.user_id,
CASE
WHEN
events_table.event_type > 1 AND events_table.event_type < 3
THEN 'action=>1'
ELSE 'action=>2'
END AS event,
events_table.time
FROM
users_table,
events_table
WHERE
users_table.user_id = events_table.user_id AND
users_table.user_id >= 1 AND
users_table.user_id <= 3 AND
(events_table.event_type > 1 AND events_table.event_type < 3
OR
events_table.event_type > 2 AND events_table.event_type < 4)
GROUP BY 1, 2, 3
) AS subquery_1
LEFT JOIN
(SELECT
user_id,
COUNT(*) AS count_pay
FROM
users_table
WHERE
user_id >= 1 AND
user_id <= 3 AND
users_table.value_1 > 3 AND users_table.value_1 < 5
GROUP BY
user_id
HAVING
COUNT(*) > 1) AS subquery_2
ON
subquery_1.user_id = subquery_2.user_id
GROUP BY
subquery_1.user_id,
count_pay) AS subquery_top
WHERE
array_ndims(events_table) > 0
GROUP BY
count_pay, user_id
ORDER BY
event_average DESC, count_pay DESC, user_id DESC;
SELECT
user_id,
avg(array_length(events_table, 1)) AS event_average,
count_pay
FROM (
SELECT
subquery_1.user_id,
array_agg(event ORDER BY time) AS events_table,
COALESCE(count_pay, 0) AS count_pay
FROM
(
SELECT
users_table.user_id,
CASE WHEN events_table.event_type > 1 AND events_table.event_type < 3 THEN 'action=>1' ELSE 'action=>2' END AS event,
events_table.time
FROM
users_table,
events_table
WHERE
users_table.user_id = events_table.user_id AND
users_table.user_id >= 1 AND
users_table.user_id <= 3 AND
(events_table.event_type > 1 AND events_table.event_type < 3
OR
events_table.event_type > 2 AND events_table.event_type < 4)
GROUP BY 1, 2, 3
) AS subquery_1
LEFT JOIN
(SELECT
user_id,
COUNT(*) AS count_pay
FROM
users_table
WHERE
user_id >= 1 AND
user_id <= 3 AND
users_table.value_1 > 3 AND users_table.value_1 < 5
GROUP BY
user_id
HAVING
COUNT(*) > 1) AS subquery_2
ON
subquery_1.user_id = subquery_2.user_id
GROUP BY
subquery_1.user_id,
count_pay) AS subquery_top
WHERE
array_ndims(events_table) > 0
GROUP BY
count_pay, user_id
HAVING
avg(array_length(events_table, 1)) > 0
ORDER BY
event_average DESC, count_pay DESC, user_id DESC;
------------------------------------
-- Most recently seen users_table events_table
------------------------------------
-- Note that we don't use ORDER BY/LIMIT yet
------------------------------------
SELECT
user_id,
user_lastseen,
array_length(event_array, 1)
FROM (
SELECT
user_id,
max(u.time) as user_lastseen,
array_agg(event_type ORDER BY u.time) AS event_array
FROM (
SELECT user_id, time
FROM users_table
WHERE
user_id >= 1 AND
user_id <= 3 AND
users_table.value_1 > 1 AND users_table.value_1 < 3
) u LEFT JOIN LATERAL (
SELECT event_type, time
FROM events_table
WHERE user_id = u.user_id AND
events_table.event_type > 1 AND events_table.event_type < 3
) t ON true
GROUP BY user_id
) AS shard_union
ORDER BY user_lastseen DESC, user_id;
------------------------------------
-- Count the number of distinct users_table who are in segment X and Y and Z
------------------------------------
SELECT user_id
FROM users_table
WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 1 AND value_1 <= 2)
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 3 AND value_1 <= 4)
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 5 AND value_1 <= 6)
GROUP BY
user_id
ORDER BY
user_id DESC
LIMIT 5;
------------------------------------
-- Find customers who have done X, and satisfy other customer specific criteria
------------------------------------
SELECT user_id, value_2 FROM users_table WHERE
value_1 > 1 AND value_1 < 3
AND value_2 >= 1
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 1 AND event_type < 3 AND value_3 > 1 AND user_id = users_table.user_id)
ORDER BY 2 DESC, 1 DESC
LIMIT 5;
------------------------------------
-- Customers who haven’t done X, and satisfy other customer specific criteria
------------------------------------
SELECT user_id, value_2 FROM users_table WHERE
value_1 = 2
AND value_2 >= 1
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=2 AND value_3 > 1 AND user_id = users_table.user_id)
ORDER BY 1 DESC, 2 DESC
LIMIT 3;
------------------------------------
-- Customers who have done X and Y, and satisfy other customer specific criteria
------------------------------------
SELECT user_id, sum(value_2) as cnt FROM users_table WHERE
value_1 > 1
AND value_2 >= 1
AND EXISTS (SELECT user_id FROM events_table WHERE event_type != 1 AND value_3 > 1 AND user_id = users_table.user_id)
AND EXISTS (SELECT user_id FROM events_table WHERE event_type = 2 AND value_3 > 1 AND user_id = users_table.user_id)
GROUP BY
user_id
ORDER BY cnt DESC, user_id DESC
LIMIT 5;
------------------------------------
-- Customers who have done X and haven’t done Y, and satisfy other customer specific criteria
------------------------------------
SELECT user_id, value_2 FROM users_table WHERE
value_2 >= 1
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 1 AND event_type <= 3 AND value_3 > 1 AND user_id = users_table.user_id)
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 3 AND event_type <= 4 AND value_3 > 1 AND user_id = users_table.user_id)
ORDER BY 2 DESC, 1 DESC
LIMIT 4;
------------------------------------
-- Customers who have done X more than 2 times, and satisfy other customer specific criteria
------------------------------------
SELECT user_id,
avg(value_2)
FROM users_table
WHERE value_1 > 1
AND value_1 < 3
AND value_2 >= 1
AND EXISTS (SELECT user_id
FROM events_table
WHERE event_type > 1
AND event_type < 3
AND value_3 > 1
AND user_id = users_table.user_id
GROUP BY user_id
HAVING Count(*) > 2)
GROUP BY
user_id
ORDER BY
1 DESC, 2 DESC
LIMIT 5;
------------------------------------
-- Find me all users_table who logged in more than once
------------------------------------
SELECT user_id, value_1 from
(
SELECT
user_id, value_1 From users_table
WHERE
value_2 > 1 and user_id = 2
GROUP BY
value_1, user_id
HAVING
count(*) > 1
) AS a
ORDER BY
user_id ASC, value_1 ASC;
-- same query with additional filter to make it not router plannable
SELECT user_id, value_1 from
(
SELECT
user_id, value_1 From users_table
WHERE
value_2 > 1 and (user_id = 2 OR user_id = 3)
GROUP BY
value_1, user_id
HAVING count(*) > 1
) AS a
ORDER BY
user_id ASC, value_1 ASC;
------------------------------------
-- Find me all users_table who has done some event and has filters
------------------------------------
SELECT user_id
FROM events_table
WHERE
event_type = 3 AND value_2 > 2 AND
user_id IN
(SELECT
user_id
FROM
users_table
WHERE
value_1 = 1 AND value_2 > 2
)
ORDER BY 1;
------------------------------------
-- Which events_table did people who has done some specific events_table
------------------------------------
SELECT
user_id, event_type FROM events_table
WHERE
user_id in (SELECT user_id from events_table WHERE event_type > 3 and event_type < 5)
GROUP BY
user_id, event_type
ORDER BY 2 DESC, 1
LIMIT 3;
------------------------------------
-- Find me all the users_table who has done some event more than three times
------------------------------------
SELECT user_id FROM
(
SELECT
user_id
FROM
events_table
WHERE
event_type = 2
GROUP BY
user_id
HAVING
count(*) > 1
) AS a
ORDER BY
user_id;
------------------------------------
-- Find my assets that have the highest probability and fetch their metadata
------------------------------------
CREATE TEMP TABLE assets AS
SELECT
users_table.user_id, users_table.value_1, prob
FROM
users_table
JOIN
(SELECT
ma.user_id, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob
FROM
users_table AS ma, events_table as short_list
WHERE
short_list.user_id = ma.user_id and ma.value_1 < 2 and short_list.event_type < 2
) temp
ON users_table.user_id = temp.user_id
WHERE
users_table.value_1 < 2;
-- get some statistics from the aggregated results to ensure the results are correct
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM assets;
DROP TABLE assets;
-- count number of distinct users who have value_1 equal to 5 or 13 but not 3
-- is recusrively planned
SET client_min_messages TO DEBUG1;
SELECT count(*) FROM
(
SELECT
user_id
FROM
users_table
WHERE
(value_1 = '1' OR value_1 = '3') AND
user_id NOT IN (select user_id from users_table where value_1 = '4')
GROUP BY
user_id
HAVING
count(distinct value_1) = 2
) as foo;
RESET client_min_messages;
-- previous push down query
SELECT subquery_count FROM
(SELECT count(*) as subquery_count FROM
(SELECT
user_id
FROM
users_table
WHERE
(value_1 = '1' OR value_1 = '3')
GROUP BY
user_id
HAVING
count(distinct value_1) = 2) as a
LEFT JOIN
(SELECT
user_id
FROM
users_table
WHERE
(value_1 = '2')
GROUP BY
user_id) as b
ON a.user_id = b.user_id
WHERE
b.user_id IS NULL
GROUP BY
a.user_id
) AS inner_subquery;
-- new pushdown query without single range table entry at top requirement
SELECT count(*) as subquery_count
FROM (
SELECT
user_id
FROM
users_table
WHERE
(value_1 = '1' OR value_1 = '3')
GROUP BY
user_id
HAVING
count(distinct value_1) = 2
) as a
LEFT JOIN (
SELECT
user_id
FROM
users_table
WHERE
(value_1 = '2')
GROUP BY
user_id) AS b
ON a.user_id = b.user_id
WHERE
b.user_id IS NULL
GROUP BY
a.user_id;
-- most queries below has limit clause
-- therefore setting subquery_pushdown flag for all
SET citus.subquery_pushdown to ON;
-- multi-subquery-join
-- The first query has filters on partion column to make it router plannable
-- but it is processed by logical planner since we disabled router execution
SELECT
e1.user_id,
sum(view_homepage) AS viewed_homepage,
sum(use_demo) AS use_demo,
sum(enter_credit_card) AS entered_credit_card,
sum(submit_card_info) as submit_card_info,
sum(see_bought_screen) as see_bought_screen
FROM (
-- Get the first time each user viewed the homepage.
SELECT
user_id,
1 AS view_homepage,
min(time) AS view_homepage_time
FROM events_table
WHERE user_id = 1 and
event_type IN (1, 2)
GROUP BY user_id
) e1 LEFT JOIN LATERAL (
SELECT
user_id,
1 AS use_demo,
time AS use_demo_time
FROM events_table
WHERE
user_id = e1.user_id AND user_id = 1 and
event_type IN (2, 3)
ORDER BY time
LIMIT 1
) e2 ON true LEFT JOIN LATERAL (
SELECT
user_id,
1 AS enter_credit_card,
time AS enter_credit_card_time
FROM events_table
WHERE
user_id = e2.user_id AND user_id = 1 and
event_type IN (3, 4)
ORDER BY time
LIMIT 1
) e3 ON true LEFT JOIN LATERAL (
SELECT
1 AS submit_card_info,
user_id,
time AS enter_credit_card_time
FROM events_table
WHERE
user_id = e3.user_id AND user_id = 1 and
event_type IN (4, 5)
ORDER BY time
LIMIT 1
) e4 ON true LEFT JOIN LATERAL (
SELECT
1 AS see_bought_screen
FROM events_table
WHERE
user_id = e4.user_id AND user_id = 1 and
event_type IN (5, 6)
ORDER BY time
LIMIT 1
) e5 ON true
WHERE
e1.user_id = 1
GROUP BY
e1.user_id
LIMIT 1;
-- Same query without all limitations
SELECT
e1.user_id,
sum(view_homepage) AS viewed_homepage,
sum(use_demo) AS use_demo,
sum(enter_credit_card) AS entered_credit_card,
sum(submit_card_info) as submit_card_info,
sum(see_bought_screen) as see_bought_screen
FROM (
-- Get the first time each user viewed the homepage.
SELECT
user_id,
1 AS view_homepage,
min(time) AS view_homepage_time
FROM events_table
WHERE
event_type IN (1, 2)
GROUP BY user_id
) e1 LEFT JOIN LATERAL (
SELECT
user_id,
1 AS use_demo,
time AS use_demo_time
FROM events_table
WHERE
user_id = e1.user_id AND
event_type IN (2, 3)
ORDER BY time
) e2 ON true LEFT JOIN LATERAL (
SELECT
user_id,
1 AS enter_credit_card,
time AS enter_credit_card_time
FROM events_table
WHERE
user_id = e2.user_id AND
event_type IN (3, 4)
ORDER BY time
) e3 ON true LEFT JOIN LATERAL (
SELECT
1 AS submit_card_info,
user_id,
time AS enter_credit_card_time
FROM events_table
WHERE
user_id = e3.user_id AND
event_type IN (4, 5)
ORDER BY time
) e4 ON true LEFT JOIN LATERAL (
SELECT
1 AS see_bought_screen
FROM events_table
WHERE
user_id = e4.user_id AND
event_type IN (5, 6)
ORDER BY time
) e5 ON true
GROUP BY e1.user_id
ORDER BY 6 DESC NULLS LAST, 5 DESC NULLS LAST, 4 DESC NULLS LAST, 3 DESC NULLS LAST, 2 DESC NULLS LAST, 1
LIMIT 15;
-- Same query without all limitations but uses having() to show only those submitted their credit card info
SELECT
e1.user_id,
sum(view_homepage) AS viewed_homepage,
sum(use_demo) AS use_demo,
sum(enter_credit_card) AS entered_credit_card,
sum(submit_card_info) as submit_card_info,
sum(see_bought_screen) as see_bought_screen
FROM (
-- Get the first time each user viewed the homepage.
SELECT
user_id,
1 AS view_homepage,
min(time) AS view_homepage_time
FROM events_table
WHERE
event_type IN (1, 2)
GROUP BY user_id
) e1 LEFT JOIN LATERAL (
SELECT
user_id,
1 AS use_demo,
time AS use_demo_time
FROM events_table
WHERE
user_id = e1.user_id AND
event_type IN (2, 3)
ORDER BY time
) e2 ON true LEFT JOIN LATERAL (
SELECT
user_id,
1 AS enter_credit_card,
time AS enter_credit_card_time
FROM events_table
WHERE
user_id = e2.user_id AND
event_type IN (3, 4)
ORDER BY time
) e3 ON true LEFT JOIN LATERAL (
SELECT
1 AS submit_card_info,
user_id,
time AS enter_credit_card_time
FROM events_table
WHERE
user_id = e3.user_id AND
event_type IN (4, 5)
ORDER BY time
) e4 ON true LEFT JOIN LATERAL (
SELECT
1 AS see_bought_screen
FROM events_table
WHERE
user_id = e4.user_id AND
event_type IN (5, 6)
ORDER BY time
) e5 ON true
group by e1.user_id
HAVING sum(submit_card_info) > 0
ORDER BY 6 DESC NULLS LAST, 5 DESC NULLS LAST, 4 DESC NULLS LAST, 3 DESC NULLS LAST, 2 DESC NULLS LAST, 1
LIMIT 15;
-- Explain analyze on this query fails due to #756
-- avg expression used on order by
SELECT a.user_id, avg(b.value_2) as subquery_avg
FROM (
SELECT
user_id
FROM
users_table
WHERE
(value_1 > 2)
GROUP BY
user_id
HAVING
count(distinct value_1) > 2
) as a
LEFT JOIN (
SELECT
user_id, value_2, value_3
FROM
users_table
WHERE
(value_1 > 3)) AS b
ON a.user_id = b.user_id
WHERE
b.user_id IS NOT NULL
GROUP BY
a.user_id
ORDER BY
avg(b.value_3), 2, 1
LIMIT 5;
-- add having to the same query
SELECT a.user_id, avg(b.value_2) as subquery_avg
FROM (
SELECT
user_id
FROM
users_table
WHERE
(value_1 > 2)
GROUP BY user_id
HAVING count(distinct value_1) > 2
) as a
LEFT JOIN (
SELECT
user_id, value_2, value_3
FROM
users_table
WHERE
(value_1 > 3)) AS b
ON a.user_id = b.user_id
WHERE
b.user_id IS NOT NULL
GROUP BY
a.user_id
HAVING
sum(b.value_3) > 5
ORDER BY
avg(b.value_3), 2, 1
LIMIT 5;
-- avg on the value_3 is not a resjunk
SELECT a.user_id, avg(b.value_2) as subquery_avg, avg(b.value_3)
FROM
(SELECT
user_id
FROM
users_table
WHERE
(value_1 > 2)
GROUP BY
user_id
HAVING
count(distinct value_1) > 2
) as a
LEFT JOIN
(
SELECT
user_id, value_2, value_3
FROM
users_table
WHERE
(value_1 > 3)
) AS b
ON a.user_id = b.user_id
WHERE
b.user_id IS NOT NULL
GROUP BY
a.user_id
ORDER BY
avg(b.value_3) DESC, 2, 1
LIMIT 5;
-- a powerful query structure that analyzes users/events
-- using (relation JOIN subquery JOIN relation)
SELECT u.user_id, sub.value_2, sub.value_3, COUNT(e2.user_id) counts
FROM
users_table u
LEFT OUTER JOIN LATERAL
(SELECT
*
FROM
events_table e1
WHERE
e1.user_id = u.user_id
ORDER BY
e1.value_3 DESC
LIMIT 1
) sub
ON true
LEFT OUTER JOIN events_table e2
ON e2.user_id = sub.user_id
WHERE
e2.value_2 > 1 AND e2.value_2 < 5 AND u.value_2 > 1 AND u.value_2 < 5
GROUP BY
u.user_id, sub.value_2, sub.value_3
ORDER BY
4 DESC, 1 DESC, 2 ASC, 3 ASC
LIMIT 10;
-- distinct users joined with events
SELECT
avg(events_table.event_type) as avg_type,
count(*) as users_count
FROM events_table
JOIN
(SELECT
DISTINCT user_id
FROM
users_table
) as distinct_users
ON distinct_users.user_id = events_table.user_id
GROUP BY
distinct_users.user_id
ORDER BY
users_count desc, avg_type DESC
LIMIT 5;
-- reduce the data set, aggregate and join
SELECT
events_table.event_type,
users_count.ct
FROM events_table
JOIN
(SELECT distinct_users.user_id, count(1) as ct
FROM
(SELECT
user_id
FROM
users_table
) as distinct_users
GROUP BY
distinct_users.user_id
) as users_count
ON users_count.user_id = events_table.user_id
ORDER BY
users_count.ct desc, event_type DESC
LIMIT 5;
--- now, test (subquery JOIN subquery)
SELECT n1.user_id, count_1, total_count
FROM
(SELECT
user_id, count(1) as count_1
FROM
users_table
GROUP BY
user_id
) n1
INNER JOIN
(
SELECT
user_id, count(1) as total_count
FROM
events_table
GROUP BY
user_id, event_type
) n2
ON (n2.user_id = n1.user_id)
ORDER BY
total_count DESC, count_1 DESC, 1 DESC
LIMIT 10;
SELECT a.user_id, avg(b.value_2) as subquery_avg
FROM
(SELECT
user_id
FROM
users_table
WHERE
(value_1 > 2)
GROUP BY
user_id
HAVING
count(distinct value_1) > 2
) as a
LEFT JOIN
(SELECT
DISTINCT ON (user_id) user_id, value_2, value_3
FROM
users_table
WHERE
(value_1 > 3)
ORDER BY
1,2,3
) AS b
ON a.user_id = b.user_id
WHERE b.user_id IS NOT NULL
GROUP BY a.user_id
ORDER BY avg(b.value_3), 2, 1
LIMIT 5;
-- distinct clause must include partition column
-- when used in target list
SELECT a.user_id, avg(b.value_2) as subquery_avg
FROM
(SELECT
user_id
FROM
users_table
WHERE
(value_1 > 2)
GROUP BY
user_id
HAVING
count(distinct value_1) > 2
) as a
LEFT JOIN
(SELECT
DISTINCT ON (value_2) value_2 , user_id, value_3
FROM
users_table
WHERE
(value_1 > 3)
ORDER BY
1,2,3
) AS b
USING (user_id)
GROUP BY user_id;
SELECT a.user_id, avg(b.value_2) as subquery_avg
FROM
(SELECT
user_id
FROM
users_table
WHERE
(value_1 > 2)
GROUP BY
user_id
HAVING
count(distinct value_1) > 2
) as a
LEFT JOIN
(SELECT
DISTINCT ON (value_2, user_id) value_2 , user_id, value_3
FROM
users_table
WHERE
(value_1 > 3)
ORDER BY
1,2,3
) AS b
ON a.user_id = b.user_id
WHERE
b.user_id IS NOT NULL
GROUP BY
a.user_id
ORDER BY
avg(b.value_3), 2, 1
LIMIT 5;
SELECT user_id, event_type
FROM
(SELECT *
FROM
(
(SELECT
event_type, user_id as a_user_id
FROM
events_table) AS a
JOIN
(SELECT
ma.user_id AS user_id, ma.value_2 AS value_2,
(GREATEST(coalesce((ma.value_3 * ma.value_2) / 20, 0.0) + GREATEST(1.0))) / 2 AS prob
FROM
users_table AS ma
WHERE
(ma.value_2 > 1)
ORDER BY
prob DESC, value_2 DESC, user_id DESC
LIMIT 10
) AS ma
ON (a.a_user_id = ma.user_id)
) AS inner_sub
ORDER BY
prob DESC, value_2 DESC, user_id DESC, event_type DESC
LIMIT 10
) AS outer_sub
ORDER BY
prob DESC, value_2 DESC, user_id DESC, event_type DESC
LIMIT 10;
-- very similar query but produces different result due to
-- ordering difference in the previous one's inner query
SELECT user_id, event_type
FROM
(SELECT
event_type, user_id as a_user_id
FROM
events_table) AS a
JOIN
(SELECT
ma.user_id AS user_id, ma.value_2 AS value_2,
(GREATEST(coalesce((ma.value_3 * ma.value_2) / 20, 0.0) + GREATEST(1.0))) / 2 AS prob
FROM
users_table AS ma
WHERE
(ma.value_2 > 1)
ORDER BY
prob DESC, user_id DESC
LIMIT 10
) AS ma
ON (a.a_user_id = ma.user_id)
ORDER BY
prob DESC, event_type DESC, user_id DESC
LIMIT 10;
-- now they produce the same result when ordering fixed in 'outer_sub'
SELECT user_id, event_type
FROM
(SELECT *
FROM
(
(SELECT
event_type, user_id as a_user_id
FROM
events_table
) AS a
JOIN
(SELECT
ma.user_id AS user_id, ma.value_2 AS value_2,
(GREATEST(coalesce((ma.value_3 * ma.value_2) / 20, 0.0) + GREATEST(1.0))) / 2 AS prob
FROM
users_table AS ma
WHERE
(ma.value_2 > 1)
ORDER BY
prob DESC, user_id DESC
LIMIT 10
) AS ma
ON (a.a_user_id = ma.user_id)
) AS inner_sub
ORDER BY
prob DESC, event_type DESC, user_id DESC
LIMIT 10
) AS outer_sub
ORDER BY
prob DESC, event_type DESC, user_id DESC
LIMIT 10;
-- this is one complex join query derived from a user's production query
-- first declare the function on workers on master
-- With array_index:
SELECT * FROM run_command_on_workers('CREATE OR REPLACE FUNCTION array_index(ANYARRAY, ANYELEMENT)
RETURNS INT AS $$
SELECT i
FROM (SELECT generate_series(array_lower($1, 1), array_upper($1, 1))) g(i)
WHERE $1 [i] = $2
LIMIT 1;
$$ LANGUAGE sql')
ORDER BY 1,2;
CREATE OR REPLACE FUNCTION array_index(ANYARRAY, ANYELEMENT)
RETURNS INT AS $$
SELECT i
FROM (SELECT generate_series(array_lower($1, 1), array_upper($1, 1))) g(i)
WHERE $1 [i] = $2
LIMIT 1;
$$ LANGUAGE sql;
SELECT *
FROM
(SELECT *
FROM (
(SELECT user_id AS user_id_e,
event_type AS event_type_e
FROM events_table ) AS ma_e
JOIN
(SELECT value_2,
value_3,
user_id
FROM
(SELECT *
FROM (
(SELECT user_id_p AS user_id
FROM
(SELECT *
FROM (
(SELECT
user_id AS user_id_p
FROM
events_table
WHERE
(event_type IN (1,2,3,4,5)) ) AS ma_p
JOIN
(SELECT
user_id AS user_id_a
FROM
users_table
WHERE
(value_2 % 5 = 1) ) AS a
ON (a.user_id_a = ma_p.user_id_p) ) ) AS a_ma_p ) AS inner_filter_q
JOIN
(SELECT
value_2, value_3, user_id AS user_id_ck
FROM
events_table
WHERE
event_type = ANY(ARRAY [4, 5, 6])
ORDER BY
value_3 ASC, user_id_ck DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC
LIMIT 10 )
AS ma_ck ON (ma_ck.user_id_ck = inner_filter_q.user_id) )
AS inner_sub_q
ORDER BY
value_3 ASC, user_id_ck DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC
LIMIT 10 )
AS outer_sub_q
ORDER BY
value_3 ASC, user_id DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC
LIMIT 10)
AS inner_search_q
ON (ma_e.user_id_e = inner_search_q.user_id) )
AS outer_inner_sub_q
ORDER BY
value_3 ASC, user_id DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC, event_type_e DESC
LIMIT 10)
AS outer_outer_sub_q
ORDER BY
value_3 ASC, user_id DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC, event_type_e DESC
LIMIT 10;
-- top level select * is removed now there is
-- a join at top level.
SELECT *
FROM
(
(SELECT
user_id AS user_id_e, event_type as event_type_e
FROM
events_table
) AS ma_e
JOIN
(SELECT
value_2, value_3, user_id
FROM
(SELECT
*
FROM
(
(SELECT
user_id_p AS user_id
FROM
(SELECT
*
FROM
(
(SELECT
user_id AS user_id_p
FROM
events_table
WHERE
(event_type IN (1, 2, 3, 4, 5))
) AS ma_p
JOIN
(SELECT
user_id AS user_id_a
FROM
users_table
WHERE
(value_2 % 5 = 1)
) AS a
ON (a.user_id_a = ma_p.user_id_p)
)
) AS a_ma_p
) AS inner_filter_q
JOIN
(SELECT
value_2, value_3, user_id AS user_id_ck
FROM
events_table
WHERE
event_type = ANY(ARRAY [4, 5, 6])
ORDER BY
value_3 ASC, user_id_ck DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC
LIMIT 10
) AS ma_ck
ON (ma_ck.user_id_ck = inner_filter_q.user_id)
) AS inner_sub_q
ORDER BY
value_3 ASC, user_id_ck DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC
LIMIT 10
) AS outer_sub_q
ORDER BY
value_3 ASC, user_id DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC
LIMIT 10) AS inner_search_q
ON (ma_e.user_id_e = inner_search_q.user_id)
) AS outer_inner_sub_q
ORDER BY
value_3 ASC, user_id DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC, event_type_e DESC
LIMIT 10;
-- drop created functions
SELECT * FROM run_command_on_workers('DROP FUNCTION array_index(ANYARRAY, ANYELEMENT)')
ORDER BY 1,2;
DROP FUNCTION array_index(ANYARRAY, ANYELEMENT);
-- a query with a constant subquery
SELECT count(*) as subquery_count
FROM (
SELECT
user_id
FROM
users_table
WHERE
(value_1 = '1' OR value_1 = '3')
GROUP BY user_id
HAVING count(distinct value_1) = 2
) as a
LEFT JOIN (
SELECT
1 as user_id
) AS b
ON a.user_id = b.user_id
WHERE b.user_id IS NULL
GROUP BY a.user_id;
-- volatile function in the subquery
SELECT count(*) as subquery_count
FROM (
SELECT
user_id
FROM
users_table
WHERE
(value_1 = '1' OR value_1 = '3')
GROUP BY user_id
HAVING count(distinct value_1) = 2
) as a
INNER JOIN (
SELECT
random()::int as user_id
) AS b
ON a.user_id = b.user_id
WHERE b.user_id IS NULL
GROUP BY a.user_id;
-- this is slightly different, we use RTE_VALUEs here
SELECT Count(*) AS subquery_count
FROM (SELECT
user_id
FROM
users_table
WHERE
(value_1 = '1' OR value_1 = '3' )
GROUP BY
user_id
HAVING
Count(DISTINCT value_1) = 2) AS a
INNER JOIN
(SELECT
*
FROM
(VALUES (1, 'one'), (2, 'two'), (3, 'three')) AS t (user_id, letter)) AS b
ON a.user_id = b.user_id
WHERE b.user_id IS NULL
GROUP BY a.user_id;
-- same query without LIMIT/OFFSET returns 30 rows
SET client_min_messages TO DEBUG1;
-- now, lets use a simple expression on the LIMIT and explicit coercion on the OFFSET
SELECT user_id, array_length(events_table, 1)
FROM (
SELECT user_id, array_agg(event ORDER BY time) AS events_table
FROM (
SELECT
u.user_id, e.event_type::text AS event, e.time
FROM
users_table AS u,
events_table AS e
WHERE
u.user_id = e.user_id AND e.event_type IN (1, 2)
) t
GROUP BY user_id
) q
ORDER BY 2 DESC, 1
LIMIT 1+1 OFFSET 1::smallint;
-- now, lets use implicit coersion in LIMIT and a simple expressions on OFFSET
SELECT user_id, array_length(events_table, 1)
FROM (
SELECT user_id, array_agg(event ORDER BY time) AS events_table
FROM (
SELECT
u.user_id, e.event_type::text AS event, e.time
FROM
users_table AS u,
events_table AS e
WHERE
u.user_id = e.user_id AND e.event_type IN (1, 2)
) t
GROUP BY user_id
) q
ORDER BY 2 DESC, 1
LIMIT '3' OFFSET 2+1;
-- create a test function which is marked as volatile
CREATE OR REPLACE FUNCTION volatile_func_test()
RETURNS INT AS $$
SELECT 1;
$$ LANGUAGE sql VOLATILE;
-- Citus should be able to evalute functions/row comparisons on the LIMIT/OFFSET
SELECT user_id, array_length(events_table, 1)
FROM (
SELECT user_id, array_agg(event ORDER BY time) AS events_table
FROM (
SELECT
u.user_id, e.event_type::text AS event, e.time
FROM
users_table AS u,
events_table AS e
WHERE
u.user_id = e.user_id AND e.event_type IN (1, 2, 3, 4)
) t
GROUP BY user_id
) q
ORDER BY 2 DESC, 1
LIMIT volatile_func_test() + (ROW(1,2,NULL) < ROW(1,3,0))::int OFFSET volatile_func_test() + volatile_func_test();
-- now, lets use expressions on both the LIMIT and OFFSET
SELECT user_id, array_length(events_table, 1)
FROM (
SELECT user_id, array_agg(event ORDER BY time) AS events_table
FROM (
SELECT
u.user_id, e.event_type::text AS event, e.time
FROM
users_table AS u,
events_table AS e
WHERE
u.user_id = e.user_id AND e.event_type IN (1, 2)
) t
GROUP BY user_id
) q
ORDER BY 2 DESC, 1
LIMIT (5 > 4)::int OFFSET
CASE
WHEN 5 != 5 THEN 27
WHEN 1 > 5 THEN 28
ELSE 2
END;
-- we don't allow parameters on the LIMIT/OFFSET clauses
PREPARE parametrized_limit AS
SELECT user_id, array_length(events_table, 1)
FROM (
SELECT user_id, array_agg(event ORDER BY time) AS events_table
FROM (
SELECT u.user_id, e.event_type::text AS event, e.time
FROM users_table AS u,
events_table AS e
WHERE u.user_id = e.user_id
AND e.event_type IN (1, 2)
) t
GROUP BY user_id
) q
ORDER BY 2 DESC, 1
LIMIT $1 OFFSET $2;
EXECUTE parametrized_limit(1,1);
PREPARE parametrized_offset AS
SELECT user_id, array_length(events_table, 1)
FROM (
SELECT user_id, array_agg(event ORDER BY time) AS events_table
FROM (
SELECT u.user_id, e.event_type::text AS event, e.time
FROM users_table AS u,
events_table AS e
WHERE u.user_id = e.user_id
AND e.event_type IN (1, 2)
) t
GROUP BY user_id
) q
ORDER BY 2 DESC, 1
LIMIT 1 OFFSET $1;
EXECUTE parametrized_offset(1);
SET client_min_messages TO DEFAULT;
DROP FUNCTION volatile_func_test();
CREATE FUNCTION test_join_function_2(integer, integer) RETURNS bool
AS 'select $1 > $2;'
LANGUAGE SQL
IMMUTABLE
RETURNS NULL ON NULL INPUT;
SELECT run_command_on_workers($f$
CREATE FUNCTION test_join_function_2(integer, integer) RETURNS bool
AS 'select $1 > $2;'
LANGUAGE SQL
IMMUTABLE
RETURNS NULL ON NULL INPUT;
$f$);
-- we don't support joins via functions
SELECT user_id, array_length(events_table, 1)
FROM (
SELECT user_id, array_agg(event ORDER BY time) AS events_table
FROM (
SELECT u.user_id, e.event_type::text AS event, e.time
FROM users_table AS u,
events_table AS e
WHERE test_join_function_2(u.user_id, e.user_id)
) t
GROUP BY user_id
) q
ORDER BY 2 DESC, 1;
-- note that the following query has both equi-joins on the partition keys
-- and non-equi-joins on other columns. We now support query filters
-- having non-equi-joins as long as they have equi-joins on partition keys.
SELECT
users_table.user_id, users_table.value_1, prob
FROM
users_table
JOIN
(SELECT
ma.user_id, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob
FROM
users_table AS ma, events_table as short_list
WHERE
short_list.user_id = ma.user_id and ma.value_1 < 3 and short_list.event_type < 3
) temp
ON users_table.user_id = temp.user_id
WHERE
users_table.value_1 < 3 AND test_join_function_2(users_table.user_id, temp.user_id);
-- we do support the following since there is already an equality on the partition
-- key and we have an additional join via a function
SELECT
temp.user_id, users_table.value_1, prob
FROM
users_table
JOIN
(SELECT
ma.user_id, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob, random()
FROM
users_table AS ma, events_table as short_list
WHERE
short_list.user_id = ma.user_id and ma.value_1 < 3 and short_list.event_type < 4 AND
test_join_function_2(ma.value_1, short_list.value_2)
) temp
ON users_table.user_id = temp.user_id
WHERE
users_table.value_1 < 3
ORDER BY 2 DESC, 1 DESC
LIMIT 10;
-- similarly we do support non equi joins on columns if there is aready
-- an equality join
SELECT
count(*)
FROM
(SELECT
event_type, random()
FROM
events_table, users_table
WHERE
events_table.user_id = users_table.user_id AND
events_table.time > users_table.time AND
events_table.value_2 IN (0, 4)
) as foo;
-- the other way around is not supported
SELECT
count(*)
FROM
(SELECT
event_type, random()
FROM
events_table, users_table
WHERE
events_table.user_id > users_table.user_id AND
events_table.time = users_table.time AND
events_table.value_2 IN (0, 4)
) as foo;
-- we can even allow that on top level joins
SELECT
count(*)
FROM
(SELECT
event_type, random(), events_table.user_id
FROM
events_table, users_table
WHERE
events_table.user_id = users_table.user_id AND
events_table.value_2 IN (0, 4)
) as foo,
(SELECT
event_type, random(), events_table.user_id
FROM
events_table, users_table
WHERE
events_table.user_id = users_table.user_id AND
events_table.value_2 IN (1, 5)
) as bar
WHERE foo.event_type > bar.event_type
AND foo.user_id = bar.user_id;
-- note that the following is not supported
-- since the top level join is not on the distribution key
SELECT
count(*)
FROM
(SELECT
event_type, random()
FROM
events_table, users_table
WHERE
events_table.user_id = users_table.user_id AND
events_table.value_2 IN (0, 4)
) as foo,
(SELECT
event_type, random()
FROM
events_table, users_table
WHERE
events_table.user_id = users_table.user_id AND
events_table.value_2 IN (1, 5)
) as bar
WHERE foo.event_type = bar.event_type;
-- DISTINCT in the outer query and DISTINCT in the subquery
SELECT
DISTINCT users_ids.user_id
FROM
(SELECT DISTINCT user_id FROM users_table) as users_ids
JOIN
(SELECT
ma.user_id, ma.value_1, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob
FROM
users_table AS ma, events_table as short_list
WHERE
short_list.user_id = ma.user_id and ma.value_1 < 3 and short_list.event_type < 3
) temp
ON users_ids.user_id = temp.user_id
WHERE temp.value_1 < 3
ORDER BY 1
LIMIT 5;
-- DISTINCT ON in the outer query and DISTINCT in the subquery
SELECT
DISTINCT ON (users_ids.user_id) users_ids.user_id, temp.value_1, prob
FROM
(SELECT DISTINCT user_id FROM users_table) as users_ids
JOIN
(SELECT
ma.user_id, ma.value_1, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob
FROM
users_table AS ma, events_table as short_list
WHERE
short_list.user_id = ma.user_id and ma.value_1 < 3 and short_list.event_type < 2
) temp
ON users_ids.user_id = temp.user_id
WHERE temp.value_1 < 3
ORDER BY 1, 2
LIMIT 5;
-- DISTINCT ON in the outer query and DISTINCT ON in the subquery
SELECT
DISTINCT ON (users_ids.user_id) users_ids.user_id, temp.value_1, prob
FROM
(SELECT DISTINCT ON (user_id) user_id, value_1 FROM users_table ORDER BY 1,2) as users_ids
JOIN
(SELECT
ma.user_id, ma.value_1, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob
FROM
users_table AS ma, events_table as short_list
WHERE
short_list.user_id = ma.user_id and ma.value_1 < 2 and short_list.event_type < 3
) temp
ON users_ids.user_id = temp.user_id
ORDER BY 1,2
LIMIT 5;
-- Getting aggregation of value which is created by aggregation in subquery
SELECT
count(1),
avg(agg_value)
FROM
(SELECT
users_table.user_id,
avg(users_table.value_1 / events_table.value_4) AS agg_value
FROM
users_table,
events_table
WHERE
users_table.user_id = events_table.user_id
GROUP BY
1
) AS temp;
-- Test the case when a subquery has a lateral reference to two levels upper
SELECT
b.user_id, b.value_2, b.cnt
FROM (
SELECT
user_id,
value_2
FROM events_table
WHERE events_table.user_id BETWEEN 2 AND 5
) a,
LATERAL (
SELECT
user_id, value_2, count(*) as cnt
FROM (
SELECT
value_2, time, user_id
FROM events_table
WHERE user_id BETWEEN 2 AND 5
AND events_table.user_id = a.user_id
AND events_table.value_2 = a.value_2
ORDER BY time DESC
) events
GROUP BY user_id, value_2
) b
ORDER BY user_id, value_2, cnt
LIMIT 1;
DROP FUNCTION test_join_function_2(integer, integer);
SELECT run_command_on_workers($f$
DROP FUNCTION test_join_function_2(integer, integer);
$f$);
SET citus.enable_router_execution TO TRUE;
SET citus.subquery_pushdown to OFF;