-- -- multi subquery behavioral analytics queries aim to expand existing subquery pushdown -- regression tests to cover more cases -- the tables that are used depend on multi_insert_select_behavioral_analytics_create_table.sql -- --- We don't need shard id sequence here given that we're not creating any shards, so not writing it at all -- The following line is intended to force Citus to NOT use router planner for the tests in this -- file. The motivation for doing this is to make sure that single-task queries can be planned -- by non-router code-paths. Thus, this flag should NOT be used in production. Otherwise, the actual -- router queries would fail. SET citus.enable_router_execution TO FALSE; ------------------------------------ -- Vanilla funnel query ------------------------------------ SELECT user_id, array_length(events_table, 1) FROM ( SELECT user_id, array_agg(event ORDER BY time) AS events_table FROM ( SELECT u.user_id, e.event_type::text AS event, e.time FROM users_table AS u, events_table AS e WHERE u.user_id = e.user_id AND u.user_id >= 10 AND u.user_id <= 25 AND e.event_type IN (100, 101, 102) ) t GROUP BY user_id ) q ORDER BY 2 DESC, 1; user_id | array_length ---------+-------------- 13 | 172 12 | 121 23 | 115 10 | 114 20 | 90 (5 rows) ------------------------------------ -- Funnel grouped by whether or not a user has done an event -- This has multiple subqueries joining at the top level ------------------------------------ SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event FROM ( SELECT t1.user_id, array_agg(event ORDER BY time) AS events_table, COALESCE(hasdone_event, 'Has not done event') AS hasdone_event FROM ( ( SELECT u.user_id, 'step=>1'::text AS event, e.time FROM users_table AS u, events_table AS e WHERE u.user_id = e.user_id AND u.user_id >= 10 AND u.user_id <= 25 AND e.event_type IN (100, 101, 102) ) UNION ( SELECT u.user_id, 'step=>2'::text AS event, e.time FROM users_table AS u, events_table AS e 
WHERE u.user_id = e.user_id AND u.user_id >= 10 AND u.user_id <= 25 AND e.event_type IN (103, 104, 105) ) ) t1 LEFT JOIN ( SELECT DISTINCT user_id, 'Has done event'::TEXT AS hasdone_event FROM events_table AS e WHERE e.user_id >= 10 AND e.user_id <= 25 AND e.event_type IN (106, 107, 108) ) t2 ON (t1.user_id = t2.user_id) GROUP BY t1.user_id, hasdone_event ) t GROUP BY user_id, hasdone_event ORDER BY user_id; user_id | sum | length | hasdone_event ---------+-----+--------+-------------------- 10 | 1 | 18 | Has not done event 12 | 1 | 14 | Has done event 13 | 2 | 18 | Has not done event 15 | 1 | 18 | Has not done event 17 | 1 | 18 | Has not done event 19 | 1 | 14 | Has done event 20 | 2 | 18 | Has not done event 23 | 1 | 18 | Has not done event (8 rows) -- same query but multiple joins are one level below, returns count of rows instead of actual rows SELECT count(*) FROM ( SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event FROM ( SELECT t1.user_id, array_agg(event ORDER BY time) AS events_table, COALESCE(hasdone_event, 'Has not done event') AS hasdone_event FROM ( ( SELECT u.user_id, 'step=>1'::text AS event, e.time FROM users_table AS u, events_table AS e WHERE u.user_id = e.user_id AND u.user_id >= 10 AND u.user_id <= 25 AND e.event_type IN (100, 101, 102) ) UNION ( SELECT u.user_id, 'step=>2'::text AS event, e.time FROM users_table AS u, events_table AS e WHERE u.user_id = e.user_id AND u.user_id >= 10 AND u.user_id <= 25 AND e.event_type IN (103, 104, 105) ) ) t1 LEFT JOIN ( SELECT DISTINCT user_id, 'Has done event'::TEXT AS hasdone_event FROM events_table AS e WHERE e.user_id >= 10 AND e.user_id <= 25 AND e.event_type IN (106, 107, 108) ) t2 ON (t1.user_id = t2.user_id) GROUP BY t1.user_id, hasdone_event ) t GROUP BY user_id, hasdone_event ORDER BY user_id) u; count ------- 8 (1 row) -- Same queries written without unions SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event FROM ( SELECT 
t1.user_id, array_agg(event ORDER BY time) AS events_table, COALESCE(hasdone_event, 'Has not done event') AS hasdone_event FROM ( SELECT u.user_id, CASE WHEN e.event_type IN (100, 101, 102) THEN 'step=>1'::text else 'step==>2'::text END AS event, e.time FROM users_table AS u, events_table AS e WHERE u.user_id = e.user_id AND u.user_id >= 10 AND u.user_id <= 25 AND e.event_type IN (100, 101, 102, 103, 104, 105) GROUP BY 1,2,3 ) t1 LEFT JOIN ( SELECT DISTINCT user_id, 'Has done event'::TEXT AS hasdone_event FROM events_table AS e WHERE e.user_id >= 10 AND e.user_id <= 25 AND e.event_type IN (106, 107, 108) ) t2 ON (t1.user_id = t2.user_id) GROUP BY t1.user_id, hasdone_event ) t GROUP BY user_id, hasdone_event ORDER BY user_id; user_id | sum | length | hasdone_event ---------+-----+--------+-------------------- 10 | 1 | 18 | Has not done event 12 | 1 | 14 | Has done event 13 | 2 | 18 | Has not done event 15 | 1 | 18 | Has not done event 17 | 1 | 18 | Has not done event 19 | 1 | 14 | Has done event 20 | 2 | 18 | Has not done event 23 | 1 | 18 | Has not done event (8 rows) -- same query but multiple joins are one level below, returns count of rows instead of actual rows SELECT count(*) FROM ( SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event FROM ( SELECT t1.user_id, array_agg(event ORDER BY time) AS events_table, COALESCE(hasdone_event, 'Has not done event') AS hasdone_event FROM ( SELECT u.user_id, CASE WHEN e.event_type in (100, 101, 102) then 'step=>1'::text else 'step==>2'::text END AS event, e.time FROM users_table AS u, events_table AS e WHERE u.user_id = e.user_id AND u.user_id >= 10 AND u.user_id <= 25 AND e.event_type IN (100, 101, 102, 103, 104, 105) GROUP BY 1,2,3 ) t1 LEFT JOIN ( SELECT DISTINCT user_id, 'Has done event'::TEXT AS hasdone_event FROM events_table AS e WHERE e.user_id >= 10 AND e.user_id <= 25 AND e.event_type IN (106, 107, 108) ) t2 ON (t1.user_id = t2.user_id) GROUP BY t1.user_id, hasdone_event ) t GROUP 
BY user_id, hasdone_event ORDER BY user_id) u; count ------- 8 (1 row) ------------------------------------ -- Funnel, grouped by the number of times a user has done an event ------------------------------------ SELECT user_id, avg(array_length(events_table, 1)) AS event_average, count_pay FROM ( SELECT subquery_1.user_id, array_agg(event ORDER BY time) AS events_table, COALESCE(count_pay, 0) AS count_pay FROM ( (SELECT users_table.user_id, 'action=>1'AS event, events_table.time FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND users_table.user_id >= 10 AND users_table.user_id <= 70 AND events_table.event_type > 10 AND events_table.event_type < 12 ) UNION (SELECT users_table.user_id, 'action=>2'AS event, events_table.time FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND users_table.user_id >= 10 AND users_table.user_id <= 70 AND events_table.event_type > 12 AND events_table.event_type < 14 ) ) AS subquery_1 LEFT JOIN (SELECT user_id, COUNT(*) AS count_pay FROM users_table WHERE user_id >= 10 AND user_id <= 70 AND users_table.value_1 > 15 AND users_table.value_1 < 17 GROUP BY user_id HAVING COUNT(*) > 1) AS subquery_2 ON subquery_1.user_id = subquery_2.user_id GROUP BY subquery_1.user_id, count_pay) AS subquery_top WHERE array_ndims(events_table) > 0 GROUP BY count_pay, user_id ORDER BY event_average DESC, count_pay DESC, user_id DESC; user_id | event_average | count_pay ---------+------------------------+----------- 69 | 1.00000000000000000000 | 0 65 | 1.00000000000000000000 | 0 58 | 1.00000000000000000000 | 0 49 | 1.00000000000000000000 | 0 40 | 1.00000000000000000000 | 0 32 | 1.00000000000000000000 | 0 29 | 1.00000000000000000000 | 0 18 | 1.00000000000000000000 | 0 (8 rows) SELECT user_id, avg(array_length(events_table, 1)) AS event_average, count_pay FROM ( SELECT subquery_1.user_id, array_agg(event ORDER BY time) AS events_table, COALESCE(count_pay, 0) AS count_pay FROM ( (SELECT 
users_table.user_id, 'action=>1'AS event, events_table.time FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND users_table.user_id >= 10 AND users_table.user_id <= 70 AND events_table.event_type > 10 AND events_table.event_type < 12 ) UNION (SELECT users_table.user_id, 'action=>2'AS event, events_table.time FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND users_table.user_id >= 10 AND users_table.user_id <= 70 AND events_table.event_type > 12 AND events_table.event_type < 14 ) ) AS subquery_1 LEFT JOIN (SELECT user_id, COUNT(*) AS count_pay FROM users_table WHERE user_id >= 10 AND user_id <= 70 AND users_table.value_1 > 15 AND users_table.value_1 < 17 GROUP BY user_id HAVING COUNT(*) > 1) AS subquery_2 ON subquery_1.user_id = subquery_2.user_id GROUP BY subquery_1.user_id, count_pay) AS subquery_top WHERE array_ndims(events_table) > 0 GROUP BY count_pay, user_id HAVING avg(array_length(events_table, 1)) > 0 ORDER BY event_average DESC, count_pay DESC, user_id DESC; user_id | event_average | count_pay ---------+------------------------+----------- 69 | 1.00000000000000000000 | 0 65 | 1.00000000000000000000 | 0 58 | 1.00000000000000000000 | 0 49 | 1.00000000000000000000 | 0 40 | 1.00000000000000000000 | 0 32 | 1.00000000000000000000 | 0 29 | 1.00000000000000000000 | 0 18 | 1.00000000000000000000 | 0 (8 rows) -- Same queries rewritten without using unions SELECT user_id, avg(array_length(events_table, 1)) AS event_average, count_pay FROM ( SELECT subquery_1.user_id, array_agg(event ORDER BY time) AS events_table, COALESCE(count_pay, 0) AS count_pay FROM ( SELECT users_table.user_id, CASE WHEN events_table.event_type > 10 AND events_table.event_type < 12 THEN 'action=>1' ELSE 'action=>2' END AS event, events_table.time FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND users_table.user_id >= 10 AND users_table.user_id <= 70 AND (events_table.event_type > 10 AND 
events_table.event_type < 12 OR events_table.event_type > 12 AND events_table.event_type < 14) GROUP BY 1, 2, 3 ) AS subquery_1 LEFT JOIN (SELECT user_id, COUNT(*) AS count_pay FROM users_table WHERE user_id >= 10 AND user_id <= 70 AND users_table.value_1 > 15 AND users_table.value_1 < 17 GROUP BY user_id HAVING COUNT(*) > 1) AS subquery_2 ON subquery_1.user_id = subquery_2.user_id GROUP BY subquery_1.user_id, count_pay) AS subquery_top WHERE array_ndims(events_table) > 0 GROUP BY count_pay, user_id ORDER BY event_average DESC, count_pay DESC, user_id DESC; user_id | event_average | count_pay ---------+------------------------+----------- 69 | 1.00000000000000000000 | 0 65 | 1.00000000000000000000 | 0 58 | 1.00000000000000000000 | 0 49 | 1.00000000000000000000 | 0 40 | 1.00000000000000000000 | 0 32 | 1.00000000000000000000 | 0 29 | 1.00000000000000000000 | 0 18 | 1.00000000000000000000 | 0 (8 rows) SELECT user_id, avg(array_length(events_table, 1)) AS event_average, count_pay FROM ( SELECT subquery_1.user_id, array_agg(event ORDER BY time) AS events_table, COALESCE(count_pay, 0) AS count_pay FROM ( SELECT users_table.user_id, CASE WHEN events_table.event_type > 10 AND events_table.event_type < 12 THEN 'action=>1' ELSE 'action=>2' END AS event, events_table.time FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND users_table.user_id >= 10 AND users_table.user_id <= 70 AND (events_table.event_type > 10 AND events_table.event_type < 12 OR events_table.event_type > 12 AND events_table.event_type < 14) GROUP BY 1, 2, 3 ) AS subquery_1 LEFT JOIN (SELECT user_id, COUNT(*) AS count_pay FROM users_table WHERE user_id >= 10 AND user_id <= 70 AND users_table.value_1 > 15 AND users_table.value_1 < 17 GROUP BY user_id HAVING COUNT(*) > 1) AS subquery_2 ON subquery_1.user_id = subquery_2.user_id GROUP BY subquery_1.user_id, count_pay) AS subquery_top WHERE array_ndims(events_table) > 0 GROUP BY count_pay, user_id HAVING 
avg(array_length(events_table, 1)) > 0 ORDER BY event_average DESC, count_pay DESC, user_id DESC; user_id | event_average | count_pay ---------+------------------------+----------- 69 | 1.00000000000000000000 | 0 65 | 1.00000000000000000000 | 0 58 | 1.00000000000000000000 | 0 49 | 1.00000000000000000000 | 0 40 | 1.00000000000000000000 | 0 32 | 1.00000000000000000000 | 0 29 | 1.00000000000000000000 | 0 18 | 1.00000000000000000000 | 0 (8 rows) ------------------------------------ -- Most recently seen users_table events_table ------------------------------------ -- Note that we don't use ORDER BY/LIMIT yet ------------------------------------ SELECT user_id, user_lastseen, array_length(event_array, 1) FROM ( SELECT user_id, max(u.time) as user_lastseen, array_agg(event_type ORDER BY u.time) AS event_array FROM ( SELECT user_id, time FROM users_table WHERE user_id >= 10 AND user_id <= 70 AND users_table.value_1 > 10 AND users_table.value_1 < 12 ) u LEFT JOIN LATERAL ( SELECT event_type, time FROM events_table WHERE user_id = u.user_id AND events_table.event_type > 10 AND events_table.event_type < 12 ) t ON true GROUP BY user_id ) AS shard_union ORDER BY user_lastseen DESC, user_id; user_id | user_lastseen | array_length ---------+---------------------------------+-------------- 12 | Sun Jan 19 01:49:20.372688 2014 | 1 20 | Sat Jan 18 14:25:31.817903 2014 | 1 42 | Thu Jan 16 07:08:02.651966 2014 | 1 56 | Tue Jan 14 12:11:47.27375 2014 | 1 57 | Mon Jan 13 14:53:50.494836 2014 | 1 65 | Sun Jan 12 03:14:26.810597 2014 | 1 (6 rows) ------------------------------------ -- Count the number of distinct users_table who are in segment X and Y and Z ------------------------------------ SELECT user_id FROM users_table WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20) AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 30 AND value_1 <= 40) AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60) 
GROUP BY user_id ORDER BY user_id DESC LIMIT 5; user_id --------- 93 90 88 87 84 (5 rows) ------------------------------------ -- Find customers who have done X, and satisfy other customer specific criteria ------------------------------------ SELECT user_id, value_2 FROM users_table WHERE value_1 > 101 AND value_1 < 110 AND value_2 >= 5 AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 101 AND event_type < 110 AND value_3 > 100 AND user_id = users_table.user_id) ORDER BY 2 DESC, 1 DESC LIMIT 5; user_id | value_2 ---------+--------- 95 | 951 4 | 934 2 | 908 90 | 900 49 | 847 (5 rows) ------------------------------------ -- Customers who haven’t done X, and satisfy other customer specific criteria ------------------------------------ SELECT user_id, value_2 FROM users_table WHERE value_1 = 101 AND value_2 >= 5 AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND user_id = users_table.user_id) ORDER BY 1 DESC, 2 DESC LIMIT 3; user_id | value_2 ---------+--------- 58 | 585 51 | 1000 48 | 861 (3 rows) ------------------------------------ -- Customers who have done X and Y, and satisfy other customer specific criteria ------------------------------------ SELECT user_id, sum(value_2) as cnt FROM users_table WHERE value_1 > 100 AND value_2 >= 5 AND EXISTS (SELECT user_id FROM events_table WHERE event_type != 100 AND value_3 > 100 AND user_id = users_table.user_id) AND EXISTS (SELECT user_id FROM events_table WHERE event_type = 101 AND value_3 > 100 AND user_id = users_table.user_id) GROUP BY user_id ORDER BY cnt DESC, user_id DESC LIMIT 5; user_id | cnt ---------+------- 49 | 48606 69 | 46524 86 | 46163 80 | 45995 35 | 45437 (5 rows) ------------------------------------ -- Customers who have done X and haven’t done Y, and satisfy other customer specific criteria ------------------------------------ SELECT user_id, value_2 FROM users_table WHERE value_2 >= 5 AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 
100 AND event_type <= 300 AND value_3 > 100 AND user_id = users_table.user_id) AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 300 AND event_type <= 350 AND value_3 > 100 AND user_id = users_table.user_id) ORDER BY 2 DESC, 1 DESC LIMIT 4; user_id | value_2 ---------+--------- 8 | 996 96 | 995 8 | 995 96 | 989 (4 rows) ------------------------------------ -- Customers who have done X more than 2 times, and satisfy other customer specific criteria ------------------------------------ SELECT user_id, avg(value_2) FROM users_table WHERE value_1 > 100 AND value_1 < 124 AND value_2 >= 5 AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 100 AND event_type < 124 AND value_3 > 100 AND user_id = users_table.user_id GROUP BY user_id HAVING Count(*) > 2) GROUP BY user_id ORDER BY 1 DESC, 2 DESC LIMIT 5; user_id | avg ---------+---------------------- 99 | 571.6666666666666667 98 | 758.0000000000000000 96 | 459.6666666666666667 90 | 453.3333333333333333 89 | 215.0000000000000000 (5 rows) ------------------------------------ -- Find me all users_table who logged in more than once ------------------------------------ SELECT user_id, value_1 from ( SELECT user_id, value_1 From users_table WHERE value_2 > 100 and user_id = 15 GROUP BY value_1, user_id HAVING count(*) > 1 ) AS a ORDER BY user_id ASC, value_1 ASC; user_id | value_1 ---------+--------- 15 | 212 15 | 230 15 | 417 15 | 490 15 | 529 15 | 926 (6 rows) -- same query with additional filter to make it not router plannable SELECT user_id, value_1 from ( SELECT user_id, value_1 From users_table WHERE value_2 > 100 and (user_id = 15 OR user_id = 16) GROUP BY value_1, user_id HAVING count(*) > 1 ) AS a ORDER BY user_id ASC, value_1 ASC; user_id | value_1 ---------+--------- 15 | 212 15 | 230 15 | 417 15 | 490 15 | 529 15 | 926 16 | 339 16 | 485 16 | 717 16 | 903 (10 rows) ------------------------------------ -- Find me all users_table who has done some event and has filters 
------------------------------------ SELECT user_id FROM events_table WHERE event_type = 16 AND value_2 > 50 AND user_id IN (SELECT user_id FROM users_table WHERE value_1 = 15 AND value_2 > 25 ) ORDER BY 1; user_id --------- 7 53 (2 rows) ------------------------------------ -- Which events_table did people who has done some specific events_table ------------------------------------ SELECT user_id, event_type FROM events_table WHERE user_id in (SELECT user_id from events_table WHERE event_type > 500 and event_type < 505) GROUP BY user_id, event_type ORDER BY 2 DESC, 1 LIMIT 3; user_id | event_type ---------+------------ 18 | 999 23 | 999 26 | 999 (3 rows) ------------------------------------ -- Find me all the users_table who has done some event more than three times ------------------------------------ SELECT user_id FROM ( SELECT user_id FROM events_table WHERE event_type = 901 GROUP BY user_id HAVING count(*) > 3 ) AS a ORDER BY user_id; user_id --------- 57 (1 row) ------------------------------------ -- Find my assets that have the highest probability and fetch their metadata ------------------------------------ CREATE TEMP TABLE assets AS SELECT users_table.user_id, users_table.value_1, prob FROM users_table JOIN (SELECT ma.user_id, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob FROM users_table AS ma, events_table as short_list WHERE short_list.user_id = ma.user_id and ma.value_1 < 50 and short_list.event_type < 50 ) temp ON users_table.user_id = temp.user_id WHERE users_table.value_1 < 50; -- get some statistics from the aggregated results to ensure the results are correct SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM assets; count | count | avg -------+-------+--------------------- 14371 | 101 | 50.5232064574490293 (1 row) DROP TABLE assets; -- count number of distinct users who have value_1 equal to 5 or 13 but not 3 -- original query that fails SELECT count(*) FROM ( SELECT user_id FROM users_table WHERE (value_1 = 
'5' OR value_1 = '13') AND user_id NOT IN (select user_id from users_table where value_1 = '3') GROUP BY user_id HAVING count(distinct value_1) = 2 ) as foo; ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. -- previous push down query SELECT subquery_count FROM (SELECT count(*) as subquery_count FROM (SELECT user_id FROM users_table WHERE (value_1 = '5' OR value_1 = '13') GROUP BY user_id HAVING count(distinct value_1) = 2) as a LEFT JOIN (SELECT user_id FROM users_table WHERE (value_1 = '3') GROUP BY user_id) as b ON a.user_id = b.user_id WHERE b.user_id IS NULL GROUP BY a.user_id ) AS inner_subquery; subquery_count ---------------- 1 (1 row) -- new pushdown query without single range table entry at top requirement SELECT count(*) as subquery_count FROM ( SELECT user_id FROM users_table WHERE (value_1 = '5' OR value_1 = '13') GROUP BY user_id HAVING count(distinct value_1) = 2 ) as a LEFT JOIN ( SELECT user_id FROM users_table WHERE (value_1 = '3') GROUP BY user_id) AS b ON a.user_id = b.user_id WHERE b.user_id IS NULL GROUP BY a.user_id; subquery_count ---------------- 1 (1 row) -- most queries below have a limit clause -- therefore setting subquery_pushdown flag for all SET citus.subquery_pushdown to ON; -- multi-subquery-join -- The first query has filters on partition column to make it router plannable -- but it is processed by logical planner since we disabled router execution SELECT e1.user_id, sum(view_homepage) AS viewed_homepage, sum(use_demo) AS use_demo, sum(enter_credit_card) AS entered_credit_card, sum(submit_card_info) as submit_card_info, sum(see_bought_screen) as see_bought_screen FROM ( -- Get the first time each user viewed the homepage. 
SELECT user_id, 1 AS view_homepage, min(time) AS view_homepage_time FROM events_table WHERE user_id = 1 and event_type IN (10, 20, 30, 40, 50, 60, 70, 80, 90) GROUP BY user_id ) e1 LEFT JOIN LATERAL ( SELECT user_id, 1 AS use_demo, time AS use_demo_time FROM events_table WHERE user_id = e1.user_id AND user_id = 1 and event_type IN (11, 21, 31, 41, 51, 61, 71, 81, 91) ORDER BY time LIMIT 1 ) e2 ON true LEFT JOIN LATERAL ( SELECT user_id, 1 AS enter_credit_card, time AS enter_credit_card_time FROM events_table WHERE user_id = e2.user_id AND user_id = 1 and event_type IN (12, 22, 32, 42, 52, 62, 72, 82, 92) ORDER BY time LIMIT 1 ) e3 ON true LEFT JOIN LATERAL ( SELECT 1 AS submit_card_info, user_id, time AS enter_credit_card_time FROM events_table WHERE user_id = e3.user_id AND user_id = 1 and event_type IN (13, 23, 33, 43, 53, 63, 73, 83, 93) ORDER BY time LIMIT 1 ) e4 ON true LEFT JOIN LATERAL ( SELECT 1 AS see_bought_screen FROM events_table WHERE user_id = e4.user_id AND user_id = 1 and event_type IN (14, 24, 34, 44, 54, 64, 74, 84, 94) ORDER BY time LIMIT 1 ) e5 ON true WHERE e1.user_id = 1 GROUP BY e1.user_id LIMIT 1; user_id | viewed_homepage | use_demo | entered_credit_card | submit_card_info | see_bought_screen ---------+-----------------+----------+---------------------+------------------+------------------- 1 | 1 | | | | (1 row) -- Same query without all limitations SELECT e1.user_id, sum(view_homepage) AS viewed_homepage, sum(use_demo) AS use_demo, sum(enter_credit_card) AS entered_credit_card, sum(submit_card_info) as submit_card_info, sum(see_bought_screen) as see_bought_screen FROM ( -- Get the first time each user viewed the homepage. 
SELECT user_id, 1 AS view_homepage, min(time) AS view_homepage_time FROM events_table WHERE event_type IN (10, 20, 30, 40, 50, 60, 70, 80, 90) GROUP BY user_id ) e1 LEFT JOIN LATERAL ( SELECT user_id, 1 AS use_demo, time AS use_demo_time FROM events_table WHERE user_id = e1.user_id AND event_type IN (11, 21, 31, 41, 51, 61, 71, 81, 91) ORDER BY time ) e2 ON true LEFT JOIN LATERAL ( SELECT user_id, 1 AS enter_credit_card, time AS enter_credit_card_time FROM events_table WHERE user_id = e2.user_id AND event_type IN (12, 22, 32, 42, 52, 62, 72, 82, 92) ORDER BY time ) e3 ON true LEFT JOIN LATERAL ( SELECT 1 AS submit_card_info, user_id, time AS enter_credit_card_time FROM events_table WHERE user_id = e3.user_id AND event_type IN (13, 23, 33, 43, 53, 63, 73, 83, 93) ORDER BY time ) e4 ON true LEFT JOIN LATERAL ( SELECT 1 AS see_bought_screen FROM events_table WHERE user_id = e4.user_id AND event_type IN (14, 24, 34, 44, 54, 64, 74, 84, 94) ORDER BY time ) e5 ON true GROUP BY e1.user_id ORDER BY 6 DESC NULLS LAST, 5 DESC NULLS LAST, 4 DESC NULLS LAST, 3 DESC NULLS LAST, 2 DESC NULLS LAST, 1 LIMIT 15; user_id | viewed_homepage | use_demo | entered_credit_card | submit_card_info | see_bought_screen ---------+-----------------+----------+---------------------+------------------+------------------- 72 | 36 | 36 | 36 | 36 | 36 95 | 12 | 12 | 12 | 12 | 12 82 | 4 | 4 | 4 | 4 | 4 74 | 3 | 3 | 3 | 3 | 3 83 | 3 | 3 | 3 | 3 | 3 6 | 2 | 2 | 2 | 2 | 2 42 | 1 | 1 | 1 | 1 | 1 5 | 4 | 4 | 4 | 4 | 93 | 4 | 4 | 4 | 4 | 51 | 1 | 1 | 1 | 1 | 85 | 6 | 6 | 6 | | 73 | 4 | 4 | 4 | | 0 | 3 | 3 | 3 | | 10 | 2 | 2 | 2 | | 13 | 2 | 2 | 2 | | (15 rows) -- Same query without all limitations but uses having() to show only those who submitted their credit card info SELECT e1.user_id, sum(view_homepage) AS viewed_homepage, sum(use_demo) AS use_demo, sum(enter_credit_card) AS entered_credit_card, sum(submit_card_info) as submit_card_info, sum(see_bought_screen) as see_bought_screen FROM ( -- Get the first 
time each user viewed the homepage. SELECT user_id, 1 AS view_homepage, min(time) AS view_homepage_time FROM events_table WHERE event_type IN (10, 20, 30, 40, 50, 60, 70, 80, 90) GROUP BY user_id ) e1 LEFT JOIN LATERAL ( SELECT user_id, 1 AS use_demo, time AS use_demo_time FROM events_table WHERE user_id = e1.user_id AND event_type IN (11, 21, 31, 41, 51, 61, 71, 81, 91) ORDER BY time ) e2 ON true LEFT JOIN LATERAL ( SELECT user_id, 1 AS enter_credit_card, time AS enter_credit_card_time FROM events_table WHERE user_id = e2.user_id AND event_type IN (12, 22, 32, 42, 52, 62, 72, 82, 92) ORDER BY time ) e3 ON true LEFT JOIN LATERAL ( SELECT 1 AS submit_card_info, user_id, time AS enter_credit_card_time FROM events_table WHERE user_id = e3.user_id AND event_type IN (13, 23, 33, 43, 53, 63, 73, 83, 93) ORDER BY time ) e4 ON true LEFT JOIN LATERAL ( SELECT 1 AS see_bought_screen FROM events_table WHERE user_id = e4.user_id AND event_type IN (14, 24, 34, 44, 54, 64, 74, 84, 94) ORDER BY time ) e5 ON true group by e1.user_id HAVING sum(submit_card_info) > 0 ORDER BY 6 DESC NULLS LAST, 5 DESC NULLS LAST, 4 DESC NULLS LAST, 3 DESC NULLS LAST, 2 DESC NULLS LAST, 1 LIMIT 15; user_id | viewed_homepage | use_demo | entered_credit_card | submit_card_info | see_bought_screen ---------+-----------------+----------+---------------------+------------------+------------------- 72 | 36 | 36 | 36 | 36 | 36 95 | 12 | 12 | 12 | 12 | 12 82 | 4 | 4 | 4 | 4 | 4 74 | 3 | 3 | 3 | 3 | 3 83 | 3 | 3 | 3 | 3 | 3 6 | 2 | 2 | 2 | 2 | 2 42 | 1 | 1 | 1 | 1 | 1 5 | 4 | 4 | 4 | 4 | 93 | 4 | 4 | 4 | 4 | 51 | 1 | 1 | 1 | 1 | (10 rows) -- Explain analyze on this query fails due to #756 -- avg expression used on order by SELECT a.user_id, avg(b.value_2) as subquery_avg FROM ( SELECT user_id FROM users_table WHERE (value_1 > 5) GROUP BY user_id HAVING count(distinct value_1) > 88 ) as a LEFT JOIN ( SELECT user_id, value_2, value_3 FROM users_table WHERE (value_1 > 3)) AS b ON a.user_id = b.user_id WHERE 
b.user_id IS NOT NULL GROUP BY a.user_id ORDER BY avg(b.value_3), 2, 1 LIMIT 5; user_id | subquery_avg ---------+---------------------- 99 | 456.7446808510638298 83 | 469.6037735849056604 61 | 486.5869565217391304 78 | 434.9009009009009009 77 | 449.9313725490196078 (5 rows) -- add having to the same query SELECT a.user_id, avg(b.value_2) as subquery_avg FROM ( SELECT user_id FROM users_table WHERE (value_1 > 5) GROUP BY user_id HAVING count(distinct value_1) > 88 ) as a LEFT JOIN ( SELECT user_id, value_2, value_3 FROM users_table WHERE (value_1 > 3)) AS b ON a.user_id = b.user_id WHERE b.user_id IS NOT NULL GROUP BY a.user_id HAVING sum(b.value_3) > 50000 ORDER BY avg(b.value_3), 2, 1 LIMIT 5; user_id | subquery_avg ---------+---------------------- 78 | 434.9009009009009009 29 | 505.0934579439252336 17 | 526.9633027522935780 91 | 501.4339622641509434 24 | 515.1714285714285714 (5 rows) -- avg on the value_3 is not a resjunk SELECT a.user_id, avg(b.value_2) as subquery_avg, avg(b.value_3) FROM (SELECT user_id FROM users_table WHERE (value_1 > 5) GROUP BY user_id HAVING count(distinct value_1) > 88 ) as a LEFT JOIN ( SELECT user_id, value_2, value_3 FROM users_table WHERE (value_1 > 3) ) AS b ON a.user_id = b.user_id WHERE b.user_id IS NOT NULL GROUP BY a.user_id ORDER BY avg(b.value_3) DESC, 2, 1 LIMIT 5; user_id | subquery_avg | avg ---------+----------------------+------------------ 6 | 523.8247422680412371 | 569.226804123711 62 | 497.1545454545454545 | 567.681818181818 8 | 524.5894736842105263 | 565.2 10 | 502.2017543859649123 | 561.929824561404 16 | 467.5145631067961165 | 561.73786407767 (5 rows) -- a powerful query structure that analyzes users/events -- using (relation JOIN subquery JOIN relation) SELECT u.user_id, sub.value_2, sub.value_3, COUNT(e2.user_id) counts FROM users_table u LEFT OUTER JOIN LATERAL (SELECT * FROM events_table e1 WHERE e1.user_id = u.user_id ORDER BY e1.value_3 DESC LIMIT 1 ) sub ON true LEFT OUTER JOIN events_table e2 ON e2.user_id = 
sub.user_id WHERE e2.value_2 > 10 AND e2.value_2 < 50 AND u.value_2 > 10 AND u.value_2 < 50 GROUP BY u.user_id, sub.value_2, sub.value_3 ORDER BY 4 DESC, 1 DESC, 2 ASC, 3 ASC LIMIT 10; user_id | value_2 | value_3 | counts ---------+---------+---------+-------- 87 | 807 | 990 | 45 25 | 613 | 992 | 40 26 | 952 | 982 | 36 17 | 277 | 993 | 36 83 | 571 | 1000 | 35 99 | 309 | 998 | 32 96 | 571 | 987 | 30 95 | 631 | 997 | 30 82 | 444 | 997 | 28 57 | 975 | 989 | 25 (10 rows) -- distinct users joined with events SELECT avg(events_table.event_type) as avg_type, count(*) as users_count FROM events_table JOIN (SELECT DISTINCT user_id FROM users_table ) as distinct_users ON distinct_users.user_id = events_table.user_id GROUP BY distinct_users.user_id ORDER BY users_count desc, avg_type DESC LIMIT 5; avg_type | users_count ----------------------+------------- 496.5748031496062992 | 127 531.1788617886178862 | 123 504.6806722689075630 | 119 503.7203389830508475 | 118 506.3793103448275862 | 116 (5 rows) -- reduce the data set, aggregate and join SELECT events_table.event_type, users_count.ct FROM events_table JOIN (SELECT distinct_users.user_id, count(1) as ct FROM (SELECT user_id FROM users_table ) as distinct_users GROUP BY distinct_users.user_id ) as users_count ON users_count.user_id = events_table.user_id ORDER BY users_count.ct desc, event_type DESC LIMIT 5; event_type | ct ------------+----- 996 | 121 986 | 121 979 | 121 975 | 121 960 | 121 (5 rows) --- now, test (subquery JOIN subquery) SELECT n1.user_id, count_1, total_count FROM (SELECT user_id, count(1) as count_1 FROM users_table GROUP BY user_id ) n1 INNER JOIN ( SELECT user_id, count(1) as total_count FROM events_table GROUP BY user_id, event_type ) n2 ON (n2.user_id = n1.user_id) ORDER BY total_count DESC, count_1 DESC, 1 DESC LIMIT 10; user_id | count_1 | total_count ---------+---------+------------- 57 | 105 | 4 78 | 112 | 3 45 | 111 | 3 40 | 107 | 3 36 | 106 | 3 25 | 105 | 3 86 | 100 | 3 80 | 100 | 3 60 | 100 | 3 
35 | 100 | 3 (10 rows) SELECT a.user_id, avg(b.value_2) as subquery_avg FROM (SELECT user_id FROM users_table WHERE (value_1 > 5) GROUP BY user_id HAVING count(distinct value_1) > 88 ) as a LEFT JOIN (SELECT DISTINCT ON (user_id) user_id, value_2, value_3 FROM users_table WHERE (value_1 > 3) ORDER BY 1,2,3 ) AS b ON a.user_id = b.user_id WHERE b.user_id IS NOT NULL GROUP BY a.user_id ORDER BY avg(b.value_3), 2, 1 LIMIT 5; user_id | subquery_avg ---------+--------------------- 10 | 5.0000000000000000 87 | 12.0000000000000000 77 | 28.0000000000000000 37 | 17.0000000000000000 11 | 3.0000000000000000 (5 rows) -- distinct clause must include partition column -- when used in target list SELECT a.user_id, avg(b.value_2) as subquery_avg FROM (SELECT user_id FROM users_table WHERE (value_1 > 5) GROUP BY user_id HAVING count(distinct value_1) > 88 ) as a LEFT JOIN (SELECT DISTINCT ON (value_2) value_2 , user_id, value_3 FROM users_table WHERE (value_1 > 3) ORDER BY 1,2,3 ) AS b USING (user_id) GROUP BY user_id; ERROR: cannot push down this subquery DETAIL: Distinct on columns without partition column is currently unsupported SELECT a.user_id, avg(b.value_2) as subquery_avg FROM (SELECT user_id FROM users_table WHERE (value_1 > 5) GROUP BY user_id HAVING count(distinct value_1) > 88 ) as a LEFT JOIN (SELECT DISTINCT ON (value_2, user_id) value_2 , user_id, value_3 FROM users_table WHERE (value_1 > 3) ORDER BY 1,2,3 ) AS b ON a.user_id = b.user_id WHERE b.user_id IS NOT NULL GROUP BY a.user_id ORDER BY avg(b.value_3), 2, 1 LIMIT 5; user_id | subquery_avg ---------+---------------------- 99 | 459.1910112359550562 83 | 458.0721649484536082 9 | 541.5217391304347826 78 | 434.2336448598130841 77 | 443.8686868686868687 (5 rows) SELECT user_id, event_type FROM (SELECT * FROM ( (SELECT event_type, user_id as a_user_id FROM events_table) AS a JOIN (SELECT ma.user_id AS user_id, ma.value_2 AS value_2, (GREATEST(coalesce((ma.value_3 * ma.value_2) / 20, 0.0) + GREATEST(1.0))) / 2 AS prob 
FROM users_table AS ma WHERE (ma.value_2 > 100) ORDER BY prob DESC, user_id DESC LIMIT 10 ) AS ma ON (a.a_user_id = ma.user_id) ) AS inner_sub ORDER BY prob DESC, user_id DESC LIMIT 10 ) AS outer_sub ORDER BY prob DESC, event_type DESC, user_id DESC LIMIT 10; user_id | event_type ---------+------------ 10 | 813 10 | 806 10 | 805 10 | 685 10 | 591 10 | 442 10 | 333 10 | 317 10 | 244 10 | 169 (10 rows) -- very similar query, but it produces a different result due to -- the ordering difference in the previous one's inner query SELECT user_id, event_type FROM (SELECT event_type, user_id as a_user_id FROM events_table) AS a JOIN (SELECT ma.user_id AS user_id, ma.value_2 AS value_2, (GREATEST(coalesce((ma.value_3 * ma.value_2) / 20, 0.0) + GREATEST(1.0))) / 2 AS prob FROM users_table AS ma WHERE (ma.value_2 > 100) ORDER BY prob DESC, user_id DESC LIMIT 10 ) AS ma ON (a.a_user_id = ma.user_id) ORDER BY prob DESC, event_type DESC, user_id DESC LIMIT 10; user_id | event_type ---------+------------ 10 | 998 10 | 996 10 | 981 10 | 975 10 | 962 10 | 945 10 | 945 10 | 933 10 | 932 10 | 915 (10 rows) -- now they produce the same result when the ordering is fixed in 'outer_sub' SELECT user_id, event_type FROM (SELECT * FROM ( (SELECT event_type, user_id as a_user_id FROM events_table ) AS a JOIN (SELECT ma.user_id AS user_id, ma.value_2 AS value_2, (GREATEST(coalesce((ma.value_3 * ma.value_2) / 20, 0.0) + GREATEST(1.0))) / 2 AS prob FROM users_table AS ma WHERE (ma.value_2 > 100) ORDER BY prob DESC, user_id DESC LIMIT 10 ) AS ma ON (a.a_user_id = ma.user_id) ) AS inner_sub ORDER BY prob DESC, event_type DESC, user_id DESC LIMIT 10 ) AS outer_sub ORDER BY prob DESC, event_type DESC, user_id DESC LIMIT 10; user_id | event_type ---------+------------ 10 | 998 10 | 996 10 | 981 10 | 975 10 | 962 10 | 945 10 | 945 10 | 933 10 | 932 10 | 915 (10 rows) -- this is one complex join query derived from a user's production query -- first declare the function on the workers and on the master -- With array_index: SELECT * 
FROM run_command_on_workers('CREATE OR REPLACE FUNCTION array_index(ANYARRAY, ANYELEMENT) RETURNS INT AS $$ SELECT i FROM (SELECT generate_series(array_lower($1, 1), array_upper($1, 1))) g(i) WHERE $1 [i] = $2 LIMIT 1; $$ LANGUAGE sql') ORDER BY 1,2; nodename | nodeport | success | result -----------+----------+---------+----------------- localhost | 57637 | t | CREATE FUNCTION localhost | 57638 | t | CREATE FUNCTION (2 rows) -- same function definition, this time issued directly rather than through run_command_on_workers CREATE OR REPLACE FUNCTION array_index(ANYARRAY, ANYELEMENT) RETURNS INT AS $$ SELECT i FROM (SELECT generate_series(array_lower($1, 1), array_upper($1, 1))) g(i) WHERE $1 [i] = $2 LIMIT 1; $$ LANGUAGE sql; SELECT * FROM (SELECT * FROM ( (SELECT user_id AS user_id_e, event_type AS event_type_e FROM events_table ) AS ma_e JOIN (SELECT value_2, value_3, user_id FROM (SELECT * FROM ( (SELECT user_id_p AS user_id FROM (SELECT * FROM ( (SELECT user_id AS user_id_p FROM events_table WHERE (event_type IN (1,2,3,4,5)) ) AS ma_p JOIN (SELECT user_id AS user_id_a FROM users_table WHERE (value_2 % 5 = 1) ) AS a ON (a.user_id_a = ma_p.user_id_p) ) ) AS a_ma_p ) AS inner_filter_q JOIN (SELECT value_2, value_3, user_id AS user_id_ck FROM events_table WHERE event_type = ANY(ARRAY [10, 11, 12]) ORDER BY value_3 ASC, user_id_ck DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC LIMIT 10 ) AS ma_ck ON (ma_ck.user_id_ck = inner_filter_q.user_id) ) AS inner_sub_q ORDER BY value_3 ASC, user_id_ck DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC LIMIT 10 ) AS outer_sub_q ORDER BY value_3 ASC, user_id DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC LIMIT 10) AS inner_search_q ON (ma_e.user_id_e = inner_search_q.user_id) ) AS outer_inner_sub_q ORDER BY value_3 ASC, user_id DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC, event_type_e DESC LIMIT 10) AS outer_outer_sub_q ORDER BY value_3 ASC, user_id DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC, event_type_e DESC LIMIT 10; user_id_e | event_type_e | value_2 | value_3 | user_id 
-----------+--------------+---------+---------+--------- 65 | 991 | 167 | 108 | 65 65 | 991 | 167 | 108 | 65 65 | 991 | 167 | 108 | 65 65 | 991 | 167 | 108 | 65 65 | 991 | 167 | 108 | 65 65 | 991 | 167 | 108 | 65 65 | 991 | 167 | 108 | 65 65 | 991 | 167 | 108 | 65 65 | 991 | 167 | 108 | 65 65 | 991 | 167 | 108 | 65 (10 rows) -- the top level select * is removed, so now there is -- a join at the top level. SELECT * FROM ( (SELECT user_id AS user_id_e, event_type as event_type_e FROM events_table ) AS ma_e JOIN (SELECT value_2, value_3, user_id FROM (SELECT * FROM ( (SELECT user_id_p AS user_id FROM (SELECT * FROM ( (SELECT user_id AS user_id_p FROM events_table WHERE (event_type IN (1, 2, 3, 4, 5)) ) AS ma_p JOIN (SELECT user_id AS user_id_a FROM users_table WHERE (value_2 % 5 = 1) ) AS a ON (a.user_id_a = ma_p.user_id_p) ) ) AS a_ma_p ) AS inner_filter_q JOIN (SELECT value_2, value_3, user_id AS user_id_ck FROM events_table WHERE event_type = ANY(ARRAY [10, 11, 12]) ORDER BY value_3 ASC, user_id_ck DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC LIMIT 10 ) AS ma_ck ON (ma_ck.user_id_ck = inner_filter_q.user_id) ) AS inner_sub_q ORDER BY value_3 ASC, user_id_ck DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC LIMIT 10 ) AS outer_sub_q ORDER BY value_3 ASC, user_id DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC LIMIT 10) AS inner_search_q ON (ma_e.user_id_e = inner_search_q.user_id) ) AS outer_inner_sub_q ORDER BY value_3 ASC, user_id DESC, array_index(ARRAY [1, 2, 3], (value_2 % 3)) ASC, event_type_e DESC LIMIT 10; user_id_e | event_type_e | value_2 | value_3 | user_id -----------+--------------+---------+---------+--------- 65 | 991 | 167 | 108 | 65 65 | 991 | 167 | 108 | 65 65 | 991 | 167 | 108 | 65 65 | 991 | 167 | 108 | 65 65 | 991 | 167 | 108 | 65 65 | 991 | 167 | 108 | 65 65 | 991 | 167 | 108 | 65 65 | 991 | 167 | 108 | 65 65 | 991 | 167 | 108 | 65 65 | 991 | 167 | 108 | 65 (10 rows) -- drop the created functions SELECT * FROM run_command_on_workers('DROP 
FUNCTION array_index(ANYARRAY, ANYELEMENT)') ORDER BY 1,2; nodename | nodeport | success | result -----------+----------+---------+--------------- localhost | 57637 | t | DROP FUNCTION localhost | 57638 | t | DROP FUNCTION (2 rows) DROP FUNCTION array_index(ANYARRAY, ANYELEMENT); -- an unsupported query due to a constant range table entry SELECT count(*) as subquery_count FROM ( SELECT user_id FROM users_table WHERE (value_1 = '5' OR value_1 = '13') GROUP BY user_id HAVING count(distinct value_1) = 2 ) as a LEFT JOIN ( SELECT 1 as user_id ) AS b ON a.user_id = b.user_id WHERE b.user_id IS NULL GROUP BY a.user_id; ERROR: cannot push down this subquery DETAIL: Subqueries without relations are unsupported -- same with INNER JOIN SELECT count(*) as subquery_count FROM ( SELECT user_id FROM users_table WHERE (value_1 = '5' OR value_1 = '13') GROUP BY user_id HAVING count(distinct value_1) = 2 ) as a INNER JOIN ( SELECT 1 as user_id ) AS b ON a.user_id = b.user_id WHERE b.user_id IS NULL GROUP BY a.user_id; ERROR: cannot push down this subquery DETAIL: Subqueries without relations are unsupported -- this is slightly different, we use RTE_VALUEs (a VALUES list) here SELECT Count(*) AS subquery_count FROM (SELECT user_id FROM users_table WHERE (value_1 = '5' OR value_1 = '13' ) GROUP BY user_id HAVING Count(DISTINCT value_1) = 2) AS a INNER JOIN (SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (3, 'three')) AS t (user_id, letter)) AS b ON a.user_id = b.user_id WHERE b.user_id IS NULL GROUP BY a.user_id; ERROR: cannot push down this subquery DETAIL: Table expressions other than simple relations and subqueries are currently unsupported -- same query without LIMIT/OFFSET returns 30 rows SET client_min_messages TO DEBUG1; -- now, let's use a simple expression on the LIMIT and an explicit coercion on the OFFSET SELECT user_id, array_length(events_table, 1) FROM ( SELECT user_id, array_agg(event ORDER BY time) AS events_table FROM ( SELECT u.user_id, e.event_type::text AS event, e.time FROM 
users_table AS u, events_table AS e WHERE u.user_id = e.user_id AND e.event_type IN (100, 101, 102) ) t GROUP BY user_id ) q ORDER BY 2 DESC, 1 LIMIT 3+3 OFFSET 5::smallint; DEBUG: push down of limit count: 11 user_id | array_length ---------+-------------- 23 | 115 46 | 115 10 | 114 96 | 113 73 | 111 91 | 107 (6 rows) -- now, let's use an implicit coercion in the LIMIT and a simple expression on the OFFSET SELECT user_id, array_length(events_table, 1) FROM ( SELECT user_id, array_agg(event ORDER BY time) AS events_table FROM ( SELECT u.user_id, e.event_type::text AS event, e.time FROM users_table AS u, events_table AS e WHERE u.user_id = e.user_id AND e.event_type IN (100, 101, 102) ) t GROUP BY user_id ) q ORDER BY 2 DESC, 1 LIMIT '3' OFFSET 27+2; DEBUG: push down of limit count: 32 user_id | array_length ---------+-------------- 0 | 54 (1 row) -- create a test function which is marked as volatile CREATE OR REPLACE FUNCTION volatile_func_test() RETURNS INT AS $$ SELECT 5; $$ LANGUAGE sql VOLATILE; -- Citus should be able to evaluate functions/row comparisons on the LIMIT/OFFSET SELECT user_id, array_length(events_table, 1) FROM ( SELECT user_id, array_agg(event ORDER BY time) AS events_table FROM ( SELECT u.user_id, e.event_type::text AS event, e.time FROM users_table AS u, events_table AS e WHERE u.user_id = e.user_id AND e.event_type IN (100, 101, 102) ) t GROUP BY user_id ) q ORDER BY 2 DESC, 1 LIMIT volatile_func_test() + (ROW(1,2,NULL) < ROW(1,3,0))::int OFFSET volatile_func_test() + volatile_func_test(); DEBUG: push down of limit count: 16 user_id | array_length ---------+-------------- 91 | 107 69 | 103 67 | 101 35 | 100 80 | 100 86 | 100 (6 rows) -- now, let's use expressions on both the LIMIT and OFFSET SELECT user_id, array_length(events_table, 1) FROM ( SELECT user_id, array_agg(event ORDER BY time) AS events_table FROM ( SELECT u.user_id, e.event_type::text AS event, e.time FROM users_table AS u, events_table AS e WHERE u.user_id = e.user_id AND e.event_type IN 
(100, 101, 102) ) t GROUP BY user_id ) q ORDER BY 2 DESC, 1 LIMIT (5 > 4)::int OFFSET CASE WHEN 5 != 5 THEN 27 WHEN 1 > 5 THEN 28 ELSE 29 END; DEBUG: push down of limit count: 30 user_id | array_length ---------+-------------- 0 | 54 (1 row) -- parameters on the LIMIT/OFFSET clauses -- NOTE(review): the recorded EXECUTE output below returns rows with a pushed-down limit, not an error; confirm this is the intended behavior PREPARE parametrized_limit AS SELECT user_id, array_length(events_table, 1) FROM ( SELECT user_id, array_agg(event ORDER BY time) AS events_table FROM ( SELECT u.user_id, e.event_type::text AS event, e.time FROM users_table AS u, events_table AS e WHERE u.user_id = e.user_id AND e.event_type IN (100, 101, 102) ) t GROUP BY user_id ) q ORDER BY 2 DESC, 1 LIMIT $1 OFFSET $2; EXECUTE parametrized_limit(3,3); DEBUG: push down of limit count: 6 user_id | array_length ---------+-------------- 13 | 172 12 | 121 23 | 115 (3 rows) PREPARE parametrized_offset AS SELECT user_id, array_length(events_table, 1) FROM ( SELECT user_id, array_agg(event ORDER BY time) AS events_table FROM ( SELECT u.user_id, e.event_type::text AS event, e.time FROM users_table AS u, events_table AS e WHERE u.user_id = e.user_id AND e.event_type IN (100, 101, 102) ) t GROUP BY user_id ) q ORDER BY 2 DESC, 1 LIMIT 3 OFFSET $1; EXECUTE parametrized_offset(3); DEBUG: push down of limit count: 6 user_id | array_length ---------+-------------- 13 | 172 12 | 121 23 | 115 (3 rows) SET client_min_messages TO DEFAULT; DROP FUNCTION volatile_func_test(); CREATE FUNCTION test_join_function_2(integer, integer) RETURNS bool AS 'select $1 > $2;' LANGUAGE SQL IMMUTABLE RETURNS NULL ON NULL INPUT; -- we don't support joins via functions SELECT user_id, array_length(events_table, 1) FROM ( SELECT user_id, array_agg(event ORDER BY time) AS events_table FROM ( SELECT u.user_id, e.event_type::text AS event, e.time FROM users_table AS u, events_table AS e WHERE test_join_function_2(u.user_id, e.user_id) ) t GROUP BY user_id ) q ORDER BY 2 DESC, 1; ERROR: cannot pushdown the subquery since all relations are not joined using 
distribution keys DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. -- note that the following query has joins on the partition keys; -- however, we fail to push it down due to the function call in the -- where clause. We probably need to relax that check SELECT users_table.user_id, users_table.value_1, prob FROM users_table JOIN (SELECT ma.user_id, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob FROM users_table AS ma, events_table as short_list WHERE short_list.user_id = ma.user_id and ma.value_1 < 50 and short_list.event_type < 50 ) temp ON users_table.user_id = temp.user_id WHERE users_table.value_1 < 50 AND test_join_function_2(users_table.user_id, temp.user_id); ERROR: unsupported clause type DROP FUNCTION test_join_function_2(integer, integer); -- re-enable router execution, which was disabled at the top of this file SET citus.enable_router_execution TO TRUE; SET citus.subquery_pushdown to OFF;