mirror of https://github.com/citusdata/citus.git
2900 lines
100 KiB
Plaintext
2900 lines
100 KiB
Plaintext
--
|
|
-- multi subquery complex queries aim to expand the existing subquery pushdown
|
|
-- regression tests to cover more cases
|
|
-- the tables that are used depend on multi_insert_select_behavioral_analytics_create_table.sql
|
|
--
|
|
-- We don't need shard id sequence here, so commented out to prevent conflicts with concurrent tests
|
|
-- SET citus.next_shard_id TO 1400000;
|
|
-- Restart the job id sequence at a fixed value.
-- NOTE(review): presumably this keeps sequence-derived ids stable between runs - confirm.
ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 1400000;
|
|
|
|
--
|
|
-- UNIONs and JOINs mixed
|
|
--
|
|
-- Counts rows per event tag (reported as "types").
-- Four filters over events_table are tagged with the constants 0..3 and combined
-- with UNION; the combined rows are grouped by user_id, the tags are collected
-- with array_agg (ordered by time ASC, event DESC) and expanded again by unnest.
-- That subquery ("q") is inner-joined on user_id to the users whose value_1 is
-- strictly between 0 and 4. random() only adds a column; the outer query never
-- reads it.
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
|
FROM
|
|
( SELECT *, random()
|
|
FROM
|
|
( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
|
FROM
|
|
( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
|
FROM (
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 0 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 2) ) events_subquery_1)
|
|
UNION
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 1 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (3, 4) ) events_subquery_2)
|
|
UNION
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 2 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (5, 6) ) events_subquery_3)
|
|
UNION
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 3 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 6)) events_subquery_4)) t1
|
|
GROUP BY "t1"."user_id") AS t) "q"
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
value_1 > 0 and value_1 < 4) AS t
|
|
ON (t.user_id = q.user_id)) as final_query
|
|
GROUP BY
|
|
types
|
|
ORDER BY
|
|
types;
|
|
types | sumofeventtype
|
|
-------+----------------
|
|
0 | 449
|
|
1 | 433
|
|
2 | 75
|
|
3 | 268
|
|
(4 rows)
|
|
|
|
-- same query with target entries shuffled inside UNIONs
|
|
-- Counts rows per event tag (reported as "types").
-- Every UNION arm lists its columns in the order (time, event, user_id); the
-- enclosing aggregate still refers to them by name ("t1"."user_id", "t1"."time",
-- "t1"."event"). The result is inner-joined on user_id to the users whose value_1
-- is strictly between 0 and 4.
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
|
FROM
|
|
( SELECT *, random()
|
|
FROM
|
|
( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
|
FROM
|
|
( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
|
FROM (
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 0 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 2) ) events_subquery_1)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 1 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (3, 4) ) events_subquery_2)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 2 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (5, 6) ) events_subquery_3)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 3 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 6)) events_subquery_4)) t1
|
|
GROUP BY "t1"."user_id") AS t) "q"
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
value_1 > 0 and value_1 < 4) AS t
|
|
ON (t.user_id = q.user_id)) as final_query
|
|
GROUP BY
|
|
types
|
|
ORDER BY
|
|
types;
|
|
types | sumofeventtype
|
|
-------+----------------
|
|
0 | 449
|
|
1 | 433
|
|
2 | 75
|
|
3 | 268
|
|
(4 rows)
|
|
|
|
-- supported through recursive planning since events_subquery_2 doesn't have partition key on the target list
|
|
-- within the shuffled target list
|
|
-- Counts rows per event tag (reported as "types").
-- UNION arms list (time, event, user_id); the second arm supplies the expression
-- user_id * 2 in the third position instead of the bare user_id column.
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
|
FROM
|
|
( SELECT *, random()
|
|
FROM
|
|
( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
|
FROM
|
|
( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
|
FROM (
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 0 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 2) ) events_subquery_1)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
-- third column is an expression over user_id, not the column itself
"events"."time", 1 AS event, "events"."user_id" * 2
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (3, 4) ) events_subquery_2)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 2 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (5, 6) ) events_subquery_3)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 3 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 6)) events_subquery_4)) t1
|
|
GROUP BY "t1"."user_id") AS t) "q"
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
value_1 > 0 and value_1 < 4) AS t
|
|
ON (t.user_id = q.user_id)) as final_query
|
|
GROUP BY
|
|
types
|
|
ORDER BY
|
|
types;
|
|
types | sumofeventtype
|
|
-------+----------------
|
|
0 | 449
|
|
1 | 234
|
|
2 | 75
|
|
3 | 268
|
|
(4 rows)
|
|
|
|
-- supported through recursive planning since events_subquery_2 doesn't have partition key on the target list
|
|
-- Counts rows per event tag (reported as "types").
-- UNION arms list (time, event, user_id); the second arm puts value_2 in the
-- third position and merely aliases it as user_id.
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
|
FROM
|
|
( SELECT *, random()
|
|
FROM
|
|
( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
|
FROM
|
|
( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
|
FROM (
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 0 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 2) ) events_subquery_1)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
-- value_2 is exposed under the name user_id in this arm
"events"."time", 1 AS event, "events"."value_2" as user_id
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (3, 4) ) events_subquery_2)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 2 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (5, 6) ) events_subquery_3)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 3 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 6)) events_subquery_4)) t1
|
|
GROUP BY "t1"."user_id") AS t) "q"
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
value_1 > 0 and value_1 < 4) AS t
|
|
ON (t.user_id = q.user_id)) as final_query
|
|
GROUP BY
|
|
types
|
|
ORDER BY
|
|
types;
|
|
types | sumofeventtype
|
|
-------+----------------
|
|
0 | 449
|
|
1 | 369
|
|
2 | 75
|
|
3 | 268
|
|
(4 rows)
|
|
|
|
-- we can support arbitrary subqueries within UNIONs
|
|
-- Counts rows per event tag (reported as "types").
-- The second UNION arm wraps a nested subquery (events_subquery_5) that joins
-- events_table to users_table on user_id, keeps max(time) per events.user_id and
-- tags the rows with 0 - the same tag as the first arm. The other arms are plain
-- filters over events_table tagged 0, 2 and 3.
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
|
FROM
|
|
( SELECT
|
|
*, random()
|
|
FROM
|
|
(SELECT
|
|
"t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
|
FROM
|
|
( SELECT
|
|
"t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
|
FROM (
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 0 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 2) ) events_subquery_1)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(
|
|
SELECT * FROM
|
|
(
|
|
SELECT
|
|
max("events"."time"),
|
|
0 AS event,
|
|
"events"."user_id"
|
|
FROM
|
|
events_table as "events", users_table as "users"
|
|
WHERE
|
|
-- both sides of the join are compared on user_id
events.user_id = users.user_id AND
|
|
event_type IN (1, 2)
|
|
GROUP BY "events"."user_id"
|
|
) as events_subquery_5
|
|
) events_subquery_2)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 2 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (3, 4) ) events_subquery_3)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 3 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (5, 6)) events_subquery_4)
|
|
) t1
|
|
GROUP BY "t1"."user_id") AS t) "q"
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
value_1 > 0 and value_1 < 4) AS t
|
|
ON (t.user_id = q.user_id)) as final_query
|
|
GROUP BY
|
|
types
|
|
ORDER BY
|
|
types;
|
|
types | sumofeventtype
|
|
-------+----------------
|
|
0 | 449
|
|
2 | 433
|
|
3 | 75
|
|
(3 rows)
|
|
|
|
-- Turn repartition joins on for the statement that follows.
SET citus.enable_repartition_joins to ON;
|
|
-- Raise client verbosity so the DEBUG planner messages are part of the output.
SET client_min_messages TO DEBUG1;
|
|
-- recursively planned since events_subquery_5 is not joined on partition key
|
|
-- Counts rows per event tag (reported as "types").
-- The second UNION arm wraps a nested subquery (events_subquery_5) that joins
-- events_table to users_table by comparing events.user_id with users.value_2
-- (not users.user_id), keeps max(time) per events.user_id and tags the rows
-- with 0. The other arms are plain filters over events_table tagged 0, 2 and 3.
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
|
FROM
|
|
( SELECT
|
|
*, random()
|
|
FROM
|
|
(SELECT
|
|
"t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
|
FROM
|
|
( SELECT
|
|
"t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
|
FROM (
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 0 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 2) ) events_subquery_1)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(
|
|
SELECT * FROM
|
|
(
|
|
SELECT
|
|
max("events"."time"),
|
|
0 AS event,
|
|
"events"."user_id"
|
|
FROM
|
|
events_table as "events", users_table as "users"
|
|
WHERE
|
|
-- join compares user_id on one side with value_2 on the other
events.user_id = users.value_2 AND
|
|
event_type IN (1, 2)
|
|
GROUP BY "events"."user_id"
|
|
) as events_subquery_5
|
|
) events_subquery_2)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 2 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (3, 4) ) events_subquery_3)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 3 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (5, 6)) events_subquery_4)
|
|
) t1
|
|
GROUP BY "t1"."user_id") AS t) "q"
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
value_1 > 0 and value_1 < 4) AS t
|
|
ON (t.user_id = q.user_id)) as final_query
|
|
GROUP BY
|
|
types
|
|
ORDER BY
|
|
types;
|
|
DEBUG: cannot use real time executor with repartition jobs
|
|
HINT: Since you enabled citus.enable_repartition_joins Citus chose to use task-tracker.
|
|
DEBUG: generating subplan 16_1 for subquery SELECT max(events."time") AS max, 0 AS event, events.user_id FROM public.events_table events, public.users_table users WHERE ((events.user_id OPERATOR(pg_catalog.=) users.value_2) AND (events.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2]))) GROUP BY events.user_id
|
|
DEBUG: generating subplan 16_2 for subquery SELECT "time", event, user_id FROM (SELECT events."time", 0 AS event, events.user_id FROM public.events_table events WHERE (events.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2]))) events_subquery_1
|
|
DEBUG: generating subplan 16_3 for subquery SELECT "time", event, user_id FROM (SELECT events."time", 2 AS event, events.user_id FROM public.events_table events WHERE (events.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[3, 4]))) events_subquery_3
|
|
DEBUG: generating subplan 16_4 for subquery SELECT "time", event, user_id FROM (SELECT events."time", 3 AS event, events.user_id FROM public.events_table events WHERE (events.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6]))) events_subquery_4
|
|
DEBUG: generating subplan 16_5 for subquery SELECT intermediate_result."time", intermediate_result.event, intermediate_result.user_id FROM read_intermediate_result('16_2'::text, 'binary'::citus_copy_format) intermediate_result("time" timestamp without time zone, event integer, user_id integer) UNION SELECT events_subquery_2.max, events_subquery_2.event, events_subquery_2.user_id FROM (SELECT events_subquery_5.max, events_subquery_5.event, events_subquery_5.user_id FROM (SELECT intermediate_result.max, intermediate_result.event, intermediate_result.user_id FROM read_intermediate_result('16_1'::text, 'binary'::citus_copy_format) intermediate_result(max timestamp without time zone, event integer, user_id integer)) events_subquery_5) events_subquery_2 UNION SELECT intermediate_result."time", intermediate_result.event, intermediate_result.user_id FROM read_intermediate_result('16_3'::text, 'binary'::citus_copy_format) intermediate_result("time" timestamp without time zone, event integer, user_id integer) UNION SELECT intermediate_result."time", intermediate_result.event, intermediate_result.user_id FROM read_intermediate_result('16_4'::text, 'binary'::citus_copy_format) intermediate_result("time" timestamp without time zone, event integer, user_id integer)
|
|
DEBUG: Plan 16 query after replacing subqueries and CTEs: SELECT event_types AS types, count(*) AS sumofeventtype FROM (SELECT q.user_id, q."time", q.event_types, t.user_id, random() AS random FROM ((SELECT t_1.user_id, t_1."time", unnest(t_1.collected_events) AS event_types FROM (SELECT t1.user_id, min(t1."time") AS "time", array_agg(t1.event ORDER BY t1."time", t1.event DESC) AS collected_events FROM (SELECT intermediate_result."time", intermediate_result.event, intermediate_result.user_id FROM read_intermediate_result('16_5'::text, 'binary'::citus_copy_format) intermediate_result("time" timestamp without time zone, event integer, user_id integer)) t1 GROUP BY t1.user_id) t_1) q JOIN (SELECT users.user_id FROM public.users_table users WHERE ((users.value_1 OPERATOR(pg_catalog.>) 0) AND (users.value_1 OPERATOR(pg_catalog.<) 4))) t ON ((t.user_id OPERATOR(pg_catalog.=) q.user_id)))) final_query(user_id, "time", event_types, user_id_1, random) GROUP BY event_types ORDER BY event_types
|
|
types | sumofeventtype
|
|
-------+----------------
|
|
0 | 449
|
|
2 | 433
|
|
3 | 75
|
|
(3 rows)
|
|
|
|
-- Return client verbosity to its default.
RESET client_min_messages;
|
|
-- Turn repartition joins back off for the remaining statements.
SET citus.enable_repartition_joins to OFF;
|
|
-- recursively planned since the join is not an equi-join
|
|
-- Raise client verbosity so the DEBUG planner messages are part of the output.
SET client_min_messages TO DEBUG1;
|
|
-- Counts rows per event tag (reported as "types").
-- Four filters over events_table (tags 0..3) are combined with UNION, grouped by
-- user_id and unnested as subquery "q". Unlike an equality join, "q" is joined to
-- the filtered users with an inequality (t.user_id != q.user_id).
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
|
FROM
|
|
( SELECT *, random()
|
|
FROM
|
|
( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
|
FROM
|
|
( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
|
FROM (
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 0 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 2) ) events_subquery_1)
|
|
UNION
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 1 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (3, 4) ) events_subquery_2)
|
|
UNION
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 2 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (5, 6) ) events_subquery_3)
|
|
UNION
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 3 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (4, 5)) events_subquery_4)) t1
|
|
GROUP BY "t1"."user_id") AS t) "q"
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
value_1 > 0 and value_1 < 4) AS t
|
|
-- inequality join: every q row pairs with each filtered user having a different user_id
ON (t.user_id != q.user_id)) as final_query
|
|
GROUP BY
|
|
types
|
|
ORDER BY
|
|
types;
|
|
DEBUG: generating subplan 22_1 for subquery SELECT user_id, "time", unnest(collected_events) AS event_types FROM (SELECT t1.user_id, min(t1."time") AS "time", array_agg(t1.event ORDER BY t1."time", t1.event DESC) AS collected_events FROM (SELECT events_subquery_1.user_id, events_subquery_1."time", events_subquery_1.event FROM (SELECT events.user_id, events."time", 0 AS event FROM public.events_table events WHERE (events.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2]))) events_subquery_1 UNION SELECT events_subquery_2.user_id, events_subquery_2."time", events_subquery_2.event FROM (SELECT events.user_id, events."time", 1 AS event FROM public.events_table events WHERE (events.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[3, 4]))) events_subquery_2 UNION SELECT events_subquery_3.user_id, events_subquery_3."time", events_subquery_3.event FROM (SELECT events.user_id, events."time", 2 AS event FROM public.events_table events WHERE (events.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6]))) events_subquery_3 UNION SELECT events_subquery_4.user_id, events_subquery_4."time", events_subquery_4.event FROM (SELECT events.user_id, events."time", 3 AS event FROM public.events_table events WHERE (events.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[4, 5]))) events_subquery_4) t1 GROUP BY t1.user_id) t
|
|
DEBUG: Plan 22 query after replacing subqueries and CTEs: SELECT event_types AS types, count(*) AS sumofeventtype FROM (SELECT q.user_id, q."time", q.event_types, t.user_id, random() AS random FROM ((SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.event_types FROM read_intermediate_result('22_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, event_types integer)) q JOIN (SELECT users.user_id FROM public.users_table users WHERE ((users.value_1 OPERATOR(pg_catalog.>) 0) AND (users.value_1 OPERATOR(pg_catalog.<) 4))) t ON ((t.user_id OPERATOR(pg_catalog.<>) q.user_id)))) final_query(user_id, "time", event_types, user_id_1, random) GROUP BY event_types ORDER BY event_types
|
|
types | sumofeventtype
|
|
-------+----------------
|
|
0 | 2088
|
|
1 | 2163
|
|
2 | 397
|
|
3 | 1397
|
|
(4 rows)
|
|
|
|
-- Return client verbosity to its default.
RESET client_min_messages;
|
|
-- not supported since events_subquery_3 contains a non-equi join
|
|
-- Counts rows per event tag (reported as "types").
-- Same layout of four UNION arms, but the third arm reads from both events_table
-- and users_table and relates them with an inequality on user_id. The output
-- recorded after this statement is an ERROR rather than a result set.
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
|
FROM
|
|
( SELECT *, random()
|
|
FROM
|
|
( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
|
FROM
|
|
( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
|
FROM (
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 0 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 2) ) events_subquery_1)
|
|
UNION
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 1 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (3, 4) ) events_subquery_2)
|
|
UNION
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 2 AS event
|
|
FROM
|
|
events_table as "events", users_table as "users"
|
|
WHERE
|
|
-- the two tables are related only by an inequality on user_id
event_type IN (5, 6) AND users.user_id != events.user_id ) events_subquery_3)
|
|
UNION
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 3 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (4, 5)) events_subquery_4)) t1
|
|
GROUP BY "t1"."user_id") AS t) "q"
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
value_1 > 0 and value_1 < 4) AS t
|
|
ON (t.user_id = q.user_id)) as final_query
|
|
GROUP BY
|
|
types
|
|
ORDER BY
|
|
types;
|
|
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
|
|
-- similar query with more union statements (so that the UNION tree becomes larger)
|
|
-- Counts rows per event tag (reported as "types").
-- Seven filters over events_table are tagged with the constants 0..6 and combined
-- with UNION; several event_type values appear in more than one filter, so a
-- source row can contribute to several tags. The combined rows are grouped by
-- user_id, collected with array_agg (ordered by time ASC, event DESC), unnested,
-- and inner-joined on user_id to the users whose value_1 is strictly between
-- 0 and 4. Each arm's derived table carries its own sequential alias
-- (events_subquery_1 .. events_subquery_7).
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
|
FROM
|
|
( SELECT *, random()
|
|
FROM
|
|
( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
|
FROM
|
|
( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
|
FROM (
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 0 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 2) ) events_subquery_1)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 1 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (2, 3) ) events_subquery_2)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 2 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (3, 4) ) events_subquery_3)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 3 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (4, 5)) events_subquery_4)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 4 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (5, 6)) events_subquery_5)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 5 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (6, 1)) events_subquery_6)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 6 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (2, 5)) events_subquery_7)
|
|
) t1
|
|
GROUP BY "t1"."user_id") AS t) "q"
|
|
INNER JOIN
|
|
(SELECT "users"."user_id"
|
|
FROM users_table as "users"
|
|
WHERE value_1 > 0 and value_1 < 4) AS t ON (t.user_id = q.user_id)) as final_query
|
|
GROUP BY types
|
|
ORDER BY types;
|
|
types | sumofeventtype
|
|
-------+----------------
|
|
0 | 449
|
|
1 | 434
|
|
2 | 433
|
|
3 | 255
|
|
4 | 75
|
|
5 | 268
|
|
6 | 256
|
|
(7 rows)
|
|
|
|
--
|
|
-- UNION ALL Queries
|
|
--
|
|
-- Counts rows per event tag (reported as "types").
-- Four filters over events_table (tags 0..3) are combined with UNION ALL, so
-- duplicate rows are kept. The combined rows are grouped by user_id, collected
-- with array_agg, unnested, and inner-joined on user_id to the users whose
-- value_1 is strictly between 0 and 4.
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
|
FROM
|
|
( SELECT *, random()
|
|
FROM
|
|
( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
|
FROM
|
|
( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
|
FROM (
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 0 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 2) ) events_subquery_1)
|
|
UNION ALL
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 1 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (3, 4) ) events_subquery_2)
|
|
UNION ALL
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 2 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (5, 6) ) events_subquery_3)
|
|
UNION ALL
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 3 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (6, 1)) events_subquery_4)) t1
|
|
GROUP BY "t1"."user_id") AS t) "q"
|
|
INNER JOIN
|
|
(SELECT "users"."user_id"
|
|
FROM users_table as "users"
|
|
WHERE value_1 > 0 and value_1 < 4) AS t ON (t.user_id = q.user_id)) as final_query
|
|
GROUP BY types
|
|
ORDER BY types;
|
|
types | sumofeventtype
|
|
-------+----------------
|
|
0 | 449
|
|
1 | 433
|
|
2 | 75
|
|
3 | 268
|
|
(4 rows)
|
|
|
|
-- same query target list entries shuffled
|
|
-- Counts rows per event tag (reported as "types").
-- UNION ALL over four filters of events_table; every arm lists its columns in
-- the order (time, event, user_id), and the enclosing aggregate refers to them
-- by name.
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
|
FROM
|
|
( SELECT *, random()
|
|
FROM
|
|
( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
|
FROM
|
|
( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
|
FROM (
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 0 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 2) ) events_subquery_1)
|
|
UNION ALL
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 1 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (3, 4) ) events_subquery_2)
|
|
UNION ALL
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 2 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (5, 6) ) events_subquery_3)
|
|
UNION ALL
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 3 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 6)) events_subquery_4)) t1
|
|
GROUP BY "t1"."user_id") AS t) "q"
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
value_1 > 0 and value_1 < 4) AS t
|
|
ON (t.user_id = q.user_id)) as final_query
|
|
GROUP BY types
|
|
ORDER BY types;
|
|
types | sumofeventtype
|
|
-------+----------------
|
|
0 | 449
|
|
1 | 433
|
|
2 | 75
|
|
3 | 268
|
|
(4 rows)
|
|
|
|
-- supported through recursive planning since subquery 3 does not have partition key
|
|
-- Counts rows per event tag (reported as "types").
-- UNION ALL over four filters of events_table with columns (user_id, time,
-- event); the third arm supplies value_2 in the first position instead of
-- user_id.
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
|
FROM
|
|
( SELECT *, random()
|
|
FROM
|
|
( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
|
FROM
|
|
( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
|
FROM (
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 0 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 2) ) events_subquery_1)
|
|
UNION ALL
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 1 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (3, 4) ) events_subquery_2)
|
|
UNION ALL
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
-- first column is value_2 here; the other arms put user_id in this position
"events"."value_2", "events"."time", 2 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (5, 6) ) events_subquery_3)
|
|
UNION ALL
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 3 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 6)) events_subquery_4)) t1
|
|
GROUP BY "t1"."user_id") AS t) "q"
|
|
INNER JOIN
|
|
(SELECT "users"."user_id"
|
|
FROM users_table as "users"
|
|
WHERE value_1 > 0 and value_1 < 4) AS t ON (t.user_id = q.user_id)) as final_query
|
|
GROUP BY types
|
|
ORDER BY types;
|
|
types | sumofeventtype
|
|
-------+----------------
|
|
0 | 449
|
|
1 | 433
|
|
2 | 62
|
|
3 | 268
|
|
(4 rows)
|
|
|
|
-- supported through recursive planning since events_subquery_4 does not have partition key on the
|
|
-- target list
|
|
-- Counts rows per event tag (reported as "types").
-- UNION ALL over four filters of events_table with columns (time, event,
-- user_id); the fourth arm supplies the expression user_id * 2 in the third
-- position instead of the bare user_id column.
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
|
FROM
|
|
( SELECT *, random()
|
|
FROM
|
|
( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
|
FROM
|
|
( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
|
FROM (
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 0 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 2) ) events_subquery_1)
|
|
UNION ALL
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 1 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (3, 4) ) events_subquery_2)
|
|
UNION ALL
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 2 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (5, 6) ) events_subquery_3)
|
|
UNION ALL
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
-- third column is an expression over user_id, not the column itself
"events"."time", 3 AS event, "events"."user_id" * 2
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 6)) events_subquery_4)) t1
|
|
GROUP BY "t1"."user_id") AS t) "q"
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
value_1 > 0 and value_1 < 4) AS t
|
|
ON (t.user_id = q.user_id)) as final_query
|
|
GROUP BY types
|
|
ORDER BY types;
|
|
types | sumofeventtype
|
|
-------+----------------
|
|
0 | 449
|
|
1 | 433
|
|
2 | 75
|
|
3 | 124
|
|
(4 rows)
|
|
|
|
-- union all with inner and left joins
|
|
-- Reports the ten user_ids with the highest row counts (ties broken by user_id DESC).
-- "first_query": four filters over events_table (tags 0..3) combined with UNION
-- ALL, grouped by user_id, collected with array_agg and expanded with unnest.
-- "second_query": users with value_1 strictly between 0 and 4 that have no
-- matching row in t2 (LEFT OUTER JOIN on user_id followed by an IS NULL filter),
-- where t2 holds the distinct user_ids having event_type 0 or 6.
-- The two are inner-joined on user_id; random() is an extra column the outer
-- query never reads.
SELECT user_id, count(*) as cnt
|
|
FROM
|
|
(SELECT first_query.user_id, random()
|
|
FROM
|
|
( SELECT
|
|
"t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
|
FROM
|
|
( SELECT
|
|
"t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
|
FROM (
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 0 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 2) ) events_subquery_1)
|
|
UNION ALL
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 1 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (3, 4) ) events_subquery_2)
|
|
UNION ALL
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 2 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (5, 6) ) events_subquery_3)
|
|
UNION ALL
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 3 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 6)) events_subquery_4)) t1
|
|
GROUP BY "t1"."user_id") AS t) "first_query"
|
|
INNER JOIN
|
|
(SELECT "t"."user_id"
|
|
FROM
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
value_1 > 0 and value_1 < 4) AS t
|
|
LEFT OUTER JOIN
|
|
(
|
|
SELECT
|
|
DISTINCT "events"."user_id" as user_id
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (0, 6)
|
|
GROUP BY
|
|
user_id
|
|
) as t2
|
|
-- keep only the users without a match in t2
ON (t2.user_id = t.user_id) WHERE t2.user_id is NULL) as second_query
|
|
ON ("first_query".user_id = "second_query".user_id)) as final_query
|
|
GROUP BY
|
|
user_id ORDER BY cnt DESC, user_id DESC
|
|
LIMIT 10;
|
|
user_id | cnt
|
|
---------+-----
|
|
3 | 275
|
|
6 | 72
|
|
(2 rows)
|
|
|
|
-- recursively planned since the join between t and t2 is not an equi-join
|
|
-- union all with inner and left joins
|
|
-- Raise client verbosity so the DEBUG planner messages are part of the output.
SET client_min_messages TO DEBUG1;
|
|
-- Reports the ten user_ids with the highest row counts (ties broken by user_id DESC).
-- "first_query": four filters over events_table (tags 0..3) combined with UNION
-- ALL, grouped by user_id, collected with array_agg and expanded with unnest.
-- "second_query": users with value_1 strictly between 0 and 4 for which no t2
-- row has a larger user_id (LEFT OUTER JOIN on t2.user_id > t.user_id followed
-- by an IS NULL filter), where t2 holds the distinct user_ids having event_type
-- 0 or 6. The two are inner-joined on user_id.
SELECT user_id, count(*) as cnt
|
|
FROM
|
|
(SELECT first_query.user_id, random()
|
|
FROM
|
|
( SELECT
|
|
"t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
|
FROM
|
|
( SELECT
|
|
"t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
|
FROM (
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 0 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 2) ) events_subquery_1)
|
|
UNION ALL
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 1 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (3, 4) ) events_subquery_2)
|
|
UNION ALL
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 2 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (5, 6) ) events_subquery_3)
|
|
UNION ALL
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 3 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 6)) events_subquery_4)) t1
|
|
GROUP BY "t1"."user_id") AS t) "first_query"
|
|
INNER JOIN
|
|
(SELECT "t"."user_id"
|
|
FROM
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
value_1 > 0 and value_1 < 4) AS t
|
|
LEFT OUTER JOIN
|
|
(
|
|
SELECT
|
|
DISTINCT "events"."user_id" as user_id
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (0, 6)
|
|
GROUP BY
|
|
user_id
|
|
) as t2
|
|
-- the outer join condition is a greater-than comparison, not an equality
ON (t2.user_id > t.user_id) WHERE t2.user_id is NULL) as second_query
|
|
ON ("first_query".user_id = "second_query".user_id)) as final_query
|
|
GROUP BY
|
|
user_id ORDER BY cnt DESC, user_id DESC
|
|
LIMIT 10;
|
|
DEBUG: generating subplan 42_1 for subquery SELECT DISTINCT user_id FROM public.events_table events WHERE (event_type OPERATOR(pg_catalog.=) ANY (ARRAY[0, 6])) GROUP BY user_id
|
|
DEBUG: Plan 42 query after replacing subqueries and CTEs: SELECT user_id, count(*) AS cnt FROM (SELECT first_query.user_id, random() AS random FROM ((SELECT t.user_id, t."time", unnest(t.collected_events) AS event_types FROM (SELECT t1.user_id, min(t1."time") AS "time", array_agg(t1.event ORDER BY t1."time", t1.event DESC) AS collected_events FROM (SELECT events_subquery_1.user_id, events_subquery_1."time", events_subquery_1.event FROM (SELECT events.user_id, events."time", 0 AS event FROM public.events_table events WHERE (events.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2]))) events_subquery_1 UNION ALL SELECT events_subquery_2.user_id, events_subquery_2."time", events_subquery_2.event FROM (SELECT events.user_id, events."time", 1 AS event FROM public.events_table events WHERE (events.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[3, 4]))) events_subquery_2 UNION ALL SELECT events_subquery_3.user_id, events_subquery_3."time", events_subquery_3.event FROM (SELECT events.user_id, events."time", 2 AS event FROM public.events_table events WHERE (events.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6]))) events_subquery_3 UNION ALL SELECT events_subquery_4.user_id, events_subquery_4."time", events_subquery_4.event FROM (SELECT events.user_id, events."time", 3 AS event FROM public.events_table events WHERE (events.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 6]))) events_subquery_4) t1 GROUP BY t1.user_id) t) first_query JOIN (SELECT t.user_id FROM ((SELECT users.user_id FROM public.users_table users WHERE ((users.value_1 OPERATOR(pg_catalog.>) 0) AND (users.value_1 OPERATOR(pg_catalog.<) 4))) t LEFT JOIN (SELECT intermediate_result.user_id FROM read_intermediate_result('42_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) t2 ON ((t2.user_id OPERATOR(pg_catalog.>) t.user_id))) WHERE (t2.user_id IS NULL)) second_query ON ((first_query.user_id OPERATOR(pg_catalog.=) second_query.user_id)))) final_query GROUP BY user_id ORDER BY 
(count(*)) DESC, user_id DESC LIMIT 10
|
|
user_id | cnt
|
|
---------+-----
|
|
5 | 324
|
|
6 | 72
|
|
(2 rows)
|
|
|
|
RESET client_min_messages;
|
|
--
|
|
-- Union, inner join and left join
|
|
--
|
|
SELECT user_id, count(*) as cnt
|
|
FROM
|
|
(SELECT first_query.user_id, random()
|
|
FROM
|
|
( SELECT
|
|
"t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
|
FROM
|
|
( SELECT
|
|
"t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
|
FROM (
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 0 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 2) ) events_subquery_1)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 1 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (3, 4) ) events_subquery_2)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 2 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (5, 6) ) events_subquery_3)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 3 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 6)) events_subquery_4)) t1
|
|
GROUP BY "t1"."user_id") AS t) "first_query"
|
|
INNER JOIN
|
|
(SELECT "t"."user_id"
|
|
FROM
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
value_1 > 0 and value_1 < 4) AS t
|
|
LEFT OUTER JOIN
|
|
(
|
|
SELECT
|
|
DISTINCT "events"."user_id" as user_id
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (0, 6)
|
|
GROUP BY
|
|
user_id
|
|
) as t2
|
|
ON (t2.user_id = t.user_id) WHERE t2.user_id is NULL) as second_query
|
|
ON ("first_query".user_id = "second_query".user_id)) as final_query
|
|
GROUP BY
|
|
user_id ORDER BY cnt DESC, user_id DESC
|
|
LIMIT 10;
|
|
user_id | cnt
|
|
---------+-----
|
|
3 | 275
|
|
6 | 72
|
|
(2 rows)
|
|
|
|
-- Simple LATERAL JOINs with GROUP BYs on each side
|
|
-- need to enable subquery_pushdown because the next 2 queries use LIMIT in subqueries
|
|
SET citus.subquery_pushdown to ON;
|
|
SELECT *
|
|
FROM
|
|
(SELECT "some_users_data".user_id, lastseen
|
|
FROM
|
|
(SELECT user_id, max(time) AS lastseen
|
|
FROM
|
|
(SELECT user_id, time
|
|
FROM
|
|
(SELECT
|
|
user_id, time
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 1 and user_id < 4) "events_1"
|
|
ORDER BY
|
|
time DESC
|
|
LIMIT 1000) "recent_events_1"
|
|
GROUP BY
|
|
user_id
|
|
ORDER BY
|
|
max(time) DESC) "some_recent_users"
|
|
JOIN LATERAL
|
|
(SELECT
|
|
"users".user_id
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
"users"."user_id" = "some_recent_users"."user_id" AND
|
|
users.value_2 > 1 and users.value_2 < 3
|
|
LIMIT 1) "some_users_data"
|
|
ON TRUE
|
|
ORDER BY
|
|
lastseen DESC
|
|
LIMIT 50) "some_users"
|
|
order BY
|
|
user_id
|
|
LIMIT 50;
|
|
user_id | lastseen
|
|
---------+---------------------------------
|
|
2 | Thu Nov 23 17:26:14.563216 2017
|
|
3 | Thu Nov 23 18:08:26.550729 2017
|
|
(2 rows)
|
|
|
|
-- same query with subquery joins in topmost select
|
|
SELECT "some_users_data".user_id, lastseen
|
|
FROM
|
|
(SELECT user_id, max(time) AS lastseen
|
|
FROM
|
|
(SELECT user_id, time
|
|
FROM
|
|
(SELECT
|
|
user_id, time
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 1 and user_id < 4) "events_1"
|
|
ORDER BY
|
|
time DESC
|
|
LIMIT 1000) "recent_events_1"
|
|
GROUP BY
|
|
user_id
|
|
ORDER BY
|
|
max(TIME) DESC) "some_recent_users"
|
|
JOIN LATERAL
|
|
(SELECT
|
|
"users".user_id
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
"users"."user_id" = "some_recent_users"."user_id" AND
|
|
users.value_2 > 1 and users.value_2 < 3
|
|
LIMIT 1) "some_users_data"
|
|
ON TRUE
|
|
ORDER BY
|
|
user_id
|
|
limit 50;
|
|
user_id | lastseen
|
|
---------+---------------------------------
|
|
2 | Thu Nov 23 17:26:14.563216 2017
|
|
3 | Thu Nov 23 18:08:26.550729 2017
|
|
(2 rows)
|
|
|
|
-- reset subquery_pushdown
|
|
SET citus.subquery_pushdown to OFF;
|
|
-- we recursively plan recent_events_1
|
|
-- but not some_users_data since it has a reference
|
|
-- from an outer query which is not recursively planned
|
|
SELECT "some_users_data".user_id, lastseen
|
|
FROM
|
|
(SELECT user_id, max(time) AS lastseen
|
|
FROM
|
|
(SELECT user_id, time
|
|
FROM
|
|
(SELECT
|
|
user_id, time
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 1 and user_id < 4) "events_1"
|
|
ORDER BY
|
|
time DESC
|
|
LIMIT 1000) "recent_events_1"
|
|
GROUP BY
|
|
user_id
|
|
ORDER BY
|
|
max(TIME) DESC) "some_recent_users"
|
|
JOIN LATERAL
|
|
(SELECT
|
|
"users".user_id
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
"users"."value_1" = "some_recent_users"."user_id" AND
|
|
users.value_2 > 1 and users.value_2 < 3
|
|
LIMIT 1) "some_users_data"
|
|
ON TRUE
|
|
ORDER BY
|
|
user_id
|
|
limit 50;
|
|
ERROR: cannot push down this subquery
|
|
DETAIL: Limit in subquery is currently unsupported when a subquery references a column from another query
|
|
-- we recursively plan some queries but fail in the end
|
|
-- since some_users_data has a reference
|
|
-- from an outer query which is not recursively planned
|
|
SELECT "some_users_data".user_id, lastseen
|
|
FROM
|
|
(SELECT 2 * user_id as user_id, max(time) AS lastseen
|
|
FROM
|
|
(SELECT user_id, time
|
|
FROM
|
|
(SELECT
|
|
user_id, time
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 1 and user_id < 4) "events_1"
|
|
ORDER BY
|
|
time DESC
|
|
LIMIT 1000) "recent_events_1"
|
|
GROUP BY
|
|
user_id
|
|
ORDER BY
|
|
max(TIME) DESC) "some_recent_users"
|
|
JOIN LATERAL
|
|
(SELECT
|
|
"users".user_id
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
"users"."user_id" = "some_recent_users"."user_id" AND
|
|
users.value_2 > 1 and users.value_2 < 3
|
|
LIMIT 1) "some_users_data"
|
|
ON TRUE
|
|
ORDER BY
|
|
user_id
|
|
limit 50;
|
|
ERROR: cannot push down this subquery
|
|
DETAIL: Limit in subquery is currently unsupported when a subquery references a column from another query
|
|
-- LATERAL JOINs used with INNER JOINs
|
|
SET citus.subquery_pushdown to ON;
|
|
SELECT user_id, lastseen
|
|
FROM
|
|
(SELECT
|
|
"some_users_data".user_id, lastseen
|
|
FROM
|
|
(SELECT
|
|
filter_users_1.user_id, time AS lastseen
|
|
FROM
|
|
(SELECT
|
|
user_where_1_1.user_id
|
|
FROM
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 3 and value_1 > 2) user_where_1_1
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 3 and value_2 > 3) user_where_1_join_1
|
|
ON ("user_where_1_1".user_id = "user_where_1_join_1".user_id))
|
|
filter_users_1
|
|
JOIN LATERAL
|
|
(SELECT
|
|
user_id, time
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 1 and user_id < 3 AND
|
|
user_id = filter_users_1.user_id
|
|
ORDER BY
|
|
time DESC
|
|
LIMIT 1) "last_events_1"
|
|
ON TRUE
|
|
ORDER BY
|
|
time DESC
|
|
LIMIT 10) "some_recent_users"
|
|
JOIN LATERAL
|
|
(SELECT
|
|
"users".user_id
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
"users"."user_id" = "some_recent_users"."user_id" AND
|
|
"users"."value_2" > 4
|
|
LIMIT 1) "some_users_data"
|
|
ON TRUE
|
|
ORDER BY
|
|
lastseen DESC
|
|
LIMIT 10) "some_users"
|
|
ORDER BY
|
|
user_id DESC, lastseen DESC
|
|
LIMIT 10;
|
|
user_id | lastseen
|
|
---------+---------------------------------
|
|
2 | Thu Nov 23 17:26:14.563216 2017
|
|
2 | Thu Nov 23 17:26:14.563216 2017
|
|
2 | Thu Nov 23 17:26:14.563216 2017
|
|
2 | Thu Nov 23 17:26:14.563216 2017
|
|
2 | Thu Nov 23 17:26:14.563216 2017
|
|
2 | Thu Nov 23 17:26:14.563216 2017
|
|
2 | Thu Nov 23 17:26:14.563216 2017
|
|
2 | Thu Nov 23 17:26:14.563216 2017
|
|
2 | Thu Nov 23 17:26:14.563216 2017
|
|
2 | Thu Nov 23 17:26:14.563216 2017
|
|
(10 rows)
|
|
|
|
--
|
|
-- A similar query where the topmost select is dropped
|
|
-- and replaced by aggregation. Notice the heavy use of limit
|
|
--
|
|
SELECT "some_users_data".user_id, MAX(lastseen), count(*)
|
|
FROM
|
|
(SELECT
|
|
filter_users_1.user_id, time AS lastseen
|
|
FROM
|
|
(SELECT
|
|
user_where_1_1.user_id
|
|
FROM
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 3 and value_1 > 2) user_where_1_1
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 3 and value_2 > 3) user_where_1_join_1
|
|
ON ("user_where_1_1".user_id = "user_where_1_join_1".user_id)) filter_users_1
|
|
JOIN LATERAL
|
|
(SELECT
|
|
user_id, time
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 1 and user_id < 3 and user_id = filter_users_1.user_id
|
|
ORDER BY
|
|
time DESC
|
|
LIMIT 1) "last_events_1" ON true
|
|
ORDER BY time DESC
|
|
LIMIT 10) "some_recent_users"
|
|
JOIN LATERAL
|
|
(SELECT
|
|
"users".user_id
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
"users"."user_id" = "some_recent_users"."user_id" AND
|
|
"users"."value_2" > 4
|
|
LIMIT 1) "some_users_data" ON true
|
|
GROUP BY 1
|
|
ORDER BY 2, 1 DESC
|
|
LIMIT 10;
|
|
user_id | max | count
|
|
---------+---------------------------------+-------
|
|
2 | Thu Nov 23 17:26:14.563216 2017 | 10
|
|
(1 row)
|
|
|
|
SET citus.subquery_pushdown to OFF;
|
|
-- not supported since the inner JOIN is not equi join and LATERAL JOIN prevents recursive planning
|
|
SET client_min_messages TO DEBUG2;
|
|
SELECT user_id, lastseen
|
|
FROM
|
|
(SELECT
|
|
"some_users_data".user_id, lastseen
|
|
FROM
|
|
(SELECT
|
|
filter_users_1.user_id, time AS lastseen
|
|
FROM
|
|
(SELECT
|
|
user_where_1_1.user_id
|
|
FROM
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and value_1 > 2) user_where_1_1
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and value_2 > 3) user_where_1_join_1
|
|
ON ("user_where_1_1".user_id != "user_where_1_join_1".user_id)) filter_users_1
|
|
JOIN LATERAL
|
|
(SELECT
|
|
user_id, time
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and user_id = filter_users_1.user_id
|
|
ORDER BY
|
|
time DESC
|
|
LIMIT 1) "last_events_1" ON true
|
|
ORDER BY time DESC
|
|
LIMIT 10) "some_recent_users"
|
|
JOIN LATERAL
|
|
(SELECT
|
|
"users".user_id
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
"users"."user_id" = "some_recent_users"."user_id" AND
|
|
"users"."value_2" > 4
|
|
LIMIT 1) "some_users_data" ON true
|
|
ORDER BY
|
|
lastseen DESC
|
|
LIMIT 10) "some_users"
|
|
ORDER BY
|
|
user_id DESC, lastseen DESC
|
|
LIMIT 10;
|
|
DEBUG: generating subplan 53_1 for subquery SELECT user_id FROM public.users_table users WHERE ((user_id OPERATOR(pg_catalog.>) 1) AND (user_id OPERATOR(pg_catalog.<) 4) AND (value_2 OPERATOR(pg_catalog.>) 3))
|
|
DEBUG: skipping recursive planning for the subquery since it contains references to outer queries
|
|
DEBUG: skipping recursive planning for the subquery since it contains references to outer queries
|
|
ERROR: cannot push down this subquery
|
|
DETAIL: Limit in subquery is currently unsupported when a subquery references a column from another query
|
|
SET citus.enable_repartition_joins to ON;
|
|
SET client_min_messages TO DEBUG1;
|
|
-- recursively planned since the inner JOIN is not on the partition key
|
|
SELECT user_id, lastseen
|
|
FROM
|
|
(SELECT
|
|
"some_users_data".user_id, lastseen
|
|
FROM
|
|
(SELECT
|
|
filter_users_1.user_id, time AS lastseen
|
|
FROM
|
|
(SELECT
|
|
user_where_1_1.user_id
|
|
FROM
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and value_1 > 2) user_where_1_1
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id", "users"."value_1"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and value_2 > 3) user_where_1_join_1
|
|
ON ("user_where_1_1".user_id = "user_where_1_join_1".value_1)) filter_users_1
|
|
JOIN LATERAL
|
|
(SELECT
|
|
user_id, time
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and user_id = filter_users_1.user_id
|
|
ORDER BY
|
|
time DESC
|
|
LIMIT 1) "last_events_1" ON true
|
|
ORDER BY time DESC
|
|
LIMIT 10) "some_recent_users"
|
|
JOIN LATERAL
|
|
(SELECT
|
|
"users".user_id
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
"users"."user_id" = "some_recent_users"."user_id" AND
|
|
"users"."value_2" > 4
|
|
LIMIT 1) "some_users_data" ON true
|
|
ORDER BY
|
|
lastseen DESC
|
|
LIMIT 10) "some_users"
|
|
ORDER BY
|
|
user_id DESC, lastseen DESC
|
|
LIMIT 10;
|
|
DEBUG: generating subplan 56_1 for subquery SELECT user_id, value_1 FROM public.users_table users WHERE ((user_id OPERATOR(pg_catalog.>) 1) AND (user_id OPERATOR(pg_catalog.<) 4) AND (value_2 OPERATOR(pg_catalog.>) 3))
|
|
ERROR: cannot push down this subquery
|
|
DETAIL: Limit in subquery is currently unsupported when a subquery references a column from another query
|
|
SET citus.enable_repartition_joins to OFF;
|
|
RESET client_min_messages;
|
|
-- not supported since upper LATERAL JOIN is not equi join
|
|
SELECT user_id, lastseen
|
|
FROM
|
|
(SELECT
|
|
"some_users_data".user_id, lastseen
|
|
FROM
|
|
(SELECT
|
|
filter_users_1.user_id, time AS lastseen
|
|
FROM
|
|
(SELECT
|
|
user_where_1_1.user_id
|
|
FROM
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 3 and value_1 > 2) user_where_1_1
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id", "users"."value_1"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 3 and value_2 > 3) user_where_1_join_1
|
|
ON ("user_where_1_1".user_id = "user_where_1_join_1".user_id)) filter_users_1
|
|
JOIN LATERAL
|
|
(SELECT
|
|
user_id, time
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 1 and user_id < 3 and user_id != filter_users_1.user_id
|
|
ORDER BY
|
|
time DESC
|
|
LIMIT 1) "last_events_1" ON true
|
|
ORDER BY time DESC
|
|
LIMIT 10) "some_recent_users"
|
|
JOIN LATERAL
|
|
(SELECT
|
|
"users".user_id
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
"users"."user_id" = "some_recent_users"."user_id" AND
|
|
"users"."value_2" > 4
|
|
LIMIT 1) "some_users_data" ON true
|
|
ORDER BY
|
|
lastseen DESC
|
|
LIMIT 10) "some_users"
|
|
ORDER BY
|
|
user_id DESC, lastseen DESC
|
|
LIMIT 10;
|
|
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
|
|
-- not pushdownable since lower LATERAL JOIN is not on the partition key
|
|
-- not recursively plannable due to LATERAL join where there is a reference
|
|
-- from an outer query
|
|
SELECT user_id, lastseen
|
|
FROM
|
|
(SELECT
|
|
"some_users_data".user_id, lastseen
|
|
FROM
|
|
(SELECT
|
|
filter_users_1.user_id, time AS lastseen
|
|
FROM
|
|
(SELECT
|
|
user_where_1_1.user_id
|
|
FROM
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 3 and value_1 > 2) user_where_1_1
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id", "users"."value_1"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 3 and value_2 > 3) user_where_1_join_1
|
|
ON ("user_where_1_1".user_id = "user_where_1_join_1".user_id)) filter_users_1
|
|
JOIN LATERAL
|
|
(SELECT
|
|
user_id, time
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 1 and user_id < 3 and user_id = filter_users_1.user_id
|
|
ORDER BY
|
|
time DESC
|
|
LIMIT 1) "last_events_1" ON true
|
|
ORDER BY time DESC
|
|
LIMIT 10) "some_recent_users"
|
|
JOIN LATERAL
|
|
(SELECT
|
|
"users".user_id
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
"users"."value_1" = "some_recent_users"."user_id" AND
|
|
"users"."value_2" > 4
|
|
LIMIT 1) "some_users_data" ON true
|
|
ORDER BY
|
|
lastseen DESC
|
|
LIMIT 10) "some_users"
|
|
ORDER BY
|
|
user_id DESC, lastseen DESC
|
|
LIMIT 10;
|
|
ERROR: cannot push down this subquery
|
|
DETAIL: Limit in subquery is currently unsupported when a subquery references a column from another query
|
|
-- NESTED INNER JOINs
|
|
SELECT
|
|
count(*) AS value, "generated_group_field"
|
|
FROM
|
|
(SELECT
|
|
DISTINCT "pushedDownQuery"."real_user_id", "generated_group_field"
|
|
FROM
|
|
(SELECT
|
|
"eventQuery"."real_user_id", "eventQuery"."time", random(), ("eventQuery"."value_2") AS "generated_group_field"
|
|
FROM
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."time", "events"."user_id", "events"."value_2"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 AND event_type IN (4, 5) ) "temp_data_queries"
|
|
INNER JOIN
|
|
(SELECT
|
|
user_where_1_1.real_user_id
|
|
FROM
|
|
(SELECT
|
|
"users"."user_id" as real_user_id
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and value_2 > 3 ) user_where_1_1
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and value_3 > 3 ) user_where_1_join_1
|
|
ON ("user_where_1_1".real_user_id = "user_where_1_join_1".user_id)) "user_filters_1"
|
|
ON ("temp_data_queries".user_id = "user_filters_1".real_user_id)) "eventQuery") "pushedDownQuery") "pushedDownQuery"
|
|
GROUP BY
|
|
"generated_group_field"
|
|
ORDER BY
|
|
generated_group_field DESC, value DESC;
|
|
value | generated_group_field
|
|
-------+-----------------------
|
|
1 | 5
|
|
2 | 2
|
|
2 | 1
|
|
1 | 0
|
|
(4 rows)
|
|
|
|
SET citus.enable_repartition_joins to ON;
|
|
SET client_min_messages TO DEBUG1;
|
|
-- recursively planned since the first inner join is not on the partition key
|
|
SELECT
|
|
count(*) AS value, "generated_group_field"
|
|
FROM
|
|
(SELECT
|
|
DISTINCT "pushedDownQuery"."real_user_id", "generated_group_field"
|
|
FROM
|
|
(SELECT
|
|
"eventQuery"."real_user_id", "eventQuery"."time", random(), ("eventQuery"."value_2") AS "generated_group_field"
|
|
FROM
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."time", "events"."user_id", "events"."value_2"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 AND event_type IN (4, 5) ) "temp_data_queries"
|
|
INNER JOIN
|
|
(SELECT
|
|
user_where_1_1.real_user_id
|
|
FROM
|
|
(SELECT
|
|
"users"."user_id" as real_user_id
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and value_2 > 3 ) user_where_1_1
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id", "users"."value_2"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and value_3 > 3 ) user_where_1_join_1
|
|
ON ("user_where_1_1".real_user_id = "user_where_1_join_1".value_2)) "user_filters_1"
|
|
ON ("temp_data_queries".user_id = "user_filters_1".real_user_id)) "eventQuery") "pushedDownQuery") "pushedDownQuery"
|
|
GROUP BY
|
|
"generated_group_field"
|
|
ORDER BY
|
|
generated_group_field DESC, value DESC;
|
|
DEBUG: generating subplan 64_1 for subquery SELECT user_id, value_2 FROM public.users_table users WHERE ((user_id OPERATOR(pg_catalog.>) 1) AND (user_id OPERATOR(pg_catalog.<) 4) AND (value_3 OPERATOR(pg_catalog.>) (3)::double precision))
|
|
DEBUG: Plan 64 query after replacing subqueries and CTEs: SELECT count(*) AS value, generated_group_field FROM (SELECT DISTINCT "pushedDownQuery_1".real_user_id, "pushedDownQuery_1".generated_group_field FROM (SELECT "eventQuery".real_user_id, "eventQuery"."time", random() AS random, "eventQuery".value_2 AS generated_group_field FROM (SELECT temp_data_queries."time", temp_data_queries.user_id, temp_data_queries.value_2, user_filters_1.real_user_id FROM ((SELECT events."time", events.user_id, events.value_2 FROM public.events_table events WHERE ((events.user_id OPERATOR(pg_catalog.>) 1) AND (events.user_id OPERATOR(pg_catalog.<) 4) AND (events.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[4, 5])))) temp_data_queries JOIN (SELECT user_where_1_1.real_user_id FROM ((SELECT users.user_id AS real_user_id FROM public.users_table users WHERE ((users.user_id OPERATOR(pg_catalog.>) 1) AND (users.user_id OPERATOR(pg_catalog.<) 4) AND (users.value_2 OPERATOR(pg_catalog.>) 3))) user_where_1_1 JOIN (SELECT intermediate_result.user_id, intermediate_result.value_2 FROM read_intermediate_result('64_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_2 integer)) user_where_1_join_1 ON ((user_where_1_1.real_user_id OPERATOR(pg_catalog.=) user_where_1_join_1.value_2)))) user_filters_1 ON ((temp_data_queries.user_id OPERATOR(pg_catalog.=) user_filters_1.real_user_id)))) "eventQuery") "pushedDownQuery_1") "pushedDownQuery" GROUP BY generated_group_field ORDER BY generated_group_field DESC, (count(*)) DESC
|
|
value | generated_group_field
|
|
-------+-----------------------
|
|
1 | 5
|
|
2 | 2
|
|
2 | 1
|
|
1 | 0
|
|
(4 rows)
|
|
|
|
-- recursive planning kicked-in since the non-equi join is among subqueries
|
|
SELECT
|
|
count(*) AS value, "generated_group_field"
|
|
FROM
|
|
(SELECT
|
|
DISTINCT "pushedDownQuery"."real_user_id", "generated_group_field"
|
|
FROM
|
|
(SELECT
|
|
"eventQuery"."real_user_id", "eventQuery"."time", random(), ("eventQuery"."value_2") AS "generated_group_field"
|
|
FROM
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."time", "events"."user_id", "events"."value_2"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 AND event_type IN (4, 5) ) "temp_data_queries"
|
|
INNER JOIN
|
|
(SELECT
|
|
user_where_1_1.real_user_id
|
|
FROM
|
|
(SELECT
|
|
"users"."user_id" as real_user_id
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and value_2 > 3 ) user_where_1_1
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id", "users"."value_2"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and value_3 > 3 ) user_where_1_join_1
|
|
ON ("user_where_1_1".real_user_id >= "user_where_1_join_1".user_id)) "user_filters_1"
|
|
ON ("temp_data_queries".user_id = "user_filters_1".real_user_id)) "eventQuery") "pushedDownQuery") "pushedDownQuery"
|
|
GROUP BY
|
|
"generated_group_field"
|
|
ORDER BY
|
|
generated_group_field DESC, value DESC;
|
|
DEBUG: generating subplan 66_1 for subquery SELECT user_id, value_2 FROM public.users_table users WHERE ((user_id OPERATOR(pg_catalog.>) 1) AND (user_id OPERATOR(pg_catalog.<) 4) AND (value_3 OPERATOR(pg_catalog.>) (3)::double precision))
|
|
DEBUG: Plan 66 query after replacing subqueries and CTEs: SELECT count(*) AS value, generated_group_field FROM (SELECT DISTINCT "pushedDownQuery_1".real_user_id, "pushedDownQuery_1".generated_group_field FROM (SELECT "eventQuery".real_user_id, "eventQuery"."time", random() AS random, "eventQuery".value_2 AS generated_group_field FROM (SELECT temp_data_queries."time", temp_data_queries.user_id, temp_data_queries.value_2, user_filters_1.real_user_id FROM ((SELECT events."time", events.user_id, events.value_2 FROM public.events_table events WHERE ((events.user_id OPERATOR(pg_catalog.>) 1) AND (events.user_id OPERATOR(pg_catalog.<) 4) AND (events.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[4, 5])))) temp_data_queries JOIN (SELECT user_where_1_1.real_user_id FROM ((SELECT users.user_id AS real_user_id FROM public.users_table users WHERE ((users.user_id OPERATOR(pg_catalog.>) 1) AND (users.user_id OPERATOR(pg_catalog.<) 4) AND (users.value_2 OPERATOR(pg_catalog.>) 3))) user_where_1_1 JOIN (SELECT intermediate_result.user_id, intermediate_result.value_2 FROM read_intermediate_result('66_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_2 integer)) user_where_1_join_1 ON ((user_where_1_1.real_user_id OPERATOR(pg_catalog.>=) user_where_1_join_1.user_id)))) user_filters_1 ON ((temp_data_queries.user_id OPERATOR(pg_catalog.=) user_filters_1.real_user_id)))) "eventQuery") "pushedDownQuery_1") "pushedDownQuery" GROUP BY generated_group_field ORDER BY generated_group_field DESC, (count(*)) DESC
|
|
value | generated_group_field
|
|
-------+-----------------------
|
|
1 | 5
|
|
2 | 2
|
|
2 | 1
|
|
1 | 0
|
|
(4 rows)
|
|
|
|
|
|
SET citus.enable_repartition_joins to OFF;
|
|
RESET client_min_messages;
|
|
-- single level inner joins
|
|
SELECT
|
|
"value_3", count(*) AS cnt
|
|
FROM
|
|
(SELECT
|
|
"value_3", "user_id", random()
|
|
FROM
|
|
(SELECT
|
|
users_in_segment_1.user_id, value_3
|
|
FROM
|
|
(SELECT
|
|
user_id, value_3 * 2 as value_3
|
|
FROM
|
|
(SELECT
|
|
user_id, value_3
|
|
FROM
|
|
(SELECT
|
|
"users"."user_id", value_3
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and value_2 > 2
|
|
) simple_user_where_1
|
|
) all_buckets_1
|
|
) users_in_segment_1
|
|
JOIN
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and value_2 > 3
|
|
) some_users_data
|
|
ON ("users_in_segment_1".user_id = "some_users_data".user_id)
|
|
) segmentalias_1) "tempQuery"
|
|
GROUP BY "value_3"
|
|
ORDER BY cnt, value_3 DESC LIMIT 10;
|
|
value_3 | cnt
|
|
---------+-----
|
|
0 | 7
|
|
10 | 21
|
|
4 | 21
|
|
8 | 28
|
|
6 | 28
|
|
2 | 35
|
|
(6 rows)
|
|
|
|
SET citus.enable_repartition_joins to ON;
|
|
SET client_min_messages TO DEBUG1;
|
|
-- although there is no column equality at all
|
|
-- still recursive planning plans "some_users_data"
|
|
-- and the query becomes OK
|
|
SELECT
|
|
"value_3", count(*) AS cnt
|
|
FROM
|
|
(SELECT
|
|
"value_3", "user_id", random()
|
|
FROM
|
|
(SELECT
|
|
users_in_segment_1.user_id, value_3
|
|
FROM
|
|
(SELECT
|
|
user_id, value_3 * 2 as value_3
|
|
FROM
|
|
(SELECT
|
|
user_id, value_3
|
|
FROM
|
|
(SELECT
|
|
"users"."user_id", value_3
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and value_2 > 2
|
|
) simple_user_where_1
|
|
) all_buckets_1
|
|
) users_in_segment_1
|
|
JOIN
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and value_2 > 3
|
|
) some_users_data
|
|
ON (true)
|
|
) segmentalias_1) "tempQuery"
|
|
GROUP BY "value_3"
|
|
ORDER BY cnt, value_3 DESC LIMIT 10;
|
|
DEBUG: generating subplan 69_1 for subquery SELECT user_id FROM public.users_table users WHERE ((user_id OPERATOR(pg_catalog.>) 1) AND (user_id OPERATOR(pg_catalog.<) 4) AND (value_2 OPERATOR(pg_catalog.>) 3))
|
|
DEBUG: Plan 69 query after replacing subqueries and CTEs: SELECT value_3, count(*) AS cnt FROM (SELECT segmentalias_1.value_3, segmentalias_1.user_id, random() AS random FROM (SELECT users_in_segment_1.user_id, users_in_segment_1.value_3 FROM ((SELECT all_buckets_1.user_id, (all_buckets_1.value_3 OPERATOR(pg_catalog.*) (2)::double precision) AS value_3 FROM (SELECT simple_user_where_1.user_id, simple_user_where_1.value_3 FROM (SELECT users.user_id, users.value_3 FROM public.users_table users WHERE ((users.user_id OPERATOR(pg_catalog.>) 1) AND (users.user_id OPERATOR(pg_catalog.<) 4) AND (users.value_2 OPERATOR(pg_catalog.>) 2))) simple_user_where_1) all_buckets_1) users_in_segment_1 JOIN (SELECT intermediate_result.user_id FROM read_intermediate_result('69_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) some_users_data ON (true))) segmentalias_1) "tempQuery" GROUP BY value_3 ORDER BY (count(*)), value_3 DESC LIMIT 10
|
|
value_3 | cnt
|
|
---------+-----
|
|
0 | 14
|
|
10 | 42
|
|
4 | 42
|
|
8 | 56
|
|
6 | 56
|
|
2 | 70
|
|
(6 rows)
|
|
|
|
SET citus.enable_repartition_joins to OFF;
|
|
RESET client_min_messages;
|
|
-- nested LATERAL JOINs
|
|
SET citus.subquery_pushdown to ON;
|
|
SELECT *
|
|
FROM
|
|
(SELECT "some_users_data".user_id, "some_recent_users".value_3
|
|
FROM
|
|
(SELECT
|
|
filter_users_1.user_id, value_3
|
|
FROM
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and users.value_2 = 2) filter_users_1
|
|
JOIN LATERAL
|
|
(SELECT
|
|
user_id, value_3
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 AND
|
|
("events".user_id = "filter_users_1".user_id)
|
|
ORDER BY
|
|
value_3 DESC
|
|
LIMIT 1) "last_events_1" ON true
|
|
ORDER BY value_3 DESC
|
|
LIMIT 10) "some_recent_users"
|
|
JOIN LATERAL
|
|
(SELECT
|
|
"users".user_id
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
"users"."user_id" = "some_recent_users"."user_id" AND
|
|
users.value_2 > 2
|
|
LIMIT 1) "some_users_data" ON true
|
|
ORDER BY
|
|
value_3 DESC
|
|
LIMIT 10) "some_users"
|
|
ORDER BY
|
|
value_3 DESC, user_id ASC
|
|
LIMIT 10;
|
|
user_id | value_3
|
|
---------+---------
|
|
2 | 5
|
|
2 | 5
|
|
2 | 5
|
|
2 | 5
|
|
3 | 5
|
|
3 | 5
|
|
3 | 5
|
|
3 | 5
|
|
3 | 5
|
|
(9 rows)
|
|
|
|
-- nested lateral join at topmost level
|
|
SELECT "some_users_data".user_id, "some_recent_users".value_3
|
|
FROM
|
|
(SELECT
|
|
filter_users_1.user_id, value_3
|
|
FROM
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and users.value_2 = 2
|
|
) filter_users_1
|
|
JOIN LATERAL
|
|
(SELECT
|
|
user_id, value_3
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 AND
|
|
("events".user_id = "filter_users_1".user_id)
|
|
ORDER BY
|
|
value_3 DESC
|
|
LIMIT 1
|
|
) "last_events_1" ON true
|
|
ORDER BY value_3 DESC
|
|
LIMIT 10
|
|
) "some_recent_users"
|
|
JOIN LATERAL
|
|
(SELECT
|
|
"users".user_id
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
"users"."user_id" = "some_recent_users"."user_id" AND
|
|
users.value_2 > 2
|
|
LIMIT 1
|
|
) "some_users_data" ON true
|
|
ORDER BY
|
|
value_3 DESC, user_id ASC
|
|
LIMIT 10;
|
|
user_id | value_3
|
|
---------+---------
|
|
2 | 5
|
|
2 | 5
|
|
2 | 5
|
|
2 | 5
|
|
3 | 5
|
|
3 | 5
|
|
3 | 5
|
|
3 | 5
|
|
3 | 5
|
|
(9 rows)
|
|
|
|
-- longer nested lateral joins
|
|
SELECT *
|
|
FROM
|
|
(SELECT "some_users_data".user_id, "some_recent_users".value_3
|
|
FROM
|
|
(SELECT filter_users_1.user_id, value_3
|
|
FROM
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and users.value_2 = 2) filter_users_1
|
|
JOIN LATERAL
|
|
(SELECT
|
|
user_id, value_3
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 AND
|
|
("events".user_id = "filter_users_1".user_id)
|
|
ORDER BY
|
|
value_3 DESC
|
|
LIMIT 1) "last_events_1" ON true
|
|
ORDER BY
|
|
value_3 DESC
|
|
LIMIT 10) "some_recent_users"
|
|
JOIN LATERAL
|
|
(SELECT
|
|
"users".user_id
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
"users"."user_id" = "some_recent_users"."user_id" AND
|
|
users.value_2 > 2
|
|
LIMIT 1) "some_users_data" ON true
|
|
ORDER BY
|
|
value_3 DESC
|
|
LIMIT 10) "some_users"
|
|
ORDER BY
|
|
value_3 DESC, user_id DESC
|
|
LIMIT 10;
|
|
user_id | value_3
|
|
---------+---------
|
|
3 | 5
|
|
3 | 5
|
|
3 | 5
|
|
3 | 5
|
|
3 | 5
|
|
2 | 5
|
|
2 | 5
|
|
2 | 5
|
|
2 | 5
|
|
(9 rows)
|
|
|
|
-- longer nested lateral join with top level join
|
|
SELECT "some_users_data".user_id, "some_recent_users".value_3
|
|
FROM
|
|
(SELECT filter_users_1.user_id, value_3
|
|
FROM
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and users.value_2 = 2
|
|
) filter_users_1
|
|
JOIN LATERAL
|
|
(SELECT
|
|
user_id, value_3
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 1 and user_id < 4
|
|
AND
|
|
("events".user_id = "filter_users_1".user_id)
|
|
ORDER BY
|
|
value_3 DESC
|
|
LIMIT 1
|
|
) "last_events_1" ON TRUE
|
|
ORDER BY value_3 DESC
|
|
LIMIT 10
|
|
) "some_recent_users"
|
|
JOIN LATERAL
|
|
(SELECT
|
|
"users".user_id
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
"users"."user_id" = "some_recent_users"."user_id" AND
|
|
users.value_2 > 2
|
|
LIMIT 1
|
|
) "some_users_data" ON TRUE
|
|
ORDER BY value_3 DESC, user_id DESC
|
|
LIMIT 10;
|
|
user_id | value_3
|
|
---------+---------
|
|
3 | 5
|
|
3 | 5
|
|
3 | 5
|
|
3 | 5
|
|
3 | 5
|
|
2 | 5
|
|
2 | 5
|
|
2 | 5
|
|
2 | 5
|
|
(9 rows)
|
|
|
|
SET citus.subquery_pushdown TO OFF;
-- LEFT JOINs used with INNER JOINs: events and users are inner-joined on
-- the user id first, and the result is then left-joined to users_table to
-- pick up the grouping column
SELECT
    count(*) AS cnt,
    "generated_group_field"
FROM
    (
        SELECT
            "eventQuery"."user_id",
            random(),
            generated_group_field
        FROM
            (
                SELECT
                    "multi_group_wrapper_1".*,
                    generated_group_field,
                    random()
                FROM
                    (
                        SELECT *
                        FROM
                            (
                                SELECT
                                    "events"."time",
                                    "events"."user_id" AS event_user_id
                                FROM events_table AS "events"
                                WHERE user_id > 4
                            ) "temp_data_queries"
                            INNER JOIN
                            (
                                SELECT "users"."user_id"
                                FROM users_table AS "users"
                                WHERE user_id > 4 AND value_2 = 5
                            ) "user_filters_1"
                            ON ("temp_data_queries".event_user_id = "user_filters_1".user_id)
                    ) AS "multi_group_wrapper_1"
                    LEFT JOIN
                    (
                        SELECT
                            "users"."user_id" AS "user_id",
                            value_2 AS "generated_group_field"
                        FROM users_table AS "users"
                    ) "left_group_by_1"
                    ON ("left_group_by_1".user_id = "multi_group_wrapper_1".event_user_id)
            ) "eventQuery"
    ) "pushedDownQuery"
GROUP BY "generated_group_field"
ORDER BY cnt DESC, generated_group_field ASC
LIMIT 10;
|
|
cnt | generated_group_field
|
|
-----+-----------------------
|
|
336 | 2
|
|
210 | 1
|
|
210 | 3
|
|
126 | 4
|
|
126 | 5
|
|
84 | 0
|
|
(6 rows)
|
|
|
|
-- single table subquery, no JOINs involved:
-- count the events of type 1 or 2 per user
SELECT
    count(*) AS cnt,
    user_id
FROM
    (
        SELECT
            "eventQuery"."user_id",
            random()
        FROM
            (
                SELECT "events"."user_id"
                FROM events_table AS "events"
                WHERE event_type IN (1, 2)
            ) "eventQuery"
    ) "pushedDownQuery"
GROUP BY "user_id"
ORDER BY cnt DESC, user_id DESC
LIMIT 10;
|
|
cnt | user_id
|
|
-----+---------
|
|
11 | 3
|
|
10 | 2
|
|
8 | 4
|
|
6 | 5
|
|
4 | 6
|
|
4 | 1
|
|
(6 rows)
|
|
|
|
-- lateral joins in the nested manner
SET citus.subquery_pushdown TO ON;
SELECT *
FROM
    (
        SELECT
            "some_users_data".user_id,
            value_2
        FROM
            (
                -- maximum value_2 per user over the limited event set
                SELECT
                    user_id,
                    max(value_2) AS value_2
                FROM
                    (
                        SELECT user_id, value_2
                        FROM
                            (
                                SELECT user_id, value_2
                                FROM events_table AS "events"
                                WHERE user_id > 1 AND user_id < 3
                            ) "events_1"
                        ORDER BY value_2 DESC
                        LIMIT 10000
                    ) "recent_events_1"
                GROUP BY user_id
                ORDER BY max(value_2) DESC
            ) "some_recent_users"
            JOIN LATERAL
            (
                -- correlated on user_id with the outer "some_recent_users"
                SELECT "users".user_id
                FROM users_table AS "users"
                WHERE "users"."user_id" = "some_recent_users"."user_id"
                  AND value_2 > 4
                LIMIT 1
            ) "some_users_data" ON TRUE
        ORDER BY value_2 DESC
        LIMIT 10
    ) "some_users"
ORDER BY value_2 DESC, user_id DESC
LIMIT 10;
|
|
user_id | value_2
|
|
---------+---------
|
|
2 | 5
|
|
(1 row)
|
|
|
|
SET citus.subquery_pushdown TO OFF;
-- not pushdownable, since the lower LATERAL JOIN is not on the partition
-- key (it compares "users"."value_2" with the outer user_id)
-- not recursively plannable either, because the LATERAL subquery
-- references a column from an outer query
SELECT *
FROM
    (
        SELECT
            "some_users_data".user_id,
            value_2
        FROM
            (
                SELECT
                    user_id,
                    max(value_2) AS value_2
                FROM
                    (
                        SELECT user_id, value_2
                        FROM
                            (
                                SELECT user_id, value_2
                                FROM events_table AS "events"
                                WHERE user_id > 1 AND user_id < 3
                            ) "events_1"
                        ORDER BY value_2 DESC
                        LIMIT 10000
                    ) "recent_events_1"
                GROUP BY user_id
                ORDER BY max(value_2) DESC
            ) "some_recent_users"
            JOIN LATERAL
            (
                SELECT "users".user_id
                FROM users_table AS "users"
                WHERE "users"."value_2" = "some_recent_users"."user_id"
                  AND value_2 > 4
                LIMIT 1
            ) "some_users_data" ON TRUE
        ORDER BY value_2 DESC
        LIMIT 10
    ) "some_users"
ORDER BY value_2 DESC, user_id DESC
LIMIT 10;
|
|
ERROR: cannot push down this subquery
|
|
DETAIL: Limit in subquery is currently unsupported when a subquery references a column from another query
|
|
-- let's test some unsupported set operations
|
|
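-- not supported since we use INTERSECT (NOTE(review): the expected output below still lists result rows, so this claim may be stale; confirm)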
|
|
-- count the collected event markers per type; the set operation feeding
-- "t1" mixes UNION and INTERSECT
SELECT
    ("final_query"."event_types") AS types,
    count(*) AS sumOfEventType
FROM
    (
        SELECT *, random()
        FROM
            (
                SELECT
                    "t"."user_id",
                    "t"."time",
                    unnest("t"."collected_events") AS "event_types"
                FROM
                    (
                        SELECT
                            "t1"."user_id",
                            min("t1"."time") AS "time",
                            array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
                        FROM
                            (
                                (
                                    SELECT *
                                    FROM
                                        (
                                            SELECT "events"."user_id", "events"."time", 0 AS event
                                            FROM events_table AS "events"
                                            WHERE event_type IN (1, 2)
                                        ) events_subquery_1
                                )
                                UNION
                                (
                                    SELECT *
                                    FROM
                                        (
                                            SELECT "events"."user_id", "events"."time", 1 AS event
                                            FROM events_table AS "events"
                                            WHERE event_type IN (3, 4)
                                        ) events_subquery_2
                                )
                                UNION
                                (
                                    SELECT *
                                    FROM
                                        (
                                            SELECT "events"."user_id", "events"."time", 2 AS event
                                            FROM events_table AS "events"
                                            WHERE event_type IN (5, 6)
                                        ) events_subquery_3
                                )
                                -- NOTE: INTERSECT binds more tightly than UNION, so it is
                                -- applied to events_subquery_3 and events_subquery_4 first
                                INTERSECT
                                (
                                    SELECT *
                                    FROM
                                        (
                                            SELECT "events"."user_id", "events"."time", 3 AS event
                                            FROM events_table AS "events"
                                            WHERE event_type IN (4, 5)
                                        ) events_subquery_4
                                )
                            ) t1
                        GROUP BY "t1"."user_id"
                    ) AS t
            ) "q"
            INNER JOIN
            (
                SELECT "users"."user_id"
                FROM users_table AS "users"
                WHERE value_1 > 0 AND value_1 < 4
            ) AS t
            ON (t.user_id = q.user_id)
    ) AS final_query
GROUP BY types
ORDER BY types;
|
|
types | sumofeventtype
|
|
-------+----------------
|
|
0 | 449
|
|
1 | 433
|
|
(2 rows)
|
|
|
|
-- supported through recursive planning
-- (the UNION result is ordered by its first two columns and the first
-- three rows are skipped with OFFSET before it is grouped)
SELECT
    ("final_query"."event_types") AS types,
    count(*) AS sumOfEventType
FROM
    (
        SELECT *
        FROM
            (
                SELECT
                    "t"."user_id",
                    "t"."time",
                    unnest("t"."collected_events") AS "event_types"
                FROM
                    (
                        SELECT
                            "t1"."user_id",
                            min("t1"."time") AS "time",
                            array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
                        FROM
                            (
                                (
                                    SELECT *
                                    FROM
                                        (
                                            SELECT "events"."user_id", "events"."time", 0 AS event
                                            FROM events_table AS "events"
                                            WHERE event_type IN (1, 2)
                                        ) events_subquery_1
                                )
                                UNION
                                (
                                    SELECT *
                                    FROM
                                        (
                                            SELECT "events"."user_id", "events"."time", 1 AS event
                                            FROM events_table AS "events"
                                            WHERE event_type IN (3, 4)
                                        ) events_subquery_2
                                )
                                UNION
                                (
                                    SELECT *
                                    FROM
                                        (
                                            SELECT "events"."user_id", "events"."time", 2 AS event
                                            FROM events_table AS "events"
                                            WHERE event_type IN (5, 6)
                                        ) events_subquery_3
                                )
                                UNION
                                (
                                    SELECT *
                                    FROM
                                        (
                                            SELECT "events"."user_id", "events"."time", 3 AS event
                                            FROM events_table AS "events"
                                            WHERE event_type IN (4, 5)
                                        ) events_subquery_4
                                )
                                ORDER BY 1, 2
                                OFFSET 3
                            ) t1
                        GROUP BY "t1"."user_id"
                    ) AS t
            ) "q"
            INNER JOIN
            (
                SELECT "users"."user_id"
                FROM users_table AS "users"
                WHERE value_1 > 0 AND value_1 < 4
            ) AS t
            ON (t.user_id = q.user_id)
    ) AS final_query
GROUP BY types
ORDER BY types;
|
|
types | sumofeventtype
|
|
-------+----------------
|
|
0 | 449
|
|
1 | 425
|
|
2 | 75
|
|
3 | 251
|
|
(4 rows)
|
|
|
|
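-- not supported due to non relation rte (NOTE(review): the expected output below still lists result rows, so this claim may be stale; confirm)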
|
|
-- the last UNION branch selects only constants and now(), i.e. it does
-- not read from any table
SELECT
    ("final_query"."event_types") AS types,
    count(*) AS sumOfEventType
FROM
    (
        SELECT *, random()
        FROM
            (
                SELECT
                    "t"."user_id",
                    "t"."time",
                    unnest("t"."collected_events") AS "event_types"
                FROM
                    (
                        SELECT
                            "t1"."user_id",
                            min("t1"."time") AS "time",
                            array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
                        FROM
                            (
                                (
                                    SELECT *
                                    FROM
                                        (
                                            SELECT "events"."user_id", "events"."time", 0 AS event
                                            FROM events_table AS "events"
                                            WHERE event_type IN (1, 2)
                                        ) events_subquery_1
                                )
                                UNION
                                (
                                    SELECT *
                                    FROM
                                        (
                                            SELECT "events"."user_id", "events"."time", 1 AS event
                                            FROM events_table AS "events"
                                            WHERE event_type IN (3, 4)
                                        ) events_subquery_2
                                )
                                UNION
                                (
                                    SELECT *
                                    FROM
                                        (
                                            SELECT "events"."user_id", "events"."time", 2 AS event
                                            FROM events_table AS "events"
                                            WHERE event_type IN (5, 6)
                                        ) events_subquery_3
                                )
                                UNION
                                (
                                    SELECT *
                                    FROM
                                        (
                                            SELECT 1 AS user_id, now(), 3 AS event
                                        ) events_subquery_4
                                )
                            ) t1
                        GROUP BY "t1"."user_id"
                    ) AS t
            ) "q"
            INNER JOIN
            (
                SELECT "users"."user_id"
                FROM users_table AS "users"
                WHERE value_1 > 0 AND value_1 < 4
            ) AS t
            ON (t.user_id = q.user_id)
    ) AS final_query
GROUP BY types
ORDER BY types;
|
|
types | sumofeventtype
|
|
-------+----------------
|
|
0 | 449
|
|
1 | 433
|
|
2 | 75
|
|
3 | 4
|
|
(4 rows)
|
|
|
|
-- the last UNION branch is a constant subquery again, and this time the
-- right side of the INNER JOIN is a constant subquery as well
SELECT
    ("final_query"."event_types") AS types,
    count(*) AS sumOfEventType
FROM
    (
        SELECT *, random()
        FROM
            (
                SELECT
                    "t"."user_id",
                    "t"."time",
                    unnest("t"."collected_events") AS "event_types"
                FROM
                    (
                        SELECT
                            "t1"."user_id",
                            min("t1"."time") AS "time",
                            array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
                        FROM
                            (
                                (
                                    SELECT *
                                    FROM
                                        (
                                            SELECT "events"."user_id", "events"."time", 0 AS event
                                            FROM events_table AS "events"
                                            WHERE event_type IN (1, 2)
                                        ) events_subquery_1
                                )
                                UNION
                                (
                                    SELECT *
                                    FROM
                                        (
                                            SELECT "events"."user_id", "events"."time", 1 AS event
                                            FROM events_table AS "events"
                                            WHERE event_type IN (3, 4)
                                        ) events_subquery_2
                                )
                                UNION
                                (
                                    SELECT *
                                    FROM
                                        (
                                            SELECT "events"."user_id", "events"."time", 2 AS event
                                            FROM events_table AS "events"
                                            WHERE event_type IN (5, 6)
                                        ) events_subquery_3
                                )
                                UNION
                                (
                                    SELECT *
                                    FROM
                                        (
                                            SELECT 1 AS user_id, now(), 3 AS event
                                        ) events_subquery_4
                                )
                            ) t1
                        GROUP BY "t1"."user_id"
                    ) AS t
            ) "q"
            INNER JOIN
            (
                SELECT 1 AS user_id
            ) AS t
            ON (t.user_id = q.user_id)
    ) AS final_query
GROUP BY types
ORDER BY types;
|
|
types | sumofeventtype
|
|
-------+----------------
|
|
0 | 4
|
|
1 | 8
|
|
2 | 1
|
|
3 | 1
|
|
(4 rows)
|
|
|
|
-- we've fixed a bug related to joins with/without an alias
|
|
-- while implementing top window functions
|
|
-- thus adding some tests related to that (i.e., next 3 tests)
|
|
-- the CTE is wrapped in an aliased subquery ("users") and joined to
-- events_table inside a parenthesized join that is itself aliased ("a")
WITH users_events AS (
    SELECT user_id
    FROM users_table
)
SELECT
    uid,
    event_type,
    value_2,
    value_3
FROM
    (
        (
            SELECT user_id AS uid
            FROM users_events
        ) users
        JOIN events_table
            ON users.uid = events_table.event_type
    ) a
ORDER BY 1, 2, 3, 4
LIMIT 5;
|
|
uid | event_type | value_2 | value_3
|
|
-----+------------+---------+---------
|
|
1 | 1 | 0 | 2
|
|
1 | 1 | 0 | 2
|
|
1 | 1 | 0 | 2
|
|
1 | 1 | 0 | 2
|
|
1 | 1 | 0 | 2
|
|
(5 rows)
|
|
|
|
-- this query and the next one are almost the same,
-- the only difference is the final GROUP BY
-- (here it groups by the unqualified user_id)
SELECT
    a.user_id,
    avg(b.value_2) AS subquery_avg
FROM
    (
        SELECT user_id
        FROM users_table
        WHERE value_1 > 2
        GROUP BY user_id
        HAVING count(DISTINCT value_1) > 2
    ) AS a
    LEFT JOIN
    (
        SELECT DISTINCT ON (value_2)
            value_2,
            user_id,
            value_3
        FROM users_table
        WHERE value_1 > 3
        ORDER BY 1, 2, 3
    ) AS b
    USING (user_id)
GROUP BY user_id
ORDER BY 1, 2;
|
|
user_id | subquery_avg
|
|
---------+------------------------
|
|
1 | 2.3333333333333333
|
|
3 | 5.0000000000000000
|
|
4 | 1.00000000000000000000
|
|
5 |
|
|
(4 rows)
|
|
|
|
-- same query as the previous one, except that the final GROUP BY
-- uses the qualified a.user_id
SELECT
    a.user_id,
    avg(b.value_2) AS subquery_avg
FROM
    (
        SELECT user_id
        FROM users_table
        WHERE value_1 > 2
        GROUP BY user_id
        HAVING count(DISTINCT value_1) > 2
    ) AS a
    LEFT JOIN
    (
        SELECT DISTINCT ON (value_2)
            value_2,
            user_id,
            value_3
        FROM users_table
        WHERE value_1 > 3
        ORDER BY 1, 2, 3
    ) AS b
    USING (user_id)
GROUP BY a.user_id
ORDER BY 1, 2;
|
|
user_id | subquery_avg
|
|
---------+------------------------
|
|
1 | 2.3333333333333333
|
|
3 | 5.0000000000000000
|
|
4 | 1.00000000000000000000
|
|
5 |
|
|
(4 rows)
|
|
|
|
-- queries where column aliases are used
-- the query itself is simple: the join is given an alias ("k") together
-- with aliases for its first output columns
SELECT k1
FROM
    (
        SELECT
            k1,
            random()
        FROM (users_table JOIN events_table USING (user_id)) AS k (k1, k2, k3)
    ) l
ORDER BY k1
LIMIT 5;
|
|
k1
|
|
----
|
|
1
|
|
1
|
|
1
|
|
1
|
|
1
|
|
(5 rows)
|
|
|
|
-- DISTINCT over the first aliased column of a join that is given an
-- alias with column aliases
SELECT DISTINCT k1
FROM
    (
        SELECT
            k1,
            random()
        FROM (users_table JOIN events_table USING (user_id)) AS k (k1, k2, k3)
    ) l
ORDER BY k1
LIMIT 5;
|
|
k1
|
|
----
|
|
1
|
|
2
|
|
3
|
|
4
|
|
5
|
|
(5 rows)
|
|
|
|
-- FULL JOIN with an ON clause; the join alias renames the first five
-- output columns, while value_2 is still referenced by its own name
SELECT
    x1,
    x3,
    value_2
FROM
    (users_table u FULL JOIN events_table e ON (u.user_id = e.user_id))
        AS k (x1, x2, x3, x4, x5)
ORDER BY 1, 2, 3
LIMIT 5;
|
|
x1 | x3 | value_2
|
|
----+----+---------
|
|
1 | 1 | 1
|
|
1 | 1 | 1
|
|
1 | 1 | 1
|
|
1 | 1 | 2
|
|
1 | 1 | 2
|
|
(5 rows)
|
|
|
|
-- FULL JOIN with a USING clause; the join alias renames the first five
-- output columns, while value_2 is still referenced by its own name
SELECT
    x1,
    x3,
    value_2
FROM
    (users_table u FULL JOIN events_table e USING (user_id))
        AS k (x1, x2, x3, x4, x5)
ORDER BY 1, 2, 3
LIMIT 5;
|
|
x1 | x3 | value_2
|
|
----+----+---------
|
|
1 | 1 | 1
|
|
1 | 1 | 1
|
|
1 | 1 | 1
|
|
1 | 1 | 2
|
|
1 | 1 | 2
|
|
(5 rows)
|
|
|
|
-- an aliased LEFT OUTER JOIN (first two columns renamed) that is joined
-- once more to users_table through its renamed first column
SELECT c_custkey
FROM
    (
        users_table
        LEFT OUTER JOIN events_table
            ON (users_table.user_id = events_table.user_id)
    ) AS test (c_custkey, c_nationkey)
    INNER JOIN users_table AS u2
        ON (test.c_custkey = u2.user_id)
ORDER BY 1 DESC
LIMIT 10;
|
|
c_custkey
|
|
-----------
|
|
6
|
|
6
|
|
6
|
|
6
|
|
6
|
|
6
|
|
6
|
|
6
|
|
6
|
|
6
|
|
(10 rows)
|
|
|
|
-- aggregate over the second renamed column of the aliased join,
-- grouped by the first renamed column
SELECT
    c_custkey,
    date_trunc('minute', max(c_nationkey))
FROM
    (
        users_table
        LEFT OUTER JOIN events_table
            ON (users_table.user_id = events_table.user_id)
    ) AS test (c_custkey, c_nationkey)
    INNER JOIN users_table AS u2
        ON (test.c_custkey = u2.user_id)
GROUP BY 1
ORDER BY 2, 1
LIMIT 10;
|
|
c_custkey | date_trunc
|
|
-----------+--------------------------
|
|
2 | Thu Nov 23 13:52:00 2017
|
|
6 | Thu Nov 23 14:43:00 2017
|
|
4 | Thu Nov 23 15:32:00 2017
|
|
5 | Thu Nov 23 16:48:00 2017
|
|
3 | Thu Nov 23 17:18:00 2017
|
|
1 | Thu Nov 23 17:30:00 2017
|
|
(6 rows)
|
|
|
|
-- aggregate over the second renamed column of the aliased join, with a
-- HAVING filter on the minute part of that aggregate
SELECT
    c_custkey,
    date_trunc('minute', max(c_nationkey))
FROM
    (
        users_table
        LEFT OUTER JOIN events_table
            ON (users_table.user_id = events_table.user_id)
    ) AS test (c_custkey, c_nationkey)
    INNER JOIN users_table AS u2
        ON (test.c_custkey = u2.user_id)
GROUP BY 1
HAVING extract(minute FROM max(c_nationkey)) >= 45
ORDER BY 2, 1
LIMIT 10;
|
|
c_custkey | date_trunc
|
|
-----------+--------------------------
|
|
2 | Thu Nov 23 13:52:00 2017
|
|
5 | Thu Nov 23 16:48:00 2017
|
|
(2 rows)
|
|
|
|
-- the aliased join names its first column user_id, which is then used
-- by the USING clause of the outer FULL JOIN
SELECT user_id
FROM
    (users_table JOIN events_table USING (user_id))
        AS test (user_id, c_nationkey)
    FULL JOIN users_table AS u2
        USING (user_id)
ORDER BY 1 DESC
LIMIT 10;
|
|
user_id
|
|
---------
|
|
6
|
|
6
|
|
6
|
|
6
|
|
6
|
|
6
|
|
6
|
|
6
|
|
6
|
|
6
|
|
(10 rows)
|
|
|
|
-- nested joins: every join level gets its own alias with column aliases,
-- and the outermost alias column "bar" feeds a LATERAL subquery
SELECT
    bar,
    value_3_table.value_3
FROM
    (
        (
            users_table
            JOIN
            (
                events_table
                INNER JOIN users_reference_table foo
                    ON (events_table.user_id = foo.value_2)
            ) AS deeper_join (user_id_deep)
                ON (users_table.user_id = deeper_join.user_id_deep)
        ) AS test (c_custkey, c_nationkey)
        LEFT JOIN users_table AS u2
            ON (test.c_custkey = u2.user_id)
    ) outer_test (bar, foo)
    JOIN LATERAL
    (
        SELECT value_3
        FROM events_table
        WHERE user_id = bar
    ) AS value_3_table ON TRUE
GROUP BY 1, 2
ORDER BY 2 DESC, 1 DESC
LIMIT 10;
|
|
bar | value_3
|
|
-----+---------
|
|
3 | 5
|
|
2 | 5
|
|
1 | 5
|
|
5 | 4
|
|
4 | 4
|
|
3 | 4
|
|
2 | 4
|
|
1 | 4
|
|
5 | 3
|
|
4 | 3
|
|
(10 rows)
|
|
|
|
-- lateral joins: the LATERAL subquery is correlated on "bar", the first
-- column alias of the outermost nested join
SELECT
    bar,
    value_3_table.value_3
FROM
    (
        (
            users_table
            JOIN
            (
                events_table
                INNER JOIN users_reference_table foo
                    ON (events_table.user_id = foo.value_2)
            ) AS deeper_join (user_id_deep)
                ON (users_table.user_id = deeper_join.user_id_deep)
        ) AS test (c_custkey, c_nationkey)
        LEFT JOIN users_table AS u2
            ON (test.c_custkey = u2.user_id)
    ) outer_test (bar, foo)
    JOIN LATERAL
    (
        SELECT value_3
        FROM events_table
        WHERE user_id = bar
    ) AS value_3_table ON TRUE
GROUP BY 1, 2
ORDER BY 2 DESC, 1 DESC
LIMIT 10;
|
|
bar | value_3
|
|
-----+---------
|
|
3 | 5
|
|
2 | 5
|
|
1 | 5
|
|
5 | 4
|
|
4 | 4
|
|
3 | 4
|
|
2 | 4
|
|
1 | 4
|
|
5 | 3
|
|
4 | 3
|
|
(10 rows)
|
|
|
|
-- joins inside subqueries are the sources of the values in the target list:
-- "foo" is a subquery over nested aliased joins, "test_2" is an aliased
-- join, and the two are combined through the WHERE clause
SELECT
    bar,
    foo.value_3,
    c_custkey,
    test_2.time_2
FROM
    (
        SELECT
            bar,
            value_3_table.value_3,
            random()
        FROM
            (
                (
                    users_table
                    JOIN
                    (
                        events_table
                        INNER JOIN users_reference_table foo
                            ON (events_table.user_id = foo.value_2)
                    ) AS deeper_join (user_id_deep)
                        ON (users_table.user_id = deeper_join.user_id_deep)
                ) AS test (c_custkey, c_nationkey)
                LEFT JOIN users_table AS u2
                    ON (test.c_custkey = u2.user_id)
            ) outer_test (bar, foo)
            JOIN LATERAL
            (
                SELECT value_3
                FROM events_table
                WHERE user_id = bar
            ) AS value_3_table ON TRUE
        GROUP BY 1, 2
    ) AS foo,
    (
        users_table
        JOIN
        (
            events_table
            INNER JOIN users_reference_table foo
                ON (events_table.user_id = foo.value_2)
        ) AS deeper_join_2 (user_id_deep)
            ON (users_table.user_id = deeper_join_2.user_id_deep)
    ) AS test_2 (c_custkey, time_2)
WHERE foo.bar = test_2.c_custkey
ORDER BY 2 DESC, 1 DESC, 3 DESC, 4 DESC
LIMIT 10;
|
|
bar | value_3 | c_custkey | time_2
|
|
-----+---------+-----------+---------------------------------
|
|
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
|
|
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
|
|
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
|
|
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
|
|
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
|
|
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
|
|
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
|
|
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
|
|
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
|
|
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
|
|
(10 rows)
|
|
|