mirror of https://github.com/citusdata/citus.git
2901 lines
105 KiB
Plaintext
2901 lines
105 KiB
Plaintext
--
|
|
-- multi subquery complex queries aims to expand existing subquery pushdown
|
|
-- regression tests to cover more caeses
|
|
-- the tables that are used depends to multi_insert_select_behavioral_analytics_create_table.sql
|
|
--
|
|
-- We don't need shard id sequence here, so commented out to prevent conflicts with concurrent tests
|
|
-- SET citus.next_shard_id TO 1400000;
|
|
--
|
|
-- UNIONs and JOINs mixed
|
|
--
|
|
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
|
FROM
|
|
( SELECT *, random()
|
|
FROM
|
|
( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
|
FROM
|
|
( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
|
FROM (
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 0 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 2) ) events_subquery_1)
|
|
UNION
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 1 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (3, 4) ) events_subquery_2)
|
|
UNION
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 2 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (5, 6) ) events_subquery_3)
|
|
UNION
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 3 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 6)) events_subquery_4)) t1
|
|
GROUP BY "t1"."user_id") AS t) "q"
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
value_1 > 0 and value_1 < 4) AS t
|
|
ON (t.user_id = q.user_id)) as final_query
|
|
GROUP BY
|
|
types
|
|
ORDER BY
|
|
types;
|
|
types | sumofeventtype
|
|
---------------------------------------------------------------------
|
|
0 | 449
|
|
1 | 433
|
|
2 | 75
|
|
3 | 268
|
|
(4 rows)
|
|
|
|
-- same query with target entries shuffled inside UNIONs
|
|
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
|
FROM
|
|
( SELECT *, random()
|
|
FROM
|
|
( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
|
FROM
|
|
( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
|
FROM (
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 0 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 2) ) events_subquery_1)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 1 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (3, 4) ) events_subquery_2)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 2 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (5, 6) ) events_subquery_3)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 3 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 6)) events_subquery_4)) t1
|
|
GROUP BY "t1"."user_id") AS t) "q"
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
value_1 > 0 and value_1 < 4) AS t
|
|
ON (t.user_id = q.user_id)) as final_query
|
|
GROUP BY
|
|
types
|
|
ORDER BY
|
|
types;
|
|
types | sumofeventtype
|
|
---------------------------------------------------------------------
|
|
0 | 449
|
|
1 | 433
|
|
2 | 75
|
|
3 | 268
|
|
(4 rows)
|
|
|
|
-- supported through recursive planning since events_subquery_2 doesn't have partition key on the target list
|
|
-- within the shuffled target list
|
|
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
|
FROM
|
|
( SELECT *, random()
|
|
FROM
|
|
( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
|
FROM
|
|
( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
|
FROM (
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 0 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 2) ) events_subquery_1)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 1 AS event, "events"."user_id" * 2
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (3, 4) ) events_subquery_2)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 2 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (5, 6) ) events_subquery_3)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 3 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 6)) events_subquery_4)) t1
|
|
GROUP BY "t1"."user_id") AS t) "q"
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
value_1 > 0 and value_1 < 4) AS t
|
|
ON (t.user_id = q.user_id)) as final_query
|
|
GROUP BY
|
|
types
|
|
ORDER BY
|
|
types;
|
|
types | sumofeventtype
|
|
---------------------------------------------------------------------
|
|
0 | 449
|
|
1 | 234
|
|
2 | 75
|
|
3 | 268
|
|
(4 rows)
|
|
|
|
-- supported through recursive planning since events_subquery_2 doesn't have partition key on the target list
|
|
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
|
FROM
|
|
( SELECT *, random()
|
|
FROM
|
|
( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
|
FROM
|
|
( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
|
FROM (
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 0 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 2) ) events_subquery_1)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 1 AS event, "events"."value_2" as user_id
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (3, 4) ) events_subquery_2)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 2 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (5, 6) ) events_subquery_3)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 3 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 6)) events_subquery_4)) t1
|
|
GROUP BY "t1"."user_id") AS t) "q"
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
value_1 > 0 and value_1 < 4) AS t
|
|
ON (t.user_id = q.user_id)) as final_query
|
|
GROUP BY
|
|
types
|
|
ORDER BY
|
|
types;
|
|
types | sumofeventtype
|
|
---------------------------------------------------------------------
|
|
0 | 449
|
|
1 | 369
|
|
2 | 75
|
|
3 | 268
|
|
(4 rows)
|
|
|
|
-- we can support arbitrary subqueries within UNIONs
|
|
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
|
FROM
|
|
( SELECT
|
|
*, random()
|
|
FROM
|
|
(SELECT
|
|
"t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
|
FROM
|
|
( SELECT
|
|
"t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
|
FROM (
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 0 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 2) ) events_subquery_1)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(
|
|
SELECT * FROM
|
|
(
|
|
SELECT
|
|
max("events"."time"),
|
|
0 AS event,
|
|
"events"."user_id"
|
|
FROM
|
|
events_table as "events", users_table as "users"
|
|
WHERE
|
|
events.user_id = users.user_id AND
|
|
event_type IN (1, 2)
|
|
GROUP BY "events"."user_id"
|
|
) as events_subquery_5
|
|
) events_subquery_2)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 2 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (3, 4) ) events_subquery_3)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 3 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (5, 6)) events_subquery_4)
|
|
) t1
|
|
GROUP BY "t1"."user_id") AS t) "q"
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
value_1 > 0 and value_1 < 4) AS t
|
|
ON (t.user_id = q.user_id)) as final_query
|
|
GROUP BY
|
|
types
|
|
ORDER BY
|
|
types;
|
|
types | sumofeventtype
|
|
---------------------------------------------------------------------
|
|
0 | 449
|
|
2 | 433
|
|
3 | 75
|
|
(3 rows)
|
|
|
|
SET citus.enable_repartition_joins to ON;
|
|
SET client_min_messages TO DEBUG1;
|
|
-- recursively planned since events_subquery_5 is not joined on partition key
|
|
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
|
FROM
|
|
( SELECT
|
|
*, random()
|
|
FROM
|
|
(SELECT
|
|
"t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
|
FROM
|
|
( SELECT
|
|
"t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
|
FROM (
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 0 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 2) ) events_subquery_1)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(
|
|
SELECT * FROM
|
|
(
|
|
SELECT
|
|
max("events"."time"),
|
|
0 AS event,
|
|
"events"."user_id"
|
|
FROM
|
|
events_table as "events", users_table as "users"
|
|
WHERE
|
|
events.user_id = users.value_2 AND
|
|
event_type IN (1, 2)
|
|
GROUP BY "events"."user_id"
|
|
) as events_subquery_5
|
|
) events_subquery_2)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 2 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (3, 4) ) events_subquery_3)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 3 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (5, 6)) events_subquery_4)
|
|
) t1
|
|
GROUP BY "t1"."user_id") AS t) "q"
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
value_1 > 0 and value_1 < 4) AS t
|
|
ON (t.user_id = q.user_id)) as final_query
|
|
GROUP BY
|
|
types
|
|
ORDER BY
|
|
types;
|
|
DEBUG: generating subplan XXX_1 for subquery SELECT max(events."time") AS max, 0 AS event, events.user_id FROM public.events_table events, public.users_table users WHERE ((events.user_id OPERATOR(pg_catalog.=) users.value_2) AND (events.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2]))) GROUP BY events.user_id
|
|
DEBUG: generating subplan XXX_2 for subquery SELECT "time", event, user_id FROM (SELECT events."time", 0 AS event, events.user_id FROM public.events_table events WHERE (events.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2]))) events_subquery_1
|
|
DEBUG: generating subplan XXX_3 for subquery SELECT "time", event, user_id FROM (SELECT events."time", 2 AS event, events.user_id FROM public.events_table events WHERE (events.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[3, 4]))) events_subquery_3
|
|
DEBUG: generating subplan XXX_4 for subquery SELECT "time", event, user_id FROM (SELECT events."time", 3 AS event, events.user_id FROM public.events_table events WHERE (events.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6]))) events_subquery_4
|
|
DEBUG: generating subplan XXX_5 for subquery SELECT intermediate_result."time", intermediate_result.event, intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result("time" timestamp without time zone, event integer, user_id integer) UNION SELECT events_subquery_2.max, events_subquery_2.event, events_subquery_2.user_id FROM (SELECT events_subquery_5.max, events_subquery_5.event, events_subquery_5.user_id FROM (SELECT intermediate_result.max, intermediate_result.event, intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(max timestamp without time zone, event integer, user_id integer)) events_subquery_5) events_subquery_2 UNION SELECT intermediate_result."time", intermediate_result.event, intermediate_result.user_id FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result("time" timestamp without time zone, event integer, user_id integer) UNION SELECT intermediate_result."time", intermediate_result.event, intermediate_result.user_id FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result("time" timestamp without time zone, event integer, user_id integer)
|
|
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT event_types AS types, count(*) AS sumofeventtype FROM (SELECT q.user_id, q."time", q.event_types, t.user_id, random() AS random FROM ((SELECT t_1.user_id, t_1."time", unnest(t_1.collected_events) AS event_types FROM (SELECT t1.user_id, min(t1."time") AS "time", array_agg(t1.event ORDER BY t1."time", t1.event DESC) AS collected_events FROM (SELECT intermediate_result."time", intermediate_result.event, intermediate_result.user_id FROM read_intermediate_result('XXX_5'::text, 'binary'::citus_copy_format) intermediate_result("time" timestamp without time zone, event integer, user_id integer)) t1 GROUP BY t1.user_id) t_1) q JOIN (SELECT users.user_id FROM public.users_table users WHERE ((users.value_1 OPERATOR(pg_catalog.>) 0) AND (users.value_1 OPERATOR(pg_catalog.<) 4))) t ON ((t.user_id OPERATOR(pg_catalog.=) q.user_id)))) final_query(user_id, "time", event_types, user_id_1, random) GROUP BY event_types ORDER BY event_types
|
|
types | sumofeventtype
|
|
---------------------------------------------------------------------
|
|
0 | 449
|
|
2 | 433
|
|
3 | 75
|
|
(3 rows)
|
|
|
|
RESET client_min_messages;
|
|
SET citus.enable_repartition_joins to OFF;
|
|
-- recursively planned since the join is not equi join
|
|
SET client_min_messages TO DEBUG1;
|
|
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
|
FROM
|
|
( SELECT *, random()
|
|
FROM
|
|
( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
|
FROM
|
|
( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
|
FROM (
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 0 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 2) ) events_subquery_1)
|
|
UNION
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 1 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (3, 4) ) events_subquery_2)
|
|
UNION
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 2 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (5, 6) ) events_subquery_3)
|
|
UNION
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 3 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (4, 5)) events_subquery_4)) t1
|
|
GROUP BY "t1"."user_id") AS t) "q"
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
value_1 > 0 and value_1 < 4) AS t
|
|
ON (t.user_id != q.user_id)) as final_query
|
|
GROUP BY
|
|
types
|
|
ORDER BY
|
|
types;
|
|
DEBUG: generating subplan XXX_1 for subquery SELECT user_id, "time", unnest(collected_events) AS event_types FROM (SELECT t1.user_id, min(t1."time") AS "time", array_agg(t1.event ORDER BY t1."time", t1.event DESC) AS collected_events FROM (SELECT events_subquery_1.user_id, events_subquery_1."time", events_subquery_1.event FROM (SELECT events.user_id, events."time", 0 AS event FROM public.events_table events WHERE (events.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2]))) events_subquery_1 UNION SELECT events_subquery_2.user_id, events_subquery_2."time", events_subquery_2.event FROM (SELECT events.user_id, events."time", 1 AS event FROM public.events_table events WHERE (events.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[3, 4]))) events_subquery_2 UNION SELECT events_subquery_3.user_id, events_subquery_3."time", events_subquery_3.event FROM (SELECT events.user_id, events."time", 2 AS event FROM public.events_table events WHERE (events.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6]))) events_subquery_3 UNION SELECT events_subquery_4.user_id, events_subquery_4."time", events_subquery_4.event FROM (SELECT events.user_id, events."time", 3 AS event FROM public.events_table events WHERE (events.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[4, 5]))) events_subquery_4) t1 GROUP BY t1.user_id) t
|
|
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT event_types AS types, count(*) AS sumofeventtype FROM (SELECT q.user_id, q."time", q.event_types, t.user_id, random() AS random FROM ((SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.event_types FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, event_types integer)) q JOIN (SELECT users.user_id FROM public.users_table users WHERE ((users.value_1 OPERATOR(pg_catalog.>) 0) AND (users.value_1 OPERATOR(pg_catalog.<) 4))) t ON ((t.user_id OPERATOR(pg_catalog.<>) q.user_id)))) final_query(user_id, "time", event_types, user_id_1, random) GROUP BY event_types ORDER BY event_types
|
|
types | sumofeventtype
|
|
---------------------------------------------------------------------
|
|
0 | 2088
|
|
1 | 2163
|
|
2 | 397
|
|
3 | 1397
|
|
(4 rows)
|
|
|
|
RESET client_min_messages;
|
|
-- not supported since subquery 3 includes a JOIN with non-equi join
|
|
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
|
FROM
|
|
( SELECT *, random()
|
|
FROM
|
|
( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
|
FROM
|
|
( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
|
FROM (
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 0 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 2) ) events_subquery_1)
|
|
UNION
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 1 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (3, 4) ) events_subquery_2)
|
|
UNION
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 2 AS event
|
|
FROM
|
|
events_table as "events", users_table as "users"
|
|
WHERE
|
|
event_type IN (5, 6) AND users.user_id != events.user_id ) events_subquery_3)
|
|
UNION
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 3 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (4, 5)) events_subquery_4)) t1
|
|
GROUP BY "t1"."user_id") AS t) "q"
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
value_1 > 0 and value_1 < 4) AS t
|
|
ON (t.user_id = q.user_id)) as final_query
|
|
GROUP BY
|
|
types
|
|
ORDER BY
|
|
types;
|
|
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
|
|
-- similar query with more union statements (to enable UNION tree become larger)
|
|
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
|
FROM
|
|
( SELECT *, random()
|
|
FROM
|
|
( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
|
FROM
|
|
( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
|
FROM (
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 0 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 2) ) events_subquery_1)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 1 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (2, 3) ) events_subquery_2)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 2 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (3, 4) ) events_subquery_3)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 3 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (4, 5)) events_subquery_4)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 4 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (5, 6)) events_subquery_5)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 5 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (6, 1)) events_subquery_6)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 6 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (2, 5)) events_subquery_6)
|
|
) t1
|
|
GROUP BY "t1"."user_id") AS t) "q"
|
|
INNER JOIN
|
|
(SELECT "users"."user_id"
|
|
FROM users_table as "users"
|
|
WHERE value_1 > 0 and value_1 < 4) AS t ON (t.user_id = q.user_id)) as final_query
|
|
GROUP BY types
|
|
ORDER BY types;
|
|
types | sumofeventtype
|
|
---------------------------------------------------------------------
|
|
0 | 449
|
|
1 | 434
|
|
2 | 433
|
|
3 | 255
|
|
4 | 75
|
|
5 | 268
|
|
6 | 256
|
|
(7 rows)
|
|
|
|
--
|
|
-- UNION ALL Queries
|
|
--
|
|
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
|
FROM
|
|
( SELECT *, random()
|
|
FROM
|
|
( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
|
FROM
|
|
( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
|
FROM (
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 0 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 2) ) events_subquery_1)
|
|
UNION ALL
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 1 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (3, 4) ) events_subquery_2)
|
|
UNION ALL
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 2 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (5, 6) ) events_subquery_3)
|
|
UNION ALL
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 3 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (6, 1)) events_subquery_4)) t1
|
|
GROUP BY "t1"."user_id") AS t) "q"
|
|
INNER JOIN
|
|
(SELECT "users"."user_id"
|
|
FROM users_table as "users"
|
|
WHERE value_1 > 0 and value_1 < 4) AS t ON (t.user_id = q.user_id)) as final_query
|
|
GROUP BY types
|
|
ORDER BY types;
|
|
types | sumofeventtype
|
|
---------------------------------------------------------------------
|
|
0 | 449
|
|
1 | 433
|
|
2 | 75
|
|
3 | 268
|
|
(4 rows)
|
|
|
|
-- same query target list entries shuffled
|
|
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
|
FROM
|
|
( SELECT *, random()
|
|
FROM
|
|
( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
|
FROM
|
|
( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
|
FROM (
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 0 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 2) ) events_subquery_1)
|
|
UNION ALL
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 1 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (3, 4) ) events_subquery_2)
|
|
UNION ALL
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 2 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (5, 6) ) events_subquery_3)
|
|
UNION ALL
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 3 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 6)) events_subquery_4)) t1
|
|
GROUP BY "t1"."user_id") AS t) "q"
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
value_1 > 0 and value_1 < 4) AS t
|
|
ON (t.user_id = q.user_id)) as final_query
|
|
GROUP BY types
|
|
ORDER BY types;
|
|
types | sumofeventtype
|
|
---------------------------------------------------------------------
|
|
0 | 449
|
|
1 | 433
|
|
2 | 75
|
|
3 | 268
|
|
(4 rows)
|
|
|
|
-- supported through recursive planning since subquery 3 does not have partition key
|
|
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
|
FROM
|
|
( SELECT *, random()
|
|
FROM
|
|
( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
|
FROM
|
|
( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
|
FROM (
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 0 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 2) ) events_subquery_1)
|
|
UNION ALL
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 1 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (3, 4) ) events_subquery_2)
|
|
UNION ALL
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."value_2", "events"."time", 2 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (5, 6) ) events_subquery_3)
|
|
UNION ALL
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 3 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 6)) events_subquery_4)) t1
|
|
GROUP BY "t1"."user_id") AS t) "q"
|
|
INNER JOIN
|
|
(SELECT "users"."user_id"
|
|
FROM users_table as "users"
|
|
WHERE value_1 > 0 and value_1 < 4) AS t ON (t.user_id = q.user_id)) as final_query
|
|
GROUP BY types
|
|
ORDER BY types;
|
|
types | sumofeventtype
|
|
---------------------------------------------------------------------
|
|
0 | 449
|
|
1 | 433
|
|
2 | 62
|
|
3 | 268
|
|
(4 rows)
|
|
|
|
-- supported through recursive planning since events_subquery_4 does not have partition key on the
|
|
-- target list
|
|
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
|
FROM
|
|
( SELECT *, random()
|
|
FROM
|
|
( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
|
FROM
|
|
( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
|
FROM (
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 0 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 2) ) events_subquery_1)
|
|
UNION ALL
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 1 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (3, 4) ) events_subquery_2)
|
|
UNION ALL
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 2 AS event, "events"."user_id"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (5, 6) ) events_subquery_3)
|
|
UNION ALL
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", 3 AS event, "events"."user_id" * 2
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 6)) events_subquery_4)) t1
|
|
GROUP BY "t1"."user_id") AS t) "q"
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
value_1 > 0 and value_1 < 4) AS t
|
|
ON (t.user_id = q.user_id)) as final_query
|
|
GROUP BY types
|
|
ORDER BY types;
|
|
types | sumofeventtype
|
|
---------------------------------------------------------------------
|
|
0 | 449
|
|
1 | 433
|
|
2 | 75
|
|
3 | 124
|
|
(4 rows)
|
|
|
|
-- union all with inner and left joins
|
|
SELECT user_id, count(*) as cnt
|
|
FROM
|
|
(SELECT first_query.user_id, random()
|
|
FROM
|
|
( SELECT
|
|
"t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
|
FROM
|
|
( SELECT
|
|
"t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
|
FROM (
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 0 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 2) ) events_subquery_1)
|
|
UNION ALL
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 1 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (3, 4) ) events_subquery_2)
|
|
UNION ALL
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 2 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (5, 6) ) events_subquery_3)
|
|
UNION ALL
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 3 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 6)) events_subquery_4)) t1
|
|
GROUP BY "t1"."user_id") AS t) "first_query"
|
|
INNER JOIN
|
|
(SELECT "t"."user_id"
|
|
FROM
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
value_1 > 0 and value_1 < 4) AS t
|
|
LEFT OUTER JOIN
|
|
(
|
|
SELECT
|
|
DISTINCT "events"."user_id" as user_id
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (0, 6)
|
|
GROUP BY
|
|
user_id
|
|
) as t2
|
|
ON (t2.user_id = t.user_id) WHERE t2.user_id is NULL) as second_query
|
|
ON ("first_query".user_id = "second_query".user_id)) as final_query
|
|
GROUP BY
|
|
user_id ORDER BY cnt DESC, user_id DESC
|
|
LIMIT 10;
|
|
user_id | cnt
|
|
---------------------------------------------------------------------
|
|
3 | 275
|
|
6 | 72
|
|
(2 rows)
|
|
|
|
-- recursively planned since the join between t and t2 is not equi join
|
|
-- union all with inner and left joins
|
|
SET client_min_messages TO DEBUG1;
|
|
SELECT user_id, count(*) as cnt
|
|
FROM
|
|
(SELECT first_query.user_id, random()
|
|
FROM
|
|
( SELECT
|
|
"t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
|
FROM
|
|
( SELECT
|
|
"t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
|
FROM (
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 0 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 2) ) events_subquery_1)
|
|
UNION ALL
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 1 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (3, 4) ) events_subquery_2)
|
|
UNION ALL
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 2 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (5, 6) ) events_subquery_3)
|
|
UNION ALL
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 3 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 6)) events_subquery_4)) t1
|
|
GROUP BY "t1"."user_id") AS t) "first_query"
|
|
INNER JOIN
|
|
(SELECT "t"."user_id"
|
|
FROM
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
value_1 > 0 and value_1 < 4) AS t
|
|
LEFT OUTER JOIN
|
|
(
|
|
SELECT
|
|
DISTINCT "events"."user_id" as user_id
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (0, 6)
|
|
GROUP BY
|
|
user_id
|
|
) as t2
|
|
ON (t2.user_id > t.user_id) WHERE t2.user_id is NULL) as second_query
|
|
ON ("first_query".user_id = "second_query".user_id)) as final_query
|
|
GROUP BY
|
|
user_id ORDER BY cnt DESC, user_id DESC
|
|
LIMIT 10;
|
|
DEBUG: generating subplan XXX_1 for subquery SELECT DISTINCT user_id FROM public.events_table events WHERE (event_type OPERATOR(pg_catalog.=) ANY (ARRAY[0, 6])) GROUP BY user_id
|
|
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, count(*) AS cnt FROM (SELECT first_query.user_id, random() AS random FROM ((SELECT t.user_id, t."time", unnest(t.collected_events) AS event_types FROM (SELECT t1.user_id, min(t1."time") AS "time", array_agg(t1.event ORDER BY t1."time", t1.event DESC) AS collected_events FROM (SELECT events_subquery_1.user_id, events_subquery_1."time", events_subquery_1.event FROM (SELECT events.user_id, events."time", 0 AS event FROM public.events_table events WHERE (events.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2]))) events_subquery_1 UNION ALL SELECT events_subquery_2.user_id, events_subquery_2."time", events_subquery_2.event FROM (SELECT events.user_id, events."time", 1 AS event FROM public.events_table events WHERE (events.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[3, 4]))) events_subquery_2 UNION ALL SELECT events_subquery_3.user_id, events_subquery_3."time", events_subquery_3.event FROM (SELECT events.user_id, events."time", 2 AS event FROM public.events_table events WHERE (events.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6]))) events_subquery_3 UNION ALL SELECT events_subquery_4.user_id, events_subquery_4."time", events_subquery_4.event FROM (SELECT events.user_id, events."time", 3 AS event FROM public.events_table events WHERE (events.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 6]))) events_subquery_4) t1 GROUP BY t1.user_id) t) first_query JOIN (SELECT t.user_id FROM ((SELECT users.user_id FROM public.users_table users WHERE ((users.value_1 OPERATOR(pg_catalog.>) 0) AND (users.value_1 OPERATOR(pg_catalog.<) 4))) t LEFT JOIN (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) t2 ON ((t2.user_id OPERATOR(pg_catalog.>) t.user_id))) WHERE (t2.user_id IS NULL)) second_query ON ((first_query.user_id OPERATOR(pg_catalog.=) second_query.user_id)))) final_query GROUP BY user_id ORDER BY (count(*)) DESC, user_id DESC LIMIT 10
|
|
DEBUG: push down of limit count: 10
|
|
user_id | cnt
|
|
---------------------------------------------------------------------
|
|
5 | 324
|
|
6 | 72
|
|
(2 rows)
|
|
|
|
RESET client_min_messages;
|
|
--
|
|
-- Union, inner join and left join
|
|
--
|
|
SELECT user_id, count(*) as cnt
|
|
FROM
|
|
(SELECT first_query.user_id, random()
|
|
FROM
|
|
( SELECT
|
|
"t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
|
FROM
|
|
( SELECT
|
|
"t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
|
FROM (
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 0 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 2) ) events_subquery_1)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 1 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (3, 4) ) events_subquery_2)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 2 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (5, 6) ) events_subquery_3)
|
|
UNION
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 3 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 6)) events_subquery_4)) t1
|
|
GROUP BY "t1"."user_id") AS t) "first_query"
|
|
INNER JOIN
|
|
(SELECT "t"."user_id"
|
|
FROM
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
value_1 > 0 and value_1 < 4) AS t
|
|
LEFT OUTER JOIN
|
|
(
|
|
SELECT
|
|
DISTINCT "events"."user_id" as user_id
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (0, 6)
|
|
GROUP BY
|
|
user_id
|
|
) as t2
|
|
ON (t2.user_id = t.user_id) WHERE t2.user_id is NULL) as second_query
|
|
ON ("first_query".user_id = "second_query".user_id)) as final_query
|
|
GROUP BY
|
|
user_id ORDER BY cnt DESC, user_id DESC
|
|
LIMIT 10;
|
|
user_id | cnt
|
|
---------------------------------------------------------------------
|
|
3 | 275
|
|
6 | 72
|
|
(2 rows)
|
|
|
|
-- Simple LATERAL JOINs with GROUP BYs in each side
|
|
-- need to set subquery_pushdown due to limit for next 2 queries
|
|
SET citus.subquery_pushdown to ON;
|
|
NOTICE: Setting citus.subquery_pushdown flag is discouraged becuase it forces the planner to pushdown certain queries, skipping relevant correctness checks.
|
|
DETAIL: When enabled, the planner skips many correctness checks for subqueries and pushes down the queries to shards as-is. It means that the queries are likely to return wrong results unless the user is absolutely sure that pushing down the subquery is safe. This GUC is maintained only for backward compatibility, no new users are supposed to use it. The planner is capable of pushing down as much computation as possible to the shards depending on the query.
|
|
SELECT *
|
|
FROM
|
|
(SELECT "some_users_data".user_id, lastseen
|
|
FROM
|
|
(SELECT user_id, max(time) AS lastseen
|
|
FROM
|
|
(SELECT user_id, time
|
|
FROM
|
|
(SELECT
|
|
user_id, time
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 1 and user_id < 4) "events_1"
|
|
ORDER BY
|
|
time DESC
|
|
LIMIT 1000) "recent_events_1"
|
|
GROUP BY
|
|
user_id
|
|
ORDER BY
|
|
max(time) DESC) "some_recent_users"
|
|
JOIN LATERAL
|
|
(SELECT
|
|
"users".user_id
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
"users"."user_id" = "some_recent_users"."user_id" AND
|
|
users.value_2 > 1 and users.value_2 < 3
|
|
LIMIT 1) "some_users_data"
|
|
ON TRUE
|
|
ORDER BY
|
|
lastseen DESC
|
|
LIMIT 50) "some_users"
|
|
order BY
|
|
user_id
|
|
LIMIT 50;
|
|
user_id | lastseen
|
|
---------------------------------------------------------------------
|
|
2 | Thu Nov 23 17:26:14.563216 2017
|
|
3 | Thu Nov 23 18:08:26.550729 2017
|
|
(2 rows)
|
|
|
|
-- same query with subuqery joins in topmost select
|
|
SELECT "some_users_data".user_id, lastseen
|
|
FROM
|
|
(SELECT user_id, max(time) AS lastseen
|
|
FROM
|
|
(SELECT user_id, time
|
|
FROM
|
|
(SELECT
|
|
user_id, time
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 1 and user_id < 4) "events_1"
|
|
ORDER BY
|
|
time DESC
|
|
LIMIT 1000) "recent_events_1"
|
|
GROUP BY
|
|
user_id
|
|
ORDER BY
|
|
max(TIME) DESC) "some_recent_users"
|
|
JOIN LATERAL
|
|
(SELECT
|
|
"users".user_id
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
"users"."user_id" = "some_recent_users"."user_id" AND
|
|
users.value_2 > 1 and users.value_2 < 3
|
|
ORDER BY 1 LIMIT 1) "some_users_data"
|
|
ON TRUE
|
|
ORDER BY
|
|
user_id
|
|
limit 50;
|
|
user_id | lastseen
|
|
---------------------------------------------------------------------
|
|
2 | Thu Nov 23 17:26:14.563216 2017
|
|
3 | Thu Nov 23 18:08:26.550729 2017
|
|
(2 rows)
|
|
|
|
-- reset subquery_pushdown
|
|
SET citus.subquery_pushdown to OFF;
|
|
-- mixture of recursively planned subqueries and correlated subqueries
|
|
SELECT "some_users_data".user_id, lastseen
|
|
FROM
|
|
(SELECT user_id, max(time) AS lastseen
|
|
FROM
|
|
(SELECT user_id, time
|
|
FROM
|
|
(SELECT
|
|
user_id, time
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 1 and user_id < 4) "events_1"
|
|
ORDER BY
|
|
time DESC
|
|
LIMIT 1000) "recent_events_1"
|
|
GROUP BY
|
|
user_id
|
|
ORDER BY
|
|
max(TIME) DESC) "some_recent_users"
|
|
JOIN LATERAL
|
|
(SELECT
|
|
"users".user_id
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
"users"."value_1" = "some_recent_users"."user_id" AND
|
|
users.value_2 > 1 and users.value_2 < 3
|
|
ORDER BY 1 LIMIT 1) "some_users_data"
|
|
ON TRUE
|
|
ORDER BY
|
|
user_id
|
|
limit 50;
|
|
ERROR: cannot push down this subquery
|
|
DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from complex subqueries, CTEs or local tables
|
|
SELECT "some_users_data".user_id, lastseen
|
|
FROM
|
|
(SELECT 2 * user_id as user_id, max(time) AS lastseen
|
|
FROM
|
|
(SELECT user_id, time
|
|
FROM
|
|
(SELECT
|
|
user_id, time
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 1 and user_id < 4) "events_1"
|
|
ORDER BY
|
|
time DESC
|
|
LIMIT 1000) "recent_events_1"
|
|
GROUP BY
|
|
user_id
|
|
ORDER BY
|
|
max(TIME) DESC) "some_recent_users"
|
|
JOIN LATERAL
|
|
(SELECT
|
|
"users".user_id
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
"users"."user_id" = "some_recent_users"."user_id" AND
|
|
users.value_2 > 1 and users.value_2 < 3
|
|
ORDER BY 1 LIMIT 1) "some_users_data"
|
|
ON TRUE
|
|
ORDER BY
|
|
user_id
|
|
limit 50;
|
|
ERROR: cannot push down this subquery
|
|
DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from complex subqueries, CTEs or local tables
|
|
-- LATERAL JOINs used with INNER JOINs
|
|
SET citus.subquery_pushdown to ON;
|
|
NOTICE: Setting citus.subquery_pushdown flag is discouraged becuase it forces the planner to pushdown certain queries, skipping relevant correctness checks.
|
|
DETAIL: When enabled, the planner skips many correctness checks for subqueries and pushes down the queries to shards as-is. It means that the queries are likely to return wrong results unless the user is absolutely sure that pushing down the subquery is safe. This GUC is maintained only for backward compatibility, no new users are supposed to use it. The planner is capable of pushing down as much computation as possible to the shards depending on the query.
|
|
SELECT user_id, lastseen
|
|
FROM
|
|
(SELECT
|
|
"some_users_data".user_id, lastseen
|
|
FROM
|
|
(SELECT
|
|
filter_users_1.user_id, time AS lastseen
|
|
FROM
|
|
(SELECT
|
|
user_where_1_1.user_id
|
|
FROM
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 3 and value_1 > 2) user_where_1_1
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 3 and value_2 > 3) user_where_1_join_1
|
|
ON ("user_where_1_1".user_id = "user_where_1_join_1".user_id))
|
|
filter_users_1
|
|
JOIN LATERAL
|
|
(SELECT
|
|
user_id, time
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 1 and user_id < 3 AND
|
|
user_id = filter_users_1.user_id
|
|
ORDER BY
|
|
time DESC
|
|
LIMIT 1) "last_events_1"
|
|
ON TRUE
|
|
ORDER BY
|
|
time DESC
|
|
LIMIT 10) "some_recent_users"
|
|
JOIN LATERAL
|
|
(SELECT
|
|
"users".user_id
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
"users"."user_id" = "some_recent_users"."user_id" AND
|
|
"users"."value_2" > 4
|
|
ORDER BY 1 LIMIT 1) "some_users_data"
|
|
ON TRUE
|
|
ORDER BY
|
|
lastseen DESC
|
|
LIMIT 10) "some_users"
|
|
ORDER BY
|
|
user_id DESC, lastseen DESC
|
|
LIMIT 10;
|
|
user_id | lastseen
|
|
---------------------------------------------------------------------
|
|
2 | Thu Nov 23 17:26:14.563216 2017
|
|
2 | Thu Nov 23 17:26:14.563216 2017
|
|
2 | Thu Nov 23 17:26:14.563216 2017
|
|
2 | Thu Nov 23 17:26:14.563216 2017
|
|
2 | Thu Nov 23 17:26:14.563216 2017
|
|
2 | Thu Nov 23 17:26:14.563216 2017
|
|
2 | Thu Nov 23 17:26:14.563216 2017
|
|
2 | Thu Nov 23 17:26:14.563216 2017
|
|
2 | Thu Nov 23 17:26:14.563216 2017
|
|
2 | Thu Nov 23 17:26:14.563216 2017
|
|
(10 rows)
|
|
|
|
--
|
|
-- A similar query with topmost select is dropped
|
|
-- and replaced by aggregation. Notice the heavy use of limit
|
|
--
|
|
SELECT "some_users_data".user_id, MAX(lastseen), count(*)
|
|
FROM
|
|
(SELECT
|
|
filter_users_1.user_id, time AS lastseen
|
|
FROM
|
|
(SELECT
|
|
user_where_1_1.user_id
|
|
FROM
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 3 and value_1 > 2) user_where_1_1
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 3 and value_2 > 3) user_where_1_join_1
|
|
ON ("user_where_1_1".user_id = "user_where_1_join_1".user_id)) filter_users_1
|
|
JOIN LATERAL
|
|
(SELECT
|
|
user_id, time
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 1 and user_id < 3 and user_id = filter_users_1.user_id
|
|
ORDER BY
|
|
time DESC
|
|
LIMIT 1) "last_events_1" ON true
|
|
ORDER BY time DESC
|
|
LIMIT 10) "some_recent_users"
|
|
JOIN LATERAL
|
|
(SELECT
|
|
"users".user_id
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
"users"."user_id" = "some_recent_users"."user_id" AND
|
|
"users"."value_2" > 4
|
|
ORDER BY 1 LIMIT 1) "some_users_data" ON true
|
|
GROUP BY 1
|
|
ORDER BY 2, 1 DESC
|
|
LIMIT 10;
|
|
user_id | max | count
|
|
---------------------------------------------------------------------
|
|
2 | Thu Nov 23 17:26:14.563216 2017 | 10
|
|
(1 row)
|
|
|
|
SET citus.subquery_pushdown to OFF;
|
|
-- not supported since the inner JOIN is not equi join and LATERAL JOIN prevents recursive planning
|
|
SET client_min_messages TO DEBUG2;
|
|
SELECT user_id, lastseen
|
|
FROM
|
|
(SELECT
|
|
"some_users_data".user_id, lastseen
|
|
FROM
|
|
(SELECT
|
|
filter_users_1.user_id, time AS lastseen
|
|
FROM
|
|
(SELECT
|
|
user_where_1_1.user_id
|
|
FROM
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and value_1 > 2) user_where_1_1
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and value_2 > 3) user_where_1_join_1
|
|
ON ("user_where_1_1".user_id != "user_where_1_join_1".user_id)) filter_users_1
|
|
JOIN LATERAL
|
|
(SELECT
|
|
user_id, time
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and user_id = filter_users_1.user_id
|
|
ORDER BY
|
|
time DESC
|
|
LIMIT 1) "last_events_1" ON true
|
|
ORDER BY time DESC
|
|
LIMIT 10) "some_recent_users"
|
|
JOIN LATERAL
|
|
(SELECT
|
|
"users".user_id
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
"users"."user_id" = "some_recent_users"."user_id" AND
|
|
"users"."value_2" > 4
|
|
ORDER BY 1 LIMIT 1) "some_users_data" ON true
|
|
ORDER BY
|
|
lastseen DESC
|
|
LIMIT 10) "some_users"
|
|
ORDER BY
|
|
user_id DESC, lastseen DESC
|
|
LIMIT 10;
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM public.users_table users WHERE ((user_id OPERATOR(pg_catalog.>) 1) AND (user_id OPERATOR(pg_catalog.<) 4) AND (value_2 OPERATOR(pg_catalog.>) 3))
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
DEBUG: push down of limit count: 10
|
|
DEBUG: generating subplan XXX_2 for subquery SELECT filter_users_1.user_id, last_events_1."time" AS lastseen FROM ((SELECT user_where_1_1.user_id FROM ((SELECT users.user_id FROM public.users_table users WHERE ((users.user_id OPERATOR(pg_catalog.>) 1) AND (users.user_id OPERATOR(pg_catalog.<) 4) AND (users.value_1 OPERATOR(pg_catalog.>) 2))) user_where_1_1 JOIN (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) user_where_1_join_1 ON ((user_where_1_1.user_id OPERATOR(pg_catalog.<>) user_where_1_join_1.user_id)))) filter_users_1 JOIN LATERAL (SELECT events.user_id, events."time" FROM public.events_table events WHERE ((events.user_id OPERATOR(pg_catalog.>) 1) AND (events.user_id OPERATOR(pg_catalog.<) 4) AND (events.user_id OPERATOR(pg_catalog.=) filter_users_1.user_id)) ORDER BY events."time" DESC LIMIT 1) last_events_1 ON (true)) ORDER BY last_events_1."time" DESC LIMIT 10
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
ERROR: cannot push down this subquery
|
|
DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from complex subqueries, CTEs or local tables
|
|
SET citus.enable_repartition_joins to ON;
|
|
SET client_min_messages TO DEBUG1;
|
|
-- recursively planner since the inner JOIN is not on the partition key
|
|
SELECT user_id, lastseen
|
|
FROM
|
|
(SELECT
|
|
"some_users_data".user_id, lastseen
|
|
FROM
|
|
(SELECT
|
|
filter_users_1.user_id, time AS lastseen
|
|
FROM
|
|
(SELECT
|
|
user_where_1_1.user_id
|
|
FROM
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and value_1 > 2) user_where_1_1
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id", "users"."value_1"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and value_2 > 3) user_where_1_join_1
|
|
ON ("user_where_1_1".user_id = "user_where_1_join_1".value_1)) filter_users_1
|
|
JOIN LATERAL
|
|
(SELECT
|
|
user_id, time
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and user_id = filter_users_1.user_id
|
|
ORDER BY
|
|
time DESC
|
|
LIMIT 1) "last_events_1" ON true
|
|
ORDER BY time DESC
|
|
LIMIT 10) "some_recent_users"
|
|
JOIN LATERAL
|
|
(SELECT
|
|
"users".user_id
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
"users"."user_id" = "some_recent_users"."user_id" AND
|
|
"users"."value_2" > 4
|
|
ORDER BY 1 LIMIT 1) "some_users_data" ON true
|
|
ORDER BY
|
|
lastseen DESC
|
|
LIMIT 10) "some_users"
|
|
ORDER BY
|
|
user_id DESC, lastseen DESC
|
|
LIMIT 10;
|
|
DEBUG: generating subplan XXX_1 for subquery SELECT user_id, value_1 FROM public.users_table users WHERE ((user_id OPERATOR(pg_catalog.>) 1) AND (user_id OPERATOR(pg_catalog.<) 4) AND (value_2 OPERATOR(pg_catalog.>) 3))
|
|
DEBUG: push down of limit count: 10
|
|
DEBUG: generating subplan XXX_2 for subquery SELECT filter_users_1.user_id, last_events_1."time" AS lastseen FROM ((SELECT user_where_1_1.user_id FROM ((SELECT users.user_id FROM public.users_table users WHERE ((users.user_id OPERATOR(pg_catalog.>) 1) AND (users.user_id OPERATOR(pg_catalog.<) 4) AND (users.value_1 OPERATOR(pg_catalog.>) 2))) user_where_1_1 JOIN (SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer)) user_where_1_join_1 ON ((user_where_1_1.user_id OPERATOR(pg_catalog.=) user_where_1_join_1.value_1)))) filter_users_1 JOIN LATERAL (SELECT events.user_id, events."time" FROM public.events_table events WHERE ((events.user_id OPERATOR(pg_catalog.>) 1) AND (events.user_id OPERATOR(pg_catalog.<) 4) AND (events.user_id OPERATOR(pg_catalog.=) filter_users_1.user_id)) ORDER BY events."time" DESC LIMIT 1) last_events_1 ON (true)) ORDER BY last_events_1."time" DESC LIMIT 10
|
|
ERROR: cannot push down this subquery
|
|
DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from complex subqueries, CTEs or local tables
|
|
SET citus.enable_repartition_joins to OFF;
|
|
RESET client_min_messages;
|
|
-- not supported since upper LATERAL JOIN is not equi join
|
|
SELECT user_id, lastseen
|
|
FROM
|
|
(SELECT
|
|
"some_users_data".user_id, lastseen
|
|
FROM
|
|
(SELECT
|
|
filter_users_1.user_id, time AS lastseen
|
|
FROM
|
|
(SELECT
|
|
user_where_1_1.user_id
|
|
FROM
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 3 and value_1 > 2) user_where_1_1
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id", "users"."value_1"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 3 and value_2 > 3) user_where_1_join_1
|
|
ON ("user_where_1_1".user_id = "user_where_1_join_1".user_id)) filter_users_1
|
|
JOIN LATERAL
|
|
(SELECT
|
|
user_id, time
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 1 and user_id < 3 and user_id != filter_users_1.user_id
|
|
ORDER BY
|
|
time DESC
|
|
LIMIT 1) "last_events_1" ON true
|
|
ORDER BY time DESC
|
|
LIMIT 10) "some_recent_users"
|
|
JOIN LATERAL
|
|
(SELECT
|
|
"users".user_id
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
"users"."user_id" = "some_recent_users"."user_id" AND
|
|
"users"."value_2" > 4
|
|
LIMIT 1) "some_users_data" ON true
|
|
ORDER BY
|
|
lastseen DESC
|
|
LIMIT 10) "some_users"
|
|
ORDER BY
|
|
user_id DESC, lastseen DESC
|
|
LIMIT 10;
|
|
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
|
|
-- complex lateral join between inner join and correlated subquery
|
|
SELECT user_id, lastseen
|
|
FROM
|
|
(SELECT
|
|
"some_users_data".user_id, lastseen
|
|
FROM
|
|
(SELECT
|
|
filter_users_1.user_id, time AS lastseen
|
|
FROM
|
|
(SELECT
|
|
user_where_1_1.user_id
|
|
FROM
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 3 and value_1 > 2) user_where_1_1
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id", "users"."value_1"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 3 and value_2 > 3) user_where_1_join_1
|
|
ON ("user_where_1_1".user_id = "user_where_1_join_1".user_id)) filter_users_1
|
|
JOIN LATERAL
|
|
(SELECT
|
|
user_id, time
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 1 and user_id < 3 and user_id = filter_users_1.user_id
|
|
ORDER BY
|
|
time DESC
|
|
LIMIT 1) "last_events_1" ON true
|
|
ORDER BY time DESC
|
|
LIMIT 10) "some_recent_users"
|
|
JOIN LATERAL
|
|
(SELECT
|
|
"users".user_id
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
"users"."value_1" = "some_recent_users"."user_id" AND
|
|
"users"."value_2" > 4
|
|
ORDER BY 1 LIMIT 1) "some_users_data" ON true
|
|
ORDER BY
|
|
lastseen DESC
|
|
LIMIT 10) "some_users"
|
|
ORDER BY
|
|
user_id DESC, lastseen DESC
|
|
LIMIT 10;
|
|
ERROR: cannot push down this subquery
|
|
DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from complex subqueries, CTEs or local tables
|
|
-- NESTED INNER JOINs
|
|
SELECT
|
|
count(*) AS value, "generated_group_field"
|
|
FROM
|
|
(SELECT
|
|
DISTINCT "pushedDownQuery"."real_user_id", "generated_group_field"
|
|
FROM
|
|
(SELECT
|
|
"eventQuery"."real_user_id", "eventQuery"."time", random(), ("eventQuery"."value_2") AS "generated_group_field"
|
|
FROM
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."time", "events"."user_id", "events"."value_2"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 AND event_type IN (4, 5) ) "temp_data_queries"
|
|
INNER JOIN
|
|
(SELECT
|
|
user_where_1_1.real_user_id
|
|
FROM
|
|
(SELECT
|
|
"users"."user_id" as real_user_id
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and value_2 > 3 ) user_where_1_1
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and value_3 > 3 ) user_where_1_join_1
|
|
ON ("user_where_1_1".real_user_id = "user_where_1_join_1".user_id)) "user_filters_1"
|
|
ON ("temp_data_queries".user_id = "user_filters_1".real_user_id)) "eventQuery") "pushedDownQuery") "pushedDownQuery"
|
|
GROUP BY
|
|
"generated_group_field"
|
|
ORDER BY
|
|
generated_group_field DESC, value DESC;
|
|
value | generated_group_field
|
|
---------------------------------------------------------------------
|
|
1 | 5
|
|
2 | 2
|
|
2 | 1
|
|
1 | 0
|
|
(4 rows)
|
|
|
|
SET citus.enable_repartition_joins to ON;
|
|
SET client_min_messages TO DEBUG1;
|
|
-- recursively planned since the first inner join is not on the partition key
|
|
SELECT
|
|
count(*) AS value, "generated_group_field"
|
|
FROM
|
|
(SELECT
|
|
DISTINCT "pushedDownQuery"."real_user_id", "generated_group_field"
|
|
FROM
|
|
(SELECT
|
|
"eventQuery"."real_user_id", "eventQuery"."time", random(), ("eventQuery"."value_2") AS "generated_group_field"
|
|
FROM
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."time", "events"."user_id", "events"."value_2"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 AND event_type IN (4, 5) ) "temp_data_queries"
|
|
INNER JOIN
|
|
(SELECT
|
|
user_where_1_1.real_user_id
|
|
FROM
|
|
(SELECT
|
|
"users"."user_id" as real_user_id
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and value_2 > 3 ) user_where_1_1
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id", "users"."value_2"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and value_3 > 3 ) user_where_1_join_1
|
|
ON ("user_where_1_1".real_user_id = "user_where_1_join_1".value_2)) "user_filters_1"
|
|
ON ("temp_data_queries".user_id = "user_filters_1".real_user_id)) "eventQuery") "pushedDownQuery") "pushedDownQuery"
|
|
GROUP BY
|
|
"generated_group_field"
|
|
ORDER BY
|
|
generated_group_field DESC, value DESC;
|
|
DEBUG: generating subplan XXX_1 for subquery SELECT user_id, value_2 FROM public.users_table users WHERE ((user_id OPERATOR(pg_catalog.>) 1) AND (user_id OPERATOR(pg_catalog.<) 4) AND (value_3 OPERATOR(pg_catalog.>) (3)::double precision))
|
|
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS value, generated_group_field FROM (SELECT DISTINCT "pushedDownQuery_1".real_user_id, "pushedDownQuery_1".generated_group_field FROM (SELECT "eventQuery".real_user_id, "eventQuery"."time", random() AS random, "eventQuery".value_2 AS generated_group_field FROM (SELECT temp_data_queries."time", temp_data_queries.user_id, temp_data_queries.value_2, user_filters_1.real_user_id FROM ((SELECT events."time", events.user_id, events.value_2 FROM public.events_table events WHERE ((events.user_id OPERATOR(pg_catalog.>) 1) AND (events.user_id OPERATOR(pg_catalog.<) 4) AND (events.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[4, 5])))) temp_data_queries JOIN (SELECT user_where_1_1.real_user_id FROM ((SELECT users.user_id AS real_user_id FROM public.users_table users WHERE ((users.user_id OPERATOR(pg_catalog.>) 1) AND (users.user_id OPERATOR(pg_catalog.<) 4) AND (users.value_2 OPERATOR(pg_catalog.>) 3))) user_where_1_1 JOIN (SELECT intermediate_result.user_id, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_2 integer)) user_where_1_join_1 ON ((user_where_1_1.real_user_id OPERATOR(pg_catalog.=) user_where_1_join_1.value_2)))) user_filters_1 ON ((temp_data_queries.user_id OPERATOR(pg_catalog.=) user_filters_1.real_user_id)))) "eventQuery") "pushedDownQuery_1") "pushedDownQuery" GROUP BY generated_group_field ORDER BY generated_group_field DESC, (count(*)) DESC
|
|
value | generated_group_field
|
|
---------------------------------------------------------------------
|
|
1 | 5
|
|
2 | 2
|
|
2 | 1
|
|
1 | 0
|
|
(4 rows)
|
|
|
|
-- recursive planning kicked-in since the non-equi join is among subqueries
|
|
SELECT
|
|
count(*) AS value, "generated_group_field"
|
|
FROM
|
|
(SELECT
|
|
DISTINCT "pushedDownQuery"."real_user_id", "generated_group_field"
|
|
FROM
|
|
(SELECT
|
|
"eventQuery"."real_user_id", "eventQuery"."time", random(), ("eventQuery"."value_2") AS "generated_group_field"
|
|
FROM
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."time", "events"."user_id", "events"."value_2"
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 AND event_type IN (4, 5) ) "temp_data_queries"
|
|
INNER JOIN
|
|
(SELECT
|
|
user_where_1_1.real_user_id
|
|
FROM
|
|
(SELECT
|
|
"users"."user_id" as real_user_id
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and value_2 > 3 ) user_where_1_1
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id", "users"."value_2"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and value_3 > 3 ) user_where_1_join_1
|
|
ON ("user_where_1_1".real_user_id >= "user_where_1_join_1".user_id)) "user_filters_1"
|
|
ON ("temp_data_queries".user_id = "user_filters_1".real_user_id)) "eventQuery") "pushedDownQuery") "pushedDownQuery"
|
|
GROUP BY
|
|
"generated_group_field"
|
|
ORDER BY
|
|
generated_group_field DESC, value DESC;
|
|
DEBUG: generating subplan XXX_1 for subquery SELECT user_id, value_2 FROM public.users_table users WHERE ((user_id OPERATOR(pg_catalog.>) 1) AND (user_id OPERATOR(pg_catalog.<) 4) AND (value_3 OPERATOR(pg_catalog.>) (3)::double precision))
|
|
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS value, generated_group_field FROM (SELECT DISTINCT "pushedDownQuery_1".real_user_id, "pushedDownQuery_1".generated_group_field FROM (SELECT "eventQuery".real_user_id, "eventQuery"."time", random() AS random, "eventQuery".value_2 AS generated_group_field FROM (SELECT temp_data_queries."time", temp_data_queries.user_id, temp_data_queries.value_2, user_filters_1.real_user_id FROM ((SELECT events."time", events.user_id, events.value_2 FROM public.events_table events WHERE ((events.user_id OPERATOR(pg_catalog.>) 1) AND (events.user_id OPERATOR(pg_catalog.<) 4) AND (events.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[4, 5])))) temp_data_queries JOIN (SELECT user_where_1_1.real_user_id FROM ((SELECT users.user_id AS real_user_id FROM public.users_table users WHERE ((users.user_id OPERATOR(pg_catalog.>) 1) AND (users.user_id OPERATOR(pg_catalog.<) 4) AND (users.value_2 OPERATOR(pg_catalog.>) 3))) user_where_1_1 JOIN (SELECT intermediate_result.user_id, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_2 integer)) user_where_1_join_1 ON ((user_where_1_1.real_user_id OPERATOR(pg_catalog.>=) user_where_1_join_1.user_id)))) user_filters_1 ON ((temp_data_queries.user_id OPERATOR(pg_catalog.=) user_filters_1.real_user_id)))) "eventQuery") "pushedDownQuery_1") "pushedDownQuery" GROUP BY generated_group_field ORDER BY generated_group_field DESC, (count(*)) DESC
|
|
value | generated_group_field
|
|
---------------------------------------------------------------------
|
|
1 | 5
|
|
2 | 2
|
|
2 | 1
|
|
1 | 0
|
|
(4 rows)
|
|
|
|
SET citus.enable_repartition_joins to OFF;
|
|
RESET client_min_messages;
|
|
-- single level inner joins
|
|
SELECT
|
|
"value_3", count(*) AS cnt
|
|
FROM
|
|
(SELECT
|
|
"value_3", "user_id", random()
|
|
FROM
|
|
(SELECT
|
|
users_in_segment_1.user_id, value_3
|
|
FROM
|
|
(SELECT
|
|
user_id, value_3 * 2 as value_3
|
|
FROM
|
|
(SELECT
|
|
user_id, value_3
|
|
FROM
|
|
(SELECT
|
|
"users"."user_id", value_3
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and value_2 > 2
|
|
) simple_user_where_1
|
|
) all_buckets_1
|
|
) users_in_segment_1
|
|
JOIN
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and value_2 > 3
|
|
) some_users_data
|
|
ON ("users_in_segment_1".user_id = "some_users_data".user_id)
|
|
) segmentalias_1) "tempQuery"
|
|
GROUP BY "value_3"
|
|
ORDER BY cnt, value_3 DESC LIMIT 10;
|
|
value_3 | cnt
|
|
---------------------------------------------------------------------
|
|
0 | 7
|
|
10 | 21
|
|
4 | 21
|
|
8 | 28
|
|
6 | 28
|
|
2 | 35
|
|
(6 rows)
|
|
|
|
SET citus.enable_repartition_joins to ON;
|
|
SET client_min_messages TO DEBUG1;
|
|
-- although there is no column equality at all
|
|
-- still recursive planning plans "some_users_data"
|
|
-- and the query becomes OK
|
|
SELECT
|
|
"value_3", count(*) AS cnt
|
|
FROM
|
|
(SELECT
|
|
"value_3", "user_id", random()
|
|
FROM
|
|
(SELECT
|
|
users_in_segment_1.user_id, value_3
|
|
FROM
|
|
(SELECT
|
|
user_id, value_3 * 2 as value_3
|
|
FROM
|
|
(SELECT
|
|
user_id, value_3
|
|
FROM
|
|
(SELECT
|
|
"users"."user_id", value_3
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and value_2 > 2
|
|
) simple_user_where_1
|
|
) all_buckets_1
|
|
) users_in_segment_1
|
|
JOIN
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and value_2 > 3
|
|
) some_users_data
|
|
ON (true)
|
|
) segmentalias_1) "tempQuery"
|
|
GROUP BY "value_3"
|
|
ORDER BY cnt, value_3 DESC LIMIT 10;
|
|
DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM public.users_table users WHERE ((user_id OPERATOR(pg_catalog.>) 1) AND (user_id OPERATOR(pg_catalog.<) 4) AND (value_2 OPERATOR(pg_catalog.>) 3))
|
|
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT value_3, count(*) AS cnt FROM (SELECT segmentalias_1.value_3, segmentalias_1.user_id, random() AS random FROM (SELECT users_in_segment_1.user_id, users_in_segment_1.value_3 FROM ((SELECT all_buckets_1.user_id, (all_buckets_1.value_3 OPERATOR(pg_catalog.*) (2)::double precision) AS value_3 FROM (SELECT simple_user_where_1.user_id, simple_user_where_1.value_3 FROM (SELECT users.user_id, users.value_3 FROM public.users_table users WHERE ((users.user_id OPERATOR(pg_catalog.>) 1) AND (users.user_id OPERATOR(pg_catalog.<) 4) AND (users.value_2 OPERATOR(pg_catalog.>) 2))) simple_user_where_1) all_buckets_1) users_in_segment_1 JOIN (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) some_users_data ON (true))) segmentalias_1) "tempQuery" GROUP BY value_3 ORDER BY (count(*)), value_3 DESC LIMIT 10
|
|
value_3 | cnt
|
|
---------------------------------------------------------------------
|
|
0 | 14
|
|
10 | 42
|
|
4 | 42
|
|
8 | 56
|
|
6 | 56
|
|
2 | 70
|
|
(6 rows)
|
|
|
|
SET citus.enable_repartition_joins to OFF;
|
|
RESET client_min_messages;
|
|
-- nested LATERAL JOINs
|
|
SET citus.subquery_pushdown to ON;
|
|
NOTICE: Setting citus.subquery_pushdown flag is discouraged becuase it forces the planner to pushdown certain queries, skipping relevant correctness checks.
|
|
DETAIL: When enabled, the planner skips many correctness checks for subqueries and pushes down the queries to shards as-is. It means that the queries are likely to return wrong results unless the user is absolutely sure that pushing down the subquery is safe. This GUC is maintained only for backward compatibility, no new users are supposed to use it. The planner is capable of pushing down as much computation as possible to the shards depending on the query.
|
|
SELECT *
|
|
FROM
|
|
(SELECT "some_users_data".user_id, "some_recent_users".value_3
|
|
FROM
|
|
(SELECT
|
|
filter_users_1.user_id, value_3
|
|
FROM
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and users.value_2 = 2) filter_users_1
|
|
JOIN LATERAL
|
|
(SELECT
|
|
user_id, value_3
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 AND
|
|
("events".user_id = "filter_users_1".user_id)
|
|
ORDER BY
|
|
value_3 DESC
|
|
LIMIT 1) "last_events_1" ON true
|
|
ORDER BY value_3 DESC
|
|
LIMIT 10) "some_recent_users"
|
|
JOIN LATERAL
|
|
(SELECT
|
|
"users".user_id
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
"users"."user_id" = "some_recent_users"."user_id" AND
|
|
users.value_2 > 2
|
|
LIMIT 1) "some_users_data" ON true
|
|
ORDER BY
|
|
value_3 DESC
|
|
LIMIT 10) "some_users"
|
|
ORDER BY
|
|
value_3 DESC, user_id ASC
|
|
LIMIT 10;
|
|
user_id | value_3
|
|
---------------------------------------------------------------------
|
|
2 | 5
|
|
2 | 5
|
|
2 | 5
|
|
2 | 5
|
|
3 | 5
|
|
3 | 5
|
|
3 | 5
|
|
3 | 5
|
|
3 | 5
|
|
(9 rows)
|
|
|
|
-- nested lateral join at top most level
|
|
SELECT "some_users_data".user_id, "some_recent_users".value_3
|
|
FROM
|
|
(SELECT
|
|
filter_users_1.user_id, value_3
|
|
FROM
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and users.value_2 = 2
|
|
) filter_users_1
|
|
JOIN LATERAL
|
|
(SELECT
|
|
user_id, value_3
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 AND
|
|
("events".user_id = "filter_users_1".user_id)
|
|
ORDER BY
|
|
value_3 DESC
|
|
LIMIT 1
|
|
) "last_events_1" ON true
|
|
ORDER BY value_3 DESC
|
|
LIMIT 10
|
|
) "some_recent_users"
|
|
JOIN LATERAL
|
|
(SELECT
|
|
"users".user_id
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
"users"."user_id" = "some_recent_users"."user_id" AND
|
|
users.value_2 > 2
|
|
LIMIT 1
|
|
) "some_users_data" ON true
|
|
ORDER BY
|
|
value_3 DESC, user_id ASC
|
|
LIMIT 10;
|
|
user_id | value_3
|
|
---------------------------------------------------------------------
|
|
2 | 5
|
|
2 | 5
|
|
2 | 5
|
|
2 | 5
|
|
3 | 5
|
|
3 | 5
|
|
3 | 5
|
|
3 | 5
|
|
3 | 5
|
|
(9 rows)
|
|
|
|
-- longer nested lateral joins
|
|
SELECT *
|
|
FROM
|
|
(SELECT "some_users_data".user_id, "some_recent_users".value_3
|
|
FROM
|
|
(SELECT filter_users_1.user_id, value_3
|
|
FROM
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and users.value_2 = 2) filter_users_1
|
|
JOIN LATERAL
|
|
(SELECT
|
|
user_id, value_3
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 AND
|
|
("events".user_id = "filter_users_1".user_id)
|
|
ORDER BY
|
|
value_3 DESC
|
|
LIMIT 1) "last_events_1" ON true
|
|
ORDER BY
|
|
value_3 DESC
|
|
LIMIT 10) "some_recent_users"
|
|
JOIN LATERAL
|
|
(SELECT
|
|
"users".user_id
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
"users"."user_id" = "some_recent_users"."user_id" AND
|
|
users.value_2 > 2
|
|
LIMIT 1) "some_users_data" ON true
|
|
ORDER BY
|
|
value_3 DESC
|
|
LIMIT 10) "some_users"
|
|
ORDER BY
|
|
value_3 DESC, user_id DESC
|
|
LIMIT 10;
|
|
user_id | value_3
|
|
---------------------------------------------------------------------
|
|
3 | 5
|
|
3 | 5
|
|
3 | 5
|
|
3 | 5
|
|
3 | 5
|
|
2 | 5
|
|
2 | 5
|
|
2 | 5
|
|
2 | 5
|
|
(9 rows)
|
|
|
|
-- longer nested lateral join wth top level join
|
|
SELECT "some_users_data".user_id, "some_recent_users".value_3
|
|
FROM
|
|
(SELECT filter_users_1.user_id, value_3
|
|
FROM
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 1 and user_id < 4 and users.value_2 = 2
|
|
) filter_users_1
|
|
JOIN LATERAL
|
|
(SELECT
|
|
user_id, value_3
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 1 and user_id < 4
|
|
AND
|
|
("events".user_id = "filter_users_1".user_id)
|
|
ORDER BY
|
|
value_3 DESC
|
|
LIMIT 1
|
|
) "last_events_1" ON TRUE
|
|
ORDER BY value_3 DESC
|
|
LIMIT 10
|
|
) "some_recent_users"
|
|
JOIN LATERAL
|
|
(SELECT
|
|
"users".user_id
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
"users"."user_id" = "some_recent_users"."user_id" AND
|
|
users.value_2 > 2
|
|
LIMIT 1
|
|
) "some_users_data" ON TRUE
|
|
ORDER BY value_3 DESC, user_id DESC
|
|
LIMIT 10;
|
|
user_id | value_3
|
|
---------------------------------------------------------------------
|
|
3 | 5
|
|
3 | 5
|
|
3 | 5
|
|
3 | 5
|
|
3 | 5
|
|
2 | 5
|
|
2 | 5
|
|
2 | 5
|
|
2 | 5
|
|
(9 rows)
|
|
|
|
SET citus.subquery_pushdown to OFF;
|
|
-- LEFT JOINs used with INNER JOINs
|
|
SELECT
|
|
count(*) AS cnt, "generated_group_field"
|
|
FROM
|
|
(SELECT
|
|
"eventQuery"."user_id", random(), generated_group_field
|
|
FROM
|
|
(SELECT
|
|
"multi_group_wrapper_1".*, generated_group_field, random()
|
|
FROM
|
|
(SELECT *
|
|
FROM
|
|
(SELECT
|
|
"events"."time", "events"."user_id" as event_user_id
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 4) "temp_data_queries"
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
user_id > 4 and value_2 = 5) "user_filters_1"
|
|
ON ("temp_data_queries".event_user_id = "user_filters_1".user_id)) AS "multi_group_wrapper_1"
|
|
LEFT JOIN
|
|
(SELECT
|
|
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
|
|
FROM
|
|
users_table as "users") "left_group_by_1"
|
|
ON ("left_group_by_1".user_id = "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
|
|
group BY
|
|
"generated_group_field"
|
|
ORDER BY
|
|
cnt DESC, generated_group_field ASC
|
|
LIMIT 10;
|
|
cnt | generated_group_field
|
|
---------------------------------------------------------------------
|
|
336 | 2
|
|
210 | 1
|
|
210 | 3
|
|
126 | 4
|
|
126 | 5
|
|
84 | 0
|
|
(6 rows)
|
|
|
|
-- single table subquery, no JOINS involved
|
|
SELECT
|
|
count(*) AS cnt, user_id
|
|
FROM
|
|
(SELECT
|
|
"eventQuery"."user_id", random()
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id"
|
|
FROM
|
|
events_table "events"
|
|
WHERE
|
|
event_type IN (1, 2)) "eventQuery") "pushedDownQuery"
|
|
GROUP BY
|
|
"user_id"
|
|
ORDER BY
|
|
cnt DESC, user_id DESC
|
|
LIMIT 10;
|
|
cnt | user_id
|
|
---------------------------------------------------------------------
|
|
11 | 3
|
|
10 | 2
|
|
8 | 4
|
|
6 | 5
|
|
4 | 6
|
|
4 | 1
|
|
(6 rows)
|
|
|
|
-- lateral joins in the nested manner
|
|
SET citus.subquery_pushdown to ON;
|
|
NOTICE: Setting citus.subquery_pushdown flag is discouraged becuase it forces the planner to pushdown certain queries, skipping relevant correctness checks.
|
|
DETAIL: When enabled, the planner skips many correctness checks for subqueries and pushes down the queries to shards as-is. It means that the queries are likely to return wrong results unless the user is absolutely sure that pushing down the subquery is safe. This GUC is maintained only for backward compatibility, no new users are supposed to use it. The planner is capable of pushing down as much computation as possible to the shards depending on the query.
|
|
SELECT *
|
|
FROM
|
|
(SELECT
|
|
"some_users_data".user_id, value_2
|
|
FROM
|
|
(SELECT user_id, max(value_2) AS value_2
|
|
FROM
|
|
(SELECT user_id, value_2
|
|
FROM
|
|
(SELECT
|
|
user_id, value_2
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 1 and user_id < 3) "events_1"
|
|
ORDER BY
|
|
value_2 DESC
|
|
LIMIT 10000) "recent_events_1"
|
|
GROUP BY
|
|
user_id
|
|
ORDER BY
|
|
max(value_2) DESC) "some_recent_users"
|
|
JOIN LATERAL
|
|
(SELECT
|
|
"users".user_id
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
"users"."user_id" = "some_recent_users"."user_id" AND
|
|
value_2 > 4
|
|
LIMIT 1) "some_users_data" ON true
|
|
ORDER BY
|
|
value_2 DESC
|
|
LIMIT 10) "some_users"
|
|
ORDER BY
|
|
value_2 DESC, user_id DESC
|
|
LIMIT 10;
|
|
user_id | value_2
|
|
---------------------------------------------------------------------
|
|
2 | 5
|
|
(1 row)
|
|
|
|
SET citus.subquery_pushdown to OFF;
|
|
-- on side of the lateral join can be recursively plannen, then pushed down
|
|
SELECT *
|
|
FROM
|
|
(SELECT
|
|
"some_users_data".user_id, value_2
|
|
FROM
|
|
(SELECT user_id, max(value_2) AS value_2
|
|
FROM
|
|
(SELECT user_id, value_2
|
|
FROM
|
|
(SELECT
|
|
user_id, value_2
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
user_id > 1 and user_id < 3) "events_1"
|
|
ORDER BY
|
|
value_2 DESC
|
|
LIMIT 10000) "recent_events_1"
|
|
GROUP BY
|
|
user_id
|
|
ORDER BY
|
|
max(value_2) DESC) "some_recent_users"
|
|
JOIN LATERAL
|
|
(SELECT
|
|
"users".user_id
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
"users"."value_2" = "some_recent_users"."user_id" AND
|
|
value_2 > 4
|
|
ORDER BY 1 LIMIT 1) "some_users_data" ON true
|
|
ORDER BY
|
|
value_2 DESC
|
|
LIMIT 10) "some_users"
|
|
ORDER BY
|
|
value_2 DESC, user_id DESC
|
|
LIMIT 10;
|
|
ERROR: cannot push down this subquery
|
|
DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from complex subqueries, CTEs or local tables
|
|
-- lets test some unsupported set operations
|
|
-- not supported since we use INTERSECT
|
|
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
|
FROM
|
|
( SELECT *, random()
|
|
FROM
|
|
( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
|
FROM
|
|
( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
|
FROM (
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 0 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 2) ) events_subquery_1)
|
|
UNION
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 1 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (3, 4) ) events_subquery_2)
|
|
UNION
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 2 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (5, 6) ) events_subquery_3)
|
|
INTERSECT
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 3 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (4, 5)) events_subquery_4)) t1
|
|
GROUP BY "t1"."user_id") AS t) "q"
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
value_1 > 0 and value_1 < 4) AS t
|
|
ON (t.user_id = q.user_id)) as final_query
|
|
GROUP BY
|
|
types
|
|
ORDER BY
|
|
types;
|
|
types | sumofeventtype
|
|
---------------------------------------------------------------------
|
|
0 | 449
|
|
1 | 433
|
|
(2 rows)
|
|
|
|
-- supported through recursive planning
|
|
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
|
FROM
|
|
( SELECT *
|
|
FROM
|
|
( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
|
FROM
|
|
( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
|
FROM (
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 0 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 2) ) events_subquery_1)
|
|
UNION
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 1 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (3, 4) ) events_subquery_2)
|
|
UNION
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 2 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (5, 6) ) events_subquery_3)
|
|
UNION
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 3 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (4, 5)) events_subquery_4) ORDER BY 1, 2 OFFSET 3) t1
|
|
GROUP BY "t1"."user_id") AS t) "q"
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
value_1 > 0 and value_1 < 4) AS t
|
|
ON (t.user_id = q.user_id)) as final_query
|
|
GROUP BY
|
|
types
|
|
ORDER BY
|
|
types;
|
|
types | sumofeventtype
|
|
---------------------------------------------------------------------
|
|
0 | 449
|
|
1 | 425
|
|
2 | 75
|
|
3 | 251
|
|
(4 rows)
|
|
|
|
-- not supported due to non relation rte
|
|
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
|
FROM
|
|
( SELECT *, random()
|
|
FROM
|
|
( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
|
FROM
|
|
( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
|
FROM (
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 0 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 2) ) events_subquery_1)
|
|
UNION
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 1 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (3, 4) ) events_subquery_2)
|
|
UNION
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 2 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (5, 6) ) events_subquery_3)
|
|
UNION
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
1 as user_id, now(), 3 AS event
|
|
) events_subquery_4)) t1
|
|
GROUP BY "t1"."user_id") AS t) "q"
|
|
INNER JOIN
|
|
(SELECT
|
|
"users"."user_id"
|
|
FROM
|
|
users_table as "users"
|
|
WHERE
|
|
value_1 > 0 and value_1 < 4) AS t
|
|
ON (t.user_id = q.user_id)) as final_query
|
|
GROUP BY
|
|
types
|
|
ORDER BY
|
|
types;
|
|
types | sumofeventtype
|
|
---------------------------------------------------------------------
|
|
0 | 449
|
|
1 | 433
|
|
2 | 75
|
|
3 | 4
|
|
(4 rows)
|
|
|
|
-- similar to the above, but constant rte is on the right side of the query
|
|
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
|
FROM
|
|
( SELECT *, random()
|
|
FROM
|
|
( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
|
FROM
|
|
( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
|
FROM (
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 0 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (1, 2) ) events_subquery_1)
|
|
UNION
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 1 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (3, 4) ) events_subquery_2)
|
|
UNION
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
"events"."user_id", "events"."time", 2 AS event
|
|
FROM
|
|
events_table as "events"
|
|
WHERE
|
|
event_type IN (5, 6) ) events_subquery_3)
|
|
UNION
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
1 as user_id, now(), 3 AS event
|
|
) events_subquery_4)) t1
|
|
GROUP BY "t1"."user_id") AS t) "q"
|
|
INNER JOIN
|
|
(SELECT 1 as user_id) AS t
|
|
ON (t.user_id = q.user_id)) as final_query
|
|
GROUP BY
|
|
types
|
|
ORDER BY
|
|
types;
|
|
types | sumofeventtype
|
|
---------------------------------------------------------------------
|
|
0 | 4
|
|
1 | 8
|
|
2 | 1
|
|
3 | 1
|
|
(4 rows)
|
|
|
|
-- we've fixed a bug related to joins w/wout alias
|
|
-- while implementing top window functions
|
|
-- thus adding some tests related to that (i.e., next 3 tests)
|
|
WITH users_events AS
|
|
(
|
|
SELECT
|
|
user_id
|
|
FROM
|
|
users_table
|
|
)
|
|
SELECT
|
|
uid,
|
|
event_type,
|
|
value_2,
|
|
value_3
|
|
FROM (
|
|
(SELECT
|
|
user_id as uid
|
|
FROM
|
|
users_events
|
|
) users
|
|
JOIN
|
|
events_table
|
|
ON
|
|
users.uid = events_table.event_type
|
|
) a
|
|
ORDER BY
|
|
1,2,3,4
|
|
LIMIT 5;
|
|
uid | event_type | value_2 | value_3
|
|
---------------------------------------------------------------------
|
|
1 | 1 | 0 | 2
|
|
1 | 1 | 0 | 2
|
|
1 | 1 | 0 | 2
|
|
1 | 1 | 0 | 2
|
|
1 | 1 | 0 | 2
|
|
(5 rows)
|
|
|
|
-- the following queries are almost the same,
|
|
-- the only difference is the final GROUP BY
|
|
SELECT a.user_id, avg(b.value_2) as subquery_avg
|
|
FROM
|
|
(SELECT
|
|
user_id
|
|
FROM
|
|
users_table
|
|
WHERE
|
|
(value_1 > 2)
|
|
GROUP BY
|
|
user_id
|
|
HAVING
|
|
count(distinct value_1) > 2
|
|
) as a
|
|
LEFT JOIN
|
|
(SELECT
|
|
DISTINCT ON (value_2) value_2 , user_id, value_3
|
|
FROM
|
|
users_table
|
|
WHERE
|
|
(value_1 > 3)
|
|
ORDER BY
|
|
1,2,3
|
|
) AS b
|
|
USING (user_id)
|
|
GROUP BY user_id
|
|
ORDER BY 1, 2;
|
|
user_id | subquery_avg
|
|
---------------------------------------------------------------------
|
|
1 | 2.3333333333333333
|
|
3 | 5.0000000000000000
|
|
4 | 1.00000000000000000000
|
|
5 |
|
|
(4 rows)
|
|
|
|
-- see the comment for the above query
|
|
SELECT a.user_id, avg(b.value_2) as subquery_avg
|
|
FROM
|
|
(SELECT
|
|
user_id
|
|
FROM
|
|
users_table
|
|
WHERE
|
|
(value_1 > 2)
|
|
GROUP BY
|
|
user_id
|
|
HAVING
|
|
count(distinct value_1) > 2
|
|
) as a
|
|
LEFT JOIN
|
|
(SELECT
|
|
DISTINCT ON (value_2) value_2 , user_id, value_3
|
|
FROM
|
|
users_table
|
|
WHERE
|
|
(value_1 > 3)
|
|
ORDER BY
|
|
1,2,3
|
|
) AS b
|
|
USING (user_id)
|
|
GROUP BY a.user_id
|
|
ORDER BY 1, 2;
|
|
user_id | subquery_avg
|
|
---------------------------------------------------------------------
|
|
1 | 2.3333333333333333
|
|
3 | 5.0000000000000000
|
|
4 | 1.00000000000000000000
|
|
5 |
|
|
(4 rows)
|
|
|
|
-- queries where column aliases are used
|
|
-- the query is not very complex. join is given an alias with aliases
|
|
-- for each output column
|
|
SELECT k1
|
|
FROM (
|
|
SELECT k1, random()
|
|
FROM (users_table JOIN events_table USING (user_id)) k (k1, k2, k3)) l
|
|
ORDER BY k1
|
|
LIMIT 5;
|
|
k1
|
|
---------------------------------------------------------------------
|
|
1
|
|
1
|
|
1
|
|
1
|
|
1
|
|
(5 rows)
|
|
|
|
SELECT DISTINCT k1
|
|
FROM (
|
|
SELECT k1, random()
|
|
FROM (users_table JOIN events_table USING (user_id)) k (k1, k2, k3)) l
|
|
ORDER BY k1
|
|
LIMIT 5;
|
|
k1
|
|
---------------------------------------------------------------------
|
|
1
|
|
2
|
|
3
|
|
4
|
|
5
|
|
(5 rows)
|
|
|
|
SELECT x1, x3, value_2
|
|
FROM (users_table u FULL JOIN events_table e ON (u.user_id = e.user_id)) k(x1, x2, x3, x4, x5)
|
|
ORDER BY 1, 2, 3
|
|
LIMIT 5;
|
|
x1 | x3 | value_2
|
|
---------------------------------------------------------------------
|
|
1 | 1 | 1
|
|
1 | 1 | 1
|
|
1 | 1 | 1
|
|
1 | 1 | 2
|
|
1 | 1 | 2
|
|
(5 rows)
|
|
|
|
SELECT x1, x3, value_2
|
|
FROM (users_table u FULL JOIN events_table e USING (user_id)) k(x1, x2, x3, x4, x5)
|
|
ORDER BY 1, 2, 3
|
|
LIMIT 5;
|
|
x1 | x3 | value_2
|
|
---------------------------------------------------------------------
|
|
1 | 1 | 1
|
|
1 | 1 | 1
|
|
1 | 1 | 1
|
|
1 | 1 | 2
|
|
1 | 1 | 2
|
|
(5 rows)
|
|
|
|
SELECT c_custkey
|
|
FROM (users_table LEFT OUTER JOIN events_table ON (users_table.user_id = events_table.user_id)) AS test(c_custkey, c_nationkey)
|
|
INNER JOIN users_table as u2 ON (test.c_custkey = u2.user_id)
|
|
ORDER BY 1 DESC
|
|
LIMIT 10;
|
|
c_custkey
|
|
---------------------------------------------------------------------
|
|
6
|
|
6
|
|
6
|
|
6
|
|
6
|
|
6
|
|
6
|
|
6
|
|
6
|
|
6
|
|
(10 rows)
|
|
|
|
SELECT c_custkey, date_trunc('minute', max(c_nationkey))
|
|
FROM (users_table LEFT OUTER JOIN events_table ON (users_table.user_id = events_table.user_id)) AS test(c_custkey, c_nationkey)
|
|
INNER JOIN users_table as u2 ON (test.c_custkey = u2.user_id)
|
|
GROUP BY 1
|
|
ORDER BY 2, 1
|
|
LIMIT 10;
|
|
c_custkey | date_trunc
|
|
---------------------------------------------------------------------
|
|
2 | Thu Nov 23 13:52:00 2017
|
|
6 | Thu Nov 23 14:43:00 2017
|
|
4 | Thu Nov 23 15:32:00 2017
|
|
5 | Thu Nov 23 16:48:00 2017
|
|
3 | Thu Nov 23 17:18:00 2017
|
|
1 | Thu Nov 23 17:30:00 2017
|
|
(6 rows)
|
|
|
|
SELECT c_custkey, date_trunc('minute', max(c_nationkey))
|
|
FROM (users_table LEFT OUTER JOIN events_table ON (users_table.user_id = events_table.user_id)) AS test(c_custkey, c_nationkey)
|
|
INNER JOIN users_table as u2 ON (test.c_custkey = u2.user_id)
|
|
GROUP BY 1
|
|
HAVING extract(minute from max(c_nationkey)) >= 45
|
|
ORDER BY 2, 1
|
|
LIMIT 10;
|
|
c_custkey | date_trunc
|
|
---------------------------------------------------------------------
|
|
2 | Thu Nov 23 13:52:00 2017
|
|
5 | Thu Nov 23 16:48:00 2017
|
|
(2 rows)
|
|
|
|
SELECT user_id
|
|
FROM (users_table JOIN events_table USING (user_id)) AS test(user_id, c_nationkey)
|
|
FULL JOIN users_table AS u2 USING (user_id)
|
|
ORDER BY 1 DESC
|
|
LIMIT 10;
|
|
user_id
|
|
---------------------------------------------------------------------
|
|
6
|
|
6
|
|
6
|
|
6
|
|
6
|
|
6
|
|
6
|
|
6
|
|
6
|
|
6
|
|
(10 rows)
|
|
|
|
-- nested joins
|
|
SELECT bar, value_3_table.value_3
|
|
FROM ((users_table
|
|
JOIN (events_table INNER JOIN users_reference_table foo ON (events_table.user_id = foo.value_2)) AS deeper_join(user_id_deep)
|
|
ON (users_table.user_id = deeper_join.user_id_deep)) AS test(c_custkey, c_nationkey)
|
|
LEFT JOIN users_table AS u2 ON (test.c_custkey = u2.user_id)) outer_test(bar,foo)
|
|
JOIN LATERAL (SELECT value_3 FROM events_table WHERE user_id = bar) as value_3_table ON true
|
|
GROUP BY 1,2
|
|
ORDER BY 2 DESC, 1 DESC
|
|
LIMIT 10;
|
|
bar | value_3
|
|
---------------------------------------------------------------------
|
|
3 | 5
|
|
2 | 5
|
|
1 | 5
|
|
5 | 4
|
|
4 | 4
|
|
3 | 4
|
|
2 | 4
|
|
1 | 4
|
|
5 | 3
|
|
4 | 3
|
|
(10 rows)
|
|
|
|
-- lateral joins
|
|
SELECT bar,
|
|
value_3_table.value_3
|
|
FROM ((users_table
|
|
JOIN (events_table
|
|
INNER JOIN users_reference_table foo ON (events_table.user_id = foo.value_2)) AS deeper_join(user_id_deep) ON (users_table.user_id = deeper_join.user_id_deep)) AS test(c_custkey, c_nationkey)
|
|
LEFT JOIN users_table AS u2 ON (test.c_custkey = u2.user_id)) outer_test(bar, foo)
|
|
JOIN LATERAL
|
|
(SELECT value_3
|
|
FROM events_table
|
|
WHERE user_id = bar) AS value_3_table ON TRUE
|
|
GROUP BY 1, 2
|
|
ORDER BY 2 DESC, 1 DESC
|
|
LIMIT 10;
|
|
bar | value_3
|
|
---------------------------------------------------------------------
|
|
3 | 5
|
|
2 | 5
|
|
1 | 5
|
|
5 | 4
|
|
4 | 4
|
|
3 | 4
|
|
2 | 4
|
|
1 | 4
|
|
5 | 3
|
|
4 | 3
|
|
(10 rows)
|
|
|
|
--Joins inside subqueries are the sources of the values in the target list:
|
|
SELECT bar, foo.value_3, c_custkey, test_2.time_2 FROM
|
|
(
|
|
SELECT bar, value_3_table.value_3, random()
|
|
FROM ((users_table
|
|
JOIN (events_table INNER JOIN users_reference_table foo ON (events_table.user_id = foo.value_2)) AS deeper_join(user_id_deep)
|
|
ON (users_table.user_id = deeper_join.user_id_deep)) AS test(c_custkey, c_nationkey)
|
|
LEFT JOIN users_table AS u2 ON (test.c_custkey = u2.user_id)) outer_test(bar,foo)
|
|
JOIN LATERAL (SELECT value_3 FROM events_table WHERE user_id = bar) as value_3_table ON true
|
|
GROUP BY 1,2
|
|
) as foo, (users_table
|
|
JOIN (events_table INNER JOIN users_reference_table foo ON (events_table.user_id = foo.value_2)) AS deeper_join_2(user_id_deep)
|
|
ON (users_table.user_id = deeper_join_2.user_id_deep)) AS test_2(c_custkey, time_2) WHERE foo.bar = test_2.c_custkey
|
|
ORDER BY 2 DESC, 1 DESC, 3 DESC, 4 DESC
|
|
LIMIT 10;
|
|
bar | value_3 | c_custkey | time_2
|
|
---------------------------------------------------------------------
|
|
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
|
|
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
|
|
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
|
|
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
|
|
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
|
|
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
|
|
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
|
|
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
|
|
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
|
|
3 | 5 | 3 | Thu Nov 23 17:18:51.048758 2017
|
|
(10 rows)
|
|
|