citus/src/test/regress/expected/multi_insert_select.out

3813 lines
223 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

--
-- MULTI_INSERT_SELECT
--
CREATE SCHEMA multi_insert_select;
SET search_path = multi_insert_select,public;
SET citus.next_shard_id TO 13300000;
SET citus.next_placement_id TO 13300000;
-- create co-located tables
SET citus.shard_count = 4;
SET citus.shard_replication_factor = 2;
-- order of execution might change in parallel executions
-- and the error details might contain the worker node
-- so be less verbose with \set VERBOSITY TERSE when necessary
CREATE TABLE raw_events_first (user_id int, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint, UNIQUE(user_id, value_1));
SELECT create_distributed_table('raw_events_first', 'user_id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
CREATE TABLE raw_events_second (user_id int, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint, UNIQUE(user_id, value_1));
SELECT create_distributed_table('raw_events_second', 'user_id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
CREATE TABLE agg_events (user_id int, value_1_agg int, value_2_agg int, value_3_agg float, value_4_agg bigint, agg_time timestamp, UNIQUE(user_id, value_1_agg));
SELECT create_distributed_table('agg_events', 'user_id');;
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- create the reference table as well
CREATE TABLE reference_table (user_id int);
SELECT create_reference_table('reference_table');
create_reference_table
---------------------------------------------------------------------
(1 row)
CREATE TABLE insert_select_varchar_test (key varchar, value int);
SELECT create_distributed_table('insert_select_varchar_test', 'key', 'hash');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- set back to the defaults
SET citus.shard_count = DEFAULT;
SET citus.shard_replication_factor = DEFAULT;
INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES
(1, now(), 10, 100, 1000.1, 10000);
INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES
(2, now(), 20, 200, 2000.1, 20000);
INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES
(3, now(), 30, 300, 3000.1, 30000);
INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES
(4, now(), 40, 400, 4000.1, 40000);
INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES
(5, now(), 50, 500, 5000.1, 50000);
INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES
(6, now(), 60, 600, 6000.1, 60000);
SET client_min_messages TO DEBUG2;
-- raw table to raw table
INSERT INTO raw_events_second SELECT * FROM raw_events_first;
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_first.user_id, raw_events_first."time", raw_events_first.value_1, raw_events_first.value_2, raw_events_first.value_3, raw_events_first.value_4 FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_first.user_id, raw_events_first."time", raw_events_first.value_1, raw_events_first.value_2, raw_events_first.value_3, raw_events_first.value_4 FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_first.user_id, raw_events_first."time", raw_events_first.value_1, raw_events_first.value_2, raw_events_first.value_3, raw_events_first.value_4 FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_first.user_id, raw_events_first."time", raw_events_first.value_1, raw_events_first.value_2, raw_events_first.value_3, raw_events_first.value_4 FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL)
-- see that our first multi shard INSERT...SELECT works expected
SET client_min_messages TO INFO;
SELECT
raw_events_first.user_id
FROM
raw_events_first, raw_events_second
WHERE
raw_events_first.user_id = raw_events_second.user_id
ORDER BY
user_id DESC;
user_id
---------------------------------------------------------------------
6
5
4
3
2
1
(6 rows)
-- see that we get unique vialitons
\set VERBOSITY TERSE
INSERT INTO raw_events_second SELECT * FROM raw_events_first;
ERROR: duplicate key value violates unique constraint "raw_events_second_user_id_value_1_key_13300004"
\set VERBOSITY DEFAULT
-- stable functions should be allowed
INSERT INTO raw_events_second (user_id, time)
SELECT
user_id, now()
FROM
raw_events_first
WHERE
user_id < 0;
INSERT INTO raw_events_second (user_id)
SELECT
user_id
FROM
raw_events_first
WHERE
time > now() + interval '1 day';
-- hide version-dependent PL/pgSQL context messages
\set VERBOSITY terse
-- make sure we evaluate stable functions on the master, once
CREATE OR REPLACE FUNCTION evaluate_on_master()
RETURNS int LANGUAGE plpgsql STABLE
AS $function$
BEGIN
RAISE NOTICE 'evaluating on master';
RETURN 0;
END;
$function$;
INSERT INTO raw_events_second (user_id, value_1)
SELECT
user_id, evaluate_on_master()
FROM
raw_events_first
WHERE
user_id < 0;
NOTICE: evaluating on master
-- make sure we don't evaluate stable functions with column arguments
SET citus.enable_metadata_sync TO OFF;
CREATE OR REPLACE FUNCTION evaluate_on_master(x int)
RETURNS int LANGUAGE plpgsql STABLE
AS $function$
BEGIN
RAISE NOTICE 'evaluating on master';
RETURN x;
END;
$function$;
RESET citus.enable_metadata_sync;
INSERT INTO raw_events_second (user_id, value_1)
SELECT
user_id, evaluate_on_master(value_1)
FROM
raw_events_first
WHERE
user_id = 0;
ERROR: function multi_insert_select.evaluate_on_master(integer) does not exist
-- add one more row
INSERT INTO raw_events_first (user_id, time) VALUES
(7, now());
-- try a single shard query
SET client_min_messages TO DEBUG2;
INSERT INTO raw_events_second (user_id, time) SELECT user_id, time FROM raw_events_first WHERE user_id = 7;
DEBUG: Creating router plan
DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id, "time") SELECT raw_events_first.user_id, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) 7) AND (raw_events_first.user_id IS NOT NULL))
DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away
DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away
SET client_min_messages TO INFO;
-- add one more row
INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES
(8, now(), 80, 800, 8000, 80000);
-- reorder columns
SET client_min_messages TO DEBUG2;
INSERT INTO raw_events_second (value_2, value_1, value_3, value_4, user_id, time)
SELECT
value_2, value_1, value_3, value_4, user_id, time
FROM
raw_events_first
WHERE
user_id = 8;
DEBUG: Creating router plan
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_first.user_id, raw_events_first."time", raw_events_first.value_1, raw_events_first.value_2, raw_events_first.value_3, raw_events_first.value_4 FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) 8) AND (raw_events_first.user_id IS NOT NULL))
DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away
DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away
DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away
-- a zero shard select
INSERT INTO raw_events_second (value_2, value_1, value_3, value_4, user_id, time)
SELECT
value_2, value_1, value_3, value_4, user_id, time
FROM
raw_events_first
WHERE
false;
DEBUG: Creating router plan
DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away
DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away
DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away
DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away
-- another zero shard select
INSERT INTO raw_events_second (value_2, value_1, value_3, value_4, user_id, time)
SELECT
value_2, value_1, value_3, value_4, user_id, time
FROM
raw_events_first
WHERE
0 != 0;
DEBUG: Creating router plan
DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away
DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away
DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away
DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away
-- add one more row
SET client_min_messages TO INFO;
INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES
(9, now(), 90, 900, 9000, 90000);
-- show that RETURNING also works
SET client_min_messages TO DEBUG2;
INSERT INTO raw_events_second (user_id, value_1, value_3)
SELECT
user_id, value_1, value_3
FROM
raw_events_first
WHERE
value_3 = 9000
RETURNING *;
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id, value_1, value_3) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first.value_3 FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.value_3 OPERATOR(pg_catalog.=) (9000)::double precision) AND (raw_events_first.user_id IS NOT NULL)) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id, value_1, value_3) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first.value_3 FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.value_3 OPERATOR(pg_catalog.=) (9000)::double precision) AND (raw_events_first.user_id IS NOT NULL)) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id, value_1, value_3) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first.value_3 FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((raw_events_first.value_3 OPERATOR(pg_catalog.=) (9000)::double precision) AND (raw_events_first.user_id IS NOT NULL)) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id, value_1, value_3) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first.value_3 FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.value_3 OPERATOR(pg_catalog.=) (9000)::double precision) AND (raw_events_first.user_id IS NOT NULL)) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4
user_id | time | value_1 | value_2 | value_3 | value_4
---------------------------------------------------------------------
9 | | 90 | | 9000 |
(1 row)
-- hits two shards
\set VERBOSITY TERSE
INSERT INTO raw_events_second (user_id, value_1, value_3)
SELECT
user_id, value_1, value_3
FROM
raw_events_first
WHERE
user_id = 9 OR user_id = 16
RETURNING *;
DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id, value_1, value_3) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first.value_3 FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (((raw_events_first.user_id OPERATOR(pg_catalog.=) 9) OR (raw_events_first.user_id OPERATOR(pg_catalog.=) 16)) AND (raw_events_first.user_id IS NOT NULL)) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4
DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id, value_1, value_3) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first.value_3 FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (((raw_events_first.user_id OPERATOR(pg_catalog.=) 9) OR (raw_events_first.user_id OPERATOR(pg_catalog.=) 16)) AND (raw_events_first.user_id IS NOT NULL)) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4
ERROR: duplicate key value violates unique constraint "raw_events_second_user_id_value_1_key_13300007"
-- now do some aggregations
INSERT INTO agg_events
SELECT
user_id, sum(value_1), avg(value_2), sum(value_3), count(value_4)
FROM
raw_events_first
GROUP BY
user_id;
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg, value_2_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, avg(raw_events_first.value_2) AS avg, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.user_id
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg, value_2_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, avg(raw_events_first.value_2) AS avg, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.user_id
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg, value_2_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, avg(raw_events_first.value_2) AS avg, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.user_id
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg, value_2_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, avg(raw_events_first.value_2) AS avg, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.user_id
-- group by column not exists on the SELECT target list
INSERT INTO agg_events (value_3_agg, value_4_agg, value_1_agg, user_id)
SELECT
sum(value_3), count(value_4), sum(value_1), user_id
FROM
raw_events_first
GROUP BY
value_2, user_id
RETURNING *;
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.value_2, raw_events_first.user_id RETURNING citus_table_alias.user_id, citus_table_alias.value_1_agg, citus_table_alias.value_2_agg, citus_table_alias.value_3_agg, citus_table_alias.value_4_agg, citus_table_alias.agg_time
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.value_2, raw_events_first.user_id RETURNING citus_table_alias.user_id, citus_table_alias.value_1_agg, citus_table_alias.value_2_agg, citus_table_alias.value_3_agg, citus_table_alias.value_4_agg, citus_table_alias.agg_time
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.value_2, raw_events_first.user_id RETURNING citus_table_alias.user_id, citus_table_alias.value_1_agg, citus_table_alias.value_2_agg, citus_table_alias.value_3_agg, citus_table_alias.value_4_agg, citus_table_alias.agg_time
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.value_2, raw_events_first.user_id RETURNING citus_table_alias.user_id, citus_table_alias.value_1_agg, citus_table_alias.value_2_agg, citus_table_alias.value_3_agg, citus_table_alias.value_4_agg, citus_table_alias.agg_time
ERROR: duplicate key value violates unique constraint "agg_events_user_id_value_1_agg_key_13300008"
-- some subquery tests
INSERT INTO agg_events
(value_1_agg,
user_id)
SELECT SUM(value_1),
id
FROM (SELECT raw_events_second.user_id AS id,
raw_events_second.value_1
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.user_id) AS foo
GROUP BY id
ORDER BY id;
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT foo.id, sum(foo.value_1) AS sum FROM (SELECT raw_events_second.user_id AS id, raw_events_second.value_1 FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id)) foo WHERE (foo.id IS NOT NULL) GROUP BY foo.id ORDER BY foo.id
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT foo.id, sum(foo.value_1) AS sum FROM (SELECT raw_events_second.user_id AS id, raw_events_second.value_1 FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id)) foo WHERE (foo.id IS NOT NULL) GROUP BY foo.id ORDER BY foo.id
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT foo.id, sum(foo.value_1) AS sum FROM (SELECT raw_events_second.user_id AS id, raw_events_second.value_1 FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id)) foo WHERE (foo.id IS NOT NULL) GROUP BY foo.id ORDER BY foo.id
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT foo.id, sum(foo.value_1) AS sum FROM (SELECT raw_events_second.user_id AS id, raw_events_second.value_1 FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id)) foo WHERE (foo.id IS NOT NULL) GROUP BY foo.id ORDER BY foo.id
ERROR: duplicate key value violates unique constraint "agg_events_user_id_value_1_agg_key_13300008"
-- subquery one more level depth
INSERT INTO agg_events
(value_4_agg,
value_1_agg,
user_id)
SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
SUM(raw_events_first.value_1) AS v1,
raw_events_second.user_id AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.user_id
GROUP BY raw_events_second.user_id) AS foo
ORDER BY id;
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg, value_4_agg) SELECT foo.id, foo.v1, foo.v4 FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id) foo WHERE (foo.id IS NOT NULL) ORDER BY foo.id
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg, value_4_agg) SELECT foo.id, foo.v1, foo.v4 FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id) foo WHERE (foo.id IS NOT NULL) ORDER BY foo.id
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg, value_4_agg) SELECT foo.id, foo.v1, foo.v4 FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id) foo WHERE (foo.id IS NOT NULL) ORDER BY foo.id
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg, value_4_agg) SELECT foo.id, foo.v1, foo.v4 FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id) foo WHERE (foo.id IS NOT NULL) ORDER BY foo.id
ERROR: duplicate key value violates unique constraint "agg_events_user_id_value_1_agg_key_13300008"
\set VERBOSITY DEFAULT
-- join between subqueries
INSERT INTO agg_events
(user_id)
SELECT f2.id FROM
(SELECT
id
FROM (SELECT reference_table.user_id AS id
FROM raw_events_first,
reference_table
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
INNER JOIN
(SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
SUM(raw_events_first.value_1) AS v1,
raw_events_second.user_id AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.user_id
GROUP BY raw_events_second.user_id
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
ON (f.id = f2.id);
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f2.id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f2.id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f2.id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f2.id IS NOT NULL)
-- add one more level subqueris on top of subquery JOINs
INSERT INTO agg_events
(user_id, value_4_agg)
SELECT
outer_most.id, max(outer_most.value)
FROM
(
SELECT f2.id as id, f2.v4 as value FROM
(SELECT
id
FROM (SELECT reference_table.user_id AS id
FROM raw_events_first,
reference_table
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
INNER JOIN
(SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
SUM(raw_events_first.value_1) AS v1,
raw_events_second.user_id AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.user_id
GROUP BY raw_events_second.user_id
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
ON (f.id = f2.id)) as outer_most
GROUP BY
outer_most.id;
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (outer_most.id IS NOT NULL) GROUP BY outer_most.id
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (outer_most.id IS NOT NULL) GROUP BY outer_most.id
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (outer_most.id IS NOT NULL) GROUP BY outer_most.id
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (outer_most.id IS NOT NULL) GROUP BY outer_most.id
-- subqueries in WHERE clause
INSERT INTO raw_events_second
(user_id)
SELECT user_id
FROM raw_events_first
WHERE user_id IN (SELECT user_id
FROM raw_events_second
WHERE user_id = 2);
DEBUG: Creating router plan
DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away
DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away
DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) 2))) AND (raw_events_first.user_id IS NOT NULL))
INSERT INTO raw_events_second
(user_id)
SELECT user_id
FROM raw_events_first
WHERE user_id IN (SELECT user_id
FROM raw_events_second
WHERE user_id != 2 AND value_1 = 2000)
ON conflict (user_id, value_1) DO NOTHING;
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.<>) 2) AND (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000)))) AND (raw_events_first.user_id IS NOT NULL)) ON CONFLICT(user_id, value_1) DO NOTHING
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300005 raw_events_second WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.<>) 2) AND (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000)))) AND (raw_events_first.user_id IS NOT NULL)) ON CONFLICT(user_id, value_1) DO NOTHING
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300006 raw_events_second WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.<>) 2) AND (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000)))) AND (raw_events_first.user_id IS NOT NULL)) ON CONFLICT(user_id, value_1) DO NOTHING
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.<>) 2) AND (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000)))) AND (raw_events_first.user_id IS NOT NULL)) ON CONFLICT(user_id, value_1) DO NOTHING
INSERT INTO raw_events_second
(user_id)
SELECT user_id
FROM raw_events_first
WHERE user_id IN (SELECT user_id
FROM raw_events_second WHERE false);
DEBUG: Creating router plan
DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away
DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away
DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away
DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away
INSERT INTO raw_events_second
(user_id)
SELECT user_id
FROM raw_events_first
WHERE user_id IN (SELECT user_id
FROM raw_events_second
WHERE value_1 = 1000 OR value_1 = 2000 OR value_1 = 3000);
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second WHERE ((raw_events_second.value_1 OPERATOR(pg_catalog.=) 1000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 3000)))) AND (raw_events_first.user_id IS NOT NULL))
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300005 raw_events_second WHERE ((raw_events_second.value_1 OPERATOR(pg_catalog.=) 1000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 3000)))) AND (raw_events_first.user_id IS NOT NULL))
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300006 raw_events_second WHERE ((raw_events_second.value_1 OPERATOR(pg_catalog.=) 1000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 3000)))) AND (raw_events_first.user_id IS NOT NULL))
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second WHERE ((raw_events_second.value_1 OPERATOR(pg_catalog.=) 1000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 3000)))) AND (raw_events_first.user_id IS NOT NULL))
-- lets mix subqueries in FROM clause and subqueries in WHERE
INSERT INTO agg_events
(user_id)
SELECT f2.id FROM
(SELECT
id
FROM (SELECT reference_table.user_id AS id
FROM raw_events_first,
reference_table
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
INNER JOIN
(SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
SUM(raw_events_first.value_1) AS v1,
raw_events_second.user_id AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.user_id
GROUP BY raw_events_second.user_id
HAVING SUM(raw_events_second.value_4) > 1000) AS foo2 ) as f2
ON (f.id = f2.id)
WHERE f.id IN (SELECT user_id
FROM raw_events_second);
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (1000)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE ((f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second)) AND (f2.id IS NOT NULL))
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (1000)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE ((f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300005 raw_events_second)) AND (f2.id IS NOT NULL))
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (1000)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE ((f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300006 raw_events_second)) AND (f2.id IS NOT NULL))
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (1000)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE ((f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second)) AND (f2.id IS NOT NULL))
-- some UPSERTS
INSERT INTO agg_events AS ae
(
user_id,
value_1_agg,
agg_time
)
SELECT user_id,
value_1,
time
FROM raw_events_first
ON conflict (user_id, value_1_agg)
DO UPDATE
SET agg_time = EXCLUDED.agg_time
WHERE ae.agg_time < EXCLUDED.agg_time;
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time)
-- upserts with returning
INSERT INTO agg_events AS ae
(
user_id,
value_1_agg,
agg_time
)
SELECT user_id,
value_1,
time
FROM raw_events_first
ON conflict (user_id, value_1_agg)
DO UPDATE
SET agg_time = EXCLUDED.agg_time
WHERE ae.agg_time < EXCLUDED.agg_time
RETURNING user_id, value_1_agg;
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) RETURNING ae.user_id, ae.value_1_agg
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) RETURNING ae.user_id, ae.value_1_agg
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) RETURNING ae.user_id, ae.value_1_agg
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) RETURNING ae.user_id, ae.value_1_agg
user_id | value_1_agg
---------------------------------------------------------------------
7 |
(1 row)
INSERT INTO agg_events (user_id, value_1_agg)
SELECT
user_id, sum(value_1 + value_2)
FROM
raw_events_first GROUP BY user_id;
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) AS sum FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.user_id
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) AS sum FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.user_id
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) AS sum FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.user_id
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) AS sum FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.user_id
-- FILTER CLAUSE
INSERT INTO agg_events (user_id, value_1_agg)
SELECT
user_id, sum(value_1 + value_2) FILTER (where value_3 = 15)
FROM
raw_events_first GROUP BY user_id;
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) FILTER (WHERE (raw_events_first.value_3 OPERATOR(pg_catalog.=) (15)::double precision)) AS sum FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.user_id
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) FILTER (WHERE (raw_events_first.value_3 OPERATOR(pg_catalog.=) (15)::double precision)) AS sum FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.user_id
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) FILTER (WHERE (raw_events_first.value_3 OPERATOR(pg_catalog.=) (15)::double precision)) AS sum FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.user_id
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) FILTER (WHERE (raw_events_first.value_3 OPERATOR(pg_catalog.=) (15)::double precision)) AS sum FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.user_id
-- a test with reference table JOINs
INSERT INTO
agg_events (user_id, value_1_agg)
SELECT
raw_events_first.user_id, sum(value_1)
FROM
reference_table, raw_events_first
WHERE
raw_events_first.user_id = reference_table.user_id
GROUP BY
raw_events_first.user_id;
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum FROM multi_insert_select.reference_table_13300012 reference_table, multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id) AND (raw_events_first.user_id IS NOT NULL)) GROUP BY raw_events_first.user_id
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum FROM multi_insert_select.reference_table_13300012 reference_table, multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id) AND (raw_events_first.user_id IS NOT NULL)) GROUP BY raw_events_first.user_id
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum FROM multi_insert_select.reference_table_13300012 reference_table, multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id) AND (raw_events_first.user_id IS NOT NULL)) GROUP BY raw_events_first.user_id
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum FROM multi_insert_select.reference_table_13300012 reference_table, multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id) AND (raw_events_first.user_id IS NOT NULL)) GROUP BY raw_events_first.user_id
-- a note on the outer joins is that
-- we filter out outer join results
-- where partition column returns
-- NULL. Thus, we could INSERT less rows
-- than we expect from subquery result.
-- see the following tests
SET client_min_messages TO INFO;
-- we don't want to see constraint violations, so truncate first
TRUNCATE agg_events;
-- add a row to first table to make table contents different
INSERT INTO raw_events_second (user_id, time, value_1, value_2, value_3, value_4) VALUES
(10, now(), 100, 10000, 10000, 100000);
DELETE FROM raw_events_second WHERE user_id = 2;
-- we select 11 rows
SELECT t1.user_id AS col1,
t2.user_id AS col2
FROM raw_events_first t1
FULL JOIN raw_events_second t2
ON t1.user_id = t2.user_id
ORDER BY t1.user_id,
t2.user_id;
col1 | col2
---------------------------------------------------------------------
1 | 1
2 |
3 | 3
4 | 4
5 | 5
6 | 6
7 | 7
8 | 8
9 | 9
| 10
(10 rows)
SET client_min_messages TO DEBUG2;
-- we insert 10 rows since we filtered out
-- NULL partition column values
INSERT INTO agg_events (user_id, value_1_agg)
SELECT t1.user_id AS col1,
t2.user_id AS col2
FROM raw_events_first t1
FULL JOIN raw_events_second t2
ON t1.user_id = t2.user_id;
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT t1.user_id AS col1, t2.user_id AS col2 FROM (multi_insert_select.raw_events_first_13300000 t1 FULL JOIN multi_insert_select.raw_events_second_13300004 t2 ON ((t1.user_id OPERATOR(pg_catalog.=) t2.user_id))) WHERE (t1.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT t1.user_id AS col1, t2.user_id AS col2 FROM (multi_insert_select.raw_events_first_13300001 t1 FULL JOIN multi_insert_select.raw_events_second_13300005 t2 ON ((t1.user_id OPERATOR(pg_catalog.=) t2.user_id))) WHERE (t1.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT t1.user_id AS col1, t2.user_id AS col2 FROM (multi_insert_select.raw_events_first_13300002 t1 FULL JOIN multi_insert_select.raw_events_second_13300006 t2 ON ((t1.user_id OPERATOR(pg_catalog.=) t2.user_id))) WHERE (t1.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT t1.user_id AS col1, t2.user_id AS col2 FROM (multi_insert_select.raw_events_first_13300003 t1 FULL JOIN multi_insert_select.raw_events_second_13300007 t2 ON ((t1.user_id OPERATOR(pg_catalog.=) t2.user_id))) WHERE (t1.user_id IS NOT NULL)
SET client_min_messages TO INFO;
-- see that the results are different from the SELECT query
SELECT
user_id, value_1_agg
FROM
agg_events
ORDER BY
user_id, value_1_agg;
user_id | value_1_agg
---------------------------------------------------------------------
1 | 1
2 |
3 | 3
4 | 4
5 | 5
6 | 6
7 | 7
8 | 8
9 | 9
(9 rows)
-- we don't want to see constraint violations, so truncate first
SET client_min_messages TO INFO;
TRUNCATE agg_events;
SET client_min_messages TO DEBUG2;
-- DISTINCT clause
INSERT INTO agg_events (value_1_agg, user_id)
SELECT
DISTINCT value_1, user_id
FROM
raw_events_first;
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL)
-- we don't want to see constraint violations, so truncate first
SET client_min_messages TO INFO;
truncate agg_events;
SET client_min_messages TO DEBUG2;
-- DISTINCT ON clauses are supported
-- distinct on(non-partition column)
-- values are pulled to master
INSERT INTO agg_events (value_1_agg, user_id)
SELECT
DISTINCT ON (value_1) value_1, user_id
FROM
raw_events_first;
DEBUG: cannot push down this subquery
DETAIL: Distinct on columns without partition column is currently unsupported
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: Collecting INSERT ... SELECT results on coordinator
SELECT user_id, value_1_agg FROM agg_events ORDER BY 1,2;
DEBUG: Router planner cannot handle multi-shard select queries
user_id | value_1_agg
---------------------------------------------------------------------
1 | 10
2 | 20
3 | 30
4 | 40
5 | 50
6 | 60
7 |
8 | 80
9 | 90
(9 rows)
-- we don't want to see constraint violations, so truncate first
SET client_min_messages TO INFO;
truncate agg_events;
SET client_min_messages TO DEBUG2;
-- distinct on(partition column)
-- queries are forwared to workers
INSERT INTO agg_events (value_1_agg, user_id)
SELECT
DISTINCT ON (user_id) value_1, user_id
FROM
raw_events_first;
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT ON (raw_events_first.user_id) raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT ON (raw_events_first.user_id) raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT ON (raw_events_first.user_id) raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT ON (raw_events_first.user_id) raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL)
SELECT user_id, value_1_agg FROM agg_events ORDER BY 1,2;
DEBUG: Router planner cannot handle multi-shard select queries
user_id | value_1_agg
---------------------------------------------------------------------
1 | 10
2 | 20
3 | 30
4 | 40
5 | 50
6 | 60
7 |
8 | 80
9 | 90
(9 rows)
-- We support CTEs
BEGIN;
WITH fist_table_agg AS MATERIALIZED
(SELECT max(value_1)+1 as v1_agg, user_id FROM raw_events_first GROUP BY user_id)
INSERT INTO agg_events
(value_1_agg, user_id)
SELECT
v1_agg, user_id
FROM
fist_table_agg;
DEBUG: distributed INSERT ... SELECT can only select from distributed tables
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: generating subplan XXX_1 for CTE fist_table_agg: SELECT (max(value_1) OPERATOR(pg_catalog.+) 1) AS v1_agg, user_id FROM multi_insert_select.raw_events_first GROUP BY user_id
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, v1_agg AS value_1_agg FROM (SELECT fist_table_agg.user_id, fist_table_agg.v1_agg FROM (SELECT intermediate_result.v1_agg, intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(v1_agg integer, user_id integer)) fist_table_agg) citus_insert_select_subquery
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
ROLLBACK;
-- We do support CTEs that are referenced in the target list
INSERT INTO agg_events
WITH sub_cte AS (SELECT 1)
SELECT
raw_events_first.user_id, (SELECT * FROM sub_cte)
FROM
raw_events_first;
DEBUG: CTE sub_cte is going to be inlined via distributed planning
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, (SELECT sub_cte."?column?" FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, (SELECT sub_cte."?column?" FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, (SELECT sub_cte."?column?" FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, (SELECT sub_cte."?column?" FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL)
-- We support set operations
BEGIN;
INSERT INTO
raw_events_first(user_id)
SELECT
user_id
FROM
((SELECT user_id FROM raw_events_first) UNION
(SELECT user_id FROM raw_events_second)) as foo;
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id) SELECT foo.user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second) foo WHERE (foo.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300001 AS citus_table_alias (user_id) SELECT foo.user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300005 raw_events_second) foo WHERE (foo.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300002 AS citus_table_alias (user_id) SELECT foo.user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300006 raw_events_second) foo WHERE (foo.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300003 AS citus_table_alias (user_id) SELECT foo.user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second) foo WHERE (foo.user_id IS NOT NULL)
ROLLBACK;
-- We do support set operations through recursive planning
BEGIN;
SET LOCAL client_min_messages TO DEBUG;
INSERT INTO
raw_events_first(user_id)
(SELECT user_id FROM raw_events_first) INTERSECT
(SELECT user_id FROM raw_events_first);
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM multi_insert_select.raw_events_first
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: generating subplan XXX_2 for subquery SELECT user_id FROM multi_insert_select.raw_events_first
DEBUG: Creating router plan
DEBUG: generating subplan XXX_3 for subquery SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) INTERSECT SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) citus_insert_select_subquery
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
ROLLBACK;
-- If the query is router plannable then it is executed via the coordinator
INSERT INTO
raw_events_first(user_id)
SELECT
user_id
FROM
((SELECT user_id FROM raw_events_first WHERE user_id = 15) EXCEPT
(SELECT user_id FROM raw_events_second where user_id = 17)) as foo;
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: router planner does not support queries that reference non-colocated distributed tables
DEBUG: Distributed planning for a fast-path router query
DEBUG: Creating router plan
DEBUG: query has a single distribution column value: 15
DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM multi_insert_select.raw_events_first WHERE (user_id OPERATOR(pg_catalog.=) 15)
DEBUG: Distributed planning for a fast-path router query
DEBUG: Creating router plan
DEBUG: query has a single distribution column value: 17
DEBUG: generating subplan XXX_2 for subquery SELECT user_id FROM multi_insert_select.raw_events_second WHERE (user_id OPERATOR(pg_catalog.=) 17)
DEBUG: Creating router plan
DEBUG: generating subplan XXX_3 for subquery SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) EXCEPT SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) foo
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
-- some supported LEFT joins
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id;
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300000 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_first.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300001 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300005 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_first.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300002 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300006 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_first.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300003 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300007 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_first.user_id IS NOT NULL)
SET client_min_messages to debug3;
INSERT INTO agg_events (user_id)
SELECT
raw_events_second.user_id
FROM
reference_table LEFT JOIN raw_events_second ON reference_table.user_id = raw_events_second.user_id;
DEBUG: no shard pruning constraints on raw_events_second found
DEBUG: shard count after pruning for raw_events_second: 4
DEBUG: cannot perform a lateral outer join when a distributed subquery references a reference table
DEBUG: no shard pruning constraints on raw_events_second found
DEBUG: shard count after pruning for raw_events_second: 4
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: a push down safe left join with recurring left side
DEBUG: no shard pruning constraints on raw_events_second found
DEBUG: shard count after pruning for raw_events_second: 4
DEBUG: assigned task to node localhost:xxxxx
DEBUG: assigned task to node localhost:xxxxx
DEBUG: assigned task to node localhost:xxxxx
DEBUG: assigned task to node localhost:xxxxx
DEBUG: performing repartitioned INSERT ... SELECT
DEBUG: partitioning SELECT query by column index 0 with name 'user_id'
SET client_min_messages to debug2;
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id
WHERE raw_events_first.user_id = 10;
DEBUG: Creating router plan
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300000 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) 10) AND (raw_events_first.user_id IS NOT NULL))
DEBUG: Skipping target shard interval 13300009 since SELECT query for it pruned away
DEBUG: Skipping target shard interval 13300010 since SELECT query for it pruned away
DEBUG: Skipping target shard interval 13300011 since SELECT query for it pruned away
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id
WHERE raw_events_second.user_id = 10 OR raw_events_second.user_id = 11;
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300000 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (((raw_events_second.user_id OPERATOR(pg_catalog.=) 10) OR (raw_events_second.user_id OPERATOR(pg_catalog.=) 11)) AND (raw_events_first.user_id IS NOT NULL))
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300001 raw_events_first LEFT JOIN (SELECT NULL::integer AS user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 WHERE false) raw_events_second(user_id, "time", value_1, value_2, value_3, value_4) ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (((raw_events_second.user_id OPERATOR(pg_catalog.=) 10) OR (raw_events_second.user_id OPERATOR(pg_catalog.=) 11)) AND (raw_events_first.user_id IS NOT NULL))
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300002 raw_events_first LEFT JOIN (SELECT NULL::integer AS user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 WHERE false) raw_events_second(user_id, "time", value_1, value_2, value_3, value_4) ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (((raw_events_second.user_id OPERATOR(pg_catalog.=) 10) OR (raw_events_second.user_id OPERATOR(pg_catalog.=) 11)) AND (raw_events_first.user_id IS NOT NULL))
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300003 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300007 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (((raw_events_second.user_id OPERATOR(pg_catalog.=) 10) OR (raw_events_second.user_id OPERATOR(pg_catalog.=) 11)) AND (raw_events_first.user_id IS NOT NULL))
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id
WHERE raw_events_first.user_id = 10 AND raw_events_first.user_id = 20;
DEBUG: Creating router plan
DEBUG: Skipping target shard interval 13300008 since SELECT query for it pruned away
DEBUG: Skipping target shard interval 13300009 since SELECT query for it pruned away
DEBUG: Skipping target shard interval 13300010 since SELECT query for it pruned away
DEBUG: Skipping target shard interval 13300011 since SELECT query for it pruned away
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id
WHERE raw_events_first.user_id = 10 AND raw_events_second.user_id = 20;
DEBUG: Creating router plan
DEBUG: Skipping target shard interval 13300008 since SELECT query for it pruned away
DEBUG: Skipping target shard interval 13300009 since SELECT query for it pruned away
DEBUG: Skipping target shard interval 13300010 since SELECT query for it pruned away
DEBUG: Skipping target shard interval 13300011 since SELECT query for it pruned away
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id
WHERE raw_events_first.user_id IN (19, 20, 21);
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300000 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL))
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300001 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300005 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL))
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300002 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300006 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL))
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM ((SELECT NULL::integer AS user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 WHERE false) raw_events_first(user_id, "time", value_1, value_2, value_3, value_4) LEFT JOIN multi_insert_select.raw_events_second_13300007 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL))
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id
WHERE raw_events_second.user_id IN (19, 20, 21);
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300000 raw_events_first JOIN multi_insert_select.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL))
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300001 raw_events_first JOIN multi_insert_select.raw_events_second_13300005 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL))
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300002 raw_events_first JOIN multi_insert_select.raw_events_second_13300006 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL))
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300003 raw_events_first JOIN (SELECT NULL::integer AS user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 WHERE false) raw_events_second(user_id, "time", value_1, value_2, value_3, value_4) ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL))
SET client_min_messages TO WARNING;
-- following query should use repartitioned joins and results should
-- be routed via coordinator
SET citus.enable_repartition_joins TO true;
INSERT INTO agg_events
(user_id)
SELECT raw_events_first.user_id
FROM raw_events_first,
raw_events_second
WHERE raw_events_second.user_id = raw_events_first.value_1
AND raw_events_first.value_1 = 12;
-- some unsupported LEFT/INNER JOINs
-- JOIN on one table with partition column other is not
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1;
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- same as the above with INNER JOIN
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1;
-- a not meaningful query
INSERT INTO agg_events
(user_id)
SELECT raw_events_second.user_id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_first.value_1;
ERROR: cannot perform distributed planning on this query
DETAIL: Cartesian products are currently unsupported
-- both tables joined on non-partition columns
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.value_1 = raw_events_second.value_1;
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- same as the above with INNER JOIN
-- we support this with route to coordinator
SELECT coordinator_plan($Q$
EXPLAIN (costs off)
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first INNER JOIN raw_events_second ON raw_events_first.value_1 = raw_events_second.value_1;
$Q$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: pull to coordinator
-> Custom Scan (Citus Adaptive)
Task Count: 6
(4 rows)
-- EXPLAIN ANALYZE is not supported for INSERT ... SELECT via coordinator
EXPLAIN (costs off, analyze on, BUFFERS OFF)
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first INNER JOIN raw_events_second ON raw_events_first.value_1 = raw_events_second.value_1;
ERROR: EXPLAIN ANALYZE is currently not supported for INSERT ... SELECT commands via coordinator
-- even if there is a filter on the partition key, since the join is not on the partition key we reject
-- this query
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1
WHERE
raw_events_first.user_id = 10;
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- same as the above with INNER JOIN
-- we support this with route to coordinator
SELECT coordinator_plan($Q$
EXPLAIN (costs off)
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1
WHERE raw_events_first.user_id = 10;
$Q$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: pull to coordinator
-> Custom Scan (Citus Adaptive)
Task Count: 6
(4 rows)
-- make things a bit more complicate with IN clauses
-- we support this with route to coordinator
SELECT coordinator_plan($Q$
EXPLAIN (costs off)
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1
WHERE raw_events_first.value_1 IN (10, 11,12) OR raw_events_second.user_id IN (1,2,3,4);
$Q$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: pull to coordinator
-> Custom Scan (Citus Adaptive)
Task Count: 6
(4 rows)
-- implicit join on non partition column should also not be pushed down,
-- so we fall back to route via coordinator
SELECT coordinator_plan($Q$
EXPLAIN (costs off)
INSERT INTO agg_events
(user_id)
SELECT raw_events_first.user_id
FROM raw_events_first,
raw_events_second
WHERE raw_events_second.user_id = raw_events_first.value_1;
$Q$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: pull to coordinator
-> Custom Scan (Citus Adaptive)
Task Count: 6
(4 rows)
RESET client_min_messages;
-- The following is again a tricky query for Citus. If the given filter was
-- on value_1 as shown in the above, Citus could push it down and use
-- distributed INSERT/SELECT. But we instead fall back to route via coordinator.
SELECT coordinator_plan($Q$
EXPLAIN (costs off)
INSERT INTO agg_events
(user_id)
SELECT raw_events_first.user_id
FROM raw_events_first,
raw_events_second
WHERE raw_events_second.user_id = raw_events_first.value_1
AND raw_events_first.value_2 = 12;
$Q$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: pull to coordinator
-> Custom Scan (Citus Adaptive)
Task Count: 6
(4 rows)
-- foo is not joined on the partition key so the query is not
-- pushed down. So instead we route via coordinator.
SELECT coordinator_plan($Q$
EXPLAIN (costs off)
INSERT INTO agg_events
(user_id, value_4_agg)
SELECT
outer_most.id, max(outer_most.value)
FROM
(
SELECT f2.id as id, f2.v4 as value FROM
(SELECT
id
FROM (SELECT reference_table.user_id AS id
FROM raw_events_first LEFT JOIN
reference_table
ON (raw_events_first.value_1 = reference_table.user_id)) AS foo) as f
INNER JOIN
(SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
SUM(raw_events_first.value_1) AS v1,
raw_events_second.user_id AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.user_id
GROUP BY raw_events_second.user_id
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
ON (f.id = f2.id)) as outer_most
GROUP BY
outer_most.id;
$Q$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: pull to coordinator
-> HashAggregate
Group Key: remote_scan.user_id
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan XXX_1
-> Custom Scan (Citus Adaptive)
Task Count: 4
(8 rows)
-- if the given filter was on value_1 as shown in the above, Citus could
-- push it down. But here the query falls back to route via coordinator.
SELECT coordinator_plan($Q$
EXPLAIN (costs off)
INSERT INTO agg_events
(user_id)
SELECT raw_events_first.user_id
FROM raw_events_first,
raw_events_second
WHERE raw_events_second.user_id = raw_events_first.value_1
AND raw_events_first.value_2 = 12;
$Q$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: pull to coordinator
-> Custom Scan (Citus Adaptive)
Task Count: 6
(4 rows)
-- foo is not joined on the partition key so the query is not
-- pushed down, and it falls back to route via coordinator
SELECT coordinator_plan($Q$
EXPLAIN (costs off)
INSERT INTO agg_events
(user_id, value_4_agg)
SELECT
outer_most.id, max(outer_most.value)
FROM
(
SELECT f2.id as id, f2.v4 as value FROM
(SELECT
id
FROM (SELECT reference_table.user_id AS id
FROM raw_events_first LEFT JOIN
reference_table
ON (raw_events_first.value_1 = reference_table.user_id)) AS foo) as f
INNER JOIN
(SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
SUM(raw_events_first.value_1) AS v1,
raw_events_second.user_id AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.user_id
GROUP BY raw_events_second.user_id
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
ON (f.id = f2.id)) as outer_most
GROUP BY
outer_most.id;
$Q$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: pull to coordinator
-> HashAggregate
Group Key: remote_scan.user_id
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan XXX_1
-> Custom Scan (Citus Adaptive)
Task Count: 4
(8 rows)
INSERT INTO agg_events
(value_4_agg,
value_1_agg,
user_id)
SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
SUM(raw_events_first.value_1) AS v1,
raw_events_second.user_id AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id != raw_events_second.user_id
GROUP BY raw_events_second.user_id) AS foo;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
SET client_min_messages TO DEBUG2;
-- INSERT returns NULL partition key value via coordinator
INSERT INTO agg_events
(value_4_agg,
value_1_agg,
user_id)
SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
SUM(raw_events_first.value_1) AS v1,
raw_events_second.value_3 AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.user_id
GROUP BY raw_events_second.value_3) AS foo;
DEBUG: cannot push down this subquery
DETAIL: Group by list without partition column is currently unsupported when a subquery references a column from another query
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1]
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [0,1073741823]
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [1073741824,2147483647]
DEBUG: join prunable for intervals [-1073741824,-1] and [-2147483648,-1073741825]
DEBUG: join prunable for intervals [-1073741824,-1] and [0,1073741823]
DEBUG: join prunable for intervals [-1073741824,-1] and [1073741824,2147483647]
DEBUG: join prunable for intervals [0,1073741823] and [-2147483648,-1073741825]
DEBUG: join prunable for intervals [0,1073741823] and [-1073741824,-1]
DEBUG: join prunable for intervals [0,1073741823] and [1073741824,2147483647]
DEBUG: join prunable for intervals [1073741824,2147483647] and [-2147483648,-1073741825]
DEBUG: join prunable for intervals [1073741824,2147483647] and [-1073741824,-1]
DEBUG: join prunable for intervals [1073741824,2147483647] and [0,1073741823]
DEBUG: generating subplan XXX_1 for subquery SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.value_3 AS id FROM multi_insert_select.raw_events_first, multi_insert_select.raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.value_3
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT int4(id) AS user_id, int4(v1) AS value_1_agg, int8(v4) AS value_4_agg FROM (SELECT intermediate_result.v4, intermediate_result.v1, intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(v4 numeric, v1 bigint, id double precision)) foo
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
ERROR: the partition column of table multi_insert_select.agg_events cannot be NULL
-- error cases
-- no part column at all
INSERT INTO raw_events_second
(value_1)
SELECT value_1
FROM raw_events_first;
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: the query doesn't include the target table's partition column
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: performing repartitioned INSERT ... SELECT
ERROR: the partition column of table multi_insert_select.raw_events_second should have a value
INSERT INTO raw_events_second
(value_1)
SELECT user_id
FROM raw_events_first;
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: the query doesn't include the target table's partition column
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: performing repartitioned INSERT ... SELECT
ERROR: the partition column of table multi_insert_select.raw_events_second should have a value
INSERT INTO raw_events_second
(user_id)
SELECT value_1
FROM raw_events_first;
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: The target table's partition column should correspond to a partition column in the subquery.
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: performing repartitioned INSERT ... SELECT
DEBUG: partitioning SELECT query by column index 0 with name 'user_id'
ERROR: the partition column value cannot be NULL
CONTEXT: while executing command on localhost:xxxxx
INSERT INTO raw_events_second
(user_id)
SELECT user_id * 2
FROM raw_events_first;
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: Subquery contains an operator in the same position as the target table's partition column.
HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery.
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: performing repartitioned INSERT ... SELECT
DEBUG: partitioning SELECT query by column index 0 with name 'user_id'
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300000_to_0,repartitioned_results_xxxxx_from_13300001_to_0}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer)
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300000_to_1,repartitioned_results_xxxxx_from_13300001_to_1,repartitioned_results_xxxxx_from_13300003_to_1}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer)
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300001_to_2}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer)
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300000_to_3,repartitioned_results_xxxxx_from_13300002_to_3,repartitioned_results_xxxxx_from_13300003_to_3}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer)
INSERT INTO raw_events_second
(user_id)
SELECT user_id :: bigint
FROM raw_events_first;
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: Subquery contains an explicit cast in the same position as the target table's partition column.
HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery.
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: performing repartitioned INSERT ... SELECT
DEBUG: partitioning SELECT query by column index 0 with name 'user_id'
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300000_to_0}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer)
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300001_to_1}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer)
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300002_to_2}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer)
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300003_to_3}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer)
INSERT INTO agg_events
(value_3_agg,
value_4_agg,
value_1_agg,
value_2_agg,
user_id)
SELECT SUM(value_3),
Count(value_4),
user_id,
SUM(value_1),
Avg(value_2)
FROM raw_events_first
GROUP BY user_id;
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: Subquery contains an aggregation in the same position as the target table's partition column.
HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery.
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: performing repartitioned INSERT ... SELECT
DEBUG: partitioning SELECT query by column index 0 with name 'user_id'
ERROR: the partition column value cannot be NULL
CONTEXT: while executing command on localhost:xxxxx
INSERT INTO agg_events
(value_3_agg,
value_4_agg,
value_1_agg,
value_2_agg,
user_id)
SELECT SUM(value_3),
Count(value_4),
user_id,
SUM(value_1),
value_2
FROM raw_events_first
GROUP BY user_id,
value_2;
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: The target table's partition column should correspond to a partition column in the subquery.
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: performing repartitioned INSERT ... SELECT
DEBUG: partitioning SELECT query by column index 0 with name 'user_id'
ERROR: the partition column value cannot be NULL
CONTEXT: while executing command on localhost:xxxxx
-- tables should be co-located
INSERT INTO agg_events (user_id)
SELECT
user_id
FROM
reference_table;
DEBUG: Creating router plan
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: The target table's partition column should correspond to a partition column in the subquery.
DEBUG: Distributed planning for a fast-path router query
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
-- foo2 is recursively planned and INSERT...SELECT is done via coordinator
INSERT INTO agg_events
(user_id)
SELECT f2.id FROM
(SELECT
id
FROM (SELECT reference_table.user_id AS id
FROM raw_events_first,
reference_table
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
INNER JOIN
(SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
raw_events_second.value_1 AS v1,
SUM(raw_events_second.user_id) AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.user_id
GROUP BY raw_events_second.value_1
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
ON (f.id = f2.id);
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1]
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [0,1073741823]
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [1073741824,2147483647]
DEBUG: join prunable for intervals [-1073741824,-1] and [-2147483648,-1073741825]
DEBUG: join prunable for intervals [-1073741824,-1] and [0,1073741823]
DEBUG: join prunable for intervals [-1073741824,-1] and [1073741824,2147483647]
DEBUG: join prunable for intervals [0,1073741823] and [-2147483648,-1073741825]
DEBUG: join prunable for intervals [0,1073741823] and [-1073741824,-1]
DEBUG: join prunable for intervals [0,1073741823] and [1073741824,2147483647]
DEBUG: join prunable for intervals [1073741824,2147483647] and [-2147483648,-1073741825]
DEBUG: join prunable for intervals [1073741824,2147483647] and [-1073741824,-1]
DEBUG: join prunable for intervals [1073741824,2147483647] and [0,1073741823]
DEBUG: generating subplan XXX_1 for subquery SELECT sum(raw_events_second.value_4) AS v4, raw_events_second.value_1 AS v1, sum(raw_events_second.user_id) AS id FROM multi_insert_select.raw_events_first, multi_insert_select.raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.value_1 HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT int4(f2.id) AS user_id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first, multi_insert_select.reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT intermediate_result.v4, intermediate_result.v1, intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(v4 numeric, v1 integer, id bigint)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: performing repartitioned INSERT ... SELECT
DEBUG: partitioning SELECT query by column index 0 with name 'user_id'
-- the second part of the query is not routable since
-- GROUP BY not on the partition column (i.e., value_1) and thus join
-- on f.id = f2.id is not on the partition key (instead on the sum of partition key)
-- but we still recursively plan foo2 and run the query
INSERT INTO agg_events
(user_id)
SELECT f.id FROM
(SELECT
id
FROM (SELECT raw_events_first.user_id AS id
FROM raw_events_first,
reference_table
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
INNER JOIN
(SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
raw_events_second.value_1 AS v1,
SUM(raw_events_second.user_id) AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.user_id
GROUP BY raw_events_second.value_1
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
ON (f.id = f2.id);
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1]
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [0,1073741823]
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [1073741824,2147483647]
DEBUG: join prunable for intervals [-1073741824,-1] and [-2147483648,-1073741825]
DEBUG: join prunable for intervals [-1073741824,-1] and [0,1073741823]
DEBUG: join prunable for intervals [-1073741824,-1] and [1073741824,2147483647]
DEBUG: join prunable for intervals [0,1073741823] and [-2147483648,-1073741825]
DEBUG: join prunable for intervals [0,1073741823] and [-1073741824,-1]
DEBUG: join prunable for intervals [0,1073741823] and [1073741824,2147483647]
DEBUG: join prunable for intervals [1073741824,2147483647] and [-2147483648,-1073741825]
DEBUG: join prunable for intervals [1073741824,2147483647] and [-1073741824,-1]
DEBUG: join prunable for intervals [1073741824,2147483647] and [0,1073741823]
DEBUG: generating subplan XXX_1 for subquery SELECT sum(raw_events_second.value_4) AS v4, raw_events_second.value_1 AS v1, sum(raw_events_second.user_id) AS id FROM multi_insert_select.raw_events_first, multi_insert_select.raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.value_1 HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT f.id AS user_id FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM multi_insert_select.raw_events_first, multi_insert_select.reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT intermediate_result.v4, intermediate_result.v1, intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(v4 numeric, v1 integer, id bigint)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: performing repartitioned INSERT ... SELECT
DEBUG: partitioning SELECT query by column index 0 with name 'user_id'
SET client_min_messages TO WARNING;
-- cannot pushdown the query since the JOIN is not equi JOIN
-- falls back to route via coordinator
SELECT coordinator_plan($Q$
EXPLAIN (costs off)
INSERT INTO agg_events
(user_id, value_4_agg)
SELECT
outer_most.id, max(outer_most.value)
FROM
(
SELECT f2.id as id, f2.v4 as value FROM
(SELECT
id
FROM (SELECT reference_table.user_id AS id
FROM raw_events_first,
reference_table
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
INNER JOIN
(SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
SUM(raw_events_first.value_1) AS v1,
raw_events_second.user_id AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.user_id
GROUP BY raw_events_second.user_id
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
ON (f.id != f2.id)) as outer_most
GROUP BY outer_most.id;
$Q$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: pull to coordinator
-> HashAggregate
Group Key: remote_scan.user_id
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan XXX_1
-> Custom Scan (Citus Adaptive)
Task Count: 4
(8 rows)
-- cannot pushdown since foo2 is not join on partition key
-- falls back to route via coordinator
SELECT coordinator_plan($Q$
EXPLAIN (costs off)
INSERT INTO agg_events
(user_id, value_4_agg)
SELECT
outer_most.id, max(outer_most.value)
FROM
(
SELECT f2.id as id, f2.v4 as value FROM
(SELECT
id
FROM (SELECT reference_table.user_id AS id
FROM raw_events_first,
reference_table
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
INNER JOIN
(SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
SUM(raw_events_first.value_1) AS v1,
raw_events_second.user_id AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.value_1
GROUP BY raw_events_second.user_id
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
ON (f.id = f2.id)) as outer_most
GROUP BY
outer_most.id;
$Q$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: pull to coordinator
-> HashAggregate
Group Key: remote_scan.user_id
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan XXX_1
-> HashAggregate
Group Key: remote_scan.id
Filter: (pg_catalog.sum(remote_scan.worker_column_4) > '10'::numeric)
-> Custom Scan (Citus Adaptive)
Task Count: 6
(11 rows)
-- cannot push down since foo doesn't have en equi join
-- falls back to route via coordinator
SELECT coordinator_plan($Q$
EXPLAIN (costs off)
INSERT INTO agg_events
(user_id, value_4_agg)
SELECT
outer_most.id, max(outer_most.value)
FROM
(
SELECT f2.id as id, f2.v4 as value FROM
(SELECT
id
FROM (SELECT reference_table.user_id AS id
FROM raw_events_first,
reference_table
WHERE raw_events_first.user_id != reference_table.user_id ) AS foo) as f
INNER JOIN
(SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
SUM(raw_events_first.value_1) AS v1,
raw_events_second.user_id AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.user_id
GROUP BY raw_events_second.user_id
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
ON (f.id = f2.id)) as outer_most
GROUP BY
outer_most.id;
$Q$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: pull to coordinator
-> HashAggregate
Group Key: remote_scan.user_id
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan XXX_1
-> Custom Scan (Citus Adaptive)
Task Count: 4
(8 rows)
-- some unsupported LATERAL JOINs
-- join on averages is not on the partition key
-- should fall back to route via coordinator
SELECT coordinator_plan($Q$
EXPLAIN (costs off)
INSERT INTO agg_events (user_id, value_4_agg)
SELECT
averages.user_id, avg(averages.value_4)
FROM
(SELECT
raw_events_second.user_id
FROM
reference_table JOIN raw_events_second on (reference_table.user_id = raw_events_second.user_id)
) reference_ids
JOIN LATERAL
(SELECT
user_id, value_4
FROM
raw_events_first WHERE
value_4 = reference_ids.user_id) as averages ON true
GROUP BY averages.user_id;
$Q$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: pull to coordinator
-> HashAggregate
Group Key: remote_scan.user_id
-> Custom Scan (Citus Adaptive)
Task Count: 6
(6 rows)
-- join among reference_ids and averages is not on the partition key
-- should fall back to route via coordinator
SELECT coordinator_plan($Q$
EXPLAIN (costs off)
INSERT INTO agg_events (user_id, value_4_agg)
SELECT
averages.user_id, avg(averages.value_4)
FROM
(SELECT
raw_events_second.user_id
FROM
reference_table JOIN raw_events_second on (reference_table.user_id = raw_events_second.user_id)
) reference_ids
JOIN LATERAL
(SELECT
user_id, value_4
FROM
raw_events_first) as averages ON averages.value_4 = reference_ids.user_id
GROUP BY averages.user_id;
$Q$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: pull to coordinator
-> HashAggregate
Group Key: remote_scan.user_id
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan XXX_1
-> Custom Scan (Citus Adaptive)
Task Count: 4
(8 rows)
-- join among the agg_ids and averages is not on the partition key
-- should fall back to route via coordinator
SELECT coordinator_plan($Q$
EXPLAIN (costs off)
INSERT INTO agg_events (user_id, value_4_agg)
SELECT
averages.user_id, avg(averages.value_4)
FROM
(SELECT
raw_events_second.user_id
FROM
reference_table JOIN raw_events_second on (reference_table.user_id = raw_events_second.user_id)
) reference_ids
JOIN LATERAL
(SELECT
user_id, value_4
FROM
raw_events_first) as averages ON averages.user_id = reference_ids.user_id
JOIN LATERAL
(SELECT user_id, value_4 FROM agg_events) as agg_ids ON (agg_ids.value_4 = averages.user_id)
GROUP BY averages.user_id;
$Q$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: pull to coordinator
-> Custom Scan (Citus Adaptive)
Task Count: 6
(4 rows)
-- Selected value in the WHERE is not partition key, so we cannot use distributed
-- INSERT/SELECT and falls back route via coordinator
SELECT coordinator_plan($Q$
EXPLAIN (costs off)
INSERT INTO raw_events_second
(user_id)
SELECT user_id
FROM raw_events_first
WHERE user_id IN (SELECT value_1
FROM raw_events_second);
$Q$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: repartition
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan XXX_1
-> Custom Scan (Citus Adaptive)
Task Count: 4
(6 rows)
-- same as above but slightly more complex
-- since it also includes subquery in FROM as well
SELECT coordinator_plan($Q$
EXPLAIN (costs off)
INSERT INTO agg_events
(user_id)
SELECT f2.id FROM
(SELECT
id
FROM (SELECT reference_table.user_id AS id
FROM raw_events_first,
reference_table
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
INNER JOIN
(SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
SUM(raw_events_first.value_1) AS v1,
raw_events_second.user_id AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.user_id
GROUP BY raw_events_second.user_id
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
ON (f.id = f2.id)
WHERE f.id IN (SELECT value_1
FROM raw_events_second);
$Q$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: repartition
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan XXX_1
-> Custom Scan (Citus Adaptive)
Task Count: 4
(6 rows)
-- some more semi-anti join tests
SET client_min_messages TO DEBUG2;
-- join in where
INSERT INTO raw_events_second
(user_id)
SELECT user_id
FROM raw_events_first
WHERE user_id IN (SELECT raw_events_second.user_id
FROM raw_events_second, raw_events_first
WHERE raw_events_second.user_id = raw_events_first.user_id AND raw_events_first.user_id = 200);
DEBUG: Creating router plan
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second, multi_insert_select.raw_events_first_13300000 raw_events_first_1 WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first_1.user_id) AND (raw_events_first_1.user_id OPERATOR(pg_catalog.=) 200)))) AND (raw_events_first.user_id IS NOT NULL))
DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away
DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away
DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away
RESET client_min_messages;
-- we cannot push this down since it is NOT IN
-- we use repartition insert/select instead
SELECT coordinator_plan($Q$
EXPLAIN (costs off)
INSERT INTO raw_events_second
(user_id)
SELECT user_id
FROM raw_events_first
WHERE user_id NOT IN (SELECT raw_events_second.user_id
FROM raw_events_second, raw_events_first
WHERE raw_events_second.user_id = raw_events_first.user_id AND raw_events_first.user_id = 200);
$Q$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: repartition
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan XXX_1
-> Custom Scan (Citus Adaptive)
Task Count: 1
(6 rows)
SET client_min_messages TO DEBUG2;
-- safe to push down
INSERT INTO raw_events_second
(user_id)
SELECT user_id
FROM raw_events_first
WHERE EXISTS (SELECT 1
FROM raw_events_second
WHERE raw_events_second.user_id =raw_events_first.user_id);
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id))) AND (raw_events_first.user_id IS NOT NULL))
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id))) AND (raw_events_first.user_id IS NOT NULL))
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id))) AND (raw_events_first.user_id IS NOT NULL))
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id))) AND (raw_events_first.user_id IS NOT NULL))
-- we cannot push down
INSERT INTO raw_events_second
(user_id)
SELECT user_id
FROM raw_events_first
WHERE NOT EXISTS (SELECT 1
FROM raw_events_second
WHERE raw_events_second.user_id =raw_events_first.user_id);
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((NOT (EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id)))) AND (raw_events_first.user_id IS NOT NULL))
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((NOT (EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id)))) AND (raw_events_first.user_id IS NOT NULL))
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((NOT (EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id)))) AND (raw_events_first.user_id IS NOT NULL))
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((NOT (EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id)))) AND (raw_events_first.user_id IS NOT NULL))
-- more complex LEFT JOINs
INSERT INTO agg_events
(user_id, value_4_agg)
SELECT
outer_most.id, max(outer_most.value)
FROM
(
SELECT f2.id as id, f2.v4 as value FROM
(SELECT
id
FROM (SELECT raw_events_first.user_id AS id
FROM raw_events_first LEFT JOIN
reference_table
ON (raw_events_first.user_id = reference_table.user_id)) AS foo) as f
LEFT JOIN
(SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
SUM(raw_events_first.value_1) AS v1,
raw_events_second.user_id AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.user_id
GROUP BY raw_events_second.user_id
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
ON (f.id = f2.id)) as outer_most
GROUP BY
outer_most.id;
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM (multi_insert_select.raw_events_first_13300000 raw_events_first LEFT JOIN multi_insert_select.reference_table_13300012 reference_table ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)))) foo) f LEFT JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (outer_most.id IS NOT NULL) GROUP BY outer_most.id
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM (multi_insert_select.raw_events_first_13300001 raw_events_first LEFT JOIN multi_insert_select.reference_table_13300012 reference_table ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)))) foo) f LEFT JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (outer_most.id IS NOT NULL) GROUP BY outer_most.id
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM (multi_insert_select.raw_events_first_13300002 raw_events_first LEFT JOIN multi_insert_select.reference_table_13300012 reference_table ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)))) foo) f LEFT JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (outer_most.id IS NOT NULL) GROUP BY outer_most.id
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM (multi_insert_select.raw_events_first_13300003 raw_events_first LEFT JOIN multi_insert_select.reference_table_13300012 reference_table ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)))) foo) f LEFT JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (outer_most.id IS NOT NULL) GROUP BY outer_most.id
RESET client_min_messages;
-- cannot push down since the f.id IN is matched with value_1
-- we use repartition insert/select instead
SELECT coordinator_plan($Q$
EXPLAIN (costs off)
INSERT INTO raw_events_second
(user_id)
SELECT user_id
FROM raw_events_first
WHERE user_id IN (
SELECT f2.id FROM
(SELECT
id
FROM (SELECT reference_table.user_id AS id
FROM raw_events_first,
reference_table
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
INNER JOIN
(SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
SUM(raw_events_first.value_1) AS v1,
raw_events_second.user_id AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.user_id
GROUP BY raw_events_second.user_id
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
ON (f.id = f2.id)
WHERE f.id IN (SELECT value_1
FROM raw_events_second));
$Q$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: repartition
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan XXX_1
-> Custom Scan (Citus Adaptive)
Task Count: 4
(6 rows)
SET client_min_messages TO DEBUG2;
-- same as above, but this time is it safe to push down since
-- f.id IN is matched with user_id
INSERT INTO raw_events_second
(user_id)
SELECT user_id
FROM raw_events_first
WHERE user_id IN (
SELECT f2.id FROM
(SELECT
id
FROM (SELECT reference_table.user_id AS id
FROM raw_events_first,
reference_table
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
INNER JOIN
(SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
SUM(raw_events_first.value_1) AS v1,
raw_events_second.user_id AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.user_id
GROUP BY raw_events_second.user_id
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
ON (f.id = f2.id)
WHERE f.id IN (SELECT user_id
FROM raw_events_second));
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first_1, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first_1.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first_1, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second)))) AND (raw_events_first.user_id IS NOT NULL))
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first_1, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first_1.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first_1, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300005 raw_events_second)))) AND (raw_events_first.user_id IS NOT NULL))
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first_1, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first_1.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first_1, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300006 raw_events_second)))) AND (raw_events_first.user_id IS NOT NULL))
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first_1, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first_1.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first_1, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second)))) AND (raw_events_first.user_id IS NOT NULL))
RESET client_min_messages;
-- cannot push down since top level user_id is matched with NOT IN
INSERT INTO raw_events_second
(user_id)
SELECT user_id
FROM raw_events_first
WHERE user_id NOT IN (
SELECT f2.id FROM
(SELECT
id
FROM (SELECT reference_table.user_id AS id
FROM raw_events_first,
reference_table
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
INNER JOIN
(SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
SUM(raw_events_first.value_1) AS v1,
raw_events_second.user_id AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.user_id
GROUP BY raw_events_second.user_id
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
ON (f.id = f2.id)
WHERE f.id IN (SELECT user_id
FROM raw_events_second));
-- cannot push down since join is not equi join (f.id > f2.id)
INSERT INTO raw_events_second
(user_id)
SELECT user_id
FROM raw_events_first
WHERE user_id IN (
SELECT f2.id FROM
(SELECT
id
FROM (SELECT reference_table.user_id AS id
FROM raw_events_first,
reference_table
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
INNER JOIN
(SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
SUM(raw_events_first.value_1) AS v1,
raw_events_second.user_id AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.user_id
GROUP BY raw_events_second.user_id
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
ON (f.id > f2.id)
WHERE f.id IN (SELECT user_id
FROM raw_events_second));
-- we currently not support grouping sets
INSERT INTO agg_events
(user_id,
value_1_agg,
value_2_agg)
SELECT user_id,
Sum(value_1) AS sum_val1,
Sum(value_2) AS sum_val2
FROM raw_events_second
GROUP BY grouping sets ( ( user_id ), ( value_1 ), ( user_id, value_1 ), ( ) );
ERROR: could not run distributed query with GROUPING SETS, CUBE, or ROLLUP
HINT: Consider using an equality filter on the distributed table's partition column.
-- set back to INFO
SET client_min_messages TO INFO;
-- avoid constraint violations
TRUNCATE raw_events_first;
-- we don't support LIMIT for subquery pushdown, but
-- we recursively plan the query and run it via coordinator
INSERT INTO agg_events(user_id)
SELECT user_id
FROM users_table
WHERE user_id
IN (SELECT
user_id
FROM (
(
SELECT
user_id
FROM
(
SELECT
e1.user_id
FROM
users_table u1, events_table e1
WHERE
e1.user_id = u1.user_id LIMIT 3
) as f_inner
)
) AS f2);
-- Altering a table and selecting from it using a multi-shard statement
-- in the same transaction is allowed because we will use the same
-- connections for all co-located placements.
BEGIN;
ALTER TABLE raw_events_second DROP COLUMN value_4;
INSERT INTO raw_events_first SELECT * FROM raw_events_second;
ROLLBACK;
-- Alterating a table and selecting from it using a single-shard statement
-- in the same transaction is disallowed because we will use a different
-- connection.
BEGIN;
ALTER TABLE raw_events_second DROP COLUMN value_4;
INSERT INTO raw_events_first SELECT * FROM raw_events_second WHERE user_id = 100;
ROLLBACK;
-- Altering a reference table and then performing an INSERT ... SELECT which
-- joins with the reference table is allowed, since the INSERT ... SELECT
-- would read from the reference table over the same connections with the ones
-- that performed the parallel DDL.
BEGIN;
ALTER TABLE reference_table ADD COLUMN z int;
INSERT INTO raw_events_first (user_id)
SELECT user_id FROM raw_events_second JOIN reference_table USING (user_id);
ROLLBACK;
-- the same test with sequential DDL should work fine
BEGIN;
SET LOCAL citus.multi_shard_modify_mode TO 'sequential';
ALTER TABLE reference_table ADD COLUMN z int;
INSERT INTO raw_events_first (user_id)
SELECT user_id FROM raw_events_second JOIN reference_table USING (user_id);
ROLLBACK;
-- Insert after copy is allowed
BEGIN;
COPY raw_events_second (user_id, value_1) FROM STDIN DELIMITER ',';
INSERT INTO raw_events_first SELECT * FROM raw_events_second;
ROLLBACK;
-- Insert after copy is currently allowed for single-shard operation.
-- Both insert and copy are rolled back successfully.
BEGIN;
COPY raw_events_second (user_id, value_1) FROM STDIN DELIMITER ',';
INSERT INTO raw_events_first SELECT * FROM raw_events_second WHERE user_id = 101;
SELECT user_id FROM raw_events_first WHERE user_id = 101;
user_id
---------------------------------------------------------------------
101
(1 row)
ROLLBACK;
BEGIN;
INSERT INTO raw_events_first SELECT * FROM raw_events_second;
COPY raw_events_first (user_id, value_1) FROM STDIN DELIMITER ',';
ROLLBACK;
BEGIN;
INSERT INTO raw_events_first SELECT * FROM raw_events_second WHERE user_id = 100;
COPY raw_events_first (user_id, value_1) FROM STDIN DELIMITER ',';
ROLLBACK;
-- Similarly, multi-row INSERTs will take part in transactions and reuse connections...
BEGIN;
INSERT INTO raw_events_first SELECT * FROM raw_events_second WHERE user_id = 100;
COPY raw_events_first (user_id, value_1) FROM STDIN DELIMITER ',';
INSERT INTO raw_events_first (user_id, value_1) VALUES (105, 105), (106, 106);
ROLLBACK;
-- selecting from views works
CREATE VIEW test_view AS SELECT * FROM raw_events_first;
INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES
(16, now(), 60, 600, 6000.1, 60000);
SELECT count(*) FROM raw_events_second;
count
---------------------------------------------------------------------
45
(1 row)
INSERT INTO raw_events_second SELECT * FROM test_view;
INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES
(17, now(), 60, 600, 6000.1, 60000);
INSERT INTO raw_events_second SELECT * FROM test_view WHERE user_id = 17 GROUP BY 1,2,3,4,5,6;
SELECT count(*) FROM raw_events_second;
count
---------------------------------------------------------------------
47
(1 row)
-- intermediate results (CTEs) should be allowed when doing INSERT...SELECT within a CTE
WITH series AS (
SELECT s AS val FROM generate_series(60,70) s
),
inserts AS (
INSERT INTO raw_events_second (user_id)
SELECT
user_id
FROM
raw_events_first JOIN series ON (value_1 = val)
RETURNING
NULL
)
SELECT count(*) FROM inserts;
count
---------------------------------------------------------------------
2
(1 row)
-- we need this in our next test
truncate raw_events_first;
SET client_min_messages TO DEBUG2;
-- first show that the query works now
INSERT INTO raw_events_first SELECT * FROM raw_events_second;
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_second.user_id, raw_events_second."time", raw_events_second.value_1, raw_events_second.value_2, raw_events_second.value_3, raw_events_second.value_4 FROM multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_second.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300001 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_second.user_id, raw_events_second."time", raw_events_second.value_1, raw_events_second.value_2, raw_events_second.value_3, raw_events_second.value_4 FROM multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_second.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300002 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_second.user_id, raw_events_second."time", raw_events_second.value_1, raw_events_second.value_2, raw_events_second.value_3, raw_events_second.value_4 FROM multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_second.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300003 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_second.user_id, raw_events_second."time", raw_events_second.value_1, raw_events_second.value_2, raw_events_second.value_3, raw_events_second.value_4 FROM multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id IS NOT NULL)
SET client_min_messages TO INFO;
truncate raw_events_first;
SET client_min_messages TO DEBUG2;
-- now show that it works for a single shard query as well
INSERT INTO raw_events_first SELECT * FROM raw_events_second WHERE user_id = 5;
DEBUG: Creating router plan
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_second.user_id, raw_events_second."time", raw_events_second.value_1, raw_events_second.value_2, raw_events_second.value_3, raw_events_second.value_4 FROM multi_insert_select.raw_events_second_13300004 raw_events_second WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.=) 5) AND (raw_events_second.user_id IS NOT NULL))
DEBUG: Skipping target shard interval 13300001 since SELECT query for it pruned away
DEBUG: Skipping target shard interval 13300002 since SELECT query for it pruned away
DEBUG: Skipping target shard interval 13300003 since SELECT query for it pruned away
SET client_min_messages TO INFO;
-- now do some tests with varchars
INSERT INTO insert_select_varchar_test VALUES ('test_1', 10);
INSERT INTO insert_select_varchar_test VALUES ('test_2', 30);
INSERT INTO insert_select_varchar_test (key, value)
SELECT *, 100
FROM (SELECT f1.key
FROM (SELECT key
FROM insert_select_varchar_test
GROUP BY 1
HAVING Count(key) < 3) AS f1,
(SELECT key
FROM insert_select_varchar_test
GROUP BY 1
HAVING Sum(COALESCE(insert_select_varchar_test.value, 0)) >
20.0)
AS f2
WHERE f1.key = f2.key
GROUP BY 1) AS foo;
SELECT * FROM insert_select_varchar_test ORDER BY 1 DESC, 2 DESC;
key | value
---------------------------------------------------------------------
test_2 | 100
test_2 | 30
test_1 | 10
(3 rows)
-- some tests with DEFAULT columns and constant values
-- this test is mostly importantly intended for deparsing the query correctly
-- but still it is preferable to have this test here instead of multi_deparse_shard_query
CREATE TABLE table_with_defaults
(
store_id int,
first_name text,
default_1 int DEFAULT 1,
last_name text,
default_2 text DEFAULT '2'
);
-- we don't need many shards
SET citus.shard_count = 2;
SELECT create_distributed_table('table_with_defaults', 'store_id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- let's see the queries
SET client_min_messages TO DEBUG2;
-- a very simple query
INSERT INTO table_with_defaults SELECT * FROM table_with_defaults;
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_1, table_with_defaults.last_name, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_1, table_with_defaults.last_name, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL)
-- see that defaults are filled
INSERT INTO table_with_defaults (store_id, first_name)
SELECT
store_id, first_name
FROM
table_with_defaults;
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, 1 AS default_1, '2'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, 1 AS default_1, '2'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL)
-- shuffle one of the defaults and skip the other
INSERT INTO table_with_defaults (default_2, store_id, first_name)
SELECT
default_2, store_id, first_name
FROM
table_with_defaults;
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, 1 AS default_1, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, 1 AS default_1, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL)
-- shuffle both defaults
INSERT INTO table_with_defaults (default_2, store_id, default_1, first_name)
SELECT
default_2, store_id, default_1, first_name
FROM
table_with_defaults;
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_1, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_1, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL)
-- use constants instead of non-default column
INSERT INTO table_with_defaults (default_2, last_name, store_id, first_name)
SELECT
default_2, 'Freund', store_id, 'Andres'
FROM
table_with_defaults;
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL)
-- use constants instead of non-default column and skip both defauls
INSERT INTO table_with_defaults (last_name, store_id, first_name)
SELECT
'Freund', store_id, 'Andres'
FROM
table_with_defaults;
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, '2'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, '2'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL)
-- use constants instead of default columns
INSERT INTO table_with_defaults (default_2, last_name, store_id, first_name, default_1)
SELECT
20, last_name, store_id, first_name, 10
FROM
table_with_defaults;
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, 10, table_with_defaults.last_name, 20 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, 10, table_with_defaults.last_name, 20 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL)
-- use constants instead of both default columns and non-default columns
INSERT INTO table_with_defaults (default_2, last_name, store_id, first_name, default_1)
SELECT
20, 'Freund', store_id, 'Andres', 10
FROM
table_with_defaults;
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 10, 'Freund'::text AS last_name, 20 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 10, 'Freund'::text AS last_name, 20 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL)
-- some of the ultimate queries where we have constants,
-- defaults and group by entry is not on the target entry
INSERT INTO table_with_defaults (default_2, store_id, first_name)
SELECT
'2000', store_id, 'Andres'
FROM
table_with_defaults
GROUP BY
last_name, store_id;
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1 AS default_1, '2000'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1 AS default_1, '2000'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id
INSERT INTO table_with_defaults (default_1, store_id, first_name, default_2)
SELECT
1000, store_id, 'Andres', '2000'
FROM
table_with_defaults
GROUP BY
last_name, store_id, first_name;
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id, table_with_defaults.first_name
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id, table_with_defaults.first_name
INSERT INTO table_with_defaults (default_1, store_id, first_name, default_2)
SELECT
1000, store_id, 'Andres', '2000'
FROM
table_with_defaults
GROUP BY
last_name, store_id, first_name, default_2;
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_2
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_2
INSERT INTO table_with_defaults (default_1, store_id, first_name)
SELECT
1000, store_id, 'Andres'
FROM
table_with_defaults
GROUP BY
last_name, store_id, first_name, default_2;
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1000, '2'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_2
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1000, '2'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_2
RESET client_min_messages;
-- Stable function in default should be allowed
ALTER TABLE table_with_defaults ADD COLUMN t timestamptz DEFAULT now();
INSERT INTO table_with_defaults (store_id, first_name, last_name)
SELECT
store_id, 'first '||store_id, 'last '||store_id
FROM
table_with_defaults
GROUP BY
store_id, first_name, last_name;
-- Volatile function in default should be disallowed - SERIAL pseudo-types
CREATE TABLE table_with_serial (
store_id int,
s bigserial
);
SELECT create_distributed_table('table_with_serial', 'store_id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
INSERT INTO table_with_serial (store_id)
SELECT
store_id
FROM
table_with_defaults
GROUP BY
store_id;
-- Volatile function in default should be disallowed - user-defined sequence
CREATE SEQUENCE user_defined_sequence;
CREATE TABLE table_with_user_sequence (
store_id int,
s bigint default nextval('user_defined_sequence')
);
SELECT create_distributed_table('table_with_user_sequence', 'store_id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
INSERT INTO table_with_user_sequence (store_id)
SELECT
store_id
FROM
table_with_defaults
GROUP BY
store_id;
-- do some more error/error message checks
SET citus.shard_count TO 4;
SET citus.shard_replication_factor TO 1;
CREATE TABLE text_table (part_col text, val int);
CREATE TABLE char_table (part_col char[], val int);
create table table_with_starts_with_defaults (a int DEFAULT 5, b int, c int);
SELECT create_distributed_table('text_table', 'part_col');
create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('char_table','part_col');
create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('table_with_starts_with_defaults', 'c');
create_distributed_table
---------------------------------------------------------------------
(1 row)
SET client_min_messages TO DEBUG;
INSERT INTO text_table (part_col)
SELECT
CASE WHEN part_col = 'onder' THEN 'marco'
END
FROM text_table ;
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: Subquery contains a case expression in the same position as the target table's partition column.
HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery.
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: performing repartitioned INSERT ... SELECT
DEBUG: partitioning SELECT query by column index 0 with name 'part_col'
INSERT INTO text_table (part_col) SELECT COALESCE(part_col, 'onder') FROM text_table;
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: Subquery contains a coalesce expression in the same position as the target table's partition column.
HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery.
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: performing repartitioned INSERT ... SELECT
DEBUG: partitioning SELECT query by column index 0 with name 'part_col'
INSERT INTO text_table (part_col) SELECT GREATEST(part_col, 'jason') FROM text_table;
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: Subquery contains a min/max expression in the same position as the target table's partition column.
HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery.
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: performing repartitioned INSERT ... SELECT
DEBUG: partitioning SELECT query by column index 0 with name 'part_col'
INSERT INTO text_table (part_col) SELECT LEAST(part_col, 'andres') FROM text_table;
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: Subquery contains a min/max expression in the same position as the target table's partition column.
HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery.
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: performing repartitioned INSERT ... SELECT
DEBUG: partitioning SELECT query by column index 0 with name 'part_col'
INSERT INTO text_table (part_col) SELECT NULLIF(part_col, 'metin') FROM text_table;
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: Subquery contains an expression that is not a simple column reference in the same position as the target table's partition column.
HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery.
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: performing repartitioned INSERT ... SELECT
DEBUG: partitioning SELECT query by column index 0 with name 'part_col'
INSERT INTO text_table (part_col) SELECT part_col isnull FROM text_table;
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: Subquery contains an expression that is not a simple column reference in the same position as the target table's partition column.
HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery.
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: performing repartitioned INSERT ... SELECT
DEBUG: partitioning SELECT query by column index 0 with name 'part_col'
INSERT INTO text_table (part_col) SELECT part_col::text from char_table;
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: Subquery contains an explicit coercion in the same position as the target table's partition column.
HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery.
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: performing repartitioned INSERT ... SELECT
DEBUG: partitioning SELECT query by column index 0 with name 'part_col'
INSERT INTO text_table (part_col) SELECT (part_col = 'burak') is true FROM text_table;
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: Subquery contains an expression that is not a simple column reference in the same position as the target table's partition column.
HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery.
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: performing repartitioned INSERT ... SELECT
DEBUG: partitioning SELECT query by column index 0 with name 'part_col'
INSERT INTO text_table (part_col) SELECT val FROM text_table;
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: The data type of the target table's partition column should exactly match the data type of the corresponding simple column reference in the subquery.
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: performing repartitioned INSERT ... SELECT
DEBUG: partitioning SELECT query by column index 0 with name 'part_col'
INSERT INTO text_table (part_col) SELECT val::text FROM text_table;
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
DETAIL: Subquery contains an explicit coercion in the same position as the target table's partition column.
HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery.
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: performing repartitioned INSERT ... SELECT
DEBUG: partitioning SELECT query by column index 0 with name 'part_col'
RESET client_min_messages;
insert into table_with_starts_with_defaults (b,c) select b,c FROM table_with_starts_with_defaults;
-- Test on partition column without native hash function
CREATE TABLE raw_table
(
id BIGINT,
time DATE
);
CREATE TABLE summary_table
(
time DATE,
count BIGINT
);
SELECT create_distributed_table('raw_table', 'time');
create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('summary_table', 'time');
create_distributed_table
---------------------------------------------------------------------
(1 row)
INSERT INTO raw_table VALUES(1, '11-11-1980');
INSERT INTO summary_table SELECT time, COUNT(*) FROM raw_table GROUP BY time;
SELECT * FROM summary_table;
time | count
---------------------------------------------------------------------
11-11-1980 | 1
(1 row)
-- Test INSERT ... SELECT via coordinator
-- Select from constants
TRUNCATE raw_events_first;
INSERT INTO raw_events_first (user_id, value_1)
SELECT * FROM (VALUES (1,2), (3,4), (5,6)) AS v(int,int);
SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id;
user_id | value_1
---------------------------------------------------------------------
1 | 2
3 | 4
5 | 6
(3 rows)
-- Select from local functions
TRUNCATE raw_events_first;
CREATE SEQUENCE insert_select_test_seq;
SET client_min_messages TO DEBUG;
INSERT INTO raw_events_first (user_id, value_1, value_2)
SELECT
s, nextval('insert_select_test_seq'), (random()*10)::int
FROM
generate_series(1, 5) s;
DEBUG: Creating router plan
DEBUG: distributed INSERT ... SELECT can only select from distributed tables
DEBUG: Collecting INSERT ... SELECT results on coordinator
SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1;
DEBUG: Router planner cannot handle multi-shard select queries
user_id | value_1
---------------------------------------------------------------------
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
(5 rows)
-- ON CONFLICT is supported
INSERT INTO raw_events_first (user_id, value_1)
SELECT s, nextval('insert_select_test_seq') FROM generate_series(1, 5) s
ON CONFLICT DO NOTHING;
DEBUG: Creating router plan
DEBUG: distributed INSERT ... SELECT can only select from distributed tables
DEBUG: Collecting INSERT ... SELECT results on coordinator
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300000'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) ON CONFLICT DO NOTHING
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300001 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300001'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) ON CONFLICT DO NOTHING
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300002 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300002'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) ON CONFLICT DO NOTHING
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300003 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300003'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) ON CONFLICT DO NOTHING
-- RETURNING is supported
INSERT INTO raw_events_first (user_id, value_1)
SELECT s, nextval('insert_select_test_seq') FROM generate_series(1, 5) s
RETURNING *;
DEBUG: Creating router plan
DEBUG: distributed INSERT ... SELECT can only select from distributed tables
DEBUG: Collecting INSERT ... SELECT results on coordinator
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300000'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300001 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300001'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300002 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300002'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300003 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300003'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4
user_id | time | value_1 | value_2 | value_3 | value_4
---------------------------------------------------------------------
1 | | 11 | | |
2 | | 12 | | |
3 | | 13 | | |
4 | | 14 | | |
5 | | 15 | | |
(5 rows)
RESET client_min_messages;
-- INSERT ... SELECT and multi-shard SELECT in the same transaction is supported
TRUNCATE raw_events_first;
BEGIN;
INSERT INTO raw_events_first (user_id, value_1)
SELECT s, s FROM generate_series(1, 5) s;
SELECT user_id, value_1 FROM raw_events_first ORDER BY 1;
user_id | value_1
---------------------------------------------------------------------
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
(5 rows)
ROLLBACK;
-- INSERT ... SELECT and single-shard SELECT in the same transaction is supported
TRUNCATE raw_events_first;
BEGIN;
INSERT INTO raw_events_first (user_id, value_1)
SELECT s, s FROM generate_series(1, 5) s;
SELECT user_id, value_1 FROM raw_events_first WHERE user_id = 1;
user_id | value_1
---------------------------------------------------------------------
1 | 1
(1 row)
COMMIT;
-- Select from local table
TRUNCATE raw_events_first;
CREATE TEMPORARY TABLE raw_events_first_local AS
SELECT s AS u, 2*s AS v FROM generate_series(1, 5) s;
INSERT INTO raw_events_first (user_id, value_1)
SELECT u, v FROM raw_events_first_local;
SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1;
user_id | value_1
---------------------------------------------------------------------
1 | 2
2 | 4
3 | 6
4 | 8
5 | 10
(5 rows)
-- Use columns in opposite order
TRUNCATE raw_events_first;
INSERT INTO raw_events_first (value_1, user_id)
SELECT u, v FROM raw_events_first_local;
SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1;
user_id | value_1
---------------------------------------------------------------------
2 | 1
4 | 2
6 | 3
8 | 4
10 | 5
(5 rows)
-- Set operations can work with opposite column order
TRUNCATE raw_events_first;
INSERT INTO raw_events_first (value_3, user_id)
( SELECT v, u::bigint FROM raw_events_first_local )
UNION ALL
( SELECT v, u FROM raw_events_first_local );
SELECT user_id, value_3 FROM raw_events_first ORDER BY user_id, value_3;
user_id | value_3
---------------------------------------------------------------------
1 | 2
1 | 2
2 | 4
2 | 4
3 | 6
3 | 6
4 | 8
4 | 8
5 | 10
5 | 10
(10 rows)
-- Select from other distributed table with limit
TRUNCATE raw_events_first;
TRUNCATE raw_events_second;
INSERT INTO raw_events_second (user_id, value_4)
SELECT s, 3*s FROM generate_series (1,5) s;
INSERT INTO raw_events_first (user_id, value_1)
SELECT user_id, value_4 FROM raw_events_second LIMIT 5;
SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1;
user_id | value_1
---------------------------------------------------------------------
1 | 3
2 | 6
3 | 9
4 | 12
5 | 15
(5 rows)
-- CTEs are supported in local queries
TRUNCATE raw_events_first;
WITH removed_rows AS (
DELETE FROM raw_events_first_local RETURNING u
)
INSERT INTO raw_events_first (user_id, value_1)
WITH value AS (SELECT 1)
SELECT * FROM removed_rows, value;
SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1;
user_id | value_1
---------------------------------------------------------------------
1 | 1
2 | 1
3 | 1
4 | 1
5 | 1
(5 rows)
-- nested CTEs are also supported
TRUNCATE raw_events_first;
INSERT INTO raw_events_first_local SELECT s, 2*s FROM generate_series(0, 10) s;
WITH rows_to_remove AS (
SELECT u FROM raw_events_first_local WHERE u > 0
),
removed_rows AS (
DELETE FROM raw_events_first_local
WHERE u IN (SELECT * FROM rows_to_remove)
RETURNING u, v
)
INSERT INTO raw_events_first (user_id, value_1)
WITH ultra_rows AS (
WITH numbers AS (
SELECT s FROM generate_series(1,10) s
),
super_rows AS (
SELECT u, v FROM removed_rows JOIN numbers ON (u = s)
)
SELECT * FROM super_rows LIMIT 5
)
SELECT u, v FROM ultra_rows;
SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1;
user_id | value_1
---------------------------------------------------------------------
1 | 2
2 | 4
3 | 6
4 | 8
5 | 10
(5 rows)
-- CTEs with duplicate names are also supported
TRUNCATE raw_events_first;
WITH super_rows AS (
SELECT u FROM raw_events_first_local
)
INSERT INTO raw_events_first (user_id, value_1)
WITH super_rows AS (
SELECT * FROM super_rows GROUP BY u
)
SELECT u, 5 FROM super_rows;
SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1;
user_id | value_1
---------------------------------------------------------------------
0 | 5
(1 row)
-- CTEs are supported in router queries
TRUNCATE raw_events_first;
WITH user_two AS (
SELECT user_id, value_4 FROM raw_events_second WHERE user_id = 2
)
INSERT INTO raw_events_first (user_id, value_1)
SELECT * FROM user_two;
SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1;
user_id | value_1
---------------------------------------------------------------------
2 | 6
(1 row)
-- CTEs are supported when there are name collisions
WITH numbers AS (
SELECT s FROM generate_series(1,10) s
)
INSERT INTO raw_events_first(user_id, value_1)
WITH numbers AS (
SELECT s, s FROM generate_series(1,5) s
)
SELECT * FROM numbers;
-- Select into distributed table with a sequence
CREATE TABLE "CaseSensitiveTable" ("UserID" int, "Value1" int);
SELECT create_distributed_table('"CaseSensitiveTable"', 'UserID');
create_distributed_table
---------------------------------------------------------------------
(1 row)
INSERT INTO "CaseSensitiveTable"
SELECT s, s FROM generate_series(1,10) s;
SELECT * FROM "CaseSensitiveTable" ORDER BY "UserID";
UserID | Value1
---------------------------------------------------------------------
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
6 | 6
7 | 7
8 | 8
9 | 9
10 | 10
(10 rows)
DROP TABLE "CaseSensitiveTable";
-- Select into distributed table with a sequence
CREATE TABLE dist_table_with_sequence (user_id serial, value_1 serial);
SELECT create_distributed_table('dist_table_with_sequence', 'user_id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- from local query
INSERT INTO dist_table_with_sequence (value_1)
SELECT s FROM generate_series(1,5) s;
SELECT * FROM dist_table_with_sequence ORDER BY user_id, value_1;
user_id | value_1
---------------------------------------------------------------------
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
(5 rows)
-- from a distributed query
INSERT INTO dist_table_with_sequence (value_1)
SELECT value_1 FROM dist_table_with_sequence ORDER BY value_1;
SELECT * FROM dist_table_with_sequence ORDER BY user_id, value_1;
user_id | value_1
---------------------------------------------------------------------
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
6 | 1
7 | 2
8 | 3
9 | 4
10 | 5
(10 rows)
TRUNCATE dist_table_with_sequence;
INSERT INTO dist_table_with_sequence (user_id)
SELECT user_id FROM raw_events_second ORDER BY user_id;
SELECT * FROM dist_table_with_sequence ORDER BY user_id, value_1;
user_id | value_1
---------------------------------------------------------------------
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
(5 rows)
WITH top10 AS (
SELECT user_id FROM raw_events_second WHERE value_1 IS NOT NULL ORDER BY value_1 LIMIT 10
)
INSERT INTO dist_table_with_sequence (value_1)
SELECT * FROM top10;
SELECT * FROM dist_table_with_sequence ORDER BY user_id, value_1;
user_id | value_1
---------------------------------------------------------------------
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
(5 rows)
-- router queries become logical planner queries when there is a nextval call
INSERT INTO dist_table_with_sequence (user_id)
SELECT user_id FROM dist_table_with_sequence WHERE user_id = 1;
SELECT * FROM dist_table_with_sequence ORDER BY user_id, value_1;
user_id | value_1
---------------------------------------------------------------------
1 | 1
1 | 6
2 | 2
3 | 3
4 | 4
5 | 5
(6 rows)
DROP TABLE dist_table_with_sequence;
-- Select into distributed table with a user-defined sequence
CREATE SEQUENCE seq1;
CREATE SEQUENCE seq2;
CREATE TABLE dist_table_with_user_sequence (user_id int default nextval('seq1'), value_1 bigint default nextval('seq2'));
SELECT create_distributed_table('dist_table_with_user_sequence', 'user_id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- from local query
INSERT INTO dist_table_with_user_sequence (value_1)
SELECT s FROM generate_series(1,5) s;
SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1;
user_id | value_1
---------------------------------------------------------------------
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
(5 rows)
-- from a distributed query
INSERT INTO dist_table_with_user_sequence (value_1)
SELECT value_1 FROM dist_table_with_user_sequence ORDER BY value_1;
SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1;
user_id | value_1
---------------------------------------------------------------------
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
6 | 1
7 | 2
8 | 3
9 | 4
10 | 5
(10 rows)
TRUNCATE dist_table_with_user_sequence;
INSERT INTO dist_table_with_user_sequence (user_id)
SELECT user_id FROM raw_events_second ORDER BY user_id;
SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1;
user_id | value_1
---------------------------------------------------------------------
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
(5 rows)
WITH top10 AS (
SELECT user_id FROM raw_events_second WHERE value_1 IS NOT NULL ORDER BY value_1 LIMIT 10
)
INSERT INTO dist_table_with_user_sequence (value_1)
SELECT * FROM top10;
SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1;
user_id | value_1
---------------------------------------------------------------------
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
(5 rows)
-- router queries become logical planner queries when there is a nextval call
INSERT INTO dist_table_with_user_sequence (user_id)
SELECT user_id FROM dist_table_with_user_sequence WHERE user_id = 1;
SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1;
user_id | value_1
---------------------------------------------------------------------
1 | 1
1 | 6
2 | 2
3 | 3
4 | 4
5 | 5
(6 rows)
DROP TABLE dist_table_with_user_sequence;
DROP SEQUENCE seq1, seq2;
-- Select from distributed table into reference table
CREATE TABLE ref_table (user_id serial, value_1 int);
SELECT create_reference_table('ref_table');
create_reference_table
---------------------------------------------------------------------
(1 row)
INSERT INTO ref_table
SELECT user_id, value_1 FROM raw_events_second;
SELECT * FROM ref_table ORDER BY user_id, value_1;
user_id | value_1
---------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
(5 rows)
INSERT INTO ref_table (value_1)
SELECT value_1 FROM raw_events_second ORDER BY value_1;
SELECT * FROM ref_table ORDER BY user_id, value_1;
user_id | value_1
---------------------------------------------------------------------
1 |
1 |
2 |
2 |
3 |
3 |
4 |
4 |
5 |
5 |
(10 rows)
INSERT INTO ref_table SELECT * FROM ref_table;
SELECT * FROM ref_table ORDER BY user_id, value_1;
user_id | value_1
---------------------------------------------------------------------
1 |
1 |
1 |
1 |
2 |
2 |
2 |
2 |
3 |
3 |
3 |
3 |
4 |
4 |
4 |
4 |
5 |
5 |
5 |
5 |
(20 rows)
DROP TABLE ref_table;
-- Select from distributed table into reference table with user-defined sequence
CREATE SEQUENCE seq1;
CREATE TABLE ref_table_with_user_sequence (user_id int default nextval('seq1'), value_1 int);
SELECT create_reference_table('ref_table_with_user_sequence');
create_reference_table
---------------------------------------------------------------------
(1 row)
INSERT INTO ref_table_with_user_sequence
SELECT user_id, value_1 FROM raw_events_second;
SELECT * FROM ref_table_with_user_sequence ORDER BY user_id, value_1;
user_id | value_1
---------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
(5 rows)
INSERT INTO ref_table_with_user_sequence (value_1)
SELECT value_1 FROM raw_events_second ORDER BY value_1;
SELECT * FROM ref_table_with_user_sequence ORDER BY user_id, value_1;
user_id | value_1
---------------------------------------------------------------------
1 |
1 |
2 |
2 |
3 |
3 |
4 |
4 |
5 |
5 |
(10 rows)
INSERT INTO ref_table_with_user_sequence SELECT * FROM ref_table_with_user_sequence;
SELECT * FROM ref_table_with_user_sequence ORDER BY user_id, value_1;
user_id | value_1
---------------------------------------------------------------------
1 |
1 |
1 |
1 |
2 |
2 |
2 |
2 |
3 |
3 |
3 |
3 |
4 |
4 |
4 |
4 |
5 |
5 |
5 |
5 |
(20 rows)
DROP TABLE ref_table_with_user_sequence;
DROP SEQUENCE seq1;
-- Select from reference table into reference table
CREATE TABLE ref1 (d timestamptz);
SELECT create_reference_table('ref1');
create_reference_table
---------------------------------------------------------------------
(1 row)
CREATE TABLE ref2 (d date);
SELECT create_reference_table('ref2');
create_reference_table
---------------------------------------------------------------------
(1 row)
INSERT INTO ref2 VALUES ('2017-10-31');
INSERT INTO ref1 SELECT * FROM ref2;
SELECT count(*) from ref1;
count
---------------------------------------------------------------------
1
(1 row)
-- also test with now()
INSERT INTO ref1 SELECT now() FROM ref2;
SELECT count(*) from ref1;
count
---------------------------------------------------------------------
2
(1 row)
DROP TABLE ref1;
DROP TABLE ref2;
-- Select into an append-partitioned table is not supported
CREATE TABLE insert_append_table (user_id int, value_4 bigint);
SELECT create_distributed_table('insert_append_table', 'user_id', 'append');
create_distributed_table
---------------------------------------------------------------------
(1 row)
INSERT INTO insert_append_table (user_id, value_4)
SELECT user_id, 1 FROM raw_events_second LIMIT 5;
ERROR: INSERT ... SELECT into an append-distributed table is not supported
DROP TABLE insert_append_table;
-- Insert from other distributed table as prepared statement
TRUNCATE raw_events_first;
PREPARE insert_prep(int) AS
INSERT INTO raw_events_first (user_id, value_1)
SELECT $1, value_4 FROM raw_events_second ORDER BY value_4 LIMIT 1;
EXECUTE insert_prep(1);
EXECUTE insert_prep(2);
EXECUTE insert_prep(3);
EXECUTE insert_prep(4);
EXECUTE insert_prep(5);
EXECUTE insert_prep(6);
SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1;
user_id | value_1
---------------------------------------------------------------------
1 | 3
2 | 3
3 | 3
4 | 3
5 | 3
6 | 3
(6 rows)
-- Inserting into views is handled via coordinator
TRUNCATE raw_events_first;
INSERT INTO test_view
SELECT * FROM raw_events_second;
SELECT user_id, value_4 FROM test_view ORDER BY user_id, value_4;
user_id | value_4
---------------------------------------------------------------------
1 | 3
2 | 6
3 | 9
4 | 12
5 | 15
(5 rows)
-- Drop the view now, because the column we are about to drop depends on it
DROP VIEW test_view;
-- Make sure we handle dropped columns correctly
CREATE TABLE drop_col_table (col1 text, col2 text, col3 text);
SELECT create_distributed_table('drop_col_table', 'col2');
create_distributed_table
---------------------------------------------------------------------
(1 row)
ALTER TABLE drop_col_table DROP COLUMN col1;
INSERT INTO drop_col_table (col3, col2)
SELECT value_4, user_id FROM raw_events_second LIMIT 5;
SELECT * FROM drop_col_table ORDER BY col2, col3;
col2 | col3
---------------------------------------------------------------------
1 | 3
2 | 6
3 | 9
4 | 12
5 | 15
(5 rows)
-- make sure the tuple went to the right shard
SELECT * FROM drop_col_table WHERE col2 = '1';
col2 | col3
---------------------------------------------------------------------
1 | 3
(1 row)
RESET client_min_messages;
-- make sure casts are handled correctly
CREATE TABLE coerce_events(user_id int, time timestamp, value_1 numeric);
SELECT create_distributed_table('coerce_events', 'user_id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
CREATE TABLE coerce_agg (user_id int, value_1_agg int);
SELECT create_distributed_table('coerce_agg', 'user_id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
INSERT INTO coerce_events(user_id, value_1) VALUES (1, 1), (2, 2), (10, 10);
-- numeric -> int (straight function)
INSERT INTO coerce_agg(user_id, value_1_agg)
SELECT *
FROM (
SELECT user_id, value_1
FROM coerce_events
) AS ftop
ORDER BY 2 DESC, 1 DESC
LIMIT 5;
-- int -> text
ALTER TABLE coerce_agg ALTER COLUMN value_1_agg TYPE text;
INSERT INTO coerce_agg(user_id, value_1_agg)
SELECT *
FROM (
SELECT user_id, value_1
FROM coerce_events
) AS ftop
LIMIT 5;
SELECT * FROM coerce_agg ORDER BY 1 DESC, 2 DESC;
user_id | value_1_agg
---------------------------------------------------------------------
10 | 10
10 | 10
2 | 2
2 | 2
1 | 1
1 | 1
(6 rows)
TRUNCATE coerce_agg;
-- int -> char(1)
ALTER TABLE coerce_agg ALTER COLUMN value_1_agg TYPE char(1);
INSERT INTO coerce_agg(user_id, value_1_agg)
SELECT *
FROM (
SELECT user_id, value_1
FROM coerce_events
) AS ftop
LIMIT 5;
ERROR: value too long for type character(1)
SELECT * FROM coerce_agg ORDER BY 1 DESC, 2 DESC;
user_id | value_1_agg
---------------------------------------------------------------------
(0 rows)
TRUNCATE coerce_agg;
TRUNCATE coerce_events;
-- char(5) -> char(1)
ALTER TABLE coerce_events ALTER COLUMN value_1 TYPE char(5);
INSERT INTO coerce_events(user_id, value_1) VALUES (1, 'aaaaa'), (2, 'bbbbb');
INSERT INTO coerce_agg(user_id, value_1_agg)
SELECT *
FROM (
SELECT user_id, value_1
FROM coerce_events
) AS ftop
LIMIT 5;
ERROR: value too long for type character(1)
-- char(1) -> char(5)
ALTER TABLE coerce_events ALTER COLUMN value_1 TYPE char(1) USING value_1::char(1);
ALTER TABLE coerce_agg ALTER COLUMN value_1_agg TYPE char(5);
INSERT INTO coerce_agg(user_id, value_1_agg)
SELECT *
FROM (
SELECT user_id, value_1
FROM coerce_events
) AS ftop
LIMIT 5;
SELECT * FROM coerce_agg ORDER BY 1 DESC, 2 DESC;
user_id | value_1_agg
---------------------------------------------------------------------
2 | b
1 | a
(2 rows)
TRUNCATE coerce_agg;
TRUNCATE coerce_events;
-- integer -> integer (check VALUE < 5)
ALTER TABLE coerce_events ALTER COLUMN value_1 TYPE integer USING NULL;
ALTER TABLE coerce_agg ALTER COLUMN value_1_agg TYPE integer USING NULL;
ALTER TABLE coerce_agg ADD CONSTRAINT small_number CHECK (value_1_agg < 5);
INSERT INTO coerce_events (user_id, value_1) VALUES (1, 1), (10, 10);
\set VERBOSITY TERSE
INSERT INTO coerce_agg(user_id, value_1_agg)
SELECT *
FROM (
SELECT user_id, value_1
FROM coerce_events
) AS ftop;
ERROR: new row for relation "coerce_agg_13300067" violates check constraint "small_number_13300067"
\set VERBOSITY DEFAULT
SELECT * FROM coerce_agg ORDER BY 1 DESC, 2 DESC;
user_id | value_1_agg
---------------------------------------------------------------------
(0 rows)
-- integer[3] -> text[3]
TRUNCATE coerce_events;
ALTER TABLE coerce_events ALTER COLUMN value_1 TYPE integer[3] USING NULL;
INSERT INTO coerce_events(user_id, value_1) VALUES (1, '{1,1,1}'), (2, '{2,2,2}');
ALTER TABLE coerce_agg DROP COLUMN value_1_agg;
ALTER TABLE coerce_agg ADD COLUMN value_1_agg text[3];
INSERT INTO coerce_agg(user_id, value_1_agg)
SELECT *
FROM (
SELECT user_id, value_1
FROM coerce_events
) AS ftop
LIMIT 5;
SELECT * FROM coerce_agg ORDER BY 1 DESC, 2 DESC;
user_id | value_1_agg
---------------------------------------------------------------------
2 | {2,2,2}
1 | {1,1,1}
(2 rows)
-- INSERT..SELECT + prepared statements + recursive planning
BEGIN;
PREPARE prepared_recursive_insert_select AS
INSERT INTO users_table
SELECT * FROM users_table
WHERE value_1 IN (SELECT value_2 FROM events_table OFFSET 0);
EXECUTE prepared_recursive_insert_select;
EXECUTE prepared_recursive_insert_select;
EXECUTE prepared_recursive_insert_select;
EXECUTE prepared_recursive_insert_select;
EXECUTE prepared_recursive_insert_select;
EXECUTE prepared_recursive_insert_select;
ROLLBACK;
-- upsert with on conflict update distribution column is unsupported
INSERT INTO agg_events AS ae
(
user_id,
value_1_agg,
agg_time
)
SELECT user_id,
value_1,
time
FROM raw_events_first
ON conflict (user_id, value_1_agg)
DO UPDATE
SET user_id = 42
RETURNING user_id, value_1_agg;
ERROR: modifying the partition value of rows is not allowed
-- test a small citus.remote_copy_flush_threshold
BEGIN;
SET LOCAL citus.remote_copy_flush_threshold TO 1;
INSERT INTO raw_events_first
SELECT * FROM raw_events_first OFFSET 0
ON CONFLICT DO NOTHING;
ABORT;
-- test fix for issue https://github.com/citusdata/citus/issues/5891
CREATE TABLE dist_table_1(
dist_col integer,
int_col integer,
text_col_1 text,
text_col_2 text
);
SELECT create_distributed_table('dist_table_1', 'dist_col');
create_distributed_table
---------------------------------------------------------------------
(1 row)
INSERT INTO dist_table_1 VALUES (1, 1, 'string', 'string');
CREATE TABLE dist_table_2(
dist_col integer,
int_col integer
);
SELECT create_distributed_table('dist_table_2', 'dist_col');
create_distributed_table
---------------------------------------------------------------------
(1 row)
INSERT INTO dist_table_2 VALUES (1, 1);
with a as (select random()) INSERT INTO dist_table_1
SELECT
t1.dist_col,
1,
'string',
'string'
FROM a, dist_table_1 t1
join dist_table_2 t2 using (dist_col)
limit 1
returning text_col_1;
text_col_1
---------------------------------------------------------------------
string
(1 row)
CREATE TABLE dist_table_3(
dist_col bigint,
int_col integer
);
SELECT create_distributed_table('dist_table_3', 'dist_col');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- dist_table_2 and dist_table_3 are non-colocated source tables. Repartitioning is also not possible due to
-- different types for distribution columns. Citus would not be able to handle this complex insert select.
INSERT INTO dist_table_1 SELECT dist_table_2.dist_col FROM dist_table_2 JOIN dist_table_3 USING(dist_col);
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
CREATE TABLE dist_table_4(
dist_col integer,
int_col integer
);
SELECT create_distributed_table('dist_table_4', 'dist_col');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- Even if target table distribution column is colocated with dist_table_2's distributed column, source tables dist_table_2 and dist_table_4
-- are non-colocated. Hence, SELECT part of the query should be pulled to coordinator.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1 SELECT dist_table_2.dist_col FROM dist_table_2 JOIN dist_table_4 ON dist_table_2.dist_col = dist_table_4.int_col;
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: pull to coordinator
-> Custom Scan (Citus Adaptive)
Task Count: 6
(4 rows)
-- For INSERT SELECT, when a lateral query references an outer query, push-down is possible even if limit clause exists in the lateral query.
-- It is because subquery with limit does not need to be merged at coordinator as it is a lateral query.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1 SELECT d1.dist_col FROM dist_table_1 d1 LEFT JOIN LATERAL (SELECT * FROM dist_table_2 d2 WHERE d1.dist_col = d2.dist_col LIMIT 3) dummy USING(dist_col);
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus Adaptive)
Task Count: 4
(2 rows)
-- For INSERT SELECT, when push-down is NOT possible when limit clause exists in a subquery at SELECT part of INSERT SELECT.
-- It is because the subquery with limit needs to be merged at coordinator.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1 SELECT d1.dist_col FROM dist_table_1 d1 LEFT JOIN (SELECT * FROM dist_table_2 LIMIT 3) dummy USING(dist_col);
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: repartition
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan XXX_1
-> Limit
-> Custom Scan (Citus Adaptive)
Task Count: 4
(7 rows)
CREATE TABLE dist_table_5(id int, id2 int);
SELECT create_distributed_table('dist_table_5','id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
CREATE TABLE dist_table_6(id int, id2 int);
SELECT create_distributed_table('dist_table_6','id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- verify that insert select with union can be pushed down since UNION clause has FROM clause at top level query.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5(id) SELECT id FROM (SELECT id FROM dist_table_5 UNION SELECT id FROM dist_table_6) dummy;
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus Adaptive)
Task Count: 4
(2 rows)
-- verify that insert select with sublink can be pushed down when tables are colocated.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id, (SELECT id FROM dist_table_5 WHERE dist_table_5.id = dist_table_6.id) FROM dist_table_6;
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus Adaptive)
Task Count: 4
(2 rows)
CREATE TABLE ref_table_1(id int);
SELECT create_reference_table('ref_table_1');
create_reference_table
---------------------------------------------------------------------
(1 row)
-- verify that insert select with sublink cannot be pushed down when from clause does not contain any distributed relation.
INSERT INTO dist_table_5 SELECT id, (SELECT id FROM dist_table_5 WHERE dist_table_5.id = ref_table_1.id) FROM ref_table_1;
ERROR: correlated subqueries are not supported when the FROM clause contains a reference table
-- verify that insert select cannot be pushed down when we have recurring range table in from clause.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id, (SELECT id FROM ref_table_1 WHERE id = 1) FROM ref_table_1;
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: pull to coordinator
-> Custom Scan (Citus Adaptive)
Task Count: 1
(4 rows)
-- verify that insert select cannot be pushed down when we have reference table in outside of outer join in a chained-join.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT a.id FROM dist_table_5 a LEFT JOIN ref_table_1 b ON (true) RIGHT JOIN ref_table_1 c ON (true);
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: pull to coordinator
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan XXX_1
-> Custom Scan (Citus Adaptive)
Task Count: 4
(6 rows)
-- verify that insert select can be pushed down when we have reference table in outside of outer join.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id FROM ref_table_1 LEFT JOIN dist_table_5 USING(id);
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: repartition
-> Custom Scan (Citus Adaptive)
Task Count: 4
(4 rows)
-- verify that insert select cannot be pushed down when we have reference table in outside of left join and joined on non-partition column.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT ref_table_1.id FROM ref_table_1 LEFT JOIN dist_table_5 ON ref_table_1.id = dist_table_5.id2;
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: pull to coordinator
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan XXX_1
-> Custom Scan (Citus Adaptive)
Task Count: 4
(6 rows)
CREATE TABLE loc_table_1(id int);
-- verify that insert select cannot be pushed down when it contains join between local and distributed tables.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id FROM dist_table_5 JOIN loc_table_1 USING(id);
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: repartition
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan XXX_1
-> Seq Scan on loc_table_1
Task Count: 4
(6 rows)
CREATE VIEW view_1 AS
SELECT id FROM dist_table_6;
CREATE MATERIALIZED VIEW view_2 AS
SELECT id FROM dist_table_6;
-- verify that insert select cannot be pushed down when it contains view.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT * FROM view_1;
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus Adaptive)
Task Count: 4
(2 rows)
-- verify that insert select cannot be pushed down when it contains materialized view.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT * FROM view_2;
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: pull to coordinator
-> Seq Scan on view_2
(3 rows)
CREATE TABLE append_table(id integer, data text, int_data int);
SELECT create_distributed_table('append_table', 'id', 'append');
create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT master_create_empty_shard('append_table');
master_create_empty_shard
---------------------------------------------------------------------
13300096
(1 row)
-- verify that insert select push down for append tables are not supported.
INSERT INTO append_table SELECT * FROM append_table;
ERROR: INSERT ... SELECT into an append-distributed table is not supported
-- verify that CTEs at top level of INSERT SELECT, that can normally be inlined, would not be inlined by INSERT SELECT pushdown planner
-- and handled by pull to coordinator.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) WITH cte_1 AS (SELECT id FROM dist_table_5 WHERE id > 5)
INSERT INTO dist_table_5
SELECT id FROM dist_table_5 JOIN cte_1 USING(id) OFFSET 5;
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: pull to coordinator
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan XXX_1
-> Limit
-> Custom Scan (Citus Adaptive)
Task Count: 4
(7 rows)
-- verify that CTEs at top level of SELECT part, would be inlined by Postgres and pushed down by INSERT SELECT planner.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5
WITH cte_1 AS (SELECT id FROM dist_table_5 WHERE id = 5)
SELECT id FROM dist_table_5 JOIN cte_1 USING(id);
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus Adaptive)
Task Count: 1
(2 rows)
---------------------------------------------------------------------
-- Regression Test Script for Issue #7784
-- This script tests INSERT ... SELECT with a CTE for:
-- 1. Schema based sharding.
-- 2. A distributed table.
---------------------------------------------------------------------
-- Enable schema-based sharding
SET citus.enable_schema_based_sharding TO ON;
-- Create a table for schema based sharding
CREATE TABLE version_sch_based (
id bigserial NOT NULL,
description varchar(255),
PRIMARY KEY (id)
);
-- Insert an initial row.
INSERT INTO version_sch_based (description) VALUES ('Version 1');
-- Duplicate the row using a CTE and INSERT ... SELECT.
WITH v AS (
SELECT * FROM version_sch_based WHERE description = 'Version 1'
)
INSERT INTO version_sch_based (description)
SELECT description FROM v;
-- Expected output:
-- id | description
-- ----+-------------
-- 1 | Version 1
-- 2 | Version 1
-- Query the table and order by id for consistency.
SELECT * FROM version_sch_based ORDER BY id;
id | description
---------------------------------------------------------------------
1 | Version 1
2 | Version 1
(2 rows)
---------------------------------------------------------------------
-- Case 2: Distributed Table Scenario
---------------------------------------------------------------------
SET citus.enable_schema_based_sharding TO OFF;
-- Create a table for the distributed test.
CREATE TABLE version_dist (
id bigserial NOT NULL,
description varchar(255),
PRIMARY KEY (id)
);
-- Register the table as distributed using the 'id' column as the distribution key.
SELECT create_distributed_table('version_dist', 'id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- Insert an initial row.
INSERT INTO version_dist (description) VALUES ('Version 1');
-- Duplicate the row using a CTE and INSERT ... SELECT.
WITH v AS (
SELECT * FROM version_dist WHERE description = 'Version 1'
)
INSERT INTO version_dist (description)
SELECT description FROM v;
-- Expected output:
-- id | description
-- ----+-------------
-- 1 | Version 1
-- 2 | Version 1
-- Query the table and order by id for consistency.
SELECT * FROM version_dist ORDER BY id;
id | description
---------------------------------------------------------------------
1 | Version 1
2 | Version 1
(2 rows)
---------------------------------------------------------------------
-- Case 3: Distributed INSERT … SELECT with nextval()
-- Verifies that nextval() is evaluated on the coordinator only.
---------------------------------------------------------------------
-- A fresh sequence for clarity
CREATE SEQUENCE seq_nextval_test START 100;
-- Table with DEFAULT nextval()
CREATE TABLE version_dist_seq (
id bigint DEFAULT nextval('seq_nextval_test'),
description text,
PRIMARY KEY (id)
);
SELECT create_distributed_table('version_dist_seq', 'id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- Seed one row (id = 100)
INSERT INTO version_dist_seq (description) VALUES ('row0');
-- CTE duplication should produce **exactly one** new sequence value (id = 101)
WITH v AS (
SELECT * FROM version_dist_seq WHERE description = 'row0'
)
INSERT INTO version_dist_seq (description)
SELECT description FROM v;
-- Expected: ids are 100 and 101 (no gaps, no duplicates)
SELECT id, description FROM version_dist_seq ORDER BY id;
id | description
---------------------------------------------------------------------
100 | row0
101 | row0
(2 rows)
---------------------------------------------------------------------
-- Case 4: UNION ALL + nextval() in distributed INSERT … SELECT
---------------------------------------------------------------------
CREATE SEQUENCE seq_union_test START 200;
CREATE TABLE version_dist_union (
id bigint DEFAULT nextval('seq_union_test'),
val int,
PRIMARY KEY (id)
);
SELECT create_distributed_table('version_dist_union', 'id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- Seed rows
INSERT INTO version_dist_union (val) VALUES (1), (2);
-- UNION ALL duplication; each leg returns two rows -> four inserts total
WITH src AS (
SELECT val FROM version_dist_union
UNION ALL
SELECT val FROM version_dist_union
)
INSERT INTO version_dist_union(val)
SELECT val FROM src;
-- Expected IDs: 200,201,202,203,204,205
SELECT id, val FROM version_dist_union ORDER BY id;
id | val
---------------------------------------------------------------------
200 | 1
201 | 2
202 | 1
203 | 2
204 | 1
205 | 2
(6 rows)
-- End of Issue #7784
-- PR #8106 — CTE traversal works when following outer Vars
-- This script exercises three shapes:
-- T1) CTE referenced inside a correlated subquery (one level down)
-- T2) CTE referenced inside a nested subquery (two levels down)
-- T3) Subquery targetlist uses a scalar sublink into the outer CTE
CREATE SCHEMA pr8106_cte_outervar;
SET search_path = pr8106_cte_outervar, public;
-- Base tables for the tests
DROP TABLE IF EXISTS raw_events_first CASCADE;
NOTICE: table "raw_events_first" does not exist, skipping
DROP TABLE IF EXISTS agg_events CASCADE;
NOTICE: table "agg_events" does not exist, skipping
CREATE TABLE raw_events_first(
user_id int,
value_1 int
);
CREATE TABLE agg_events(
user_id int,
value_1_agg int
);
-- Distribute and colocate (distribution key = user_id)
SELECT create_distributed_table('raw_events_first', 'user_id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('agg_events', 'user_id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- Seed data (duplicates on some user_ids; some NULL value_1s)
INSERT INTO raw_events_first(user_id, value_1) VALUES
(1, 10), (1, 20), (1, NULL),
(2, NULL),
(3, 30),
(4, NULL),
(5, 50), (5, NULL),
(6, NULL);
---------------------------------------------------------------------
-- T1) CTE referenced inside a correlated subquery (one level down)
---------------------------------------------------------------------
TRUNCATE agg_events;
WITH c AS MATERIALIZED (
SELECT user_id FROM raw_events_first
)
INSERT INTO agg_events (user_id)
SELECT t.user_id
FROM raw_events_first t
WHERE EXISTS (SELECT 1 FROM c WHERE c.user_id = t.user_id);
-- Expect one insert per row in raw_events_first (EXISTS always true per user_id)
SELECT 't1_count_matches' AS test,
(SELECT count(*) FROM agg_events) =
(SELECT count(*) FROM raw_events_first) AS ok;
test | ok
---------------------------------------------------------------------
t1_count_matches | t
(1 row)
-- Spot-check: how many rows were inserted
SELECT 't1_rows' AS test, count(*) AS rows FROM agg_events;
test | rows
---------------------------------------------------------------------
t1_rows | 9
(1 row)
---------------------------------------------------------------------
-- T2) CTE referenced inside a nested subquery (two levels down)
---------------------------------------------------------------------
TRUNCATE agg_events;
WITH c AS MATERIALIZED (
SELECT user_id FROM raw_events_first
)
INSERT INTO agg_events (user_id)
SELECT t.user_id
FROM raw_events_first t
WHERE EXISTS (
SELECT 1
FROM (SELECT user_id FROM c) c2
WHERE c2.user_id = t.user_id
);
-- Same cardinality expectation as T1
SELECT 't2_count_matches' AS test,
(SELECT count(*) FROM agg_events) =
(SELECT count(*) FROM raw_events_first) AS ok;
test | ok
---------------------------------------------------------------------
t2_count_matches | t
(1 row)
SELECT 't2_rows' AS test, count(*) AS rows FROM agg_events;
test | rows
---------------------------------------------------------------------
t2_rows | 9
(1 row)
---------------------------------------------------------------------
-- T3) Subquery targetlist uses a scalar sublink into the outer CTE
-- (use MAX() to keep scalar subquery single-row)
---------------------------------------------------------------------
TRUNCATE agg_events;
WITH c AS (SELECT user_id, value_1 FROM raw_events_first)
INSERT INTO agg_events (user_id, value_1_agg)
SELECT d.user_id, d.value_1_agg
FROM (
SELECT t.user_id,
(SELECT max(c.value_1) FROM c WHERE c.user_id = t.user_id) AS value_1_agg
FROM raw_events_first t
) AS d
WHERE d.value_1_agg IS NOT NULL;
-- Expect one insert per row in raw_events_first whose user_id has at least one non-NULL value_1
SELECT 't3_count_matches' AS test,
(SELECT count(*) FROM agg_events) =
(
SELECT count(*)
FROM raw_events_first t
WHERE EXISTS (
SELECT 1 FROM raw_events_first c
WHERE c.user_id = t.user_id AND c.value_1 IS NOT NULL
)
) AS ok;
test | ok
---------------------------------------------------------------------
t3_count_matches | t
(1 row)
-- Also verify no NULLs were inserted into value_1_agg
SELECT 't3_no_null_value_1_agg' AS test,
NOT EXISTS (SELECT 1 FROM agg_events WHERE value_1_agg IS NULL) AS ok;
test | ok
---------------------------------------------------------------------
t3_no_null_value_1_agg | t
(1 row)
-- Deterministic sample of results
SELECT 't3_sample' AS test, user_id, value_1_agg
FROM agg_events
ORDER BY user_id
LIMIT 5;
test | user_id | value_1_agg
---------------------------------------------------------------------
t3_sample | 1 | 20
t3_sample | 1 | 20
t3_sample | 1 | 20
t3_sample | 3 | 30
t3_sample | 5 | 50
(5 rows)
-- End of PR #8106 — CTE traversal works when following outer Vars
SET client_min_messages TO ERROR;
DROP SCHEMA pr8106_cte_outervar CASCADE;
DROP SCHEMA multi_insert_select CASCADE;