-- -- MULTI_INSERT_SELECT -- -- This test file has an alternative output because of the change in the -- display of SQL-standard function's arguments in INSERT/SELECT in PG15. -- The alternative output can be deleted when we drop support for PG14 -- CREATE SCHEMA multi_insert_select; SET search_path = multi_insert_select,public; SHOW server_version \gset SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15; server_version_ge_15 --------------------------------------------------------------------- t (1 row) SET citus.next_shard_id TO 13300000; SET citus.next_placement_id TO 13300000; -- create co-located tables SET citus.shard_count = 4; SET citus.shard_replication_factor = 2; -- order of execution might change in parallel executions -- and the error details might contain the worker node -- so be less verbose with \set VERBOSITY TERSE when necessary CREATE TABLE raw_events_first (user_id int, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint, UNIQUE(user_id, value_1)); SELECT create_distributed_table('raw_events_first', 'user_id'); create_distributed_table --------------------------------------------------------------------- (1 row) CREATE TABLE raw_events_second (user_id int, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint, UNIQUE(user_id, value_1)); SELECT create_distributed_table('raw_events_second', 'user_id'); create_distributed_table --------------------------------------------------------------------- (1 row) CREATE TABLE agg_events (user_id int, value_1_agg int, value_2_agg int, value_3_agg float, value_4_agg bigint, agg_time timestamp, UNIQUE(user_id, value_1_agg)); SELECT create_distributed_table('agg_events', 'user_id');; create_distributed_table --------------------------------------------------------------------- (1 row) -- create the reference table as well CREATE TABLE reference_table (user_id int); SELECT create_reference_table('reference_table'); create_reference_table 
--------------------------------------------------------------------- (1 row) CREATE TABLE insert_select_varchar_test (key varchar, value int); SELECT create_distributed_table('insert_select_varchar_test', 'key', 'hash'); create_distributed_table --------------------------------------------------------------------- (1 row) -- set back to the defaults SET citus.shard_count = DEFAULT; SET citus.shard_replication_factor = DEFAULT; INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES (1, now(), 10, 100, 1000.1, 10000); INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES (2, now(), 20, 200, 2000.1, 20000); INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES (3, now(), 30, 300, 3000.1, 30000); INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES (4, now(), 40, 400, 4000.1, 40000); INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES (5, now(), 50, 500, 5000.1, 50000); INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES (6, now(), 60, 600, 6000.1, 60000); SET client_min_messages TO DEBUG2; -- raw table to raw table INSERT INTO raw_events_second SELECT * FROM raw_events_first; DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_first.user_id, raw_events_first."time", raw_events_first.value_1, raw_events_first.value_2, raw_events_first.value_3, raw_events_first.value_4 FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_first.user_id, raw_events_first."time", raw_events_first.value_1, 
raw_events_first.value_2, raw_events_first.value_3, raw_events_first.value_4 FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_first.user_id, raw_events_first."time", raw_events_first.value_1, raw_events_first.value_2, raw_events_first.value_3, raw_events_first.value_4 FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_first.user_id, raw_events_first."time", raw_events_first.value_1, raw_events_first.value_2, raw_events_first.value_3, raw_events_first.value_4 FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) -- see that our first multi shard INSERT...SELECT works expected SET client_min_messages TO INFO; SELECT raw_events_first.user_id FROM raw_events_first, raw_events_second WHERE raw_events_first.user_id = raw_events_second.user_id ORDER BY user_id DESC; user_id --------------------------------------------------------------------- 6 5 4 3 2 1 (6 rows) -- see that we get unique vialitons \set VERBOSITY TERSE INSERT INTO raw_events_second SELECT * FROM raw_events_first; ERROR: duplicate key value violates unique constraint "raw_events_second_user_id_value_1_key_13300004" \set VERBOSITY DEFAULT -- stable functions should be allowed INSERT INTO raw_events_second (user_id, time) SELECT user_id, now() FROM raw_events_first WHERE user_id < 0; INSERT INTO raw_events_second (user_id) SELECT user_id FROM raw_events_first WHERE time > now() + interval '1 day'; -- hide version-dependent PL/pgSQL context messages \set VERBOSITY terse 
-- make sure we evaluate stable functions on the master, once CREATE OR REPLACE FUNCTION evaluate_on_master() RETURNS int LANGUAGE plpgsql STABLE AS $function$ BEGIN RAISE NOTICE 'evaluating on master'; RETURN 0; END; $function$; INSERT INTO raw_events_second (user_id, value_1) SELECT user_id, evaluate_on_master() FROM raw_events_first WHERE user_id < 0; NOTICE: evaluating on master -- make sure we don't evaluate stable functions with column arguments SET citus.enable_metadata_sync TO OFF; CREATE OR REPLACE FUNCTION evaluate_on_master(x int) RETURNS int LANGUAGE plpgsql STABLE AS $function$ BEGIN RAISE NOTICE 'evaluating on master'; RETURN x; END; $function$; RESET citus.enable_metadata_sync; INSERT INTO raw_events_second (user_id, value_1) SELECT user_id, evaluate_on_master(value_1) FROM raw_events_first WHERE user_id = 0; ERROR: function multi_insert_select.evaluate_on_master(integer) does not exist -- add one more row INSERT INTO raw_events_first (user_id, time) VALUES (7, now()); -- try a single shard query SET client_min_messages TO DEBUG2; INSERT INTO raw_events_second (user_id, time) SELECT user_id, time FROM raw_events_first WHERE user_id = 7; DEBUG: Creating router plan DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id, "time") SELECT raw_events_first.user_id, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) 7) AND (raw_events_first.user_id IS NOT NULL)) DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away SET client_min_messages TO INFO; -- add one more row INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES (8, now(), 80, 800, 8000, 80000); -- reorder 
columns SET client_min_messages TO DEBUG2; INSERT INTO raw_events_second (value_2, value_1, value_3, value_4, user_id, time) SELECT value_2, value_1, value_3, value_4, user_id, time FROM raw_events_first WHERE user_id = 8; DEBUG: Creating router plan DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_first.user_id, raw_events_first."time", raw_events_first.value_1, raw_events_first.value_2, raw_events_first.value_3, raw_events_first.value_4 FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) 8) AND (raw_events_first.user_id IS NOT NULL)) DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away -- a zero shard select INSERT INTO raw_events_second (value_2, value_1, value_3, value_4, user_id, time) SELECT value_2, value_1, value_3, value_4, user_id, time FROM raw_events_first WHERE false; DEBUG: Creating router plan DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away -- another zero shard select INSERT INTO raw_events_second (value_2, value_1, value_3, value_4, user_id, time) SELECT value_2, value_1, value_3, value_4, user_id, time FROM raw_events_first WHERE 0 != 0; DEBUG: Creating router plan DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away DEBUG: Skipping target shard 
interval 13300006 since SELECT query for it pruned away DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away -- add one more row SET client_min_messages TO INFO; INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES (9, now(), 90, 900, 9000, 90000); -- show that RETURNING also works SET client_min_messages TO DEBUG2; INSERT INTO raw_events_second (user_id, value_1, value_3) SELECT user_id, value_1, value_3 FROM raw_events_first WHERE value_3 = 9000 RETURNING *; DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id, value_1, value_3) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first.value_3 FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.value_3 OPERATOR(pg_catalog.=) (9000)::double precision) AND (raw_events_first.user_id IS NOT NULL)) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id, value_1, value_3) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first.value_3 FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.value_3 OPERATOR(pg_catalog.=) (9000)::double precision) AND (raw_events_first.user_id IS NOT NULL)) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id, value_1, value_3) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first.value_3 FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE 
((raw_events_first.value_3 OPERATOR(pg_catalog.=) (9000)::double precision) AND (raw_events_first.user_id IS NOT NULL)) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id, value_1, value_3) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first.value_3 FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.value_3 OPERATOR(pg_catalog.=) (9000)::double precision) AND (raw_events_first.user_id IS NOT NULL)) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 user_id | time | value_1 | value_2 | value_3 | value_4 --------------------------------------------------------------------- 9 | | 90 | | 9000 | (1 row) -- hits two shards \set VERBOSITY TERSE INSERT INTO raw_events_second (user_id, value_1, value_3) SELECT user_id, value_1, value_3 FROM raw_events_first WHERE user_id = 9 OR user_id = 16 RETURNING *; DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id, value_1, value_3) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first.value_3 FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (((raw_events_first.user_id OPERATOR(pg_catalog.=) 9) OR (raw_events_first.user_id OPERATOR(pg_catalog.=) 16)) AND (raw_events_first.user_id IS NOT NULL)) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 DEBUG: Skipping target shard interval 13300006 since SELECT 
query for it pruned away DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id, value_1, value_3) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first.value_3 FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (((raw_events_first.user_id OPERATOR(pg_catalog.=) 9) OR (raw_events_first.user_id OPERATOR(pg_catalog.=) 16)) AND (raw_events_first.user_id IS NOT NULL)) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 ERROR: duplicate key value violates unique constraint "raw_events_second_user_id_value_1_key_13300007" -- now do some aggregations INSERT INTO agg_events SELECT user_id, sum(value_1), avg(value_2), sum(value_3), count(value_4) FROM raw_events_first GROUP BY user_id; DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg, value_2_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, avg(raw_events_first.value_2) AS avg, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.user_id DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg, value_2_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, avg(raw_events_first.value_2) AS avg, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.user_id DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS 
citus_table_alias (user_id, value_1_agg, value_2_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, avg(raw_events_first.value_2) AS avg, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.user_id DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg, value_2_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, avg(raw_events_first.value_2) AS avg, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.user_id -- group by column not exists on the SELECT target list INSERT INTO agg_events (value_3_agg, value_4_agg, value_1_agg, user_id) SELECT sum(value_3), count(value_4), sum(value_1), user_id FROM raw_events_first GROUP BY value_2, user_id RETURNING *; DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.value_2, raw_events_first.user_id RETURNING citus_table_alias.user_id, citus_table_alias.value_1_agg, citus_table_alias.value_2_agg, citus_table_alias.value_3_agg, citus_table_alias.value_4_agg, citus_table_alias.agg_time DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg, value_3_agg, value_4_agg) SELECT 
raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.value_2, raw_events_first.user_id RETURNING citus_table_alias.user_id, citus_table_alias.value_1_agg, citus_table_alias.value_2_agg, citus_table_alias.value_3_agg, citus_table_alias.value_4_agg, citus_table_alias.agg_time DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.value_2, raw_events_first.user_id RETURNING citus_table_alias.user_id, citus_table_alias.value_1_agg, citus_table_alias.value_2_agg, citus_table_alias.value_3_agg, citus_table_alias.value_4_agg, citus_table_alias.agg_time DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.value_2, raw_events_first.user_id RETURNING citus_table_alias.user_id, citus_table_alias.value_1_agg, citus_table_alias.value_2_agg, citus_table_alias.value_3_agg, citus_table_alias.value_4_agg, citus_table_alias.agg_time ERROR: duplicate key value violates unique constraint "agg_events_user_id_value_1_agg_key_13300008" -- some subquery tests INSERT INTO agg_events (value_1_agg, user_id) 
SELECT SUM(value_1), id FROM (SELECT raw_events_second.user_id AS id, raw_events_second.value_1 FROM raw_events_first, raw_events_second WHERE raw_events_first.user_id = raw_events_second.user_id) AS foo GROUP BY id ORDER BY id; DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT foo.id, sum(foo.value_1) AS sum FROM (SELECT raw_events_second.user_id AS id, raw_events_second.value_1 FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id)) foo WHERE (foo.id IS NOT NULL) GROUP BY foo.id ORDER BY foo.id DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT foo.id, sum(foo.value_1) AS sum FROM (SELECT raw_events_second.user_id AS id, raw_events_second.value_1 FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id)) foo WHERE (foo.id IS NOT NULL) GROUP BY foo.id ORDER BY foo.id DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT foo.id, sum(foo.value_1) AS sum FROM (SELECT raw_events_second.user_id AS id, raw_events_second.value_1 FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id)) foo WHERE (foo.id IS NOT NULL) GROUP BY foo.id ORDER BY foo.id DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT foo.id, sum(foo.value_1) AS sum FROM (SELECT raw_events_second.user_id AS id, 
raw_events_second.value_1 FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id)) foo WHERE (foo.id IS NOT NULL) GROUP BY foo.id ORDER BY foo.id ERROR: duplicate key value violates unique constraint "agg_events_user_id_value_1_agg_key_13300008" -- subquery one more level depth INSERT INTO agg_events (value_4_agg, value_1_agg, user_id) SELECT v4, v1, id FROM (SELECT SUM(raw_events_second.value_4) AS v4, SUM(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM raw_events_first, raw_events_second WHERE raw_events_first.user_id = raw_events_second.user_id GROUP BY raw_events_second.user_id) AS foo ORDER BY id; DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg, value_4_agg) SELECT foo.id, foo.v1, foo.v4 FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id) foo WHERE (foo.id IS NOT NULL) ORDER BY foo.id DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg, value_4_agg) SELECT foo.id, foo.v1, foo.v4 FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id) foo WHERE (foo.id IS NOT NULL) ORDER BY foo.id DEBUG: distributed statement: 
INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg, value_4_agg) SELECT foo.id, foo.v1, foo.v4 FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id) foo WHERE (foo.id IS NOT NULL) ORDER BY foo.id DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg, value_4_agg) SELECT foo.id, foo.v1, foo.v4 FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id) foo WHERE (foo.id IS NOT NULL) ORDER BY foo.id ERROR: duplicate key value violates unique constraint "agg_events_user_id_value_1_agg_key_13300008" \set VERBOSITY DEFAULT -- join between subqueries INSERT INTO agg_events (user_id) SELECT f2.id FROM (SELECT id FROM (SELECT reference_table.user_id AS id FROM raw_events_first, reference_table WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f INNER JOIN (SELECT v4, v1, id FROM (SELECT SUM(raw_events_second.value_4) AS v4, SUM(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM raw_events_first, raw_events_second WHERE raw_events_first.user_id = raw_events_second.user_id GROUP BY raw_events_second.user_id HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 ON (f.id = f2.id); DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT 
f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f2.id IS NOT NULL) DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f2.id IS NOT NULL) DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT 
reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f2.id IS NOT NULL) DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f2.id IS NOT NULL) -- add one more level subqueris on top of subquery JOINs INSERT INTO agg_events (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) FROM ( SELECT f2.id as id, f2.v4 as value FROM (SELECT id FROM 
(SELECT reference_table.user_id AS id FROM raw_events_first, reference_table WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f INNER JOIN (SELECT v4, v1, id FROM (SELECT SUM(raw_events_second.value_4) AS v4, SUM(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM raw_events_first, raw_events_second WHERE raw_events_first.user_id = raw_events_second.user_id GROUP BY raw_events_second.user_id HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 ON (f.id = f2.id)) as outer_most GROUP BY outer_most.id; DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (outer_most.id IS NOT NULL) GROUP BY outer_most.id DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, 
multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (outer_most.id IS NOT NULL) GROUP BY outer_most.id DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (outer_most.id IS NOT NULL) GROUP BY outer_most.id DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_4_agg) SELECT 
outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (outer_most.id IS NOT NULL) GROUP BY outer_most.id -- subqueries in WHERE clause INSERT INTO raw_events_second (user_id) SELECT user_id FROM raw_events_first WHERE user_id IN (SELECT user_id FROM raw_events_second WHERE user_id = 2); DEBUG: Creating router plan DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) 2))) AND (raw_events_first.user_id IS NOT NULL)) INSERT INTO raw_events_second (user_id) SELECT user_id FROM 
raw_events_first WHERE user_id IN (SELECT user_id FROM raw_events_second WHERE user_id != 2 AND value_1 = 2000) ON conflict (user_id, value_1) DO NOTHING; DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.<>) 2) AND (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000)))) AND (raw_events_first.user_id IS NOT NULL)) ON CONFLICT(user_id, value_1) DO NOTHING DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300005 raw_events_second WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.<>) 2) AND (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000)))) AND (raw_events_first.user_id IS NOT NULL)) ON CONFLICT(user_id, value_1) DO NOTHING DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300006 raw_events_second WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.<>) 2) AND (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000)))) AND (raw_events_first.user_id IS NOT NULL)) ON CONFLICT(user_id, value_1) DO NOTHING DEBUG: distributed statement: INSERT INTO 
multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.<>) 2) AND (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000)))) AND (raw_events_first.user_id IS NOT NULL)) ON CONFLICT(user_id, value_1) DO NOTHING INSERT INTO raw_events_second (user_id) SELECT user_id FROM raw_events_first WHERE user_id IN (SELECT user_id FROM raw_events_second WHERE false); DEBUG: Creating router plan DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away INSERT INTO raw_events_second (user_id) SELECT user_id FROM raw_events_first WHERE user_id IN (SELECT user_id FROM raw_events_second WHERE value_1 = 1000 OR value_1 = 2000 OR value_1 = 3000); DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second WHERE ((raw_events_second.value_1 OPERATOR(pg_catalog.=) 1000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 3000)))) AND (raw_events_first.user_id IS NOT NULL)) DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT 
raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300005 raw_events_second WHERE ((raw_events_second.value_1 OPERATOR(pg_catalog.=) 1000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 3000)))) AND (raw_events_first.user_id IS NOT NULL)) DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300006 raw_events_second WHERE ((raw_events_second.value_1 OPERATOR(pg_catalog.=) 1000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 3000)))) AND (raw_events_first.user_id IS NOT NULL)) DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second WHERE ((raw_events_second.value_1 OPERATOR(pg_catalog.=) 1000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 3000)))) AND (raw_events_first.user_id IS NOT NULL)) -- lets mix subqueries in FROM clause and subqueries in WHERE INSERT INTO agg_events (user_id) SELECT f2.id FROM (SELECT id FROM (SELECT reference_table.user_id AS id FROM raw_events_first, reference_table WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f INNER JOIN 
(SELECT v4, v1, id FROM (SELECT SUM(raw_events_second.value_4) AS v4, SUM(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM raw_events_first, raw_events_second WHERE raw_events_first.user_id = raw_events_second.user_id GROUP BY raw_events_second.user_id HAVING SUM(raw_events_second.value_4) > 1000) AS foo2 ) as f2 ON (f.id = f2.id) WHERE f.id IN (SELECT user_id FROM raw_events_second); DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (1000)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE ((f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second)) AND (f2.id IS NOT NULL)) DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT 
sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (1000)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE ((f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300005 raw_events_second)) AND (f2.id IS NOT NULL)) DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (1000)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE ((f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300006 raw_events_second)) AND (f2.id IS NOT NULL)) DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM 
multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (1000)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE ((f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second)) AND (f2.id IS NOT NULL)) -- some UPSERTS INSERT INTO agg_events AS ae ( user_id, value_1_agg, agg_time ) SELECT user_id, value_1, time FROM raw_events_first ON conflict (user_id, value_1_agg) DO UPDATE SET agg_time = EXCLUDED.agg_time WHERE ae.agg_time < EXCLUDED.agg_time; DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET 
agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) -- upserts with returning INSERT INTO agg_events AS ae ( user_id, value_1_agg, agg_time ) SELECT user_id, value_1, time FROM raw_events_first ON conflict (user_id, value_1_agg) DO UPDATE SET agg_time = EXCLUDED.agg_time WHERE ae.agg_time < EXCLUDED.agg_time RETURNING user_id, value_1_agg; DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) RETURNING ae.user_id, ae.value_1_agg DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM 
multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) RETURNING ae.user_id, ae.value_1_agg DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) RETURNING ae.user_id, ae.value_1_agg DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) RETURNING ae.user_id, ae.value_1_agg user_id | value_1_agg --------------------------------------------------------------------- 7 | (1 row) INSERT INTO agg_events (user_id, value_1_agg) SELECT user_id, sum(value_1 + value_2) FROM raw_events_first GROUP BY user_id; DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) AS sum FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.user_id DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias 
(user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) AS sum FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.user_id DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) AS sum FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.user_id DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) AS sum FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.user_id -- FILTER CLAUSE INSERT INTO agg_events (user_id, value_1_agg) SELECT user_id, sum(value_1 + value_2) FILTER (where value_3 = 15) FROM raw_events_first GROUP BY user_id; DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) FILTER (WHERE (raw_events_first.value_3 OPERATOR(pg_catalog.=) (15)::double precision)) AS sum FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.user_id DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) FILTER (WHERE 
(raw_events_first.value_3 OPERATOR(pg_catalog.=) (15)::double precision)) AS sum FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.user_id DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) FILTER (WHERE (raw_events_first.value_3 OPERATOR(pg_catalog.=) (15)::double precision)) AS sum FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.user_id DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) FILTER (WHERE (raw_events_first.value_3 OPERATOR(pg_catalog.=) (15)::double precision)) AS sum FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.user_id -- a test with reference table JOINs INSERT INTO agg_events (user_id, value_1_agg) SELECT raw_events_first.user_id, sum(value_1) FROM reference_table, raw_events_first WHERE raw_events_first.user_id = reference_table.user_id GROUP BY raw_events_first.user_id; DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum FROM multi_insert_select.reference_table_13300012 reference_table, multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id) AND (raw_events_first.user_id IS NOT NULL)) GROUP BY raw_events_first.user_id DEBUG: distributed statement: INSERT INTO 
multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum FROM multi_insert_select.reference_table_13300012 reference_table, multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id) AND (raw_events_first.user_id IS NOT NULL)) GROUP BY raw_events_first.user_id DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum FROM multi_insert_select.reference_table_13300012 reference_table, multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id) AND (raw_events_first.user_id IS NOT NULL)) GROUP BY raw_events_first.user_id DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum FROM multi_insert_select.reference_table_13300012 reference_table, multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id) AND (raw_events_first.user_id IS NOT NULL)) GROUP BY raw_events_first.user_id -- a note on the outer joins is that -- we filter out outer join results -- where partition column returns -- NULL. Thus, we could INSERT less rows -- than we expect from subquery result. 
-- see the following tests SET client_min_messages TO INFO; -- we don't want to see constraint violations, so truncate first TRUNCATE agg_events; -- add a row to the second table to make table contents different INSERT INTO raw_events_second (user_id, time, value_1, value_2, value_3, value_4) VALUES (10, now(), 100, 10000, 10000, 100000); DELETE FROM raw_events_second WHERE user_id = 2; -- we select 10 rows SELECT t1.user_id AS col1, t2.user_id AS col2 FROM raw_events_first t1 FULL JOIN raw_events_second t2 ON t1.user_id = t2.user_id ORDER BY t1.user_id, t2.user_id; col1 | col2 --------------------------------------------------------------------- 1 | 1 2 | 3 | 3 4 | 4 5 | 5 6 | 6 7 | 7 8 | 8 9 | 9 | 10 (10 rows) SET client_min_messages TO DEBUG2; -- we insert 9 rows since we filtered out -- NULL partition column values INSERT INTO agg_events (user_id, value_1_agg) SELECT t1.user_id AS col1, t2.user_id AS col2 FROM raw_events_first t1 FULL JOIN raw_events_second t2 ON t1.user_id = t2.user_id; DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT t1.user_id AS col1, t2.user_id AS col2 FROM (multi_insert_select.raw_events_first_13300000 t1 FULL JOIN multi_insert_select.raw_events_second_13300004 t2 ON ((t1.user_id OPERATOR(pg_catalog.=) t2.user_id))) WHERE (t1.user_id IS NOT NULL) DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT t1.user_id AS col1, t2.user_id AS col2 FROM (multi_insert_select.raw_events_first_13300001 t1 FULL JOIN multi_insert_select.raw_events_second_13300005 t2 ON ((t1.user_id OPERATOR(pg_catalog.=) t2.user_id))) WHERE (t1.user_id IS NOT NULL) DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT t1.user_id AS col1, t2.user_id AS col2 FROM (multi_insert_select.raw_events_first_13300002 t1 FULL JOIN
multi_insert_select.raw_events_second_13300006 t2 ON ((t1.user_id OPERATOR(pg_catalog.=) t2.user_id))) WHERE (t1.user_id IS NOT NULL) DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT t1.user_id AS col1, t2.user_id AS col2 FROM (multi_insert_select.raw_events_first_13300003 t1 FULL JOIN multi_insert_select.raw_events_second_13300007 t2 ON ((t1.user_id OPERATOR(pg_catalog.=) t2.user_id))) WHERE (t1.user_id IS NOT NULL) SET client_min_messages TO INFO; -- see that the results are different from the SELECT query SELECT user_id, value_1_agg FROM agg_events ORDER BY user_id, value_1_agg; user_id | value_1_agg --------------------------------------------------------------------- 1 | 1 2 | 3 | 3 4 | 4 5 | 5 6 | 6 7 | 7 8 | 8 9 | 9 (9 rows) -- we don't want to see constraint violations, so truncate first SET client_min_messages TO INFO; TRUNCATE agg_events; SET client_min_messages TO DEBUG2; -- DISTINCT clause INSERT INTO agg_events (value_1_agg, user_id) SELECT DISTINCT value_1, user_id FROM raw_events_first; DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE 
(raw_events_first.user_id IS NOT NULL) DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) -- we don't want to see constraint violations, so truncate first SET client_min_messages TO INFO; truncate agg_events; SET client_min_messages TO DEBUG2; -- DISTINCT ON clauses are supported -- distinct on(non-partition column) -- values are pulled to master INSERT INTO agg_events (value_1_agg, user_id) SELECT DISTINCT ON (value_1) value_1, user_id FROM raw_events_first; DEBUG: cannot push down this subquery DETAIL: Distinct on columns without partition column is currently unsupported DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Collecting INSERT ... SELECT results on coordinator SELECT user_id, value_1_agg FROM agg_events ORDER BY 1,2; DEBUG: Router planner cannot handle multi-shard select queries user_id | value_1_agg --------------------------------------------------------------------- 1 | 10 2 | 20 3 | 30 4 | 40 5 | 50 6 | 60 7 | 8 | 80 9 | 90 (9 rows) -- we don't want to see constraint violations, so truncate first SET client_min_messages TO INFO; truncate agg_events; SET client_min_messages TO DEBUG2; -- distinct on(partition column) -- queries are forwarded to workers INSERT INTO agg_events (value_1_agg, user_id) SELECT DISTINCT ON (user_id) value_1, user_id FROM raw_events_first; DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT ON (raw_events_first.user_id) raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS
citus_table_alias (user_id, value_1_agg) SELECT DISTINCT ON (raw_events_first.user_id) raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT ON (raw_events_first.user_id) raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT ON (raw_events_first.user_id) raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) SELECT user_id, value_1_agg FROM agg_events ORDER BY 1,2; DEBUG: Router planner cannot handle multi-shard select queries user_id | value_1_agg --------------------------------------------------------------------- 1 | 10 2 | 20 3 | 30 4 | 40 5 | 50 6 | 60 7 | 8 | 80 9 | 90 (9 rows) -- We support CTEs BEGIN; WITH fist_table_agg AS MATERIALIZED (SELECT max(value_1)+1 as v1_agg, user_id FROM raw_events_first GROUP BY user_id) INSERT INTO agg_events (value_1_agg, user_id) SELECT v1_agg, user_id FROM fist_table_agg; DEBUG: distributed INSERT ... 
SELECT can only select from distributed tables DEBUG: Router planner cannot handle multi-shard select queries DEBUG: generating subplan XXX_1 for CTE fist_table_agg: SELECT (max(value_1) OPERATOR(pg_catalog.+) 1) AS v1_agg, user_id FROM multi_insert_select.raw_events_first GROUP BY user_id DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, v1_agg AS value_1_agg FROM (SELECT fist_table_agg.user_id, fist_table_agg.v1_agg FROM (SELECT intermediate_result.v1_agg, intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(v1_agg integer, user_id integer)) fist_table_agg) citus_insert_select_subquery DEBUG: Creating router plan DEBUG: Collecting INSERT ... SELECT results on coordinator ROLLBACK; -- We do support CTEs that are referenced in the target list INSERT INTO agg_events WITH sub_cte AS (SELECT 1) SELECT raw_events_first.user_id, (SELECT * FROM sub_cte) FROM raw_events_first; DEBUG: CTE sub_cte is going to be inlined via distributed planning DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, (SELECT sub_cte."?column?" FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, (SELECT sub_cte."?column?" FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, (SELECT sub_cte."?column?" 
FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, (SELECT sub_cte."?column?" FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) -- We support set operations BEGIN; INSERT INTO raw_events_first(user_id) SELECT user_id FROM ((SELECT user_id FROM raw_events_first) UNION (SELECT user_id FROM raw_events_second)) as foo; DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id) SELECT foo.user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second) foo WHERE (foo.user_id IS NOT NULL) DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300001 AS citus_table_alias (user_id) SELECT foo.user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300005 raw_events_second) foo WHERE (foo.user_id IS NOT NULL) DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300002 AS citus_table_alias (user_id) SELECT foo.user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300006 raw_events_second) foo WHERE (foo.user_id IS NOT NULL) DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300003 AS citus_table_alias (user_id) SELECT foo.user_id FROM (SELECT 
raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second) foo WHERE (foo.user_id IS NOT NULL) ROLLBACK; -- We do support set operations through recursive planning BEGIN; SET LOCAL client_min_messages TO DEBUG; INSERT INTO raw_events_first(user_id) (SELECT user_id FROM raw_events_first) INTERSECT (SELECT user_id FROM raw_events_first); DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM multi_insert_select.raw_events_first DEBUG: Router planner cannot handle multi-shard select queries DEBUG: generating subplan XXX_2 for subquery SELECT user_id FROM multi_insert_select.raw_events_first DEBUG: Creating router plan DEBUG: generating subplan XXX_3 for subquery SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) INTERSECT SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) citus_insert_select_subquery DEBUG: Creating router plan DEBUG: Collecting INSERT ... 
SELECT results on coordinator ROLLBACK; -- If the query is router plannable then it is executed via the coordinator INSERT INTO raw_events_first(user_id) SELECT user_id FROM ((SELECT user_id FROM raw_events_first WHERE user_id = 15) EXCEPT (SELECT user_id FROM raw_events_second where user_id = 17)) as foo; DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: router planner does not support queries that reference non-colocated distributed tables DEBUG: Distributed planning for a fast-path router query DEBUG: Creating router plan DEBUG: query has a single distribution column value: 15 DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM multi_insert_select.raw_events_first WHERE (user_id OPERATOR(pg_catalog.=) 15) DEBUG: Distributed planning for a fast-path router query DEBUG: Creating router plan DEBUG: query has a single distribution column value: 17 DEBUG: generating subplan XXX_2 for subquery SELECT user_id FROM multi_insert_select.raw_events_second WHERE (user_id OPERATOR(pg_catalog.=) 17) DEBUG: Creating router plan DEBUG: generating subplan XXX_3 for subquery SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) EXCEPT SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) foo DEBUG: Creating router plan DEBUG: Collecting INSERT ... 
SELECT results on coordinator -- some supported LEFT joins INSERT INTO agg_events (user_id) SELECT raw_events_first.user_id FROM raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id; DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300000 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_first.user_id IS NOT NULL) DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300001 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300005 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_first.user_id IS NOT NULL) DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300002 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300006 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_first.user_id IS NOT NULL) DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300003 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300007 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_first.user_id IS NOT NULL) INSERT INTO agg_events (user_id) SELECT raw_events_second.user_id FROM reference_table LEFT JOIN 
raw_events_second ON reference_table.user_id = raw_events_second.user_id; DEBUG: cannot perform a lateral outer join when a distributed subquery references a reference table DEBUG: Router planner cannot handle multi-shard select queries DEBUG: recursively planning right side of the left join since the outer side is a recurring rel DEBUG: recursively planning distributed relation "raw_events_second" since it is part of a distributed join node that is outer joined with a recurring rel DEBUG: Wrapping relation "raw_events_second" to a subquery DEBUG: Router planner cannot handle multi-shard select queries DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM multi_insert_select.raw_events_second WHERE true DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT raw_events_second.user_id FROM (multi_insert_select.reference_table LEFT JOIN (SELECT raw_events_second_1.user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) raw_events_second_1) raw_events_second ON ((reference_table.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) DEBUG: Creating router plan DEBUG: Collecting INSERT ... 
SELECT results on coordinator INSERT INTO agg_events (user_id) SELECT raw_events_first.user_id FROM raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id WHERE raw_events_first.user_id = 10; DEBUG: Creating router plan DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300000 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) 10) AND (raw_events_first.user_id IS NOT NULL)) DEBUG: Skipping target shard interval 13300009 since SELECT query for it pruned away DEBUG: Skipping target shard interval 13300010 since SELECT query for it pruned away DEBUG: Skipping target shard interval 13300011 since SELECT query for it pruned away INSERT INTO agg_events (user_id) SELECT raw_events_first.user_id FROM raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id WHERE raw_events_second.user_id = 10 OR raw_events_second.user_id = 11; DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300000 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (((raw_events_second.user_id OPERATOR(pg_catalog.=) 10) OR (raw_events_second.user_id OPERATOR(pg_catalog.=) 11)) AND (raw_events_first.user_id IS NOT NULL)) DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300001 raw_events_first LEFT JOIN 
(SELECT NULL::integer AS user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 WHERE false) raw_events_second(user_id, "time", value_1, value_2, value_3, value_4) ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (((raw_events_second.user_id OPERATOR(pg_catalog.=) 10) OR (raw_events_second.user_id OPERATOR(pg_catalog.=) 11)) AND (raw_events_first.user_id IS NOT NULL)) DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300002 raw_events_first LEFT JOIN (SELECT NULL::integer AS user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 WHERE false) raw_events_second(user_id, "time", value_1, value_2, value_3, value_4) ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (((raw_events_second.user_id OPERATOR(pg_catalog.=) 10) OR (raw_events_second.user_id OPERATOR(pg_catalog.=) 11)) AND (raw_events_first.user_id IS NOT NULL)) DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300003 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300007 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (((raw_events_second.user_id OPERATOR(pg_catalog.=) 10) OR (raw_events_second.user_id OPERATOR(pg_catalog.=) 11)) AND (raw_events_first.user_id IS NOT NULL)) INSERT INTO agg_events (user_id) SELECT raw_events_first.user_id FROM raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id WHERE 
raw_events_first.user_id = 10 AND raw_events_first.user_id = 20; DEBUG: Creating router plan DEBUG: Skipping target shard interval 13300008 since SELECT query for it pruned away DEBUG: Skipping target shard interval 13300009 since SELECT query for it pruned away DEBUG: Skipping target shard interval 13300010 since SELECT query for it pruned away DEBUG: Skipping target shard interval 13300011 since SELECT query for it pruned away INSERT INTO agg_events (user_id) SELECT raw_events_first.user_id FROM raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id WHERE raw_events_first.user_id = 10 AND raw_events_second.user_id = 20; DEBUG: Creating router plan DEBUG: Skipping target shard interval 13300008 since SELECT query for it pruned away DEBUG: Skipping target shard interval 13300009 since SELECT query for it pruned away DEBUG: Skipping target shard interval 13300010 since SELECT query for it pruned away DEBUG: Skipping target shard interval 13300011 since SELECT query for it pruned away INSERT INTO agg_events (user_id) SELECT raw_events_first.user_id FROM raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id WHERE raw_events_first.user_id IN (19, 20, 21); DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300000 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL)) DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300001 raw_events_first LEFT JOIN 
multi_insert_select.raw_events_second_13300005 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL)) DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300002 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300006 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL)) DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM ((SELECT NULL::integer AS user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 WHERE false) raw_events_first(user_id, "time", value_1, value_2, value_3, value_4) LEFT JOIN multi_insert_select.raw_events_second_13300007 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL)) INSERT INTO agg_events (user_id) SELECT raw_events_first.user_id FROM raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id WHERE raw_events_second.user_id IN (19, 20, 21); DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300000 raw_events_first JOIN multi_insert_select.raw_events_second_13300004 raw_events_second ON 
((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL)) DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300001 raw_events_first JOIN multi_insert_select.raw_events_second_13300005 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL)) DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300002 raw_events_first JOIN multi_insert_select.raw_events_second_13300006 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL)) DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300003 raw_events_first JOIN (SELECT NULL::integer AS user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 WHERE false) raw_events_second(user_id, "time", value_1, value_2, value_3, value_4) ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL)) SET client_min_messages TO WARNING; -- following query should use repartitioned joins and results should -- 
be routed via coordinator SET citus.enable_repartition_joins TO true; INSERT INTO agg_events (user_id) SELECT raw_events_first.user_id FROM raw_events_first, raw_events_second WHERE raw_events_second.user_id = raw_events_first.value_1 AND raw_events_first.value_1 = 12; -- some unsupported LEFT/INNER JOINs -- JOIN on one table with partition column other is not INSERT INTO agg_events (user_id) SELECT raw_events_first.user_id FROM raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1; ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -- same as the above with INNER JOIN INSERT INTO agg_events (user_id) SELECT raw_events_first.user_id FROM raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1; -- a not meaningful query INSERT INTO agg_events (user_id) SELECT raw_events_second.user_id FROM raw_events_first, raw_events_second WHERE raw_events_first.user_id = raw_events_first.value_1; ERROR: cannot perform distributed planning on this query DETAIL: Cartesian products are currently unsupported -- both tables joined on non-partition columns INSERT INTO agg_events (user_id) SELECT raw_events_first.user_id FROM raw_events_first LEFT JOIN raw_events_second ON raw_events_first.value_1 = raw_events_second.value_1; ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -- same as the above with INNER JOIN -- we support this with route to coordinator SELECT coordinator_plan($Q$ EXPLAIN (costs off) INSERT INTO agg_events (user_id) SELECT raw_events_first.user_id FROM raw_events_first INNER JOIN raw_events_second ON raw_events_first.value_1 = raw_events_second.value_1; $Q$); coordinator_plan --------------------------------------------------------------------- Custom Scan (Citus INSERT ... 
SELECT) INSERT/SELECT method: pull to coordinator -> Custom Scan (Citus Adaptive) Task Count: 6 (4 rows) -- EXPLAIN ANALYZE is not supported for INSERT ... SELECT via coordinator EXPLAIN (costs off, analyze on) INSERT INTO agg_events (user_id) SELECT raw_events_first.user_id FROM raw_events_first INNER JOIN raw_events_second ON raw_events_first.value_1 = raw_events_second.value_1; ERROR: EXPLAIN ANALYZE is currently not supported for INSERT ... SELECT commands via coordinator -- even if there is a filter on the partition key, since the join is not on the partition key we reject -- this query INSERT INTO agg_events (user_id) SELECT raw_events_first.user_id FROM raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1 WHERE raw_events_first.user_id = 10; ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -- same as the above with INNER JOIN -- we support this with route to coordinator SELECT coordinator_plan($Q$ EXPLAIN (costs off) INSERT INTO agg_events (user_id) SELECT raw_events_first.user_id FROM raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1 WHERE raw_events_first.user_id = 10; $Q$); coordinator_plan --------------------------------------------------------------------- Custom Scan (Citus INSERT ... 
SELECT) INSERT/SELECT method: pull to coordinator -> Custom Scan (Citus Adaptive) Task Count: 6 (4 rows) -- make things a bit more complicate with IN clauses -- we support this with route to coordinator SELECT coordinator_plan($Q$ EXPLAIN (costs off) INSERT INTO agg_events (user_id) SELECT raw_events_first.user_id FROM raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1 WHERE raw_events_first.value_1 IN (10, 11,12) OR raw_events_second.user_id IN (1,2,3,4); $Q$); coordinator_plan --------------------------------------------------------------------- Custom Scan (Citus INSERT ... SELECT) INSERT/SELECT method: pull to coordinator -> Custom Scan (Citus Adaptive) Task Count: 6 (4 rows) -- implicit join on non partition column should also not be pushed down, -- so we fall back to route via coordinator SELECT coordinator_plan($Q$ EXPLAIN (costs off) INSERT INTO agg_events (user_id) SELECT raw_events_first.user_id FROM raw_events_first, raw_events_second WHERE raw_events_second.user_id = raw_events_first.value_1; $Q$); coordinator_plan --------------------------------------------------------------------- Custom Scan (Citus INSERT ... SELECT) INSERT/SELECT method: pull to coordinator -> Custom Scan (Citus Adaptive) Task Count: 6 (4 rows) RESET client_min_messages; -- The following is again a tricky query for Citus. If the given filter was -- on value_1 as shown in the above, Citus could push it down and use -- distributed INSERT/SELECT. But we instead fall back to route via coordinator. SELECT coordinator_plan($Q$ EXPLAIN (costs off) INSERT INTO agg_events (user_id) SELECT raw_events_first.user_id FROM raw_events_first, raw_events_second WHERE raw_events_second.user_id = raw_events_first.value_1 AND raw_events_first.value_2 = 12; $Q$); coordinator_plan --------------------------------------------------------------------- Custom Scan (Citus INSERT ... 
SELECT) INSERT/SELECT method: pull to coordinator -> Custom Scan (Citus Adaptive) Task Count: 6 (4 rows) -- foo is not joined on the partition key so the query is not -- pushed down. So instead we route via coordinator. SELECT coordinator_plan($Q$ EXPLAIN (costs off) INSERT INTO agg_events (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) FROM ( SELECT f2.id as id, f2.v4 as value FROM (SELECT id FROM (SELECT reference_table.user_id AS id FROM raw_events_first LEFT JOIN reference_table ON (raw_events_first.value_1 = reference_table.user_id)) AS foo) as f INNER JOIN (SELECT v4, v1, id FROM (SELECT SUM(raw_events_second.value_4) AS v4, SUM(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM raw_events_first, raw_events_second WHERE raw_events_first.user_id = raw_events_second.user_id GROUP BY raw_events_second.user_id HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 ON (f.id = f2.id)) as outer_most GROUP BY outer_most.id; $Q$); coordinator_plan --------------------------------------------------------------------- Custom Scan (Citus INSERT ... SELECT) INSERT/SELECT method: pull to coordinator -> HashAggregate Group Key: remote_scan.user_id -> Custom Scan (Citus Adaptive) -> Distributed Subplan XXX_1 -> Custom Scan (Citus Adaptive) Task Count: 4 (8 rows) -- if the given filter was on value_1 as shown in the above, Citus could -- push it down. But here the query falls back to route via coordinator. SELECT coordinator_plan($Q$ EXPLAIN (costs off) INSERT INTO agg_events (user_id) SELECT raw_events_first.user_id FROM raw_events_first, raw_events_second WHERE raw_events_second.user_id = raw_events_first.value_1 AND raw_events_first.value_2 = 12; $Q$); coordinator_plan --------------------------------------------------------------------- Custom Scan (Citus INSERT ... 
SELECT) INSERT/SELECT method: pull to coordinator -> Custom Scan (Citus Adaptive) Task Count: 6 (4 rows) -- foo is not joined on the partition key so the query is not -- pushed down, and it falls back to route via coordinator SELECT coordinator_plan($Q$ EXPLAIN (costs off) INSERT INTO agg_events (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) FROM ( SELECT f2.id as id, f2.v4 as value FROM (SELECT id FROM (SELECT reference_table.user_id AS id FROM raw_events_first LEFT JOIN reference_table ON (raw_events_first.value_1 = reference_table.user_id)) AS foo) as f INNER JOIN (SELECT v4, v1, id FROM (SELECT SUM(raw_events_second.value_4) AS v4, SUM(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM raw_events_first, raw_events_second WHERE raw_events_first.user_id = raw_events_second.user_id GROUP BY raw_events_second.user_id HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 ON (f.id = f2.id)) as outer_most GROUP BY outer_most.id; $Q$); coordinator_plan --------------------------------------------------------------------- Custom Scan (Citus INSERT ... 
SELECT) INSERT/SELECT method: pull to coordinator -> HashAggregate Group Key: remote_scan.user_id -> Custom Scan (Citus Adaptive) -> Distributed Subplan XXX_1 -> Custom Scan (Citus Adaptive) Task Count: 4 (8 rows) INSERT INTO agg_events (value_4_agg, value_1_agg, user_id) SELECT v4, v1, id FROM (SELECT SUM(raw_events_second.value_4) AS v4, SUM(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM raw_events_first, raw_events_second WHERE raw_events_first.user_id != raw_events_second.user_id GROUP BY raw_events_second.user_id) AS foo; ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator SET client_min_messages TO DEBUG2; -- INSERT returns NULL partition key value via coordinator INSERT INTO agg_events (value_4_agg, value_1_agg, user_id) SELECT v4, v1, id FROM (SELECT SUM(raw_events_second.value_4) AS v4, SUM(raw_events_first.value_1) AS v1, raw_events_second.value_3 AS id FROM raw_events_first, raw_events_second WHERE raw_events_first.user_id = raw_events_second.user_id GROUP BY raw_events_second.value_3) AS foo; DEBUG: cannot push down this subquery DETAIL: Group by list without partition column is currently unsupported when a subquery references a column from another query DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1] DEBUG: join prunable for intervals [-2147483648,-1073741825] and [0,1073741823] DEBUG: join prunable for intervals [-2147483648,-1073741825] and [1073741824,2147483647] DEBUG: join prunable for intervals [-1073741824,-1] and [-2147483648,-1073741825] DEBUG: join prunable for intervals [-1073741824,-1] and [0,1073741823] DEBUG: join prunable for intervals [-1073741824,-1] and [1073741824,2147483647] DEBUG: join prunable for intervals [0,1073741823] and [-2147483648,-1073741825] DEBUG: join prunable for 
intervals [0,1073741823] and [-1073741824,-1] DEBUG: join prunable for intervals [0,1073741823] and [1073741824,2147483647] DEBUG: join prunable for intervals [1073741824,2147483647] and [-2147483648,-1073741825] DEBUG: join prunable for intervals [1073741824,2147483647] and [-1073741824,-1] DEBUG: join prunable for intervals [1073741824,2147483647] and [0,1073741823] DEBUG: generating subplan XXX_1 for subquery SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.value_3 AS id FROM multi_insert_select.raw_events_first, multi_insert_select.raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.value_3 DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT int4(id) AS user_id, int4(v1) AS value_1_agg, int8(v4) AS value_4_agg FROM (SELECT intermediate_result.v4, intermediate_result.v1, intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(v4 numeric, v1 bigint, id double precision)) foo DEBUG: Creating router plan DEBUG: Collecting INSERT ... SELECT results on coordinator ERROR: the partition column of table multi_insert_select.agg_events cannot be NULL -- error cases -- no part column at all INSERT INTO raw_events_second (value_1) SELECT value_1 FROM raw_events_first; DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match DETAIL: the query doesn't include the target table's partition column DEBUG: Router planner cannot handle multi-shard select queries DEBUG: performing repartitioned INSERT ... SELECT ERROR: the partition column of table multi_insert_select.raw_events_second should have a value INSERT INTO raw_events_second (value_1) SELECT user_id FROM raw_events_first; DEBUG: cannot perform distributed INSERT INTO ... 
SELECT because the partition columns in the source table and subquery do not match DETAIL: the query doesn't include the target table's partition column DEBUG: Router planner cannot handle multi-shard select queries DEBUG: performing repartitioned INSERT ... SELECT ERROR: the partition column of table multi_insert_select.raw_events_second should have a value INSERT INTO raw_events_second (user_id) SELECT value_1 FROM raw_events_first; DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match DETAIL: The target table's partition column should correspond to a partition column in the subquery. DEBUG: Router planner cannot handle multi-shard select queries DEBUG: performing repartitioned INSERT ... SELECT DEBUG: partitioning SELECT query by column index 0 with name 'user_id' ERROR: the partition column value cannot be NULL CONTEXT: while executing command on localhost:xxxxx INSERT INTO raw_events_second (user_id) SELECT user_id * 2 FROM raw_events_first; DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match DETAIL: Subquery contains an operator in the same position as the target table's partition column. HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. DEBUG: Router planner cannot handle multi-shard select queries DEBUG: performing repartitioned INSERT ... 
SELECT DEBUG: partitioning SELECT query by column index 0 with name 'user_id' DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300000_to_0,repartitioned_results_xxxxx_from_13300001_to_0}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300000_to_1,repartitioned_results_xxxxx_from_13300001_to_1,repartitioned_results_xxxxx_from_13300003_to_1}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300001_to_2}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300000_to_3,repartitioned_results_xxxxx_from_13300002_to_3,repartitioned_results_xxxxx_from_13300003_to_3}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) INSERT INTO raw_events_second (user_id) SELECT user_id :: bigint FROM raw_events_first; DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match DETAIL: Subquery contains an explicit cast in the same position as the target table's partition column. 
HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. DEBUG: Router planner cannot handle multi-shard select queries DEBUG: performing repartitioned INSERT ... SELECT DEBUG: partitioning SELECT query by column index 0 with name 'user_id' DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300000_to_0}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300001_to_1}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300002_to_2}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300003_to_3}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) INSERT INTO agg_events (value_3_agg, value_4_agg, value_1_agg, value_2_agg, user_id) SELECT SUM(value_3), Count(value_4), user_id, SUM(value_1), Avg(value_2) FROM raw_events_first GROUP BY user_id; DEBUG: cannot perform distributed INSERT INTO ... 
SELECT because the partition columns in the source table and subquery do not match DETAIL: Subquery contains an aggregation in the same position as the target table's partition column. HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. DEBUG: Router planner cannot handle multi-shard select queries DEBUG: performing repartitioned INSERT ... SELECT DEBUG: partitioning SELECT query by column index 0 with name 'user_id' ERROR: the partition column value cannot be NULL CONTEXT: while executing command on localhost:xxxxx INSERT INTO agg_events (value_3_agg, value_4_agg, value_1_agg, value_2_agg, user_id) SELECT SUM(value_3), Count(value_4), user_id, SUM(value_1), value_2 FROM raw_events_first GROUP BY user_id, value_2; DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match DETAIL: The target table's partition column should correspond to a partition column in the subquery. DEBUG: Router planner cannot handle multi-shard select queries DEBUG: performing repartitioned INSERT ... SELECT DEBUG: partitioning SELECT query by column index 0 with name 'user_id' ERROR: the partition column value cannot be NULL CONTEXT: while executing command on localhost:xxxxx -- tables should be co-located INSERT INTO agg_events (user_id) SELECT user_id FROM reference_table; DEBUG: Creating router plan DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match DETAIL: The target table's partition column should correspond to a partition column in the subquery. DEBUG: Distributed planning for a fast-path router query DEBUG: Creating router plan DEBUG: Collecting INSERT ... 
SELECT results on coordinator -- foo2 is recursively planned and INSERT...SELECT is done via coordinator INSERT INTO agg_events (user_id) SELECT f2.id FROM (SELECT id FROM (SELECT reference_table.user_id AS id FROM raw_events_first, reference_table WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f INNER JOIN (SELECT v4, v1, id FROM (SELECT SUM(raw_events_second.value_4) AS v4, raw_events_second.value_1 AS v1, SUM(raw_events_second.user_id) AS id FROM raw_events_first, raw_events_second WHERE raw_events_first.user_id = raw_events_second.user_id GROUP BY raw_events_second.value_1 HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 ON (f.id = f2.id); DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1] DEBUG: join prunable for intervals [-2147483648,-1073741825] and [0,1073741823] DEBUG: join prunable for intervals [-2147483648,-1073741825] and [1073741824,2147483647] DEBUG: join prunable for intervals [-1073741824,-1] and [-2147483648,-1073741825] DEBUG: join prunable for intervals [-1073741824,-1] and [0,1073741823] DEBUG: join prunable for intervals [-1073741824,-1] and [1073741824,2147483647] DEBUG: join prunable for intervals [0,1073741823] and [-2147483648,-1073741825] DEBUG: join prunable for intervals [0,1073741823] and [-1073741824,-1] DEBUG: join prunable for intervals [0,1073741823] and [1073741824,2147483647] DEBUG: join prunable for intervals [1073741824,2147483647] and [-2147483648,-1073741825] DEBUG: join prunable for intervals [1073741824,2147483647] and [-1073741824,-1] DEBUG: join prunable for intervals [1073741824,2147483647] and [0,1073741823] DEBUG: generating subplan XXX_1 for subquery SELECT sum(raw_events_second.value_4) AS v4, 
raw_events_second.value_1 AS v1, sum(raw_events_second.user_id) AS id FROM multi_insert_select.raw_events_first, multi_insert_select.raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.value_1 HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric) DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT int4(f2.id) AS user_id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first, multi_insert_select.reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT intermediate_result.v4, intermediate_result.v1, intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(v4 numeric, v1 integer, id bigint)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) DEBUG: Router planner cannot handle multi-shard select queries DEBUG: performing repartitioned INSERT ... 
SELECT DEBUG: partitioning SELECT query by column index 0 with name 'user_id' -- the second part of the query is not routable since -- GROUP BY not on the partition column (i.e., value_1) and thus join -- on f.id = f2.id is not on the partition key (instead on the sum of partition key) -- but we still recursively plan foo2 and run the query INSERT INTO agg_events (user_id) SELECT f.id FROM (SELECT id FROM (SELECT raw_events_first.user_id AS id FROM raw_events_first, reference_table WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f INNER JOIN (SELECT v4, v1, id FROM (SELECT SUM(raw_events_second.value_4) AS v4, raw_events_second.value_1 AS v1, SUM(raw_events_second.user_id) AS id FROM raw_events_first, raw_events_second WHERE raw_events_first.user_id = raw_events_second.user_id GROUP BY raw_events_second.value_1 HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 ON (f.id = f2.id); DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1] DEBUG: join prunable for intervals [-2147483648,-1073741825] and [0,1073741823] DEBUG: join prunable for intervals [-2147483648,-1073741825] and [1073741824,2147483647] DEBUG: join prunable for intervals [-1073741824,-1] and [-2147483648,-1073741825] DEBUG: join prunable for intervals [-1073741824,-1] and [0,1073741823] DEBUG: join prunable for intervals [-1073741824,-1] and [1073741824,2147483647] DEBUG: join prunable for intervals [0,1073741823] and [-2147483648,-1073741825] DEBUG: join prunable for intervals [0,1073741823] and [-1073741824,-1] DEBUG: join prunable for intervals [0,1073741823] and [1073741824,2147483647] DEBUG: join prunable for intervals [1073741824,2147483647] and [-2147483648,-1073741825] DEBUG: join prunable 
for intervals [1073741824,2147483647] and [-1073741824,-1] DEBUG: join prunable for intervals [1073741824,2147483647] and [0,1073741823] DEBUG: generating subplan XXX_1 for subquery SELECT sum(raw_events_second.value_4) AS v4, raw_events_second.value_1 AS v1, sum(raw_events_second.user_id) AS id FROM multi_insert_select.raw_events_first, multi_insert_select.raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.value_1 HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric) DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT f.id AS user_id FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM multi_insert_select.raw_events_first, multi_insert_select.reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT intermediate_result.v4, intermediate_result.v1, intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(v4 numeric, v1 integer, id bigint)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) DEBUG: Router planner cannot handle multi-shard select queries DEBUG: performing repartitioned INSERT ... 
SELECT DEBUG: partitioning SELECT query by column index 0 with name 'user_id' SET client_min_messages TO WARNING; -- cannot pushdown the query since the JOIN is not equi JOIN -- falls back to route via coordinator SELECT coordinator_plan($Q$ EXPLAIN (costs off) INSERT INTO agg_events (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) FROM ( SELECT f2.id as id, f2.v4 as value FROM (SELECT id FROM (SELECT reference_table.user_id AS id FROM raw_events_first, reference_table WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f INNER JOIN (SELECT v4, v1, id FROM (SELECT SUM(raw_events_second.value_4) AS v4, SUM(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM raw_events_first, raw_events_second WHERE raw_events_first.user_id = raw_events_second.user_id GROUP BY raw_events_second.user_id HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 ON (f.id != f2.id)) as outer_most GROUP BY outer_most.id; $Q$); coordinator_plan --------------------------------------------------------------------- Custom Scan (Citus INSERT ... 
SELECT) INSERT/SELECT method: pull to coordinator -> HashAggregate Group Key: remote_scan.user_id -> Custom Scan (Citus Adaptive) -> Distributed Subplan XXX_1 -> Custom Scan (Citus Adaptive) Task Count: 4 (8 rows) -- cannot pushdown since foo2 is not join on partition key -- falls back to route via coordinator SELECT coordinator_plan($Q$ EXPLAIN (costs off) INSERT INTO agg_events (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) FROM ( SELECT f2.id as id, f2.v4 as value FROM (SELECT id FROM (SELECT reference_table.user_id AS id FROM raw_events_first, reference_table WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f INNER JOIN (SELECT v4, v1, id FROM (SELECT SUM(raw_events_second.value_4) AS v4, SUM(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM raw_events_first, raw_events_second WHERE raw_events_first.user_id = raw_events_second.value_1 GROUP BY raw_events_second.user_id HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 ON (f.id = f2.id)) as outer_most GROUP BY outer_most.id; $Q$); coordinator_plan --------------------------------------------------------------------- Custom Scan (Citus INSERT ... 
SELECT) INSERT/SELECT method: pull to coordinator -> HashAggregate Group Key: remote_scan.user_id -> Custom Scan (Citus Adaptive) -> Distributed Subplan XXX_1 -> HashAggregate Group Key: remote_scan.id Filter: (pg_catalog.sum(remote_scan.worker_column_4) > '10'::numeric) -> Custom Scan (Citus Adaptive) Task Count: 6 (11 rows) -- cannot push down since foo doesn't have en equi join -- falls back to route via coordinator SELECT coordinator_plan($Q$ EXPLAIN (costs off) INSERT INTO agg_events (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) FROM ( SELECT f2.id as id, f2.v4 as value FROM (SELECT id FROM (SELECT reference_table.user_id AS id FROM raw_events_first, reference_table WHERE raw_events_first.user_id != reference_table.user_id ) AS foo) as f INNER JOIN (SELECT v4, v1, id FROM (SELECT SUM(raw_events_second.value_4) AS v4, SUM(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM raw_events_first, raw_events_second WHERE raw_events_first.user_id = raw_events_second.user_id GROUP BY raw_events_second.user_id HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 ON (f.id = f2.id)) as outer_most GROUP BY outer_most.id; $Q$); coordinator_plan --------------------------------------------------------------------- Custom Scan (Citus INSERT ... 
SELECT) INSERT/SELECT method: pull to coordinator -> HashAggregate Group Key: remote_scan.user_id -> Custom Scan (Citus Adaptive) -> Distributed Subplan XXX_1 -> Custom Scan (Citus Adaptive) Task Count: 4 (8 rows) -- some unsupported LATERAL JOINs -- join on averages is not on the partition key -- should fall back to route via coordinator SELECT coordinator_plan($Q$ EXPLAIN (costs off) INSERT INTO agg_events (user_id, value_4_agg) SELECT averages.user_id, avg(averages.value_4) FROM (SELECT raw_events_second.user_id FROM reference_table JOIN raw_events_second on (reference_table.user_id = raw_events_second.user_id) ) reference_ids JOIN LATERAL (SELECT user_id, value_4 FROM raw_events_first WHERE value_4 = reference_ids.user_id) as averages ON true GROUP BY averages.user_id; $Q$); coordinator_plan --------------------------------------------------------------------- Custom Scan (Citus INSERT ... SELECT) INSERT/SELECT method: pull to coordinator -> HashAggregate Group Key: remote_scan.user_id -> Custom Scan (Citus Adaptive) Task Count: 6 (6 rows) -- join among reference_ids and averages is not on the partition key -- should fall back to route via coordinator SELECT coordinator_plan($Q$ EXPLAIN (costs off) INSERT INTO agg_events (user_id, value_4_agg) SELECT averages.user_id, avg(averages.value_4) FROM (SELECT raw_events_second.user_id FROM reference_table JOIN raw_events_second on (reference_table.user_id = raw_events_second.user_id) ) reference_ids JOIN LATERAL (SELECT user_id, value_4 FROM raw_events_first) as averages ON averages.value_4 = reference_ids.user_id GROUP BY averages.user_id; $Q$); coordinator_plan --------------------------------------------------------------------- Custom Scan (Citus INSERT ... 
SELECT) INSERT/SELECT method: pull to coordinator -> HashAggregate Group Key: remote_scan.user_id -> Custom Scan (Citus Adaptive) -> Distributed Subplan XXX_1 -> Custom Scan (Citus Adaptive) Task Count: 4 (8 rows) -- join among the agg_ids and averages is not on the partition key -- should fall back to route via coordinator SELECT coordinator_plan($Q$ EXPLAIN (costs off) INSERT INTO agg_events (user_id, value_4_agg) SELECT averages.user_id, avg(averages.value_4) FROM (SELECT raw_events_second.user_id FROM reference_table JOIN raw_events_second on (reference_table.user_id = raw_events_second.user_id) ) reference_ids JOIN LATERAL (SELECT user_id, value_4 FROM raw_events_first) as averages ON averages.user_id = reference_ids.user_id JOIN LATERAL (SELECT user_id, value_4 FROM agg_events) as agg_ids ON (agg_ids.value_4 = averages.user_id) GROUP BY averages.user_id; $Q$); coordinator_plan --------------------------------------------------------------------- Custom Scan (Citus INSERT ... SELECT) INSERT/SELECT method: pull to coordinator -> Custom Scan (Citus Adaptive) Task Count: 6 (4 rows) -- Selected value in the WHERE is not partition key, so we cannot use distributed -- INSERT/SELECT and falls back route via coordinator SELECT coordinator_plan($Q$ EXPLAIN (costs off) INSERT INTO raw_events_second (user_id) SELECT user_id FROM raw_events_first WHERE user_id IN (SELECT value_1 FROM raw_events_second); $Q$); coordinator_plan --------------------------------------------------------------------- Custom Scan (Citus INSERT ... 
SELECT) INSERT/SELECT method: repartition -> Custom Scan (Citus Adaptive) -> Distributed Subplan XXX_1 -> Custom Scan (Citus Adaptive) Task Count: 4 (6 rows) -- same as above but slightly more complex -- since it also includes subquery in FROM as well SELECT coordinator_plan($Q$ EXPLAIN (costs off) INSERT INTO agg_events (user_id) SELECT f2.id FROM (SELECT id FROM (SELECT reference_table.user_id AS id FROM raw_events_first, reference_table WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f INNER JOIN (SELECT v4, v1, id FROM (SELECT SUM(raw_events_second.value_4) AS v4, SUM(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM raw_events_first, raw_events_second WHERE raw_events_first.user_id = raw_events_second.user_id GROUP BY raw_events_second.user_id HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 ON (f.id = f2.id) WHERE f.id IN (SELECT value_1 FROM raw_events_second); $Q$); coordinator_plan --------------------------------------------------------------------- Custom Scan (Citus INSERT ... 
SELECT) INSERT/SELECT method: repartition -> Custom Scan (Citus Adaptive) -> Distributed Subplan XXX_1 -> Custom Scan (Citus Adaptive) Task Count: 4 (6 rows) -- some more semi-anti join tests SET client_min_messages TO DEBUG2; -- join in where INSERT INTO raw_events_second (user_id) SELECT user_id FROM raw_events_first WHERE user_id IN (SELECT raw_events_second.user_id FROM raw_events_second, raw_events_first WHERE raw_events_second.user_id = raw_events_first.user_id AND raw_events_first.user_id = 200); DEBUG: Creating router plan DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second, multi_insert_select.raw_events_first_13300000 raw_events_first_1 WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first_1.user_id) AND (raw_events_first_1.user_id OPERATOR(pg_catalog.=) 200)))) AND (raw_events_first.user_id IS NOT NULL)) DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away RESET client_min_messages; -- we cannot push this down since it is NOT IN -- we use repartition insert/select instead SELECT coordinator_plan($Q$ EXPLAIN (costs off) INSERT INTO raw_events_second (user_id) SELECT user_id FROM raw_events_first WHERE user_id NOT IN (SELECT raw_events_second.user_id FROM raw_events_second, raw_events_first WHERE raw_events_second.user_id = raw_events_first.user_id AND raw_events_first.user_id = 200); $Q$); coordinator_plan --------------------------------------------------------------------- Custom Scan (Citus INSERT ... 
SELECT) INSERT/SELECT method: repartition -> Custom Scan (Citus Adaptive) -> Distributed Subplan XXX_1 -> Custom Scan (Citus Adaptive) Task Count: 1 (6 rows) SET client_min_messages TO DEBUG2; -- safe to push down INSERT INTO raw_events_second (user_id) SELECT user_id FROM raw_events_first WHERE EXISTS (SELECT 1 FROM raw_events_second WHERE raw_events_second.user_id =raw_events_first.user_id); DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id))) AND (raw_events_first.user_id IS NOT NULL)) DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id))) AND (raw_events_first.user_id IS NOT NULL)) DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id))) AND (raw_events_first.user_id IS NOT NULL)) DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((EXISTS (SELECT 1 FROM 
multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id))) AND (raw_events_first.user_id IS NOT NULL)) -- we cannot push down INSERT INTO raw_events_second (user_id) SELECT user_id FROM raw_events_first WHERE NOT EXISTS (SELECT 1 FROM raw_events_second WHERE raw_events_second.user_id =raw_events_first.user_id); DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((NOT (EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id)))) AND (raw_events_first.user_id IS NOT NULL)) DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((NOT (EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id)))) AND (raw_events_first.user_id IS NOT NULL)) DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((NOT (EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id)))) AND (raw_events_first.user_id IS NOT NULL)) DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((NOT (EXISTS (SELECT 1 
FROM multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id)))) AND (raw_events_first.user_id IS NOT NULL)) -- more complex LEFT JOINs INSERT INTO agg_events (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) FROM ( SELECT f2.id as id, f2.v4 as value FROM (SELECT id FROM (SELECT raw_events_first.user_id AS id FROM raw_events_first LEFT JOIN reference_table ON (raw_events_first.user_id = reference_table.user_id)) AS foo) as f LEFT JOIN (SELECT v4, v1, id FROM (SELECT SUM(raw_events_second.value_4) AS v4, SUM(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM raw_events_first, raw_events_second WHERE raw_events_first.user_id = raw_events_second.user_id GROUP BY raw_events_second.user_id HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 ON (f.id = f2.id)) as outer_most GROUP BY outer_most.id; DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM (multi_insert_select.raw_events_first_13300000 raw_events_first LEFT JOIN multi_insert_select.reference_table_13300012 reference_table ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)))) foo) f LEFT JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most 
WHERE (outer_most.id IS NOT NULL) GROUP BY outer_most.id DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM (multi_insert_select.raw_events_first_13300001 raw_events_first LEFT JOIN multi_insert_select.reference_table_13300012 reference_table ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)))) foo) f LEFT JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (outer_most.id IS NOT NULL) GROUP BY outer_most.id DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM (multi_insert_select.raw_events_first_13300002 raw_events_first LEFT JOIN multi_insert_select.reference_table_13300012 reference_table ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)))) foo) f LEFT JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE 
(raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (outer_most.id IS NOT NULL) GROUP BY outer_most.id DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM (multi_insert_select.raw_events_first_13300003 raw_events_first LEFT JOIN multi_insert_select.reference_table_13300012 reference_table ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)))) foo) f LEFT JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (outer_most.id IS NOT NULL) GROUP BY outer_most.id RESET client_min_messages; -- cannot push down since the f.id IN is matched with value_1 -- we use repartition insert/select instead SELECT coordinator_plan($Q$ EXPLAIN (costs off) INSERT INTO raw_events_second (user_id) SELECT user_id FROM raw_events_first WHERE user_id IN ( SELECT f2.id FROM (SELECT id FROM (SELECT reference_table.user_id AS id FROM raw_events_first, reference_table WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f INNER JOIN (SELECT v4, v1, id FROM (SELECT SUM(raw_events_second.value_4) AS v4, SUM(raw_events_first.value_1) 
AS v1, raw_events_second.user_id AS id FROM raw_events_first, raw_events_second WHERE raw_events_first.user_id = raw_events_second.user_id GROUP BY raw_events_second.user_id HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 ON (f.id = f2.id) WHERE f.id IN (SELECT value_1 FROM raw_events_second)); $Q$); coordinator_plan --------------------------------------------------------------------- Custom Scan (Citus INSERT ... SELECT) INSERT/SELECT method: repartition -> Custom Scan (Citus Adaptive) -> Distributed Subplan XXX_1 -> Custom Scan (Citus Adaptive) Task Count: 4 (6 rows) SET client_min_messages TO DEBUG2; -- same as above, but this time is it safe to push down since -- f.id IN is matched with user_id INSERT INTO raw_events_second (user_id) SELECT user_id FROM raw_events_first WHERE user_id IN ( SELECT f2.id FROM (SELECT id FROM (SELECT reference_table.user_id AS id FROM raw_events_first, reference_table WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f INNER JOIN (SELECT v4, v1, id FROM (SELECT SUM(raw_events_second.value_4) AS v4, SUM(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM raw_events_first, raw_events_second WHERE raw_events_first.user_id = raw_events_second.user_id GROUP BY raw_events_second.user_id HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 ON (f.id = f2.id) WHERE f.id IN (SELECT user_id FROM raw_events_second)); DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first_1, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f 
JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first_1.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first_1, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second)))) AND (raw_events_first.user_id IS NOT NULL)) DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first_1, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first_1.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first_1, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300005 
raw_events_second)))) AND (raw_events_first.user_id IS NOT NULL)) DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first_1, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first_1.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first_1, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300006 raw_events_second)))) AND (raw_events_first.user_id IS NOT NULL)) DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first_1, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, 
foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first_1.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first_1, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second)))) AND (raw_events_first.user_id IS NOT NULL)) RESET client_min_messages; -- cannot push down since top level user_id is matched with NOT IN INSERT INTO raw_events_second (user_id) SELECT user_id FROM raw_events_first WHERE user_id NOT IN ( SELECT f2.id FROM (SELECT id FROM (SELECT reference_table.user_id AS id FROM raw_events_first, reference_table WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f INNER JOIN (SELECT v4, v1, id FROM (SELECT SUM(raw_events_second.value_4) AS v4, SUM(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM raw_events_first, raw_events_second WHERE raw_events_first.user_id = raw_events_second.user_id GROUP BY raw_events_second.user_id HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 ON (f.id = f2.id) WHERE f.id IN (SELECT user_id FROM raw_events_second)); -- cannot push down since join is not equi join (f.id > f2.id) INSERT INTO raw_events_second (user_id) SELECT user_id FROM raw_events_first WHERE user_id IN ( SELECT f2.id FROM (SELECT id FROM (SELECT reference_table.user_id AS id FROM raw_events_first, reference_table WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f INNER JOIN (SELECT v4, v1, id FROM (SELECT SUM(raw_events_second.value_4) AS v4, SUM(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM 
raw_events_first, raw_events_second WHERE raw_events_first.user_id = raw_events_second.user_id GROUP BY raw_events_second.user_id HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 ON (f.id > f2.id) WHERE f.id IN (SELECT user_id FROM raw_events_second)); -- we currently not support grouping sets INSERT INTO agg_events (user_id, value_1_agg, value_2_agg) SELECT user_id, Sum(value_1) AS sum_val1, Sum(value_2) AS sum_val2 FROM raw_events_second GROUP BY grouping sets ( ( user_id ), ( value_1 ), ( user_id, value_1 ), ( ) ); ERROR: could not run distributed query with GROUPING SETS, CUBE, or ROLLUP HINT: Consider using an equality filter on the distributed table's partition column. -- set back to INFO SET client_min_messages TO INFO; -- avoid constraint violations TRUNCATE raw_events_first; -- we don't support LIMIT for subquery pushdown, but -- we recursively plan the query and run it via coordinator INSERT INTO agg_events(user_id) SELECT user_id FROM users_table WHERE user_id IN (SELECT user_id FROM ( ( SELECT user_id FROM ( SELECT e1.user_id FROM users_table u1, events_table e1 WHERE e1.user_id = u1.user_id LIMIT 3 ) as f_inner ) ) AS f2); -- Altering a table and selecting from it using a multi-shard statement -- in the same transaction is allowed because we will use the same -- connections for all co-located placements. BEGIN; ALTER TABLE raw_events_second DROP COLUMN value_4; INSERT INTO raw_events_first SELECT * FROM raw_events_second; ROLLBACK; -- Alterating a table and selecting from it using a single-shard statement -- in the same transaction is disallowed because we will use a different -- connection. BEGIN; ALTER TABLE raw_events_second DROP COLUMN value_4; INSERT INTO raw_events_first SELECT * FROM raw_events_second WHERE user_id = 100; ROLLBACK; -- Altering a reference table and then performing an INSERT ... SELECT which -- joins with the reference table is allowed, since the INSERT ... 
SELECT -- would read from the reference table over the same connections with the ones -- that performed the parallel DDL. BEGIN; ALTER TABLE reference_table ADD COLUMN z int; INSERT INTO raw_events_first (user_id) SELECT user_id FROM raw_events_second JOIN reference_table USING (user_id); ROLLBACK; -- the same test with sequential DDL should work fine BEGIN; SET LOCAL citus.multi_shard_modify_mode TO 'sequential'; ALTER TABLE reference_table ADD COLUMN z int; INSERT INTO raw_events_first (user_id) SELECT user_id FROM raw_events_second JOIN reference_table USING (user_id); ROLLBACK; -- Insert after copy is allowed BEGIN; COPY raw_events_second (user_id, value_1) FROM STDIN DELIMITER ','; INSERT INTO raw_events_first SELECT * FROM raw_events_second; ROLLBACK; -- Insert after copy is currently allowed for single-shard operation. -- Both insert and copy are rolled back successfully. BEGIN; COPY raw_events_second (user_id, value_1) FROM STDIN DELIMITER ','; INSERT INTO raw_events_first SELECT * FROM raw_events_second WHERE user_id = 101; SELECT user_id FROM raw_events_first WHERE user_id = 101; user_id --------------------------------------------------------------------- 101 (1 row) ROLLBACK; BEGIN; INSERT INTO raw_events_first SELECT * FROM raw_events_second; COPY raw_events_first (user_id, value_1) FROM STDIN DELIMITER ','; ROLLBACK; BEGIN; INSERT INTO raw_events_first SELECT * FROM raw_events_second WHERE user_id = 100; COPY raw_events_first (user_id, value_1) FROM STDIN DELIMITER ','; ROLLBACK; -- Similarly, multi-row INSERTs will take part in transactions and reuse connections... 
BEGIN; INSERT INTO raw_events_first SELECT * FROM raw_events_second WHERE user_id = 100; COPY raw_events_first (user_id, value_1) FROM STDIN DELIMITER ','; INSERT INTO raw_events_first (user_id, value_1) VALUES (105, 105), (106, 106); ROLLBACK; -- selecting from views works CREATE VIEW test_view AS SELECT * FROM raw_events_first; INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES (16, now(), 60, 600, 6000.1, 60000); SELECT count(*) FROM raw_events_second; count --------------------------------------------------------------------- 45 (1 row) INSERT INTO raw_events_second SELECT * FROM test_view; INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES (17, now(), 60, 600, 6000.1, 60000); INSERT INTO raw_events_second SELECT * FROM test_view WHERE user_id = 17 GROUP BY 1,2,3,4,5,6; SELECT count(*) FROM raw_events_second; count --------------------------------------------------------------------- 47 (1 row) -- intermediate results (CTEs) should be allowed when doing INSERT...SELECT within a CTE WITH series AS ( SELECT s AS val FROM generate_series(60,70) s ), inserts AS ( INSERT INTO raw_events_second (user_id) SELECT user_id FROM raw_events_first JOIN series ON (value_1 = val) RETURNING NULL ) SELECT count(*) FROM inserts; count --------------------------------------------------------------------- 2 (1 row) -- we need this in our next test truncate raw_events_first; SET client_min_messages TO DEBUG2; -- first show that the query works now INSERT INTO raw_events_first SELECT * FROM raw_events_second; DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_second.user_id, raw_events_second."time", raw_events_second.value_1, raw_events_second.value_2, raw_events_second.value_3, raw_events_second.value_4 FROM multi_insert_select.raw_events_second_13300004 raw_events_second WHERE 
(raw_events_second.user_id IS NOT NULL) DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300001 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_second.user_id, raw_events_second."time", raw_events_second.value_1, raw_events_second.value_2, raw_events_second.value_3, raw_events_second.value_4 FROM multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_second.user_id IS NOT NULL) DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300002 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_second.user_id, raw_events_second."time", raw_events_second.value_1, raw_events_second.value_2, raw_events_second.value_3, raw_events_second.value_4 FROM multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_second.user_id IS NOT NULL) DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300003 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_second.user_id, raw_events_second."time", raw_events_second.value_1, raw_events_second.value_2, raw_events_second.value_3, raw_events_second.value_4 FROM multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id IS NOT NULL) SET client_min_messages TO INFO; truncate raw_events_first; SET client_min_messages TO DEBUG2; -- now show that it works for a single shard query as well INSERT INTO raw_events_first SELECT * FROM raw_events_second WHERE user_id = 5; DEBUG: Creating router plan DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_second.user_id, raw_events_second."time", raw_events_second.value_1, raw_events_second.value_2, raw_events_second.value_3, raw_events_second.value_4 FROM 
multi_insert_select.raw_events_second_13300004 raw_events_second WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.=) 5) AND (raw_events_second.user_id IS NOT NULL)) DEBUG: Skipping target shard interval 13300001 since SELECT query for it pruned away DEBUG: Skipping target shard interval 13300002 since SELECT query for it pruned away DEBUG: Skipping target shard interval 13300003 since SELECT query for it pruned away SET client_min_messages TO INFO; -- now do some tests with varchars INSERT INTO insert_select_varchar_test VALUES ('test_1', 10); INSERT INTO insert_select_varchar_test VALUES ('test_2', 30); INSERT INTO insert_select_varchar_test (key, value) SELECT *, 100 FROM (SELECT f1.key FROM (SELECT key FROM insert_select_varchar_test GROUP BY 1 HAVING Count(key) < 3) AS f1, (SELECT key FROM insert_select_varchar_test GROUP BY 1 HAVING Sum(COALESCE(insert_select_varchar_test.value, 0)) > 20.0) AS f2 WHERE f1.key = f2.key GROUP BY 1) AS foo; SELECT * FROM insert_select_varchar_test ORDER BY 1 DESC, 2 DESC; key | value --------------------------------------------------------------------- test_2 | 100 test_2 | 30 test_1 | 10 (3 rows) -- some tests with DEFAULT columns and constant values -- this test is mostly importantly intended for deparsing the query correctly -- but still it is preferable to have this test here instead of multi_deparse_shard_query CREATE TABLE table_with_defaults ( store_id int, first_name text, default_1 int DEFAULT 1, last_name text, default_2 text DEFAULT '2' ); -- we don't need many shards SET citus.shard_count = 2; SELECT create_distributed_table('table_with_defaults', 'store_id'); create_distributed_table --------------------------------------------------------------------- (1 row) -- let's see the queries SET client_min_messages TO DEBUG2; -- a very simple query INSERT INTO table_with_defaults SELECT * FROM table_with_defaults; DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS 
citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_1, table_with_defaults.last_name, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_1, table_with_defaults.last_name, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) -- see that defaults are filled INSERT INTO table_with_defaults (store_id, first_name) SELECT store_id, first_name FROM table_with_defaults; DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, 1 AS default_1, '2'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, 1 AS default_1, '2'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) -- shuffle one of the defaults and skip the other INSERT INTO table_with_defaults (default_2, store_id, first_name) SELECT default_2, store_id, first_name FROM table_with_defaults; DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 
AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, 1 AS default_1, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, 1 AS default_1, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) -- shuffle both defaults INSERT INTO table_with_defaults (default_2, store_id, default_1, first_name) SELECT default_2, store_id, default_1, first_name FROM table_with_defaults; DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_1, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_1, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) -- use constants instead of non-default column INSERT INTO table_with_defaults (default_2, last_name, store_id, first_name) SELECT default_2, 'Freund', store_id, 'Andres' FROM table_with_defaults; DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS 
citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) -- use constants instead of non-default column and skip both defauls INSERT INTO table_with_defaults (last_name, store_id, first_name) SELECT 'Freund', store_id, 'Andres' FROM table_with_defaults; DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, '2'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, '2'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) -- use constants instead of default columns INSERT INTO table_with_defaults (default_2, last_name, store_id, first_name, default_1) SELECT 20, last_name, store_id, first_name, 10 
FROM table_with_defaults; DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, 10, table_with_defaults.last_name, 20 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, 10, table_with_defaults.last_name, 20 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) -- use constants instead of both default columns and non-default columns INSERT INTO table_with_defaults (default_2, last_name, store_id, first_name, default_1) SELECT 20, 'Freund', store_id, 'Andres', 10 FROM table_with_defaults; DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 10, 'Freund'::text AS last_name, 20 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 10, 'Freund'::text AS last_name, 20 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) -- some of the ultimate queries where we have constants, -- defaults and group by entry is not on the target entry INSERT INTO table_with_defaults (default_2, 
store_id, first_name) SELECT '2000', store_id, 'Andres' FROM table_with_defaults GROUP BY last_name, store_id; DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1 AS default_1, '2000'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1 AS default_1, '2000'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id INSERT INTO table_with_defaults (default_1, store_id, first_name, default_2) SELECT 1000, store_id, 'Andres', '2000' FROM table_with_defaults GROUP BY last_name, store_id, first_name; DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id, table_with_defaults.first_name DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM 
multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id, table_with_defaults.first_name INSERT INTO table_with_defaults (default_1, store_id, first_name, default_2) SELECT 1000, store_id, 'Andres', '2000' FROM table_with_defaults GROUP BY last_name, store_id, first_name, default_2; DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_2 DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_2 INSERT INTO table_with_defaults (default_1, store_id, first_name) SELECT 1000, store_id, 'Andres' FROM table_with_defaults GROUP BY last_name, store_id, first_name, default_2; DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1000, '2'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) GROUP BY 
table_with_defaults.last_name, table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_2 DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1000, '2'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_2 RESET client_min_messages; -- Stable function in default should be allowed ALTER TABLE table_with_defaults ADD COLUMN t timestamptz DEFAULT now(); INSERT INTO table_with_defaults (store_id, first_name, last_name) SELECT store_id, 'first '||store_id, 'last '||store_id FROM table_with_defaults GROUP BY store_id, first_name, last_name; -- Volatile function in default should be disallowed - SERIAL pseudo-types CREATE TABLE table_with_serial ( store_id int, s bigserial ); SELECT create_distributed_table('table_with_serial', 'store_id'); create_distributed_table --------------------------------------------------------------------- (1 row) INSERT INTO table_with_serial (store_id) SELECT store_id FROM table_with_defaults GROUP BY store_id; -- Volatile function in default should be disallowed - user-defined sequence CREATE SEQUENCE user_defined_sequence; CREATE TABLE table_with_user_sequence ( store_id int, s bigint default nextval('user_defined_sequence') ); SELECT create_distributed_table('table_with_user_sequence', 'store_id'); create_distributed_table --------------------------------------------------------------------- (1 row) INSERT INTO table_with_user_sequence (store_id) SELECT store_id FROM table_with_defaults GROUP BY store_id; -- do some more error/error message checks SET citus.shard_count TO 4; SET citus.shard_replication_factor TO 1; CREATE 
TABLE text_table (part_col text, val int); CREATE TABLE char_table (part_col char[], val int); create table table_with_starts_with_defaults (a int DEFAULT 5, b int, c int); SELECT create_distributed_table('text_table', 'part_col'); create_distributed_table --------------------------------------------------------------------- (1 row) SELECT create_distributed_table('char_table','part_col'); create_distributed_table --------------------------------------------------------------------- (1 row) SELECT create_distributed_table('table_with_starts_with_defaults', 'c'); create_distributed_table --------------------------------------------------------------------- (1 row) SET client_min_messages TO DEBUG; INSERT INTO text_table (part_col) SELECT CASE WHEN part_col = 'onder' THEN 'marco' END FROM text_table ; DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match DETAIL: Subquery contains a case expression in the same position as the target table's partition column. HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. DEBUG: Router planner cannot handle multi-shard select queries DEBUG: performing repartitioned INSERT ... SELECT DEBUG: partitioning SELECT query by column index 0 with name 'part_col' INSERT INTO text_table (part_col) SELECT COALESCE(part_col, 'onder') FROM text_table; DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match DETAIL: Subquery contains a coalesce expression in the same position as the target table's partition column. HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. DEBUG: Router planner cannot handle multi-shard select queries DEBUG: performing repartitioned INSERT ... 
SELECT DEBUG: partitioning SELECT query by column index 0 with name 'part_col' INSERT INTO text_table (part_col) SELECT GREATEST(part_col, 'jason') FROM text_table; DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match DETAIL: Subquery contains a min/max expression in the same position as the target table's partition column. HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. DEBUG: Router planner cannot handle multi-shard select queries DEBUG: performing repartitioned INSERT ... SELECT DEBUG: partitioning SELECT query by column index 0 with name 'part_col' INSERT INTO text_table (part_col) SELECT LEAST(part_col, 'andres') FROM text_table; DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match DETAIL: Subquery contains a min/max expression in the same position as the target table's partition column. HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. DEBUG: Router planner cannot handle multi-shard select queries DEBUG: performing repartitioned INSERT ... SELECT DEBUG: partitioning SELECT query by column index 0 with name 'part_col' INSERT INTO text_table (part_col) SELECT NULLIF(part_col, 'metin') FROM text_table; DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match DETAIL: Subquery contains an expression that is not a simple column reference in the same position as the target table's partition column. HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. 
DEBUG: Router planner cannot handle multi-shard select queries DEBUG: performing repartitioned INSERT ... SELECT DEBUG: partitioning SELECT query by column index 0 with name 'part_col' INSERT INTO text_table (part_col) SELECT part_col isnull FROM text_table; DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match DETAIL: Subquery contains an expression that is not a simple column reference in the same position as the target table's partition column. HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. DEBUG: Router planner cannot handle multi-shard select queries DEBUG: performing repartitioned INSERT ... SELECT DEBUG: partitioning SELECT query by column index 0 with name 'part_col' INSERT INTO text_table (part_col) SELECT part_col::text from char_table; DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match DETAIL: Subquery contains an explicit coercion in the same position as the target table's partition column. HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. DEBUG: Router planner cannot handle multi-shard select queries DEBUG: performing repartitioned INSERT ... SELECT DEBUG: partitioning SELECT query by column index 0 with name 'part_col' INSERT INTO text_table (part_col) SELECT (part_col = 'burak') is true FROM text_table; DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match DETAIL: Subquery contains an expression that is not a simple column reference in the same position as the target table's partition column. 
HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. DEBUG: Router planner cannot handle multi-shard select queries DEBUG: performing repartitioned INSERT ... SELECT DEBUG: partitioning SELECT query by column index 0 with name 'part_col' INSERT INTO text_table (part_col) SELECT val FROM text_table; DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match DETAIL: The data type of the target table's partition column should exactly match the data type of the corresponding simple column reference in the subquery. DEBUG: Router planner cannot handle multi-shard select queries DEBUG: performing repartitioned INSERT ... SELECT DEBUG: partitioning SELECT query by column index 0 with name 'part_col' INSERT INTO text_table (part_col) SELECT val::text FROM text_table; DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match DETAIL: Subquery contains an explicit coercion in the same position as the target table's partition column. HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. DEBUG: Router planner cannot handle multi-shard select queries DEBUG: performing repartitioned INSERT ... 
SELECT DEBUG: partitioning SELECT query by column index 0 with name 'part_col' RESET client_min_messages; insert into table_with_starts_with_defaults (b,c) select b,c FROM table_with_starts_with_defaults; -- Test on partition column without native hash function CREATE TABLE raw_table ( id BIGINT, time DATE ); CREATE TABLE summary_table ( time DATE, count BIGINT ); SELECT create_distributed_table('raw_table', 'time'); create_distributed_table --------------------------------------------------------------------- (1 row) SELECT create_distributed_table('summary_table', 'time'); create_distributed_table --------------------------------------------------------------------- (1 row) INSERT INTO raw_table VALUES(1, '11-11-1980'); INSERT INTO summary_table SELECT time, COUNT(*) FROM raw_table GROUP BY time; SELECT * FROM summary_table; time | count --------------------------------------------------------------------- 11-11-1980 | 1 (1 row) -- Test INSERT ... SELECT via coordinator -- Select from constants TRUNCATE raw_events_first; INSERT INTO raw_events_first (user_id, value_1) SELECT * FROM (VALUES (1,2), (3,4), (5,6)) AS v(int,int); SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id; user_id | value_1 --------------------------------------------------------------------- 1 | 2 3 | 4 5 | 6 (3 rows) -- Select from local functions TRUNCATE raw_events_first; CREATE SEQUENCE insert_select_test_seq; SET client_min_messages TO DEBUG; INSERT INTO raw_events_first (user_id, value_1, value_2) SELECT s, nextval('insert_select_test_seq'), (random()*10)::int FROM generate_series(1, 5) s; DEBUG: Creating router plan DEBUG: distributed INSERT ... SELECT can only select from distributed tables DEBUG: Collecting INSERT ... 
SELECT results on coordinator SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1; DEBUG: Router planner cannot handle multi-shard select queries user_id | value_1 --------------------------------------------------------------------- 1 | 1 2 | 2 3 | 3 4 | 4 5 | 5 (5 rows) -- ON CONFLICT is supported INSERT INTO raw_events_first (user_id, value_1) SELECT s, nextval('insert_select_test_seq') FROM generate_series(1, 5) s ON CONFLICT DO NOTHING; DEBUG: Creating router plan DEBUG: distributed INSERT ... SELECT can only select from distributed tables DEBUG: Collecting INSERT ... SELECT results on coordinator DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300000'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) ON CONFLICT DO NOTHING DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300001 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300001'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) ON CONFLICT DO NOTHING DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300002 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300002'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) ON CONFLICT DO NOTHING DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300003 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300003'::text, 
'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) ON CONFLICT DO NOTHING -- RETURNING is supported INSERT INTO raw_events_first (user_id, value_1) SELECT s, nextval('insert_select_test_seq') FROM generate_series(1, 5) s RETURNING *; DEBUG: Creating router plan DEBUG: distributed INSERT ... SELECT can only select from distributed tables DEBUG: Collecting INSERT ... SELECT results on coordinator DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300000'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300001 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300001'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300002 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300002'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 DEBUG: distributed statement: INSERT 
INTO multi_insert_select.raw_events_first_13300003 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300003'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 user_id | time | value_1 | value_2 | value_3 | value_4 --------------------------------------------------------------------- 1 | | 11 | | | 2 | | 12 | | | 3 | | 13 | | | 4 | | 14 | | | 5 | | 15 | | | (5 rows) RESET client_min_messages; -- INSERT ... SELECT and multi-shard SELECT in the same transaction is supported TRUNCATE raw_events_first; BEGIN; INSERT INTO raw_events_first (user_id, value_1) SELECT s, s FROM generate_series(1, 5) s; SELECT user_id, value_1 FROM raw_events_first ORDER BY 1; user_id | value_1 --------------------------------------------------------------------- 1 | 1 2 | 2 3 | 3 4 | 4 5 | 5 (5 rows) ROLLBACK; -- INSERT ... 
SELECT and single-shard SELECT in the same transaction is supported TRUNCATE raw_events_first; BEGIN; INSERT INTO raw_events_first (user_id, value_1) SELECT s, s FROM generate_series(1, 5) s; SELECT user_id, value_1 FROM raw_events_first WHERE user_id = 1; user_id | value_1 --------------------------------------------------------------------- 1 | 1 (1 row) COMMIT; -- Select from local table TRUNCATE raw_events_first; CREATE TEMPORARY TABLE raw_events_first_local AS SELECT s AS u, 2*s AS v FROM generate_series(1, 5) s; INSERT INTO raw_events_first (user_id, value_1) SELECT u, v FROM raw_events_first_local; SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1; user_id | value_1 --------------------------------------------------------------------- 1 | 2 2 | 4 3 | 6 4 | 8 5 | 10 (5 rows) -- Use columns in opposite order TRUNCATE raw_events_first; INSERT INTO raw_events_first (value_1, user_id) SELECT u, v FROM raw_events_first_local; SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1; user_id | value_1 --------------------------------------------------------------------- 2 | 1 4 | 2 6 | 3 8 | 4 10 | 5 (5 rows) -- Set operations can work with opposite column order TRUNCATE raw_events_first; INSERT INTO raw_events_first (value_3, user_id) ( SELECT v, u::bigint FROM raw_events_first_local ) UNION ALL ( SELECT v, u FROM raw_events_first_local ); SELECT user_id, value_3 FROM raw_events_first ORDER BY user_id, value_3; user_id | value_3 --------------------------------------------------------------------- 1 | 2 1 | 2 2 | 4 2 | 4 3 | 6 3 | 6 4 | 8 4 | 8 5 | 10 5 | 10 (10 rows) -- Select from other distributed table with limit TRUNCATE raw_events_first; TRUNCATE raw_events_second; INSERT INTO raw_events_second (user_id, value_4) SELECT s, 3*s FROM generate_series (1,5) s; INSERT INTO raw_events_first (user_id, value_1) SELECT user_id, value_4 FROM raw_events_second LIMIT 5; SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, 
value_1; user_id | value_1 --------------------------------------------------------------------- 1 | 3 2 | 6 3 | 9 4 | 12 5 | 15 (5 rows) -- CTEs are supported in local queries TRUNCATE raw_events_first; WITH removed_rows AS ( DELETE FROM raw_events_first_local RETURNING u ) INSERT INTO raw_events_first (user_id, value_1) WITH value AS (SELECT 1) SELECT * FROM removed_rows, value; SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1; user_id | value_1 --------------------------------------------------------------------- 1 | 1 2 | 1 3 | 1 4 | 1 5 | 1 (5 rows) -- nested CTEs are also supported TRUNCATE raw_events_first; INSERT INTO raw_events_first_local SELECT s, 2*s FROM generate_series(0, 10) s; WITH rows_to_remove AS ( SELECT u FROM raw_events_first_local WHERE u > 0 ), removed_rows AS ( DELETE FROM raw_events_first_local WHERE u IN (SELECT * FROM rows_to_remove) RETURNING u, v ) INSERT INTO raw_events_first (user_id, value_1) WITH ultra_rows AS ( WITH numbers AS ( SELECT s FROM generate_series(1,10) s ), super_rows AS ( SELECT u, v FROM removed_rows JOIN numbers ON (u = s) ) SELECT * FROM super_rows LIMIT 5 ) SELECT u, v FROM ultra_rows; SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1; user_id | value_1 --------------------------------------------------------------------- 1 | 2 2 | 4 3 | 6 4 | 8 5 | 10 (5 rows) -- CTEs with duplicate names are also supported TRUNCATE raw_events_first; WITH super_rows AS ( SELECT u FROM raw_events_first_local ) INSERT INTO raw_events_first (user_id, value_1) WITH super_rows AS ( SELECT * FROM super_rows GROUP BY u ) SELECT u, 5 FROM super_rows; SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1; user_id | value_1 --------------------------------------------------------------------- 0 | 5 (1 row) -- CTEs are supported in router queries TRUNCATE raw_events_first; WITH user_two AS ( SELECT user_id, value_4 FROM raw_events_second WHERE user_id = 2 ) INSERT INTO 
raw_events_first (user_id, value_1) SELECT * FROM user_two; SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1; user_id | value_1 --------------------------------------------------------------------- 2 | 6 (1 row) -- CTEs are supported when there are name collisions WITH numbers AS ( SELECT s FROM generate_series(1,10) s ) INSERT INTO raw_events_first(user_id, value_1) WITH numbers AS ( SELECT s, s FROM generate_series(1,5) s ) SELECT * FROM numbers; -- Select into distributed table with a sequence CREATE TABLE "CaseSensitiveTable" ("UserID" int, "Value1" int); SELECT create_distributed_table('"CaseSensitiveTable"', 'UserID'); create_distributed_table --------------------------------------------------------------------- (1 row) INSERT INTO "CaseSensitiveTable" SELECT s, s FROM generate_series(1,10) s; SELECT * FROM "CaseSensitiveTable" ORDER BY "UserID"; UserID | Value1 --------------------------------------------------------------------- 1 | 1 2 | 2 3 | 3 4 | 4 5 | 5 6 | 6 7 | 7 8 | 8 9 | 9 10 | 10 (10 rows) DROP TABLE "CaseSensitiveTable"; -- Select into distributed table with a sequence CREATE TABLE dist_table_with_sequence (user_id serial, value_1 serial); SELECT create_distributed_table('dist_table_with_sequence', 'user_id'); create_distributed_table --------------------------------------------------------------------- (1 row) -- from local query INSERT INTO dist_table_with_sequence (value_1) SELECT s FROM generate_series(1,5) s; SELECT * FROM dist_table_with_sequence ORDER BY user_id, value_1; user_id | value_1 --------------------------------------------------------------------- 1 | 1 2 | 2 3 | 3 4 | 4 5 | 5 (5 rows) -- from a distributed query INSERT INTO dist_table_with_sequence (value_1) SELECT value_1 FROM dist_table_with_sequence ORDER BY value_1; SELECT * FROM dist_table_with_sequence ORDER BY user_id, value_1; user_id | value_1 --------------------------------------------------------------------- 1 | 1 2 | 2 3 | 3 4 | 4 5 | 5 6 | 
1 7 | 2 8 | 3 9 | 4 10 | 5 (10 rows) TRUNCATE dist_table_with_sequence; INSERT INTO dist_table_with_sequence (user_id) SELECT user_id FROM raw_events_second ORDER BY user_id; SELECT * FROM dist_table_with_sequence ORDER BY user_id, value_1; user_id | value_1 --------------------------------------------------------------------- 1 | 1 2 | 2 3 | 3 4 | 4 5 | 5 (5 rows) WITH top10 AS ( SELECT user_id FROM raw_events_second WHERE value_1 IS NOT NULL ORDER BY value_1 LIMIT 10 ) INSERT INTO dist_table_with_sequence (value_1) SELECT * FROM top10; SELECT * FROM dist_table_with_sequence ORDER BY user_id, value_1; user_id | value_1 --------------------------------------------------------------------- 1 | 1 2 | 2 3 | 3 4 | 4 5 | 5 (5 rows) -- router queries become logical planner queries when there is a nextval call INSERT INTO dist_table_with_sequence (user_id) SELECT user_id FROM dist_table_with_sequence WHERE user_id = 1; SELECT * FROM dist_table_with_sequence ORDER BY user_id, value_1; user_id | value_1 --------------------------------------------------------------------- 1 | 1 1 | 6 2 | 2 3 | 3 4 | 4 5 | 5 (6 rows) DROP TABLE dist_table_with_sequence; -- Select into distributed table with a user-defined sequence CREATE SEQUENCE seq1; CREATE SEQUENCE seq2; CREATE TABLE dist_table_with_user_sequence (user_id int default nextval('seq1'), value_1 bigint default nextval('seq2')); SELECT create_distributed_table('dist_table_with_user_sequence', 'user_id'); create_distributed_table --------------------------------------------------------------------- (1 row) -- from local query INSERT INTO dist_table_with_user_sequence (value_1) SELECT s FROM generate_series(1,5) s; SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1; user_id | value_1 --------------------------------------------------------------------- 1 | 1 2 | 2 3 | 3 4 | 4 5 | 5 (5 rows) -- from a distributed query INSERT INTO dist_table_with_user_sequence (value_1) SELECT value_1 FROM 
dist_table_with_user_sequence ORDER BY value_1; SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1; user_id | value_1 --------------------------------------------------------------------- 1 | 1 2 | 2 3 | 3 4 | 4 5 | 5 6 | 1 7 | 2 8 | 3 9 | 4 10 | 5 (10 rows) TRUNCATE dist_table_with_user_sequence; INSERT INTO dist_table_with_user_sequence (user_id) SELECT user_id FROM raw_events_second ORDER BY user_id; SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1; user_id | value_1 --------------------------------------------------------------------- 1 | 1 2 | 2 3 | 3 4 | 4 5 | 5 (5 rows) WITH top10 AS ( SELECT user_id FROM raw_events_second WHERE value_1 IS NOT NULL ORDER BY value_1 LIMIT 10 ) INSERT INTO dist_table_with_user_sequence (value_1) SELECT * FROM top10; SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1; user_id | value_1 --------------------------------------------------------------------- 1 | 1 2 | 2 3 | 3 4 | 4 5 | 5 (5 rows) -- router queries become logical planner queries when there is a nextval call INSERT INTO dist_table_with_user_sequence (user_id) SELECT user_id FROM dist_table_with_user_sequence WHERE user_id = 1; SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1; user_id | value_1 --------------------------------------------------------------------- 1 | 1 1 | 6 2 | 2 3 | 3 4 | 4 5 | 5 (6 rows) DROP TABLE dist_table_with_user_sequence; DROP SEQUENCE seq1, seq2; -- Select from distributed table into reference table CREATE TABLE ref_table (user_id serial, value_1 int); SELECT create_reference_table('ref_table'); create_reference_table --------------------------------------------------------------------- (1 row) INSERT INTO ref_table SELECT user_id, value_1 FROM raw_events_second; SELECT * FROM ref_table ORDER BY user_id, value_1; user_id | value_1 --------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | (5 rows) INSERT INTO ref_table (value_1) 
SELECT value_1 FROM raw_events_second ORDER BY value_1; SELECT * FROM ref_table ORDER BY user_id, value_1; user_id | value_1 --------------------------------------------------------------------- 1 | 1 | 2 | 2 | 3 | 3 | 4 | 4 | 5 | 5 | (10 rows) INSERT INTO ref_table SELECT * FROM ref_table; SELECT * FROM ref_table ORDER BY user_id, value_1; user_id | value_1 --------------------------------------------------------------------- 1 | 1 | 1 | 1 | 2 | 2 | 2 | 2 | 3 | 3 | 3 | 3 | 4 | 4 | 4 | 4 | 5 | 5 | 5 | 5 | (20 rows) DROP TABLE ref_table; -- Select from distributed table into reference table with user-defined sequence CREATE SEQUENCE seq1; CREATE TABLE ref_table_with_user_sequence (user_id int default nextval('seq1'), value_1 int); SELECT create_reference_table('ref_table_with_user_sequence'); create_reference_table --------------------------------------------------------------------- (1 row) INSERT INTO ref_table_with_user_sequence SELECT user_id, value_1 FROM raw_events_second; SELECT * FROM ref_table_with_user_sequence ORDER BY user_id, value_1; user_id | value_1 --------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | (5 rows) INSERT INTO ref_table_with_user_sequence (value_1) SELECT value_1 FROM raw_events_second ORDER BY value_1; SELECT * FROM ref_table_with_user_sequence ORDER BY user_id, value_1; user_id | value_1 --------------------------------------------------------------------- 1 | 1 | 2 | 2 | 3 | 3 | 4 | 4 | 5 | 5 | (10 rows) INSERT INTO ref_table_with_user_sequence SELECT * FROM ref_table_with_user_sequence; SELECT * FROM ref_table_with_user_sequence ORDER BY user_id, value_1; user_id | value_1 --------------------------------------------------------------------- 1 | 1 | 1 | 1 | 2 | 2 | 2 | 2 | 3 | 3 | 3 | 3 | 4 | 4 | 4 | 4 | 5 | 5 | 5 | 5 | (20 rows) DROP TABLE ref_table_with_user_sequence; DROP SEQUENCE seq1; -- Select from reference table into reference table CREATE TABLE ref1 (d timestamptz); SELECT 
create_reference_table('ref1'); create_reference_table --------------------------------------------------------------------- (1 row) CREATE TABLE ref2 (d date); SELECT create_reference_table('ref2'); create_reference_table --------------------------------------------------------------------- (1 row) INSERT INTO ref2 VALUES ('2017-10-31'); INSERT INTO ref1 SELECT * FROM ref2; SELECT count(*) from ref1; count --------------------------------------------------------------------- 1 (1 row) -- also test with now() INSERT INTO ref1 SELECT now() FROM ref2; SELECT count(*) from ref1; count --------------------------------------------------------------------- 2 (1 row) DROP TABLE ref1; DROP TABLE ref2; -- Select into an append-partitioned table is not supported CREATE TABLE insert_append_table (user_id int, value_4 bigint); SELECT create_distributed_table('insert_append_table', 'user_id', 'append'); create_distributed_table --------------------------------------------------------------------- (1 row) INSERT INTO insert_append_table (user_id, value_4) SELECT user_id, 1 FROM raw_events_second LIMIT 5; ERROR: INSERT ... 
SELECT into an append-distributed table is not supported DROP TABLE insert_append_table; -- Insert from other distributed table as prepared statement TRUNCATE raw_events_first; PREPARE insert_prep(int) AS INSERT INTO raw_events_first (user_id, value_1) SELECT $1, value_4 FROM raw_events_second ORDER BY value_4 LIMIT 1; EXECUTE insert_prep(1); EXECUTE insert_prep(2); EXECUTE insert_prep(3); EXECUTE insert_prep(4); EXECUTE insert_prep(5); EXECUTE insert_prep(6); SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1; user_id | value_1 --------------------------------------------------------------------- 1 | 3 2 | 3 3 | 3 4 | 3 5 | 3 6 | 3 (6 rows) -- Inserting into views is handled via coordinator TRUNCATE raw_events_first; INSERT INTO test_view SELECT * FROM raw_events_second; SELECT user_id, value_4 FROM test_view ORDER BY user_id, value_4; user_id | value_4 --------------------------------------------------------------------- 1 | 3 2 | 6 3 | 9 4 | 12 5 | 15 (5 rows) -- Drop the view now, because the column we are about to drop depends on it DROP VIEW test_view; -- Make sure we handle dropped columns correctly CREATE TABLE drop_col_table (col1 text, col2 text, col3 text); SELECT create_distributed_table('drop_col_table', 'col2'); create_distributed_table --------------------------------------------------------------------- (1 row) ALTER TABLE drop_col_table DROP COLUMN col1; INSERT INTO drop_col_table (col3, col2) SELECT value_4, user_id FROM raw_events_second LIMIT 5; SELECT * FROM drop_col_table ORDER BY col2, col3; col2 | col3 --------------------------------------------------------------------- 1 | 3 2 | 6 3 | 9 4 | 12 5 | 15 (5 rows) -- make sure the tuple went to the right shard SELECT * FROM drop_col_table WHERE col2 = '1'; col2 | col3 --------------------------------------------------------------------- 1 | 3 (1 row) RESET client_min_messages; -- make sure casts are handled correctly CREATE TABLE coerce_events(user_id int, time timestamp, 
value_1 numeric); SELECT create_distributed_table('coerce_events', 'user_id'); create_distributed_table --------------------------------------------------------------------- (1 row) CREATE TABLE coerce_agg (user_id int, value_1_agg int); SELECT create_distributed_table('coerce_agg', 'user_id'); create_distributed_table --------------------------------------------------------------------- (1 row) INSERT INTO coerce_events(user_id, value_1) VALUES (1, 1), (2, 2), (10, 10); -- numeric -> int (straight function) INSERT INTO coerce_agg(user_id, value_1_agg) SELECT * FROM ( SELECT user_id, value_1 FROM coerce_events ) AS ftop ORDER BY 2 DESC, 1 DESC LIMIT 5; -- int -> text ALTER TABLE coerce_agg ALTER COLUMN value_1_agg TYPE text; INSERT INTO coerce_agg(user_id, value_1_agg) SELECT * FROM ( SELECT user_id, value_1 FROM coerce_events ) AS ftop LIMIT 5; SELECT * FROM coerce_agg ORDER BY 1 DESC, 2 DESC; user_id | value_1_agg --------------------------------------------------------------------- 10 | 10 10 | 10 2 | 2 2 | 2 1 | 1 1 | 1 (6 rows) TRUNCATE coerce_agg; -- int -> char(1) ALTER TABLE coerce_agg ALTER COLUMN value_1_agg TYPE char(1); INSERT INTO coerce_agg(user_id, value_1_agg) SELECT * FROM ( SELECT user_id, value_1 FROM coerce_events ) AS ftop LIMIT 5; ERROR: value too long for type character(1) SELECT * FROM coerce_agg ORDER BY 1 DESC, 2 DESC; user_id | value_1_agg --------------------------------------------------------------------- (0 rows) TRUNCATE coerce_agg; TRUNCATE coerce_events; -- char(5) -> char(1) ALTER TABLE coerce_events ALTER COLUMN value_1 TYPE char(5); INSERT INTO coerce_events(user_id, value_1) VALUES (1, 'aaaaa'), (2, 'bbbbb'); INSERT INTO coerce_agg(user_id, value_1_agg) SELECT * FROM ( SELECT user_id, value_1 FROM coerce_events ) AS ftop LIMIT 5; ERROR: value too long for type character(1) -- char(1) -> char(5) ALTER TABLE coerce_events ALTER COLUMN value_1 TYPE char(1) USING value_1::char(1); ALTER TABLE coerce_agg ALTER COLUMN value_1_agg 
TYPE char(5); INSERT INTO coerce_agg(user_id, value_1_agg) SELECT * FROM ( SELECT user_id, value_1 FROM coerce_events ) AS ftop LIMIT 5; SELECT * FROM coerce_agg ORDER BY 1 DESC, 2 DESC; user_id | value_1_agg --------------------------------------------------------------------- 2 | b 1 | a (2 rows) TRUNCATE coerce_agg; TRUNCATE coerce_events; -- integer -> integer (check VALUE < 5) ALTER TABLE coerce_events ALTER COLUMN value_1 TYPE integer USING NULL; ALTER TABLE coerce_agg ALTER COLUMN value_1_agg TYPE integer USING NULL; ALTER TABLE coerce_agg ADD CONSTRAINT small_number CHECK (value_1_agg < 5); INSERT INTO coerce_events (user_id, value_1) VALUES (1, 1), (10, 10); \set VERBOSITY TERSE INSERT INTO coerce_agg(user_id, value_1_agg) SELECT * FROM ( SELECT user_id, value_1 FROM coerce_events ) AS ftop; ERROR: new row for relation "coerce_agg_13300067" violates check constraint "small_number_13300067" \set VERBOSITY DEFAULT SELECT * FROM coerce_agg ORDER BY 1 DESC, 2 DESC; user_id | value_1_agg --------------------------------------------------------------------- (0 rows) -- integer[3] -> text[3] TRUNCATE coerce_events; ALTER TABLE coerce_events ALTER COLUMN value_1 TYPE integer[3] USING NULL; INSERT INTO coerce_events(user_id, value_1) VALUES (1, '{1,1,1}'), (2, '{2,2,2}'); ALTER TABLE coerce_agg DROP COLUMN value_1_agg; ALTER TABLE coerce_agg ADD COLUMN value_1_agg text[3]; INSERT INTO coerce_agg(user_id, value_1_agg) SELECT * FROM ( SELECT user_id, value_1 FROM coerce_events ) AS ftop LIMIT 5; SELECT * FROM coerce_agg ORDER BY 1 DESC, 2 DESC; user_id | value_1_agg --------------------------------------------------------------------- 2 | {2,2,2} 1 | {1,1,1} (2 rows) -- INSERT..SELECT + prepared statements + recursive planning BEGIN; PREPARE prepared_recursive_insert_select AS INSERT INTO users_table SELECT * FROM users_table WHERE value_1 IN (SELECT value_2 FROM events_table OFFSET 0); EXECUTE prepared_recursive_insert_select; EXECUTE 
prepared_recursive_insert_select; EXECUTE prepared_recursive_insert_select; EXECUTE prepared_recursive_insert_select; EXECUTE prepared_recursive_insert_select; EXECUTE prepared_recursive_insert_select; ROLLBACK; -- upsert with on conflict update distribution column is unsupported INSERT INTO agg_events AS ae ( user_id, value_1_agg, agg_time ) SELECT user_id, value_1, time FROM raw_events_first ON conflict (user_id, value_1_agg) DO UPDATE SET user_id = 42 RETURNING user_id, value_1_agg; ERROR: modifying the partition value of rows is not allowed -- test a small citus.remote_copy_flush_threshold BEGIN; SET LOCAL citus.remote_copy_flush_threshold TO 1; INSERT INTO raw_events_first SELECT * FROM raw_events_first OFFSET 0 ON CONFLICT DO NOTHING; ABORT; -- test fix for issue https://github.com/citusdata/citus/issues/5891 CREATE TABLE dist_table_1( dist_col integer, int_col integer, text_col_1 text, text_col_2 text ); SELECT create_distributed_table('dist_table_1', 'dist_col'); create_distributed_table --------------------------------------------------------------------- (1 row) INSERT INTO dist_table_1 VALUES (1, 1, 'string', 'string'); CREATE TABLE dist_table_2( dist_col integer, int_col integer ); SELECT create_distributed_table('dist_table_2', 'dist_col'); create_distributed_table --------------------------------------------------------------------- (1 row) INSERT INTO dist_table_2 VALUES (1, 1); with a as (select random()) INSERT INTO dist_table_1 SELECT t1.dist_col, 1, 'string', 'string' FROM a, dist_table_1 t1 join dist_table_2 t2 using (dist_col) limit 1 returning text_col_1; text_col_1 --------------------------------------------------------------------- string (1 row) CREATE TABLE dist_table_3( dist_col bigint, int_col integer ); SELECT create_distributed_table('dist_table_3', 'dist_col'); create_distributed_table --------------------------------------------------------------------- (1 row) -- dist_table_2 and dist_table_3 are non-colocated source tables. 
Repartitioning is also not possible due to -- different types for distribution columns. Citus would not be able to handle this complex insert select. INSERT INTO dist_table_1 SELECT dist_table_2.dist_col FROM dist_table_2 JOIN dist_table_3 USING(dist_col); ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator CREATE TABLE dist_table_4( dist_col integer, int_col integer ); SELECT create_distributed_table('dist_table_4', 'dist_col'); create_distributed_table --------------------------------------------------------------------- (1 row) -- Even if target table distribution column is colocated with dist_table_2's distributed column, source tables dist_table_2 and dist_table_4 -- are non-colocated. Hence, SELECT part of the query should be pulled to coordinator. SELECT coordinator_plan($$ EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1 SELECT dist_table_2.dist_col FROM dist_table_2 JOIN dist_table_4 ON dist_table_2.dist_col = dist_table_4.int_col; $$); coordinator_plan --------------------------------------------------------------------- Custom Scan (Citus INSERT ... SELECT) INSERT/SELECT method: pull to coordinator -> Custom Scan (Citus Adaptive) Task Count: 6 (4 rows) -- For INSERT SELECT, when a lateral query references an outer query, push-down is possible even if limit clause exists in the lateral query. -- It is because subquery with limit does not need to be merged at coordinator as it is a lateral query. SELECT coordinator_plan($$ EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1 SELECT d1.dist_col FROM dist_table_1 d1 LEFT JOIN LATERAL (SELECT * FROM dist_table_2 d2 WHERE d1.dist_col = d2.dist_col LIMIT 3) dummy USING(dist_col); $$); coordinator_plan --------------------------------------------------------------------- Custom Scan (Citus Adaptive) Task Count: 4 (2 rows) -- For INSERT SELECT, when push-down is NOT possible when limit clause exists in a subquery at SELECT part of INSERT SELECT. 
-- It is because the subquery with limit needs to be merged at coordinator. SELECT coordinator_plan($$ EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1 SELECT d1.dist_col FROM dist_table_1 d1 LEFT JOIN (SELECT * FROM dist_table_2 LIMIT 3) dummy USING(dist_col); $$); coordinator_plan --------------------------------------------------------------------- Custom Scan (Citus INSERT ... SELECT) INSERT/SELECT method: repartition -> Custom Scan (Citus Adaptive) -> Distributed Subplan XXX_1 -> Limit -> Custom Scan (Citus Adaptive) Task Count: 4 (7 rows) CREATE TABLE dist_table_5(id int, id2 int); SELECT create_distributed_table('dist_table_5','id'); create_distributed_table --------------------------------------------------------------------- (1 row) CREATE TABLE dist_table_6(id int, id2 int); SELECT create_distributed_table('dist_table_6','id'); create_distributed_table --------------------------------------------------------------------- (1 row) -- verify that insert select with union can be pushed down since the UNION is wrapped in a subquery in the FROM clause of the top-level query. SELECT coordinator_plan($$ EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5(id) SELECT id FROM (SELECT id FROM dist_table_5 UNION SELECT id FROM dist_table_6) dummy; $$); coordinator_plan --------------------------------------------------------------------- Custom Scan (Citus Adaptive) Task Count: 4 (2 rows) -- verify that insert select with sublink can be pushed down when tables are colocated.
SELECT coordinator_plan($$ EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id, (SELECT id FROM dist_table_5 WHERE dist_table_5.id = dist_table_6.id) FROM dist_table_6; $$); coordinator_plan --------------------------------------------------------------------- Custom Scan (Citus Adaptive) Task Count: 4 (2 rows) CREATE TABLE ref_table_1(id int); SELECT create_reference_table('ref_table_1'); create_reference_table --------------------------------------------------------------------- (1 row) -- verify that insert select with sublink cannot be pushed down when from clause does not contain any distributed relation. INSERT INTO dist_table_5 SELECT id, (SELECT id FROM dist_table_5 WHERE dist_table_5.id = ref_table_1.id) FROM ref_table_1; ERROR: correlated subqueries are not supported when the FROM clause contains a reference table -- verify that insert select cannot be pushed down when we have a recurring range table in the from clause. SELECT coordinator_plan($$ EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id, (SELECT id FROM ref_table_1 WHERE id = 1) FROM ref_table_1; $$); coordinator_plan --------------------------------------------------------------------- Custom Scan (Citus INSERT ... SELECT) INSERT/SELECT method: pull to coordinator -> Custom Scan (Citus Adaptive) Task Count: 1 (4 rows) -- verify that insert select cannot be pushed down when a reference table is on the outer side of an outer join. SELECT coordinator_plan($$ EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT a.id FROM dist_table_5 a LEFT JOIN ref_table_1 b ON (true) RIGHT JOIN ref_table_1 c ON (true); $$); coordinator_plan --------------------------------------------------------------------- Custom Scan (Citus INSERT ... SELECT) INSERT/SELECT method: pull to coordinator -> Custom Scan (Citus Adaptive) -> Distributed Subplan XXX_1 -> Custom Scan (Citus Adaptive) Task Count: 4 (6 rows) -- verify that insert select cannot be pushed down when it has a recurring outer join in a subquery.
SELECT coordinator_plan($$ EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id FROM ref_table_1 LEFT JOIN dist_table_5 USING(id); $$); coordinator_plan --------------------------------------------------------------------- Custom Scan (Citus INSERT ... SELECT) INSERT/SELECT method: pull to coordinator -> Custom Scan (Citus Adaptive) -> Distributed Subplan XXX_1 -> Custom Scan (Citus Adaptive) Task Count: 4 (6 rows) CREATE TABLE loc_table_1(id int); -- verify that insert select cannot be pushed down when it contains a join between local and distributed tables. SELECT coordinator_plan($$ EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id FROM dist_table_5 JOIN loc_table_1 USING(id); $$); coordinator_plan --------------------------------------------------------------------- Custom Scan (Citus INSERT ... SELECT) INSERT/SELECT method: repartition -> Custom Scan (Citus Adaptive) -> Distributed Subplan XXX_1 -> Seq Scan on loc_table_1 Task Count: 4 (6 rows) CREATE VIEW view_1 AS SELECT id FROM dist_table_6; CREATE MATERIALIZED VIEW view_2 AS SELECT id FROM dist_table_6; -- verify that insert select can be pushed down when it contains a regular (non-materialized) view. SELECT coordinator_plan($$ EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT * FROM view_1; $$); coordinator_plan --------------------------------------------------------------------- Custom Scan (Citus Adaptive) Task Count: 4 (2 rows) -- verify that insert select cannot be pushed down when it contains a materialized view. SELECT coordinator_plan($$ EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT * FROM view_2; $$); coordinator_plan --------------------------------------------------------------------- Custom Scan (Citus INSERT ...
SELECT) INSERT/SELECT method: pull to coordinator -> Seq Scan on view_2 (3 rows) CREATE TABLE append_table(id integer, data text, int_data int); SELECT create_distributed_table('append_table', 'id', 'append'); create_distributed_table --------------------------------------------------------------------- (1 row) SELECT master_create_empty_shard('append_table'); master_create_empty_shard --------------------------------------------------------------------- 13300096 (1 row) -- verify that insert select push down for append tables is not supported. INSERT INTO append_table SELECT * FROM append_table; ERROR: INSERT ... SELECT into an append-distributed table is not supported -- verify that CTEs at top level of INSERT SELECT, that can normally be inlined, would not be inlined by INSERT SELECT pushdown planner -- and handled by pull to coordinator. SELECT coordinator_plan($$ EXPLAIN (COSTS FALSE) WITH cte_1 AS (SELECT id FROM dist_table_5 WHERE id > 5) INSERT INTO dist_table_5 SELECT id FROM dist_table_5 JOIN cte_1 USING(id) OFFSET 5; $$); coordinator_plan --------------------------------------------------------------------- Custom Scan (Citus INSERT ... SELECT) INSERT/SELECT method: pull to coordinator -> Custom Scan (Citus Adaptive) -> Distributed Subplan XXX_1 -> Limit -> Custom Scan (Citus Adaptive) Task Count: 4 (7 rows) -- verify that CTEs at top level of SELECT part, would be inlined by Postgres and pushed down by INSERT SELECT planner. SELECT coordinator_plan($$ EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 WITH cte_1 AS (SELECT id FROM dist_table_5 WHERE id = 5) SELECT id FROM dist_table_5 JOIN cte_1 USING(id); $$); coordinator_plan --------------------------------------------------------------------- Custom Scan (Citus Adaptive) Task Count: 1 (2 rows) SET client_min_messages TO ERROR; DROP SCHEMA multi_insert_select CASCADE;