From 6c3986fa373247516c2b916314b069813bd7117a Mon Sep 17 00:00:00 2001 From: Naisila Puka <37271756+naisila@users.noreply.github.com> Date: Tue, 7 Oct 2025 14:10:11 +0300 Subject: [PATCH] Revert multi_insert_select_0.out addition (#8232) Probably Colm's commits on `GROUP BY RTE` fixes have fixed the test diffs we used to have. --- .../expected/multi_insert_select_0.out | 3812 ----------------- 1 file changed, 3812 deletions(-) delete mode 100644 src/test/regress/expected/multi_insert_select_0.out diff --git a/src/test/regress/expected/multi_insert_select_0.out b/src/test/regress/expected/multi_insert_select_0.out deleted file mode 100644 index f1ee0b7ba..000000000 --- a/src/test/regress/expected/multi_insert_select_0.out +++ /dev/null @@ -1,3812 +0,0 @@ --- --- MULTI_INSERT_SELECT --- -CREATE SCHEMA multi_insert_select; -SET search_path = multi_insert_select,public; -SET citus.next_shard_id TO 13300000; -SET citus.next_placement_id TO 13300000; --- create co-located tables -SET citus.shard_count = 4; -SET citus.shard_replication_factor = 2; --- order of execution might change in parallel executions --- and the error details might contain the worker node --- so be less verbose with \set VERBOSITY TERSE when necessary -CREATE TABLE raw_events_first (user_id int, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint, UNIQUE(user_id, value_1)); -SELECT create_distributed_table('raw_events_first', 'user_id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -CREATE TABLE raw_events_second (user_id int, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint, UNIQUE(user_id, value_1)); -SELECT create_distributed_table('raw_events_second', 'user_id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -CREATE TABLE agg_events (user_id int, value_1_agg int, value_2_agg int, value_3_agg float, value_4_agg bigint, agg_time timestamp, UNIQUE(user_id, value_1_agg)); -SELECT create_distributed_table('agg_events', 'user_id');; - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - --- create the reference table as well -CREATE TABLE reference_table (user_id int); -SELECT create_reference_table('reference_table'); - create_reference_table ---------------------------------------------------------------------- - -(1 row) - -CREATE TABLE insert_select_varchar_test (key varchar, value int); -SELECT create_distributed_table('insert_select_varchar_test', 'key', 'hash'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - --- set back to the defaults -SET citus.shard_count = DEFAULT; -SET citus.shard_replication_factor = DEFAULT; -INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES - (1, now(), 10, 100, 1000.1, 10000); -INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES - (2, now(), 20, 200, 2000.1, 20000); -INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES - (3, now(), 30, 300, 3000.1, 30000); -INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES - (4, now(), 40, 400, 4000.1, 40000); -INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES - (5, now(), 50, 500, 5000.1, 50000); -INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES - (6, now(), 60, 600, 6000.1, 60000); -SET client_min_messages TO DEBUG2; --- raw table to raw table -INSERT INTO raw_events_second SELECT * FROM raw_events_first; -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_first.user_id, raw_events_first."time", raw_events_first.value_1, raw_events_first.value_2, raw_events_first.value_3, raw_events_first.value_4 FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_first.user_id, raw_events_first."time", raw_events_first.value_1, raw_events_first.value_2, raw_events_first.value_3, raw_events_first.value_4 FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_first.user_id, raw_events_first."time", raw_events_first.value_1, raw_events_first.value_2, raw_events_first.value_3, raw_events_first.value_4 FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_first.user_id, raw_events_first."time", raw_events_first.value_1, raw_events_first.value_2, raw_events_first.value_3, raw_events_first.value_4 FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) --- see that our first multi shard INSERT...SELECT works expected -SET client_min_messages TO INFO; -SELECT - raw_events_first.user_id -FROM - raw_events_first, raw_events_second -WHERE - raw_events_first.user_id = raw_events_second.user_id -ORDER BY - user_id DESC; - user_id ---------------------------------------------------------------------- - 6 - 5 - 4 - 3 - 2 - 1 -(6 rows) - --- see that we get unique vialitons -\set VERBOSITY TERSE -INSERT INTO raw_events_second SELECT * FROM raw_events_first; -ERROR: duplicate key value violates unique constraint "raw_events_second_user_id_value_1_key_13300004" -\set VERBOSITY DEFAULT --- stable functions should be allowed -INSERT INTO raw_events_second (user_id, time) -SELECT - user_id, now() -FROM - raw_events_first -WHERE - user_id < 0; -INSERT INTO raw_events_second (user_id) -SELECT - user_id -FROM - raw_events_first -WHERE - time > now() + interval '1 day'; --- hide version-dependent PL/pgSQL context messages -\set VERBOSITY terse --- make sure we evaluate stable functions on the master, once -CREATE OR REPLACE FUNCTION evaluate_on_master() -RETURNS int LANGUAGE plpgsql STABLE -AS $function$ -BEGIN - RAISE NOTICE 'evaluating on master'; - RETURN 0; -END; -$function$; -INSERT INTO raw_events_second (user_id, value_1) -SELECT - user_id, evaluate_on_master() -FROM - raw_events_first -WHERE - user_id < 0; -NOTICE: evaluating on master --- make sure we don't evaluate stable functions with column arguments -SET citus.enable_metadata_sync TO OFF; -CREATE OR REPLACE FUNCTION evaluate_on_master(x int) -RETURNS int LANGUAGE plpgsql STABLE -AS $function$ -BEGIN - RAISE NOTICE 'evaluating on master'; - RETURN x; -END; -$function$; -RESET citus.enable_metadata_sync; -INSERT INTO raw_events_second (user_id, value_1) -SELECT - user_id, evaluate_on_master(value_1) -FROM - raw_events_first -WHERE - user_id = 0; -ERROR: function multi_insert_select.evaluate_on_master(integer) does not exist --- add one more row -INSERT INTO raw_events_first (user_id, time) VALUES - (7, now()); --- try a single shard query -SET client_min_messages TO DEBUG2; -INSERT INTO raw_events_second (user_id, time) SELECT user_id, time FROM raw_events_first WHERE user_id = 7; -DEBUG: Creating router plan -DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id, "time") SELECT raw_events_first.user_id, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) 7) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away -SET client_min_messages TO INFO; --- add one more row -INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES - (8, now(), 80, 800, 8000, 80000); --- reorder columns -SET client_min_messages TO DEBUG2; -INSERT INTO raw_events_second (value_2, value_1, value_3, value_4, user_id, time) -SELECT - value_2, value_1, value_3, value_4, user_id, time -FROM - raw_events_first -WHERE - user_id = 8; -DEBUG: Creating router plan -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_first.user_id, raw_events_first."time", raw_events_first.value_1, raw_events_first.value_2, raw_events_first.value_3, raw_events_first.value_4 FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) 8) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away --- a zero shard select -INSERT INTO raw_events_second (value_2, value_1, value_3, value_4, user_id, time) -SELECT - value_2, value_1, value_3, value_4, user_id, time -FROM - raw_events_first -WHERE - false; -DEBUG: Creating router plan -DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away --- another zero shard select -INSERT INTO raw_events_second (value_2, value_1, value_3, value_4, user_id, time) -SELECT - value_2, value_1, value_3, value_4, user_id, time -FROM - raw_events_first -WHERE - 0 != 0; -DEBUG: Creating router plan -DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away --- add one more row -SET client_min_messages TO INFO; -INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES - (9, now(), 90, 900, 9000, 90000); --- show that RETURNING also works -SET client_min_messages TO DEBUG2; -INSERT INTO raw_events_second (user_id, value_1, value_3) -SELECT - user_id, value_1, value_3 -FROM - raw_events_first -WHERE - value_3 = 9000 -RETURNING *; -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id, value_1, value_3) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first.value_3 FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.value_3 OPERATOR(pg_catalog.=) (9000)::double precision) AND (raw_events_first.user_id IS NOT NULL)) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id, value_1, value_3) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first.value_3 FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.value_3 OPERATOR(pg_catalog.=) (9000)::double precision) AND (raw_events_first.user_id IS NOT NULL)) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id, value_1, value_3) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first.value_3 FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((raw_events_first.value_3 OPERATOR(pg_catalog.=) (9000)::double precision) AND (raw_events_first.user_id IS NOT NULL)) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id, value_1, value_3) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first.value_3 FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.value_3 OPERATOR(pg_catalog.=) (9000)::double precision) AND (raw_events_first.user_id IS NOT NULL)) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 - user_id | time | value_1 | value_2 | value_3 | value_4 ---------------------------------------------------------------------- - 9 | | 90 | | 9000 | -(1 row) - --- hits two shards -\set VERBOSITY TERSE -INSERT INTO raw_events_second (user_id, value_1, value_3) -SELECT - user_id, value_1, value_3 -FROM - raw_events_first -WHERE - user_id = 9 OR user_id = 16 -RETURNING *; -DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id, value_1, value_3) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first.value_3 FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (((raw_events_first.user_id OPERATOR(pg_catalog.=) 9) OR (raw_events_first.user_id OPERATOR(pg_catalog.=) 16)) AND (raw_events_first.user_id IS NOT NULL)) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 -DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id, value_1, value_3) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first.value_3 FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (((raw_events_first.user_id OPERATOR(pg_catalog.=) 9) OR (raw_events_first.user_id OPERATOR(pg_catalog.=) 16)) AND (raw_events_first.user_id IS NOT NULL)) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 -ERROR: duplicate key value violates unique constraint "raw_events_second_user_id_value_1_key_13300007" --- now do some aggregations -INSERT INTO agg_events -SELECT - user_id, sum(value_1), avg(value_2), sum(value_3), count(value_4) -FROM - raw_events_first -GROUP BY - user_id; -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg, value_2_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, avg(raw_events_first.value_2) AS avg, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.user_id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg, value_2_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, avg(raw_events_first.value_2) AS avg, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.user_id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg, value_2_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, avg(raw_events_first.value_2) AS avg, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.user_id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg, value_2_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, avg(raw_events_first.value_2) AS avg, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.user_id --- group by column not exists on the SELECT target list -INSERT INTO agg_events (value_3_agg, value_4_agg, value_1_agg, user_id) -SELECT - sum(value_3), count(value_4), sum(value_1), user_id -FROM - raw_events_first -GROUP BY - value_2, user_id -RETURNING *; -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.value_2, raw_events_first.user_id RETURNING citus_table_alias.user_id, citus_table_alias.value_1_agg, citus_table_alias.value_2_agg, citus_table_alias.value_3_agg, citus_table_alias.value_4_agg, citus_table_alias.agg_time -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.value_2, raw_events_first.user_id RETURNING citus_table_alias.user_id, citus_table_alias.value_1_agg, citus_table_alias.value_2_agg, citus_table_alias.value_3_agg, citus_table_alias.value_4_agg, citus_table_alias.agg_time -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.value_2, raw_events_first.user_id RETURNING citus_table_alias.user_id, citus_table_alias.value_1_agg, citus_table_alias.value_2_agg, citus_table_alias.value_3_agg, citus_table_alias.value_4_agg, citus_table_alias.agg_time -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.value_2, raw_events_first.user_id RETURNING citus_table_alias.user_id, citus_table_alias.value_1_agg, citus_table_alias.value_2_agg, citus_table_alias.value_3_agg, citus_table_alias.value_4_agg, citus_table_alias.agg_time -ERROR: duplicate key value violates unique constraint "agg_events_user_id_value_1_agg_key_13300008" --- some subquery tests -INSERT INTO agg_events - (value_1_agg, - user_id) -SELECT SUM(value_1), - id -FROM (SELECT raw_events_second.user_id AS id, - raw_events_second.value_1 - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.user_id) AS foo -GROUP BY id -ORDER BY id; -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT foo.id, sum(foo.value_1) AS sum FROM (SELECT raw_events_second.user_id AS id, raw_events_second.value_1 FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id)) foo WHERE (id IS NOT NULL) GROUP BY foo.id ORDER BY foo.id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT foo.id, sum(foo.value_1) AS sum FROM (SELECT raw_events_second.user_id AS id, raw_events_second.value_1 FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id)) foo WHERE (id IS NOT NULL) GROUP BY foo.id ORDER BY foo.id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT foo.id, sum(foo.value_1) AS sum FROM (SELECT raw_events_second.user_id AS id, raw_events_second.value_1 FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id)) foo WHERE (id IS NOT NULL) GROUP BY foo.id ORDER BY foo.id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT foo.id, sum(foo.value_1) AS sum FROM (SELECT raw_events_second.user_id AS id, raw_events_second.value_1 FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id)) foo WHERE (id IS NOT NULL) GROUP BY foo.id ORDER BY foo.id -ERROR: duplicate key value violates unique constraint "agg_events_user_id_value_1_agg_key_13300008" --- subquery one more level depth -INSERT INTO agg_events - (value_4_agg, - value_1_agg, - user_id) -SELECT v4, - v1, - id -FROM (SELECT SUM(raw_events_second.value_4) AS v4, - SUM(raw_events_first.value_1) AS v1, - raw_events_second.user_id AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.user_id - GROUP BY raw_events_second.user_id) AS foo -ORDER BY id; -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg, value_4_agg) SELECT foo.id, foo.v1, foo.v4 FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id) foo WHERE (foo.id IS NOT NULL) ORDER BY foo.id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg, value_4_agg) SELECT foo.id, foo.v1, foo.v4 FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id) foo WHERE (foo.id IS NOT NULL) ORDER BY foo.id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg, value_4_agg) SELECT foo.id, foo.v1, foo.v4 FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id) foo WHERE (foo.id IS NOT NULL) ORDER BY foo.id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg, value_4_agg) SELECT foo.id, foo.v1, foo.v4 FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id) foo WHERE (foo.id IS NOT NULL) ORDER BY foo.id -ERROR: duplicate key value violates unique constraint "agg_events_user_id_value_1_agg_key_13300008" -\set VERBOSITY DEFAULT --- join between subqueries -INSERT INTO agg_events - (user_id) -SELECT f2.id FROM -(SELECT - id -FROM (SELECT reference_table.user_id AS id - FROM raw_events_first, - reference_table - WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f -INNER JOIN -(SELECT v4, - v1, - id -FROM (SELECT SUM(raw_events_second.value_4) AS v4, - SUM(raw_events_first.value_1) AS v1, - raw_events_second.user_id AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.user_id - GROUP BY raw_events_second.user_id - HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 -ON (f.id = f2.id); -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f2.id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f2.id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f2.id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f2.id IS NOT NULL) --- add one more level subqueris on top of subquery JOINs -INSERT INTO agg_events - (user_id, value_4_agg) -SELECT - outer_most.id, max(outer_most.value) -FROM -( - SELECT f2.id as id, f2.v4 as value FROM - (SELECT - id - FROM (SELECT reference_table.user_id AS id - FROM raw_events_first, - reference_table - WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f - INNER JOIN - (SELECT v4, - v1, - id - FROM (SELECT SUM(raw_events_second.value_4) AS v4, - SUM(raw_events_first.value_1) AS v1, - raw_events_second.user_id AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.user_id - GROUP BY raw_events_second.user_id - HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 -ON (f.id = f2.id)) as outer_most -GROUP BY - outer_most.id; -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (id IS NOT NULL) GROUP BY outer_most.id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (id IS NOT NULL) GROUP BY outer_most.id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (id IS NOT NULL) GROUP BY outer_most.id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (id IS NOT NULL) GROUP BY outer_most.id --- subqueries in WHERE clause -INSERT INTO raw_events_second - (user_id) -SELECT user_id -FROM raw_events_first -WHERE user_id IN (SELECT user_id - FROM raw_events_second - WHERE user_id = 2); -DEBUG: Creating router plan -DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) 2))) AND (raw_events_first.user_id IS NOT NULL)) -INSERT INTO raw_events_second - (user_id) -SELECT user_id -FROM raw_events_first -WHERE user_id IN (SELECT user_id - FROM raw_events_second - WHERE user_id != 2 AND value_1 = 2000) -ON conflict (user_id, value_1) DO NOTHING; -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.<>) 2) AND (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000)))) AND (raw_events_first.user_id IS NOT NULL)) ON CONFLICT(user_id, value_1) DO NOTHING -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300005 raw_events_second WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.<>) 2) AND (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000)))) AND (raw_events_first.user_id IS NOT NULL)) ON CONFLICT(user_id, value_1) DO NOTHING -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300006 raw_events_second WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.<>) 2) AND (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000)))) AND (raw_events_first.user_id IS NOT NULL)) ON CONFLICT(user_id, value_1) DO NOTHING -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.<>) 2) AND (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000)))) AND (raw_events_first.user_id IS NOT NULL)) ON CONFLICT(user_id, value_1) DO NOTHING -INSERT INTO raw_events_second - (user_id) -SELECT user_id -FROM raw_events_first -WHERE user_id IN (SELECT user_id - FROM raw_events_second WHERE false); -DEBUG: Creating router plan -DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away -INSERT INTO raw_events_second - (user_id) -SELECT user_id -FROM raw_events_first -WHERE user_id IN (SELECT user_id - FROM raw_events_second - WHERE value_1 = 1000 OR value_1 = 2000 OR value_1 = 3000); -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second WHERE ((raw_events_second.value_1 OPERATOR(pg_catalog.=) 1000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 3000)))) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300005 raw_events_second WHERE ((raw_events_second.value_1 OPERATOR(pg_catalog.=) 1000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 3000)))) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300006 raw_events_second WHERE ((raw_events_second.value_1 OPERATOR(pg_catalog.=) 1000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 3000)))) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second WHERE ((raw_events_second.value_1 OPERATOR(pg_catalog.=) 1000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 3000)))) AND (raw_events_first.user_id IS NOT NULL)) --- lets mix subqueries in FROM clause and subqueries in WHERE -INSERT INTO agg_events - (user_id) -SELECT f2.id FROM -(SELECT - id -FROM (SELECT reference_table.user_id AS id - FROM raw_events_first, - reference_table - WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f -INNER JOIN -(SELECT v4, - v1, - id -FROM (SELECT SUM(raw_events_second.value_4) AS v4, - SUM(raw_events_first.value_1) AS v1, - raw_events_second.user_id AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.user_id - GROUP BY raw_events_second.user_id - HAVING SUM(raw_events_second.value_4) > 1000) AS foo2 ) as f2 -ON (f.id = f2.id) -WHERE f.id IN (SELECT user_id - FROM raw_events_second); -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (1000)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE ((f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second)) AND (f2.id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (1000)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE ((f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300005 raw_events_second)) AND (f2.id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (1000)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE ((f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300006 raw_events_second)) AND (f2.id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (1000)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE ((f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second)) AND (f2.id IS NOT NULL)) --- some UPSERTS -INSERT INTO agg_events AS ae - ( - user_id, - value_1_agg, - agg_time - ) -SELECT user_id, - value_1, - time -FROM raw_events_first -ON conflict (user_id, value_1_agg) -DO UPDATE - SET agg_time = EXCLUDED.agg_time - WHERE ae.agg_time < EXCLUDED.agg_time; -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) --- upserts with returning -INSERT INTO agg_events AS ae - ( - user_id, - value_1_agg, - agg_time - ) -SELECT user_id, - value_1, - time -FROM raw_events_first -ON conflict (user_id, value_1_agg) -DO UPDATE - SET agg_time = EXCLUDED.agg_time - WHERE ae.agg_time < EXCLUDED.agg_time -RETURNING user_id, value_1_agg; -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) RETURNING ae.user_id, ae.value_1_agg -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) RETURNING ae.user_id, ae.value_1_agg -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) RETURNING ae.user_id, ae.value_1_agg -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) RETURNING ae.user_id, ae.value_1_agg - user_id | value_1_agg ---------------------------------------------------------------------- - 7 | -(1 row) - -INSERT INTO agg_events (user_id, value_1_agg) -SELECT - user_id, sum(value_1 + value_2) -FROM - raw_events_first GROUP BY user_id; -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) AS sum FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.user_id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) AS sum FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.user_id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) AS sum FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.user_id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) AS sum FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.user_id --- FILTER CLAUSE -INSERT INTO agg_events (user_id, value_1_agg) -SELECT - user_id, sum(value_1 + value_2) FILTER (where value_3 = 15) -FROM - raw_events_first GROUP BY user_id; -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) FILTER (WHERE (raw_events_first.value_3 OPERATOR(pg_catalog.=) (15)::double precision)) AS sum FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.user_id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) FILTER (WHERE (raw_events_first.value_3 OPERATOR(pg_catalog.=) (15)::double precision)) AS sum FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.user_id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) FILTER (WHERE (raw_events_first.value_3 OPERATOR(pg_catalog.=) (15)::double precision)) AS sum FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.user_id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) FILTER (WHERE (raw_events_first.value_3 OPERATOR(pg_catalog.=) (15)::double precision)) AS sum FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (user_id IS NOT NULL) GROUP BY raw_events_first.user_id --- a test with reference table JOINs -INSERT INTO - agg_events (user_id, value_1_agg) -SELECT - raw_events_first.user_id, sum(value_1) -FROM - reference_table, raw_events_first -WHERE - raw_events_first.user_id = reference_table.user_id -GROUP BY - raw_events_first.user_id; -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum FROM multi_insert_select.reference_table_13300012 reference_table, multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id) AND (user_id IS NOT NULL)) GROUP BY raw_events_first.user_id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum FROM multi_insert_select.reference_table_13300012 reference_table, multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id) AND (user_id IS NOT NULL)) GROUP BY raw_events_first.user_id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum FROM multi_insert_select.reference_table_13300012 reference_table, multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id) AND (user_id IS NOT NULL)) GROUP BY raw_events_first.user_id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum FROM multi_insert_select.reference_table_13300012 reference_table, multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id) AND (user_id IS NOT NULL)) GROUP BY raw_events_first.user_id --- a note on the outer joins is that --- we filter out outer join results --- where partition column returns --- NULL. Thus, we could INSERT less rows --- than we expect from subquery result. --- see the following tests -SET client_min_messages TO INFO; --- we don't want to see constraint violations, so truncate first -TRUNCATE agg_events; --- add a row to first table to make table contents different -INSERT INTO raw_events_second (user_id, time, value_1, value_2, value_3, value_4) VALUES - (10, now(), 100, 10000, 10000, 100000); -DELETE FROM raw_events_second WHERE user_id = 2; --- we select 11 rows -SELECT t1.user_id AS col1, - t2.user_id AS col2 - FROM raw_events_first t1 - FULL JOIN raw_events_second t2 - ON t1.user_id = t2.user_id - ORDER BY t1.user_id, - t2.user_id; - col1 | col2 ---------------------------------------------------------------------- - 1 | 1 - 2 | - 3 | 3 - 4 | 4 - 5 | 5 - 6 | 6 - 7 | 7 - 8 | 8 - 9 | 9 - | 10 -(10 rows) - -SET client_min_messages TO DEBUG2; --- we insert 10 rows since we filtered out --- NULL partition column values -INSERT INTO agg_events (user_id, value_1_agg) -SELECT t1.user_id AS col1, - t2.user_id AS col2 -FROM raw_events_first t1 - FULL JOIN raw_events_second t2 - ON t1.user_id = t2.user_id; -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT t1.user_id AS col1, t2.user_id AS col2 FROM (multi_insert_select.raw_events_first_13300000 t1 FULL JOIN multi_insert_select.raw_events_second_13300004 t2 ON ((t1.user_id OPERATOR(pg_catalog.=) t2.user_id))) WHERE (t1.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT t1.user_id AS col1, t2.user_id AS col2 FROM (multi_insert_select.raw_events_first_13300001 t1 FULL JOIN multi_insert_select.raw_events_second_13300005 t2 ON ((t1.user_id OPERATOR(pg_catalog.=) t2.user_id))) WHERE (t1.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT t1.user_id AS col1, t2.user_id AS col2 FROM (multi_insert_select.raw_events_first_13300002 t1 FULL JOIN multi_insert_select.raw_events_second_13300006 t2 ON ((t1.user_id OPERATOR(pg_catalog.=) t2.user_id))) WHERE (t1.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT t1.user_id AS col1, t2.user_id AS col2 FROM (multi_insert_select.raw_events_first_13300003 t1 FULL JOIN multi_insert_select.raw_events_second_13300007 t2 ON ((t1.user_id OPERATOR(pg_catalog.=) t2.user_id))) WHERE (t1.user_id IS NOT NULL) -SET client_min_messages TO INFO; --- see that the results are different from the SELECT query -SELECT - user_id, value_1_agg -FROM - agg_events -ORDER BY - user_id, value_1_agg; - user_id | value_1_agg ---------------------------------------------------------------------- - 1 | 1 - 2 | - 3 | 3 - 4 | 4 - 5 | 5 - 6 | 6 - 7 | 7 - 8 | 8 - 9 | 9 -(9 rows) - --- we don't want to see constraint violations, so truncate first -SET client_min_messages TO INFO; -TRUNCATE agg_events; -SET client_min_messages TO DEBUG2; --- DISTINCT clause -INSERT INTO agg_events (value_1_agg, user_id) - SELECT - DISTINCT value_1, user_id - FROM - raw_events_first; -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) --- we don't want to see constraint violations, so truncate first -SET client_min_messages TO INFO; -truncate agg_events; -SET client_min_messages TO DEBUG2; --- DISTINCT ON clauses are supported --- distinct on(non-partition column) --- values are pulled to master -INSERT INTO agg_events (value_1_agg, user_id) - SELECT - DISTINCT ON (value_1) value_1, user_id - FROM - raw_events_first; -DEBUG: cannot push down this subquery -DETAIL: Distinct on columns without partition column is currently unsupported -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: Collecting INSERT ... SELECT results on coordinator -SELECT user_id, value_1_agg FROM agg_events ORDER BY 1,2; -DEBUG: Router planner cannot handle multi-shard select queries - user_id | value_1_agg ---------------------------------------------------------------------- - 1 | 10 - 2 | 20 - 3 | 30 - 4 | 40 - 5 | 50 - 6 | 60 - 7 | - 8 | 80 - 9 | 90 -(9 rows) - --- we don't want to see constraint violations, so truncate first -SET client_min_messages TO INFO; -truncate agg_events; -SET client_min_messages TO DEBUG2; --- distinct on(partition column) --- queries are forwared to workers -INSERT INTO agg_events (value_1_agg, user_id) - SELECT - DISTINCT ON (user_id) value_1, user_id - FROM - raw_events_first; -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT ON (raw_events_first.user_id) raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT ON (raw_events_first.user_id) raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT ON (raw_events_first.user_id) raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT ON (raw_events_first.user_id) raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) -SELECT user_id, value_1_agg FROM agg_events ORDER BY 1,2; -DEBUG: Router planner cannot handle multi-shard select queries - user_id | value_1_agg ---------------------------------------------------------------------- - 1 | 10 - 2 | 20 - 3 | 30 - 4 | 40 - 5 | 50 - 6 | 60 - 7 | - 8 | 80 - 9 | 90 -(9 rows) - --- We support CTEs -BEGIN; -WITH fist_table_agg AS MATERIALIZED - (SELECT max(value_1)+1 as v1_agg, user_id FROM raw_events_first GROUP BY user_id) -INSERT INTO agg_events - (value_1_agg, user_id) - SELECT - v1_agg, user_id - FROM - fist_table_agg; -DEBUG: distributed INSERT ... SELECT can only select from distributed tables -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for CTE fist_table_agg: SELECT (max(value_1) OPERATOR(pg_catalog.+) 1) AS v1_agg, user_id FROM multi_insert_select.raw_events_first GROUP BY user_id -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, v1_agg AS value_1_agg FROM (SELECT fist_table_agg.user_id, fist_table_agg.v1_agg FROM (SELECT intermediate_result.v1_agg, intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(v1_agg integer, user_id integer)) fist_table_agg) citus_insert_select_subquery -DEBUG: Creating router plan -DEBUG: Collecting INSERT ... SELECT results on coordinator -ROLLBACK; --- We do support CTEs that are referenced in the target list -INSERT INTO agg_events - WITH sub_cte AS (SELECT 1) - SELECT - raw_events_first.user_id, (SELECT * FROM sub_cte) - FROM - raw_events_first; -DEBUG: CTE sub_cte is going to be inlined via distributed planning -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, (SELECT sub_cte."?column?" FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, (SELECT sub_cte."?column?" FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, (SELECT sub_cte."?column?" FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, (SELECT sub_cte."?column?" FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) --- We support set operations -BEGIN; -INSERT INTO - raw_events_first(user_id) -SELECT - user_id -FROM - ((SELECT user_id FROM raw_events_first) UNION - (SELECT user_id FROM raw_events_second)) as foo; -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id) SELECT foo.user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second) foo WHERE (foo.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300001 AS citus_table_alias (user_id) SELECT foo.user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300005 raw_events_second) foo WHERE (foo.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300002 AS citus_table_alias (user_id) SELECT foo.user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300006 raw_events_second) foo WHERE (foo.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300003 AS citus_table_alias (user_id) SELECT foo.user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second) foo WHERE (foo.user_id IS NOT NULL) -ROLLBACK; --- We do support set operations through recursive planning -BEGIN; -SET LOCAL client_min_messages TO DEBUG; -INSERT INTO - raw_events_first(user_id) - (SELECT user_id FROM raw_events_first) INTERSECT - (SELECT user_id FROM raw_events_first); -DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM multi_insert_select.raw_events_first -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_2 for subquery SELECT user_id FROM multi_insert_select.raw_events_first -DEBUG: Creating router plan -DEBUG: generating subplan XXX_3 for subquery SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) INTERSECT SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) citus_insert_select_subquery -DEBUG: Creating router plan -DEBUG: Collecting INSERT ... SELECT results on coordinator -ROLLBACK; --- If the query is router plannable then it is executed via the coordinator -INSERT INTO - raw_events_first(user_id) -SELECT - user_id -FROM - ((SELECT user_id FROM raw_events_first WHERE user_id = 15) EXCEPT - (SELECT user_id FROM raw_events_second where user_id = 17)) as foo; -DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -DEBUG: router planner does not support queries that reference non-colocated distributed tables -DEBUG: Distributed planning for a fast-path router query -DEBUG: Creating router plan -DEBUG: query has a single distribution column value: 15 -DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM multi_insert_select.raw_events_first WHERE (user_id OPERATOR(pg_catalog.=) 15) -DEBUG: Distributed planning for a fast-path router query -DEBUG: Creating router plan -DEBUG: query has a single distribution column value: 17 -DEBUG: generating subplan XXX_2 for subquery SELECT user_id FROM multi_insert_select.raw_events_second WHERE (user_id OPERATOR(pg_catalog.=) 17) -DEBUG: Creating router plan -DEBUG: generating subplan XXX_3 for subquery SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) EXCEPT SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) foo -DEBUG: Creating router plan -DEBUG: Collecting INSERT ... SELECT results on coordinator --- some supported LEFT joins - INSERT INTO agg_events (user_id) - SELECT - raw_events_first.user_id - FROM - raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id; -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300000 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_first.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300001 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300005 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_first.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300002 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300006 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_first.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300003 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300007 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_first.user_id IS NOT NULL) - SET client_min_messages to debug3; - INSERT INTO agg_events (user_id) - SELECT - raw_events_second.user_id - FROM - reference_table LEFT JOIN raw_events_second ON reference_table.user_id = raw_events_second.user_id; -DEBUG: no shard pruning constraints on raw_events_second found -DEBUG: shard count after pruning for raw_events_second: 4 -DEBUG: cannot perform a lateral outer join when a distributed subquery references a reference table -DEBUG: no shard pruning constraints on raw_events_second found -DEBUG: shard count after pruning for raw_events_second: 4 -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: a push down safe left join with recurring left side -DEBUG: no shard pruning constraints on raw_events_second found -DEBUG: shard count after pruning for raw_events_second: 4 -DEBUG: assigned task to node localhost:xxxxx -DEBUG: assigned task to node localhost:xxxxx -DEBUG: assigned task to node localhost:xxxxx -DEBUG: assigned task to node localhost:xxxxx -DEBUG: performing repartitioned INSERT ... SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'user_id' - SET client_min_messages to debug2; - INSERT INTO agg_events (user_id) - SELECT - raw_events_first.user_id - FROM - raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id - WHERE raw_events_first.user_id = 10; -DEBUG: Creating router plan -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300000 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) 10) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: Skipping target shard interval 13300009 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300010 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300011 since SELECT query for it pruned away - INSERT INTO agg_events (user_id) - SELECT - raw_events_first.user_id - FROM - raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id - WHERE raw_events_second.user_id = 10 OR raw_events_second.user_id = 11; -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300000 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (((raw_events_second.user_id OPERATOR(pg_catalog.=) 10) OR (raw_events_second.user_id OPERATOR(pg_catalog.=) 11)) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300001 raw_events_first LEFT JOIN (SELECT NULL::integer AS user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 WHERE false) raw_events_second(user_id, "time", value_1, value_2, value_3, value_4) ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (((raw_events_second.user_id OPERATOR(pg_catalog.=) 10) OR (raw_events_second.user_id OPERATOR(pg_catalog.=) 11)) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300002 raw_events_first LEFT JOIN (SELECT NULL::integer AS user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 WHERE false) raw_events_second(user_id, "time", value_1, value_2, value_3, value_4) ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (((raw_events_second.user_id OPERATOR(pg_catalog.=) 10) OR (raw_events_second.user_id OPERATOR(pg_catalog.=) 11)) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300003 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300007 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (((raw_events_second.user_id OPERATOR(pg_catalog.=) 10) OR (raw_events_second.user_id OPERATOR(pg_catalog.=) 11)) AND (raw_events_first.user_id IS NOT NULL)) - INSERT INTO agg_events (user_id) - SELECT - raw_events_first.user_id - FROM - raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id - WHERE raw_events_first.user_id = 10 AND raw_events_first.user_id = 20; -DEBUG: Creating router plan -DEBUG: Skipping target shard interval 13300008 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300009 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300010 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300011 since SELECT query for it pruned away - INSERT INTO agg_events (user_id) - SELECT - raw_events_first.user_id - FROM - raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id - WHERE raw_events_first.user_id = 10 AND raw_events_second.user_id = 20; -DEBUG: Creating router plan -DEBUG: Skipping target shard interval 13300008 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300009 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300010 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300011 since SELECT query for it pruned away - INSERT INTO agg_events (user_id) - SELECT - raw_events_first.user_id - FROM - raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id - WHERE raw_events_first.user_id IN (19, 20, 21); -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300000 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300001 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300005 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300002 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300006 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM ((SELECT NULL::integer AS user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 WHERE false) raw_events_first(user_id, "time", value_1, value_2, value_3, value_4) LEFT JOIN multi_insert_select.raw_events_second_13300007 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL)) - INSERT INTO agg_events (user_id) - SELECT - raw_events_first.user_id - FROM - raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id - WHERE raw_events_second.user_id IN (19, 20, 21); -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300000 raw_events_first JOIN multi_insert_select.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300001 raw_events_first JOIN multi_insert_select.raw_events_second_13300005 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300002 raw_events_first JOIN multi_insert_select.raw_events_second_13300006 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300003 raw_events_first JOIN (SELECT NULL::integer AS user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 WHERE false) raw_events_second(user_id, "time", value_1, value_2, value_3, value_4) ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL)) -SET client_min_messages TO WARNING; - -- following query should use repartitioned joins and results should - -- be routed via coordinator - SET citus.enable_repartition_joins TO true; - INSERT INTO agg_events - (user_id) - SELECT raw_events_first.user_id - FROM raw_events_first, - raw_events_second - WHERE raw_events_second.user_id = raw_events_first.value_1 - AND raw_events_first.value_1 = 12; - -- some unsupported LEFT/INNER JOINs - -- JOIN on one table with partition column other is not - INSERT INTO agg_events (user_id) - SELECT - raw_events_first.user_id - FROM - raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1; -ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns - -- same as the above with INNER JOIN - INSERT INTO agg_events (user_id) - SELECT - raw_events_first.user_id - FROM - raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1; - -- a not meaningful query - INSERT INTO agg_events - (user_id) - SELECT raw_events_second.user_id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_first.value_1; -ERROR: cannot perform distributed planning on this query -DETAIL: Cartesian products are currently unsupported - -- both tables joined on non-partition columns - INSERT INTO agg_events (user_id) - SELECT - raw_events_first.user_id - FROM - raw_events_first LEFT JOIN raw_events_second ON raw_events_first.value_1 = raw_events_second.value_1; -ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns - -- same as the above with INNER JOIN - -- we support this with route to coordinator - SELECT coordinator_plan($Q$ - EXPLAIN (costs off) - INSERT INTO agg_events (user_id) - SELECT - raw_events_first.user_id - FROM - raw_events_first INNER JOIN raw_events_second ON raw_events_first.value_1 = raw_events_second.value_1; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: pull to coordinator - -> Custom Scan (Citus Adaptive) - Task Count: 6 -(4 rows) - --- EXPLAIN ANALYZE is not supported for INSERT ... SELECT via coordinator -EXPLAIN (costs off, analyze on, BUFFERS OFF) - INSERT INTO agg_events (user_id) - SELECT - raw_events_first.user_id - FROM - raw_events_first INNER JOIN raw_events_second ON raw_events_first.value_1 = raw_events_second.value_1; -ERROR: EXPLAIN ANALYZE is currently not supported for INSERT ... SELECT commands via coordinator --- even if there is a filter on the partition key, since the join is not on the partition key we reject --- this query -INSERT INTO agg_events (user_id) -SELECT - raw_events_first.user_id -FROM - raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1 -WHERE - raw_events_first.user_id = 10; -ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns - -- same as the above with INNER JOIN - -- we support this with route to coordinator - SELECT coordinator_plan($Q$ - EXPLAIN (costs off) - INSERT INTO agg_events (user_id) - SELECT - raw_events_first.user_id - FROM - raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1 - WHERE raw_events_first.user_id = 10; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: pull to coordinator - -> Custom Scan (Citus Adaptive) - Task Count: 6 -(4 rows) - - -- make things a bit more complicate with IN clauses - -- we support this with route to coordinator - SELECT coordinator_plan($Q$ - EXPLAIN (costs off) - INSERT INTO agg_events (user_id) - SELECT - raw_events_first.user_id - FROM - raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1 - WHERE raw_events_first.value_1 IN (10, 11,12) OR raw_events_second.user_id IN (1,2,3,4); -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: pull to coordinator - -> Custom Scan (Citus Adaptive) - Task Count: 6 -(4 rows) - - -- implicit join on non partition column should also not be pushed down, - -- so we fall back to route via coordinator - SELECT coordinator_plan($Q$ - EXPLAIN (costs off) - INSERT INTO agg_events - (user_id) - SELECT raw_events_first.user_id - FROM raw_events_first, - raw_events_second - WHERE raw_events_second.user_id = raw_events_first.value_1; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: pull to coordinator - -> Custom Scan (Citus Adaptive) - Task Count: 6 -(4 rows) - -RESET client_min_messages; - -- The following is again a tricky query for Citus. If the given filter was - -- on value_1 as shown in the above, Citus could push it down and use - -- distributed INSERT/SELECT. But we instead fall back to route via coordinator. - SELECT coordinator_plan($Q$ - EXPLAIN (costs off) - INSERT INTO agg_events - (user_id) - SELECT raw_events_first.user_id - FROM raw_events_first, - raw_events_second - WHERE raw_events_second.user_id = raw_events_first.value_1 - AND raw_events_first.value_2 = 12; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: pull to coordinator - -> Custom Scan (Citus Adaptive) - Task Count: 6 -(4 rows) - - -- foo is not joined on the partition key so the query is not - -- pushed down. So instead we route via coordinator. - SELECT coordinator_plan($Q$ - EXPLAIN (costs off) - INSERT INTO agg_events - (user_id, value_4_agg) - SELECT - outer_most.id, max(outer_most.value) - FROM - ( - SELECT f2.id as id, f2.v4 as value FROM - (SELECT - id - FROM (SELECT reference_table.user_id AS id - FROM raw_events_first LEFT JOIN - reference_table - ON (raw_events_first.value_1 = reference_table.user_id)) AS foo) as f - INNER JOIN - (SELECT v4, - v1, - id - FROM (SELECT SUM(raw_events_second.value_4) AS v4, - SUM(raw_events_first.value_1) AS v1, - raw_events_second.user_id AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.user_id - GROUP BY raw_events_second.user_id - HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 - ON (f.id = f2.id)) as outer_most - GROUP BY - outer_most.id; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: pull to coordinator - -> HashAggregate - Group Key: remote_scan.user_id - -> Custom Scan (Citus Adaptive) - -> Distributed Subplan XXX_1 - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(8 rows) - - -- if the given filter was on value_1 as shown in the above, Citus could - -- push it down. But here the query falls back to route via coordinator. - SELECT coordinator_plan($Q$ - EXPLAIN (costs off) - INSERT INTO agg_events - (user_id) - SELECT raw_events_first.user_id - FROM raw_events_first, - raw_events_second - WHERE raw_events_second.user_id = raw_events_first.value_1 - AND raw_events_first.value_2 = 12; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: pull to coordinator - -> Custom Scan (Citus Adaptive) - Task Count: 6 -(4 rows) - - -- foo is not joined on the partition key so the query is not - -- pushed down, and it falls back to route via coordinator -SELECT coordinator_plan($Q$ -EXPLAIN (costs off) - INSERT INTO agg_events - (user_id, value_4_agg) - SELECT - outer_most.id, max(outer_most.value) - FROM - ( - SELECT f2.id as id, f2.v4 as value FROM - (SELECT - id - FROM (SELECT reference_table.user_id AS id - FROM raw_events_first LEFT JOIN - reference_table - ON (raw_events_first.value_1 = reference_table.user_id)) AS foo) as f - INNER JOIN - (SELECT v4, - v1, - id - FROM (SELECT SUM(raw_events_second.value_4) AS v4, - SUM(raw_events_first.value_1) AS v1, - raw_events_second.user_id AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.user_id - GROUP BY raw_events_second.user_id - HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 - ON (f.id = f2.id)) as outer_most - GROUP BY - outer_most.id; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: pull to coordinator - -> HashAggregate - Group Key: remote_scan.user_id - -> Custom Scan (Citus Adaptive) - -> Distributed Subplan XXX_1 - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(8 rows) - -INSERT INTO agg_events - (value_4_agg, - value_1_agg, - user_id) -SELECT v4, - v1, - id -FROM (SELECT SUM(raw_events_second.value_4) AS v4, - SUM(raw_events_first.value_1) AS v1, - raw_events_second.user_id AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id != raw_events_second.user_id - GROUP BY raw_events_second.user_id) AS foo; -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -SET client_min_messages TO DEBUG2; --- INSERT returns NULL partition key value via coordinator -INSERT INTO agg_events - (value_4_agg, - value_1_agg, - user_id) -SELECT v4, - v1, - id -FROM (SELECT SUM(raw_events_second.value_4) AS v4, - SUM(raw_events_first.value_1) AS v1, - raw_events_second.value_3 AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.user_id - GROUP BY raw_events_second.value_3) AS foo; -DEBUG: cannot push down this subquery -DETAIL: Group by list without partition column is currently unsupported when a subquery references a column from another query -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1] -DEBUG: join prunable for intervals [-2147483648,-1073741825] and [0,1073741823] -DEBUG: join prunable for intervals [-2147483648,-1073741825] and [1073741824,2147483647] -DEBUG: join prunable for intervals [-1073741824,-1] and [-2147483648,-1073741825] -DEBUG: join prunable for intervals [-1073741824,-1] and [0,1073741823] -DEBUG: join prunable for intervals [-1073741824,-1] and [1073741824,2147483647] -DEBUG: join prunable for intervals [0,1073741823] and [-2147483648,-1073741825] -DEBUG: join prunable for intervals [0,1073741823] and [-1073741824,-1] -DEBUG: join prunable for intervals [0,1073741823] and [1073741824,2147483647] -DEBUG: join prunable for intervals [1073741824,2147483647] and [-2147483648,-1073741825] -DEBUG: join prunable for intervals [1073741824,2147483647] and [-1073741824,-1] -DEBUG: join prunable for intervals [1073741824,2147483647] and [0,1073741823] -DEBUG: generating subplan XXX_1 for subquery SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.value_3 AS id FROM multi_insert_select.raw_events_first, multi_insert_select.raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.value_3 -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT int4(id) AS user_id, int4(v1) AS value_1_agg, int8(v4) AS value_4_agg FROM (SELECT intermediate_result.v4, intermediate_result.v1, intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(v4 numeric, v1 bigint, id double precision)) foo -DEBUG: Creating router plan -DEBUG: Collecting INSERT ... SELECT results on coordinator -ERROR: the partition column of table multi_insert_select.agg_events cannot be NULL --- error cases --- no part column at all -INSERT INTO raw_events_second - (value_1) -SELECT value_1 -FROM raw_events_first; -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: the query doesn't include the target table's partition column -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... SELECT -ERROR: the partition column of table multi_insert_select.raw_events_second should have a value -INSERT INTO raw_events_second - (value_1) -SELECT user_id -FROM raw_events_first; -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: the query doesn't include the target table's partition column -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... SELECT -ERROR: the partition column of table multi_insert_select.raw_events_second should have a value -INSERT INTO raw_events_second - (user_id) -SELECT value_1 -FROM raw_events_first; -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: The target table's partition column should correspond to a partition column in the subquery. -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'user_id' -ERROR: the partition column value cannot be NULL -CONTEXT: while executing command on localhost:xxxxx -INSERT INTO raw_events_second - (user_id) -SELECT user_id * 2 -FROM raw_events_first; -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: Subquery contains an operator in the same position as the target table's partition column. -HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'user_id' -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300000_to_0,repartitioned_results_xxxxx_from_13300001_to_0}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300000_to_1,repartitioned_results_xxxxx_from_13300001_to_1,repartitioned_results_xxxxx_from_13300003_to_1}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300001_to_2}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300000_to_3,repartitioned_results_xxxxx_from_13300002_to_3,repartitioned_results_xxxxx_from_13300003_to_3}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) -INSERT INTO raw_events_second - (user_id) -SELECT user_id :: bigint -FROM raw_events_first; -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: Subquery contains an explicit cast in the same position as the target table's partition column. -HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'user_id' -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300000_to_0}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300001_to_1}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300002_to_2}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300003_to_3}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer) -INSERT INTO agg_events - (value_3_agg, - value_4_agg, - value_1_agg, - value_2_agg, - user_id) -SELECT SUM(value_3), - Count(value_4), - user_id, - SUM(value_1), - Avg(value_2) -FROM raw_events_first -GROUP BY user_id; -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: Subquery contains an aggregation in the same position as the target table's partition column. -HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'user_id' -ERROR: the partition column value cannot be NULL -CONTEXT: while executing command on localhost:xxxxx -INSERT INTO agg_events - (value_3_agg, - value_4_agg, - value_1_agg, - value_2_agg, - user_id) -SELECT SUM(value_3), - Count(value_4), - user_id, - SUM(value_1), - value_2 -FROM raw_events_first -GROUP BY user_id, - value_2; -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: The target table's partition column should correspond to a partition column in the subquery. -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'user_id' -ERROR: the partition column value cannot be NULL -CONTEXT: while executing command on localhost:xxxxx --- tables should be co-located -INSERT INTO agg_events (user_id) -SELECT - user_id -FROM - reference_table; -DEBUG: Creating router plan -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: The target table's partition column should correspond to a partition column in the subquery. -DEBUG: Distributed planning for a fast-path router query -DEBUG: Creating router plan -DEBUG: Collecting INSERT ... SELECT results on coordinator --- foo2 is recursively planned and INSERT...SELECT is done via coordinator -INSERT INTO agg_events - (user_id) -SELECT f2.id FROM -(SELECT - id -FROM (SELECT reference_table.user_id AS id - FROM raw_events_first, - reference_table - WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f -INNER JOIN -(SELECT v4, - v1, - id -FROM (SELECT SUM(raw_events_second.value_4) AS v4, - raw_events_second.value_1 AS v1, - SUM(raw_events_second.user_id) AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.user_id - GROUP BY raw_events_second.value_1 - HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 -ON (f.id = f2.id); -DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1] -DEBUG: join prunable for intervals [-2147483648,-1073741825] and [0,1073741823] -DEBUG: join prunable for intervals [-2147483648,-1073741825] and [1073741824,2147483647] -DEBUG: join prunable for intervals [-1073741824,-1] and [-2147483648,-1073741825] -DEBUG: join prunable for intervals [-1073741824,-1] and [0,1073741823] -DEBUG: join prunable for intervals [-1073741824,-1] and [1073741824,2147483647] -DEBUG: join prunable for intervals [0,1073741823] and [-2147483648,-1073741825] -DEBUG: join prunable for intervals [0,1073741823] and [-1073741824,-1] -DEBUG: join prunable for intervals [0,1073741823] and [1073741824,2147483647] -DEBUG: join prunable for intervals [1073741824,2147483647] and [-2147483648,-1073741825] -DEBUG: join prunable for intervals [1073741824,2147483647] and [-1073741824,-1] -DEBUG: join prunable for intervals [1073741824,2147483647] and [0,1073741823] -DEBUG: generating subplan XXX_1 for subquery SELECT sum(raw_events_second.value_4) AS v4, raw_events_second.value_1 AS v1, sum(raw_events_second.user_id) AS id FROM multi_insert_select.raw_events_first, multi_insert_select.raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.value_1 HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT int4(f2.id) AS user_id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first, multi_insert_select.reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT intermediate_result.v4, intermediate_result.v1, intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(v4 numeric, v1 integer, id bigint)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'user_id' --- the second part of the query is not routable since --- GROUP BY not on the partition column (i.e., value_1) and thus join --- on f.id = f2.id is not on the partition key (instead on the sum of partition key) --- but we still recursively plan foo2 and run the query -INSERT INTO agg_events - (user_id) -SELECT f.id FROM -(SELECT - id -FROM (SELECT raw_events_first.user_id AS id - FROM raw_events_first, - reference_table - WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f -INNER JOIN -(SELECT v4, - v1, - id -FROM (SELECT SUM(raw_events_second.value_4) AS v4, - raw_events_second.value_1 AS v1, - SUM(raw_events_second.user_id) AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.user_id - GROUP BY raw_events_second.value_1 - HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 -ON (f.id = f2.id); -DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1] -DEBUG: join prunable for intervals [-2147483648,-1073741825] and [0,1073741823] -DEBUG: join prunable for intervals [-2147483648,-1073741825] and [1073741824,2147483647] -DEBUG: join prunable for intervals [-1073741824,-1] and [-2147483648,-1073741825] -DEBUG: join prunable for intervals [-1073741824,-1] and [0,1073741823] -DEBUG: join prunable for intervals [-1073741824,-1] and [1073741824,2147483647] -DEBUG: join prunable for intervals [0,1073741823] and [-2147483648,-1073741825] -DEBUG: join prunable for intervals [0,1073741823] and [-1073741824,-1] -DEBUG: join prunable for intervals [0,1073741823] and [1073741824,2147483647] -DEBUG: join prunable for intervals [1073741824,2147483647] and [-2147483648,-1073741825] -DEBUG: join prunable for intervals [1073741824,2147483647] and [-1073741824,-1] -DEBUG: join prunable for intervals [1073741824,2147483647] and [0,1073741823] -DEBUG: generating subplan XXX_1 for subquery SELECT sum(raw_events_second.value_4) AS v4, raw_events_second.value_1 AS v1, sum(raw_events_second.user_id) AS id FROM multi_insert_select.raw_events_first, multi_insert_select.raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.value_1 HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT f.id AS user_id FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM multi_insert_select.raw_events_first, multi_insert_select.reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT intermediate_result.v4, intermediate_result.v1, intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(v4 numeric, v1 integer, id bigint)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'user_id' -SET client_min_messages TO WARNING; --- cannot pushdown the query since the JOIN is not equi JOIN --- falls back to route via coordinator -SELECT coordinator_plan($Q$ -EXPLAIN (costs off) -INSERT INTO agg_events - (user_id, value_4_agg) -SELECT -outer_most.id, max(outer_most.value) - FROM -( - SELECT f2.id as id, f2.v4 as value FROM - (SELECT - id - FROM (SELECT reference_table.user_id AS id - FROM raw_events_first, - reference_table - WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f - INNER JOIN - (SELECT v4, - v1, - id - FROM (SELECT SUM(raw_events_second.value_4) AS v4, - SUM(raw_events_first.value_1) AS v1, - raw_events_second.user_id AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.user_id - GROUP BY raw_events_second.user_id - HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 -ON (f.id != f2.id)) as outer_most -GROUP BY outer_most.id; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: pull to coordinator - -> HashAggregate - Group Key: remote_scan.user_id - -> Custom Scan (Citus Adaptive) - -> Distributed Subplan XXX_1 - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(8 rows) - --- cannot pushdown since foo2 is not join on partition key --- falls back to route via coordinator -SELECT coordinator_plan($Q$ -EXPLAIN (costs off) -INSERT INTO agg_events - (user_id, value_4_agg) -SELECT - outer_most.id, max(outer_most.value) -FROM -( - SELECT f2.id as id, f2.v4 as value FROM - (SELECT - id - FROM (SELECT reference_table.user_id AS id - FROM raw_events_first, - reference_table - WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f - INNER JOIN - (SELECT v4, - v1, - id - FROM (SELECT SUM(raw_events_second.value_4) AS v4, - SUM(raw_events_first.value_1) AS v1, - raw_events_second.user_id AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.value_1 - GROUP BY raw_events_second.user_id - HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 -ON (f.id = f2.id)) as outer_most -GROUP BY - outer_most.id; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: pull to coordinator - -> HashAggregate - Group Key: remote_scan.user_id - -> Custom Scan (Citus Adaptive) - -> Distributed Subplan XXX_1 - -> HashAggregate - Group Key: remote_scan.id - Filter: (pg_catalog.sum(remote_scan.worker_column_4) > '10'::numeric) - -> Custom Scan (Citus Adaptive) - Task Count: 6 -(11 rows) - --- cannot push down since foo doesn't have en equi join --- falls back to route via coordinator -SELECT coordinator_plan($Q$ -EXPLAIN (costs off) -INSERT INTO agg_events - (user_id, value_4_agg) -SELECT - outer_most.id, max(outer_most.value) -FROM -( - SELECT f2.id as id, f2.v4 as value FROM - (SELECT - id - FROM (SELECT reference_table.user_id AS id - FROM raw_events_first, - reference_table - WHERE raw_events_first.user_id != reference_table.user_id ) AS foo) as f - INNER JOIN - (SELECT v4, - v1, - id - FROM (SELECT SUM(raw_events_second.value_4) AS v4, - SUM(raw_events_first.value_1) AS v1, - raw_events_second.user_id AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.user_id - GROUP BY raw_events_second.user_id - HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 -ON (f.id = f2.id)) as outer_most -GROUP BY - outer_most.id; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: pull to coordinator - -> HashAggregate - Group Key: remote_scan.user_id - -> Custom Scan (Citus Adaptive) - -> Distributed Subplan XXX_1 - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(8 rows) - --- some unsupported LATERAL JOINs --- join on averages is not on the partition key --- should fall back to route via coordinator -SELECT coordinator_plan($Q$ -EXPLAIN (costs off) -INSERT INTO agg_events (user_id, value_4_agg) -SELECT - averages.user_id, avg(averages.value_4) -FROM - (SELECT - raw_events_second.user_id - FROM - reference_table JOIN raw_events_second on (reference_table.user_id = raw_events_second.user_id) - ) reference_ids - JOIN LATERAL - (SELECT - user_id, value_4 - FROM - raw_events_first WHERE - value_4 = reference_ids.user_id) as averages ON true - GROUP BY averages.user_id; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: pull to coordinator - -> HashAggregate - Group Key: remote_scan.user_id - -> Custom Scan (Citus Adaptive) - Task Count: 6 -(6 rows) - --- join among reference_ids and averages is not on the partition key --- should fall back to route via coordinator -SELECT coordinator_plan($Q$ -EXPLAIN (costs off) -INSERT INTO agg_events (user_id, value_4_agg) -SELECT - averages.user_id, avg(averages.value_4) -FROM - (SELECT - raw_events_second.user_id - FROM - reference_table JOIN raw_events_second on (reference_table.user_id = raw_events_second.user_id) - ) reference_ids - JOIN LATERAL - (SELECT - user_id, value_4 - FROM - raw_events_first) as averages ON averages.value_4 = reference_ids.user_id - GROUP BY averages.user_id; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: pull to coordinator - -> HashAggregate - Group Key: remote_scan.user_id - -> Custom Scan (Citus Adaptive) - -> Distributed Subplan XXX_1 - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(8 rows) - --- join among the agg_ids and averages is not on the partition key --- should fall back to route via coordinator -SELECT coordinator_plan($Q$ -EXPLAIN (costs off) -INSERT INTO agg_events (user_id, value_4_agg) -SELECT - averages.user_id, avg(averages.value_4) -FROM - (SELECT - raw_events_second.user_id - FROM - reference_table JOIN raw_events_second on (reference_table.user_id = raw_events_second.user_id) - ) reference_ids - JOIN LATERAL - (SELECT - user_id, value_4 - FROM - raw_events_first) as averages ON averages.user_id = reference_ids.user_id -JOIN LATERAL - (SELECT user_id, value_4 FROM agg_events) as agg_ids ON (agg_ids.value_4 = averages.user_id) - GROUP BY averages.user_id; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: pull to coordinator - -> Custom Scan (Citus Adaptive) - Task Count: 6 -(4 rows) - --- Selected value in the WHERE is not partition key, so we cannot use distributed --- INSERT/SELECT and falls back route via coordinator -SELECT coordinator_plan($Q$ -EXPLAIN (costs off) -INSERT INTO raw_events_second - (user_id) -SELECT user_id -FROM raw_events_first -WHERE user_id IN (SELECT value_1 - FROM raw_events_second); -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: repartition - -> Custom Scan (Citus Adaptive) - -> Distributed Subplan XXX_1 - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(6 rows) - --- same as above but slightly more complex --- since it also includes subquery in FROM as well -SELECT coordinator_plan($Q$ -EXPLAIN (costs off) -INSERT INTO agg_events - (user_id) -SELECT f2.id FROM - -(SELECT - id -FROM (SELECT reference_table.user_id AS id - FROM raw_events_first, - reference_table - WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f -INNER JOIN -(SELECT v4, - v1, - id -FROM (SELECT SUM(raw_events_second.value_4) AS v4, - SUM(raw_events_first.value_1) AS v1, - raw_events_second.user_id AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.user_id - GROUP BY raw_events_second.user_id - HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 -ON (f.id = f2.id) -WHERE f.id IN (SELECT value_1 - FROM raw_events_second); -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: repartition - -> Custom Scan (Citus Adaptive) - -> Distributed Subplan XXX_1 - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(6 rows) - --- some more semi-anti join tests -SET client_min_messages TO DEBUG2; --- join in where -INSERT INTO raw_events_second - (user_id) -SELECT user_id -FROM raw_events_first -WHERE user_id IN (SELECT raw_events_second.user_id - FROM raw_events_second, raw_events_first - WHERE raw_events_second.user_id = raw_events_first.user_id AND raw_events_first.user_id = 200); -DEBUG: Creating router plan -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second, multi_insert_select.raw_events_first_13300000 raw_events_first_1 WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first_1.user_id) AND (raw_events_first_1.user_id OPERATOR(pg_catalog.=) 200)))) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away -RESET client_min_messages; --- we cannot push this down since it is NOT IN --- we use repartition insert/select instead -SELECT coordinator_plan($Q$ -EXPLAIN (costs off) -INSERT INTO raw_events_second - (user_id) -SELECT user_id -FROM raw_events_first -WHERE user_id NOT IN (SELECT raw_events_second.user_id - FROM raw_events_second, raw_events_first - WHERE raw_events_second.user_id = raw_events_first.user_id AND raw_events_first.user_id = 200); -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: repartition - -> Custom Scan (Citus Adaptive) - -> Distributed Subplan XXX_1 - -> Custom Scan (Citus Adaptive) - Task Count: 1 -(6 rows) - -SET client_min_messages TO DEBUG2; --- safe to push down -INSERT INTO raw_events_second - (user_id) -SELECT user_id -FROM raw_events_first -WHERE EXISTS (SELECT 1 - FROM raw_events_second - WHERE raw_events_second.user_id =raw_events_first.user_id); -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id))) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id))) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id))) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id))) AND (raw_events_first.user_id IS NOT NULL)) --- we cannot push down -INSERT INTO raw_events_second - (user_id) -SELECT user_id -FROM raw_events_first -WHERE NOT EXISTS (SELECT 1 - FROM raw_events_second - WHERE raw_events_second.user_id =raw_events_first.user_id); -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((NOT (EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id)))) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((NOT (EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id)))) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((NOT (EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id)))) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((NOT (EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id)))) AND (raw_events_first.user_id IS NOT NULL)) --- more complex LEFT JOINs - INSERT INTO agg_events - (user_id, value_4_agg) - SELECT - outer_most.id, max(outer_most.value) - FROM - ( - SELECT f2.id as id, f2.v4 as value FROM - (SELECT - id - FROM (SELECT raw_events_first.user_id AS id - FROM raw_events_first LEFT JOIN - reference_table - ON (raw_events_first.user_id = reference_table.user_id)) AS foo) as f - LEFT JOIN - (SELECT v4, - v1, - id - FROM (SELECT SUM(raw_events_second.value_4) AS v4, - SUM(raw_events_first.value_1) AS v1, - raw_events_second.user_id AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.user_id - GROUP BY raw_events_second.user_id - HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 - ON (f.id = f2.id)) as outer_most - GROUP BY - outer_most.id; -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM (multi_insert_select.raw_events_first_13300000 raw_events_first LEFT JOIN multi_insert_select.reference_table_13300012 reference_table ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)))) foo) f LEFT JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (id IS NOT NULL) GROUP BY outer_most.id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM (multi_insert_select.raw_events_first_13300001 raw_events_first LEFT JOIN multi_insert_select.reference_table_13300012 reference_table ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)))) foo) f LEFT JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (id IS NOT NULL) GROUP BY outer_most.id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM (multi_insert_select.raw_events_first_13300002 raw_events_first LEFT JOIN multi_insert_select.reference_table_13300012 reference_table ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)))) foo) f LEFT JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (id IS NOT NULL) GROUP BY outer_most.id -DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM (multi_insert_select.raw_events_first_13300003 raw_events_first LEFT JOIN multi_insert_select.reference_table_13300012 reference_table ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)))) foo) f LEFT JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (id IS NOT NULL) GROUP BY outer_most.id -RESET client_min_messages; --- cannot push down since the f.id IN is matched with value_1 --- we use repartition insert/select instead -SELECT coordinator_plan($Q$ -EXPLAIN (costs off) -INSERT INTO raw_events_second - (user_id) -SELECT user_id -FROM raw_events_first -WHERE user_id IN ( -SELECT f2.id FROM -(SELECT - id -FROM (SELECT reference_table.user_id AS id - FROM raw_events_first, - reference_table - WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f -INNER JOIN -(SELECT v4, - v1, - id -FROM (SELECT SUM(raw_events_second.value_4) AS v4, - SUM(raw_events_first.value_1) AS v1, - raw_events_second.user_id AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.user_id - GROUP BY raw_events_second.user_id - HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 -ON (f.id = f2.id) -WHERE f.id IN (SELECT value_1 - FROM raw_events_second)); -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: repartition - -> Custom Scan (Citus Adaptive) - -> Distributed Subplan XXX_1 - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(6 rows) - -SET client_min_messages TO DEBUG2; --- same as above, but this time is it safe to push down since --- f.id IN is matched with user_id -INSERT INTO raw_events_second - (user_id) -SELECT user_id -FROM raw_events_first -WHERE user_id IN ( -SELECT f2.id FROM -(SELECT - id -FROM (SELECT reference_table.user_id AS id - FROM raw_events_first, - reference_table - WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f -INNER JOIN -(SELECT v4, - v1, - id -FROM (SELECT SUM(raw_events_second.value_4) AS v4, - SUM(raw_events_first.value_1) AS v1, - raw_events_second.user_id AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.user_id - GROUP BY raw_events_second.user_id - HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 -ON (f.id = f2.id) -WHERE f.id IN (SELECT user_id - FROM raw_events_second)); -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first_1, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first_1.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first_1, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second)))) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first_1, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first_1.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first_1, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300005 raw_events_second)))) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first_1, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first_1.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first_1, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300006 raw_events_second)))) AND (raw_events_first.user_id IS NOT NULL)) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first_1, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first_1.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first_1, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second)))) AND (raw_events_first.user_id IS NOT NULL)) -RESET client_min_messages; --- cannot push down since top level user_id is matched with NOT IN -INSERT INTO raw_events_second - (user_id) -SELECT user_id -FROM raw_events_first -WHERE user_id NOT IN ( -SELECT f2.id FROM -(SELECT - id -FROM (SELECT reference_table.user_id AS id - FROM raw_events_first, - reference_table - WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f -INNER JOIN -(SELECT v4, - v1, - id -FROM (SELECT SUM(raw_events_second.value_4) AS v4, - SUM(raw_events_first.value_1) AS v1, - raw_events_second.user_id AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.user_id - GROUP BY raw_events_second.user_id - HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 -ON (f.id = f2.id) -WHERE f.id IN (SELECT user_id - FROM raw_events_second)); --- cannot push down since join is not equi join (f.id > f2.id) -INSERT INTO raw_events_second - (user_id) -SELECT user_id -FROM raw_events_first -WHERE user_id IN ( -SELECT f2.id FROM -(SELECT - id -FROM (SELECT reference_table.user_id AS id - FROM raw_events_first, - reference_table - WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f -INNER JOIN -(SELECT v4, - v1, - id -FROM (SELECT SUM(raw_events_second.value_4) AS v4, - SUM(raw_events_first.value_1) AS v1, - raw_events_second.user_id AS id - FROM raw_events_first, - raw_events_second - WHERE raw_events_first.user_id = raw_events_second.user_id - GROUP BY raw_events_second.user_id - HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 -ON (f.id > f2.id) -WHERE f.id IN (SELECT user_id - FROM raw_events_second)); --- we currently not support grouping sets -INSERT INTO agg_events - (user_id, - value_1_agg, - value_2_agg) -SELECT user_id, - Sum(value_1) AS sum_val1, - Sum(value_2) AS sum_val2 -FROM raw_events_second -GROUP BY grouping sets ( ( user_id ), ( value_1 ), ( user_id, value_1 ), ( ) ); -ERROR: could not run distributed query with GROUPING SETS, CUBE, or ROLLUP -HINT: Consider using an equality filter on the distributed table's partition column. --- set back to INFO -SET client_min_messages TO INFO; --- avoid constraint violations -TRUNCATE raw_events_first; --- we don't support LIMIT for subquery pushdown, but --- we recursively plan the query and run it via coordinator -INSERT INTO agg_events(user_id) -SELECT user_id -FROM users_table -WHERE user_id - IN (SELECT - user_id - FROM ( - ( - SELECT - user_id - FROM - ( - SELECT - e1.user_id - FROM - users_table u1, events_table e1 - WHERE - e1.user_id = u1.user_id LIMIT 3 - ) as f_inner - ) - ) AS f2); --- Altering a table and selecting from it using a multi-shard statement --- in the same transaction is allowed because we will use the same --- connections for all co-located placements. -BEGIN; -ALTER TABLE raw_events_second DROP COLUMN value_4; -INSERT INTO raw_events_first SELECT * FROM raw_events_second; -ROLLBACK; --- Alterating a table and selecting from it using a single-shard statement --- in the same transaction is disallowed because we will use a different --- connection. -BEGIN; -ALTER TABLE raw_events_second DROP COLUMN value_4; -INSERT INTO raw_events_first SELECT * FROM raw_events_second WHERE user_id = 100; -ROLLBACK; --- Altering a reference table and then performing an INSERT ... SELECT which --- joins with the reference table is allowed, since the INSERT ... SELECT --- would read from the reference table over the same connections with the ones --- that performed the parallel DDL. -BEGIN; -ALTER TABLE reference_table ADD COLUMN z int; -INSERT INTO raw_events_first (user_id) -SELECT user_id FROM raw_events_second JOIN reference_table USING (user_id); -ROLLBACK; --- the same test with sequential DDL should work fine -BEGIN; -SET LOCAL citus.multi_shard_modify_mode TO 'sequential'; -ALTER TABLE reference_table ADD COLUMN z int; -INSERT INTO raw_events_first (user_id) -SELECT user_id FROM raw_events_second JOIN reference_table USING (user_id); -ROLLBACK; --- Insert after copy is allowed -BEGIN; -COPY raw_events_second (user_id, value_1) FROM STDIN DELIMITER ','; -INSERT INTO raw_events_first SELECT * FROM raw_events_second; -ROLLBACK; --- Insert after copy is currently allowed for single-shard operation. --- Both insert and copy are rolled back successfully. -BEGIN; -COPY raw_events_second (user_id, value_1) FROM STDIN DELIMITER ','; -INSERT INTO raw_events_first SELECT * FROM raw_events_second WHERE user_id = 101; -SELECT user_id FROM raw_events_first WHERE user_id = 101; - user_id ---------------------------------------------------------------------- - 101 -(1 row) - -ROLLBACK; -BEGIN; -INSERT INTO raw_events_first SELECT * FROM raw_events_second; -COPY raw_events_first (user_id, value_1) FROM STDIN DELIMITER ','; -ROLLBACK; -BEGIN; -INSERT INTO raw_events_first SELECT * FROM raw_events_second WHERE user_id = 100; -COPY raw_events_first (user_id, value_1) FROM STDIN DELIMITER ','; -ROLLBACK; --- Similarly, multi-row INSERTs will take part in transactions and reuse connections... -BEGIN; -INSERT INTO raw_events_first SELECT * FROM raw_events_second WHERE user_id = 100; -COPY raw_events_first (user_id, value_1) FROM STDIN DELIMITER ','; -INSERT INTO raw_events_first (user_id, value_1) VALUES (105, 105), (106, 106); -ROLLBACK; --- selecting from views works -CREATE VIEW test_view AS SELECT * FROM raw_events_first; -INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES - (16, now(), 60, 600, 6000.1, 60000); -SELECT count(*) FROM raw_events_second; - count ---------------------------------------------------------------------- - 45 -(1 row) - -INSERT INTO raw_events_second SELECT * FROM test_view; -INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES - (17, now(), 60, 600, 6000.1, 60000); -INSERT INTO raw_events_second SELECT * FROM test_view WHERE user_id = 17 GROUP BY 1,2,3,4,5,6; -SELECT count(*) FROM raw_events_second; - count ---------------------------------------------------------------------- - 47 -(1 row) - --- intermediate results (CTEs) should be allowed when doing INSERT...SELECT within a CTE -WITH series AS ( - SELECT s AS val FROM generate_series(60,70) s -), -inserts AS ( - INSERT INTO raw_events_second (user_id) - SELECT - user_id - FROM - raw_events_first JOIN series ON (value_1 = val) - RETURNING - NULL -) -SELECT count(*) FROM inserts; - count ---------------------------------------------------------------------- - 2 -(1 row) - --- we need this in our next test -truncate raw_events_first; -SET client_min_messages TO DEBUG2; --- first show that the query works now -INSERT INTO raw_events_first SELECT * FROM raw_events_second; -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_second.user_id, raw_events_second."time", raw_events_second.value_1, raw_events_second.value_2, raw_events_second.value_3, raw_events_second.value_4 FROM multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_second.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300001 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_second.user_id, raw_events_second."time", raw_events_second.value_1, raw_events_second.value_2, raw_events_second.value_3, raw_events_second.value_4 FROM multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_second.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300002 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_second.user_id, raw_events_second."time", raw_events_second.value_1, raw_events_second.value_2, raw_events_second.value_3, raw_events_second.value_4 FROM multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_second.user_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300003 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_second.user_id, raw_events_second."time", raw_events_second.value_1, raw_events_second.value_2, raw_events_second.value_3, raw_events_second.value_4 FROM multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id IS NOT NULL) -SET client_min_messages TO INFO; -truncate raw_events_first; -SET client_min_messages TO DEBUG2; --- now show that it works for a single shard query as well -INSERT INTO raw_events_first SELECT * FROM raw_events_second WHERE user_id = 5; -DEBUG: Creating router plan -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_second.user_id, raw_events_second."time", raw_events_second.value_1, raw_events_second.value_2, raw_events_second.value_3, raw_events_second.value_4 FROM multi_insert_select.raw_events_second_13300004 raw_events_second WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.=) 5) AND (raw_events_second.user_id IS NOT NULL)) -DEBUG: Skipping target shard interval 13300001 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300002 since SELECT query for it pruned away -DEBUG: Skipping target shard interval 13300003 since SELECT query for it pruned away -SET client_min_messages TO INFO; --- now do some tests with varchars -INSERT INTO insert_select_varchar_test VALUES ('test_1', 10); -INSERT INTO insert_select_varchar_test VALUES ('test_2', 30); -INSERT INTO insert_select_varchar_test (key, value) -SELECT *, 100 -FROM (SELECT f1.key - FROM (SELECT key - FROM insert_select_varchar_test - GROUP BY 1 - HAVING Count(key) < 3) AS f1, - (SELECT key - FROM insert_select_varchar_test - GROUP BY 1 - HAVING Sum(COALESCE(insert_select_varchar_test.value, 0)) > - 20.0) - AS f2 - WHERE f1.key = f2.key - GROUP BY 1) AS foo; -SELECT * FROM insert_select_varchar_test ORDER BY 1 DESC, 2 DESC; - key | value ---------------------------------------------------------------------- - test_2 | 100 - test_2 | 30 - test_1 | 10 -(3 rows) - --- some tests with DEFAULT columns and constant values --- this test is mostly importantly intended for deparsing the query correctly --- but still it is preferable to have this test here instead of multi_deparse_shard_query -CREATE TABLE table_with_defaults -( - store_id int, - first_name text, - default_1 int DEFAULT 1, - last_name text, - default_2 text DEFAULT '2' -); --- we don't need many shards -SET citus.shard_count = 2; -SELECT create_distributed_table('table_with_defaults', 'store_id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - --- let's see the queries -SET client_min_messages TO DEBUG2; --- a very simple query -INSERT INTO table_with_defaults SELECT * FROM table_with_defaults; -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_1, table_with_defaults.last_name, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_1, table_with_defaults.last_name, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) --- see that defaults are filled -INSERT INTO table_with_defaults (store_id, first_name) -SELECT - store_id, first_name -FROM - table_with_defaults; -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, 1 AS default_1, '2'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, 1 AS default_1, '2'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) --- shuffle one of the defaults and skip the other -INSERT INTO table_with_defaults (default_2, store_id, first_name) -SELECT - default_2, store_id, first_name -FROM - table_with_defaults; -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, 1 AS default_1, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, 1 AS default_1, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) --- shuffle both defaults -INSERT INTO table_with_defaults (default_2, store_id, default_1, first_name) -SELECT - default_2, store_id, default_1, first_name -FROM - table_with_defaults; -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_1, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_1, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) --- use constants instead of non-default column -INSERT INTO table_with_defaults (default_2, last_name, store_id, first_name) -SELECT - default_2, 'Freund', store_id, 'Andres' -FROM - table_with_defaults; -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) --- use constants instead of non-default column and skip both defauls -INSERT INTO table_with_defaults (last_name, store_id, first_name) -SELECT - 'Freund', store_id, 'Andres' -FROM - table_with_defaults; -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, '2'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, '2'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) --- use constants instead of default columns -INSERT INTO table_with_defaults (default_2, last_name, store_id, first_name, default_1) -SELECT - 20, last_name, store_id, first_name, 10 -FROM - table_with_defaults; -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, 10, table_with_defaults.last_name, 20 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, 10, table_with_defaults.last_name, 20 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) --- use constants instead of both default columns and non-default columns -INSERT INTO table_with_defaults (default_2, last_name, store_id, first_name, default_1) -SELECT - 20, 'Freund', store_id, 'Andres', 10 -FROM - table_with_defaults; -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 10, 'Freund'::text AS last_name, 20 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 10, 'Freund'::text AS last_name, 20 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) --- some of the ultimate queries where we have constants, --- defaults and group by entry is not on the target entry -INSERT INTO table_with_defaults (default_2, store_id, first_name) -SELECT - '2000', store_id, 'Andres' -FROM - table_with_defaults -GROUP BY - last_name, store_id; -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1 AS default_1, '2000'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1 AS default_1, '2000'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id -INSERT INTO table_with_defaults (default_1, store_id, first_name, default_2) -SELECT - 1000, store_id, 'Andres', '2000' -FROM - table_with_defaults -GROUP BY - last_name, store_id, first_name; -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id, table_with_defaults.first_name -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id, table_with_defaults.first_name -INSERT INTO table_with_defaults (default_1, store_id, first_name, default_2) -SELECT - 1000, store_id, 'Andres', '2000' -FROM - table_with_defaults -GROUP BY - last_name, store_id, first_name, default_2; -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_2 -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_2 -INSERT INTO table_with_defaults (default_1, store_id, first_name) -SELECT - 1000, store_id, 'Andres' -FROM - table_with_defaults -GROUP BY - last_name, store_id, first_name, default_2; -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1000, '2'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_2 -DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1000, '2'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_2 -RESET client_min_messages; --- Stable function in default should be allowed -ALTER TABLE table_with_defaults ADD COLUMN t timestamptz DEFAULT now(); -INSERT INTO table_with_defaults (store_id, first_name, last_name) -SELECT - store_id, 'first '||store_id, 'last '||store_id -FROM - table_with_defaults -GROUP BY - store_id, first_name, last_name; --- Volatile function in default should be disallowed - SERIAL pseudo-types -CREATE TABLE table_with_serial ( - store_id int, - s bigserial -); -SELECT create_distributed_table('table_with_serial', 'store_id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -INSERT INTO table_with_serial (store_id) -SELECT - store_id -FROM - table_with_defaults -GROUP BY - store_id; --- Volatile function in default should be disallowed - user-defined sequence -CREATE SEQUENCE user_defined_sequence; -CREATE TABLE table_with_user_sequence ( - store_id int, - s bigint default nextval('user_defined_sequence') -); -SELECT create_distributed_table('table_with_user_sequence', 'store_id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -INSERT INTO table_with_user_sequence (store_id) -SELECT - store_id -FROM - table_with_defaults -GROUP BY - store_id; --- do some more error/error message checks -SET citus.shard_count TO 4; -SET citus.shard_replication_factor TO 1; -CREATE TABLE text_table (part_col text, val int); -CREATE TABLE char_table (part_col char[], val int); -create table table_with_starts_with_defaults (a int DEFAULT 5, b int, c int); -SELECT create_distributed_table('text_table', 'part_col'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -SELECT create_distributed_table('char_table','part_col'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -SELECT create_distributed_table('table_with_starts_with_defaults', 'c'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -SET client_min_messages TO DEBUG; -INSERT INTO text_table (part_col) - SELECT - CASE WHEN part_col = 'onder' THEN 'marco' - END -FROM text_table ; -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: Subquery contains a case expression in the same position as the target table's partition column. -HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'part_col' -INSERT INTO text_table (part_col) SELECT COALESCE(part_col, 'onder') FROM text_table; -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: Subquery contains a coalesce expression in the same position as the target table's partition column. -HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'part_col' -INSERT INTO text_table (part_col) SELECT GREATEST(part_col, 'jason') FROM text_table; -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: Subquery contains a min/max expression in the same position as the target table's partition column. -HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'part_col' -INSERT INTO text_table (part_col) SELECT LEAST(part_col, 'andres') FROM text_table; -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: Subquery contains a min/max expression in the same position as the target table's partition column. -HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'part_col' -INSERT INTO text_table (part_col) SELECT NULLIF(part_col, 'metin') FROM text_table; -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: Subquery contains an expression that is not a simple column reference in the same position as the target table's partition column. -HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'part_col' -INSERT INTO text_table (part_col) SELECT part_col isnull FROM text_table; -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: Subquery contains an expression that is not a simple column reference in the same position as the target table's partition column. -HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'part_col' -INSERT INTO text_table (part_col) SELECT part_col::text from char_table; -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: Subquery contains an explicit coercion in the same position as the target table's partition column. -HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'part_col' -INSERT INTO text_table (part_col) SELECT (part_col = 'burak') is true FROM text_table; -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: Subquery contains an expression that is not a simple column reference in the same position as the target table's partition column. -HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'part_col' -INSERT INTO text_table (part_col) SELECT val FROM text_table; -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: The data type of the target table's partition column should exactly match the data type of the corresponding simple column reference in the subquery. -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'part_col' -INSERT INTO text_table (part_col) SELECT val::text FROM text_table; -DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match -DETAIL: Subquery contains an explicit coercion in the same position as the target table's partition column. -HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: performing repartitioned INSERT ... SELECT -DEBUG: partitioning SELECT query by column index 0 with name 'part_col' -RESET client_min_messages; -insert into table_with_starts_with_defaults (b,c) select b,c FROM table_with_starts_with_defaults; --- Test on partition column without native hash function -CREATE TABLE raw_table -( - id BIGINT, - time DATE -); -CREATE TABLE summary_table -( - time DATE, - count BIGINT -); -SELECT create_distributed_table('raw_table', 'time'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -SELECT create_distributed_table('summary_table', 'time'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -INSERT INTO raw_table VALUES(1, '11-11-1980'); -INSERT INTO summary_table SELECT time, COUNT(*) FROM raw_table GROUP BY time; -SELECT * FROM summary_table; - time | count ---------------------------------------------------------------------- - 11-11-1980 | 1 -(1 row) - --- Test INSERT ... SELECT via coordinator --- Select from constants -TRUNCATE raw_events_first; -INSERT INTO raw_events_first (user_id, value_1) -SELECT * FROM (VALUES (1,2), (3,4), (5,6)) AS v(int,int); -SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id; - user_id | value_1 ---------------------------------------------------------------------- - 1 | 2 - 3 | 4 - 5 | 6 -(3 rows) - --- Select from local functions -TRUNCATE raw_events_first; -CREATE SEQUENCE insert_select_test_seq; -SET client_min_messages TO DEBUG; -INSERT INTO raw_events_first (user_id, value_1, value_2) -SELECT - s, nextval('insert_select_test_seq'), (random()*10)::int -FROM - generate_series(1, 5) s; -DEBUG: Creating router plan -DEBUG: distributed INSERT ... SELECT can only select from distributed tables -DEBUG: Collecting INSERT ... SELECT results on coordinator -SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1; -DEBUG: Router planner cannot handle multi-shard select queries - user_id | value_1 ---------------------------------------------------------------------- - 1 | 1 - 2 | 2 - 3 | 3 - 4 | 4 - 5 | 5 -(5 rows) - --- ON CONFLICT is supported -INSERT INTO raw_events_first (user_id, value_1) -SELECT s, nextval('insert_select_test_seq') FROM generate_series(1, 5) s -ON CONFLICT DO NOTHING; -DEBUG: Creating router plan -DEBUG: distributed INSERT ... SELECT can only select from distributed tables -DEBUG: Collecting INSERT ... SELECT results on coordinator -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300000'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) ON CONFLICT DO NOTHING -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300001 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300001'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) ON CONFLICT DO NOTHING -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300002 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300002'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) ON CONFLICT DO NOTHING -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300003 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300003'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) ON CONFLICT DO NOTHING --- RETURNING is supported -INSERT INTO raw_events_first (user_id, value_1) -SELECT s, nextval('insert_select_test_seq') FROM generate_series(1, 5) s -RETURNING *; -DEBUG: Creating router plan -DEBUG: distributed INSERT ... SELECT can only select from distributed tables -DEBUG: Collecting INSERT ... SELECT results on coordinator -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300000'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300001 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300001'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300002 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300002'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 -DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300003 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300003'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 - user_id | time | value_1 | value_2 | value_3 | value_4 ---------------------------------------------------------------------- - 1 | | 11 | | | - 2 | | 12 | | | - 3 | | 13 | | | - 4 | | 14 | | | - 5 | | 15 | | | -(5 rows) - -RESET client_min_messages; --- INSERT ... SELECT and multi-shard SELECT in the same transaction is supported -TRUNCATE raw_events_first; -BEGIN; -INSERT INTO raw_events_first (user_id, value_1) -SELECT s, s FROM generate_series(1, 5) s; -SELECT user_id, value_1 FROM raw_events_first ORDER BY 1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | 1 - 2 | 2 - 3 | 3 - 4 | 4 - 5 | 5 -(5 rows) - -ROLLBACK; --- INSERT ... SELECT and single-shard SELECT in the same transaction is supported -TRUNCATE raw_events_first; -BEGIN; -INSERT INTO raw_events_first (user_id, value_1) -SELECT s, s FROM generate_series(1, 5) s; -SELECT user_id, value_1 FROM raw_events_first WHERE user_id = 1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | 1 -(1 row) - -COMMIT; --- Select from local table -TRUNCATE raw_events_first; -CREATE TEMPORARY TABLE raw_events_first_local AS -SELECT s AS u, 2*s AS v FROM generate_series(1, 5) s; -INSERT INTO raw_events_first (user_id, value_1) -SELECT u, v FROM raw_events_first_local; -SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | 2 - 2 | 4 - 3 | 6 - 4 | 8 - 5 | 10 -(5 rows) - --- Use columns in opposite order -TRUNCATE raw_events_first; -INSERT INTO raw_events_first (value_1, user_id) -SELECT u, v FROM raw_events_first_local; -SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 2 | 1 - 4 | 2 - 6 | 3 - 8 | 4 - 10 | 5 -(5 rows) - --- Set operations can work with opposite column order -TRUNCATE raw_events_first; -INSERT INTO raw_events_first (value_3, user_id) -( SELECT v, u::bigint FROM raw_events_first_local ) -UNION ALL -( SELECT v, u FROM raw_events_first_local ); -SELECT user_id, value_3 FROM raw_events_first ORDER BY user_id, value_3; - user_id | value_3 ---------------------------------------------------------------------- - 1 | 2 - 1 | 2 - 2 | 4 - 2 | 4 - 3 | 6 - 3 | 6 - 4 | 8 - 4 | 8 - 5 | 10 - 5 | 10 -(10 rows) - --- Select from other distributed table with limit -TRUNCATE raw_events_first; -TRUNCATE raw_events_second; -INSERT INTO raw_events_second (user_id, value_4) -SELECT s, 3*s FROM generate_series (1,5) s; -INSERT INTO raw_events_first (user_id, value_1) -SELECT user_id, value_4 FROM raw_events_second LIMIT 5; -SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | 3 - 2 | 6 - 3 | 9 - 4 | 12 - 5 | 15 -(5 rows) - --- CTEs are supported in local queries -TRUNCATE raw_events_first; -WITH removed_rows AS ( - DELETE FROM raw_events_first_local RETURNING u -) -INSERT INTO raw_events_first (user_id, value_1) -WITH value AS (SELECT 1) -SELECT * FROM removed_rows, value; -SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | 1 - 2 | 1 - 3 | 1 - 4 | 1 - 5 | 1 -(5 rows) - --- nested CTEs are also supported -TRUNCATE raw_events_first; -INSERT INTO raw_events_first_local SELECT s, 2*s FROM generate_series(0, 10) s; -WITH rows_to_remove AS ( - SELECT u FROM raw_events_first_local WHERE u > 0 -), -removed_rows AS ( - DELETE FROM raw_events_first_local - WHERE u IN (SELECT * FROM rows_to_remove) - RETURNING u, v -) -INSERT INTO raw_events_first (user_id, value_1) -WITH ultra_rows AS ( - WITH numbers AS ( - SELECT s FROM generate_series(1,10) s - ), - super_rows AS ( - SELECT u, v FROM removed_rows JOIN numbers ON (u = s) - ) - SELECT * FROM super_rows LIMIT 5 -) -SELECT u, v FROM ultra_rows; -SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | 2 - 2 | 4 - 3 | 6 - 4 | 8 - 5 | 10 -(5 rows) - --- CTEs with duplicate names are also supported -TRUNCATE raw_events_first; -WITH super_rows AS ( - SELECT u FROM raw_events_first_local -) -INSERT INTO raw_events_first (user_id, value_1) -WITH super_rows AS ( - SELECT * FROM super_rows GROUP BY u -) -SELECT u, 5 FROM super_rows; -SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 0 | 5 -(1 row) - --- CTEs are supported in router queries -TRUNCATE raw_events_first; -WITH user_two AS ( - SELECT user_id, value_4 FROM raw_events_second WHERE user_id = 2 -) -INSERT INTO raw_events_first (user_id, value_1) -SELECT * FROM user_two; -SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 2 | 6 -(1 row) - --- CTEs are supported when there are name collisions -WITH numbers AS ( - SELECT s FROM generate_series(1,10) s -) -INSERT INTO raw_events_first(user_id, value_1) -WITH numbers AS ( - SELECT s, s FROM generate_series(1,5) s -) -SELECT * FROM numbers; --- Select into distributed table with a sequence -CREATE TABLE "CaseSensitiveTable" ("UserID" int, "Value1" int); -SELECT create_distributed_table('"CaseSensitiveTable"', 'UserID'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -INSERT INTO "CaseSensitiveTable" -SELECT s, s FROM generate_series(1,10) s; -SELECT * FROM "CaseSensitiveTable" ORDER BY "UserID"; - UserID | Value1 ---------------------------------------------------------------------- - 1 | 1 - 2 | 2 - 3 | 3 - 4 | 4 - 5 | 5 - 6 | 6 - 7 | 7 - 8 | 8 - 9 | 9 - 10 | 10 -(10 rows) - -DROP TABLE "CaseSensitiveTable"; --- Select into distributed table with a sequence -CREATE TABLE dist_table_with_sequence (user_id serial, value_1 serial); -SELECT create_distributed_table('dist_table_with_sequence', 'user_id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - --- from local query -INSERT INTO dist_table_with_sequence (value_1) -SELECT s FROM generate_series(1,5) s; -SELECT * FROM dist_table_with_sequence ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | 1 - 2 | 2 - 3 | 3 - 4 | 4 - 5 | 5 -(5 rows) - --- from a distributed query -INSERT INTO dist_table_with_sequence (value_1) -SELECT value_1 FROM dist_table_with_sequence ORDER BY value_1; -SELECT * FROM dist_table_with_sequence ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | 1 - 2 | 2 - 3 | 3 - 4 | 4 - 5 | 5 - 6 | 1 - 7 | 2 - 8 | 3 - 9 | 4 - 10 | 5 -(10 rows) - -TRUNCATE dist_table_with_sequence; -INSERT INTO dist_table_with_sequence (user_id) -SELECT user_id FROM raw_events_second ORDER BY user_id; -SELECT * FROM dist_table_with_sequence ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | 1 - 2 | 2 - 3 | 3 - 4 | 4 - 5 | 5 -(5 rows) - -WITH top10 AS ( - SELECT user_id FROM raw_events_second WHERE value_1 IS NOT NULL ORDER BY value_1 LIMIT 10 -) -INSERT INTO dist_table_with_sequence (value_1) -SELECT * FROM top10; -SELECT * FROM dist_table_with_sequence ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | 1 - 2 | 2 - 3 | 3 - 4 | 4 - 5 | 5 -(5 rows) - --- router queries become logical planner queries when there is a nextval call -INSERT INTO dist_table_with_sequence (user_id) -SELECT user_id FROM dist_table_with_sequence WHERE user_id = 1; -SELECT * FROM dist_table_with_sequence ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | 1 - 1 | 6 - 2 | 2 - 3 | 3 - 4 | 4 - 5 | 5 -(6 rows) - -DROP TABLE dist_table_with_sequence; --- Select into distributed table with a user-defined sequence -CREATE SEQUENCE seq1; -CREATE SEQUENCE seq2; -CREATE TABLE dist_table_with_user_sequence (user_id int default nextval('seq1'), value_1 bigint default nextval('seq2')); -SELECT create_distributed_table('dist_table_with_user_sequence', 'user_id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - --- from local query -INSERT INTO dist_table_with_user_sequence (value_1) -SELECT s FROM generate_series(1,5) s; -SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | 1 - 2 | 2 - 3 | 3 - 4 | 4 - 5 | 5 -(5 rows) - --- from a distributed query -INSERT INTO dist_table_with_user_sequence (value_1) -SELECT value_1 FROM dist_table_with_user_sequence ORDER BY value_1; -SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | 1 - 2 | 2 - 3 | 3 - 4 | 4 - 5 | 5 - 6 | 1 - 7 | 2 - 8 | 3 - 9 | 4 - 10 | 5 -(10 rows) - -TRUNCATE dist_table_with_user_sequence; -INSERT INTO dist_table_with_user_sequence (user_id) -SELECT user_id FROM raw_events_second ORDER BY user_id; -SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | 1 - 2 | 2 - 3 | 3 - 4 | 4 - 5 | 5 -(5 rows) - -WITH top10 AS ( - SELECT user_id FROM raw_events_second WHERE value_1 IS NOT NULL ORDER BY value_1 LIMIT 10 -) -INSERT INTO dist_table_with_user_sequence (value_1) -SELECT * FROM top10; -SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | 1 - 2 | 2 - 3 | 3 - 4 | 4 - 5 | 5 -(5 rows) - --- router queries become logical planner queries when there is a nextval call -INSERT INTO dist_table_with_user_sequence (user_id) -SELECT user_id FROM dist_table_with_user_sequence WHERE user_id = 1; -SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | 1 - 1 | 6 - 2 | 2 - 3 | 3 - 4 | 4 - 5 | 5 -(6 rows) - -DROP TABLE dist_table_with_user_sequence; -DROP SEQUENCE seq1, seq2; --- Select from distributed table into reference table -CREATE TABLE ref_table (user_id serial, value_1 int); -SELECT create_reference_table('ref_table'); - create_reference_table ---------------------------------------------------------------------- - -(1 row) - -INSERT INTO ref_table -SELECT user_id, value_1 FROM raw_events_second; -SELECT * FROM ref_table ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | - 2 | - 3 | - 4 | - 5 | -(5 rows) - -INSERT INTO ref_table (value_1) -SELECT value_1 FROM raw_events_second ORDER BY value_1; -SELECT * FROM ref_table ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | - 1 | - 2 | - 2 | - 3 | - 3 | - 4 | - 4 | - 5 | - 5 | -(10 rows) - -INSERT INTO ref_table SELECT * FROM ref_table; -SELECT * FROM ref_table ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | - 1 | - 1 | - 1 | - 2 | - 2 | - 2 | - 2 | - 3 | - 3 | - 3 | - 3 | - 4 | - 4 | - 4 | - 4 | - 5 | - 5 | - 5 | - 5 | -(20 rows) - -DROP TABLE ref_table; --- Select from distributed table into reference table with user-defined sequence -CREATE SEQUENCE seq1; -CREATE TABLE ref_table_with_user_sequence (user_id int default nextval('seq1'), value_1 int); -SELECT create_reference_table('ref_table_with_user_sequence'); - create_reference_table ---------------------------------------------------------------------- - -(1 row) - -INSERT INTO ref_table_with_user_sequence -SELECT user_id, value_1 FROM raw_events_second; -SELECT * FROM ref_table_with_user_sequence ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | - 2 | - 3 | - 4 | - 5 | -(5 rows) - -INSERT INTO ref_table_with_user_sequence (value_1) -SELECT value_1 FROM raw_events_second ORDER BY value_1; -SELECT * FROM ref_table_with_user_sequence ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | - 1 | - 2 | - 2 | - 3 | - 3 | - 4 | - 4 | - 5 | - 5 | -(10 rows) - -INSERT INTO ref_table_with_user_sequence SELECT * FROM ref_table_with_user_sequence; -SELECT * FROM ref_table_with_user_sequence ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | - 1 | - 1 | - 1 | - 2 | - 2 | - 2 | - 2 | - 3 | - 3 | - 3 | - 3 | - 4 | - 4 | - 4 | - 4 | - 5 | - 5 | - 5 | - 5 | -(20 rows) - -DROP TABLE ref_table_with_user_sequence; -DROP SEQUENCE seq1; --- Select from reference table into reference table -CREATE TABLE ref1 (d timestamptz); -SELECT create_reference_table('ref1'); - create_reference_table ---------------------------------------------------------------------- - -(1 row) - -CREATE TABLE ref2 (d date); -SELECT create_reference_table('ref2'); - create_reference_table ---------------------------------------------------------------------- - -(1 row) - -INSERT INTO ref2 VALUES ('2017-10-31'); -INSERT INTO ref1 SELECT * FROM ref2; -SELECT count(*) from ref1; - count ---------------------------------------------------------------------- - 1 -(1 row) - --- also test with now() -INSERT INTO ref1 SELECT now() FROM ref2; -SELECT count(*) from ref1; - count ---------------------------------------------------------------------- - 2 -(1 row) - -DROP TABLE ref1; -DROP TABLE ref2; --- Select into an append-partitioned table is not supported -CREATE TABLE insert_append_table (user_id int, value_4 bigint); -SELECT create_distributed_table('insert_append_table', 'user_id', 'append'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -INSERT INTO insert_append_table (user_id, value_4) -SELECT user_id, 1 FROM raw_events_second LIMIT 5; -ERROR: INSERT ... SELECT into an append-distributed table is not supported -DROP TABLE insert_append_table; --- Insert from other distributed table as prepared statement -TRUNCATE raw_events_first; -PREPARE insert_prep(int) AS -INSERT INTO raw_events_first (user_id, value_1) -SELECT $1, value_4 FROM raw_events_second ORDER BY value_4 LIMIT 1; -EXECUTE insert_prep(1); -EXECUTE insert_prep(2); -EXECUTE insert_prep(3); -EXECUTE insert_prep(4); -EXECUTE insert_prep(5); -EXECUTE insert_prep(6); -SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1; - user_id | value_1 ---------------------------------------------------------------------- - 1 | 3 - 2 | 3 - 3 | 3 - 4 | 3 - 5 | 3 - 6 | 3 -(6 rows) - --- Inserting into views is handled via coordinator -TRUNCATE raw_events_first; -INSERT INTO test_view -SELECT * FROM raw_events_second; -SELECT user_id, value_4 FROM test_view ORDER BY user_id, value_4; - user_id | value_4 ---------------------------------------------------------------------- - 1 | 3 - 2 | 6 - 3 | 9 - 4 | 12 - 5 | 15 -(5 rows) - --- Drop the view now, because the column we are about to drop depends on it -DROP VIEW test_view; --- Make sure we handle dropped columns correctly -CREATE TABLE drop_col_table (col1 text, col2 text, col3 text); -SELECT create_distributed_table('drop_col_table', 'col2'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -ALTER TABLE drop_col_table DROP COLUMN col1; -INSERT INTO drop_col_table (col3, col2) -SELECT value_4, user_id FROM raw_events_second LIMIT 5; -SELECT * FROM drop_col_table ORDER BY col2, col3; - col2 | col3 ---------------------------------------------------------------------- - 1 | 3 - 2 | 6 - 3 | 9 - 4 | 12 - 5 | 15 -(5 rows) - --- make sure the tuple went to the right shard -SELECT * FROM drop_col_table WHERE col2 = '1'; - col2 | col3 ---------------------------------------------------------------------- - 1 | 3 -(1 row) - -RESET client_min_messages; --- make sure casts are handled correctly -CREATE TABLE coerce_events(user_id int, time timestamp, value_1 numeric); -SELECT create_distributed_table('coerce_events', 'user_id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -CREATE TABLE coerce_agg (user_id int, value_1_agg int); -SELECT create_distributed_table('coerce_agg', 'user_id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -INSERT INTO coerce_events(user_id, value_1) VALUES (1, 1), (2, 2), (10, 10); --- numeric -> int (straight function) -INSERT INTO coerce_agg(user_id, value_1_agg) -SELECT * -FROM ( - SELECT user_id, value_1 - FROM coerce_events -) AS ftop -ORDER BY 2 DESC, 1 DESC -LIMIT 5; --- int -> text -ALTER TABLE coerce_agg ALTER COLUMN value_1_agg TYPE text; -INSERT INTO coerce_agg(user_id, value_1_agg) -SELECT * -FROM ( - SELECT user_id, value_1 - FROM coerce_events -) AS ftop -LIMIT 5; -SELECT * FROM coerce_agg ORDER BY 1 DESC, 2 DESC; - user_id | value_1_agg ---------------------------------------------------------------------- - 10 | 10 - 10 | 10 - 2 | 2 - 2 | 2 - 1 | 1 - 1 | 1 -(6 rows) - -TRUNCATE coerce_agg; --- int -> char(1) -ALTER TABLE coerce_agg ALTER COLUMN value_1_agg TYPE char(1); -INSERT INTO coerce_agg(user_id, value_1_agg) -SELECT * -FROM ( - SELECT user_id, value_1 - FROM coerce_events -) AS ftop -LIMIT 5; -ERROR: value too long for type character(1) -SELECT * FROM coerce_agg ORDER BY 1 DESC, 2 DESC; - user_id | value_1_agg ---------------------------------------------------------------------- -(0 rows) - -TRUNCATE coerce_agg; -TRUNCATE coerce_events; --- char(5) -> char(1) -ALTER TABLE coerce_events ALTER COLUMN value_1 TYPE char(5); -INSERT INTO coerce_events(user_id, value_1) VALUES (1, 'aaaaa'), (2, 'bbbbb'); -INSERT INTO coerce_agg(user_id, value_1_agg) -SELECT * -FROM ( - SELECT user_id, value_1 - FROM coerce_events -) AS ftop -LIMIT 5; -ERROR: value too long for type character(1) --- char(1) -> char(5) -ALTER TABLE coerce_events ALTER COLUMN value_1 TYPE char(1) USING value_1::char(1); -ALTER TABLE coerce_agg ALTER COLUMN value_1_agg TYPE char(5); -INSERT INTO coerce_agg(user_id, value_1_agg) -SELECT * -FROM ( - SELECT user_id, value_1 - FROM coerce_events -) AS ftop -LIMIT 5; -SELECT * FROM coerce_agg ORDER BY 1 DESC, 2 DESC; - user_id | value_1_agg ---------------------------------------------------------------------- - 2 | b - 1 | a -(2 rows) - -TRUNCATE coerce_agg; -TRUNCATE coerce_events; --- integer -> integer (check VALUE < 5) -ALTER TABLE coerce_events ALTER COLUMN value_1 TYPE integer USING NULL; -ALTER TABLE coerce_agg ALTER COLUMN value_1_agg TYPE integer USING NULL; -ALTER TABLE coerce_agg ADD CONSTRAINT small_number CHECK (value_1_agg < 5); -INSERT INTO coerce_events (user_id, value_1) VALUES (1, 1), (10, 10); -\set VERBOSITY TERSE -INSERT INTO coerce_agg(user_id, value_1_agg) -SELECT * -FROM ( - SELECT user_id, value_1 - FROM coerce_events -) AS ftop; -ERROR: new row for relation "coerce_agg_13300067" violates check constraint "small_number_13300067" -\set VERBOSITY DEFAULT -SELECT * FROM coerce_agg ORDER BY 1 DESC, 2 DESC; - user_id | value_1_agg ---------------------------------------------------------------------- -(0 rows) - --- integer[3] -> text[3] -TRUNCATE coerce_events; -ALTER TABLE coerce_events ALTER COLUMN value_1 TYPE integer[3] USING NULL; -INSERT INTO coerce_events(user_id, value_1) VALUES (1, '{1,1,1}'), (2, '{2,2,2}'); -ALTER TABLE coerce_agg DROP COLUMN value_1_agg; -ALTER TABLE coerce_agg ADD COLUMN value_1_agg text[3]; -INSERT INTO coerce_agg(user_id, value_1_agg) -SELECT * -FROM ( - SELECT user_id, value_1 - FROM coerce_events -) AS ftop -LIMIT 5; -SELECT * FROM coerce_agg ORDER BY 1 DESC, 2 DESC; - user_id | value_1_agg ---------------------------------------------------------------------- - 2 | {2,2,2} - 1 | {1,1,1} -(2 rows) - --- INSERT..SELECT + prepared statements + recursive planning -BEGIN; -PREPARE prepared_recursive_insert_select AS -INSERT INTO users_table -SELECT * FROM users_table -WHERE value_1 IN (SELECT value_2 FROM events_table OFFSET 0); -EXECUTE prepared_recursive_insert_select; -EXECUTE prepared_recursive_insert_select; -EXECUTE prepared_recursive_insert_select; -EXECUTE prepared_recursive_insert_select; -EXECUTE prepared_recursive_insert_select; -EXECUTE prepared_recursive_insert_select; -ROLLBACK; --- upsert with on conflict update distribution column is unsupported -INSERT INTO agg_events AS ae - ( - user_id, - value_1_agg, - agg_time - ) -SELECT user_id, - value_1, - time -FROM raw_events_first -ON conflict (user_id, value_1_agg) -DO UPDATE - SET user_id = 42 -RETURNING user_id, value_1_agg; -ERROR: modifying the partition value of rows is not allowed --- test a small citus.remote_copy_flush_threshold -BEGIN; -SET LOCAL citus.remote_copy_flush_threshold TO 1; -INSERT INTO raw_events_first -SELECT * FROM raw_events_first OFFSET 0 -ON CONFLICT DO NOTHING; -ABORT; --- test fix for issue https://github.com/citusdata/citus/issues/5891 -CREATE TABLE dist_table_1( -dist_col integer, -int_col integer, -text_col_1 text, -text_col_2 text -); -SELECT create_distributed_table('dist_table_1', 'dist_col'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -INSERT INTO dist_table_1 VALUES (1, 1, 'string', 'string'); -CREATE TABLE dist_table_2( -dist_col integer, -int_col integer -); -SELECT create_distributed_table('dist_table_2', 'dist_col'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -INSERT INTO dist_table_2 VALUES (1, 1); -with a as (select random()) INSERT INTO dist_table_1 -SELECT -t1.dist_col, -1, -'string', -'string' -FROM a, dist_table_1 t1 -join dist_table_2 t2 using (dist_col) -limit 1 -returning text_col_1; - text_col_1 ---------------------------------------------------------------------- - string -(1 row) - -CREATE TABLE dist_table_3( -dist_col bigint, -int_col integer -); -SELECT create_distributed_table('dist_table_3', 'dist_col'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - --- dist_table_2 and dist_table_3 are non-colocated source tables. Repartitioning is also not possible due to --- different types for distribution columns. Citus would not be able to handle this complex insert select. -INSERT INTO dist_table_1 SELECT dist_table_2.dist_col FROM dist_table_2 JOIN dist_table_3 USING(dist_col); -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -CREATE TABLE dist_table_4( -dist_col integer, -int_col integer -); -SELECT create_distributed_table('dist_table_4', 'dist_col'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - --- Even if target table distribution column is colocated with dist_table_2's distributed column, source tables dist_table_2 and dist_table_4 --- are non-colocated. Hence, SELECT part of the query should be pulled to coordinator. -SELECT coordinator_plan($$ - EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1 SELECT dist_table_2.dist_col FROM dist_table_2 JOIN dist_table_4 ON dist_table_2.dist_col = dist_table_4.int_col; -$$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: pull to coordinator - -> Custom Scan (Citus Adaptive) - Task Count: 6 -(4 rows) - --- For INSERT SELECT, when a lateral query references an outer query, push-down is possible even if limit clause exists in the lateral query. --- It is because subquery with limit does not need to be merged at coordinator as it is a lateral query. -SELECT coordinator_plan($$ - EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1 SELECT d1.dist_col FROM dist_table_1 d1 LEFT JOIN LATERAL (SELECT * FROM dist_table_2 d2 WHERE d1.dist_col = d2.dist_col LIMIT 3) dummy USING(dist_col); -$$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus Adaptive) - Task Count: 4 -(2 rows) - --- For INSERT SELECT, when push-down is NOT possible when limit clause exists in a subquery at SELECT part of INSERT SELECT. --- It is because the subquery with limit needs to be merged at coordinator. -SELECT coordinator_plan($$ - EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1 SELECT d1.dist_col FROM dist_table_1 d1 LEFT JOIN (SELECT * FROM dist_table_2 LIMIT 3) dummy USING(dist_col); -$$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: repartition - -> Custom Scan (Citus Adaptive) - -> Distributed Subplan XXX_1 - -> Limit - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(7 rows) - -CREATE TABLE dist_table_5(id int, id2 int); -SELECT create_distributed_table('dist_table_5','id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -CREATE TABLE dist_table_6(id int, id2 int); -SELECT create_distributed_table('dist_table_6','id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - --- verify that insert select with union can be pushed down since UNION clause has FROM clause at top level query. -SELECT coordinator_plan($$ - EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5(id) SELECT id FROM (SELECT id FROM dist_table_5 UNION SELECT id FROM dist_table_6) dummy; -$$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus Adaptive) - Task Count: 4 -(2 rows) - --- verify that insert select with sublink can be pushed down when tables are colocated. -SELECT coordinator_plan($$ - EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id, (SELECT id FROM dist_table_5 WHERE dist_table_5.id = dist_table_6.id) FROM dist_table_6; -$$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus Adaptive) - Task Count: 4 -(2 rows) - -CREATE TABLE ref_table_1(id int); -SELECT create_reference_table('ref_table_1'); - create_reference_table ---------------------------------------------------------------------- - -(1 row) - --- verify that insert select with sublink cannot be pushed down when from clause does not contain any distributed relation. -INSERT INTO dist_table_5 SELECT id, (SELECT id FROM dist_table_5 WHERE dist_table_5.id = ref_table_1.id) FROM ref_table_1; -ERROR: correlated subqueries are not supported when the FROM clause contains a reference table --- verify that insert select cannot be pushed down when we have recurring range table in from clause. -SELECT coordinator_plan($$ - EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id, (SELECT id FROM ref_table_1 WHERE id = 1) FROM ref_table_1; -$$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: pull to coordinator - -> Custom Scan (Citus Adaptive) - Task Count: 1 -(4 rows) - --- verify that insert select cannot be pushed down when we have reference table in outside of outer join in a chained-join. -SELECT coordinator_plan($$ - EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT a.id FROM dist_table_5 a LEFT JOIN ref_table_1 b ON (true) RIGHT JOIN ref_table_1 c ON (true); -$$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: pull to coordinator - -> Custom Scan (Citus Adaptive) - -> Distributed Subplan XXX_1 - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(6 rows) - --- verify that insert select can be pushed down when we have reference table in outside of outer join. -SELECT coordinator_plan($$ - EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id FROM ref_table_1 LEFT JOIN dist_table_5 USING(id); -$$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: repartition - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(4 rows) - --- verify that insert select cannot be pushed down when we have reference table in outside of left join and joined on non-partition column. -SELECT coordinator_plan($$ - EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT ref_table_1.id FROM ref_table_1 LEFT JOIN dist_table_5 ON ref_table_1.id = dist_table_5.id2; -$$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: pull to coordinator - -> Custom Scan (Citus Adaptive) - -> Distributed Subplan XXX_1 - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(6 rows) - -CREATE TABLE loc_table_1(id int); --- verify that insert select cannot be pushed down when it contains join between local and distributed tables. -SELECT coordinator_plan($$ - EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id FROM dist_table_5 JOIN loc_table_1 USING(id); -$$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: repartition - -> Custom Scan (Citus Adaptive) - -> Distributed Subplan XXX_1 - -> Seq Scan on loc_table_1 - Task Count: 4 -(6 rows) - -CREATE VIEW view_1 AS - SELECT id FROM dist_table_6; -CREATE MATERIALIZED VIEW view_2 AS - SELECT id FROM dist_table_6; --- verify that insert select cannot be pushed down when it contains view. -SELECT coordinator_plan($$ - EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT * FROM view_1; -$$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus Adaptive) - Task Count: 4 -(2 rows) - --- verify that insert select cannot be pushed down when it contains materialized view. -SELECT coordinator_plan($$ - EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT * FROM view_2; -$$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: pull to coordinator - -> Seq Scan on view_2 -(3 rows) - -CREATE TABLE append_table(id integer, data text, int_data int); -SELECT create_distributed_table('append_table', 'id', 'append'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -SELECT master_create_empty_shard('append_table'); - master_create_empty_shard ---------------------------------------------------------------------- - 13300096 -(1 row) - --- verify that insert select push down for append tables are not supported. -INSERT INTO append_table SELECT * FROM append_table; -ERROR: INSERT ... SELECT into an append-distributed table is not supported --- verify that CTEs at top level of INSERT SELECT, that can normally be inlined, would not be inlined by INSERT SELECT pushdown planner --- and handled by pull to coordinator. -SELECT coordinator_plan($$ - EXPLAIN (COSTS FALSE) WITH cte_1 AS (SELECT id FROM dist_table_5 WHERE id > 5) - INSERT INTO dist_table_5 - SELECT id FROM dist_table_5 JOIN cte_1 USING(id) OFFSET 5; -$$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) - INSERT/SELECT method: pull to coordinator - -> Custom Scan (Citus Adaptive) - -> Distributed Subplan XXX_1 - -> Limit - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(7 rows) - --- verify that CTEs at top level of SELECT part, would be inlined by Postgres and pushed down by INSERT SELECT planner. -SELECT coordinator_plan($$ - EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 - WITH cte_1 AS (SELECT id FROM dist_table_5 WHERE id = 5) - SELECT id FROM dist_table_5 JOIN cte_1 USING(id); -$$); - coordinator_plan ---------------------------------------------------------------------- - Custom Scan (Citus Adaptive) - Task Count: 1 -(2 rows) - ---------------------------------------------------------------------- --- Regression Test Script for Issue #7784 --- This script tests INSERT ... SELECT with a CTE for: --- 1. Schema based sharding. --- 2. A distributed table. ---------------------------------------------------------------------- --- Enable schema-based sharding -SET citus.enable_schema_based_sharding TO ON; --- Create a table for schema based sharding -CREATE TABLE version_sch_based ( - id bigserial NOT NULL, - description varchar(255), - PRIMARY KEY (id) -); --- Insert an initial row. -INSERT INTO version_sch_based (description) VALUES ('Version 1'); --- Duplicate the row using a CTE and INSERT ... SELECT. -WITH v AS ( - SELECT * FROM version_sch_based WHERE description = 'Version 1' -) -INSERT INTO version_sch_based (description) -SELECT description FROM v; --- Expected output: --- id | description --- ----+------------- --- 1 | Version 1 --- 2 | Version 1 --- Query the table and order by id for consistency. -SELECT * FROM version_sch_based ORDER BY id; - id | description ---------------------------------------------------------------------- - 1 | Version 1 - 2 | Version 1 -(2 rows) - ---------------------------------------------------------------------- --- Case 2: Distributed Table Scenario ---------------------------------------------------------------------- -SET citus.enable_schema_based_sharding TO OFF; --- Create a table for the distributed test. -CREATE TABLE version_dist ( - id bigserial NOT NULL, - description varchar(255), - PRIMARY KEY (id) -); --- Register the table as distributed using the 'id' column as the distribution key. -SELECT create_distributed_table('version_dist', 'id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - --- Insert an initial row. -INSERT INTO version_dist (description) VALUES ('Version 1'); --- Duplicate the row using a CTE and INSERT ... SELECT. -WITH v AS ( - SELECT * FROM version_dist WHERE description = 'Version 1' -) -INSERT INTO version_dist (description) -SELECT description FROM v; --- Expected output: --- id | description --- ----+------------- --- 1 | Version 1 --- 2 | Version 1 --- Query the table and order by id for consistency. -SELECT * FROM version_dist ORDER BY id; - id | description ---------------------------------------------------------------------- - 1 | Version 1 - 2 | Version 1 -(2 rows) - ---------------------------------------------------------------------- --- Case 3: Distributed INSERT … SELECT with nextval() --- Verifies that nextval() is evaluated on the coordinator only. ---------------------------------------------------------------------- --- A fresh sequence for clarity -CREATE SEQUENCE seq_nextval_test START 100; --- Table with DEFAULT nextval() -CREATE TABLE version_dist_seq ( - id bigint DEFAULT nextval('seq_nextval_test'), - description text, - PRIMARY KEY (id) -); -SELECT create_distributed_table('version_dist_seq', 'id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - --- Seed one row (id = 100) -INSERT INTO version_dist_seq (description) VALUES ('row‑0'); --- CTE duplication – should produce **exactly one** new sequence value (id = 101) -WITH v AS ( - SELECT * FROM version_dist_seq WHERE description = 'row‑0' -) -INSERT INTO version_dist_seq (description) -SELECT description FROM v; --- Expected: ids are 100 and 101 (no gaps, no duplicates) -SELECT id, description FROM version_dist_seq ORDER BY id; - id | description ---------------------------------------------------------------------- - 100 | row‑0 - 101 | row‑0 -(2 rows) - ---------------------------------------------------------------------- --- Case 4: UNION ALL + nextval() in distributed INSERT … SELECT ---------------------------------------------------------------------- -CREATE SEQUENCE seq_union_test START 200; -CREATE TABLE version_dist_union ( - id bigint DEFAULT nextval('seq_union_test'), - val int, - PRIMARY KEY (id) -); -SELECT create_distributed_table('version_dist_union', 'id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - --- Seed rows -INSERT INTO version_dist_union (val) VALUES (1), (2); --- UNION ALL duplication; each leg returns two rows -> four inserts total -WITH src AS ( - SELECT val FROM version_dist_union - UNION ALL - SELECT val FROM version_dist_union -) -INSERT INTO version_dist_union(val) -SELECT val FROM src; --- Expected IDs: 200,201,202,203,204,205 -SELECT id, val FROM version_dist_union ORDER BY id; - id | val ---------------------------------------------------------------------- - 200 | 1 - 201 | 2 - 202 | 1 - 203 | 2 - 204 | 1 - 205 | 2 -(6 rows) - --- End of Issue #7784 --- PR #8106 — CTE traversal works when following outer Vars --- This script exercises three shapes: --- T1) CTE referenced inside a correlated subquery (one level down) --- T2) CTE referenced inside a nested subquery (two levels down) --- T3) Subquery targetlist uses a scalar sublink into the outer CTE -CREATE SCHEMA pr8106_cte_outervar; -SET search_path = pr8106_cte_outervar, public; --- Base tables for the tests -DROP TABLE IF EXISTS raw_events_first CASCADE; -NOTICE: table "raw_events_first" does not exist, skipping -DROP TABLE IF EXISTS agg_events CASCADE; -NOTICE: table "agg_events" does not exist, skipping -CREATE TABLE raw_events_first( - user_id int, - value_1 int -); -CREATE TABLE agg_events( - user_id int, - value_1_agg int -); --- Distribute and colocate (distribution key = user_id) -SELECT create_distributed_table('raw_events_first', 'user_id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -SELECT create_distributed_table('agg_events', 'user_id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - --- Seed data (duplicates on some user_ids; some NULL value_1’s) -INSERT INTO raw_events_first(user_id, value_1) VALUES - (1, 10), (1, 20), (1, NULL), - (2, NULL), - (3, 30), - (4, NULL), - (5, 50), (5, NULL), - (6, NULL); ---------------------------------------------------------------------- --- T1) CTE referenced inside a correlated subquery (one level down) ---------------------------------------------------------------------- -TRUNCATE agg_events; -WITH c AS MATERIALIZED ( - SELECT user_id FROM raw_events_first -) -INSERT INTO agg_events (user_id) -SELECT t.user_id -FROM raw_events_first t -WHERE EXISTS (SELECT 1 FROM c WHERE c.user_id = t.user_id); --- Expect one insert per row in raw_events_first (EXISTS always true per user_id) -SELECT 't1_count_matches' AS test, - (SELECT count(*) FROM agg_events) = - (SELECT count(*) FROM raw_events_first) AS ok; - test | ok ---------------------------------------------------------------------- - t1_count_matches | t -(1 row) - --- Spot-check: how many rows were inserted -SELECT 't1_rows' AS test, count(*) AS rows FROM agg_events; - test | rows ---------------------------------------------------------------------- - t1_rows | 9 -(1 row) - ---------------------------------------------------------------------- --- T2) CTE referenced inside a nested subquery (two levels down) ---------------------------------------------------------------------- -TRUNCATE agg_events; -WITH c AS MATERIALIZED ( - SELECT user_id FROM raw_events_first -) -INSERT INTO agg_events (user_id) -SELECT t.user_id -FROM raw_events_first t -WHERE EXISTS ( - SELECT 1 - FROM (SELECT user_id FROM c) c2 - WHERE c2.user_id = t.user_id -); --- Same cardinality expectation as T1 -SELECT 't2_count_matches' AS test, - (SELECT count(*) FROM agg_events) = - (SELECT count(*) FROM raw_events_first) AS ok; - test | ok ---------------------------------------------------------------------- - t2_count_matches | t -(1 row) - -SELECT 't2_rows' AS test, count(*) AS rows FROM agg_events; - test | rows ---------------------------------------------------------------------- - t2_rows | 9 -(1 row) - ---------------------------------------------------------------------- --- T3) Subquery targetlist uses a scalar sublink into the outer CTE --- (use MAX() to keep scalar subquery single-row) ---------------------------------------------------------------------- -TRUNCATE agg_events; -WITH c AS (SELECT user_id, value_1 FROM raw_events_first) -INSERT INTO agg_events (user_id, value_1_agg) -SELECT d.user_id, d.value_1_agg -FROM ( - SELECT t.user_id, - (SELECT max(c.value_1) FROM c WHERE c.user_id = t.user_id) AS value_1_agg - FROM raw_events_first t -) AS d -WHERE d.value_1_agg IS NOT NULL; --- Expect one insert per row in raw_events_first whose user_id has at least one non-NULL value_1 -SELECT 't3_count_matches' AS test, - (SELECT count(*) FROM agg_events) = - ( - SELECT count(*) - FROM raw_events_first t - WHERE EXISTS ( - SELECT 1 FROM raw_events_first c - WHERE c.user_id = t.user_id AND c.value_1 IS NOT NULL - ) - ) AS ok; - test | ok ---------------------------------------------------------------------- - t3_count_matches | t -(1 row) - --- Also verify no NULLs were inserted into value_1_agg -SELECT 't3_no_null_value_1_agg' AS test, - NOT EXISTS (SELECT 1 FROM agg_events WHERE value_1_agg IS NULL) AS ok; - test | ok ---------------------------------------------------------------------- - t3_no_null_value_1_agg | t -(1 row) - --- Deterministic sample of results -SELECT 't3_sample' AS test, user_id, value_1_agg -FROM agg_events -ORDER BY user_id -LIMIT 5; - test | user_id | value_1_agg ---------------------------------------------------------------------- - t3_sample | 1 | 20 - t3_sample | 1 | 20 - t3_sample | 1 | 20 - t3_sample | 3 | 30 - t3_sample | 5 | 50 -(5 rows) - --- End of PR #8106 — CTE traversal works when following outer Vars -SET client_min_messages TO ERROR; -DROP SCHEMA pr8106_cte_outervar CASCADE; -DROP SCHEMA multi_insert_select CASCADE;