mirror of https://github.com/citusdata/citus.git
3508 lines
214 KiB
Plaintext
3508 lines
214 KiB
Plaintext
--
|
|
-- MULTI_INSERT_SELECT
|
|
--
|
|
-- This test file has an alternative output because of the change in the
|
|
-- display of SQL-standard function's arguments in INSERT/SELECT in PG15.
|
|
-- The alternative output can be deleted when we drop support for PG14
|
|
--
|
|
CREATE SCHEMA multi_insert_select;
|
|
SET search_path = multi_insert_select,public;
|
|
SHOW server_version \gset
|
|
SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15;
|
|
server_version_ge_15
|
|
---------------------------------------------------------------------
|
|
t
|
|
(1 row)
|
|
|
|
SET citus.next_shard_id TO 13300000;
|
|
SET citus.next_placement_id TO 13300000;
|
|
-- create co-located tables
|
|
SET citus.shard_count = 4;
|
|
SET citus.shard_replication_factor = 2;
|
|
-- order of execution might change in parallel executions
|
|
-- and the error details might contain the worker node
|
|
-- so be less verbose with \set VERBOSITY TERSE when necessary
|
|
CREATE TABLE raw_events_first (user_id int, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint, UNIQUE(user_id, value_1));
|
|
SELECT create_distributed_table('raw_events_first', 'user_id');
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
CREATE TABLE raw_events_second (user_id int, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint, UNIQUE(user_id, value_1));
|
|
SELECT create_distributed_table('raw_events_second', 'user_id');
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
CREATE TABLE agg_events (user_id int, value_1_agg int, value_2_agg int, value_3_agg float, value_4_agg bigint, agg_time timestamp, UNIQUE(user_id, value_1_agg));
|
|
SELECT create_distributed_table('agg_events', 'user_id');;
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
-- create the reference table as well
|
|
CREATE TABLE reference_table (user_id int);
|
|
SELECT create_reference_table('reference_table');
|
|
create_reference_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
CREATE TABLE insert_select_varchar_test (key varchar, value int);
|
|
SELECT create_distributed_table('insert_select_varchar_test', 'key', 'hash');
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
-- set back to the defaults
|
|
SET citus.shard_count = DEFAULT;
|
|
SET citus.shard_replication_factor = DEFAULT;
|
|
INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES
|
|
(1, now(), 10, 100, 1000.1, 10000);
|
|
INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES
|
|
(2, now(), 20, 200, 2000.1, 20000);
|
|
INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES
|
|
(3, now(), 30, 300, 3000.1, 30000);
|
|
INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES
|
|
(4, now(), 40, 400, 4000.1, 40000);
|
|
INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES
|
|
(5, now(), 50, 500, 5000.1, 50000);
|
|
INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES
|
|
(6, now(), 60, 600, 6000.1, 60000);
|
|
SET client_min_messages TO DEBUG2;
|
|
-- raw table to raw table
|
|
INSERT INTO raw_events_second SELECT * FROM raw_events_first;
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_first.user_id, raw_events_first."time", raw_events_first.value_1, raw_events_first.value_2, raw_events_first.value_3, raw_events_first.value_4 FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_first.user_id, raw_events_first."time", raw_events_first.value_1, raw_events_first.value_2, raw_events_first.value_3, raw_events_first.value_4 FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_first.user_id, raw_events_first."time", raw_events_first.value_1, raw_events_first.value_2, raw_events_first.value_3, raw_events_first.value_4 FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_first.user_id, raw_events_first."time", raw_events_first.value_1, raw_events_first.value_2, raw_events_first.value_3, raw_events_first.value_4 FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL)
|
|
-- see that our first multi-shard INSERT...SELECT works as expected
|
|
SET client_min_messages TO INFO;
|
|
SELECT
|
|
raw_events_first.user_id
|
|
FROM
|
|
raw_events_first, raw_events_second
|
|
WHERE
|
|
raw_events_first.user_id = raw_events_second.user_id
|
|
ORDER BY
|
|
user_id DESC;
|
|
user_id
|
|
---------------------------------------------------------------------
|
|
6
|
|
5
|
|
4
|
|
3
|
|
2
|
|
1
|
|
(6 rows)
|
|
|
|
-- see that we get unique violations
|
|
\set VERBOSITY TERSE
|
|
INSERT INTO raw_events_second SELECT * FROM raw_events_first;
|
|
ERROR: duplicate key value violates unique constraint "raw_events_second_user_id_value_1_key_13300004"
|
|
\set VERBOSITY DEFAULT
|
|
-- stable functions should be allowed
|
|
INSERT INTO raw_events_second (user_id, time)
|
|
SELECT
|
|
user_id, now()
|
|
FROM
|
|
raw_events_first
|
|
WHERE
|
|
user_id < 0;
|
|
INSERT INTO raw_events_second (user_id)
|
|
SELECT
|
|
user_id
|
|
FROM
|
|
raw_events_first
|
|
WHERE
|
|
time > now() + interval '1 day';
|
|
-- hide version-dependent PL/pgSQL context messages
|
|
\set VERBOSITY terse
|
|
-- make sure we evaluate stable functions on the master, once
|
|
CREATE OR REPLACE FUNCTION evaluate_on_master()
|
|
RETURNS int LANGUAGE plpgsql STABLE
|
|
AS $function$
|
|
BEGIN
|
|
RAISE NOTICE 'evaluating on master';
|
|
RETURN 0;
|
|
END;
|
|
$function$;
|
|
INSERT INTO raw_events_second (user_id, value_1)
|
|
SELECT
|
|
user_id, evaluate_on_master()
|
|
FROM
|
|
raw_events_first
|
|
WHERE
|
|
user_id < 0;
|
|
NOTICE: evaluating on master
|
|
-- make sure we don't evaluate stable functions with column arguments
|
|
SET citus.enable_metadata_sync TO OFF;
|
|
CREATE OR REPLACE FUNCTION evaluate_on_master(x int)
|
|
RETURNS int LANGUAGE plpgsql STABLE
|
|
AS $function$
|
|
BEGIN
|
|
RAISE NOTICE 'evaluating on master';
|
|
RETURN x;
|
|
END;
|
|
$function$;
|
|
RESET citus.enable_metadata_sync;
|
|
INSERT INTO raw_events_second (user_id, value_1)
|
|
SELECT
|
|
user_id, evaluate_on_master(value_1)
|
|
FROM
|
|
raw_events_first
|
|
WHERE
|
|
user_id = 0;
|
|
ERROR: function multi_insert_select.evaluate_on_master(integer) does not exist
|
|
-- add one more row
|
|
INSERT INTO raw_events_first (user_id, time) VALUES
|
|
(7, now());
|
|
-- try a single shard query
|
|
SET client_min_messages TO DEBUG2;
|
|
INSERT INTO raw_events_second (user_id, time) SELECT user_id, time FROM raw_events_first WHERE user_id = 7;
|
|
DEBUG: Creating router plan
|
|
DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id, "time") SELECT raw_events_first.user_id, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) 7) AND (raw_events_first.user_id IS NOT NULL))
|
|
DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away
|
|
DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away
|
|
SET client_min_messages TO INFO;
|
|
-- add one more row
|
|
INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES
|
|
(8, now(), 80, 800, 8000, 80000);
|
|
-- reorder columns
|
|
SET client_min_messages TO DEBUG2;
|
|
INSERT INTO raw_events_second (value_2, value_1, value_3, value_4, user_id, time)
|
|
SELECT
|
|
value_2, value_1, value_3, value_4, user_id, time
|
|
FROM
|
|
raw_events_first
|
|
WHERE
|
|
user_id = 8;
|
|
DEBUG: Creating router plan
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_first.user_id, raw_events_first."time", raw_events_first.value_1, raw_events_first.value_2, raw_events_first.value_3, raw_events_first.value_4 FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) 8) AND (raw_events_first.user_id IS NOT NULL))
|
|
DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away
|
|
DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away
|
|
DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away
|
|
-- a zero shard select
|
|
INSERT INTO raw_events_second (value_2, value_1, value_3, value_4, user_id, time)
|
|
SELECT
|
|
value_2, value_1, value_3, value_4, user_id, time
|
|
FROM
|
|
raw_events_first
|
|
WHERE
|
|
false;
|
|
DEBUG: Creating router plan
|
|
DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away
|
|
DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away
|
|
DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away
|
|
DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away
|
|
-- another zero shard select
|
|
INSERT INTO raw_events_second (value_2, value_1, value_3, value_4, user_id, time)
|
|
SELECT
|
|
value_2, value_1, value_3, value_4, user_id, time
|
|
FROM
|
|
raw_events_first
|
|
WHERE
|
|
0 != 0;
|
|
DEBUG: Creating router plan
|
|
DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away
|
|
DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away
|
|
DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away
|
|
DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away
|
|
-- add one more row
|
|
SET client_min_messages TO INFO;
|
|
INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES
|
|
(9, now(), 90, 900, 9000, 90000);
|
|
-- show that RETURNING also works
|
|
SET client_min_messages TO DEBUG2;
|
|
INSERT INTO raw_events_second (user_id, value_1, value_3)
|
|
SELECT
|
|
user_id, value_1, value_3
|
|
FROM
|
|
raw_events_first
|
|
WHERE
|
|
value_3 = 9000
|
|
RETURNING *;
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id, value_1, value_3) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first.value_3 FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.value_3 OPERATOR(pg_catalog.=) (9000)::double precision) AND (raw_events_first.user_id IS NOT NULL)) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id, value_1, value_3) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first.value_3 FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.value_3 OPERATOR(pg_catalog.=) (9000)::double precision) AND (raw_events_first.user_id IS NOT NULL)) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id, value_1, value_3) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first.value_3 FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((raw_events_first.value_3 OPERATOR(pg_catalog.=) (9000)::double precision) AND (raw_events_first.user_id IS NOT NULL)) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id, value_1, value_3) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first.value_3 FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.value_3 OPERATOR(pg_catalog.=) (9000)::double precision) AND (raw_events_first.user_id IS NOT NULL)) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4
|
|
user_id | time | value_1 | value_2 | value_3 | value_4
|
|
---------------------------------------------------------------------
|
|
9 | | 90 | | 9000 |
|
|
(1 row)
|
|
|
|
-- hits two shards
|
|
\set VERBOSITY TERSE
|
|
INSERT INTO raw_events_second (user_id, value_1, value_3)
|
|
SELECT
|
|
user_id, value_1, value_3
|
|
FROM
|
|
raw_events_first
|
|
WHERE
|
|
user_id = 9 OR user_id = 16
|
|
RETURNING *;
|
|
DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id, value_1, value_3) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first.value_3 FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (((raw_events_first.user_id OPERATOR(pg_catalog.=) 9) OR (raw_events_first.user_id OPERATOR(pg_catalog.=) 16)) AND (raw_events_first.user_id IS NOT NULL)) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4
|
|
DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id, value_1, value_3) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first.value_3 FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (((raw_events_first.user_id OPERATOR(pg_catalog.=) 9) OR (raw_events_first.user_id OPERATOR(pg_catalog.=) 16)) AND (raw_events_first.user_id IS NOT NULL)) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4
|
|
ERROR: duplicate key value violates unique constraint "raw_events_second_user_id_value_1_key_13300007"
|
|
-- now do some aggregations
|
|
INSERT INTO agg_events
|
|
SELECT
|
|
user_id, sum(value_1), avg(value_2), sum(value_3), count(value_4)
|
|
FROM
|
|
raw_events_first
|
|
GROUP BY
|
|
user_id;
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg, value_2_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, avg(raw_events_first.value_2) AS avg, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.user_id
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg, value_2_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, avg(raw_events_first.value_2) AS avg, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.user_id
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg, value_2_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, avg(raw_events_first.value_2) AS avg, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.user_id
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg, value_2_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, avg(raw_events_first.value_2) AS avg, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.user_id
|
|
-- group by column does not exist in the SELECT target list
|
|
INSERT INTO agg_events (value_3_agg, value_4_agg, value_1_agg, user_id)
|
|
SELECT
|
|
sum(value_3), count(value_4), sum(value_1), user_id
|
|
FROM
|
|
raw_events_first
|
|
GROUP BY
|
|
value_2, user_id
|
|
RETURNING *;
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.value_2, raw_events_first.user_id RETURNING citus_table_alias.user_id, citus_table_alias.value_1_agg, citus_table_alias.value_2_agg, citus_table_alias.value_3_agg, citus_table_alias.value_4_agg, citus_table_alias.agg_time
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.value_2, raw_events_first.user_id RETURNING citus_table_alias.user_id, citus_table_alias.value_1_agg, citus_table_alias.value_2_agg, citus_table_alias.value_3_agg, citus_table_alias.value_4_agg, citus_table_alias.agg_time
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.value_2, raw_events_first.user_id RETURNING citus_table_alias.user_id, citus_table_alias.value_1_agg, citus_table_alias.value_2_agg, citus_table_alias.value_3_agg, citus_table_alias.value_4_agg, citus_table_alias.agg_time
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg, value_3_agg, value_4_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum, sum(raw_events_first.value_3) AS sum, count(raw_events_first.value_4) AS count FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.value_2, raw_events_first.user_id RETURNING citus_table_alias.user_id, citus_table_alias.value_1_agg, citus_table_alias.value_2_agg, citus_table_alias.value_3_agg, citus_table_alias.value_4_agg, citus_table_alias.agg_time
|
|
ERROR: duplicate key value violates unique constraint "agg_events_user_id_value_1_agg_key_13300008"
|
|
-- some subquery tests
|
|
INSERT INTO agg_events
|
|
(value_1_agg,
|
|
user_id)
|
|
SELECT SUM(value_1),
|
|
id
|
|
FROM (SELECT raw_events_second.user_id AS id,
|
|
raw_events_second.value_1
|
|
FROM raw_events_first,
|
|
raw_events_second
|
|
WHERE raw_events_first.user_id = raw_events_second.user_id) AS foo
|
|
GROUP BY id
|
|
ORDER BY id;
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT foo.id, sum(foo.value_1) AS sum FROM (SELECT raw_events_second.user_id AS id, raw_events_second.value_1 FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id)) foo WHERE (foo.id IS NOT NULL) GROUP BY foo.id ORDER BY foo.id
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT foo.id, sum(foo.value_1) AS sum FROM (SELECT raw_events_second.user_id AS id, raw_events_second.value_1 FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id)) foo WHERE (foo.id IS NOT NULL) GROUP BY foo.id ORDER BY foo.id
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT foo.id, sum(foo.value_1) AS sum FROM (SELECT raw_events_second.user_id AS id, raw_events_second.value_1 FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id)) foo WHERE (foo.id IS NOT NULL) GROUP BY foo.id ORDER BY foo.id
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT foo.id, sum(foo.value_1) AS sum FROM (SELECT raw_events_second.user_id AS id, raw_events_second.value_1 FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id)) foo WHERE (foo.id IS NOT NULL) GROUP BY foo.id ORDER BY foo.id
|
|
ERROR: duplicate key value violates unique constraint "agg_events_user_id_value_1_agg_key_13300008"
|
|
-- subquery one more level deep
|
|
INSERT INTO agg_events
|
|
(value_4_agg,
|
|
value_1_agg,
|
|
user_id)
|
|
SELECT v4,
|
|
v1,
|
|
id
|
|
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
|
SUM(raw_events_first.value_1) AS v1,
|
|
raw_events_second.user_id AS id
|
|
FROM raw_events_first,
|
|
raw_events_second
|
|
WHERE raw_events_first.user_id = raw_events_second.user_id
|
|
GROUP BY raw_events_second.user_id) AS foo
|
|
ORDER BY id;
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg, value_4_agg) SELECT foo.id, foo.v1, foo.v4 FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id) foo WHERE (foo.id IS NOT NULL) ORDER BY foo.id
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg, value_4_agg) SELECT foo.id, foo.v1, foo.v4 FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id) foo WHERE (foo.id IS NOT NULL) ORDER BY foo.id
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg, value_4_agg) SELECT foo.id, foo.v1, foo.v4 FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id) foo WHERE (foo.id IS NOT NULL) ORDER BY foo.id
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg, value_4_agg) SELECT foo.id, foo.v1, foo.v4 FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id) foo WHERE (foo.id IS NOT NULL) ORDER BY foo.id
|
|
ERROR: duplicate key value violates unique constraint "agg_events_user_id_value_1_agg_key_13300008"
|
|
\set VERBOSITY DEFAULT
|
|
-- join between subqueries
|
|
INSERT INTO agg_events
|
|
(user_id)
|
|
SELECT f2.id FROM
|
|
(SELECT
|
|
id
|
|
FROM (SELECT reference_table.user_id AS id
|
|
FROM raw_events_first,
|
|
reference_table
|
|
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
|
|
INNER JOIN
|
|
(SELECT v4,
|
|
v1,
|
|
id
|
|
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
|
SUM(raw_events_first.value_1) AS v1,
|
|
raw_events_second.user_id AS id
|
|
FROM raw_events_first,
|
|
raw_events_second
|
|
WHERE raw_events_first.user_id = raw_events_second.user_id
|
|
GROUP BY raw_events_second.user_id
|
|
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
|
ON (f.id = f2.id);
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f2.id IS NOT NULL)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f2.id IS NOT NULL)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f2.id IS NOT NULL)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f2.id IS NOT NULL)
|
|
-- add one more level of subqueries on top of subquery JOINs
|
|
INSERT INTO agg_events
|
|
(user_id, value_4_agg)
|
|
SELECT
|
|
outer_most.id, max(outer_most.value)
|
|
FROM
|
|
(
|
|
SELECT f2.id as id, f2.v4 as value FROM
|
|
(SELECT
|
|
id
|
|
FROM (SELECT reference_table.user_id AS id
|
|
FROM raw_events_first,
|
|
reference_table
|
|
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
|
|
INNER JOIN
|
|
(SELECT v4,
|
|
v1,
|
|
id
|
|
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
|
SUM(raw_events_first.value_1) AS v1,
|
|
raw_events_second.user_id AS id
|
|
FROM raw_events_first,
|
|
raw_events_second
|
|
WHERE raw_events_first.user_id = raw_events_second.user_id
|
|
GROUP BY raw_events_second.user_id
|
|
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
|
ON (f.id = f2.id)) as outer_most
|
|
GROUP BY
|
|
outer_most.id;
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (outer_most.id IS NOT NULL) GROUP BY outer_most.id
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (outer_most.id IS NOT NULL) GROUP BY outer_most.id
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (outer_most.id IS NOT NULL) GROUP BY outer_most.id
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (outer_most.id IS NOT NULL) GROUP BY outer_most.id
|
|
-- subqueries in WHERE clause
|
|
INSERT INTO raw_events_second
|
|
(user_id)
|
|
SELECT user_id
|
|
FROM raw_events_first
|
|
WHERE user_id IN (SELECT user_id
|
|
FROM raw_events_second
|
|
WHERE user_id = 2);
|
|
DEBUG: Creating router plan
|
|
DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away
|
|
DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away
|
|
DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) 2))) AND (raw_events_first.user_id IS NOT NULL))
|
|
INSERT INTO raw_events_second
|
|
(user_id)
|
|
SELECT user_id
|
|
FROM raw_events_first
|
|
WHERE user_id IN (SELECT user_id
|
|
FROM raw_events_second
|
|
WHERE user_id != 2 AND value_1 = 2000)
|
|
ON conflict (user_id, value_1) DO NOTHING;
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.<>) 2) AND (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000)))) AND (raw_events_first.user_id IS NOT NULL)) ON CONFLICT(user_id, value_1) DO NOTHING
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300005 raw_events_second WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.<>) 2) AND (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000)))) AND (raw_events_first.user_id IS NOT NULL)) ON CONFLICT(user_id, value_1) DO NOTHING
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300006 raw_events_second WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.<>) 2) AND (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000)))) AND (raw_events_first.user_id IS NOT NULL)) ON CONFLICT(user_id, value_1) DO NOTHING
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.<>) 2) AND (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000)))) AND (raw_events_first.user_id IS NOT NULL)) ON CONFLICT(user_id, value_1) DO NOTHING
|
|
INSERT INTO raw_events_second
|
|
(user_id)
|
|
SELECT user_id
|
|
FROM raw_events_first
|
|
WHERE user_id IN (SELECT user_id
|
|
FROM raw_events_second WHERE false);
|
|
DEBUG: Creating router plan
|
|
DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away
|
|
DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away
|
|
DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away
|
|
DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away
|
|
INSERT INTO raw_events_second
|
|
(user_id)
|
|
SELECT user_id
|
|
FROM raw_events_first
|
|
WHERE user_id IN (SELECT user_id
|
|
FROM raw_events_second
|
|
WHERE value_1 = 1000 OR value_1 = 2000 OR value_1 = 3000);
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second WHERE ((raw_events_second.value_1 OPERATOR(pg_catalog.=) 1000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 3000)))) AND (raw_events_first.user_id IS NOT NULL))
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300005 raw_events_second WHERE ((raw_events_second.value_1 OPERATOR(pg_catalog.=) 1000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 3000)))) AND (raw_events_first.user_id IS NOT NULL))
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300006 raw_events_second WHERE ((raw_events_second.value_1 OPERATOR(pg_catalog.=) 1000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 3000)))) AND (raw_events_first.user_id IS NOT NULL))
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second WHERE ((raw_events_second.value_1 OPERATOR(pg_catalog.=) 1000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 2000) OR (raw_events_second.value_1 OPERATOR(pg_catalog.=) 3000)))) AND (raw_events_first.user_id IS NOT NULL))
|
|
-- let's mix subqueries in FROM clause and subqueries in WHERE
|
|
INSERT INTO agg_events
|
|
(user_id)
|
|
SELECT f2.id FROM
|
|
(SELECT
|
|
id
|
|
FROM (SELECT reference_table.user_id AS id
|
|
FROM raw_events_first,
|
|
reference_table
|
|
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
|
|
INNER JOIN
|
|
(SELECT v4,
|
|
v1,
|
|
id
|
|
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
|
SUM(raw_events_first.value_1) AS v1,
|
|
raw_events_second.user_id AS id
|
|
FROM raw_events_first,
|
|
raw_events_second
|
|
WHERE raw_events_first.user_id = raw_events_second.user_id
|
|
GROUP BY raw_events_second.user_id
|
|
HAVING SUM(raw_events_second.value_4) > 1000) AS foo2 ) as f2
|
|
ON (f.id = f2.id)
|
|
WHERE f.id IN (SELECT user_id
|
|
FROM raw_events_second);
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (1000)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE ((f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second)) AND (f2.id IS NOT NULL))
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (1000)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE ((f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300005 raw_events_second)) AND (f2.id IS NOT NULL))
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (1000)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE ((f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300006 raw_events_second)) AND (f2.id IS NOT NULL))
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (1000)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE ((f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second)) AND (f2.id IS NOT NULL))
|
|
-- some UPSERTS
|
|
INSERT INTO agg_events AS ae
|
|
(
|
|
user_id,
|
|
value_1_agg,
|
|
agg_time
|
|
)
|
|
SELECT user_id,
|
|
value_1,
|
|
time
|
|
FROM raw_events_first
|
|
ON conflict (user_id, value_1_agg)
|
|
DO UPDATE
|
|
SET agg_time = EXCLUDED.agg_time
|
|
WHERE ae.agg_time < EXCLUDED.agg_time;
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time)
|
|
-- upserts with returning
|
|
INSERT INTO agg_events AS ae
|
|
(
|
|
user_id,
|
|
value_1_agg,
|
|
agg_time
|
|
)
|
|
SELECT user_id,
|
|
value_1,
|
|
time
|
|
FROM raw_events_first
|
|
ON conflict (user_id, value_1_agg)
|
|
DO UPDATE
|
|
SET agg_time = EXCLUDED.agg_time
|
|
WHERE ae.agg_time < EXCLUDED.agg_time
|
|
RETURNING user_id, value_1_agg;
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) RETURNING ae.user_id, ae.value_1_agg
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) RETURNING ae.user_id, ae.value_1_agg
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) RETURNING ae.user_id, ae.value_1_agg
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS ae (user_id, value_1_agg, agg_time) SELECT raw_events_first.user_id, raw_events_first.value_1, raw_events_first."time" FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time OPERATOR(pg_catalog.<) excluded.agg_time) RETURNING ae.user_id, ae.value_1_agg
|
|
user_id | value_1_agg
|
|
---------------------------------------------------------------------
|
|
7 |
|
|
(1 row)
|
|
|
|
INSERT INTO agg_events (user_id, value_1_agg)
|
|
SELECT
|
|
user_id, sum(value_1 + value_2)
|
|
FROM
|
|
raw_events_first GROUP BY user_id;
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) AS sum FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.user_id
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) AS sum FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.user_id
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) AS sum FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.user_id
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) AS sum FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.user_id
|
|
-- FILTER CLAUSE
|
|
INSERT INTO agg_events (user_id, value_1_agg)
|
|
SELECT
|
|
user_id, sum(value_1 + value_2) FILTER (where value_3 = 15)
|
|
FROM
|
|
raw_events_first GROUP BY user_id;
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) FILTER (WHERE (raw_events_first.value_3 OPERATOR(pg_catalog.=) (15)::double precision)) AS sum FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.user_id
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) FILTER (WHERE (raw_events_first.value_3 OPERATOR(pg_catalog.=) (15)::double precision)) AS sum FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.user_id
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) FILTER (WHERE (raw_events_first.value_3 OPERATOR(pg_catalog.=) (15)::double precision)) AS sum FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.user_id
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum((raw_events_first.value_1 OPERATOR(pg_catalog.+) raw_events_first.value_2)) FILTER (WHERE (raw_events_first.value_3 OPERATOR(pg_catalog.=) (15)::double precision)) AS sum FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL) GROUP BY raw_events_first.user_id
|
|
-- a test with reference table JOINs
|
|
INSERT INTO
|
|
agg_events (user_id, value_1_agg)
|
|
SELECT
|
|
raw_events_first.user_id, sum(value_1)
|
|
FROM
|
|
reference_table, raw_events_first
|
|
WHERE
|
|
raw_events_first.user_id = reference_table.user_id
|
|
GROUP BY
|
|
raw_events_first.user_id;
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum FROM multi_insert_select.reference_table_13300012 reference_table, multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id) AND (raw_events_first.user_id IS NOT NULL)) GROUP BY raw_events_first.user_id
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum FROM multi_insert_select.reference_table_13300012 reference_table, multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id) AND (raw_events_first.user_id IS NOT NULL)) GROUP BY raw_events_first.user_id
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum FROM multi_insert_select.reference_table_13300012 reference_table, multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id) AND (raw_events_first.user_id IS NOT NULL)) GROUP BY raw_events_first.user_id
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum FROM multi_insert_select.reference_table_13300012 reference_table, multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id) AND (raw_events_first.user_id IS NOT NULL)) GROUP BY raw_events_first.user_id
|
|
-- a note on the outer joins is that
|
|
-- we filter out outer join results
|
|
-- where partition column returns
|
|
-- NULL. Thus, we could INSERT fewer rows
|
|
-- than we expect from subquery result.
|
|
-- see the following tests
|
|
SET client_min_messages TO INFO;
|
|
-- we don't want to see constraint violations, so truncate first
|
|
TRUNCATE agg_events;
|
|
-- add a row to the second table to make table contents different
|
|
INSERT INTO raw_events_second (user_id, time, value_1, value_2, value_3, value_4) VALUES
|
|
(10, now(), 100, 10000, 10000, 100000);
|
|
DELETE FROM raw_events_second WHERE user_id = 2;
|
|
-- we select 10 rows
|
|
SELECT t1.user_id AS col1,
|
|
t2.user_id AS col2
|
|
FROM raw_events_first t1
|
|
FULL JOIN raw_events_second t2
|
|
ON t1.user_id = t2.user_id
|
|
ORDER BY t1.user_id,
|
|
t2.user_id;
|
|
col1 | col2
|
|
---------------------------------------------------------------------
|
|
1 | 1
|
|
2 |
|
|
3 | 3
|
|
4 | 4
|
|
5 | 5
|
|
6 | 6
|
|
7 | 7
|
|
8 | 8
|
|
9 | 9
|
|
| 10
|
|
(10 rows)
|
|
|
|
SET client_min_messages TO DEBUG2;
|
|
-- we insert 9 rows since we filtered out
|
|
-- NULL partition column values
|
|
INSERT INTO agg_events (user_id, value_1_agg)
|
|
SELECT t1.user_id AS col1,
|
|
t2.user_id AS col2
|
|
FROM raw_events_first t1
|
|
FULL JOIN raw_events_second t2
|
|
ON t1.user_id = t2.user_id;
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT t1.user_id AS col1, t2.user_id AS col2 FROM (multi_insert_select.raw_events_first_13300000 t1 FULL JOIN multi_insert_select.raw_events_second_13300004 t2 ON ((t1.user_id OPERATOR(pg_catalog.=) t2.user_id))) WHERE (t1.user_id IS NOT NULL)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT t1.user_id AS col1, t2.user_id AS col2 FROM (multi_insert_select.raw_events_first_13300001 t1 FULL JOIN multi_insert_select.raw_events_second_13300005 t2 ON ((t1.user_id OPERATOR(pg_catalog.=) t2.user_id))) WHERE (t1.user_id IS NOT NULL)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT t1.user_id AS col1, t2.user_id AS col2 FROM (multi_insert_select.raw_events_first_13300002 t1 FULL JOIN multi_insert_select.raw_events_second_13300006 t2 ON ((t1.user_id OPERATOR(pg_catalog.=) t2.user_id))) WHERE (t1.user_id IS NOT NULL)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT t1.user_id AS col1, t2.user_id AS col2 FROM (multi_insert_select.raw_events_first_13300003 t1 FULL JOIN multi_insert_select.raw_events_second_13300007 t2 ON ((t1.user_id OPERATOR(pg_catalog.=) t2.user_id))) WHERE (t1.user_id IS NOT NULL)
|
|
SET client_min_messages TO INFO;
|
|
-- see that the results are different from the SELECT query
|
|
SELECT
|
|
user_id, value_1_agg
|
|
FROM
|
|
agg_events
|
|
ORDER BY
|
|
user_id, value_1_agg;
|
|
user_id | value_1_agg
|
|
---------------------------------------------------------------------
|
|
1 | 1
|
|
2 |
|
|
3 | 3
|
|
4 | 4
|
|
5 | 5
|
|
6 | 6
|
|
7 | 7
|
|
8 | 8
|
|
9 | 9
|
|
(9 rows)
|
|
|
|
-- we don't want to see constraint violations, so truncate first
|
|
SET client_min_messages TO INFO;
|
|
TRUNCATE agg_events;
|
|
SET client_min_messages TO DEBUG2;
|
|
-- DISTINCT clause
|
|
INSERT INTO agg_events (value_1_agg, user_id)
|
|
SELECT
|
|
DISTINCT value_1, user_id
|
|
FROM
|
|
raw_events_first;
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL)
|
|
-- we don't want to see constraint violations, so truncate first
|
|
SET client_min_messages TO INFO;
|
|
truncate agg_events;
|
|
SET client_min_messages TO DEBUG2;
|
|
-- DISTINCT ON clauses are supported
|
|
-- distinct on(non-partition column)
|
|
-- values are pulled to master
|
|
INSERT INTO agg_events (value_1_agg, user_id)
|
|
SELECT
|
|
DISTINCT ON (value_1) value_1, user_id
|
|
FROM
|
|
raw_events_first;
|
|
DEBUG: cannot push down this subquery
|
|
DETAIL: Distinct on columns without partition column is currently unsupported
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
DEBUG: Collecting INSERT ... SELECT results on coordinator
|
|
SELECT user_id, value_1_agg FROM agg_events ORDER BY 1,2;
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
user_id | value_1_agg
|
|
---------------------------------------------------------------------
|
|
1 | 10
|
|
2 | 20
|
|
3 | 30
|
|
4 | 40
|
|
5 | 50
|
|
6 | 60
|
|
7 |
|
|
8 | 80
|
|
9 | 90
|
|
(9 rows)
|
|
|
|
-- we don't want to see constraint violations, so truncate first
|
|
SET client_min_messages TO INFO;
|
|
truncate agg_events;
|
|
SET client_min_messages TO DEBUG2;
|
|
-- distinct on(partition column)
|
|
-- queries are forwarded to workers
|
|
INSERT INTO agg_events (value_1_agg, user_id)
|
|
SELECT
|
|
DISTINCT ON (user_id) value_1, user_id
|
|
FROM
|
|
raw_events_first;
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT ON (raw_events_first.user_id) raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT ON (raw_events_first.user_id) raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT ON (raw_events_first.user_id) raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT ON (raw_events_first.user_id) raw_events_first.user_id, raw_events_first.value_1 FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL)
|
|
SELECT user_id, value_1_agg FROM agg_events ORDER BY 1,2;
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
user_id | value_1_agg
|
|
---------------------------------------------------------------------
|
|
1 | 10
|
|
2 | 20
|
|
3 | 30
|
|
4 | 40
|
|
5 | 50
|
|
6 | 60
|
|
7 |
|
|
8 | 80
|
|
9 | 90
|
|
(9 rows)
|
|
|
|
-- We support CTEs
|
|
BEGIN;
|
|
WITH fist_table_agg AS MATERIALIZED
|
|
(SELECT max(value_1)+1 as v1_agg, user_id FROM raw_events_first GROUP BY user_id)
|
|
INSERT INTO agg_events
|
|
(value_1_agg, user_id)
|
|
SELECT
|
|
v1_agg, user_id
|
|
FROM
|
|
fist_table_agg;
|
|
DEBUG: distributed INSERT ... SELECT can only select from distributed tables
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
DEBUG: generating subplan XXX_1 for CTE fist_table_agg: SELECT (max(value_1) OPERATOR(pg_catalog.+) 1) AS v1_agg, user_id FROM multi_insert_select.raw_events_first GROUP BY user_id
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, v1_agg AS value_1_agg FROM (SELECT fist_table_agg.user_id, fist_table_agg.v1_agg FROM (SELECT intermediate_result.v1_agg, intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(v1_agg integer, user_id integer)) fist_table_agg) citus_insert_select_subquery
|
|
DEBUG: Creating router plan
|
|
DEBUG: Collecting INSERT ... SELECT results on coordinator
|
|
ROLLBACK;
|
|
-- We do support CTEs that are referenced in the target list
|
|
INSERT INTO agg_events
|
|
WITH sub_cte AS (SELECT 1)
|
|
SELECT
|
|
raw_events_first.user_id, (SELECT * FROM sub_cte)
|
|
FROM
|
|
raw_events_first;
|
|
DEBUG: CTE sub_cte is going to be inlined via distributed planning
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, (SELECT sub_cte."?column?" FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, (SELECT sub_cte."?column?" FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, (SELECT sub_cte."?column?" FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, (SELECT sub_cte."?column?" FROM (SELECT 1) sub_cte("?column?")) FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE (raw_events_first.user_id IS NOT NULL)
|
|
-- We support set operations
|
|
BEGIN;
|
|
INSERT INTO
|
|
raw_events_first(user_id)
|
|
SELECT
|
|
user_id
|
|
FROM
|
|
((SELECT user_id FROM raw_events_first) UNION
|
|
(SELECT user_id FROM raw_events_second)) as foo;
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id) SELECT foo.user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second) foo WHERE (foo.user_id IS NOT NULL)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300001 AS citus_table_alias (user_id) SELECT foo.user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300005 raw_events_second) foo WHERE (foo.user_id IS NOT NULL)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300002 AS citus_table_alias (user_id) SELECT foo.user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300006 raw_events_second) foo WHERE (foo.user_id IS NOT NULL)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300003 AS citus_table_alias (user_id) SELECT foo.user_id FROM (SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first UNION SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second) foo WHERE (foo.user_id IS NOT NULL)
|
|
ROLLBACK;
|
|
-- We do support set operations through recursive planning
|
|
BEGIN;
|
|
SET LOCAL client_min_messages TO DEBUG;
|
|
INSERT INTO
|
|
raw_events_first(user_id)
|
|
(SELECT user_id FROM raw_events_first) INTERSECT
|
|
(SELECT user_id FROM raw_events_first);
|
|
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM multi_insert_select.raw_events_first
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
DEBUG: generating subplan XXX_2 for subquery SELECT user_id FROM multi_insert_select.raw_events_first
|
|
DEBUG: Creating router plan
|
|
DEBUG: generating subplan XXX_3 for subquery SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) INTERSECT SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)
|
|
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) citus_insert_select_subquery
|
|
DEBUG: Creating router plan
|
|
DEBUG: Collecting INSERT ... SELECT results on coordinator
|
|
ROLLBACK;
|
|
-- If the query is router plannable then it is executed via the coordinator
|
|
INSERT INTO
|
|
raw_events_first(user_id)
|
|
SELECT
|
|
user_id
|
|
FROM
|
|
((SELECT user_id FROM raw_events_first WHERE user_id = 15) EXCEPT
|
|
(SELECT user_id FROM raw_events_second where user_id = 17)) as foo;
|
|
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
|
|
DEBUG: router planner does not support queries that reference non-colocated distributed tables
|
|
DEBUG: Distributed planning for a fast-path router query
|
|
DEBUG: Creating router plan
|
|
DEBUG: query has a single distribution column value: 15
|
|
DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM multi_insert_select.raw_events_first WHERE (user_id OPERATOR(pg_catalog.=) 15)
|
|
DEBUG: Distributed planning for a fast-path router query
|
|
DEBUG: Creating router plan
|
|
DEBUG: query has a single distribution column value: 17
|
|
DEBUG: generating subplan XXX_2 for subquery SELECT user_id FROM multi_insert_select.raw_events_second WHERE (user_id OPERATOR(pg_catalog.=) 17)
|
|
DEBUG: Creating router plan
|
|
DEBUG: generating subplan XXX_3 for subquery SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) EXCEPT SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)
|
|
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) foo
|
|
DEBUG: Creating router plan
|
|
DEBUG: Collecting INSERT ... SELECT results on coordinator
|
|
-- some supported LEFT joins
|
|
INSERT INTO agg_events (user_id)
|
|
SELECT
|
|
raw_events_first.user_id
|
|
FROM
|
|
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id;
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300000 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_first.user_id IS NOT NULL)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300001 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300005 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_first.user_id IS NOT NULL)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300002 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300006 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_first.user_id IS NOT NULL)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300003 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300007 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_first.user_id IS NOT NULL)
|
|
INSERT INTO agg_events (user_id)
|
|
SELECT
|
|
raw_events_second.user_id
|
|
FROM
|
|
reference_table LEFT JOIN raw_events_second ON reference_table.user_id = raw_events_second.user_id;
|
|
DEBUG: cannot perform a lateral outer join when a distributed subquery references a reference table
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
DEBUG: recursively planning right side of the left join since the outer side is a recurring rel
|
|
DEBUG: recursively planning distributed relation "raw_events_second" since it is part of a distributed join node that is outer joined with a recurring rel
|
|
DEBUG: Wrapping relation "raw_events_second" to a subquery
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM multi_insert_select.raw_events_second WHERE true
|
|
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT raw_events_second.user_id FROM (multi_insert_select.reference_table LEFT JOIN (SELECT raw_events_second_1.user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) raw_events_second_1) raw_events_second ON ((reference_table.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id)))
|
|
DEBUG: Creating router plan
|
|
DEBUG: Collecting INSERT ... SELECT results on coordinator
|
|
INSERT INTO agg_events (user_id)
|
|
SELECT
|
|
raw_events_first.user_id
|
|
FROM
|
|
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id
|
|
WHERE raw_events_first.user_id = 10;
|
|
DEBUG: Creating router plan
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300000 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) 10) AND (raw_events_first.user_id IS NOT NULL))
|
|
DEBUG: Skipping target shard interval 13300009 since SELECT query for it pruned away
|
|
DEBUG: Skipping target shard interval 13300010 since SELECT query for it pruned away
|
|
DEBUG: Skipping target shard interval 13300011 since SELECT query for it pruned away
|
|
INSERT INTO agg_events (user_id)
|
|
SELECT
|
|
raw_events_first.user_id
|
|
FROM
|
|
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id
|
|
WHERE raw_events_second.user_id = 10 OR raw_events_second.user_id = 11;
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300000 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (((raw_events_second.user_id OPERATOR(pg_catalog.=) 10) OR (raw_events_second.user_id OPERATOR(pg_catalog.=) 11)) AND (raw_events_first.user_id IS NOT NULL))
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300001 raw_events_first LEFT JOIN (SELECT NULL::integer AS user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 WHERE false) raw_events_second(user_id, "time", value_1, value_2, value_3, value_4) ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (((raw_events_second.user_id OPERATOR(pg_catalog.=) 10) OR (raw_events_second.user_id OPERATOR(pg_catalog.=) 11)) AND (raw_events_first.user_id IS NOT NULL))
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300002 raw_events_first LEFT JOIN (SELECT NULL::integer AS user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 WHERE false) raw_events_second(user_id, "time", value_1, value_2, value_3, value_4) ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (((raw_events_second.user_id OPERATOR(pg_catalog.=) 10) OR (raw_events_second.user_id OPERATOR(pg_catalog.=) 11)) AND (raw_events_first.user_id IS NOT NULL))
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300003 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300007 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (((raw_events_second.user_id OPERATOR(pg_catalog.=) 10) OR (raw_events_second.user_id OPERATOR(pg_catalog.=) 11)) AND (raw_events_first.user_id IS NOT NULL))
|
|
INSERT INTO agg_events (user_id)
|
|
SELECT
|
|
raw_events_first.user_id
|
|
FROM
|
|
raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id
|
|
WHERE raw_events_first.user_id = 10 AND raw_events_first.user_id = 20;
|
|
DEBUG: Creating router plan
|
|
DEBUG: Skipping target shard interval 13300008 since SELECT query for it pruned away
|
|
DEBUG: Skipping target shard interval 13300009 since SELECT query for it pruned away
|
|
DEBUG: Skipping target shard interval 13300010 since SELECT query for it pruned away
|
|
DEBUG: Skipping target shard interval 13300011 since SELECT query for it pruned away
|
|
INSERT INTO agg_events (user_id)
|
|
SELECT
|
|
raw_events_first.user_id
|
|
FROM
|
|
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id
|
|
WHERE raw_events_first.user_id = 10 AND raw_events_second.user_id = 20;
|
|
DEBUG: Creating router plan
|
|
DEBUG: Skipping target shard interval 13300008 since SELECT query for it pruned away
|
|
DEBUG: Skipping target shard interval 13300009 since SELECT query for it pruned away
|
|
DEBUG: Skipping target shard interval 13300010 since SELECT query for it pruned away
|
|
DEBUG: Skipping target shard interval 13300011 since SELECT query for it pruned away
|
|
INSERT INTO agg_events (user_id)
|
|
SELECT
|
|
raw_events_first.user_id
|
|
FROM
|
|
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id
|
|
WHERE raw_events_first.user_id IN (19, 20, 21);
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300000 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL))
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300001 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300005 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL))
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300002 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300006 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL))
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM ((SELECT NULL::integer AS user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 WHERE false) raw_events_first(user_id, "time", value_1, value_2, value_3, value_4) LEFT JOIN multi_insert_select.raw_events_second_13300007 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL))
|
|
INSERT INTO agg_events (user_id)
|
|
SELECT
|
|
raw_events_first.user_id
|
|
FROM
|
|
raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id
|
|
WHERE raw_events_second.user_id IN (19, 20, 21);
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300000 raw_events_first JOIN multi_insert_select.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL))
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300001 raw_events_first JOIN multi_insert_select.raw_events_second_13300005 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL))
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300002 raw_events_first JOIN multi_insert_select.raw_events_second_13300006 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL))
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300003 raw_events_first JOIN (SELECT NULL::integer AS user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 WHERE false) raw_events_second(user_id, "time", value_1, value_2, value_3, value_4) ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[19, 20, 21])) AND (raw_events_first.user_id IS NOT NULL))
|
|
SET client_min_messages TO WARNING;
|
|
-- following query should use repartitioned joins and results should
|
|
-- be routed via coordinator
|
|
SET citus.enable_repartition_joins TO true;
|
|
INSERT INTO agg_events
|
|
(user_id)
|
|
SELECT raw_events_first.user_id
|
|
FROM raw_events_first,
|
|
raw_events_second
|
|
WHERE raw_events_second.user_id = raw_events_first.value_1
|
|
AND raw_events_first.value_1 = 12;
|
|
-- some unsupported LEFT/INNER JOINs
|
|
-- JOIN on one table with partition column other is not
|
|
INSERT INTO agg_events (user_id)
|
|
SELECT
|
|
raw_events_first.user_id
|
|
FROM
|
|
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1;
|
|
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
|
|
-- same as the above with INNER JOIN
|
|
INSERT INTO agg_events (user_id)
|
|
SELECT
|
|
raw_events_first.user_id
|
|
FROM
|
|
raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1;
|
|
-- a query that is not meaningful: the two tables are never joined to each other
|
|
INSERT INTO agg_events
|
|
(user_id)
|
|
SELECT raw_events_second.user_id
|
|
FROM raw_events_first,
|
|
raw_events_second
|
|
WHERE raw_events_first.user_id = raw_events_first.value_1;
|
|
ERROR: cannot perform distributed planning on this query
|
|
DETAIL: Cartesian products are currently unsupported
|
|
-- both tables joined on non-partition columns
|
|
INSERT INTO agg_events (user_id)
|
|
SELECT
|
|
raw_events_first.user_id
|
|
FROM
|
|
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.value_1 = raw_events_second.value_1;
|
|
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
|
|
-- same as the above with INNER JOIN
|
|
-- we support this with route to coordinator
|
|
SELECT coordinator_plan($Q$
|
|
EXPLAIN (costs off)
|
|
INSERT INTO agg_events (user_id)
|
|
SELECT
|
|
raw_events_first.user_id
|
|
FROM
|
|
raw_events_first INNER JOIN raw_events_second ON raw_events_first.value_1 = raw_events_second.value_1;
|
|
$Q$);
|
|
coordinator_plan
|
|
---------------------------------------------------------------------
|
|
Custom Scan (Citus INSERT ... SELECT)
|
|
INSERT/SELECT method: pull to coordinator
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 6
|
|
(4 rows)
|
|
|
|
-- EXPLAIN ANALYZE is not supported for INSERT ... SELECT via coordinator
|
|
EXPLAIN (costs off, analyze on)
|
|
INSERT INTO agg_events (user_id)
|
|
SELECT
|
|
raw_events_first.user_id
|
|
FROM
|
|
raw_events_first INNER JOIN raw_events_second ON raw_events_first.value_1 = raw_events_second.value_1;
|
|
ERROR: EXPLAIN ANALYZE is currently not supported for INSERT ... SELECT commands via coordinator
|
|
-- even if there is a filter on the partition key, since the join is not on the partition key we reject
|
|
-- this query
|
|
INSERT INTO agg_events (user_id)
|
|
SELECT
|
|
raw_events_first.user_id
|
|
FROM
|
|
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1
|
|
WHERE
|
|
raw_events_first.user_id = 10;
|
|
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
|
|
-- same as the above with INNER JOIN
|
|
-- we support this with route to coordinator
|
|
SELECT coordinator_plan($Q$
|
|
EXPLAIN (costs off)
|
|
INSERT INTO agg_events (user_id)
|
|
SELECT
|
|
raw_events_first.user_id
|
|
FROM
|
|
raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1
|
|
WHERE raw_events_first.user_id = 10;
|
|
$Q$);
|
|
coordinator_plan
|
|
---------------------------------------------------------------------
|
|
Custom Scan (Citus INSERT ... SELECT)
|
|
INSERT/SELECT method: pull to coordinator
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 6
|
|
(4 rows)
|
|
|
|
-- make things a bit more complicated with IN clauses
|
|
-- we support this with route to coordinator
|
|
SELECT coordinator_plan($Q$
|
|
EXPLAIN (costs off)
|
|
INSERT INTO agg_events (user_id)
|
|
SELECT
|
|
raw_events_first.user_id
|
|
FROM
|
|
raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1
|
|
WHERE raw_events_first.value_1 IN (10, 11,12) OR raw_events_second.user_id IN (1,2,3,4);
|
|
$Q$);
|
|
coordinator_plan
|
|
---------------------------------------------------------------------
|
|
Custom Scan (Citus INSERT ... SELECT)
|
|
INSERT/SELECT method: pull to coordinator
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 6
|
|
(4 rows)
|
|
|
|
-- implicit join on non partition column should also not be pushed down,
|
|
-- so we fall back to route via coordinator
|
|
SELECT coordinator_plan($Q$
|
|
EXPLAIN (costs off)
|
|
INSERT INTO agg_events
|
|
(user_id)
|
|
SELECT raw_events_first.user_id
|
|
FROM raw_events_first,
|
|
raw_events_second
|
|
WHERE raw_events_second.user_id = raw_events_first.value_1;
|
|
$Q$);
|
|
coordinator_plan
|
|
---------------------------------------------------------------------
|
|
Custom Scan (Citus INSERT ... SELECT)
|
|
INSERT/SELECT method: pull to coordinator
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 6
|
|
(4 rows)
|
|
|
|
RESET client_min_messages;
|
|
-- The following is again a tricky query for Citus. If the given filter was
|
|
-- on value_1 as shown in the above, Citus could push it down and use
|
|
-- distributed INSERT/SELECT. But we instead fall back to route via coordinator.
|
|
SELECT coordinator_plan($Q$
|
|
EXPLAIN (costs off)
|
|
INSERT INTO agg_events
|
|
(user_id)
|
|
SELECT raw_events_first.user_id
|
|
FROM raw_events_first,
|
|
raw_events_second
|
|
WHERE raw_events_second.user_id = raw_events_first.value_1
|
|
AND raw_events_first.value_2 = 12;
|
|
$Q$);
|
|
coordinator_plan
|
|
---------------------------------------------------------------------
|
|
Custom Scan (Citus INSERT ... SELECT)
|
|
INSERT/SELECT method: pull to coordinator
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 6
|
|
(4 rows)
|
|
|
|
-- foo is not joined on the partition key so the query is not
|
|
-- pushed down. So instead we route via coordinator.
|
|
SELECT coordinator_plan($Q$
|
|
EXPLAIN (costs off)
|
|
INSERT INTO agg_events
|
|
(user_id, value_4_agg)
|
|
SELECT
|
|
outer_most.id, max(outer_most.value)
|
|
FROM
|
|
(
|
|
SELECT f2.id as id, f2.v4 as value FROM
|
|
(SELECT
|
|
id
|
|
FROM (SELECT reference_table.user_id AS id
|
|
FROM raw_events_first LEFT JOIN
|
|
reference_table
|
|
ON (raw_events_first.value_1 = reference_table.user_id)) AS foo) as f
|
|
INNER JOIN
|
|
(SELECT v4,
|
|
v1,
|
|
id
|
|
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
|
SUM(raw_events_first.value_1) AS v1,
|
|
raw_events_second.user_id AS id
|
|
FROM raw_events_first,
|
|
raw_events_second
|
|
WHERE raw_events_first.user_id = raw_events_second.user_id
|
|
GROUP BY raw_events_second.user_id
|
|
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
|
ON (f.id = f2.id)) as outer_most
|
|
GROUP BY
|
|
outer_most.id;
|
|
$Q$);
|
|
coordinator_plan
|
|
---------------------------------------------------------------------
|
|
Custom Scan (Citus INSERT ... SELECT)
|
|
INSERT/SELECT method: pull to coordinator
|
|
-> HashAggregate
|
|
Group Key: remote_scan.user_id
|
|
-> Custom Scan (Citus Adaptive)
|
|
-> Distributed Subplan XXX_1
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
(8 rows)
|
|
|
|
-- if the given filter was on value_1 as shown in the above, Citus could
|
|
-- push it down. But here the query falls back to route via coordinator.
|
|
SELECT coordinator_plan($Q$
|
|
EXPLAIN (costs off)
|
|
INSERT INTO agg_events
|
|
(user_id)
|
|
SELECT raw_events_first.user_id
|
|
FROM raw_events_first,
|
|
raw_events_second
|
|
WHERE raw_events_second.user_id = raw_events_first.value_1
|
|
AND raw_events_first.value_2 = 12;
|
|
$Q$);
|
|
coordinator_plan
|
|
---------------------------------------------------------------------
|
|
Custom Scan (Citus INSERT ... SELECT)
|
|
INSERT/SELECT method: pull to coordinator
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 6
|
|
(4 rows)
|
|
|
|
-- foo is not joined on the partition key so the query is not
|
|
-- pushed down, and it falls back to route via coordinator
|
|
SELECT coordinator_plan($Q$
|
|
EXPLAIN (costs off)
|
|
INSERT INTO agg_events
|
|
(user_id, value_4_agg)
|
|
SELECT
|
|
outer_most.id, max(outer_most.value)
|
|
FROM
|
|
(
|
|
SELECT f2.id as id, f2.v4 as value FROM
|
|
(SELECT
|
|
id
|
|
FROM (SELECT reference_table.user_id AS id
|
|
FROM raw_events_first LEFT JOIN
|
|
reference_table
|
|
ON (raw_events_first.value_1 = reference_table.user_id)) AS foo) as f
|
|
INNER JOIN
|
|
(SELECT v4,
|
|
v1,
|
|
id
|
|
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
|
SUM(raw_events_first.value_1) AS v1,
|
|
raw_events_second.user_id AS id
|
|
FROM raw_events_first,
|
|
raw_events_second
|
|
WHERE raw_events_first.user_id = raw_events_second.user_id
|
|
GROUP BY raw_events_second.user_id
|
|
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
|
ON (f.id = f2.id)) as outer_most
|
|
GROUP BY
|
|
outer_most.id;
|
|
$Q$);
|
|
coordinator_plan
|
|
---------------------------------------------------------------------
|
|
Custom Scan (Citus INSERT ... SELECT)
|
|
INSERT/SELECT method: pull to coordinator
|
|
-> HashAggregate
|
|
Group Key: remote_scan.user_id
|
|
-> Custom Scan (Citus Adaptive)
|
|
-> Distributed Subplan XXX_1
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
(8 rows)
|
|
|
|
INSERT INTO agg_events
|
|
(value_4_agg,
|
|
value_1_agg,
|
|
user_id)
|
|
SELECT v4,
|
|
v1,
|
|
id
|
|
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
|
SUM(raw_events_first.value_1) AS v1,
|
|
raw_events_second.user_id AS id
|
|
FROM raw_events_first,
|
|
raw_events_second
|
|
WHERE raw_events_first.user_id != raw_events_second.user_id
|
|
GROUP BY raw_events_second.user_id) AS foo;
|
|
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
|
|
SET client_min_messages TO DEBUG2;
|
|
-- INSERT returns NULL partition key value via coordinator
|
|
INSERT INTO agg_events
|
|
(value_4_agg,
|
|
value_1_agg,
|
|
user_id)
|
|
SELECT v4,
|
|
v1,
|
|
id
|
|
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
|
SUM(raw_events_first.value_1) AS v1,
|
|
raw_events_second.value_3 AS id
|
|
FROM raw_events_first,
|
|
raw_events_second
|
|
WHERE raw_events_first.user_id = raw_events_second.user_id
|
|
GROUP BY raw_events_second.value_3) AS foo;
|
|
DEBUG: cannot push down this subquery
|
|
DETAIL: Group by list without partition column is currently unsupported when a subquery references a column from another query
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1]
|
|
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [0,1073741823]
|
|
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [1073741824,2147483647]
|
|
DEBUG: join prunable for intervals [-1073741824,-1] and [-2147483648,-1073741825]
|
|
DEBUG: join prunable for intervals [-1073741824,-1] and [0,1073741823]
|
|
DEBUG: join prunable for intervals [-1073741824,-1] and [1073741824,2147483647]
|
|
DEBUG: join prunable for intervals [0,1073741823] and [-2147483648,-1073741825]
|
|
DEBUG: join prunable for intervals [0,1073741823] and [-1073741824,-1]
|
|
DEBUG: join prunable for intervals [0,1073741823] and [1073741824,2147483647]
|
|
DEBUG: join prunable for intervals [1073741824,2147483647] and [-2147483648,-1073741825]
|
|
DEBUG: join prunable for intervals [1073741824,2147483647] and [-1073741824,-1]
|
|
DEBUG: join prunable for intervals [1073741824,2147483647] and [0,1073741823]
|
|
DEBUG: generating subplan XXX_1 for subquery SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.value_3 AS id FROM multi_insert_select.raw_events_first, multi_insert_select.raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.value_3
|
|
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT int4(id) AS user_id, int4(v1) AS value_1_agg, int8(v4) AS value_4_agg FROM (SELECT intermediate_result.v4, intermediate_result.v1, intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(v4 numeric, v1 bigint, id double precision)) foo
|
|
DEBUG: Creating router plan
|
|
DEBUG: Collecting INSERT ... SELECT results on coordinator
|
|
ERROR: the partition column of table multi_insert_select.agg_events cannot be NULL
|
|
-- error cases
|
|
-- no partition column at all
|
|
INSERT INTO raw_events_second
|
|
(value_1)
|
|
SELECT value_1
|
|
FROM raw_events_first;
|
|
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
|
|
DETAIL: the query doesn't include the target table's partition column
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
DEBUG: performing repartitioned INSERT ... SELECT
|
|
ERROR: the partition column of table multi_insert_select.raw_events_second should have a value
|
|
INSERT INTO raw_events_second
|
|
(value_1)
|
|
SELECT user_id
|
|
FROM raw_events_first;
|
|
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
|
|
DETAIL: the query doesn't include the target table's partition column
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
DEBUG: performing repartitioned INSERT ... SELECT
|
|
ERROR: the partition column of table multi_insert_select.raw_events_second should have a value
|
|
INSERT INTO raw_events_second
|
|
(user_id)
|
|
SELECT value_1
|
|
FROM raw_events_first;
|
|
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
|
|
DETAIL: The target table's partition column should correspond to a partition column in the subquery.
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
DEBUG: performing repartitioned INSERT ... SELECT
|
|
DEBUG: partitioning SELECT query by column index 0 with name 'user_id'
|
|
ERROR: the partition column value cannot be NULL
|
|
CONTEXT: while executing command on localhost:xxxxx
|
|
INSERT INTO raw_events_second
|
|
(user_id)
|
|
SELECT user_id * 2
|
|
FROM raw_events_first;
|
|
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
|
|
DETAIL: Subquery contains an operator in the same position as the target table's partition column.
|
|
HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery.
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
DEBUG: performing repartitioned INSERT ... SELECT
|
|
DEBUG: partitioning SELECT query by column index 0 with name 'user_id'
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300000_to_0,repartitioned_results_xxxxx_from_13300001_to_0}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300000_to_1,repartitioned_results_xxxxx_from_13300001_to_1,repartitioned_results_xxxxx_from_13300003_to_1}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300001_to_2}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300000_to_3,repartitioned_results_xxxxx_from_13300002_to_3,repartitioned_results_xxxxx_from_13300003_to_3}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer)
|
|
INSERT INTO raw_events_second
|
|
(user_id)
|
|
SELECT user_id :: bigint
|
|
FROM raw_events_first;
|
|
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
|
|
DETAIL: Subquery contains an explicit cast in the same position as the target table's partition column.
|
|
HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery.
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
DEBUG: performing repartitioned INSERT ... SELECT
|
|
DEBUG: partitioning SELECT query by column index 0 with name 'user_id'
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300000_to_0}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300001_to_1}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300002_to_2}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT intermediate_result.user_id FROM read_intermediate_results('{repartitioned_results_xxxxx_from_13300003_to_3}'::text[], 'binary'::citus_copy_format) intermediate_result(user_id integer)
|
|
INSERT INTO agg_events
|
|
(value_3_agg,
|
|
value_4_agg,
|
|
value_1_agg,
|
|
value_2_agg,
|
|
user_id)
|
|
SELECT SUM(value_3),
|
|
Count(value_4),
|
|
user_id,
|
|
SUM(value_1),
|
|
Avg(value_2)
|
|
FROM raw_events_first
|
|
GROUP BY user_id;
|
|
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
|
|
DETAIL: Subquery contains an aggregation in the same position as the target table's partition column.
|
|
HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery.
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
DEBUG: performing repartitioned INSERT ... SELECT
|
|
DEBUG: partitioning SELECT query by column index 0 with name 'user_id'
|
|
ERROR: the partition column value cannot be NULL
|
|
CONTEXT: while executing command on localhost:xxxxx
|
|
INSERT INTO agg_events
|
|
(value_3_agg,
|
|
value_4_agg,
|
|
value_1_agg,
|
|
value_2_agg,
|
|
user_id)
|
|
SELECT SUM(value_3),
|
|
Count(value_4),
|
|
user_id,
|
|
SUM(value_1),
|
|
value_2
|
|
FROM raw_events_first
|
|
GROUP BY user_id,
|
|
value_2;
|
|
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
|
|
DETAIL: The target table's partition column should correspond to a partition column in the subquery.
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
DEBUG: performing repartitioned INSERT ... SELECT
|
|
DEBUG: partitioning SELECT query by column index 0 with name 'user_id'
|
|
ERROR: the partition column value cannot be NULL
|
|
CONTEXT: while executing command on localhost:xxxxx
|
|
-- tables should be co-located
|
|
INSERT INTO agg_events (user_id)
|
|
SELECT
|
|
user_id
|
|
FROM
|
|
reference_table;
|
|
DEBUG: Creating router plan
|
|
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
|
|
DETAIL: The target table's partition column should correspond to a partition column in the subquery.
|
|
DEBUG: Distributed planning for a fast-path router query
|
|
DEBUG: Creating router plan
|
|
DEBUG: Collecting INSERT ... SELECT results on coordinator
|
|
-- foo2 is recursively planned and INSERT...SELECT is done via repartitioning
|
|
INSERT INTO agg_events
|
|
(user_id)
|
|
SELECT f2.id FROM
|
|
(SELECT
|
|
id
|
|
FROM (SELECT reference_table.user_id AS id
|
|
FROM raw_events_first,
|
|
reference_table
|
|
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
|
|
INNER JOIN
|
|
(SELECT v4,
|
|
v1,
|
|
id
|
|
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
|
raw_events_second.value_1 AS v1,
|
|
SUM(raw_events_second.user_id) AS id
|
|
FROM raw_events_first,
|
|
raw_events_second
|
|
WHERE raw_events_first.user_id = raw_events_second.user_id
|
|
GROUP BY raw_events_second.value_1
|
|
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
|
ON (f.id = f2.id);
|
|
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1]
|
|
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [0,1073741823]
|
|
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [1073741824,2147483647]
|
|
DEBUG: join prunable for intervals [-1073741824,-1] and [-2147483648,-1073741825]
|
|
DEBUG: join prunable for intervals [-1073741824,-1] and [0,1073741823]
|
|
DEBUG: join prunable for intervals [-1073741824,-1] and [1073741824,2147483647]
|
|
DEBUG: join prunable for intervals [0,1073741823] and [-2147483648,-1073741825]
|
|
DEBUG: join prunable for intervals [0,1073741823] and [-1073741824,-1]
|
|
DEBUG: join prunable for intervals [0,1073741823] and [1073741824,2147483647]
|
|
DEBUG: join prunable for intervals [1073741824,2147483647] and [-2147483648,-1073741825]
|
|
DEBUG: join prunable for intervals [1073741824,2147483647] and [-1073741824,-1]
|
|
DEBUG: join prunable for intervals [1073741824,2147483647] and [0,1073741823]
|
|
DEBUG: generating subplan XXX_1 for subquery SELECT sum(raw_events_second.value_4) AS v4, raw_events_second.value_1 AS v1, sum(raw_events_second.user_id) AS id FROM multi_insert_select.raw_events_first, multi_insert_select.raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.value_1 HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)
|
|
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT int4(f2.id) AS user_id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first, multi_insert_select.reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT intermediate_result.v4, intermediate_result.v1, intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(v4 numeric, v1 integer, id bigint)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
DEBUG: performing repartitioned INSERT ... SELECT
|
|
DEBUG: partitioning SELECT query by column index 0 with name 'user_id'
|
|
-- the second part of the query is not routable since
|
|
-- GROUP BY is not on the partition column (i.e., value_1) and thus join
|
|
-- on f.id = f2.id is not on the partition key (instead on the sum of partition key)
|
|
-- but we still recursively plan foo2 and run the query
|
|
INSERT INTO agg_events
|
|
(user_id)
|
|
SELECT f.id FROM
|
|
(SELECT
|
|
id
|
|
FROM (SELECT raw_events_first.user_id AS id
|
|
FROM raw_events_first,
|
|
reference_table
|
|
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
|
|
INNER JOIN
|
|
(SELECT v4,
|
|
v1,
|
|
id
|
|
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
|
raw_events_second.value_1 AS v1,
|
|
SUM(raw_events_second.user_id) AS id
|
|
FROM raw_events_first,
|
|
raw_events_second
|
|
WHERE raw_events_first.user_id = raw_events_second.user_id
|
|
GROUP BY raw_events_second.value_1
|
|
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
|
ON (f.id = f2.id);
|
|
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1]
|
|
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [0,1073741823]
|
|
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [1073741824,2147483647]
|
|
DEBUG: join prunable for intervals [-1073741824,-1] and [-2147483648,-1073741825]
|
|
DEBUG: join prunable for intervals [-1073741824,-1] and [0,1073741823]
|
|
DEBUG: join prunable for intervals [-1073741824,-1] and [1073741824,2147483647]
|
|
DEBUG: join prunable for intervals [0,1073741823] and [-2147483648,-1073741825]
|
|
DEBUG: join prunable for intervals [0,1073741823] and [-1073741824,-1]
|
|
DEBUG: join prunable for intervals [0,1073741823] and [1073741824,2147483647]
|
|
DEBUG: join prunable for intervals [1073741824,2147483647] and [-2147483648,-1073741825]
|
|
DEBUG: join prunable for intervals [1073741824,2147483647] and [-1073741824,-1]
|
|
DEBUG: join prunable for intervals [1073741824,2147483647] and [0,1073741823]
|
|
DEBUG: generating subplan XXX_1 for subquery SELECT sum(raw_events_second.value_4) AS v4, raw_events_second.value_1 AS v1, sum(raw_events_second.user_id) AS id FROM multi_insert_select.raw_events_first, multi_insert_select.raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.value_1 HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)
|
|
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT f.id AS user_id FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM multi_insert_select.raw_events_first, multi_insert_select.reference_table WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT intermediate_result.v4, intermediate_result.v1, intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(v4 numeric, v1 integer, id bigint)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
DEBUG: performing repartitioned INSERT ... SELECT
|
|
DEBUG: partitioning SELECT query by column index 0 with name 'user_id'
|
|
SET client_min_messages TO WARNING;
|
|
-- cannot push down the query since the JOIN is not an equi-join
|
|
-- falls back to route via coordinator
|
|
SELECT coordinator_plan($Q$
|
|
EXPLAIN (costs off)
|
|
INSERT INTO agg_events
|
|
(user_id, value_4_agg)
|
|
SELECT
|
|
outer_most.id, max(outer_most.value)
|
|
FROM
|
|
(
|
|
SELECT f2.id as id, f2.v4 as value FROM
|
|
(SELECT
|
|
id
|
|
FROM (SELECT reference_table.user_id AS id
|
|
FROM raw_events_first,
|
|
reference_table
|
|
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
|
|
INNER JOIN
|
|
(SELECT v4,
|
|
v1,
|
|
id
|
|
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
|
SUM(raw_events_first.value_1) AS v1,
|
|
raw_events_second.user_id AS id
|
|
FROM raw_events_first,
|
|
raw_events_second
|
|
WHERE raw_events_first.user_id = raw_events_second.user_id
|
|
GROUP BY raw_events_second.user_id
|
|
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
|
ON (f.id != f2.id)) as outer_most
|
|
GROUP BY outer_most.id;
|
|
$Q$);
|
|
coordinator_plan
|
|
---------------------------------------------------------------------
|
|
Custom Scan (Citus INSERT ... SELECT)
|
|
INSERT/SELECT method: pull to coordinator
|
|
-> HashAggregate
|
|
Group Key: remote_scan.user_id
|
|
-> Custom Scan (Citus Adaptive)
|
|
-> Distributed Subplan XXX_1
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
(8 rows)
|
|
|
|
-- cannot push down since foo2 is not joined on the partition key
|
|
-- falls back to route via coordinator
|
|
SELECT coordinator_plan($Q$
|
|
EXPLAIN (costs off)
|
|
INSERT INTO agg_events
|
|
(user_id, value_4_agg)
|
|
SELECT
|
|
outer_most.id, max(outer_most.value)
|
|
FROM
|
|
(
|
|
SELECT f2.id as id, f2.v4 as value FROM
|
|
(SELECT
|
|
id
|
|
FROM (SELECT reference_table.user_id AS id
|
|
FROM raw_events_first,
|
|
reference_table
|
|
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
|
|
INNER JOIN
|
|
(SELECT v4,
|
|
v1,
|
|
id
|
|
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
|
SUM(raw_events_first.value_1) AS v1,
|
|
raw_events_second.user_id AS id
|
|
FROM raw_events_first,
|
|
raw_events_second
|
|
WHERE raw_events_first.user_id = raw_events_second.value_1
|
|
GROUP BY raw_events_second.user_id
|
|
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
|
ON (f.id = f2.id)) as outer_most
|
|
GROUP BY
|
|
outer_most.id;
|
|
$Q$);
|
|
coordinator_plan
|
|
---------------------------------------------------------------------
|
|
Custom Scan (Citus INSERT ... SELECT)
|
|
INSERT/SELECT method: pull to coordinator
|
|
-> HashAggregate
|
|
Group Key: remote_scan.user_id
|
|
-> Custom Scan (Citus Adaptive)
|
|
-> Distributed Subplan XXX_1
|
|
-> HashAggregate
|
|
Group Key: remote_scan.id
|
|
Filter: (pg_catalog.sum(remote_scan.worker_column_4) > '10'::numeric)
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 6
|
|
(11 rows)
|
|
|
|
-- cannot push down since foo doesn't have an equi-join
|
|
-- falls back to route via coordinator
|
|
SELECT coordinator_plan($Q$
|
|
EXPLAIN (costs off)
|
|
INSERT INTO agg_events
|
|
(user_id, value_4_agg)
|
|
SELECT
|
|
outer_most.id, max(outer_most.value)
|
|
FROM
|
|
(
|
|
SELECT f2.id as id, f2.v4 as value FROM
|
|
(SELECT
|
|
id
|
|
FROM (SELECT reference_table.user_id AS id
|
|
FROM raw_events_first,
|
|
reference_table
|
|
WHERE raw_events_first.user_id != reference_table.user_id ) AS foo) as f
|
|
INNER JOIN
|
|
(SELECT v4,
|
|
v1,
|
|
id
|
|
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
|
SUM(raw_events_first.value_1) AS v1,
|
|
raw_events_second.user_id AS id
|
|
FROM raw_events_first,
|
|
raw_events_second
|
|
WHERE raw_events_first.user_id = raw_events_second.user_id
|
|
GROUP BY raw_events_second.user_id
|
|
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
|
ON (f.id = f2.id)) as outer_most
|
|
GROUP BY
|
|
outer_most.id;
|
|
$Q$);
|
|
coordinator_plan
|
|
---------------------------------------------------------------------
|
|
Custom Scan (Citus INSERT ... SELECT)
|
|
INSERT/SELECT method: pull to coordinator
|
|
-> HashAggregate
|
|
Group Key: remote_scan.user_id
|
|
-> Custom Scan (Citus Adaptive)
|
|
-> Distributed Subplan XXX_1
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
(8 rows)
|
|
|
|
-- some unsupported LATERAL JOINs
|
|
-- join on averages is not on the partition key
|
|
-- should fall back to route via coordinator
|
|
SELECT coordinator_plan($Q$
|
|
EXPLAIN (costs off)
|
|
INSERT INTO agg_events (user_id, value_4_agg)
|
|
SELECT
|
|
averages.user_id, avg(averages.value_4)
|
|
FROM
|
|
(SELECT
|
|
raw_events_second.user_id
|
|
FROM
|
|
reference_table JOIN raw_events_second on (reference_table.user_id = raw_events_second.user_id)
|
|
) reference_ids
|
|
JOIN LATERAL
|
|
(SELECT
|
|
user_id, value_4
|
|
FROM
|
|
raw_events_first WHERE
|
|
value_4 = reference_ids.user_id) as averages ON true
|
|
GROUP BY averages.user_id;
|
|
$Q$);
|
|
coordinator_plan
|
|
---------------------------------------------------------------------
|
|
Custom Scan (Citus INSERT ... SELECT)
|
|
INSERT/SELECT method: pull to coordinator
|
|
-> HashAggregate
|
|
Group Key: remote_scan.user_id
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 6
|
|
(6 rows)
|
|
|
|
-- join among reference_ids and averages is not on the partition key
|
|
-- should fall back to route via coordinator
|
|
SELECT coordinator_plan($Q$
|
|
EXPLAIN (costs off)
|
|
INSERT INTO agg_events (user_id, value_4_agg)
|
|
SELECT
|
|
averages.user_id, avg(averages.value_4)
|
|
FROM
|
|
(SELECT
|
|
raw_events_second.user_id
|
|
FROM
|
|
reference_table JOIN raw_events_second on (reference_table.user_id = raw_events_second.user_id)
|
|
) reference_ids
|
|
JOIN LATERAL
|
|
(SELECT
|
|
user_id, value_4
|
|
FROM
|
|
raw_events_first) as averages ON averages.value_4 = reference_ids.user_id
|
|
GROUP BY averages.user_id;
|
|
$Q$);
|
|
coordinator_plan
|
|
---------------------------------------------------------------------
|
|
Custom Scan (Citus INSERT ... SELECT)
|
|
INSERT/SELECT method: pull to coordinator
|
|
-> HashAggregate
|
|
Group Key: remote_scan.user_id
|
|
-> Custom Scan (Citus Adaptive)
|
|
-> Distributed Subplan XXX_1
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
(8 rows)
|
|
|
|
-- join among the agg_ids and averages is not on the partition key
|
|
-- should fall back to route via coordinator
|
|
SELECT coordinator_plan($Q$
|
|
EXPLAIN (costs off)
|
|
INSERT INTO agg_events (user_id, value_4_agg)
|
|
SELECT
|
|
averages.user_id, avg(averages.value_4)
|
|
FROM
|
|
(SELECT
|
|
raw_events_second.user_id
|
|
FROM
|
|
reference_table JOIN raw_events_second on (reference_table.user_id = raw_events_second.user_id)
|
|
) reference_ids
|
|
JOIN LATERAL
|
|
(SELECT
|
|
user_id, value_4
|
|
FROM
|
|
raw_events_first) as averages ON averages.user_id = reference_ids.user_id
|
|
JOIN LATERAL
|
|
(SELECT user_id, value_4 FROM agg_events) as agg_ids ON (agg_ids.value_4 = averages.user_id)
|
|
GROUP BY averages.user_id;
|
|
$Q$);
|
|
coordinator_plan
|
|
---------------------------------------------------------------------
|
|
Custom Scan (Citus INSERT ... SELECT)
|
|
INSERT/SELECT method: pull to coordinator
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 6
|
|
(4 rows)
|
|
|
|
-- Selected value in the WHERE is not the partition key, so we cannot use distributed
|
|
-- INSERT/SELECT and fall back to repartitioned INSERT/SELECT
|
|
SELECT coordinator_plan($Q$
|
|
EXPLAIN (costs off)
|
|
INSERT INTO raw_events_second
|
|
(user_id)
|
|
SELECT user_id
|
|
FROM raw_events_first
|
|
WHERE user_id IN (SELECT value_1
|
|
FROM raw_events_second);
|
|
$Q$);
|
|
coordinator_plan
|
|
---------------------------------------------------------------------
|
|
Custom Scan (Citus INSERT ... SELECT)
|
|
INSERT/SELECT method: repartition
|
|
-> Custom Scan (Citus Adaptive)
|
|
-> Distributed Subplan XXX_1
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
(6 rows)
|
|
|
|
-- same as above but slightly more complex
|
|
-- since it also includes a subquery in FROM
|
|
SELECT coordinator_plan($Q$
|
|
EXPLAIN (costs off)
|
|
INSERT INTO agg_events
|
|
(user_id)
|
|
SELECT f2.id FROM
|
|
|
|
(SELECT
|
|
id
|
|
FROM (SELECT reference_table.user_id AS id
|
|
FROM raw_events_first,
|
|
reference_table
|
|
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
|
|
INNER JOIN
|
|
(SELECT v4,
|
|
v1,
|
|
id
|
|
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
|
SUM(raw_events_first.value_1) AS v1,
|
|
raw_events_second.user_id AS id
|
|
FROM raw_events_first,
|
|
raw_events_second
|
|
WHERE raw_events_first.user_id = raw_events_second.user_id
|
|
GROUP BY raw_events_second.user_id
|
|
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
|
ON (f.id = f2.id)
|
|
WHERE f.id IN (SELECT value_1
|
|
FROM raw_events_second);
|
|
$Q$);
|
|
coordinator_plan
|
|
---------------------------------------------------------------------
|
|
Custom Scan (Citus INSERT ... SELECT)
|
|
INSERT/SELECT method: repartition
|
|
-> Custom Scan (Citus Adaptive)
|
|
-> Distributed Subplan XXX_1
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
(6 rows)
|
|
|
|
-- some more semi-anti join tests
|
|
SET client_min_messages TO DEBUG2;
|
|
-- join in where
|
|
INSERT INTO raw_events_second
|
|
(user_id)
|
|
SELECT user_id
|
|
FROM raw_events_first
|
|
WHERE user_id IN (SELECT raw_events_second.user_id
|
|
FROM raw_events_second, raw_events_first
|
|
WHERE raw_events_second.user_id = raw_events_first.user_id AND raw_events_first.user_id = 200);
|
|
DEBUG: Creating router plan
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second, multi_insert_select.raw_events_first_13300000 raw_events_first_1 WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first_1.user_id) AND (raw_events_first_1.user_id OPERATOR(pg_catalog.=) 200)))) AND (raw_events_first.user_id IS NOT NULL))
|
|
DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away
|
|
DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away
|
|
DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away
|
|
RESET client_min_messages;
|
|
-- we cannot push this down since it is NOT IN
|
|
-- we use repartition insert/select instead
|
|
SELECT coordinator_plan($Q$
|
|
EXPLAIN (costs off)
|
|
INSERT INTO raw_events_second
|
|
(user_id)
|
|
SELECT user_id
|
|
FROM raw_events_first
|
|
WHERE user_id NOT IN (SELECT raw_events_second.user_id
|
|
FROM raw_events_second, raw_events_first
|
|
WHERE raw_events_second.user_id = raw_events_first.user_id AND raw_events_first.user_id = 200);
|
|
$Q$);
|
|
coordinator_plan
|
|
---------------------------------------------------------------------
|
|
Custom Scan (Citus INSERT ... SELECT)
|
|
INSERT/SELECT method: repartition
|
|
-> Custom Scan (Citus Adaptive)
|
|
-> Distributed Subplan XXX_1
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 1
|
|
(6 rows)
|
|
|
|
SET client_min_messages TO DEBUG2;
|
|
-- safe to push down
|
|
INSERT INTO raw_events_second
|
|
(user_id)
|
|
SELECT user_id
|
|
FROM raw_events_first
|
|
WHERE EXISTS (SELECT 1
|
|
FROM raw_events_second
|
|
WHERE raw_events_second.user_id =raw_events_first.user_id);
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id))) AND (raw_events_first.user_id IS NOT NULL))
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id))) AND (raw_events_first.user_id IS NOT NULL))
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id))) AND (raw_events_first.user_id IS NOT NULL))
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id))) AND (raw_events_first.user_id IS NOT NULL))
|
|
-- NOT EXISTS on the distribution column is pushed down as well
|
|
INSERT INTO raw_events_second
|
|
(user_id)
|
|
SELECT user_id
|
|
FROM raw_events_first
|
|
WHERE NOT EXISTS (SELECT 1
|
|
FROM raw_events_second
|
|
WHERE raw_events_second.user_id =raw_events_first.user_id);
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((NOT (EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id)))) AND (raw_events_first.user_id IS NOT NULL))
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((NOT (EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id)))) AND (raw_events_first.user_id IS NOT NULL))
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((NOT (EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id)))) AND (raw_events_first.user_id IS NOT NULL))
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((NOT (EXISTS (SELECT 1 FROM multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id OPERATOR(pg_catalog.=) raw_events_first.user_id)))) AND (raw_events_first.user_id IS NOT NULL))
|
|
-- more complex LEFT JOINs
|
|
INSERT INTO agg_events
|
|
(user_id, value_4_agg)
|
|
SELECT
|
|
outer_most.id, max(outer_most.value)
|
|
FROM
|
|
(
|
|
SELECT f2.id as id, f2.v4 as value FROM
|
|
(SELECT
|
|
id
|
|
FROM (SELECT raw_events_first.user_id AS id
|
|
FROM raw_events_first LEFT JOIN
|
|
reference_table
|
|
ON (raw_events_first.user_id = reference_table.user_id)) AS foo) as f
|
|
LEFT JOIN
|
|
(SELECT v4,
|
|
v1,
|
|
id
|
|
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
|
SUM(raw_events_first.value_1) AS v1,
|
|
raw_events_second.user_id AS id
|
|
FROM raw_events_first,
|
|
raw_events_second
|
|
WHERE raw_events_first.user_id = raw_events_second.user_id
|
|
GROUP BY raw_events_second.user_id
|
|
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
|
ON (f.id = f2.id)) as outer_most
|
|
GROUP BY
|
|
outer_most.id;
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM (multi_insert_select.raw_events_first_13300000 raw_events_first LEFT JOIN multi_insert_select.reference_table_13300012 reference_table ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)))) foo) f LEFT JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (outer_most.id IS NOT NULL) GROUP BY outer_most.id
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM (multi_insert_select.raw_events_first_13300001 raw_events_first LEFT JOIN multi_insert_select.reference_table_13300012 reference_table ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)))) foo) f LEFT JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (outer_most.id IS NOT NULL) GROUP BY outer_most.id
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM (multi_insert_select.raw_events_first_13300002 raw_events_first LEFT JOIN multi_insert_select.reference_table_13300012 reference_table ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)))) foo) f LEFT JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (outer_most.id IS NOT NULL) GROUP BY outer_most.id
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id, value_4_agg) SELECT outer_most.id, max(outer_most.value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM (multi_insert_select.raw_events_first_13300003 raw_events_first LEFT JOIN multi_insert_select.reference_table_13300012 reference_table ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) reference_table.user_id)))) foo) f LEFT JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id)))) outer_most WHERE (outer_most.id IS NOT NULL) GROUP BY outer_most.id
|
|
RESET client_min_messages;
|
|
-- cannot push down since the f.id IN is matched with value_1
|
|
-- we use repartition insert/select instead
|
|
SELECT coordinator_plan($Q$
|
|
EXPLAIN (costs off)
|
|
INSERT INTO raw_events_second
|
|
(user_id)
|
|
SELECT user_id
|
|
FROM raw_events_first
|
|
WHERE user_id IN (
|
|
SELECT f2.id FROM
|
|
(SELECT
|
|
id
|
|
FROM (SELECT reference_table.user_id AS id
|
|
FROM raw_events_first,
|
|
reference_table
|
|
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
|
|
INNER JOIN
|
|
(SELECT v4,
|
|
v1,
|
|
id
|
|
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
|
SUM(raw_events_first.value_1) AS v1,
|
|
raw_events_second.user_id AS id
|
|
FROM raw_events_first,
|
|
raw_events_second
|
|
WHERE raw_events_first.user_id = raw_events_second.user_id
|
|
GROUP BY raw_events_second.user_id
|
|
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
|
ON (f.id = f2.id)
|
|
WHERE f.id IN (SELECT value_1
|
|
FROM raw_events_second));
|
|
$Q$);
|
|
coordinator_plan
|
|
---------------------------------------------------------------------
|
|
Custom Scan (Citus INSERT ... SELECT)
|
|
INSERT/SELECT method: repartition
|
|
-> Custom Scan (Citus Adaptive)
|
|
-> Distributed Subplan XXX_1
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
(6 rows)
|
|
|
|
SET client_min_messages TO DEBUG2;
|
|
-- same as above, but this time it is safe to push down since
|
|
-- f.id IN is matched with user_id
|
|
INSERT INTO raw_events_second
|
|
(user_id)
|
|
SELECT user_id
|
|
FROM raw_events_first
|
|
WHERE user_id IN (
|
|
SELECT f2.id FROM
|
|
(SELECT
|
|
id
|
|
FROM (SELECT reference_table.user_id AS id
|
|
FROM raw_events_first,
|
|
reference_table
|
|
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
|
|
INNER JOIN
|
|
(SELECT v4,
|
|
v1,
|
|
id
|
|
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
|
SUM(raw_events_first.value_1) AS v1,
|
|
raw_events_second.user_id AS id
|
|
FROM raw_events_first,
|
|
raw_events_second
|
|
WHERE raw_events_first.user_id = raw_events_second.user_id
|
|
GROUP BY raw_events_second.user_id
|
|
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
|
ON (f.id = f2.id)
|
|
WHERE f.id IN (SELECT user_id
|
|
FROM raw_events_second));
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first_1, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first_1.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300000 raw_events_first_1, multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300004 raw_events_second)))) AND (raw_events_first.user_id IS NOT NULL))
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first_1, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first_1.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300001 raw_events_first_1, multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300005 raw_events_second)))) AND (raw_events_first.user_id IS NOT NULL))
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300002 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first_1, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first_1.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300002 raw_events_first_1, multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300006 raw_events_second)))) AND (raw_events_first.user_id IS NOT NULL))
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM multi_insert_select.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.user_id OPERATOR(pg_catalog.=) ANY (SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first_1, multi_insert_select.reference_table_13300012 reference_table WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first_1.value_1) AS v1, raw_events_second.user_id AS id FROM multi_insert_select.raw_events_first_13300003 raw_events_first_1, multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_first_1.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) OPERATOR(pg_catalog.>) (10)::numeric)) foo2) f2 ON ((f.id OPERATOR(pg_catalog.=) f2.id))) WHERE (f.id OPERATOR(pg_catalog.=) ANY (SELECT raw_events_second.user_id FROM multi_insert_select.raw_events_second_13300007 raw_events_second)))) AND (raw_events_first.user_id IS NOT NULL))
|
|
RESET client_min_messages;
|
|
-- cannot push down since top level user_id is matched with NOT IN
|
|
INSERT INTO raw_events_second
|
|
(user_id)
|
|
SELECT user_id
|
|
FROM raw_events_first
|
|
WHERE user_id NOT IN (
|
|
SELECT f2.id FROM
|
|
(SELECT
|
|
id
|
|
FROM (SELECT reference_table.user_id AS id
|
|
FROM raw_events_first,
|
|
reference_table
|
|
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
|
|
INNER JOIN
|
|
(SELECT v4,
|
|
v1,
|
|
id
|
|
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
|
SUM(raw_events_first.value_1) AS v1,
|
|
raw_events_second.user_id AS id
|
|
FROM raw_events_first,
|
|
raw_events_second
|
|
WHERE raw_events_first.user_id = raw_events_second.user_id
|
|
GROUP BY raw_events_second.user_id
|
|
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
|
ON (f.id = f2.id)
|
|
WHERE f.id IN (SELECT user_id
|
|
FROM raw_events_second));
|
|
-- cannot push down since join is not equi join (f.id > f2.id)
|
|
INSERT INTO raw_events_second
|
|
(user_id)
|
|
SELECT user_id
|
|
FROM raw_events_first
|
|
WHERE user_id IN (
|
|
SELECT f2.id FROM
|
|
(SELECT
|
|
id
|
|
FROM (SELECT reference_table.user_id AS id
|
|
FROM raw_events_first,
|
|
reference_table
|
|
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
|
|
INNER JOIN
|
|
(SELECT v4,
|
|
v1,
|
|
id
|
|
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
|
SUM(raw_events_first.value_1) AS v1,
|
|
raw_events_second.user_id AS id
|
|
FROM raw_events_first,
|
|
raw_events_second
|
|
WHERE raw_events_first.user_id = raw_events_second.user_id
|
|
GROUP BY raw_events_second.user_id
|
|
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
|
ON (f.id > f2.id)
|
|
WHERE f.id IN (SELECT user_id
|
|
FROM raw_events_second));
|
|
-- we currently do not support grouping sets
|
|
INSERT INTO agg_events
|
|
(user_id,
|
|
value_1_agg,
|
|
value_2_agg)
|
|
SELECT user_id,
|
|
Sum(value_1) AS sum_val1,
|
|
Sum(value_2) AS sum_val2
|
|
FROM raw_events_second
|
|
GROUP BY grouping sets ( ( user_id ), ( value_1 ), ( user_id, value_1 ), ( ) );
|
|
ERROR: could not run distributed query with GROUPING SETS, CUBE, or ROLLUP
|
|
HINT: Consider using an equality filter on the distributed table's partition column.
|
|
-- set back to INFO
|
|
SET client_min_messages TO INFO;
|
|
-- avoid constraint violations
|
|
TRUNCATE raw_events_first;
|
|
-- we don't support LIMIT for subquery pushdown, but
|
|
-- we recursively plan the query and run it via coordinator
|
|
INSERT INTO agg_events(user_id)
|
|
SELECT user_id
|
|
FROM users_table
|
|
WHERE user_id
|
|
IN (SELECT
|
|
user_id
|
|
FROM (
|
|
(
|
|
SELECT
|
|
user_id
|
|
FROM
|
|
(
|
|
SELECT
|
|
e1.user_id
|
|
FROM
|
|
users_table u1, events_table e1
|
|
WHERE
|
|
e1.user_id = u1.user_id LIMIT 3
|
|
) as f_inner
|
|
)
|
|
) AS f2);
|
|
-- Altering a table and selecting from it using a multi-shard statement
|
|
-- in the same transaction is allowed because we will use the same
|
|
-- connections for all co-located placements.
|
|
BEGIN;
|
|
ALTER TABLE raw_events_second DROP COLUMN value_4;
|
|
INSERT INTO raw_events_first SELECT * FROM raw_events_second;
|
|
ROLLBACK;
|
|
-- Altering a table and selecting from it using a single-shard statement
|
|
-- in the same transaction is disallowed because we will use a different
|
|
-- connection.
|
|
BEGIN;
|
|
ALTER TABLE raw_events_second DROP COLUMN value_4;
|
|
INSERT INTO raw_events_first SELECT * FROM raw_events_second WHERE user_id = 100;
|
|
ROLLBACK;
|
|
-- Altering a reference table and then performing an INSERT ... SELECT which
|
|
-- joins with the reference table is allowed, since the INSERT ... SELECT
|
|
-- would read from the reference table over the same connections as the ones
|
|
-- that performed the parallel DDL.
|
|
BEGIN;
|
|
ALTER TABLE reference_table ADD COLUMN z int;
|
|
INSERT INTO raw_events_first (user_id)
|
|
SELECT user_id FROM raw_events_second JOIN reference_table USING (user_id);
|
|
ROLLBACK;
|
|
-- the same test with sequential DDL should work fine
|
|
BEGIN;
|
|
SET LOCAL citus.multi_shard_modify_mode TO 'sequential';
|
|
ALTER TABLE reference_table ADD COLUMN z int;
|
|
INSERT INTO raw_events_first (user_id)
|
|
SELECT user_id FROM raw_events_second JOIN reference_table USING (user_id);
|
|
ROLLBACK;
|
|
-- Insert after copy is allowed
|
|
BEGIN;
|
|
COPY raw_events_second (user_id, value_1) FROM STDIN DELIMITER ',';
|
|
INSERT INTO raw_events_first SELECT * FROM raw_events_second;
|
|
ROLLBACK;
|
|
-- Insert after copy is currently allowed for single-shard operation.
|
|
-- Both insert and copy are rolled back successfully.
|
|
BEGIN;
|
|
COPY raw_events_second (user_id, value_1) FROM STDIN DELIMITER ',';
|
|
INSERT INTO raw_events_first SELECT * FROM raw_events_second WHERE user_id = 101;
|
|
SELECT user_id FROM raw_events_first WHERE user_id = 101;
|
|
user_id
|
|
---------------------------------------------------------------------
|
|
101
|
|
(1 row)
|
|
|
|
ROLLBACK;
|
|
BEGIN;
|
|
INSERT INTO raw_events_first SELECT * FROM raw_events_second;
|
|
COPY raw_events_first (user_id, value_1) FROM STDIN DELIMITER ',';
|
|
ROLLBACK;
|
|
BEGIN;
|
|
INSERT INTO raw_events_first SELECT * FROM raw_events_second WHERE user_id = 100;
|
|
COPY raw_events_first (user_id, value_1) FROM STDIN DELIMITER ',';
|
|
ROLLBACK;
|
|
-- Similarly, multi-row INSERTs will take part in transactions and reuse connections...
|
|
BEGIN;
|
|
INSERT INTO raw_events_first SELECT * FROM raw_events_second WHERE user_id = 100;
|
|
COPY raw_events_first (user_id, value_1) FROM STDIN DELIMITER ',';
|
|
INSERT INTO raw_events_first (user_id, value_1) VALUES (105, 105), (106, 106);
|
|
ROLLBACK;
|
|
-- selecting from views works
|
|
CREATE VIEW test_view AS SELECT * FROM raw_events_first;
|
|
INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES
|
|
(16, now(), 60, 600, 6000.1, 60000);
|
|
SELECT count(*) FROM raw_events_second;
|
|
count
|
|
---------------------------------------------------------------------
|
|
45
|
|
(1 row)
|
|
|
|
INSERT INTO raw_events_second SELECT * FROM test_view;
|
|
INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES
|
|
(17, now(), 60, 600, 6000.1, 60000);
|
|
INSERT INTO raw_events_second SELECT * FROM test_view WHERE user_id = 17 GROUP BY 1,2,3,4,5,6;
|
|
SELECT count(*) FROM raw_events_second;
|
|
count
|
|
---------------------------------------------------------------------
|
|
47
|
|
(1 row)
|
|
|
|
-- intermediate results (CTEs) should be allowed when doing INSERT...SELECT within a CTE
|
|
WITH series AS (
|
|
SELECT s AS val FROM generate_series(60,70) s
|
|
),
|
|
inserts AS (
|
|
INSERT INTO raw_events_second (user_id)
|
|
SELECT
|
|
user_id
|
|
FROM
|
|
raw_events_first JOIN series ON (value_1 = val)
|
|
RETURNING
|
|
NULL
|
|
)
|
|
SELECT count(*) FROM inserts;
|
|
count
|
|
---------------------------------------------------------------------
|
|
2
|
|
(1 row)
|
|
|
|
-- we need this in our next test
|
|
truncate raw_events_first;
|
|
SET client_min_messages TO DEBUG2;
|
|
-- first show that the query works now
|
|
INSERT INTO raw_events_first SELECT * FROM raw_events_second;
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_second.user_id, raw_events_second."time", raw_events_second.value_1, raw_events_second.value_2, raw_events_second.value_3, raw_events_second.value_4 FROM multi_insert_select.raw_events_second_13300004 raw_events_second WHERE (raw_events_second.user_id IS NOT NULL)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300001 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_second.user_id, raw_events_second."time", raw_events_second.value_1, raw_events_second.value_2, raw_events_second.value_3, raw_events_second.value_4 FROM multi_insert_select.raw_events_second_13300005 raw_events_second WHERE (raw_events_second.user_id IS NOT NULL)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300002 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_second.user_id, raw_events_second."time", raw_events_second.value_1, raw_events_second.value_2, raw_events_second.value_3, raw_events_second.value_4 FROM multi_insert_select.raw_events_second_13300006 raw_events_second WHERE (raw_events_second.user_id IS NOT NULL)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300003 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_second.user_id, raw_events_second."time", raw_events_second.value_1, raw_events_second.value_2, raw_events_second.value_3, raw_events_second.value_4 FROM multi_insert_select.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id IS NOT NULL)
|
|
SET client_min_messages TO INFO;
|
|
truncate raw_events_first;
|
|
SET client_min_messages TO DEBUG2;
|
|
-- now show that it works for a single shard query as well
|
|
INSERT INTO raw_events_first SELECT * FROM raw_events_second WHERE user_id = 5;
|
|
DEBUG: Creating router plan
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT raw_events_second.user_id, raw_events_second."time", raw_events_second.value_1, raw_events_second.value_2, raw_events_second.value_3, raw_events_second.value_4 FROM multi_insert_select.raw_events_second_13300004 raw_events_second WHERE ((raw_events_second.user_id OPERATOR(pg_catalog.=) 5) AND (raw_events_second.user_id IS NOT NULL))
|
|
DEBUG: Skipping target shard interval 13300001 since SELECT query for it pruned away
|
|
DEBUG: Skipping target shard interval 13300002 since SELECT query for it pruned away
|
|
DEBUG: Skipping target shard interval 13300003 since SELECT query for it pruned away
|
|
SET client_min_messages TO INFO;
|
|
-- now do some tests with varchars
|
|
INSERT INTO insert_select_varchar_test VALUES ('test_1', 10);
|
|
INSERT INTO insert_select_varchar_test VALUES ('test_2', 30);
|
|
INSERT INTO insert_select_varchar_test (key, value)
|
|
SELECT *, 100
|
|
FROM (SELECT f1.key
|
|
FROM (SELECT key
|
|
FROM insert_select_varchar_test
|
|
GROUP BY 1
|
|
HAVING Count(key) < 3) AS f1,
|
|
(SELECT key
|
|
FROM insert_select_varchar_test
|
|
GROUP BY 1
|
|
HAVING Sum(COALESCE(insert_select_varchar_test.value, 0)) >
|
|
20.0)
|
|
AS f2
|
|
WHERE f1.key = f2.key
|
|
GROUP BY 1) AS foo;
|
|
SELECT * FROM insert_select_varchar_test ORDER BY 1 DESC, 2 DESC;
|
|
key | value
|
|
---------------------------------------------------------------------
|
|
test_2 | 100
|
|
test_2 | 30
|
|
test_1 | 10
|
|
(3 rows)
|
|
|
|
-- some tests with DEFAULT columns and constant values
|
|
-- this test is most importantly intended for deparsing the query correctly
|
|
-- but still it is preferable to have this test here instead of multi_deparse_shard_query
|
|
CREATE TABLE table_with_defaults
|
|
(
|
|
store_id int,
|
|
first_name text,
|
|
default_1 int DEFAULT 1,
|
|
last_name text,
|
|
default_2 text DEFAULT '2'
|
|
);
|
|
-- we don't need many shards
|
|
SET citus.shard_count = 2;
|
|
SELECT create_distributed_table('table_with_defaults', 'store_id');
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
-- let's see the queries
|
|
SET client_min_messages TO DEBUG2;
|
|
-- a very simple query
|
|
INSERT INTO table_with_defaults SELECT * FROM table_with_defaults;
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_1, table_with_defaults.last_name, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_1, table_with_defaults.last_name, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL)
|
|
-- see that defaults are filled
|
|
INSERT INTO table_with_defaults (store_id, first_name)
|
|
SELECT
|
|
store_id, first_name
|
|
FROM
|
|
table_with_defaults;
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, 1 AS default_1, '2'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, 1 AS default_1, '2'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL)
|
|
-- shuffle one of the defaults and skip the other
|
|
INSERT INTO table_with_defaults (default_2, store_id, first_name)
|
|
SELECT
|
|
default_2, store_id, first_name
|
|
FROM
|
|
table_with_defaults;
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, 1 AS default_1, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, 1 AS default_1, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL)
|
|
-- shuffle both defaults
|
|
INSERT INTO table_with_defaults (default_2, store_id, default_1, first_name)
|
|
SELECT
|
|
default_2, store_id, default_1, first_name
|
|
FROM
|
|
table_with_defaults;
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_1, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_1, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL)
|
|
-- use constants instead of non-default column
|
|
INSERT INTO table_with_defaults (default_2, last_name, store_id, first_name)
|
|
SELECT
|
|
default_2, 'Freund', store_id, 'Andres'
|
|
FROM
|
|
table_with_defaults;
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, table_with_defaults.default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL)
|
|
-- use constants instead of non-default column and skip both defaults
|
|
INSERT INTO table_with_defaults (last_name, store_id, first_name)
|
|
SELECT
|
|
'Freund', store_id, 'Andres'
|
|
FROM
|
|
table_with_defaults;
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, '2'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, '2'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL)
|
|
-- use constants instead of default columns
|
|
INSERT INTO table_with_defaults (default_2, last_name, store_id, first_name, default_1)
|
|
SELECT
|
|
20, last_name, store_id, first_name, 10
|
|
FROM
|
|
table_with_defaults;
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, 10, table_with_defaults.last_name, 20 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, table_with_defaults.first_name, 10, table_with_defaults.last_name, 20 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL)
|
|
-- use constants instead of both default columns and non-default columns
|
|
INSERT INTO table_with_defaults (default_2, last_name, store_id, first_name, default_1)
|
|
SELECT
|
|
20, 'Freund', store_id, 'Andres', 10
|
|
FROM
|
|
table_with_defaults;
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 10, 'Freund'::text AS last_name, 20 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL)
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 10, 'Freund'::text AS last_name, 20 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL)
|
|
-- some of the more involved queries, which combine constants,
|
|
-- defaults, and a GROUP BY entry that is not in the target list
|
|
INSERT INTO table_with_defaults (default_2, store_id, first_name)
|
|
SELECT
|
|
'2000', store_id, 'Andres'
|
|
FROM
|
|
table_with_defaults
|
|
GROUP BY
|
|
last_name, store_id;
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1 AS default_1, '2000'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1 AS default_1, '2000'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id
|
|
INSERT INTO table_with_defaults (default_1, store_id, first_name, default_2)
|
|
SELECT
|
|
1000, store_id, 'Andres', '2000'
|
|
FROM
|
|
table_with_defaults
|
|
GROUP BY
|
|
last_name, store_id, first_name;
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id, table_with_defaults.first_name
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id, table_with_defaults.first_name
|
|
INSERT INTO table_with_defaults (default_1, store_id, first_name, default_2)
|
|
SELECT
|
|
1000, store_id, 'Andres', '2000'
|
|
FROM
|
|
table_with_defaults
|
|
GROUP BY
|
|
last_name, store_id, first_name, default_2;
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_2
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_2
|
|
INSERT INTO table_with_defaults (default_1, store_id, first_name)
|
|
SELECT
|
|
1000, store_id, 'Andres'
|
|
FROM
|
|
table_with_defaults
|
|
GROUP BY
|
|
last_name, store_id, first_name, default_2;
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1000, '2'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300017 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_2
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT table_with_defaults.store_id, 'Andres'::text AS first_name, 1000, '2'::text AS default_2 FROM multi_insert_select.table_with_defaults_13300018 table_with_defaults WHERE (table_with_defaults.store_id IS NOT NULL) GROUP BY table_with_defaults.last_name, table_with_defaults.store_id, table_with_defaults.first_name, table_with_defaults.default_2
|
|
RESET client_min_messages;
|
|
-- Stable function in default should be allowed
|
|
ALTER TABLE table_with_defaults ADD COLUMN t timestamptz DEFAULT now();
|
|
INSERT INTO table_with_defaults (store_id, first_name, last_name)
|
|
SELECT
|
|
store_id, 'first '||store_id, 'last '||store_id
|
|
FROM
|
|
table_with_defaults
|
|
GROUP BY
|
|
store_id, first_name, last_name;
|
|
-- Volatile function in default - SERIAL pseudo-types (the INSERT ... SELECT below completes without an error)
|
|
CREATE TABLE table_with_serial (
|
|
store_id int,
|
|
s bigserial
|
|
);
|
|
SELECT create_distributed_table('table_with_serial', 'store_id');
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
INSERT INTO table_with_serial (store_id)
|
|
SELECT
|
|
store_id
|
|
FROM
|
|
table_with_defaults
|
|
GROUP BY
|
|
store_id;
|
|
-- Volatile function in default - user-defined sequence (the INSERT ... SELECT below completes without an error)
|
|
CREATE SEQUENCE user_defined_sequence;
|
|
CREATE TABLE table_with_user_sequence (
|
|
store_id int,
|
|
s bigint default nextval('user_defined_sequence')
|
|
);
|
|
SELECT create_distributed_table('table_with_user_sequence', 'store_id');
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
INSERT INTO table_with_user_sequence (store_id)
|
|
SELECT
|
|
store_id
|
|
FROM
|
|
table_with_defaults
|
|
GROUP BY
|
|
store_id;
|
|
-- do some more error/error message checks
|
|
SET citus.shard_count TO 4;
|
|
SET citus.shard_replication_factor TO 1;
|
|
CREATE TABLE text_table (part_col text, val int);
|
|
CREATE TABLE char_table (part_col char[], val int);
|
|
create table table_with_starts_with_defaults (a int DEFAULT 5, b int, c int);
|
|
SELECT create_distributed_table('text_table', 'part_col');
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
SELECT create_distributed_table('char_table','part_col');
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
SELECT create_distributed_table('table_with_starts_with_defaults', 'c');
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
SET client_min_messages TO DEBUG;
|
|
INSERT INTO text_table (part_col)
|
|
SELECT
|
|
CASE WHEN part_col = 'onder' THEN 'marco'
|
|
END
|
|
FROM text_table ;
|
|
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
|
|
DETAIL: Subquery contains a case expression in the same position as the target table's partition column.
|
|
HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery.
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
DEBUG: performing repartitioned INSERT ... SELECT
|
|
DEBUG: partitioning SELECT query by column index 0 with name 'part_col'
|
|
INSERT INTO text_table (part_col) SELECT COALESCE(part_col, 'onder') FROM text_table;
|
|
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
|
|
DETAIL: Subquery contains a coalesce expression in the same position as the target table's partition column.
|
|
HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery.
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
DEBUG: performing repartitioned INSERT ... SELECT
|
|
DEBUG: partitioning SELECT query by column index 0 with name 'part_col'
|
|
INSERT INTO text_table (part_col) SELECT GREATEST(part_col, 'jason') FROM text_table;
|
|
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
|
|
DETAIL: Subquery contains a min/max expression in the same position as the target table's partition column.
|
|
HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery.
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
DEBUG: performing repartitioned INSERT ... SELECT
|
|
DEBUG: partitioning SELECT query by column index 0 with name 'part_col'
|
|
INSERT INTO text_table (part_col) SELECT LEAST(part_col, 'andres') FROM text_table;
|
|
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
|
|
DETAIL: Subquery contains a min/max expression in the same position as the target table's partition column.
|
|
HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery.
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
DEBUG: performing repartitioned INSERT ... SELECT
|
|
DEBUG: partitioning SELECT query by column index 0 with name 'part_col'
|
|
INSERT INTO text_table (part_col) SELECT NULLIF(part_col, 'metin') FROM text_table;
|
|
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
|
|
DETAIL: Subquery contains an expression that is not a simple column reference in the same position as the target table's partition column.
|
|
HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery.
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
DEBUG: performing repartitioned INSERT ... SELECT
|
|
DEBUG: partitioning SELECT query by column index 0 with name 'part_col'
|
|
INSERT INTO text_table (part_col) SELECT part_col isnull FROM text_table;
|
|
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
|
|
DETAIL: Subquery contains an expression that is not a simple column reference in the same position as the target table's partition column.
|
|
HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery.
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
DEBUG: performing repartitioned INSERT ... SELECT
|
|
DEBUG: partitioning SELECT query by column index 0 with name 'part_col'
|
|
INSERT INTO text_table (part_col) SELECT part_col::text from char_table;
|
|
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
|
|
DETAIL: Subquery contains an explicit coercion in the same position as the target table's partition column.
|
|
HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery.
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
DEBUG: performing repartitioned INSERT ... SELECT
|
|
DEBUG: partitioning SELECT query by column index 0 with name 'part_col'
|
|
INSERT INTO text_table (part_col) SELECT (part_col = 'burak') is true FROM text_table;
|
|
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
|
|
DETAIL: Subquery contains an expression that is not a simple column reference in the same position as the target table's partition column.
|
|
HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery.
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
DEBUG: performing repartitioned INSERT ... SELECT
|
|
DEBUG: partitioning SELECT query by column index 0 with name 'part_col'
|
|
INSERT INTO text_table (part_col) SELECT val FROM text_table;
|
|
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
|
|
DETAIL: The data type of the target table's partition column should exactly match the data type of the corresponding simple column reference in the subquery.
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
DEBUG: performing repartitioned INSERT ... SELECT
|
|
DEBUG: partitioning SELECT query by column index 0 with name 'part_col'
|
|
INSERT INTO text_table (part_col) SELECT val::text FROM text_table;
|
|
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
|
|
DETAIL: Subquery contains an explicit coercion in the same position as the target table's partition column.
|
|
HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery.
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
DEBUG: performing repartitioned INSERT ... SELECT
|
|
DEBUG: partitioning SELECT query by column index 0 with name 'part_col'
|
|
RESET client_min_messages;
|
|
insert into table_with_starts_with_defaults (b,c) select b,c FROM table_with_starts_with_defaults;
|
|
-- Test on partition column without native hash function
|
|
CREATE TABLE raw_table
|
|
(
|
|
id BIGINT,
|
|
time DATE
|
|
);
|
|
CREATE TABLE summary_table
|
|
(
|
|
time DATE,
|
|
count BIGINT
|
|
);
|
|
SELECT create_distributed_table('raw_table', 'time');
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
SELECT create_distributed_table('summary_table', 'time');
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
INSERT INTO raw_table VALUES(1, '11-11-1980');
|
|
INSERT INTO summary_table SELECT time, COUNT(*) FROM raw_table GROUP BY time;
|
|
SELECT * FROM summary_table;
|
|
time | count
|
|
---------------------------------------------------------------------
|
|
11-11-1980 | 1
|
|
(1 row)
|
|
|
|
-- Test INSERT ... SELECT via coordinator
|
|
-- Select from constants
|
|
TRUNCATE raw_events_first;
|
|
INSERT INTO raw_events_first (user_id, value_1)
|
|
SELECT * FROM (VALUES (1,2), (3,4), (5,6)) AS v(int,int);
|
|
SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id;
|
|
user_id | value_1
|
|
---------------------------------------------------------------------
|
|
1 | 2
|
|
3 | 4
|
|
5 | 6
|
|
(3 rows)
|
|
|
|
-- Select from local functions
|
|
TRUNCATE raw_events_first;
|
|
CREATE SEQUENCE insert_select_test_seq;
|
|
SET client_min_messages TO DEBUG;
|
|
INSERT INTO raw_events_first (user_id, value_1, value_2)
|
|
SELECT
|
|
s, nextval('insert_select_test_seq'), (random()*10)::int
|
|
FROM
|
|
generate_series(1, 5) s;
|
|
DEBUG: Creating router plan
|
|
DEBUG: distributed INSERT ... SELECT can only select from distributed tables
|
|
DEBUG: Collecting INSERT ... SELECT results on coordinator
|
|
SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1;
|
|
DEBUG: Router planner cannot handle multi-shard select queries
|
|
user_id | value_1
|
|
---------------------------------------------------------------------
|
|
1 | 1
|
|
2 | 2
|
|
3 | 3
|
|
4 | 4
|
|
5 | 5
|
|
(5 rows)
|
|
|
|
-- ON CONFLICT is supported
|
|
INSERT INTO raw_events_first (user_id, value_1)
|
|
SELECT s, nextval('insert_select_test_seq') FROM generate_series(1, 5) s
|
|
ON CONFLICT DO NOTHING;
|
|
DEBUG: Creating router plan
|
|
DEBUG: distributed INSERT ... SELECT can only select from distributed tables
|
|
DEBUG: Collecting INSERT ... SELECT results on coordinator
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300000'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) ON CONFLICT DO NOTHING
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300001 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300001'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) ON CONFLICT DO NOTHING
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300002 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300002'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) ON CONFLICT DO NOTHING
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300003 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300003'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) ON CONFLICT DO NOTHING
|
|
-- RETURNING is supported
|
|
INSERT INTO raw_events_first (user_id, value_1)
|
|
SELECT s, nextval('insert_select_test_seq') FROM generate_series(1, 5) s
|
|
RETURNING *;
|
|
DEBUG: Creating router plan
|
|
DEBUG: distributed INSERT ... SELECT can only select from distributed tables
|
|
DEBUG: Collecting INSERT ... SELECT results on coordinator
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300000 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300000'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300001 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300001'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300002 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300002'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4
|
|
DEBUG: distributed statement: INSERT INTO multi_insert_select.raw_events_first_13300003 AS citus_table_alias (user_id, value_1) SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('insert_select_XXX_13300003'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4
|
|
user_id | time | value_1 | value_2 | value_3 | value_4
|
|
---------------------------------------------------------------------
|
|
1 | | 11 | | |
|
|
2 | | 12 | | |
|
|
3 | | 13 | | |
|
|
4 | | 14 | | |
|
|
5 | | 15 | | |
|
|
(5 rows)
|
|
|
|
RESET client_min_messages;
|
|
-- INSERT ... SELECT and multi-shard SELECT in the same transaction is supported
|
|
TRUNCATE raw_events_first;
|
|
BEGIN;
|
|
INSERT INTO raw_events_first (user_id, value_1)
|
|
SELECT s, s FROM generate_series(1, 5) s;
|
|
SELECT user_id, value_1 FROM raw_events_first ORDER BY 1;
|
|
user_id | value_1
|
|
---------------------------------------------------------------------
|
|
1 | 1
|
|
2 | 2
|
|
3 | 3
|
|
4 | 4
|
|
5 | 5
|
|
(5 rows)
|
|
|
|
ROLLBACK;
|
|
-- INSERT ... SELECT and single-shard SELECT in the same transaction is supported
|
|
TRUNCATE raw_events_first;
|
|
BEGIN;
|
|
INSERT INTO raw_events_first (user_id, value_1)
|
|
SELECT s, s FROM generate_series(1, 5) s;
|
|
SELECT user_id, value_1 FROM raw_events_first WHERE user_id = 1;
|
|
user_id | value_1
|
|
---------------------------------------------------------------------
|
|
1 | 1
|
|
(1 row)
|
|
|
|
COMMIT;
|
|
-- Select from local table
|
|
TRUNCATE raw_events_first;
|
|
CREATE TEMPORARY TABLE raw_events_first_local AS
|
|
SELECT s AS u, 2*s AS v FROM generate_series(1, 5) s;
|
|
INSERT INTO raw_events_first (user_id, value_1)
|
|
SELECT u, v FROM raw_events_first_local;
|
|
SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1;
|
|
user_id | value_1
|
|
---------------------------------------------------------------------
|
|
1 | 2
|
|
2 | 4
|
|
3 | 6
|
|
4 | 8
|
|
5 | 10
|
|
(5 rows)
|
|
|
|
-- Use columns in opposite order
|
|
TRUNCATE raw_events_first;
|
|
INSERT INTO raw_events_first (value_1, user_id)
|
|
SELECT u, v FROM raw_events_first_local;
|
|
SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1;
|
|
user_id | value_1
|
|
---------------------------------------------------------------------
|
|
2 | 1
|
|
4 | 2
|
|
6 | 3
|
|
8 | 4
|
|
10 | 5
|
|
(5 rows)
|
|
|
|
-- Set operations can work with opposite column order
|
|
TRUNCATE raw_events_first;
|
|
INSERT INTO raw_events_first (value_3, user_id)
|
|
( SELECT v, u::bigint FROM raw_events_first_local )
|
|
UNION ALL
|
|
( SELECT v, u FROM raw_events_first_local );
|
|
SELECT user_id, value_3 FROM raw_events_first ORDER BY user_id, value_3;
|
|
user_id | value_3
|
|
---------------------------------------------------------------------
|
|
1 | 2
|
|
1 | 2
|
|
2 | 4
|
|
2 | 4
|
|
3 | 6
|
|
3 | 6
|
|
4 | 8
|
|
4 | 8
|
|
5 | 10
|
|
5 | 10
|
|
(10 rows)
|
|
|
|
-- Select from other distributed table with limit
|
|
TRUNCATE raw_events_first;
|
|
TRUNCATE raw_events_second;
|
|
INSERT INTO raw_events_second (user_id, value_4)
|
|
SELECT s, 3*s FROM generate_series (1,5) s;
|
|
INSERT INTO raw_events_first (user_id, value_1)
|
|
SELECT user_id, value_4 FROM raw_events_second LIMIT 5;
|
|
SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1;
|
|
user_id | value_1
|
|
---------------------------------------------------------------------
|
|
1 | 3
|
|
2 | 6
|
|
3 | 9
|
|
4 | 12
|
|
5 | 15
|
|
(5 rows)
|
|
|
|
-- CTEs are supported in local queries
|
|
TRUNCATE raw_events_first;
|
|
WITH removed_rows AS (
|
|
DELETE FROM raw_events_first_local RETURNING u
|
|
)
|
|
INSERT INTO raw_events_first (user_id, value_1)
|
|
WITH value AS (SELECT 1)
|
|
SELECT * FROM removed_rows, value;
|
|
SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1;
|
|
user_id | value_1
|
|
---------------------------------------------------------------------
|
|
1 | 1
|
|
2 | 1
|
|
3 | 1
|
|
4 | 1
|
|
5 | 1
|
|
(5 rows)
|
|
|
|
-- nested CTEs are also supported
|
|
TRUNCATE raw_events_first;
|
|
INSERT INTO raw_events_first_local SELECT s, 2*s FROM generate_series(0, 10) s;
|
|
WITH rows_to_remove AS (
|
|
SELECT u FROM raw_events_first_local WHERE u > 0
|
|
),
|
|
removed_rows AS (
|
|
DELETE FROM raw_events_first_local
|
|
WHERE u IN (SELECT * FROM rows_to_remove)
|
|
RETURNING u, v
|
|
)
|
|
INSERT INTO raw_events_first (user_id, value_1)
|
|
WITH ultra_rows AS (
|
|
WITH numbers AS (
|
|
SELECT s FROM generate_series(1,10) s
|
|
),
|
|
super_rows AS (
|
|
SELECT u, v FROM removed_rows JOIN numbers ON (u = s)
|
|
)
|
|
SELECT * FROM super_rows LIMIT 5
|
|
)
|
|
SELECT u, v FROM ultra_rows;
|
|
SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1;
|
|
user_id | value_1
|
|
---------------------------------------------------------------------
|
|
1 | 2
|
|
2 | 4
|
|
3 | 6
|
|
4 | 8
|
|
5 | 10
|
|
(5 rows)
|
|
|
|
-- CTEs with duplicate names are also supported
|
|
TRUNCATE raw_events_first;
|
|
WITH super_rows AS (
|
|
SELECT u FROM raw_events_first_local
|
|
)
|
|
INSERT INTO raw_events_first (user_id, value_1)
|
|
WITH super_rows AS (
|
|
SELECT * FROM super_rows GROUP BY u
|
|
)
|
|
SELECT u, 5 FROM super_rows;
|
|
SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1;
|
|
user_id | value_1
|
|
---------------------------------------------------------------------
|
|
0 | 5
|
|
(1 row)
|
|
|
|
-- CTEs are supported in router queries
|
|
TRUNCATE raw_events_first;
|
|
WITH user_two AS (
|
|
SELECT user_id, value_4 FROM raw_events_second WHERE user_id = 2
|
|
)
|
|
INSERT INTO raw_events_first (user_id, value_1)
|
|
SELECT * FROM user_two;
|
|
SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1;
|
|
user_id | value_1
|
|
---------------------------------------------------------------------
|
|
2 | 6
|
|
(1 row)
|
|
|
|
-- CTEs are supported when there are name collisions
|
|
WITH numbers AS (
|
|
SELECT s FROM generate_series(1,10) s
|
|
)
|
|
INSERT INTO raw_events_first(user_id, value_1)
|
|
WITH numbers AS (
|
|
SELECT s, s FROM generate_series(1,5) s
|
|
)
|
|
SELECT * FROM numbers;
|
|
-- Select into distributed table with case-sensitive (quoted) identifiers
|
|
CREATE TABLE "CaseSensitiveTable" ("UserID" int, "Value1" int);
|
|
SELECT create_distributed_table('"CaseSensitiveTable"', 'UserID');
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
INSERT INTO "CaseSensitiveTable"
|
|
SELECT s, s FROM generate_series(1,10) s;
|
|
SELECT * FROM "CaseSensitiveTable" ORDER BY "UserID";
|
|
UserID | Value1
|
|
---------------------------------------------------------------------
|
|
1 | 1
|
|
2 | 2
|
|
3 | 3
|
|
4 | 4
|
|
5 | 5
|
|
6 | 6
|
|
7 | 7
|
|
8 | 8
|
|
9 | 9
|
|
10 | 10
|
|
(10 rows)
|
|
|
|
DROP TABLE "CaseSensitiveTable";
|
|
-- Select into distributed table with a sequence
|
|
CREATE TABLE dist_table_with_sequence (user_id serial, value_1 serial);
|
|
SELECT create_distributed_table('dist_table_with_sequence', 'user_id');
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
-- from local query
|
|
INSERT INTO dist_table_with_sequence (value_1)
|
|
SELECT s FROM generate_series(1,5) s;
|
|
SELECT * FROM dist_table_with_sequence ORDER BY user_id, value_1;
|
|
user_id | value_1
|
|
---------------------------------------------------------------------
|
|
1 | 1
|
|
2 | 2
|
|
3 | 3
|
|
4 | 4
|
|
5 | 5
|
|
(5 rows)
|
|
|
|
-- from a distributed query
|
|
INSERT INTO dist_table_with_sequence (value_1)
|
|
SELECT value_1 FROM dist_table_with_sequence ORDER BY value_1;
|
|
SELECT * FROM dist_table_with_sequence ORDER BY user_id, value_1;
|
|
user_id | value_1
|
|
---------------------------------------------------------------------
|
|
1 | 1
|
|
2 | 2
|
|
3 | 3
|
|
4 | 4
|
|
5 | 5
|
|
6 | 1
|
|
7 | 2
|
|
8 | 3
|
|
9 | 4
|
|
10 | 5
|
|
(10 rows)
|
|
|
|
TRUNCATE dist_table_with_sequence;
|
|
INSERT INTO dist_table_with_sequence (user_id)
|
|
SELECT user_id FROM raw_events_second ORDER BY user_id;
|
|
SELECT * FROM dist_table_with_sequence ORDER BY user_id, value_1;
|
|
user_id | value_1
|
|
---------------------------------------------------------------------
|
|
1 | 1
|
|
2 | 2
|
|
3 | 3
|
|
4 | 4
|
|
5 | 5
|
|
(5 rows)
|
|
|
|
WITH top10 AS (
|
|
SELECT user_id FROM raw_events_second WHERE value_1 IS NOT NULL ORDER BY value_1 LIMIT 10
|
|
)
|
|
INSERT INTO dist_table_with_sequence (value_1)
|
|
SELECT * FROM top10;
|
|
SELECT * FROM dist_table_with_sequence ORDER BY user_id, value_1;
|
|
user_id | value_1
|
|
---------------------------------------------------------------------
|
|
1 | 1
|
|
2 | 2
|
|
3 | 3
|
|
4 | 4
|
|
5 | 5
|
|
(5 rows)
|
|
|
|
-- router queries become logical planner queries when there is a nextval call
|
|
INSERT INTO dist_table_with_sequence (user_id)
|
|
SELECT user_id FROM dist_table_with_sequence WHERE user_id = 1;
|
|
SELECT * FROM dist_table_with_sequence ORDER BY user_id, value_1;
|
|
user_id | value_1
|
|
---------------------------------------------------------------------
|
|
1 | 1
|
|
1 | 6
|
|
2 | 2
|
|
3 | 3
|
|
4 | 4
|
|
5 | 5
|
|
(6 rows)
|
|
|
|
DROP TABLE dist_table_with_sequence;
|
|
-- Select into distributed table with a user-defined sequence
|
|
CREATE SEQUENCE seq1;
|
|
CREATE SEQUENCE seq2;
|
|
CREATE TABLE dist_table_with_user_sequence (user_id int default nextval('seq1'), value_1 bigint default nextval('seq2'));
|
|
SELECT create_distributed_table('dist_table_with_user_sequence', 'user_id');
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
-- from local query
|
|
INSERT INTO dist_table_with_user_sequence (value_1)
|
|
SELECT s FROM generate_series(1,5) s;
|
|
SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1;
|
|
user_id | value_1
|
|
---------------------------------------------------------------------
|
|
1 | 1
|
|
2 | 2
|
|
3 | 3
|
|
4 | 4
|
|
5 | 5
|
|
(5 rows)
|
|
|
|
-- from a distributed query
|
|
INSERT INTO dist_table_with_user_sequence (value_1)
|
|
SELECT value_1 FROM dist_table_with_user_sequence ORDER BY value_1;
|
|
SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1;
|
|
user_id | value_1
|
|
---------------------------------------------------------------------
|
|
1 | 1
|
|
2 | 2
|
|
3 | 3
|
|
4 | 4
|
|
5 | 5
|
|
6 | 1
|
|
7 | 2
|
|
8 | 3
|
|
9 | 4
|
|
10 | 5
|
|
(10 rows)
|
|
|
|
TRUNCATE dist_table_with_user_sequence;
|
|
INSERT INTO dist_table_with_user_sequence (user_id)
|
|
SELECT user_id FROM raw_events_second ORDER BY user_id;
|
|
SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1;
|
|
user_id | value_1
|
|
---------------------------------------------------------------------
|
|
1 | 1
|
|
2 | 2
|
|
3 | 3
|
|
4 | 4
|
|
5 | 5
|
|
(5 rows)
|
|
|
|
WITH top10 AS (
|
|
SELECT user_id FROM raw_events_second WHERE value_1 IS NOT NULL ORDER BY value_1 LIMIT 10
|
|
)
|
|
INSERT INTO dist_table_with_user_sequence (value_1)
|
|
SELECT * FROM top10;
|
|
SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1;
|
|
user_id | value_1
|
|
---------------------------------------------------------------------
|
|
1 | 1
|
|
2 | 2
|
|
3 | 3
|
|
4 | 4
|
|
5 | 5
|
|
(5 rows)
|
|
|
|
-- router queries become logical planner queries when there is a nextval call
|
|
INSERT INTO dist_table_with_user_sequence (user_id)
|
|
SELECT user_id FROM dist_table_with_user_sequence WHERE user_id = 1;
|
|
SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1;
|
|
user_id | value_1
|
|
---------------------------------------------------------------------
|
|
1 | 1
|
|
1 | 6
|
|
2 | 2
|
|
3 | 3
|
|
4 | 4
|
|
5 | 5
|
|
(6 rows)
|
|
|
|
DROP TABLE dist_table_with_user_sequence;
|
|
DROP SEQUENCE seq1, seq2;
|
|
-- Select from distributed table into reference table
|
|
CREATE TABLE ref_table (user_id serial, value_1 int);
|
|
SELECT create_reference_table('ref_table');
|
|
create_reference_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
INSERT INTO ref_table
|
|
SELECT user_id, value_1 FROM raw_events_second;
|
|
SELECT * FROM ref_table ORDER BY user_id, value_1;
|
|
user_id | value_1
|
|
---------------------------------------------------------------------
|
|
1 |
|
|
2 |
|
|
3 |
|
|
4 |
|
|
5 |
|
|
(5 rows)
|
|
|
|
INSERT INTO ref_table (value_1)
|
|
SELECT value_1 FROM raw_events_second ORDER BY value_1;
|
|
SELECT * FROM ref_table ORDER BY user_id, value_1;
|
|
user_id | value_1
|
|
---------------------------------------------------------------------
|
|
1 |
|
|
1 |
|
|
2 |
|
|
2 |
|
|
3 |
|
|
3 |
|
|
4 |
|
|
4 |
|
|
5 |
|
|
5 |
|
|
(10 rows)
|
|
|
|
INSERT INTO ref_table SELECT * FROM ref_table;
|
|
SELECT * FROM ref_table ORDER BY user_id, value_1;
|
|
user_id | value_1
|
|
---------------------------------------------------------------------
|
|
1 |
|
|
1 |
|
|
1 |
|
|
1 |
|
|
2 |
|
|
2 |
|
|
2 |
|
|
2 |
|
|
3 |
|
|
3 |
|
|
3 |
|
|
3 |
|
|
4 |
|
|
4 |
|
|
4 |
|
|
4 |
|
|
5 |
|
|
5 |
|
|
5 |
|
|
5 |
|
|
(20 rows)
|
|
|
|
DROP TABLE ref_table;
|
|
-- Select from distributed table into reference table with user-defined sequence
|
|
CREATE SEQUENCE seq1;
|
|
CREATE TABLE ref_table_with_user_sequence (user_id int default nextval('seq1'), value_1 int);
|
|
SELECT create_reference_table('ref_table_with_user_sequence');
|
|
create_reference_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
INSERT INTO ref_table_with_user_sequence
|
|
SELECT user_id, value_1 FROM raw_events_second;
|
|
SELECT * FROM ref_table_with_user_sequence ORDER BY user_id, value_1;
|
|
user_id | value_1
|
|
---------------------------------------------------------------------
|
|
1 |
|
|
2 |
|
|
3 |
|
|
4 |
|
|
5 |
|
|
(5 rows)
|
|
|
|
INSERT INTO ref_table_with_user_sequence (value_1)
|
|
SELECT value_1 FROM raw_events_second ORDER BY value_1;
|
|
SELECT * FROM ref_table_with_user_sequence ORDER BY user_id, value_1;
|
|
user_id | value_1
|
|
---------------------------------------------------------------------
|
|
1 |
|
|
1 |
|
|
2 |
|
|
2 |
|
|
3 |
|
|
3 |
|
|
4 |
|
|
4 |
|
|
5 |
|
|
5 |
|
|
(10 rows)
|
|
|
|
INSERT INTO ref_table_with_user_sequence SELECT * FROM ref_table_with_user_sequence;
|
|
SELECT * FROM ref_table_with_user_sequence ORDER BY user_id, value_1;
|
|
user_id | value_1
|
|
---------------------------------------------------------------------
|
|
1 |
|
|
1 |
|
|
1 |
|
|
1 |
|
|
2 |
|
|
2 |
|
|
2 |
|
|
2 |
|
|
3 |
|
|
3 |
|
|
3 |
|
|
3 |
|
|
4 |
|
|
4 |
|
|
4 |
|
|
4 |
|
|
5 |
|
|
5 |
|
|
5 |
|
|
5 |
|
|
(20 rows)
|
|
|
|
DROP TABLE ref_table_with_user_sequence;
|
|
DROP SEQUENCE seq1;
|
|
-- Select from reference table into reference table
|
|
CREATE TABLE ref1 (d timestamptz);
|
|
SELECT create_reference_table('ref1');
|
|
create_reference_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
CREATE TABLE ref2 (d date);
|
|
SELECT create_reference_table('ref2');
|
|
create_reference_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
INSERT INTO ref2 VALUES ('2017-10-31');
|
|
INSERT INTO ref1 SELECT * FROM ref2;
|
|
SELECT count(*) from ref1;
|
|
count
|
|
---------------------------------------------------------------------
|
|
1
|
|
(1 row)
|
|
|
|
-- also test with now()
|
|
INSERT INTO ref1 SELECT now() FROM ref2;
|
|
SELECT count(*) from ref1;
|
|
count
|
|
---------------------------------------------------------------------
|
|
2
|
|
(1 row)
|
|
|
|
DROP TABLE ref1;
|
|
DROP TABLE ref2;
|
|
-- Select into an append-partitioned table is not supported
|
|
CREATE TABLE insert_append_table (user_id int, value_4 bigint);
|
|
SELECT create_distributed_table('insert_append_table', 'user_id', 'append');
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
INSERT INTO insert_append_table (user_id, value_4)
|
|
SELECT user_id, 1 FROM raw_events_second LIMIT 5;
|
|
ERROR: INSERT ... SELECT into an append-distributed table is not supported
|
|
DROP TABLE insert_append_table;
|
|
-- Insert from other distributed table as prepared statement
|
|
TRUNCATE raw_events_first;
|
|
PREPARE insert_prep(int) AS
|
|
INSERT INTO raw_events_first (user_id, value_1)
|
|
SELECT $1, value_4 FROM raw_events_second ORDER BY value_4 LIMIT 1;
|
|
EXECUTE insert_prep(1);
|
|
EXECUTE insert_prep(2);
|
|
EXECUTE insert_prep(3);
|
|
EXECUTE insert_prep(4);
|
|
EXECUTE insert_prep(5);
|
|
EXECUTE insert_prep(6);
|
|
SELECT user_id, value_1 FROM raw_events_first ORDER BY user_id, value_1;
|
|
user_id | value_1
|
|
---------------------------------------------------------------------
|
|
1 | 3
|
|
2 | 3
|
|
3 | 3
|
|
4 | 3
|
|
5 | 3
|
|
6 | 3
|
|
(6 rows)
|
|
|
|
-- Inserting into views is handled via coordinator
|
|
TRUNCATE raw_events_first;
|
|
INSERT INTO test_view
|
|
SELECT * FROM raw_events_second;
|
|
SELECT user_id, value_4 FROM test_view ORDER BY user_id, value_4;
|
|
user_id | value_4
|
|
---------------------------------------------------------------------
|
|
1 | 3
|
|
2 | 6
|
|
3 | 9
|
|
4 | 12
|
|
5 | 15
|
|
(5 rows)
|
|
|
|
-- Drop the view now, because the column we are about to drop depends on it
|
|
DROP VIEW test_view;
|
|
-- Make sure we handle dropped columns correctly
|
|
CREATE TABLE drop_col_table (col1 text, col2 text, col3 text);
|
|
SELECT create_distributed_table('drop_col_table', 'col2');
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
ALTER TABLE drop_col_table DROP COLUMN col1;
|
|
INSERT INTO drop_col_table (col3, col2)
|
|
SELECT value_4, user_id FROM raw_events_second LIMIT 5;
|
|
SELECT * FROM drop_col_table ORDER BY col2, col3;
|
|
col2 | col3
|
|
---------------------------------------------------------------------
|
|
1 | 3
|
|
2 | 6
|
|
3 | 9
|
|
4 | 12
|
|
5 | 15
|
|
(5 rows)
|
|
|
|
-- make sure the tuple went to the right shard
|
|
SELECT * FROM drop_col_table WHERE col2 = '1';
|
|
col2 | col3
|
|
---------------------------------------------------------------------
|
|
1 | 3
|
|
(1 row)
|
|
|
|
RESET client_min_messages;
|
|
-- make sure casts are handled correctly
|
|
CREATE TABLE coerce_events(user_id int, time timestamp, value_1 numeric);
|
|
SELECT create_distributed_table('coerce_events', 'user_id');
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
CREATE TABLE coerce_agg (user_id int, value_1_agg int);
|
|
SELECT create_distributed_table('coerce_agg', 'user_id');
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
INSERT INTO coerce_events(user_id, value_1) VALUES (1, 1), (2, 2), (10, 10);
|
|
-- numeric -> int (straight function)
|
|
INSERT INTO coerce_agg(user_id, value_1_agg)
|
|
SELECT *
|
|
FROM (
|
|
SELECT user_id, value_1
|
|
FROM coerce_events
|
|
) AS ftop
|
|
ORDER BY 2 DESC, 1 DESC
|
|
LIMIT 5;
|
|
-- int -> text
|
|
ALTER TABLE coerce_agg ALTER COLUMN value_1_agg TYPE text;
|
|
INSERT INTO coerce_agg(user_id, value_1_agg)
|
|
SELECT *
|
|
FROM (
|
|
SELECT user_id, value_1
|
|
FROM coerce_events
|
|
) AS ftop
|
|
LIMIT 5;
|
|
SELECT * FROM coerce_agg ORDER BY 1 DESC, 2 DESC;
|
|
user_id | value_1_agg
|
|
---------------------------------------------------------------------
|
|
10 | 10
|
|
10 | 10
|
|
2 | 2
|
|
2 | 2
|
|
1 | 1
|
|
1 | 1
|
|
(6 rows)
|
|
|
|
TRUNCATE coerce_agg;
|
|
-- int -> char(1)
|
|
ALTER TABLE coerce_agg ALTER COLUMN value_1_agg TYPE char(1);
|
|
INSERT INTO coerce_agg(user_id, value_1_agg)
|
|
SELECT *
|
|
FROM (
|
|
SELECT user_id, value_1
|
|
FROM coerce_events
|
|
) AS ftop
|
|
LIMIT 5;
|
|
ERROR: value too long for type character(1)
|
|
SELECT * FROM coerce_agg ORDER BY 1 DESC, 2 DESC;
|
|
user_id | value_1_agg
|
|
---------------------------------------------------------------------
|
|
(0 rows)
|
|
|
|
TRUNCATE coerce_agg;
|
|
TRUNCATE coerce_events;
|
|
-- char(5) -> char(1)
|
|
ALTER TABLE coerce_events ALTER COLUMN value_1 TYPE char(5);
|
|
INSERT INTO coerce_events(user_id, value_1) VALUES (1, 'aaaaa'), (2, 'bbbbb');
|
|
INSERT INTO coerce_agg(user_id, value_1_agg)
|
|
SELECT *
|
|
FROM (
|
|
SELECT user_id, value_1
|
|
FROM coerce_events
|
|
) AS ftop
|
|
LIMIT 5;
|
|
ERROR: value too long for type character(1)
|
|
-- char(1) -> char(5)
|
|
ALTER TABLE coerce_events ALTER COLUMN value_1 TYPE char(1) USING value_1::char(1);
|
|
ALTER TABLE coerce_agg ALTER COLUMN value_1_agg TYPE char(5);
|
|
INSERT INTO coerce_agg(user_id, value_1_agg)
|
|
SELECT *
|
|
FROM (
|
|
SELECT user_id, value_1
|
|
FROM coerce_events
|
|
) AS ftop
|
|
LIMIT 5;
|
|
SELECT * FROM coerce_agg ORDER BY 1 DESC, 2 DESC;
|
|
user_id | value_1_agg
|
|
---------------------------------------------------------------------
|
|
2 | b
|
|
1 | a
|
|
(2 rows)
|
|
|
|
TRUNCATE coerce_agg;
|
|
TRUNCATE coerce_events;
|
|
-- integer -> integer (check VALUE < 5)
|
|
ALTER TABLE coerce_events ALTER COLUMN value_1 TYPE integer USING NULL;
|
|
ALTER TABLE coerce_agg ALTER COLUMN value_1_agg TYPE integer USING NULL;
|
|
ALTER TABLE coerce_agg ADD CONSTRAINT small_number CHECK (value_1_agg < 5);
|
|
INSERT INTO coerce_events (user_id, value_1) VALUES (1, 1), (10, 10);
|
|
\set VERBOSITY TERSE
|
|
INSERT INTO coerce_agg(user_id, value_1_agg)
|
|
SELECT *
|
|
FROM (
|
|
SELECT user_id, value_1
|
|
FROM coerce_events
|
|
) AS ftop;
|
|
ERROR: new row for relation "coerce_agg_13300067" violates check constraint "small_number_13300067"
|
|
\set VERBOSITY DEFAULT
|
|
SELECT * FROM coerce_agg ORDER BY 1 DESC, 2 DESC;
|
|
user_id | value_1_agg
|
|
---------------------------------------------------------------------
|
|
(0 rows)
|
|
|
|
-- integer[3] -> text[3]
|
|
TRUNCATE coerce_events;
|
|
ALTER TABLE coerce_events ALTER COLUMN value_1 TYPE integer[3] USING NULL;
|
|
INSERT INTO coerce_events(user_id, value_1) VALUES (1, '{1,1,1}'), (2, '{2,2,2}');
|
|
ALTER TABLE coerce_agg DROP COLUMN value_1_agg;
|
|
ALTER TABLE coerce_agg ADD COLUMN value_1_agg text[3];
|
|
INSERT INTO coerce_agg(user_id, value_1_agg)
|
|
SELECT *
|
|
FROM (
|
|
SELECT user_id, value_1
|
|
FROM coerce_events
|
|
) AS ftop
|
|
LIMIT 5;
|
|
SELECT * FROM coerce_agg ORDER BY 1 DESC, 2 DESC;
|
|
user_id | value_1_agg
|
|
---------------------------------------------------------------------
|
|
2 | {2,2,2}
|
|
1 | {1,1,1}
|
|
(2 rows)
|
|
|
|
-- INSERT..SELECT + prepared statements + recursive planning
|
|
BEGIN;
|
|
PREPARE prepared_recursive_insert_select AS
|
|
INSERT INTO users_table
|
|
SELECT * FROM users_table
|
|
WHERE value_1 IN (SELECT value_2 FROM events_table OFFSET 0);
|
|
EXECUTE prepared_recursive_insert_select;
|
|
EXECUTE prepared_recursive_insert_select;
|
|
EXECUTE prepared_recursive_insert_select;
|
|
EXECUTE prepared_recursive_insert_select;
|
|
EXECUTE prepared_recursive_insert_select;
|
|
EXECUTE prepared_recursive_insert_select;
|
|
ROLLBACK;
|
|
-- upsert whose ON CONFLICT ... DO UPDATE modifies the distribution column is unsupported
|
|
INSERT INTO agg_events AS ae
|
|
(
|
|
user_id,
|
|
value_1_agg,
|
|
agg_time
|
|
)
|
|
SELECT user_id,
|
|
value_1,
|
|
time
|
|
FROM raw_events_first
|
|
ON conflict (user_id, value_1_agg)
|
|
DO UPDATE
|
|
SET user_id = 42
|
|
RETURNING user_id, value_1_agg;
|
|
ERROR: modifying the partition value of rows is not allowed
|
|
-- test a small citus.remote_copy_flush_threshold
|
|
BEGIN;
|
|
SET LOCAL citus.remote_copy_flush_threshold TO 1;
|
|
INSERT INTO raw_events_first
|
|
SELECT * FROM raw_events_first OFFSET 0
|
|
ON CONFLICT DO NOTHING;
|
|
ABORT;
|
|
-- test fix for issue https://github.com/citusdata/citus/issues/5891
|
|
CREATE TABLE dist_table_1(
|
|
dist_col integer,
|
|
int_col integer,
|
|
text_col_1 text,
|
|
text_col_2 text
|
|
);
|
|
SELECT create_distributed_table('dist_table_1', 'dist_col');
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
INSERT INTO dist_table_1 VALUES (1, 1, 'string', 'string');
|
|
CREATE TABLE dist_table_2(
|
|
dist_col integer,
|
|
int_col integer
|
|
);
|
|
SELECT create_distributed_table('dist_table_2', 'dist_col');
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
INSERT INTO dist_table_2 VALUES (1, 1);
|
|
with a as (select random()) INSERT INTO dist_table_1
|
|
SELECT
|
|
t1.dist_col,
|
|
1,
|
|
'string',
|
|
'string'
|
|
FROM a, dist_table_1 t1
|
|
join dist_table_2 t2 using (dist_col)
|
|
limit 1
|
|
returning text_col_1;
|
|
text_col_1
|
|
---------------------------------------------------------------------
|
|
string
|
|
(1 row)
|
|
|
|
CREATE TABLE dist_table_3(
|
|
dist_col bigint,
|
|
int_col integer
|
|
);
|
|
SELECT create_distributed_table('dist_table_3', 'dist_col');
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
-- dist_table_2 and dist_table_3 are non-colocated source tables. Repartitioning is also not possible due to
|
|
-- different types for distribution columns. Citus would not be able to handle this complex insert select.
|
|
INSERT INTO dist_table_1 SELECT dist_table_2.dist_col FROM dist_table_2 JOIN dist_table_3 USING(dist_col);
|
|
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
|
|
CREATE TABLE dist_table_4(
|
|
dist_col integer,
|
|
int_col integer
|
|
);
|
|
SELECT create_distributed_table('dist_table_4', 'dist_col');
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
-- Even if target table distribution column is colocated with dist_table_2's distributed column, source tables dist_table_2 and dist_table_4
|
|
-- are non-colocated. Hence, SELECT part of the query should be pulled to coordinator.
|
|
SELECT coordinator_plan($$
|
|
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1 SELECT dist_table_2.dist_col FROM dist_table_2 JOIN dist_table_4 ON dist_table_2.dist_col = dist_table_4.int_col;
|
|
$$);
|
|
coordinator_plan
|
|
---------------------------------------------------------------------
|
|
Custom Scan (Citus INSERT ... SELECT)
|
|
INSERT/SELECT method: pull to coordinator
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 6
|
|
(4 rows)
|
|
|
|
-- For INSERT SELECT, when a lateral query references an outer query, push-down is possible even if limit clause exists in the lateral query.
|
|
-- It is because subquery with limit does not need to be merged at coordinator as it is a lateral query.
|
|
SELECT coordinator_plan($$
|
|
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1 SELECT d1.dist_col FROM dist_table_1 d1 LEFT JOIN LATERAL (SELECT * FROM dist_table_2 d2 WHERE d1.dist_col = d2.dist_col LIMIT 3) dummy USING(dist_col);
|
|
$$);
|
|
coordinator_plan
|
|
---------------------------------------------------------------------
|
|
Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
(2 rows)
|
|
|
|
-- For INSERT SELECT, push-down is NOT possible when a limit clause exists in a subquery in the SELECT part of INSERT SELECT.
|
|
-- It is because the subquery with limit needs to be merged at coordinator.
|
|
SELECT coordinator_plan($$
|
|
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_1 SELECT d1.dist_col FROM dist_table_1 d1 LEFT JOIN (SELECT * FROM dist_table_2 LIMIT 3) dummy USING(dist_col);
|
|
$$);
|
|
coordinator_plan
|
|
---------------------------------------------------------------------
|
|
Custom Scan (Citus INSERT ... SELECT)
|
|
INSERT/SELECT method: repartition
|
|
-> Custom Scan (Citus Adaptive)
|
|
-> Distributed Subplan XXX_1
|
|
-> Limit
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
(7 rows)
|
|
|
|
CREATE TABLE dist_table_5(id int, id2 int);
|
|
SELECT create_distributed_table('dist_table_5','id');
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
CREATE TABLE dist_table_6(id int, id2 int);
|
|
SELECT create_distributed_table('dist_table_6','id');
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
-- verify that insert select with union can be pushed down since UNION clause has FROM clause at top level query.
|
|
SELECT coordinator_plan($$
|
|
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5(id) SELECT id FROM (SELECT id FROM dist_table_5 UNION SELECT id FROM dist_table_6) dummy;
|
|
$$);
|
|
coordinator_plan
|
|
---------------------------------------------------------------------
|
|
Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
(2 rows)
|
|
|
|
-- verify that insert select with sublink can be pushed down when tables are colocated.
|
|
SELECT coordinator_plan($$
|
|
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id, (SELECT id FROM dist_table_5 WHERE dist_table_5.id = dist_table_6.id) FROM dist_table_6;
|
|
$$);
|
|
coordinator_plan
|
|
---------------------------------------------------------------------
|
|
Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
(2 rows)
|
|
|
|
CREATE TABLE ref_table_1(id int);
|
|
SELECT create_reference_table('ref_table_1');
|
|
create_reference_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
-- verify that insert select with sublink cannot be pushed down when from clause does not contain any distributed relation.
|
|
INSERT INTO dist_table_5 SELECT id, (SELECT id FROM dist_table_5 WHERE dist_table_5.id = ref_table_1.id) FROM ref_table_1;
|
|
ERROR: correlated subqueries are not supported when the FROM clause contains a reference table
|
|
-- verify that insert select cannot be pushed down when we have recurring range table in from clause.
|
|
SELECT coordinator_plan($$
|
|
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id, (SELECT id FROM ref_table_1 WHERE id = 1) FROM ref_table_1;
|
|
$$);
|
|
coordinator_plan
|
|
---------------------------------------------------------------------
|
|
Custom Scan (Citus INSERT ... SELECT)
|
|
INSERT/SELECT method: pull to coordinator
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 1
|
|
(4 rows)
|
|
|
|
-- verify that insert select cannot be pushed down when we have a reference table on the outer side of an outer join.
|
|
SELECT coordinator_plan($$
|
|
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT a.id FROM dist_table_5 a LEFT JOIN ref_table_1 b ON (true) RIGHT JOIN ref_table_1 c ON (true);
|
|
$$);
|
|
coordinator_plan
|
|
---------------------------------------------------------------------
|
|
Custom Scan (Citus INSERT ... SELECT)
|
|
INSERT/SELECT method: pull to coordinator
|
|
-> Custom Scan (Citus Adaptive)
|
|
-> Distributed Subplan XXX_1
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
(6 rows)
|
|
|
|
-- verify that insert select cannot be pushed down when it has a recurring outer join in a subquery.
|
|
SELECT coordinator_plan($$
|
|
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id FROM ref_table_1 LEFT JOIN dist_table_5 USING(id);
|
|
$$);
|
|
coordinator_plan
|
|
---------------------------------------------------------------------
|
|
Custom Scan (Citus INSERT ... SELECT)
|
|
INSERT/SELECT method: pull to coordinator
|
|
-> Custom Scan (Citus Adaptive)
|
|
-> Distributed Subplan XXX_1
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
(6 rows)
|
|
|
|
CREATE TABLE loc_table_1(id int);
|
|
-- verify that insert select cannot be pushed down when it contains join between local and distributed tables.
|
|
SELECT coordinator_plan($$
|
|
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id FROM dist_table_5 JOIN loc_table_1 USING(id);
|
|
$$);
|
|
coordinator_plan
|
|
---------------------------------------------------------------------
|
|
Custom Scan (Citus INSERT ... SELECT)
|
|
INSERT/SELECT method: repartition
|
|
-> Custom Scan (Citus Adaptive)
|
|
-> Distributed Subplan XXX_1
|
|
-> Seq Scan on loc_table_1
|
|
Task Count: 4
|
|
(6 rows)
|
|
|
|
CREATE VIEW view_1 AS
|
|
SELECT id FROM dist_table_6;
|
|
CREATE MATERIALIZED VIEW view_2 AS
|
|
SELECT id FROM dist_table_6;
|
|
-- verify that insert select can be pushed down when it contains a view.
|
|
SELECT coordinator_plan($$
|
|
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT * FROM view_1;
|
|
$$);
|
|
coordinator_plan
|
|
---------------------------------------------------------------------
|
|
Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
(2 rows)
|
|
|
|
-- verify that insert select cannot be pushed down when it contains a materialized view.
|
|
SELECT coordinator_plan($$
|
|
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT * FROM view_2;
|
|
$$);
|
|
coordinator_plan
|
|
---------------------------------------------------------------------
|
|
Custom Scan (Citus INSERT ... SELECT)
|
|
INSERT/SELECT method: pull to coordinator
|
|
-> Seq Scan on view_2
|
|
(3 rows)
|
|
|
|
CREATE TABLE append_table(id integer, data text, int_data int);
|
|
SELECT create_distributed_table('append_table', 'id', 'append');
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
SELECT master_create_empty_shard('append_table');
|
|
master_create_empty_shard
|
|
---------------------------------------------------------------------
|
|
13300096
|
|
(1 row)
|
|
|
|
-- verify that insert select push down for append tables is not supported.
|
|
INSERT INTO append_table SELECT * FROM append_table;
|
|
ERROR: INSERT ... SELECT into an append-distributed table is not supported
|
|
-- verify that CTEs at top level of INSERT SELECT, that can normally be inlined, would not be inlined by INSERT SELECT pushdown planner
|
|
-- and handled by pull to coordinator.
|
|
SELECT coordinator_plan($$
|
|
EXPLAIN (COSTS FALSE) WITH cte_1 AS (SELECT id FROM dist_table_5 WHERE id > 5)
|
|
INSERT INTO dist_table_5
|
|
SELECT id FROM dist_table_5 JOIN cte_1 USING(id) OFFSET 5;
|
|
$$);
|
|
coordinator_plan
|
|
---------------------------------------------------------------------
|
|
Custom Scan (Citus INSERT ... SELECT)
|
|
INSERT/SELECT method: pull to coordinator
|
|
-> Custom Scan (Citus Adaptive)
|
|
-> Distributed Subplan XXX_1
|
|
-> Limit
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
(7 rows)
|
|
|
|
-- verify that CTEs at top level of SELECT part, would be inlined by Postgres and pushed down by INSERT SELECT planner.
|
|
SELECT coordinator_plan($$
|
|
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5
|
|
WITH cte_1 AS (SELECT id FROM dist_table_5 WHERE id = 5)
|
|
SELECT id FROM dist_table_5 JOIN cte_1 USING(id);
|
|
$$);
|
|
coordinator_plan
|
|
---------------------------------------------------------------------
|
|
Custom Scan (Citus Adaptive)
|
|
Task Count: 1
|
|
(2 rows)
|
|
|
|
SET client_min_messages TO ERROR;
|
|
DROP SCHEMA multi_insert_select CASCADE;
|