citus/src/test/regress/sql/multi_insert_select.sql

522 lines
15 KiB
SQL

--
-- MULTI_INSERT_SELECT
--
ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 13300000;
ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 13300000;
CREATE TABLE raw_events_first (user_id int, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint, UNIQUE(user_id, value_1));
SELECT master_create_distributed_table('raw_events_first', 'user_id', 'hash');
SELECT master_create_worker_shards('raw_events_first', 4, 2);
CREATE TABLE raw_events_second (user_id int, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint, UNIQUE(user_id, value_1));
SELECT master_create_distributed_table('raw_events_second', 'user_id', 'hash');
SELECT master_create_worker_shards('raw_events_second', 4, 2);
CREATE TABLE agg_events (user_id int, value_1_agg int, value_2_agg int, value_3_agg float, value_4_agg bigint, agg_time timestamp, UNIQUE(user_id, value_1_agg));
SELECT master_create_distributed_table('agg_events', 'user_id', 'hash');
SELECT master_create_worker_shards('agg_events', 4, 2);
-- make tables as co-located
UPDATE pg_dist_partition SET colocationid = 100000 WHERE logicalrelid IN ('raw_events_first', 'raw_events_second', 'agg_events');
CREATE TABLE reference_table (user_id int);
SELECT master_create_distributed_table('reference_table', 'user_id', 'hash');
SELECT master_create_worker_shards('reference_table', 1, 2);
INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES
(1, now(), 10, 100, 1000.1, 10000);
INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES
(2, now(), 20, 200, 2000.1, 20000);
INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES
(3, now(), 30, 300, 3000.1, 30000);
INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES
(4, now(), 40, 400, 4000.1, 40000);
INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES
(5, now(), 50, 500, 5000.1, 50000);
INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES
(6, now(), 60, 600, 6000.1, 60000);
SET client_min_messages TO DEBUG4;
-- raw table to raw table
INSERT INTO raw_events_second SELECT * FROM raw_events_first;
-- see that our first multi shard INSERT...SELECT works expected
SET client_min_messages TO INFO;
SELECT
raw_events_first.user_id
FROM
raw_events_first, raw_events_second
WHERE
raw_events_first.user_id = raw_events_second.user_id;
-- see that we get unique vialitons
INSERT INTO raw_events_second SELECT * FROM raw_events_first;
-- add one more row
INSERT INTO raw_events_first (user_id, time) VALUES
(7, now());
-- try a single shard query
SET client_min_messages TO DEBUG4;
INSERT INTO raw_events_second (user_id, time) SELECT user_id, time FROM raw_events_first WHERE user_id = 7;
SET client_min_messages TO INFO;
-- add one more row
INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES
(8, now(), 80, 800, 8000, 80000);
-- reorder columns
SET client_min_messages TO DEBUG4;
INSERT INTO raw_events_second (value_2, value_1, value_3, value_4, user_id, time)
SELECT
value_2, value_1, value_3, value_4, user_id, time
FROM
raw_events_first
WHERE
user_id = 8;
-- a zero shard select
INSERT INTO raw_events_second (value_2, value_1, value_3, value_4, user_id, time)
SELECT
value_2, value_1, value_3, value_4, user_id, time
FROM
raw_events_first
WHERE
false;
-- another zero shard select
INSERT INTO raw_events_second (value_2, value_1, value_3, value_4, user_id, time)
SELECT
value_2, value_1, value_3, value_4, user_id, time
FROM
raw_events_first
WHERE
0 != 0;
-- add one more row
SET client_min_messages TO INFO;
INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES
(9, now(), 90, 900, 9000, 90000);
-- show that RETURNING also works
SET client_min_messages TO DEBUG4;
INSERT INTO raw_events_second (user_id, value_1, value_3)
SELECT
user_id, value_1, value_3
FROM
raw_events_first
WHERE
value_3 = 9000
RETURNING *;
-- hits two shards
INSERT INTO raw_events_second (user_id, value_1, value_3)
SELECT
user_id, value_1, value_3
FROM
raw_events_first
WHERE
user_id = 9 OR user_id = 16
RETURNING *;
-- now do some aggregations
INSERT INTO agg_events
SELECT
user_id, sum(value_1), avg(value_2), sum(value_3), count(value_4)
FROM
raw_events_first
GROUP BY
user_id;
-- group by column not exists on the SELECT target list
-- TODO: there is a bug on RETURNING
INSERT INTO agg_events (value_3_agg, value_4_agg, value_1_agg, user_id)
SELECT
sum(value_3), count(value_4), sum(value_1), user_id
FROM
raw_events_first
GROUP BY
value_2, user_id
RETURNING *;
-- some subquery tests
INSERT INTO agg_events
(value_1_agg,
user_id)
SELECT SUM(value_1),
id
FROM (SELECT raw_events_second.user_id AS id,
raw_events_second.value_1
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.user_id) AS foo
GROUP BY id;
-- subquery one more level depth
INSERT INTO agg_events
(value_4_agg,
value_1_agg,
user_id)
SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
SUM(raw_events_first.value_1) AS v1,
raw_events_second.user_id AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.user_id
GROUP BY raw_events_second.user_id) AS foo;
-- join between subqueries
INSERT INTO agg_events
(user_id)
SELECT f2.id FROM
(SELECT
id
FROM (SELECT reference_table.user_id AS id
FROM raw_events_first,
reference_table
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
INNER JOIN
(SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
SUM(raw_events_first.value_1) AS v1,
raw_events_second.user_id AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.user_id
GROUP BY raw_events_second.user_id
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
ON (f.id = f2.id);
-- add one more level subqueris on top of subquery JOINs
INSERT INTO agg_events
(user_id, value_4_agg)
SELECT
outer_most.id, max(outer_most.value)
FROM
(
SELECT f2.id as id, f2.v4 as value FROM
(SELECT
id
FROM (SELECT reference_table.user_id AS id
FROM raw_events_first,
reference_table
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
INNER JOIN
(SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
SUM(raw_events_first.value_1) AS v1,
raw_events_second.user_id AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.user_id
GROUP BY raw_events_second.user_id
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
ON (f.id = f2.id)) as outer_most
GROUP BY
outer_most.id;
-- some UPSERTS
INSERT INTO agg_events AS ae
(
user_id,
value_1_agg,
agg_time
)
SELECT user_id,
value_1,
time
FROM raw_events_first
ON conflict (user_id, value_1_agg)
DO UPDATE
SET agg_time = EXCLUDED.agg_time
WHERE ae.agg_time < EXCLUDED.agg_time;
-- upserts with returning
INSERT INTO agg_events AS ae
(
user_id,
value_1_agg,
agg_time
)
SELECT user_id,
value_1,
time
FROM raw_events_first
ON conflict (user_id, value_1_agg)
DO UPDATE
SET agg_time = EXCLUDED.agg_time
WHERE ae.agg_time < EXCLUDED.agg_time
RETURNING user_id, value_1_agg;
-- TODO:: add hll and date_trunc
INSERT INTO agg_events (user_id, value_1_agg)
SELECT
user_id, sum(value_1 + value_2)
FROM
raw_events_first GROUP BY user_id;
-- FILTER CLAUSE
INSERT INTO agg_events (user_id, value_1_agg)
SELECT
user_id, sum(value_1 + value_2) FILTER (where value_3 = 15)
FROM
raw_events_first GROUP BY user_id;
-- TODO: UUIDs
-- a test with reference table JOINs
INSERT INTO
agg_events (user_id, value_1_agg)
SELECT
raw_events_first.user_id, sum(value_1)
FROM
reference_table, raw_events_first
WHERE
raw_events_first.user_id = reference_table.user_id
GROUP BY
raw_events_first.user_id;
-- We do not support CTEs
WITH fist_table_agg AS
(SELECT sum(value_1) as v1_agg, user_id FROM raw_events_first GROUP BY user_id)
INSERT INTO agg_events
(value_1_agg, user_id)
SELECT
v1_agg, user_id
FROM
fist_table_agg;
-- We do not support CTEs in the INSERT as well
INSERT INTO agg_events
WITH sub_cte AS (SELECT 1)
SELECT
raw_events_first.user_id, (SELECT * FROM sub_cte)
FROM
raw_events_first;
-- We do not support any set operations
INSERT INTO
raw_events_first(user_id)
SELECT
user_id
FROM
((SELECT user_id FROM raw_events_first) UNION
(SELECT user_id FROM raw_events_second)) as foo;
-- We do not support any set operations
INSERT INTO
raw_events_first(user_id)
(SELECT user_id FROM raw_events_first) INTERSECT
(SELECT user_id FROM raw_events_first);
-- We do not support any set operations
INSERT INTO
raw_events_first(user_id)
SELECT
user_id
FROM
((SELECT user_id FROM raw_events_first WHERE user_id = 15) EXCEPT
(SELECT user_id FROM raw_events_second where user_id = 17)) as foo;
-- unsupported JOIN
INSERT INTO agg_events
(value_4_agg,
value_1_agg,
user_id)
SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
SUM(raw_events_first.value_1) AS v1,
raw_events_second.user_id AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id != raw_events_second.user_id
GROUP BY raw_events_second.user_id) AS foo;
-- INSERT partition column does not match with SELECT partition column
INSERT INTO agg_events
(value_4_agg,
value_1_agg,
user_id)
SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
SUM(raw_events_first.value_1) AS v1,
raw_events_second.value_3 AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.user_id
GROUP BY raw_events_second.value_3) AS foo;
-- error cases
-- no part column at all
INSERT INTO raw_events_second
(value_1)
SELECT value_1
FROM raw_events_first;
INSERT INTO raw_events_second
(value_1)
SELECT user_id
FROM raw_events_first;
INSERT INTO raw_events_second
(user_id)
SELECT value_1
FROM raw_events_first;
INSERT INTO raw_events_second
(user_id)
SELECT user_id * 2
FROM raw_events_first;
INSERT INTO raw_events_second
(user_id)
SELECT user_id :: bigint
FROM raw_events_first;
INSERT INTO agg_events
(value_3_agg,
value_4_agg,
value_1_agg,
value_2_agg,
user_id)
SELECT SUM(value_3),
Count(value_4),
user_id,
SUM(value_1),
Avg(value_2)
FROM raw_events_first
GROUP BY user_id;
INSERT INTO agg_events
(value_3_agg,
value_4_agg,
value_1_agg,
value_2_agg,
user_id)
SELECT SUM(value_3),
Count(value_4),
user_id,
SUM(value_1),
value_2
FROM raw_events_first
GROUP BY user_id,
value_2;
-- tables should be co-located
INSERT INTO agg_events (user_id)
SELECT
user_id
FROM
reference_table;
-- unsupported joins between subqueries
-- we do not return bare partition column on the inner query
INSERT INTO agg_events
(user_id)
SELECT f2.id FROM
(SELECT
id
FROM (SELECT reference_table.user_id AS id
FROM raw_events_first,
reference_table
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
INNER JOIN
(SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
raw_events_second.value_1 AS v1,
SUM(raw_events_second.user_id) AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.user_id
GROUP BY raw_events_second.value_1
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
ON (f.id = f2.id);
-- the second part of the query is not routable since
-- no GROUP BY on the partition column
INSERT INTO agg_events
(user_id)
SELECT f.id FROM
(SELECT
id
FROM (SELECT raw_events_first.user_id AS id
FROM raw_events_first,
reference_table
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
INNER JOIN
(SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
raw_events_second.value_1 AS v1,
SUM(raw_events_second.user_id) AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.user_id
GROUP BY raw_events_second.value_1
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
ON (f.id = f2.id);
-- cannot pushdown the query since the JOIN is not equi JOIN
INSERT INTO agg_events
(user_id, value_4_agg)
SELECT
outer_most.id, max(outer_most.value)
FROM
(
SELECT f2.id as id, f2.v4 as value FROM
(SELECT
id
FROM (SELECT reference_table.user_id AS id
FROM raw_events_first,
reference_table
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
INNER JOIN
(SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
SUM(raw_events_first.value_1) AS v1,
raw_events_second.user_id AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.user_id
GROUP BY raw_events_second.user_id
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
ON (f.id != f2.id)) as outer_most
GROUP BY outer_most.id;
-- Views does not work
CREATE VIEW test_view AS SELECT * FROM raw_events_first;
INSERT INTO raw_events_second SELECT * FROM test_view;