--
-- PG18
--
SHOW server_version \gset
SELECT substring(:'server_version', '\d+')::int >= 18 AS server_version_ge_18
\gset
-- test invalid statistics
-- behavior is same among PG versions, error message differs
-- relevant PG18 commit: 3eea4dc2c7, 38883916e
CREATE STATISTICS tst ON a FROM (VALUES (x)) AS foo;
ERROR:  CREATE STATISTICS only supports relation names in the FROM clause
CREATE FUNCTION tftest(int) returns table(a int, b int) as $$
SELECT $1, $1+i FROM generate_series(1,5) g(i);
$$ LANGUAGE sql IMMUTABLE STRICT;
CREATE STATISTICS alt_stat2 ON a FROM tftest(1);
ERROR:  CREATE STATISTICS only supports relation names in the FROM clause
DROP FUNCTION tftest;
-- Everything after this guard is skipped (\q) unless the server is PG18 or newer.
\if :server_version_ge_18
\else
\q
\endif
-- PG18-specific tests go here.
--
-- Purpose: Verify PG18 behavior that NOT NULL constraints are materialized
--          as pg_constraint rows with contype = 'n' on both coordinator and
--          worker shards. Also confirm our helper view (table_checks) does
--          NOT surface NOT NULL entries.
-- https://github.com/postgres/postgres/commit/14e87ffa5c543b5f30ead7413084c25f7735039f
CREATE SCHEMA pg18_nn;
SET search_path TO pg18_nn;
-- Local control table
DROP TABLE IF EXISTS nn_local CASCADE;
NOTICE:  table "nn_local" does not exist, skipping
CREATE TABLE nn_local(
    a int NOT NULL,
    b int,
    c text NOT NULL
);
-- Distributed table
DROP TABLE IF EXISTS nn_dist CASCADE;
NOTICE:  table "nn_dist" does not exist, skipping
CREATE TABLE nn_dist(
    a int NOT NULL,
    b int,
    c text NOT NULL
);
SELECT create_distributed_table('nn_dist', 'a');
 create_distributed_table
---------------------------------------------------------------------
 
(1 row)

-- Coordinator: count NOT NULL constraint rows
SELECT 'local_n_count' AS label, contype, count(*)
FROM pg_constraint
WHERE conrelid = 'pg18_nn.nn_local'::regclass
GROUP BY contype
ORDER BY contype;
     label     | contype | count
---------------------------------------------------------------------
 local_n_count | n       |     2
(1 row)

SELECT 'dist_n_count' AS label, contype, count(*)
FROM pg_constraint
WHERE conrelid = 'pg18_nn.nn_dist'::regclass
GROUP BY contype
ORDER BY contype;
    label     | contype | count
---------------------------------------------------------------------
 dist_n_count | n       |     2
(1 row)

-- Our helper view should exclude NOT NULL
SELECT 'table_checks_local_count' AS label, count(*)
FROM public.table_checks
WHERE relid = 'pg18_nn.nn_local'::regclass;
          label           | count
---------------------------------------------------------------------
 table_checks_local_count |     0
(1 row)

SELECT 'table_checks_dist_count' AS label, count(*)
FROM public.table_checks
WHERE relid = 'pg18_nn.nn_dist'::regclass;
          label          | count
---------------------------------------------------------------------
 table_checks_dist_count |     0
(1 row)

-- Add a real CHECK to ensure table_checks still reports real checks
ALTER TABLE nn_dist ADD CONSTRAINT nn_dist_check CHECK (b IS DISTINCT FROM 42);
SELECT 'table_checks_dist_with_real_check' AS label, count(*)
FROM public.table_checks
WHERE relid = 'pg18_nn.nn_dist'::regclass;
               label               | count
---------------------------------------------------------------------
 table_checks_dist_with_real_check |     1
(1 row)

-- === Worker checks ===
\c - - - :worker_1_port
SET client_min_messages TO WARNING;
SET search_path TO pg18_nn;
-- Pick one heap shard of nn_dist in our schema and remember it in the psql
-- variable shard_regclass (it is reused after the reconnects further down).
-- The name filter is an anchored regex (nn_dist_<digits>) rather than
-- LIKE 'nn_dist_%', because every "_" in a LIKE pattern is a single-character
-- wildcard and would accept names that are not shards of nn_dist.
SELECT format('%I.%I', n.nspname, c.relname) AS shard_regclass
FROM pg_class c
JOIN pg_namespace n ON n.oid = c.relnamespace
WHERE n.nspname = 'pg18_nn'
  AND c.relname ~ '^nn_dist_[0-9]+$'
  AND c.relkind = 'r'
ORDER BY c.relname
LIMIT 1
\gset
-- Expect: 2 NOT NULL rows (a,c) + 1 CHECK row on the shard
SELECT 'worker_shard_n_count' AS label, contype, count(*)
FROM pg_constraint
WHERE conrelid = :'shard_regclass'::regclass
GROUP BY contype
ORDER BY contype;
        label         | contype | count
---------------------------------------------------------------------
 worker_shard_n_count | c       |     1
 worker_shard_n_count | n       |     2
(2 rows)

-- table_checks on shard should hide NOT NULL
SELECT 'table_checks_worker_shard_count' AS label, count(*)
FROM public.table_checks
WHERE relid = :'shard_regclass'::regclass;
              label              | count
---------------------------------------------------------------------
 table_checks_worker_shard_count |     1
(1 row)

-- Drop one NOT NULL on coordinator; verify propagation
\c - - - :master_port
SET search_path TO pg18_nn;
ALTER TABLE nn_dist ALTER COLUMN c DROP NOT NULL;
-- Re-check on worker: NOT NULL count should drop to 1
\c - - - :worker_1_port
SET search_path TO pg18_nn;
SELECT 'worker_shard_n_after_drop' AS label, contype, count(*)
FROM pg_constraint
WHERE conrelid = :'shard_regclass'::regclass
GROUP BY contype
ORDER BY contype;
           label           | contype | count
---------------------------------------------------------------------
 worker_shard_n_after_drop | c       |     1
 worker_shard_n_after_drop | n       |     1
(2 rows)

-- And on coordinator
\c - - - :master_port
SET search_path TO pg18_nn;
SELECT 'dist_n_after_drop' AS label, contype, count(*)
FROM pg_constraint
WHERE conrelid = 'pg18_nn.nn_dist'::regclass
GROUP BY contype
ORDER BY contype;
       label       | contype | count
---------------------------------------------------------------------
 dist_n_after_drop | c       |     1
 dist_n_after_drop | n       |     1
(2 rows)

-- Purpose: test self join elimination for distributed, citus local and local tables.
-- CREATE TABLE sje_d1 (id bigserial PRIMARY KEY, name text, created_at timestamptz DEFAULT now()); CREATE TABLE sje_d2 (id bigserial PRIMARY KEY, name text, created_at timestamptz DEFAULT now()); CREATE TABLE sje_local (id bigserial PRIMARY KEY, title text); SET citus.next_shard_id TO 4754000; SELECT create_distributed_table('sje_d1', 'id'); create_distributed_table --------------------------------------------------------------------- (1 row) SELECT create_distributed_table('sje_d2', 'id'); create_distributed_table --------------------------------------------------------------------- (1 row) INSERT INTO sje_d1 SELECT i, i::text, now() FROM generate_series(0,100)i; INSERT INTO sje_d2 SELECT i, i::text, now() FROM generate_series(0,100)i; INSERT INTO sje_local SELECT i, i::text FROM generate_series(0,100)i; -- Self-join elimination is applied when distributed tables are involved -- The query plan has only one join EXPLAIN (costs off) select count(1) from sje_d1 INNER JOIN sje_d2 u1 USING (id) INNER JOIN sje_d2 u2 USING (id) INNER JOIN sje_d2 u3 USING (id) INNER JOIN sje_d2 u4 USING (id) INNER JOIN sje_d2 u5 USING (id) INNER JOIN sje_d2 u6 USING (id); QUERY PLAN --------------------------------------------------------------------- Aggregate -> Custom Scan (Citus Adaptive) Task Count: 4 Tasks Shown: One of 4 -> Task Node: host=localhost port=xxxxx dbname=regression -> Aggregate -> Hash Join Hash Cond: (sje_d1.id = u6.id) -> Seq Scan on sje_d1_4754000 sje_d1 -> Hash -> Seq Scan on sje_d2_4754004 u6 (12 rows) select count(1) from sje_d1 INNER JOIN sje_d2 u1 USING (id) INNER JOIN sje_d2 u2 USING (id) INNER JOIN sje_d2 u3 USING (id) INNER JOIN sje_d2 u4 USING (id) INNER JOIN sje_d2 u5 USING (id) INNER JOIN sje_d2 u6 USING (id); count --------------------------------------------------------------------- 101 (1 row) -- Self-join elimination applied to from list join EXPLAIN (costs off) SELECT count(1) from sje_d1 d1, sje_d2 u1, sje_d2 u2, sje_d2 u3 WHERE d1.id = u1.id and 
u1.id = u2.id and u3.id = d1.id; QUERY PLAN --------------------------------------------------------------------- Aggregate -> Custom Scan (Citus Adaptive) Task Count: 4 Tasks Shown: One of 4 -> Task Node: host=localhost port=xxxxx dbname=regression -> Aggregate -> Hash Join Hash Cond: (d1.id = u3.id) -> Seq Scan on sje_d1_4754000 d1 -> Hash -> Seq Scan on sje_d2_4754004 u3 (12 rows) SELECT count(1) from sje_d1 d1, sje_d2 u1, sje_d2 u2, sje_d2 u3 WHERE d1.id = u1.id and u1.id = u2.id and u3.id = d1.id; count --------------------------------------------------------------------- 101 (1 row) -- Self-join elimination is not applied when a local table is involved -- This is a limitation that will be resolved in citus 14 EXPLAIN (costs off) select count(1) from sje_d1 INNER JOIN sje_local u1 USING (id) INNER JOIN sje_local u2 USING (id) INNER JOIN sje_local u3 USING (id) INNER JOIN sje_local u4 USING (id) INNER JOIN sje_local u5 USING (id) INNER JOIN sje_local u6 USING (id); QUERY PLAN --------------------------------------------------------------------- Aggregate -> Custom Scan (Citus Adaptive) -> Distributed Subplan XXX_1 -> Seq Scan on sje_local u1 -> Distributed Subplan XXX_2 -> Seq Scan on sje_local u2 -> Distributed Subplan XXX_3 -> Seq Scan on sje_local u3 -> Distributed Subplan XXX_4 -> Seq Scan on sje_local u4 -> Distributed Subplan XXX_5 -> Seq Scan on sje_local u5 -> Distributed Subplan XXX_6 -> Seq Scan on sje_local u6 Task Count: 4 Tasks Shown: One of 4 -> Task Node: host=localhost port=xxxxx dbname=regression -> Aggregate -> Hash Join Hash Cond: (intermediate_result_5.id = sje_d1.id) -> Function Scan on read_intermediate_result intermediate_result_5 -> Hash -> Hash Join Hash Cond: (intermediate_result_4.id = sje_d1.id) -> Function Scan on read_intermediate_result intermediate_result_4 -> Hash -> Hash Join Hash Cond: (intermediate_result_3.id = sje_d1.id) -> Function Scan on read_intermediate_result intermediate_result_3 -> Hash -> Hash Join Hash Cond: 
(intermediate_result_2.id = sje_d1.id) -> Function Scan on read_intermediate_result intermediate_result_2 -> Hash -> Hash Join Hash Cond: (intermediate_result_1.id = sje_d1.id) -> Function Scan on read_intermediate_result intermediate_result_1 -> Hash -> Hash Join Hash Cond: (intermediate_result.id = sje_d1.id) -> Function Scan on read_intermediate_result intermediate_result -> Hash -> Seq Scan on sje_d1_4754000 sje_d1 (44 rows) select count(1) from sje_d1 INNER JOIN sje_local u1 USING (id) INNER JOIN sje_local u2 USING (id) INNER JOIN sje_local u3 USING (id) INNER JOIN sje_local u4 USING (id) INNER JOIN sje_local u5 USING (id) INNER JOIN sje_local u6 USING (id); count --------------------------------------------------------------------- 101 (1 row) -- to test USING vs ON equivalence EXPLAIN (costs off) SELECT count(1) FROM sje_d1 d JOIN sje_d2 u1 ON (d.id = u1.id) JOIN sje_d2 u2 ON (u1.id = u2.id); QUERY PLAN --------------------------------------------------------------------- Aggregate -> Custom Scan (Citus Adaptive) Task Count: 4 Tasks Shown: One of 4 -> Task Node: host=localhost port=xxxxx dbname=regression -> Aggregate -> Hash Join Hash Cond: (d.id = u2.id) -> Seq Scan on sje_d1_4754000 d -> Hash -> Seq Scan on sje_d2_4754004 u2 (12 rows) SELECT count(1) FROM sje_d1 d JOIN sje_d2 u1 ON (d.id = u1.id) JOIN sje_d2 u2 ON (u1.id = u2.id); count --------------------------------------------------------------------- 101 (1 row) -- Null-introducing join can have SJE EXPLAIN (costs off) SELECT count(*) FROM sje_d1 d LEFT JOIN sje_d2 u1 USING (id) LEFT JOIN sje_d2 u2 USING (id); QUERY PLAN --------------------------------------------------------------------- Aggregate -> Custom Scan (Citus Adaptive) Task Count: 4 Tasks Shown: One of 4 -> Task Node: host=localhost port=xxxxx dbname=regression -> Aggregate -> Seq Scan on sje_d1_4754000 d (8 rows) SELECT count(*) FROM sje_d1 d LEFT JOIN sje_d2 u1 USING (id) LEFT JOIN sje_d2 u2 USING (id); count 
--------------------------------------------------------------------- 101 (1 row) -- prepared statement PREPARE sje_p(int,int) AS SELECT count(1) FROM sje_d1 d JOIN sje_d2 u1 USING (id) JOIN sje_d2 u2 USING (id) WHERE d.id BETWEEN $1 AND $2; EXPLAIN (costs off) EXECUTE sje_p(10,20); QUERY PLAN --------------------------------------------------------------------- Aggregate -> Custom Scan (Citus Adaptive) Task Count: 4 Tasks Shown: One of 4 -> Task Node: host=localhost port=xxxxx dbname=regression -> Aggregate -> Hash Join Hash Cond: (u2.id = d.id) -> Seq Scan on sje_d2_4754004 u2 -> Hash -> Bitmap Heap Scan on sje_d1_4754000 d Recheck Cond: ((id >= 10) AND (id <= 20)) -> Bitmap Index Scan on sje_d1_pkey_4754000 Index Cond: ((id >= 10) AND (id <= 20)) (15 rows) EXECUTE sje_p(10,20); count --------------------------------------------------------------------- 11 (1 row) -- cte EXPLAIN (costs off) WITH z AS (SELECT id FROM sje_d2 WHERE id % 2 = 0) SELECT count(1) FROM sje_d1 d JOIN z USING (id) JOIN sje_d2 u2 USING (id); QUERY PLAN --------------------------------------------------------------------- Aggregate -> Custom Scan (Citus Adaptive) Task Count: 4 Tasks Shown: One of 4 -> Task Node: host=localhost port=xxxxx dbname=regression -> Aggregate -> Hash Join Hash Cond: (d.id = u2.id) -> Seq Scan on sje_d1_4754000 d -> Hash -> Seq Scan on sje_d2_4754004 u2 Filter: ((id % '2'::bigint) = 0) (13 rows) WITH z AS (SELECT id FROM sje_d2 WHERE id % 2 = 0) SELECT count(1) FROM sje_d1 d JOIN z USING (id) JOIN sje_d2 u2 USING (id); count --------------------------------------------------------------------- 51 (1 row) -- PG18 Feature: JSON functionality - JSON_TABLE has COLUMNS clause for -- extracting multiple fields from JSON documents. 
-- PG18 commit: https://github.com/postgres/postgres/commit/bb766cd
CREATE TABLE pg18_json_test (id serial PRIMARY KEY, data JSON);
INSERT INTO pg18_json_test (data)
VALUES ('{ "user": {"name": "Alice", "age": 30, "city": "San Diego"} }'),
       ('{ "user": {"name": "Bob", "age": 25, "city": "Los Angeles"} }'),
       ('{ "user": {"name": "Charlie", "age": 35, "city": "Los Angeles"} }'),
       ('{ "user": {"name": "Diana", "age": 28, "city": "Seattle"} } '),
       ('{ "user": {"name": "Evan", "age": 40, "city": "Portland"} } '),
       ('{ "user": {"name": "Ethan", "age": 32, "city": "Seattle"} } '),
       ('{ "user": {"name": "Fiona", "age": 27, "city": "Seattle"} } '),
       ('{ "user": {"name": "George", "age": 29, "city": "San Francisco"} } '),
       ('{ "user": {"name": "Hannah", "age": 33, "city": "Seattle"} } '),
       ('{ "user": {"name": "Ian", "age": 26, "city": "Portland"} } '),
       ('{ "user": {"name": "Jane", "age": 38, "city": "San Francisco"} } ');
-- Local table: project two fields per document and filter on one of them.
SELECT jt.name, jt.age
FROM pg18_json_test,
     JSON_TABLE(
         data,
         '$.user'
         COLUMNS (
             age INT PATH '$.age',
             name TEXT PATH '$.name'
         )
     ) AS jt
WHERE jt.age between 25 and 35
ORDER BY jt.age, jt.name;
  name   | age
---------------------------------------------------------------------
 Bob     |  25
 Ian     |  26
 Fiona   |  27
 Diana   |  28
 George  |  29
 Alice   |  30
 Ethan   |  32
 Hannah  |  33
 Charlie |  35
(9 rows)

-- Local table: aggregate on a JSON_TABLE column.
-- jt.city is a tiebreaker: three cities share count = 2, and without it the
-- order among them is unspecified and can differ between the local and the
-- distributed run of this same query.
SELECT jt.city, count(1)
FROM pg18_json_test,
     JSON_TABLE(
         data,
         '$.user'
         COLUMNS (
             city TEXT PATH '$.city'
         )
     ) AS jt
GROUP BY jt.city
ORDER BY count(1) DESC, jt.city;
     city      | count
---------------------------------------------------------------------
 Seattle       |     4
 Los Angeles   |     2
 Portland      |     2
 San Francisco |     2
 San Diego     |     1
(5 rows)

-- Make it distributed and repeat the queries
SELECT create_distributed_table('pg18_json_test', 'id');
NOTICE:  Copying data from local table...
NOTICE:  copying the data has completed
DETAIL:  The local data in the table is no longer visible, but is still on disk.
HINT:  To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$pg18_nn.pg18_json_test$$)
 create_distributed_table
---------------------------------------------------------------------
 
(1 row)

SELECT jt.name, jt.age
FROM pg18_json_test,
     JSON_TABLE(
         data,
         '$.user'
         COLUMNS (
             age INT PATH '$.age',
             name TEXT PATH '$.name'
         )
     ) AS jt
WHERE jt.age between 25 and 35
ORDER BY jt.age, jt.name;
  name   | age
---------------------------------------------------------------------
 Bob     |  25
 Ian     |  26
 Fiona   |  27
 Diana   |  28
 George  |  29
 Alice   |  30
 Ethan   |  32
 Hannah  |  33
 Charlie |  35
(9 rows)

-- Same tiebreaker as the local run, so both runs must produce identical rows.
SELECT jt.city, count(1)
FROM pg18_json_test,
     JSON_TABLE(
         data,
         '$.user'
         COLUMNS (
             city TEXT PATH '$.city'
         )
     ) AS jt
GROUP BY jt.city
ORDER BY count(1) DESC, jt.city;
     city      | count
---------------------------------------------------------------------
 Seattle       |     4
 Los Angeles   |     2
 Portland      |     2
 San Francisco |     2
 San Diego     |     1
(5 rows)

-- PG18 Feature: WITHOUT OVERLAPS can appear in PRIMARY KEY and UNIQUE constraints.
-- PG18 commit: https://github.com/postgres/postgres/commit/fc0438b4e
CREATE TABLE temporal_rng (
  -- Since we can't depend on having btree_gist here,
  -- use an int4range instead of an int.
  -- (The rangetypes regression test uses the same trick.)
id int4range, valid_at daterange, CONSTRAINT temporal_rng_pk PRIMARY KEY (id, valid_at WITHOUT OVERLAPS) ); SELECT create_distributed_table('temporal_rng', 'id'); create_distributed_table --------------------------------------------------------------------- (1 row) -- okay: INSERT INTO temporal_rng (id, valid_at) VALUES ('[1,2)', daterange('2018-01-02', '2018-02-03')); INSERT INTO temporal_rng (id, valid_at) VALUES ('[1,2)', daterange('2018-03-03', '2018-04-04')); INSERT INTO temporal_rng (id, valid_at) VALUES ('[2,3)', daterange('2018-01-01', '2018-01-05')); INSERT INTO temporal_rng (id, valid_at) VALUES ('[3,4)', daterange('2018-01-01', NULL)); -- should fail: INSERT INTO temporal_rng (id, valid_at) VALUES ('[1,2)', daterange('2018-01-01', '2018-01-05')); ERROR: conflicting key value violates exclusion constraint "temporal_rng_pk_4754013" DETAIL: Key (id, valid_at)=([1,2), [2018-01-01,2018-01-05)) conflicts with existing key (id, valid_at)=([1,2), [2018-01-02,2018-02-03)). CONTEXT: while executing command on localhost:xxxxx -- NULLs are not allowed in the shard key: INSERT INTO temporal_rng (id, valid_at) VALUES (NULL, daterange('2018-01-01', '2018-01-05')); ERROR: cannot perform an INSERT with NULL in the partition column INSERT INTO temporal_rng (id, valid_at) VALUES ('[3,4)', NULL); ERROR: null value in column "valid_at" violates not-null constraint DETAIL: Failing row contains ([3,4), null). 
CONTEXT: while executing command on localhost:xxxxx -- rejects empty: INSERT INTO temporal_rng (id, valid_at) VALUES ('[3,4)', 'empty'); ERROR: empty WITHOUT OVERLAPS value found in column "valid_at" in relation "temporal_rng_4754012" CONTEXT: while executing command on localhost:xxxxx SELECT * FROM temporal_rng ORDER BY id, valid_at; id | valid_at --------------------------------------------------------------------- [1,2) | [01-02-2018,02-03-2018) [1,2) | [03-03-2018,04-04-2018) [2,3) | [01-01-2018,01-05-2018) [3,4) | [01-01-2018,) (4 rows) -- Repeat with UNIQUE constraint CREATE TABLE temporal_rng_uq ( -- Since we can't depend on having btree_gist here, -- use an int4range instead of an int. id int4range, valid_at daterange, CONSTRAINT temporal_rng_uq_uk UNIQUE (id, valid_at WITHOUT OVERLAPS) ); SELECT create_distributed_table('temporal_rng_uq', 'id'); create_distributed_table --------------------------------------------------------------------- (1 row) -- okay: INSERT INTO temporal_rng_uq (id, valid_at) VALUES ('[1,2)', daterange('2018-01-02', '2018-02-03')); INSERT INTO temporal_rng_uq (id, valid_at) VALUES ('[1,2)', daterange('2018-03-03', '2018-04-04')); INSERT INTO temporal_rng_uq (id, valid_at) VALUES ('[2,3)', daterange('2018-01-01', '2018-01-05')); INSERT INTO temporal_rng_uq (id, valid_at) VALUES ('[3,4)', daterange('2018-01-01', NULL)); -- should fail: INSERT INTO temporal_rng_uq (id, valid_at) VALUES ('[1,2)', daterange('2018-01-01', '2018-01-05')); ERROR: conflicting key value violates exclusion constraint "temporal_rng_uq_uk_4754017" DETAIL: Key (id, valid_at)=([1,2), [2018-01-01,2018-01-05)) conflicts with existing key (id, valid_at)=([1,2), [2018-01-02,2018-02-03)). 
CONTEXT: while executing command on localhost:xxxxx -- NULLs are not allowed in the shard key: INSERT INTO temporal_rng_uq (id, valid_at) VALUES (NULL, daterange('2018-01-01', '2018-01-05')); ERROR: cannot perform an INSERT with NULL in the partition column INSERT INTO temporal_rng_uq (id, valid_at) VALUES ('[3,4)', NULL); -- rejects empty: INSERT INTO temporal_rng_uq (id, valid_at) VALUES ('[3,4)', 'empty'); ERROR: empty WITHOUT OVERLAPS value found in column "valid_at" in relation "temporal_rng_uq_4754016" CONTEXT: while executing command on localhost:xxxxx SELECT * FROM temporal_rng_uq ORDER BY id, valid_at; id | valid_at --------------------------------------------------------------------- [1,2) | [01-02-2018,02-03-2018) [1,2) | [03-03-2018,04-04-2018) [2,3) | [01-01-2018,01-05-2018) [3,4) | [01-01-2018,) [3,4) | (5 rows) DROP TABLE temporal_rng CASCADE; DROP TABLE temporal_rng_uq CASCADE; -- Repeat the tests with the PRIMARY KEY and UNIQUE constraints added -- after the table is created and distributed. INSERTs produce the -- same results as before. CREATE TABLE temporal_rng ( -- Since we can't depend on having btree_gist here, -- use an int4range instead of an int. -- (The rangetypes regression test uses the same trick.) 
id int4range, valid_at daterange ); SELECT create_distributed_table('temporal_rng', 'id'); create_distributed_table --------------------------------------------------------------------- (1 row) -- okay: INSERT INTO temporal_rng (id, valid_at) VALUES ('[1,2)', daterange('2018-01-02', '2018-02-03')); INSERT INTO temporal_rng (id, valid_at) VALUES ('[1,2)', daterange('2018-03-03', '2018-04-04')); INSERT INTO temporal_rng (id, valid_at) VALUES ('[2,3)', daterange('2018-01-01', '2018-01-05')); INSERT INTO temporal_rng (id, valid_at) VALUES ('[3,4)', daterange('2018-01-01', NULL)); ALTER TABLE temporal_rng ADD CONSTRAINT temporal_rng_pk PRIMARY KEY (id, valid_at WITHOUT OVERLAPS); -- should fail: INSERT INTO temporal_rng (id, valid_at) VALUES ('[1,2)', daterange('2018-01-01', '2018-01-05')); ERROR: conflicting key value violates exclusion constraint "temporal_rng_pk_4754021" DETAIL: Key (id, valid_at)=([1,2), [2018-01-01,2018-01-05)) conflicts with existing key (id, valid_at)=([1,2), [2018-01-02,2018-02-03)). CONTEXT: while executing command on localhost:xxxxx -- NULLs are not allowed in the shard key: INSERT INTO temporal_rng (id, valid_at) VALUES (NULL, daterange('2018-01-01', '2018-01-05')); ERROR: cannot perform an INSERT with NULL in the partition column INSERT INTO temporal_rng (id, valid_at) VALUES ('[3,4)', NULL); ERROR: null value in column "valid_at" violates not-null constraint DETAIL: Failing row contains ([3,4), null). 
CONTEXT: while executing command on localhost:xxxxx -- rejects empty: INSERT INTO temporal_rng (id, valid_at) VALUES ('[3,4)', 'empty'); ERROR: empty WITHOUT OVERLAPS value found in column "valid_at" in relation "temporal_rng_4754020" CONTEXT: while executing command on localhost:xxxxx SELECT * FROM temporal_rng ORDER BY id, valid_at; id | valid_at --------------------------------------------------------------------- [1,2) | [01-02-2018,02-03-2018) [1,2) | [03-03-2018,04-04-2018) [2,3) | [01-01-2018,01-05-2018) [3,4) | [01-01-2018,) (4 rows) -- Repeat with UNIQUE constraint CREATE TABLE temporal_rng_uq ( -- Since we can't depend on having btree_gist here, -- use an int4range instead of an int. id int4range, valid_at daterange ); SELECT create_distributed_table('temporal_rng_uq', 'id'); create_distributed_table --------------------------------------------------------------------- (1 row) -- okay: INSERT INTO temporal_rng_uq (id, valid_at) VALUES ('[1,2)', daterange('2018-01-02', '2018-02-03')); INSERT INTO temporal_rng_uq (id, valid_at) VALUES ('[1,2)', daterange('2018-03-03', '2018-04-04')); INSERT INTO temporal_rng_uq (id, valid_at) VALUES ('[2,3)', daterange('2018-01-01', '2018-01-05')); INSERT INTO temporal_rng_uq (id, valid_at) VALUES ('[3,4)', daterange('2018-01-01', NULL)); ALTER TABLE temporal_rng_uq ADD CONSTRAINT temporal_rng_uq_uk UNIQUE (id, valid_at WITHOUT OVERLAPS); -- should fail: INSERT INTO temporal_rng_uq (id, valid_at) VALUES ('[1,2)', daterange('2018-01-01', '2018-01-05')); ERROR: conflicting key value violates exclusion constraint "temporal_rng_uq_uk_4754025" DETAIL: Key (id, valid_at)=([1,2), [2018-01-01,2018-01-05)) conflicts with existing key (id, valid_at)=([1,2), [2018-01-02,2018-02-03)). 
CONTEXT: while executing command on localhost:xxxxx -- NULLs are not allowed in the shard key: INSERT INTO temporal_rng_uq (id, valid_at) VALUES (NULL, daterange('2018-01-01', '2018-01-05')); ERROR: cannot perform an INSERT with NULL in the partition column INSERT INTO temporal_rng_uq (id, valid_at) VALUES ('[3,4)', NULL); -- rejects empty: INSERT INTO temporal_rng_uq (id, valid_at) VALUES ('[3,4)', 'empty'); ERROR: empty WITHOUT OVERLAPS value found in column "valid_at" in relation "temporal_rng_uq_4754024" CONTEXT: while executing command on localhost:xxxxx SELECT * FROM temporal_rng_uq ORDER BY id, valid_at; id | valid_at --------------------------------------------------------------------- [1,2) | [01-02-2018,02-03-2018) [1,2) | [03-03-2018,04-04-2018) [2,3) | [01-01-2018,01-05-2018) [3,4) | [01-01-2018,) [3,4) | (5 rows) -- PG18 Feature: RETURNING old and new values in DML statements -- PG18 commit: https://github.com/postgres/postgres/commit/80feb727c CREATE TABLE users (id SERIAL PRIMARY KEY, email text, category int); INSERT INTO users (email, category) SELECT 'xxx@foo.com', i % 10 from generate_series (1,100) t(i); SELECT create_distributed_table('users','id'); NOTICE: Copying data from local table... NOTICE: copying the data has completed DETAIL: The local data in the table is no longer visible, but is still on disk. 
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$pg18_nn.users$$) create_distributed_table --------------------------------------------------------------------- (1 row) UPDATE users SET email = 'colm@planet.com' WHERE id = 1 RETURNING OLD.email AS previous_email, NEW.email AS current_email; previous_email | current_email --------------------------------------------------------------------- xxx@foo.com | colm@planet.com (1 row) SELECT * FROM users WHERE id = 1 ORDER BY id; id | email | category --------------------------------------------------------------------- 1 | colm@planet.com | 1 (1 row) UPDATE users SET email = 'tim@arctic.net' WHERE id = 22 RETURNING OLD.email AS previous_email, NEW.email AS current_email; previous_email | current_email --------------------------------------------------------------------- xxx@foo.com | tim@arctic.net (1 row) UPDATE users SET email = 'john@farm.ie' WHERE id = 33 RETURNING OLD.email AS previous_email, NEW.email AS current_email; previous_email | current_email --------------------------------------------------------------------- xxx@foo.com | john@farm.ie (1 row) SELECT * FROM users WHERE id = 22 ORDER BY id; id | email | category --------------------------------------------------------------------- 22 | tim@arctic.net | 2 (1 row) SELECT * FROM users WHERE email not like 'xxx@%' ORDER BY id; id | email | category --------------------------------------------------------------------- 1 | colm@planet.com | 1 22 | tim@arctic.net | 2 33 | john@farm.ie | 3 (3 rows) -- NULL values creep into the email column.. 
INSERT INTO users (email, category) VALUES (null, 5) RETURNING OLD.email AS previous_email, NEW.email AS current_email; previous_email | current_email --------------------------------------------------------------------- | (1 row) UPDATE users SET email = NULL WHERE id = 79 RETURNING OLD.email AS previous_email, NEW.email AS current_email; previous_email | current_email --------------------------------------------------------------------- xxx@foo.com | (1 row) -- Now add a NOT NULL constraint on email, but do -- not apply it to existing rows yet. ALTER TABLE users ADD CONSTRAINT users_email_not_null CHECK (email IS NOT NULL) NOT VALID; UPDATE users SET email = NULL WHERE id = 50 RETURNING OLD.email AS previous_email, NEW.email AS current_email; ERROR: new row for relation "users_4754028" violates check constraint "users_email_not_null_4754028" DETAIL: Failing row contains (50, null, 0). CONTEXT: while executing command on localhost:xxxxx -- Validation should fail due to existing NULLs ALTER TABLE users VALIDATE CONSTRAINT users_email_not_null; ERROR: check constraint "users_email_not_null_4754028" of relation "users_4754028" is violated by some row CONTEXT: while executing command on localhost:xxxxx -- Fix NULL emails to a default value UPDATE users SET email = 'xxx@foo.com' WHERE email IS NULL RETURNING OLD.email AS previous_email, NEW.email AS current_email; previous_email | current_email --------------------------------------------------------------------- | xxx@foo.com | xxx@foo.com (2 rows) -- Validation should now succeed ALTER TABLE users VALIDATE CONSTRAINT users_email_not_null; -- And prevent future NULLs INSERT INTO users (email, category) VALUES (null, 10) RETURNING OLD.email AS previous_email, NEW.email AS current_email; ERROR: new row for relation "users_4754030" violates check constraint "users_email_not_null_4754030" DETAIL: Failing row contains (102, null, 10). 
CONTEXT: while executing command on localhost:xxxxx -- PG18 Feature: support for LIKE in CREATE FOREIGN TABLE -- PG18 commit: https://github.com/postgres/postgres/commit/302cf1575 SET citus.use_citus_managed_tables TO ON; CREATE EXTENSION postgres_fdw; CREATE SERVER foreign_server FOREIGN DATA WRAPPER postgres_fdw OPTIONS (host 'localhost', port :'master_port', dbname 'regression'); CREATE USER MAPPING FOR CURRENT_USER SERVER foreign_server OPTIONS (user 'postgres'); CREATE TABLE ctl_table(a int PRIMARY KEY, b varchar COMPRESSION pglz, c int GENERATED ALWAYS AS (a * 2) STORED, d bigint GENERATED ALWAYS AS IDENTITY, e int DEFAULT 1); CREATE INDEX ctl_table_ab_key ON ctl_table(a, b); COMMENT ON COLUMN ctl_table.b IS 'Column b'; CREATE STATISTICS ctl_table_stat ON a,b FROM ctl_table; INSERT INTO ctl_table VALUES (1, 'first'), (2, 'second'), (3, 'third'), (4, 'fourth'); -- Test EXCLUDING ALL CREATE FOREIGN TABLE ctl_ft1(LIKE ctl_table EXCLUDING ALL) SERVER foreign_server OPTIONS (schema_name 'pg18_nn', table_name 'ctl_table'); -- Test INCLUDING ALL CREATE FOREIGN TABLE ctl_ft2(LIKE ctl_table INCLUDING ALL) SERVER foreign_server OPTIONS (schema_name 'pg18_nn', table_name 'ctl_table'); -- check that the foreign tables are citus local table SELECT partmethod, repmodel FROM pg_dist_partition WHERE logicalrelid IN ('ctl_ft1'::regclass, 'ctl_ft2'::regclass) ORDER BY logicalrelid; partmethod | repmodel --------------------------------------------------------------------- n | s n | s (2 rows) -- we can query the foreign tables EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ctl_ft1 ORDER BY a; QUERY PLAN --------------------------------------------------------------------- Custom Scan (Citus Adaptive) Output: remote_scan.a, remote_scan.b, remote_scan.c, remote_scan.d, remote_scan.e Task Count: 1 Tasks Shown: All -> Task Query: SELECT a, b, c, d, e FROM pg18_nn.ctl_ft1_4754033 ctl_ft1 ORDER BY a Node: host=localhost port=xxxxx dbname=regression -> Foreign Scan on 
pg18_nn.ctl_ft1_4754033 ctl_ft1 Output: a, b, c, d, e Remote SQL: SELECT a, b, c, d, e FROM pg18_nn.ctl_table ORDER BY a ASC NULLS LAST (10 rows) SELECT * FROM ctl_ft1 ORDER BY a; a | b | c | d | e --------------------------------------------------------------------- 1 | first | 2 | 1 | 1 2 | second | 4 | 2 | 1 3 | third | 6 | 3 | 1 4 | fourth | 8 | 4 | 1 (4 rows) EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ctl_ft2 ORDER BY a; QUERY PLAN --------------------------------------------------------------------- Custom Scan (Citus Adaptive) Output: remote_scan.a, remote_scan.b, remote_scan.c, remote_scan.d, remote_scan.e Task Count: 1 Tasks Shown: All -> Task Query: SELECT a, b, c, d, e FROM pg18_nn.ctl_ft2_4754034 ctl_ft2 ORDER BY a Node: host=localhost port=xxxxx dbname=regression -> Foreign Scan on pg18_nn.ctl_ft2_4754034 ctl_ft2 Output: a, b, c, d, e Remote SQL: SELECT a, b, c, d, e FROM pg18_nn.ctl_table ORDER BY a ASC NULLS LAST (10 rows) SELECT * FROM ctl_ft2 ORDER BY a; a | b | c | d | e --------------------------------------------------------------------- 1 | first | 2 | 1 | 1 2 | second | 4 | 2 | 1 3 | third | 6 | 3 | 1 4 | fourth | 8 | 4 | 1 (4 rows) -- Clean up foreign table test RESET citus.use_citus_managed_tables; SELECT undistribute_table('ctl_ft1'); NOTICE: creating a new table for pg18_nn.ctl_ft1 NOTICE: dropping the old pg18_nn.ctl_ft1 NOTICE: renaming the new table to pg18_nn.ctl_ft1 undistribute_table --------------------------------------------------------------------- (1 row) SELECT undistribute_table('ctl_ft2'); NOTICE: creating a new table for pg18_nn.ctl_ft2 NOTICE: dropping the old pg18_nn.ctl_ft2 NOTICE: renaming the new table to pg18_nn.ctl_ft2 undistribute_table --------------------------------------------------------------------- (1 row) DROP SERVER foreign_server CASCADE; NOTICE: drop cascades to 3 other objects DETAIL: drop cascades to user mapping for postgres on server foreign_server drop cascades to foreign table ctl_ft1 drop cascades 
to foreign table ctl_ft2 -- PG18 Feature: PERIOD clause in foreign key constraint definitions. -- PG18 commit: https://github.com/postgres/postgres/commit/89f908a6d -- This test verifies that the PG18 tests apply to Citus tables CREATE EXTENSION btree_gist; -- needed for range type indexing CREATE TABLE temporal_test ( id integer, valid_at daterange, CONSTRAINT temporal_test_pk PRIMARY KEY (id, valid_at WITHOUT OVERLAPS) ); SET citus.shard_count TO 4; SELECT create_reference_table( 'temporal_test'); create_reference_table --------------------------------------------------------------------- (1 row) INSERT INTO temporal_test VALUES (1, '[2000-01-01,2001-01-01)'); -- same key, doesn't overlap: INSERT INTO temporal_test VALUES (1, '[2001-01-01,2002-01-01)'); -- overlaps but different key: INSERT INTO temporal_test VALUES (2, '[2000-01-01,2001-01-01)'); -- should fail: INSERT INTO temporal_test VALUES (1, '[2000-06-01,2001-01-01)'); ERROR: conflicting key value violates exclusion constraint "temporal_test_pk_4754035" DETAIL: Key (id, valid_at)=(1, [2000-06-01,2001-01-01)) conflicts with existing key (id, valid_at)=(1, [2000-01-01,2001-01-01)). 
CONTEXT: while executing command on localhost:xxxxx -- Required for foreign key constraint on distributed table SET citus.shard_replication_factor TO 1; -- Create and distribute a table with temporal foreign key constraints CREATE TABLE temporal_fk_rng2rng ( id integer, valid_at daterange, parent_id integer, CONSTRAINT temporal_fk_rng2rng_pk PRIMARY KEY (id, valid_at WITHOUT OVERLAPS) ); SELECT create_distributed_table( 'temporal_fk_rng2rng', 'id'); create_distributed_table --------------------------------------------------------------------- (1 row) -- -- Add foreign key constraint with PERIOD clause -- This is propagated to worker shards ALTER TABLE temporal_fk_rng2rng ADD CONSTRAINT temporal_fk_rng2rng_fk FOREIGN KEY (parent_id, PERIOD valid_at) REFERENCES temporal_test (id, PERIOD valid_at); INSERT INTO temporal_fk_rng2rng VALUES (1, '[2000-01-01,2001-01-01)', 1); -- okay spanning two parent records: INSERT INTO temporal_fk_rng2rng VALUES (2, '[2000-01-01,2002-01-01)', 1); -- key is missing INSERT INTO temporal_fk_rng2rng VALUES (3, '[2000-01-01,2001-01-01)', 3); ERROR: insert or update on table "temporal_fk_rng2rng_4754037" violates foreign key constraint "temporal_fk_rng2rng_fk_4754037" DETAIL: Key (parent_id, valid_at)=(3, [2000-01-01,2001-01-01)) is not present in table "temporal_test_4754035". CONTEXT: while executing command on localhost:xxxxx -- key exists but is outside range INSERT INTO temporal_fk_rng2rng VALUES (4, '[2001-01-01,2002-01-01)', 2); ERROR: insert or update on table "temporal_fk_rng2rng_4754037" violates foreign key constraint "temporal_fk_rng2rng_fk_4754037" DETAIL: Key (parent_id, valid_at)=(2, [2001-01-01,2002-01-01)) is not present in table "temporal_test_4754035".
CONTEXT: while executing command on localhost:xxxxx -- key exists but is partly outside range INSERT INTO temporal_fk_rng2rng VALUES (5, '[2000-01-01,2002-01-01)', 2); ERROR: insert or update on table "temporal_fk_rng2rng_4754036" violates foreign key constraint "temporal_fk_rng2rng_fk_4754036" DETAIL: Key (parent_id, valid_at)=(2, [2000-01-01,2002-01-01)) is not present in table "temporal_test_4754035". CONTEXT: while executing command on localhost:xxxxx -- PG18 Feature: REJECT_LIMIT option for COPY errors -- PG18 commit: https://github.com/postgres/postgres/commit/4ac2a9bec -- Citus does not support COPY with ON_ERROR so just need to -- ensure the appropriate error is returned. CREATE TABLE check_ign_err (n int, m int[], k int); SELECT create_distributed_table('check_ign_err', 'n'); create_distributed_table --------------------------------------------------------------------- (1 row) COPY check_ign_err FROM STDIN WITH (on_error stop, reject_limit 5); ERROR: Citus does not support COPY FROM with ON_ERROR option. COPY check_ign_err FROM STDIN WITH (ON_ERROR ignore, REJECT_LIMIT 100); ERROR: Citus does not support COPY FROM with ON_ERROR option. COPY check_ign_err FROM STDIN WITH (on_error ignore, log_verbosity verbose, reject_limit 50); ERROR: Citus does not support COPY FROM with ON_ERROR option. COPY check_ign_err FROM STDIN WITH (reject_limit 77, log_verbosity verbose, on_error ignore); ERROR: Citus does not support COPY FROM with ON_ERROR option. -- PG requires on_error when reject_limit is specified COPY check_ign_err FROM STDIN WITH (reject_limit 100); ERROR: COPY REJECT_LIMIT requires ON_ERROR to be set to IGNORE -- PG18 Feature: COPY TABLE TO on a materialized view -- PG18 commit: https://github.com/postgres/postgres/commit/534874fac -- This does not work in Citus as a materialized view cannot be distributed. -- So just verify that the appropriate error is raised.
CREATE MATERIALIZED VIEW copytest_mv AS SELECT i as id, md5(i::text) as hashval FROM generate_series(1,100) i; -- Attempting to make it distributed should fail with appropriate error as -- Citus does not yet support materialized views. SELECT create_distributed_table('copytest_mv', 'id'); ERROR: copytest_mv is not a regular, foreign or partitioned table -- After that, any command on the materialized view is outside Citus support. -- PG18: verify publish_generated_columns is preserved for distributed tables -- https://github.com/postgres/postgres/commit/7054186c4 \c - - - :master_port CREATE SCHEMA pg18_publication; SET search_path TO pg18_publication; -- table with a stored generated column CREATE TABLE gen_pub_tab ( id int primary key, a int, b int GENERATED ALWAYS AS (a * 10) STORED ); -- make it distributed so CREATE PUBLICATION goes through Citus metadata/DDL path SELECT create_distributed_table('gen_pub_tab', 'id', colocate_with := 'none'); create_distributed_table --------------------------------------------------------------------- (1 row) -- publication using the new PG18 option: stored CREATE PUBLICATION pub_gen_cols_stored FOR TABLE gen_pub_tab WITH (publish = 'insert, update', publish_generated_columns = stored); -- second publication explicitly using "none" for completeness CREATE PUBLICATION pub_gen_cols_none FOR TABLE gen_pub_tab WITH (publish = 'insert, update', publish_generated_columns = none); -- On coordinator: pubgencols must be 's' and 'n' respectively SELECT pubname, pubgencols FROM pg_publication WHERE pubname IN ('pub_gen_cols_stored', 'pub_gen_cols_none') ORDER BY pubname; pubname | pubgencols --------------------------------------------------------------------- pub_gen_cols_none | n pub_gen_cols_stored | s (2 rows) -- On worker 1: both publications must exist and keep pubgencols in sync \c - - - :worker_1_port SET search_path TO pg18_publication; SELECT pubname, pubgencols FROM pg_publication WHERE pubname IN ('pub_gen_cols_stored', 
'pub_gen_cols_none') ORDER BY pubname; pubname | pubgencols --------------------------------------------------------------------- pub_gen_cols_none | n pub_gen_cols_stored | s (2 rows) -- On worker 2: same check \c - - - :worker_2_port SET search_path TO pg18_publication; SELECT pubname, pubgencols FROM pg_publication WHERE pubname IN ('pub_gen_cols_stored', 'pub_gen_cols_none') ORDER BY pubname; pubname | pubgencols --------------------------------------------------------------------- pub_gen_cols_none | n pub_gen_cols_stored | s (2 rows) -- Now verify ALTER PUBLICATION .. SET (publish_generated_columns = none) -- propagates to workers as well. \c - - - :master_port SET search_path TO pg18_publication; ALTER PUBLICATION pub_gen_cols_stored SET (publish_generated_columns = none); -- coordinator: both publications should now have pubgencols = 'n' SELECT pubname, pubgencols FROM pg_publication WHERE pubname IN ('pub_gen_cols_stored', 'pub_gen_cols_none') ORDER BY pubname; pubname | pubgencols --------------------------------------------------------------------- pub_gen_cols_none | n pub_gen_cols_stored | n (2 rows) -- worker 1: pubgencols must match coordinator \c - - - :worker_1_port SET search_path TO pg18_publication; SELECT pubname, pubgencols FROM pg_publication WHERE pubname IN ('pub_gen_cols_stored', 'pub_gen_cols_none') ORDER BY pubname; pubname | pubgencols --------------------------------------------------------------------- pub_gen_cols_none | n pub_gen_cols_stored | n (2 rows) -- worker 2: same check \c - - - :worker_2_port SET search_path TO pg18_publication; SELECT pubname, pubgencols FROM pg_publication WHERE pubname IN ('pub_gen_cols_stored', 'pub_gen_cols_none') ORDER BY pubname; pubname | pubgencols --------------------------------------------------------------------- pub_gen_cols_none | n pub_gen_cols_stored | n (2 rows) -- Column list precedence test: Citus must preserve both prattrs and pubgencols \c - - - :master_port SET search_path TO 
pg18_publication; -- Case 1: column list explicitly includes the generated column, flag = none CREATE PUBLICATION pub_gen_cols_list_includes_b FOR TABLE gen_pub_tab (id, a, b) WITH (publish_generated_columns = none); -- Case 2: column list excludes the generated column, flag = stored CREATE PUBLICATION pub_gen_cols_list_excludes_b FOR TABLE gen_pub_tab (id, a) WITH (publish_generated_columns = stored); -- Helper: show pubname, pubgencols, and column list (prattrs) for gen_pub_tab SELECT p.pubname, p.pubgencols, r.prattrs FROM pg_publication p JOIN pg_publication_rel r ON p.oid = r.prpubid JOIN pg_class c ON c.oid = r.prrelid WHERE p.pubname IN ('pub_gen_cols_list_includes_b', 'pub_gen_cols_list_excludes_b') AND c.relname = 'gen_pub_tab' ORDER BY p.pubname; pubname | pubgencols | prattrs --------------------------------------------------------------------- pub_gen_cols_list_excludes_b | s | 1 2 pub_gen_cols_list_includes_b | n | 1 2 3 (2 rows) -- worker 1: must see the same pubgencols + prattrs \c - - - :worker_1_port SET search_path TO pg18_publication; SELECT p.pubname, p.pubgencols, r.prattrs FROM pg_publication p JOIN pg_publication_rel r ON p.oid = r.prpubid JOIN pg_class c ON c.oid = r.prrelid WHERE p.pubname IN ('pub_gen_cols_list_includes_b', 'pub_gen_cols_list_excludes_b') AND c.relname = 'gen_pub_tab' ORDER BY p.pubname; pubname | pubgencols | prattrs --------------------------------------------------------------------- pub_gen_cols_list_excludes_b | s | 1 2 pub_gen_cols_list_includes_b | n | 1 2 3 (2 rows) -- worker 2: same check \c - - - :worker_2_port SET search_path TO pg18_publication; SELECT p.pubname, p.pubgencols, r.prattrs FROM pg_publication p JOIN pg_publication_rel r ON p.oid = r.prpubid JOIN pg_class c ON c.oid = r.prrelid WHERE p.pubname IN ('pub_gen_cols_list_includes_b', 'pub_gen_cols_list_excludes_b') AND c.relname = 'gen_pub_tab' ORDER BY p.pubname; pubname | pubgencols | prattrs 
--------------------------------------------------------------------- pub_gen_cols_list_excludes_b | s | 1 2 pub_gen_cols_list_includes_b | n | 1 2 3 (2 rows) -- back to coordinator for subsequent tests / cleanup \c - - - :master_port SET search_path TO pg18_publication; DROP PUBLICATION pub_gen_cols_stored; DROP PUBLICATION pub_gen_cols_none; DROP PUBLICATION pub_gen_cols_list_includes_b; DROP PUBLICATION pub_gen_cols_list_excludes_b; DROP SCHEMA pg18_publication CASCADE; NOTICE: drop cascades to table gen_pub_tab SET search_path TO pg18_nn; -- END: PG18: verify publish_generated_columns is preserved for distributed tables -- PG18 Feature: FOREIGN KEY constraints can be specified as NOT ENFORCED -- PG18 commit: https://github.com/postgres/postgres/commit/eec0040c4 CREATE TABLE customers( customer_id INT GENERATED ALWAYS AS IDENTITY, customer_name VARCHAR(255) NOT NULL, PRIMARY KEY(customer_id) ); SET citus.shard_replication_factor TO 1; SELECT create_distributed_table('customers', 'customer_id'); create_distributed_table --------------------------------------------------------------------- (1 row) CREATE TABLE contacts( contact_id INT GENERATED ALWAYS AS IDENTITY, customer_id INT, contact_name VARCHAR(255) NOT NULL, phone VARCHAR(15), email VARCHAR(100), CONSTRAINT fk_customer FOREIGN KEY(customer_id) REFERENCES customers(customer_id) ON DELETE CASCADE NOT ENFORCED ); -- The foreign key constraint is propagated to worker nodes. 
SELECT create_distributed_table('contacts', 'customer_id'); create_distributed_table --------------------------------------------------------------------- (1 row) SELECT pg_get_constraintdef(oid, true) AS "Definition" FROM pg_constraint WHERE conrelid = 'contacts'::regclass AND conname = 'fk_customer'; Definition --------------------------------------------------------------------- FOREIGN KEY (customer_id) REFERENCES customers(customer_id) ON DELETE CASCADE NOT ENFORCED (1 row) INSERT INTO customers(customer_name) VALUES('BlueBird Inc'), ('Dolphin LLC'); INSERT INTO contacts(customer_id, contact_name, phone, email) VALUES(1,'John Doe','(408)-111-1234','john.doe@example.com'), (1,'Jane Doe','(408)-111-1235','jane.doe@example.com'), (2,'David Wright','(408)-222-1234','david.wright@example.com'); DELETE FROM customers WHERE customer_name = 'Dolphin LLC'; -- After deleting 'Dolphin LLC' from customers, the corresponding contact -- 'David Wright' is not deleted from contacts due to the NOT ENFORCED. SELECT * FROM contacts ORDER BY contact_id; contact_id | customer_id | contact_name | phone | email --------------------------------------------------------------------- 1 | 1 | John Doe | (408)-111-1234 | john.doe@example.com 2 | 1 | Jane Doe | (408)-111-1235 | jane.doe@example.com 3 | 2 | David Wright | (408)-222-1234 | david.wright@example.com (3 rows) -- Test that ALTER TABLE .. ADD CONSTRAINT .. FOREIGN KEY .. NOT ENFORCED -- is propagated to worker nodes. First drop the foreign key: ALTER TABLE contacts DROP CONSTRAINT fk_customer; SELECT pg_get_constraintdef(oid, true) AS "Definition" FROM pg_constraint WHERE conrelid = 'contacts'::regclass AND conname = 'fk_customer'; Definition --------------------------------------------------------------------- (0 rows) -- Now add the foreign key constraint back with NOT ENFORCED. 
ALTER TABLE contacts ADD CONSTRAINT fk_customer FOREIGN KEY(customer_id) REFERENCES customers(customer_id) ON DELETE CASCADE NOT ENFORCED; -- The foreign key is propagated to worker nodes. SELECT pg_get_constraintdef(oid, true) AS "Definition" FROM pg_constraint WHERE conrelid = 'contacts'::regclass AND conname = 'fk_customer'; Definition --------------------------------------------------------------------- FOREIGN KEY (customer_id) REFERENCES customers(customer_id) ON DELETE CASCADE NOT ENFORCED (1 row) DELETE FROM customers WHERE customer_name = 'BlueBird Inc'; -- The customers table is now empty but the contacts table still has -- the contacts due to the NOT ENFORCED foreign key. SELECT * FROM customers ORDER BY customer_id; customer_id | customer_name --------------------------------------------------------------------- (0 rows) SELECT * FROM contacts ORDER BY contact_id; contact_id | customer_id | contact_name | phone | email --------------------------------------------------------------------- 1 | 1 | John Doe | (408)-111-1234 | john.doe@example.com 2 | 1 | Jane Doe | (408)-111-1235 | jane.doe@example.com 3 | 2 | David Wright | (408)-222-1234 | david.wright@example.com (3 rows) -- ALTER TABLE .. ALTER CONSTRAINT is not supported in Citus, -- so the following command should fail ALTER TABLE contacts ALTER CONSTRAINT fk_customer ENFORCED; ERROR: alter table command is currently unsupported DETAIL: Only ADD|DROP COLUMN, SET|DROP NOT NULL, SET|DROP DEFAULT, ADD|DROP|VALIDATE CONSTRAINT, SET (), RESET (), ENABLE|DISABLE|NO FORCE|FORCE ROW LEVEL SECURITY, ATTACH|DETACH PARTITION and TYPE subcommands are supported. -- PG18 Feature: ENFORCED / NOT ENFORCED check constraints -- PG18 commit: https://github.com/postgres/postgres/commit/ca87c415e -- In Citus, CHECK constraints are propagated on promoting a postgres table -- to a citus table, on adding a new CHECK constraint to a citus table, and -- on adding a node to a citus cluster. 
Postgres does not support altering a -- check constraint's enforcement status, so Citus does not either. CREATE TABLE NE_CHECK_TBL (x int, y int, CONSTRAINT CHECK_X CHECK (x > 3) NOT ENFORCED, CONSTRAINT CHECK_Y CHECK (y < 20) ENFORCED ); SET citus.next_shard_id TO 4754044; SELECT create_distributed_table('ne_check_tbl', 'x'); create_distributed_table --------------------------------------------------------------------- (1 row) -- CHECK_X is NOT ENFORCED, so these inserts should succeed INSERT INTO NE_CHECK_TBL (x) VALUES (5), (4), (3), (2), (6), (1); SELECT x FROM NE_CHECK_TBL ORDER BY x; x --------------------------------------------------------------------- 1 2 3 4 5 6 (6 rows) -- CHECK_Y is ENFORCED, so this insert should fail INSERT INTO NE_CHECK_TBL (x, y) VALUES (1, 15), (2, 25), (3, 10), (4, 30); ERROR: new row for relation "ne_check_tbl_4754045" violates check constraint "check_y" DETAIL: Failing row contains (4, 30). CONTEXT: while executing command on localhost:xxxxx -- Test adding new constraints with enforcement status ALTER TABLE NE_CHECK_TBL ADD CONSTRAINT CHECK_Y2 CHECK (y > 10) NOT ENFORCED; -- CHECK_Y2 is NOT ENFORCED, so these inserts should succeed INSERT INTO NE_CHECK_TBL (x, y) VALUES (1, 8), (2, 9), (3, 10), (4, 11); SELECT x, y FROM NE_CHECK_TBL ORDER BY x, y; x | y --------------------------------------------------------------------- 1 | 8 1 | 2 | 9 2 | 3 | 10 3 | 4 | 11 4 | 5 | 6 | (10 rows) ALTER TABLE NE_CHECK_TBL ADD CONSTRAINT CHECK_X2 CHECK (x < 10) ENFORCED; -- CHECK_X2 is ENFORCED, so these inserts should fail INSERT INTO NE_CHECK_TBL (x) VALUES (5), (15), (8), (12); ERROR: new row for relation "ne_check_tbl_4754044" violates check constraint "check_x2_4754044" DETAIL: Failing row contains (15, null). 
CONTEXT: while executing command on localhost:xxxxx -- PG18 Feature: Generated Virtual Columns -- PG18 commit: https://github.com/postgres/postgres/commit/83ea6c540 -- Verify that generated virtual columns are supported on distributed tables. CREATE TABLE v_reading ( celsius DECIMAL(5,2), farenheit DECIMAL(6, 2) GENERATED ALWAYS AS (celsius * 9/5 + 32) VIRTUAL, created_at TIMESTAMPTZ DEFAULT now(), device_id INT ); -- Cannot distribute on a generated column (#4616) applies -- to VIRTUAL columns. SELECT create_distributed_table('v_reading', 'farenheit'); ERROR: cannot distribute relation: v_reading DETAIL: Distribution column must not use GENERATED ALWAYS AS (...) VIRTUAL. SELECT create_distributed_table('v_reading', 'device_id'); create_distributed_table --------------------------------------------------------------------- (1 row) INSERT INTO v_reading (celsius, device_id) VALUES (0, 1), (100, 1), (37.5, 2), (25, 2), (-40, 3); SELECT device_id, celsius, farenheit FROM v_reading ORDER BY device_id, celsius; device_id | celsius | farenheit --------------------------------------------------------------------- 1 | 0.00 | 32.00 1 | 100.00 | 212.00 2 | 25.00 | 77.00 2 | 37.50 | 99.50 3 | -40.00 | -40.00 (5 rows) ALTER TABLE v_reading ADD COLUMN kelvin DECIMAL(6, 2) GENERATED ALWAYS AS (celsius + 273.15) VIRTUAL; SELECT device_id, celsius, kelvin FROM v_reading ORDER BY device_id, celsius; device_id | celsius | kelvin --------------------------------------------------------------------- 1 | 0.00 | 273.15 1 | 100.00 | 373.15 2 | 25.00 | 298.15 2 | 37.50 | 310.65 3 | -40.00 | 233.15 (5 rows) -- Show all columns that are generated SELECT s.relname, a.attname, a.attgenerated FROM pg_class s JOIN pg_attribute a ON a.attrelid=s.oid WHERE s.relname LIKE 'v_reading%' and attgenerated::int != 0 ORDER BY 1,2; relname | attname | attgenerated --------------------------------------------------------------------- v_reading | farenheit | v v_reading | kelvin | v (2 rows) -- Generated columns
are virtual by default - repeat the test without VIRTUAL keyword CREATE TABLE d_reading ( celsius DECIMAL(5,2), farenheit DECIMAL(6, 2) GENERATED ALWAYS AS (celsius * 9/5 + 32), created_at TIMESTAMPTZ DEFAULT now(), device_id INT ); SELECT create_distributed_table('d_reading', 'farenheit'); ERROR: cannot distribute relation: d_reading DETAIL: Distribution column must not use GENERATED ALWAYS AS (...) VIRTUAL. SELECT create_distributed_table('d_reading', 'device_id'); create_distributed_table --------------------------------------------------------------------- (1 row) INSERT INTO d_reading (celsius, device_id) VALUES (0, 1), (100, 1), (37.5, 2), (25, 2), (-40, 3); SELECT device_id, celsius, farenheit FROM d_reading ORDER BY device_id, celsius; device_id | celsius | farenheit --------------------------------------------------------------------- 1 | 0.00 | 32.00 1 | 100.00 | 212.00 2 | 25.00 | 77.00 2 | 37.50 | 99.50 3 | -40.00 | -40.00 (5 rows) ALTER TABLE d_reading ADD COLUMN kelvin DECIMAL(6, 2) GENERATED ALWAYS AS (celsius + 273.15) VIRTUAL; SELECT device_id, celsius, kelvin FROM d_reading ORDER BY device_id, celsius; device_id | celsius | kelvin --------------------------------------------------------------------- 1 | 0.00 | 273.15 1 | 100.00 | 373.15 2 | 25.00 | 298.15 2 | 37.50 | 310.65 3 | -40.00 | 233.15 (5 rows) -- Show all columns that are generated SELECT s.relname, a.attname, a.attgenerated FROM pg_class s JOIN pg_attribute a ON a.attrelid=s.oid WHERE s.relname LIKE 'd_reading%' and attgenerated::int != 0 ORDER BY 1,2; relname | attname | attgenerated --------------------------------------------------------------------- d_reading | farenheit | v d_reading | kelvin | v (2 rows) -- COPY implementation needs to handle GENERATED ALWAYS AS (...) VIRTUAL columns.
\COPY d_reading FROM STDIN WITH DELIMITER ',' SELECT device_id, count(device_id) as count, round(avg(celsius), 2) as avg, min(farenheit), max(farenheit) FROM d_reading GROUP BY device_id ORDER BY count DESC; device_id | count | avg | min | max --------------------------------------------------------------------- 1 | 12 | 20.00 | 32.00 | 212.00 5 | 10 | 13.20 | 33.80 | 73.40 2 | 2 | 31.25 | 77.00 | 99.50 3 | 1 | -40.00 | -40.00 | -40.00 (4 rows) -- Test GROUP BY on tables with generated virtual columns - this requires -- special case handling in distributed planning. Test it out on -- some queries involving joins and set operations. SELECT device_id, max(kelvin) as Kel FROM v_reading WHERE (device_id, celsius) NOT IN (SELECT device_id, max(celsius) FROM v_reading GROUP BY device_id) GROUP BY device_id ORDER BY device_id ASC; device_id | kel --------------------------------------------------------------------- 1 | 273.15 2 | 298.15 (2 rows) SELECT device_id, round(AVG( (d_farenheit + v_farenheit) / 2), 2) as Avg_Far FROM (SELECT * FROM (SELECT device_id, round(AVG(farenheit),2) as d_farenheit FROM d_reading GROUP BY device_id) AS subq RIGHT JOIN (SELECT device_id, MAX(farenheit) AS v_farenheit FROM d_reading GROUP BY device_id) AS subq2 USING (device_id) ) AS finalq GROUP BY device_id ORDER BY device_id ASC; device_id | avg_far --------------------------------------------------------------------- 1 | 140.00 2 | 93.88 3 | -40.00 5 | 64.58 (4 rows) SELECT device_id, MAX(farenheit) as farenheit FROM ((SELECT device_id, round(AVG(farenheit),2) as farenheit FROM d_reading GROUP BY device_id) UNION ALL (SELECT device_id, MAX(farenheit) AS farenheit FROM d_reading GROUP BY device_id) ) AS unioned GROUP BY device_id ORDER BY device_id ASC; device_id | farenheit --------------------------------------------------------------------- 1 | 212.00 2 | 99.50 3 | -40.00 5 | 73.40 (4 rows) SELECT device_id, MAX(farenheit) as farenheit FROM ((SELECT device_id,
round(AVG(farenheit),2) as farenheit FROM d_reading GROUP BY device_id) INTERSECT (SELECT device_id, MAX(farenheit) AS farenheit FROM d_reading GROUP BY device_id) ) AS intersected GROUP BY device_id ORDER BY device_id ASC; device_id | farenheit --------------------------------------------------------------------- 3 | -40.00 (1 row) SELECT device_id, MAX(farenheit) as farenheit FROM ((SELECT device_id, round(AVG(farenheit),2) as farenheit FROM d_reading GROUP BY device_id) EXCEPT (SELECT device_id, MAX(farenheit) AS farenheit FROM d_reading GROUP BY device_id) ) AS excepted GROUP BY device_id ORDER BY device_id ASC; device_id | farenheit --------------------------------------------------------------------- 1 | 68.00 2 | 88.25 5 | 55.76 (3 rows) -- Ensure that UDFs such as alter_distributed_table, undistribute_table -- and add_local_table_to_metadata work fine with VIRTUAL columns. For -- this, PR #4616 changes are modified to handle VIRTUAL columns in -- addition to STORED columns. CREATE TABLE generated_stored_dist ( col_1 int, "col\'_2" text, col_3 text generated always as (UPPER("col\'_2")) virtual ); SELECT create_distributed_table ('generated_stored_dist', 'col_1'); create_distributed_table --------------------------------------------------------------------- (1 row) INSERT INTO generated_stored_dist VALUES (1, 'text_1'), (2, 'text_2'); SELECT * FROM generated_stored_dist ORDER BY 1,2,3; col_1 | col\'_2 | col_3 --------------------------------------------------------------------- 1 | text_1 | TEXT_1 2 | text_2 | TEXT_2 (2 rows) INSERT INTO generated_stored_dist VALUES (1, 'text_1'), (2, 'text_2'); SELECT alter_distributed_table('generated_stored_dist', shard_count := 5, cascade_to_colocated := false); NOTICE: creating a new table for pg18_nn.generated_stored_dist NOTICE: moving the data of pg18_nn.generated_stored_dist NOTICE: dropping the old pg18_nn.generated_stored_dist NOTICE: renaming the new table to pg18_nn.generated_stored_dist alter_distributed_table 
--------------------------------------------------------------------- (1 row) SELECT * FROM generated_stored_dist ORDER BY 1,2,3; col_1 | col\'_2 | col_3 --------------------------------------------------------------------- 1 | text_1 | TEXT_1 1 | text_1 | TEXT_1 2 | text_2 | TEXT_2 2 | text_2 | TEXT_2 (4 rows) CREATE TABLE generated_stored_local ( col_1 int, "col\'_2" text, col_3 text generated always as (UPPER("col\'_2")) stored ); SELECT citus_add_local_table_to_metadata('generated_stored_local'); citus_add_local_table_to_metadata --------------------------------------------------------------------- (1 row) INSERT INTO generated_stored_local VALUES (1, 'text_1'), (2, 'text_2'); SELECT * FROM generated_stored_local ORDER BY 1,2,3; col_1 | col\'_2 | col_3 --------------------------------------------------------------------- 1 | text_1 | TEXT_1 2 | text_2 | TEXT_2 (2 rows) SELECT create_distributed_table ('generated_stored_local', 'col_1'); NOTICE: Copying data from local table... NOTICE: copying the data has completed DETAIL: The local data in the table is no longer visible, but is still on disk. 
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$pg18_nn.generated_stored_local$$) create_distributed_table --------------------------------------------------------------------- (1 row) INSERT INTO generated_stored_local VALUES (1, 'text_1'), (2, 'text_2'); SELECT * FROM generated_stored_local ORDER BY 1,2,3; col_1 | col\'_2 | col_3 --------------------------------------------------------------------- 1 | text_1 | TEXT_1 1 | text_1 | TEXT_1 2 | text_2 | TEXT_2 2 | text_2 | TEXT_2 (4 rows) CREATE TABLE generated_stored_ref ( col_1 int, col_2 int, col_3 int generated always as (col_1+col_2) virtual, col_4 int, col_5 int generated always as (col_4*2-col_1) virtual ); SELECT create_reference_table ('generated_stored_ref'); create_reference_table --------------------------------------------------------------------- (1 row) INSERT INTO generated_stored_ref (col_1, col_4) VALUES (1,2), (11,12); INSERT INTO generated_stored_ref (col_1, col_2, col_4) VALUES (100,101,102), (200,201,202); SELECT * FROM generated_stored_ref ORDER BY 1,2,3,4,5; col_1 | col_2 | col_3 | col_4 | col_5 --------------------------------------------------------------------- 1 | | | 2 | 3 11 | | | 12 | 13 100 | 101 | 201 | 102 | 104 200 | 201 | 401 | 202 | 204 (4 rows) BEGIN; SELECT undistribute_table('generated_stored_ref'); NOTICE: creating a new table for pg18_nn.generated_stored_ref NOTICE: moving the data of pg18_nn.generated_stored_ref NOTICE: dropping the old pg18_nn.generated_stored_ref NOTICE: renaming the new table to pg18_nn.generated_stored_ref undistribute_table --------------------------------------------------------------------- (1 row) INSERT INTO generated_stored_ref (col_1, col_4) VALUES (11,12), (21,22); INSERT INTO generated_stored_ref (col_1, col_2, col_4) VALUES (200,201,202), (300,301,302); SELECT * FROM generated_stored_ref ORDER BY 1,2,3,4,5; col_1 | col_2 | col_3 | col_4 | col_5 
--------------------------------------------------------------------- 1 | | | 2 | 3 11 | | | 12 | 13 11 | | | 12 | 13 21 | | | 22 | 23 100 | 101 | 201 | 102 | 104 200 | 201 | 401 | 202 | 204 200 | 201 | 401 | 202 | 204 300 | 301 | 601 | 302 | 304 (8 rows) ROLLBACK; BEGIN; -- drop some of the columns not having "generated always as virtual" expressions SET client_min_messages TO WARNING; ALTER TABLE generated_stored_ref DROP COLUMN col_1 CASCADE; RESET client_min_messages; ALTER TABLE generated_stored_ref DROP COLUMN col_4; -- show that undistribute_table works fine SELECT undistribute_table('generated_stored_ref'); NOTICE: creating a new table for pg18_nn.generated_stored_ref NOTICE: moving the data of pg18_nn.generated_stored_ref NOTICE: dropping the old pg18_nn.generated_stored_ref NOTICE: renaming the new table to pg18_nn.generated_stored_ref undistribute_table --------------------------------------------------------------------- (1 row) INSERT INTO generated_stored_ref VALUES (5); SELECT * FROM generated_stored_REF ORDER BY 1; col_2 --------------------------------------------------------------------- 5 101 201 (5 rows) ROLLBACK; BEGIN; -- now drop all columns ALTER TABLE generated_stored_ref DROP COLUMN col_3; ALTER TABLE generated_stored_ref DROP COLUMN col_5; ALTER TABLE generated_stored_ref DROP COLUMN col_1; ALTER TABLE generated_stored_ref DROP COLUMN col_2; ALTER TABLE generated_stored_ref DROP COLUMN col_4; -- show that undistribute_table works fine SELECT undistribute_table('generated_stored_ref'); NOTICE: creating a new table for pg18_nn.generated_stored_ref NOTICE: moving the data of pg18_nn.generated_stored_ref NOTICE: dropping the old pg18_nn.generated_stored_ref NOTICE: renaming the new table to pg18_nn.generated_stored_ref undistribute_table --------------------------------------------------------------------- (1 row) SELECT * FROM generated_stored_ref; -- (4 rows) ROLLBACK; -- cleanup with minimum verbosity SET client_min_messages TO ERROR; 
RESET search_path; RESET citus.shard_count; RESET citus.shard_replication_factor; DROP SCHEMA pg18_nn CASCADE; RESET client_min_messages;