--
-- PG18
--
-- This file is a psql transcript: each statement is followed by its output.
-- Derive a boolean psql variable that tells whether the server's major
-- version is 18 or newer; it gates the PG18-only part of the file below.
SHOW server_version \gset
SELECT substring(:'server_version', '\d+')::int >= 18 AS server_version_ge_18
\gset
-- test invalid statistics
-- behavior is same among PG versions, error message differs
-- relevant PG18 commit: 3eea4dc2c7, 38883916e
-- Both CREATE STATISTICS statements below put something other than a plain
-- relation name in FROM (a VALUES list, then a function call) and both are
-- rejected with the same error.
CREATE STATISTICS tst ON a FROM (VALUES (x)) AS foo;
ERROR:  CREATE STATISTICS only supports relation names in the FROM clause
CREATE FUNCTION tftest(int) returns table(a int, b int) as $$
SELECT $1, $1+i FROM generate_series(1,5) g(i);
$$ LANGUAGE sql IMMUTABLE STRICT;
CREATE STATISTICS alt_stat2 ON a FROM tftest(1);
ERROR:  CREATE STATISTICS only supports relation names in the FROM clause
DROP FUNCTION tftest;
-- Quit the script here unless server_version_ge_18 is true; everything
-- below runs only on PG18 or newer.
\if :server_version_ge_18
\else
\q
\endif
-- PG18-specific tests go here.
--
-- Purpose: Verify PG18 behavior that NOT NULL constraints are materialized
--          as pg_constraint rows with contype = 'n' on both coordinator and
--          worker shards. Also confirm our helper view (table_checks) does
--          NOT surface NOT NULL entries.
-- https://github.com/postgres/postgres/commit/14e87ffa5c543b5f30ead7413084c25f7735039f
CREATE SCHEMA pg18_nn;
SET search_path TO pg18_nn;
-- Local control table
DROP TABLE IF EXISTS nn_local CASCADE;
NOTICE:  table "nn_local" does not exist, skipping
CREATE TABLE nn_local(
    a int NOT NULL,
    b int,
    c text NOT NULL
);
-- Distributed table
-- Same column definitions as nn_local, distributed on column a.
DROP TABLE IF EXISTS nn_dist CASCADE;
NOTICE:  table "nn_dist" does not exist, skipping
CREATE TABLE nn_dist(
    a int NOT NULL,
    b int,
    c text NOT NULL
);
SELECT create_distributed_table('nn_dist', 'a');
 create_distributed_table
---------------------------------------------------------------------

(1 row)

-- Coordinator: count NOT NULL constraint rows
-- Two rows of contype 'n' are expected per table: one for column a, one for column c.
SELECT 'local_n_count' AS label, contype, count(*)
FROM pg_constraint
WHERE conrelid = 'pg18_nn.nn_local'::regclass
GROUP BY contype
ORDER BY contype;
     label     | contype | count
---------------------------------------------------------------------
 local_n_count | n       |     2
(1 row)

SELECT 'dist_n_count' AS label, contype, count(*)
FROM pg_constraint
WHERE conrelid = 'pg18_nn.nn_dist'::regclass
GROUP BY contype
ORDER BY contype;
    label     | contype | count
---------------------------------------------------------------------
 dist_n_count | n       |     2
(1 row)

-- Our helper view should exclude NOT NULL
SELECT 'table_checks_local_count' AS label, count(*)
FROM public.table_checks
WHERE relid = 'pg18_nn.nn_local'::regclass;
          label           | count
---------------------------------------------------------------------
 table_checks_local_count |     0
(1 row)

SELECT 'table_checks_dist_count' AS label, count(*)
FROM public.table_checks
WHERE relid = 'pg18_nn.nn_dist'::regclass;
          label          | count
---------------------------------------------------------------------
 table_checks_dist_count |     0
(1 row)

-- Add a real CHECK to ensure table_checks still reports real checks
ALTER TABLE nn_dist ADD CONSTRAINT nn_dist_check CHECK (b IS DISTINCT FROM 42);
SELECT 'table_checks_dist_with_real_check' AS label, count(*)
FROM public.table_checks
WHERE relid = 'pg18_nn.nn_dist'::regclass;
               label               | count
---------------------------------------------------------------------
 table_checks_dist_with_real_check |     1
(1 row)

-- === Worker checks ===
\c - - - :worker_1_port
SET client_min_messages TO WARNING;
SET search_path TO pg18_nn;
-- Pick one heap shard of nn_dist in our schema
-- The result is stored by \gset in the psql variable shard_regclass, which the
-- queries below reuse (including after the later reconnects).
-- NOTE(review): in LIKE, '_' matches any single character, so 'nn_dist_%' is
-- looser than a literal "nn_dist_" prefix; presumably harmless because only
-- nn_dist relations are expected to match in this schema -- confirm.
SELECT format('%I.%I', n.nspname, c.relname) AS shard_regclass
FROM pg_class c
JOIN pg_namespace n ON n.oid = c.relnamespace
WHERE n.nspname = 'pg18_nn'
  AND c.relname LIKE 'nn_dist_%'
  AND c.relkind = 'r'
ORDER BY c.relname
LIMIT 1
\gset
-- Expect: 2 NOT NULL rows (a,c) + 1 CHECK row on the shard
SELECT 'worker_shard_n_count' AS label, contype, count(*)
FROM pg_constraint
WHERE conrelid = :'shard_regclass'::regclass
GROUP BY contype
ORDER BY contype;
        label         | contype | count
---------------------------------------------------------------------
 worker_shard_n_count | c       |     1
 worker_shard_n_count | n       |     2
(2 rows)

-- table_checks on shard should hide NOT NULL
-- The single row counted here corresponds to the CHECK added on the coordinator above.
SELECT 'table_checks_worker_shard_count' AS label, count(*)
FROM public.table_checks
WHERE relid = :'shard_regclass'::regclass;
              label              | count
---------------------------------------------------------------------
 table_checks_worker_shard_count |     1
(1 row)

-- Drop one NOT NULL on coordinator; verify propagation
\c - - - :master_port
SET search_path TO pg18_nn;
ALTER TABLE nn_dist ALTER COLUMN c DROP NOT NULL;
-- Re-check on worker: NOT NULL count should drop to 1
\c - - - :worker_1_port
SET search_path TO pg18_nn;
SELECT 'worker_shard_n_after_drop' AS label, contype, count(*)
FROM pg_constraint
WHERE conrelid = :'shard_regclass'::regclass
GROUP BY contype
ORDER BY contype;
           label           | contype | count
---------------------------------------------------------------------
 worker_shard_n_after_drop | c       |     1
 worker_shard_n_after_drop | n       |     1
(2 rows)

-- And on coordinator
\c - - - :master_port
SET search_path TO pg18_nn;
SELECT 'dist_n_after_drop' AS label, contype, count(*)
FROM pg_constraint
WHERE conrelid = 'pg18_nn.nn_dist'::regclass
GROUP BY contype
ORDER BY contype;
       label       | contype | count
---------------------------------------------------------------------
 dist_n_after_drop | c       |     1
 dist_n_after_drop | n       |     1
(2 rows)

-- Purpose: test self join elimination for distributed, citus local and local tables.
--
-- Fixtures for the self-join elimination (SJE) checks: sje_d1 and sje_d2 are
-- distributed on id below; sje_local is created but never distributed in this
-- script.
CREATE TABLE sje_d1 (id bigserial PRIMARY KEY, name text, created_at timestamptz DEFAULT now());
CREATE TABLE sje_d2 (id bigserial PRIMARY KEY, name text, created_at timestamptz DEFAULT now());
CREATE TABLE sje_local (id bigserial PRIMARY KEY, title text);
SELECT create_distributed_table('sje_d1', 'id');
 create_distributed_table
---------------------------------------------------------------------

(1 row)

SELECT create_distributed_table('sje_d2', 'id');
 create_distributed_table
---------------------------------------------------------------------

(1 row)

-- generate_series(0,100) is inclusive, so each table receives 101 rows with
-- ids 0..100; the count results of 101 further down follow from that.
INSERT INTO sje_d1 SELECT i, i::text, now() FROM generate_series(0,100)i;
INSERT INTO sje_d2 SELECT i, i::text, now() FROM generate_series(0,100)i;
INSERT INTO sje_local SELECT i, i::text FROM generate_series(0,100)i;
-- Self-join elimination is applied when distributed tables are involved
-- The query plan has only one join
-- (sje_d2 is referenced six times, as u1..u6, yet only u6 appears in the plan.)
EXPLAIN (costs off)
select count(1) from sje_d1
INNER JOIN sje_d2 u1 USING (id)
INNER JOIN sje_d2 u2 USING (id)
INNER JOIN sje_d2 u3 USING (id)
INNER JOIN sje_d2 u4 USING (id)
INNER JOIN sje_d2 u5 USING (id)
INNER JOIN sje_d2 u6 USING (id);
                                  QUERY PLAN
---------------------------------------------------------------------
 Aggregate
   ->  Custom Scan (Citus Adaptive)
         Task Count: 4
         Tasks Shown: One of 4
         ->  Task
               Node: host=localhost port=xxxxx dbname=regression
               ->  Aggregate
                     ->  Hash Join
                           Hash Cond: (sje_d1.id = u6.id)
                           ->  Seq Scan on sje_d1_102012 sje_d1
                           ->  Hash
                                 ->  Seq Scan on sje_d2_102016 u6
(12 rows)

-- Same query without EXPLAIN, to check the result itself.
select count(1) from sje_d1
INNER JOIN sje_d2 u1 USING (id)
INNER JOIN sje_d2 u2 USING (id)
INNER JOIN sje_d2 u3 USING (id)
INNER JOIN sje_d2 u4 USING (id)
INNER JOIN sje_d2 u5 USING (id)
INNER JOIN sje_d2 u6 USING (id);
 count
---------------------------------------------------------------------
   101
(1 row)

-- Self-join elimination applied to from list join
-- (the comma-separated FROM list is the construct under test here)
EXPLAIN (costs off)
SELECT count(1) from sje_d1 d1, sje_d2 u1, sje_d2 u2, sje_d2 u3
WHERE d1.id = u1.id and u1.id = u2.id and u3.id = d1.id;
                                  QUERY PLAN
---------------------------------------------------------------------
 Aggregate
   ->  Custom Scan (Citus Adaptive)
         Task Count: 4
         Tasks Shown: One of 4
         ->  Task
               Node: host=localhost port=xxxxx dbname=regression
               ->  Aggregate
                     ->  Hash Join
                           Hash Cond: (d1.id = u3.id)
                           ->  Seq Scan on sje_d1_102012 d1
                           ->  Hash
                                 ->  Seq Scan on sje_d2_102016 u3
(12 rows)

SELECT count(1) from sje_d1 d1, sje_d2 u1, sje_d2 u2, sje_d2 u3
WHERE d1.id = u1.id and u1.id = u2.id and u3.id = d1.id;
 count
---------------------------------------------------------------------
   101
(1 row)

-- Self-join elimination is not applied when a local table is involved
-- This is a limitation that will be resolved in citus 14
-- (the plan below keeps all six sje_local references, each as its own
-- distributed subplan joined back through read_intermediate_result)
EXPLAIN (costs off)
select count(1) from sje_d1
INNER JOIN sje_local u1 USING (id)
INNER JOIN sje_local u2 USING (id)
INNER JOIN sje_local u3 USING (id)
INNER JOIN sje_local u4 USING (id)
INNER JOIN sje_local u5 USING (id)
INNER JOIN sje_local u6 USING (id);
                                  QUERY PLAN
---------------------------------------------------------------------
 Aggregate
   ->  Custom Scan (Citus Adaptive)
         ->  Distributed Subplan XXX_1
               ->  Seq Scan on sje_local u1
         ->  Distributed Subplan XXX_2
               ->  Seq Scan on sje_local u2
         ->  Distributed Subplan XXX_3
               ->  Seq Scan on sje_local u3
         ->  Distributed Subplan XXX_4
               ->  Seq Scan on sje_local u4
         ->  Distributed Subplan XXX_5
               ->  Seq Scan on sje_local u5
         ->  Distributed Subplan XXX_6
               ->  Seq Scan on sje_local u6
         Task Count: 4
         Tasks Shown: One of 4
         ->  Task
               Node: host=localhost port=xxxxx dbname=regression
               ->  Aggregate
                     ->  Hash Join
                           Hash Cond: (intermediate_result_5.id = sje_d1.id)
                           ->  Function Scan on read_intermediate_result intermediate_result_5
                           ->  Hash
                                 ->  Hash Join
                                       Hash Cond: (intermediate_result_4.id = sje_d1.id)
                                       ->  Function Scan on read_intermediate_result intermediate_result_4
                                       ->  Hash
                                             ->  Hash Join
                                                   Hash Cond: (intermediate_result_3.id = sje_d1.id)
                                                   ->  Function Scan on read_intermediate_result intermediate_result_3
                                                   ->  Hash
                                                         ->  Hash Join
                                                               Hash Cond: (intermediate_result_2.id = sje_d1.id)
                                                               ->  Function Scan on read_intermediate_result intermediate_result_2
                                                               ->  Hash
                                                                     ->  Hash Join
                                                                           Hash Cond: (intermediate_result_1.id = sje_d1.id)
                                                                           ->  Function Scan on read_intermediate_result intermediate_result_1
                                                                           ->  Hash
                                                                                 ->  Hash Join
                                                                                       Hash Cond: (intermediate_result.id = sje_d1.id)
                                                                                       ->  Function Scan on read_intermediate_result intermediate_result
                                                                                       ->  Hash
                                                                                             ->  Seq Scan on sje_d1_102012 sje_d1
(44 rows)

select count(1) from sje_d1
INNER JOIN sje_local u1 USING (id)
INNER JOIN sje_local u2 USING (id)
INNER JOIN sje_local u3 USING (id)
INNER JOIN sje_local u4 USING (id)
INNER JOIN sje_local u5 USING (id)
INNER JOIN sje_local u6 USING (id);
 count
---------------------------------------------------------------------
   101
(1 row)

-- to test USING vs ON equivalence
EXPLAIN (costs off)
SELECT count(1)
FROM sje_d1 d
JOIN sje_d2 u1 ON (d.id = u1.id)
JOIN sje_d2 u2 ON (u1.id = u2.id);
                                  QUERY PLAN
---------------------------------------------------------------------
 Aggregate
   ->  Custom Scan (Citus Adaptive)
         Task Count: 4
         Tasks Shown: One of 4
         ->  Task
               Node: host=localhost port=xxxxx dbname=regression
               ->  Aggregate
                     ->  Hash Join
                           Hash Cond: (d.id = u2.id)
                           ->  Seq Scan on sje_d1_102012 d
                           ->  Hash
                                 ->  Seq Scan on sje_d2_102016 u2
(12 rows)

SELECT count(1)
FROM sje_d1 d
JOIN sje_d2 u1 ON (d.id = u1.id)
JOIN sje_d2 u2 ON (u1.id = u2.id);
 count
---------------------------------------------------------------------
   101
(1 row)

-- Null-introducing join can have SJE
-- (the plan below scans only sje_d1; neither LEFT JOIN to sje_d2 remains)
EXPLAIN (costs off)
SELECT count(*)
FROM sje_d1 d
LEFT JOIN sje_d2 u1 USING (id)
LEFT JOIN sje_d2 u2 USING (id);
                                  QUERY PLAN
---------------------------------------------------------------------
 Aggregate
   ->  Custom Scan (Citus Adaptive)
         Task Count: 4
         Tasks Shown: One of 4
         ->  Task
               Node: host=localhost port=xxxxx dbname=regression
               ->  Aggregate
                     ->  Seq Scan on sje_d1_102012 d
(8 rows)

SELECT count(*)
FROM sje_d1 d
LEFT JOIN sje_d2 u1 USING (id)
LEFT JOIN sje_d2 u2 USING (id);
 count
---------------------------------------------------------------------
   101
(1 row)

-- prepared statement
-- $1 and $2 bound the id range; with (10,20) the inclusive BETWEEN covers
-- ids 10..20, hence the count of 11 below.
-- NOTE(review): sje_p is not DEALLOCATEd anywhere in the visible script.
PREPARE sje_p(int,int) AS
SELECT count(1)
FROM sje_d1 d
JOIN sje_d2 u1 USING (id)
JOIN sje_d2 u2 USING (id)
WHERE d.id BETWEEN $1 AND $2;
EXPLAIN (costs off)
EXECUTE sje_p(10,20);
                                  QUERY PLAN
---------------------------------------------------------------------
 Aggregate
   ->  Custom Scan (Citus Adaptive)
         Task Count: 4
         Tasks Shown: One of 4
         ->  Task
               Node: host=localhost port=xxxxx dbname=regression
               ->  Aggregate
                     ->  Hash Join
                           Hash Cond: (u2.id = d.id)
                           ->  Seq Scan on sje_d2_102016 u2
                           ->  Hash
                                 ->  Bitmap Heap Scan on sje_d1_102012 d
                                       Recheck Cond: ((id >= 10) AND (id <= 20))
                                       ->  Bitmap Index Scan on sje_d1_pkey_102012
                                             Index Cond: ((id >= 10) AND (id <= 20))
(15 rows)

EXECUTE sje_p(10,20);
 count
---------------------------------------------------------------------
    11
(1 row)

-- cte
-- z keeps the even ids of sje_d2; 0..100 contains 51 even values, hence the
-- count of 51 below. In the plan the CTE's filter shows up on the single
-- remaining sje_d2 scan.
EXPLAIN (costs off)
WITH z AS (SELECT id FROM sje_d2 WHERE id % 2 = 0)
SELECT count(1)
FROM sje_d1 d
JOIN z USING (id)
JOIN sje_d2 u2 USING (id);
                                  QUERY PLAN
---------------------------------------------------------------------
 Aggregate
   ->  Custom Scan (Citus Adaptive)
         Task Count: 4
         Tasks Shown: One of 4
         ->  Task
               Node: host=localhost port=xxxxx dbname=regression
               ->  Aggregate
                     ->  Hash Join
                           Hash Cond: (d.id = u2.id)
                           ->  Seq Scan on sje_d1_102012 d
                           ->  Hash
                                 ->  Seq Scan on sje_d2_102016 u2
                                       Filter: ((id % '2'::bigint) = 0)
(13 rows)

WITH z AS (SELECT id FROM sje_d2 WHERE id % 2 = 0)
SELECT count(1)
FROM sje_d1 d
JOIN z USING (id)
JOIN sje_d2 u2 USING (id);
 count
---------------------------------------------------------------------
    51
(1 row)

-- cleanup with minimum verbosity
-- The sje_* tables were created unqualified while search_path was pg18_nn,
-- so dropping that schema with CASCADE removes them as well.
SET client_min_messages TO ERROR;
RESET search_path;
DROP SCHEMA pg18_nn CASCADE;
RESET client_min_messages;