From ce32d911ab3deb299e3ce36701cb13056012108e Mon Sep 17 00:00:00 2001 From: naisila Date: Mon, 7 Jul 2025 14:08:05 +0300 Subject: [PATCH] Add null_frac to citus_column_stats --- .../sql/udfs/citus_column_stats/13.1-1.sql | 15 ++++++---- .../sql/udfs/citus_column_stats/latest.sql | 15 ++++++---- .../regress/expected/citus_column_stats.out | 28 ++++++++++--------- src/test/regress/expected/multi_extension.out | 2 +- src/test/regress/sql/citus_column_stats.sql | 10 ++++--- 5 files changed, 42 insertions(+), 28 deletions(-) diff --git a/src/backend/distributed/sql/udfs/citus_column_stats/13.1-1.sql b/src/backend/distributed/sql/udfs/citus_column_stats/13.1-1.sql index 65654ddac..1e6b5e7f4 100644 --- a/src/backend/distributed/sql/udfs/citus_column_stats/13.1-1.sql +++ b/src/backend/distributed/sql/udfs/citus_column_stats/13.1-1.sql @@ -2,6 +2,7 @@ CREATE OR REPLACE FUNCTION pg_catalog.citus_column_stats( qualified_table_name text) RETURNS TABLE ( attname text, + null_frac float4, most_common_vals text[], most_common_freqs float4[] ) @@ -14,10 +15,10 @@ BEGIN WITH most_common_vals_json AS ( SELECT * FROM run_command_on_shards(qualified_table_name, - $$ SELECT json_agg(row_to_json(shard_stats)) FROM ( - SELECT attname, most_common_vals, most_common_freqs, c.reltuples AS reltuples - FROM pg_stats s RIGHT JOIN pg_class c ON (s.tablename = c.relname) - WHERE c.relname = '%s') shard_stats $$ )), + $$ SELECT json_agg(row_to_json(shard_stats)) FROM ( + SELECT attname, s.null_frac, most_common_vals, most_common_freqs, c.reltuples AS reltuples + FROM pg_stats s RIGHT JOIN pg_class c ON (s.tablename = c.relname) + WHERE c.relname = '%s') shard_stats $$ )), table_reltuples_json AS ( SELECT distinct(shardid), @@ -30,18 +31,22 @@ BEGIN most_common_vals AS ( SELECT shardid, (json_array_elements(result::json)->>'attname')::text AS attname, + (json_array_elements(result::json)->>'null_frac')::float4 AS null_frac, json_array_elements_text((json_array_elements(result::json)->>'most_common_vals')::json)::text AS common_val, json_array_elements_text((json_array_elements(result::json)->>'most_common_freqs')::json)::float4 AS common_freq, (json_array_elements(result::json)->>'reltuples')::bigint AS shard_reltuples FROM most_common_vals_json), common_val_occurrence AS ( - SELECT m.attname, common_val, sum(common_freq * shard_reltuples)::bigint AS occurrence + SELECT m.attname, common_val, + sum(common_freq * shard_reltuples)::bigint AS occurrence, + any_value(m.null_frac * shard_reltuples)::bigint AS null_occurrences FROM most_common_vals m GROUP BY m.attname, common_val ORDER BY m.attname, occurrence DESC, common_val) SELECT c.attname, + any_value((null_occurrences/t.table_reltuples)::float4) AS null_frac, ARRAY_agg(common_val) AS most_common_vals, ARRAY_agg((occurrence/t.table_reltuples)::float4) AS most_common_freqs FROM common_val_occurrence c, table_reltuples t diff --git a/src/backend/distributed/sql/udfs/citus_column_stats/latest.sql b/src/backend/distributed/sql/udfs/citus_column_stats/latest.sql index 65654ddac..1e6b5e7f4 100644 --- a/src/backend/distributed/sql/udfs/citus_column_stats/latest.sql +++ b/src/backend/distributed/sql/udfs/citus_column_stats/latest.sql @@ -2,6 +2,7 @@ CREATE OR REPLACE FUNCTION pg_catalog.citus_column_stats( qualified_table_name text) RETURNS TABLE ( attname text, + null_frac float4, most_common_vals text[], most_common_freqs float4[] ) @@ -14,10 +15,10 @@ BEGIN WITH most_common_vals_json AS ( SELECT * FROM run_command_on_shards(qualified_table_name, - $$ SELECT json_agg(row_to_json(shard_stats)) FROM ( - SELECT attname, most_common_vals, most_common_freqs, c.reltuples AS reltuples - FROM pg_stats s RIGHT JOIN pg_class c ON (s.tablename = c.relname) - WHERE c.relname = '%s') shard_stats $$ )), + $$ SELECT json_agg(row_to_json(shard_stats)) FROM ( + SELECT attname, s.null_frac, most_common_vals, most_common_freqs, c.reltuples AS reltuples + FROM pg_stats s RIGHT JOIN pg_class c ON (s.tablename = c.relname) + WHERE c.relname = '%s') shard_stats $$ )), table_reltuples_json AS ( SELECT distinct(shardid), @@ -30,18 +31,22 @@ BEGIN most_common_vals AS ( SELECT shardid, (json_array_elements(result::json)->>'attname')::text AS attname, + (json_array_elements(result::json)->>'null_frac')::float4 AS null_frac, json_array_elements_text((json_array_elements(result::json)->>'most_common_vals')::json)::text AS common_val, json_array_elements_text((json_array_elements(result::json)->>'most_common_freqs')::json)::float4 AS common_freq, (json_array_elements(result::json)->>'reltuples')::bigint AS shard_reltuples FROM most_common_vals_json), common_val_occurrence AS ( - SELECT m.attname, common_val, sum(common_freq * shard_reltuples)::bigint AS occurrence + SELECT m.attname, common_val, + sum(common_freq * shard_reltuples)::bigint AS occurrence, + any_value(m.null_frac * shard_reltuples)::bigint AS null_occurrences FROM most_common_vals m GROUP BY m.attname, common_val ORDER BY m.attname, occurrence DESC, common_val) SELECT c.attname, + any_value((null_occurrences/t.table_reltuples)::float4) AS null_frac, ARRAY_agg(common_val) AS most_common_vals, ARRAY_agg((occurrence/t.table_reltuples)::float4) AS most_common_freqs FROM common_val_occurrence c, table_reltuples t diff --git a/src/test/regress/expected/citus_column_stats.out b/src/test/regress/expected/citus_column_stats.out index 41b3873a2..d0a0b3a6e 100644 --- a/src/test/regress/expected/citus_column_stats.out +++ b/src/test/regress/expected/citus_column_stats.out @@ -9,18 +9,19 @@ INSERT INTO current_check VALUES (3, 'cde', 'user1'), (4, 'def', 'user1'), (4, 'def', 'user1'), - (3, 'cde', 'user2'); + (3, 'cde', 'user2'), + (5, NULL, NULL); ALTER TABLE current_check ENABLE ROW LEVEL SECURITY; SET row_security TO ON; ANALYZE current_check; -SELECT attname, most_common_vals, most_common_freqs FROM pg_stats +SELECT attname, null_frac, most_common_vals, most_common_freqs FROM pg_stats WHERE tablename = 'current_check' ORDER BY 1; - attname | most_common_vals | most_common_freqs + attname | null_frac | most_common_vals | most_common_freqs --------------------------------------------------------------------- - currentid | {3,4} | {0.4,0.4} - payload | {cde,def} | {0.4,0.4} - rlsuser | {user1} | {0.8} + currentid | 0 | {3,4} | {0.333333,0.333333} + payload | 0.166667 | {cde,def} | {0.333333,0.333333} + rlsuser | 0.166667 | {user1} | {0.666667} (3 rows) SELECT * FROM citus_column_stats('current_check'); @@ -38,21 +39,22 @@ INSERT INTO dist_current_check VALUES (3, 'cde', 'user1'), (4, 'def', 'user1'), (4, 'def', 'user1'), - (3, 'cde', 'user2'); + (3, 'cde', 'user2'), + (5, NULL, NULL); ANALYZE dist_current_check; -SELECT attname, most_common_vals, most_common_freqs FROM pg_stats +SELECT attname, null_frac, most_common_vals, most_common_freqs FROM pg_stats WHERE tablename = 'dist_current_check' ORDER BY 1; - attname | most_common_vals | most_common_freqs + attname | null_frac | most_common_vals | most_common_freqs --------------------------------------------------------------------- (0 rows) SELECT * FROM citus_column_stats('dist_current_check'); - attname | most_common_vals | most_common_freqs + attname | null_frac | most_common_vals | most_common_freqs --------------------------------------------------------------------- - currentid | {3,4} | {0.4,0.4} - payload | {cde,def} | {0.4,0.4} - rlsuser | {user1} | {0.8} + currentid | 0 | {3,4} | {0.333333,0.333333} + payload | 0.166667 | {cde,def} | {0.333333,0.333333} + rlsuser | 0.166667 | {user1} | {0.666667} (3 rows) DROP TABLE current_check; diff --git a/src/test/regress/expected/multi_extension.out b/src/test/regress/expected/multi_extension.out index 942832977..fd3b46fc8 100644 --- a/src/test/regress/expected/multi_extension.out +++ b/src/test/regress/expected/multi_extension.out @@ -1456,7 +1456,7 @@ SELECT * FROM multi_extension.print_extension_changes(); --------------------------------------------------------------------- function citus_unmark_object_distributed(oid,oid,integer) void | function shard_name(regclass,bigint) text | - | function citus_column_stats(text) TABLE(attname text, most_common_vals text[], most_common_freqs real[]) + | function citus_column_stats(text) TABLE(attname text, null_frac real, most_common_vals text[], most_common_freqs real[]) | function citus_internal.acquire_citus_advisory_object_class_lock(integer,cstring) void | function citus_internal.add_colocation_metadata(integer,integer,integer,regtype,oid) void | function citus_internal.add_object_metadata(text,text[],text[],integer,integer,boolean) void diff --git a/src/test/regress/sql/citus_column_stats.sql b/src/test/regress/sql/citus_column_stats.sql index cceb4a0ac..8623bc500 100644 --- a/src/test/regress/sql/citus_column_stats.sql +++ b/src/test/regress/sql/citus_column_stats.sql @@ -13,7 +13,8 @@ INSERT INTO current_check VALUES (3, 'cde', 'user1'), (4, 'def', 'user1'), (4, 'def', 'user1'), - (3, 'cde', 'user2'); + (3, 'cde', 'user2'), + (5, NULL, NULL); ALTER TABLE current_check ENABLE ROW LEVEL SECURITY; @@ -21,7 +22,7 @@ SET row_security TO ON; ANALYZE current_check; -SELECT attname, most_common_vals, most_common_freqs FROM pg_stats +SELECT attname, null_frac, most_common_vals, most_common_freqs FROM pg_stats WHERE tablename = 'current_check' ORDER BY 1; @@ -35,11 +36,12 @@ INSERT INTO dist_current_check VALUES (3, 'cde', 'user1'), (4, 'def', 'user1'), (4, 'def', 'user1'), - (3, 'cde', 'user2'); + (3, 'cde', 'user2'), + (5, NULL, NULL); ANALYZE dist_current_check; -SELECT attname, most_common_vals, most_common_freqs FROM pg_stats +SELECT attname, null_frac, most_common_vals, most_common_freqs FROM pg_stats WHERE tablename = 'dist_current_check' ORDER BY 1;