Add null_frac to citus_column_stats

naisila/citus_column_stats
naisila 2025-07-07 14:08:05 +03:00
parent ac27bb7d88
commit ce32d911ab
5 changed files with 42 additions and 28 deletions

View File

@ -2,6 +2,7 @@ CREATE OR REPLACE FUNCTION pg_catalog.citus_column_stats(
qualified_table_name text)
RETURNS TABLE (
attname text,
null_frac float4,
most_common_vals text[],
most_common_freqs float4[]
)
@ -15,7 +16,7 @@ BEGIN
WITH most_common_vals_json AS (
SELECT * FROM run_command_on_shards(qualified_table_name,
$$ SELECT json_agg(row_to_json(shard_stats)) FROM (
SELECT attname, most_common_vals, most_common_freqs, c.reltuples AS reltuples
SELECT attname, s.null_frac, most_common_vals, most_common_freqs, c.reltuples AS reltuples
FROM pg_stats s RIGHT JOIN pg_class c ON (s.tablename = c.relname)
WHERE c.relname = '%s') shard_stats $$ )),
@ -30,18 +31,22 @@ BEGIN
most_common_vals AS (
SELECT shardid,
(json_array_elements(result::json)->>'attname')::text AS attname,
(json_array_elements(result::json)->>'null_frac')::float4 AS null_frac,
json_array_elements_text((json_array_elements(result::json)->>'most_common_vals')::json)::text AS common_val,
json_array_elements_text((json_array_elements(result::json)->>'most_common_freqs')::json)::float4 AS common_freq,
(json_array_elements(result::json)->>'reltuples')::bigint AS shard_reltuples
FROM most_common_vals_json),
common_val_occurrence AS (
SELECT m.attname, common_val, sum(common_freq * shard_reltuples)::bigint AS occurrence
SELECT m.attname, common_val,
sum(common_freq * shard_reltuples)::bigint AS occurrence,
any_value(m.null_frac * shard_reltuples)::bigint AS null_occurrences
FROM most_common_vals m
GROUP BY m.attname, common_val
ORDER BY m.attname, occurrence DESC, common_val)
SELECT c.attname,
any_value((null_occurrences/t.table_reltuples)::float4) AS null_frac,
ARRAY_agg(common_val) AS most_common_vals,
ARRAY_agg((occurrence/t.table_reltuples)::float4) AS most_common_freqs
FROM common_val_occurrence c, table_reltuples t

View File

@ -2,6 +2,7 @@ CREATE OR REPLACE FUNCTION pg_catalog.citus_column_stats(
qualified_table_name text)
RETURNS TABLE (
attname text,
null_frac float4,
most_common_vals text[],
most_common_freqs float4[]
)
@ -15,7 +16,7 @@ BEGIN
WITH most_common_vals_json AS (
SELECT * FROM run_command_on_shards(qualified_table_name,
$$ SELECT json_agg(row_to_json(shard_stats)) FROM (
SELECT attname, most_common_vals, most_common_freqs, c.reltuples AS reltuples
SELECT attname, s.null_frac, most_common_vals, most_common_freqs, c.reltuples AS reltuples
FROM pg_stats s RIGHT JOIN pg_class c ON (s.tablename = c.relname)
WHERE c.relname = '%s') shard_stats $$ )),
@ -30,18 +31,22 @@ BEGIN
most_common_vals AS (
SELECT shardid,
(json_array_elements(result::json)->>'attname')::text AS attname,
(json_array_elements(result::json)->>'null_frac')::float4 AS null_frac,
json_array_elements_text((json_array_elements(result::json)->>'most_common_vals')::json)::text AS common_val,
json_array_elements_text((json_array_elements(result::json)->>'most_common_freqs')::json)::float4 AS common_freq,
(json_array_elements(result::json)->>'reltuples')::bigint AS shard_reltuples
FROM most_common_vals_json),
common_val_occurrence AS (
SELECT m.attname, common_val, sum(common_freq * shard_reltuples)::bigint AS occurrence
SELECT m.attname, common_val,
sum(common_freq * shard_reltuples)::bigint AS occurrence,
any_value(m.null_frac * shard_reltuples)::bigint AS null_occurrences
FROM most_common_vals m
GROUP BY m.attname, common_val
ORDER BY m.attname, occurrence DESC, common_val)
SELECT c.attname,
any_value((null_occurrences/t.table_reltuples)::float4) AS null_frac,
ARRAY_agg(common_val) AS most_common_vals,
ARRAY_agg((occurrence/t.table_reltuples)::float4) AS most_common_freqs
FROM common_val_occurrence c, table_reltuples t

View File

@ -9,18 +9,19 @@ INSERT INTO current_check VALUES
(3, 'cde', 'user1'),
(4, 'def', 'user1'),
(4, 'def', 'user1'),
(3, 'cde', 'user2');
(3, 'cde', 'user2'),
(5, NULL, NULL);
ALTER TABLE current_check ENABLE ROW LEVEL SECURITY;
SET row_security TO ON;
ANALYZE current_check;
SELECT attname, most_common_vals, most_common_freqs FROM pg_stats
SELECT attname, null_frac, most_common_vals, most_common_freqs FROM pg_stats
WHERE tablename = 'current_check'
ORDER BY 1;
attname | most_common_vals | most_common_freqs
attname | null_frac | most_common_vals | most_common_freqs
---------------------------------------------------------------------
currentid | {3,4} | {0.4,0.4}
payload | {cde,def} | {0.4,0.4}
rlsuser | {user1} | {0.8}
currentid | 0 | {3,4} | {0.333333,0.333333}
payload | 0.166667 | {cde,def} | {0.333333,0.333333}
rlsuser | 0.166667 | {user1} | {0.666667}
(3 rows)
SELECT * FROM citus_column_stats('current_check');
@ -38,21 +39,22 @@ INSERT INTO dist_current_check VALUES
(3, 'cde', 'user1'),
(4, 'def', 'user1'),
(4, 'def', 'user1'),
(3, 'cde', 'user2');
(3, 'cde', 'user2'),
(5, NULL, NULL);
ANALYZE dist_current_check;
SELECT attname, most_common_vals, most_common_freqs FROM pg_stats
SELECT attname, null_frac, most_common_vals, most_common_freqs FROM pg_stats
WHERE tablename = 'dist_current_check'
ORDER BY 1;
attname | most_common_vals | most_common_freqs
attname | null_frac | most_common_vals | most_common_freqs
---------------------------------------------------------------------
(0 rows)
SELECT * FROM citus_column_stats('dist_current_check');
attname | most_common_vals | most_common_freqs
attname | null_frac | most_common_vals | most_common_freqs
---------------------------------------------------------------------
currentid | {3,4} | {0.4,0.4}
payload | {cde,def} | {0.4,0.4}
rlsuser | {user1} | {0.8}
currentid | 0 | {3,4} | {0.333333,0.333333}
payload | 0.166667 | {cde,def} | {0.333333,0.333333}
rlsuser | 0.166667 | {user1} | {0.666667}
(3 rows)
DROP TABLE current_check;

View File

@ -1456,7 +1456,7 @@ SELECT * FROM multi_extension.print_extension_changes();
---------------------------------------------------------------------
function citus_unmark_object_distributed(oid,oid,integer) void |
function shard_name(regclass,bigint) text |
| function citus_column_stats(text) TABLE(attname text, most_common_vals text[], most_common_freqs real[])
| function citus_column_stats(text) TABLE(attname text, null_frac real, most_common_vals text[], most_common_freqs real[])
| function citus_internal.acquire_citus_advisory_object_class_lock(integer,cstring) void
| function citus_internal.add_colocation_metadata(integer,integer,integer,regtype,oid) void
| function citus_internal.add_object_metadata(text,text[],text[],integer,integer,boolean) void

View File

@ -13,7 +13,8 @@ INSERT INTO current_check VALUES
(3, 'cde', 'user1'),
(4, 'def', 'user1'),
(4, 'def', 'user1'),
(3, 'cde', 'user2');
(3, 'cde', 'user2'),
(5, NULL, NULL);
ALTER TABLE current_check ENABLE ROW LEVEL SECURITY;
@ -21,7 +22,7 @@ SET row_security TO ON;
ANALYZE current_check;
SELECT attname, most_common_vals, most_common_freqs FROM pg_stats
SELECT attname, null_frac, most_common_vals, most_common_freqs FROM pg_stats
WHERE tablename = 'current_check'
ORDER BY 1;
@ -35,11 +36,12 @@ INSERT INTO dist_current_check VALUES
(3, 'cde', 'user1'),
(4, 'def', 'user1'),
(4, 'def', 'user1'),
(3, 'cde', 'user2');
(3, 'cde', 'user2'),
(5, NULL, NULL);
ANALYZE dist_current_check;
SELECT attname, most_common_vals, most_common_freqs FROM pg_stats
SELECT attname, null_frac, most_common_vals, most_common_freqs FROM pg_stats
WHERE tablename = 'dist_current_check'
ORDER BY 1;