mirror of https://github.com/citusdata/citus.git
Add citus_column_stats UDF
parent
55a0d1f730
commit
f55c636dbb
|
@ -51,6 +51,7 @@ DROP VIEW IF EXISTS pg_catalog.citus_lock_waits;
|
||||||
#include "udfs/citus_stat_counters/13.1-1.sql"
|
#include "udfs/citus_stat_counters/13.1-1.sql"
|
||||||
#include "udfs/citus_stat_counters_reset/13.1-1.sql"
|
#include "udfs/citus_stat_counters_reset/13.1-1.sql"
|
||||||
#include "udfs/citus_nodes/13.1-1.sql"
|
#include "udfs/citus_nodes/13.1-1.sql"
|
||||||
|
#include "udfs/citus_column_stats/13.1-1.sql"
|
||||||
|
|
||||||
-- Since shard_name/13.1-1.sql first drops the function and then creates it, we first
|
-- Since shard_name/13.1-1.sql first drops the function and then creates it, we first
|
||||||
-- need to drop citus_shards view since that view depends on this function. And immediately
|
-- need to drop citus_shards view since that view depends on this function. And immediately
|
||||||
|
|
|
@ -46,6 +46,7 @@ DROP VIEW pg_catalog.citus_stat_counters;
|
||||||
DROP FUNCTION pg_catalog.citus_stat_counters(oid);
|
DROP FUNCTION pg_catalog.citus_stat_counters(oid);
|
||||||
DROP FUNCTION pg_catalog.citus_stat_counters_reset(oid);
|
DROP FUNCTION pg_catalog.citus_stat_counters_reset(oid);
|
||||||
DROP VIEW IF EXISTS pg_catalog.citus_nodes;
|
DROP VIEW IF EXISTS pg_catalog.citus_nodes;
|
||||||
|
DROP FUNCTION IF EXISTS pg_catalog.citus_column_stats;
|
||||||
|
|
||||||
-- Definition of shard_name() prior to this release doesn't have a separate SQL file
|
-- Definition of shard_name() prior to this release doesn't have a separate SQL file
|
||||||
-- because it's quite an old UDF that its prior definition(s) was(were) squashed into
|
-- because it's quite an old UDF that its prior definition(s) was(were) squashed into
|
||||||
|
|
|
@ -0,0 +1,56 @@
|
||||||
|
CREATE OR REPLACE FUNCTION pg_catalog.citus_column_stats(
|
||||||
|
qualified_table_name text)
|
||||||
|
RETURNS TABLE (
|
||||||
|
attname text,
|
||||||
|
most_common_vals text[],
|
||||||
|
most_common_freqs float4[]
|
||||||
|
)
|
||||||
|
AS $func$
|
||||||
|
BEGIN
|
||||||
|
IF NOT EXISTS (SELECT * FROM pg_dist_partition WHERE logicalrelid = qualified_table_name::regclass) THEN
|
||||||
|
RAISE EXCEPTION 'Not a Citus table';
|
||||||
|
ELSE
|
||||||
|
RETURN QUERY
|
||||||
|
|
||||||
|
WITH most_common_vals_json AS (
|
||||||
|
SELECT * FROM run_command_on_shards(qualified_table_name,
|
||||||
|
$$ SELECT json_agg(row_to_json(shard_stats)) FROM (
|
||||||
|
SELECT attname, most_common_vals, most_common_freqs, c.reltuples AS reltuples
|
||||||
|
FROM pg_stats s RIGHT JOIN pg_class c ON (s.tablename = c.relname)
|
||||||
|
WHERE c.relname = '%s') shard_stats $$ )),
|
||||||
|
|
||||||
|
table_reltuples_json AS (
|
||||||
|
SELECT distinct(shardid),
|
||||||
|
(json_array_elements(result::json)->>'reltuples')::bigint AS shard_reltuples
|
||||||
|
FROM most_common_vals_json),
|
||||||
|
|
||||||
|
table_reltuples AS (
|
||||||
|
SELECT sum(shard_reltuples) AS table_reltuples FROM table_reltuples_json),
|
||||||
|
|
||||||
|
most_common_vals AS (
|
||||||
|
SELECT shardid,
|
||||||
|
(json_array_elements(result::json)->>'attname')::text AS attname,
|
||||||
|
json_array_elements_text((json_array_elements(result::json)->>'most_common_vals')::json)::text AS common_val,
|
||||||
|
json_array_elements_text((json_array_elements(result::json)->>'most_common_freqs')::json)::float4 AS common_freq,
|
||||||
|
(json_array_elements(result::json)->>'reltuples')::bigint AS shard_reltuples
|
||||||
|
FROM most_common_vals_json),
|
||||||
|
|
||||||
|
common_val_occurrence AS (
|
||||||
|
SELECT m.attname, common_val, sum(common_freq * shard_reltuples)::bigint AS occurrence
|
||||||
|
FROM most_common_vals m
|
||||||
|
GROUP BY m.attname, common_val
|
||||||
|
ORDER BY m.attname, occurrence DESC, common_val)
|
||||||
|
|
||||||
|
SELECT c.attname,
|
||||||
|
ARRAY_agg(common_val) AS most_common_vals,
|
||||||
|
ARRAY_agg((occurrence/t.table_reltuples)::float4) AS most_common_freqs
|
||||||
|
FROM common_val_occurrence c, table_reltuples t
|
||||||
|
GROUP BY c.attname;
|
||||||
|
|
||||||
|
END IF;
|
||||||
|
END;
|
||||||
|
$func$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
COMMENT ON FUNCTION pg_catalog.citus_column_stats(
|
||||||
|
qualified_table_name text)
|
||||||
|
IS 'provides some pg_stats for columns of input Citus table';
|
|
@ -0,0 +1,56 @@
|
||||||
|
CREATE OR REPLACE FUNCTION pg_catalog.citus_column_stats(
|
||||||
|
qualified_table_name text)
|
||||||
|
RETURNS TABLE (
|
||||||
|
attname text,
|
||||||
|
most_common_vals text[],
|
||||||
|
most_common_freqs float4[]
|
||||||
|
)
|
||||||
|
AS $func$
|
||||||
|
BEGIN
|
||||||
|
IF NOT EXISTS (SELECT * FROM pg_dist_partition WHERE logicalrelid = qualified_table_name::regclass) THEN
|
||||||
|
RAISE EXCEPTION 'Not a Citus table';
|
||||||
|
ELSE
|
||||||
|
RETURN QUERY
|
||||||
|
|
||||||
|
WITH most_common_vals_json AS (
|
||||||
|
SELECT * FROM run_command_on_shards(qualified_table_name,
|
||||||
|
$$ SELECT json_agg(row_to_json(shard_stats)) FROM (
|
||||||
|
SELECT attname, most_common_vals, most_common_freqs, c.reltuples AS reltuples
|
||||||
|
FROM pg_stats s RIGHT JOIN pg_class c ON (s.tablename = c.relname)
|
||||||
|
WHERE c.relname = '%s') shard_stats $$ )),
|
||||||
|
|
||||||
|
table_reltuples_json AS (
|
||||||
|
SELECT distinct(shardid),
|
||||||
|
(json_array_elements(result::json)->>'reltuples')::bigint AS shard_reltuples
|
||||||
|
FROM most_common_vals_json),
|
||||||
|
|
||||||
|
table_reltuples AS (
|
||||||
|
SELECT sum(shard_reltuples) AS table_reltuples FROM table_reltuples_json),
|
||||||
|
|
||||||
|
most_common_vals AS (
|
||||||
|
SELECT shardid,
|
||||||
|
(json_array_elements(result::json)->>'attname')::text AS attname,
|
||||||
|
json_array_elements_text((json_array_elements(result::json)->>'most_common_vals')::json)::text AS common_val,
|
||||||
|
json_array_elements_text((json_array_elements(result::json)->>'most_common_freqs')::json)::float4 AS common_freq,
|
||||||
|
(json_array_elements(result::json)->>'reltuples')::bigint AS shard_reltuples
|
||||||
|
FROM most_common_vals_json),
|
||||||
|
|
||||||
|
common_val_occurrence AS (
|
||||||
|
SELECT m.attname, common_val, sum(common_freq * shard_reltuples)::bigint AS occurrence
|
||||||
|
FROM most_common_vals m
|
||||||
|
GROUP BY m.attname, common_val
|
||||||
|
ORDER BY m.attname, occurrence DESC, common_val)
|
||||||
|
|
||||||
|
SELECT c.attname,
|
||||||
|
ARRAY_agg(common_val) AS most_common_vals,
|
||||||
|
ARRAY_agg((occurrence/t.table_reltuples)::float4) AS most_common_freqs
|
||||||
|
FROM common_val_occurrence c, table_reltuples t
|
||||||
|
GROUP BY c.attname;
|
||||||
|
|
||||||
|
END IF;
|
||||||
|
END;
|
||||||
|
$func$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
COMMENT ON FUNCTION pg_catalog.citus_column_stats(
|
||||||
|
qualified_table_name text)
|
||||||
|
IS 'provides some pg_stats for columns of input Citus table';
|
|
@ -0,0 +1,62 @@
|
||||||
|
SET citus.shard_count = 2;
|
||||||
|
CREATE USER user1;
|
||||||
|
GRANT ALL ON SCHEMA public TO user1;
|
||||||
|
SET SESSION AUTHORIZATION user1;
|
||||||
|
CREATE TABLE current_check (currentid int, payload text, rlsuser text);
|
||||||
|
GRANT ALL ON current_check TO PUBLIC;
|
||||||
|
INSERT INTO current_check VALUES
|
||||||
|
(1, 'abc', 'user1'),
|
||||||
|
(3, 'cde', 'user1'),
|
||||||
|
(4, 'def', 'user1'),
|
||||||
|
(4, 'def', 'user1'),
|
||||||
|
(3, 'cde', 'user2');
|
||||||
|
ALTER TABLE current_check ENABLE ROW LEVEL SECURITY;
|
||||||
|
SET row_security TO ON;
|
||||||
|
ANALYZE current_check;
|
||||||
|
SELECT attname, most_common_vals, most_common_freqs FROM pg_stats
|
||||||
|
WHERE tablename = 'current_check'
|
||||||
|
ORDER BY 1;
|
||||||
|
attname | most_common_vals | most_common_freqs
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
currentid | {3,4} | {0.4,0.4}
|
||||||
|
payload | {cde,def} | {0.4,0.4}
|
||||||
|
rlsuser | {user1} | {0.8}
|
||||||
|
(3 rows)
|
||||||
|
|
||||||
|
SELECT * FROM citus_column_stats('current_check');
|
||||||
|
ERROR: Not a Citus table
|
||||||
|
CONTEXT: PL/pgSQL function citus_column_stats(text) line XX at RAISE
|
||||||
|
CREATE TABLE dist_current_check (currentid int, payload text, rlsuser text);
|
||||||
|
SELECT create_distributed_table('dist_current_check', 'currentid');
|
||||||
|
create_distributed_table
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
INSERT INTO dist_current_check VALUES
|
||||||
|
(1, 'abc', 'user1'),
|
||||||
|
(3, 'cde', 'user1'),
|
||||||
|
(4, 'def', 'user1'),
|
||||||
|
(4, 'def', 'user1'),
|
||||||
|
(3, 'cde', 'user2');
|
||||||
|
ANALYZE dist_current_check;
|
||||||
|
SELECT attname, most_common_vals, most_common_freqs FROM pg_stats
|
||||||
|
WHERE tablename = 'dist_current_check'
|
||||||
|
ORDER BY 1;
|
||||||
|
attname | most_common_vals | most_common_freqs
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
(0 rows)
|
||||||
|
|
||||||
|
SELECT * FROM citus_column_stats('dist_current_check');
|
||||||
|
attname | most_common_vals | most_common_freqs
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
currentid | {3,4} | {0.4,0.4}
|
||||||
|
payload | {cde,def} | {0.4,0.4}
|
||||||
|
rlsuser | {user1} | {0.8}
|
||||||
|
(3 rows)
|
||||||
|
|
||||||
|
DROP TABLE current_check;
|
||||||
|
DROP TABLE dist_current_check;
|
||||||
|
RESET SESSION AUTHORIZATION;
|
||||||
|
REVOKE ALL ON SCHEMA public FROM user1;
|
||||||
|
DROP USER user1;
|
|
@ -1456,6 +1456,7 @@ SELECT * FROM multi_extension.print_extension_changes();
|
||||||
---------------------------------------------------------------------
|
---------------------------------------------------------------------
|
||||||
function citus_unmark_object_distributed(oid,oid,integer) void |
|
function citus_unmark_object_distributed(oid,oid,integer) void |
|
||||||
function shard_name(regclass,bigint) text |
|
function shard_name(regclass,bigint) text |
|
||||||
|
| function citus_column_stats(text) TABLE(attname text, most_common_vals text[], most_common_freqs real[])
|
||||||
| function citus_internal.acquire_citus_advisory_object_class_lock(integer,cstring) void
|
| function citus_internal.acquire_citus_advisory_object_class_lock(integer,cstring) void
|
||||||
| function citus_internal.add_colocation_metadata(integer,integer,integer,regtype,oid) void
|
| function citus_internal.add_colocation_metadata(integer,integer,integer,regtype,oid) void
|
||||||
| function citus_internal.add_object_metadata(text,text[],text[],integer,integer,boolean) void
|
| function citus_internal.add_object_metadata(text,text[],text[],integer,integer,boolean) void
|
||||||
|
@ -1487,7 +1488,7 @@ SELECT * FROM multi_extension.print_extension_changes();
|
||||||
| function shard_name(regclass,bigint,boolean) text
|
| function shard_name(regclass,bigint,boolean) text
|
||||||
| view citus_nodes
|
| view citus_nodes
|
||||||
| view citus_stat_counters
|
| view citus_stat_counters
|
||||||
(33 rows)
|
(34 rows)
|
||||||
|
|
||||||
DROP TABLE multi_extension.prev_objects, multi_extension.extension_diff;
|
DROP TABLE multi_extension.prev_objects, multi_extension.extension_diff;
|
||||||
-- show running version
|
-- show running version
|
||||||
|
|
|
@ -54,6 +54,7 @@ ORDER BY 1;
|
||||||
function citus_check_connection_to_node(text,integer)
|
function citus_check_connection_to_node(text,integer)
|
||||||
function citus_cleanup_orphaned_resources()
|
function citus_cleanup_orphaned_resources()
|
||||||
function citus_cleanup_orphaned_shards()
|
function citus_cleanup_orphaned_shards()
|
||||||
|
function citus_column_stats(text)
|
||||||
function citus_conninfo_cache_invalidate()
|
function citus_conninfo_cache_invalidate()
|
||||||
function citus_coordinator_nodeid()
|
function citus_coordinator_nodeid()
|
||||||
function citus_copy_shard_placement(bigint,integer,integer,citus.shard_transfer_mode)
|
function citus_copy_shard_placement(bigint,integer,integer,citus.shard_transfer_mode)
|
||||||
|
@ -393,6 +394,6 @@ ORDER BY 1;
|
||||||
view citus_stat_tenants_local
|
view citus_stat_tenants_local
|
||||||
view pg_dist_shard_placement
|
view pg_dist_shard_placement
|
||||||
view time_partitions
|
view time_partitions
|
||||||
(362 rows)
|
(363 rows)
|
||||||
|
|
||||||
DROP TABLE extension_basic_types;
|
DROP TABLE extension_basic_types;
|
||||||
|
|
|
@ -32,6 +32,7 @@ test: propagate_extension_commands
|
||||||
test: escape_extension_name
|
test: escape_extension_name
|
||||||
test: ref_citus_local_fkeys
|
test: ref_citus_local_fkeys
|
||||||
test: alter_database_owner
|
test: alter_database_owner
|
||||||
|
test: citus_column_stats
|
||||||
test: seclabel
|
test: seclabel
|
||||||
test: distributed_triggers
|
test: distributed_triggers
|
||||||
test: create_single_shard_table
|
test: create_single_shard_table
|
||||||
|
|
|
@ -0,0 +1,53 @@
|
||||||
|
SET citus.shard_count = 2;
|
||||||
|
|
||||||
|
CREATE USER user1;
|
||||||
|
GRANT ALL ON SCHEMA public TO user1;
|
||||||
|
|
||||||
|
SET SESSION AUTHORIZATION user1;
|
||||||
|
|
||||||
|
CREATE TABLE current_check (currentid int, payload text, rlsuser text);
|
||||||
|
GRANT ALL ON current_check TO PUBLIC;
|
||||||
|
|
||||||
|
INSERT INTO current_check VALUES
|
||||||
|
(1, 'abc', 'user1'),
|
||||||
|
(3, 'cde', 'user1'),
|
||||||
|
(4, 'def', 'user1'),
|
||||||
|
(4, 'def', 'user1'),
|
||||||
|
(3, 'cde', 'user2');
|
||||||
|
|
||||||
|
ALTER TABLE current_check ENABLE ROW LEVEL SECURITY;
|
||||||
|
|
||||||
|
SET row_security TO ON;
|
||||||
|
|
||||||
|
ANALYZE current_check;
|
||||||
|
|
||||||
|
SELECT attname, most_common_vals, most_common_freqs FROM pg_stats
|
||||||
|
WHERE tablename = 'current_check'
|
||||||
|
ORDER BY 1;
|
||||||
|
|
||||||
|
SELECT * FROM citus_column_stats('current_check');
|
||||||
|
|
||||||
|
CREATE TABLE dist_current_check (currentid int, payload text, rlsuser text);
|
||||||
|
SELECT create_distributed_table('dist_current_check', 'currentid');
|
||||||
|
|
||||||
|
INSERT INTO dist_current_check VALUES
|
||||||
|
(1, 'abc', 'user1'),
|
||||||
|
(3, 'cde', 'user1'),
|
||||||
|
(4, 'def', 'user1'),
|
||||||
|
(4, 'def', 'user1'),
|
||||||
|
(3, 'cde', 'user2');
|
||||||
|
|
||||||
|
ANALYZE dist_current_check;
|
||||||
|
|
||||||
|
SELECT attname, most_common_vals, most_common_freqs FROM pg_stats
|
||||||
|
WHERE tablename = 'dist_current_check'
|
||||||
|
ORDER BY 1;
|
||||||
|
|
||||||
|
SELECT * FROM citus_column_stats('dist_current_check');
|
||||||
|
|
||||||
|
DROP TABLE current_check;
|
||||||
|
DROP TABLE dist_current_check;
|
||||||
|
|
||||||
|
RESET SESSION AUTHORIZATION;
|
||||||
|
REVOKE ALL ON SCHEMA public FROM user1;
|
||||||
|
DROP USER user1;
|
Loading…
Reference in New Issue