mirror of https://github.com/citusdata/citus.git
196 lines
7.8 KiB
SQL
196 lines
7.8 KiB
SQL
--
|
|
-- TDIGEST_AGGREGATE_SUPPORT
|
|
-- test the integration of github.com/tvondra/tdigest aggregates into the citus planner
|
|
-- for push down parts of the aggregate to use parallelized execution and reduced data
|
|
-- transfer sizes for aggregates not grouped by the distribution column
|
|
--
|
|
|
|
SET citus.next_shard_id TO 20070000;
|
|
CREATE SCHEMA tdigest_aggregate_support;
|
|
SET search_path TO tdigest_aggregate_support, public;
|
|
|
|
-- create the tdigest extension when installed
|
|
SELECT CASE WHEN COUNT(*) > 0
|
|
THEN 'CREATE EXTENSION tdigest WITH SCHEMA public'
|
|
ELSE 'SELECT false AS tdigest_present' END
|
|
AS create_cmd FROM pg_available_extensions()
|
|
WHERE name = 'tdigest'
|
|
\gset
|
|
:create_cmd;
|
|
|
|
SET citus.shard_count TO 4;
|
|
SET citus.coordinator_aggregation_strategy TO 'disabled'; -- prevent aggregate execution when the aggregate can't be pushed down
|
|
|
|
CREATE TABLE latencies (a int, b int, latency double precision);
|
|
SELECT create_distributed_table('latencies', 'a');
|
|
SELECT setseed(0.42); -- make the random data inserted deterministic
|
|
INSERT INTO latencies
|
|
SELECT (random()*20)::int AS a,
|
|
(random()*20)::int AS b,
|
|
random()*10000.0 AS latency
|
|
FROM generate_series(1, 10000);
|
|
|
|
-- explain no grouping to verify partially pushed down for tdigest(value, compression)
|
|
EXPLAIN (COSTS OFF, VERBOSE)
|
|
SELECT tdigest(latency, 100)
|
|
FROM latencies;
|
|
|
|
-- explain grouping by distribution column is completely pushed down for tdigest(value, compression)
|
|
EXPLAIN (COSTS OFF, VERBOSE)
|
|
SELECT a, tdigest(latency, 100)
|
|
FROM latencies
|
|
GROUP BY a;
|
|
|
|
-- explain grouping by non-distribution column is partially pushed down for tdigest(value, compression)
|
|
EXPLAIN (COSTS OFF, VERBOSE)
|
|
SELECT b, tdigest(latency, 100)
|
|
FROM latencies
|
|
GROUP BY b;
|
|
|
|
-- explain no grouping to verify partially pushed down for tdigest_precentile(value, compression, quantile)
|
|
EXPLAIN (COSTS OFF, VERBOSE)
|
|
SELECT tdigest_percentile(latency, 100, 0.99)
|
|
FROM latencies;
|
|
|
|
-- explain grouping by distribution column is completely pushed down for tdigest_precentile(value, compression, quantile)
|
|
EXPLAIN (COSTS OFF, VERBOSE)
|
|
SELECT a, tdigest_percentile(latency, 100, 0.99)
|
|
FROM latencies
|
|
GROUP BY a;
|
|
|
|
-- explain grouping by non-distribution column is partially pushed down for tdigest_precentile(value, compression, quantile)
|
|
EXPLAIN (COSTS OFF, VERBOSE)
|
|
SELECT b, tdigest_percentile(latency, 100, 0.99)
|
|
FROM latencies
|
|
GROUP BY b;
|
|
|
|
-- explain no grouping to verify partially pushed down for tdigest_precentile(value, compression, quantiles[])
|
|
EXPLAIN (COSTS OFF, VERBOSE)
|
|
SELECT tdigest_percentile(latency, 100, ARRAY[0.99, 0.95])
|
|
FROM latencies;
|
|
|
|
-- explain grouping by distribution column is completely pushed down for tdigest_precentile(value, compression, quantiles[])
|
|
EXPLAIN (COSTS OFF, VERBOSE)
|
|
SELECT a, tdigest_percentile(latency, 100, ARRAY[0.99, 0.95])
|
|
FROM latencies
|
|
GROUP BY a;
|
|
|
|
-- explain grouping by non-distribution column is partially pushed down for tdigest_precentile(value, compression, quantiles[])
|
|
EXPLAIN (COSTS OFF, VERBOSE)
|
|
SELECT b, tdigest_percentile(latency, 100, ARRAY[0.99, 0.95])
|
|
FROM latencies
|
|
GROUP BY b;
|
|
|
|
-- explain no grouping to verify partially pushed down for tdigest_precentile_of(value, compression, hypotetical_value)
|
|
EXPLAIN (COSTS OFF, VERBOSE)
|
|
SELECT tdigest_percentile_of(latency, 100, 9000)
|
|
FROM latencies;
|
|
|
|
-- explain grouping by distribution column is completely pushed down for tdigest_precentile_of(value, compression, hypotetical_value)
|
|
EXPLAIN (COSTS OFF, VERBOSE)
|
|
SELECT a, tdigest_percentile_of(latency, 100, 9000)
|
|
FROM latencies
|
|
GROUP BY a;
|
|
|
|
-- explain grouping by non-distribution column is partially pushed down for tdigest_precentile_of(value, compression, hypotetical_value)
|
|
EXPLAIN (COSTS OFF, VERBOSE)
|
|
SELECT b, tdigest_percentile_of(latency, 100, 9000)
|
|
FROM latencies
|
|
GROUP BY b;
|
|
|
|
-- explain no grouping to verify partially pushed down for tdigest_precentile_of(value, compression, hypotetical_values[])
|
|
EXPLAIN (COSTS OFF, VERBOSE)
|
|
SELECT tdigest_percentile_of(latency, 100, ARRAY[9000, 9500])
|
|
FROM latencies;
|
|
|
|
-- explain grouping by distribution column is completely pushed down for tdigest_precentile_of(value, compression, hypotetical_values[])
|
|
EXPLAIN (COSTS OFF, VERBOSE)
|
|
SELECT a, tdigest_percentile_of(latency, 100, ARRAY[9000, 9500])
|
|
FROM latencies
|
|
GROUP BY a;
|
|
|
|
-- explain grouping by non-distribution column is partially pushed down for tdigest_precentile_of(value, compression, hypotetical_values[])
|
|
EXPLAIN (COSTS OFF, VERBOSE)
|
|
SELECT b, tdigest_percentile_of(latency, 100, ARRAY[9000, 9500])
|
|
FROM latencies
|
|
GROUP BY b;
|
|
|
|
-- verifying results - should be stable due to seed while inserting the data, if failure due to data these queries could be removed or check for certain ranges
|
|
SELECT tdigest(latency, 100) FROM latencies;
|
|
SELECT tdigest_percentile(latency, 100, 0.99) FROM latencies;
|
|
SELECT tdigest_percentile(latency, 100, ARRAY[0.99, 0.95]) FROM latencies;
|
|
SELECT tdigest_percentile_of(latency, 100, 9000) FROM latencies;
|
|
SELECT tdigest_percentile_of(latency, 100, ARRAY[9000, 9500]) FROM latencies;
|
|
|
|
CREATE TABLE latencies_rollup (a int, tdigest tdigest);
|
|
SELECT create_distributed_table('latencies_rollup', 'a', colocate_with => 'latencies');
|
|
|
|
INSERT INTO latencies_rollup
|
|
SELECT a, tdigest(latency, 100)
|
|
FROM latencies
|
|
GROUP BY a;
|
|
|
|
EXPLAIN (COSTS OFF, VERBOSE)
|
|
SELECT tdigest(tdigest)
|
|
FROM latencies_rollup;
|
|
|
|
-- explain grouping by distribution column is completely pushed down for tdigest(tdigest)
|
|
EXPLAIN (COSTS OFF, VERBOSE)
|
|
SELECT a, tdigest(tdigest)
|
|
FROM latencies_rollup
|
|
GROUP BY a;
|
|
|
|
-- explain no grouping to verify partially pushed down for tdigest_precentile(tdigest, quantile)
|
|
EXPLAIN (COSTS OFF, VERBOSE)
|
|
SELECT tdigest_percentile(tdigest, 0.99)
|
|
FROM latencies_rollup;
|
|
|
|
-- explain grouping by distribution column is completely pushed down for tdigest_precentile(tdigest, quantile)
|
|
EXPLAIN (COSTS OFF, VERBOSE)
|
|
SELECT a, tdigest_percentile(tdigest, 0.99)
|
|
FROM latencies_rollup
|
|
GROUP BY a;
|
|
|
|
-- explain no grouping to verify partially pushed down for tdigest_precentile(value, compression, quantiles[])
|
|
EXPLAIN (COSTS OFF, VERBOSE)
|
|
SELECT tdigest_percentile(tdigest, ARRAY[0.99, 0.95])
|
|
FROM latencies_rollup;
|
|
|
|
-- explain grouping by distribution column is completely pushed down for tdigest_precentile(value, compression, quantiles[])
|
|
EXPLAIN (COSTS OFF, VERBOSE)
|
|
SELECT a, tdigest_percentile(tdigest, ARRAY[0.99, 0.95])
|
|
FROM latencies_rollup
|
|
GROUP BY a;
|
|
|
|
-- explain no grouping to verify partially pushed down for tdigest_precentile_of(value, compression, hypotetical_value)
|
|
EXPLAIN (COSTS OFF, VERBOSE)
|
|
SELECT tdigest_percentile_of(tdigest, 9000)
|
|
FROM latencies_rollup;
|
|
|
|
-- explain grouping by distribution column is completely pushed down for tdigest_precentile_of(value, compression, hypotetical_value)
|
|
EXPLAIN (COSTS OFF, VERBOSE)
|
|
SELECT a, tdigest_percentile_of(tdigest, 9000)
|
|
FROM latencies_rollup
|
|
GROUP BY a;
|
|
|
|
-- explain no grouping to verify partially pushed down for tdigest_precentile_of(value, compression, hypotetical_values[])
|
|
EXPLAIN (COSTS OFF, VERBOSE)
|
|
SELECT tdigest_percentile_of(tdigest, ARRAY[9000, 9500])
|
|
FROM latencies_rollup;
|
|
|
|
-- explain grouping by distribution column is completely pushed down for tdigest_precentile_of(value, compression, hypotetical_values[])
|
|
EXPLAIN (COSTS OFF, VERBOSE)
|
|
SELECT a, tdigest_percentile_of(tdigest, ARRAY[9000, 9500])
|
|
FROM latencies_rollup
|
|
GROUP BY a;
|
|
|
|
-- verifying results - should be stable due to seed while inserting the data, if failure due to data these queries could be removed or check for certain ranges
|
|
SELECT tdigest(tdigest) FROM latencies_rollup;
|
|
SELECT tdigest_percentile(tdigest, 0.99) FROM latencies_rollup;
|
|
SELECT tdigest_percentile(tdigest, ARRAY[0.99, 0.95]) FROM latencies_rollup;
|
|
SELECT tdigest_percentile_of(tdigest, 9000) FROM latencies_rollup;
|
|
SELECT tdigest_percentile_of(tdigest, ARRAY[9000, 9500]) FROM latencies_rollup;
|
|
|
|
SET client_min_messages TO WARNING; -- suppress cascade messages
|
|
DROP SCHEMA tdigest_aggregate_support CASCADE;
|