citus/src/test/regress/sql/multi_agg_approximate_disti...

137 lines
4.6 KiB
SQL

--
-- MULTI_AGG_APPROXIMATE_DISTINCT
--
-- Create HLL extension if present, print false result otherwise
SELECT CASE WHEN COUNT(*) > 0 THEN
'CREATE EXTENSION HLL'
ELSE 'SELECT false AS hll_present' END
AS create_cmd FROM pg_available_extensions()
WHERE name = 'hll'
\gset
:create_cmd;
SELECT public.explain_has_distributed_subplan($$
EXPLAIN SELECT symbol_id,
HLL_ADD_AGG(HLL_HASH_BIGINT(event_id)) AS event_hll_hash,
HLL_CARDINALITY(HLL_ADD_AGG(HLL_HASH_BIGINT(event_id))) AS event_n_users
FROM (
SELECT event_time, composite_id, event_id, 4640476 symbol_id FROM "events"
UNION ALL
SELECT event_time, composite_id, event_id, 4640477 symbol_id FROM "events"
) pushdown_events
GROUP BY symbol_id;
$$);
SET citus.coordinator_aggregation_strategy TO 'disabled';
-- Try to execute count(distinct) when approximate distincts aren't enabled
SELECT count(distinct l_orderkey) FROM lineitem;
-- Check approximate count(distinct) at different precisions / error rates
SET citus.count_distinct_error_rate = 0.1;
SELECT count(distinct l_orderkey) FROM lineitem;
SET citus.count_distinct_error_rate = 0.01;
SELECT count(distinct l_orderkey) FROM lineitem;
-- Check approximate count(distinct) for different data types
SELECT count(distinct l_partkey) FROM lineitem;
SELECT count(distinct l_extendedprice) FROM lineitem;
SELECT count(distinct l_shipdate) FROM lineitem;
SELECT count(distinct l_comment) FROM lineitem;
-- Check that we can execute approximate count(distinct) on complex expressions
SELECT count(distinct (l_orderkey * 2 + 1)) FROM lineitem;
SELECT count(distinct extract(month from l_shipdate)) AS my_month FROM lineitem;
SELECT count(distinct l_partkey) / count(distinct l_orderkey) FROM lineitem;
-- Check that we can execute approximate count(distinct) on select queries that
-- contain different filter, join, sort and limit clauses
SELECT count(distinct l_orderkey) FROM lineitem
WHERE octet_length(l_comment) + octet_length('randomtext'::text) > 40;
SELECT count(DISTINCT l_orderkey) FROM lineitem, orders
WHERE l_orderkey = o_orderkey AND l_quantity < 5;
SELECT count(DISTINCT l_orderkey) as distinct_order_count, l_quantity FROM lineitem
WHERE l_quantity < 32.0
GROUP BY l_quantity
ORDER BY distinct_order_count ASC, l_quantity ASC
LIMIT 10;
-- Check that approximate count(distinct) works at a table in a schema other than public
-- create necessary objects
SET citus.next_shard_id TO 20000000;
SET citus.next_placement_id TO 20000000;
CREATE SCHEMA test_count_distinct_schema;
CREATE TABLE test_count_distinct_schema.nation_hash(
n_nationkey integer not null,
n_name char(25) not null,
n_regionkey integer not null,
n_comment varchar(152)
);
SELECT create_distributed_table('test_count_distinct_schema.nation_hash', 'n_nationkey', 'hash');
\copy test_count_distinct_schema.nation_hash FROM STDIN with delimiter '|';
0|ALGERIA|0|haggle. carefully final deposits detect slyly agai
1|ARGENTINA|1|al foxes promise slyly according to the regular accounts. bold requests alon
2|BRAZIL|1|y alongside of the pending deposits. carefully special packages are about the ironic forges. slyly special
3|CANADA|1|eas hang ironic, silent packages. slyly regular packages are furiously over the tithes. fluffily bold
4|EGYPT|4|y above the carefully unusual theodolites. final dugouts are quickly across the furiously regular d
5|ETHIOPIA|0|ven packages wake quickly. regu
\.
SET search_path TO public;
SET citus.count_distinct_error_rate TO 0.01;
SELECT COUNT (DISTINCT n_regionkey) FROM test_count_distinct_schema.nation_hash;
-- test with search_path is set
SET search_path TO test_count_distinct_schema;
SELECT COUNT (DISTINCT n_regionkey) FROM nation_hash;
SET search_path TO public;
-- If we have an order by on count(distinct) that we intend to push down to
-- worker nodes, we need to error out. Otherwise, we are fine.
SET citus.limit_clause_row_fetch_count = 1000;
SELECT l_returnflag, count(DISTINCT l_shipdate) as count_distinct, count(*) as total
FROM lineitem
GROUP BY l_returnflag
ORDER BY count_distinct
LIMIT 10;
SELECT l_returnflag, count(DISTINCT l_shipdate) as count_distinct, count(*) as total
FROM lineitem
GROUP BY l_returnflag
ORDER BY total
LIMIT 10;
SELECT
l_partkey,
count(l_partkey) FILTER (WHERE l_shipmode = 'AIR'),
count(DISTINCT l_partkey) FILTER (WHERE l_shipmode = 'AIR'),
count(DISTINCT CASE WHEN l_shipmode = 'AIR' THEN l_partkey ELSE NULL END)
FROM lineitem
GROUP BY l_partkey
ORDER BY 2 DESC, 1 DESC
LIMIT 10;
-- Check that we can revert config and disable count(distinct) approximations
SET citus.count_distinct_error_rate = 0.0;
SELECT count(distinct l_orderkey) FROM lineitem;