citus/src/test/regress/expected/multi_agg_approximate_disti...

172 lines
4.9 KiB
Plaintext

--
-- MULTI_AGG_APPROXIMATE_DISTINCT
--
-- This is a recorded transcript: each statement is followed by the output
-- that was captured for it (result rows, or ERROR/DETAIL/HINT lines).
-- Restart the shard id and job id sequences at a fixed value.
-- NOTE(review): presumably this keeps generated ids stable between runs; confirm.
ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 340000;
ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 340000;
-- Try to execute count(distinct) when approximate distincts aren't enabled
-- The recorded outcome is a rejection: the ERROR, DETAIL and HINT lines below
-- are the expected result of this query, not a failure of the test.
-- NOTE(review): lineitem is not created in this file; presumably an earlier
-- test loads it; confirm against the test schedule.
SELECT count(distinct l_orderkey) FROM lineitem;
ERROR: cannot compute aggregate (distinct)
DETAIL: table partitioning is unsuitable for aggregate (distinct)
HINT: You can load the hll extension from contrib packages and enable distinct approximations.
-- Check approximate count(distinct) at different precisions / error rates
-- The same query is run under two citus.count_distinct_error_rate settings.
-- With a non-zero rate the query returns a count instead of an error, and the
-- recorded counts differ between the two settings (2612 at 0.1, 2967 at 0.01).
SET citus.count_distinct_error_rate = 0.1;
SELECT count(distinct l_orderkey) FROM lineitem;
count
-------
2612
(1 row)
SET citus.count_distinct_error_rate = 0.01;
SELECT count(distinct l_orderkey) FROM lineitem;
count
-------
2967
(1 row)
-- Check approximate count(distinct) for different data types
-- No SET is issued in this section, so the most recent error rate set earlier
-- in the file is still in effect.
-- NOTE(review): the column types are not visible in this file; judging by the
-- names they cover integer, numeric, date and text-like columns; confirm
-- against the lineitem table definition.
SELECT count(distinct l_partkey) FROM lineitem;
count
-------
11654
(1 row)
SELECT count(distinct l_extendedprice) FROM lineitem;
count
-------
11691
(1 row)
SELECT count(distinct l_shipdate) FROM lineitem;
count
-------
2483
(1 row)
SELECT count(distinct l_comment) FROM lineitem;
count
-------
11788
(1 row)
-- Check that we can execute approximate count(distinct) on complex expressions
-- Case 1: the distinct argument is an arithmetic expression over a column.
SELECT count(distinct (l_orderkey * 2 + 1)) FROM lineitem;
count
-------
2980
(1 row)
-- Case 2: the distinct argument is a function call (extract), and the
-- aggregate carries a column alias that shows up as the result header.
SELECT count(distinct extract(month from l_shipdate)) AS my_month FROM lineitem;
my_month
----------
12
(1 row)
-- Case 3: two count(distinct) aggregates combined in one expression.
-- The result has no alias, hence the "?column?" header.
SELECT count(distinct l_partkey) / count(distinct l_orderkey) FROM lineitem;
?column?
----------
3
(1 row)
-- Check that we can execute approximate count(distinct) on select queries that
-- contain different filter, join, sort and limit clauses
-- Filter case: the WHERE clause applies functions to a column and a literal.
SELECT count(distinct l_orderkey) FROM lineitem
WHERE octet_length(l_comment) + octet_length('randomtext'::text) > 40;
count
-------
2355
(1 row)
-- Join case: lineitem joined to orders on the order key, with an extra filter.
SELECT count(DISTINCT l_orderkey) FROM lineitem, orders
WHERE l_orderkey = o_orderkey AND l_quantity < 5;
count
-------
835
(1 row)
-- Group/sort/limit case: one count(distinct) per l_quantity group.
-- l_quantity is the second sort key, so groups with equal counts (the four
-- rows with 223 below) appear in a fixed order.
SELECT count(DISTINCT l_orderkey) as distinct_order_count, l_quantity FROM lineitem
WHERE l_quantity < 32.0
GROUP BY l_quantity
ORDER BY distinct_order_count ASC, l_quantity ASC
LIMIT 10;
distinct_order_count | l_quantity
----------------------+------------
210 | 29.00
216 | 13.00
217 | 16.00
219 | 3.00
220 | 18.00
222 | 14.00
223 | 7.00
223 | 17.00
223 | 26.00
223 | 31.00
(10 rows)
-- Check that approximate count(distinct) works at a table in a schema other than public
-- create necessary objects
CREATE SCHEMA test_count_distinct_schema;
CREATE TABLE test_count_distinct_schema.nation_hash(
n_nationkey integer not null,
n_name char(25) not null,
n_regionkey integer not null,
n_comment varchar(152)
);
-- Distribute the table by hash on n_nationkey.
SELECT master_create_distributed_table('test_count_distinct_schema.nation_hash', 'n_nationkey', 'hash');
master_create_distributed_table
---------------------------------
(1 row)
-- NOTE(review): the arguments 4 and 2 are presumably the shard count and the
-- replication factor; confirm against the Citus documentation.
SELECT master_create_worker_shards('test_count_distinct_schema.nation_hash', 4, 2);
master_create_worker_shards
-----------------------------
(1 row)
-- Load rows from standard input using '|' as the field delimiter. The rows
-- themselves do not appear in this transcript.
\copy test_count_distinct_schema.nation_hash FROM STDIN with delimiter '|';
-- First check: search_path is public, so the table is schema-qualified.
SET search_path TO public;
SET citus.count_distinct_error_rate TO 0.01;
SELECT COUNT (DISTINCT n_regionkey) FROM test_count_distinct_schema.nation_hash;
count
-------
3
(1 row)
-- Test with search_path set to the new schema, so the table name is unqualified.
-- The recorded count matches the schema-qualified query above.
SET search_path TO test_count_distinct_schema;
SELECT COUNT (DISTINCT n_regionkey) FROM nation_hash;
count
-------
3
(1 row)
-- Restore search_path for the statements that follow.
SET search_path TO public;
-- If we have an order by on count(distinct) that we intend to push down to
-- worker nodes, we need to error out. Otherwise, we are fine.
-- Both queries below run with citus.limit_clause_row_fetch_count set to 1000
-- and differ only in their ORDER BY column.
SET citus.limit_clause_row_fetch_count = 1000;
-- Ordering by the approximate count(distinct) itself: the recorded outcome is
-- the ERROR and HINT below.
SELECT l_returnflag, count(DISTINCT l_shipdate) as count_distinct, count(*) as total
FROM lineitem
GROUP BY l_returnflag
ORDER BY count_distinct
LIMIT 10;
ERROR: cannot approximate count(distinct) and order by it
HINT: You might need to disable approximations for either count(distinct) or limit through configuration.
-- Ordering by count(*) instead: the query succeeds and still returns the
-- approximate count(distinct) as a regular output column.
SELECT l_returnflag, count(DISTINCT l_shipdate) as count_distinct, count(*) as total
FROM lineitem
GROUP BY l_returnflag
ORDER BY total
LIMIT 10;
l_returnflag | count_distinct | total
--------------+----------------+-------
R | 1103 | 2901
A | 1108 | 2944
N | 1265 | 6155
(3 rows)
-- Check that we can revert config and disable count(distinct) approximations
-- With the error rate set back to 0.0, the query is rejected with the same
-- ERROR, DETAIL and HINT recorded at the top of this file, before any error
-- rate was set.
SET citus.count_distinct_error_rate = 0.0;
SELECT count(distinct l_orderkey) FROM lineitem;
ERROR: cannot compute aggregate (distinct)
DETAIL: table partitioning is unsuitable for aggregate (distinct)
HINT: You can load the hll extension from contrib packages and enable distinct approximations.