mirror of https://github.com/citusdata/citus.git
wip
parent
4e3c6224fb
commit
eea210c9f5
|
@ -1,201 +0,0 @@
|
||||||
--
|
|
||||||
-- MULTI_AGG_APPROXIMATE_DISTINCT
|
|
||||||
--
|
|
||||||
-- Create HLL extension if present, print false result otherwise
|
|
||||||
SELECT CASE WHEN COUNT(*) > 0 THEN
|
|
||||||
'CREATE EXTENSION HLL'
|
|
||||||
ELSE 'SELECT false AS hll_present' END
|
|
||||||
AS create_cmd FROM pg_available_extensions()
|
|
||||||
WHERE name = 'hll'
|
|
||||||
\gset
|
|
||||||
:create_cmd;
|
|
||||||
SET citus.coordinator_aggregation_strategy TO 'disabled';
|
|
||||||
-- Try to execute count(distinct) when approximate distincts aren't enabled
|
|
||||||
SELECT count(distinct l_orderkey) FROM lineitem;
|
|
||||||
count
|
|
||||||
---------------------------------------------------------------------
|
|
||||||
2985
|
|
||||||
(1 row)
|
|
||||||
|
|
||||||
-- Check approximate count(distinct) at different precisions / error rates
|
|
||||||
SET citus.count_distinct_error_rate = 0.1;
|
|
||||||
SELECT count(distinct l_orderkey) FROM lineitem;
|
|
||||||
count
|
|
||||||
---------------------------------------------------------------------
|
|
||||||
2612
|
|
||||||
(1 row)
|
|
||||||
|
|
||||||
SET citus.count_distinct_error_rate = 0.01;
|
|
||||||
SELECT count(distinct l_orderkey) FROM lineitem;
|
|
||||||
count
|
|
||||||
---------------------------------------------------------------------
|
|
||||||
2967
|
|
||||||
(1 row)
|
|
||||||
|
|
||||||
-- Check approximate count(distinct) for different data types
|
|
||||||
SELECT count(distinct l_partkey) FROM lineitem;
|
|
||||||
count
|
|
||||||
---------------------------------------------------------------------
|
|
||||||
11654
|
|
||||||
(1 row)
|
|
||||||
|
|
||||||
SELECT count(distinct l_extendedprice) FROM lineitem;
|
|
||||||
count
|
|
||||||
---------------------------------------------------------------------
|
|
||||||
11691
|
|
||||||
(1 row)
|
|
||||||
|
|
||||||
SELECT count(distinct l_shipdate) FROM lineitem;
|
|
||||||
count
|
|
||||||
---------------------------------------------------------------------
|
|
||||||
2483
|
|
||||||
(1 row)
|
|
||||||
|
|
||||||
SELECT count(distinct l_comment) FROM lineitem;
|
|
||||||
count
|
|
||||||
---------------------------------------------------------------------
|
|
||||||
11788
|
|
||||||
(1 row)
|
|
||||||
|
|
||||||
-- Check that we can execute approximate count(distinct) on complex expressions
|
|
||||||
SELECT count(distinct (l_orderkey * 2 + 1)) FROM lineitem;
|
|
||||||
count
|
|
||||||
---------------------------------------------------------------------
|
|
||||||
2980
|
|
||||||
(1 row)
|
|
||||||
|
|
||||||
SELECT count(distinct extract(month from l_shipdate)) AS my_month FROM lineitem;
|
|
||||||
my_month
|
|
||||||
---------------------------------------------------------------------
|
|
||||||
12
|
|
||||||
(1 row)
|
|
||||||
|
|
||||||
SELECT count(distinct l_partkey) / count(distinct l_orderkey) FROM lineitem;
|
|
||||||
?column?
|
|
||||||
---------------------------------------------------------------------
|
|
||||||
3
|
|
||||||
(1 row)
|
|
||||||
|
|
||||||
-- Check that we can execute approximate count(distinct) on select queries that
|
|
||||||
-- contain different filter, join, sort and limit clauses
|
|
||||||
SELECT count(distinct l_orderkey) FROM lineitem
|
|
||||||
WHERE octet_length(l_comment) + octet_length('randomtext'::text) > 40;
|
|
||||||
count
|
|
||||||
---------------------------------------------------------------------
|
|
||||||
2355
|
|
||||||
(1 row)
|
|
||||||
|
|
||||||
SELECT count(DISTINCT l_orderkey) FROM lineitem, orders
|
|
||||||
WHERE l_orderkey = o_orderkey AND l_quantity < 5;
|
|
||||||
count
|
|
||||||
---------------------------------------------------------------------
|
|
||||||
835
|
|
||||||
(1 row)
|
|
||||||
|
|
||||||
SELECT count(DISTINCT l_orderkey) as distinct_order_count, l_quantity FROM lineitem
|
|
||||||
WHERE l_quantity < 32.0
|
|
||||||
GROUP BY l_quantity
|
|
||||||
ORDER BY distinct_order_count ASC, l_quantity ASC
|
|
||||||
LIMIT 10;
|
|
||||||
distinct_order_count | l_quantity
|
|
||||||
---------------------------------------------------------------------
|
|
||||||
210 | 29.00
|
|
||||||
216 | 13.00
|
|
||||||
217 | 16.00
|
|
||||||
219 | 3.00
|
|
||||||
220 | 18.00
|
|
||||||
222 | 14.00
|
|
||||||
223 | 7.00
|
|
||||||
223 | 17.00
|
|
||||||
223 | 26.00
|
|
||||||
223 | 31.00
|
|
||||||
(10 rows)
|
|
||||||
|
|
||||||
-- Check that approximate count(distinct) works at a table in a schema other than public
|
|
||||||
-- create necessary objects
|
|
||||||
SET citus.next_shard_id TO 20000000;
|
|
||||||
SET citus.next_placement_id TO 20000000;
|
|
||||||
CREATE SCHEMA test_count_distinct_schema;
|
|
||||||
CREATE TABLE test_count_distinct_schema.nation_hash(
|
|
||||||
n_nationkey integer not null,
|
|
||||||
n_name char(25) not null,
|
|
||||||
n_regionkey integer not null,
|
|
||||||
n_comment varchar(152)
|
|
||||||
);
|
|
||||||
SELECT create_distributed_table('test_count_distinct_schema.nation_hash', 'n_nationkey', 'hash');
|
|
||||||
create_distributed_table
|
|
||||||
---------------------------------------------------------------------
|
|
||||||
|
|
||||||
(1 row)
|
|
||||||
|
|
||||||
\copy test_count_distinct_schema.nation_hash FROM STDIN with delimiter '|';
|
|
||||||
SET search_path TO public;
|
|
||||||
SET citus.count_distinct_error_rate TO 0.01;
|
|
||||||
SELECT COUNT (DISTINCT n_regionkey) FROM test_count_distinct_schema.nation_hash;
|
|
||||||
count
|
|
||||||
---------------------------------------------------------------------
|
|
||||||
3
|
|
||||||
(1 row)
|
|
||||||
|
|
||||||
-- test with search_path is set
|
|
||||||
SET search_path TO test_count_distinct_schema;
|
|
||||||
SELECT COUNT (DISTINCT n_regionkey) FROM nation_hash;
|
|
||||||
count
|
|
||||||
---------------------------------------------------------------------
|
|
||||||
3
|
|
||||||
(1 row)
|
|
||||||
|
|
||||||
SET search_path TO public;
|
|
||||||
-- If we have an order by on count(distinct) that we intend to push down to
|
|
||||||
-- worker nodes, we need to error out. Otherwise, we are fine.
|
|
||||||
SET citus.limit_clause_row_fetch_count = 1000;
|
|
||||||
SELECT l_returnflag, count(DISTINCT l_shipdate) as count_distinct, count(*) as total
|
|
||||||
FROM lineitem
|
|
||||||
GROUP BY l_returnflag
|
|
||||||
ORDER BY count_distinct
|
|
||||||
LIMIT 10;
|
|
||||||
ERROR: cannot approximate count(distinct) and order by it
|
|
||||||
HINT: You might need to disable approximations for either count(distinct) or limit through configuration.
|
|
||||||
SELECT l_returnflag, count(DISTINCT l_shipdate) as count_distinct, count(*) as total
|
|
||||||
FROM lineitem
|
|
||||||
GROUP BY l_returnflag
|
|
||||||
ORDER BY total
|
|
||||||
LIMIT 10;
|
|
||||||
l_returnflag | count_distinct | total
|
|
||||||
---------------------------------------------------------------------
|
|
||||||
R | 1103 | 2901
|
|
||||||
A | 1108 | 2944
|
|
||||||
N | 1265 | 6155
|
|
||||||
(3 rows)
|
|
||||||
|
|
||||||
SELECT
|
|
||||||
l_orderkey,
|
|
||||||
count(l_partkey) FILTER (WHERE l_shipmode = 'AIR'),
|
|
||||||
count(DISTINCT l_partkey) FILTER (WHERE l_shipmode = 'AIR'),
|
|
||||||
count(DISTINCT CASE WHEN l_shipmode = 'AIR' THEN l_partkey ELSE NULL END)
|
|
||||||
FROM lineitem
|
|
||||||
GROUP BY l_orderkey
|
|
||||||
ORDER BY 2 DESC, 1 DESC
|
|
||||||
LIMIT 10;
|
|
||||||
l_orderkey | count | count | count
|
|
||||||
---------------------------------------------------------------------
|
|
||||||
12005 | 4 | 4 | 4
|
|
||||||
5409 | 4 | 4 | 4
|
|
||||||
4964 | 4 | 4 | 4
|
|
||||||
14848 | 3 | 3 | 3
|
|
||||||
14496 | 3 | 3 | 3
|
|
||||||
13473 | 3 | 3 | 3
|
|
||||||
13122 | 3 | 3 | 3
|
|
||||||
12929 | 3 | 3 | 3
|
|
||||||
12645 | 3 | 3 | 3
|
|
||||||
12417 | 3 | 3 | 3
|
|
||||||
(10 rows)
|
|
||||||
|
|
||||||
-- Check that we can revert config and disable count(distinct) approximations
|
|
||||||
SET citus.count_distinct_error_rate = 0.0;
|
|
||||||
SELECT count(distinct l_orderkey) FROM lineitem;
|
|
||||||
count
|
|
||||||
---------------------------------------------------------------------
|
|
||||||
2985
|
|
||||||
(1 row)
|
|
||||||
|
|
Loading…
Reference in New Issue