mirror of https://github.com/citusdata/citus.git
168 lines
5.9 KiB
Plaintext
168 lines
5.9 KiB
Plaintext
--
|
|
-- MULTI_AGG_DISTINCT
|
|
--
|
|
ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 200000;
|
|
ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 200000;
|
|
-- Create a new range partitioned lineitem table and stage data into it
|
|
-- Test table that is range-distributed on l_orderkey by the
-- master_create_distributed_table call that follows this definition.
CREATE TABLE lineitem_range (
|
|
-- Distribution column (passed to master_create_distributed_table below).
l_orderkey bigint not null,
|
|
-- Non-distribution column; joined against part.p_partkey in the join test below.
l_partkey integer not null,
|
|
l_suppkey integer not null,
|
|
l_linenumber integer not null,
|
|
l_quantity decimal(15, 2) not null,
|
|
l_extendedprice decimal(15, 2) not null,
|
|
l_discount decimal(15, 2) not null,
|
|
l_tax decimal(15, 2) not null,
|
|
l_returnflag char(1) not null,
|
|
l_linestatus char(1) not null,
|
|
l_shipdate date not null,
|
|
l_commitdate date not null,
|
|
l_receiptdate date not null,
|
|
l_shipinstruct char(25) not null,
|
|
l_shipmode char(10) not null,
|
|
l_comment varchar(44) not null );
|
|
SELECT master_create_distributed_table('lineitem_range', 'l_orderkey', 'range');
|
|
master_create_distributed_table
|
|
---------------------------------
|
|
|
|
(1 row)
|
|
|
|
SET citus.shard_max_size TO "500MB";
|
|
\STAGE lineitem_range FROM '@abs_srcdir@/data/lineitem.1.data' with delimiter '|'
|
|
\STAGE lineitem_range FROM '@abs_srcdir@/data/lineitem.2.data' with delimiter '|'
|
|
RESET citus.shard_max_size;
|
|
-- Run aggregate(distinct) on partition column for range partitioned table
|
|
SELECT count(distinct l_orderkey) FROM lineitem_range;
|
|
count
|
|
-------
|
|
2985
|
|
(1 row)
|
|
|
|
SELECT avg(distinct l_orderkey) FROM lineitem_range;
|
|
avg
|
|
-----------------------
|
|
7463.9474036850921273
|
|
(1 row)
|
|
|
|
-- Run count(distinct) on join between a range partitioned table and a single
|
|
-- sharded table. For this test, we also change a config setting to ensure that
|
|
-- we don't repartition any of the tables during the query.
|
|
SET citus.large_table_shard_count TO 2;
|
|
SELECT p_partkey, count(distinct l_orderkey) FROM lineitem_range, part
|
|
WHERE l_partkey = p_partkey
|
|
GROUP BY p_partkey
|
|
ORDER BY p_partkey LIMIT 10;
|
|
p_partkey | count
|
|
-----------+-------
|
|
18 | 1
|
|
79 | 1
|
|
91 | 1
|
|
149 | 2
|
|
175 | 1
|
|
179 | 1
|
|
182 | 1
|
|
195 | 1
|
|
204 | 1
|
|
222 | 1
|
|
(10 rows)
|
|
|
|
RESET citus.large_table_shard_count;
|
|
-- Check that count(distinct) is rejected on a non-partition column and on
|
|
-- complex expressions.
|
|
SELECT count(distinct l_partkey) FROM lineitem_range;
|
|
ERROR: cannot compute aggregate (distinct)
|
|
DETAIL: table partitioning is unsuitable for aggregate (distinct)
|
|
HINT: You can load the hll extension from contrib packages and enable distinct approximations.
|
|
SELECT count(distinct (l_orderkey + 1)) FROM lineitem_range;
|
|
ERROR: cannot compute aggregate (distinct)
|
|
DETAIL: aggregate (distinct) on complex expressions is unsupported
|
|
HINT: You can load the hll extension from contrib packages and enable distinct approximations.
|
|
-- Now test append partitioned tables. First run count(distinct) on a single
|
|
-- sharded table.
|
|
SELECT count(distinct p_mfgr) FROM part;
|
|
count
|
|
-------
|
|
5
|
|
(1 row)
|
|
|
|
SELECT p_mfgr, count(distinct p_partkey) FROM part GROUP BY p_mfgr ORDER BY p_mfgr;
|
|
p_mfgr | count
|
|
---------------------------+-------
|
|
Manufacturer#1 | 193
|
|
Manufacturer#2 | 190
|
|
Manufacturer#3 | 228
|
|
Manufacturer#4 | 204
|
|
Manufacturer#5 | 185
|
|
(5 rows)
|
|
|
|
-- We don't support count(distinct) queries if the table is append partitioned and
|
|
-- has multiple shards
|
|
SELECT count(distinct o_orderkey) FROM orders;
|
|
ERROR: cannot compute aggregate (distinct)
|
|
DETAIL: table partitioning is unsuitable for aggregate (distinct)
|
|
HINT: You can load the hll extension from contrib packages and enable distinct approximations.
|
|
-- Hash partitioned tables:
|
|
-- Hash-distributed counterpart of lineitem_range with an identical column
-- list; it is distributed on l_orderkey and given worker shards by the
-- master_create_distributed_table / master_create_worker_shards calls below.
CREATE TABLE lineitem_hash (
|
|
-- Distribution column (passed to master_create_distributed_table below).
l_orderkey bigint not null,
|
|
l_partkey integer not null,
|
|
l_suppkey integer not null,
|
|
l_linenumber integer not null,
|
|
l_quantity decimal(15, 2) not null,
|
|
l_extendedprice decimal(15, 2) not null,
|
|
l_discount decimal(15, 2) not null,
|
|
l_tax decimal(15, 2) not null,
|
|
l_returnflag char(1) not null,
|
|
l_linestatus char(1) not null,
|
|
l_shipdate date not null,
|
|
l_commitdate date not null,
|
|
l_receiptdate date not null,
|
|
l_shipinstruct char(25) not null,
|
|
l_shipmode char(10) not null,
|
|
l_comment varchar(44) not null );
|
|
SELECT master_create_distributed_table('lineitem_hash', 'l_orderkey', 'hash');
|
|
master_create_distributed_table
|
|
---------------------------------
|
|
|
|
(1 row)
|
|
|
|
SELECT master_create_worker_shards('lineitem_hash', 4, 1);
|
|
master_create_worker_shards
|
|
-----------------------------
|
|
|
|
(1 row)
|
|
|
|
\COPY lineitem_hash FROM '@abs_srcdir@/data/lineitem.1.data' with delimiter '|'
|
|
\COPY lineitem_hash FROM '@abs_srcdir@/data/lineitem.2.data' with delimiter '|'
|
|
-- aggregate(distinct) on partition column is allowed
|
|
SELECT count(distinct l_orderkey) FROM lineitem_hash;
|
|
count
|
|
-------
|
|
2985
|
|
(1 row)
|
|
|
|
SELECT avg(distinct l_orderkey) FROM lineitem_hash;
|
|
avg
|
|
-----------------------
|
|
7463.9474036850921273
|
|
(1 row)
|
|
|
|
-- count(distinct) on a non-partition column or on an expression is not allowed
|
|
SELECT count(distinct l_partkey) FROM lineitem_hash;
|
|
ERROR: cannot compute aggregate (distinct)
|
|
DETAIL: table partitioning is unsuitable for aggregate (distinct)
|
|
HINT: You can load the hll extension from contrib packages and enable distinct approximations.
|
|
SELECT count(distinct (l_orderkey + 1)) FROM lineitem_hash;
|
|
ERROR: cannot compute aggregate (distinct)
|
|
DETAIL: aggregate (distinct) on complex expressions is unsupported
|
|
HINT: You can load the hll extension from contrib packages and enable distinct approximations.
|
|
-- agg(distinct) is allowed if we group by the partition column
|
|
SELECT l_orderkey, count(distinct l_partkey) INTO hash_results FROM lineitem_hash GROUP BY l_orderkey;
|
|
SELECT l_orderkey, count(distinct l_partkey) INTO range_results FROM lineitem_range GROUP BY l_orderkey;
|
|
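-- Both tables were loaded from the same data files, so no l_orderkey may have
-- differing counts between them: the comparison below must return zero rows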
|
|
SELECT * FROM hash_results h, range_results r WHERE h.l_orderkey = r.l_orderkey AND h.count != r.count;
|
|
l_orderkey | count | l_orderkey | count
|
|
------------+-------+------------+-------
|
|
(0 rows)
|
|
|
|
DROP TABLE lineitem_hash;
|