citus/src/test/regress/expected/multi_agg_distinct.out

244 lines
8.1 KiB
Plaintext

--
-- MULTI_AGG_DISTINCT
--
-- Turn off the coordinator aggregation strategy for the rest of this test.
-- NOTE(review): the sum(distinct ...) queries later in this file expect a
-- "cannot compute aggregate (distinct)" error; presumably that outcome depends
-- on this setting being 'disabled' -- confirm against the Citus GUC docs.
SET citus.coordinator_aggregation_strategy TO 'disabled';
-- Set up lineitem_range: a lineitem table that is range-distributed on
-- l_orderkey and loaded with data by the statements that follow.
CREATE TABLE lineitem_range (
    l_orderkey      bigint         NOT NULL,
    l_partkey       integer        NOT NULL,
    l_suppkey       integer        NOT NULL,
    l_linenumber    integer        NOT NULL,
    l_quantity      decimal(15, 2) NOT NULL,
    l_extendedprice decimal(15, 2) NOT NULL,
    l_discount      decimal(15, 2) NOT NULL,
    l_tax           decimal(15, 2) NOT NULL,
    l_returnflag    char(1)        NOT NULL,
    l_linestatus    char(1)        NOT NULL,
    l_shipdate      date           NOT NULL,
    l_commitdate    date           NOT NULL,
    l_receiptdate   date           NOT NULL,
    l_shipinstruct  char(25)       NOT NULL,
    l_shipmode      char(10)       NOT NULL,
    l_comment       varchar(44)    NOT NULL
);
-- Distribute lineitem_range by range on l_orderkey.
SELECT create_distributed_table('lineitem_range', 'l_orderkey', 'range');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- Create the first empty shard. \gset stores the returned id in the psql
-- variable new_shard_id, which the UPDATE then uses to write the shard's
-- shardminvalue/shardmaxvalue (1 and 5986) directly into pg_dist_shard.
SELECT master_create_empty_shard('lineitem_range') AS new_shard_id
\gset
UPDATE pg_dist_shard SET shardminvalue = 1, shardmaxvalue = 5986
WHERE shardid = :new_shard_id;
-- Second shard, same procedure, with bounds 8997 and 14947. The two ranges
-- are not contiguous: no shard covers keys between 5986 and 8997.
SELECT master_create_empty_shard('lineitem_range') AS new_shard_id
\gset
UPDATE pg_dist_shard SET shardminvalue = 8997, shardmaxvalue = 14947
WHERE shardid = :new_shard_id;
-- Build the data file paths from abs_srcdir (not set in this file; presumably
-- supplied by the regression test harness -- confirm) and load both
-- '|'-delimited files into the table.
\set lineitem_1_data_file :abs_srcdir '/data/lineitem.1.data'
\set lineitem_2_data_file :abs_srcdir '/data/lineitem.2.data'
COPY lineitem_range FROM :'lineitem_1_data_file' with delimiter '|';
COPY lineitem_range FROM :'lineitem_2_data_file' with delimiter '|';
-- Run aggregate(distinct) on partition column for range partitioned table
-- The expected count (2985) and avg below are the same values expected from
-- the hash-distributed copy of this data later in the file.
SELECT count(distinct l_orderkey) FROM lineitem_range;
count
---------------------------------------------------------------------
2985
(1 row)
SELECT avg(distinct l_orderkey) FROM lineitem_range;
avg
---------------------------------------------------------------------
7463.9474036850921273
(1 row)
-- Run count(distinct) on join between a range partitioned table and a single
-- sharded table. For this test, we also change a config setting to ensure that
-- we don't repartition any of the tables during the query.
-- NOTE(review): no setting is changed between this comment and the query; the
-- only SET visible before this point is citus.coordinator_aggregation_strategy
-- at the top of the file. The sentence above may be stale -- confirm.
-- The part table is not created in this file; presumably an earlier test in
-- the schedule sets it up.
-- ORDER BY p_partkey makes the LIMIT 10 output deterministic.
SELECT p_partkey, count(distinct l_orderkey) FROM lineitem_range, part
WHERE l_partkey = p_partkey
GROUP BY p_partkey
ORDER BY p_partkey LIMIT 10;
p_partkey | count
---------------------------------------------------------------------
18 | 1
79 | 1
91 | 1
149 | 2
175 | 1
179 | 1
182 | 1
195 | 1
204 | 1
222 | 1
(10 rows)
-- Check that we support more complex expressions.
-- Three expression shapes are exercised on the partition column: a
-- parenthesized column, column + 1 (expected count stays 2985), and
-- column % 5 (expected count is 5).
SELECT count(distinct (l_orderkey)) FROM lineitem_range;
count
---------------------------------------------------------------------
2985
(1 row)
SELECT count(distinct (l_orderkey + 1)) FROM lineitem_range;
count
---------------------------------------------------------------------
2985
(1 row)
SELECT count(distinct (l_orderkey % 5)) FROM lineitem_range;
count
---------------------------------------------------------------------
5
(1 row)
-- count(distinct) on non-partition column is allowed
-- Same three expression shapes, this time on l_partkey, which is not the
-- distribution column of lineitem_range.
SELECT count(distinct l_partkey) FROM lineitem_range;
count
---------------------------------------------------------------------
11661
(1 row)
SELECT count(distinct (l_partkey + 1)) FROM lineitem_range;
count
---------------------------------------------------------------------
11661
(1 row)
SELECT count(distinct (l_partkey % 5)) FROM lineitem_range;
count
---------------------------------------------------------------------
5
(1 row)
-- Now test append partitioned tables. First run count(distinct) on a single
-- sharded table.
-- Neither part nor orders is created in this file; presumably both are set up
-- by an earlier test in the schedule -- confirm.
SELECT count(distinct p_mfgr) FROM part;
count
---------------------------------------------------------------------
5
(1 row)
-- Per-manufacturer distinct part counts; ORDER BY p_mfgr keeps the expected
-- row order stable.
SELECT p_mfgr, count(distinct p_partkey) FROM part GROUP BY p_mfgr ORDER BY p_mfgr;
p_mfgr | count
---------------------------------------------------------------------
Manufacturer#1 | 193
Manufacturer#2 | 190
Manufacturer#3 | 228
Manufacturer#4 | 204
Manufacturer#5 | 185
(5 rows)
-- We support count(distinct) queries on append partitioned tables
-- both on partition column, and non-partition column.
SELECT count(distinct o_orderkey), count(distinct o_custkey) FROM orders;
count | count
---------------------------------------------------------------------
2985 | 923
(1 row)
-- Hash partitioned tables: lineitem_hash has the same columns as
-- lineitem_range and is hash-distributed on l_orderkey below.
CREATE TABLE lineitem_hash (
    l_orderkey      bigint         NOT NULL,
    l_partkey       integer        NOT NULL,
    l_suppkey       integer        NOT NULL,
    l_linenumber    integer        NOT NULL,
    l_quantity      decimal(15, 2) NOT NULL,
    l_extendedprice decimal(15, 2) NOT NULL,
    l_discount      decimal(15, 2) NOT NULL,
    l_tax           decimal(15, 2) NOT NULL,
    l_returnflag    char(1)        NOT NULL,
    l_linestatus    char(1)        NOT NULL,
    l_shipdate      date           NOT NULL,
    l_commitdate    date           NOT NULL,
    l_receiptdate   date           NOT NULL,
    l_shipinstruct  char(25)       NOT NULL,
    l_shipmode      char(10)       NOT NULL,
    l_comment       varchar(44)    NOT NULL
);
-- Set the shard replication factor to 1 before distributing the table.
SET citus.shard_replication_factor TO 1;
-- Distribute lineitem_hash by hash on l_orderkey.
SELECT create_distributed_table('lineitem_hash', 'l_orderkey', 'hash');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- Load the same two data files used for lineitem_range, via the psql
-- variables defined earlier in this file.
COPY lineitem_hash FROM :'lineitem_1_data_file' with delimiter '|';
COPY lineitem_hash FROM :'lineitem_2_data_file' with delimiter '|';
-- aggregate(distinct) on partition column is allowed
-- The queries in this section repeat the lineitem_range queries earlier in
-- the file against lineitem_hash, with the same expected values.
SELECT count(distinct l_orderkey) FROM lineitem_hash;
count
---------------------------------------------------------------------
2985
(1 row)
SELECT avg(distinct l_orderkey) FROM lineitem_hash;
avg
---------------------------------------------------------------------
7463.9474036850921273
(1 row)
-- Check that we support more complex expressions.
-- Parenthesized column, column + 1, and column % 5 on the partition column.
SELECT count(distinct (l_orderkey)) FROM lineitem_hash;
count
---------------------------------------------------------------------
2985
(1 row)
SELECT count(distinct (l_orderkey + 1)) FROM lineitem_hash;
count
---------------------------------------------------------------------
2985
(1 row)
SELECT count(distinct (l_orderkey % 5)) FROM lineitem_hash;
count
---------------------------------------------------------------------
5
(1 row)
-- count(distinct) on non-partition column is allowed
-- Same expression shapes on l_partkey, which is not the distribution column.
SELECT count(distinct l_partkey) FROM lineitem_hash;
count
---------------------------------------------------------------------
11661
(1 row)
SELECT count(distinct (l_partkey + 1)) FROM lineitem_hash;
count
---------------------------------------------------------------------
11661
(1 row)
SELECT count(distinct (l_partkey % 5)) FROM lineitem_hash;
count
---------------------------------------------------------------------
5
(1 row)
-- agg(distinct) is allowed if we group by partition column
-- SELECT ... INTO materializes each grouped result as a new table so the
-- hash and range results can be compared below.
SELECT l_orderkey, count(distinct l_partkey) INTO hash_results FROM lineitem_hash GROUP BY l_orderkey;
SELECT l_orderkey, count(distinct l_partkey) INTO range_results FROM lineitem_range GROUP BY l_orderkey;
-- they should return the same results
-- The query lists keys whose counts differ between the two tables; zero rows
-- is the passing outcome. NOTE(review): because this is an inner join on the
-- key, a key present in only one of the two tables would not be reported.
SELECT * FROM hash_results h, range_results r WHERE h.l_orderkey = r.l_orderkey AND h.count != r.count;
l_orderkey | count | l_orderkey | count
---------------------------------------------------------------------
(0 rows)
-- count(distinct) is allowed if we group by non-partition column
-- Same materialize-and-compare approach, grouped by l_partkey instead.
SELECT l_partkey, count(distinct l_orderkey) INTO hash_results_np FROM lineitem_hash GROUP BY l_partkey;
SELECT l_partkey, count(distinct l_orderkey) INTO range_results_np FROM lineitem_range GROUP BY l_partkey;
-- they should return the same results
-- As above, only keys present in both tables are compared.
SELECT * FROM hash_results_np h, range_results_np r WHERE h.l_partkey = r.l_partkey AND h.count != r.count;
l_partkey | count | l_partkey | count
---------------------------------------------------------------------
(0 rows)
-- Other agg(distinct) variants are not allowed on non-partition columns, even
-- if the query groups by a non-partition column. Both sum(distinct l_partkey)
-- queries below are expected to fail with the same error and detail message.
SELECT SUM(distinct l_partkey) FROM lineitem_hash;
ERROR: cannot compute aggregate (distinct)
DETAIL: table partitioning is unsuitable for aggregate (distinct)
SELECT l_shipmode, sum(distinct l_partkey) FROM lineitem_hash GROUP BY l_shipmode;
ERROR: cannot compute aggregate (distinct)
DETAIL: table partitioning is unsuitable for aggregate (distinct)
-- Only lineitem_hash is dropped here. NOTE(review): lineitem_range and the
-- four *_results tables created above are not dropped in the visible part of
-- this file -- confirm whether later tests rely on them.
DROP TABLE lineitem_hash;