citus/src/test/regress/input/multi_complex_count_distinc...

477 lines
11 KiB
Plaintext

--
-- COMPLEX_COUNT_DISTINCT
--
-- Exercises count(DISTINCT ...) in many query shapes against a
-- hash-distributed table named lineitem_hash.
-- Fix the id of the next shard to be created
-- (presumably so shard ids in the test output stay stable -- confirm).
SET citus.next_shard_id TO 240000;
-- Test table. The composite primary key contains l_orderkey, which is
-- also the column the table is distributed on below.
CREATE TABLE lineitem_hash (
l_orderkey bigint not null,
l_partkey integer not null,
l_suppkey integer not null,
l_linenumber integer not null,
l_quantity decimal(15, 2) not null,
l_extendedprice decimal(15, 2) not null,
l_discount decimal(15, 2) not null,
l_tax decimal(15, 2) not null,
l_returnflag char(1) not null,
l_linestatus char(1) not null,
l_shipdate date not null,
l_commitdate date not null,
l_receiptdate date not null,
l_shipinstruct char(25) not null,
l_shipmode char(10) not null,
l_comment varchar(44) not null,
PRIMARY KEY(l_orderkey, l_linenumber) );
-- Hash-distribute the table on l_orderkey ...
SELECT master_create_distributed_table('lineitem_hash', 'l_orderkey', 'hash');
-- ... and create its shards: 8 shards, replication factor 1
-- (argument order per the Citus UDF signature: table, shard count, replication factor).
SELECT master_create_worker_shards('lineitem_hash', 8, 1);
-- Load the two pipe-delimited lineitem data files.
-- NOTE(review): @abs_srcdir@ looks like a placeholder substituted by the
-- regression harness before the script runs -- confirm.
\copy lineitem_hash FROM '@abs_srcdir@/data/lineitem.1.data' with delimiter '|'
\copy lineitem_hash FROM '@abs_srcdir@/data/lineitem.2.data' with delimiter '|'
-- Collect planner statistics for the freshly loaded table.
ANALYZE lineitem_hash;
-- Every query below runs with the task-tracker executor.
SET citus.task_executor_type to "task-tracker";
-- count(DISTINCT ...) is supported in a top-level query when the
-- GROUP BY is on the partition key
SELECT
    l_orderkey,
    count(DISTINCT l_partkey)
FROM
    lineitem_hash
GROUP BY
    l_orderkey
ORDER BY
    2 DESC,
    1 DESC
LIMIT 10;

-- plan of the query above
EXPLAIN (COSTS FALSE, VERBOSE TRUE)
SELECT
    l_orderkey,
    count(DISTINCT l_partkey)
FROM
    lineitem_hash
GROUP BY
    l_orderkey
ORDER BY
    2 DESC,
    1 DESC
LIMIT 10;
-- it is also supported when there is no grouping at all ...
SELECT
    count(DISTINCT l_partkey)
FROM
    lineitem_hash
ORDER BY
    1 DESC
LIMIT 10;

EXPLAIN (COSTS FALSE, VERBOSE TRUE)
SELECT
    count(DISTINCT l_partkey)
FROM
    lineitem_hash
ORDER BY
    1 DESC
LIMIT 10;

-- ... or when the grouping is on a non-partition column
SELECT
    l_shipmode,
    count(DISTINCT l_partkey)
FROM
    lineitem_hash
GROUP BY
    l_shipmode
ORDER BY
    2 DESC,
    1 DESC
LIMIT 10;

EXPLAIN (COSTS FALSE, VERBOSE TRUE)
SELECT
    l_shipmode,
    count(DISTINCT l_partkey)
FROM
    lineitem_hash
GROUP BY
    l_shipmode
ORDER BY
    2 DESC,
    1 DESC
LIMIT 10;
-- mixed mode count distinct (two different DISTINCT arguments),
-- grouped by the partition column
SELECT
    l_orderkey,
    count(DISTINCT l_partkey),
    count(DISTINCT l_shipmode)
FROM
    lineitem_hash
GROUP BY
    l_orderkey
ORDER BY
    3 DESC,
    2 DESC,
    1
LIMIT 10;

EXPLAIN (COSTS FALSE, VERBOSE TRUE)
SELECT
    l_orderkey,
    count(DISTINCT l_partkey),
    count(DISTINCT l_shipmode)
FROM
    lineitem_hash
GROUP BY
    l_orderkey
ORDER BY
    3 DESC,
    2 DESC,
    1
LIMIT 10;
-- count distinct on partition and non-partition columns, no grouping
SELECT
    count(DISTINCT l_orderkey),
    count(DISTINCT l_partkey),
    count(DISTINCT l_shipmode)
FROM
    lineitem_hash;

EXPLAIN (COSTS FALSE, VERBOSE TRUE)
SELECT
    count(DISTINCT l_orderkey),
    count(DISTINCT l_partkey),
    count(DISTINCT l_shipmode)
FROM
    lineitem_hash;

-- distinct and plain counts side by side,
-- on partition and non-partition columns
SELECT
    count(DISTINCT l_orderkey),
    count(l_orderkey),
    count(DISTINCT l_partkey),
    count(l_partkey),
    count(DISTINCT l_shipmode),
    count(l_shipmode)
FROM
    lineitem_hash;
-- mixed mode count distinct, grouped by a non-partition column
SELECT
    l_shipmode,
    count(DISTINCT l_partkey),
    count(DISTINCT l_orderkey)
FROM
    lineitem_hash
GROUP BY
    l_shipmode
ORDER BY
    1,
    2 DESC,
    3 DESC;

-- mixed mode count distinct, grouped by a non-partition column,
-- HAVING on the partition column
SELECT
    l_shipmode,
    count(DISTINCT l_partkey),
    count(DISTINCT l_orderkey)
FROM
    lineitem_hash
GROUP BY
    l_shipmode
HAVING
    count(DISTINCT l_orderkey) > 1300
ORDER BY
    1,
    2 DESC;

-- same, but the HAVING aggregate is not in the target list
SELECT
    l_shipmode,
    count(DISTINCT l_partkey)
FROM
    lineitem_hash
GROUP BY
    l_shipmode
HAVING
    count(DISTINCT l_orderkey) > 1300
ORDER BY
    1,
    2 DESC;

-- mixed mode count distinct, grouped by a non-partition column,
-- HAVING on a non-partition column
SELECT
    l_shipmode,
    count(DISTINCT l_partkey),
    count(DISTINCT l_suppkey)
FROM
    lineitem_hash
GROUP BY
    l_shipmode
HAVING
    count(DISTINCT l_suppkey) > 1550
ORDER BY
    1,
    2 DESC;

-- same, but the HAVING aggregate is not in the target list
SELECT
    l_shipmode,
    count(DISTINCT l_partkey)
FROM
    lineitem_hash
GROUP BY
    l_shipmode
HAVING
    count(DISTINCT l_suppkey) > 1550
ORDER BY
    1,
    2 DESC;

EXPLAIN (COSTS FALSE, VERBOSE TRUE)
SELECT
    l_shipmode,
    count(DISTINCT l_partkey)
FROM
    lineitem_hash
GROUP BY
    l_shipmode
HAVING
    count(DISTINCT l_suppkey) > 1550
ORDER BY
    1,
    2 DESC;
-- count distinct is supported in single-table subqueries:
-- grouped by the partition column ...
SELECT *
FROM (
    SELECT
        l_orderkey,
        count(DISTINCT l_partkey)
    FROM
        lineitem_hash
    GROUP BY
        l_orderkey
) AS sub
ORDER BY
    2 DESC,
    1 DESC
LIMIT 10;

-- ... and grouped by a non-partition column
SELECT *
FROM (
    SELECT
        l_partkey,
        count(DISTINCT l_orderkey)
    FROM
        lineitem_hash
    GROUP BY
        l_partkey
) AS sub
ORDER BY
    2 DESC,
    1 DESC
LIMIT 10;

EXPLAIN (COSTS FALSE, VERBOSE TRUE)
SELECT *
FROM (
    SELECT
        l_partkey,
        count(DISTINCT l_orderkey)
    FROM
        lineitem_hash
    GROUP BY
        l_partkey
) AS sub
ORDER BY
    2 DESC,
    1 DESC
LIMIT 10;
-- count distinct with FILTER clauses, grouped by the partition column
SELECT
    l_orderkey,
    count(DISTINCT l_suppkey) FILTER (WHERE l_shipmode = 'AIR'),
    count(DISTINCT l_suppkey)
FROM
    lineitem_hash
GROUP BY
    l_orderkey
ORDER BY
    2 DESC,
    3 DESC,
    1
LIMIT 10;

EXPLAIN (COSTS FALSE, VERBOSE TRUE)
SELECT
    l_orderkey,
    count(DISTINCT l_suppkey) FILTER (WHERE l_shipmode = 'AIR'),
    count(DISTINCT l_suppkey)
FROM
    lineitem_hash
GROUP BY
    l_orderkey
ORDER BY
    2 DESC,
    3 DESC,
    1
LIMIT 10;

-- group by on non-partition column
SELECT
    l_suppkey,
    count(DISTINCT l_partkey) FILTER (WHERE l_shipmode = 'AIR')
FROM
    lineitem_hash
GROUP BY
    l_suppkey
ORDER BY
    2 DESC,
    1 DESC
LIMIT 10;

-- explaining the same query fails
EXPLAIN (COSTS FALSE, VERBOSE TRUE)
SELECT
    l_suppkey,
    count(DISTINCT l_partkey) FILTER (WHERE l_shipmode = 'AIR')
FROM
    lineitem_hash
GROUP BY
    l_suppkey
ORDER BY
    2 DESC,
    1 DESC
LIMIT 10;

-- without group by, on partition column
SELECT
    count(DISTINCT l_orderkey) FILTER (WHERE l_shipmode = 'AIR')
FROM
    lineitem_hash;

-- without group by, on non-partition column
SELECT
    count(DISTINCT l_partkey) FILTER (WHERE l_shipmode = 'AIR')
FROM
    lineitem_hash;

-- without group by: filtered and unfiltered distinct counts together
SELECT
    count(DISTINCT l_partkey) FILTER (WHERE l_shipmode = 'AIR'),
    count(DISTINCT l_partkey),
    count(DISTINCT l_shipdate)
FROM
    lineitem_hash;
-- the FILTER column (l_orderkey) already exists in the target list
SELECT *
FROM (
    SELECT
        l_orderkey,
        count(DISTINCT l_partkey) FILTER (WHERE l_orderkey > 100)
    FROM
        lineitem_hash
    GROUP BY
        l_orderkey
) AS sub
ORDER BY
    2 DESC,
    1 DESC
LIMIT 10;

-- the FILTER column (l_shipmode) does not exist in the target list
SELECT *
FROM (
    SELECT
        l_orderkey,
        count(DISTINCT l_partkey) FILTER (WHERE l_shipmode = 'AIR')
    FROM
        lineitem_hash
    GROUP BY
        l_orderkey
) AS sub
ORDER BY
    2 DESC,
    1 DESC
LIMIT 10;
-- a CASE expression inside count distinct is supported:
-- per order, count the distinct part keys of rows whose l_shipmode is 'AIR'
SELECT *
FROM (
    SELECT
        l_orderkey,
        count(DISTINCT
            CASE
                WHEN l_shipmode = 'AIR' THEN l_partkey
                ELSE NULL
            END) AS count
    FROM
        lineitem_hash
    GROUP BY
        l_orderkey
) AS sub
WHERE
    count > 0
ORDER BY
    2 DESC,
    1 DESC
LIMIT 10;

-- the text LIKE operator is also supported in the CASE condition
SELECT *
FROM (
    SELECT
        l_orderkey,
        count(DISTINCT
            CASE
                WHEN l_shipmode LIKE '%A%' THEN l_partkey
                ELSE NULL
            END) AS count
    FROM
        lineitem_hash
    GROUP BY
        l_orderkey
) AS sub
WHERE
    count > 0
ORDER BY
    2 DESC,
    1 DESC
LIMIT 10;
-- count distinct is rejected if it does not reference any columns:
-- constant argument
SELECT *
FROM (
    SELECT
        l_linenumber,
        count(DISTINCT 1)
    FROM
        lineitem_hash
    GROUP BY
        l_linenumber
) AS sub
ORDER BY
    2 DESC,
    1 DESC
LIMIT 10;

-- count distinct is rejected if it does not reference any columns:
-- function-call argument with no column in it
SELECT *
FROM (
    SELECT
        l_linenumber,
        count(DISTINCT (random() * 5)::int)
    FROM
        lineitem_hash
    GROUP BY
        l_linenumber
) AS sub
ORDER BY
    2 DESC,
    1 DESC
LIMIT 10;

-- even non-const function calls are supported within count distinct
-- once a column is referenced; LIMIT 0 returns no rows, so the output
-- does not depend on the values random() produces
SELECT *
FROM (
    SELECT
        l_orderkey,
        count(DISTINCT (random() * 5)::int = l_linenumber)
    FROM
        lineitem_hash
    GROUP BY
        l_orderkey
) AS sub
ORDER BY
    2 DESC,
    1 DESC
LIMIT 0;
-- multiple nested subquery
--   innermost: per supplier, sum of line numbers and number of distinct
--              ship modes, restricted by the WHERE filters
--   middle:    per number_sum, distinct supplier count and average of the
--              per-supplier ship-mode counts
--   outer:     per distinct-supplier count, average of those averages
SELECT
    total,
    avg(avg_count) AS total_avg_count
FROM (
    SELECT
        number_sum,
        count(DISTINCT l_suppkey) AS total,
        avg(total_count) AS avg_count
    FROM (
        SELECT
            l_suppkey,
            sum(l_linenumber) AS number_sum,
            count(DISTINCT l_shipmode) AS total_count
        FROM
            lineitem_hash
        WHERE
            l_partkey > 100
            AND l_quantity > 2
            AND l_orderkey < 10000
        GROUP BY
            l_suppkey
    ) AS distributed_table
    WHERE
        number_sum >= 10
    GROUP BY
        number_sum
) AS distributed_table_2
GROUP BY
    total
ORDER BY
    -- total is the grouping key and therefore unique per row; using it as a
    -- tie-breaker keeps the row order deterministic when averages are equal
    total_avg_count DESC,
    total DESC;
-- count distinct over a CASE with several branches, each branch
-- yielding a different column
SELECT *
FROM (
    SELECT
        count(DISTINCT
            CASE
                WHEN l_shipmode = 'TRUCK' THEN l_partkey
                WHEN l_shipmode = 'AIR' THEN l_quantity
                WHEN l_shipmode = 'SHIP' THEN l_discount
                ELSE l_suppkey
            END) AS count,
        l_shipdate
    FROM
        lineitem_hash
    GROUP BY
        l_shipdate
) AS sub
WHERE
    count > 0
ORDER BY
    1 DESC,
    2 DESC
LIMIT 10;
-- count distinct over an arithmetic expression on the partition column
SELECT *
FROM (
    SELECT
        l_quantity,
        count(DISTINCT ((l_orderkey / 1000) * 1000)) AS count
    FROM
        lineitem_hash
    GROUP BY
        l_quantity
) AS sub
WHERE
    count > 0
ORDER BY
    2 DESC,
    1 DESC
LIMIT 10;
-- count distinct is part of an expression which includes another aggregate
SELECT *
FROM (
    SELECT
        sum((l_partkey * l_tax) / 100)
        / count(DISTINCT
            CASE
                WHEN l_shipmode = 'TRUCK' THEN l_partkey
                ELSE l_suppkey
            END) AS avg,
        l_shipmode
    FROM
        lineitem_hash
    GROUP BY
        l_shipmode
) AS sub
ORDER BY
    1 DESC,
    2 DESC
LIMIT 10;
-- count distinct over a CASE WHEN that mixes a column, an expression
-- on a column, and a constant
SELECT *
FROM (
    SELECT
        count(DISTINCT
            CASE
                WHEN l_shipmode = 'TRUCK' THEN l_linenumber
                WHEN l_shipmode = 'AIR' THEN l_linenumber + 10
                ELSE 2
            END) AS avg
    FROM
        lineitem_hash
    GROUP BY
        l_shipdate
) AS sub
ORDER BY
    1 DESC
LIMIT 10;
-- count distinct over a row constructor: COUNT(DISTINCT (c1, c2))
SELECT *
FROM (
    SELECT
        l_shipmode,
        count(DISTINCT (l_shipdate, l_tax))
    FROM
        lineitem_hash
    GROUP BY
        l_shipmode
) AS t
ORDER BY
    2 DESC,
    1 DESC
LIMIT 10;
-- other DISTINCT aggregates are not supported: sum(DISTINCT ...)
SELECT *
FROM (
    SELECT
        l_linenumber,
        sum(DISTINCT l_partkey)
    FROM
        lineitem_hash
    GROUP BY
        l_linenumber
) AS sub
ORDER BY
    2 DESC,
    1 DESC
LIMIT 10;

-- ... and avg(DISTINCT ...)
SELECT *
FROM (
    SELECT
        l_linenumber,
        avg(DISTINCT l_partkey)
    FROM
        lineitem_hash
    GROUP BY
        l_linenumber
) AS sub
ORDER BY
    2 DESC,
    1 DESC
LIMIT 10;
-- whole-row references, oid, and ctid are not supported in count distinct;
-- the test table does not have oid or ctid enabled, so only the whole-row
-- forms are tested here

-- whole row referenced by table name
SELECT *
FROM (
    SELECT
        l_linenumber,
        count(DISTINCT lineitem_hash)
    FROM
        lineitem_hash
    GROUP BY
        l_linenumber
) AS sub
ORDER BY
    2 DESC,
    1 DESC
LIMIT 10;

-- whole row referenced as table.*
SELECT *
FROM (
    SELECT
        l_linenumber,
        count(DISTINCT lineitem_hash.*)
    FROM
        lineitem_hash
    GROUP BY
        l_linenumber
) AS sub
ORDER BY
    2 DESC,
    1 DESC
LIMIT 10;
-- clean up: remove the test table created at the top of this file
DROP TABLE lineitem_hash;