--
-- MULTI_SELECT_DISTINCT
--
-- Tests select distinct, and select distinct on features.
--
ANALYZE lineitem_hash_part;
-- function calls are supported
SELECT DISTINCT l_orderkey, now() FROM lineitem_hash_part LIMIT 0;
 l_orderkey | now
---------------------------------------------------------------------
(0 rows)

-- a subquery in HAVING that refers to columns of the outer query is not
-- supported, the query is expected to fail with the error shown below
SELECT DISTINCT l_orderkey, avg(l_linenumber)
FROM lineitem_hash_part
GROUP BY l_orderkey
HAVING avg(l_linenumber) = (select avg(distinct l_linenumber))
LIMIT 10;
ERROR:  Subqueries in HAVING cannot refer to outer query
-- same error is expected when the whole HAVING condition is such a subquery
SELECT DISTINCT l_orderkey
FROM lineitem_hash_part
GROUP BY l_orderkey
HAVING (select avg(distinct l_linenumber) = l_orderkey)
LIMIT 10;
ERROR:  Subqueries in HAVING cannot refer to outer query
SELECT DISTINCT l_partkey, 1 + (random() * 0)::int FROM lineitem_hash_part ORDER BY 1 DESC LIMIT 3;
 l_partkey | ?column?
---------------------------------------------------------------------
    199973 |        1
    199946 |        1
    199943 |        1
(3 rows)

-- const expressions are supported
SELECT DISTINCT l_orderkey, 1+1 FROM lineitem_hash_part ORDER BY 1 LIMIT 5;
 l_orderkey | ?column?
---------------------------------------------------------------------
          1 |        2
          2 |        2
          3 |        2
          4 |        2
          5 |        2
(5 rows)

-- non const expressions are also supported
SELECT DISTINCT l_orderkey, l_partkey + 1 FROM lineitem_hash_part ORDER BY 1, 2 LIMIT 5;
 l_orderkey | ?column?
---------------------------------------------------------------------
          1 |     2133
          1 |    15636
          1 |    24028
          1 |    63701
          1 |    67311
(5 rows)

-- column expressions are supported
SELECT DISTINCT l_orderkey, l_shipinstruct || l_shipmode FROM lineitem_hash_part ORDER BY 2 , 1 LIMIT 5;
 l_orderkey |    ?column?
---------------------------------------------------------------------
         32 | COLLECT CODAIR
         39 | COLLECT CODAIR
         66 | COLLECT CODAIR
         70 | COLLECT CODAIR
         98 | COLLECT CODAIR
(5 rows)

-- function calls with const input are supported
SELECT DISTINCT l_orderkey, strpos('AIR', 'A') FROM lineitem_hash_part ORDER BY 1,2 LIMIT 5;
 l_orderkey | strpos
---------------------------------------------------------------------
          1 |      1
          2 |      1
          3 |      1
          4 |      1
          5 |      1
(5 rows)

-- function calls with non-const input are supported
SELECT DISTINCT l_orderkey, strpos(l_shipmode, 'I')
	FROM lineitem_hash_part
	WHERE strpos(l_shipmode, 'I') > 1
	ORDER BY 2, 1
	LIMIT 5;
 l_orderkey | strpos
---------------------------------------------------------------------
          1 |      2
          3 |      2
          5 |      2
         32 |      2
         33 |      2
(5 rows)

-- row types are supported
SELECT DISTINCT (l_orderkey, l_partkey) AS pair FROM lineitem_hash_part ORDER BY 1 LIMIT 5;
   pair
---------------------------------------------------------------------
 (1,2132)
 (1,15635)
 (1,24027)
 (1,63700)
 (1,67310)
(5 rows)

-- distinct on partition column
-- verify counts match with respect to count(distinct)
CREATE TEMP TABLE temp_orderkeys AS SELECT DISTINCT l_orderkey FROM lineitem_hash_part;
SELECT COUNT(*) FROM temp_orderkeys;
 count
---------------------------------------------------------------------
  2985
(1 row)

SELECT COUNT(DISTINCT l_orderkey) FROM lineitem_hash_part;
 count
---------------------------------------------------------------------
  2985
(1 row)

SELECT DISTINCT l_orderkey FROM lineitem_hash_part WHERE l_orderkey < 500 and l_partkey < 5000 order by 1;
 l_orderkey
---------------------------------------------------------------------
          1
          3
         32
         35
         39
         65
        129
        130
        134
        164
        194
        228
        261
        290
        320
        321
        354
        418
(18 rows)

-- distinct on non-partition column
SELECT DISTINCT l_partkey FROM lineitem_hash_part WHERE l_orderkey > 5 and l_orderkey < 20 order by 1;
 l_partkey
---------------------------------------------------------------------
     79251
     94780
    139636
    145243
    151894
    157238
    163073
    182052
(8 rows)

SELECT DISTINCT l_shipmode FROM lineitem_hash_part ORDER BY 1 DESC;
 l_shipmode
---------------------------------------------------------------------
 TRUCK
 SHIP
 REG AIR
 RAIL
 MAIL
 FOB
 AIR
(7 rows)

-- distinct with multiple columns
SELECT DISTINCT l_orderkey, o_orderdate
	FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey)
	WHERE l_orderkey < 10
	ORDER BY l_orderkey;
 l_orderkey | o_orderdate
---------------------------------------------------------------------
          1 | 01-02-1996
          2 | 12-01-1996
          3 | 10-14-1993
          4 | 10-11-1995
          5 | 07-30-1994
          6 | 02-21-1992
          7 | 01-10-1996
(7 rows)

-- distinct on partition column with aggregate
-- this is the same as the one without distinct due to group by
SELECT DISTINCT l_orderkey, count(*)
	FROM lineitem_hash_part
	WHERE l_orderkey < 200
	GROUP BY 1
	HAVING count(*) > 5
	ORDER BY 2 DESC, 1;
 l_orderkey | count
---------------------------------------------------------------------
          7 |     7
         68 |     7
        129 |     7
        164 |     7
        194 |     7
          1 |     6
          3 |     6
         32 |     6
         35 |     6
         39 |     6
         67 |     6
         69 |     6
         70 |     6
         71 |     6
        134 |     6
        135 |     6
        163 |     6
        192 |     6
        197 |     6
(19 rows)

-- explain the query to see actual plan
EXPLAIN (COSTS FALSE)
	SELECT DISTINCT l_orderkey, count(*)
		FROM lineitem_hash_part
		WHERE l_orderkey < 200
		GROUP BY 1
		HAVING count(*) > 5
		ORDER BY 2 DESC, 1;
                                       QUERY PLAN
---------------------------------------------------------------------
 Sort
   Sort Key: remote_scan.count DESC, remote_scan.l_orderkey
   ->  HashAggregate
         Group Key: remote_scan.count, remote_scan.l_orderkey
         ->  Custom Scan (Citus Adaptive)
               Task Count: 4
               Tasks Shown: One of 4
               ->  Task
                     Node: host=localhost port=xxxxx dbname=regression
                     ->  HashAggregate
                           Group Key: l_orderkey
                           Filter: (count(*) > 5)
                           ->  Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
                                 Filter: (l_orderkey < 200)
(14 rows)

-- check the plan if the hash aggregate is disabled
SET enable_hashagg TO off;
EXPLAIN (COSTS FALSE)
	SELECT DISTINCT l_orderkey, count(*)
		FROM lineitem_hash_part
		WHERE l_orderkey < 200
		GROUP BY 1
		HAVING count(*) > 5
		ORDER BY 2 DESC, 1;
                                       QUERY PLAN
---------------------------------------------------------------------
 Unique
   ->  Sort
         Sort Key: remote_scan.count DESC, remote_scan.l_orderkey
         ->  Custom Scan (Citus Adaptive)
               Task Count: 4
               Tasks Shown: One of 4
               ->  Task
                     Node: host=localhost port=xxxxx dbname=regression
                     ->  HashAggregate
                           Group Key: l_orderkey
                           Filter: (count(*) > 5)
                           ->  Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
                                 Filter: (l_orderkey < 200)
(13 rows)

SET enable_hashagg TO on;
-- distinct on aggregate of group by columns, we try to check whether we properly
-- handle queries which do not have any group by column in distinct columns.
SELECT DISTINCT count(*)
	FROM lineitem_hash_part
	GROUP BY l_suppkey, l_linenumber
	ORDER BY 1;
 count
---------------------------------------------------------------------
     1
     2
     3
     4
(4 rows)

-- explain the query to see actual plan. We expect to see Aggregate node having
-- group by key on count(*) column, since columns in the Group By don't guarantee
-- the uniqueness of the result.
EXPLAIN (COSTS FALSE)
	SELECT DISTINCT count(*)
		FROM lineitem_hash_part
		GROUP BY l_suppkey, l_linenumber
		ORDER BY 1;
                                          QUERY PLAN
---------------------------------------------------------------------
 Unique
   ->  Sort
         Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))
         ->  HashAggregate
               Group Key: remote_scan.worker_column_2, remote_scan.worker_column_3
               ->  Custom Scan (Citus Adaptive)
                     Task Count: 4
                     Tasks Shown: One of 4
                     ->  Task
                           Node: host=localhost port=xxxxx dbname=regression
                           ->  HashAggregate
                                 Group Key: l_suppkey, l_linenumber
                                 ->  Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(13 rows)

-- check the plan if the hash aggregate is disabled. We expect to see sort+unique
-- instead of aggregate plan node to handle distinct.
SET enable_hashagg TO off;
EXPLAIN (COSTS FALSE)
	SELECT DISTINCT count(*)
		FROM lineitem_hash_part
		GROUP BY l_suppkey, l_linenumber
		ORDER BY 1;
                                             QUERY PLAN
---------------------------------------------------------------------
 Unique
   ->  Sort
         Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))
         ->  GroupAggregate
               Group Key: remote_scan.worker_column_2, remote_scan.worker_column_3
               ->  Sort
                     Sort Key: remote_scan.worker_column_2, remote_scan.worker_column_3
                     ->  Custom Scan (Citus Adaptive)
                           Task Count: 4
                           Tasks Shown: One of 4
                           ->  Task
                                 Node: host=localhost port=xxxxx dbname=regression
                                 ->  HashAggregate
                                       Group Key: l_suppkey, l_linenumber
                                       ->  Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(15 rows)

SET enable_hashagg TO on;
-- Now we have only part of group clause columns in distinct, yet it is still not
-- enough to use Group By columns to guarantee uniqueness of result list.
SELECT DISTINCT l_suppkey, count(*)
	FROM lineitem_hash_part
	GROUP BY l_suppkey, l_linenumber
	ORDER BY 1
	LIMIT 10;
 l_suppkey | count
---------------------------------------------------------------------
         1 |     1
         2 |     1
         3 |     1
         4 |     1
         5 |     1
         7 |     1
        10 |     1
        12 |     1
        13 |     1
        14 |     1
(10 rows)

-- explain the query to see actual plan. Similar to the explain of the query above.
EXPLAIN (COSTS FALSE)
	SELECT DISTINCT l_suppkey, count(*)
		FROM lineitem_hash_part
		GROUP BY l_suppkey, l_linenumber
		ORDER BY 1
		LIMIT 10;
                                                     QUERY PLAN
---------------------------------------------------------------------
 Limit
   ->  Unique
         ->  Sort
               Sort Key: remote_scan.l_suppkey, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))
               ->  HashAggregate
                     Group Key: remote_scan.l_suppkey, remote_scan.worker_column_3
                     ->  Custom Scan (Citus Adaptive)
                           Task Count: 4
                           Tasks Shown: One of 4
                           ->  Task
                                 Node: host=localhost port=xxxxx dbname=regression
                                 ->  HashAggregate
                                       Group Key: l_suppkey, l_linenumber
                                       ->  Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(14 rows)

-- check the plan if the hash aggregate is disabled. Similar to the explain of
-- the query above.
SET enable_hashagg TO off;
EXPLAIN (COSTS FALSE)
	SELECT DISTINCT l_suppkey, count(*)
		FROM lineitem_hash_part
		GROUP BY l_suppkey, l_linenumber
		ORDER BY 1
		LIMIT 10;
                                                     QUERY PLAN
---------------------------------------------------------------------
 Limit
   ->  Unique
         ->  Sort
               Sort Key: remote_scan.l_suppkey, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))
               ->  GroupAggregate
                     Group Key: remote_scan.l_suppkey, remote_scan.worker_column_3
                     ->  Sort
                           Sort Key: remote_scan.l_suppkey, remote_scan.worker_column_3
                           ->  Custom Scan (Citus Adaptive)
                                 Task Count: 4
                                 Tasks Shown: One of 4
                                 ->  Task
                                       Node: host=localhost port=xxxxx dbname=regression
                                       ->  HashAggregate
                                             Group Key: l_suppkey, l_linenumber
                                             ->  Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(16 rows)

SET enable_hashagg TO on;
-- Similar to the above query, but with avg instead of count. The only difference
-- from the above query is that we run two aggregate functions in workers.
SELECT DISTINCT l_suppkey, avg(l_partkey)
	FROM lineitem_hash_part
	GROUP BY l_suppkey, l_linenumber
	ORDER BY 1,2
	LIMIT 10;
 l_suppkey |          avg
---------------------------------------------------------------------
         1 |    190000.000000000000
         2 |    172450.000000000000
         3 |    112469.000000000000
         3 |    134976.000000000000
         4 |    112470.000000000000
         4 |    142461.000000000000
         5 |    182450.000000000000
         7 |    137493.000000000000
        10 |    150009.000000000000
        12 | 17510.0000000000000000
(10 rows)

-- explain the query to see actual plan. Similar to the explain of the query above.
-- Only aggregate functions will be changed.
EXPLAIN (COSTS FALSE)
	SELECT DISTINCT l_suppkey, avg(l_partkey)
		FROM lineitem_hash_part
		GROUP BY l_suppkey, l_linenumber
		ORDER BY 1,2
		LIMIT 10;
                                                       QUERY PLAN
---------------------------------------------------------------------
 Limit
   ->  Unique
         ->  Sort
               Sort Key: remote_scan.l_suppkey, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))
               ->  HashAggregate
                     Group Key: remote_scan.l_suppkey, remote_scan.worker_column_4
                     ->  Custom Scan (Citus Adaptive)
                           Task Count: 4
                           Tasks Shown: One of 4
                           ->  Task
                                 Node: host=localhost port=xxxxx dbname=regression
                                 ->  HashAggregate
                                       Group Key: l_suppkey, l_linenumber
                                       ->  Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(14 rows)

-- check the plan if the hash aggregate is disabled. An earlier note here said this
-- explain errors out due to a bug; the expected output below is a regular plan.
SET enable_hashagg TO off;
EXPLAIN (COSTS FALSE)
	SELECT DISTINCT l_suppkey, avg(l_partkey)
		FROM lineitem_hash_part
		GROUP BY l_suppkey, l_linenumber
		ORDER BY 1,2
		LIMIT 10;
                                                       QUERY PLAN
---------------------------------------------------------------------
 Limit
   ->  Unique
         ->  Sort
               Sort Key: remote_scan.l_suppkey, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))
               ->  GroupAggregate
                     Group Key: remote_scan.l_suppkey, remote_scan.worker_column_4
                     ->  Sort
                           Sort Key: remote_scan.l_suppkey, remote_scan.worker_column_4
                           ->  Custom Scan (Citus Adaptive)
                                 Task Count: 4
                                 Tasks Shown: One of 4
                                 ->  Task
                                       Node: host=localhost port=xxxxx dbname=regression
                                       ->  HashAggregate
                                             Group Key: l_suppkey, l_linenumber
                                             ->  Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(16 rows)

SET enable_hashagg TO on;
-- Similar to the above query but with distinct on
SELECT DISTINCT ON (l_suppkey) avg(l_partkey)
	FROM lineitem_hash_part
	GROUP BY l_suppkey, l_linenumber
	ORDER BY l_suppkey,1
	LIMIT 10;
          avg
---------------------------------------------------------------------
    190000.000000000000
    172450.000000000000
    112469.000000000000
    112470.000000000000
    182450.000000000000
    137493.000000000000
    150009.000000000000
 17510.0000000000000000
     87504.000000000000
     77506.000000000000
(10 rows)

-- explain the query to see actual plan. We expect to see sort+unique to handle
-- distinct on.
EXPLAIN (COSTS FALSE)
	SELECT DISTINCT ON (l_suppkey) avg(l_partkey)
		FROM lineitem_hash_part
		GROUP BY l_suppkey, l_linenumber
		ORDER BY l_suppkey,1
		LIMIT 10;
                                                          QUERY PLAN
---------------------------------------------------------------------
 Limit
   ->  Unique
         ->  Sort
               Sort Key: remote_scan.worker_column_3, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))
               ->  HashAggregate
                     Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
                     ->  Custom Scan (Citus Adaptive)
                           Task Count: 4
                           Tasks Shown: One of 4
                           ->  Task
                                 Node: host=localhost port=xxxxx dbname=regression
                                 ->  HashAggregate
                                       Group Key: l_suppkey, l_linenumber
                                       ->  Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(14 rows)

-- check the plan if the hash aggregate is disabled. We expect to see sort+unique to
-- handle distinct on.
SET enable_hashagg TO off;
EXPLAIN (COSTS FALSE)
	SELECT DISTINCT ON (l_suppkey) avg(l_partkey)
		FROM lineitem_hash_part
		GROUP BY l_suppkey, l_linenumber
		ORDER BY l_suppkey,1
		LIMIT 10;
                                                          QUERY PLAN
---------------------------------------------------------------------
 Limit
   ->  Unique
         ->  Sort
               Sort Key: remote_scan.worker_column_3, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))
               ->  GroupAggregate
                     Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
                     ->  Sort
                           Sort Key: remote_scan.worker_column_3, remote_scan.worker_column_4
                           ->  Custom Scan (Citus Adaptive)
                                 Task Count: 4
                                 Tasks Shown: One of 4
                                 ->  Task
                                       Node: host=localhost port=xxxxx dbname=regression
                                       ->  HashAggregate
                                             Group Key: l_suppkey, l_linenumber
                                             ->  Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(16 rows)

SET enable_hashagg TO on;
-- distinct with expression and aggregation
SELECT DISTINCT avg(ceil(l_partkey / 2))
	FROM lineitem_hash_part
	GROUP BY l_suppkey, l_linenumber
	ORDER BY 1
	LIMIT 10;
 avg
---------------------------------------------------------------------
   9
  39
  74
  87
  89
  91
  97
 102
 111
 122
(10 rows)

-- explain the query to see actual plan
EXPLAIN (COSTS FALSE)
	SELECT DISTINCT avg(ceil(l_partkey / 2))
		FROM lineitem_hash_part
		GROUP BY l_suppkey, l_linenumber
		ORDER BY 1
		LIMIT 10;
                                                QUERY PLAN
---------------------------------------------------------------------
 Limit
   ->  Unique
         ->  Sort
               Sort Key: ((sum(remote_scan.avg) / (pg_catalog.sum(remote_scan.avg_1))::double precision))
               ->  HashAggregate
                     Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
                     ->  Custom Scan (Citus Adaptive)
                           Task Count: 4
                           Tasks Shown: One of 4
                           ->  Task
                                 Node: host=localhost port=xxxxx dbname=regression
                                 ->  HashAggregate
                                       Group Key: l_suppkey, l_linenumber
                                       ->  Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(14 rows)

-- check the plan if the hash aggregate is disabled. An earlier note here said this
-- explain errors out due to a bug; the expected output below is a regular plan.
SET enable_hashagg TO off;
EXPLAIN (COSTS FALSE)
	SELECT DISTINCT avg(ceil(l_partkey / 2))
		FROM lineitem_hash_part
		GROUP BY l_suppkey, l_linenumber
		ORDER BY 1
		LIMIT 10;
                                                QUERY PLAN
---------------------------------------------------------------------
 Limit
   ->  Unique
         ->  Sort
               Sort Key: ((sum(remote_scan.avg) / (pg_catalog.sum(remote_scan.avg_1))::double precision))
               ->  GroupAggregate
                     Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
                     ->  Sort
                           Sort Key: remote_scan.worker_column_3, remote_scan.worker_column_4
                           ->  Custom Scan (Citus Adaptive)
                                 Task Count: 4
                                 Tasks Shown: One of 4
                                 ->  Task
                                       Node: host=localhost port=xxxxx dbname=regression
                                       ->  HashAggregate
                                             Group Key: l_suppkey, l_linenumber
                                             ->  Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(16 rows)

SET enable_hashagg TO on;
-- expression among aggregations.
SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis
	FROM lineitem_hash_part
	GROUP BY l_suppkey, l_linenumber
	ORDER BY 1
	LIMIT 10;
 dis
---------------------------------------------------------------------
   2
   3
   4
   5
   6
   8
  11
  13
  14
  15
(10 rows)

-- explain the query to see actual plan
EXPLAIN (COSTS FALSE)
	SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis
		FROM lineitem_hash_part
		GROUP BY l_suppkey, l_linenumber
		ORDER BY 1
		LIMIT 10;
                                                                 QUERY PLAN
---------------------------------------------------------------------
 Limit
   ->  Unique
         ->  Sort
               Sort Key: (((pg_catalog.sum(remote_scan.dis))::bigint + COALESCE((pg_catalog.sum(remote_scan.dis_1))::bigint, '0'::bigint)))
               ->  HashAggregate
                     Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
                     ->  Custom Scan (Citus Adaptive)
                           Task Count: 4
                           Tasks Shown: One of 4
                           ->  Task
                                 Node: host=localhost port=xxxxx dbname=regression
                                 ->  HashAggregate
                                       Group Key: l_suppkey, l_linenumber
                                       ->  Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(14 rows)

-- check the plan if the hash aggregate is disabled. An earlier note here said this
-- explain errors out due to a bug; the expected output below is a regular plan.
SET enable_hashagg TO off;
EXPLAIN (COSTS FALSE)
	SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis
		FROM lineitem_hash_part
		GROUP BY l_suppkey, l_linenumber
		ORDER BY 1
		LIMIT 10;
                                                                 QUERY PLAN
---------------------------------------------------------------------
 Limit
   ->  Unique
         ->  Sort
               Sort Key: (((pg_catalog.sum(remote_scan.dis))::bigint + COALESCE((pg_catalog.sum(remote_scan.dis_1))::bigint, '0'::bigint)))
               ->  GroupAggregate
                     Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
                     ->  Sort
                           Sort Key: remote_scan.worker_column_3, remote_scan.worker_column_4
                           ->  Custom Scan (Citus Adaptive)
                                 Task Count: 4
                                 Tasks Shown: One of 4
                                 ->  Task
                                       Node: host=localhost port=xxxxx dbname=regression
                                       ->  HashAggregate
                                             Group Key: l_suppkey, l_linenumber
                                             ->  Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(16 rows)

SET enable_hashagg TO on;
-- distinct on all columns, note that Group By columns guarantee uniqueness of the
-- result list.
SELECT DISTINCT *
	FROM lineitem_hash_part
	GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
	ORDER BY 1,2
	LIMIT 10;
 l_orderkey | l_partkey | l_suppkey | l_linenumber | l_quantity | l_extendedprice | l_discount | l_tax | l_returnflag | l_linestatus | l_shipdate | l_commitdate | l_receiptdate |      l_shipinstruct       | l_shipmode |                 l_comment
---------------------------------------------------------------------
          1 |      2132 |      4633 |            4 |      28.00 |        28955.64 |       0.09 |  0.06 | N            | O            | 04-21-1996 | 03-30-1996   | 05-16-1996    | NONE                      | AIR        | lites. fluffily even de
          1 |     15635 |       638 |            6 |      32.00 |        49620.16 |       0.07 |  0.02 | N            | O            | 01-30-1996 | 02-07-1996   | 02-03-1996    | DELIVER IN PERSON         | MAIL       | arefully slyly ex
          1 |     24027 |      1534 |            5 |      24.00 |        22824.48 |       0.10 |  0.04 | N            | O            | 03-30-1996 | 03-14-1996   | 04-01-1996    | NONE                      | FOB        |  pending foxes. slyly re
          1 |     63700 |      3701 |            3 |       8.00 |        13309.60 |       0.10 |  0.02 | N            | O            | 01-29-1996 | 03-05-1996   | 01-31-1996    | TAKE BACK RETURN          | REG AIR    | riously. regular, express dep
          1 |     67310 |      7311 |            2 |      36.00 |        45983.16 |       0.09 |  0.06 | N            | O            | 04-12-1996 | 02-28-1996   | 04-20-1996    | TAKE BACK RETURN          | MAIL       | ly final         dependencies: slyly bold
          1 |    155190 |      7706 |            1 |      17.00 |        21168.23 |       0.04 |  0.02 | N            | O            | 03-13-1996 | 02-12-1996   | 03-22-1996    | DELIVER IN PERSON         | TRUCK      | egular courts above the
          2 |    106170 |      1191 |            1 |      38.00 |        44694.46 |       0.00 |  0.05 | N            | O            | 01-28-1997 | 01-14-1997   | 02-02-1997    | TAKE BACK RETURN          | RAIL       | ven requests. deposits breach a
          3 |      4297 |      1798 |            1 |      45.00 |        54058.05 |       0.06 |  0.00 | R            | F            | 02-02-1994 | 01-04-1994   | 02-23-1994    | NONE                      | AIR        | ongside of the furiously brave acco
          3 |     19036 |      6540 |            2 |      49.00 |        46796.47 |       0.10 |  0.00 | R            | F            | 11-09-1993 | 12-20-1993   | 11-24-1993    | TAKE BACK RETURN          | RAIL       |  unusual accounts. eve
          3 |     29380 |      1883 |            4 |       2.00 |         2618.76 |       0.01 |  0.06 | A            | F            | 12-04-1993 | 01-07-1994   | 01-01-1994    | NONE                      | TRUCK      | y. fluffily pending d
(10 rows)

-- explain the query to see actual plan. We expect to see only one aggregation
-- node since group by columns guarantee the uniqueness.
EXPLAIN (COSTS FALSE)
	SELECT DISTINCT *
		FROM lineitem_hash_part
		GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
		ORDER BY 1,2
		LIMIT 10;
                                                                                                                                                                                                             QUERY PLAN
---------------------------------------------------------------------
 Limit
   ->  Sort
         Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey
         ->  HashAggregate
               Group Key: remote_scan.l_orderkey, remote_scan.l_partkey, remote_scan.l_suppkey, remote_scan.l_linenumber, remote_scan.l_quantity, remote_scan.l_extendedprice, remote_scan.l_discount, remote_scan.l_tax, remote_scan.l_returnflag, remote_scan.l_linestatus, remote_scan.l_shipdate, remote_scan.l_commitdate, remote_scan.l_receiptdate, remote_scan.l_shipinstruct, remote_scan.l_shipmode, remote_scan.l_comment
               ->  Custom Scan (Citus Adaptive)
                     Task Count: 4
                     Tasks Shown: One of 4
                     ->  Task
                           Node: host=localhost port=xxxxx dbname=regression
                           ->  Limit
                                 ->  Unique
                                       ->  Group
                                             Group Key: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
                                             ->  Sort
                                                   Sort Key: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
                                                   ->  Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(17 rows)

-- check the plan if the hash aggregate is disabled. We expect to see only one
-- aggregation node since group by columns guarantee the uniqueness.
SET enable_hashagg TO off;
EXPLAIN (COSTS FALSE)
	SELECT DISTINCT *
		FROM lineitem_hash_part
		GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
		ORDER BY 1,2
		LIMIT 10;
                                                                                                                                                                                                             QUERY PLAN
---------------------------------------------------------------------
 Limit
   ->  Unique
         ->  Sort
               Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey, remote_scan.l_suppkey, remote_scan.l_linenumber, remote_scan.l_quantity, remote_scan.l_extendedprice, remote_scan.l_discount, remote_scan.l_tax, remote_scan.l_returnflag, remote_scan.l_linestatus, remote_scan.l_shipdate, remote_scan.l_commitdate, remote_scan.l_receiptdate, remote_scan.l_shipinstruct, remote_scan.l_shipmode, remote_scan.l_comment
               ->  Custom Scan (Citus Adaptive)
                     Task Count: 4
                     Tasks Shown: One of 4
                     ->  Task
                           Node: host=localhost port=xxxxx dbname=regression
                           ->  Limit
                                 ->  Unique
                                       ->  Group
                                             Group Key: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
                                             ->  Sort
                                                   Sort Key: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
                                                   ->  Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(16 rows)

-- re-enable hash aggregation for the queries that follow
SET enable_hashagg TO on;
-- distinct on count distinct
-- both output columns are aggregates computed per l_orderkey group; the outer
-- DISTINCT then removes duplicate (count, count) pairs across groups
SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
	FROM lineitem_hash_part
	GROUP BY l_orderkey
	ORDER BY 1,2;
 count | count
---------------------------------------------------------------------
     1 |     1
     2 |     1
     2 |     2
     3 |     1
     3 |     2
     3 |     3
     4 |     1
     4 |     2
     4 |     3
     4 |     4
     5 |     2
     5 |     3
     5 |     4
     5 |     5
     6 |     2
     6 |     3
     6 |     4
     6 |     5
     6 |     6
     7 |     2
     7 |     3
     7 |     4
     7 |     5
     7 |     6
     7 |     7
(25 rows)

-- explain the query to see the actual plan. We expect to see an aggregation
-- plan for the outer distinct (a HashAggregate on the coordinator, above the
-- Citus custom scan).
EXPLAIN (COSTS FALSE)
	SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
		FROM lineitem_hash_part
		GROUP BY l_orderkey
		ORDER BY 1,2;
                                          QUERY PLAN
---------------------------------------------------------------------
 Sort
   Sort Key: remote_scan.count, remote_scan.count_1
   ->  HashAggregate
         Group Key: remote_scan.count, remote_scan.count_1
         ->  Custom Scan (Citus Adaptive)
               Task Count: 4
               Tasks Shown: One of 4
               ->  Task
                     Node: host=localhost port=xxxxx dbname=regression
                     ->  GroupAggregate
                           Group Key: l_orderkey
                           ->  Sort
                                 Sort Key: l_orderkey
                                 ->  Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(14 rows)

-- check the plan if the hash aggregate is disabled. We expect to see sort + unique
-- plans for the outer distinct.
SET enable_hashagg TO off;
EXPLAIN (COSTS FALSE)
	SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
		FROM lineitem_hash_part
		GROUP BY l_orderkey
		ORDER BY 1,2;
                                          QUERY PLAN
---------------------------------------------------------------------
 Unique
   ->  Sort
         Sort Key: remote_scan.count, remote_scan.count_1
         ->  Custom Scan (Citus Adaptive)
               Task Count: 4
               Tasks Shown: One of 4
               ->  Task
                     Node: host=localhost port=xxxxx dbname=regression
                     ->  GroupAggregate
                           Group Key: l_orderkey
                           ->  Sort
                                 Sort Key: l_orderkey
                                 ->  Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(13 rows)

-- re-enable hash aggregation for the queries that follow
SET enable_hashagg TO on;
-- distinct on aggregation with a CASE argument wrapped in an expression
-- NOTE(review): both CASE branches yield a non-NULL value, so count(...) counts
-- every row of the group and the l_partkey condition filters nothing. The
-- division by 2 is an integer division (the plan casts to double precision only
-- afterwards), so ceil() does not change the value. Kept as is because the
-- expected output depends on it.
SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count
	FROM lineitem_hash_part
	GROUP BY l_suppkey
	ORDER BY 1;
 count
---------------------------------------------------------------------
     0
     1
     2
     3
     4
(5 rows)

-- explain the query to see the actual plan. The expected output shows the
-- coordinator re-grouping on l_suppkey (worker_column_2) and combining the
-- per-task counts with sum() before applying ceil() and the distinct.
EXPLAIN (COSTS FALSE)
	SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count
		FROM lineitem_hash_part
		GROUP BY l_suppkey
		ORDER BY 1;
                                                       QUERY PLAN
---------------------------------------------------------------------
 Unique
   ->  Sort
         Sort Key: (ceil(((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint) / 2))::double precision))
         ->  HashAggregate
               Group Key: remote_scan.worker_column_2
               ->  Custom Scan (Citus Adaptive)
                     Task Count: 4
                     Tasks Shown: One of 4
                     ->  Task
                           Node: host=localhost port=xxxxx dbname=regression
                           ->  HashAggregate
                                 Group Key: l_suppkey
                                 ->  Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(13 rows)

-- check the plan if the hash aggregate is disabled
-- the coordinator is expected to switch to Sort + GroupAggregate; the worker
-- task in the expected output still shows HashAggregate, presumably because the
-- setting is changed only in the coordinator session (TODO confirm)
SET enable_hashagg TO off;
EXPLAIN (COSTS FALSE)
	SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count
		FROM lineitem_hash_part
		GROUP BY l_suppkey
		ORDER BY 1;
                                                       QUERY PLAN
---------------------------------------------------------------------
 Unique
   ->  Sort
         Sort Key: (ceil(((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint) / 2))::double precision))
         ->  GroupAggregate
               Group Key: remote_scan.worker_column_2
               ->  Sort
                     Sort Key: remote_scan.worker_column_2
                     ->  Custom Scan (Citus Adaptive)
                           Task Count: 4
                           Tasks Shown: One of 4
                           ->  Task
                                 Node: host=localhost port=xxxxx dbname=regression
                                 ->  HashAggregate
                                       Group Key: l_suppkey
                                       ->  Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(15 rows)

-- re-enable hash aggregation for the queries that follow
SET enable_hashagg TO on;
-- explain the query to see actual plan with array_agg aggregation.
-- the outer distinct is expected as a coordinator HashAggregate keyed on both
-- output columns, since l_orderkey itself is not in the select list
EXPLAIN (COSTS FALSE)
	SELECT DISTINCT array_agg(l_linenumber), array_length(array_agg(l_linenumber), 1)
		FROM lineitem_hash_part
		GROUP BY l_orderkey
		ORDER BY 2
		LIMIT 15;
                                          QUERY PLAN
---------------------------------------------------------------------
 Limit
   ->  Sort
         Sort Key: remote_scan.array_length
         ->  HashAggregate
               Group Key: remote_scan.array_length, remote_scan.array_agg
               ->  Custom Scan (Citus Adaptive)
                     Task Count: 4
                     Tasks Shown: One of 4
                     ->  Task
                           Node: host=localhost port=xxxxx dbname=regression
                           ->  HashAggregate
                                 Group Key: l_orderkey
                                 ->  Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(13 rows)

-- check the plan if the hash aggregate is disabled.
-- the outer distinct is expected as Sort + Unique on the coordinator
SET enable_hashagg TO off;
EXPLAIN (COSTS FALSE)
	SELECT DISTINCT array_agg(l_linenumber), array_length(array_agg(l_linenumber), 1)
		FROM lineitem_hash_part
		GROUP BY l_orderkey
		ORDER BY 2
		LIMIT 15;
                                          QUERY PLAN
---------------------------------------------------------------------
 Limit
   ->  Unique
         ->  Sort
               Sort Key: remote_scan.array_length, remote_scan.array_agg
               ->  Custom Scan (Citus Adaptive)
                     Task Count: 4
                     Tasks Shown: One of 4
                     ->  Task
                           Node: host=localhost port=xxxxx dbname=regression
                           ->  HashAggregate
                                 Group Key: l_orderkey
                                 ->  Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(12 rows)

-- re-enable hash aggregation for the queries that follow
SET enable_hashagg TO on;
-- distinct on non-partition column with aggregate
-- this is the same as the non-distinct version, because GROUP BY l_partkey
-- already yields one row per l_partkey
SELECT DISTINCT l_partkey, count(*)
	FROM lineitem_hash_part
	GROUP BY 1
	HAVING count(*) > 2
	ORDER BY 1;
 l_partkey | count
---------------------------------------------------------------------
      1051 |     3
      1927 |     3
      6983 |     3
     15283 |     3
     87761 |     3
    136884 |     3
    149926 |     3
    160895 |     3
    177771 |     3
    188804 |     3
    199146 |     3
(11 rows)

-- explain the query to see the actual plan
-- l_partkey is not the partition column, so the expected plan re-aggregates on
-- the coordinator and applies the HAVING condition there as a Filter over the
-- summed per-task counts
EXPLAIN (COSTS FALSE)
	SELECT DISTINCT l_partkey, count(*)
		FROM lineitem_hash_part
		GROUP BY 1
		HAVING count(*) > 2
		ORDER BY 1;
                                                  QUERY PLAN
---------------------------------------------------------------------
 Unique
   ->  Sort
         Sort Key: remote_scan.l_partkey, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))
         ->  HashAggregate
               Group Key: remote_scan.l_partkey
               Filter: (COALESCE((pg_catalog.sum(remote_scan.worker_column_3))::bigint, '0'::bigint) > 2)
               ->  Custom Scan (Citus Adaptive)
                     Task Count: 4
                     Tasks Shown: One of 4
                     ->  Task
                           Node: host=localhost port=xxxxx dbname=regression
                           ->  HashAggregate
                                 Group Key: l_partkey
                                 ->  Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(14 rows)

-- distinct on non-partition column and avg
-- WHERE restricts the input rows to l_partkey < 500; HAVING then keeps only the
-- l_partkey groups whose average l_linenumber exceeds 2
SELECT DISTINCT l_partkey, avg(l_linenumber)
	FROM lineitem_hash_part
	WHERE l_partkey < 500
	GROUP BY 1
	HAVING avg(l_linenumber) > 2
	ORDER BY 1;
 l_partkey |        avg
---------------------------------------------------------------------
        18 | 7.0000000000000000
        79 | 6.0000000000000000
       149 | 4.5000000000000000
       175 | 5.0000000000000000
       179 | 6.0000000000000000
       182 | 3.0000000000000000
       222 | 4.0000000000000000
       278 | 3.0000000000000000
       299 | 7.0000000000000000
       308 | 7.0000000000000000
       309 | 5.0000000000000000
       321 | 3.0000000000000000
       337 | 6.0000000000000000
       364 | 3.0000000000000000
       403 | 4.0000000000000000
(15 rows)

-- distinct on multiple non-partition columns
-- neither l_partkey nor l_suppkey is the partition column, so equal pairs can
-- come from different shards and must be deduplicated across tasks
SELECT DISTINCT l_partkey, l_suppkey
	FROM lineitem_hash_part
	WHERE l_shipmode = 'AIR' AND l_orderkey < 100
	ORDER BY 1, 2;
 l_partkey | l_suppkey
---------------------------------------------------------------------
      2132 |      4633
      4297 |      1798
     37531 |        35
     44161 |      6666
     44706 |      4707
     67831 |      5350
     85811 |      8320
     94368 |      6878
    108338 |       849
    108570 |      8571
    137267 |      4807
    137469 |      9983
    173489 |      3490
    196156 |      1195
    197921 |       441
(15 rows)

-- explain the query to see the actual plan
-- the expected output deduplicates per task (Sort + Unique) and again on the
-- coordinator (HashAggregate)
EXPLAIN (COSTS FALSE)
	SELECT DISTINCT l_partkey, l_suppkey
		FROM lineitem_hash_part
		WHERE l_shipmode = 'AIR' AND l_orderkey < 100
		ORDER BY 1, 2;
                                             QUERY PLAN
---------------------------------------------------------------------
 Sort
   Sort Key: remote_scan.l_partkey, remote_scan.l_suppkey
   ->  HashAggregate
         Group Key: remote_scan.l_partkey, remote_scan.l_suppkey
         ->  Custom Scan (Citus Adaptive)
               Task Count: 4
               Tasks Shown: One of 4
               ->  Task
                     Node: host=localhost port=xxxxx dbname=regression
                     ->  Unique
                           ->  Sort
                                 Sort Key: l_partkey, l_suppkey
                                 ->  Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
                                       Filter: ((l_orderkey < 100) AND (l_shipmode = 'AIR'::bpchar))
(14 rows)

-- distinct on partition column
-- ORDER BY 1, 2, 3 makes the row kept for each l_orderkey deterministic: the
-- one with the smallest (l_partkey, l_suppkey)
SELECT DISTINCT ON (l_orderkey) l_orderkey, l_partkey, l_suppkey
	FROM lineitem_hash_part
	WHERE l_orderkey < 35
	ORDER BY 1, 2, 3;
 l_orderkey | l_partkey | l_suppkey
---------------------------------------------------------------------
          1 |      2132 |      4633
          2 |    106170 |      1191
          3 |      4297 |      1798
          4 |     88035 |      5560
          5 |     37531 |        35
          6 |    139636 |      2150
          7 |     79251 |      1759
         32 |      2743 |      7744
         33 |     33918 |      3919
         34 |     88362 |       871
(10 rows)

-- explain the query to see the actual plan
-- the expected output applies Sort + Unique in each task and once more on the
-- coordinator
EXPLAIN (COSTS FALSE)
	SELECT DISTINCT ON (l_orderkey) l_orderkey, l_partkey, l_suppkey
		FROM lineitem_hash_part
		WHERE l_orderkey < 35
		ORDER BY 1, 2, 3;
                                          QUERY PLAN
---------------------------------------------------------------------
 Unique
   ->  Sort
         Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey, remote_scan.l_suppkey
         ->  Custom Scan (Citus Adaptive)
               Task Count: 4
               Tasks Shown: One of 4
               ->  Task
                     Node: host=localhost port=xxxxx dbname=regression
                     ->  Unique
                           ->  Sort
                                 Sort Key: l_orderkey, l_partkey, l_suppkey
                                 ->  Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
                                       Filter: (l_orderkey < 35)
(13 rows)

-- distinct on non-partition column
-- note that the ORDER BY is required here: the DISTINCT ON clause is on a
-- non-partition column, so without it the row kept for each l_partkey (and
-- therefore the query result) could differ between runs
SELECT DISTINCT ON (l_partkey) l_partkey, l_orderkey
	FROM lineitem_hash_part
	ORDER BY 1,2
	LIMIT 20;
 l_partkey | l_orderkey
---------------------------------------------------------------------
        18 |      12005
        79 |       5121
        91 |       2883
       149 |        807
       175 |       4102
       179 |       2117
       182 |        548
       195 |       2528
       204 |      10048
       222 |       9413
       245 |       9446
       278 |       1287
       299 |       1122
       308 |      11137
       309 |       2374
       318 |        321
       321 |       5984
       337 |      10403
       350 |      13698
       358 |       4323
(20 rows)

-- explain the query to see the actual plan
-- the expected output shows the Limit applied inside each task as well as on
-- the coordinator
EXPLAIN (COSTS FALSE)
	SELECT DISTINCT ON (l_partkey) l_partkey, l_orderkey
		FROM lineitem_hash_part
		ORDER BY 1,2
		LIMIT 20;
                                                QUERY PLAN
---------------------------------------------------------------------
 Limit
   ->  Unique
         ->  Sort
               Sort Key: remote_scan.l_partkey, remote_scan.l_orderkey
               ->  Custom Scan (Citus Adaptive)
                     Task Count: 4
                     Tasks Shown: One of 4
                     ->  Task
                           Node: host=localhost port=xxxxx dbname=regression
                           ->  Limit
                                 ->  Unique
                                       ->  Sort
                                             Sort Key: l_partkey, l_orderkey
                                             ->  Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(14 rows)

-- distinct on with joins
-- each customer's first order key, i.e. the smallest l_orderkey per o_custkey
-- as determined by ORDER BY 1,2
SELECT DISTINCT ON (o_custkey) o_custkey, l_orderkey
	FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey)
	WHERE o_custkey < 15
	ORDER BY 1,2;
 o_custkey | l_orderkey
---------------------------------------------------------------------
         1 |       9154
         2 |      10563
         4 |        320
         5 |      11682
         7 |      10402
         8 |        102
        10 |       1602
        11 |      12800
        13 |        994
        14 |      11011
(10 rows)

-- show only the coordinator part of the plan for the query above
-- coordinator_plan() is defined outside this file; judging by the expected
-- output it stops after the "Task Count" line (TODO confirm)
SELECT coordinator_plan($Q$
EXPLAIN (COSTS FALSE)
	SELECT DISTINCT ON (o_custkey) o_custkey, l_orderkey
		FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey)
		WHERE o_custkey < 15
		ORDER BY 1,2;
$Q$);
                        coordinator_plan
---------------------------------------------------------------------
 Unique
   ->  Sort
         Sort Key: remote_scan.o_custkey, remote_scan.l_orderkey
         ->  Custom Scan (Citus Adaptive)
               Task Count: 4
(5 rows)

-- explain without order by
-- notice that the coordinator plan still sorts on the DISTINCT ON column
-- (o_custkey) even though the query has no ORDER BY
SELECT coordinator_plan($Q$
EXPLAIN (COSTS FALSE)
	SELECT DISTINCT ON (o_custkey) o_custkey, l_orderkey
		FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey)
		WHERE o_custkey < 15;
$Q$);
             coordinator_plan
---------------------------------------------------------------------
 Unique
   ->  Sort
         Sort Key: remote_scan.o_custkey
         ->  Custom Scan (Citus Adaptive)
               Task Count: 4
(5 rows)

-- for each order of each customer, the l_partkey of its first line item
-- (ORDER BY 1,2,3 keeps the row with the lowest l_linenumber)
SELECT DISTINCT ON (o_custkey, l_orderkey) o_custkey, l_orderkey, l_linenumber, l_partkey
	FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey)
	WHERE o_custkey < 20
	ORDER BY 1,2,3;
 o_custkey | l_orderkey | l_linenumber | l_partkey
---------------------------------------------------------------------
         1 |       9154 |            1 |     86513
         1 |      14656 |            1 |     59539
         2 |      10563 |            1 |    147459
         4 |        320 |            1 |      4415
         4 |        739 |            1 |     84489
         4 |      10688 |            1 |     45037
         4 |      10788 |            1 |     50814
         4 |      13728 |            1 |     86216
         5 |      11682 |            1 |     31634
         5 |      11746 |            1 |    180724
         5 |      14308 |            1 |    157430
         7 |      10402 |            1 |     53661
         7 |      13031 |            1 |    112161
         7 |      14145 |            1 |    138729
         7 |      14404 |            1 |    143034
         8 |        102 |            1 |     88914
         8 |        164 |            1 |     91309
         8 |      13601 |            1 |     40504
        10 |       1602 |            1 |    182806
        10 |       9862 |            1 |     86241
        10 |      11431 |            1 |     62112
        10 |      13124 |            1 |     29414
        11 |      12800 |            1 |    152806
        13 |        994 |            1 |     64486
        13 |       1603 |            1 |     38191
        13 |       4704 |            1 |     77934
        13 |       9927 |            1 |       875
        14 |      11011 |            1 |    172485
        17 |        896 |            1 |     38675
        17 |       5507 |            1 |      9600
        19 |        353 |            1 |    119305
        19 |       1504 |            1 |     81389
        19 |       1669 |            1 |     78373
        19 |       5893 |            1 |    133707
        19 |       9954 |            1 |     92138
        19 |      14885 |            1 |     36154
(36 rows)

-- explain without order by
-- the coordinator plan is expected to sort on both DISTINCT ON columns
SELECT coordinator_plan($Q$
EXPLAIN (COSTS FALSE)
	SELECT DISTINCT ON (o_custkey, l_orderkey) o_custkey, l_orderkey, l_linenumber, l_partkey
		FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey)
		WHERE o_custkey < 20;
$Q$);
                        coordinator_plan
---------------------------------------------------------------------
 Unique
   ->  Sort
         Sort Key: remote_scan.o_custkey, remote_scan.l_orderkey
         ->  Custom Scan (Citus Adaptive)
               Task Count: 4
(5 rows)

-- for each order of each customer, the l_partkey of its last line item
-- (ORDER BY 1,2,3 DESC keeps the row with the highest l_linenumber)
SELECT DISTINCT ON (o_custkey, l_orderkey) o_custkey, l_orderkey, l_linenumber, l_partkey
	FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey)
	WHERE o_custkey < 15
	ORDER BY 1,2,3 DESC;
 o_custkey | l_orderkey | l_linenumber | l_partkey
---------------------------------------------------------------------
         1 |       9154 |            7 |    173448
         1 |      14656 |            1 |     59539
         2 |      10563 |            4 |    110741
         4 |        320 |            2 |    192158
         4 |        739 |            5 |    187523
         4 |      10688 |            2 |    132574
         4 |      10788 |            4 |    196473
         4 |      13728 |            3 |     12450
         5 |      11682 |            3 |    177152
         5 |      11746 |            7 |    193807
         5 |      14308 |            3 |    140916
         7 |      10402 |            2 |     64514
         7 |      13031 |            6 |      7761
         7 |      14145 |            6 |    130723
         7 |      14404 |            7 |     35349
         8 |        102 |            4 |     61158
         8 |        164 |            7 |      3037
         8 |      13601 |            5 |     12470
        10 |       1602 |            1 |    182806
        10 |       9862 |            5 |    135675
        10 |      11431 |            7 |      8563
        10 |      13124 |            3 |     67055
        11 |      12800 |            5 |    179110
        13 |        994 |            4 |    130471
        13 |       1603 |            2 |     65209
        13 |       4704 |            3 |     63081
        13 |       9927 |            6 |    119356
        14 |      11011 |            7 |     95939
(28 rows)

-- subqueries
-- distinct over a plain projection subquery
SELECT DISTINCT l_orderkey, l_partkey
	FROM (
		SELECT l_orderkey, l_partkey
		FROM lineitem_hash_part
		) q
	ORDER BY 1,2
	LIMIT 10;
 l_orderkey | l_partkey
---------------------------------------------------------------------
          1 |      2132
          1 |     15635
          1 |     24027
          1 |     63700
          1 |     67310
          1 |    155190
          2 |    106170
          3 |      4297
          3 |     19036
          3 |     29380
(10 rows)

-- explain the query to see the actual plan
-- the expected output has no Subquery Scan node: each task scans the shard
-- directly, deduplicates with a HashAggregate, sorts and limits, and the
-- coordinator repeats the distinct, sort and limit
EXPLAIN (COSTS FALSE)
	SELECT DISTINCT l_orderkey, l_partkey
		FROM (
			SELECT l_orderkey, l_partkey
			FROM lineitem_hash_part
			) q
		ORDER BY 1,2
		LIMIT 10;
                                                QUERY PLAN
---------------------------------------------------------------------
 Limit
   ->  Sort
         Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey
         ->  HashAggregate
               Group Key: remote_scan.l_orderkey, remote_scan.l_partkey
               ->  Custom Scan (Citus Adaptive)
                     Task Count: 4
                     Tasks Shown: One of 4
                     ->  Task
                           Node: host=localhost port=xxxxx dbname=regression
                           ->  Limit
                                 ->  Sort
                                       Sort Key: l_orderkey, l_partkey
                                       ->  HashAggregate
                                             Group Key: l_orderkey, l_partkey
                                             ->  Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(16 rows)

-- distinct over a subquery that groups by the partition column
-- the subquery already returns one row per l_orderkey
SELECT DISTINCT l_orderkey, cnt
	FROM (
		SELECT l_orderkey, count(*) as cnt
		FROM lineitem_hash_part
		GROUP BY 1
		) q
	ORDER BY 1,2
	LIMIT 10;
 l_orderkey | cnt
---------------------------------------------------------------------
          1 |   6
          2 |   1
          3 |   6
          4 |   1
          5 |   3
          6 |   1
          7 |   7
         32 |   6
         33 |   4
         34 |   3
(10 rows)

-- explain the query to see the actual plan
-- the expected task plan stacks two HashAggregate nodes: the inner one for the
-- subquery's GROUP BY and the outer one for the distinct
EXPLAIN (COSTS FALSE)
	SELECT DISTINCT l_orderkey, cnt
		FROM (
			SELECT l_orderkey, count(*) as cnt
			FROM lineitem_hash_part
			GROUP BY 1
			) q
		ORDER BY 1,2
		LIMIT 10;
                                                   QUERY PLAN
---------------------------------------------------------------------
 Limit
   ->  Sort
         Sort Key: remote_scan.l_orderkey, remote_scan.cnt
         ->  HashAggregate
               Group Key: remote_scan.l_orderkey, remote_scan.cnt
               ->  Custom Scan (Citus Adaptive)
                     Task Count: 4
                     Tasks Shown: One of 4
                     ->  Task
                           Node: host=localhost port=xxxxx dbname=regression
                           ->  Limit
                                 ->  Sort
                                       Sort Key: lineitem_hash_part.l_orderkey, (count(*))
                                       ->  HashAggregate
                                             Group Key: lineitem_hash_part.l_orderkey, count(*)
                                             ->  HashAggregate
                                                   Group Key: lineitem_hash_part.l_orderkey
                                                   ->  Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(18 rows)

-- distinct on partition column
-- random() is added to inner query to prevent flattening
-- r is always at least 2, so the filter r > 1 never removes a row and the
-- result stays deterministic
SELECT DISTINCT ON (l_orderkey) l_orderkey, l_partkey
	FROM (
		SELECT l_orderkey, l_partkey, (random()*10)::int + 2 as r
		FROM lineitem_hash_part
		) q
	WHERE r > 1
	ORDER BY 1,2
	LIMIT 10;
 l_orderkey | l_partkey
---------------------------------------------------------------------
          1 |      2132
          2 |    106170
          3 |      4297
          4 |     88035
          5 |     37531
          6 |    139636
          7 |     79251
         32 |      2743
         33 |     33918
         34 |     88362
(10 rows)

-- explain the query to see the actual plan
-- the expected task plan keeps a "Subquery Scan on q" node with the r > 1
-- filter, i.e. the inner query is not flattened
EXPLAIN (COSTS FALSE)
	SELECT DISTINCT ON (l_orderkey) l_orderkey, l_partkey
		FROM (
			SELECT l_orderkey, l_partkey, (random()*10)::int + 2 as r
			FROM lineitem_hash_part
			) q
		WHERE r > 1
		ORDER BY 1,2
		LIMIT 10;
                                                   QUERY PLAN
---------------------------------------------------------------------
 Limit
   ->  Unique
         ->  Sort
               Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey
               ->  Custom Scan (Citus Adaptive)
                     Task Count: 4
                     Tasks Shown: One of 4
                     ->  Task
                           Node: host=localhost port=xxxxx dbname=regression
                           ->  Limit
                                 ->  Unique
                                       ->  Sort
                                             Sort Key: q.l_orderkey, q.l_partkey
                                             ->  Subquery Scan on q
                                                   Filter: (q.r > 1)
                                                   ->  Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(16 rows)

-- distinct on non-partition column
-- ORDER BY 2,1 sorts on l_partkey first, matching the DISTINCT ON column, and
-- keeps the smallest l_orderkey for each l_partkey
SELECT DISTINCT ON (l_partkey) l_orderkey, l_partkey
	FROM (
		SELECT l_orderkey, l_partkey, (random()*10)::int + 2 as r
		FROM lineitem_hash_part
		) q
	WHERE r > 1
	ORDER BY 2,1
	LIMIT 10;
 l_orderkey | l_partkey
---------------------------------------------------------------------
      12005 |        18
       5121 |        79
       2883 |        91
        807 |       149
       4102 |       175
       2117 |       179
        548 |       182
       2528 |       195
      10048 |       204
       9413 |       222
(10 rows)

-- explain the query to see the actual plan
-- the expected sort keys list l_partkey before l_orderkey on both the task and
-- the coordinator, following ORDER BY 2,1
EXPLAIN (COSTS FALSE)
	SELECT DISTINCT ON (l_partkey) l_orderkey, l_partkey
		FROM (
			SELECT l_orderkey, l_partkey, (random()*10)::int + 2 as r
			FROM lineitem_hash_part
			) q
		WHERE r > 1
		ORDER BY 2,1
		LIMIT 10;
                                                   QUERY PLAN
---------------------------------------------------------------------
 Limit
   ->  Unique
         ->  Sort
               Sort Key: remote_scan.l_partkey, remote_scan.l_orderkey
               ->  Custom Scan (Citus Adaptive)
                     Task Count: 4
                     Tasks Shown: One of 4
                     ->  Task
                           Node: host=localhost port=xxxxx dbname=regression
                           ->  Limit
                                 ->  Unique
                                       ->  Sort
                                             Sort Key: q.l_partkey, q.l_orderkey
                                             ->  Subquery Scan on q
                                                   Filter: (q.r > 1)
                                                   ->  Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(16 rows)