Merge pull request #2018 from citusdata/distinct_without_groupby_column

Add distinct plan after aggregation plan on master planner
2018-02-26 15:51:19 +03:00 · 2018-02-26 15:51:19 +03:00 · ee67ce892f
parent 059644c1ab 78e6d990a2
commit ee67ce892f
3 changed files with 1094 additions and 13 deletions
--- a/src/backend/distributed/planner/multi_master_planner.c
+++ b/src/backend/distributed/planner/multi_master_planner.c
@ -37,6 +37,8 @@ static PlannedStmt * BuildSelectStatement(Query *masterQuery, List *masterTarget
 static Agg * BuildAggregatePlan(Query *masterQuery, Plan *subPlan);
 static bool HasDistinctAggregate(Query *masterQuery);
 static Plan * BuildDistinctPlan(Query *masterQuery, Plan *subPlan);
+static List * PrepareTargetListForNextPlan(List *targetList);
+static bool IsGroupBySubsetOfDistinct(Query *masterQuery);


 /*
@ -397,28 +399,29 @@ BuildDistinctPlan(Query *masterQuery, Plan *subPlan)
 	bool distinctClausesHashable = true;
 	List *distinctClauseList = masterQuery->distinctClause;
 	List *targetList = copyObject(masterQuery->targetList);
-	List *columnList = pull_var_clause_default((Node *) targetList);
-	ListCell *columnCell = NULL;
 	bool hasDistinctAggregate = false;

-	if (IsA(subPlan, Agg))
+	/*
+	 * We don't need to add distinct plan if all of the columns used in group by
+	 * clause also used in distinct clause, since group by clause guarantees the
+	 * uniqueness of the target list for every row.
+	 */
+	if (IsGroupBySubsetOfDistinct(masterQuery))
 	{
 		return subPlan;
 	}

+	/*
+	 * We need to adjust varno to OUTER_VAR, since planner expects that for upper
+	 * level plans above the sequential scan. We also need to convert aggregations
+	 * (if exists) to regular Vars since the aggregation would be applied by the
+	 * previous aggregation plan and we don't want them to be applied again.
+	 */
+	targetList = PrepareTargetListForNextPlan(targetList);
+
 	Assert(masterQuery->distinctClause);
 	Assert(!masterQuery->hasDistinctOn);

-	/*
-	 * For upper level plans above the sequential scan, the planner expects the
-	 * table id (varno) to be set to OUTER_VAR.
-	 */
-	foreach(columnCell, columnList)
-	{
-		Var *column = (Var *) lfirst(columnCell);
-		column->varno = OUTER_VAR;
-	}
-
 	/*
 	 * Create group by plan with HashAggregate if all distinct
 	 * members are hashable, and not containing distinct aggregate.
@ -451,3 +454,84 @@ BuildDistinctPlan(Query *masterQuery, Plan *subPlan)

 	return distinctPlan;
 }
+
+
+/*
+ * PrepareTargetListForNextPlan handles both regular columns to have right varno
+ * and convert aggregates to regular Vars in the target list.
+ */
+static List *
+PrepareTargetListForNextPlan(List *targetList)
+{
+	List *newtargetList = NIL;
+	ListCell *targetEntryCell = NULL;
+
+	foreach(targetEntryCell, targetList)
+	{
+		TargetEntry *targetEntry = lfirst(targetEntryCell);
+		TargetEntry *newTargetEntry = NULL;
+		Var *newVar = NULL;
+
+		Assert(IsA(targetEntry, TargetEntry));
+
+		/*
+		 * For upper level plans above the sequential scan, the planner expects the
+		 * table id (varno) to be set to OUTER_VAR.
+		 */
+		newVar = makeVarFromTargetEntry(OUTER_VAR, targetEntry);
+		newTargetEntry = flatCopyTargetEntry(targetEntry);
+		newTargetEntry->expr = (Expr *) newVar;
+		newtargetList = lappend(newtargetList, newTargetEntry);
+	}
+
+	return newtargetList;
+}
+
+
+/*
+ * IsGroupBySubsetOfDistinct checks whether each clause in group clauses also
+ * exists in the distinct clauses. Note that, empty group clause is not a subset
+ * of distinct clause.
+ */
+static bool
+IsGroupBySubsetOfDistinct(Query *masterQuery)
+{
+	List *distinctClauses = masterQuery->distinctClause;
+	List *groupClauses = masterQuery->groupClause;
+	ListCell *distinctCell = NULL;
+	ListCell *groupCell = NULL;
+
+	/* There must be a group clause */
+	if (list_length(groupClauses) == 0)
+	{
+		return false;
+	}
+
+	foreach(groupCell, groupClauses)
+	{
+		SortGroupClause *groupClause = (SortGroupClause *) lfirst(groupCell);
+		bool isFound = false;
+
+		foreach(distinctCell, distinctClauses)
+		{
+			SortGroupClause *distinctClause = (SortGroupClause *) lfirst(distinctCell);
+
+			if (groupClause->tleSortGroupRef == distinctClause->tleSortGroupRef)
+			{
+				isFound = true;
+				break;
+			}
+		}
+
+		/*
+		 * If we can't find any member of group clause in the distinct clause,
+		 * that means group clause is not a subset of distinct clause.
+		 */
+		if (!isFound)
+		{
+			return false;
+		}
+	}
+
+	return true;
+}
--- a/src/test/regress/expected/multi_select_distinct.out
+++ b/src/test/regress/expected/multi_select_distinct.out
@ -245,6 +245,739 @@ EXPLAIN (COSTS FALSE)
                                       Filter: (l_orderkey < 200)
 (17 rows)

+SET enable_hashagg TO on;
+-- distinct on aggregate of group by columns, we try to check whether we handle
+-- queries which does not have any group by column in distinct columns properly.
+SELECT DISTINCT count(*)
+	FROM lineitem_hash_part
+	GROUP BY l_suppkey, l_linenumber
+	ORDER BY 1;
+ count 
+-------
+     1
+     2
+     3
+     4
+(4 rows)
+
+	
+-- explain the query to see actual plan. We expect to see Aggregate node having 
+-- group by key on count(*) column, since columns in the Group By doesn't guarantee
+-- the uniqueness of the result.
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT count(*)
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1;
+                                          QUERY PLAN                                          
+----------------------------------------------------------------------------------------------
+ Sort
+   Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))
+   ->  HashAggregate
+         Group Key: COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)
+         ->  HashAggregate
+               Group Key: remote_scan.worker_column_2, remote_scan.worker_column_3
+               ->  Custom Scan (Citus Real-Time)
+                     Task Count: 4
+                     Tasks Shown: One of 4
+                     ->  Task
+                           Node: host=localhost port=57637 dbname=regression
+                           ->  HashAggregate
+                                 Group Key: l_suppkey, l_linenumber
+                                 ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
+(14 rows)
+
+-- check the plan if the hash aggreate is disabled. We expect to see sort+unique
+-- instead of aggregate plan node to handle distinct.
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT count(*)
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1;
+                                                                                                               QUERY PLAN                                                                                                                
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Sort
+   Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint)
+   ->  Unique
+         ->  Sort
+               Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint)
+               ->  GroupAggregate
+                     Group Key: remote_scan.worker_column_2, remote_scan.worker_column_3
+                     ->  Sort
+                           Sort Key: remote_scan.worker_column_2, remote_scan.worker_column_3
+                           ->  Custom Scan (Citus Real-Time)
+                                 Task Count: 4
+                                 Tasks Shown: One of 4
+                                 ->  Task
+                                       Node: host=localhost port=57637 dbname=regression
+                                       ->  HashAggregate
+                                             Group Key: l_suppkey, l_linenumber
+                                             ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
+(17 rows)
+
+SET enable_hashagg TO on;
+-- Now we have only part of group clause columns in distinct, yet it is still not
+-- enough to use Group By columns to guarantee uniqueness of result list.
+SELECT DISTINCT l_suppkey, count(*)
+	FROM lineitem_hash_part
+	GROUP BY l_suppkey, l_linenumber
+	ORDER BY 1
+	LIMIT 10;
+ l_suppkey | count 
+-----------+-------
+         1 |     1
+         2 |     1
+         3 |     1
+         4 |     1
+         5 |     1
+         7 |     1
+        10 |     1
+        12 |     1
+        13 |     1
+        14 |     1
+(10 rows)
+
+	
+-- explain the query to see actual plan. Similar to the explain of the query above.
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT l_suppkey, count(*)
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1
+		LIMIT 10;
+                                                     QUERY PLAN                                                     
+--------------------------------------------------------------------------------------------------------------------
+ Limit
+   ->  Sort
+         Sort Key: remote_scan.l_suppkey
+         ->  HashAggregate
+               Group Key: remote_scan.l_suppkey, COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)
+               ->  HashAggregate
+                     Group Key: remote_scan.l_suppkey, remote_scan.worker_column_3
+                     ->  Custom Scan (Citus Real-Time)
+                           Task Count: 4
+                           Tasks Shown: One of 4
+                           ->  Task
+                                 Node: host=localhost port=57637 dbname=regression
+                                 ->  Limit
+                                       ->  Sort
+                                             Sort Key: l_suppkey, l_linenumber
+                                             ->  HashAggregate
+                                                   Group Key: l_suppkey, l_linenumber
+                                                   ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
+(18 rows)
+
+-- check the plan if the hash aggreate is disabled. Similar to the explain of 
+-- the query above.
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT l_suppkey, count(*)
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1
+		LIMIT 10;
+                                                                                 QUERY PLAN                                                                                 
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
+   ->  Sort
+         Sort Key: remote_scan.l_suppkey
+         ->  Unique
+               ->  Sort
+                     Sort Key: remote_scan.l_suppkey, COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint)
+                     ->  GroupAggregate
+                           Group Key: remote_scan.l_suppkey, remote_scan.worker_column_3
+                           ->  Sort
+                                 Sort Key: remote_scan.l_suppkey, remote_scan.worker_column_3
+                                 ->  Custom Scan (Citus Real-Time)
+                                       Task Count: 4
+                                       Tasks Shown: One of 4
+                                       ->  Task
+                                             Node: host=localhost port=57637 dbname=regression
+                                             ->  Limit
+                                                   ->  Sort
+                                                         Sort Key: l_suppkey, l_linenumber
+                                                         ->  HashAggregate
+                                                               Group Key: l_suppkey, l_linenumber
+                                                               ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
+(21 rows)
+
+SET enable_hashagg TO on;
+-- Similar to the above query, not with count but avg. Only difference with the
+-- above query is that, we create run two aggregate functions in workers.
+SELECT DISTINCT l_suppkey, avg(l_partkey)
+	FROM lineitem_hash_part
+	GROUP BY l_suppkey, l_linenumber
+	ORDER BY 1,2
+	LIMIT 10;
+ l_suppkey |          avg           
+-----------+------------------------
+         1 |    190000.000000000000
+         2 |    172450.000000000000
+         3 |    112469.000000000000
+         3 |    134976.000000000000
+         4 |    112470.000000000000
+         4 |    142461.000000000000
+         5 |    182450.000000000000
+         7 |    137493.000000000000
+        10 |    150009.000000000000
+        12 | 17510.0000000000000000
+(10 rows)
+
+	
+-- explain the query to see actual plan. Similar to the explain of the query above.
+-- Only aggregate functions will be changed.
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT l_suppkey, avg(l_partkey)
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1,2
+		LIMIT 10;
+                                                      QUERY PLAN                                                       
+-----------------------------------------------------------------------------------------------------------------------
+ Limit
+   ->  Sort
+         Sort Key: remote_scan.l_suppkey, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))
+         ->  HashAggregate
+               Group Key: remote_scan.l_suppkey, (pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1))
+               ->  HashAggregate
+                     Group Key: remote_scan.l_suppkey, remote_scan.worker_column_4
+                     ->  Custom Scan (Citus Real-Time)
+                           Task Count: 4
+                           Tasks Shown: One of 4
+                           ->  Task
+                                 Node: host=localhost port=57637 dbname=regression
+                                 ->  HashAggregate
+                                       Group Key: l_suppkey, l_linenumber
+                                       ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
+(15 rows)
+
+-- check the plan if the hash aggreate is disabled. This explain errors out due
+-- to a bug right now, expectation must be corrected after fixing it.
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT l_suppkey, avg(l_partkey)
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1,2
+		LIMIT 10;
+ERROR:  bogus varattno for OUTER_VAR var: 4
+SET enable_hashagg TO on;
+-- Similar to the above query but with distinct on
+SELECT DISTINCT ON (l_suppkey) avg(l_partkey)
+	FROM lineitem_hash_part
+	GROUP BY l_suppkey, l_linenumber
+	ORDER BY l_suppkey,1
+	LIMIT 10;
+          avg           
+------------------------
+    190000.000000000000
+    172450.000000000000
+    112469.000000000000
+    112470.000000000000
+    182450.000000000000
+    137493.000000000000
+    150009.000000000000
+ 17510.0000000000000000
+     87504.000000000000
+     77506.000000000000
+(10 rows)
+
+	
+-- explain the query to see actual plan. We expect to see sort+unique to handle
+-- distinct on.
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT ON (l_suppkey) avg(l_partkey)
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY l_suppkey,1
+		LIMIT 10;
+                                                                                          QUERY PLAN                                                                                          
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
+   ->  Unique
+         ->  Sort
+               Sort Key: remote_scan.worker_column_4, (pg_catalog.sum(((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))) / pg_catalog.sum(remote_scan.worker_column_3))
+               ->  HashAggregate
+                     Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
+                     ->  Custom Scan (Citus Real-Time)
+                           Task Count: 4
+                           Tasks Shown: One of 4
+                           ->  Task
+                                 Node: host=localhost port=57637 dbname=regression
+                                 ->  HashAggregate
+                                       Group Key: l_suppkey, l_linenumber
+                                       ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
+(14 rows)
+
+-- check the plan if the hash aggreate is disabled. We expect to see sort+unique to 
+-- handle distinct on.
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT ON (l_suppkey) avg(l_partkey)
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY l_suppkey,1
+		LIMIT 10;
+                                                                                          QUERY PLAN                                                                                          
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
+   ->  Unique
+         ->  Sort
+               Sort Key: remote_scan.worker_column_4, (pg_catalog.sum(((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))) / pg_catalog.sum(remote_scan.worker_column_3))
+               ->  GroupAggregate
+                     Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
+                     ->  Sort
+                           Sort Key: remote_scan.worker_column_3, remote_scan.worker_column_4
+                           ->  Custom Scan (Citus Real-Time)
+                                 Task Count: 4
+                                 Tasks Shown: One of 4
+                                 ->  Task
+                                       Node: host=localhost port=57637 dbname=regression
+                                       ->  HashAggregate
+                                             Group Key: l_suppkey, l_linenumber
+                                             ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
+(16 rows)
+
+SET enable_hashagg TO on;
+-- distinct with expression and aggregation
+SELECT DISTINCT avg(ceil(l_partkey / 2))
+	FROM lineitem_hash_part
+	GROUP BY l_suppkey, l_linenumber
+	ORDER BY 1
+	LIMIT 10;
+ avg 
+-----
+   9
+  39
+  74
+  87
+  89
+  91
+  97
+ 102
+ 111
+ 122
+(10 rows)
+
+	
+-- explain the query to see actual plan
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT avg(ceil(l_partkey / 2))
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1
+		LIMIT 10;
+                                               QUERY PLAN                                                
+---------------------------------------------------------------------------------------------------------
+ Limit
+   ->  Sort
+         Sort Key: ((sum(remote_scan.avg) / (pg_catalog.sum(remote_scan.avg_1))::double precision))
+         ->  HashAggregate
+               Group Key: (sum(remote_scan.avg) / (pg_catalog.sum(remote_scan.avg_1))::double precision)
+               ->  HashAggregate
+                     Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
+                     ->  Custom Scan (Citus Real-Time)
+                           Task Count: 4
+                           Tasks Shown: One of 4
+                           ->  Task
+                                 Node: host=localhost port=57637 dbname=regression
+                                 ->  HashAggregate
+                                       Group Key: l_suppkey, l_linenumber
+                                       ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
+(15 rows)
+
+-- check the plan if the hash aggreate is disabled. This explain errors out due
+-- to a bug right now, expectation must be corrected after fixing it.
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT avg(ceil(l_partkey / 2))
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1
+		LIMIT 10;	
+ERROR:  bogus varattno for OUTER_VAR var: 4
+		
+SET enable_hashagg TO on;		
+-- expression among aggregations.
+SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis 
+	FROM lineitem_hash_part 
+	GROUP BY l_suppkey, l_linenumber
+	ORDER BY 1
+	LIMIT 10;
+ dis 
+-----
+   2
+   3
+   4
+   5
+   6
+   8
+  11
+  13
+  14
+  15
+(10 rows)
+
+	
+-- explain the query to see actual plan
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis 
+		FROM lineitem_hash_part 
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1
+		LIMIT 10;
+                                                                QUERY PLAN                                                                 
+-------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
+   ->  Sort
+         Sort Key: (((pg_catalog.sum(remote_scan.dis))::bigint + COALESCE((pg_catalog.sum(remote_scan.dis_1))::bigint, '0'::bigint)))
+         ->  HashAggregate
+               Group Key: ((pg_catalog.sum(remote_scan.dis))::bigint + COALESCE((pg_catalog.sum(remote_scan.dis_1))::bigint, '0'::bigint))
+               ->  HashAggregate
+                     Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
+                     ->  Custom Scan (Citus Real-Time)
+                           Task Count: 4
+                           Tasks Shown: One of 4
+                           ->  Task
+                                 Node: host=localhost port=57637 dbname=regression
+                                 ->  HashAggregate
+                                       Group Key: l_suppkey, l_linenumber
+                                       ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
+(15 rows)
+
+-- check the plan if the hash aggreate is disabled. This explain errors out due
+-- to a bug right now, expectation must be corrected after fixing it.
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis 
+		FROM lineitem_hash_part 
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1
+		LIMIT 10;	
+ERROR:  bogus varattno for OUTER_VAR var: 4
+		
+SET enable_hashagg TO on;		
+		
+-- distinct on all columns, note Group By columns guarantees uniqueness of the
+-- result list.		
+SELECT DISTINCT * 
+	FROM lineitem_hash_part 
+	GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
+	ORDER BY 1,2
+	LIMIT 10;
+ l_orderkey | l_partkey | l_suppkey | l_linenumber | l_quantity | l_extendedprice | l_discount | l_tax | l_returnflag | l_linestatus | l_shipdate | l_commitdate | l_receiptdate |      l_shipinstruct       | l_shipmode |                 l_comment                  
+------------+-----------+-----------+--------------+------------+-----------------+------------+-------+--------------+--------------+------------+--------------+---------------+---------------------------+------------+--------------------------------------------
+          1 |      2132 |      4633 |            4 |      28.00 |        28955.64 |       0.09 |  0.06 | N            | O            | 04-21-1996 | 03-30-1996   | 05-16-1996    | NONE                      | AIR        | lites. fluffily even de
+          1 |     15635 |       638 |            6 |      32.00 |        49620.16 |       0.07 |  0.02 | N            | O            | 01-30-1996 | 02-07-1996   | 02-03-1996    | DELIVER IN PERSON         | MAIL       | arefully slyly ex
+          1 |     24027 |      1534 |            5 |      24.00 |        22824.48 |       0.10 |  0.04 | N            | O            | 03-30-1996 | 03-14-1996   | 04-01-1996    | NONE                      | FOB        |  pending foxes. slyly re
+          1 |     63700 |      3701 |            3 |       8.00 |        13309.60 |       0.10 |  0.02 | N            | O            | 01-29-1996 | 03-05-1996   | 01-31-1996    | TAKE BACK RETURN          | REG AIR    | riously. regular, express dep
+          1 |     67310 |      7311 |            2 |      36.00 |        45983.16 |       0.09 |  0.06 | N            | O            | 04-12-1996 | 02-28-1996   | 04-20-1996    | TAKE BACK RETURN          | MAIL       | ly final         dependencies: slyly bold 
+          1 |    155190 |      7706 |            1 |      17.00 |        21168.23 |       0.04 |  0.02 | N            | O            | 03-13-1996 | 02-12-1996   | 03-22-1996    | DELIVER IN PERSON         | TRUCK      | egular courts above the
+          2 |    106170 |      1191 |            1 |      38.00 |        44694.46 |       0.00 |  0.05 | N            | O            | 01-28-1997 | 01-14-1997   | 02-02-1997    | TAKE BACK RETURN          | RAIL       | ven requests. deposits breach a
+          3 |      4297 |      1798 |            1 |      45.00 |        54058.05 |       0.06 |  0.00 | R            | F            | 02-02-1994 | 01-04-1994   | 02-23-1994    | NONE                      | AIR        | ongside of the furiously brave acco
+          3 |     19036 |      6540 |            2 |      49.00 |        46796.47 |       0.10 |  0.00 | R            | F            | 11-09-1993 | 12-20-1993   | 11-24-1993    | TAKE BACK RETURN          | RAIL       |  unusual accounts. eve
+          3 |     29380 |      1883 |            4 |       2.00 |         2618.76 |       0.01 |  0.06 | A            | F            | 12-04-1993 | 01-07-1994   | 01-01-1994    | NONE                      | TRUCK      | y. fluffily pending d
+(10 rows)
+
+	
+-- explain the query to see actual plan. We expect to see only one aggregation
+-- node since group by columns guarantees the uniqueness.
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT * 
+		FROM lineitem_hash_part 
+		GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
+		ORDER BY 1,2
+		LIMIT 10;
+                                                                                                                                                                                                             QUERY PLAN                                                                                                                                                                                                              
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
+   ->  Sort
+         Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey
+         ->  HashAggregate
+               Group Key: remote_scan.l_orderkey, remote_scan.l_partkey, remote_scan.l_suppkey, remote_scan.l_linenumber, remote_scan.l_quantity, remote_scan.l_extendedprice, remote_scan.l_discount, remote_scan.l_tax, remote_scan.l_returnflag, remote_scan.l_linestatus, remote_scan.l_shipdate, remote_scan.l_commitdate, remote_scan.l_receiptdate, remote_scan.l_shipinstruct, remote_scan.l_shipmode, remote_scan.l_comment
+               ->  Custom Scan (Citus Real-Time)
+                     Task Count: 4
+                     Tasks Shown: One of 4
+                     ->  Task
+                           Node: host=localhost port=57637 dbname=regression
+                           ->  Limit
+                                 ->  Unique
+                                       ->  Group
+                                             Group Key: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
+                                             ->  Sort
+                                                   Sort Key: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
+                                                   ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
+(17 rows)
+
+-- check the plan if the hash aggreate is disabled. We expect to see only one 
+-- aggregation node since group by columns guarantees the uniqueness.
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT * 
+		FROM lineitem_hash_part 
+		GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
+		ORDER BY 1,2
+		LIMIT 10;	
+                                                                                                                                                                                                                QUERY PLAN                                                                                                                                                                                                                
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
+   ->  Sort
+         Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey
+         ->  GroupAggregate
+               Group Key: remote_scan.l_orderkey, remote_scan.l_partkey, remote_scan.l_suppkey, remote_scan.l_linenumber, remote_scan.l_quantity, remote_scan.l_extendedprice, remote_scan.l_discount, remote_scan.l_tax, remote_scan.l_returnflag, remote_scan.l_linestatus, remote_scan.l_shipdate, remote_scan.l_commitdate, remote_scan.l_receiptdate, remote_scan.l_shipinstruct, remote_scan.l_shipmode, remote_scan.l_comment
+               ->  Sort
+                     Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey, remote_scan.l_suppkey, remote_scan.l_linenumber, remote_scan.l_quantity, remote_scan.l_extendedprice, remote_scan.l_discount, remote_scan.l_tax, remote_scan.l_returnflag, remote_scan.l_linestatus, remote_scan.l_shipdate, remote_scan.l_commitdate, remote_scan.l_receiptdate, remote_scan.l_shipinstruct, remote_scan.l_shipmode, remote_scan.l_comment
+                     ->  Custom Scan (Citus Real-Time)
+                           Task Count: 4
+                           Tasks Shown: One of 4
+                           ->  Task
+                                 Node: host=localhost port=57637 dbname=regression
+                                 ->  Limit
+                                       ->  Unique
+                                             ->  Group
+                                                   Group Key: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
+                                                   ->  Sort
+                                                         Sort Key: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
+                                                         ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
+(19 rows)
+
+		
+SET enable_hashagg TO on;
+-- distinct on count distinct
+SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
+	FROM lineitem_hash_part
+	GROUP BY l_orderkey
+	ORDER BY 1,2;
+ count | count 
+-------+-------
+     1 |     1
+     2 |     1
+     2 |     2
+     3 |     1
+     3 |     2
+     3 |     3
+     4 |     1
+     4 |     2
+     4 |     3
+     4 |     4
+     5 |     2
+     5 |     3
+     5 |     4
+     5 |     5
+     6 |     2
+     6 |     3
+     6 |     4
+     6 |     5
+     6 |     6
+     7 |     2
+     7 |     3
+     7 |     4
+     7 |     5
+     7 |     6
+     7 |     7
+(25 rows)
+
+	
+-- explain the query to see actual plan. We expect to see aggregation plan for
+-- the outer distinct.
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
+		FROM lineitem_hash_part
+		GROUP BY l_orderkey
+		ORDER BY 1,2;
+                                                                         QUERY PLAN                                                                          
+-------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Sort
+   Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)), (COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint))
+   ->  HashAggregate
+         Group Key: COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint), COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint)
+         ->  HashAggregate
+               Group Key: remote_scan.worker_column_3
+               ->  Custom Scan (Citus Real-Time)
+                     Task Count: 4
+                     Tasks Shown: One of 4
+                     ->  Task
+                           Node: host=localhost port=57637 dbname=regression
+                           ->  GroupAggregate
+                                 Group Key: l_orderkey
+                                 ->  Sort
+                                       Sort Key: l_orderkey
+                                       ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
+(16 rows)
+
+-- check the plan if the hash aggreate is disabled. We expect to see sort + unique 
+-- plans for the outer distinct.
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
+		FROM lineitem_hash_part
+		GROUP BY l_orderkey
+		ORDER BY 1,2;
+                                                                                                                                                                                                                               QUERY PLAN                                                                                                                                                                                                                               
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Sort
+   Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint), COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint)
+   ->  Unique
+         ->  Sort
+               Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint), COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint))))::bigint, '0'::bigint)
+               ->  GroupAggregate
+                     Group Key: remote_scan.worker_column_3
+                     ->  Sort
+                           Sort Key: remote_scan.worker_column_3
+                           ->  Custom Scan (Citus Real-Time)
+                                 Task Count: 4
+                                 Tasks Shown: One of 4
+                                 ->  Task
+                                       Node: host=localhost port=57637 dbname=regression
+                                       ->  GroupAggregate
+                                             Group Key: l_orderkey
+                                             ->  Sort
+                                                   Sort Key: l_orderkey
+                                                   ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
+(19 rows)
+
+		
+SET enable_hashagg TO on;
+-- distinct on aggregation with filter and expression
+SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count 
+	FROM lineitem_hash_part 
+	GROUP BY l_suppkey
+	ORDER BY 1;
+ count 
+-------
+     0
+     1
+     2
+     3
+     4
+(5 rows)
+
+	
+-- explain the query to see actual plan
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count 
+		FROM lineitem_hash_part 
+		GROUP BY l_suppkey
+		ORDER BY 1;
+                                                      QUERY PLAN                                                       
+-----------------------------------------------------------------------------------------------------------------------
+ Sort
+   Sort Key: (ceil(((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint) / 2))::double precision))
+   ->  HashAggregate
+         Group Key: ceil(((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint) / 2))::double precision)
+         ->  HashAggregate
+               Group Key: remote_scan.worker_column_2
+               ->  Custom Scan (Citus Real-Time)
+                     Task Count: 4
+                     Tasks Shown: One of 4
+                     ->  Task
+                           Node: host=localhost port=57637 dbname=regression
+                           ->  HashAggregate
+                                 Group Key: l_suppkey
+                                 ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
+(14 rows)
+
+-- check the plan if the hash aggreate is disabled
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count 
+		FROM lineitem_hash_part 
+		GROUP BY l_suppkey
+		ORDER BY 1;
+                                                                                                                                                                               QUERY PLAN                                                                                                                                                                                
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Sort
+   Sort Key: ceil(((COALESCE((pg_catalog.sum((ceil(((COALESCE((pg_catalog.sum((ceil(((COALESCE((pg_catalog.sum((ceil(((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint) / 2))::double precision))))::bigint, '0'::bigint) / 2))::double precision))))::bigint, '0'::bigint) / 2))::double precision))))::bigint, '0'::bigint) / 2))::double precision)
+   ->  Unique
+         ->  Sort
+               Sort Key: ceil(((COALESCE((pg_catalog.sum((ceil(((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint) / 2))::double precision))))::bigint, '0'::bigint) / 2))::double precision)
+               ->  GroupAggregate
+                     Group Key: remote_scan.worker_column_2
+                     ->  Sort
+                           Sort Key: remote_scan.worker_column_2
+                           ->  Custom Scan (Citus Real-Time)
+                                 Task Count: 4
+                                 Tasks Shown: One of 4
+                                 ->  Task
+                                       Node: host=localhost port=57637 dbname=regression
+                                       ->  HashAggregate
+                                             Group Key: l_suppkey
+                                             ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
+(17 rows)
+
+		
+SET enable_hashagg TO on;
+	
+-- explain the query to see actual plan with array_agg aggregation. Note that, 
+-- worker query created for this query is not correct. It will be fixed soon.  
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT array_agg(l_linenumber), array_length(array_agg(l_linenumber), 1)
+		FROM lineitem_hash_part 
+		GROUP BY l_orderkey
+		ORDER BY 2
+		LIMIT 15;
+                                                       QUERY PLAN                                                        
+-------------------------------------------------------------------------------------------------------------------------
+ Limit
+   ->  Sort
+         Sort Key: (array_length(array_cat_agg(remote_scan.array_length), 1))
+         ->  HashAggregate
+               Group Key: array_length(array_cat_agg(remote_scan.array_length), 1), array_cat_agg(remote_scan.array_agg)
+               ->  HashAggregate
+                     Group Key: remote_scan.worker_column_3
+                     ->  Custom Scan (Citus Real-Time)
+                           Task Count: 4
+                           Tasks Shown: One of 4
+                           ->  Task
+                                 Node: host=localhost port=57637 dbname=regression
+                                 ->  Limit
+                                       ->  Sort
+                                             Sort Key: (array_agg(l_linenumber))
+                                             ->  GroupAggregate
+                                                   Group Key: l_orderkey
+                                                   ->  Sort
+                                                         Sort Key: l_orderkey
+                                                         ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
+(20 rows)
+
+-- check the plan if the hash aggreate is disabled. Note that, 
+-- worker query created for this query is not correct. It will be fixed soon. 
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT array_agg(l_linenumber), array_length(array_agg(l_linenumber), 1)
+		FROM lineitem_hash_part 
+		GROUP BY l_orderkey
+		ORDER BY 2
+		LIMIT 15;
+                                                                                    QUERY PLAN                                                                                    
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
+   ->  Sort
+         Sort Key: array_length(array_cat_agg((array_length(array_cat_agg((array_length(array_cat_agg((array_length(array_cat_agg(remote_scan.array_length), 1))), 1))), 1))), 1)
+         ->  Unique
+               ->  Sort
+                     Sort Key: array_length(array_cat_agg((array_length(array_cat_agg(remote_scan.array_length), 1))), 1), array_cat_agg((array_cat_agg(remote_scan.array_agg)))
+                     ->  GroupAggregate
+                           Group Key: remote_scan.worker_column_3
+                           ->  Sort
+                                 Sort Key: remote_scan.worker_column_3
+                                 ->  Custom Scan (Citus Real-Time)
+                                       Task Count: 4
+                                       Tasks Shown: One of 4
+                                       ->  Task
+                                             Node: host=localhost port=57637 dbname=regression
+                                             ->  Limit
+                                                   ->  Sort
+                                                         Sort Key: (array_agg(l_linenumber))
+                                                         ->  GroupAggregate
+                                                               Group Key: l_orderkey
+                                                               ->  Sort
+                                                                     Sort Key: l_orderkey
+                                                                     ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
+(23 rows)
+
+		
 SET enable_hashagg TO on;
 -- distinct on non-partition column with aggregate
 -- this is the same as non-distinct version due to group by
--- a/src/test/regress/sql/multi_select_distinct.sql
+++ b/src/test/regress/sql/multi_select_distinct.sql
@ -78,6 +78,270 @@ EXPLAIN (COSTS FALSE)

 SET enable_hashagg TO on;

+-- distinct on aggregate of group by columns, we try to check whether we handle
+-- queries which does not have any group by column in distinct columns properly.
+SELECT DISTINCT count(*)
+	FROM lineitem_hash_part
+	GROUP BY l_suppkey, l_linenumber
+	ORDER BY 1;
+	
+-- explain the query to see actual plan. We expect to see Aggregate node having 
+-- group by key on count(*) column, since columns in the Group By doesn't guarantee
+-- the uniqueness of the result.
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT count(*)
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1;
+
+-- check the plan if the hash aggreate is disabled. We expect to see sort+unique
+-- instead of aggregate plan node to handle distinct.
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT count(*)
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1;
+
+SET enable_hashagg TO on;
+
+-- Now we have only part of group clause columns in distinct, yet it is still not
+-- enough to use Group By columns to guarantee uniqueness of result list.
+SELECT DISTINCT l_suppkey, count(*)
+	FROM lineitem_hash_part
+	GROUP BY l_suppkey, l_linenumber
+	ORDER BY 1
+	LIMIT 10;
+	
+-- explain the query to see actual plan. Similar to the explain of the query above.
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT l_suppkey, count(*)
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1
+		LIMIT 10;
+
+-- check the plan if the hash aggreate is disabled. Similar to the explain of 
+-- the query above.
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT l_suppkey, count(*)
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1
+		LIMIT 10;
+
+SET enable_hashagg TO on;
+
+-- Similar to the above query, not with count but avg. Only difference with the
+-- above query is that, we create run two aggregate functions in workers.
+SELECT DISTINCT l_suppkey, avg(l_partkey)
+	FROM lineitem_hash_part
+	GROUP BY l_suppkey, l_linenumber
+	ORDER BY 1,2
+	LIMIT 10;
+	
+-- explain the query to see actual plan. Similar to the explain of the query above.
+-- Only aggregate functions will be changed.
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT l_suppkey, avg(l_partkey)
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1,2
+		LIMIT 10;
+
+-- check the plan if the hash aggreate is disabled. This explain errors out due
+-- to a bug right now, expectation must be corrected after fixing it.
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT l_suppkey, avg(l_partkey)
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1,2
+		LIMIT 10;
+
+SET enable_hashagg TO on;
+
+-- Similar to the above query but with distinct on
+SELECT DISTINCT ON (l_suppkey) avg(l_partkey)
+	FROM lineitem_hash_part
+	GROUP BY l_suppkey, l_linenumber
+	ORDER BY l_suppkey,1
+	LIMIT 10;
+	
+-- explain the query to see actual plan. We expect to see sort+unique to handle
+-- distinct on.
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT ON (l_suppkey) avg(l_partkey)
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY l_suppkey,1
+		LIMIT 10;
+
+-- check the plan if the hash aggreate is disabled. We expect to see sort+unique to 
+-- handle distinct on.
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT ON (l_suppkey) avg(l_partkey)
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY l_suppkey,1
+		LIMIT 10;
+
+SET enable_hashagg TO on;
+
+-- distinct with expression and aggregation
+SELECT DISTINCT avg(ceil(l_partkey / 2))
+	FROM lineitem_hash_part
+	GROUP BY l_suppkey, l_linenumber
+	ORDER BY 1
+	LIMIT 10;
+	
+-- explain the query to see actual plan
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT avg(ceil(l_partkey / 2))
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1
+		LIMIT 10;
+
+-- check the plan if the hash aggreate is disabled. This explain errors out due
+-- to a bug right now, expectation must be corrected after fixing it.
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT avg(ceil(l_partkey / 2))
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1
+		LIMIT 10;	
+		
+SET enable_hashagg TO on;		
+
+-- expression among aggregations.
+SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis 
+	FROM lineitem_hash_part 
+	GROUP BY l_suppkey, l_linenumber
+	ORDER BY 1
+	LIMIT 10;
+	
+-- explain the query to see actual plan
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis 
+		FROM lineitem_hash_part 
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1
+		LIMIT 10;
+
+-- check the plan if the hash aggreate is disabled. This explain errors out due
+-- to a bug right now, expectation must be corrected after fixing it.
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis 
+		FROM lineitem_hash_part 
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1
+		LIMIT 10;	
+		
+SET enable_hashagg TO on;		
+		
+-- distinct on all columns, note Group By columns guarantees uniqueness of the
+-- result list.		
+SELECT DISTINCT * 
+	FROM lineitem_hash_part 
+	GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
+	ORDER BY 1,2
+	LIMIT 10;
+	
+-- explain the query to see actual plan. We expect to see only one aggregation
+-- node since group by columns guarantees the uniqueness.
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT * 
+		FROM lineitem_hash_part 
+		GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
+		ORDER BY 1,2
+		LIMIT 10;
+
+-- check the plan if the hash aggreate is disabled. We expect to see only one 
+-- aggregation node since group by columns guarantees the uniqueness.
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT * 
+		FROM lineitem_hash_part 
+		GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
+		ORDER BY 1,2
+		LIMIT 10;	
+		
+SET enable_hashagg TO on;
+
+-- distinct on count distinct
+SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
+	FROM lineitem_hash_part
+	GROUP BY l_orderkey
+	ORDER BY 1,2;
+	
+-- explain the query to see actual plan. We expect to see aggregation plan for
+-- the outer distinct.
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
+		FROM lineitem_hash_part
+		GROUP BY l_orderkey
+		ORDER BY 1,2;
+
+-- check the plan if the hash aggreate is disabled. We expect to see sort + unique 
+-- plans for the outer distinct.
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
+		FROM lineitem_hash_part
+		GROUP BY l_orderkey
+		ORDER BY 1,2;
+		
+SET enable_hashagg TO on;
+
+-- distinct on aggregation with filter and expression
+SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count 
+	FROM lineitem_hash_part 
+	GROUP BY l_suppkey
+	ORDER BY 1;
+	
+-- explain the query to see actual plan
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count 
+		FROM lineitem_hash_part 
+		GROUP BY l_suppkey
+		ORDER BY 1;
+
+-- check the plan if the hash aggreate is disabled
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count 
+		FROM lineitem_hash_part 
+		GROUP BY l_suppkey
+		ORDER BY 1;
+		
+SET enable_hashagg TO on;
+	
+-- explain the query to see actual plan with array_agg aggregation. Note that, 
+-- worker query created for this query is not correct. It will be fixed soon.  
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT array_agg(l_linenumber), array_length(array_agg(l_linenumber), 1)
+		FROM lineitem_hash_part 
+		GROUP BY l_orderkey
+		ORDER BY 2
+		LIMIT 15;
+
+-- check the plan if the hash aggreate is disabled. Note that, 
+-- worker query created for this query is not correct. It will be fixed soon. 
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT array_agg(l_linenumber), array_length(array_agg(l_linenumber), 1)
+		FROM lineitem_hash_part 
+		GROUP BY l_orderkey
+		ORDER BY 2
+		LIMIT 15;
+		
+SET enable_hashagg TO on;
+
 -- distinct on non-partition column with aggregate
 -- this is the same as non-distinct version due to group by
 SELECT DISTINCT l_partkey, count(*)