Fix master plan of the query with distinct, aggregate and group by clauses.

Before this PR, we were trusting on the columns of group by about guaranteeing the uniqueness of the results. However, this assumption is correct only if the columns in the group by is subset of columns in the distinct clause. It can be wrong if we have part of group by columns and some aggregation columns in the distinct clause. With this PR, we add distinct plan on top of aggregate plan when necessary.
2018-02-21 15:45:39 +03:00 · 2018-02-21 15:45:39 +03:00 · 78e6d990a2
parent 059644c1ab
commit 78e6d990a2
3 changed files with 1094 additions and 13 deletions
--- a/src/backend/distributed/planner/multi_master_planner.c
+++ b/src/backend/distributed/planner/multi_master_planner.c
@ -37,6 +37,8 @@ static PlannedStmt * BuildSelectStatement(Query *masterQuery, List *masterTarget
 static Agg * BuildAggregatePlan(Query *masterQuery, Plan *subPlan);
 static bool HasDistinctAggregate(Query *masterQuery);
 static Plan * BuildDistinctPlan(Query *masterQuery, Plan *subPlan);
+static List * PrepareTargetListForNextPlan(List *targetList);
+static bool IsGroupBySubsetOfDistinct(Query *masterQuery);


 /*
@ -397,28 +399,29 @@ BuildDistinctPlan(Query *masterQuery, Plan *subPlan)
 	bool distinctClausesHashable = true;
 	List *distinctClauseList = masterQuery->distinctClause;
 	List *targetList = copyObject(masterQuery->targetList);
-	List *columnList = pull_var_clause_default((Node *) targetList);
-	ListCell *columnCell = NULL;
 	bool hasDistinctAggregate = false;

-	if (IsA(subPlan, Agg))
+	/*
+	 * We don't need to add distinct plan if all of the columns used in group by
+	 * clause also used in distinct clause, since group by clause guarantees the
+	 * uniqueness of the target list for every row.
+	 */
+	if (IsGroupBySubsetOfDistinct(masterQuery))
 	{
 		return subPlan;
 	}

+	/*
+	 * We need to adjust varno to OUTER_VAR, since planner expects that for upper
+	 * level plans above the sequential scan. We also need to convert aggregations
+	 * (if exists) to regular Vars since the aggregation would be applied by the
+	 * previous aggregation plan and we don't want them to be applied again.
+	 */
+	targetList = PrepareTargetListForNextPlan(targetList);
+
 	Assert(masterQuery->distinctClause);
 	Assert(!masterQuery->hasDistinctOn);

-	/*
-	 * For upper level plans above the sequential scan, the planner expects the
-	 * table id (varno) to be set to OUTER_VAR.
-	 */
-	foreach(columnCell, columnList)
-	{
-		Var *column = (Var *) lfirst(columnCell);
-		column->varno = OUTER_VAR;
-	}
-
 	/*
 	 * Create group by plan with HashAggregate if all distinct
 	 * members are hashable, and not containing distinct aggregate.
@ -451,3 +454,84 @@ BuildDistinctPlan(Query *masterQuery, Plan *subPlan)

 	return distinctPlan;
 }
+
+
+/*
+ * PrepareTargetListForNextPlan handles both regular columns to have right varno
+ * and convert aggregates to regular Vars in the target list.
+ */
+static List *
+PrepareTargetListForNextPlan(List *targetList)
+{
+	List *newtargetList = NIL;
+	ListCell *targetEntryCell = NULL;
+
+	foreach(targetEntryCell, targetList)
+	{
+		TargetEntry *targetEntry = lfirst(targetEntryCell);
+		TargetEntry *newTargetEntry = NULL;
+		Var *newVar = NULL;
+
+		Assert(IsA(targetEntry, TargetEntry));
+
+		/*
+		 * For upper level plans above the sequential scan, the planner expects the
+		 * table id (varno) to be set to OUTER_VAR.
+		 */
+		newVar = makeVarFromTargetEntry(OUTER_VAR, targetEntry);
+		newTargetEntry = flatCopyTargetEntry(targetEntry);
+		newTargetEntry->expr = (Expr *) newVar;
+		newtargetList = lappend(newtargetList, newTargetEntry);
+	}
+
+	return newtargetList;
+}
+
+
+/*
+ * IsGroupBySubsetOfDistinct checks whether each clause in group clauses also
+ * exists in the distinct clauses. Note that, empty group clause is not a subset
+ * of distinct clause.
+ */
+static bool
+IsGroupBySubsetOfDistinct(Query *masterQuery)
+{
+	List *distinctClauses = masterQuery->distinctClause;
+	List *groupClauses = masterQuery->groupClause;
+	ListCell *distinctCell = NULL;
+	ListCell *groupCell = NULL;
+
+	/* There must be a group clause */
+	if (list_length(groupClauses) == 0)
+	{
+		return false;
+	}
+
+	foreach(groupCell, groupClauses)
+	{
+		SortGroupClause *groupClause = (SortGroupClause *) lfirst(groupCell);
+		bool isFound = false;
+
+		foreach(distinctCell, distinctClauses)
+		{
+			SortGroupClause *distinctClause = (SortGroupClause *) lfirst(distinctCell);
+
+			if (groupClause->tleSortGroupRef == distinctClause->tleSortGroupRef)
+			{
+				isFound = true;
+				break;
+			}
+		}
+
+		/*
+		 * If we can't find any member of group clause in the distinct clause,
+		 * that means group clause is not a subset of distinct clause.
+		 */
+		if (!isFound)
+		{
+			return false;
+		}
+	}
+
+	return true;
+}
--- a/src/test/regress/expected/multi_select_distinct.out
+++ b/src/test/regress/expected/multi_select_distinct.out
@ -245,6 +245,739 @@ EXPLAIN (COSTS FALSE)
                                       Filter: (l_orderkey < 200)
 (17 rows)

+SET enable_hashagg TO on;
+-- distinct on aggregate of group by columns, we try to check whether we handle
+-- queries which does not have any group by column in distinct columns properly.
+SELECT DISTINCT count(*)
+	FROM lineitem_hash_part
+	GROUP BY l_suppkey, l_linenumber
+	ORDER BY 1;
+ count 
+-------
+     1
+     2
+     3
+     4
+(4 rows)
+
+	
+-- explain the query to see actual plan. We expect to see Aggregate node having 
+-- group by key on count(*) column, since columns in the Group By doesn't guarantee
+-- the uniqueness of the result.
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT count(*)
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1;
+                                          QUERY PLAN                                          
+----------------------------------------------------------------------------------------------
+ Sort
+   Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))
+   ->  HashAggregate
+         Group Key: COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)
+         ->  HashAggregate
+               Group Key: remote_scan.worker_column_2, remote_scan.worker_column_3
+               ->  Custom Scan (Citus Real-Time)
+                     Task Count: 4
+                     Tasks Shown: One of 4
+                     ->  Task
+                           Node: host=localhost port=57637 dbname=regression
+                           ->  HashAggregate
+                                 Group Key: l_suppkey, l_linenumber
+                                 ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
+(14 rows)
+
+-- check the plan if the hash aggreate is disabled. We expect to see sort+unique
+-- instead of aggregate plan node to handle distinct.
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT count(*)
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1;
+                                                                                                               QUERY PLAN                                                                                                                
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Sort
+   Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint)
+   ->  Unique
+         ->  Sort
+               Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint)
+               ->  GroupAggregate
+                     Group Key: remote_scan.worker_column_2, remote_scan.worker_column_3
+                     ->  Sort
+                           Sort Key: remote_scan.worker_column_2, remote_scan.worker_column_3
+                           ->  Custom Scan (Citus Real-Time)
+                                 Task Count: 4
+                                 Tasks Shown: One of 4
+                                 ->  Task
+                                       Node: host=localhost port=57637 dbname=regression
+                                       ->  HashAggregate
+                                             Group Key: l_suppkey, l_linenumber
+                                             ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
+(17 rows)
+
+SET enable_hashagg TO on;
+-- Now we have only part of group clause columns in distinct, yet it is still not
+-- enough to use Group By columns to guarantee uniqueness of result list.
+SELECT DISTINCT l_suppkey, count(*)
+	FROM lineitem_hash_part
+	GROUP BY l_suppkey, l_linenumber
+	ORDER BY 1
+	LIMIT 10;
+ l_suppkey | count 
+-----------+-------
+         1 |     1
+         2 |     1
+         3 |     1
+         4 |     1
+         5 |     1
+         7 |     1
+        10 |     1
+        12 |     1
+        13 |     1
+        14 |     1
+(10 rows)
+
+	
+-- explain the query to see actual plan. Similar to the explain of the query above.
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT l_suppkey, count(*)
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1
+		LIMIT 10;
+                                                     QUERY PLAN                                                     
+--------------------------------------------------------------------------------------------------------------------
+ Limit
+   ->  Sort
+         Sort Key: remote_scan.l_suppkey
+         ->  HashAggregate
+               Group Key: remote_scan.l_suppkey, COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)
+               ->  HashAggregate
+                     Group Key: remote_scan.l_suppkey, remote_scan.worker_column_3
+                     ->  Custom Scan (Citus Real-Time)
+                           Task Count: 4
+                           Tasks Shown: One of 4
+                           ->  Task
+                                 Node: host=localhost port=57637 dbname=regression
+                                 ->  Limit
+                                       ->  Sort
+                                             Sort Key: l_suppkey, l_linenumber
+                                             ->  HashAggregate
+                                                   Group Key: l_suppkey, l_linenumber
+                                                   ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
+(18 rows)
+
+-- check the plan if the hash aggreate is disabled. Similar to the explain of 
+-- the query above.
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT l_suppkey, count(*)
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1
+		LIMIT 10;
+                                                                                 QUERY PLAN                                                                                 
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
+   ->  Sort
+         Sort Key: remote_scan.l_suppkey
+         ->  Unique
+               ->  Sort
+                     Sort Key: remote_scan.l_suppkey, COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint)
+                     ->  GroupAggregate
+                           Group Key: remote_scan.l_suppkey, remote_scan.worker_column_3
+                           ->  Sort
+                                 Sort Key: remote_scan.l_suppkey, remote_scan.worker_column_3
+                                 ->  Custom Scan (Citus Real-Time)
+                                       Task Count: 4
+                                       Tasks Shown: One of 4
+                                       ->  Task
+                                             Node: host=localhost port=57637 dbname=regression
+                                             ->  Limit
+                                                   ->  Sort
+                                                         Sort Key: l_suppkey, l_linenumber
+                                                         ->  HashAggregate
+                                                               Group Key: l_suppkey, l_linenumber
+                                                               ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
+(21 rows)
+
+SET enable_hashagg TO on;
+-- Similar to the above query, not with count but avg. Only difference with the
+-- above query is that, we create run two aggregate functions in workers.
+SELECT DISTINCT l_suppkey, avg(l_partkey)
+	FROM lineitem_hash_part
+	GROUP BY l_suppkey, l_linenumber
+	ORDER BY 1,2
+	LIMIT 10;
+ l_suppkey |          avg           
+-----------+------------------------
+         1 |    190000.000000000000
+         2 |    172450.000000000000
+         3 |    112469.000000000000
+         3 |    134976.000000000000
+         4 |    112470.000000000000
+         4 |    142461.000000000000
+         5 |    182450.000000000000
+         7 |    137493.000000000000
+        10 |    150009.000000000000
+        12 | 17510.0000000000000000
+(10 rows)
+
+	
+-- explain the query to see actual plan. Similar to the explain of the query above.
+-- Only aggregate functions will be changed.
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT l_suppkey, avg(l_partkey)
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1,2
+		LIMIT 10;
+                                                      QUERY PLAN                                                       
+-----------------------------------------------------------------------------------------------------------------------
+ Limit
+   ->  Sort
+         Sort Key: remote_scan.l_suppkey, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))
+         ->  HashAggregate
+               Group Key: remote_scan.l_suppkey, (pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1))
+               ->  HashAggregate
+                     Group Key: remote_scan.l_suppkey, remote_scan.worker_column_4
+                     ->  Custom Scan (Citus Real-Time)
+                           Task Count: 4
+                           Tasks Shown: One of 4
+                           ->  Task
+                                 Node: host=localhost port=57637 dbname=regression
+                                 ->  HashAggregate
+                                       Group Key: l_suppkey, l_linenumber
+                                       ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
+(15 rows)
+
+-- check the plan if the hash aggreate is disabled. This explain errors out due
+-- to a bug right now, expectation must be corrected after fixing it.
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT l_suppkey, avg(l_partkey)
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1,2
+		LIMIT 10;
+ERROR:  bogus varattno for OUTER_VAR var: 4
+SET enable_hashagg TO on;
+-- Similar to the above query but with distinct on
+SELECT DISTINCT ON (l_suppkey) avg(l_partkey)
+	FROM lineitem_hash_part
+	GROUP BY l_suppkey, l_linenumber
+	ORDER BY l_suppkey,1
+	LIMIT 10;
+          avg           
+------------------------
+    190000.000000000000
+    172450.000000000000
+    112469.000000000000
+    112470.000000000000
+    182450.000000000000
+    137493.000000000000
+    150009.000000000000
+ 17510.0000000000000000
+     87504.000000000000
+     77506.000000000000
+(10 rows)
+
+	
+-- explain the query to see actual plan. We expect to see sort+unique to handle
+-- distinct on.
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT ON (l_suppkey) avg(l_partkey)
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY l_suppkey,1
+		LIMIT 10;
+                                                                                          QUERY PLAN                                                                                          
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
+   ->  Unique
+         ->  Sort
+               Sort Key: remote_scan.worker_column_4, (pg_catalog.sum(((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))) / pg_catalog.sum(remote_scan.worker_column_3))
+               ->  HashAggregate
+                     Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
+                     ->  Custom Scan (Citus Real-Time)
+                           Task Count: 4
+                           Tasks Shown: One of 4
+                           ->  Task
+                                 Node: host=localhost port=57637 dbname=regression
+                                 ->  HashAggregate
+                                       Group Key: l_suppkey, l_linenumber
+                                       ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
+(14 rows)
+
+-- check the plan if the hash aggreate is disabled. We expect to see sort+unique to 
+-- handle distinct on.
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT ON (l_suppkey) avg(l_partkey)
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY l_suppkey,1
+		LIMIT 10;
+                                                                                          QUERY PLAN                                                                                          
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
+   ->  Unique
+         ->  Sort
+               Sort Key: remote_scan.worker_column_4, (pg_catalog.sum(((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))) / pg_catalog.sum(remote_scan.worker_column_3))
+               ->  GroupAggregate
+                     Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
+                     ->  Sort
+                           Sort Key: remote_scan.worker_column_3, remote_scan.worker_column_4
+                           ->  Custom Scan (Citus Real-Time)
+                                 Task Count: 4
+                                 Tasks Shown: One of 4
+                                 ->  Task
+                                       Node: host=localhost port=57637 dbname=regression
+                                       ->  HashAggregate
+                                             Group Key: l_suppkey, l_linenumber
+                                             ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
+(16 rows)
+
+SET enable_hashagg TO on;
+-- distinct with expression and aggregation
+SELECT DISTINCT avg(ceil(l_partkey / 2))
+	FROM lineitem_hash_part
+	GROUP BY l_suppkey, l_linenumber
+	ORDER BY 1
+	LIMIT 10;
+ avg 
+-----
+   9
+  39
+  74
+  87
+  89
+  91
+  97
+ 102
+ 111
+ 122
+(10 rows)
+
+	
+-- explain the query to see actual plan
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT avg(ceil(l_partkey / 2))
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1
+		LIMIT 10;
+                                               QUERY PLAN                                                
+---------------------------------------------------------------------------------------------------------
+ Limit
+   ->  Sort
+         Sort Key: ((sum(remote_scan.avg) / (pg_catalog.sum(remote_scan.avg_1))::double precision))
+         ->  HashAggregate
+               Group Key: (sum(remote_scan.avg) / (pg_catalog.sum(remote_scan.avg_1))::double precision)
+               ->  HashAggregate
+                     Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
+                     ->  Custom Scan (Citus Real-Time)
+                           Task Count: 4
+                           Tasks Shown: One of 4
+                           ->  Task
+                                 Node: host=localhost port=57637 dbname=regression
+                                 ->  HashAggregate
+                                       Group Key: l_suppkey, l_linenumber
+                                       ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
+(15 rows)
+
+-- check the plan if the hash aggreate is disabled. This explain errors out due
+-- to a bug right now, expectation must be corrected after fixing it.
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT avg(ceil(l_partkey / 2))
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1
+		LIMIT 10;	
+ERROR:  bogus varattno for OUTER_VAR var: 4
+		
+SET enable_hashagg TO on;		
+-- expression among aggregations.
+SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis 
+	FROM lineitem_hash_part 
+	GROUP BY l_suppkey, l_linenumber
+	ORDER BY 1
+	LIMIT 10;
+ dis 
+-----
+   2
+   3
+   4
+   5
+   6
+   8
+  11
+  13
+  14
+  15
+(10 rows)
+
+	
+-- explain the query to see actual plan
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis 
+		FROM lineitem_hash_part 
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1
+		LIMIT 10;
+                                                                QUERY PLAN                                                                 
+-------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
+   ->  Sort
+         Sort Key: (((pg_catalog.sum(remote_scan.dis))::bigint + COALESCE((pg_catalog.sum(remote_scan.dis_1))::bigint, '0'::bigint)))
+         ->  HashAggregate
+               Group Key: ((pg_catalog.sum(remote_scan.dis))::bigint + COALESCE((pg_catalog.sum(remote_scan.dis_1))::bigint, '0'::bigint))
+               ->  HashAggregate
+                     Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
+                     ->  Custom Scan (Citus Real-Time)
+                           Task Count: 4
+                           Tasks Shown: One of 4
+                           ->  Task
+                                 Node: host=localhost port=57637 dbname=regression
+                                 ->  HashAggregate
+                                       Group Key: l_suppkey, l_linenumber
+                                       ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
+(15 rows)
+
+-- check the plan if the hash aggreate is disabled. This explain errors out due
+-- to a bug right now, expectation must be corrected after fixing it.
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis 
+		FROM lineitem_hash_part 
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1
+		LIMIT 10;	
+ERROR:  bogus varattno for OUTER_VAR var: 4
+		
+SET enable_hashagg TO on;		
+		
+-- distinct on all columns, note Group By columns guarantees uniqueness of the
+-- result list.		
+SELECT DISTINCT * 
+	FROM lineitem_hash_part 
+	GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
+	ORDER BY 1,2
+	LIMIT 10;
+ l_orderkey | l_partkey | l_suppkey | l_linenumber | l_quantity | l_extendedprice | l_discount | l_tax | l_returnflag | l_linestatus | l_shipdate | l_commitdate | l_receiptdate |      l_shipinstruct       | l_shipmode |                 l_comment                  
+------------+-----------+-----------+--------------+------------+-----------------+------------+-------+--------------+--------------+------------+--------------+---------------+---------------------------+------------+--------------------------------------------
+          1 |      2132 |      4633 |            4 |      28.00 |        28955.64 |       0.09 |  0.06 | N            | O            | 04-21-1996 | 03-30-1996   | 05-16-1996    | NONE                      | AIR        | lites. fluffily even de
+          1 |     15635 |       638 |            6 |      32.00 |        49620.16 |       0.07 |  0.02 | N            | O            | 01-30-1996 | 02-07-1996   | 02-03-1996    | DELIVER IN PERSON         | MAIL       | arefully slyly ex
+          1 |     24027 |      1534 |            5 |      24.00 |        22824.48 |       0.10 |  0.04 | N            | O            | 03-30-1996 | 03-14-1996   | 04-01-1996    | NONE                      | FOB        |  pending foxes. slyly re
+          1 |     63700 |      3701 |            3 |       8.00 |        13309.60 |       0.10 |  0.02 | N            | O            | 01-29-1996 | 03-05-1996   | 01-31-1996    | TAKE BACK RETURN          | REG AIR    | riously. regular, express dep
+          1 |     67310 |      7311 |            2 |      36.00 |        45983.16 |       0.09 |  0.06 | N            | O            | 04-12-1996 | 02-28-1996   | 04-20-1996    | TAKE BACK RETURN          | MAIL       | ly final         dependencies: slyly bold 
+          1 |    155190 |      7706 |            1 |      17.00 |        21168.23 |       0.04 |  0.02 | N            | O            | 03-13-1996 | 02-12-1996   | 03-22-1996    | DELIVER IN PERSON         | TRUCK      | egular courts above the
+          2 |    106170 |      1191 |            1 |      38.00 |        44694.46 |       0.00 |  0.05 | N            | O            | 01-28-1997 | 01-14-1997   | 02-02-1997    | TAKE BACK RETURN          | RAIL       | ven requests. deposits breach a
+          3 |      4297 |      1798 |            1 |      45.00 |        54058.05 |       0.06 |  0.00 | R            | F            | 02-02-1994 | 01-04-1994   | 02-23-1994    | NONE                      | AIR        | ongside of the furiously brave acco
+          3 |     19036 |      6540 |            2 |      49.00 |        46796.47 |       0.10 |  0.00 | R            | F            | 11-09-1993 | 12-20-1993   | 11-24-1993    | TAKE BACK RETURN          | RAIL       |  unusual accounts. eve
+          3 |     29380 |      1883 |            4 |       2.00 |         2618.76 |       0.01 |  0.06 | A            | F            | 12-04-1993 | 01-07-1994   | 01-01-1994    | NONE                      | TRUCK      | y. fluffily pending d
+(10 rows)
+
+	
+-- explain the query to see actual plan. We expect to see only one aggregation
+-- node since group by columns guarantees the uniqueness.
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT * 
+		FROM lineitem_hash_part 
+		GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
+		ORDER BY 1,2
+		LIMIT 10;
+                                                                                                                                                                                                             QUERY PLAN                                                                                                                                                                                                              
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
+   ->  Sort
+         Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey
+         ->  HashAggregate
+               Group Key: remote_scan.l_orderkey, remote_scan.l_partkey, remote_scan.l_suppkey, remote_scan.l_linenumber, remote_scan.l_quantity, remote_scan.l_extendedprice, remote_scan.l_discount, remote_scan.l_tax, remote_scan.l_returnflag, remote_scan.l_linestatus, remote_scan.l_shipdate, remote_scan.l_commitdate, remote_scan.l_receiptdate, remote_scan.l_shipinstruct, remote_scan.l_shipmode, remote_scan.l_comment
+               ->  Custom Scan (Citus Real-Time)
+                     Task Count: 4
+                     Tasks Shown: One of 4
+                     ->  Task
+                           Node: host=localhost port=57637 dbname=regression
+                           ->  Limit
+                                 ->  Unique
+                                       ->  Group
+                                             Group Key: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
+                                             ->  Sort
+                                                   Sort Key: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
+                                                   ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
+(17 rows)
+
+-- check the plan if the hash aggreate is disabled. We expect to see only one 
+-- aggregation node since group by columns guarantees the uniqueness.
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT * 
+		FROM lineitem_hash_part 
+		GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
+		ORDER BY 1,2
+		LIMIT 10;	
+                                                                                                                                                                                                                QUERY PLAN                                                                                                                                                                                                                
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
+   ->  Sort
+         Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey
+         ->  GroupAggregate
+               Group Key: remote_scan.l_orderkey, remote_scan.l_partkey, remote_scan.l_suppkey, remote_scan.l_linenumber, remote_scan.l_quantity, remote_scan.l_extendedprice, remote_scan.l_discount, remote_scan.l_tax, remote_scan.l_returnflag, remote_scan.l_linestatus, remote_scan.l_shipdate, remote_scan.l_commitdate, remote_scan.l_receiptdate, remote_scan.l_shipinstruct, remote_scan.l_shipmode, remote_scan.l_comment
+               ->  Sort
+                     Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey, remote_scan.l_suppkey, remote_scan.l_linenumber, remote_scan.l_quantity, remote_scan.l_extendedprice, remote_scan.l_discount, remote_scan.l_tax, remote_scan.l_returnflag, remote_scan.l_linestatus, remote_scan.l_shipdate, remote_scan.l_commitdate, remote_scan.l_receiptdate, remote_scan.l_shipinstruct, remote_scan.l_shipmode, remote_scan.l_comment
+                     ->  Custom Scan (Citus Real-Time)
+                           Task Count: 4
+                           Tasks Shown: One of 4
+                           ->  Task
+                                 Node: host=localhost port=57637 dbname=regression
+                                 ->  Limit
+                                       ->  Unique
+                                             ->  Group
+                                                   Group Key: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
+                                                   ->  Sort
+                                                         Sort Key: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
+                                                         ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
+(19 rows)
+
+		
+SET enable_hashagg TO on;
+-- distinct on count distinct
+SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
+	FROM lineitem_hash_part
+	GROUP BY l_orderkey
+	ORDER BY 1,2;
+ count | count 
+-------+-------
+     1 |     1
+     2 |     1
+     2 |     2
+     3 |     1
+     3 |     2
+     3 |     3
+     4 |     1
+     4 |     2
+     4 |     3
+     4 |     4
+     5 |     2
+     5 |     3
+     5 |     4
+     5 |     5
+     6 |     2
+     6 |     3
+     6 |     4
+     6 |     5
+     6 |     6
+     7 |     2
+     7 |     3
+     7 |     4
+     7 |     5
+     7 |     6
+     7 |     7
+(25 rows)
+
+	
+-- explain the query to see actual plan. We expect to see aggregation plan for
+-- the outer distinct.
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
+		FROM lineitem_hash_part
+		GROUP BY l_orderkey
+		ORDER BY 1,2;
+                                                                         QUERY PLAN                                                                          
+-------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Sort
+   Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)), (COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint))
+   ->  HashAggregate
+         Group Key: COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint), COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint)
+         ->  HashAggregate
+               Group Key: remote_scan.worker_column_3
+               ->  Custom Scan (Citus Real-Time)
+                     Task Count: 4
+                     Tasks Shown: One of 4
+                     ->  Task
+                           Node: host=localhost port=57637 dbname=regression
+                           ->  GroupAggregate
+                                 Group Key: l_orderkey
+                                 ->  Sort
+                                       Sort Key: l_orderkey
+                                       ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
+(16 rows)
+
+-- check the plan if the hash aggreate is disabled. We expect to see sort + unique 
+-- plans for the outer distinct.
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
+		FROM lineitem_hash_part
+		GROUP BY l_orderkey
+		ORDER BY 1,2;
+                                                                                                                                                                                                                               QUERY PLAN                                                                                                                                                                                                                               
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Sort
+   Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint), COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint)
+   ->  Unique
+         ->  Sort
+               Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint), COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint))))::bigint, '0'::bigint)
+               ->  GroupAggregate
+                     Group Key: remote_scan.worker_column_3
+                     ->  Sort
+                           Sort Key: remote_scan.worker_column_3
+                           ->  Custom Scan (Citus Real-Time)
+                                 Task Count: 4
+                                 Tasks Shown: One of 4
+                                 ->  Task
+                                       Node: host=localhost port=57637 dbname=regression
+                                       ->  GroupAggregate
+                                             Group Key: l_orderkey
+                                             ->  Sort
+                                                   Sort Key: l_orderkey
+                                                   ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
+(19 rows)
+
+		
+SET enable_hashagg TO on;
+-- distinct on aggregation with filter and expression
+SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count 
+	FROM lineitem_hash_part 
+	GROUP BY l_suppkey
+	ORDER BY 1;
+ count 
+-------
+     0
+     1
+     2
+     3
+     4
+(5 rows)
+
+	
+-- explain the query to see actual plan
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count 
+		FROM lineitem_hash_part 
+		GROUP BY l_suppkey
+		ORDER BY 1;
+                                                      QUERY PLAN                                                       
+-----------------------------------------------------------------------------------------------------------------------
+ Sort
+   Sort Key: (ceil(((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint) / 2))::double precision))
+   ->  HashAggregate
+         Group Key: ceil(((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint) / 2))::double precision)
+         ->  HashAggregate
+               Group Key: remote_scan.worker_column_2
+               ->  Custom Scan (Citus Real-Time)
+                     Task Count: 4
+                     Tasks Shown: One of 4
+                     ->  Task
+                           Node: host=localhost port=57637 dbname=regression
+                           ->  HashAggregate
+                                 Group Key: l_suppkey
+                                 ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
+(14 rows)
+
+-- check the plan if the hash aggreate is disabled
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count 
+		FROM lineitem_hash_part 
+		GROUP BY l_suppkey
+		ORDER BY 1;
+                                                                                                                                                                               QUERY PLAN                                                                                                                                                                                
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Sort
+   Sort Key: ceil(((COALESCE((pg_catalog.sum((ceil(((COALESCE((pg_catalog.sum((ceil(((COALESCE((pg_catalog.sum((ceil(((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint) / 2))::double precision))))::bigint, '0'::bigint) / 2))::double precision))))::bigint, '0'::bigint) / 2))::double precision))))::bigint, '0'::bigint) / 2))::double precision)
+   ->  Unique
+         ->  Sort
+               Sort Key: ceil(((COALESCE((pg_catalog.sum((ceil(((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint) / 2))::double precision))))::bigint, '0'::bigint) / 2))::double precision)
+               ->  GroupAggregate
+                     Group Key: remote_scan.worker_column_2
+                     ->  Sort
+                           Sort Key: remote_scan.worker_column_2
+                           ->  Custom Scan (Citus Real-Time)
+                                 Task Count: 4
+                                 Tasks Shown: One of 4
+                                 ->  Task
+                                       Node: host=localhost port=57637 dbname=regression
+                                       ->  HashAggregate
+                                             Group Key: l_suppkey
+                                             ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
+(17 rows)
+
+		
+SET enable_hashagg TO on;
+	
+-- explain the query to see actual plan with array_agg aggregation. Note that, 
+-- worker query created for this query is not correct. It will be fixed soon.  
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT array_agg(l_linenumber), array_length(array_agg(l_linenumber), 1)
+		FROM lineitem_hash_part 
+		GROUP BY l_orderkey
+		ORDER BY 2
+		LIMIT 15;
+                                                       QUERY PLAN                                                        
+-------------------------------------------------------------------------------------------------------------------------
+ Limit
+   ->  Sort
+         Sort Key: (array_length(array_cat_agg(remote_scan.array_length), 1))
+         ->  HashAggregate
+               Group Key: array_length(array_cat_agg(remote_scan.array_length), 1), array_cat_agg(remote_scan.array_agg)
+               ->  HashAggregate
+                     Group Key: remote_scan.worker_column_3
+                     ->  Custom Scan (Citus Real-Time)
+                           Task Count: 4
+                           Tasks Shown: One of 4
+                           ->  Task
+                                 Node: host=localhost port=57637 dbname=regression
+                                 ->  Limit
+                                       ->  Sort
+                                             Sort Key: (array_agg(l_linenumber))
+                                             ->  GroupAggregate
+                                                   Group Key: l_orderkey
+                                                   ->  Sort
+                                                         Sort Key: l_orderkey
+                                                         ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
+(20 rows)
+
+-- check the plan if the hash aggreate is disabled. Note that, 
+-- worker query created for this query is not correct. It will be fixed soon. 
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT array_agg(l_linenumber), array_length(array_agg(l_linenumber), 1)
+		FROM lineitem_hash_part 
+		GROUP BY l_orderkey
+		ORDER BY 2
+		LIMIT 15;
+                                                                                    QUERY PLAN                                                                                    
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
+   ->  Sort
+         Sort Key: array_length(array_cat_agg((array_length(array_cat_agg((array_length(array_cat_agg((array_length(array_cat_agg(remote_scan.array_length), 1))), 1))), 1))), 1)
+         ->  Unique
+               ->  Sort
+                     Sort Key: array_length(array_cat_agg((array_length(array_cat_agg(remote_scan.array_length), 1))), 1), array_cat_agg((array_cat_agg(remote_scan.array_agg)))
+                     ->  GroupAggregate
+                           Group Key: remote_scan.worker_column_3
+                           ->  Sort
+                                 Sort Key: remote_scan.worker_column_3
+                                 ->  Custom Scan (Citus Real-Time)
+                                       Task Count: 4
+                                       Tasks Shown: One of 4
+                                       ->  Task
+                                             Node: host=localhost port=57637 dbname=regression
+                                             ->  Limit
+                                                   ->  Sort
+                                                         Sort Key: (array_agg(l_linenumber))
+                                                         ->  GroupAggregate
+                                                               Group Key: l_orderkey
+                                                               ->  Sort
+                                                                     Sort Key: l_orderkey
+                                                                     ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
+(23 rows)
+
+		
 SET enable_hashagg TO on;
 -- distinct on non-partition column with aggregate
 -- this is the same as non-distinct version due to group by
--- a/src/test/regress/sql/multi_select_distinct.sql
+++ b/src/test/regress/sql/multi_select_distinct.sql
@ -78,6 +78,270 @@ EXPLAIN (COSTS FALSE)

 SET enable_hashagg TO on;

+-- distinct on aggregate of group by columns, we try to check whether we handle
+-- queries which does not have any group by column in distinct columns properly.
+SELECT DISTINCT count(*)
+	FROM lineitem_hash_part
+	GROUP BY l_suppkey, l_linenumber
+	ORDER BY 1;
+	
+-- explain the query to see actual plan. We expect to see Aggregate node having 
+-- group by key on count(*) column, since columns in the Group By doesn't guarantee
+-- the uniqueness of the result.
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT count(*)
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1;
+
+-- check the plan if the hash aggreate is disabled. We expect to see sort+unique
+-- instead of aggregate plan node to handle distinct.
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT count(*)
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1;
+
+SET enable_hashagg TO on;
+
+-- Now we have only part of group clause columns in distinct, yet it is still not
+-- enough to use Group By columns to guarantee uniqueness of result list.
+SELECT DISTINCT l_suppkey, count(*)
+	FROM lineitem_hash_part
+	GROUP BY l_suppkey, l_linenumber
+	ORDER BY 1
+	LIMIT 10;
+	
+-- explain the query to see actual plan. Similar to the explain of the query above.
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT l_suppkey, count(*)
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1
+		LIMIT 10;
+
+-- check the plan if the hash aggreate is disabled. Similar to the explain of 
+-- the query above.
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT l_suppkey, count(*)
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1
+		LIMIT 10;
+
+SET enable_hashagg TO on;
+
+-- Similar to the above query, not with count but avg. Only difference with the
+-- above query is that, we create run two aggregate functions in workers.
+SELECT DISTINCT l_suppkey, avg(l_partkey)
+	FROM lineitem_hash_part
+	GROUP BY l_suppkey, l_linenumber
+	ORDER BY 1,2
+	LIMIT 10;
+	
+-- explain the query to see actual plan. Similar to the explain of the query above.
+-- Only aggregate functions will be changed.
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT l_suppkey, avg(l_partkey)
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1,2
+		LIMIT 10;
+
+-- check the plan if the hash aggreate is disabled. This explain errors out due
+-- to a bug right now, expectation must be corrected after fixing it.
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT l_suppkey, avg(l_partkey)
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1,2
+		LIMIT 10;
+
+SET enable_hashagg TO on;
+
+-- Similar to the above query but with distinct on
+SELECT DISTINCT ON (l_suppkey) avg(l_partkey)
+	FROM lineitem_hash_part
+	GROUP BY l_suppkey, l_linenumber
+	ORDER BY l_suppkey,1
+	LIMIT 10;
+	
+-- explain the query to see actual plan. We expect to see sort+unique to handle
+-- distinct on.
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT ON (l_suppkey) avg(l_partkey)
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY l_suppkey,1
+		LIMIT 10;
+
+-- check the plan if the hash aggreate is disabled. We expect to see sort+unique to 
+-- handle distinct on.
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT ON (l_suppkey) avg(l_partkey)
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY l_suppkey,1
+		LIMIT 10;
+
+SET enable_hashagg TO on;
+
+-- distinct with expression and aggregation
+SELECT DISTINCT avg(ceil(l_partkey / 2))
+	FROM lineitem_hash_part
+	GROUP BY l_suppkey, l_linenumber
+	ORDER BY 1
+	LIMIT 10;
+	
+-- explain the query to see actual plan
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT avg(ceil(l_partkey / 2))
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1
+		LIMIT 10;
+
+-- check the plan if the hash aggreate is disabled. This explain errors out due
+-- to a bug right now, expectation must be corrected after fixing it.
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT avg(ceil(l_partkey / 2))
+		FROM lineitem_hash_part
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1
+		LIMIT 10;	
+		
+SET enable_hashagg TO on;		
+
+-- expression among aggregations.
+SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis 
+	FROM lineitem_hash_part 
+	GROUP BY l_suppkey, l_linenumber
+	ORDER BY 1
+	LIMIT 10;
+	
+-- explain the query to see actual plan
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis 
+		FROM lineitem_hash_part 
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1
+		LIMIT 10;
+
+-- check the plan if the hash aggreate is disabled. This explain errors out due
+-- to a bug right now, expectation must be corrected after fixing it.
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis 
+		FROM lineitem_hash_part 
+		GROUP BY l_suppkey, l_linenumber
+		ORDER BY 1
+		LIMIT 10;	
+		
+SET enable_hashagg TO on;		
+		
+-- distinct on all columns, note Group By columns guarantees uniqueness of the
+-- result list.		
+SELECT DISTINCT * 
+	FROM lineitem_hash_part 
+	GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
+	ORDER BY 1,2
+	LIMIT 10;
+	
+-- explain the query to see actual plan. We expect to see only one aggregation
+-- node since group by columns guarantees the uniqueness.
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT * 
+		FROM lineitem_hash_part 
+		GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
+		ORDER BY 1,2
+		LIMIT 10;
+
+-- check the plan if the hash aggreate is disabled. We expect to see only one 
+-- aggregation node since group by columns guarantees the uniqueness.
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT * 
+		FROM lineitem_hash_part 
+		GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
+		ORDER BY 1,2
+		LIMIT 10;	
+		
+SET enable_hashagg TO on;
+
+-- distinct on count distinct
+SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
+	FROM lineitem_hash_part
+	GROUP BY l_orderkey
+	ORDER BY 1,2;
+	
+-- explain the query to see actual plan. We expect to see aggregation plan for
+-- the outer distinct.
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
+		FROM lineitem_hash_part
+		GROUP BY l_orderkey
+		ORDER BY 1,2;
+
+-- check the plan if the hash aggreate is disabled. We expect to see sort + unique 
+-- plans for the outer distinct.
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
+		FROM lineitem_hash_part
+		GROUP BY l_orderkey
+		ORDER BY 1,2;
+		
+SET enable_hashagg TO on;
+
+-- distinct on aggregation with filter and expression
+SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count 
+	FROM lineitem_hash_part 
+	GROUP BY l_suppkey
+	ORDER BY 1;
+	
+-- explain the query to see actual plan
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count 
+		FROM lineitem_hash_part 
+		GROUP BY l_suppkey
+		ORDER BY 1;
+
+-- check the plan if the hash aggreate is disabled
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count 
+		FROM lineitem_hash_part 
+		GROUP BY l_suppkey
+		ORDER BY 1;
+		
+SET enable_hashagg TO on;
+	
+-- explain the query to see actual plan with array_agg aggregation. Note that, 
+-- worker query created for this query is not correct. It will be fixed soon.  
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT array_agg(l_linenumber), array_length(array_agg(l_linenumber), 1)
+		FROM lineitem_hash_part 
+		GROUP BY l_orderkey
+		ORDER BY 2
+		LIMIT 15;
+
+-- check the plan if the hash aggreate is disabled. Note that, 
+-- worker query created for this query is not correct. It will be fixed soon. 
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE)
+	SELECT DISTINCT array_agg(l_linenumber), array_length(array_agg(l_linenumber), 1)
+		FROM lineitem_hash_part 
+		GROUP BY l_orderkey
+		ORDER BY 2
+		LIMIT 15;
+		
+SET enable_hashagg TO on;
+
 -- distinct on non-partition column with aggregate
 -- this is the same as non-distinct version due to group by
 SELECT DISTINCT l_partkey, count(*)