Fix worker query when order by avg aggregate is used (#2024)

We push down order by to the worker query when a limit is specified (subject to some additional checks). If the query has an expression on an aggregate, or an avg aggregate by itself, and there is an order by on that particular target, we may send a wrong order by to the worker query, potentially affecting the query result. The fix creates an auxiliary target entry in the worker query and uses that target entry for sorting.
2018-02-28 12:12:54 +03:00 · 2018-02-28 12:12:54 +03:00 · e13c5beced
parent 8516d8631e
commit e13c5beced
9 changed files with 645 additions and 22 deletions
--- a/src/backend/distributed/planner/multi_logical_optimizer.c
+++ b/src/backend/distributed/planner/multi_logical_optimizer.c
@ -172,6 +172,10 @@ static Node * WorkerLimitCount(MultiExtendedOp *originalOpNode,
 							   bool groupedByDisjointPartitionColumn);
 static List * WorkerSortClauseList(MultiExtendedOp *originalOpNode,
 								   bool groupedByDisjointPartitionColumn);
+static List * GenerateNewTargetEntriesForSortClauses(List *originalTargetList,
+													 List *sortClauseList,
+													 AttrNumber *targetProjectionNumber,
+													 Index *nextSortGroupRefIndex);
 static bool CanPushDownLimitApproximate(List *sortClauseList, List *targetList);
 static bool HasOrderByAggregate(List *sortClauseList, List *targetList);
 static bool HasOrderByAverage(List *sortClauseList, List *targetList);
@ -2007,7 +2011,6 @@ WorkerExtendedOpNode(MultiExtendedOp *originalOpNode,
 	}

 	workerExtendedOpNode = CitusMakeNode(MultiExtendedOp);
-	workerExtendedOpNode->targetList = newTargetEntryList;
 	workerExtendedOpNode->distinctClause = NIL;
 	workerExtendedOpNode->hasDistinctOn = false;

@ -2021,13 +2024,26 @@ WorkerExtendedOpNode(MultiExtendedOp *originalOpNode,

 	if (enableLimitPushdown)
 	{
+		List *newTargetEntryListForSortClauses = NIL;
+
 		/* if we can push down the limit, also set related fields */
 		workerExtendedOpNode->limitCount = WorkerLimitCount(originalOpNode,
 															groupedByDisjointPartitionColumn);
 		workerExtendedOpNode->sortClauseList =
 			WorkerSortClauseList(originalOpNode, groupedByDisjointPartitionColumn);
+
+		newTargetEntryListForSortClauses =
+			GenerateNewTargetEntriesForSortClauses(originalOpNode->targetList,
+												   workerExtendedOpNode->sortClauseList,
+												   &targetProjectionNumber,
+												   &nextSortGroupRefIndex);
+
+		newTargetEntryList = list_concat(newTargetEntryList,
+										 newTargetEntryListForSortClauses);
 	}

+	workerExtendedOpNode->targetList = newTargetEntryList;
+
 	/*
 	 * If grouped by a partition column whose values are shards have disjoint sets
 	 * of partition values, we can push down the having qualifier.
@ -3495,7 +3511,7 @@ WorkerSortClauseList(MultiExtendedOp *originalOpNode,
 {
 	List *workerSortClauseList = NIL;
 	List *groupClauseList = originalOpNode->groupClauseList;
-	List *sortClauseList = originalOpNode->sortClauseList;
+	List *sortClauseList = copyObject(originalOpNode->sortClauseList);
 	List *targetList = originalOpNode->targetList;

 	/* if no limit node and no hasDistinctOn, no need to push down sort clauses */
@ -3514,7 +3530,7 @@ WorkerSortClauseList(MultiExtendedOp *originalOpNode,
 	 */
 	if (groupClauseList == NIL || groupedByDisjointPartitionColumn)
 	{
-		workerSortClauseList = originalOpNode->sortClauseList;
+		workerSortClauseList = sortClauseList;
 	}
 	else if (sortClauseList != NIL)
 	{
@ -3523,12 +3539,12 @@ WorkerSortClauseList(MultiExtendedOp *originalOpNode,

 		if (orderByNonAggregates)
 		{
-			workerSortClauseList = list_copy(sortClauseList);
+			workerSortClauseList = sortClauseList;
 			workerSortClauseList = list_concat(workerSortClauseList, groupClauseList);
 		}
 		else if (canApproximate)
 		{
-			workerSortClauseList = originalOpNode->sortClauseList;
+			workerSortClauseList = sortClauseList;
 		}
 	}

@ -3536,6 +3552,88 @@ WorkerSortClauseList(MultiExtendedOp *originalOpNode,
 }


+/*
+ * GenerateNewTargetEntriesForSortClauses goes over the provided sort clause list
+ * and creates new target entries where needed so that the sort clauses have
+ * correct references. The function returns the list of new target entries; the
+ * caller is responsible for appending them to the end of the worker target list.
+ *
+ * The function is required because we change the target entry if it contains an
+ * expression having an aggregate operation, or just the AVG aggregate.
+ * Afterwards any order by clause referring to the original target entry starts
+ * to point to a wrong expression.
+ *
+ * Note that the function modifies the SortGroupClause items in sortClauseList,
+ * as well as targetProjectionNumber and nextSortGroupRefIndex.
+ */
+static List *
+GenerateNewTargetEntriesForSortClauses(List *originalTargetList,
+									   List *sortClauseList,
+									   AttrNumber *targetProjectionNumber,
+									   Index *nextSortGroupRefIndex)
+{
+	List *createdTargetList = NIL;
+	ListCell *sortClauseCell = NULL;
+
+	foreach(sortClauseCell, sortClauseList)
+	{
+		SortGroupClause *sgClause = (SortGroupClause *) lfirst(sortClauseCell);
+		TargetEntry *targetEntry = get_sortgroupclause_tle(sgClause, originalTargetList);
+		Expr *targetExpr = targetEntry->expr;
+		bool containsAggregate = contain_agg_clause((Node *) targetExpr);
+		bool createNewTargetEntry = false;
+
+		/* we are only interested in target entries containing aggregates */
+		if (!containsAggregate)
+		{
+			continue;
+		}
+
+		/*
+		 * If the target expression is not an Aggref, it is either an expression
+		 * on a single aggregate, or an expression containing multiple aggregates.
+		 * The worker query mutates these target entries to have a naked target
+		 * entry per aggregate function. We want to use the original target entry
+		 * if this is the case.
+		 * If the original target expression is an avg aggref, we also want to use
+		 * the original target entry.
+		 */
+		if (!IsA(targetExpr, Aggref))
+		{
+			createNewTargetEntry = true;
+		}
+		else
+		{
+			Aggref *aggNode = (Aggref *) targetExpr;
+			AggregateType aggregateType = GetAggregateType(aggNode->aggfnoid);
+			if (aggregateType == AGGREGATE_AVERAGE)
+			{
+				createNewTargetEntry = true;
+			}
+		}
+
+		if (createNewTargetEntry)
+		{
+			bool resJunk = true;
+			AttrNumber nextResNo = (*targetProjectionNumber);
+			Expr *newExpr = copyObject(targetExpr);
+			TargetEntry *newTargetEntry = makeTargetEntry(newExpr, nextResNo,
+														  targetEntry->resname, resJunk);
+			newTargetEntry->ressortgroupref = *nextSortGroupRefIndex;
+
+			createdTargetList = lappend(createdTargetList, newTargetEntry);
+
+			sgClause->tleSortGroupRef = *nextSortGroupRefIndex;
+
+			(*nextSortGroupRefIndex)++;
+			(*targetProjectionNumber)++;
+		}
+	}
+
+	return createdTargetList;
+}
+
+
 /*
 * CanPushDownLimitApproximate checks if we can push down the limit clause to
 * the worker nodes, and get approximate and meaningful results. We can do this
--- a/src/backend/distributed/planner/multi_master_planner.c
+++ b/src/backend/distributed/planner/multi_master_planner.c
@ -81,9 +81,17 @@ MasterTargetList(List *workerTargetList)
 	foreach(workerTargetCell, workerTargetList)
 	{
 		TargetEntry *workerTargetEntry = (TargetEntry *) lfirst(workerTargetCell);
-		TargetEntry *masterTargetEntry = copyObject(workerTargetEntry);
+		TargetEntry *masterTargetEntry = NULL;
+		Var *masterColumn = NULL;

-		Var *masterColumn = makeVarFromTargetEntry(tableId, workerTargetEntry);
+		if (workerTargetEntry->resjunk)
+		{
+			continue;
+		}
+
+		masterTargetEntry = copyObject(workerTargetEntry);
+
+		masterColumn = makeVarFromTargetEntry(tableId, workerTargetEntry);
 		masterColumn->varattno = columnId;
 		masterColumn->varoattno = columnId;
 		columnId++;
--- a/src/test/regress/expected/multi_orderby_limit_pushdown.out
+++ b/src/test/regress/expected/multi_orderby_limit_pushdown.out
@ -0,0 +1,335 @@
+--
+-- MULTI_ORDERBY_LIMIT_PUSHDOWN
+--
+-- order by pushdown with aggregates
+-- tests order by when pushing down order by clauses including aggregate
+-- aggregate expressions.
+SELECT user_id, avg(value_1)
+FROM users_table
+GROUP BY user_id
+ORDER BY avg(value_1) DESC
+LIMIT 5;
+ user_id |        avg         
+---------+--------------------
+       1 | 3.2857142857142857
+       4 | 2.7391304347826087
+       5 | 2.6538461538461538
+       3 | 2.3529411764705882
+       2 | 2.3333333333333333
+(5 rows)
+
+SELECT user_id, avg(value_1)
+FROM users_table
+GROUP BY user_id
+ORDER BY avg(value_1) DESC
+LIMIT 1;
+ user_id |        avg         
+---------+--------------------
+       1 | 3.2857142857142857
+(1 row)
+
+EXPLAIN
+SELECT user_id, avg(value_1)
+FROM users_table
+GROUP BY user_id
+ORDER BY avg(value_1) DESC
+LIMIT 1;
+ERROR:  bogus varattno for OUTER_VAR var: 3
+SELECT user_id, avg(value_1) + 1
+FROM users_table
+GROUP BY user_id
+ORDER BY avg(value_1) + 1 DESC
+LIMIT 1;
+ user_id |      ?column?      
+---------+--------------------
+       1 | 4.2857142857142857
+(1 row)
+
+SELECT user_id, avg(value_1)
+FROM users_table
+GROUP BY user_id
+ORDER BY avg(value_1) + 1 DESC
+LIMIT 1;
+ user_id |        avg         
+---------+--------------------
+       1 | 3.2857142857142857
+(1 row)
+
+SELECT user_id, avg(value_1) + sum(value_2)
+FROM users_table
+GROUP BY user_id
+ORDER BY 2  DESC
+LIMIT 1;
+ user_id |      ?column?       
+---------+---------------------
+       5 | 65.6538461538461538
+(1 row)
+
+SELECT user_id, avg(value_1) + count(value_2)
+FROM users_table
+GROUP BY user_id
+ORDER BY 2  DESC;
+ user_id |      ?column?       
+---------+---------------------
+       5 | 28.6538461538461538
+       4 | 25.7391304347826087
+       2 | 20.3333333333333333
+       3 | 19.3529411764705882
+       6 | 12.1000000000000000
+       1 | 10.2857142857142857
+(6 rows)
+
+EXPLAIN 
+SELECT user_id, avg(value_1) + count(value_2)
+FROM users_table
+GROUP BY user_id
+ORDER BY 2  DESC;
+ERROR:  bogus varattno for OUTER_VAR var: 3
+SELECT user_id, avg(value_1) + count(value_2)
+FROM users_table
+GROUP BY user_id
+ORDER BY 2  DESC
+LIMIT 1;
+ user_id |      ?column?       
+---------+---------------------
+       5 | 28.6538461538461538
+(1 row)
+
+SELECT user_id, sum(value_1) + sum(value_2)
+FROM users_table
+GROUP BY user_id
+ORDER BY 2  DESC
+LIMIT 1;
+ user_id | ?column? 
+---------+----------
+       5 |      132
+(1 row)
+
+SELECT user_id, sum(value_1) + sum(value_2)
+FROM users_table
+GROUP BY user_id
+ORDER BY sum(value_2)  DESC
+LIMIT 1;
+ user_id | ?column? 
+---------+----------
+       5 |      132
+(1 row)
+
+SELECT user_id, (100 / max(value_1))
+FROM users_table
+GROUP BY user_id
+ORDER BY 2  DESC, 1 DESC
+LIMIT 2;
+ user_id | ?column? 
+---------+----------
+       2 |       25
+       6 |       20
+(2 rows)
+
+SELECT user_id, (100 / (1 + min(value_1)))
+FROM users_table
+GROUP BY user_id
+ORDER BY 2  DESC, 1
+LIMIT 2;
+ user_id | ?column? 
+---------+----------
+       2 |      100
+       3 |      100
+(2 rows)
+
+SELECT user_id, sum(value_1 + value_2)
+FROM users_table
+GROUP BY user_id
+ORDER BY 2  DESC
+LIMIT 2;
+ user_id | sum 
+---------+-----
+       5 | 132
+       4 | 113
+(2 rows)
+
+SELECT user_id, 10000 / (sum(value_1 + value_2))
+FROM users_table
+GROUP BY user_id
+ORDER BY 2  DESC
+LIMIT 2;
+ user_id | ?column? 
+---------+----------
+       6 |      238
+       1 |      232
+(2 rows)
+
+SELECT user_id, sum(value_1 + value_2)
+FROM users_table
+GROUP BY user_id
+ORDER BY (10000 / (sum(value_1 + value_2)))  DESC
+LIMIT 2;
+ user_id | sum 
+---------+-----
+       6 |  42
+       1 |  43
+(2 rows)
+
+SELECT user_id
+FROM users_table
+GROUP BY user_id
+ORDER BY (10000 / (sum(value_1 + value_2)))  DESC
+LIMIT 2;
+ user_id 
+---------
+       6
+       1
+(2 rows)
+
+EXPLAIN
+SELECT user_id
+FROM users_table
+GROUP BY user_id
+ORDER BY (10000 / (sum(value_1 + value_2)))  DESC
+LIMIT 2;
+                                                           QUERY PLAN                                                            
+---------------------------------------------------------------------------------------------------------------------------------
+ Limit  (cost=0.00..0.00 rows=0 width=0)
+   ->  Sort  (cost=0.00..0.00 rows=0 width=0)
+         Sort Key: (10000 / (pg_catalog.sum(((10000 / (pg_catalog.sum(remote_scan.worker_column_2))::bigint))))::bigint) DESC
+         ->  HashAggregate  (cost=0.00..0.00 rows=0 width=0)
+               Group Key: remote_scan.user_id
+               ->  Custom Scan (Citus Real-Time)  (cost=0.00..0.00 rows=0 width=0)
+                     Task Count: 4
+                     Tasks Shown: One of 4
+                     ->  Task
+                           Node: host=localhost port=57637 dbname=regression
+                           ->  Limit  (cost=1.78..1.78 rows=2 width=20)
+                                 ->  Sort  (cost=1.78..1.78 rows=2 width=20)
+                                       Sort Key: ((10000 / sum((value_1 + value_2)))) DESC
+                                       ->  HashAggregate  (cost=1.74..1.77 rows=2 width=20)
+                                             Group Key: user_id
+                                             ->  Seq Scan on users_table_1400000 users_table  (cost=0.00..1.33 rows=33 width=12)
+(16 rows)
+
+SELECT 10000 / (sum(value_1 + value_2))
+FROM users_table
+ORDER BY 1 DESC
+LIMIT 2;
+ ?column? 
+----------
+       19
+(1 row)
+
+SELECT user_id, AVG(value_1)
+FROM users_table
+GROUP BY user_id
+ORDER BY user_id * avg(value_1) DESC
+LIMIT 2;
+ user_id |        avg         
+---------+--------------------
+       5 | 2.6538461538461538
+       6 | 2.1000000000000000
+(2 rows)
+
+SELECT user_id, AVG(value_1)
+FROM users_table
+GROUP BY user_id
+ORDER BY user_id * avg(value_1 + value_2) DESC
+LIMIT 2;
+ user_id |        avg         
+---------+--------------------
+       5 | 2.6538461538461538
+       6 | 2.1000000000000000
+(2 rows)
+
+SELECT user_id
+FROM users_table
+GROUP BY user_id
+ORDER BY sum(value_1) DESC
+LIMIT 2;
+ user_id 
+---------
+       5
+       4
+(2 rows)
+
+EXPLAIN
+SELECT user_id
+FROM users_table
+GROUP BY user_id
+ORDER BY sum(value_1) DESC
+LIMIT 2;
+                                                           QUERY PLAN                                                           
+--------------------------------------------------------------------------------------------------------------------------------
+ Limit  (cost=0.00..0.00 rows=0 width=0)
+   ->  Sort  (cost=0.00..0.00 rows=0 width=0)
+         Sort Key: (pg_catalog.sum(((pg_catalog.sum(remote_scan.worker_column_2))::bigint)))::bigint DESC
+         ->  HashAggregate  (cost=0.00..0.00 rows=0 width=0)
+               Group Key: remote_scan.user_id
+               ->  Custom Scan (Citus Real-Time)  (cost=0.00..0.00 rows=0 width=0)
+                     Task Count: 4
+                     Tasks Shown: One of 4
+                     ->  Task
+                           Node: host=localhost port=57637 dbname=regression
+                           ->  Limit  (cost=1.53..1.53 rows=2 width=12)
+                                 ->  Sort  (cost=1.53..1.53 rows=2 width=12)
+                                       Sort Key: (sum(value_1)) DESC
+                                       ->  HashAggregate  (cost=1.50..1.52 rows=2 width=12)
+                                             Group Key: user_id
+                                             ->  Seq Scan on users_table_1400000 users_table  (cost=0.00..1.33 rows=33 width=8)
+(16 rows)
+
+SELECT ut.user_id, avg(ut.value_2)
+FROM users_table ut, events_table et
+WHERE ut.user_id = et.user_id and et.value_2 < 5
+GROUP BY ut.user_id
+ORDER BY MAX(et.time), AVG(ut.value_1)
+LIMIT 5;
+ user_id |        avg         
+---------+--------------------
+       6 | 2.1000000000000000
+       2 | 2.7777777777777778
+       5 | 2.4230769230769231
+       3 | 3.2352941176470588
+       4 | 2.1739130434782609
+(5 rows)
+
+EXPLAIN
+SELECT ut.user_id, avg(ut.value_2)
+FROM users_table ut, events_table et
+WHERE ut.user_id = et.user_id and et.value_2 < 5
+GROUP BY ut.user_id
+ORDER BY MAX(et.time), AVG(ut.value_1)
+LIMIT 5;
+ERROR:  bogus varattno for OUTER_VAR var: 5
+SELECT ut.user_id, avg(et.value_2)
+FROM users_table ut, events_table et
+WHERE ut.user_id = et.user_id and et.value_2 < 5
+GROUP BY ut.user_id
+ORDER BY avg(ut.value_2) DESC, AVG(et.value_2)
+LIMIT 5;
+ user_id |        avg         
+---------+--------------------
+       3 | 1.8947368421052632
+       1 | 2.4615384615384615
+       2 | 2.0000000000000000
+       5 | 2.2142857142857143
+       4 | 2.0666666666666667
+(5 rows)
+
+SELECT ut.user_id, count(DISTINCT ut.value_2)
+FROM users_table ut, events_table et
+WHERE ut.user_id = et.user_id and et.value_2 < 5
+GROUP BY ut.user_id
+ORDER BY 2, AVG(ut.value_1), 1 DESC
+LIMIT 2;
+ user_id | count 
+---------+-------
+       1 |     4
+       6 |     5
+(2 rows)
+
+EXPLAIN
+SELECT ut.user_id, count(DISTINCT ut.value_2)
+FROM users_table ut, events_table et
+WHERE ut.user_id = et.user_id and et.value_2 < 5
+GROUP BY ut.user_id
+ORDER BY 2, AVG(ut.value_1), 1 DESC
+LIMIT 5;
+ERROR:  bogus varattno for OUTER_VAR var: 4
--- a/src/test/regress/expected/multi_select_distinct.out
+++ b/src/test/regress/expected/multi_select_distinct.out
@ -909,8 +909,7 @@ EXPLAIN (COSTS FALSE)
 		
 SET enable_hashagg TO on;
 	
-- explain the query to see actual plan with array_agg aggregation. Note that, 
-- worker query created for this query is not correct. It will be fixed soon.  
+-- explain the query to see actual plan with array_agg aggregation.  
 EXPLAIN (COSTS FALSE)
 	SELECT DISTINCT array_agg(l_linenumber), array_length(array_agg(l_linenumber), 1)
 		FROM lineitem_hash_part 
@ -933,7 +932,7 @@ EXPLAIN (COSTS FALSE)
                                 Node: host=localhost port=57637 dbname=regression
                                 ->  Limit
                                       ->  Sort
-                                             Sort Key: (array_agg(l_linenumber))
+                                             Sort Key: (array_length(array_agg(l_linenumber), 1))
                                             ->  GroupAggregate
                                                   Group Key: l_orderkey
                                                   ->  Sort
@ -941,8 +940,7 @@ EXPLAIN (COSTS FALSE)
                                                         ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
 (20 rows)

-- check the plan if the hash aggreate is disabled. Note that, 
-- worker query created for this query is not correct. It will be fixed soon. 
+-- check the plan if the hash aggreate is disabled. 
 SET enable_hashagg TO off;
 EXPLAIN (COSTS FALSE)
 	SELECT DISTINCT array_agg(l_linenumber), array_length(array_agg(l_linenumber), 1)
@ -969,7 +967,7 @@ EXPLAIN (COSTS FALSE)
                                             Node: host=localhost port=57637 dbname=regression
                                             ->  Limit
                                                   ->  Sort
-                                                         Sort Key: (array_agg(l_linenumber))
+                                                         Sort Key: (array_length(array_agg(l_linenumber), 1))
                                                         ->  GroupAggregate
                                                               Group Key: l_orderkey
                                                               ->  Sort
--- a/src/test/regress/expected/multi_subquery.out
+++ b/src/test/regress/expected/multi_subquery.out
@ -80,11 +80,11 @@ SELECT
 DEBUG:  push down of limit count: 5
 l_orderkey |       unit_price       
 ------------+------------------------
+        421 |    102749.853333333333
+        806 | 96257.4480681818181818
+        418 | 57151.4156630824373871
       1124 | 56102.2804738959822181
-        230 | 54613.8568599033816703
-        935 | 51688.6111227238944448
-        451 | 51673.9297867063492063
-        646 | 50919.3957476807927619
+        230 | 53847.0509778948909754
 (5 rows)

 SELECT
--- a/src/test/regress/multi_schedule
+++ b/src/test/regress/multi_schedule
@ -59,7 +59,7 @@ test: multi_subquery_union multi_subquery_in_where_clause multi_subquery_misc
 test: multi_agg_distinct multi_agg_approximate_distinct multi_limit_clause_approximate multi_outer_join_reference multi_single_relation_subquery multi_prepare_plsql
 test: multi_reference_table
 test: multi_average_expression multi_working_columns multi_having_pushdown
-test: multi_array_agg multi_limit_clause
+test: multi_array_agg multi_limit_clause multi_orderby_limit_pushdown
 test: multi_jsonb_agg
 test: multi_jsonb_object_agg
 test: multi_json_agg
--- a/src/test/regress/sql/multi_complex_expressions.sql
+++ b/src/test/regress/sql/multi_complex_expressions.sql
@ -245,3 +245,4 @@ SELECT
 	HAVING count(*) FILTER (WHERE l_shipmode = 'AIR') > 1
 	ORDER BY 2 DESC, 1 DESC
 	LIMIT 10;
+
--- a/src/test/regress/sql/multi_orderby_limit_pushdown.sql
+++ b/src/test/regress/sql/multi_orderby_limit_pushdown.sql
@ -0,0 +1,185 @@
+--
+-- MULTI_ORDERBY_LIMIT_PUSHDOWN
+--
+
+-- order by pushdown with aggregates
+-- tests order by when pushing down order by clauses including aggregate
+-- aggregate expressions.
+
+SELECT user_id, avg(value_1)
+FROM users_table
+GROUP BY user_id
+ORDER BY avg(value_1) DESC
+LIMIT 5;
+
+SELECT user_id, avg(value_1)
+FROM users_table
+GROUP BY user_id
+ORDER BY avg(value_1) DESC
+LIMIT 1;
+
+EXPLAIN
+SELECT user_id, avg(value_1)
+FROM users_table
+GROUP BY user_id
+ORDER BY avg(value_1) DESC
+LIMIT 1;
+
+SELECT user_id, avg(value_1) + 1
+FROM users_table
+GROUP BY user_id
+ORDER BY avg(value_1) + 1 DESC
+LIMIT 1;
+
+SELECT user_id, avg(value_1)
+FROM users_table
+GROUP BY user_id
+ORDER BY avg(value_1) + 1 DESC
+LIMIT 1;
+
+SELECT user_id, avg(value_1) + sum(value_2)
+FROM users_table
+GROUP BY user_id
+ORDER BY 2  DESC
+LIMIT 1;
+
+SELECT user_id, avg(value_1) + count(value_2)
+FROM users_table
+GROUP BY user_id
+ORDER BY 2  DESC;
+
+EXPLAIN 
+SELECT user_id, avg(value_1) + count(value_2)
+FROM users_table
+GROUP BY user_id
+ORDER BY 2  DESC;
+
+SELECT user_id, avg(value_1) + count(value_2)
+FROM users_table
+GROUP BY user_id
+ORDER BY 2  DESC
+LIMIT 1;
+
+SELECT user_id, sum(value_1) + sum(value_2)
+FROM users_table
+GROUP BY user_id
+ORDER BY 2  DESC
+LIMIT 1;
+
+SELECT user_id, sum(value_1) + sum(value_2)
+FROM users_table
+GROUP BY user_id
+ORDER BY sum(value_2)  DESC
+LIMIT 1;
+
+SELECT user_id, (100 / max(value_1))
+FROM users_table
+GROUP BY user_id
+ORDER BY 2  DESC, 1 DESC
+LIMIT 2;
+
+SELECT user_id, (100 / (1 + min(value_1)))
+FROM users_table
+GROUP BY user_id
+ORDER BY 2  DESC, 1
+LIMIT 2;
+
+
+SELECT user_id, sum(value_1 + value_2)
+FROM users_table
+GROUP BY user_id
+ORDER BY 2  DESC
+LIMIT 2;
+
+SELECT user_id, 10000 / (sum(value_1 + value_2))
+FROM users_table
+GROUP BY user_id
+ORDER BY 2  DESC
+LIMIT 2;
+
+SELECT user_id, sum(value_1 + value_2)
+FROM users_table
+GROUP BY user_id
+ORDER BY (10000 / (sum(value_1 + value_2)))  DESC
+LIMIT 2;
+
+SELECT user_id
+FROM users_table
+GROUP BY user_id
+ORDER BY (10000 / (sum(value_1 + value_2)))  DESC
+LIMIT 2;
+
+EXPLAIN
+SELECT user_id
+FROM users_table
+GROUP BY user_id
+ORDER BY (10000 / (sum(value_1 + value_2)))  DESC
+LIMIT 2;
+
+SELECT 10000 / (sum(value_1 + value_2))
+FROM users_table
+ORDER BY 1 DESC
+LIMIT 2;
+
+SELECT user_id, AVG(value_1)
+FROM users_table
+GROUP BY user_id
+ORDER BY user_id * avg(value_1) DESC
+LIMIT 2;
+
+SELECT user_id, AVG(value_1)
+FROM users_table
+GROUP BY user_id
+ORDER BY user_id * avg(value_1 + value_2) DESC
+LIMIT 2;
+
+SELECT user_id
+FROM users_table
+GROUP BY user_id
+ORDER BY sum(value_1) DESC
+LIMIT 2;
+
+EXPLAIN
+SELECT user_id
+FROM users_table
+GROUP BY user_id
+ORDER BY sum(value_1) DESC
+LIMIT 2;
+
+SELECT ut.user_id, avg(ut.value_2)
+FROM users_table ut, events_table et
+WHERE ut.user_id = et.user_id and et.value_2 < 5
+GROUP BY ut.user_id
+ORDER BY MAX(et.time), AVG(ut.value_1)
+LIMIT 5;
+
+EXPLAIN
+SELECT ut.user_id, avg(ut.value_2)
+FROM users_table ut, events_table et
+WHERE ut.user_id = et.user_id and et.value_2 < 5
+GROUP BY ut.user_id
+ORDER BY MAX(et.time), AVG(ut.value_1)
+LIMIT 5;
+
+
+SELECT ut.user_id, avg(et.value_2)
+FROM users_table ut, events_table et
+WHERE ut.user_id = et.user_id and et.value_2 < 5
+GROUP BY ut.user_id
+ORDER BY avg(ut.value_2) DESC, AVG(et.value_2)
+LIMIT 5;
+
+SELECT ut.user_id, count(DISTINCT ut.value_2)
+FROM users_table ut, events_table et
+WHERE ut.user_id = et.user_id and et.value_2 < 5
+GROUP BY ut.user_id
+ORDER BY 2, AVG(ut.value_1), 1 DESC
+LIMIT 2;
+
+EXPLAIN
+SELECT ut.user_id, count(DISTINCT ut.value_2)
+FROM users_table ut, events_table et
+WHERE ut.user_id = et.user_id and et.value_2 < 5
+GROUP BY ut.user_id
+ORDER BY 2, AVG(ut.value_1), 1 DESC
+LIMIT 5;
--- a/src/test/regress/sql/multi_select_distinct.sql
+++ b/src/test/regress/sql/multi_select_distinct.sql
@ -321,8 +321,7 @@ EXPLAIN (COSTS FALSE)
 		
 SET enable_hashagg TO on;
 	
-- explain the query to see actual plan with array_agg aggregation. Note that, 
-- worker query created for this query is not correct. It will be fixed soon.  
+-- explain the query to see actual plan with array_agg aggregation.  
 EXPLAIN (COSTS FALSE)
 	SELECT DISTINCT array_agg(l_linenumber), array_length(array_agg(l_linenumber), 1)
 		FROM lineitem_hash_part 
@ -330,8 +329,7 @@ EXPLAIN (COSTS FALSE)
 		ORDER BY 2
 		LIMIT 15;

-- check the plan if the hash aggreate is disabled. Note that, 
-- worker query created for this query is not correct. It will be fixed soon. 
+-- check the plan if the hash aggreate is disabled. 
 SET enable_hashagg TO off;
 EXPLAIN (COSTS FALSE)
 	SELECT DISTINCT array_agg(l_linenumber), array_length(array_agg(l_linenumber), 1)