Fix incorrect limit pushdown when distinct clause is not superset of group by (#2035)

Pushing down limit and order by into workers may produce wrong output when distinct on() clause has expressions, aggregates, or window functions. This checking allows pushing down of limits only if distinct clause is a superset of group by clause. i.e. it contains all clauses in group by.
2018-03-07 13:24:56 +03:00 · 2018-03-07 13:24:56 +03:00 · 1440caeef2
parent 27d159d6f6
commit 1440caeef2
7 changed files with 460 additions and 100 deletions
--- a/src/backend/distributed/planner/multi_logical_optimizer.c
+++ b/src/backend/distributed/planner/multi_logical_optimizer.c
@ -1860,7 +1860,9 @@ WorkerExtendedOpNode(MultiExtendedOp *originalOpNode,
 		palloc0(sizeof(WorkerAggregateWalkerContext));
 	Index nextSortGroupRefIndex = 0;
 	bool queryHasAggregates = false;
-	bool enableLimitPushdown = true;
+	bool distinctClauseSupersetofGroupClause = false;
+	bool distinctPreventsLimitPushdown = false;
+	bool createdNewGroupByClause = false;
 	bool hasNonPartitionColumnDistinctAgg = false;
 	bool repartitionSubquery = false;

@ -1949,11 +1951,7 @@ WorkerExtendedOpNode(MultiExtendedOp *originalOpNode,
 				groupClauseList = lappend(groupClauseList, groupByClause);
 				nextSortGroupRefIndex++;

-				/*
-				 * If we introduce new columns accompanied by a new group by clause,
-				 * than pushing down limits will cause incorrect results.
-				 */
-				enableLimitPushdown = false;
+				createdNewGroupByClause = true;
 			}

 			if (newTargetEntry->resname == NULL)
@ -2013,11 +2011,7 @@ WorkerExtendedOpNode(MultiExtendedOp *originalOpNode,
 				groupClauseList = lappend(groupClauseList, groupByClause);
 				nextSortGroupRefIndex++;

-				/*
-				 * If we introduce new columns accompanied by a new group by clause,
-				 * than pushing down limits will cause incorrect results.
-				 */
-				enableLimitPushdown = false;
+				createdNewGroupByClause = true;
 			}

 			newTargetEntryList = lappend(newTargetEntryList, newTargetEntry);
@ -2030,16 +2024,52 @@ WorkerExtendedOpNode(MultiExtendedOp *originalOpNode,
 	workerExtendedOpNode->hasDistinctOn = false;
 	workerExtendedOpNode->hasWindowFuncs = originalOpNode->hasWindowFuncs;
 	workerExtendedOpNode->windowClause = originalOpNode->windowClause;
+	workerExtendedOpNode->groupClauseList = groupClauseList;

-	if (!queryHasAggregates)
+	if (originalOpNode->distinctClause)
+	{
+		bool shouldPushdownDistinct = false;
+		if (groupClauseList == NIL ||
+			IsGroupBySubsetOfDistinct(groupClauseList,
+									  originalOpNode->distinctClause))
+		{
+			distinctClauseSupersetofGroupClause = true;
+		}
+		else
+		{
+			distinctClauseSupersetofGroupClause = false;
+
+			/*
+			 * GROUP BY being a subset of DISTINCT guarantees the
+			 * distinctness on the workers. Otherwise, pushing down
+			 * LIMIT might cause missing the necessary data from
+			 * the worker query
+			 */
+			distinctPreventsLimitPushdown = true;
+		}
+
+		/*
+		 * Distinct is pushed down to worker query only if the query does not
+		 * contain an aggregate in which master processing might be required to
+		 * complete the final result before distinct operation. We also prevent
+		 * distinct pushdown if distinct clause is missing some entries that
+		 * group by clause has.
+		 */
+		shouldPushdownDistinct = !queryHasAggregates &&
+								 distinctClauseSupersetofGroupClause;
+		if (shouldPushdownDistinct)
 		{
 			workerExtendedOpNode->distinctClause = originalOpNode->distinctClause;
 			workerExtendedOpNode->hasDistinctOn = originalOpNode->hasDistinctOn;
 		}
+	}

-	workerExtendedOpNode->groupClauseList = groupClauseList;
-
-	if (enableLimitPushdown)
+	/*
+	 * Order by and limit clauses are pushed down only if
+	 * (1) We do not create a new group by clause during aggregate mutation, and
+	 * (2) There distinct clause does not prevent limit pushdown
+	 */
+	if (!createdNewGroupByClause && !distinctPreventsLimitPushdown)
 	{
 		List *newTargetEntryListForSortClauses = NIL;

@ -3858,3 +3888,50 @@ HasOrderByHllType(List *sortClauseList, List *targetList)

 	return hasOrderByHllType;
 }
+
+
+/*
+ * IsGroupBySubsetOfDistinct checks whether each clause in group clauses also
+ * exists in the distinct clauses. Note that, empty group clause is not a subset
+ * of distinct clause.
+ */
+bool
+IsGroupBySubsetOfDistinct(List *groupClause, List *distinctClause)
+{
+	ListCell *distinctCell = NULL;
+	ListCell *groupCell = NULL;
+
+	/* There must be a group clause */
+	if (list_length(groupClause) == 0)
+	{
+		return false;
+	}
+
+	foreach(groupCell, groupClause)
+	{
+		SortGroupClause *groupClause = (SortGroupClause *) lfirst(groupCell);
+		bool isFound = false;
+
+		foreach(distinctCell, distinctClause)
+		{
+			SortGroupClause *distinctClause = (SortGroupClause *) lfirst(distinctCell);
+
+			if (groupClause->tleSortGroupRef == distinctClause->tleSortGroupRef)
+			{
+				isFound = true;
+				break;
+			}
+		}
+
+		/*
+		 * If we can't find any member of group clause in the distinct clause,
+		 * that means group clause is not a subset of distinct clause.
+		 */
+		if (!isFound)
+		{
+			return false;
+		}
+	}
+
+	return true;
+}
--- a/src/backend/distributed/planner/multi_master_planner.c
+++ b/src/backend/distributed/planner/multi_master_planner.c
@ -13,6 +13,7 @@

 #include "postgres.h"

+#include "distributed/multi_logical_optimizer.h"
 #include "distributed/multi_master_planner.h"
 #include "distributed/multi_physical_planner.h"
 #include "distributed/distributed_planner.h"
@ -38,7 +39,6 @@ static Agg * BuildAggregatePlan(Query *masterQuery, Plan *subPlan);
 static bool HasDistinctAggregate(Query *masterQuery);
 static Plan * BuildDistinctPlan(Query *masterQuery, Plan *subPlan);
 static List * PrepareTargetListForNextPlan(List *targetList);
-static bool IsGroupBySubsetOfDistinct(Query *masterQuery);


 /*
@ -414,7 +414,7 @@ BuildDistinctPlan(Query *masterQuery, Plan *subPlan)
 	 * clause also used in distinct clause, since group by clause guarantees the
 	 * uniqueness of the target list for every row.
 	 */
-	if (IsGroupBySubsetOfDistinct(masterQuery))
+	if (IsGroupBySubsetOfDistinct(masterQuery->groupClause, masterQuery->distinctClause))
 	{
 		return subPlan;
 	}
@ -494,52 +494,3 @@ PrepareTargetListForNextPlan(List *targetList)

 	return newtargetList;
 }
-
-
-/*
- * IsGroupBySubsetOfDistinct checks whether each clause in group clauses also
- * exists in the distinct clauses. Note that, empty group clause is not a subset
- * of distinct clause.
- */
-static bool
-IsGroupBySubsetOfDistinct(Query *masterQuery)
-{
-	List *distinctClauses = masterQuery->distinctClause;
-	List *groupClauses = masterQuery->groupClause;
-	ListCell *distinctCell = NULL;
-	ListCell *groupCell = NULL;
-
-	/* There must be a group clause */
-	if (list_length(groupClauses) == 0)
-	{
-		return false;
-	}
-
-	foreach(groupCell, groupClauses)
-	{
-		SortGroupClause *groupClause = (SortGroupClause *) lfirst(groupCell);
-		bool isFound = false;
-
-		foreach(distinctCell, distinctClauses)
-		{
-			SortGroupClause *distinctClause = (SortGroupClause *) lfirst(distinctCell);
-
-			if (groupClause->tleSortGroupRef == distinctClause->tleSortGroupRef)
-			{
-				isFound = true;
-				break;
-			}
-		}
-
-		/*
-		 * If we can't find any member of group clause in the distinct clause,
-		 * that means group clause is not a subset of distinct clause.
-		 */
-		if (!isFound)
-		{
-			return false;
-		}
-	}
-
-	return true;
-}
--- a/src/include/distributed/multi_logical_optimizer.h
+++ b/src/include/distributed/multi_logical_optimizer.h
@ -138,5 +138,6 @@ extern bool IsPartitionColumn(Expr *columnExpression, Query *query);
 extern void FindReferencedTableColumn(Expr *columnExpression, List *parentQueryList,
 									  Query *query, Oid *relationId, Var **column);

+extern bool IsGroupBySubsetOfDistinct(List *groupClause, List *distinctClause);

 #endif   /* MULTI_LOGICAL_OPTIMIZER_H */
--- a/src/test/regress/expected/multi_limit_clause.out
+++ b/src/test/regress/expected/multi_limit_clause.out
@ -297,5 +297,227 @@ SELECT
 O            | TRUCK      | 11-17-1998
 (5 rows)

+-- Push down limit even if there is distinct on
+SELECT
+	DISTINCT ON (l_orderkey, l_linenumber) l_orderkey, l_linenumber
+	FROM lineitem_hash
+	GROUP BY l_orderkey, l_linenumber
+	ORDER BY l_orderkey, l_linenumber
+	LIMIT 5;
+DEBUG:  push down of limit count: 5
+ l_orderkey | l_linenumber 
+------------+--------------
+          1 |            1
+          1 |            2
+          1 |            3
+          1 |            4
+          1 |            5
+(5 rows)
+
+-- Don't push down limit when group by clause not included in distinct on
+SELECT
+	DISTINCT ON (l_linenumber) l_orderkey, l_linenumber
+	FROM lineitem_hash
+	GROUP BY l_orderkey, l_linenumber
+	ORDER BY l_linenumber, l_orderkey
+	LIMIT 5;
+ l_orderkey | l_linenumber 
+------------+--------------
+          1 |            1
+          1 |            2
+          1 |            3
+          1 |            4
+          1 |            5
+(5 rows)
+
+-- Push down limit when there is const in distinct on
+-- referring to a column such that group by clause
+-- list is contained in distinct on
+SELECT
+	DISTINCT ON (l_linenumber, 1) l_orderkey, l_linenumber
+	FROM lineitem_hash
+	GROUP BY l_orderkey, l_linenumber
+	ORDER BY l_linenumber, l_orderkey
+	LIMIT 5;
+DEBUG:  push down of limit count: 5
+ l_orderkey | l_linenumber 
+------------+--------------
+          1 |            1
+          2 |            1
+          3 |            1
+          4 |            1
+          5 |            1
+(5 rows)
+
+-- Don't push down limit when there is const expression in distinct on
+-- even if there is a group by on the expression
+-- This is due to fact that postgres removes (1+1) from distinct on
+-- clause but keeps it in group by list.
+SELECT
+	DISTINCT ON (l_linenumber, 1+1, l_linenumber) l_orderkey, l_linenumber
+	FROM lineitem_hash
+	GROUP BY l_orderkey, (1+1), l_linenumber
+	ORDER BY l_linenumber, (1+1), l_orderkey
+	LIMIT 5;
+ l_orderkey | l_linenumber 
+------------+--------------
+          1 |            1
+          1 |            2
+          1 |            3
+          1 |            4
+          1 |            5
+(5 rows)
+
+-- Don't push down limit when there is const reference
+-- does not point to a column to make distinct clause superset
+-- of group by
+SELECT
+	DISTINCT ON (l_linenumber, 2) l_orderkey, l_linenumber
+	FROM lineitem_hash
+	GROUP BY l_orderkey, l_linenumber
+	ORDER BY l_linenumber, l_orderkey
+	LIMIT 5;
+ l_orderkey | l_linenumber 
+------------+--------------
+          1 |            1
+          1 |            2
+          1 |            3
+          1 |            4
+          1 |            5
+(5 rows)
+
+-- Push down limit even when there is a column expression
+-- in distinct clause provided that distinct clause covers
+-- group by expression, and there is no aggregates in the query.
+SELECT
+	DISTINCT ON (l_orderkey + 1) l_orderkey + 1
+	FROM lineitem_hash
+	GROUP BY l_orderkey + 1
+	ORDER BY l_orderkey + 1
+	LIMIT 5;
+DEBUG:  push down of limit count: 5
+ ?column? 
+----------
+        2
+        3
+        4
+        5
+        6
+(5 rows)
+
+-- Limit is not pushed down when there are aggregates in the query
+-- This is because group by is not on distribution column itself
+-- but on an expression on distribution column
+SELECT
+	DISTINCT ON (l_orderkey + 1, count(*)) l_orderkey + 1, count(*)
+	FROM lineitem_hash
+	GROUP BY l_orderkey + 1
+	ORDER BY l_orderkey + 1 , 2
+	LIMIT 5;
+ ?column? | count 
+----------+-------
+        2 |     6
+        3 |     1
+        4 |     6
+        5 |     1
+        6 |     3
+(5 rows)
+
+-- same query with column instead of column expression, limit is pushed down
+-- because group by is on distribution column
+SELECT
+	DISTINCT ON (l_orderkey, count(*)) l_orderkey, count(*)
+	FROM lineitem_hash
+	GROUP BY l_orderkey
+	ORDER BY l_orderkey , 2
+	LIMIT 5;
+DEBUG:  push down of limit count: 5
+ l_orderkey | count 
+------------+-------
+          1 |     6
+          2 |     1
+          3 |     6
+          4 |     1
+          5 |     3
+(5 rows)
+
+-- limit is not pushed down because distinct clause
+-- does not cover group by clause
+SELECT
+	DISTINCT ON (count(*)) l_orderkey, count(*)
+	FROM lineitem_hash
+	GROUP BY l_orderkey
+	ORDER BY 2 DESC, 1
+	LIMIT 2;
+ l_orderkey | count 
+------------+-------
+          7 |     7
+          1 |     6
+(2 rows)
+
+-- push down limit if there is a window function in distinct on
+SELECT
+	DISTINCT ON (l_orderkey, RANK() OVER (partition by l_orderkey)) l_orderkey, RANK() OVER (partition by l_orderkey)
+	FROM lineitem_hash
+	GROUP BY l_orderkey
+	ORDER BY l_orderkey , 2
+	LIMIT 5;
+DEBUG:  push down of limit count: 5
+ l_orderkey | rank 
+------------+------
+          1 |    1
+          2 |    1
+          3 |    1
+          4 |    1
+          5 |    1
+(5 rows)
+
+-- do not push down limit if there is an aggragete in distinct on
+-- we should be able to push this down, but query goes to subquery
+-- planner and we can't safely determine it is grouped by partition
+-- column.
+SELECT
+	DISTINCT ON (l_orderkey, RANK() OVER (partition by l_orderkey)) l_orderkey, count(*), RANK() OVER (partition by l_orderkey)
+	FROM lineitem_hash
+	GROUP BY l_orderkey
+	ORDER BY l_orderkey , 3, 2
+	LIMIT 5;
+ l_orderkey | count | rank 
+------------+-------+------
+          1 |     6 |    1
+          2 |     1 |    1
+          3 |     6 |    1
+          4 |     1 |    1
+          5 |     3 |    1
+(5 rows)
+
+-- limit is not pushed down due to same reason
+SELECT
+	DISTINCT ON (l_orderkey, count(*) OVER (partition by l_orderkey)) l_orderkey, l_linenumber, count(*), count(*) OVER (partition by l_orderkey)
+	FROM lineitem_hash
+	GROUP BY l_orderkey, l_linenumber
+	ORDER BY l_orderkey , count(*) OVER (partition by l_orderkey), count(*), l_linenumber
+	LIMIT 5;
+ l_orderkey | l_linenumber | count | count 
+------------+--------------+-------+-------
+          1 |            1 |     1 |     6
+          2 |            1 |     1 |     1
+          3 |            1 |     1 |     6
+          4 |            1 |     1 |     1
+          5 |            1 |     1 |     3
+(5 rows)
+
+-- limit is not pushed down since distinct clause is not superset of group clause
+SELECT
+	DISTINCT ON (RANK() OVER (partition by l_orderkey)) l_orderkey, RANK() OVER (partition by l_orderkey)
+	FROM lineitem_hash
+	GROUP BY l_orderkey
+	ORDER BY 2 DESC, 1 
+	LIMIT 5;
+ l_orderkey | rank 
+------------+------
+          1 |    1
+(1 row)
+
 SET client_min_messages TO NOTICE;
 DROP TABLE lineitem_hash;
--- a/src/test/regress/expected/multi_select_distinct.out
+++ b/src/test/regress/expected/multi_select_distinct.out
@ -3,6 +3,7 @@
 --
 -- Tests select distinct, and select distinct on features.
 --
+ANALYZE lineitem_hash_part;
 -- function calls are supported
 SELECT DISTINCT l_orderkey, now() FROM lineitem_hash_part LIMIT 0;
 l_orderkey | now 
@ -360,13 +361,10 @@ EXPLAIN (COSTS FALSE)
                           Tasks Shown: One of 4
                           ->  Task
                                 Node: host=localhost port=57637 dbname=regression
-                                 ->  Limit
-                                       ->  Sort
-                                             Sort Key: l_suppkey, l_linenumber
                                 ->  HashAggregate
                                       Group Key: l_suppkey, l_linenumber
                                       ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
-(18 rows)
+(15 rows)

 -- check the plan if the hash aggreate is disabled. Similar to the explain of 
 -- the query above.
@ -394,13 +392,10 @@ EXPLAIN (COSTS FALSE)
                                       Tasks Shown: One of 4
                                       ->  Task
                                             Node: host=localhost port=57637 dbname=regression
-                                             ->  Limit
-                                                   ->  Sort
-                                                         Sort Key: l_suppkey, l_linenumber
                                             ->  HashAggregate
                                                   Group Key: l_suppkey, l_linenumber
                                                   ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
-(21 rows)
+(18 rows)

 SET enable_hashagg TO on;
 -- Similar to the above query, not with count but avg. Only difference with the
@ -930,15 +925,12 @@ EXPLAIN (COSTS FALSE)
                           Tasks Shown: One of 4
                           ->  Task
                                 Node: host=localhost port=57637 dbname=regression
-                                 ->  Limit
-                                       ->  Sort
-                                             Sort Key: (array_length(array_agg(l_linenumber), 1))
                                 ->  GroupAggregate
                                       Group Key: l_orderkey
                                       ->  Sort
                                             Sort Key: l_orderkey
                                             ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
-(20 rows)
+(17 rows)

 -- check the plan if the hash aggreate is disabled. 
 SET enable_hashagg TO off;
@ -965,15 +957,12 @@ EXPLAIN (COSTS FALSE)
                                       Tasks Shown: One of 4
                                       ->  Task
                                             Node: host=localhost port=57637 dbname=regression
-                                             ->  Limit
-                                                   ->  Sort
-                                                         Sort Key: (array_length(array_agg(l_linenumber), 1))
                                             ->  GroupAggregate
                                                   Group Key: l_orderkey
                                                   ->  Sort
                                                         Sort Key: l_orderkey
                                                         ->  Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
-(23 rows)
+(20 rows)

 		
 SET enable_hashagg TO on;
--- a/src/test/regress/sql/multi_limit_clause.sql
+++ b/src/test/regress/sql/multi_limit_clause.sql
@ -102,6 +102,125 @@ SELECT
 	GROUP BY l_linestatus, l_shipmode
 	ORDER BY 3 DESC, 1, 2 LIMIT 5;

+-- Push down limit even if there is distinct on
+SELECT
+	DISTINCT ON (l_orderkey, l_linenumber) l_orderkey, l_linenumber
+	FROM lineitem_hash
+	GROUP BY l_orderkey, l_linenumber
+	ORDER BY l_orderkey, l_linenumber
+	LIMIT 5;
+
+-- Don't push down limit when group by clause not included in distinct on
+SELECT
+	DISTINCT ON (l_linenumber) l_orderkey, l_linenumber
+	FROM lineitem_hash
+	GROUP BY l_orderkey, l_linenumber
+	ORDER BY l_linenumber, l_orderkey
+	LIMIT 5;
+
+-- Push down limit when there is const in distinct on
+-- referring to a column such that group by clause
+-- list is contained in distinct on
+SELECT
+	DISTINCT ON (l_linenumber, 1) l_orderkey, l_linenumber
+	FROM lineitem_hash
+	GROUP BY l_orderkey, l_linenumber
+	ORDER BY l_linenumber, l_orderkey
+	LIMIT 5;
+
+-- Don't push down limit when there is const expression in distinct on
+-- even if there is a group by on the expression
+-- This is due to fact that postgres removes (1+1) from distinct on
+-- clause but keeps it in group by list.
+SELECT
+	DISTINCT ON (l_linenumber, 1+1, l_linenumber) l_orderkey, l_linenumber
+	FROM lineitem_hash
+	GROUP BY l_orderkey, (1+1), l_linenumber
+	ORDER BY l_linenumber, (1+1), l_orderkey
+	LIMIT 5;
+
+-- Don't push down limit when there is const reference
+-- does not point to a column to make distinct clause superset
+-- of group by
+SELECT
+	DISTINCT ON (l_linenumber, 2) l_orderkey, l_linenumber
+	FROM lineitem_hash
+	GROUP BY l_orderkey, l_linenumber
+	ORDER BY l_linenumber, l_orderkey
+	LIMIT 5;
+
+-- Push down limit even when there is a column expression
+-- in distinct clause provided that distinct clause covers
+-- group by expression, and there is no aggregates in the query.
+SELECT
+	DISTINCT ON (l_orderkey + 1) l_orderkey + 1
+	FROM lineitem_hash
+	GROUP BY l_orderkey + 1
+	ORDER BY l_orderkey + 1
+	LIMIT 5;
+
+-- Limit is not pushed down when there are aggregates in the query
+-- This is because group by is not on distribution column itself
+-- but on an expression on distribution column
+SELECT
+	DISTINCT ON (l_orderkey + 1, count(*)) l_orderkey + 1, count(*)
+	FROM lineitem_hash
+	GROUP BY l_orderkey + 1
+	ORDER BY l_orderkey + 1 , 2
+	LIMIT 5;
+
+-- same query with column instead of column expression, limit is pushed down
+-- because group by is on distribution column
+SELECT
+	DISTINCT ON (l_orderkey, count(*)) l_orderkey, count(*)
+	FROM lineitem_hash
+	GROUP BY l_orderkey
+	ORDER BY l_orderkey , 2
+	LIMIT 5;
+
+-- limit is not pushed down because distinct clause
+-- does not cover group by clause
+SELECT
+	DISTINCT ON (count(*)) l_orderkey, count(*)
+	FROM lineitem_hash
+	GROUP BY l_orderkey
+	ORDER BY 2 DESC, 1
+	LIMIT 2;
+
+-- push down limit if there is a window function in distinct on
+SELECT
+	DISTINCT ON (l_orderkey, RANK() OVER (partition by l_orderkey)) l_orderkey, RANK() OVER (partition by l_orderkey)
+	FROM lineitem_hash
+	GROUP BY l_orderkey
+	ORDER BY l_orderkey , 2
+	LIMIT 5;
+
+-- do not push down limit if there is an aggragete in distinct on
+-- we should be able to push this down, but query goes to subquery
+-- planner and we can't safely determine it is grouped by partition
+-- column.
+SELECT
+	DISTINCT ON (l_orderkey, RANK() OVER (partition by l_orderkey)) l_orderkey, count(*), RANK() OVER (partition by l_orderkey)
+	FROM lineitem_hash
+	GROUP BY l_orderkey
+	ORDER BY l_orderkey , 3, 2
+	LIMIT 5;
+
+-- limit is not pushed down due to same reason
+SELECT
+	DISTINCT ON (l_orderkey, count(*) OVER (partition by l_orderkey)) l_orderkey, l_linenumber, count(*), count(*) OVER (partition by l_orderkey)
+	FROM lineitem_hash
+	GROUP BY l_orderkey, l_linenumber
+	ORDER BY l_orderkey , count(*) OVER (partition by l_orderkey), count(*), l_linenumber
+	LIMIT 5;
+
+-- limit is not pushed down since distinct clause is not superset of group clause
+SELECT
+	DISTINCT ON (RANK() OVER (partition by l_orderkey)) l_orderkey, RANK() OVER (partition by l_orderkey)
+	FROM lineitem_hash
+	GROUP BY l_orderkey
+	ORDER BY 2 DESC, 1 
+	LIMIT 5;

 SET client_min_messages TO NOTICE;
 DROP TABLE lineitem_hash;
--- a/src/test/regress/sql/multi_select_distinct.sql
+++ b/src/test/regress/sql/multi_select_distinct.sql
@ -4,6 +4,7 @@
 -- Tests select distinct, and select distinct on features.
 --

+ANALYZE lineitem_hash_part;

 -- function calls are supported
 SELECT DISTINCT l_orderkey, now() FROM lineitem_hash_part LIMIT 0;