mirror of https://github.com/citusdata/citus.git
Merge pull request #2018 from citusdata/distinct_without_groupby_column
Add distinct plan after aggregation plan on master plannerpull/2028/head
commit
ee67ce892f
|
@ -37,6 +37,8 @@ static PlannedStmt * BuildSelectStatement(Query *masterQuery, List *masterTarget
|
|||
static Agg * BuildAggregatePlan(Query *masterQuery, Plan *subPlan);
|
||||
static bool HasDistinctAggregate(Query *masterQuery);
|
||||
static Plan * BuildDistinctPlan(Query *masterQuery, Plan *subPlan);
|
||||
static List * PrepareTargetListForNextPlan(List *targetList);
|
||||
static bool IsGroupBySubsetOfDistinct(Query *masterQuery);
|
||||
|
||||
|
||||
/*
|
||||
|
@ -397,28 +399,29 @@ BuildDistinctPlan(Query *masterQuery, Plan *subPlan)
|
|||
bool distinctClausesHashable = true;
|
||||
List *distinctClauseList = masterQuery->distinctClause;
|
||||
List *targetList = copyObject(masterQuery->targetList);
|
||||
List *columnList = pull_var_clause_default((Node *) targetList);
|
||||
ListCell *columnCell = NULL;
|
||||
bool hasDistinctAggregate = false;
|
||||
|
||||
if (IsA(subPlan, Agg))
|
||||
/*
|
||||
* We don't need to add distinct plan if all of the columns used in group by
|
||||
* clause also used in distinct clause, since group by clause guarantees the
|
||||
* uniqueness of the target list for every row.
|
||||
*/
|
||||
if (IsGroupBySubsetOfDistinct(masterQuery))
|
||||
{
|
||||
return subPlan;
|
||||
}
|
||||
|
||||
/*
|
||||
* We need to adjust varno to OUTER_VAR, since planner expects that for upper
|
||||
* level plans above the sequential scan. We also need to convert aggregations
|
||||
* (if exists) to regular Vars since the aggregation would be applied by the
|
||||
* previous aggregation plan and we don't want them to be applied again.
|
||||
*/
|
||||
targetList = PrepareTargetListForNextPlan(targetList);
|
||||
|
||||
Assert(masterQuery->distinctClause);
|
||||
Assert(!masterQuery->hasDistinctOn);
|
||||
|
||||
/*
|
||||
* For upper level plans above the sequential scan, the planner expects the
|
||||
* table id (varno) to be set to OUTER_VAR.
|
||||
*/
|
||||
foreach(columnCell, columnList)
|
||||
{
|
||||
Var *column = (Var *) lfirst(columnCell);
|
||||
column->varno = OUTER_VAR;
|
||||
}
|
||||
|
||||
/*
|
||||
* Create group by plan with HashAggregate if all distinct
|
||||
* members are hashable, and not containing distinct aggregate.
|
||||
|
@ -451,3 +454,84 @@ BuildDistinctPlan(Query *masterQuery, Plan *subPlan)
|
|||
|
||||
return distinctPlan;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* PrepareTargetListForNextPlan handles both regular columns to have right varno
|
||||
* and convert aggregates to regular Vars in the target list.
|
||||
*/
|
||||
static List *
|
||||
PrepareTargetListForNextPlan(List *targetList)
|
||||
{
|
||||
List *newtargetList = NIL;
|
||||
ListCell *targetEntryCell = NULL;
|
||||
|
||||
foreach(targetEntryCell, targetList)
|
||||
{
|
||||
TargetEntry *targetEntry = lfirst(targetEntryCell);
|
||||
TargetEntry *newTargetEntry = NULL;
|
||||
Var *newVar = NULL;
|
||||
|
||||
Assert(IsA(targetEntry, TargetEntry));
|
||||
|
||||
/*
|
||||
* For upper level plans above the sequential scan, the planner expects the
|
||||
* table id (varno) to be set to OUTER_VAR.
|
||||
*/
|
||||
newVar = makeVarFromTargetEntry(OUTER_VAR, targetEntry);
|
||||
newTargetEntry = flatCopyTargetEntry(targetEntry);
|
||||
newTargetEntry->expr = (Expr *) newVar;
|
||||
newtargetList = lappend(newtargetList, newTargetEntry);
|
||||
}
|
||||
|
||||
return newtargetList;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* IsGroupBySubsetOfDistinct checks whether each clause in group clauses also
|
||||
* exists in the distinct clauses. Note that, empty group clause is not a subset
|
||||
* of distinct clause.
|
||||
*/
|
||||
static bool
|
||||
IsGroupBySubsetOfDistinct(Query *masterQuery)
|
||||
{
|
||||
List *distinctClauses = masterQuery->distinctClause;
|
||||
List *groupClauses = masterQuery->groupClause;
|
||||
ListCell *distinctCell = NULL;
|
||||
ListCell *groupCell = NULL;
|
||||
|
||||
/* There must be a group clause */
|
||||
if (list_length(groupClauses) == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
foreach(groupCell, groupClauses)
|
||||
{
|
||||
SortGroupClause *groupClause = (SortGroupClause *) lfirst(groupCell);
|
||||
bool isFound = false;
|
||||
|
||||
foreach(distinctCell, distinctClauses)
|
||||
{
|
||||
SortGroupClause *distinctClause = (SortGroupClause *) lfirst(distinctCell);
|
||||
|
||||
if (groupClause->tleSortGroupRef == distinctClause->tleSortGroupRef)
|
||||
{
|
||||
isFound = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If we can't find any member of group clause in the distinct clause,
|
||||
* that means group clause is not a subset of distinct clause.
|
||||
*/
|
||||
if (!isFound)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -245,6 +245,739 @@ EXPLAIN (COSTS FALSE)
|
|||
Filter: (l_orderkey < 200)
|
||||
(17 rows)
|
||||
|
||||
SET enable_hashagg TO on;
|
||||
-- distinct on aggregate of group by columns, we try to check whether we handle
|
||||
-- queries which does not have any group by column in distinct columns properly.
|
||||
SELECT DISTINCT count(*)
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey, l_linenumber
|
||||
ORDER BY 1;
|
||||
count
|
||||
-------
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
(4 rows)
|
||||
|
||||
|
||||
-- explain the query to see actual plan. We expect to see Aggregate node having
|
||||
-- group by key on count(*) column, since columns in the Group By doesn't guarantee
|
||||
-- the uniqueness of the result.
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT count(*)
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey, l_linenumber
|
||||
ORDER BY 1;
|
||||
QUERY PLAN
|
||||
----------------------------------------------------------------------------------------------
|
||||
Sort
|
||||
Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))
|
||||
-> HashAggregate
|
||||
Group Key: COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)
|
||||
-> HashAggregate
|
||||
Group Key: remote_scan.worker_column_2, remote_scan.worker_column_3
|
||||
-> Custom Scan (Citus Real-Time)
|
||||
Task Count: 4
|
||||
Tasks Shown: One of 4
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: l_suppkey, l_linenumber
|
||||
-> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
|
||||
(14 rows)
|
||||
|
||||
-- check the plan if the hash aggreate is disabled. We expect to see sort+unique
|
||||
-- instead of aggregate plan node to handle distinct.
|
||||
SET enable_hashagg TO off;
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT count(*)
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey, l_linenumber
|
||||
ORDER BY 1;
|
||||
QUERY PLAN
|
||||
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
Sort
|
||||
Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint)
|
||||
-> Unique
|
||||
-> Sort
|
||||
Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint)
|
||||
-> GroupAggregate
|
||||
Group Key: remote_scan.worker_column_2, remote_scan.worker_column_3
|
||||
-> Sort
|
||||
Sort Key: remote_scan.worker_column_2, remote_scan.worker_column_3
|
||||
-> Custom Scan (Citus Real-Time)
|
||||
Task Count: 4
|
||||
Tasks Shown: One of 4
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: l_suppkey, l_linenumber
|
||||
-> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
|
||||
(17 rows)
|
||||
|
||||
SET enable_hashagg TO on;
|
||||
-- Now we have only part of group clause columns in distinct, yet it is still not
|
||||
-- enough to use Group By columns to guarantee uniqueness of result list.
|
||||
SELECT DISTINCT l_suppkey, count(*)
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey, l_linenumber
|
||||
ORDER BY 1
|
||||
LIMIT 10;
|
||||
l_suppkey | count
|
||||
-----------+-------
|
||||
1 | 1
|
||||
2 | 1
|
||||
3 | 1
|
||||
4 | 1
|
||||
5 | 1
|
||||
7 | 1
|
||||
10 | 1
|
||||
12 | 1
|
||||
13 | 1
|
||||
14 | 1
|
||||
(10 rows)
|
||||
|
||||
|
||||
-- explain the query to see actual plan. Similar to the explain of the query above.
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT l_suppkey, count(*)
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey, l_linenumber
|
||||
ORDER BY 1
|
||||
LIMIT 10;
|
||||
QUERY PLAN
|
||||
--------------------------------------------------------------------------------------------------------------------
|
||||
Limit
|
||||
-> Sort
|
||||
Sort Key: remote_scan.l_suppkey
|
||||
-> HashAggregate
|
||||
Group Key: remote_scan.l_suppkey, COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)
|
||||
-> HashAggregate
|
||||
Group Key: remote_scan.l_suppkey, remote_scan.worker_column_3
|
||||
-> Custom Scan (Citus Real-Time)
|
||||
Task Count: 4
|
||||
Tasks Shown: One of 4
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> Limit
|
||||
-> Sort
|
||||
Sort Key: l_suppkey, l_linenumber
|
||||
-> HashAggregate
|
||||
Group Key: l_suppkey, l_linenumber
|
||||
-> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
|
||||
(18 rows)
|
||||
|
||||
-- check the plan if the hash aggreate is disabled. Similar to the explain of
|
||||
-- the query above.
|
||||
SET enable_hashagg TO off;
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT l_suppkey, count(*)
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey, l_linenumber
|
||||
ORDER BY 1
|
||||
LIMIT 10;
|
||||
QUERY PLAN
|
||||
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
Limit
|
||||
-> Sort
|
||||
Sort Key: remote_scan.l_suppkey
|
||||
-> Unique
|
||||
-> Sort
|
||||
Sort Key: remote_scan.l_suppkey, COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint)
|
||||
-> GroupAggregate
|
||||
Group Key: remote_scan.l_suppkey, remote_scan.worker_column_3
|
||||
-> Sort
|
||||
Sort Key: remote_scan.l_suppkey, remote_scan.worker_column_3
|
||||
-> Custom Scan (Citus Real-Time)
|
||||
Task Count: 4
|
||||
Tasks Shown: One of 4
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> Limit
|
||||
-> Sort
|
||||
Sort Key: l_suppkey, l_linenumber
|
||||
-> HashAggregate
|
||||
Group Key: l_suppkey, l_linenumber
|
||||
-> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
|
||||
(21 rows)
|
||||
|
||||
SET enable_hashagg TO on;
|
||||
-- Similar to the above query, not with count but avg. Only difference with the
|
||||
-- above query is that, we create run two aggregate functions in workers.
|
||||
SELECT DISTINCT l_suppkey, avg(l_partkey)
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey, l_linenumber
|
||||
ORDER BY 1,2
|
||||
LIMIT 10;
|
||||
l_suppkey | avg
|
||||
-----------+------------------------
|
||||
1 | 190000.000000000000
|
||||
2 | 172450.000000000000
|
||||
3 | 112469.000000000000
|
||||
3 | 134976.000000000000
|
||||
4 | 112470.000000000000
|
||||
4 | 142461.000000000000
|
||||
5 | 182450.000000000000
|
||||
7 | 137493.000000000000
|
||||
10 | 150009.000000000000
|
||||
12 | 17510.0000000000000000
|
||||
(10 rows)
|
||||
|
||||
|
||||
-- explain the query to see actual plan. Similar to the explain of the query above.
|
||||
-- Only aggregate functions will be changed.
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT l_suppkey, avg(l_partkey)
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey, l_linenumber
|
||||
ORDER BY 1,2
|
||||
LIMIT 10;
|
||||
QUERY PLAN
|
||||
-----------------------------------------------------------------------------------------------------------------------
|
||||
Limit
|
||||
-> Sort
|
||||
Sort Key: remote_scan.l_suppkey, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))
|
||||
-> HashAggregate
|
||||
Group Key: remote_scan.l_suppkey, (pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1))
|
||||
-> HashAggregate
|
||||
Group Key: remote_scan.l_suppkey, remote_scan.worker_column_4
|
||||
-> Custom Scan (Citus Real-Time)
|
||||
Task Count: 4
|
||||
Tasks Shown: One of 4
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: l_suppkey, l_linenumber
|
||||
-> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
|
||||
(15 rows)
|
||||
|
||||
-- check the plan if the hash aggreate is disabled. This explain errors out due
|
||||
-- to a bug right now, expectation must be corrected after fixing it.
|
||||
SET enable_hashagg TO off;
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT l_suppkey, avg(l_partkey)
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey, l_linenumber
|
||||
ORDER BY 1,2
|
||||
LIMIT 10;
|
||||
ERROR: bogus varattno for OUTER_VAR var: 4
|
||||
SET enable_hashagg TO on;
|
||||
-- Similar to the above query but with distinct on
|
||||
SELECT DISTINCT ON (l_suppkey) avg(l_partkey)
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey, l_linenumber
|
||||
ORDER BY l_suppkey,1
|
||||
LIMIT 10;
|
||||
avg
|
||||
------------------------
|
||||
190000.000000000000
|
||||
172450.000000000000
|
||||
112469.000000000000
|
||||
112470.000000000000
|
||||
182450.000000000000
|
||||
137493.000000000000
|
||||
150009.000000000000
|
||||
17510.0000000000000000
|
||||
87504.000000000000
|
||||
77506.000000000000
|
||||
(10 rows)
|
||||
|
||||
|
||||
-- explain the query to see actual plan. We expect to see sort+unique to handle
|
||||
-- distinct on.
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT ON (l_suppkey) avg(l_partkey)
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey, l_linenumber
|
||||
ORDER BY l_suppkey,1
|
||||
LIMIT 10;
|
||||
QUERY PLAN
|
||||
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
Limit
|
||||
-> Unique
|
||||
-> Sort
|
||||
Sort Key: remote_scan.worker_column_4, (pg_catalog.sum(((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))) / pg_catalog.sum(remote_scan.worker_column_3))
|
||||
-> HashAggregate
|
||||
Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
|
||||
-> Custom Scan (Citus Real-Time)
|
||||
Task Count: 4
|
||||
Tasks Shown: One of 4
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: l_suppkey, l_linenumber
|
||||
-> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
|
||||
(14 rows)
|
||||
|
||||
-- check the plan if the hash aggreate is disabled. We expect to see sort+unique to
|
||||
-- handle distinct on.
|
||||
SET enable_hashagg TO off;
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT ON (l_suppkey) avg(l_partkey)
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey, l_linenumber
|
||||
ORDER BY l_suppkey,1
|
||||
LIMIT 10;
|
||||
QUERY PLAN
|
||||
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
Limit
|
||||
-> Unique
|
||||
-> Sort
|
||||
Sort Key: remote_scan.worker_column_4, (pg_catalog.sum(((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))) / pg_catalog.sum(remote_scan.worker_column_3))
|
||||
-> GroupAggregate
|
||||
Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
|
||||
-> Sort
|
||||
Sort Key: remote_scan.worker_column_3, remote_scan.worker_column_4
|
||||
-> Custom Scan (Citus Real-Time)
|
||||
Task Count: 4
|
||||
Tasks Shown: One of 4
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: l_suppkey, l_linenumber
|
||||
-> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
|
||||
(16 rows)
|
||||
|
||||
SET enable_hashagg TO on;
|
||||
-- distinct with expression and aggregation
|
||||
SELECT DISTINCT avg(ceil(l_partkey / 2))
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey, l_linenumber
|
||||
ORDER BY 1
|
||||
LIMIT 10;
|
||||
avg
|
||||
-----
|
||||
9
|
||||
39
|
||||
74
|
||||
87
|
||||
89
|
||||
91
|
||||
97
|
||||
102
|
||||
111
|
||||
122
|
||||
(10 rows)
|
||||
|
||||
|
||||
-- explain the query to see actual plan
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT avg(ceil(l_partkey / 2))
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey, l_linenumber
|
||||
ORDER BY 1
|
||||
LIMIT 10;
|
||||
QUERY PLAN
|
||||
---------------------------------------------------------------------------------------------------------
|
||||
Limit
|
||||
-> Sort
|
||||
Sort Key: ((sum(remote_scan.avg) / (pg_catalog.sum(remote_scan.avg_1))::double precision))
|
||||
-> HashAggregate
|
||||
Group Key: (sum(remote_scan.avg) / (pg_catalog.sum(remote_scan.avg_1))::double precision)
|
||||
-> HashAggregate
|
||||
Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
|
||||
-> Custom Scan (Citus Real-Time)
|
||||
Task Count: 4
|
||||
Tasks Shown: One of 4
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: l_suppkey, l_linenumber
|
||||
-> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
|
||||
(15 rows)
|
||||
|
||||
-- check the plan if the hash aggreate is disabled. This explain errors out due
|
||||
-- to a bug right now, expectation must be corrected after fixing it.
|
||||
SET enable_hashagg TO off;
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT avg(ceil(l_partkey / 2))
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey, l_linenumber
|
||||
ORDER BY 1
|
||||
LIMIT 10;
|
||||
ERROR: bogus varattno for OUTER_VAR var: 4
|
||||
|
||||
SET enable_hashagg TO on;
|
||||
-- expression among aggregations.
|
||||
SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey, l_linenumber
|
||||
ORDER BY 1
|
||||
LIMIT 10;
|
||||
dis
|
||||
-----
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
8
|
||||
11
|
||||
13
|
||||
14
|
||||
15
|
||||
(10 rows)
|
||||
|
||||
|
||||
-- explain the query to see actual plan
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey, l_linenumber
|
||||
ORDER BY 1
|
||||
LIMIT 10;
|
||||
QUERY PLAN
|
||||
-------------------------------------------------------------------------------------------------------------------------------------------
|
||||
Limit
|
||||
-> Sort
|
||||
Sort Key: (((pg_catalog.sum(remote_scan.dis))::bigint + COALESCE((pg_catalog.sum(remote_scan.dis_1))::bigint, '0'::bigint)))
|
||||
-> HashAggregate
|
||||
Group Key: ((pg_catalog.sum(remote_scan.dis))::bigint + COALESCE((pg_catalog.sum(remote_scan.dis_1))::bigint, '0'::bigint))
|
||||
-> HashAggregate
|
||||
Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
|
||||
-> Custom Scan (Citus Real-Time)
|
||||
Task Count: 4
|
||||
Tasks Shown: One of 4
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: l_suppkey, l_linenumber
|
||||
-> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
|
||||
(15 rows)
|
||||
|
||||
-- check the plan if the hash aggreate is disabled. This explain errors out due
|
||||
-- to a bug right now, expectation must be corrected after fixing it.
|
||||
SET enable_hashagg TO off;
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey, l_linenumber
|
||||
ORDER BY 1
|
||||
LIMIT 10;
|
||||
ERROR: bogus varattno for OUTER_VAR var: 4
|
||||
|
||||
SET enable_hashagg TO on;
|
||||
|
||||
-- distinct on all columns, note Group By columns guarantees uniqueness of the
|
||||
-- result list.
|
||||
SELECT DISTINCT *
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
|
||||
ORDER BY 1,2
|
||||
LIMIT 10;
|
||||
l_orderkey | l_partkey | l_suppkey | l_linenumber | l_quantity | l_extendedprice | l_discount | l_tax | l_returnflag | l_linestatus | l_shipdate | l_commitdate | l_receiptdate | l_shipinstruct | l_shipmode | l_comment
|
||||
------------+-----------+-----------+--------------+------------+-----------------+------------+-------+--------------+--------------+------------+--------------+---------------+---------------------------+------------+--------------------------------------------
|
||||
1 | 2132 | 4633 | 4 | 28.00 | 28955.64 | 0.09 | 0.06 | N | O | 04-21-1996 | 03-30-1996 | 05-16-1996 | NONE | AIR | lites. fluffily even de
|
||||
1 | 15635 | 638 | 6 | 32.00 | 49620.16 | 0.07 | 0.02 | N | O | 01-30-1996 | 02-07-1996 | 02-03-1996 | DELIVER IN PERSON | MAIL | arefully slyly ex
|
||||
1 | 24027 | 1534 | 5 | 24.00 | 22824.48 | 0.10 | 0.04 | N | O | 03-30-1996 | 03-14-1996 | 04-01-1996 | NONE | FOB | pending foxes. slyly re
|
||||
1 | 63700 | 3701 | 3 | 8.00 | 13309.60 | 0.10 | 0.02 | N | O | 01-29-1996 | 03-05-1996 | 01-31-1996 | TAKE BACK RETURN | REG AIR | riously. regular, express dep
|
||||
1 | 67310 | 7311 | 2 | 36.00 | 45983.16 | 0.09 | 0.06 | N | O | 04-12-1996 | 02-28-1996 | 04-20-1996 | TAKE BACK RETURN | MAIL | ly final dependencies: slyly bold
|
||||
1 | 155190 | 7706 | 1 | 17.00 | 21168.23 | 0.04 | 0.02 | N | O | 03-13-1996 | 02-12-1996 | 03-22-1996 | DELIVER IN PERSON | TRUCK | egular courts above the
|
||||
2 | 106170 | 1191 | 1 | 38.00 | 44694.46 | 0.00 | 0.05 | N | O | 01-28-1997 | 01-14-1997 | 02-02-1997 | TAKE BACK RETURN | RAIL | ven requests. deposits breach a
|
||||
3 | 4297 | 1798 | 1 | 45.00 | 54058.05 | 0.06 | 0.00 | R | F | 02-02-1994 | 01-04-1994 | 02-23-1994 | NONE | AIR | ongside of the furiously brave acco
|
||||
3 | 19036 | 6540 | 2 | 49.00 | 46796.47 | 0.10 | 0.00 | R | F | 11-09-1993 | 12-20-1993 | 11-24-1993 | TAKE BACK RETURN | RAIL | unusual accounts. eve
|
||||
3 | 29380 | 1883 | 4 | 2.00 | 2618.76 | 0.01 | 0.06 | A | F | 12-04-1993 | 01-07-1994 | 01-01-1994 | NONE | TRUCK | y. fluffily pending d
|
||||
(10 rows)
|
||||
|
||||
|
||||
-- explain the query to see actual plan. We expect to see only one aggregation
|
||||
-- node since group by columns guarantees the uniqueness.
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT *
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
|
||||
ORDER BY 1,2
|
||||
LIMIT 10;
|
||||
QUERY PLAN
|
||||
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
Limit
|
||||
-> Sort
|
||||
Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey
|
||||
-> HashAggregate
|
||||
Group Key: remote_scan.l_orderkey, remote_scan.l_partkey, remote_scan.l_suppkey, remote_scan.l_linenumber, remote_scan.l_quantity, remote_scan.l_extendedprice, remote_scan.l_discount, remote_scan.l_tax, remote_scan.l_returnflag, remote_scan.l_linestatus, remote_scan.l_shipdate, remote_scan.l_commitdate, remote_scan.l_receiptdate, remote_scan.l_shipinstruct, remote_scan.l_shipmode, remote_scan.l_comment
|
||||
-> Custom Scan (Citus Real-Time)
|
||||
Task Count: 4
|
||||
Tasks Shown: One of 4
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> Limit
|
||||
-> Unique
|
||||
-> Group
|
||||
Group Key: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
|
||||
-> Sort
|
||||
Sort Key: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
|
||||
-> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
|
||||
(17 rows)
|
||||
|
||||
-- check the plan if the hash aggreate is disabled. We expect to see only one
|
||||
-- aggregation node since group by columns guarantees the uniqueness.
|
||||
SET enable_hashagg TO off;
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT *
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
|
||||
ORDER BY 1,2
|
||||
LIMIT 10;
|
||||
QUERY PLAN
|
||||
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
Limit
|
||||
-> Sort
|
||||
Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey
|
||||
-> GroupAggregate
|
||||
Group Key: remote_scan.l_orderkey, remote_scan.l_partkey, remote_scan.l_suppkey, remote_scan.l_linenumber, remote_scan.l_quantity, remote_scan.l_extendedprice, remote_scan.l_discount, remote_scan.l_tax, remote_scan.l_returnflag, remote_scan.l_linestatus, remote_scan.l_shipdate, remote_scan.l_commitdate, remote_scan.l_receiptdate, remote_scan.l_shipinstruct, remote_scan.l_shipmode, remote_scan.l_comment
|
||||
-> Sort
|
||||
Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey, remote_scan.l_suppkey, remote_scan.l_linenumber, remote_scan.l_quantity, remote_scan.l_extendedprice, remote_scan.l_discount, remote_scan.l_tax, remote_scan.l_returnflag, remote_scan.l_linestatus, remote_scan.l_shipdate, remote_scan.l_commitdate, remote_scan.l_receiptdate, remote_scan.l_shipinstruct, remote_scan.l_shipmode, remote_scan.l_comment
|
||||
-> Custom Scan (Citus Real-Time)
|
||||
Task Count: 4
|
||||
Tasks Shown: One of 4
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> Limit
|
||||
-> Unique
|
||||
-> Group
|
||||
Group Key: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
|
||||
-> Sort
|
||||
Sort Key: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
|
||||
-> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
|
||||
(19 rows)
|
||||
|
||||
|
||||
SET enable_hashagg TO on;
|
||||
-- distinct on count distinct
|
||||
SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_orderkey
|
||||
ORDER BY 1,2;
|
||||
count | count
|
||||
-------+-------
|
||||
1 | 1
|
||||
2 | 1
|
||||
2 | 2
|
||||
3 | 1
|
||||
3 | 2
|
||||
3 | 3
|
||||
4 | 1
|
||||
4 | 2
|
||||
4 | 3
|
||||
4 | 4
|
||||
5 | 2
|
||||
5 | 3
|
||||
5 | 4
|
||||
5 | 5
|
||||
6 | 2
|
||||
6 | 3
|
||||
6 | 4
|
||||
6 | 5
|
||||
6 | 6
|
||||
7 | 2
|
||||
7 | 3
|
||||
7 | 4
|
||||
7 | 5
|
||||
7 | 6
|
||||
7 | 7
|
||||
(25 rows)
|
||||
|
||||
|
||||
-- explain the query to see actual plan. We expect to see aggregation plan for
|
||||
-- the outer distinct.
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_orderkey
|
||||
ORDER BY 1,2;
|
||||
QUERY PLAN
|
||||
-------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
Sort
|
||||
Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)), (COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint))
|
||||
-> HashAggregate
|
||||
Group Key: COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint), COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint)
|
||||
-> HashAggregate
|
||||
Group Key: remote_scan.worker_column_3
|
||||
-> Custom Scan (Citus Real-Time)
|
||||
Task Count: 4
|
||||
Tasks Shown: One of 4
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> GroupAggregate
|
||||
Group Key: l_orderkey
|
||||
-> Sort
|
||||
Sort Key: l_orderkey
|
||||
-> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
|
||||
(16 rows)
|
||||
|
||||
-- check the plan if the hash aggreate is disabled. We expect to see sort + unique
|
||||
-- plans for the outer distinct.
|
||||
SET enable_hashagg TO off;
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_orderkey
|
||||
ORDER BY 1,2;
|
||||
QUERY PLAN
|
||||
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
Sort
|
||||
Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint), COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint)
|
||||
-> Unique
|
||||
-> Sort
|
||||
Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint), COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint))))::bigint, '0'::bigint)
|
||||
-> GroupAggregate
|
||||
Group Key: remote_scan.worker_column_3
|
||||
-> Sort
|
||||
Sort Key: remote_scan.worker_column_3
|
||||
-> Custom Scan (Citus Real-Time)
|
||||
Task Count: 4
|
||||
Tasks Shown: One of 4
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> GroupAggregate
|
||||
Group Key: l_orderkey
|
||||
-> Sort
|
||||
Sort Key: l_orderkey
|
||||
-> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
|
||||
(19 rows)
|
||||
|
||||
|
||||
SET enable_hashagg TO on;
|
||||
-- distinct on aggregation with filter and expression
|
||||
SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey
|
||||
ORDER BY 1;
|
||||
count
|
||||
-------
|
||||
0
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
(5 rows)
|
||||
|
||||
|
||||
-- explain the query to see actual plan
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey
|
||||
ORDER BY 1;
|
||||
QUERY PLAN
|
||||
-----------------------------------------------------------------------------------------------------------------------
|
||||
Sort
|
||||
Sort Key: (ceil(((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint) / 2))::double precision))
|
||||
-> HashAggregate
|
||||
Group Key: ceil(((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint) / 2))::double precision)
|
||||
-> HashAggregate
|
||||
Group Key: remote_scan.worker_column_2
|
||||
-> Custom Scan (Citus Real-Time)
|
||||
Task Count: 4
|
||||
Tasks Shown: One of 4
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: l_suppkey
|
||||
-> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
|
||||
(14 rows)
|
||||
|
||||
-- check the plan if the hash aggreate is disabled
|
||||
SET enable_hashagg TO off;
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey
|
||||
ORDER BY 1;
|
||||
QUERY PLAN
|
||||
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
Sort
|
||||
Sort Key: ceil(((COALESCE((pg_catalog.sum((ceil(((COALESCE((pg_catalog.sum((ceil(((COALESCE((pg_catalog.sum((ceil(((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint) / 2))::double precision))))::bigint, '0'::bigint) / 2))::double precision))))::bigint, '0'::bigint) / 2))::double precision))))::bigint, '0'::bigint) / 2))::double precision)
|
||||
-> Unique
|
||||
-> Sort
|
||||
Sort Key: ceil(((COALESCE((pg_catalog.sum((ceil(((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint) / 2))::double precision))))::bigint, '0'::bigint) / 2))::double precision)
|
||||
-> GroupAggregate
|
||||
Group Key: remote_scan.worker_column_2
|
||||
-> Sort
|
||||
Sort Key: remote_scan.worker_column_2
|
||||
-> Custom Scan (Citus Real-Time)
|
||||
Task Count: 4
|
||||
Tasks Shown: One of 4
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: l_suppkey
|
||||
-> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
|
||||
(17 rows)
|
||||
|
||||
|
||||
SET enable_hashagg TO on;
|
||||
|
||||
-- explain the query to see actual plan with array_agg aggregation. Note that,
|
||||
-- worker query created for this query is not correct. It will be fixed soon.
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT array_agg(l_linenumber), array_length(array_agg(l_linenumber), 1)
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_orderkey
|
||||
ORDER BY 2
|
||||
LIMIT 15;
|
||||
QUERY PLAN
|
||||
-------------------------------------------------------------------------------------------------------------------------
|
||||
Limit
|
||||
-> Sort
|
||||
Sort Key: (array_length(array_cat_agg(remote_scan.array_length), 1))
|
||||
-> HashAggregate
|
||||
Group Key: array_length(array_cat_agg(remote_scan.array_length), 1), array_cat_agg(remote_scan.array_agg)
|
||||
-> HashAggregate
|
||||
Group Key: remote_scan.worker_column_3
|
||||
-> Custom Scan (Citus Real-Time)
|
||||
Task Count: 4
|
||||
Tasks Shown: One of 4
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> Limit
|
||||
-> Sort
|
||||
Sort Key: (array_agg(l_linenumber))
|
||||
-> GroupAggregate
|
||||
Group Key: l_orderkey
|
||||
-> Sort
|
||||
Sort Key: l_orderkey
|
||||
-> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
|
||||
(20 rows)
|
||||
|
||||
-- check the plan if the hash aggreate is disabled. Note that,
|
||||
-- worker query created for this query is not correct. It will be fixed soon.
|
||||
SET enable_hashagg TO off;
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT array_agg(l_linenumber), array_length(array_agg(l_linenumber), 1)
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_orderkey
|
||||
ORDER BY 2
|
||||
LIMIT 15;
|
||||
QUERY PLAN
|
||||
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
Limit
|
||||
-> Sort
|
||||
Sort Key: array_length(array_cat_agg((array_length(array_cat_agg((array_length(array_cat_agg((array_length(array_cat_agg(remote_scan.array_length), 1))), 1))), 1))), 1)
|
||||
-> Unique
|
||||
-> Sort
|
||||
Sort Key: array_length(array_cat_agg((array_length(array_cat_agg(remote_scan.array_length), 1))), 1), array_cat_agg((array_cat_agg(remote_scan.array_agg)))
|
||||
-> GroupAggregate
|
||||
Group Key: remote_scan.worker_column_3
|
||||
-> Sort
|
||||
Sort Key: remote_scan.worker_column_3
|
||||
-> Custom Scan (Citus Real-Time)
|
||||
Task Count: 4
|
||||
Tasks Shown: One of 4
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> Limit
|
||||
-> Sort
|
||||
Sort Key: (array_agg(l_linenumber))
|
||||
-> GroupAggregate
|
||||
Group Key: l_orderkey
|
||||
-> Sort
|
||||
Sort Key: l_orderkey
|
||||
-> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
|
||||
(23 rows)
|
||||
|
||||
|
||||
SET enable_hashagg TO on;
|
||||
-- distinct on non-partition column with aggregate
|
||||
-- this is the same as non-distinct version due to group by
|
||||
|
|
|
@ -78,6 +78,270 @@ EXPLAIN (COSTS FALSE)
|
|||
|
||||
SET enable_hashagg TO on;
|
||||
|
||||
-- distinct on aggregate of group by columns, we try to check whether we handle
|
||||
-- queries which does not have any group by column in distinct columns properly.
|
||||
SELECT DISTINCT count(*)
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey, l_linenumber
|
||||
ORDER BY 1;
|
||||
|
||||
-- explain the query to see actual plan. We expect to see Aggregate node having
|
||||
-- group by key on count(*) column, since columns in the Group By doesn't guarantee
|
||||
-- the uniqueness of the result.
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT count(*)
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey, l_linenumber
|
||||
ORDER BY 1;
|
||||
|
||||
-- check the plan if the hash aggreate is disabled. We expect to see sort+unique
|
||||
-- instead of aggregate plan node to handle distinct.
|
||||
SET enable_hashagg TO off;
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT count(*)
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey, l_linenumber
|
||||
ORDER BY 1;
|
||||
|
||||
SET enable_hashagg TO on;
|
||||
|
||||
-- Now we have only part of group clause columns in distinct, yet it is still not
|
||||
-- enough to use Group By columns to guarantee uniqueness of result list.
|
||||
SELECT DISTINCT l_suppkey, count(*)
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey, l_linenumber
|
||||
ORDER BY 1
|
||||
LIMIT 10;
|
||||
|
||||
-- explain the query to see actual plan. Similar to the explain of the query above.
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT l_suppkey, count(*)
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey, l_linenumber
|
||||
ORDER BY 1
|
||||
LIMIT 10;
|
||||
|
||||
-- check the plan if the hash aggreate is disabled. Similar to the explain of
|
||||
-- the query above.
|
||||
SET enable_hashagg TO off;
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT l_suppkey, count(*)
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey, l_linenumber
|
||||
ORDER BY 1
|
||||
LIMIT 10;
|
||||
|
||||
SET enable_hashagg TO on;
|
||||
|
||||
-- Similar to the above query, not with count but avg. Only difference with the
|
||||
-- above query is that, we create run two aggregate functions in workers.
|
||||
SELECT DISTINCT l_suppkey, avg(l_partkey)
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey, l_linenumber
|
||||
ORDER BY 1,2
|
||||
LIMIT 10;
|
||||
|
||||
-- explain the query to see actual plan. Similar to the explain of the query above.
|
||||
-- Only aggregate functions will be changed.
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT l_suppkey, avg(l_partkey)
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey, l_linenumber
|
||||
ORDER BY 1,2
|
||||
LIMIT 10;
|
||||
|
||||
-- check the plan if the hash aggreate is disabled. This explain errors out due
|
||||
-- to a bug right now, expectation must be corrected after fixing it.
|
||||
SET enable_hashagg TO off;
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT l_suppkey, avg(l_partkey)
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey, l_linenumber
|
||||
ORDER BY 1,2
|
||||
LIMIT 10;
|
||||
|
||||
SET enable_hashagg TO on;
|
||||
|
||||
-- Similar to the above query but with distinct on
|
||||
SELECT DISTINCT ON (l_suppkey) avg(l_partkey)
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey, l_linenumber
|
||||
ORDER BY l_suppkey,1
|
||||
LIMIT 10;
|
||||
|
||||
-- explain the query to see actual plan. We expect to see sort+unique to handle
|
||||
-- distinct on.
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT ON (l_suppkey) avg(l_partkey)
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey, l_linenumber
|
||||
ORDER BY l_suppkey,1
|
||||
LIMIT 10;
|
||||
|
||||
-- check the plan if the hash aggreate is disabled. We expect to see sort+unique to
|
||||
-- handle distinct on.
|
||||
SET enable_hashagg TO off;
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT ON (l_suppkey) avg(l_partkey)
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey, l_linenumber
|
||||
ORDER BY l_suppkey,1
|
||||
LIMIT 10;
|
||||
|
||||
SET enable_hashagg TO on;
|
||||
|
||||
-- distinct with expression and aggregation
|
||||
SELECT DISTINCT avg(ceil(l_partkey / 2))
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey, l_linenumber
|
||||
ORDER BY 1
|
||||
LIMIT 10;
|
||||
|
||||
-- explain the query to see actual plan
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT avg(ceil(l_partkey / 2))
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey, l_linenumber
|
||||
ORDER BY 1
|
||||
LIMIT 10;
|
||||
|
||||
-- check the plan if the hash aggreate is disabled. This explain errors out due
|
||||
-- to a bug right now, expectation must be corrected after fixing it.
|
||||
SET enable_hashagg TO off;
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT avg(ceil(l_partkey / 2))
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey, l_linenumber
|
||||
ORDER BY 1
|
||||
LIMIT 10;
|
||||
|
||||
SET enable_hashagg TO on;
|
||||
|
||||
-- expression among aggregations.
|
||||
SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey, l_linenumber
|
||||
ORDER BY 1
|
||||
LIMIT 10;
|
||||
|
||||
-- explain the query to see actual plan
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey, l_linenumber
|
||||
ORDER BY 1
|
||||
LIMIT 10;
|
||||
|
||||
-- check the plan if the hash aggreate is disabled. This explain errors out due
|
||||
-- to a bug right now, expectation must be corrected after fixing it.
|
||||
SET enable_hashagg TO off;
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey, l_linenumber
|
||||
ORDER BY 1
|
||||
LIMIT 10;
|
||||
|
||||
SET enable_hashagg TO on;
|
||||
|
||||
-- distinct on all columns, note Group By columns guarantees uniqueness of the
|
||||
-- result list.
|
||||
SELECT DISTINCT *
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
|
||||
ORDER BY 1,2
|
||||
LIMIT 10;
|
||||
|
||||
-- explain the query to see actual plan. We expect to see only one aggregation
|
||||
-- node since group by columns guarantees the uniqueness.
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT *
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
|
||||
ORDER BY 1,2
|
||||
LIMIT 10;
|
||||
|
||||
-- check the plan if the hash aggreate is disabled. We expect to see only one
|
||||
-- aggregation node since group by columns guarantees the uniqueness.
|
||||
SET enable_hashagg TO off;
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT *
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
|
||||
ORDER BY 1,2
|
||||
LIMIT 10;
|
||||
|
||||
SET enable_hashagg TO on;
|
||||
|
||||
-- distinct on count distinct
|
||||
SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_orderkey
|
||||
ORDER BY 1,2;
|
||||
|
||||
-- explain the query to see actual plan. We expect to see aggregation plan for
|
||||
-- the outer distinct.
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_orderkey
|
||||
ORDER BY 1,2;
|
||||
|
||||
-- check the plan if the hash aggreate is disabled. We expect to see sort + unique
|
||||
-- plans for the outer distinct.
|
||||
SET enable_hashagg TO off;
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_orderkey
|
||||
ORDER BY 1,2;
|
||||
|
||||
SET enable_hashagg TO on;
|
||||
|
||||
-- distinct on aggregation with filter and expression
|
||||
SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey
|
||||
ORDER BY 1;
|
||||
|
||||
-- explain the query to see actual plan
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey
|
||||
ORDER BY 1;
|
||||
|
||||
-- check the plan if the hash aggreate is disabled
|
||||
SET enable_hashagg TO off;
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_suppkey
|
||||
ORDER BY 1;
|
||||
|
||||
SET enable_hashagg TO on;
|
||||
|
||||
-- explain the query to see actual plan with array_agg aggregation. Note that,
|
||||
-- worker query created for this query is not correct. It will be fixed soon.
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT array_agg(l_linenumber), array_length(array_agg(l_linenumber), 1)
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_orderkey
|
||||
ORDER BY 2
|
||||
LIMIT 15;
|
||||
|
||||
-- check the plan if the hash aggreate is disabled. Note that,
|
||||
-- worker query created for this query is not correct. It will be fixed soon.
|
||||
SET enable_hashagg TO off;
|
||||
EXPLAIN (COSTS FALSE)
|
||||
SELECT DISTINCT array_agg(l_linenumber), array_length(array_agg(l_linenumber), 1)
|
||||
FROM lineitem_hash_part
|
||||
GROUP BY l_orderkey
|
||||
ORDER BY 2
|
||||
LIMIT 15;
|
||||
|
||||
SET enable_hashagg TO on;
|
||||
|
||||
-- distinct on non-partition column with aggregate
|
||||
-- this is the same as non-distinct version due to group by
|
||||
SELECT DISTINCT l_partkey, count(*)
|
||||
|
|
Loading…
Reference in New Issue