mirror of https://github.com/citusdata/citus.git
Merge pull request #2018 from citusdata/distinct_without_groupby_column
Add distinct plan after aggregation plan on master plannerpull/2028/head
commit
ee67ce892f
|
@ -37,6 +37,8 @@ static PlannedStmt * BuildSelectStatement(Query *masterQuery, List *masterTarget
|
||||||
static Agg * BuildAggregatePlan(Query *masterQuery, Plan *subPlan);
|
static Agg * BuildAggregatePlan(Query *masterQuery, Plan *subPlan);
|
||||||
static bool HasDistinctAggregate(Query *masterQuery);
|
static bool HasDistinctAggregate(Query *masterQuery);
|
||||||
static Plan * BuildDistinctPlan(Query *masterQuery, Plan *subPlan);
|
static Plan * BuildDistinctPlan(Query *masterQuery, Plan *subPlan);
|
||||||
|
static List * PrepareTargetListForNextPlan(List *targetList);
|
||||||
|
static bool IsGroupBySubsetOfDistinct(Query *masterQuery);
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -397,28 +399,29 @@ BuildDistinctPlan(Query *masterQuery, Plan *subPlan)
|
||||||
bool distinctClausesHashable = true;
|
bool distinctClausesHashable = true;
|
||||||
List *distinctClauseList = masterQuery->distinctClause;
|
List *distinctClauseList = masterQuery->distinctClause;
|
||||||
List *targetList = copyObject(masterQuery->targetList);
|
List *targetList = copyObject(masterQuery->targetList);
|
||||||
List *columnList = pull_var_clause_default((Node *) targetList);
|
|
||||||
ListCell *columnCell = NULL;
|
|
||||||
bool hasDistinctAggregate = false;
|
bool hasDistinctAggregate = false;
|
||||||
|
|
||||||
if (IsA(subPlan, Agg))
|
/*
|
||||||
|
* We don't need to add distinct plan if all of the columns used in group by
|
||||||
|
* clause also used in distinct clause, since group by clause guarantees the
|
||||||
|
* uniqueness of the target list for every row.
|
||||||
|
*/
|
||||||
|
if (IsGroupBySubsetOfDistinct(masterQuery))
|
||||||
{
|
{
|
||||||
return subPlan;
|
return subPlan;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We need to adjust varno to OUTER_VAR, since planner expects that for upper
|
||||||
|
* level plans above the sequential scan. We also need to convert aggregations
|
||||||
|
* (if exists) to regular Vars since the aggregation would be applied by the
|
||||||
|
* previous aggregation plan and we don't want them to be applied again.
|
||||||
|
*/
|
||||||
|
targetList = PrepareTargetListForNextPlan(targetList);
|
||||||
|
|
||||||
Assert(masterQuery->distinctClause);
|
Assert(masterQuery->distinctClause);
|
||||||
Assert(!masterQuery->hasDistinctOn);
|
Assert(!masterQuery->hasDistinctOn);
|
||||||
|
|
||||||
/*
|
|
||||||
* For upper level plans above the sequential scan, the planner expects the
|
|
||||||
* table id (varno) to be set to OUTER_VAR.
|
|
||||||
*/
|
|
||||||
foreach(columnCell, columnList)
|
|
||||||
{
|
|
||||||
Var *column = (Var *) lfirst(columnCell);
|
|
||||||
column->varno = OUTER_VAR;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Create group by plan with HashAggregate if all distinct
|
* Create group by plan with HashAggregate if all distinct
|
||||||
* members are hashable, and not containing distinct aggregate.
|
* members are hashable, and not containing distinct aggregate.
|
||||||
|
@ -451,3 +454,84 @@ BuildDistinctPlan(Query *masterQuery, Plan *subPlan)
|
||||||
|
|
||||||
return distinctPlan;
|
return distinctPlan;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* PrepareTargetListForNextPlan handles both regular columns to have right varno
|
||||||
|
* and convert aggregates to regular Vars in the target list.
|
||||||
|
*/
|
||||||
|
static List *
|
||||||
|
PrepareTargetListForNextPlan(List *targetList)
|
||||||
|
{
|
||||||
|
List *newtargetList = NIL;
|
||||||
|
ListCell *targetEntryCell = NULL;
|
||||||
|
|
||||||
|
foreach(targetEntryCell, targetList)
|
||||||
|
{
|
||||||
|
TargetEntry *targetEntry = lfirst(targetEntryCell);
|
||||||
|
TargetEntry *newTargetEntry = NULL;
|
||||||
|
Var *newVar = NULL;
|
||||||
|
|
||||||
|
Assert(IsA(targetEntry, TargetEntry));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* For upper level plans above the sequential scan, the planner expects the
|
||||||
|
* table id (varno) to be set to OUTER_VAR.
|
||||||
|
*/
|
||||||
|
newVar = makeVarFromTargetEntry(OUTER_VAR, targetEntry);
|
||||||
|
newTargetEntry = flatCopyTargetEntry(targetEntry);
|
||||||
|
newTargetEntry->expr = (Expr *) newVar;
|
||||||
|
newtargetList = lappend(newtargetList, newTargetEntry);
|
||||||
|
}
|
||||||
|
|
||||||
|
return newtargetList;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* IsGroupBySubsetOfDistinct checks whether each clause in group clauses also
|
||||||
|
* exists in the distinct clauses. Note that, empty group clause is not a subset
|
||||||
|
* of distinct clause.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
IsGroupBySubsetOfDistinct(Query *masterQuery)
|
||||||
|
{
|
||||||
|
List *distinctClauses = masterQuery->distinctClause;
|
||||||
|
List *groupClauses = masterQuery->groupClause;
|
||||||
|
ListCell *distinctCell = NULL;
|
||||||
|
ListCell *groupCell = NULL;
|
||||||
|
|
||||||
|
/* There must be a group clause */
|
||||||
|
if (list_length(groupClauses) == 0)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
foreach(groupCell, groupClauses)
|
||||||
|
{
|
||||||
|
SortGroupClause *groupClause = (SortGroupClause *) lfirst(groupCell);
|
||||||
|
bool isFound = false;
|
||||||
|
|
||||||
|
foreach(distinctCell, distinctClauses)
|
||||||
|
{
|
||||||
|
SortGroupClause *distinctClause = (SortGroupClause *) lfirst(distinctCell);
|
||||||
|
|
||||||
|
if (groupClause->tleSortGroupRef == distinctClause->tleSortGroupRef)
|
||||||
|
{
|
||||||
|
isFound = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we can't find any member of group clause in the distinct clause,
|
||||||
|
* that means group clause is not a subset of distinct clause.
|
||||||
|
*/
|
||||||
|
if (!isFound)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
|
@ -245,6 +245,739 @@ EXPLAIN (COSTS FALSE)
|
||||||
Filter: (l_orderkey < 200)
|
Filter: (l_orderkey < 200)
|
||||||
(17 rows)
|
(17 rows)
|
||||||
|
|
||||||
|
SET enable_hashagg TO on;
|
||||||
|
-- distinct on aggregate of group by columns, we try to check whether we handle
|
||||||
|
-- queries which does not have any group by column in distinct columns properly.
|
||||||
|
SELECT DISTINCT count(*)
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey, l_linenumber
|
||||||
|
ORDER BY 1;
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
1
|
||||||
|
2
|
||||||
|
3
|
||||||
|
4
|
||||||
|
(4 rows)
|
||||||
|
|
||||||
|
|
||||||
|
-- explain the query to see actual plan. We expect to see Aggregate node having
|
||||||
|
-- group by key on count(*) column, since columns in the Group By doesn't guarantee
|
||||||
|
-- the uniqueness of the result.
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT count(*)
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey, l_linenumber
|
||||||
|
ORDER BY 1;
|
||||||
|
QUERY PLAN
|
||||||
|
----------------------------------------------------------------------------------------------
|
||||||
|
Sort
|
||||||
|
Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))
|
||||||
|
-> HashAggregate
|
||||||
|
Group Key: COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)
|
||||||
|
-> HashAggregate
|
||||||
|
Group Key: remote_scan.worker_column_2, remote_scan.worker_column_3
|
||||||
|
-> Custom Scan (Citus Real-Time)
|
||||||
|
Task Count: 4
|
||||||
|
Tasks Shown: One of 4
|
||||||
|
-> Task
|
||||||
|
Node: host=localhost port=57637 dbname=regression
|
||||||
|
-> HashAggregate
|
||||||
|
Group Key: l_suppkey, l_linenumber
|
||||||
|
-> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
|
||||||
|
(14 rows)
|
||||||
|
|
||||||
|
-- check the plan if the hash aggreate is disabled. We expect to see sort+unique
|
||||||
|
-- instead of aggregate plan node to handle distinct.
|
||||||
|
SET enable_hashagg TO off;
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT count(*)
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey, l_linenumber
|
||||||
|
ORDER BY 1;
|
||||||
|
QUERY PLAN
|
||||||
|
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||||
|
Sort
|
||||||
|
Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint)
|
||||||
|
-> Unique
|
||||||
|
-> Sort
|
||||||
|
Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint)
|
||||||
|
-> GroupAggregate
|
||||||
|
Group Key: remote_scan.worker_column_2, remote_scan.worker_column_3
|
||||||
|
-> Sort
|
||||||
|
Sort Key: remote_scan.worker_column_2, remote_scan.worker_column_3
|
||||||
|
-> Custom Scan (Citus Real-Time)
|
||||||
|
Task Count: 4
|
||||||
|
Tasks Shown: One of 4
|
||||||
|
-> Task
|
||||||
|
Node: host=localhost port=57637 dbname=regression
|
||||||
|
-> HashAggregate
|
||||||
|
Group Key: l_suppkey, l_linenumber
|
||||||
|
-> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
|
||||||
|
(17 rows)
|
||||||
|
|
||||||
|
SET enable_hashagg TO on;
|
||||||
|
-- Now we have only part of group clause columns in distinct, yet it is still not
|
||||||
|
-- enough to use Group By columns to guarantee uniqueness of result list.
|
||||||
|
SELECT DISTINCT l_suppkey, count(*)
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey, l_linenumber
|
||||||
|
ORDER BY 1
|
||||||
|
LIMIT 10;
|
||||||
|
l_suppkey | count
|
||||||
|
-----------+-------
|
||||||
|
1 | 1
|
||||||
|
2 | 1
|
||||||
|
3 | 1
|
||||||
|
4 | 1
|
||||||
|
5 | 1
|
||||||
|
7 | 1
|
||||||
|
10 | 1
|
||||||
|
12 | 1
|
||||||
|
13 | 1
|
||||||
|
14 | 1
|
||||||
|
(10 rows)
|
||||||
|
|
||||||
|
|
||||||
|
-- explain the query to see actual plan. Similar to the explain of the query above.
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT l_suppkey, count(*)
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey, l_linenumber
|
||||||
|
ORDER BY 1
|
||||||
|
LIMIT 10;
|
||||||
|
QUERY PLAN
|
||||||
|
--------------------------------------------------------------------------------------------------------------------
|
||||||
|
Limit
|
||||||
|
-> Sort
|
||||||
|
Sort Key: remote_scan.l_suppkey
|
||||||
|
-> HashAggregate
|
||||||
|
Group Key: remote_scan.l_suppkey, COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)
|
||||||
|
-> HashAggregate
|
||||||
|
Group Key: remote_scan.l_suppkey, remote_scan.worker_column_3
|
||||||
|
-> Custom Scan (Citus Real-Time)
|
||||||
|
Task Count: 4
|
||||||
|
Tasks Shown: One of 4
|
||||||
|
-> Task
|
||||||
|
Node: host=localhost port=57637 dbname=regression
|
||||||
|
-> Limit
|
||||||
|
-> Sort
|
||||||
|
Sort Key: l_suppkey, l_linenumber
|
||||||
|
-> HashAggregate
|
||||||
|
Group Key: l_suppkey, l_linenumber
|
||||||
|
-> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
|
||||||
|
(18 rows)
|
||||||
|
|
||||||
|
-- check the plan if the hash aggreate is disabled. Similar to the explain of
|
||||||
|
-- the query above.
|
||||||
|
SET enable_hashagg TO off;
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT l_suppkey, count(*)
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey, l_linenumber
|
||||||
|
ORDER BY 1
|
||||||
|
LIMIT 10;
|
||||||
|
QUERY PLAN
|
||||||
|
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||||
|
Limit
|
||||||
|
-> Sort
|
||||||
|
Sort Key: remote_scan.l_suppkey
|
||||||
|
-> Unique
|
||||||
|
-> Sort
|
||||||
|
Sort Key: remote_scan.l_suppkey, COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint)
|
||||||
|
-> GroupAggregate
|
||||||
|
Group Key: remote_scan.l_suppkey, remote_scan.worker_column_3
|
||||||
|
-> Sort
|
||||||
|
Sort Key: remote_scan.l_suppkey, remote_scan.worker_column_3
|
||||||
|
-> Custom Scan (Citus Real-Time)
|
||||||
|
Task Count: 4
|
||||||
|
Tasks Shown: One of 4
|
||||||
|
-> Task
|
||||||
|
Node: host=localhost port=57637 dbname=regression
|
||||||
|
-> Limit
|
||||||
|
-> Sort
|
||||||
|
Sort Key: l_suppkey, l_linenumber
|
||||||
|
-> HashAggregate
|
||||||
|
Group Key: l_suppkey, l_linenumber
|
||||||
|
-> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
|
||||||
|
(21 rows)
|
||||||
|
|
||||||
|
SET enable_hashagg TO on;
|
||||||
|
-- Similar to the above query, not with count but avg. Only difference with the
|
||||||
|
-- above query is that, we create run two aggregate functions in workers.
|
||||||
|
SELECT DISTINCT l_suppkey, avg(l_partkey)
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey, l_linenumber
|
||||||
|
ORDER BY 1,2
|
||||||
|
LIMIT 10;
|
||||||
|
l_suppkey | avg
|
||||||
|
-----------+------------------------
|
||||||
|
1 | 190000.000000000000
|
||||||
|
2 | 172450.000000000000
|
||||||
|
3 | 112469.000000000000
|
||||||
|
3 | 134976.000000000000
|
||||||
|
4 | 112470.000000000000
|
||||||
|
4 | 142461.000000000000
|
||||||
|
5 | 182450.000000000000
|
||||||
|
7 | 137493.000000000000
|
||||||
|
10 | 150009.000000000000
|
||||||
|
12 | 17510.0000000000000000
|
||||||
|
(10 rows)
|
||||||
|
|
||||||
|
|
||||||
|
-- explain the query to see actual plan. Similar to the explain of the query above.
|
||||||
|
-- Only aggregate functions will be changed.
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT l_suppkey, avg(l_partkey)
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey, l_linenumber
|
||||||
|
ORDER BY 1,2
|
||||||
|
LIMIT 10;
|
||||||
|
QUERY PLAN
|
||||||
|
-----------------------------------------------------------------------------------------------------------------------
|
||||||
|
Limit
|
||||||
|
-> Sort
|
||||||
|
Sort Key: remote_scan.l_suppkey, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))
|
||||||
|
-> HashAggregate
|
||||||
|
Group Key: remote_scan.l_suppkey, (pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1))
|
||||||
|
-> HashAggregate
|
||||||
|
Group Key: remote_scan.l_suppkey, remote_scan.worker_column_4
|
||||||
|
-> Custom Scan (Citus Real-Time)
|
||||||
|
Task Count: 4
|
||||||
|
Tasks Shown: One of 4
|
||||||
|
-> Task
|
||||||
|
Node: host=localhost port=57637 dbname=regression
|
||||||
|
-> HashAggregate
|
||||||
|
Group Key: l_suppkey, l_linenumber
|
||||||
|
-> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
|
||||||
|
(15 rows)
|
||||||
|
|
||||||
|
-- check the plan if the hash aggreate is disabled. This explain errors out due
|
||||||
|
-- to a bug right now, expectation must be corrected after fixing it.
|
||||||
|
SET enable_hashagg TO off;
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT l_suppkey, avg(l_partkey)
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey, l_linenumber
|
||||||
|
ORDER BY 1,2
|
||||||
|
LIMIT 10;
|
||||||
|
ERROR: bogus varattno for OUTER_VAR var: 4
|
||||||
|
SET enable_hashagg TO on;
|
||||||
|
-- Similar to the above query but with distinct on
|
||||||
|
SELECT DISTINCT ON (l_suppkey) avg(l_partkey)
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey, l_linenumber
|
||||||
|
ORDER BY l_suppkey,1
|
||||||
|
LIMIT 10;
|
||||||
|
avg
|
||||||
|
------------------------
|
||||||
|
190000.000000000000
|
||||||
|
172450.000000000000
|
||||||
|
112469.000000000000
|
||||||
|
112470.000000000000
|
||||||
|
182450.000000000000
|
||||||
|
137493.000000000000
|
||||||
|
150009.000000000000
|
||||||
|
17510.0000000000000000
|
||||||
|
87504.000000000000
|
||||||
|
77506.000000000000
|
||||||
|
(10 rows)
|
||||||
|
|
||||||
|
|
||||||
|
-- explain the query to see actual plan. We expect to see sort+unique to handle
|
||||||
|
-- distinct on.
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT ON (l_suppkey) avg(l_partkey)
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey, l_linenumber
|
||||||
|
ORDER BY l_suppkey,1
|
||||||
|
LIMIT 10;
|
||||||
|
QUERY PLAN
|
||||||
|
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||||
|
Limit
|
||||||
|
-> Unique
|
||||||
|
-> Sort
|
||||||
|
Sort Key: remote_scan.worker_column_4, (pg_catalog.sum(((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))) / pg_catalog.sum(remote_scan.worker_column_3))
|
||||||
|
-> HashAggregate
|
||||||
|
Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
|
||||||
|
-> Custom Scan (Citus Real-Time)
|
||||||
|
Task Count: 4
|
||||||
|
Tasks Shown: One of 4
|
||||||
|
-> Task
|
||||||
|
Node: host=localhost port=57637 dbname=regression
|
||||||
|
-> HashAggregate
|
||||||
|
Group Key: l_suppkey, l_linenumber
|
||||||
|
-> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
|
||||||
|
(14 rows)
|
||||||
|
|
||||||
|
-- check the plan if the hash aggreate is disabled. We expect to see sort+unique to
|
||||||
|
-- handle distinct on.
|
||||||
|
SET enable_hashagg TO off;
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT ON (l_suppkey) avg(l_partkey)
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey, l_linenumber
|
||||||
|
ORDER BY l_suppkey,1
|
||||||
|
LIMIT 10;
|
||||||
|
QUERY PLAN
|
||||||
|
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||||
|
Limit
|
||||||
|
-> Unique
|
||||||
|
-> Sort
|
||||||
|
Sort Key: remote_scan.worker_column_4, (pg_catalog.sum(((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))) / pg_catalog.sum(remote_scan.worker_column_3))
|
||||||
|
-> GroupAggregate
|
||||||
|
Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
|
||||||
|
-> Sort
|
||||||
|
Sort Key: remote_scan.worker_column_3, remote_scan.worker_column_4
|
||||||
|
-> Custom Scan (Citus Real-Time)
|
||||||
|
Task Count: 4
|
||||||
|
Tasks Shown: One of 4
|
||||||
|
-> Task
|
||||||
|
Node: host=localhost port=57637 dbname=regression
|
||||||
|
-> HashAggregate
|
||||||
|
Group Key: l_suppkey, l_linenumber
|
||||||
|
-> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
|
||||||
|
(16 rows)
|
||||||
|
|
||||||
|
SET enable_hashagg TO on;
|
||||||
|
-- distinct with expression and aggregation
|
||||||
|
SELECT DISTINCT avg(ceil(l_partkey / 2))
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey, l_linenumber
|
||||||
|
ORDER BY 1
|
||||||
|
LIMIT 10;
|
||||||
|
avg
|
||||||
|
-----
|
||||||
|
9
|
||||||
|
39
|
||||||
|
74
|
||||||
|
87
|
||||||
|
89
|
||||||
|
91
|
||||||
|
97
|
||||||
|
102
|
||||||
|
111
|
||||||
|
122
|
||||||
|
(10 rows)
|
||||||
|
|
||||||
|
|
||||||
|
-- explain the query to see actual plan
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT avg(ceil(l_partkey / 2))
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey, l_linenumber
|
||||||
|
ORDER BY 1
|
||||||
|
LIMIT 10;
|
||||||
|
QUERY PLAN
|
||||||
|
---------------------------------------------------------------------------------------------------------
|
||||||
|
Limit
|
||||||
|
-> Sort
|
||||||
|
Sort Key: ((sum(remote_scan.avg) / (pg_catalog.sum(remote_scan.avg_1))::double precision))
|
||||||
|
-> HashAggregate
|
||||||
|
Group Key: (sum(remote_scan.avg) / (pg_catalog.sum(remote_scan.avg_1))::double precision)
|
||||||
|
-> HashAggregate
|
||||||
|
Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
|
||||||
|
-> Custom Scan (Citus Real-Time)
|
||||||
|
Task Count: 4
|
||||||
|
Tasks Shown: One of 4
|
||||||
|
-> Task
|
||||||
|
Node: host=localhost port=57637 dbname=regression
|
||||||
|
-> HashAggregate
|
||||||
|
Group Key: l_suppkey, l_linenumber
|
||||||
|
-> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
|
||||||
|
(15 rows)
|
||||||
|
|
||||||
|
-- check the plan if the hash aggreate is disabled. This explain errors out due
|
||||||
|
-- to a bug right now, expectation must be corrected after fixing it.
|
||||||
|
SET enable_hashagg TO off;
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT avg(ceil(l_partkey / 2))
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey, l_linenumber
|
||||||
|
ORDER BY 1
|
||||||
|
LIMIT 10;
|
||||||
|
ERROR: bogus varattno for OUTER_VAR var: 4
|
||||||
|
|
||||||
|
SET enable_hashagg TO on;
|
||||||
|
-- expression among aggregations.
|
||||||
|
SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey, l_linenumber
|
||||||
|
ORDER BY 1
|
||||||
|
LIMIT 10;
|
||||||
|
dis
|
||||||
|
-----
|
||||||
|
2
|
||||||
|
3
|
||||||
|
4
|
||||||
|
5
|
||||||
|
6
|
||||||
|
8
|
||||||
|
11
|
||||||
|
13
|
||||||
|
14
|
||||||
|
15
|
||||||
|
(10 rows)
|
||||||
|
|
||||||
|
|
||||||
|
-- explain the query to see actual plan
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey, l_linenumber
|
||||||
|
ORDER BY 1
|
||||||
|
LIMIT 10;
|
||||||
|
QUERY PLAN
|
||||||
|
-------------------------------------------------------------------------------------------------------------------------------------------
|
||||||
|
Limit
|
||||||
|
-> Sort
|
||||||
|
Sort Key: (((pg_catalog.sum(remote_scan.dis))::bigint + COALESCE((pg_catalog.sum(remote_scan.dis_1))::bigint, '0'::bigint)))
|
||||||
|
-> HashAggregate
|
||||||
|
Group Key: ((pg_catalog.sum(remote_scan.dis))::bigint + COALESCE((pg_catalog.sum(remote_scan.dis_1))::bigint, '0'::bigint))
|
||||||
|
-> HashAggregate
|
||||||
|
Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
|
||||||
|
-> Custom Scan (Citus Real-Time)
|
||||||
|
Task Count: 4
|
||||||
|
Tasks Shown: One of 4
|
||||||
|
-> Task
|
||||||
|
Node: host=localhost port=57637 dbname=regression
|
||||||
|
-> HashAggregate
|
||||||
|
Group Key: l_suppkey, l_linenumber
|
||||||
|
-> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
|
||||||
|
(15 rows)
|
||||||
|
|
||||||
|
-- check the plan if the hash aggreate is disabled. This explain errors out due
|
||||||
|
-- to a bug right now, expectation must be corrected after fixing it.
|
||||||
|
SET enable_hashagg TO off;
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey, l_linenumber
|
||||||
|
ORDER BY 1
|
||||||
|
LIMIT 10;
|
||||||
|
ERROR: bogus varattno for OUTER_VAR var: 4
|
||||||
|
|
||||||
|
SET enable_hashagg TO on;
|
||||||
|
|
||||||
|
-- distinct on all columns, note Group By columns guarantees uniqueness of the
|
||||||
|
-- result list.
|
||||||
|
SELECT DISTINCT *
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
|
||||||
|
ORDER BY 1,2
|
||||||
|
LIMIT 10;
|
||||||
|
l_orderkey | l_partkey | l_suppkey | l_linenumber | l_quantity | l_extendedprice | l_discount | l_tax | l_returnflag | l_linestatus | l_shipdate | l_commitdate | l_receiptdate | l_shipinstruct | l_shipmode | l_comment
|
||||||
|
------------+-----------+-----------+--------------+------------+-----------------+------------+-------+--------------+--------------+------------+--------------+---------------+---------------------------+------------+--------------------------------------------
|
||||||
|
1 | 2132 | 4633 | 4 | 28.00 | 28955.64 | 0.09 | 0.06 | N | O | 04-21-1996 | 03-30-1996 | 05-16-1996 | NONE | AIR | lites. fluffily even de
|
||||||
|
1 | 15635 | 638 | 6 | 32.00 | 49620.16 | 0.07 | 0.02 | N | O | 01-30-1996 | 02-07-1996 | 02-03-1996 | DELIVER IN PERSON | MAIL | arefully slyly ex
|
||||||
|
1 | 24027 | 1534 | 5 | 24.00 | 22824.48 | 0.10 | 0.04 | N | O | 03-30-1996 | 03-14-1996 | 04-01-1996 | NONE | FOB | pending foxes. slyly re
|
||||||
|
1 | 63700 | 3701 | 3 | 8.00 | 13309.60 | 0.10 | 0.02 | N | O | 01-29-1996 | 03-05-1996 | 01-31-1996 | TAKE BACK RETURN | REG AIR | riously. regular, express dep
|
||||||
|
1 | 67310 | 7311 | 2 | 36.00 | 45983.16 | 0.09 | 0.06 | N | O | 04-12-1996 | 02-28-1996 | 04-20-1996 | TAKE BACK RETURN | MAIL | ly final dependencies: slyly bold
|
||||||
|
1 | 155190 | 7706 | 1 | 17.00 | 21168.23 | 0.04 | 0.02 | N | O | 03-13-1996 | 02-12-1996 | 03-22-1996 | DELIVER IN PERSON | TRUCK | egular courts above the
|
||||||
|
2 | 106170 | 1191 | 1 | 38.00 | 44694.46 | 0.00 | 0.05 | N | O | 01-28-1997 | 01-14-1997 | 02-02-1997 | TAKE BACK RETURN | RAIL | ven requests. deposits breach a
|
||||||
|
3 | 4297 | 1798 | 1 | 45.00 | 54058.05 | 0.06 | 0.00 | R | F | 02-02-1994 | 01-04-1994 | 02-23-1994 | NONE | AIR | ongside of the furiously brave acco
|
||||||
|
3 | 19036 | 6540 | 2 | 49.00 | 46796.47 | 0.10 | 0.00 | R | F | 11-09-1993 | 12-20-1993 | 11-24-1993 | TAKE BACK RETURN | RAIL | unusual accounts. eve
|
||||||
|
3 | 29380 | 1883 | 4 | 2.00 | 2618.76 | 0.01 | 0.06 | A | F | 12-04-1993 | 01-07-1994 | 01-01-1994 | NONE | TRUCK | y. fluffily pending d
|
||||||
|
(10 rows)
|
||||||
|
|
||||||
|
|
||||||
|
-- explain the query to see actual plan. We expect to see only one aggregation
|
||||||
|
-- node since group by columns guarantees the uniqueness.
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT *
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
|
||||||
|
ORDER BY 1,2
|
||||||
|
LIMIT 10;
|
||||||
|
QUERY PLAN
|
||||||
|
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||||
|
Limit
|
||||||
|
-> Sort
|
||||||
|
Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey
|
||||||
|
-> HashAggregate
|
||||||
|
Group Key: remote_scan.l_orderkey, remote_scan.l_partkey, remote_scan.l_suppkey, remote_scan.l_linenumber, remote_scan.l_quantity, remote_scan.l_extendedprice, remote_scan.l_discount, remote_scan.l_tax, remote_scan.l_returnflag, remote_scan.l_linestatus, remote_scan.l_shipdate, remote_scan.l_commitdate, remote_scan.l_receiptdate, remote_scan.l_shipinstruct, remote_scan.l_shipmode, remote_scan.l_comment
|
||||||
|
-> Custom Scan (Citus Real-Time)
|
||||||
|
Task Count: 4
|
||||||
|
Tasks Shown: One of 4
|
||||||
|
-> Task
|
||||||
|
Node: host=localhost port=57637 dbname=regression
|
||||||
|
-> Limit
|
||||||
|
-> Unique
|
||||||
|
-> Group
|
||||||
|
Group Key: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
|
||||||
|
-> Sort
|
||||||
|
Sort Key: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
|
||||||
|
-> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
|
||||||
|
(17 rows)
|
||||||
|
|
||||||
|
-- check the plan if the hash aggreate is disabled. We expect to see only one
|
||||||
|
-- aggregation node since group by columns guarantees the uniqueness.
|
||||||
|
SET enable_hashagg TO off;
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT *
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
|
||||||
|
ORDER BY 1,2
|
||||||
|
LIMIT 10;
|
||||||
|
QUERY PLAN
|
||||||
|
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||||
|
Limit
|
||||||
|
-> Sort
|
||||||
|
Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey
|
||||||
|
-> GroupAggregate
|
||||||
|
Group Key: remote_scan.l_orderkey, remote_scan.l_partkey, remote_scan.l_suppkey, remote_scan.l_linenumber, remote_scan.l_quantity, remote_scan.l_extendedprice, remote_scan.l_discount, remote_scan.l_tax, remote_scan.l_returnflag, remote_scan.l_linestatus, remote_scan.l_shipdate, remote_scan.l_commitdate, remote_scan.l_receiptdate, remote_scan.l_shipinstruct, remote_scan.l_shipmode, remote_scan.l_comment
|
||||||
|
-> Sort
|
||||||
|
Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey, remote_scan.l_suppkey, remote_scan.l_linenumber, remote_scan.l_quantity, remote_scan.l_extendedprice, remote_scan.l_discount, remote_scan.l_tax, remote_scan.l_returnflag, remote_scan.l_linestatus, remote_scan.l_shipdate, remote_scan.l_commitdate, remote_scan.l_receiptdate, remote_scan.l_shipinstruct, remote_scan.l_shipmode, remote_scan.l_comment
|
||||||
|
-> Custom Scan (Citus Real-Time)
|
||||||
|
Task Count: 4
|
||||||
|
Tasks Shown: One of 4
|
||||||
|
-> Task
|
||||||
|
Node: host=localhost port=57637 dbname=regression
|
||||||
|
-> Limit
|
||||||
|
-> Unique
|
||||||
|
-> Group
|
||||||
|
Group Key: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
|
||||||
|
-> Sort
|
||||||
|
Sort Key: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
|
||||||
|
-> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
|
||||||
|
(19 rows)
|
||||||
|
|
||||||
|
|
||||||
|
SET enable_hashagg TO on;
|
||||||
|
-- distinct on count distinct
|
||||||
|
SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_orderkey
|
||||||
|
ORDER BY 1,2;
|
||||||
|
count | count
|
||||||
|
-------+-------
|
||||||
|
1 | 1
|
||||||
|
2 | 1
|
||||||
|
2 | 2
|
||||||
|
3 | 1
|
||||||
|
3 | 2
|
||||||
|
3 | 3
|
||||||
|
4 | 1
|
||||||
|
4 | 2
|
||||||
|
4 | 3
|
||||||
|
4 | 4
|
||||||
|
5 | 2
|
||||||
|
5 | 3
|
||||||
|
5 | 4
|
||||||
|
5 | 5
|
||||||
|
6 | 2
|
||||||
|
6 | 3
|
||||||
|
6 | 4
|
||||||
|
6 | 5
|
||||||
|
6 | 6
|
||||||
|
7 | 2
|
||||||
|
7 | 3
|
||||||
|
7 | 4
|
||||||
|
7 | 5
|
||||||
|
7 | 6
|
||||||
|
7 | 7
|
||||||
|
(25 rows)
|
||||||
|
|
||||||
|
|
||||||
|
-- explain the query to see actual plan. We expect to see aggregation plan for
|
||||||
|
-- the outer distinct.
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_orderkey
|
||||||
|
ORDER BY 1,2;
|
||||||
|
QUERY PLAN
|
||||||
|
-------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||||
|
Sort
|
||||||
|
Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)), (COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint))
|
||||||
|
-> HashAggregate
|
||||||
|
Group Key: COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint), COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint)
|
||||||
|
-> HashAggregate
|
||||||
|
Group Key: remote_scan.worker_column_3
|
||||||
|
-> Custom Scan (Citus Real-Time)
|
||||||
|
Task Count: 4
|
||||||
|
Tasks Shown: One of 4
|
||||||
|
-> Task
|
||||||
|
Node: host=localhost port=57637 dbname=regression
|
||||||
|
-> GroupAggregate
|
||||||
|
Group Key: l_orderkey
|
||||||
|
-> Sort
|
||||||
|
Sort Key: l_orderkey
|
||||||
|
-> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
|
||||||
|
(16 rows)
|
||||||
|
|
||||||
|
-- check the plan if the hash aggreate is disabled. We expect to see sort + unique
|
||||||
|
-- plans for the outer distinct.
|
||||||
|
SET enable_hashagg TO off;
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_orderkey
|
||||||
|
ORDER BY 1,2;
|
||||||
|
QUERY PLAN
|
||||||
|
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||||
|
Sort
|
||||||
|
Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint), COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint)
|
||||||
|
-> Unique
|
||||||
|
-> Sort
|
||||||
|
Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint), COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint))))::bigint, '0'::bigint)
|
||||||
|
-> GroupAggregate
|
||||||
|
Group Key: remote_scan.worker_column_3
|
||||||
|
-> Sort
|
||||||
|
Sort Key: remote_scan.worker_column_3
|
||||||
|
-> Custom Scan (Citus Real-Time)
|
||||||
|
Task Count: 4
|
||||||
|
Tasks Shown: One of 4
|
||||||
|
-> Task
|
||||||
|
Node: host=localhost port=57637 dbname=regression
|
||||||
|
-> GroupAggregate
|
||||||
|
Group Key: l_orderkey
|
||||||
|
-> Sort
|
||||||
|
Sort Key: l_orderkey
|
||||||
|
-> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
|
||||||
|
(19 rows)
|
||||||
|
|
||||||
|
|
||||||
|
SET enable_hashagg TO on;
|
||||||
|
-- distinct on aggregation with filter and expression
|
||||||
|
SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey
|
||||||
|
ORDER BY 1;
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
0
|
||||||
|
1
|
||||||
|
2
|
||||||
|
3
|
||||||
|
4
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
|
||||||
|
-- explain the query to see actual plan
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey
|
||||||
|
ORDER BY 1;
|
||||||
|
QUERY PLAN
|
||||||
|
-----------------------------------------------------------------------------------------------------------------------
|
||||||
|
Sort
|
||||||
|
Sort Key: (ceil(((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint) / 2))::double precision))
|
||||||
|
-> HashAggregate
|
||||||
|
Group Key: ceil(((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint) / 2))::double precision)
|
||||||
|
-> HashAggregate
|
||||||
|
Group Key: remote_scan.worker_column_2
|
||||||
|
-> Custom Scan (Citus Real-Time)
|
||||||
|
Task Count: 4
|
||||||
|
Tasks Shown: One of 4
|
||||||
|
-> Task
|
||||||
|
Node: host=localhost port=57637 dbname=regression
|
||||||
|
-> HashAggregate
|
||||||
|
Group Key: l_suppkey
|
||||||
|
-> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
|
||||||
|
(14 rows)
|
||||||
|
|
||||||
|
-- check the plan if the hash aggreate is disabled
|
||||||
|
SET enable_hashagg TO off;
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey
|
||||||
|
ORDER BY 1;
|
||||||
|
QUERY PLAN
|
||||||
|
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||||
|
Sort
|
||||||
|
Sort Key: ceil(((COALESCE((pg_catalog.sum((ceil(((COALESCE((pg_catalog.sum((ceil(((COALESCE((pg_catalog.sum((ceil(((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint) / 2))::double precision))))::bigint, '0'::bigint) / 2))::double precision))))::bigint, '0'::bigint) / 2))::double precision))))::bigint, '0'::bigint) / 2))::double precision)
|
||||||
|
-> Unique
|
||||||
|
-> Sort
|
||||||
|
Sort Key: ceil(((COALESCE((pg_catalog.sum((ceil(((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint) / 2))::double precision))))::bigint, '0'::bigint) / 2))::double precision)
|
||||||
|
-> GroupAggregate
|
||||||
|
Group Key: remote_scan.worker_column_2
|
||||||
|
-> Sort
|
||||||
|
Sort Key: remote_scan.worker_column_2
|
||||||
|
-> Custom Scan (Citus Real-Time)
|
||||||
|
Task Count: 4
|
||||||
|
Tasks Shown: One of 4
|
||||||
|
-> Task
|
||||||
|
Node: host=localhost port=57637 dbname=regression
|
||||||
|
-> HashAggregate
|
||||||
|
Group Key: l_suppkey
|
||||||
|
-> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
|
||||||
|
(17 rows)
|
||||||
|
|
||||||
|
|
||||||
|
SET enable_hashagg TO on;
|
||||||
|
|
||||||
|
-- explain the query to see actual plan with array_agg aggregation. Note that,
|
||||||
|
-- worker query created for this query is not correct. It will be fixed soon.
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT array_agg(l_linenumber), array_length(array_agg(l_linenumber), 1)
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_orderkey
|
||||||
|
ORDER BY 2
|
||||||
|
LIMIT 15;
|
||||||
|
QUERY PLAN
|
||||||
|
-------------------------------------------------------------------------------------------------------------------------
|
||||||
|
Limit
|
||||||
|
-> Sort
|
||||||
|
Sort Key: (array_length(array_cat_agg(remote_scan.array_length), 1))
|
||||||
|
-> HashAggregate
|
||||||
|
Group Key: array_length(array_cat_agg(remote_scan.array_length), 1), array_cat_agg(remote_scan.array_agg)
|
||||||
|
-> HashAggregate
|
||||||
|
Group Key: remote_scan.worker_column_3
|
||||||
|
-> Custom Scan (Citus Real-Time)
|
||||||
|
Task Count: 4
|
||||||
|
Tasks Shown: One of 4
|
||||||
|
-> Task
|
||||||
|
Node: host=localhost port=57637 dbname=regression
|
||||||
|
-> Limit
|
||||||
|
-> Sort
|
||||||
|
Sort Key: (array_agg(l_linenumber))
|
||||||
|
-> GroupAggregate
|
||||||
|
Group Key: l_orderkey
|
||||||
|
-> Sort
|
||||||
|
Sort Key: l_orderkey
|
||||||
|
-> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
|
||||||
|
(20 rows)
|
||||||
|
|
||||||
|
-- check the plan if the hash aggreate is disabled. Note that,
|
||||||
|
-- worker query created for this query is not correct. It will be fixed soon.
|
||||||
|
SET enable_hashagg TO off;
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT array_agg(l_linenumber), array_length(array_agg(l_linenumber), 1)
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_orderkey
|
||||||
|
ORDER BY 2
|
||||||
|
LIMIT 15;
|
||||||
|
QUERY PLAN
|
||||||
|
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||||
|
Limit
|
||||||
|
-> Sort
|
||||||
|
Sort Key: array_length(array_cat_agg((array_length(array_cat_agg((array_length(array_cat_agg((array_length(array_cat_agg(remote_scan.array_length), 1))), 1))), 1))), 1)
|
||||||
|
-> Unique
|
||||||
|
-> Sort
|
||||||
|
Sort Key: array_length(array_cat_agg((array_length(array_cat_agg(remote_scan.array_length), 1))), 1), array_cat_agg((array_cat_agg(remote_scan.array_agg)))
|
||||||
|
-> GroupAggregate
|
||||||
|
Group Key: remote_scan.worker_column_3
|
||||||
|
-> Sort
|
||||||
|
Sort Key: remote_scan.worker_column_3
|
||||||
|
-> Custom Scan (Citus Real-Time)
|
||||||
|
Task Count: 4
|
||||||
|
Tasks Shown: One of 4
|
||||||
|
-> Task
|
||||||
|
Node: host=localhost port=57637 dbname=regression
|
||||||
|
-> Limit
|
||||||
|
-> Sort
|
||||||
|
Sort Key: (array_agg(l_linenumber))
|
||||||
|
-> GroupAggregate
|
||||||
|
Group Key: l_orderkey
|
||||||
|
-> Sort
|
||||||
|
Sort Key: l_orderkey
|
||||||
|
-> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
|
||||||
|
(23 rows)
|
||||||
|
|
||||||
|
|
||||||
SET enable_hashagg TO on;
|
SET enable_hashagg TO on;
|
||||||
-- distinct on non-partition column with aggregate
|
-- distinct on non-partition column with aggregate
|
||||||
-- this is the same as non-distinct version due to group by
|
-- this is the same as non-distinct version due to group by
|
||||||
|
|
|
@ -78,6 +78,270 @@ EXPLAIN (COSTS FALSE)
|
||||||
|
|
||||||
SET enable_hashagg TO on;
|
SET enable_hashagg TO on;
|
||||||
|
|
||||||
|
-- distinct on aggregate of group by columns, we try to check whether we handle
|
||||||
|
-- queries which does not have any group by column in distinct columns properly.
|
||||||
|
SELECT DISTINCT count(*)
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey, l_linenumber
|
||||||
|
ORDER BY 1;
|
||||||
|
|
||||||
|
-- explain the query to see actual plan. We expect to see Aggregate node having
|
||||||
|
-- group by key on count(*) column, since columns in the Group By doesn't guarantee
|
||||||
|
-- the uniqueness of the result.
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT count(*)
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey, l_linenumber
|
||||||
|
ORDER BY 1;
|
||||||
|
|
||||||
|
-- check the plan if the hash aggreate is disabled. We expect to see sort+unique
|
||||||
|
-- instead of aggregate plan node to handle distinct.
|
||||||
|
SET enable_hashagg TO off;
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT count(*)
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey, l_linenumber
|
||||||
|
ORDER BY 1;
|
||||||
|
|
||||||
|
SET enable_hashagg TO on;
|
||||||
|
|
||||||
|
-- Now we have only part of group clause columns in distinct, yet it is still not
|
||||||
|
-- enough to use Group By columns to guarantee uniqueness of result list.
|
||||||
|
SELECT DISTINCT l_suppkey, count(*)
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey, l_linenumber
|
||||||
|
ORDER BY 1
|
||||||
|
LIMIT 10;
|
||||||
|
|
||||||
|
-- explain the query to see actual plan. Similar to the explain of the query above.
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT l_suppkey, count(*)
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey, l_linenumber
|
||||||
|
ORDER BY 1
|
||||||
|
LIMIT 10;
|
||||||
|
|
||||||
|
-- check the plan if the hash aggreate is disabled. Similar to the explain of
|
||||||
|
-- the query above.
|
||||||
|
SET enable_hashagg TO off;
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT l_suppkey, count(*)
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey, l_linenumber
|
||||||
|
ORDER BY 1
|
||||||
|
LIMIT 10;
|
||||||
|
|
||||||
|
SET enable_hashagg TO on;
|
||||||
|
|
||||||
|
-- Similar to the above query, not with count but avg. Only difference with the
|
||||||
|
-- above query is that, we create run two aggregate functions in workers.
|
||||||
|
SELECT DISTINCT l_suppkey, avg(l_partkey)
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey, l_linenumber
|
||||||
|
ORDER BY 1,2
|
||||||
|
LIMIT 10;
|
||||||
|
|
||||||
|
-- explain the query to see actual plan. Similar to the explain of the query above.
|
||||||
|
-- Only aggregate functions will be changed.
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT l_suppkey, avg(l_partkey)
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey, l_linenumber
|
||||||
|
ORDER BY 1,2
|
||||||
|
LIMIT 10;
|
||||||
|
|
||||||
|
-- check the plan if the hash aggreate is disabled. This explain errors out due
|
||||||
|
-- to a bug right now, expectation must be corrected after fixing it.
|
||||||
|
SET enable_hashagg TO off;
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT l_suppkey, avg(l_partkey)
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey, l_linenumber
|
||||||
|
ORDER BY 1,2
|
||||||
|
LIMIT 10;
|
||||||
|
|
||||||
|
SET enable_hashagg TO on;
|
||||||
|
|
||||||
|
-- Similar to the above query but with distinct on
|
||||||
|
SELECT DISTINCT ON (l_suppkey) avg(l_partkey)
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey, l_linenumber
|
||||||
|
ORDER BY l_suppkey,1
|
||||||
|
LIMIT 10;
|
||||||
|
|
||||||
|
-- explain the query to see actual plan. We expect to see sort+unique to handle
|
||||||
|
-- distinct on.
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT ON (l_suppkey) avg(l_partkey)
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey, l_linenumber
|
||||||
|
ORDER BY l_suppkey,1
|
||||||
|
LIMIT 10;
|
||||||
|
|
||||||
|
-- check the plan if the hash aggreate is disabled. We expect to see sort+unique to
|
||||||
|
-- handle distinct on.
|
||||||
|
SET enable_hashagg TO off;
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT ON (l_suppkey) avg(l_partkey)
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey, l_linenumber
|
||||||
|
ORDER BY l_suppkey,1
|
||||||
|
LIMIT 10;
|
||||||
|
|
||||||
|
SET enable_hashagg TO on;
|
||||||
|
|
||||||
|
-- distinct with expression and aggregation
|
||||||
|
SELECT DISTINCT avg(ceil(l_partkey / 2))
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey, l_linenumber
|
||||||
|
ORDER BY 1
|
||||||
|
LIMIT 10;
|
||||||
|
|
||||||
|
-- explain the query to see actual plan
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT avg(ceil(l_partkey / 2))
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey, l_linenumber
|
||||||
|
ORDER BY 1
|
||||||
|
LIMIT 10;
|
||||||
|
|
||||||
|
-- check the plan if the hash aggreate is disabled. This explain errors out due
|
||||||
|
-- to a bug right now, expectation must be corrected after fixing it.
|
||||||
|
SET enable_hashagg TO off;
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT avg(ceil(l_partkey / 2))
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey, l_linenumber
|
||||||
|
ORDER BY 1
|
||||||
|
LIMIT 10;
|
||||||
|
|
||||||
|
SET enable_hashagg TO on;
|
||||||
|
|
||||||
|
-- expression among aggregations.
|
||||||
|
SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey, l_linenumber
|
||||||
|
ORDER BY 1
|
||||||
|
LIMIT 10;
|
||||||
|
|
||||||
|
-- explain the query to see actual plan
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey, l_linenumber
|
||||||
|
ORDER BY 1
|
||||||
|
LIMIT 10;
|
||||||
|
|
||||||
|
-- check the plan if the hash aggreate is disabled. This explain errors out due
|
||||||
|
-- to a bug right now, expectation must be corrected after fixing it.
|
||||||
|
SET enable_hashagg TO off;
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey, l_linenumber
|
||||||
|
ORDER BY 1
|
||||||
|
LIMIT 10;
|
||||||
|
|
||||||
|
SET enable_hashagg TO on;
|
||||||
|
|
||||||
|
-- distinct on all columns, note Group By columns guarantees uniqueness of the
|
||||||
|
-- result list.
|
||||||
|
SELECT DISTINCT *
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
|
||||||
|
ORDER BY 1,2
|
||||||
|
LIMIT 10;
|
||||||
|
|
||||||
|
-- explain the query to see actual plan. We expect to see only one aggregation
|
||||||
|
-- node since group by columns guarantees the uniqueness.
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT *
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
|
||||||
|
ORDER BY 1,2
|
||||||
|
LIMIT 10;
|
||||||
|
|
||||||
|
-- check the plan if the hash aggreate is disabled. We expect to see only one
|
||||||
|
-- aggregation node since group by columns guarantees the uniqueness.
|
||||||
|
SET enable_hashagg TO off;
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT *
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
|
||||||
|
ORDER BY 1,2
|
||||||
|
LIMIT 10;
|
||||||
|
|
||||||
|
SET enable_hashagg TO on;
|
||||||
|
|
||||||
|
-- distinct on count distinct
|
||||||
|
SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_orderkey
|
||||||
|
ORDER BY 1,2;
|
||||||
|
|
||||||
|
-- explain the query to see actual plan. We expect to see aggregation plan for
|
||||||
|
-- the outer distinct.
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_orderkey
|
||||||
|
ORDER BY 1,2;
|
||||||
|
|
||||||
|
-- check the plan if the hash aggreate is disabled. We expect to see sort + unique
|
||||||
|
-- plans for the outer distinct.
|
||||||
|
SET enable_hashagg TO off;
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_orderkey
|
||||||
|
ORDER BY 1,2;
|
||||||
|
|
||||||
|
SET enable_hashagg TO on;
|
||||||
|
|
||||||
|
-- distinct on aggregation with filter and expression
|
||||||
|
SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey
|
||||||
|
ORDER BY 1;
|
||||||
|
|
||||||
|
-- explain the query to see actual plan
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey
|
||||||
|
ORDER BY 1;
|
||||||
|
|
||||||
|
-- check the plan if the hash aggreate is disabled
|
||||||
|
SET enable_hashagg TO off;
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_suppkey
|
||||||
|
ORDER BY 1;
|
||||||
|
|
||||||
|
SET enable_hashagg TO on;
|
||||||
|
|
||||||
|
-- explain the query to see actual plan with array_agg aggregation. Note that,
|
||||||
|
-- worker query created for this query is not correct. It will be fixed soon.
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT array_agg(l_linenumber), array_length(array_agg(l_linenumber), 1)
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_orderkey
|
||||||
|
ORDER BY 2
|
||||||
|
LIMIT 15;
|
||||||
|
|
||||||
|
-- check the plan if the hash aggreate is disabled. Note that,
|
||||||
|
-- worker query created for this query is not correct. It will be fixed soon.
|
||||||
|
SET enable_hashagg TO off;
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT array_agg(l_linenumber), array_length(array_agg(l_linenumber), 1)
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
GROUP BY l_orderkey
|
||||||
|
ORDER BY 2
|
||||||
|
LIMIT 15;
|
||||||
|
|
||||||
|
SET enable_hashagg TO on;
|
||||||
|
|
||||||
-- distinct on non-partition column with aggregate
|
-- distinct on non-partition column with aggregate
|
||||||
-- this is the same as non-distinct version due to group by
|
-- this is the same as non-distinct version due to group by
|
||||||
SELECT DISTINCT l_partkey, count(*)
|
SELECT DISTINCT l_partkey, count(*)
|
||||||
|
|
Loading…
Reference in New Issue