mirror of https://github.com/citusdata/citus.git
Merge pull request #1988 from citusdata/respect_enable_hashagg
Respect enable_hashagg in the master plannerpull/1993/head
commit
aba2f47cdf
|
@ -21,6 +21,7 @@
|
||||||
#include "nodes/makefuncs.h"
|
#include "nodes/makefuncs.h"
|
||||||
#include "nodes/nodeFuncs.h"
|
#include "nodes/nodeFuncs.h"
|
||||||
#include "optimizer/clauses.h"
|
#include "optimizer/clauses.h"
|
||||||
|
#include "optimizer/cost.h"
|
||||||
#include "optimizer/planmain.h"
|
#include "optimizer/planmain.h"
|
||||||
#include "optimizer/tlist.h"
|
#include "optimizer/tlist.h"
|
||||||
#include "optimizer/var.h"
|
#include "optimizer/var.h"
|
||||||
|
@ -289,7 +290,7 @@ BuildAggregatePlan(Query *masterQuery, Plan *subPlan)
|
||||||
if (groupColumnCount > 0)
|
if (groupColumnCount > 0)
|
||||||
{
|
{
|
||||||
bool groupingIsHashable = grouping_is_hashable(groupColumnList);
|
bool groupingIsHashable = grouping_is_hashable(groupColumnList);
|
||||||
bool groupingIsSortable = grouping_is_hashable(groupColumnList);
|
bool groupingIsSortable = grouping_is_sortable(groupColumnList);
|
||||||
bool hasDistinctAggregate = HasDistinctAggregate(masterQuery);
|
bool hasDistinctAggregate = HasDistinctAggregate(masterQuery);
|
||||||
|
|
||||||
if (!groupingIsHashable && !groupingIsSortable)
|
if (!groupingIsHashable && !groupingIsSortable)
|
||||||
|
@ -303,13 +304,20 @@ BuildAggregatePlan(Query *masterQuery, Plan *subPlan)
|
||||||
* see nodeAgg.c:build_pertrans_for_aggref(). In that case we use
|
* see nodeAgg.c:build_pertrans_for_aggref(). In that case we use
|
||||||
* sorted agg strategy, otherwise we use hash strategy.
|
* sorted agg strategy, otherwise we use hash strategy.
|
||||||
*/
|
*/
|
||||||
if (!groupingIsHashable || hasDistinctAggregate)
|
if (!enable_hashagg || !groupingIsHashable || hasDistinctAggregate)
|
||||||
{
|
{
|
||||||
|
char *messageHint = NULL;
|
||||||
|
if (!enable_hashagg && groupingIsHashable)
|
||||||
|
{
|
||||||
|
messageHint = "Consider setting enable_hashagg to on.";
|
||||||
|
}
|
||||||
|
|
||||||
if (!groupingIsSortable)
|
if (!groupingIsSortable)
|
||||||
{
|
{
|
||||||
ereport(ERROR, (errmsg("grouped column list must cannot be sorted"),
|
ereport(ERROR, (errmsg("grouped column list must cannot be sorted"),
|
||||||
errdetail("Having a distinct aggregate requires "
|
errdetail("Having a distinct aggregate requires "
|
||||||
"grouped column list to be sortable.")));
|
"grouped column list to be sortable."),
|
||||||
|
messageHint ? errhint("%s", messageHint) : 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
aggregateStrategy = AGG_SORTED;
|
aggregateStrategy = AGG_SORTED;
|
||||||
|
@ -418,7 +426,8 @@ BuildDistinctPlan(Query *masterQuery, Plan *subPlan)
|
||||||
*/
|
*/
|
||||||
distinctClausesHashable = grouping_is_hashable(distinctClauseList);
|
distinctClausesHashable = grouping_is_hashable(distinctClauseList);
|
||||||
hasDistinctAggregate = HasDistinctAggregate(masterQuery);
|
hasDistinctAggregate = HasDistinctAggregate(masterQuery);
|
||||||
if (distinctClausesHashable && !hasDistinctAggregate)
|
|
||||||
|
if (enable_hashagg && distinctClausesHashable && !hasDistinctAggregate)
|
||||||
{
|
{
|
||||||
const long rowEstimate = 10; /* using the same value as BuildAggregatePlan() */
|
const long rowEstimate = 10; /* using the same value as BuildAggregatePlan() */
|
||||||
AttrNumber *distinctColumnIdArray = extract_grouping_cols(distinctClauseList,
|
AttrNumber *distinctColumnIdArray = extract_grouping_cols(distinctClauseList,
|
||||||
|
|
|
@ -54,6 +54,26 @@ Sort
|
||||||
-> HashAggregate
|
-> HashAggregate
|
||||||
Group Key: l_quantity
|
Group Key: l_quantity
|
||||||
-> Seq Scan on lineitem_290001 lineitem
|
-> Seq Scan on lineitem_290001 lineitem
|
||||||
|
-- Test disable hash aggregate
|
||||||
|
SET enable_hashagg TO off;
|
||||||
|
EXPLAIN (COSTS FALSE, FORMAT TEXT)
|
||||||
|
SELECT l_quantity, count(*) count_quantity FROM lineitem
|
||||||
|
GROUP BY l_quantity ORDER BY count_quantity, l_quantity;
|
||||||
|
Sort
|
||||||
|
Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint), remote_scan.l_quantity
|
||||||
|
-> GroupAggregate
|
||||||
|
Group Key: remote_scan.l_quantity
|
||||||
|
-> Sort
|
||||||
|
Sort Key: remote_scan.l_quantity
|
||||||
|
-> Custom Scan (Citus Real-Time)
|
||||||
|
Task Count: 8
|
||||||
|
Tasks Shown: One of 8
|
||||||
|
-> Task
|
||||||
|
Node: host=localhost port=57637 dbname=regression
|
||||||
|
-> HashAggregate
|
||||||
|
Group Key: l_quantity
|
||||||
|
-> Seq Scan on lineitem_290001 lineitem
|
||||||
|
SET enable_hashagg TO on;
|
||||||
-- Test JSON format
|
-- Test JSON format
|
||||||
EXPLAIN (COSTS FALSE, FORMAT JSON)
|
EXPLAIN (COSTS FALSE, FORMAT JSON)
|
||||||
SELECT l_quantity, count(*) count_quantity FROM lineitem
|
SELECT l_quantity, count(*) count_quantity FROM lineitem
|
||||||
|
@ -1128,7 +1148,7 @@ SELECT l_orderkey FROM series JOIN keys ON (s = l_orderkey)
|
||||||
ORDER BY s;
|
ORDER BY s;
|
||||||
Custom Scan (Citus Router)
|
Custom Scan (Citus Router)
|
||||||
Output: remote_scan.l_orderkey
|
Output: remote_scan.l_orderkey
|
||||||
-> Distributed Subplan 54_1
|
-> Distributed Subplan 55_1
|
||||||
-> HashAggregate
|
-> HashAggregate
|
||||||
Output: remote_scan.l_orderkey
|
Output: remote_scan.l_orderkey
|
||||||
Group Key: remote_scan.l_orderkey
|
Group Key: remote_scan.l_orderkey
|
||||||
|
@ -1143,7 +1163,7 @@ Custom Scan (Citus Router)
|
||||||
Group Key: lineitem_hash_part.l_orderkey
|
Group Key: lineitem_hash_part.l_orderkey
|
||||||
-> Seq Scan on public.lineitem_hash_part_360038 lineitem_hash_part
|
-> Seq Scan on public.lineitem_hash_part_360038 lineitem_hash_part
|
||||||
Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
|
Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
|
||||||
-> Distributed Subplan 54_2
|
-> Distributed Subplan 55_2
|
||||||
-> Function Scan on pg_catalog.generate_series s
|
-> Function Scan on pg_catalog.generate_series s
|
||||||
Output: s
|
Output: s
|
||||||
Function Call: generate_series(1, 10)
|
Function Call: generate_series(1, 10)
|
||||||
|
@ -1159,13 +1179,13 @@ Custom Scan (Citus Router)
|
||||||
Sort Key: intermediate_result.s
|
Sort Key: intermediate_result.s
|
||||||
-> Function Scan on pg_catalog.read_intermediate_result intermediate_result
|
-> Function Scan on pg_catalog.read_intermediate_result intermediate_result
|
||||||
Output: intermediate_result.s
|
Output: intermediate_result.s
|
||||||
Function Call: read_intermediate_result('54_2'::text, 'binary'::citus_copy_format)
|
Function Call: read_intermediate_result('55_2'::text, 'binary'::citus_copy_format)
|
||||||
-> Sort
|
-> Sort
|
||||||
Output: intermediate_result_1.l_orderkey
|
Output: intermediate_result_1.l_orderkey
|
||||||
Sort Key: intermediate_result_1.l_orderkey
|
Sort Key: intermediate_result_1.l_orderkey
|
||||||
-> Function Scan on pg_catalog.read_intermediate_result intermediate_result_1
|
-> Function Scan on pg_catalog.read_intermediate_result intermediate_result_1
|
||||||
Output: intermediate_result_1.l_orderkey
|
Output: intermediate_result_1.l_orderkey
|
||||||
Function Call: read_intermediate_result('54_1'::text, 'binary'::citus_copy_format)
|
Function Call: read_intermediate_result('55_1'::text, 'binary'::citus_copy_format)
|
||||||
SELECT true AS valid FROM explain_json($$
|
SELECT true AS valid FROM explain_json($$
|
||||||
WITH result AS (
|
WITH result AS (
|
||||||
SELECT l_quantity, count(*) count_quantity FROM lineitem
|
SELECT l_quantity, count(*) count_quantity FROM lineitem
|
||||||
|
|
|
@ -54,6 +54,26 @@ Sort
|
||||||
-> HashAggregate
|
-> HashAggregate
|
||||||
Group Key: l_quantity
|
Group Key: l_quantity
|
||||||
-> Seq Scan on lineitem_290001 lineitem
|
-> Seq Scan on lineitem_290001 lineitem
|
||||||
|
-- Test disable hash aggregate
|
||||||
|
SET enable_hashagg TO off;
|
||||||
|
EXPLAIN (COSTS FALSE, FORMAT TEXT)
|
||||||
|
SELECT l_quantity, count(*) count_quantity FROM lineitem
|
||||||
|
GROUP BY l_quantity ORDER BY count_quantity, l_quantity;
|
||||||
|
Sort
|
||||||
|
Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint), remote_scan.l_quantity
|
||||||
|
-> GroupAggregate
|
||||||
|
Group Key: remote_scan.l_quantity
|
||||||
|
-> Sort
|
||||||
|
Sort Key: remote_scan.l_quantity
|
||||||
|
-> Custom Scan (Citus Real-Time)
|
||||||
|
Task Count: 8
|
||||||
|
Tasks Shown: One of 8
|
||||||
|
-> Task
|
||||||
|
Node: host=localhost port=57637 dbname=regression
|
||||||
|
-> HashAggregate
|
||||||
|
Group Key: l_quantity
|
||||||
|
-> Seq Scan on lineitem_290001 lineitem
|
||||||
|
SET enable_hashagg TO on;
|
||||||
-- Test JSON format
|
-- Test JSON format
|
||||||
EXPLAIN (COSTS FALSE, FORMAT JSON)
|
EXPLAIN (COSTS FALSE, FORMAT JSON)
|
||||||
SELECT l_quantity, count(*) count_quantity FROM lineitem
|
SELECT l_quantity, count(*) count_quantity FROM lineitem
|
||||||
|
@ -1128,7 +1148,7 @@ SELECT l_orderkey FROM series JOIN keys ON (s = l_orderkey)
|
||||||
ORDER BY s;
|
ORDER BY s;
|
||||||
Custom Scan (Citus Router)
|
Custom Scan (Citus Router)
|
||||||
Output: remote_scan.l_orderkey
|
Output: remote_scan.l_orderkey
|
||||||
-> Distributed Subplan 54_1
|
-> Distributed Subplan 55_1
|
||||||
-> HashAggregate
|
-> HashAggregate
|
||||||
Output: remote_scan.l_orderkey
|
Output: remote_scan.l_orderkey
|
||||||
Group Key: remote_scan.l_orderkey
|
Group Key: remote_scan.l_orderkey
|
||||||
|
@ -1143,7 +1163,7 @@ Custom Scan (Citus Router)
|
||||||
Group Key: lineitem_hash_part.l_orderkey
|
Group Key: lineitem_hash_part.l_orderkey
|
||||||
-> Seq Scan on public.lineitem_hash_part_360038 lineitem_hash_part
|
-> Seq Scan on public.lineitem_hash_part_360038 lineitem_hash_part
|
||||||
Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
|
Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
|
||||||
-> Distributed Subplan 54_2
|
-> Distributed Subplan 55_2
|
||||||
-> Function Scan on pg_catalog.generate_series s
|
-> Function Scan on pg_catalog.generate_series s
|
||||||
Output: s
|
Output: s
|
||||||
Function Call: generate_series(1, 10)
|
Function Call: generate_series(1, 10)
|
||||||
|
@ -1159,13 +1179,13 @@ Custom Scan (Citus Router)
|
||||||
Sort Key: intermediate_result.s
|
Sort Key: intermediate_result.s
|
||||||
-> Function Scan on pg_catalog.read_intermediate_result intermediate_result
|
-> Function Scan on pg_catalog.read_intermediate_result intermediate_result
|
||||||
Output: intermediate_result.s
|
Output: intermediate_result.s
|
||||||
Function Call: read_intermediate_result('54_2'::text, 'binary'::citus_copy_format)
|
Function Call: read_intermediate_result('55_2'::text, 'binary'::citus_copy_format)
|
||||||
-> Sort
|
-> Sort
|
||||||
Output: intermediate_result_1.l_orderkey
|
Output: intermediate_result_1.l_orderkey
|
||||||
Sort Key: intermediate_result_1.l_orderkey
|
Sort Key: intermediate_result_1.l_orderkey
|
||||||
-> Function Scan on pg_catalog.read_intermediate_result intermediate_result_1
|
-> Function Scan on pg_catalog.read_intermediate_result intermediate_result_1
|
||||||
Output: intermediate_result_1.l_orderkey
|
Output: intermediate_result_1.l_orderkey
|
||||||
Function Call: read_intermediate_result('54_1'::text, 'binary'::citus_copy_format)
|
Function Call: read_intermediate_result('55_1'::text, 'binary'::citus_copy_format)
|
||||||
SELECT true AS valid FROM explain_json($$
|
SELECT true AS valid FROM explain_json($$
|
||||||
WITH result AS (
|
WITH result AS (
|
||||||
SELECT l_quantity, count(*) count_quantity FROM lineitem
|
SELECT l_quantity, count(*) count_quantity FROM lineitem
|
||||||
|
|
|
@ -215,6 +215,37 @@ EXPLAIN (COSTS FALSE)
|
||||||
Filter: (l_orderkey < 200)
|
Filter: (l_orderkey < 200)
|
||||||
(15 rows)
|
(15 rows)
|
||||||
|
|
||||||
|
-- check the plan if the hash aggreate is disabled
|
||||||
|
SET enable_hashagg TO off;
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT l_orderkey, count(*)
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
WHERE l_orderkey < 200
|
||||||
|
GROUP BY 1
|
||||||
|
HAVING count(*) > 5
|
||||||
|
ORDER BY 2 DESC, 1;
|
||||||
|
QUERY PLAN
|
||||||
|
----------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||||
|
Sort
|
||||||
|
Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint) DESC, remote_scan.l_orderkey
|
||||||
|
-> GroupAggregate
|
||||||
|
Group Key: remote_scan.l_orderkey
|
||||||
|
Filter: (COALESCE((pg_catalog.sum(remote_scan.worker_column_3))::bigint, '0'::bigint) > 5)
|
||||||
|
-> Sort
|
||||||
|
Sort Key: remote_scan.l_orderkey
|
||||||
|
-> Custom Scan (Citus Real-Time)
|
||||||
|
Task Count: 4
|
||||||
|
Tasks Shown: One of 4
|
||||||
|
-> Task
|
||||||
|
Node: host=localhost port=57637 dbname=regression
|
||||||
|
-> HashAggregate
|
||||||
|
Group Key: l_orderkey
|
||||||
|
Filter: (count(*) > 5)
|
||||||
|
-> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
|
||||||
|
Filter: (l_orderkey < 200)
|
||||||
|
(17 rows)
|
||||||
|
|
||||||
|
SET enable_hashagg TO on;
|
||||||
-- distinct on non-partition column with aggregate
|
-- distinct on non-partition column with aggregate
|
||||||
-- this is the same as non-distinct version due to group by
|
-- this is the same as non-distinct version due to group by
|
||||||
SELECT DISTINCT l_partkey, count(*)
|
SELECT DISTINCT l_partkey, count(*)
|
||||||
|
|
|
@ -46,6 +46,14 @@ EXPLAIN (COSTS FALSE, FORMAT TEXT)
|
||||||
SELECT l_quantity, count(*) count_quantity FROM lineitem
|
SELECT l_quantity, count(*) count_quantity FROM lineitem
|
||||||
GROUP BY l_quantity ORDER BY count_quantity, l_quantity;
|
GROUP BY l_quantity ORDER BY count_quantity, l_quantity;
|
||||||
|
|
||||||
|
-- Test disable hash aggregate
|
||||||
|
SET enable_hashagg TO off;
|
||||||
|
EXPLAIN (COSTS FALSE, FORMAT TEXT)
|
||||||
|
SELECT l_quantity, count(*) count_quantity FROM lineitem
|
||||||
|
GROUP BY l_quantity ORDER BY count_quantity, l_quantity;
|
||||||
|
|
||||||
|
SET enable_hashagg TO on;
|
||||||
|
|
||||||
-- Test JSON format
|
-- Test JSON format
|
||||||
EXPLAIN (COSTS FALSE, FORMAT JSON)
|
EXPLAIN (COSTS FALSE, FORMAT JSON)
|
||||||
SELECT l_quantity, count(*) count_quantity FROM lineitem
|
SELECT l_quantity, count(*) count_quantity FROM lineitem
|
||||||
|
|
|
@ -66,6 +66,18 @@ EXPLAIN (COSTS FALSE)
|
||||||
HAVING count(*) > 5
|
HAVING count(*) > 5
|
||||||
ORDER BY 2 DESC, 1;
|
ORDER BY 2 DESC, 1;
|
||||||
|
|
||||||
|
-- check the plan if the hash aggreate is disabled
|
||||||
|
SET enable_hashagg TO off;
|
||||||
|
EXPLAIN (COSTS FALSE)
|
||||||
|
SELECT DISTINCT l_orderkey, count(*)
|
||||||
|
FROM lineitem_hash_part
|
||||||
|
WHERE l_orderkey < 200
|
||||||
|
GROUP BY 1
|
||||||
|
HAVING count(*) > 5
|
||||||
|
ORDER BY 2 DESC, 1;
|
||||||
|
|
||||||
|
SET enable_hashagg TO on;
|
||||||
|
|
||||||
-- distinct on non-partition column with aggregate
|
-- distinct on non-partition column with aggregate
|
||||||
-- this is the same as non-distinct version due to group by
|
-- this is the same as non-distinct version due to group by
|
||||||
SELECT DISTINCT l_partkey, count(*)
|
SELECT DISTINCT l_partkey, count(*)
|
||||||
|
|
Loading…
Reference in New Issue