Merge pull request #1988 from citusdata/respect_enable_hashagg

Respect enable_hashagg in the master planner
pull/1993/head
Metin Döşlü 2018-02-05 16:27:05 +03:00 committed by GitHub
commit aba2f47cdf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 112 additions and 12 deletions

View File

@ -21,6 +21,7 @@
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/planmain.h"
#include "optimizer/tlist.h"
#include "optimizer/var.h"
@ -289,7 +290,7 @@ BuildAggregatePlan(Query *masterQuery, Plan *subPlan)
if (groupColumnCount > 0)
{
bool groupingIsHashable = grouping_is_hashable(groupColumnList);
bool groupingIsSortable = grouping_is_hashable(groupColumnList);
bool groupingIsSortable = grouping_is_sortable(groupColumnList);
bool hasDistinctAggregate = HasDistinctAggregate(masterQuery);
if (!groupingIsHashable && !groupingIsSortable)
@ -303,13 +304,20 @@ BuildAggregatePlan(Query *masterQuery, Plan *subPlan)
* see nodeAgg.c:build_pertrans_for_aggref(). In that case we use
* sorted agg strategy, otherwise we use hash strategy.
*/
if (!groupingIsHashable || hasDistinctAggregate)
if (!enable_hashagg || !groupingIsHashable || hasDistinctAggregate)
{
char *messageHint = NULL;
if (!enable_hashagg && groupingIsHashable)
{
messageHint = "Consider setting enable_hashagg to on.";
}
if (!groupingIsSortable)
{
ereport(ERROR, (errmsg("grouped column list must cannot be sorted"),
errdetail("Having a distinct aggregate requires "
"grouped column list to be sortable.")));
"grouped column list to be sortable."),
messageHint ? errhint("%s", messageHint) : 0));
}
aggregateStrategy = AGG_SORTED;
@ -418,7 +426,8 @@ BuildDistinctPlan(Query *masterQuery, Plan *subPlan)
*/
distinctClausesHashable = grouping_is_hashable(distinctClauseList);
hasDistinctAggregate = HasDistinctAggregate(masterQuery);
if (distinctClausesHashable && !hasDistinctAggregate)
if (enable_hashagg && distinctClausesHashable && !hasDistinctAggregate)
{
const long rowEstimate = 10; /* using the same value as BuildAggregatePlan() */
AttrNumber *distinctColumnIdArray = extract_grouping_cols(distinctClauseList,

View File

@ -54,6 +54,26 @@ Sort
-> HashAggregate
Group Key: l_quantity
-> Seq Scan on lineitem_290001 lineitem
-- Test disable hash aggregate
SET enable_hashagg TO off;
EXPLAIN (COSTS FALSE, FORMAT TEXT)
SELECT l_quantity, count(*) count_quantity FROM lineitem
GROUP BY l_quantity ORDER BY count_quantity, l_quantity;
Sort
Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint), remote_scan.l_quantity
-> GroupAggregate
Group Key: remote_scan.l_quantity
-> Sort
Sort Key: remote_scan.l_quantity
-> Custom Scan (Citus Real-Time)
Task Count: 8
Tasks Shown: One of 8
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: l_quantity
-> Seq Scan on lineitem_290001 lineitem
SET enable_hashagg TO on;
-- Test JSON format
EXPLAIN (COSTS FALSE, FORMAT JSON)
SELECT l_quantity, count(*) count_quantity FROM lineitem
@ -1128,7 +1148,7 @@ SELECT l_orderkey FROM series JOIN keys ON (s = l_orderkey)
ORDER BY s;
Custom Scan (Citus Router)
Output: remote_scan.l_orderkey
-> Distributed Subplan 54_1
-> Distributed Subplan 55_1
-> HashAggregate
Output: remote_scan.l_orderkey
Group Key: remote_scan.l_orderkey
@ -1143,7 +1163,7 @@ Custom Scan (Citus Router)
Group Key: lineitem_hash_part.l_orderkey
-> Seq Scan on public.lineitem_hash_part_360038 lineitem_hash_part
Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
-> Distributed Subplan 54_2
-> Distributed Subplan 55_2
-> Function Scan on pg_catalog.generate_series s
Output: s
Function Call: generate_series(1, 10)
@ -1159,13 +1179,13 @@ Custom Scan (Citus Router)
Sort Key: intermediate_result.s
-> Function Scan on pg_catalog.read_intermediate_result intermediate_result
Output: intermediate_result.s
Function Call: read_intermediate_result('54_2'::text, 'binary'::citus_copy_format)
Function Call: read_intermediate_result('55_2'::text, 'binary'::citus_copy_format)
-> Sort
Output: intermediate_result_1.l_orderkey
Sort Key: intermediate_result_1.l_orderkey
-> Function Scan on pg_catalog.read_intermediate_result intermediate_result_1
Output: intermediate_result_1.l_orderkey
Function Call: read_intermediate_result('54_1'::text, 'binary'::citus_copy_format)
Function Call: read_intermediate_result('55_1'::text, 'binary'::citus_copy_format)
SELECT true AS valid FROM explain_json($$
WITH result AS (
SELECT l_quantity, count(*) count_quantity FROM lineitem

View File

@ -54,6 +54,26 @@ Sort
-> HashAggregate
Group Key: l_quantity
-> Seq Scan on lineitem_290001 lineitem
-- Test disable hash aggregate
SET enable_hashagg TO off;
EXPLAIN (COSTS FALSE, FORMAT TEXT)
SELECT l_quantity, count(*) count_quantity FROM lineitem
GROUP BY l_quantity ORDER BY count_quantity, l_quantity;
Sort
Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint), remote_scan.l_quantity
-> GroupAggregate
Group Key: remote_scan.l_quantity
-> Sort
Sort Key: remote_scan.l_quantity
-> Custom Scan (Citus Real-Time)
Task Count: 8
Tasks Shown: One of 8
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: l_quantity
-> Seq Scan on lineitem_290001 lineitem
SET enable_hashagg TO on;
-- Test JSON format
EXPLAIN (COSTS FALSE, FORMAT JSON)
SELECT l_quantity, count(*) count_quantity FROM lineitem
@ -1128,7 +1148,7 @@ SELECT l_orderkey FROM series JOIN keys ON (s = l_orderkey)
ORDER BY s;
Custom Scan (Citus Router)
Output: remote_scan.l_orderkey
-> Distributed Subplan 54_1
-> Distributed Subplan 55_1
-> HashAggregate
Output: remote_scan.l_orderkey
Group Key: remote_scan.l_orderkey
@ -1143,7 +1163,7 @@ Custom Scan (Citus Router)
Group Key: lineitem_hash_part.l_orderkey
-> Seq Scan on public.lineitem_hash_part_360038 lineitem_hash_part
Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
-> Distributed Subplan 54_2
-> Distributed Subplan 55_2
-> Function Scan on pg_catalog.generate_series s
Output: s
Function Call: generate_series(1, 10)
@ -1159,13 +1179,13 @@ Custom Scan (Citus Router)
Sort Key: intermediate_result.s
-> Function Scan on pg_catalog.read_intermediate_result intermediate_result
Output: intermediate_result.s
Function Call: read_intermediate_result('54_2'::text, 'binary'::citus_copy_format)
Function Call: read_intermediate_result('55_2'::text, 'binary'::citus_copy_format)
-> Sort
Output: intermediate_result_1.l_orderkey
Sort Key: intermediate_result_1.l_orderkey
-> Function Scan on pg_catalog.read_intermediate_result intermediate_result_1
Output: intermediate_result_1.l_orderkey
Function Call: read_intermediate_result('54_1'::text, 'binary'::citus_copy_format)
Function Call: read_intermediate_result('55_1'::text, 'binary'::citus_copy_format)
SELECT true AS valid FROM explain_json($$
WITH result AS (
SELECT l_quantity, count(*) count_quantity FROM lineitem

View File

@ -215,6 +215,37 @@ EXPLAIN (COSTS FALSE)
Filter: (l_orderkey < 200)
(15 rows)
-- check the plan if the hash aggreate is disabled
SET enable_hashagg TO off;
EXPLAIN (COSTS FALSE)
SELECT DISTINCT l_orderkey, count(*)
FROM lineitem_hash_part
WHERE l_orderkey < 200
GROUP BY 1
HAVING count(*) > 5
ORDER BY 2 DESC, 1;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------
Sort
Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint) DESC, remote_scan.l_orderkey
-> GroupAggregate
Group Key: remote_scan.l_orderkey
Filter: (COALESCE((pg_catalog.sum(remote_scan.worker_column_3))::bigint, '0'::bigint) > 5)
-> Sort
Sort Key: remote_scan.l_orderkey
-> Custom Scan (Citus Real-Time)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: l_orderkey
Filter: (count(*) > 5)
-> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part
Filter: (l_orderkey < 200)
(17 rows)
SET enable_hashagg TO on;
-- distinct on non-partition column with aggregate
-- this is the same as non-distinct version due to group by
SELECT DISTINCT l_partkey, count(*)

View File

@ -46,6 +46,14 @@ EXPLAIN (COSTS FALSE, FORMAT TEXT)
SELECT l_quantity, count(*) count_quantity FROM lineitem
GROUP BY l_quantity ORDER BY count_quantity, l_quantity;
-- Test disable hash aggregate
SET enable_hashagg TO off;
EXPLAIN (COSTS FALSE, FORMAT TEXT)
SELECT l_quantity, count(*) count_quantity FROM lineitem
GROUP BY l_quantity ORDER BY count_quantity, l_quantity;
SET enable_hashagg TO on;
-- Test JSON format
EXPLAIN (COSTS FALSE, FORMAT JSON)
SELECT l_quantity, count(*) count_quantity FROM lineitem

View File

@ -66,6 +66,18 @@ EXPLAIN (COSTS FALSE)
HAVING count(*) > 5
ORDER BY 2 DESC, 1;
-- check the plan if the hash aggreate is disabled
SET enable_hashagg TO off;
EXPLAIN (COSTS FALSE)
SELECT DISTINCT l_orderkey, count(*)
FROM lineitem_hash_part
WHERE l_orderkey < 200
GROUP BY 1
HAVING count(*) > 5
ORDER BY 2 DESC, 1;
SET enable_hashagg TO on;
-- distinct on non-partition column with aggregate
-- this is the same as non-distinct version due to group by
SELECT DISTINCT l_partkey, count(*)