Merge pull request #2523 from citusdata/disable_hashagg_hll

Adds support for disabling hash agg with hll functions on coordinator
pull/2503/head
Burak Velioglu 2018-12-07 19:16:19 +03:00 committed by GitHub
commit fd3b0044b4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 875 additions and 2 deletions

View File

@ -13,6 +13,9 @@
#include "postgres.h"
#include "commands/extension.h"
#include "distributed/citus_ruleutils.h"
#include "distributed/function_utils.h"
#include "distributed/multi_logical_optimizer.h"
#include "distributed/multi_master_planner.h"
#include "distributed/multi_physical_planner.h"
@ -21,15 +24,18 @@
#include "distributed/worker_protocol.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "nodes/print.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/planmain.h"
#include "optimizer/tlist.h"
#include "optimizer/var.h"
#include "utils/builtins.h"
#include "utils/guc.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/syscache.h"
#include "utils/lsyscache.h"
static List * MasterTargetList(List *workerTargetList);
@ -37,6 +43,8 @@ static PlannedStmt * BuildSelectStatement(Query *masterQuery, List *masterTarget
CustomScan *remoteScan);
static Agg * BuildAggregatePlan(Query *masterQuery, Plan *subPlan);
static bool HasDistinctAggregate(Query *masterQuery);
static bool UseGroupAggregateWithHLL(Query *masterQuery);
static bool QueryContainsAggregateWithHLL(Query *query);
static Plan * BuildDistinctPlan(Query *masterQuery, Plan *subPlan);
static List * PrepareTargetListForNextPlan(List *targetList);
@ -313,8 +321,12 @@ BuildAggregatePlan(Query *masterQuery, Plan *subPlan)
* in group and order by with aggregate operations.
* see nodeAgg.c:build_pertrans_for_aggref(). In that case we use
* sorted agg strategy, otherwise we use hash strategy.
*
* If the master query contains hll aggregate functions and the client set
* hll.force_groupagg to on, then we choose to use group aggregation.
*/
if (!enable_hashagg || !groupingIsHashable || hasDistinctAggregate)
if (!enable_hashagg || !groupingIsHashable || hasDistinctAggregate ||
UseGroupAggregateWithHLL(masterQuery))
{
char *messageHint = NULL;
if (!enable_hashagg && groupingIsHashable)
@ -392,6 +404,71 @@ HasDistinctAggregate(Query *masterQuery)
}
/*
 * UseGroupAggregateWithHLL decides whether the master query should be planned
 * with group aggregation because of hll aggregates.
 *
 * It returns true only when all of the following hold, checked in this order:
 *  - the hll extension can be found by name (looked up with missing_ok set,
 *    so a missing extension yields false instead of an error),
 *  - the hll.force_groupagg setting is available and its value is not "off",
 *  - the master query contains an hll aggregate function.
 */
static bool
UseGroupAggregateWithHLL(Query *masterQuery)
{
	const char *forceGroupAggValue = NULL;
	Oid hllExtensionId = get_extension_oid(HLL_EXTENSION_NAME, true);

	if (OidIsValid(hllExtensionId))
	{
		/* the setting is only consulted once the extension is known to exist */
		forceGroupAggValue = GetConfigOption(HLL_FORCE_GROUPAGG_GUC_NAME, true, false);

		if (forceGroupAggValue != NULL && strcmp(forceGroupAggValue, "off") != 0)
		{
			return QueryContainsAggregateWithHLL(masterQuery);
		}
	}

	return false;
}
/*
 * QueryContainsAggregateWithHLL returns true if the query has an hll aggregate
 * function (hll_add_agg or hll_union_agg) in its target list.
 *
 * An aggregate is recognized by the schema and the name of the function it
 * refers to, rather than by resolving hll_add_agg for the argument count of
 * every aggregate that is encountered. The target list may also contain
 * aggregates that are unrelated to hll (count(*) for instance), and there is
 * no hll_add_agg variant for an arbitrary argument count.
 * NOTE(review): this assumes FunctionOid raises an error when no function with
 * the requested name and argument count exists; its definition is not part of
 * this file - confirm.
 *
 * The hll extension is looked up with missing_ok = false, so the function
 * errors out if the target list contains an aggregate while the extension
 * does not exist. The lookup is done at most once per call.
 */
static bool
QueryContainsAggregateWithHLL(Query *query)
{
	List *varList = NIL;
	ListCell *varCell = NULL;
	Oid hllSchemaOid = InvalidOid;

	varList = pull_var_clause((Node *) query->targetList, PVC_INCLUDE_AGGREGATES);
	foreach(varCell, varList)
	{
		Node *node = (Node *) lfirst(varCell);
		Aggref *aggref = NULL;
		char *aggregateName = NULL;

		/* pull_var_clause returns plain Vars as well, only aggregates matter */
		if (!IsA(node, Aggref))
		{
			continue;
		}

		aggref = (Aggref *) node;

		/* resolve the schema of the hll extension on the first aggregate only */
		if (!OidIsValid(hllSchemaOid))
		{
			Oid hllId = get_extension_oid(HLL_EXTENSION_NAME, false);

			hllSchemaOid = get_extension_schema(hllId);
		}

		/* aggregates that live outside of the hll schema cannot be hll aggregates */
		if (get_func_namespace(aggref->aggfnoid) != hllSchemaOid)
		{
			continue;
		}

		aggregateName = get_func_name(aggref->aggfnoid);
		if (aggregateName == NULL)
		{
			continue;
		}

		if (strcmp(aggregateName, HLL_ADD_AGGREGATE_NAME) == 0 ||
			strcmp(aggregateName, HLL_UNION_AGGREGATE_NAME) == 0)
		{
			return true;
		}
	}

	return false;
}
/*
* BuildDistinctPlan creates and returns a plan for distinct. Depending on
* availability of hash function it chooses HashAgg over Sort/Unique

View File

@ -38,6 +38,7 @@
#define HLL_ADD_AGGREGATE_NAME "hll_add_agg"
#define HLL_UNION_AGGREGATE_NAME "hll_union_agg"
#define HLL_CARDINALITY_FUNC_NAME "hll_cardinality"
#define HLL_FORCE_GROUPAGG_GUC_NAME "hll.force_groupagg"
/* Definitions related to Top-N approximations */
#define TOPN_ADD_AGGREGATE_NAME "topn_add_agg"

View File

@ -106,6 +106,321 @@ FROM daily_uniques
WINDOW two_days AS (ORDER BY day ASC ROWS 1 PRECEDING);
ERROR: could not run distributed query because the window function that is used cannot be pushed down
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
-- Test disabling hash_agg on coordinator query
SET citus.explain_all_tasks to true;
SET hll.force_groupagg to OFF;
EXPLAIN(COSTS OFF)
SELECT
day, hll_union_agg(unique_users)
FROM
daily_uniques
GROUP BY(1);
QUERY PLAN
------------------------------------------------------------------------
HashAggregate
Group Key: remote_scan.day
-> Custom Scan (Citus Real-Time)
Task Count: 4
Tasks Shown: All
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360289 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360290 daily_uniques
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360291 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360292 daily_uniques
(25 rows)
SET hll.force_groupagg to ON;
EXPLAIN(COSTS OFF)
SELECT
day, hll_union_agg(unique_users)
FROM
daily_uniques
GROUP BY(1);
QUERY PLAN
------------------------------------------------------------------------------
GroupAggregate
Group Key: remote_scan.day
-> Sort
Sort Key: remote_scan.day
-> Custom Scan (Citus Real-Time)
Task Count: 4
Tasks Shown: All
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360289 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360290 daily_uniques
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360291 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360292 daily_uniques
(27 rows)
-- Test disabling hash_agg with operator on coordinator query
SET hll.force_groupagg to OFF;
EXPLAIN(COSTS OFF)
SELECT
day, hll_union_agg(unique_users) || hll_union_agg(unique_users)
FROM
daily_uniques
GROUP BY(1);
QUERY PLAN
------------------------------------------------------------------------
HashAggregate
Group Key: remote_scan.day
-> Custom Scan (Citus Real-Time)
Task Count: 4
Tasks Shown: All
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360289 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360290 daily_uniques
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360291 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360292 daily_uniques
(25 rows)
SET hll.force_groupagg to ON;
EXPLAIN(COSTS OFF)
SELECT
day, hll_union_agg(unique_users) || hll_union_agg(unique_users)
FROM
daily_uniques
GROUP BY(1);
QUERY PLAN
------------------------------------------------------------------------------
GroupAggregate
Group Key: remote_scan.day
-> Sort
Sort Key: remote_scan.day
-> Custom Scan (Citus Real-Time)
Task Count: 4
Tasks Shown: All
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360289 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360290 daily_uniques
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360291 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360292 daily_uniques
(27 rows)
-- Test disabling hash_agg with expression on coordinator query
SET hll.force_groupagg to OFF;
EXPLAIN(COSTS OFF)
SELECT
day, hll_cardinality(hll_union_agg(unique_users))
FROM
daily_uniques
GROUP BY(1);
QUERY PLAN
------------------------------------------------------------------------
HashAggregate
Group Key: remote_scan.day
-> Custom Scan (Citus Real-Time)
Task Count: 4
Tasks Shown: All
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360289 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360290 daily_uniques
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360291 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360292 daily_uniques
(25 rows)
SET hll.force_groupagg to ON;
EXPLAIN(COSTS OFF)
SELECT
day, hll_cardinality(hll_union_agg(unique_users))
FROM
daily_uniques
GROUP BY(1);
QUERY PLAN
------------------------------------------------------------------------------
GroupAggregate
Group Key: remote_scan.day
-> Sort
Sort Key: remote_scan.day
-> Custom Scan (Citus Real-Time)
Task Count: 4
Tasks Shown: All
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360289 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360290 daily_uniques
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360291 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360292 daily_uniques
(27 rows)
-- Test disabling hash_agg with having
SET hll.force_groupagg to OFF;
EXPLAIN(COSTS OFF)
SELECT
day, hll_cardinality(hll_union_agg(unique_users))
FROM
daily_uniques
GROUP BY(1);
QUERY PLAN
------------------------------------------------------------------------
HashAggregate
Group Key: remote_scan.day
-> Custom Scan (Citus Real-Time)
Task Count: 4
Tasks Shown: All
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360289 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360290 daily_uniques
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360291 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360292 daily_uniques
(25 rows)
SET hll.force_groupagg to ON;
EXPLAIN(COSTS OFF)
SELECT
day, hll_cardinality(hll_union_agg(unique_users))
FROM
daily_uniques
GROUP BY(1)
HAVING hll_cardinality(hll_union_agg(unique_users)) > 1;
QUERY PLAN
----------------------------------------------------------------------------------------------------------
GroupAggregate
Group Key: remote_scan.day
Filter: (hll_cardinality(hll_union_agg(remote_scan.worker_column_3)) > '1'::double precision)
-> Sort
Sort Key: remote_scan.day
-> Custom Scan (Citus Real-Time)
Task Count: 4
Tasks Shown: All
-> Task
Node: host=localhost port=57637 dbname=regression
-> GroupAggregate
Group Key: day
Filter: (hll_cardinality(hll_union_agg(unique_users)) > '1'::double precision)
-> Sort
Sort Key: day
-> Seq Scan on daily_uniques_360289 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> GroupAggregate
Group Key: day
Filter: (hll_cardinality(hll_union_agg(unique_users)) > '1'::double precision)
-> Sort
Sort Key: day
-> Seq Scan on daily_uniques_360290 daily_uniques
-> Task
Node: host=localhost port=57637 dbname=regression
-> GroupAggregate
Group Key: day
Filter: (hll_cardinality(hll_union_agg(unique_users)) > '1'::double precision)
-> Sort
Sort Key: day
-> Seq Scan on daily_uniques_360291 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> GroupAggregate
Group Key: day
Filter: (hll_cardinality(hll_union_agg(unique_users)) > '1'::double precision)
-> Sort
Sort Key: day
-> Seq Scan on daily_uniques_360292 daily_uniques
(40 rows)
DROP TABLE raw_table;
DROP TABLE daily_uniques;
-- Check if TopN aggregates work as expected

View File

@ -107,6 +107,92 @@ WINDOW two_days AS (ORDER BY day ASC ROWS 1 PRECEDING);
ERROR: relation "daily_uniques" does not exist
LINE 2: FROM daily_uniques
^
-- Test disabling hash_agg on coordinator query
SET citus.explain_all_tasks to true;
SET hll.force_groupagg to OFF;
EXPLAIN(COSTS OFF)
SELECT
day, hll_union_agg(unique_users)
FROM
daily_uniques
GROUP BY(1);
ERROR: relation "daily_uniques" does not exist
LINE 5: daily_uniques
^
SET hll.force_groupagg to ON;
EXPLAIN(COSTS OFF)
SELECT
day, hll_union_agg(unique_users)
FROM
daily_uniques
GROUP BY(1);
ERROR: relation "daily_uniques" does not exist
LINE 5: daily_uniques
^
-- Test disabling hash_agg with operator on coordinator query
SET hll.force_groupagg to OFF;
EXPLAIN(COSTS OFF)
SELECT
day, hll_union_agg(unique_users) || hll_union_agg(unique_users)
FROM
daily_uniques
GROUP BY(1);
ERROR: relation "daily_uniques" does not exist
LINE 5: daily_uniques
^
SET hll.force_groupagg to ON;
EXPLAIN(COSTS OFF)
SELECT
day, hll_union_agg(unique_users) || hll_union_agg(unique_users)
FROM
daily_uniques
GROUP BY(1);
ERROR: relation "daily_uniques" does not exist
LINE 5: daily_uniques
^
-- Test disabling hash_agg with expression on coordinator query
SET hll.force_groupagg to OFF;
EXPLAIN(COSTS OFF)
SELECT
day, hll_cardinality(hll_union_agg(unique_users))
FROM
daily_uniques
GROUP BY(1);
ERROR: relation "daily_uniques" does not exist
LINE 5: daily_uniques
^
SET hll.force_groupagg to ON;
EXPLAIN(COSTS OFF)
SELECT
day, hll_cardinality(hll_union_agg(unique_users))
FROM
daily_uniques
GROUP BY(1);
ERROR: relation "daily_uniques" does not exist
LINE 5: daily_uniques
^
-- Test disabling hash_agg with having
SET hll.force_groupagg to OFF;
EXPLAIN(COSTS OFF)
SELECT
day, hll_cardinality(hll_union_agg(unique_users))
FROM
daily_uniques
GROUP BY(1);
ERROR: relation "daily_uniques" does not exist
LINE 5: daily_uniques
^
SET hll.force_groupagg to ON;
EXPLAIN(COSTS OFF)
SELECT
day, hll_cardinality(hll_union_agg(unique_users))
FROM
daily_uniques
GROUP BY(1)
HAVING hll_cardinality(hll_union_agg(unique_users)) > 1;
ERROR: relation "daily_uniques" does not exist
LINE 5: daily_uniques
^
DROP TABLE raw_table;
DROP TABLE daily_uniques;
ERROR: table "daily_uniques" does not exist

View File

@ -106,6 +106,321 @@ FROM daily_uniques
WINDOW two_days AS (ORDER BY day ASC ROWS 1 PRECEDING);
ERROR: could not run distributed query because the window function that is used cannot be pushed down
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
-- Test disabling hash_agg on coordinator query
SET citus.explain_all_tasks to true;
SET hll.force_groupagg to OFF;
EXPLAIN(COSTS OFF)
SELECT
day, hll_union_agg(unique_users)
FROM
daily_uniques
GROUP BY(1);
QUERY PLAN
------------------------------------------------------------------------
HashAggregate
Group Key: remote_scan.day
-> Custom Scan (Citus Real-Time)
Task Count: 4
Tasks Shown: All
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360261 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360262 daily_uniques
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360263 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360264 daily_uniques
(25 rows)
SET hll.force_groupagg to ON;
EXPLAIN(COSTS OFF)
SELECT
day, hll_union_agg(unique_users)
FROM
daily_uniques
GROUP BY(1);
QUERY PLAN
------------------------------------------------------------------------------
GroupAggregate
Group Key: remote_scan.day
-> Sort
Sort Key: remote_scan.day
-> Custom Scan (Citus Real-Time)
Task Count: 4
Tasks Shown: All
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360261 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360262 daily_uniques
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360263 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360264 daily_uniques
(27 rows)
-- Test disabling hash_agg with operator on coordinator query
SET hll.force_groupagg to OFF;
EXPLAIN(COSTS OFF)
SELECT
day, hll_union_agg(unique_users) || hll_union_agg(unique_users)
FROM
daily_uniques
GROUP BY(1);
QUERY PLAN
------------------------------------------------------------------------
HashAggregate
Group Key: remote_scan.day
-> Custom Scan (Citus Real-Time)
Task Count: 4
Tasks Shown: All
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360261 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360262 daily_uniques
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360263 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360264 daily_uniques
(25 rows)
SET hll.force_groupagg to ON;
EXPLAIN(COSTS OFF)
SELECT
day, hll_union_agg(unique_users) || hll_union_agg(unique_users)
FROM
daily_uniques
GROUP BY(1);
QUERY PLAN
------------------------------------------------------------------------------
GroupAggregate
Group Key: remote_scan.day
-> Sort
Sort Key: remote_scan.day
-> Custom Scan (Citus Real-Time)
Task Count: 4
Tasks Shown: All
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360261 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360262 daily_uniques
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360263 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360264 daily_uniques
(27 rows)
-- Test disabling hash_agg with expression on coordinator query
SET hll.force_groupagg to OFF;
EXPLAIN(COSTS OFF)
SELECT
day, hll_cardinality(hll_union_agg(unique_users))
FROM
daily_uniques
GROUP BY(1);
QUERY PLAN
------------------------------------------------------------------------
HashAggregate
Group Key: remote_scan.day
-> Custom Scan (Citus Real-Time)
Task Count: 4
Tasks Shown: All
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360261 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360262 daily_uniques
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360263 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360264 daily_uniques
(25 rows)
SET hll.force_groupagg to ON;
EXPLAIN(COSTS OFF)
SELECT
day, hll_cardinality(hll_union_agg(unique_users))
FROM
daily_uniques
GROUP BY(1);
QUERY PLAN
------------------------------------------------------------------------------
GroupAggregate
Group Key: remote_scan.day
-> Sort
Sort Key: remote_scan.day
-> Custom Scan (Citus Real-Time)
Task Count: 4
Tasks Shown: All
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360261 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360262 daily_uniques
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360263 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360264 daily_uniques
(27 rows)
-- Test disabling hash_agg with having
SET hll.force_groupagg to OFF;
EXPLAIN(COSTS OFF)
SELECT
day, hll_cardinality(hll_union_agg(unique_users))
FROM
daily_uniques
GROUP BY(1);
QUERY PLAN
------------------------------------------------------------------------
HashAggregate
Group Key: remote_scan.day
-> Custom Scan (Citus Real-Time)
Task Count: 4
Tasks Shown: All
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360261 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360262 daily_uniques
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360263 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360264 daily_uniques
(25 rows)
SET hll.force_groupagg to ON;
EXPLAIN(COSTS OFF)
SELECT
day, hll_cardinality(hll_union_agg(unique_users))
FROM
daily_uniques
GROUP BY(1)
HAVING hll_cardinality(hll_union_agg(unique_users)) > 1;
QUERY PLAN
----------------------------------------------------------------------------------------------------------
GroupAggregate
Group Key: remote_scan.day
Filter: (hll_cardinality(hll_union_agg(remote_scan.worker_column_3)) > '1'::double precision)
-> Sort
Sort Key: remote_scan.day
-> Custom Scan (Citus Real-Time)
Task Count: 4
Tasks Shown: All
-> Task
Node: host=localhost port=57637 dbname=regression
-> GroupAggregate
Group Key: day
Filter: (hll_cardinality(hll_union_agg(unique_users)) > '1'::double precision)
-> Sort
Sort Key: day
-> Seq Scan on daily_uniques_360261 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> GroupAggregate
Group Key: day
Filter: (hll_cardinality(hll_union_agg(unique_users)) > '1'::double precision)
-> Sort
Sort Key: day
-> Seq Scan on daily_uniques_360262 daily_uniques
-> Task
Node: host=localhost port=57637 dbname=regression
-> GroupAggregate
Group Key: day
Filter: (hll_cardinality(hll_union_agg(unique_users)) > '1'::double precision)
-> Sort
Sort Key: day
-> Seq Scan on daily_uniques_360263 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> GroupAggregate
Group Key: day
Filter: (hll_cardinality(hll_union_agg(unique_users)) > '1'::double precision)
-> Sort
Sort Key: day
-> Seq Scan on daily_uniques_360264 daily_uniques
(40 rows)
DROP TABLE raw_table;
DROP TABLE daily_uniques;
-- Check if TopN aggregates work as expected

View File

@ -68,7 +68,8 @@ test: sql_procedure
test: multi_subquery_in_where_reference_clause
test: multi_subquery_union multi_subquery_in_where_clause multi_subquery_misc
test: multi_agg_distinct multi_agg_approximate_distinct multi_limit_clause_approximate multi_outer_join_reference multi_single_relation_subquery multi_prepare_plsql
test: multi_reference_table multi_select_for_update relation_access_tracking custom_aggregate_support
test: multi_reference_table multi_select_for_update relation_access_tracking
test: custom_aggregate_support
test: multi_average_expression multi_working_columns multi_having_pushdown
test: multi_array_agg multi_limit_clause multi_orderby_limit_pushdown
test: multi_jsonb_agg multi_jsonb_object_agg multi_json_agg multi_json_object_agg bool_agg

View File

@ -299,6 +299,14 @@ if (-e $pg_stat_statements_control)
{
$sharedPreloadLibraries .= ',pg_stat_statements';
}
# If the hll extension is installed (its control file exists in the share
# directory), add it to the shared preload libraries as well.
my $hll_control = catfile($sharedir, "extension", "hll.control");
if (-e $hll_control)
{
$sharedPreloadLibraries .= ',hll';
}
push(@pgOptions, '-c', "shared_preload_libraries=${sharedPreloadLibraries}");
push(@pgOptions, '-c', "wal_level=logical");

View File

@ -76,6 +76,76 @@ SELECT day, (hll_cardinality(hll_union_agg(unique_users) OVER two_days)) - hll_c
FROM daily_uniques
WINDOW two_days AS (ORDER BY day ASC ROWS 1 PRECEDING);
-- Test disabling hash_agg on coordinator query
SET citus.explain_all_tasks to true;
SET hll.force_groupagg to OFF;
EXPLAIN(COSTS OFF)
SELECT
day, hll_union_agg(unique_users)
FROM
daily_uniques
GROUP BY(1);
SET hll.force_groupagg to ON;
EXPLAIN(COSTS OFF)
SELECT
day, hll_union_agg(unique_users)
FROM
daily_uniques
GROUP BY(1);
-- Test disabling hash_agg with operator on coordinator query
SET hll.force_groupagg to OFF;
EXPLAIN(COSTS OFF)
SELECT
day, hll_union_agg(unique_users) || hll_union_agg(unique_users)
FROM
daily_uniques
GROUP BY(1);
SET hll.force_groupagg to ON;
EXPLAIN(COSTS OFF)
SELECT
day, hll_union_agg(unique_users) || hll_union_agg(unique_users)
FROM
daily_uniques
GROUP BY(1);
-- Test disabling hash_agg with expression on coordinator query
SET hll.force_groupagg to OFF;
EXPLAIN(COSTS OFF)
SELECT
day, hll_cardinality(hll_union_agg(unique_users))
FROM
daily_uniques
GROUP BY(1);
SET hll.force_groupagg to ON;
EXPLAIN(COSTS OFF)
SELECT
day, hll_cardinality(hll_union_agg(unique_users))
FROM
daily_uniques
GROUP BY(1);
-- Test disabling hash_agg with having
SET hll.force_groupagg to OFF;
EXPLAIN(COSTS OFF)
SELECT
day, hll_cardinality(hll_union_agg(unique_users))
FROM
daily_uniques
GROUP BY(1);
SET hll.force_groupagg to ON;
EXPLAIN(COSTS OFF)
SELECT
day, hll_cardinality(hll_union_agg(unique_users))
FROM
daily_uniques
GROUP BY(1)
HAVING hll_cardinality(hll_union_agg(unique_users)) > 1;
DROP TABLE raw_table;
DROP TABLE daily_uniques;