mirror of https://github.com/citusdata/citus.git
Merge pull request #2523 from citusdata/disable_hashagg_hll
Adds support for disabling hash agg with hll functions on coordinator
pull/2503/head
commit
fd3b0044b4
|
@ -13,6 +13,9 @@
|
|||
|
||||
#include "postgres.h"
|
||||
|
||||
#include "commands/extension.h"
|
||||
#include "distributed/citus_ruleutils.h"
|
||||
#include "distributed/function_utils.h"
|
||||
#include "distributed/multi_logical_optimizer.h"
|
||||
#include "distributed/multi_master_planner.h"
|
||||
#include "distributed/multi_physical_planner.h"
|
||||
|
@ -21,15 +24,18 @@
|
|||
#include "distributed/worker_protocol.h"
|
||||
#include "nodes/makefuncs.h"
|
||||
#include "nodes/nodeFuncs.h"
|
||||
#include "nodes/print.h"
|
||||
#include "optimizer/clauses.h"
|
||||
#include "optimizer/cost.h"
|
||||
#include "optimizer/planmain.h"
|
||||
#include "optimizer/tlist.h"
|
||||
#include "optimizer/var.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/guc.h"
|
||||
#include "utils/memutils.h"
|
||||
#include "utils/rel.h"
|
||||
#include "utils/syscache.h"
|
||||
#include "utils/lsyscache.h"
|
||||
|
||||
|
||||
static List * MasterTargetList(List *workerTargetList);
|
||||
|
@ -37,6 +43,8 @@ static PlannedStmt * BuildSelectStatement(Query *masterQuery, List *masterTarget
|
|||
CustomScan *remoteScan);
|
||||
static Agg * BuildAggregatePlan(Query *masterQuery, Plan *subPlan);
|
||||
static bool HasDistinctAggregate(Query *masterQuery);
|
||||
static bool UseGroupAggregateWithHLL(Query *masterQuery);
|
||||
static bool QueryContainsAggregateWithHLL(Query *query);
|
||||
static Plan * BuildDistinctPlan(Query *masterQuery, Plan *subPlan);
|
||||
static List * PrepareTargetListForNextPlan(List *targetList);
|
||||
|
||||
|
@ -313,8 +321,12 @@ BuildAggregatePlan(Query *masterQuery, Plan *subPlan)
|
|||
* in group and order by with aggregate operations.
|
||||
* see nodeAgg.c:build_pertrans_for_aggref(). In that case we use
|
||||
* sorted agg strategy, otherwise we use hash strategy.
|
||||
*
|
||||
* If the master query contains hll aggregate functions and the client set
|
||||
* hll.force_groupagg to on, then we choose to use group aggregation.
|
||||
*/
|
||||
if (!enable_hashagg || !groupingIsHashable || hasDistinctAggregate)
|
||||
if (!enable_hashagg || !groupingIsHashable || hasDistinctAggregate ||
|
||||
UseGroupAggregateWithHLL(masterQuery))
|
||||
{
|
||||
char *messageHint = NULL;
|
||||
if (!enable_hashagg && groupingIsHashable)
|
||||
|
@ -392,6 +404,71 @@ HasDistinctAggregate(Query *masterQuery)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* UseGroupAggregateWithHLL first checks whether the HLL extension is loaded, if
|
||||
* it is not then simply return false. Otherwise, checks whether the client set
|
||||
* the hll.force_groupagg to on. If it is enabled and the master query contains
|
||||
* hll aggregate function, it returns true.
|
||||
*/
|
||||
static bool
|
||||
UseGroupAggregateWithHLL(Query *masterQuery)
|
||||
{
|
||||
Oid hllId = get_extension_oid(HLL_EXTENSION_NAME, true);
|
||||
const char *gucStrValue = NULL;
|
||||
|
||||
/* If HLL extension is not loaded, return false */
|
||||
if (!OidIsValid(hllId))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
/* If HLL is loaded but related GUC is not set, return false */
|
||||
gucStrValue = GetConfigOption(HLL_FORCE_GROUPAGG_GUC_NAME, true, false);
|
||||
if (gucStrValue == NULL || strcmp(gucStrValue, "off") == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return QueryContainsAggregateWithHLL(masterQuery);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* QueryContainsAggregateWithHLL returns true if the query has an hll aggregate
|
||||
* function in it's target list.
|
||||
*/
|
||||
static bool
|
||||
QueryContainsAggregateWithHLL(Query *query)
|
||||
{
|
||||
List *varList = NIL;
|
||||
ListCell *varCell = NULL;
|
||||
|
||||
varList = pull_var_clause((Node *) query->targetList, PVC_INCLUDE_AGGREGATES);
|
||||
foreach(varCell, varList)
|
||||
{
|
||||
Var *var = (Var *) lfirst(varCell);
|
||||
if (nodeTag(var) == T_Aggref)
|
||||
{
|
||||
Aggref *aggref = (Aggref *) var;
|
||||
int argCount = list_length(aggref->args);
|
||||
Oid hllId = get_extension_oid(HLL_EXTENSION_NAME, false);
|
||||
Oid hllSchemaOid = get_extension_schema(hllId);
|
||||
const char *hllSchemaName = get_namespace_name(hllSchemaOid);
|
||||
Oid addFunctionId = FunctionOid(hllSchemaName, HLL_ADD_AGGREGATE_NAME,
|
||||
argCount);
|
||||
Oid unionFunctionId = FunctionOid(hllSchemaName, HLL_UNION_AGGREGATE_NAME, 1);
|
||||
|
||||
if (aggref->aggfnoid == addFunctionId || aggref->aggfnoid == unionFunctionId)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* BuildDistinctPlan creates and returns a plan for distinct. Depending on
|
||||
* availability of hash function it chooses HashAgg over Sort/Unique
|
||||
|
|
|
@ -38,6 +38,7 @@
|
|||
#define HLL_ADD_AGGREGATE_NAME "hll_add_agg"
|
||||
#define HLL_UNION_AGGREGATE_NAME "hll_union_agg"
|
||||
#define HLL_CARDINALITY_FUNC_NAME "hll_cardinality"
|
||||
#define HLL_FORCE_GROUPAGG_GUC_NAME "hll.force_groupagg"
|
||||
|
||||
/* Definitions related to Top-N approximations */
|
||||
#define TOPN_ADD_AGGREGATE_NAME "topn_add_agg"
|
||||
|
|
|
@ -106,6 +106,321 @@ FROM daily_uniques
|
|||
WINDOW two_days AS (ORDER BY day ASC ROWS 1 PRECEDING);
|
||||
ERROR: could not run distributed query because the window function that is used cannot be pushed down
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
|
||||
-- Test disabling hash_agg on coordinator query
|
||||
SET citus.explain_all_tasks to true;
|
||||
SET hll.force_groupagg to OFF;
|
||||
EXPLAIN(COSTS OFF)
|
||||
SELECT
|
||||
day, hll_union_agg(unique_users)
|
||||
FROM
|
||||
daily_uniques
|
||||
GROUP BY(1);
|
||||
QUERY PLAN
|
||||
------------------------------------------------------------------------
|
||||
HashAggregate
|
||||
Group Key: remote_scan.day
|
||||
-> Custom Scan (Citus Real-Time)
|
||||
Task Count: 4
|
||||
Tasks Shown: All
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360289 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57638 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360290 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360291 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57638 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360292 daily_uniques
|
||||
(25 rows)
|
||||
|
||||
SET hll.force_groupagg to ON;
|
||||
EXPLAIN(COSTS OFF)
|
||||
SELECT
|
||||
day, hll_union_agg(unique_users)
|
||||
FROM
|
||||
daily_uniques
|
||||
GROUP BY(1);
|
||||
QUERY PLAN
|
||||
------------------------------------------------------------------------------
|
||||
GroupAggregate
|
||||
Group Key: remote_scan.day
|
||||
-> Sort
|
||||
Sort Key: remote_scan.day
|
||||
-> Custom Scan (Citus Real-Time)
|
||||
Task Count: 4
|
||||
Tasks Shown: All
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360289 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57638 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360290 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360291 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57638 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360292 daily_uniques
|
||||
(27 rows)
|
||||
|
||||
-- Test disabling hash_agg with operator on coordinator query
|
||||
SET hll.force_groupagg to OFF;
|
||||
EXPLAIN(COSTS OFF)
|
||||
SELECT
|
||||
day, hll_union_agg(unique_users) || hll_union_agg(unique_users)
|
||||
FROM
|
||||
daily_uniques
|
||||
GROUP BY(1);
|
||||
QUERY PLAN
|
||||
------------------------------------------------------------------------
|
||||
HashAggregate
|
||||
Group Key: remote_scan.day
|
||||
-> Custom Scan (Citus Real-Time)
|
||||
Task Count: 4
|
||||
Tasks Shown: All
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360289 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57638 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360290 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360291 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57638 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360292 daily_uniques
|
||||
(25 rows)
|
||||
|
||||
SET hll.force_groupagg to ON;
|
||||
EXPLAIN(COSTS OFF)
|
||||
SELECT
|
||||
day, hll_union_agg(unique_users) || hll_union_agg(unique_users)
|
||||
FROM
|
||||
daily_uniques
|
||||
GROUP BY(1);
|
||||
QUERY PLAN
|
||||
------------------------------------------------------------------------------
|
||||
GroupAggregate
|
||||
Group Key: remote_scan.day
|
||||
-> Sort
|
||||
Sort Key: remote_scan.day
|
||||
-> Custom Scan (Citus Real-Time)
|
||||
Task Count: 4
|
||||
Tasks Shown: All
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360289 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57638 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360290 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360291 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57638 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360292 daily_uniques
|
||||
(27 rows)
|
||||
|
||||
-- Test disabling hash_agg with expression on coordinator query
|
||||
SET hll.force_groupagg to OFF;
|
||||
EXPLAIN(COSTS OFF)
|
||||
SELECT
|
||||
day, hll_cardinality(hll_union_agg(unique_users))
|
||||
FROM
|
||||
daily_uniques
|
||||
GROUP BY(1);
|
||||
QUERY PLAN
|
||||
------------------------------------------------------------------------
|
||||
HashAggregate
|
||||
Group Key: remote_scan.day
|
||||
-> Custom Scan (Citus Real-Time)
|
||||
Task Count: 4
|
||||
Tasks Shown: All
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360289 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57638 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360290 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360291 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57638 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360292 daily_uniques
|
||||
(25 rows)
|
||||
|
||||
SET hll.force_groupagg to ON;
|
||||
EXPLAIN(COSTS OFF)
|
||||
SELECT
|
||||
day, hll_cardinality(hll_union_agg(unique_users))
|
||||
FROM
|
||||
daily_uniques
|
||||
GROUP BY(1);
|
||||
QUERY PLAN
|
||||
------------------------------------------------------------------------------
|
||||
GroupAggregate
|
||||
Group Key: remote_scan.day
|
||||
-> Sort
|
||||
Sort Key: remote_scan.day
|
||||
-> Custom Scan (Citus Real-Time)
|
||||
Task Count: 4
|
||||
Tasks Shown: All
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360289 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57638 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360290 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360291 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57638 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360292 daily_uniques
|
||||
(27 rows)
|
||||
|
||||
-- Test disabling hash_agg with having
|
||||
SET hll.force_groupagg to OFF;
|
||||
EXPLAIN(COSTS OFF)
|
||||
SELECT
|
||||
day, hll_cardinality(hll_union_agg(unique_users))
|
||||
FROM
|
||||
daily_uniques
|
||||
GROUP BY(1);
|
||||
QUERY PLAN
|
||||
------------------------------------------------------------------------
|
||||
HashAggregate
|
||||
Group Key: remote_scan.day
|
||||
-> Custom Scan (Citus Real-Time)
|
||||
Task Count: 4
|
||||
Tasks Shown: All
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360289 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57638 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360290 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360291 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57638 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360292 daily_uniques
|
||||
(25 rows)
|
||||
|
||||
SET hll.force_groupagg to ON;
|
||||
EXPLAIN(COSTS OFF)
|
||||
SELECT
|
||||
day, hll_cardinality(hll_union_agg(unique_users))
|
||||
FROM
|
||||
daily_uniques
|
||||
GROUP BY(1)
|
||||
HAVING hll_cardinality(hll_union_agg(unique_users)) > 1;
|
||||
QUERY PLAN
|
||||
----------------------------------------------------------------------------------------------------------
|
||||
GroupAggregate
|
||||
Group Key: remote_scan.day
|
||||
Filter: (hll_cardinality(hll_union_agg(remote_scan.worker_column_3)) > '1'::double precision)
|
||||
-> Sort
|
||||
Sort Key: remote_scan.day
|
||||
-> Custom Scan (Citus Real-Time)
|
||||
Task Count: 4
|
||||
Tasks Shown: All
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> GroupAggregate
|
||||
Group Key: day
|
||||
Filter: (hll_cardinality(hll_union_agg(unique_users)) > '1'::double precision)
|
||||
-> Sort
|
||||
Sort Key: day
|
||||
-> Seq Scan on daily_uniques_360289 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57638 dbname=regression
|
||||
-> GroupAggregate
|
||||
Group Key: day
|
||||
Filter: (hll_cardinality(hll_union_agg(unique_users)) > '1'::double precision)
|
||||
-> Sort
|
||||
Sort Key: day
|
||||
-> Seq Scan on daily_uniques_360290 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> GroupAggregate
|
||||
Group Key: day
|
||||
Filter: (hll_cardinality(hll_union_agg(unique_users)) > '1'::double precision)
|
||||
-> Sort
|
||||
Sort Key: day
|
||||
-> Seq Scan on daily_uniques_360291 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57638 dbname=regression
|
||||
-> GroupAggregate
|
||||
Group Key: day
|
||||
Filter: (hll_cardinality(hll_union_agg(unique_users)) > '1'::double precision)
|
||||
-> Sort
|
||||
Sort Key: day
|
||||
-> Seq Scan on daily_uniques_360292 daily_uniques
|
||||
(40 rows)
|
||||
|
||||
DROP TABLE raw_table;
|
||||
DROP TABLE daily_uniques;
|
||||
-- Check if TopN aggregates work as expected
|
||||
|
|
|
@ -107,6 +107,92 @@ WINDOW two_days AS (ORDER BY day ASC ROWS 1 PRECEDING);
|
|||
ERROR: relation "daily_uniques" does not exist
|
||||
LINE 2: FROM daily_uniques
|
||||
^
|
||||
-- Test disabling hash_agg on coordinator query
|
||||
SET citus.explain_all_tasks to true;
|
||||
SET hll.force_groupagg to OFF;
|
||||
EXPLAIN(COSTS OFF)
|
||||
SELECT
|
||||
day, hll_union_agg(unique_users)
|
||||
FROM
|
||||
daily_uniques
|
||||
GROUP BY(1);
|
||||
ERROR: relation "daily_uniques" does not exist
|
||||
LINE 5: daily_uniques
|
||||
^
|
||||
SET hll.force_groupagg to ON;
|
||||
EXPLAIN(COSTS OFF)
|
||||
SELECT
|
||||
day, hll_union_agg(unique_users)
|
||||
FROM
|
||||
daily_uniques
|
||||
GROUP BY(1);
|
||||
ERROR: relation "daily_uniques" does not exist
|
||||
LINE 5: daily_uniques
|
||||
^
|
||||
-- Test disabling hash_agg with operator on coordinator query
|
||||
SET hll.force_groupagg to OFF;
|
||||
EXPLAIN(COSTS OFF)
|
||||
SELECT
|
||||
day, hll_union_agg(unique_users) || hll_union_agg(unique_users)
|
||||
FROM
|
||||
daily_uniques
|
||||
GROUP BY(1);
|
||||
ERROR: relation "daily_uniques" does not exist
|
||||
LINE 5: daily_uniques
|
||||
^
|
||||
SET hll.force_groupagg to ON;
|
||||
EXPLAIN(COSTS OFF)
|
||||
SELECT
|
||||
day, hll_union_agg(unique_users) || hll_union_agg(unique_users)
|
||||
FROM
|
||||
daily_uniques
|
||||
GROUP BY(1);
|
||||
ERROR: relation "daily_uniques" does not exist
|
||||
LINE 5: daily_uniques
|
||||
^
|
||||
-- Test disabling hash_agg with expression on coordinator query
|
||||
SET hll.force_groupagg to OFF;
|
||||
EXPLAIN(COSTS OFF)
|
||||
SELECT
|
||||
day, hll_cardinality(hll_union_agg(unique_users))
|
||||
FROM
|
||||
daily_uniques
|
||||
GROUP BY(1);
|
||||
ERROR: relation "daily_uniques" does not exist
|
||||
LINE 5: daily_uniques
|
||||
^
|
||||
SET hll.force_groupagg to ON;
|
||||
EXPLAIN(COSTS OFF)
|
||||
SELECT
|
||||
day, hll_cardinality(hll_union_agg(unique_users))
|
||||
FROM
|
||||
daily_uniques
|
||||
GROUP BY(1);
|
||||
ERROR: relation "daily_uniques" does not exist
|
||||
LINE 5: daily_uniques
|
||||
^
|
||||
-- Test disabling hash_agg with having
|
||||
SET hll.force_groupagg to OFF;
|
||||
EXPLAIN(COSTS OFF)
|
||||
SELECT
|
||||
day, hll_cardinality(hll_union_agg(unique_users))
|
||||
FROM
|
||||
daily_uniques
|
||||
GROUP BY(1);
|
||||
ERROR: relation "daily_uniques" does not exist
|
||||
LINE 5: daily_uniques
|
||||
^
|
||||
SET hll.force_groupagg to ON;
|
||||
EXPLAIN(COSTS OFF)
|
||||
SELECT
|
||||
day, hll_cardinality(hll_union_agg(unique_users))
|
||||
FROM
|
||||
daily_uniques
|
||||
GROUP BY(1)
|
||||
HAVING hll_cardinality(hll_union_agg(unique_users)) > 1;
|
||||
ERROR: relation "daily_uniques" does not exist
|
||||
LINE 5: daily_uniques
|
||||
^
|
||||
DROP TABLE raw_table;
|
||||
DROP TABLE daily_uniques;
|
||||
ERROR: table "daily_uniques" does not exist
|
||||
|
|
|
@ -106,6 +106,321 @@ FROM daily_uniques
|
|||
WINDOW two_days AS (ORDER BY day ASC ROWS 1 PRECEDING);
|
||||
ERROR: could not run distributed query because the window function that is used cannot be pushed down
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
|
||||
-- Test disabling hash_agg on coordinator query
|
||||
SET citus.explain_all_tasks to true;
|
||||
SET hll.force_groupagg to OFF;
|
||||
EXPLAIN(COSTS OFF)
|
||||
SELECT
|
||||
day, hll_union_agg(unique_users)
|
||||
FROM
|
||||
daily_uniques
|
||||
GROUP BY(1);
|
||||
QUERY PLAN
|
||||
------------------------------------------------------------------------
|
||||
HashAggregate
|
||||
Group Key: remote_scan.day
|
||||
-> Custom Scan (Citus Real-Time)
|
||||
Task Count: 4
|
||||
Tasks Shown: All
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360261 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57638 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360262 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360263 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57638 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360264 daily_uniques
|
||||
(25 rows)
|
||||
|
||||
SET hll.force_groupagg to ON;
|
||||
EXPLAIN(COSTS OFF)
|
||||
SELECT
|
||||
day, hll_union_agg(unique_users)
|
||||
FROM
|
||||
daily_uniques
|
||||
GROUP BY(1);
|
||||
QUERY PLAN
|
||||
------------------------------------------------------------------------------
|
||||
GroupAggregate
|
||||
Group Key: remote_scan.day
|
||||
-> Sort
|
||||
Sort Key: remote_scan.day
|
||||
-> Custom Scan (Citus Real-Time)
|
||||
Task Count: 4
|
||||
Tasks Shown: All
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360261 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57638 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360262 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360263 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57638 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360264 daily_uniques
|
||||
(27 rows)
|
||||
|
||||
-- Test disabling hash_agg with operator on coordinator query
|
||||
SET hll.force_groupagg to OFF;
|
||||
EXPLAIN(COSTS OFF)
|
||||
SELECT
|
||||
day, hll_union_agg(unique_users) || hll_union_agg(unique_users)
|
||||
FROM
|
||||
daily_uniques
|
||||
GROUP BY(1);
|
||||
QUERY PLAN
|
||||
------------------------------------------------------------------------
|
||||
HashAggregate
|
||||
Group Key: remote_scan.day
|
||||
-> Custom Scan (Citus Real-Time)
|
||||
Task Count: 4
|
||||
Tasks Shown: All
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360261 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57638 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360262 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360263 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57638 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360264 daily_uniques
|
||||
(25 rows)
|
||||
|
||||
SET hll.force_groupagg to ON;
|
||||
EXPLAIN(COSTS OFF)
|
||||
SELECT
|
||||
day, hll_union_agg(unique_users) || hll_union_agg(unique_users)
|
||||
FROM
|
||||
daily_uniques
|
||||
GROUP BY(1);
|
||||
QUERY PLAN
|
||||
------------------------------------------------------------------------------
|
||||
GroupAggregate
|
||||
Group Key: remote_scan.day
|
||||
-> Sort
|
||||
Sort Key: remote_scan.day
|
||||
-> Custom Scan (Citus Real-Time)
|
||||
Task Count: 4
|
||||
Tasks Shown: All
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360261 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57638 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360262 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360263 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57638 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360264 daily_uniques
|
||||
(27 rows)
|
||||
|
||||
-- Test disabling hash_agg with expression on coordinator query
|
||||
SET hll.force_groupagg to OFF;
|
||||
EXPLAIN(COSTS OFF)
|
||||
SELECT
|
||||
day, hll_cardinality(hll_union_agg(unique_users))
|
||||
FROM
|
||||
daily_uniques
|
||||
GROUP BY(1);
|
||||
QUERY PLAN
|
||||
------------------------------------------------------------------------
|
||||
HashAggregate
|
||||
Group Key: remote_scan.day
|
||||
-> Custom Scan (Citus Real-Time)
|
||||
Task Count: 4
|
||||
Tasks Shown: All
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360261 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57638 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360262 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360263 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57638 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360264 daily_uniques
|
||||
(25 rows)
|
||||
|
||||
SET hll.force_groupagg to ON;
|
||||
EXPLAIN(COSTS OFF)
|
||||
SELECT
|
||||
day, hll_cardinality(hll_union_agg(unique_users))
|
||||
FROM
|
||||
daily_uniques
|
||||
GROUP BY(1);
|
||||
QUERY PLAN
|
||||
------------------------------------------------------------------------------
|
||||
GroupAggregate
|
||||
Group Key: remote_scan.day
|
||||
-> Sort
|
||||
Sort Key: remote_scan.day
|
||||
-> Custom Scan (Citus Real-Time)
|
||||
Task Count: 4
|
||||
Tasks Shown: All
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360261 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57638 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360262 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360263 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57638 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360264 daily_uniques
|
||||
(27 rows)
|
||||
|
||||
-- Test disabling hash_agg with having
|
||||
SET hll.force_groupagg to OFF;
|
||||
EXPLAIN(COSTS OFF)
|
||||
SELECT
|
||||
day, hll_cardinality(hll_union_agg(unique_users))
|
||||
FROM
|
||||
daily_uniques
|
||||
GROUP BY(1);
|
||||
QUERY PLAN
|
||||
------------------------------------------------------------------------
|
||||
HashAggregate
|
||||
Group Key: remote_scan.day
|
||||
-> Custom Scan (Citus Real-Time)
|
||||
Task Count: 4
|
||||
Tasks Shown: All
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360261 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57638 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360262 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360263 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57638 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: day
|
||||
-> Seq Scan on daily_uniques_360264 daily_uniques
|
||||
(25 rows)
|
||||
|
||||
SET hll.force_groupagg to ON;
|
||||
EXPLAIN(COSTS OFF)
|
||||
SELECT
|
||||
day, hll_cardinality(hll_union_agg(unique_users))
|
||||
FROM
|
||||
daily_uniques
|
||||
GROUP BY(1)
|
||||
HAVING hll_cardinality(hll_union_agg(unique_users)) > 1;
|
||||
QUERY PLAN
|
||||
----------------------------------------------------------------------------------------------------------
|
||||
GroupAggregate
|
||||
Group Key: remote_scan.day
|
||||
Filter: (hll_cardinality(hll_union_agg(remote_scan.worker_column_3)) > '1'::double precision)
|
||||
-> Sort
|
||||
Sort Key: remote_scan.day
|
||||
-> Custom Scan (Citus Real-Time)
|
||||
Task Count: 4
|
||||
Tasks Shown: All
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> GroupAggregate
|
||||
Group Key: day
|
||||
Filter: (hll_cardinality(hll_union_agg(unique_users)) > '1'::double precision)
|
||||
-> Sort
|
||||
Sort Key: day
|
||||
-> Seq Scan on daily_uniques_360261 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57638 dbname=regression
|
||||
-> GroupAggregate
|
||||
Group Key: day
|
||||
Filter: (hll_cardinality(hll_union_agg(unique_users)) > '1'::double precision)
|
||||
-> Sort
|
||||
Sort Key: day
|
||||
-> Seq Scan on daily_uniques_360262 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> GroupAggregate
|
||||
Group Key: day
|
||||
Filter: (hll_cardinality(hll_union_agg(unique_users)) > '1'::double precision)
|
||||
-> Sort
|
||||
Sort Key: day
|
||||
-> Seq Scan on daily_uniques_360263 daily_uniques
|
||||
-> Task
|
||||
Node: host=localhost port=57638 dbname=regression
|
||||
-> GroupAggregate
|
||||
Group Key: day
|
||||
Filter: (hll_cardinality(hll_union_agg(unique_users)) > '1'::double precision)
|
||||
-> Sort
|
||||
Sort Key: day
|
||||
-> Seq Scan on daily_uniques_360264 daily_uniques
|
||||
(40 rows)
|
||||
|
||||
DROP TABLE raw_table;
|
||||
DROP TABLE daily_uniques;
|
||||
-- Check if TopN aggregates work as expected
|
||||
|
|
|
@ -68,7 +68,8 @@ test: sql_procedure
|
|||
test: multi_subquery_in_where_reference_clause
|
||||
test: multi_subquery_union multi_subquery_in_where_clause multi_subquery_misc
|
||||
test: multi_agg_distinct multi_agg_approximate_distinct multi_limit_clause_approximate multi_outer_join_reference multi_single_relation_subquery multi_prepare_plsql
|
||||
test: multi_reference_table multi_select_for_update relation_access_tracking custom_aggregate_support
|
||||
test: multi_reference_table multi_select_for_update relation_access_tracking
|
||||
test: custom_aggregate_support
|
||||
test: multi_average_expression multi_working_columns multi_having_pushdown
|
||||
test: multi_array_agg multi_limit_clause multi_orderby_limit_pushdown
|
||||
test: multi_jsonb_agg multi_jsonb_object_agg multi_json_agg multi_json_object_agg bool_agg
|
||||
|
|
|
@ -299,6 +299,14 @@ if (-e $pg_stat_statements_control)
|
|||
{
|
||||
$sharedPreloadLibraries .= ',pg_stat_statements';
|
||||
}
|
||||
|
||||
# check if hll extension is installed
|
||||
# if it is, add it to the shared preload libraries
|
||||
my $hll_control = catfile($sharedir, "extension", "hll.control");
|
||||
if (-e $hll_control)
|
||||
{
|
||||
$sharedPreloadLibraries .= ',hll';
|
||||
}
|
||||
push(@pgOptions, '-c', "shared_preload_libraries=${sharedPreloadLibraries}");
|
||||
|
||||
push(@pgOptions, '-c', "wal_level=logical");
|
||||
|
|
|
@ -76,6 +76,76 @@ SELECT day, (hll_cardinality(hll_union_agg(unique_users) OVER two_days)) - hll_c
|
|||
FROM daily_uniques
|
||||
WINDOW two_days AS (ORDER BY day ASC ROWS 1 PRECEDING);
|
||||
|
||||
-- Test disabling hash_agg on coordinator query
|
||||
SET citus.explain_all_tasks to true;
|
||||
SET hll.force_groupagg to OFF;
|
||||
EXPLAIN(COSTS OFF)
|
||||
SELECT
|
||||
day, hll_union_agg(unique_users)
|
||||
FROM
|
||||
daily_uniques
|
||||
GROUP BY(1);
|
||||
|
||||
SET hll.force_groupagg to ON;
|
||||
EXPLAIN(COSTS OFF)
|
||||
SELECT
|
||||
day, hll_union_agg(unique_users)
|
||||
FROM
|
||||
daily_uniques
|
||||
GROUP BY(1);
|
||||
|
||||
-- Test disabling hash_agg with operator on coordinator query
|
||||
SET hll.force_groupagg to OFF;
|
||||
EXPLAIN(COSTS OFF)
|
||||
SELECT
|
||||
day, hll_union_agg(unique_users) || hll_union_agg(unique_users)
|
||||
FROM
|
||||
daily_uniques
|
||||
GROUP BY(1);
|
||||
|
||||
SET hll.force_groupagg to ON;
|
||||
EXPLAIN(COSTS OFF)
|
||||
SELECT
|
||||
day, hll_union_agg(unique_users) || hll_union_agg(unique_users)
|
||||
FROM
|
||||
daily_uniques
|
||||
GROUP BY(1);
|
||||
|
||||
-- Test disabling hash_agg with expression on coordinator query
|
||||
SET hll.force_groupagg to OFF;
|
||||
EXPLAIN(COSTS OFF)
|
||||
SELECT
|
||||
day, hll_cardinality(hll_union_agg(unique_users))
|
||||
FROM
|
||||
daily_uniques
|
||||
GROUP BY(1);
|
||||
|
||||
SET hll.force_groupagg to ON;
|
||||
EXPLAIN(COSTS OFF)
|
||||
SELECT
|
||||
day, hll_cardinality(hll_union_agg(unique_users))
|
||||
FROM
|
||||
daily_uniques
|
||||
GROUP BY(1);
|
||||
|
||||
-- Test disabling hash_agg with having
|
||||
SET hll.force_groupagg to OFF;
|
||||
EXPLAIN(COSTS OFF)
|
||||
SELECT
|
||||
day, hll_cardinality(hll_union_agg(unique_users))
|
||||
FROM
|
||||
daily_uniques
|
||||
GROUP BY(1);
|
||||
|
||||
SET hll.force_groupagg to ON;
|
||||
EXPLAIN(COSTS OFF)
|
||||
SELECT
|
||||
day, hll_cardinality(hll_union_agg(unique_users))
|
||||
FROM
|
||||
daily_uniques
|
||||
GROUP BY(1)
|
||||
HAVING hll_cardinality(hll_union_agg(unique_users)) > 1;
|
||||
|
||||
DROP TABLE raw_table;
|
||||
DROP TABLE daily_uniques;
|
||||
|
||||
|
|
Loading…
Reference in New Issue