diff --git a/src/backend/distributed/planner/multi_master_planner.c b/src/backend/distributed/planner/multi_master_planner.c index 3b73a0bcf..2fd728d6d 100644 --- a/src/backend/distributed/planner/multi_master_planner.c +++ b/src/backend/distributed/planner/multi_master_planner.c @@ -13,6 +13,9 @@ #include "postgres.h" +#include "commands/extension.h" +#include "distributed/citus_ruleutils.h" +#include "distributed/function_utils.h" #include "distributed/multi_logical_optimizer.h" #include "distributed/multi_master_planner.h" #include "distributed/multi_physical_planner.h" @@ -21,15 +24,18 @@ #include "distributed/worker_protocol.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" +#include "nodes/print.h" #include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/planmain.h" #include "optimizer/tlist.h" #include "optimizer/var.h" #include "utils/builtins.h" +#include "utils/guc.h" #include "utils/memutils.h" #include "utils/rel.h" #include "utils/syscache.h" +#include "utils/lsyscache.h" static List * MasterTargetList(List *workerTargetList); @@ -37,6 +43,8 @@ static PlannedStmt * BuildSelectStatement(Query *masterQuery, List *masterTarget CustomScan *remoteScan); static Agg * BuildAggregatePlan(Query *masterQuery, Plan *subPlan); static bool HasDistinctAggregate(Query *masterQuery); +static bool UseGroupAggregateWithHLL(Query *masterQuery); +static bool QueryContainsAggregateWithHLL(Query *query); static Plan * BuildDistinctPlan(Query *masterQuery, Plan *subPlan); static List * PrepareTargetListForNextPlan(List *targetList); @@ -313,8 +321,12 @@ BuildAggregatePlan(Query *masterQuery, Plan *subPlan) * in group and order by with aggregate operations. * see nodeAgg.c:build_pertrans_for_aggref(). In that case we use * sorted agg strategy, otherwise we use hash strategy. + * + * If the master query contains hll aggregate functions and the client set + * hll.force_groupagg to on, then we choose to use group aggregation. */ - if (!enable_hashagg || !groupingIsHashable || hasDistinctAggregate) + if (!enable_hashagg || !groupingIsHashable || hasDistinctAggregate || + UseGroupAggregateWithHLL(masterQuery)) { char *messageHint = NULL; if (!enable_hashagg && groupingIsHashable) @@ -392,6 +404,71 @@ HasDistinctAggregate(Query *masterQuery) } +/* + * UseGroupAggregateWithHLL first checks whether the HLL extension is loaded, if + * it is not then simply return false. Otherwise, checks whether the client set + * the hll.force_groupagg to on. If it is enabled and the master query contains + * hll aggregate function, it returns true. + */ +static bool +UseGroupAggregateWithHLL(Query *masterQuery) +{ + Oid hllId = get_extension_oid(HLL_EXTENSION_NAME, true); + const char *gucStrValue = NULL; + + /* If HLL extension is not loaded, return false */ + if (!OidIsValid(hllId)) + { + return false; + } + + /* If HLL is loaded but related GUC is not set, return false */ + gucStrValue = GetConfigOption(HLL_FORCE_GROUPAGG_GUC_NAME, true, false); + if (gucStrValue == NULL || strcmp(gucStrValue, "off") == 0) + { + return false; + } + + return QueryContainsAggregateWithHLL(masterQuery); +} + + +/* + * QueryContainsAggregateWithHLL returns true if the query has an hll aggregate + * function in it's target list. + */ +static bool +QueryContainsAggregateWithHLL(Query *query) +{ + List *varList = NIL; + ListCell *varCell = NULL; + + varList = pull_var_clause((Node *) query->targetList, PVC_INCLUDE_AGGREGATES); + foreach(varCell, varList) + { + Var *var = (Var *) lfirst(varCell); + if (nodeTag(var) == T_Aggref) + { + Aggref *aggref = (Aggref *) var; + int argCount = list_length(aggref->args); + Oid hllId = get_extension_oid(HLL_EXTENSION_NAME, false); + Oid hllSchemaOid = get_extension_schema(hllId); + const char *hllSchemaName = get_namespace_name(hllSchemaOid); + Oid addFunctionId = FunctionOid(hllSchemaName, HLL_ADD_AGGREGATE_NAME, + argCount); + Oid unionFunctionId = FunctionOid(hllSchemaName, HLL_UNION_AGGREGATE_NAME, 1); + + if (aggref->aggfnoid == addFunctionId || aggref->aggfnoid == unionFunctionId) + { + return true; + } + } + } + + return false; +} + + /* * BuildDistinctPlan creates an returns a plan for distinct. Depending on * availability of hash function it chooses HashAgg over Sort/Unique diff --git a/src/include/distributed/multi_logical_optimizer.h b/src/include/distributed/multi_logical_optimizer.h index ab733a0e9..9512f84e1 100644 --- a/src/include/distributed/multi_logical_optimizer.h +++ b/src/include/distributed/multi_logical_optimizer.h @@ -38,6 +38,7 @@ #define HLL_ADD_AGGREGATE_NAME "hll_add_agg" #define HLL_UNION_AGGREGATE_NAME "hll_union_agg" #define HLL_CARDINALITY_FUNC_NAME "hll_cardinality" +#define HLL_FORCE_GROUPAGG_GUC_NAME "hll.force_groupagg" /* Definitions related to Top-N approximations */ #define TOPN_ADD_AGGREGATE_NAME "topn_add_agg" diff --git a/src/test/regress/expected/custom_aggregate_support.out b/src/test/regress/expected/custom_aggregate_support.out index 9ffa8d9a4..ec364f88d 100644 --- a/src/test/regress/expected/custom_aggregate_support.out +++ b/src/test/regress/expected/custom_aggregate_support.out @@ -106,6 +106,321 @@ FROM daily_uniques WINDOW two_days AS (ORDER BY day ASC ROWS 1 PRECEDING); ERROR: could not run distributed query because the window function that is used cannot be pushed down HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column +-- Test disabling hash_agg on coordinator query +SET citus.explain_all_tasks to true; +SET hll.force_groupagg to OFF; +EXPLAIN(COSTS OFF) +SELECT + day, hll_union_agg(unique_users) +FROM + daily_uniques +GROUP BY(1); + QUERY PLAN +------------------------------------------------------------------------ + HashAggregate + Group Key: remote_scan.day + -> Custom Scan (Citus Real-Time) + Task Count: 4 + Tasks Shown: All + -> Task + Node: host=localhost port=57637 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360289 daily_uniques + -> Task + Node: host=localhost port=57638 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360290 daily_uniques + -> Task + Node: host=localhost port=57637 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360291 daily_uniques + -> Task + Node: host=localhost port=57638 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360292 daily_uniques +(25 rows) + +SET hll.force_groupagg to ON; +EXPLAIN(COSTS OFF) +SELECT + day, hll_union_agg(unique_users) +FROM + daily_uniques +GROUP BY(1); + QUERY PLAN +------------------------------------------------------------------------------ + GroupAggregate + Group Key: remote_scan.day + -> Sort + Sort Key: remote_scan.day + -> Custom Scan (Citus Real-Time) + Task Count: 4 + Tasks Shown: All + -> Task + Node: host=localhost port=57637 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360289 daily_uniques + -> Task + Node: host=localhost port=57638 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360290 daily_uniques + -> Task + Node: host=localhost port=57637 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360291 daily_uniques + -> Task + Node: host=localhost port=57638 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360292 daily_uniques +(27 rows) + +-- Test disabling hash_agg with operator on coordinator query +SET hll.force_groupagg to OFF; +EXPLAIN(COSTS OFF) +SELECT + day, hll_union_agg(unique_users) || hll_union_agg(unique_users) +FROM + daily_uniques +GROUP BY(1); + QUERY PLAN +------------------------------------------------------------------------ + HashAggregate + Group Key: remote_scan.day + -> Custom Scan (Citus Real-Time) + Task Count: 4 + Tasks Shown: All + -> Task + Node: host=localhost port=57637 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360289 daily_uniques + -> Task + Node: host=localhost port=57638 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360290 daily_uniques + -> Task + Node: host=localhost port=57637 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360291 daily_uniques + -> Task + Node: host=localhost port=57638 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360292 daily_uniques +(25 rows) + +SET hll.force_groupagg to ON; +EXPLAIN(COSTS OFF) +SELECT + day, hll_union_agg(unique_users) || hll_union_agg(unique_users) +FROM + daily_uniques +GROUP BY(1); + QUERY PLAN +------------------------------------------------------------------------------ + GroupAggregate + Group Key: remote_scan.day + -> Sort + Sort Key: remote_scan.day + -> Custom Scan (Citus Real-Time) + Task Count: 4 + Tasks Shown: All + -> Task + Node: host=localhost port=57637 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360289 daily_uniques + -> Task + Node: host=localhost port=57638 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360290 daily_uniques + -> Task + Node: host=localhost port=57637 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360291 daily_uniques + -> Task + Node: host=localhost port=57638 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360292 daily_uniques +(27 rows) + +-- Test disabling hash_agg with expression on coordinator query +SET hll.force_groupagg to OFF; +EXPLAIN(COSTS OFF) +SELECT + day, hll_cardinality(hll_union_agg(unique_users)) +FROM + daily_uniques +GROUP BY(1); + QUERY PLAN +------------------------------------------------------------------------ + HashAggregate + Group Key: remote_scan.day + -> Custom Scan (Citus Real-Time) + Task Count: 4 + Tasks Shown: All + -> Task + Node: host=localhost port=57637 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360289 daily_uniques + -> Task + Node: host=localhost port=57638 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360290 daily_uniques + -> Task + Node: host=localhost port=57637 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360291 daily_uniques + -> Task + Node: host=localhost port=57638 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360292 daily_uniques +(25 rows) + +SET hll.force_groupagg to ON; +EXPLAIN(COSTS OFF) +SELECT + day, hll_cardinality(hll_union_agg(unique_users)) +FROM + daily_uniques +GROUP BY(1); + QUERY PLAN +------------------------------------------------------------------------------ + GroupAggregate + Group Key: remote_scan.day + -> Sort + Sort Key: remote_scan.day + -> Custom Scan (Citus Real-Time) + Task Count: 4 + Tasks Shown: All + -> Task + Node: host=localhost port=57637 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360289 daily_uniques + -> Task + Node: host=localhost port=57638 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360290 daily_uniques + -> Task + Node: host=localhost port=57637 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360291 daily_uniques + -> Task + Node: host=localhost port=57638 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360292 daily_uniques +(27 rows) + +-- Test disabling hash_agg with having +SET hll.force_groupagg to OFF; +EXPLAIN(COSTS OFF) +SELECT + day, hll_cardinality(hll_union_agg(unique_users)) +FROM + daily_uniques +GROUP BY(1); + QUERY PLAN +------------------------------------------------------------------------ + HashAggregate + Group Key: remote_scan.day + -> Custom Scan (Citus Real-Time) + Task Count: 4 + Tasks Shown: All + -> Task + Node: host=localhost port=57637 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360289 daily_uniques + -> Task + Node: host=localhost port=57638 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360290 daily_uniques + -> Task + Node: host=localhost port=57637 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360291 daily_uniques + -> Task + Node: host=localhost port=57638 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360292 daily_uniques +(25 rows) + +SET hll.force_groupagg to ON; +EXPLAIN(COSTS OFF) +SELECT + day, hll_cardinality(hll_union_agg(unique_users)) +FROM + daily_uniques +GROUP BY(1) +HAVING hll_cardinality(hll_union_agg(unique_users)) > 1; + QUERY PLAN +---------------------------------------------------------------------------------------------------------- + GroupAggregate + Group Key: remote_scan.day + Filter: (hll_cardinality(hll_union_agg(remote_scan.worker_column_3)) > '1'::double precision) + -> Sort + Sort Key: remote_scan.day + -> Custom Scan (Citus Real-Time) + Task Count: 4 + Tasks Shown: All + -> Task + Node: host=localhost port=57637 dbname=regression + -> GroupAggregate + Group Key: day + Filter: (hll_cardinality(hll_union_agg(unique_users)) > '1'::double precision) + -> Sort + Sort Key: day + -> Seq Scan on daily_uniques_360289 daily_uniques + -> Task + Node: host=localhost port=57638 dbname=regression + -> GroupAggregate + Group Key: day + Filter: (hll_cardinality(hll_union_agg(unique_users)) > '1'::double precision) + -> Sort + Sort Key: day + -> Seq Scan on daily_uniques_360290 daily_uniques + -> Task + Node: host=localhost port=57637 dbname=regression + -> GroupAggregate + Group Key: day + Filter: (hll_cardinality(hll_union_agg(unique_users)) > '1'::double precision) + -> Sort + Sort Key: day + -> Seq Scan on daily_uniques_360291 daily_uniques + -> Task + Node: host=localhost port=57638 dbname=regression + -> GroupAggregate + Group Key: day + Filter: (hll_cardinality(hll_union_agg(unique_users)) > '1'::double precision) + -> Sort + Sort Key: day + -> Seq Scan on daily_uniques_360292 daily_uniques +(40 rows) + DROP TABLE raw_table; DROP TABLE daily_uniques; -- Check if TopN aggregates work as expected diff --git a/src/test/regress/expected/custom_aggregate_support_0.out b/src/test/regress/expected/custom_aggregate_support_0.out index aafd3bd16..211ff51ed 100644 --- a/src/test/regress/expected/custom_aggregate_support_0.out +++ b/src/test/regress/expected/custom_aggregate_support_0.out @@ -107,6 +107,92 @@ WINDOW two_days AS (ORDER BY day ASC ROWS 1 PRECEDING); ERROR: relation "daily_uniques" does not exist LINE 2: FROM daily_uniques ^ +-- Test disabling hash_agg on coordinator query +SET citus.explain_all_tasks to true; +SET hll.force_groupagg to OFF; +EXPLAIN(COSTS OFF) +SELECT + day, hll_union_agg(unique_users) +FROM + daily_uniques +GROUP BY(1); +ERROR: relation "daily_uniques" does not exist +LINE 5: daily_uniques + ^ +SET hll.force_groupagg to ON; +EXPLAIN(COSTS OFF) +SELECT + day, hll_union_agg(unique_users) +FROM + daily_uniques +GROUP BY(1); +ERROR: relation "daily_uniques" does not exist +LINE 5: daily_uniques + ^ +-- Test disabling hash_agg with operator on coordinator query +SET hll.force_groupagg to OFF; +EXPLAIN(COSTS OFF) +SELECT + day, hll_union_agg(unique_users) || hll_union_agg(unique_users) +FROM + daily_uniques +GROUP BY(1); +ERROR: relation "daily_uniques" does not exist +LINE 5: daily_uniques + ^ +SET hll.force_groupagg to ON; +EXPLAIN(COSTS OFF) +SELECT + day, hll_union_agg(unique_users) || hll_union_agg(unique_users) +FROM + daily_uniques +GROUP BY(1); +ERROR: relation "daily_uniques" does not exist +LINE 5: daily_uniques + ^ +-- Test disabling hash_agg with expression on coordinator query +SET hll.force_groupagg to OFF; +EXPLAIN(COSTS OFF) +SELECT + day, hll_cardinality(hll_union_agg(unique_users)) +FROM + daily_uniques +GROUP BY(1); +ERROR: relation "daily_uniques" does not exist +LINE 5: daily_uniques + ^ +SET hll.force_groupagg to ON; +EXPLAIN(COSTS OFF) +SELECT + day, hll_cardinality(hll_union_agg(unique_users)) +FROM + daily_uniques +GROUP BY(1); +ERROR: relation "daily_uniques" does not exist +LINE 5: daily_uniques + ^ +-- Test disabling hash_agg with having +SET hll.force_groupagg to OFF; +EXPLAIN(COSTS OFF) +SELECT + day, hll_cardinality(hll_union_agg(unique_users)) +FROM + daily_uniques +GROUP BY(1); +ERROR: relation "daily_uniques" does not exist +LINE 5: daily_uniques + ^ +SET hll.force_groupagg to ON; +EXPLAIN(COSTS OFF) +SELECT + day, hll_cardinality(hll_union_agg(unique_users)) +FROM + daily_uniques +GROUP BY(1) +HAVING hll_cardinality(hll_union_agg(unique_users)) > 1; +ERROR: relation "daily_uniques" does not exist +LINE 5: daily_uniques + ^ DROP TABLE raw_table; DROP TABLE daily_uniques; ERROR: table "daily_uniques" does not exist diff --git a/src/test/regress/expected/custom_aggregate_support_1.out b/src/test/regress/expected/custom_aggregate_support_1.out index b2af1d477..81d139258 100644 --- a/src/test/regress/expected/custom_aggregate_support_1.out +++ b/src/test/regress/expected/custom_aggregate_support_1.out @@ -106,6 +106,321 @@ FROM daily_uniques WINDOW two_days AS (ORDER BY day ASC ROWS 1 PRECEDING); ERROR: could not run distributed query because the window function that is used cannot be pushed down HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column +-- Test disabling hash_agg on coordinator query +SET citus.explain_all_tasks to true; +SET hll.force_groupagg to OFF; +EXPLAIN(COSTS OFF) +SELECT + day, hll_union_agg(unique_users) +FROM + daily_uniques +GROUP BY(1); + QUERY PLAN +------------------------------------------------------------------------ + HashAggregate + Group Key: remote_scan.day + -> Custom Scan (Citus Real-Time) + Task Count: 4 + Tasks Shown: All + -> Task + Node: host=localhost port=57637 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360261 daily_uniques + -> Task + Node: host=localhost port=57638 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360262 daily_uniques + -> Task + Node: host=localhost port=57637 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360263 daily_uniques + -> Task + Node: host=localhost port=57638 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360264 daily_uniques +(25 rows) + +SET hll.force_groupagg to ON; +EXPLAIN(COSTS OFF) +SELECT + day, hll_union_agg(unique_users) +FROM + daily_uniques +GROUP BY(1); + QUERY PLAN +------------------------------------------------------------------------------ + GroupAggregate + Group Key: remote_scan.day + -> Sort + Sort Key: remote_scan.day + -> Custom Scan (Citus Real-Time) + Task Count: 4 + Tasks Shown: All + -> Task + Node: host=localhost port=57637 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360261 daily_uniques + -> Task + Node: host=localhost port=57638 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360262 daily_uniques + -> Task + Node: host=localhost port=57637 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360263 daily_uniques + -> Task + Node: host=localhost port=57638 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360264 daily_uniques +(27 rows) + +-- Test disabling hash_agg with operator on coordinator query +SET hll.force_groupagg to OFF; +EXPLAIN(COSTS OFF) +SELECT + day, hll_union_agg(unique_users) || hll_union_agg(unique_users) +FROM + daily_uniques +GROUP BY(1); + QUERY PLAN +------------------------------------------------------------------------ + HashAggregate + Group Key: remote_scan.day + -> Custom Scan (Citus Real-Time) + Task Count: 4 + Tasks Shown: All + -> Task + Node: host=localhost port=57637 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360261 daily_uniques + -> Task + Node: host=localhost port=57638 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360262 daily_uniques + -> Task + Node: host=localhost port=57637 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360263 daily_uniques + -> Task + Node: host=localhost port=57638 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360264 daily_uniques +(25 rows) + +SET hll.force_groupagg to ON; +EXPLAIN(COSTS OFF) +SELECT + day, hll_union_agg(unique_users) || hll_union_agg(unique_users) +FROM + daily_uniques +GROUP BY(1); + QUERY PLAN +------------------------------------------------------------------------------ + GroupAggregate + Group Key: remote_scan.day + -> Sort + Sort Key: remote_scan.day + -> Custom Scan (Citus Real-Time) + Task Count: 4 + Tasks Shown: All + -> Task + Node: host=localhost port=57637 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360261 daily_uniques + -> Task + Node: host=localhost port=57638 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360262 daily_uniques + -> Task + Node: host=localhost port=57637 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360263 daily_uniques + -> Task + Node: host=localhost port=57638 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360264 daily_uniques +(27 rows) + +-- Test disabling hash_agg with expression on coordinator query +SET hll.force_groupagg to OFF; +EXPLAIN(COSTS OFF) +SELECT + day, hll_cardinality(hll_union_agg(unique_users)) +FROM + daily_uniques +GROUP BY(1); + QUERY PLAN +------------------------------------------------------------------------ + HashAggregate + Group Key: remote_scan.day + -> Custom Scan (Citus Real-Time) + Task Count: 4 + Tasks Shown: All + -> Task + Node: host=localhost port=57637 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360261 daily_uniques + -> Task + Node: host=localhost port=57638 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360262 daily_uniques + -> Task + Node: host=localhost port=57637 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360263 daily_uniques + -> Task + Node: host=localhost port=57638 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360264 daily_uniques +(25 rows) + +SET hll.force_groupagg to ON; +EXPLAIN(COSTS OFF) +SELECT + day, hll_cardinality(hll_union_agg(unique_users)) +FROM + daily_uniques +GROUP BY(1); + QUERY PLAN +------------------------------------------------------------------------------ + GroupAggregate + Group Key: remote_scan.day + -> Sort + Sort Key: remote_scan.day + -> Custom Scan (Citus Real-Time) + Task Count: 4 + Tasks Shown: All + -> Task + Node: host=localhost port=57637 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360261 daily_uniques + -> Task + Node: host=localhost port=57638 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360262 daily_uniques + -> Task + Node: host=localhost port=57637 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360263 daily_uniques + -> Task + Node: host=localhost port=57638 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360264 daily_uniques +(27 rows) + +-- Test disabling hash_agg with having +SET hll.force_groupagg to OFF; +EXPLAIN(COSTS OFF) +SELECT + day, hll_cardinality(hll_union_agg(unique_users)) +FROM + daily_uniques +GROUP BY(1); + QUERY PLAN +------------------------------------------------------------------------ + HashAggregate + Group Key: remote_scan.day + -> Custom Scan (Citus Real-Time) + Task Count: 4 + Tasks Shown: All + -> Task + Node: host=localhost port=57637 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360261 daily_uniques + -> Task + Node: host=localhost port=57638 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360262 daily_uniques + -> Task + Node: host=localhost port=57637 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360263 daily_uniques + -> Task + Node: host=localhost port=57638 dbname=regression + -> HashAggregate + Group Key: day + -> Seq Scan on daily_uniques_360264 daily_uniques +(25 rows) + +SET hll.force_groupagg to ON; +EXPLAIN(COSTS OFF) +SELECT + day, hll_cardinality(hll_union_agg(unique_users)) +FROM + daily_uniques +GROUP BY(1) +HAVING hll_cardinality(hll_union_agg(unique_users)) > 1; + QUERY PLAN +---------------------------------------------------------------------------------------------------------- + GroupAggregate + Group Key: remote_scan.day + Filter: (hll_cardinality(hll_union_agg(remote_scan.worker_column_3)) > '1'::double precision) + -> Sort + Sort Key: remote_scan.day + -> Custom Scan (Citus Real-Time) + Task Count: 4 + Tasks Shown: All + -> Task + Node: host=localhost port=57637 dbname=regression + -> GroupAggregate + Group Key: day + Filter: (hll_cardinality(hll_union_agg(unique_users)) > '1'::double precision) + -> Sort + Sort Key: day + -> Seq Scan on daily_uniques_360261 daily_uniques + -> Task + Node: host=localhost port=57638 dbname=regression + -> GroupAggregate + Group Key: day + Filter: (hll_cardinality(hll_union_agg(unique_users)) > '1'::double precision) + -> Sort + Sort Key: day + -> Seq Scan on daily_uniques_360262 daily_uniques + -> Task + Node: host=localhost port=57637 dbname=regression + -> GroupAggregate + Group Key: day + Filter: (hll_cardinality(hll_union_agg(unique_users)) > '1'::double precision) + -> Sort + Sort Key: day + -> Seq Scan on daily_uniques_360263 daily_uniques + -> Task + Node: host=localhost port=57638 dbname=regression + -> GroupAggregate + Group Key: day + Filter: (hll_cardinality(hll_union_agg(unique_users)) > '1'::double precision) + -> Sort + Sort Key: day + -> Seq Scan on daily_uniques_360264 daily_uniques +(40 rows) + DROP TABLE raw_table; DROP TABLE daily_uniques; -- Check if TopN aggregates work as expected diff --git a/src/test/regress/multi_schedule b/src/test/regress/multi_schedule index dcc745f76..7dbe8e4ae 100644 --- a/src/test/regress/multi_schedule +++ b/src/test/regress/multi_schedule @@ -68,7 +68,8 @@ test: sql_procedure test: multi_subquery_in_where_reference_clause test: multi_subquery_union multi_subquery_in_where_clause multi_subquery_misc test: multi_agg_distinct multi_agg_approximate_distinct multi_limit_clause_approximate multi_outer_join_reference multi_single_relation_subquery multi_prepare_plsql -test: multi_reference_table multi_select_for_update relation_access_tracking custom_aggregate_support +test: multi_reference_table multi_select_for_update relation_access_tracking +test: custom_aggregate_support test: multi_average_expression multi_working_columns multi_having_pushdown test: multi_array_agg multi_limit_clause multi_orderby_limit_pushdown test: multi_jsonb_agg multi_jsonb_object_agg multi_json_agg multi_json_object_agg bool_agg diff --git a/src/test/regress/pg_regress_multi.pl b/src/test/regress/pg_regress_multi.pl index 05771a193..ece36d62f 100755 --- a/src/test/regress/pg_regress_multi.pl +++ b/src/test/regress/pg_regress_multi.pl @@ -299,6 +299,14 @@ if (-e $pg_stat_statements_control) { $sharedPreloadLibraries .= ',pg_stat_statements'; } + +# check if hll extension is installed +# if it is add it to shared preload libraries +my $hll_control = catfile($sharedir, "extension", "hll.control"); +if (-e $hll_control) +{ + $sharedPreloadLibraries .= ',hll'; +} push(@pgOptions, '-c', "shared_preload_libraries=${sharedPreloadLibraries}"); push(@pgOptions, '-c', "wal_level=logical"); diff --git a/src/test/regress/sql/custom_aggregate_support.sql b/src/test/regress/sql/custom_aggregate_support.sql index 1fc4ada4e..5722b4cfe 100644 --- a/src/test/regress/sql/custom_aggregate_support.sql +++ b/src/test/regress/sql/custom_aggregate_support.sql @@ -76,6 +76,76 @@ SELECT day, (hll_cardinality(hll_union_agg(unique_users) OVER two_days)) - hll_c FROM daily_uniques WINDOW two_days AS (ORDER BY day ASC ROWS 1 PRECEDING); +-- Test disabling hash_agg on coordinator query +SET citus.explain_all_tasks to true; +SET hll.force_groupagg to OFF; +EXPLAIN(COSTS OFF) +SELECT + day, hll_union_agg(unique_users) +FROM + daily_uniques +GROUP BY(1); + +SET hll.force_groupagg to ON; +EXPLAIN(COSTS OFF) +SELECT + day, hll_union_agg(unique_users) +FROM + daily_uniques +GROUP BY(1); + +-- Test disabling hash_agg with operator on coordinator query +SET hll.force_groupagg to OFF; +EXPLAIN(COSTS OFF) +SELECT + day, hll_union_agg(unique_users) || hll_union_agg(unique_users) +FROM + daily_uniques +GROUP BY(1); + +SET hll.force_groupagg to ON; +EXPLAIN(COSTS OFF) +SELECT + day, hll_union_agg(unique_users) || hll_union_agg(unique_users) +FROM + daily_uniques +GROUP BY(1); + +-- Test disabling hash_agg with expression on coordinator query +SET hll.force_groupagg to OFF; +EXPLAIN(COSTS OFF) +SELECT + day, hll_cardinality(hll_union_agg(unique_users)) +FROM + daily_uniques +GROUP BY(1); + +SET hll.force_groupagg to ON; +EXPLAIN(COSTS OFF) +SELECT + day, hll_cardinality(hll_union_agg(unique_users)) +FROM + daily_uniques +GROUP BY(1); + +-- Test disabling hash_agg with having +SET hll.force_groupagg to OFF; +EXPLAIN(COSTS OFF) +SELECT + day, hll_cardinality(hll_union_agg(unique_users)) +FROM + daily_uniques +GROUP BY(1); + +SET hll.force_groupagg to ON; +EXPLAIN(COSTS OFF) +SELECT + day, hll_cardinality(hll_union_agg(unique_users)) +FROM + daily_uniques +GROUP BY(1) +HAVING hll_cardinality(hll_union_agg(unique_users)) > 1; + DROP TABLE raw_table; DROP TABLE daily_uniques;