Do not repeat GROUP BY distribution_column on coordinator

Allow arbitrary aggregates to be pushed down in these scenarios
pull/3307/head
Marco Slot 2019-12-14 10:04:14 +01:00 committed by Philip Dubé
parent 11368451f4
commit b21b6905ae
13 changed files with 702 additions and 688 deletions

View File

@ -11,6 +11,7 @@
#include "postgres.h" #include "postgres.h"
#include "distributed/extended_op_node_utils.h" #include "distributed/extended_op_node_utils.h"
#include "distributed/listutils.h"
#include "distributed/metadata_cache.h" #include "distributed/metadata_cache.h"
#include "distributed/multi_logical_optimizer.h" #include "distributed/multi_logical_optimizer.h"
#include "distributed/pg_dist_partition.h" #include "distributed/pg_dist_partition.h"
@ -24,8 +25,7 @@
#include "nodes/pg_list.h" #include "nodes/pg_list.h"
static bool GroupedByDisjointPartitionColumn(List *tableNodeList, static bool GroupedByPartitionColumn(MultiNode *node, MultiExtendedOp *opNode);
MultiExtendedOp *opNode);
static bool ExtendedOpNodeContainsRepartitionSubquery(MultiExtendedOp *originalOpNode); static bool ExtendedOpNodeContainsRepartitionSubquery(MultiExtendedOp *originalOpNode);
static bool HasNonPartitionColumnDistinctAgg(List *targetEntryList, Node *havingQual, static bool HasNonPartitionColumnDistinctAgg(List *targetEntryList, Node *havingQual,
@ -46,11 +46,9 @@ BuildExtendedOpNodeProperties(MultiExtendedOp *extendedOpNode)
{ {
ExtendedOpNodeProperties extendedOpNodeProperties; ExtendedOpNodeProperties extendedOpNodeProperties;
List *tableNodeList = FindNodesOfType((MultiNode *) extendedOpNode, T_MultiTable); List *tableNodeList = FindNodesOfType((MultiNode *) extendedOpNode, T_MultiTable);
bool groupedByDisjointPartitionColumn = GroupedByDisjointPartitionColumn( bool groupedByDisjointPartitionColumn =
tableNodeList, GroupedByPartitionColumn((MultiNode *) extendedOpNode, extendedOpNode);
extendedOpNode);
bool repartitionSubquery = ExtendedOpNodeContainsRepartitionSubquery(extendedOpNode); bool repartitionSubquery = ExtendedOpNodeContainsRepartitionSubquery(extendedOpNode);
@ -83,41 +81,86 @@ BuildExtendedOpNodeProperties(MultiExtendedOp *extendedOpNode)
/* /*
* GroupedByDisjointPartitionColumn returns true if the query is grouped by the * GroupedByPartitionColumn returns true if a GROUP BY in the opNode contains
* partition column of a table whose shards have disjoint sets of partition values. * the partition column of the underlying relation, which is determined by
* searching the MultiNode tree for a MultiTable and MultiPartition with
* a matching column.
*
* When there is a re-partition join, the search terminates at the
* MultiPartition node. Hence we can push down the GROUP BY if the join
* column is in the GROUP BY.
*/ */
static bool static bool
GroupedByDisjointPartitionColumn(List *tableNodeList, MultiExtendedOp *opNode) GroupedByPartitionColumn(MultiNode *node, MultiExtendedOp *opNode)
{ {
bool result = false; if (node == NULL)
ListCell *tableNodeCell = NULL; {
return false;
}
foreach(tableNodeCell, tableNodeList) if (CitusIsA(node, MultiTable))
{ {
MultiTable *tableNode = (MultiTable *) lfirst(tableNodeCell); MultiTable *tableNode = (MultiTable *) node;
Oid relationId = tableNode->relationId; Oid relationId = tableNode->relationId;
if (relationId == SUBQUERY_RELATION_ID || !IsDistributedTable(relationId)) if (relationId == SUBQUERY_RELATION_ID ||
relationId == SUBQUERY_PUSHDOWN_RELATION_ID)
{ {
continue; /* ignore subqueries for now */
return false;
} }
char partitionMethod = PartitionMethod(relationId); char partitionMethod = PartitionMethod(relationId);
if (partitionMethod != DISTRIBUTE_BY_RANGE && if (partitionMethod != DISTRIBUTE_BY_RANGE &&
partitionMethod != DISTRIBUTE_BY_HASH) partitionMethod != DISTRIBUTE_BY_HASH)
{ {
continue; /* only range- and hash-distributed tables are strictly partitioned */
return false;
} }
if (GroupedByColumn(opNode->groupClauseList, opNode->targetList, if (GroupedByColumn(opNode->groupClauseList, opNode->targetList,
tableNode->partitionColumn)) tableNode->partitionColumn))
{ {
result = true; /* this node is partitioned by a column in the GROUP BY */
break; return true;
}
}
else if (CitusIsA(node, MultiPartition))
{
MultiPartition *partitionNode = (MultiPartition *) node;
if (GroupedByColumn(opNode->groupClauseList, opNode->targetList,
partitionNode->partitionColumn))
{
/* this node is partitioned by a column in the GROUP BY */
return true;
}
}
else if (UnaryOperator(node))
{
MultiNode *childNode = ((MultiUnaryNode *) node)->childNode;
if (GroupedByPartitionColumn(childNode, opNode))
{
/* a child node is partitioned by a column in the GROUP BY */
return true;
}
}
else if (BinaryOperator(node))
{
MultiNode *leftChildNode = ((MultiBinaryNode *) node)->leftChildNode;
MultiNode *rightChildNode = ((MultiBinaryNode *) node)->rightChildNode;
if (GroupedByPartitionColumn(leftChildNode, opNode) ||
GroupedByPartitionColumn(rightChildNode, opNode))
{
/* a child node is partitioned by a column in the GROUP BY */
return true;
} }
} }
return result; return false;
} }

View File

@ -316,8 +316,16 @@ MultiLogicalPlanOptimize(MultiTreeRoot *multiLogicalPlan)
ListCell *tableNodeCell = NULL; ListCell *tableNodeCell = NULL;
MultiNode *logicalPlanNode = (MultiNode *) multiLogicalPlan; MultiNode *logicalPlanNode = (MultiNode *) multiLogicalPlan;
List *extendedOpNodeList = FindNodesOfType(logicalPlanNode, T_MultiExtendedOp);
MultiExtendedOp *extendedOpNode = (MultiExtendedOp *) linitial(extendedOpNodeList);
ExtendedOpNodeProperties extendedOpNodeProperties = BuildExtendedOpNodeProperties(
extendedOpNode);
if (!extendedOpNodeProperties.groupedByDisjointPartitionColumn)
{
/* check that we can optimize aggregates in the plan */ /* check that we can optimize aggregates in the plan */
ErrorIfContainsUnsupportedAggregate(logicalPlanNode); ErrorIfContainsUnsupportedAggregate(logicalPlanNode);
}
/* /*
* If a select node exists, we use the idempower property to split the node * If a select node exists, we use the idempower property to split the node
@ -374,11 +382,6 @@ MultiLogicalPlanOptimize(MultiTreeRoot *multiLogicalPlan)
* clause list to the worker operator node. We then push the worker operator * clause list to the worker operator node. We then push the worker operator
* node below the collect node. * node below the collect node.
*/ */
List *extendedOpNodeList = FindNodesOfType(logicalPlanNode, T_MultiExtendedOp);
MultiExtendedOp *extendedOpNode = (MultiExtendedOp *) linitial(extendedOpNodeList);
ExtendedOpNodeProperties extendedOpNodeProperties = BuildExtendedOpNodeProperties(
extendedOpNode);
MultiExtendedOp *masterExtendedOpNode = MultiExtendedOp *masterExtendedOpNode =
MasterExtendedOpNode(extendedOpNode, &extendedOpNodeProperties); MasterExtendedOpNode(extendedOpNode, &extendedOpNodeProperties);
@ -1359,6 +1362,7 @@ MasterExtendedOpNode(MultiExtendedOp *originalOpNode,
{ {
List *targetEntryList = originalOpNode->targetList; List *targetEntryList = originalOpNode->targetList;
List *newTargetEntryList = NIL; List *newTargetEntryList = NIL;
List *newGroupClauseList = NIL;
ListCell *targetEntryCell = NULL; ListCell *targetEntryCell = NULL;
Node *originalHavingQual = originalOpNode->havingQual; Node *originalHavingQual = originalOpNode->havingQual;
Node *newHavingQual = NULL; Node *newHavingQual = NULL;
@ -1383,7 +1387,8 @@ MasterExtendedOpNode(MultiExtendedOp *originalOpNode,
* if the aggregate belongs to a window function, it is not mutated, but pushed * if the aggregate belongs to a window function, it is not mutated, but pushed
* down to worker as it is. Master query should treat that as a Var. * down to worker as it is. Master query should treat that as a Var.
*/ */
if (hasAggregates && !hasWindowFunction) if (hasAggregates && !hasWindowFunction &&
!extendedOpNodeProperties->groupedByDisjointPartitionColumn)
{ {
Node *newNode = MasterAggregateMutator((Node *) originalExpression, Node *newNode = MasterAggregateMutator((Node *) originalExpression,
walkerContext); walkerContext);
@ -1392,8 +1397,9 @@ MasterExtendedOpNode(MultiExtendedOp *originalOpNode,
else else
{ {
/* /*
* The expression does not have any aggregates. We simply make it * The expression does not have any aggregates or the group by
* reference the output generated by worker nodes. * is on the partition column. We simply make it reference the
* output generated by worker nodes.
*/ */
const uint32 masterTableId = 1; /* only one table on master node */ const uint32 masterTableId = 1; /* only one table on master node */
@ -1414,14 +1420,23 @@ MasterExtendedOpNode(MultiExtendedOp *originalOpNode,
newTargetEntryList = lappend(newTargetEntryList, newTargetEntry); newTargetEntryList = lappend(newTargetEntryList, newTargetEntry);
} }
if (!extendedOpNodeProperties->groupedByDisjointPartitionColumn)
{
/*
* Not pushing down GROUP BY, need to regroup on coordinator
* and apply having on the coordinator.
*/
newGroupClauseList = originalOpNode->groupClauseList;
if (originalHavingQual != NULL) if (originalHavingQual != NULL)
{ {
newHavingQual = MasterAggregateMutator(originalHavingQual, walkerContext); newHavingQual = MasterAggregateMutator(originalHavingQual, walkerContext);
} }
}
MultiExtendedOp *masterExtendedOpNode = CitusMakeNode(MultiExtendedOp); MultiExtendedOp *masterExtendedOpNode = CitusMakeNode(MultiExtendedOp);
masterExtendedOpNode->targetList = newTargetEntryList; masterExtendedOpNode->targetList = newTargetEntryList;
masterExtendedOpNode->groupClauseList = originalOpNode->groupClauseList; masterExtendedOpNode->groupClauseList = newGroupClauseList;
masterExtendedOpNode->sortClauseList = originalOpNode->sortClauseList; masterExtendedOpNode->sortClauseList = originalOpNode->sortClauseList;
masterExtendedOpNode->distinctClause = originalOpNode->distinctClause; masterExtendedOpNode->distinctClause = originalOpNode->distinctClause;
masterExtendedOpNode->hasDistinctOn = originalOpNode->hasDistinctOn; masterExtendedOpNode->hasDistinctOn = originalOpNode->hasDistinctOn;
@ -2213,11 +2228,15 @@ ProcessTargetListForWorkerQuery(List *targetEntryList,
workerAggContext->createGroupByClause = false; workerAggContext->createGroupByClause = false;
/* /*
* If the expression uses aggregates inside window function contain agg * If the query has a window function, we currently assume it's safe to push
* clause still returns true. We want to make sure it is not a part of * down the target list.
* window function before we proceed. *
* If there are aggregates without a GROUP BY on the distribution column
* then the results of those aggregates need to be combined on the coordinator.
* In that case we rewrite the expressions using WorkerAggregateWalker.
*/ */
if (hasAggregates && !hasWindowFunction) if (!hasWindowFunction && hasAggregates &&
!extendedOpNodeProperties->groupedByDisjointPartitionColumn)
{ {
WorkerAggregateWalker((Node *) originalExpression, workerAggContext); WorkerAggregateWalker((Node *) originalExpression, workerAggContext);
@ -2245,11 +2264,6 @@ ProcessTargetListForWorkerQuery(List *targetEntryList,
* having clause is safe to pushdown to the workers, workerHavingQual is set to * having clause is safe to pushdown to the workers, workerHavingQual is set to
* be the original having clause. * be the original having clause.
* *
* TODO: Citus currently always pulls the expressions in the having clause to the
* coordinator and apply it on the coordinator. Do we really need to pull those
* expressions to the coordinator and apply the having on the coordinator if we're
* already pushing down the HAVING clause?
*
* inputs: originalHavingQual, extendedOpNodeProperties * inputs: originalHavingQual, extendedOpNodeProperties
* outputs: workerHavingQual, queryTargetList, queryGroupClause * outputs: workerHavingQual, queryTargetList, queryGroupClause
*/ */
@ -2269,10 +2283,17 @@ ProcessHavingClauseForWorkerQuery(Node *originalHavingQual,
*workerHavingQual = NULL; *workerHavingQual = NULL;
if (!extendedOpNodeProperties->groupedByDisjointPartitionColumn)
{
/*
* If the GROUP BY or PARTITION BY is not on the distribution column
* then we need to combine the aggregates in the HAVING across shards.
*/
WorkerAggregateWalkerContext *workerAggContext = palloc0( WorkerAggregateWalkerContext *workerAggContext = palloc0(
sizeof(WorkerAggregateWalkerContext)); sizeof(WorkerAggregateWalkerContext));
workerAggContext->expressionList = NIL; workerAggContext->expressionList = NIL;
workerAggContext->pullDistinctColumns = extendedOpNodeProperties->pullDistinctColumns; workerAggContext->pullDistinctColumns =
extendedOpNodeProperties->pullDistinctColumns;
workerAggContext->createGroupByClause = false; workerAggContext->createGroupByClause = false;
WorkerAggregateWalker(originalHavingQual, workerAggContext); WorkerAggregateWalker(originalHavingQual, workerAggContext);
@ -2281,6 +2302,7 @@ ProcessHavingClauseForWorkerQuery(Node *originalHavingQual,
ExpandWorkerTargetEntry(newExpressionList, targetEntry, ExpandWorkerTargetEntry(newExpressionList, targetEntry,
workerAggContext->createGroupByClause, workerAggContext->createGroupByClause,
queryTargetList, queryGroupClause); queryTargetList, queryGroupClause);
}
/* /*
* If grouped by a partition column whose values are shards have disjoint sets * If grouped by a partition column whose values are shards have disjoint sets

View File

@ -150,6 +150,28 @@ SELECT create_distributed_function('last(anyelement)');
SELECT key, first(val ORDER BY id), last(val ORDER BY id) SELECT key, first(val ORDER BY id), last(val ORDER BY id)
FROM aggdata GROUP BY key ORDER BY key; FROM aggdata GROUP BY key ORDER BY key;
ERROR: unsupported aggregate function first ERROR: unsupported aggregate function first
-- However, GROUP BY on distribution column gets pushed down
SELECT id, first(val ORDER BY key), last(val ORDER BY key)
FROM aggdata GROUP BY id ORDER BY id;
id | first | last
----+-------+------
1 | 2 | 2
2 | |
3 | 2 | 2
4 | 3 | 3
5 | 5 | 5
6 | 4 | 4
7 | |
8 | |
9 | |
10 | 8 | 8
11 | 0 | 0
(11 rows)
-- Test that expressions don't slip past. This fails
SELECT id%5, first(val ORDER BY key), last(val ORDER BY key)
FROM aggdata GROUP BY id%5 ORDER BY id%5;
ERROR: unsupported aggregate function first
-- test aggregate with stype which is not a by-value datum -- test aggregate with stype which is not a by-value datum
-- also test our handling of the aggregate not existing on workers -- also test our handling of the aggregate not existing on workers
create function sumstring_sfunc(state text, x text) create function sumstring_sfunc(state text, x text)
@ -165,7 +187,7 @@ create aggregate sumstring(text) (
); );
select sumstring(valf::text) from aggdata where valf is not null; select sumstring(valf::text) from aggdata where valf is not null;
ERROR: function "aggregate_support.sumstring(text)" does not exist ERROR: function "aggregate_support.sumstring(text)" does not exist
CONTEXT: while executing command on localhost:57638 CONTEXT: while executing command on localhost:57637
select create_distributed_function('sumstring(text)'); select create_distributed_function('sumstring(text)');
create_distributed_function create_distributed_function
----------------------------- -----------------------------

View File

@ -109,10 +109,8 @@ FROM
daily_uniques daily_uniques
GROUP BY(1); GROUP BY(1);
QUERY PLAN QUERY PLAN
------------------------------------------------------------------------ ------------------------------------------------------------------
HashAggregate Custom Scan (Citus Adaptive)
Group Key: remote_scan.day
-> Custom Scan (Citus Adaptive)
Task Count: 4 Task Count: 4
Tasks Shown: All Tasks Shown: All
-> Task -> Task
@ -135,7 +133,7 @@ GROUP BY(1);
-> HashAggregate -> HashAggregate
Group Key: day Group Key: day
-> Seq Scan on daily_uniques_360618 daily_uniques -> Seq Scan on daily_uniques_360618 daily_uniques
(25 rows) (23 rows)
SET hll.force_groupagg to ON; SET hll.force_groupagg to ON;
EXPLAIN(COSTS OFF) EXPLAIN(COSTS OFF)
@ -145,12 +143,8 @@ FROM
daily_uniques daily_uniques
GROUP BY(1); GROUP BY(1);
QUERY PLAN QUERY PLAN
------------------------------------------------------------------------------ ------------------------------------------------------------------
GroupAggregate Custom Scan (Citus Adaptive)
Group Key: remote_scan.day
-> Sort
Sort Key: remote_scan.day
-> Custom Scan (Citus Adaptive)
Task Count: 4 Task Count: 4
Tasks Shown: All Tasks Shown: All
-> Task -> Task
@ -173,7 +167,7 @@ GROUP BY(1);
-> HashAggregate -> HashAggregate
Group Key: day Group Key: day
-> Seq Scan on daily_uniques_360618 daily_uniques -> Seq Scan on daily_uniques_360618 daily_uniques
(27 rows) (23 rows)
-- Test disabling hash_agg with operator on coordinator query -- Test disabling hash_agg with operator on coordinator query
SET hll.force_groupagg to OFF; SET hll.force_groupagg to OFF;
@ -184,10 +178,8 @@ FROM
daily_uniques daily_uniques
GROUP BY(1); GROUP BY(1);
QUERY PLAN QUERY PLAN
------------------------------------------------------------------------ ------------------------------------------------------------------
HashAggregate Custom Scan (Citus Adaptive)
Group Key: remote_scan.day
-> Custom Scan (Citus Adaptive)
Task Count: 4 Task Count: 4
Tasks Shown: All Tasks Shown: All
-> Task -> Task
@ -210,7 +202,7 @@ GROUP BY(1);
-> HashAggregate -> HashAggregate
Group Key: day Group Key: day
-> Seq Scan on daily_uniques_360618 daily_uniques -> Seq Scan on daily_uniques_360618 daily_uniques
(25 rows) (23 rows)
SET hll.force_groupagg to ON; SET hll.force_groupagg to ON;
EXPLAIN(COSTS OFF) EXPLAIN(COSTS OFF)
@ -220,12 +212,8 @@ FROM
daily_uniques daily_uniques
GROUP BY(1); GROUP BY(1);
QUERY PLAN QUERY PLAN
------------------------------------------------------------------------------ ------------------------------------------------------------------
GroupAggregate Custom Scan (Citus Adaptive)
Group Key: remote_scan.day
-> Sort
Sort Key: remote_scan.day
-> Custom Scan (Citus Adaptive)
Task Count: 4 Task Count: 4
Tasks Shown: All Tasks Shown: All
-> Task -> Task
@ -248,7 +236,7 @@ GROUP BY(1);
-> HashAggregate -> HashAggregate
Group Key: day Group Key: day
-> Seq Scan on daily_uniques_360618 daily_uniques -> Seq Scan on daily_uniques_360618 daily_uniques
(27 rows) (23 rows)
-- Test disabling hash_agg with expression on coordinator query -- Test disabling hash_agg with expression on coordinator query
SET hll.force_groupagg to OFF; SET hll.force_groupagg to OFF;
@ -259,10 +247,8 @@ FROM
daily_uniques daily_uniques
GROUP BY(1); GROUP BY(1);
QUERY PLAN QUERY PLAN
------------------------------------------------------------------------ ------------------------------------------------------------------
HashAggregate Custom Scan (Citus Adaptive)
Group Key: remote_scan.day
-> Custom Scan (Citus Adaptive)
Task Count: 4 Task Count: 4
Tasks Shown: All Tasks Shown: All
-> Task -> Task
@ -285,7 +271,7 @@ GROUP BY(1);
-> HashAggregate -> HashAggregate
Group Key: day Group Key: day
-> Seq Scan on daily_uniques_360618 daily_uniques -> Seq Scan on daily_uniques_360618 daily_uniques
(25 rows) (23 rows)
SET hll.force_groupagg to ON; SET hll.force_groupagg to ON;
EXPLAIN(COSTS OFF) EXPLAIN(COSTS OFF)
@ -295,12 +281,8 @@ FROM
daily_uniques daily_uniques
GROUP BY(1); GROUP BY(1);
QUERY PLAN QUERY PLAN
------------------------------------------------------------------------------ ------------------------------------------------------------------
GroupAggregate Custom Scan (Citus Adaptive)
Group Key: remote_scan.day
-> Sort
Sort Key: remote_scan.day
-> Custom Scan (Citus Adaptive)
Task Count: 4 Task Count: 4
Tasks Shown: All Tasks Shown: All
-> Task -> Task
@ -323,7 +305,7 @@ GROUP BY(1);
-> HashAggregate -> HashAggregate
Group Key: day Group Key: day
-> Seq Scan on daily_uniques_360618 daily_uniques -> Seq Scan on daily_uniques_360618 daily_uniques
(27 rows) (23 rows)
-- Test disabling hash_agg with having -- Test disabling hash_agg with having
SET hll.force_groupagg to OFF; SET hll.force_groupagg to OFF;
@ -334,10 +316,8 @@ FROM
daily_uniques daily_uniques
GROUP BY(1); GROUP BY(1);
QUERY PLAN QUERY PLAN
------------------------------------------------------------------------ ------------------------------------------------------------------
HashAggregate Custom Scan (Citus Adaptive)
Group Key: remote_scan.day
-> Custom Scan (Citus Adaptive)
Task Count: 4 Task Count: 4
Tasks Shown: All Tasks Shown: All
-> Task -> Task
@ -360,7 +340,7 @@ GROUP BY(1);
-> HashAggregate -> HashAggregate
Group Key: day Group Key: day
-> Seq Scan on daily_uniques_360618 daily_uniques -> Seq Scan on daily_uniques_360618 daily_uniques
(25 rows) (23 rows)
SET hll.force_groupagg to ON; SET hll.force_groupagg to ON;
EXPLAIN(COSTS OFF) EXPLAIN(COSTS OFF)
@ -371,48 +351,35 @@ FROM
GROUP BY(1) GROUP BY(1)
HAVING hll_cardinality(hll_union_agg(unique_users)) > 1; HAVING hll_cardinality(hll_union_agg(unique_users)) > 1;
QUERY PLAN QUERY PLAN
---------------------------------------------------------------------------------------------------------- ----------------------------------------------------------------------------------------------
GroupAggregate Custom Scan (Citus Adaptive)
Group Key: remote_scan.day
Filter: (hll_cardinality(hll_union_agg(remote_scan.worker_column_3)) > '1'::double precision)
-> Sort
Sort Key: remote_scan.day
-> Custom Scan (Citus Adaptive)
Task Count: 4 Task Count: 4
Tasks Shown: All Tasks Shown: All
-> Task -> Task
Node: host=localhost port=57637 dbname=regression Node: host=localhost port=57637 dbname=regression
-> GroupAggregate -> HashAggregate
Group Key: day Group Key: day
Filter: (hll_cardinality(hll_union_agg(unique_users)) > '1'::double precision) Filter: (hll_cardinality(hll_union_agg(unique_users)) > '1'::double precision)
-> Sort
Sort Key: day
-> Seq Scan on daily_uniques_360615 daily_uniques -> Seq Scan on daily_uniques_360615 daily_uniques
-> Task -> Task
Node: host=localhost port=57638 dbname=regression Node: host=localhost port=57638 dbname=regression
-> GroupAggregate -> HashAggregate
Group Key: day Group Key: day
Filter: (hll_cardinality(hll_union_agg(unique_users)) > '1'::double precision) Filter: (hll_cardinality(hll_union_agg(unique_users)) > '1'::double precision)
-> Sort
Sort Key: day
-> Seq Scan on daily_uniques_360616 daily_uniques -> Seq Scan on daily_uniques_360616 daily_uniques
-> Task -> Task
Node: host=localhost port=57637 dbname=regression Node: host=localhost port=57637 dbname=regression
-> GroupAggregate -> HashAggregate
Group Key: day Group Key: day
Filter: (hll_cardinality(hll_union_agg(unique_users)) > '1'::double precision) Filter: (hll_cardinality(hll_union_agg(unique_users)) > '1'::double precision)
-> Sort
Sort Key: day
-> Seq Scan on daily_uniques_360617 daily_uniques -> Seq Scan on daily_uniques_360617 daily_uniques
-> Task -> Task
Node: host=localhost port=57638 dbname=regression Node: host=localhost port=57638 dbname=regression
-> GroupAggregate -> HashAggregate
Group Key: day Group Key: day
Filter: (hll_cardinality(hll_union_agg(unique_users)) > '1'::double precision) Filter: (hll_cardinality(hll_union_agg(unique_users)) > '1'::double precision)
-> Sort
Sort Key: day
-> Seq Scan on daily_uniques_360618 daily_uniques -> Seq Scan on daily_uniques_360618 daily_uniques
(40 rows) (27 rows)
DROP TABLE raw_table; DROP TABLE raw_table;
DROP TABLE daily_uniques; DROP TABLE daily_uniques;

View File

@ -23,13 +23,10 @@ EXPLAIN (COSTS FALSE)
GROUP BY l_orderkey HAVING sum(l_quantity) > 24 GROUP BY l_orderkey HAVING sum(l_quantity) > 24
ORDER BY 2 DESC, 1 ASC LIMIT 3; ORDER BY 2 DESC, 1 ASC LIMIT 3;
QUERY PLAN QUERY PLAN
-------------------------------------------------------------------------------------------------------- --------------------------------------------------------------------------------------------------
Limit Limit
-> Sort -> Sort
Sort Key: (sum(remote_scan.revenue)) DESC, remote_scan.l_orderkey Sort Key: remote_scan.revenue DESC, remote_scan.l_orderkey
-> HashAggregate
Group Key: remote_scan.l_orderkey
Filter: (sum(remote_scan.worker_column_3) > '24'::numeric)
-> Custom Scan (Citus Adaptive) -> Custom Scan (Citus Adaptive)
Task Count: 4 Task Count: 4
Tasks Shown: One of 4 Tasks Shown: One of 4
@ -42,7 +39,7 @@ EXPLAIN (COSTS FALSE)
Group Key: l_orderkey Group Key: l_orderkey
Filter: (sum(l_quantity) > '24'::numeric) Filter: (sum(l_quantity) > '24'::numeric)
-> Seq Scan on lineitem_hash_590000 lineitem_hash -> Seq Scan on lineitem_hash_590000 lineitem_hash
(18 rows) (15 rows)
-- but don't push down when table is distributed by append -- but don't push down when table is distributed by append
EXPLAIN (COSTS FALSE) EXPLAIN (COSTS FALSE)
@ -99,13 +96,10 @@ EXPLAIN (COSTS FALSE)
GROUP BY l_shipmode, l_orderkey HAVING sum(l_quantity) > 24 GROUP BY l_shipmode, l_orderkey HAVING sum(l_quantity) > 24
ORDER BY 3 DESC, 1, 2 LIMIT 3; ORDER BY 3 DESC, 1, 2 LIMIT 3;
QUERY PLAN QUERY PLAN
-------------------------------------------------------------------------------------------------------------------- --------------------------------------------------------------------------------------------------------------
Limit Limit
-> Sort -> Sort
Sort Key: (sum(remote_scan.revenue)) DESC, remote_scan.l_shipmode, remote_scan.l_orderkey Sort Key: remote_scan.revenue DESC, remote_scan.l_shipmode, remote_scan.l_orderkey
-> HashAggregate
Group Key: remote_scan.l_shipmode, remote_scan.l_orderkey
Filter: (sum(remote_scan.worker_column_4) > '24'::numeric)
-> Custom Scan (Citus Adaptive) -> Custom Scan (Citus Adaptive)
Task Count: 4 Task Count: 4
Tasks Shown: One of 4 Tasks Shown: One of 4
@ -118,7 +112,7 @@ EXPLAIN (COSTS FALSE)
Group Key: l_shipmode, l_orderkey Group Key: l_shipmode, l_orderkey
Filter: (sum(l_quantity) > '24'::numeric) Filter: (sum(l_quantity) > '24'::numeric)
-> Seq Scan on lineitem_hash_590000 lineitem_hash -> Seq Scan on lineitem_hash_590000 lineitem_hash
(18 rows) (15 rows)
-- couple more checks with joins -- couple more checks with joins
EXPLAIN (COSTS FALSE) EXPLAIN (COSTS FALSE)
@ -128,13 +122,10 @@ EXPLAIN (COSTS FALSE)
GROUP BY l_orderkey, o_orderkey, l_shipmode HAVING sum(l_quantity) > 24 GROUP BY l_orderkey, o_orderkey, l_shipmode HAVING sum(l_quantity) > 24
ORDER BY 1 DESC LIMIT 3; ORDER BY 1 DESC LIMIT 3;
QUERY PLAN QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------- -----------------------------------------------------------------------------------------------------------------------------
Limit Limit
-> Sort -> Sort
Sort Key: (sum(remote_scan.revenue)) DESC Sort Key: remote_scan.revenue DESC
-> HashAggregate
Group Key: remote_scan.worker_column_2, remote_scan.worker_column_3, remote_scan.worker_column_4
Filter: (sum(remote_scan.worker_column_5) > '24'::numeric)
-> Custom Scan (Citus Adaptive) -> Custom Scan (Citus Adaptive)
Task Count: 4 Task Count: 4
Tasks Shown: One of 4 Tasks Shown: One of 4
@ -151,7 +142,7 @@ EXPLAIN (COSTS FALSE)
-> Seq Scan on orders_hash_590004 orders_hash -> Seq Scan on orders_hash_590004 orders_hash
-> Hash -> Hash
-> Seq Scan on lineitem_hash_590000 lineitem_hash -> Seq Scan on lineitem_hash_590000 lineitem_hash
(22 rows) (19 rows)
EXPLAIN (COSTS FALSE) EXPLAIN (COSTS FALSE)
SELECT sum(l_extendedprice * l_discount) as revenue SELECT sum(l_extendedprice * l_discount) as revenue

View File

@ -35,24 +35,22 @@ GROUP BY user_id
ORDER BY avg(value_1) DESC ORDER BY avg(value_1) DESC
LIMIT 1; LIMIT 1;
QUERY PLAN QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------- --------------------------------------------------------------------------------------------------------------------------
Limit (cost=0.00..0.00 rows=0 width=0) Limit (cost=0.00..0.00 rows=0 width=0)
-> Sort (cost=0.00..0.00 rows=0 width=0) -> Sort (cost=0.00..0.00 rows=0 width=0)
Sort Key: ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1))) DESC Sort Key: remote_scan.avg DESC
-> HashAggregate (cost=0.00..0.00 rows=0 width=0)
Group Key: remote_scan.user_id
-> Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0) -> Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0)
Task Count: 4 Task Count: 4
Tasks Shown: One of 4 Tasks Shown: One of 4
-> Task -> Task
Node: host=localhost port=57637 dbname=regression Node: host=localhost port=57637 dbname=regression
-> Limit (cost=1.70..1.70 rows=1 width=52) -> Limit (cost=1.53..1.53 rows=1 width=36)
-> Sort (cost=1.70..1.70 rows=2 width=52) -> Sort (cost=1.53..1.53 rows=2 width=36)
Sort Key: (avg(value_1)) DESC Sort Key: (avg(value_1)) DESC
-> HashAggregate (cost=1.66..1.69 rows=2 width=52) -> HashAggregate (cost=1.50..1.52 rows=2 width=36)
Group Key: user_id Group Key: user_id
-> Seq Scan on users_table_1400256 users_table (cost=0.00..1.33 rows=33 width=8) -> Seq Scan on users_table_1400256 users_table (cost=0.00..1.33 rows=33 width=8)
(16 rows) (14 rows)
SELECT user_id, avg(value_1) + 1 SELECT user_id, avg(value_1) + 1
FROM users_table FROM users_table
@ -104,20 +102,18 @@ FROM users_table
GROUP BY user_id GROUP BY user_id
ORDER BY 2 DESC; ORDER BY 2 DESC;
QUERY PLAN QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- ---------------------------------------------------------------------------------------------------------
Sort (cost=0.00..0.00 rows=0 width=0) Sort (cost=0.00..0.00 rows=0 width=0)
Sort Key: (((pg_catalog.sum(remote_scan."?column?") / pg_catalog.sum(remote_scan."?column?_1")) + (COALESCE((pg_catalog.sum(remote_scan."?column?_2"))::bigint, '0'::bigint))::numeric)) DESC Sort Key: remote_scan."?column?" DESC
-> HashAggregate (cost=0.00..0.00 rows=0 width=0)
Group Key: remote_scan.user_id
-> Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0) -> Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0)
Task Count: 4 Task Count: 4
Tasks Shown: One of 4 Tasks Shown: One of 4
-> Task -> Task
Node: host=localhost port=57637 dbname=regression Node: host=localhost port=57637 dbname=regression
-> HashAggregate (cost=1.66..1.68 rows=2 width=28) -> HashAggregate (cost=1.58..1.61 rows=2 width=36)
Group Key: user_id Group Key: user_id
-> Seq Scan on users_table_1400256 users_table (cost=0.00..1.33 rows=33 width=12) -> Seq Scan on users_table_1400256 users_table (cost=0.00..1.33 rows=33 width=12)
(12 rows) (10 rows)
SELECT user_id, avg(value_1) + count(value_2) SELECT user_id, avg(value_1) + count(value_2)
FROM users_table FROM users_table
@ -222,12 +218,10 @@ GROUP BY user_id
ORDER BY (10000 / (sum(value_1 + value_2))) DESC ORDER BY (10000 / (sum(value_1 + value_2))) DESC
LIMIT 2; LIMIT 2;
QUERY PLAN QUERY PLAN
--------------------------------------------------------------------------------------------- ---------------------------------------------------------------------------------------
Limit Limit
-> Sort -> Sort
Sort Key: ((10000 / (pg_catalog.sum(remote_scan.worker_column_2))::bigint)) DESC Sort Key: remote_scan.worker_column_2 DESC
-> HashAggregate
Group Key: remote_scan.user_id
-> Custom Scan (Citus Adaptive) -> Custom Scan (Citus Adaptive)
Task Count: 4 Task Count: 4
Tasks Shown: One of 4 Tasks Shown: One of 4
@ -239,7 +233,7 @@ LIMIT 2;
-> HashAggregate -> HashAggregate
Group Key: user_id Group Key: user_id
-> Seq Scan on users_table_1400256 users_table -> Seq Scan on users_table_1400256 users_table
(16 rows) (14 rows)
SELECT 10000 / (sum(value_1 + value_2)) SELECT 10000 / (sum(value_1 + value_2))
FROM users_table FROM users_table
@ -290,12 +284,10 @@ GROUP BY user_id
ORDER BY sum(value_1) DESC ORDER BY sum(value_1) DESC
LIMIT 2; LIMIT 2;
QUERY PLAN QUERY PLAN
--------------------------------------------------------------------------------------------- ---------------------------------------------------------------------------------------
Limit Limit
-> Sort -> Sort
Sort Key: ((pg_catalog.sum(remote_scan.worker_column_2))::bigint) DESC Sort Key: remote_scan.worker_column_2 DESC
-> HashAggregate
Group Key: remote_scan.user_id
-> Custom Scan (Citus Adaptive) -> Custom Scan (Citus Adaptive)
Task Count: 4 Task Count: 4
Tasks Shown: One of 4 Tasks Shown: One of 4
@ -307,7 +299,7 @@ LIMIT 2;
-> HashAggregate -> HashAggregate
Group Key: user_id Group Key: user_id
-> Seq Scan on users_table_1400256 users_table -> Seq Scan on users_table_1400256 users_table
(16 rows) (14 rows)
SELECT ut.user_id, avg(ut.value_2) SELECT ut.user_id, avg(ut.value_2)
FROM users_table ut, events_table et FROM users_table ut, events_table et
@ -332,12 +324,10 @@ GROUP BY ut.user_id
ORDER BY MAX(et.time), AVG(ut.value_1) ORDER BY MAX(et.time), AVG(ut.value_1)
LIMIT 5; LIMIT 5;
QUERY PLAN QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------- -------------------------------------------------------------------------------------------
Limit Limit
-> Sort -> Sort
Sort Key: (max(remote_scan.worker_column_4)), ((pg_catalog.sum(remote_scan.worker_column_5) / pg_catalog.sum(remote_scan.worker_column_6))) Sort Key: remote_scan.worker_column_3, remote_scan.worker_column_4
-> HashAggregate
Group Key: remote_scan.user_id
-> Custom Scan (Citus Adaptive) -> Custom Scan (Citus Adaptive)
Task Count: 4 Task Count: 4
Tasks Shown: One of 4 Tasks Shown: One of 4
@ -354,7 +344,7 @@ LIMIT 5;
-> Hash -> Hash
-> Seq Scan on events_table_1400260 et -> Seq Scan on events_table_1400260 et
Filter: (value_2 < 5) Filter: (value_2 < 5)
(21 rows) (19 rows)
SELECT ut.user_id, avg(et.value_2) SELECT ut.user_id, avg(et.value_2)
FROM users_table ut, events_table et FROM users_table ut, events_table et
@ -391,12 +381,10 @@ GROUP BY ut.user_id
ORDER BY 2, AVG(ut.value_1), 1 DESC ORDER BY 2, AVG(ut.value_1), 1 DESC
LIMIT 5; LIMIT 5;
QUERY PLAN QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- ------------------------------------------------------------------------------------------------------------
Limit Limit
-> Sort -> Sort
Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)), ((pg_catalog.sum(remote_scan.worker_column_3) / pg_catalog.sum(remote_scan.worker_column_4))), remote_scan.user_id DESC Sort Key: remote_scan.count, remote_scan.worker_column_3, remote_scan.user_id DESC
-> HashAggregate
Group Key: remote_scan.user_id
-> Custom Scan (Citus Adaptive) -> Custom Scan (Citus Adaptive)
Task Count: 4 Task Count: 4
Tasks Shown: One of 4 Tasks Shown: One of 4
@ -415,5 +403,5 @@ LIMIT 5;
-> Hash -> Hash
-> Seq Scan on events_table_1400260 et -> Seq Scan on events_table_1400260 et
Filter: (value_2 < 5) Filter: (value_2 < 5)
(23 rows) (21 rows)

View File

@ -461,10 +461,10 @@ DEBUG: generated sql query for task 3
DETAIL: query string: "SELECT s_i_id, s_w_id, s_quantity FROM stock_690006 stock WHERE true" DETAIL: query string: "SELECT s_i_id, s_w_id, s_quantity FROM stock_690006 stock WHERE true"
DEBUG: generated sql query for task 4 DEBUG: generated sql query for task 4
DETAIL: query string: "SELECT s_i_id, s_w_id, s_quantity FROM stock_690007 stock WHERE true" DETAIL: query string: "SELECT s_i_id, s_w_id, s_quantity FROM stock_690007 stock WHERE true"
DEBUG: assigned task 1 to node localhost:57637 DEBUG: assigned task 2 to node localhost:57637
DEBUG: assigned task 2 to node localhost:57638 DEBUG: assigned task 1 to node localhost:57638
DEBUG: assigned task 3 to node localhost:57637 DEBUG: assigned task 4 to node localhost:57637
DEBUG: assigned task 4 to node localhost:57638 DEBUG: assigned task 3 to node localhost:57638
DEBUG: generated sql query for task 1 DEBUG: generated sql query for task 1
DETAIL: query string: "SELECT ol_i_id FROM order_line_690000 order_line WHERE true" DETAIL: query string: "SELECT ol_i_id FROM order_line_690000 order_line WHERE true"
DEBUG: generated sql query for task 2 DEBUG: generated sql query for task 2
@ -490,13 +490,13 @@ DEBUG: join prunable for task partitionId 3 and 0
DEBUG: join prunable for task partitionId 3 and 1 DEBUG: join prunable for task partitionId 3 and 1
DEBUG: join prunable for task partitionId 3 and 2 DEBUG: join prunable for task partitionId 3 and 2
DEBUG: generated sql query for task 3 DEBUG: generated sql query for task 3
DETAIL: query string: "SELECT "pg_merge_job_0016.task_000005".intermediate_column_16_0 AS s_i_id, "pg_merge_job_0016.task_000005".intermediate_column_16_1 AS worker_column_2, any_value("pg_merge_job_0016.task_000005".intermediate_column_16_2) AS worker_column_3, any_value("pg_merge_job_0016.task_000005".intermediate_column_16_2) AS worker_column_4 FROM (pg_merge_job_0016.task_000005 "pg_merge_job_0016.task_000005" JOIN pg_merge_job_0017.task_000005 "pg_merge_job_0017.task_000005" ON (("pg_merge_job_0017.task_000005".intermediate_column_17_0 OPERATOR(pg_catalog.=) "pg_merge_job_0016.task_000005".intermediate_column_16_0))) WHERE true GROUP BY "pg_merge_job_0016.task_000005".intermediate_column_16_0, "pg_merge_job_0016.task_000005".intermediate_column_16_1 HAVING ((any_value("pg_merge_job_0016.task_000005".intermediate_column_16_2))::double precision OPERATOR(pg_catalog.>) random())" DETAIL: query string: "SELECT "pg_merge_job_0016.task_000005".intermediate_column_16_0 AS s_i_id, "pg_merge_job_0016.task_000005".intermediate_column_16_1 AS worker_column_2, any_value("pg_merge_job_0016.task_000005".intermediate_column_16_2) AS worker_column_3 FROM (pg_merge_job_0016.task_000005 "pg_merge_job_0016.task_000005" JOIN pg_merge_job_0017.task_000005 "pg_merge_job_0017.task_000005" ON (("pg_merge_job_0017.task_000005".intermediate_column_17_0 OPERATOR(pg_catalog.=) "pg_merge_job_0016.task_000005".intermediate_column_16_0))) WHERE true GROUP BY "pg_merge_job_0016.task_000005".intermediate_column_16_0, "pg_merge_job_0016.task_000005".intermediate_column_16_1 HAVING ((any_value("pg_merge_job_0016.task_000005".intermediate_column_16_2))::double precision OPERATOR(pg_catalog.>) random())"
DEBUG: generated sql query for task 6 DEBUG: generated sql query for task 6
DETAIL: query string: "SELECT "pg_merge_job_0016.task_000010".intermediate_column_16_0 AS s_i_id, "pg_merge_job_0016.task_000010".intermediate_column_16_1 AS worker_column_2, any_value("pg_merge_job_0016.task_000010".intermediate_column_16_2) AS worker_column_3, any_value("pg_merge_job_0016.task_000010".intermediate_column_16_2) AS worker_column_4 FROM (pg_merge_job_0016.task_000010 "pg_merge_job_0016.task_000010" JOIN pg_merge_job_0017.task_000010 "pg_merge_job_0017.task_000010" ON (("pg_merge_job_0017.task_000010".intermediate_column_17_0 OPERATOR(pg_catalog.=) "pg_merge_job_0016.task_000010".intermediate_column_16_0))) WHERE true GROUP BY "pg_merge_job_0016.task_000010".intermediate_column_16_0, "pg_merge_job_0016.task_000010".intermediate_column_16_1 HAVING ((any_value("pg_merge_job_0016.task_000010".intermediate_column_16_2))::double precision OPERATOR(pg_catalog.>) random())" DETAIL: query string: "SELECT "pg_merge_job_0016.task_000010".intermediate_column_16_0 AS s_i_id, "pg_merge_job_0016.task_000010".intermediate_column_16_1 AS worker_column_2, any_value("pg_merge_job_0016.task_000010".intermediate_column_16_2) AS worker_column_3 FROM (pg_merge_job_0016.task_000010 "pg_merge_job_0016.task_000010" JOIN pg_merge_job_0017.task_000010 "pg_merge_job_0017.task_000010" ON (("pg_merge_job_0017.task_000010".intermediate_column_17_0 OPERATOR(pg_catalog.=) "pg_merge_job_0016.task_000010".intermediate_column_16_0))) WHERE true GROUP BY "pg_merge_job_0016.task_000010".intermediate_column_16_0, "pg_merge_job_0016.task_000010".intermediate_column_16_1 HAVING ((any_value("pg_merge_job_0016.task_000010".intermediate_column_16_2))::double precision OPERATOR(pg_catalog.>) random())"
DEBUG: generated sql query for task 9 DEBUG: generated sql query for task 9
DETAIL: query string: "SELECT "pg_merge_job_0016.task_000015".intermediate_column_16_0 AS s_i_id, "pg_merge_job_0016.task_000015".intermediate_column_16_1 AS worker_column_2, any_value("pg_merge_job_0016.task_000015".intermediate_column_16_2) AS worker_column_3, any_value("pg_merge_job_0016.task_000015".intermediate_column_16_2) AS worker_column_4 FROM (pg_merge_job_0016.task_000015 "pg_merge_job_0016.task_000015" JOIN pg_merge_job_0017.task_000015 "pg_merge_job_0017.task_000015" ON (("pg_merge_job_0017.task_000015".intermediate_column_17_0 OPERATOR(pg_catalog.=) "pg_merge_job_0016.task_000015".intermediate_column_16_0))) WHERE true GROUP BY "pg_merge_job_0016.task_000015".intermediate_column_16_0, "pg_merge_job_0016.task_000015".intermediate_column_16_1 HAVING ((any_value("pg_merge_job_0016.task_000015".intermediate_column_16_2))::double precision OPERATOR(pg_catalog.>) random())" DETAIL: query string: "SELECT "pg_merge_job_0016.task_000015".intermediate_column_16_0 AS s_i_id, "pg_merge_job_0016.task_000015".intermediate_column_16_1 AS worker_column_2, any_value("pg_merge_job_0016.task_000015".intermediate_column_16_2) AS worker_column_3 FROM (pg_merge_job_0016.task_000015 "pg_merge_job_0016.task_000015" JOIN pg_merge_job_0017.task_000015 "pg_merge_job_0017.task_000015" ON (("pg_merge_job_0017.task_000015".intermediate_column_17_0 OPERATOR(pg_catalog.=) "pg_merge_job_0016.task_000015".intermediate_column_16_0))) WHERE true GROUP BY "pg_merge_job_0016.task_000015".intermediate_column_16_0, "pg_merge_job_0016.task_000015".intermediate_column_16_1 HAVING ((any_value("pg_merge_job_0016.task_000015".intermediate_column_16_2))::double precision OPERATOR(pg_catalog.>) random())"
DEBUG: generated sql query for task 12 DEBUG: generated sql query for task 12
DETAIL: query string: "SELECT "pg_merge_job_0016.task_000020".intermediate_column_16_0 AS s_i_id, "pg_merge_job_0016.task_000020".intermediate_column_16_1 AS worker_column_2, any_value("pg_merge_job_0016.task_000020".intermediate_column_16_2) AS worker_column_3, any_value("pg_merge_job_0016.task_000020".intermediate_column_16_2) AS worker_column_4 FROM (pg_merge_job_0016.task_000020 "pg_merge_job_0016.task_000020" JOIN pg_merge_job_0017.task_000020 "pg_merge_job_0017.task_000020" ON (("pg_merge_job_0017.task_000020".intermediate_column_17_0 OPERATOR(pg_catalog.=) "pg_merge_job_0016.task_000020".intermediate_column_16_0))) WHERE true GROUP BY "pg_merge_job_0016.task_000020".intermediate_column_16_0, "pg_merge_job_0016.task_000020".intermediate_column_16_1 HAVING ((any_value("pg_merge_job_0016.task_000020".intermediate_column_16_2))::double precision OPERATOR(pg_catalog.>) random())" DETAIL: query string: "SELECT "pg_merge_job_0016.task_000020".intermediate_column_16_0 AS s_i_id, "pg_merge_job_0016.task_000020".intermediate_column_16_1 AS worker_column_2, any_value("pg_merge_job_0016.task_000020".intermediate_column_16_2) AS worker_column_3 FROM (pg_merge_job_0016.task_000020 "pg_merge_job_0016.task_000020" JOIN pg_merge_job_0017.task_000020 "pg_merge_job_0017.task_000020" ON (("pg_merge_job_0017.task_000020".intermediate_column_17_0 OPERATOR(pg_catalog.=) "pg_merge_job_0016.task_000020".intermediate_column_16_0))) WHERE true GROUP BY "pg_merge_job_0016.task_000020".intermediate_column_16_0, "pg_merge_job_0016.task_000020".intermediate_column_16_1 HAVING ((any_value("pg_merge_job_0016.task_000020".intermediate_column_16_2))::double precision OPERATOR(pg_catalog.>) random())"
DEBUG: pruning merge fetch taskId 1 DEBUG: pruning merge fetch taskId 1
DETAIL: Creating dependency on merge taskId 5 DETAIL: Creating dependency on merge taskId 5
DEBUG: pruning merge fetch taskId 2 DEBUG: pruning merge fetch taskId 2

View File

@ -208,12 +208,11 @@ EXPLAIN (COSTS FALSE)
HAVING count(*) > 5 HAVING count(*) > 5
ORDER BY 2 DESC, 1; ORDER BY 2 DESC, 1;
QUERY PLAN QUERY PLAN
--------------------------------------------------------------------------------------------------------------- ----------------------------------------------------------------------------------------
Sort Sort
Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)) DESC, remote_scan.l_orderkey Sort Key: remote_scan.count DESC, remote_scan.l_orderkey
-> HashAggregate -> HashAggregate
Group Key: remote_scan.l_orderkey Group Key: remote_scan.count, remote_scan.l_orderkey
Filter: (COALESCE((pg_catalog.sum(remote_scan.worker_column_3))::bigint, '0'::bigint) > 5)
-> Custom Scan (Citus Adaptive) -> Custom Scan (Citus Adaptive)
Task Count: 4 Task Count: 4
Tasks Shown: One of 4 Tasks Shown: One of 4
@ -224,7 +223,7 @@ EXPLAIN (COSTS FALSE)
Filter: (count(*) > 5) Filter: (count(*) > 5)
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
Filter: (l_orderkey < 200) Filter: (l_orderkey < 200)
(15 rows) (14 rows)
-- check the plan if the hash aggreate is disabled -- check the plan if the hash aggreate is disabled
SET enable_hashagg TO off; SET enable_hashagg TO off;
@ -236,14 +235,12 @@ EXPLAIN (COSTS FALSE)
HAVING count(*) > 5 HAVING count(*) > 5
ORDER BY 2 DESC, 1; ORDER BY 2 DESC, 1;
QUERY PLAN QUERY PLAN
--------------------------------------------------------------------------------------------------------------- ----------------------------------------------------------------------------------------------
Sort Sort
Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)) DESC, remote_scan.l_orderkey Sort Key: remote_scan.count DESC, remote_scan.l_orderkey
-> GroupAggregate -> Unique
Group Key: remote_scan.l_orderkey
Filter: (COALESCE((pg_catalog.sum(remote_scan.worker_column_3))::bigint, '0'::bigint) > 5)
-> Sort -> Sort
Sort Key: remote_scan.l_orderkey Sort Key: remote_scan.count DESC, remote_scan.l_orderkey
-> Custom Scan (Citus Adaptive) -> Custom Scan (Citus Adaptive)
Task Count: 4 Task Count: 4
Tasks Shown: One of 4 Tasks Shown: One of 4
@ -254,7 +251,7 @@ EXPLAIN (COSTS FALSE)
Filter: (count(*) > 5) Filter: (count(*) > 5)
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
Filter: (l_orderkey < 200) Filter: (l_orderkey < 200)
(17 rows) (15 rows)
SET enable_hashagg TO on; SET enable_hashagg TO on;
-- distinct on aggregate of group by columns, we try to check whether we handle -- distinct on aggregate of group by columns, we try to check whether we handle
@ -782,8 +779,7 @@ EXPLAIN (COSTS FALSE)
Limit Limit
-> Sort -> Sort
Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey
-> GroupAggregate -> Unique
Group Key: remote_scan.l_orderkey, remote_scan.l_partkey, remote_scan.l_suppkey, remote_scan.l_linenumber, remote_scan.l_quantity, remote_scan.l_extendedprice, remote_scan.l_discount, remote_scan.l_tax, remote_scan.l_returnflag, remote_scan.l_linestatus, remote_scan.l_shipdate, remote_scan.l_commitdate, remote_scan.l_receiptdate, remote_scan.l_shipinstruct, remote_scan.l_shipmode, remote_scan.l_comment
-> Sort -> Sort
Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey, remote_scan.l_suppkey, remote_scan.l_linenumber, remote_scan.l_quantity, remote_scan.l_extendedprice, remote_scan.l_discount, remote_scan.l_tax, remote_scan.l_returnflag, remote_scan.l_linestatus, remote_scan.l_shipdate, remote_scan.l_commitdate, remote_scan.l_receiptdate, remote_scan.l_shipinstruct, remote_scan.l_shipmode, remote_scan.l_comment Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey, remote_scan.l_suppkey, remote_scan.l_linenumber, remote_scan.l_quantity, remote_scan.l_extendedprice, remote_scan.l_discount, remote_scan.l_tax, remote_scan.l_returnflag, remote_scan.l_linestatus, remote_scan.l_shipdate, remote_scan.l_commitdate, remote_scan.l_receiptdate, remote_scan.l_shipinstruct, remote_scan.l_shipmode, remote_scan.l_comment
-> Custom Scan (Citus Adaptive) -> Custom Scan (Citus Adaptive)
@ -798,7 +794,7 @@ EXPLAIN (COSTS FALSE)
-> Sort -> Sort
Sort Key: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment Sort Key: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(19 rows) (18 rows)
SET enable_hashagg TO on; SET enable_hashagg TO on;
-- distinct on count distinct -- distinct on count distinct
@ -843,13 +839,11 @@ EXPLAIN (COSTS FALSE)
GROUP BY l_orderkey GROUP BY l_orderkey
ORDER BY 1,2; ORDER BY 1,2;
QUERY PLAN QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------- ----------------------------------------------------------------------------------------------
Sort Sort
Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)), (COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint)) Sort Key: remote_scan.count, remote_scan.count_1
-> HashAggregate -> HashAggregate
Group Key: COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint), COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint) Group Key: remote_scan.count, remote_scan.count_1
-> HashAggregate
Group Key: remote_scan.worker_column_3
-> Custom Scan (Citus Adaptive) -> Custom Scan (Citus Adaptive)
Task Count: 4 Task Count: 4
Tasks Shown: One of 4 Tasks Shown: One of 4
@ -860,7 +854,7 @@ EXPLAIN (COSTS FALSE)
-> Sort -> Sort
Sort Key: l_orderkey Sort Key: l_orderkey
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(16 rows) (14 rows)
-- check the plan if the hash aggreate is disabled. We expect to see sort + unique -- check the plan if the hash aggreate is disabled. We expect to see sort + unique
-- plans for the outer distinct. -- plans for the outer distinct.
@ -871,16 +865,12 @@ EXPLAIN (COSTS FALSE)
GROUP BY l_orderkey GROUP BY l_orderkey
ORDER BY 1,2; ORDER BY 1,2;
QUERY PLAN QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------- ----------------------------------------------------------------------------------------------------
Sort Sort
Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)), (COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint)) Sort Key: remote_scan.count, remote_scan.count_1
-> Unique -> Unique
-> Sort -> Sort
Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)), (COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint)) Sort Key: remote_scan.count, remote_scan.count_1
-> GroupAggregate
Group Key: remote_scan.worker_column_3
-> Sort
Sort Key: remote_scan.worker_column_3
-> Custom Scan (Citus Adaptive) -> Custom Scan (Citus Adaptive)
Task Count: 4 Task Count: 4
Tasks Shown: One of 4 Tasks Shown: One of 4
@ -891,7 +881,7 @@ EXPLAIN (COSTS FALSE)
-> Sort -> Sort
Sort Key: l_orderkey Sort Key: l_orderkey
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(19 rows) (15 rows)
SET enable_hashagg TO on; SET enable_hashagg TO on;
-- distinct on aggregation with filter and expression -- distinct on aggregation with filter and expression
@ -969,14 +959,12 @@ EXPLAIN (COSTS FALSE)
ORDER BY 2 ORDER BY 2
LIMIT 15; LIMIT 15;
QUERY PLAN QUERY PLAN
------------------------------------------------------------------------------------------------------------------------- ----------------------------------------------------------------------------------------------------
Limit Limit
-> Sort -> Sort
Sort Key: (array_length(array_cat_agg(remote_scan.array_length), 1)) Sort Key: remote_scan.array_length
-> HashAggregate -> HashAggregate
Group Key: array_length(array_cat_agg(remote_scan.array_length), 1), array_cat_agg(remote_scan.array_agg) Group Key: remote_scan.array_length, remote_scan.array_agg
-> HashAggregate
Group Key: remote_scan.worker_column_3
-> Custom Scan (Citus Adaptive) -> Custom Scan (Citus Adaptive)
Task Count: 4 Task Count: 4
Tasks Shown: One of 4 Tasks Shown: One of 4
@ -987,7 +975,7 @@ EXPLAIN (COSTS FALSE)
-> Sort -> Sort
Sort Key: l_orderkey Sort Key: l_orderkey
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(17 rows) (15 rows)
-- check the plan if the hash aggreate is disabled. -- check the plan if the hash aggreate is disabled.
SET enable_hashagg TO off; SET enable_hashagg TO off;
@ -998,17 +986,13 @@ EXPLAIN (COSTS FALSE)
ORDER BY 2 ORDER BY 2
LIMIT 15; LIMIT 15;
QUERY PLAN QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------- ----------------------------------------------------------------------------------------------------------
Limit Limit
-> Sort -> Sort
Sort Key: (array_length(array_cat_agg(remote_scan.array_length), 1)) Sort Key: remote_scan.array_length
-> Unique -> Unique
-> Sort -> Sort
Sort Key: (array_length(array_cat_agg(remote_scan.array_length), 1)), (array_cat_agg(remote_scan.array_agg)) Sort Key: remote_scan.array_length, remote_scan.array_agg
-> GroupAggregate
Group Key: remote_scan.worker_column_3
-> Sort
Sort Key: remote_scan.worker_column_3
-> Custom Scan (Citus Adaptive) -> Custom Scan (Citus Adaptive)
Task Count: 4 Task Count: 4
Tasks Shown: One of 4 Tasks Shown: One of 4
@ -1019,7 +1003,7 @@ EXPLAIN (COSTS FALSE)
-> Sort -> Sort
Sort Key: l_orderkey Sort Key: l_orderkey
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(20 rows) (16 rows)
SET enable_hashagg TO on; SET enable_hashagg TO on;
-- distinct on non-partition column with aggregate -- distinct on non-partition column with aggregate

View File

@ -808,7 +808,7 @@ EXPLAIN (COSTS FALSE) SELECT *
EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10; EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10;
QUERY PLAN QUERY PLAN
--------------------------------------------------------------------------------------------------------------------- ---------------------------------------------------------------------------------------------------------------
Limit Limit
-> Sort -> Sort
Sort Key: remote_scan."time" DESC Sort Key: remote_scan."time" DESC
@ -816,9 +816,7 @@ EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USIN
-> Distributed Subplan 90_1 -> Distributed Subplan 90_1
-> Limit -> Limit
-> Sort -> Sort
Sort Key: (max(remote_scan.lastseen)) DESC Sort Key: remote_scan.lastseen DESC
-> HashAggregate
Group Key: remote_scan.user_id
-> Custom Scan (Citus Adaptive) -> Custom Scan (Citus Adaptive)
Task Count: 4 Task Count: 4
Tasks Shown: One of 4 Tasks Shown: One of 4
@ -842,7 +840,7 @@ EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USIN
-> Function Scan on read_intermediate_result intermediate_result -> Function Scan on read_intermediate_result intermediate_result
-> Hash -> Hash
-> Seq Scan on events_table_1400260 et -> Seq Scan on events_table_1400260 et
(33 rows) (31 rows)
SET citus.subquery_pushdown to ON; SET citus.subquery_pushdown to ON;
EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10; EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10;

View File

@ -54,11 +54,11 @@ SELECT
FROM FROM
( (
SELECT SELECT
array_agg(users_table.user_id ORDER BY users_table.time) array_agg(users_table.value_2 ORDER BY users_table.time)
FROM FROM
users_table, (SELECT user_id FROM events_table) as evs users_table, (SELECT user_id FROM events_table) as evs
WHERE users_table.user_id = evs.user_id WHERE users_table.user_id = evs.user_id
GROUP BY users_table.user_id GROUP BY users_table.value_2
LIMIT 5 LIMIT 5
) as foo; ) as foo;
ERROR: array_agg with order by is unsupported ERROR: array_agg with order by is unsupported

View File

@ -62,15 +62,12 @@ SELECT
ORDER BY 2 DESC, 1 DESC ORDER BY 2 DESC, 1 DESC
LIMIT 10; LIMIT 10;
QUERY PLAN QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit Limit
Output: remote_scan.l_orderkey, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)) Output: remote_scan.l_orderkey, remote_scan.count
-> Sort -> Sort
Output: remote_scan.l_orderkey, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)) Output: remote_scan.l_orderkey, remote_scan.count
Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)) DESC, remote_scan.l_orderkey DESC Sort Key: remote_scan.count DESC, remote_scan.l_orderkey DESC
-> HashAggregate
Output: remote_scan.l_orderkey, COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)
Group Key: remote_scan.l_orderkey
-> Custom Scan (Citus Task-Tracker) -> Custom Scan (Citus Task-Tracker)
Output: remote_scan.l_orderkey, remote_scan.count Output: remote_scan.l_orderkey, remote_scan.count
Task Count: 8 Task Count: 8
@ -87,7 +84,7 @@ SELECT
Group Key: lineitem_hash.l_orderkey Group Key: lineitem_hash.l_orderkey
-> Index Scan Backward using lineitem_hash_pkey_240000 on public.lineitem_hash_240000 lineitem_hash -> Index Scan Backward using lineitem_hash_pkey_240000 on public.lineitem_hash_240000 lineitem_hash
Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
(24 rows) (21 rows)
-- it is also supported if there is no grouping or grouping is on non-partition field -- it is also supported if there is no grouping or grouping is on non-partition field
SELECT SELECT
@ -207,15 +204,12 @@ SELECT
ORDER BY 3 DESC, 2 DESC, 1 ORDER BY 3 DESC, 2 DESC, 1
LIMIT 10; LIMIT 10;
QUERY PLAN QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit Limit
Output: remote_scan.l_orderkey, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)), (COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint)) Output: remote_scan.l_orderkey, remote_scan.count, remote_scan.count_1
-> Sort -> Sort
Output: remote_scan.l_orderkey, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)), (COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint)) Output: remote_scan.l_orderkey, remote_scan.count, remote_scan.count_1
Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint)) DESC, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)) DESC, remote_scan.l_orderkey Sort Key: remote_scan.count_1 DESC, remote_scan.count DESC, remote_scan.l_orderkey
-> HashAggregate
Output: remote_scan.l_orderkey, COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint), COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint)
Group Key: remote_scan.l_orderkey
-> Custom Scan (Citus Task-Tracker) -> Custom Scan (Citus Task-Tracker)
Output: remote_scan.l_orderkey, remote_scan.count, remote_scan.count_1 Output: remote_scan.l_orderkey, remote_scan.count, remote_scan.count_1
Task Count: 8 Task Count: 8
@ -232,7 +226,7 @@ SELECT
Group Key: lineitem_hash.l_orderkey Group Key: lineitem_hash.l_orderkey
-> Index Scan using lineitem_hash_pkey_240000 on public.lineitem_hash_240000 lineitem_hash -> Index Scan using lineitem_hash_pkey_240000 on public.lineitem_hash_240000 lineitem_hash
Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
(24 rows) (21 rows)
-- partition/non-partition column count distinct no grouping -- partition/non-partition column count distinct no grouping
SELECT SELECT
@ -491,15 +485,12 @@ SELECT
ORDER BY 2 DESC, 3 DESC, 1 ORDER BY 2 DESC, 3 DESC, 1
LIMIT 10; LIMIT 10;
QUERY PLAN QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit Limit
Output: remote_scan.l_orderkey, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)), (COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint)) Output: remote_scan.l_orderkey, remote_scan.count, remote_scan.count_1
-> Sort -> Sort
Output: remote_scan.l_orderkey, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)), (COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint)) Output: remote_scan.l_orderkey, remote_scan.count, remote_scan.count_1
Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)) DESC, (COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint)) DESC, remote_scan.l_orderkey Sort Key: remote_scan.count DESC, remote_scan.count_1 DESC, remote_scan.l_orderkey
-> HashAggregate
Output: remote_scan.l_orderkey, COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint), COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint)
Group Key: remote_scan.l_orderkey
-> Custom Scan (Citus Task-Tracker) -> Custom Scan (Citus Task-Tracker)
Output: remote_scan.l_orderkey, remote_scan.count, remote_scan.count_1 Output: remote_scan.l_orderkey, remote_scan.count, remote_scan.count_1
Task Count: 8 Task Count: 8
@ -516,7 +507,7 @@ SELECT
Group Key: lineitem_hash.l_orderkey Group Key: lineitem_hash.l_orderkey
-> Index Scan using lineitem_hash_pkey_240000 on public.lineitem_hash_240000 lineitem_hash -> Index Scan using lineitem_hash_pkey_240000 on public.lineitem_hash_240000 lineitem_hash
Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
(24 rows) (21 rows)
-- group by on non-partition column -- group by on non-partition column
SELECT SELECT

View File

@ -97,6 +97,14 @@ SELECT create_distributed_function('last(anyelement)');
SELECT key, first(val ORDER BY id), last(val ORDER BY id) SELECT key, first(val ORDER BY id), last(val ORDER BY id)
FROM aggdata GROUP BY key ORDER BY key; FROM aggdata GROUP BY key ORDER BY key;
-- However, GROUP BY on distribution column gets pushed down
SELECT id, first(val ORDER BY key), last(val ORDER BY key)
FROM aggdata GROUP BY id ORDER BY id;
-- Test that expressions don't slip past. This fails
SELECT id%5, first(val ORDER BY key), last(val ORDER BY key)
FROM aggdata GROUP BY id%5 ORDER BY id%5;
-- test aggregate with stype which is not a by-value datum -- test aggregate with stype which is not a by-value datum
-- also test our handling of the aggregate not existing on workers -- also test our handling of the aggregate not existing on workers
create function sumstring_sfunc(state text, x text) create function sumstring_sfunc(state text, x text)

View File

@ -53,11 +53,11 @@ SELECT
FROM FROM
( (
SELECT SELECT
array_agg(users_table.user_id ORDER BY users_table.time) array_agg(users_table.value_2 ORDER BY users_table.time)
FROM FROM
users_table, (SELECT user_id FROM events_table) as evs users_table, (SELECT user_id FROM events_table) as evs
WHERE users_table.user_id = evs.user_id WHERE users_table.user_id = evs.user_id
GROUP BY users_table.user_id GROUP BY users_table.value_2
LIMIT 5 LIMIT 5
) as foo; ) as foo;