mirror of https://github.com/citusdata/citus.git
Merge pull request #3406 from citusdata/fix-limit-approx
Expand the set of aggregates which cannot have LIMIT approximated
pull/3423/head
commit
6b43fab325
|
@ -136,6 +136,18 @@ typedef struct QueryOrderByLimit
|
||||||
} QueryOrderByLimit;
|
} QueryOrderByLimit;
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* LimitPushdownable tells us how a limit can be pushed down.
|
||||||
|
* See WorkerLimitCount for details.
|
||||||
|
*/
|
||||||
|
typedef enum LimitPushdownable
|
||||||
|
{
|
||||||
|
LIMIT_CANNOT_PUSHDOWN,
|
||||||
|
LIMIT_CAN_PUSHDOWN,
|
||||||
|
LIMIT_CAN_APPROXIMATE,
|
||||||
|
} LimitPushdownable;
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* OrderByLimitReference a structure that is used commonly while
|
* OrderByLimitReference a structure that is used commonly while
|
||||||
* processing sort and limit clauses.
|
* processing sort and limit clauses.
|
||||||
|
@ -300,7 +312,7 @@ static List * GenerateNewTargetEntriesForSortClauses(List *originalTargetList,
|
||||||
Index *nextSortGroupRefIndex);
|
Index *nextSortGroupRefIndex);
|
||||||
static bool CanPushDownLimitApproximate(List *sortClauseList, List *targetList);
|
static bool CanPushDownLimitApproximate(List *sortClauseList, List *targetList);
|
||||||
static bool HasOrderByAggregate(List *sortClauseList, List *targetList);
|
static bool HasOrderByAggregate(List *sortClauseList, List *targetList);
|
||||||
static bool HasOrderByAverage(List *sortClauseList, List *targetList);
|
static bool HasOrderByNonCommutativeAggregate(List *sortClauseList, List *targetList);
|
||||||
static bool HasOrderByComplexExpression(List *sortClauseList, List *targetList);
|
static bool HasOrderByComplexExpression(List *sortClauseList, List *targetList);
|
||||||
static bool HasOrderByHllType(List *sortClauseList, List *targetList);
|
static bool HasOrderByHllType(List *sortClauseList, List *targetList);
|
||||||
|
|
||||||
|
@ -4213,8 +4225,7 @@ WorkerLimitCount(Node *limitCount, Node *limitOffset, OrderByLimitReference
|
||||||
orderByLimitReference)
|
orderByLimitReference)
|
||||||
{
|
{
|
||||||
Node *workerLimitNode = NULL;
|
Node *workerLimitNode = NULL;
|
||||||
bool canPushDownLimit = false;
|
LimitPushdownable canPushDownLimit = LIMIT_CANNOT_PUSHDOWN;
|
||||||
bool canApproximate = false;
|
|
||||||
|
|
||||||
/* no limit node to push down */
|
/* no limit node to push down */
|
||||||
if (limitCount == NULL)
|
if (limitCount == NULL)
|
||||||
|
@ -4239,27 +4250,27 @@ WorkerLimitCount(Node *limitCount, Node *limitOffset, OrderByLimitReference
|
||||||
if (orderByLimitReference.groupClauseIsEmpty ||
|
if (orderByLimitReference.groupClauseIsEmpty ||
|
||||||
orderByLimitReference.groupedByDisjointPartitionColumn)
|
orderByLimitReference.groupedByDisjointPartitionColumn)
|
||||||
{
|
{
|
||||||
canPushDownLimit = true;
|
canPushDownLimit = LIMIT_CAN_PUSHDOWN;
|
||||||
}
|
}
|
||||||
else if (orderByLimitReference.sortClauseIsEmpty)
|
else if (orderByLimitReference.sortClauseIsEmpty)
|
||||||
{
|
{
|
||||||
canPushDownLimit = false;
|
canPushDownLimit = LIMIT_CANNOT_PUSHDOWN;
|
||||||
}
|
}
|
||||||
else if (!orderByLimitReference.hasOrderByAggregate)
|
else if (!orderByLimitReference.hasOrderByAggregate)
|
||||||
{
|
{
|
||||||
canPushDownLimit = true;
|
canPushDownLimit = LIMIT_CAN_PUSHDOWN;
|
||||||
}
|
}
|
||||||
else
|
else if (orderByLimitReference.canApproximate)
|
||||||
{
|
{
|
||||||
canApproximate = orderByLimitReference.canApproximate;
|
canPushDownLimit = LIMIT_CAN_APPROXIMATE;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* create the workerLimitNode according to the decisions above */
|
/* create the workerLimitNode according to the decisions above */
|
||||||
if (canPushDownLimit)
|
if (canPushDownLimit == LIMIT_CAN_PUSHDOWN)
|
||||||
{
|
{
|
||||||
workerLimitNode = (Node *) copyObject(limitCount);
|
workerLimitNode = (Node *) copyObject(limitCount);
|
||||||
}
|
}
|
||||||
else if (canApproximate)
|
else if (canPushDownLimit == LIMIT_CAN_APPROXIMATE)
|
||||||
{
|
{
|
||||||
Const *workerLimitConst = (Const *) copyObject(limitCount);
|
Const *workerLimitConst = (Const *) copyObject(limitCount);
|
||||||
int64 workerLimitCount = (int64) LimitClauseRowFetchCount;
|
int64 workerLimitCount = (int64) LimitClauseRowFetchCount;
|
||||||
|
@ -4452,14 +4463,11 @@ CanPushDownLimitApproximate(List *sortClauseList, List *targetList)
|
||||||
|
|
||||||
if (sortClauseList != NIL)
|
if (sortClauseList != NIL)
|
||||||
{
|
{
|
||||||
bool orderByAverage = HasOrderByAverage(sortClauseList, targetList);
|
bool orderByNonCommutativeAggregate =
|
||||||
|
HasOrderByNonCommutativeAggregate(sortClauseList, targetList);
|
||||||
bool orderByComplex = HasOrderByComplexExpression(sortClauseList, targetList);
|
bool orderByComplex = HasOrderByComplexExpression(sortClauseList, targetList);
|
||||||
|
|
||||||
/*
|
if (!orderByNonCommutativeAggregate && !orderByComplex)
|
||||||
* If we don't have any order by average or any complex expressions with
|
|
||||||
* aggregates in them, we can meaningfully approximate.
|
|
||||||
*/
|
|
||||||
if (!orderByAverage && !orderByComplex)
|
|
||||||
{
|
{
|
||||||
canApproximate = true;
|
canApproximate = true;
|
||||||
}
|
}
|
||||||
|
@ -4497,13 +4505,13 @@ HasOrderByAggregate(List *sortClauseList, List *targetList)
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* HasOrderByAverage walks over the given order by clauses, and checks if we
|
* HasOrderByNonCommutativeAggregate walks over the given order by clauses,
|
||||||
* have an order by an average. If we do, the function returns true.
|
* and checks if we have an order by an aggregate which is not commutative.
|
||||||
*/
|
*/
|
||||||
static bool
|
static bool
|
||||||
HasOrderByAverage(List *sortClauseList, List *targetList)
|
HasOrderByNonCommutativeAggregate(List *sortClauseList, List *targetList)
|
||||||
{
|
{
|
||||||
bool hasOrderByAverage = false;
|
bool hasOrderByNonCommutativeAggregate = false;
|
||||||
ListCell *sortClauseCell = NULL;
|
ListCell *sortClauseCell = NULL;
|
||||||
|
|
||||||
foreach(sortClauseCell, sortClauseList)
|
foreach(sortClauseCell, sortClauseList)
|
||||||
|
@ -4517,15 +4525,22 @@ HasOrderByAverage(List *sortClauseList, List *targetList)
|
||||||
Aggref *aggregate = (Aggref *) sortExpression;
|
Aggref *aggregate = (Aggref *) sortExpression;
|
||||||
|
|
||||||
AggregateType aggregateType = GetAggregateType(aggregate);
|
AggregateType aggregateType = GetAggregateType(aggregate);
|
||||||
if (aggregateType == AGGREGATE_AVERAGE)
|
if (aggregateType != AGGREGATE_MIN &&
|
||||||
|
aggregateType != AGGREGATE_MAX &&
|
||||||
|
aggregateType != AGGREGATE_SUM &&
|
||||||
|
aggregateType != AGGREGATE_COUNT &&
|
||||||
|
aggregateType != AGGREGATE_BIT_AND &&
|
||||||
|
aggregateType != AGGREGATE_BIT_OR &&
|
||||||
|
aggregateType != AGGREGATE_EVERY &&
|
||||||
|
aggregateType != AGGREGATE_ANY_VALUE)
|
||||||
{
|
{
|
||||||
hasOrderByAverage = true;
|
hasOrderByNonCommutativeAggregate = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return hasOrderByAverage;
|
return hasOrderByNonCommutativeAggregate;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -90,10 +90,45 @@ DEBUG: push down of limit count: 150
|
||||||
-- We now test scenarios where applying the limit optimization wouldn't produce
|
-- We now test scenarios where applying the limit optimization wouldn't produce
|
||||||
-- meaningful results. First, we check that we don't push down the limit clause
|
-- meaningful results. First, we check that we don't push down the limit clause
|
||||||
-- for non-commutative aggregates.
|
-- for non-commutative aggregates.
|
||||||
SELECT l_partkey, avg(l_suppkey) AS average FROM lineitem
|
SELECT l_partkey, avg(l_suppkey) FROM lineitem
|
||||||
GROUP BY l_partkey
|
GROUP BY l_partkey
|
||||||
ORDER BY average DESC, l_partkey LIMIT 10;
|
ORDER BY 2 DESC, l_partkey LIMIT 10;
|
||||||
l_partkey | average
|
l_partkey | avg
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
9998 | 9999.0000000000000000
|
||||||
|
102466 | 9997.0000000000000000
|
||||||
|
184959 | 9996.0000000000000000
|
||||||
|
17492 | 9994.0000000000000000
|
||||||
|
124966 | 9991.0000000000000000
|
||||||
|
89989 | 9990.0000000000000000
|
||||||
|
32479 | 9989.0000000000000000
|
||||||
|
144960 | 9989.0000000000000000
|
||||||
|
147473 | 9988.0000000000000000
|
||||||
|
37481 | 9985.0000000000000000
|
||||||
|
(10 rows)
|
||||||
|
|
||||||
|
SELECT l_partkey, stddev(l_suppkey::float8) FROM lineitem
|
||||||
|
GROUP BY l_partkey
|
||||||
|
ORDER BY 2 DESC NULLS LAST, l_partkey LIMIT 10;
|
||||||
|
l_partkey | stddev
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
192434 | 5343.60594542674
|
||||||
|
160226 | 5337.24198439606
|
||||||
|
151174 | 5335.1206640525
|
||||||
|
60844 | 5316.02878096046
|
||||||
|
62405 | 5316.02878096046
|
||||||
|
50168 | 5313.9074606169
|
||||||
|
52148 | 5313.9074606169
|
||||||
|
52398 | 5313.9074606169
|
||||||
|
10259 | 5305.42217924267
|
||||||
|
3496 | 5303.30085889911
|
||||||
|
(10 rows)
|
||||||
|
|
||||||
|
-- also test that we handle execution on coordinator properly
|
||||||
|
SELECT l_partkey, avg(distinct l_suppkey) FROM lineitem
|
||||||
|
GROUP BY l_partkey
|
||||||
|
ORDER BY 2 DESC, l_partkey LIMIT 10;
|
||||||
|
l_partkey | avg
|
||||||
---------------------------------------------------------------------
|
---------------------------------------------------------------------
|
||||||
9998 | 9999.0000000000000000
|
9998 | 9999.0000000000000000
|
||||||
102466 | 9997.0000000000000000
|
102466 | 9997.0000000000000000
|
||||||
|
|
|
@ -48,9 +48,17 @@ SELECT c_custkey, c_name, count(*) as lineitem_count
|
||||||
-- meaningful results. First, we check that we don't push down the limit clause
|
-- meaningful results. First, we check that we don't push down the limit clause
|
||||||
-- for non-commutative aggregates.
|
-- for non-commutative aggregates.
|
||||||
|
|
||||||
SELECT l_partkey, avg(l_suppkey) AS average FROM lineitem
|
SELECT l_partkey, avg(l_suppkey) FROM lineitem
|
||||||
GROUP BY l_partkey
|
GROUP BY l_partkey
|
||||||
ORDER BY average DESC, l_partkey LIMIT 10;
|
ORDER BY 2 DESC, l_partkey LIMIT 10;
|
||||||
|
SELECT l_partkey, stddev(l_suppkey::float8) FROM lineitem
|
||||||
|
GROUP BY l_partkey
|
||||||
|
ORDER BY 2 DESC NULLS LAST, l_partkey LIMIT 10;
|
||||||
|
|
||||||
|
-- also test that we handle execution on coordinator properly
|
||||||
|
SELECT l_partkey, avg(distinct l_suppkey) FROM lineitem
|
||||||
|
GROUP BY l_partkey
|
||||||
|
ORDER BY 2 DESC, l_partkey LIMIT 10;
|
||||||
|
|
||||||
-- Next, check that we don't apply the limit optimization for expressions that
|
-- Next, check that we don't apply the limit optimization for expressions that
|
||||||
-- have aggregates within them
|
-- have aggregates within them
|
||||||
|
|
Loading…
Reference in New Issue