Add support for window functions that can be pushed down to worker (#2008)

This is the first of a series of window-function changes.

We can now support window functions that can be pushed down to the workers.
A window function must include the distribution column in its PARTITION BY
clause to be pushed down.
pull/2031/head
Murat Tuncer 2018-03-01 19:07:07 +03:00 committed by GitHub
parent 8e2c72c054
commit 76f6883d5d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 1721 additions and 41 deletions

View File

@ -1296,7 +1296,13 @@ MasterExtendedOpNode(MultiExtendedOp *originalOpNode,
Expr *newExpression = NULL;
bool hasAggregates = contain_agg_clause((Node *) originalExpression);
if (hasAggregates)
bool hasWindowFunction = contain_window_function((Node *) originalExpression);
/*
* If the aggregate belongs to a window function, it is not mutated, but pushed
* down to the worker as-is. The master query should treat it as a Var.
*/
if (hasAggregates && !hasWindowFunction)
{
Node *newNode = MasterAggregateMutator((Node *) originalExpression,
walkerContext);
@ -1858,6 +1864,9 @@ WorkerExtendedOpNode(MultiExtendedOp *originalOpNode,
bool hasNonPartitionColumnDistinctAgg = false;
bool repartitionSubquery = false;
/* only window functions that can be pushed down reach here */
bool pushDownWindowFunction = originalOpNode->hasWindowFuncs;
walkerContext->expressionList = NIL;
/* find max of sort group ref index */
@ -1893,12 +1902,18 @@ WorkerExtendedOpNode(MultiExtendedOp *originalOpNode,
List *newExpressionList = NIL;
ListCell *newExpressionCell = NULL;
bool hasAggregates = contain_agg_clause((Node *) originalExpression);
bool hasWindowFunction = contain_window_function((Node *) originalExpression);
/* reset walker context */
walkerContext->expressionList = NIL;
walkerContext->createGroupByClause = false;
if (hasAggregates)
/*
* If the expression uses aggregates inside a window function,
* contain_agg_clause() still returns true. We want to make sure the aggregate
* is not part of a window function before we proceed.
*/
if (hasAggregates && !hasWindowFunction)
{
WorkerAggregateWalker((Node *) originalExpression, walkerContext);
@ -2013,6 +2028,8 @@ WorkerExtendedOpNode(MultiExtendedOp *originalOpNode,
workerExtendedOpNode = CitusMakeNode(MultiExtendedOp);
workerExtendedOpNode->distinctClause = NIL;
workerExtendedOpNode->hasDistinctOn = false;
workerExtendedOpNode->hasWindowFuncs = originalOpNode->hasWindowFuncs;
workerExtendedOpNode->windowClause = originalOpNode->windowClause;
if (!queryHasAggregates)
{
@ -2042,13 +2059,48 @@ WorkerExtendedOpNode(MultiExtendedOp *originalOpNode,
newTargetEntryListForSortClauses);
}
if (workerExtendedOpNode->windowClause)
{
List *windowClauseList = workerExtendedOpNode->windowClause;
ListCell *windowClauseCell = NULL;
foreach(windowClauseCell, windowClauseList)
{
WindowClause *windowClause = (WindowClause *) lfirst(windowClauseCell);
List *partitionClauseTargetList =
GenerateNewTargetEntriesForSortClauses(originalOpNode->targetList,
windowClause->partitionClause,
&targetProjectionNumber,
&nextSortGroupRefIndex);
List *orderClauseTargetList =
GenerateNewTargetEntriesForSortClauses(originalOpNode->targetList,
windowClause->orderClause,
&targetProjectionNumber,
&nextSortGroupRefIndex);
newTargetEntryList = list_concat(newTargetEntryList,
partitionClauseTargetList);
newTargetEntryList = list_concat(newTargetEntryList,
orderClauseTargetList);
}
}
workerExtendedOpNode->targetList = newTargetEntryList;
/*
* If the query is grouped by a partition column -- so that shards cover
* disjoint sets of partition values -- we can push down the HAVING qualifier.
*
* When a query contains a subquery, we cannot determine
* groupedByDisjointPartitionColumn; therefore we also check whether there is
* a window function. If there is one, we know it is safe to push down
* (i.e., it is partitioned on the distribution column and, if there is a
* GROUP BY, it contains the distribution column).
*
*/
if (havingQual != NULL && groupedByDisjointPartitionColumn)
if (havingQual != NULL &&
(groupedByDisjointPartitionColumn || pushDownWindowFunction))
{
workerExtendedOpNode->havingQual = originalOpNode->havingQual;
}
@ -2067,7 +2119,10 @@ HasNonPartitionColumnDistinctAgg(List *targetEntryList, Node *havingQual,
List *tableNodeList)
{
List *targetVarList = pull_var_clause((Node *) targetEntryList,
PVC_INCLUDE_AGGREGATES);
PVC_INCLUDE_AGGREGATES |
PVC_RECURSE_WINDOWFUNCS);
/* having clause can't have window functions, no need to recurse for that */
List *havingVarList = pull_var_clause((Node *) havingQual, PVC_INCLUDE_AGGREGATES);
List *aggregateCheckList = list_concat(targetVarList, havingVarList);
@ -2722,7 +2777,8 @@ ErrorIfContainsUnsupportedAggregate(MultiNode *logicalPlanNode)
* PVC_REJECT_PLACEHOLDERS is implicit if PVC_INCLUDE_PLACEHOLDERS isn't
* specified.
*/
List *expressionList = pull_var_clause((Node *) targetList, PVC_INCLUDE_AGGREGATES);
List *expressionList = pull_var_clause((Node *) targetList, PVC_INCLUDE_AGGREGATES |
PVC_INCLUDE_WINDOWFUNCS);
ListCell *expressionCell = NULL;
foreach(expressionCell, expressionList)

View File

@ -161,6 +161,7 @@ static MultiNode * SubqueryMultiNodeTree(Query *originalQuery,
plannerRestrictionContext);
static MultiNode * SubqueryPushdownMultiNodeTree(Query *queryTree);
static void FlattenJoinVars(List *columnList, Query *queryTree);
static List * CreateSubqueryTargetEntryList(List *columnList);
static void UpdateVarMappingsForExtendedOpNode(List *columnList,
List *subqueryTargetEntryList);
@ -212,6 +213,7 @@ static bool
ShouldUseSubqueryPushDown(Query *originalQuery, Query *rewrittenQuery)
{
List *qualifierList = NIL;
StringInfo errorMessage = NULL;
/*
* We check the existence of subqueries in FROM clause on the modified query
@ -253,6 +255,13 @@ ShouldUseSubqueryPushDown(Query *originalQuery, Query *rewrittenQuery)
return true;
}
/* check if the query has a window function and it is safe to pushdown */
if (originalQuery->hasWindowFuncs &&
SafeToPushdownWindowFunction(originalQuery, &errorMessage))
{
return true;
}
return false;
}
@ -850,8 +859,8 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi
* We support window functions when the window function
* is partitioned on distribution column.
*/
if (subqueryTree->windowClause && !SafeToPushdownWindowFunction(subqueryTree,
&errorInfo))
if (subqueryTree->hasWindowFuncs && !SafeToPushdownWindowFunction(subqueryTree,
&errorInfo))
{
errorDetail = (char *) errorInfo->data;
preconditionsSatisfied = false;
@ -2079,6 +2088,7 @@ DeferErrorIfQueryNotSupported(Query *queryTree)
bool hasComplexJoinOrder = false;
bool hasComplexRangeTableType = false;
bool preconditionsSatisfied = true;
StringInfo errorInfo = NULL;
const char *errorHint = NULL;
const char *joinHint = "Consider joining tables on partition column and have "
"equal filter on joining columns.";
@ -2097,15 +2107,16 @@ DeferErrorIfQueryNotSupported(Query *queryTree)
errorHint = filterHint;
}
if (queryTree->hasWindowFuncs)
if (queryTree->hasWindowFuncs &&
!SafeToPushdownWindowFunction(queryTree, &errorInfo))
{
preconditionsSatisfied = false;
errorMessage = "could not run distributed query because the window "
"function that is used cannot be pushed down";
errorHint = "Window functions are supported in two ways. Either add "
"an equality filter on the distributed tables' partition "
"column or use the window functions inside a subquery with "
"a PARTITION BY clause containing the distribution column";
"column or use the window functions with a PARTITION BY "
"clause containing the distribution column";
}
if (queryTree->setOperations)
@ -3135,6 +3146,8 @@ MultiExtendedOpNode(Query *queryTree)
extendedOpNode->havingQual = queryTree->havingQual;
extendedOpNode->distinctClause = queryTree->distinctClause;
extendedOpNode->hasDistinctOn = queryTree->hasDistinctOn;
extendedOpNode->hasWindowFuncs = queryTree->hasWindowFuncs;
extendedOpNode->windowClause = queryTree->windowClause;
return extendedOpNode;
}
@ -3396,7 +3409,8 @@ pull_var_clause_default(Node *node)
* PVC_REJECT_PLACEHOLDERS is implicit if PVC_INCLUDE_PLACEHOLDERS
* isn't specified.
*/
List *columnList = pull_var_clause(node, PVC_RECURSE_AGGREGATES);
List *columnList = pull_var_clause(node, PVC_RECURSE_AGGREGATES |
PVC_RECURSE_WINDOWFUNCS);
return columnList;
}
@ -3768,6 +3782,8 @@ SubqueryPushdownMultiNodeTree(Query *queryTree)
havingClauseColumnList = pull_var_clause_default(queryTree->havingQual);
columnList = list_concat(targetColumnList, havingClauseColumnList);
FlattenJoinVars(columnList, queryTree);
/* create a target entry for each unique column */
subqueryTargetEntryList = CreateSubqueryTargetEntryList(columnList);
@ -3835,6 +3851,73 @@ SubqueryPushdownMultiNodeTree(Query *queryTree)
}
/*
 * FlattenJoinVars iterates over the provided columnList to identify Vars that
 * reference a join RTE and rewrites them back to their original (base) RTEs.
 *
 * This is required because Postgres allows columns to be referenced using
 * a join alias. Therefore the same column from a table could be referenced
 * twice: using its qualified table name (t1.a) and without the table name
 * (a). This is a problem when one of them is inside the GROUP BY clause and
 * the other is not. Postgres is smart enough to detect that both target
 * columns resolve to the same thing, and allows a single GROUP BY clause to
 * cover both target entries when the standard planner is called. Since we
 * operate on the original query, we want to make sure we provide correct
 * varno/varattno values to Postgres so that it can produce a valid query.
 *
 * The only exception is that, if a join is given an alias name, we do not
 * flatten those Vars. If we did, deparsing would fail since it expects to
 * see the join alias and cannot access the RTEs in the join tree by name.
 */
static void
FlattenJoinVars(List *columnList, Query *queryTree)
{
	ListCell *columnCell = NULL;
	List *rteList = queryTree->rtable;

	/*
	 * Declared at function scope (not per-iteration) so the lazy
	 * initialization below is effective: the dummy PlannerInfo is built at
	 * most once per call instead of once per join-alias Var.
	 */
	PlannerInfo *root = NULL;

	foreach(columnCell, columnList)
	{
		Var *column = (Var *) lfirst(columnCell);
		RangeTblEntry *columnRte = NULL;

		Assert(IsA(column, Var));

		/*
		 * If the join does not have an alias, its columns are copied over the
		 * join RTE; there is no need to find the JoinExpr to check whether it
		 * has an alias defined.
		 *
		 * We use the planner's flatten_join_alias_vars routine to do the
		 * flattening; it wants a PlannerInfo root node, which fortunately can
		 * be mostly dummy.
		 */
		columnRte = rt_fetch(column->varno, rteList);
		if (columnRte->rtekind == RTE_JOIN && columnRte->alias == NULL)
		{
			Var *normalizedVar = NULL;

			if (root == NULL)
			{
				root = makeNode(PlannerInfo);
				root->parse = (queryTree);
				root->planner_cxt = CurrentMemoryContext;
				root->hasJoinRTEs = true;
			}

			normalizedVar = (Var *) flatten_join_alias_vars(root, (Node *) column);

			/*
			 * Copy the flattened Var over the existing node in place so that
			 * every list referencing this Var sees the update.
			 */
			memcpy(column, normalizedVar, sizeof(Var));
		}
	}
}
/*
* CreateSubqueryTargetEntryList creates a target entry for each unique column
* in the column list and returns the target entry list.

View File

@ -1415,6 +1415,8 @@ BuildSubqueryJobQuery(MultiNode *multiNode)
bool hasAggregates = false;
List *distinctClause = NIL;
bool hasDistinctOn = false;
bool hasWindowFuncs = false;
List *windowClause = NIL;
/* we start building jobs from below the collect node */
Assert(!CitusIsA(multiNode, MultiCollect));
@ -1462,6 +1464,8 @@ BuildSubqueryJobQuery(MultiNode *multiNode)
havingQual = extendedOp->havingQual;
distinctClause = extendedOp->distinctClause;
hasDistinctOn = extendedOp->hasDistinctOn;
hasWindowFuncs = extendedOp->hasWindowFuncs;
windowClause = extendedOp->windowClause;
}
/* build group clauses */
@ -1508,6 +1512,8 @@ BuildSubqueryJobQuery(MultiNode *multiNode)
jobQuery->hasAggs = hasAggregates;
jobQuery->hasDistinctOn = hasDistinctOn;
jobQuery->distinctClause = distinctClause;
jobQuery->hasWindowFuncs = hasWindowFuncs;
jobQuery->windowClause = windowClause;
return jobQuery;
}

View File

@ -176,6 +176,8 @@ typedef struct MultiExtendedOp
Node *havingQual;
List *distinctClause;
bool hasDistinctOn;
bool hasWindowFuncs;
List *windowClause;
} MultiExtendedOp;

View File

@ -654,7 +654,7 @@ ORDER BY
LIMIT
10;
ERROR: could not run distributed query because the window function that is used cannot be pushed down
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
-- user needs to supply partition by which should
-- include the distribution key
INSERT INTO agg_results_window (user_id, agg_time, value_2_agg)
@ -673,7 +673,7 @@ ORDER BY
LIMIT
10;
ERROR: could not run distributed query because the window function that is used cannot be pushed down
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
-- user needs to supply partition by which should
-- include the distribution key
INSERT INTO agg_results_window (user_id, agg_time, value_2_agg)
@ -692,7 +692,7 @@ ORDER BY
LIMIT
10;
ERROR: could not run distributed query because the window function that is used cannot be pushed down
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
-- w2 should not be pushed down
INSERT INTO agg_results_window (user_id, value_1_agg, value_2_agg)
SELECT * FROM
@ -710,7 +710,7 @@ SELECT * FROM
LIMIT
10;
ERROR: could not run distributed query because the window function that is used cannot be pushed down
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
-- GROUP BY includes the partition key, but not the WINDOW function
INSERT INTO agg_results_window (user_id, agg_time, value_2_agg)
SELECT
@ -728,7 +728,7 @@ FROM
WHERE
my_rank > 125;
ERROR: could not run distributed query because the window function that is used cannot be pushed down
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
-- GROUP BY includes the partition key, but not the WINDOW function
INSERT INTO agg_results_window (user_id, agg_time, value_2_agg)
SELECT
@ -746,7 +746,7 @@ FROM
WHERE
my_rank > 125;
ERROR: could not run distributed query because the window function that is used cannot be pushed down
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
-- w2 should not be allowed
INSERT INTO agg_results_window (user_id, value_2_agg, value_3_agg)
SELECT * FROM
@ -762,7 +762,7 @@ SELECT * FROM
w2 AS (ORDER BY events_table.time)
) as foo;
ERROR: could not run distributed query because the window function that is used cannot be pushed down
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
-- unsupported window function with an override
INSERT INTO agg_results_window(user_id, agg_time, value_2_agg)
SELECT * FROM (
@ -780,7 +780,7 @@ SELECT * FROM (
w2 as (PARTITION BY user_id, time)
) a;
ERROR: could not run distributed query because the window function that is used cannot be pushed down
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
-- Subquery in where with unsupported window function
INSERT INTO agg_results_window(user_id)
SELECT
@ -812,7 +812,7 @@ SELECT * FROM (
user_id
) a;
ERROR: could not run distributed query because the window function that is used cannot be pushed down
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
-- UNION with only one subquery which has a partition on non-distribution column should
-- error out
INSERT INTO agg_results_window(user_id, value_1_agg)
@ -850,5 +850,5 @@ FROM (
)
) AS ftop;
ERROR: could not run distributed query because the window function that is used cannot be pushed down
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
DROP VIEW view_with_window_func;

View File

@ -1129,12 +1129,12 @@ SELECT id, MIN(id) over (order by word_count)
FROM articles_hash_mx
WHERE author_id = 1 or author_id = 2;
ERROR: could not run distributed query because the window function that is used cannot be pushed down
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
SELECT LAG(title, 1) over (ORDER BY word_count) prev, title, word_count
FROM articles_hash_mx
WHERE author_id = 5 or author_id = 2;
ERROR: could not run distributed query because the window function that is used cannot be pushed down
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
-- complex query hitting a single shard
SELECT
count(DISTINCT CASE

View File

@ -1306,12 +1306,12 @@ SELECT id, MIN(id) over (order by word_count)
FROM articles_hash
WHERE author_id = 1 or author_id = 2;
ERROR: could not run distributed query because the window function that is used cannot be pushed down
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
SELECT LAG(title, 1) over (ORDER BY word_count) prev, title, word_count
FROM articles_hash
WHERE author_id = 5 or author_id = 2;
ERROR: could not run distributed query because the window function that is used cannot be pushed down
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
-- where false queries are router plannable
SELECT *
FROM articles_hash

View File

@ -2581,3 +2581,112 @@ ORDER BY
3 | 1
(4 rows)
-- we've fixed a bug related to joins w/wout alias
-- while implementing top window functions
-- thus adding some tests related to that (i.e., next 3 tests)
WITH users_events AS
(
SELECT
user_id
FROM
users_table
)
SELECT
uid,
event_type,
value_2,
value_3
FROM (
(SELECT
user_id as uid
FROM
users_events
) users
JOIN
events_table
ON
users.uid = events_table.event_type
) a
ORDER BY
1,2,3,4
LIMIT 5;
uid | event_type | value_2 | value_3
-----+------------+---------+---------
1 | 1 | 0 | 2
1 | 1 | 0 | 2
1 | 1 | 0 | 2
1 | 1 | 0 | 2
1 | 1 | 0 | 2
(5 rows)
-- the following queries are almost the same,
-- the only difference is the final GROUP BY
SELECT a.user_id, avg(b.value_2) as subquery_avg
FROM
(SELECT
user_id
FROM
users_table
WHERE
(value_1 > 2)
GROUP BY
user_id
HAVING
count(distinct value_1) > 2
) as a
LEFT JOIN
(SELECT
DISTINCT ON (value_2) value_2 , user_id, value_3
FROM
users_table
WHERE
(value_1 > 3)
ORDER BY
1,2,3
) AS b
USING (user_id)
GROUP BY user_id
ORDER BY 1;
user_id | subquery_avg
---------+------------------------
1 | 2.3333333333333333
3 | 5.0000000000000000
4 | 1.00000000000000000000
5 |
(4 rows)
-- see the comment for the above query
SELECT a.user_id, avg(b.value_2) as subquery_avg
FROM
(SELECT
user_id
FROM
users_table
WHERE
(value_1 > 2)
GROUP BY
user_id
HAVING
count(distinct value_1) > 2
) as a
LEFT JOIN
(SELECT
DISTINCT ON (value_2) value_2 , user_id, value_3
FROM
users_table
WHERE
(value_1 > 3)
ORDER BY
1,2,3
) AS b
USING (user_id)
GROUP BY a.user_id
ORDER BY 1;
user_id | subquery_avg
---------+------------------------
1 | 2.3333333333333333
3 | 5.0000000000000000
4 | 1.00000000000000000000
5 |
(4 rows)

View File

@ -791,7 +791,7 @@ ORDER BY
LIMIT
10;
ERROR: could not run distributed query because the window function that is used cannot be pushed down
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
-- user needs to supply partition by which should
-- include the distribution key
SELECT
@ -809,7 +809,7 @@ ORDER BY
LIMIT
10;
ERROR: could not run distributed query because the window function that is used cannot be pushed down
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
-- user needs to supply partition by which should
-- include the distribution key
SELECT
@ -827,7 +827,7 @@ ORDER BY
LIMIT
10;
ERROR: could not run distributed query because the window function that is used cannot be pushed down
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
-- w2 should not be pushed down
SELECT * FROM
(
@ -844,7 +844,7 @@ SELECT * FROM
ORDER BY 3 DESC, 1 DESC, 2 DESC NULLS LAST
LIMIT 10;
ERROR: could not run distributed query because the window function that is used cannot be pushed down
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
-- w2 should not be pushed down
SELECT * FROM
(
@ -863,7 +863,7 @@ ORDER BY
LIMIT
10;
ERROR: could not run distributed query because the window function that is used cannot be pushed down
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
-- GROUP BY includes the partition key, but not the WINDOW function
SELECT
user_id, time, my_rank
@ -884,7 +884,7 @@ ORDER BY
LIMIT
10;
ERROR: could not run distributed query because the window function that is used cannot be pushed down
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
-- GROUP BY includes the partition key, but not the WINDOW function
SELECT
user_id, time, my_rank
@ -905,7 +905,7 @@ ORDER BY
LIMIT
10;
ERROR: could not run distributed query because the window function that is used cannot be pushed down
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
-- Overriding window function but not supported
SELECT * FROM (
SELECT
@ -924,7 +924,7 @@ SELECT * FROM (
ORDER BY
1,2,3;
ERROR: could not run distributed query because the window function that is used cannot be pushed down
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
-- Aggregate function on distribution column should error out
SELECT * FROM (
SELECT
@ -937,7 +937,7 @@ SELECT * FROM (
ORDER BY
1 DESC, 2 DESC;
ERROR: could not run distributed query because the window function that is used cannot be pushed down
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
-- test with reference table partitioned on only a column from reference table
SELECT *
FROM
@ -952,7 +952,7 @@ ORDER BY
LIMIT
20;
ERROR: could not run distributed query because the window function that is used cannot be pushed down
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
-- UNION ALL with only one of them is not partitioned over distribution column which
-- should not be allowed.
SELECT
@ -975,7 +975,7 @@ GROUP BY user_id
ORDER BY 1 DESC
LIMIT 5;
ERROR: could not run distributed query because the window function that is used cannot be pushed down
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
-- UNION with only one subquery which has a partition on non-distribution column should
-- error out
SELECT *
@ -1012,5 +1012,5 @@ FROM (
ORDER BY 2 DESC, 1 DESC
LIMIT 5;
ERROR: could not run distributed query because the window function that is used cannot be pushed down
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
DROP VIEW subq;

View File

@ -587,7 +587,7 @@ SELECT x, y, rnk FROM (SELECT *, rank() OVER my_win as rnk FROM test WINDOW my_w
UNION
SELECT x, y, rnk FROM (SELECT *, rank() OVER my_win as rnk FROM test WINDOW my_win AS (PARTITION BY y ORDER BY x DESC)) as bar;
ERROR: could not run distributed query because the window function that is used cannot be pushed down
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
-- other set operations in joins also cannot be pushed down
SELECT * FROM ((SELECT * FROM test) EXCEPT (SELECT * FROM test ORDER BY x LIMIT 1)) u JOIN test USING (x) ORDER BY 1,2;
DEBUG: push down of limit count: 1

View File

@ -100,7 +100,7 @@ ORDER BY
LIMIT
10) as foo;
ERROR: could not run distributed query because the window function that is used cannot be pushed down
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
-- OUTER JOINs where the outer part is recursively planned and not the other way
-- around is not supported
SELECT

View File

@ -0,0 +1,915 @@
-- ===================================================================
-- test top level window functions that are pushdownable
-- ===================================================================
-- a very simple window function with an aggregate and a window function
-- distribution column is on the partition by clause
SELECT
user_id, COUNT(*) OVER (PARTITION BY user_id),
rank() OVER (PARTITION BY user_id)
FROM
users_table
ORDER BY
1 DESC, 2 DESC, 3 DESC
LIMIT 5;
user_id | count | rank
---------+-------+------
6 | 10 | 1
6 | 10 | 1
6 | 10 | 1
6 | 10 | 1
6 | 10 | 1
(5 rows)
-- a more complicated window clause, including an aggregate
-- in both the window clause and the target entry
SELECT
user_id, avg(avg(value_3)) OVER (PARTITION BY user_id, MIN(value_2))
FROM
users_table
GROUP BY
1
ORDER BY
2 DESC NULLS LAST, 1 DESC;
user_id | avg
---------+------------------
2 | 3
4 | 2.82608695652174
3 | 2.70588235294118
6 | 2.6
1 | 2.57142857142857
5 | 2.46153846153846
(6 rows)
-- window clause operates on the results of a subquery
SELECT
user_id, max(value_1) OVER (PARTITION BY user_id, MIN(value_2))
FROM (
SELECT
DISTINCT us.user_id, us.value_2, value_1, random() as r1
FROM
users_table as us, events_table
WHERE
us.user_id = events_table.user_id AND event_type IN (1,2)
ORDER BY
user_id, value_2
) s
GROUP BY
1, value_1
ORDER BY
2 DESC, 1;
user_id | max
---------+-----
1 | 5
3 | 5
3 | 5
4 | 5
5 | 5
5 | 5
6 | 5
6 | 5
1 | 4
2 | 4
3 | 4
3 | 4
3 | 4
4 | 4
4 | 4
5 | 4
5 | 4
1 | 3
2 | 3
2 | 3
2 | 3
6 | 3
2 | 2
4 | 2
4 | 2
4 | 2
6 | 2
1 | 1
3 | 1
5 | 1
6 | 1
5 | 0
(32 rows)
-- window function operates on the results of
-- a join
SELECT
us.user_id,
SUM(us.value_1) OVER (PARTITION BY us.user_id)
FROM
users_table us
JOIN
events_table ev
ON (us.user_id = ev.user_id)
GROUP BY
1,
value_1
ORDER BY
1,
2
LIMIT 5;
user_id | sum
---------+-----
1 | 13
1 | 13
1 | 13
1 | 13
2 | 10
(5 rows)
-- the same query, but this time join with an alias
SELECT
user_id, value_1, SUM(j.value_1) OVER (PARTITION BY j.user_id)
FROM
(users_table us
JOIN
events_table ev
USING (user_id )
) j
GROUP BY
user_id,
value_1
ORDER BY
3 DESC, 2 DESC, 1 DESC
LIMIT 5;
user_id | value_1 | sum
---------+---------+-----
5 | 5 | 15
4 | 5 | 15
3 | 5 | 15
5 | 4 | 15
4 | 4 | 15
(5 rows)
-- querying views that have window functions should be ok
CREATE VIEW window_view AS
SELECT
DISTINCT user_id, rank() OVER (PARTITION BY user_id ORDER BY value_1)
FROM
users_table
GROUP BY
user_id, value_1
HAVING count(*) > 1;
-- Window function in View works
SELECT *
FROM
window_view
ORDER BY
2 DESC, 1
LIMIT 10;
user_id | rank
---------+------
5 | 6
2 | 5
4 | 5
5 | 5
2 | 4
3 | 4
4 | 4
5 | 4
6 | 4
2 | 3
(10 rows)
-- the other way around also should work fine
-- query a view using window functions
CREATE VIEW users_view AS SELECT * FROM users_table;
SELECT
DISTINCT user_id, rank() OVER (PARTITION BY user_id ORDER BY value_1)
FROM
users_view
GROUP BY
user_id, value_1
HAVING count(*) > 4
ORDER BY
2 DESC, 1;
user_id | rank
---------+------
4 | 2
5 | 2
2 | 1
3 | 1
4 | 1
5 | 1
(6 rows)
DROP VIEW users_view, window_view;
-- window function uses columns from two different tables
SELECT
DISTINCT ON (events_table.user_id, rnk) events_table.user_id, rank() OVER my_win AS rnk
FROM
events_table, users_table
WHERE
users_table.user_id = events_table.user_id
WINDOW
my_win AS (PARTITION BY events_table.user_id, users_table.value_1 ORDER BY events_table.time DESC)
ORDER BY
rnk DESC, 1 DESC
LIMIT 10;
user_id | rnk
---------+-----
3 | 121
5 | 118
2 | 116
3 | 115
4 | 113
2 | 111
5 | 109
3 | 109
4 | 106
2 | 106
(10 rows)
-- the same query with reference table column is also on the partition by clause
SELECT
DISTINCT ON (events_table.user_id, rnk) events_table.user_id, rank() OVER my_win AS rnk
FROM
events_table, users_ref_test_table uref
WHERE
uref.id = events_table.user_id
WINDOW
my_win AS (PARTITION BY events_table.user_id, uref.k_no ORDER BY events_table.time DESC)
ORDER BY
rnk DESC, 1 DESC
LIMIT 10;
user_id | rnk
---------+-----
2 | 24
2 | 23
2 | 22
3 | 21
2 | 21
3 | 20
2 | 20
3 | 19
2 | 19
3 | 18
(10 rows)
-- a similar query with no distribution column in the partition by clause
-- is not supported
SELECT
DISTINCT ON (events_table.user_id, rnk) events_table.user_id, rank() OVER my_win AS rnk
FROM
events_table, users_ref_test_table uref
WHERE
uref.id = events_table.user_id
WINDOW
my_win AS (PARTITION BY events_table.value_2, uref.k_no ORDER BY events_table.time DESC)
ORDER BY
rnk DESC, 1 DESC
LIMIT 10;
ERROR: could not run distributed query because the window function that is used cannot be pushed down
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
-- ORDER BY in the window function is an aggregate
SELECT
user_id, rank() OVER my_win as rnk, avg(value_2) as avg_val_2
FROM
events_table
GROUP BY
user_id, date_trunc('day', time)
WINDOW
my_win AS (PARTITION BY user_id ORDER BY avg(event_type) DESC)
ORDER BY
3 DESC, 2 DESC, 1 DESC;
user_id | rnk | avg_val_2
---------+-----+--------------------
1 | 1 | 3.3750000000000000
3 | 2 | 3.1666666666666667
5 | 1 | 2.6666666666666667
6 | 1 | 2.5000000000000000
4 | 1 | 2.5000000000000000
2 | 1 | 2.4736842105263158
4 | 2 | 2.4000000000000000
1 | 2 | 2.1428571428571429
5 | 2 | 2.0909090909090909
6 | 2 | 2.0000000000000000
2 | 2 | 2.0000000000000000
3 | 1 | 1.8000000000000000
(12 rows)
-- let's push the limits of writing complex expressions along with the window functions
SELECT
COUNT(*) OVER (PARTITION BY user_id, user_id + 1),
rank() OVER (PARTITION BY user_id) as cnt1,
COUNT(*) OVER (PARTITION BY user_id, abs(value_1 - value_2)) as cnt2,
date_trunc('min', lag(time) OVER (PARTITION BY user_id)) as datee,
rank() OVER my_win as rnnk,
avg(CASE
WHEN user_id > 4
THEN value_1
ELSE value_2
END) FILTER (WHERE user_id > 2) OVER my_win_2 as filtered_count,
sum(user_id * (5.0 / (value_1 + value_2 + 0.1)) * value_3) FILTER (WHERE value_1::text LIKE '%1%') OVER my_win_4 as cnt_with_filter_2
FROM
users_table
WINDOW
my_win AS (PARTITION BY user_id, (value_1%3)::int ORDER BY time DESC),
my_win_2 AS (PARTITION BY user_id, (value_1)::int ORDER BY time DESC),
my_win_3 AS (PARTITION BY user_id, date_trunc('min', time)),
my_win_4 AS (my_win_3 ORDER BY value_2, value_3)
ORDER BY
cnt_with_filter_2 DESC NULLS LAST, filtered_count DESC NULLS LAST, datee DESC NULLS LAST, rnnk DESC, cnt2 DESC, cnt1 DESC, user_id DESC
LIMIT 5;
count | cnt1 | cnt2 | datee | rnnk | filtered_count | cnt_with_filter_2
-------+------+------+--------------------------+------+------------------------+-------------------
23 | 1 | 7 | Thu Nov 23 02:14:00 2017 | 6 | 0.00000000000000000000 | 72.7272727272727
10 | 1 | 3 | Wed Nov 22 23:01:00 2017 | 1 | 1.00000000000000000000 | 57.1428571428571
17 | 1 | 5 | Wed Nov 22 23:24:00 2017 | 8 | 3.0000000000000000 | 28.5714285714286
17 | 1 | 5 | | 10 | 2.6666666666666667 | 28.5714285714286
17 | 1 | 5 | Thu Nov 23 00:15:00 2017 | 7 | 3.6666666666666667 | 24.1935483870968
(5 rows)
-- some tests with GROUP BY along with PARTITION BY
SELECT
user_id,
rank() OVER my_win as my_rank,
avg(avg(event_type)) OVER my_win_2 as avg,
max(time) as mx_time
FROM
events_table
GROUP BY
user_id,
value_2
WINDOW
my_win AS (PARTITION BY user_id, max(event_type) ORDER BY count(*) DESC),
my_win_2 AS (PARTITION BY user_id, avg(user_id) ORDER BY count(*) DESC)
ORDER BY
avg DESC,
mx_time DESC,
my_rank DESC,
user_id DESC;
user_id | my_rank | avg | mx_time
---------+---------+------------------------+---------------------------------
6 | 1 | 3.0000000000000000 | Thu Nov 23 14:00:13.20013 2017
6 | 2 | 3.0000000000000000 | Thu Nov 23 11:16:13.106691 2017
6 | 1 | 3.0000000000000000 | Thu Nov 23 07:27:32.822068 2017
3 | 1 | 2.9857142857142857 | Thu Nov 23 16:31:56.219594 2017
4 | 2 | 2.9555555555555556 | Thu Nov 23 14:19:25.765876 2017
4 | 1 | 2.9555555555555556 | Thu Nov 23 08:36:53.871919 2017
1 | 4 | 2.8633333333333333 | Wed Nov 22 21:06:57.457147 2017
1 | 1 | 2.8250000000000000 | Thu Nov 23 21:54:46.924477 2017
2 | 2 | 2.7738095238095238 | Thu Nov 23 13:27:37.441959 2017
1 | 2 | 2.7722222222222222 | Thu Nov 23 09:23:30.994345 2017
3 | 1 | 2.7682539682539682 | Thu Nov 23 01:17:49.040685 2017
2 | 1 | 2.7142857142857143 | Thu Nov 23 15:58:49.273421 2017
1 | 3 | 2.5791666666666667 | Thu Nov 23 11:09:38.074595 2017
3 | 1 | 2.5714285714285714 | Thu Nov 23 16:44:41.903713 2017
2 | 1 | 2.5158730158730159 | Thu Nov 23 14:02:47.738901 2017
4 | 1 | 2.47777777777777778333 | Thu Nov 23 16:20:33.264457 2017
4 | 3 | 2.47777777777777778333 | Thu Nov 23 08:14:18.231273 2017
4 | 3 | 2.47777777777777778333 | Thu Nov 23 07:32:45.521278 2017
1 | 1 | 2.4000000000000000 | Thu Nov 23 10:23:27.617726 2017
2 | 1 | 2.3869047619047619 | Thu Nov 23 17:26:14.563216 2017
3 | 1 | 2.3841269841269841 | Thu Nov 23 18:08:26.550729 2017
3 | 1 | 2.3841269841269841 | Thu Nov 23 09:38:45.338008 2017
3 | 2 | 2.3841269841269841 | Thu Nov 23 06:44:50.887182 2017
2 | 2 | 2.3095238095238095 | Thu Nov 23 04:05:16.217731 2017
5 | 2 | 2.3000000000000000 | Thu Nov 23 14:28:51.833214 2017
5 | 2 | 2.3000000000000000 | Thu Nov 23 14:23:09.889786 2017
4 | 1 | 2.2000000000000000 | Thu Nov 23 18:10:21.338399 2017
2 | 1 | 2.09126984126984126667 | Thu Nov 23 03:35:04.321504 2017
5 | 1 | 2.0000000000000000 | Thu Nov 23 16:11:02.929469 2017
5 | 1 | 2.0000000000000000 | Thu Nov 23 14:40:40.467511 2017
5 | 1 | 2.0000000000000000 | Thu Nov 23 13:26:45.571108 2017
(31 rows)
-- test for range and rows mode and different window functions
-- mostly to make sure that deparsing works fine
SELECT
user_id,
rank() OVER (PARTITION BY user_id ROWS BETWEEN
UNBOUNDED PRECEDING AND CURRENT ROW),
dense_rank() OVER (PARTITION BY user_id RANGE BETWEEN
UNBOUNDED PRECEDING AND CURRENT ROW),
CUME_DIST() OVER (PARTITION BY user_id RANGE BETWEEN
UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING),
PERCENT_RANK() OVER (PARTITION BY user_id ORDER BY avg(value_1) RANGE BETWEEN
UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
FROM
users_table
GROUP BY
1
ORDER BY
4 DESC,3 DESC,2 DESC ,1 DESC;
user_id | rank | dense_rank | cume_dist | percent_rank
---------+------+------------+-----------+--------------
6 | 1 | 1 | 1 | 0
5 | 1 | 1 | 1 | 0
4 | 1 | 1 | 1 | 0
3 | 1 | 1 | 1 | 0
2 | 1 | 1 | 1 | 0
1 | 1 | 1 | 1 | 0
(6 rows)
-- some tests with GROUP BY, HAVING and LIMIT
SELECT
user_id, sum(event_type) OVER my_win , event_type
FROM
events_table
GROUP BY
user_id, event_type
HAVING count(*) > 2
WINDOW my_win AS (PARTITION BY user_id, max(event_type) ORDER BY count(*) DESC)
ORDER BY
2 DESC, 3 DESC, 1 DESC
LIMIT
5;
user_id | sum | event_type
---------+-----+------------
4 | 4 | 4
3 | 4 | 4
2 | 4 | 4
1 | 4 | 4
5 | 3 | 3
(5 rows)
-- Group by has more columns than partition by
SELECT
DISTINCT user_id, SUM(value_2) OVER (PARTITION BY user_id)
FROM
users_table
GROUP BY
user_id, value_1, value_2
HAVING count(*) > 2
ORDER BY
2 DESC, 1
LIMIT
10;
user_id | sum
---------+-----
5 | 3
4 | 2
(2 rows)
SELECT
DISTINCT ON (user_id) user_id, SUM(value_2) OVER (PARTITION BY user_id)
FROM
users_table
GROUP BY
user_id, value_1, value_2
HAVING count(*) > 2
ORDER BY
1, 2 DESC
LIMIT
10;
user_id | sum
---------+-----
4 | 2
5 | 3
(2 rows)
SELECT
DISTINCT ON (SUM(value_1) OVER (PARTITION BY user_id)) user_id, SUM(value_2) OVER (PARTITION BY user_id)
FROM
users_table
GROUP BY
user_id, value_1, value_2
HAVING count(*) > 2
ORDER BY
(SUM(value_1) OVER (PARTITION BY user_id)) , 2 DESC, 1
LIMIT
10;
user_id | sum
---------+-----
5 | 3
4 | 2
(2 rows)
-- not a meaningful query, with interesting syntax
SELECT
user_id,
AVG(avg(value_1)) OVER (PARTITION BY user_id, max(user_id), MIN(value_2)),
AVG(avg(user_id)) OVER (PARTITION BY user_id, min(user_id), AVG(value_1))
FROM
users_table
GROUP BY
1
ORDER BY
3 DESC, 2 DESC, 1 DESC;
user_id | avg | avg
---------+--------------------+------------------------
6 | 2.1000000000000000 | 6.0000000000000000
5 | 2.6538461538461538 | 5.0000000000000000
4 | 2.7391304347826087 | 4.0000000000000000
3 | 2.3529411764705882 | 3.0000000000000000
2 | 2.3333333333333333 | 2.0000000000000000
1 | 3.2857142857142857 | 1.00000000000000000000
(6 rows)
EXPLAIN
SELECT
user_id,
AVG(avg(value_1)) OVER (PARTITION BY user_id, max(user_id), MIN(value_2)),
AVG(avg(user_id)) OVER (PARTITION BY user_id, min(user_id), AVG(value_1))
FROM
users_table
GROUP BY
1
ORDER BY
3 DESC, 2 DESC, 1 DESC;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------
Sort (cost=0.00..0.00 rows=0 width=0)
Sort Key: remote_scan.avg_1 DESC, remote_scan.avg DESC, remote_scan.user_id DESC
-> HashAggregate (cost=0.00..0.00 rows=0 width=0)
Group Key: remote_scan.user_id
-> Custom Scan (Citus Real-Time) (cost=0.00..0.00 rows=0 width=0)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> WindowAgg (cost=5.06..5.80 rows=33 width=128)
-> Sort (cost=5.06..5.14 rows=33 width=128)
Sort Key: users_table.user_id, (min(users_table.user_id)), (avg(users_table.value_1))
-> WindowAgg (cost=3.48..4.22 rows=33 width=128)
-> Sort (cost=3.48..3.56 rows=33 width=96)
Sort Key: users_table.user_id, (max(users_table.user_id)), (min(users_table.value_2))
-> HashAggregate (cost=2.07..2.65 rows=33 width=96)
Group Key: users_table.user_id
-> Seq Scan on users_table_1400000 users_table (cost=0.00..1.33 rows=33 width=12)
(18 rows)
SELECT
user_id,
1 + sum(value_1),
1 + AVG(value_2) OVER (partition by user_id)
FROM
users_table
GROUP BY
user_id, value_2
ORDER BY
user_id, value_2;
user_id | ?column? | ?column?
---------+----------+--------------------
1 | 5 | 3.2500000000000000
1 | 4 | 3.2500000000000000
1 | 6 | 3.2500000000000000
1 | 12 | 3.2500000000000000
2 | 3 | 3.5000000000000000
2 | 5 | 3.5000000000000000
2 | 13 | 3.5000000000000000
2 | 6 | 3.5000000000000000
2 | 17 | 3.5000000000000000
2 | 4 | 3.5000000000000000
3 | 3 | 4.0000000000000000
3 | 13 | 4.0000000000000000
3 | 10 | 4.0000000000000000
3 | 2 | 4.0000000000000000
3 | 17 | 4.0000000000000000
4 | 4 | 3.5000000000000000
4 | 28 | 3.5000000000000000
4 | 1 | 3.5000000000000000
4 | 11 | 3.5000000000000000
4 | 17 | 3.5000000000000000
4 | 8 | 3.5000000000000000
5 | 7 | 3.5000000000000000
5 | 17 | 3.5000000000000000
5 | 24 | 3.5000000000000000
5 | 9 | 3.5000000000000000
5 | 8 | 3.5000000000000000
5 | 10 | 3.5000000000000000
6 | 6 | 3.0000000000000000
6 | 3 | 3.0000000000000000
6 | 9 | 3.0000000000000000
6 | 3 | 3.0000000000000000
6 | 5 | 3.0000000000000000
(32 rows)
EXPLAIN
SELECT
user_id,
1 + sum(value_1),
1 + AVG(value_2) OVER (partition by user_id)
FROM
users_table
GROUP BY
user_id, value_2
ORDER BY
user_id, value_2;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------
Sort (cost=0.00..0.00 rows=0 width=0)
Sort Key: remote_scan.user_id, remote_scan.worker_column_4
-> HashAggregate (cost=0.00..0.00 rows=0 width=0)
Group Key: remote_scan.user_id, remote_scan.worker_column_4
-> Custom Scan (Citus Real-Time) (cost=0.00..0.00 rows=0 width=0)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> WindowAgg (cost=2.16..3.40 rows=33 width=48)
-> GroupAggregate (cost=2.16..2.82 rows=33 width=16)
Group Key: users_table.user_id, users_table.value_2
-> Sort (cost=2.16..2.24 rows=33 width=12)
Sort Key: users_table.user_id, users_table.value_2
-> Seq Scan on users_table_1400000 users_table (cost=0.00..1.33 rows=33 width=12)
(15 rows)
SELECT
user_id,
1 + sum(value_1),
1 + AVG(value_2) OVER (partition by user_id)
FROM
users_table
GROUP BY
user_id, value_2
ORDER BY
2 DESC, 1
LIMIT 5;
user_id | ?column? | ?column?
---------+----------+--------------------
4 | 28 | 3.5000000000000000
5 | 24 | 3.5000000000000000
2 | 17 | 3.5000000000000000
3 | 17 | 4.0000000000000000
4 | 17 | 3.5000000000000000
(5 rows)
-- rank and ordering in the reverse order
SELECT
user_id,
avg(value_1),
RANK() OVER (partition by user_id order by value_2)
FROM
users_table
GROUP BY user_id, value_2
ORDER BY user_id, value_2 DESC;
user_id | avg | rank
---------+------------------------+------
1 | 3.6666666666666667 | 4
1 | 2.5000000000000000 | 3
1 | 3.0000000000000000 | 2
1 | 4.0000000000000000 | 1
2 | 1.5000000000000000 | 6
2 | 3.2000000000000000 | 5
2 | 1.6666666666666667 | 4
2 | 3.0000000000000000 | 3
2 | 1.3333333333333333 | 2
2 | 2.0000000000000000 | 1
3 | 2.6666666666666667 | 5
3 | 1.00000000000000000000 | 4
3 | 3.0000000000000000 | 3
3 | 2.4000000000000000 | 2
3 | 1.00000000000000000000 | 1
4 | 3.5000000000000000 | 6
4 | 3.2000000000000000 | 5
4 | 3.3333333333333333 | 4
4 | 0.00000000000000000000 | 3
4 | 3.0000000000000000 | 2
4 | 1.00000000000000000000 | 1
5 | 3.0000000000000000 | 6
5 | 2.3333333333333333 | 5
5 | 1.6000000000000000 | 4
5 | 2.8750000000000000 | 3
5 | 3.2000000000000000 | 2
5 | 3.0000000000000000 | 1
6 | 1.3333333333333333 | 5
6 | 2.0000000000000000 | 4
6 | 4.0000000000000000 | 3
6 | 1.00000000000000000000 | 2
6 | 2.5000000000000000 | 1
(32 rows)
-- order by in the window function is same as avg(value_1) DESC
SELECT
user_id,
avg(value_1),
RANK() OVER (partition by user_id order by 1 / (1 + avg(value_1)))
FROM
users_table
GROUP BY user_id, value_2
ORDER BY user_id, avg(value_1) DESC;
user_id | avg | rank
---------+------------------------+------
1 | 4.0000000000000000 | 1
1 | 3.6666666666666667 | 2
1 | 3.0000000000000000 | 3
1 | 2.5000000000000000 | 4
2 | 3.2000000000000000 | 1
2 | 3.0000000000000000 | 2
2 | 2.0000000000000000 | 3
2 | 1.6666666666666667 | 4
2 | 1.5000000000000000 | 5
2 | 1.3333333333333333 | 6
3 | 3.0000000000000000 | 1
3 | 2.6666666666666667 | 2
3 | 2.4000000000000000 | 3
3 | 1.00000000000000000000 | 4
3 | 1.00000000000000000000 | 4
4 | 3.5000000000000000 | 1
4 | 3.3333333333333333 | 2
4 | 3.2000000000000000 | 3
4 | 3.0000000000000000 | 4
4 | 1.00000000000000000000 | 5
4 | 0.00000000000000000000 | 6
5 | 3.2000000000000000 | 1
5 | 3.0000000000000000 | 2
5 | 3.0000000000000000 | 2
5 | 2.8750000000000000 | 4
5 | 2.3333333333333333 | 5
5 | 1.6000000000000000 | 6
6 | 4.0000000000000000 | 1
6 | 2.5000000000000000 | 2
6 | 2.0000000000000000 | 3
6 | 1.3333333333333333 | 4
6 | 1.00000000000000000000 | 5
(32 rows)
EXPLAIN
SELECT
user_id,
avg(value_1),
RANK() OVER (partition by user_id order by 1 / (1 + avg(value_1)))
FROM
users_table
GROUP BY user_id, value_2
ORDER BY user_id, avg(value_1) DESC;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------
Sort (cost=0.00..0.00 rows=0 width=0)
Sort Key: remote_scan.user_id, (pg_catalog.sum(((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))) / pg_catalog.sum(remote_scan.rank)) DESC
-> HashAggregate (cost=0.00..0.00 rows=0 width=0)
Group Key: remote_scan.user_id, remote_scan.worker_column_5
-> Custom Scan (Citus Real-Time) (cost=0.00..0.00 rows=0 width=0)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> WindowAgg (cost=3.32..4.14 rows=33 width=80)
-> Sort (cost=3.32..3.40 rows=33 width=56)
Sort Key: users_table.user_id, (('1'::numeric / ('1'::numeric + avg(users_table.value_1))))
-> HashAggregate (cost=1.91..2.49 rows=33 width=56)
Group Key: users_table.user_id, users_table.value_2
-> Seq Scan on users_table_1400000 users_table (cost=0.00..1.33 rows=33 width=12)
(15 rows)
-- order by in the window function is same as avg(value_1) DESC
SELECT
user_id,
avg(value_1),
RANK() OVER (partition by user_id order by 1 / (1 + avg(value_1)))
FROM
users_table
GROUP BY user_id, value_2
ORDER BY user_id, avg(value_1) DESC;
user_id | avg | rank
---------+------------------------+------
1 | 4.0000000000000000 | 1
1 | 3.6666666666666667 | 2
1 | 3.0000000000000000 | 3
1 | 2.5000000000000000 | 4
2 | 3.2000000000000000 | 1
2 | 3.0000000000000000 | 2
2 | 2.0000000000000000 | 3
2 | 1.6666666666666667 | 4
2 | 1.5000000000000000 | 5
2 | 1.3333333333333333 | 6
3 | 3.0000000000000000 | 1
3 | 2.6666666666666667 | 2
3 | 2.4000000000000000 | 3
3 | 1.00000000000000000000 | 4
3 | 1.00000000000000000000 | 4
4 | 3.5000000000000000 | 1
4 | 3.3333333333333333 | 2
4 | 3.2000000000000000 | 3
4 | 3.0000000000000000 | 4
4 | 1.00000000000000000000 | 5
4 | 0.00000000000000000000 | 6
5 | 3.2000000000000000 | 1
5 | 3.0000000000000000 | 2
5 | 3.0000000000000000 | 2
5 | 2.8750000000000000 | 4
5 | 2.3333333333333333 | 5
5 | 1.6000000000000000 | 6
6 | 4.0000000000000000 | 1
6 | 2.5000000000000000 | 2
6 | 2.0000000000000000 | 3
6 | 1.3333333333333333 | 4
6 | 1.00000000000000000000 | 5
(32 rows)
-- limit is not pushed down to worker !!
EXPLAIN
SELECT
user_id,
avg(value_1),
RANK() OVER (partition by user_id order by 1 / (1 + avg(value_1)))
FROM
users_table
GROUP BY user_id, value_2
ORDER BY user_id, avg(value_1) DESC
LIMIT 5;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit (cost=0.00..0.00 rows=0 width=0)
-> Sort (cost=0.00..0.00 rows=0 width=0)
Sort Key: remote_scan.user_id, (pg_catalog.sum(((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))) / pg_catalog.sum(remote_scan.rank)) DESC
-> HashAggregate (cost=0.00..0.00 rows=0 width=0)
Group Key: remote_scan.user_id, remote_scan.worker_column_5
-> Custom Scan (Citus Real-Time) (cost=0.00..0.00 rows=0 width=0)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> WindowAgg (cost=3.32..4.14 rows=33 width=80)
-> Sort (cost=3.32..3.40 rows=33 width=56)
Sort Key: users_table.user_id, (('1'::numeric / ('1'::numeric + avg(users_table.value_1))))
-> HashAggregate (cost=1.91..2.49 rows=33 width=56)
Group Key: users_table.user_id, users_table.value_2
-> Seq Scan on users_table_1400000 users_table (cost=0.00..1.33 rows=33 width=12)
(16 rows)
EXPLAIN
SELECT
user_id,
avg(value_1),
RANK() OVER (partition by user_id order by 1 / (1 + avg(value_1)))
FROM
users_table
GROUP BY user_id, value_2
ORDER BY user_id, avg(value_1) DESC
LIMIT 5;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit (cost=0.00..0.00 rows=0 width=0)
-> Sort (cost=0.00..0.00 rows=0 width=0)
Sort Key: remote_scan.user_id, (pg_catalog.sum(((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))) / pg_catalog.sum(remote_scan.rank)) DESC
-> HashAggregate (cost=0.00..0.00 rows=0 width=0)
Group Key: remote_scan.user_id, remote_scan.worker_column_5
-> Custom Scan (Citus Real-Time) (cost=0.00..0.00 rows=0 width=0)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> WindowAgg (cost=3.32..4.14 rows=33 width=80)
-> Sort (cost=3.32..3.40 rows=33 width=56)
Sort Key: users_table.user_id, (('1'::numeric / ('1'::numeric + avg(users_table.value_1))))
-> HashAggregate (cost=1.91..2.49 rows=33 width=56)
Group Key: users_table.user_id, users_table.value_2
-> Seq Scan on users_table_1400000 users_table (cost=0.00..1.33 rows=33 width=12)
(16 rows)
EXPLAIN
SELECT
user_id,
avg(value_1),
RANK() OVER (partition by user_id order by 1 / (1 + sum(value_2)))
FROM
users_table
GROUP BY user_id, value_2
ORDER BY user_id, avg(value_1) DESC
LIMIT 5;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit (cost=0.00..0.00 rows=0 width=0)
-> Sort (cost=0.00..0.00 rows=0 width=0)
Sort Key: remote_scan.user_id, (pg_catalog.sum(((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))) / pg_catalog.sum(remote_scan.rank)) DESC
-> HashAggregate (cost=0.00..0.00 rows=0 width=0)
Group Key: remote_scan.user_id, remote_scan.worker_column_5
-> Custom Scan (Citus Real-Time) (cost=0.00..0.00 rows=0 width=0)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> WindowAgg (cost=3.15..3.98 rows=33 width=48)
-> Sort (cost=3.15..3.23 rows=33 width=40)
Sort Key: users_table.user_id, ((1 / (1 + sum(users_table.value_2))))
-> HashAggregate (cost=1.83..2.32 rows=33 width=40)
Group Key: users_table.user_id, users_table.value_2
-> Seq Scan on users_table_1400000 users_table (cost=0.00..1.33 rows=33 width=12)
(16 rows)
EXPLAIN
SELECT
user_id,
avg(value_1),
RANK() OVER (partition by user_id order by sum(value_2))
FROM
users_table
GROUP BY user_id, value_2
ORDER BY user_id, avg(value_1) DESC
LIMIT 5;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit (cost=0.00..0.00 rows=0 width=0)
-> Sort (cost=0.00..0.00 rows=0 width=0)
Sort Key: remote_scan.user_id, (pg_catalog.sum(((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))) / pg_catalog.sum(remote_scan.rank)) DESC
-> HashAggregate (cost=0.00..0.00 rows=0 width=0)
Group Key: remote_scan.user_id, remote_scan.worker_column_5
-> Custom Scan (Citus Real-Time) (cost=0.00..0.00 rows=0 width=0)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> WindowAgg (cost=2.90..3.56 rows=33 width=40)
-> Sort (cost=2.90..2.99 rows=33 width=32)
Sort Key: users_table.user_id, (sum(users_table.value_2))
-> HashAggregate (cost=1.74..2.07 rows=33 width=32)
Group Key: users_table.user_id, users_table.value_2
-> Seq Scan on users_table_1400000 users_table (cost=0.00..1.33 rows=33 width=12)
(16 rows)

View File

@ -471,7 +471,7 @@ ORDER BY
LIMIT
5;
ERROR: could not run distributed query because the window function that is used cannot be pushed down
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
-- Window functions that partition by the distribution column in subqueries in CTEs are ok
WITH top_users AS
(SELECT *

View File

@ -31,7 +31,8 @@ test: multi_read_from_secondaries
# ----------
test: multi_create_table
test: multi_create_table_constraints multi_master_protocol multi_load_data multi_behavioral_analytics_create_table
test: multi_behavioral_analytics_basics multi_behavioral_analytics_single_shard_queries multi_insert_select_non_pushable_queries multi_insert_select multi_insert_select_window multi_shard_update_delete
test: multi_behavioral_analytics_basics multi_behavioral_analytics_single_shard_queries multi_insert_select_non_pushable_queries multi_insert_select
test: multi_insert_select_window multi_shard_update_delete window_functions
# ----------
# Tests for partitioning support

View File

@ -2313,3 +2313,90 @@ GROUP BY
types
ORDER BY
types;
-- we've fixed a bug related to joins w/wout alias
-- while implementing top window functions
-- thus adding some tests related to that (i.e., next 3 tests)
WITH users_events AS
(
SELECT
user_id
FROM
users_table
)
SELECT
uid,
event_type,
value_2,
value_3
FROM (
(SELECT
user_id as uid
FROM
users_events
) users
JOIN
events_table
ON
users.uid = events_table.event_type
) a
ORDER BY
1,2,3,4
LIMIT 5;
-- the following queries are almost the same,
-- the only difference is the final GROUP BY
SELECT a.user_id, avg(b.value_2) as subquery_avg
FROM
(SELECT
user_id
FROM
users_table
WHERE
(value_1 > 2)
GROUP BY
user_id
HAVING
count(distinct value_1) > 2
) as a
LEFT JOIN
(SELECT
DISTINCT ON (value_2) value_2 , user_id, value_3
FROM
users_table
WHERE
(value_1 > 3)
ORDER BY
1,2,3
) AS b
USING (user_id)
GROUP BY user_id
ORDER BY 1;
-- see the comment for the above query
SELECT a.user_id, avg(b.value_2) as subquery_avg
FROM
(SELECT
user_id
FROM
users_table
WHERE
(value_1 > 2)
GROUP BY
user_id
HAVING
count(distinct value_1) > 2
) as a
LEFT JOIN
(SELECT
DISTINCT ON (value_2) value_2 , user_id, value_3
FROM
users_table
WHERE
(value_1 > 3)
ORDER BY
1,2,3
) AS b
USING (user_id)
GROUP BY a.user_id
ORDER BY 1;

View File

@ -0,0 +1,421 @@
-- ===================================================================
-- test top level window functions that are pushdownable
-- ===================================================================
-- a very simple window function with an aggregate and a window function
-- distribution column is on the partition by clause, so the whole
-- window computation can run on the workers
SELECT
   user_id, COUNT(*) OVER (PARTITION BY user_id),
   rank() OVER (PARTITION BY user_id)
FROM
   users_table
ORDER BY
   1 DESC, 2 DESC, 3 DESC
LIMIT 5;
-- a more complicated window clause, including an aggregate
-- in both the window clause and the target entry
-- (aggregate-over-aggregate: avg() of per-group avg(value_3))
SELECT
   user_id, avg(avg(value_3)) OVER (PARTITION BY user_id, MIN(value_2))
FROM
   users_table
GROUP BY
   1
ORDER BY
   2 DESC NULLS LAST, 1 DESC;
-- window clause operates on the results of a subquery
-- (the subquery joins two distributed tables before windowing)
SELECT
   user_id, max(value_1) OVER (PARTITION BY user_id, MIN(value_2))
FROM (
   SELECT
      DISTINCT us.user_id, us.value_2, value_1, random() as r1
   FROM
      users_table as us, events_table
   WHERE
      us.user_id = events_table.user_id AND event_type IN (1,2)
   ORDER BY
      user_id, value_2
   ) s
GROUP BY
   1, value_1
ORDER BY
   2 DESC, 1;
-- window function operates on the results of
-- a join
SELECT
   us.user_id,
   SUM(us.value_1) OVER (PARTITION BY us.user_id)
FROM
   users_table us
JOIN
   events_table ev
ON (us.user_id = ev.user_id)
GROUP BY
   1,
   value_1
ORDER BY
   1,
   2
LIMIT 5;
-- the same query, but this time join with an alias
-- (exercises the aliased-join deparsing fix mentioned above)
SELECT
   user_id, value_1, SUM(j.value_1) OVER (PARTITION BY j.user_id)
FROM
   (users_table us
    JOIN
    events_table ev
    USING (user_id )
   ) j
GROUP BY
   user_id,
   value_1
ORDER BY
   3 DESC, 2 DESC, 1 DESC
LIMIT 5;
-- querying views that have window functions should be ok
CREATE VIEW window_view AS
SELECT
   DISTINCT user_id, rank() OVER (PARTITION BY user_id ORDER BY value_1)
FROM
   users_table
GROUP BY
   user_id, value_1
HAVING count(*) > 1;
-- Window function in View works
SELECT *
FROM
   window_view
ORDER BY
   2 DESC, 1
LIMIT 10;
-- the other way around also should work fine:
-- query a plain view using window functions
CREATE VIEW users_view AS SELECT * FROM users_table;
SELECT
   DISTINCT user_id, rank() OVER (PARTITION BY user_id ORDER BY value_1)
FROM
   users_view
GROUP BY
   user_id, value_1
HAVING count(*) > 4
ORDER BY
   2 DESC, 1;
-- clean up the test views
DROP VIEW users_view, window_view;
-- window function uses columns from two different tables
SELECT
   DISTINCT ON (events_table.user_id, rnk) events_table.user_id, rank() OVER my_win AS rnk
FROM
   events_table, users_table
WHERE
   users_table.user_id = events_table.user_id
WINDOW
   my_win AS (PARTITION BY events_table.user_id, users_table.value_1 ORDER BY events_table.time DESC)
ORDER BY
   rnk DESC, 1 DESC
LIMIT 10;
-- the same query where a reference table column is also in the partition by clause;
-- still pushdownable because the distribution column is present
SELECT
   DISTINCT ON (events_table.user_id, rnk) events_table.user_id, rank() OVER my_win AS rnk
FROM
   events_table, users_ref_test_table uref
WHERE
   uref.id = events_table.user_id
WINDOW
   my_win AS (PARTITION BY events_table.user_id, uref.k_no ORDER BY events_table.time DESC)
ORDER BY
   rnk DESC, 1 DESC
LIMIT 10;
-- a similar query, but with no distribution column in the partition by clause,
-- is not supported (expected to error out)
SELECT
   DISTINCT ON (events_table.user_id, rnk) events_table.user_id, rank() OVER my_win AS rnk
FROM
   events_table, users_ref_test_table uref
WHERE
   uref.id = events_table.user_id
WINDOW
   my_win AS (PARTITION BY events_table.value_2, uref.k_no ORDER BY events_table.time DESC)
ORDER BY
   rnk DESC, 1 DESC
LIMIT 10;
-- ORDER BY in the window function is an aggregate
SELECT
   user_id, rank() OVER my_win as rnk, avg(value_2) as avg_val_2
FROM
   events_table
GROUP BY
   user_id, date_trunc('day', time)
WINDOW
   my_win AS (PARTITION BY user_id ORDER BY avg(event_type) DESC)
ORDER BY
   3 DESC, 2 DESC, 1 DESC;
-- lets push the limits of writing complex expressions along with the window functions:
-- multiple named windows, FILTER clauses, lag(), and a window defined
-- in terms of another window (my_win_4 references my_win_3)
SELECT
    COUNT(*) OVER (PARTITION BY user_id, user_id + 1),
    rank() OVER (PARTITION BY user_id) as cnt1,
    COUNT(*) OVER (PARTITION BY user_id, abs(value_1 - value_2)) as cnt2,
    date_trunc('min', lag(time) OVER (PARTITION BY user_id)) as datee,
    rank() OVER my_win as rnnk,
    avg(CASE
          WHEN user_id > 4
            THEN value_1
          ELSE value_2
        END) FILTER (WHERE user_id > 2) OVER my_win_2 as filtered_count,
    sum(user_id * (5.0 / (value_1 + value_2 + 0.1)) * value_3) FILTER (WHERE value_1::text LIKE '%1%') OVER my_win_4 as cnt_with_filter_2
FROM
    users_table
WINDOW
  my_win AS (PARTITION BY user_id, (value_1%3)::int ORDER BY time DESC),
  my_win_2 AS (PARTITION BY user_id, (value_1)::int ORDER BY time DESC),
  my_win_3 AS (PARTITION BY user_id, date_trunc('min', time)),
  my_win_4 AS (my_win_3 ORDER BY value_2, value_3)
ORDER BY
  cnt_with_filter_2 DESC NULLS LAST, filtered_count DESC NULLS LAST, datee DESC NULLS LAST, rnnk DESC, cnt2 DESC, cnt1 DESC, user_id DESC
LIMIT 5;
-- some tests with GROUP BY along with PARTITION BY
-- (aggregates appear both as window inputs and inside partition by)
SELECT
   user_id,
   rank() OVER my_win as my_rank,
   avg(avg(event_type)) OVER my_win_2 as avg,
   max(time) as mx_time
FROM
   events_table
GROUP BY
   user_id,
   value_2
WINDOW
   my_win AS (PARTITION BY user_id, max(event_type) ORDER BY count(*) DESC),
   my_win_2 AS (PARTITION BY user_id, avg(user_id) ORDER BY count(*) DESC)
ORDER BY
   avg DESC,
   mx_time DESC,
   my_rank DESC,
   user_id DESC;
-- test for range and rows mode and different window functions
-- mostly to make sure that deparsing works fine
SELECT
   user_id,
   rank() OVER (PARTITION BY user_id ROWS BETWEEN
              UNBOUNDED PRECEDING AND CURRENT ROW),
   dense_rank() OVER (PARTITION BY user_id RANGE BETWEEN
              UNBOUNDED PRECEDING AND CURRENT ROW),
   CUME_DIST() OVER (PARTITION BY user_id RANGE BETWEEN
              UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING),
   PERCENT_RANK() OVER (PARTITION BY user_id ORDER BY avg(value_1) RANGE BETWEEN
              UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
FROM
   users_table
GROUP BY
   1
ORDER BY
   4 DESC,3 DESC,2 DESC ,1 DESC;
-- some tests with GROUP BY, HAVING and LIMIT
SELECT
   user_id, sum(event_type) OVER my_win , event_type
FROM
   events_table
GROUP BY
   user_id, event_type
HAVING count(*) > 2
WINDOW my_win AS (PARTITION BY user_id, max(event_type) ORDER BY count(*) DESC)
ORDER BY
   2 DESC, 3 DESC, 1 DESC
LIMIT
   5;
-- Group by has more columns than partition by
SELECT
   DISTINCT user_id, SUM(value_2) OVER (PARTITION BY user_id)
FROM
   users_table
GROUP BY
   user_id, value_1, value_2
HAVING count(*) > 2
ORDER BY
   2 DESC, 1
LIMIT
   10;
-- same query, but keep only one row per user via DISTINCT ON
SELECT
   DISTINCT ON (user_id) user_id, SUM(value_2) OVER (PARTITION BY user_id)
FROM
   users_table
GROUP BY
   user_id, value_1, value_2
HAVING count(*) > 2
ORDER BY
   1, 2 DESC
LIMIT
   10;
-- DISTINCT ON over a window expression itself; the same expression
-- must lead the ORDER BY for DISTINCT ON to be valid
SELECT
   DISTINCT ON (SUM(value_1) OVER (PARTITION BY user_id)) user_id, SUM(value_2) OVER (PARTITION BY user_id)
FROM
   users_table
GROUP BY
   user_id, value_1, value_2
HAVING count(*) > 2
ORDER BY
   (SUM(value_1) OVER (PARTITION BY user_id)) , 2 DESC, 1
LIMIT
   10;
-- not a meaningful query, with interesting syntax:
-- aggregates of aggregates in both target list and partition by
SELECT
   user_id,
   AVG(avg(value_1)) OVER (PARTITION BY user_id, max(user_id), MIN(value_2)),
   AVG(avg(user_id)) OVER (PARTITION BY user_id, min(user_id), AVG(value_1))
FROM
   users_table
GROUP BY
   1
ORDER BY
   3 DESC, 2 DESC, 1 DESC;
-- the same query under EXPLAIN, to verify the distributed plan shape
EXPLAIN
SELECT
   user_id,
   AVG(avg(value_1)) OVER (PARTITION BY user_id, max(user_id), MIN(value_2)),
   AVG(avg(user_id)) OVER (PARTITION BY user_id, min(user_id), AVG(value_1))
FROM
   users_table
GROUP BY
   1
ORDER BY
   3 DESC, 2 DESC, 1 DESC;
-- window aggregate wrapped in a scalar expression (1 + AVG(...));
-- the master query must treat the pushed-down window result as a Var
SELECT
   user_id,
   1 + sum(value_1),
   1 + AVG(value_2) OVER (partition by user_id)
FROM
   users_table
GROUP BY
   user_id, value_2
ORDER BY
   user_id, value_2;
-- same query under EXPLAIN
EXPLAIN
SELECT
   user_id,
   1 + sum(value_1),
   1 + AVG(value_2) OVER (partition by user_id)
FROM
   users_table
GROUP BY
   user_id, value_2
ORDER BY
   user_id, value_2;
-- same query again, with a different final ORDER BY and a LIMIT
SELECT
   user_id,
   1 + sum(value_1),
   1 + AVG(value_2) OVER (partition by user_id)
FROM
   users_table
GROUP BY
   user_id, value_2
ORDER BY
   2 DESC, 1
LIMIT 5;
-- rank and ordering in the reverse order
SELECT
   user_id,
   avg(value_1),
   RANK() OVER (partition by user_id order by value_2)
FROM
   users_table
GROUP BY user_id, value_2
ORDER BY user_id, value_2 DESC;
-- order by in the window function is the same as avg(value_1) DESC
-- (1 / (1 + avg) is monotonically decreasing in avg)
SELECT
   user_id,
   avg(value_1),
   RANK() OVER (partition by user_id order by 1 / (1 + avg(value_1)))
FROM
   users_table
GROUP BY user_id, value_2
ORDER BY user_id, avg(value_1) DESC;
-- same query under EXPLAIN
EXPLAIN
SELECT
   user_id,
   avg(value_1),
   RANK() OVER (partition by user_id order by 1 / (1 + avg(value_1)))
FROM
   users_table
GROUP BY user_id, value_2
ORDER BY user_id, avg(value_1) DESC;
-- order by in the window function is the same as avg(value_1) DESC
-- NOTE(review): this repeats the previous query verbatim — presumably
-- intentional to pin the output twice; confirm or deduplicate
SELECT
   user_id,
   avg(value_1),
   RANK() OVER (partition by user_id order by 1 / (1 + avg(value_1)))
FROM
   users_table
GROUP BY user_id, value_2
ORDER BY user_id, avg(value_1) DESC;
-- limit is not pushed down to the worker when a window function is present
EXPLAIN
SELECT
   user_id,
   avg(value_1),
   RANK() OVER (partition by user_id order by 1 / (1 + avg(value_1)))
FROM
   users_table
GROUP BY user_id, value_2
ORDER BY user_id, avg(value_1) DESC
LIMIT 5;
-- NOTE(review): identical EXPLAIN repeated — confirm it is intentional
EXPLAIN
SELECT
   user_id,
   avg(value_1),
   RANK() OVER (partition by user_id order by 1 / (1 + avg(value_1)))
FROM
   users_table
GROUP BY user_id, value_2
ORDER BY user_id, avg(value_1) DESC
LIMIT 5;
-- same plan check with sum() instead of avg() in the window order by
EXPLAIN
SELECT
   user_id,
   avg(value_1),
   RANK() OVER (partition by user_id order by 1 / (1 + sum(value_2)))
FROM
   users_table
GROUP BY user_id, value_2
ORDER BY user_id, avg(value_1) DESC
LIMIT 5;
-- and with a bare aggregate (no scalar expression) in the window order by
EXPLAIN
SELECT
   user_id,
   avg(value_1),
   RANK() OVER (partition by user_id order by sum(value_2))
FROM
   users_table
GROUP BY user_id, value_2
ORDER BY user_id, avg(value_1) DESC
LIMIT 5;