mirror of https://github.com/citusdata/citus.git
Add support for window functions that can be pushed down to worker (#2008)
This is the first of series of window function work. We can now support window functions that can be pushed down to workers. Window function must have distribution column in the partition clause to be pushed down.pull/2031/head
parent
8e2c72c054
commit
76f6883d5d
|
@ -1296,7 +1296,13 @@ MasterExtendedOpNode(MultiExtendedOp *originalOpNode,
|
|||
Expr *newExpression = NULL;
|
||||
|
||||
bool hasAggregates = contain_agg_clause((Node *) originalExpression);
|
||||
if (hasAggregates)
|
||||
bool hasWindowFunction = contain_window_function((Node *) originalExpression);
|
||||
|
||||
/*
|
||||
* if the aggregate belongs to a window function, it is not mutated, but pushed
|
||||
* down to worker as it is. Master query should treat that as a Var.
|
||||
*/
|
||||
if (hasAggregates && !hasWindowFunction)
|
||||
{
|
||||
Node *newNode = MasterAggregateMutator((Node *) originalExpression,
|
||||
walkerContext);
|
||||
|
@ -1858,6 +1864,9 @@ WorkerExtendedOpNode(MultiExtendedOp *originalOpNode,
|
|||
bool hasNonPartitionColumnDistinctAgg = false;
|
||||
bool repartitionSubquery = false;
|
||||
|
||||
/* only window functions that can be pushed down reach here */
|
||||
bool pushDownWindowFunction = originalOpNode->hasWindowFuncs;
|
||||
|
||||
walkerContext->expressionList = NIL;
|
||||
|
||||
/* find max of sort group ref index */
|
||||
|
@ -1893,12 +1902,18 @@ WorkerExtendedOpNode(MultiExtendedOp *originalOpNode,
|
|||
List *newExpressionList = NIL;
|
||||
ListCell *newExpressionCell = NULL;
|
||||
bool hasAggregates = contain_agg_clause((Node *) originalExpression);
|
||||
bool hasWindowFunction = contain_window_function((Node *) originalExpression);
|
||||
|
||||
/* reset walker context */
|
||||
walkerContext->expressionList = NIL;
|
||||
walkerContext->createGroupByClause = false;
|
||||
|
||||
if (hasAggregates)
|
||||
/*
|
||||
* If the expression uses aggregates inside window function contain agg
|
||||
* clause still returns true. We want to make sure it is not a part of
|
||||
* window function before we proceed.
|
||||
*/
|
||||
if (hasAggregates && !hasWindowFunction)
|
||||
{
|
||||
WorkerAggregateWalker((Node *) originalExpression, walkerContext);
|
||||
|
||||
|
@ -2013,6 +2028,8 @@ WorkerExtendedOpNode(MultiExtendedOp *originalOpNode,
|
|||
workerExtendedOpNode = CitusMakeNode(MultiExtendedOp);
|
||||
workerExtendedOpNode->distinctClause = NIL;
|
||||
workerExtendedOpNode->hasDistinctOn = false;
|
||||
workerExtendedOpNode->hasWindowFuncs = originalOpNode->hasWindowFuncs;
|
||||
workerExtendedOpNode->windowClause = originalOpNode->windowClause;
|
||||
|
||||
if (!queryHasAggregates)
|
||||
{
|
||||
|
@ -2042,13 +2059,48 @@ WorkerExtendedOpNode(MultiExtendedOp *originalOpNode,
|
|||
newTargetEntryListForSortClauses);
|
||||
}
|
||||
|
||||
if (workerExtendedOpNode->windowClause)
|
||||
{
|
||||
List *windowClauseList = workerExtendedOpNode->windowClause;
|
||||
ListCell *windowClauseCell = NULL;
|
||||
|
||||
foreach(windowClauseCell, windowClauseList)
|
||||
{
|
||||
WindowClause *windowClause = (WindowClause *) lfirst(windowClauseCell);
|
||||
|
||||
List *partitionClauseTargetList =
|
||||
GenerateNewTargetEntriesForSortClauses(originalOpNode->targetList,
|
||||
windowClause->partitionClause,
|
||||
&targetProjectionNumber,
|
||||
&nextSortGroupRefIndex);
|
||||
List *orderClauseTargetList =
|
||||
GenerateNewTargetEntriesForSortClauses(originalOpNode->targetList,
|
||||
windowClause->orderClause,
|
||||
&targetProjectionNumber,
|
||||
&nextSortGroupRefIndex);
|
||||
|
||||
newTargetEntryList = list_concat(newTargetEntryList,
|
||||
partitionClauseTargetList);
|
||||
newTargetEntryList = list_concat(newTargetEntryList,
|
||||
orderClauseTargetList);
|
||||
}
|
||||
}
|
||||
|
||||
workerExtendedOpNode->targetList = newTargetEntryList;
|
||||
|
||||
/*
|
||||
* If grouped by a partition column whose values are shards have disjoint sets
|
||||
* of partition values, we can push down the having qualifier.
|
||||
*
|
||||
* When a query with subquery is provided, we can't determine if
|
||||
* groupedByDisjointPartitionColumn, therefore we also check if there is a
|
||||
* window function too. If there is a window function we would know that it
|
||||
* is safe to push down (i.e. it is partitioned on distribution column, and
|
||||
* if there is a group by, it contains distribution column).
|
||||
*
|
||||
*/
|
||||
if (havingQual != NULL && groupedByDisjointPartitionColumn)
|
||||
if (havingQual != NULL &&
|
||||
(groupedByDisjointPartitionColumn || pushDownWindowFunction))
|
||||
{
|
||||
workerExtendedOpNode->havingQual = originalOpNode->havingQual;
|
||||
}
|
||||
|
@ -2067,7 +2119,10 @@ HasNonPartitionColumnDistinctAgg(List *targetEntryList, Node *havingQual,
|
|||
List *tableNodeList)
|
||||
{
|
||||
List *targetVarList = pull_var_clause((Node *) targetEntryList,
|
||||
PVC_INCLUDE_AGGREGATES);
|
||||
PVC_INCLUDE_AGGREGATES |
|
||||
PVC_RECURSE_WINDOWFUNCS);
|
||||
|
||||
/* having clause can't have window functions, no need to recurse for that */
|
||||
List *havingVarList = pull_var_clause((Node *) havingQual, PVC_INCLUDE_AGGREGATES);
|
||||
List *aggregateCheckList = list_concat(targetVarList, havingVarList);
|
||||
|
||||
|
@ -2722,7 +2777,8 @@ ErrorIfContainsUnsupportedAggregate(MultiNode *logicalPlanNode)
|
|||
* PVC_REJECT_PLACEHOLDERS is implicit if PVC_INCLUDE_PLACEHOLDERS isn't
|
||||
* specified.
|
||||
*/
|
||||
List *expressionList = pull_var_clause((Node *) targetList, PVC_INCLUDE_AGGREGATES);
|
||||
List *expressionList = pull_var_clause((Node *) targetList, PVC_INCLUDE_AGGREGATES |
|
||||
PVC_INCLUDE_WINDOWFUNCS);
|
||||
|
||||
ListCell *expressionCell = NULL;
|
||||
foreach(expressionCell, expressionList)
|
||||
|
|
|
@ -161,6 +161,7 @@ static MultiNode * SubqueryMultiNodeTree(Query *originalQuery,
|
|||
plannerRestrictionContext);
|
||||
static MultiNode * SubqueryPushdownMultiNodeTree(Query *queryTree);
|
||||
|
||||
static void FlattenJoinVars(List *columnList, Query *queryTree);
|
||||
static List * CreateSubqueryTargetEntryList(List *columnList);
|
||||
static void UpdateVarMappingsForExtendedOpNode(List *columnList,
|
||||
List *subqueryTargetEntryList);
|
||||
|
@ -212,6 +213,7 @@ static bool
|
|||
ShouldUseSubqueryPushDown(Query *originalQuery, Query *rewrittenQuery)
|
||||
{
|
||||
List *qualifierList = NIL;
|
||||
StringInfo errorMessage = NULL;
|
||||
|
||||
/*
|
||||
* We check the existence of subqueries in FROM clause on the modified query
|
||||
|
@ -253,6 +255,13 @@ ShouldUseSubqueryPushDown(Query *originalQuery, Query *rewrittenQuery)
|
|||
return true;
|
||||
}
|
||||
|
||||
/* check if the query has a window function and it is safe to pushdown */
|
||||
if (originalQuery->hasWindowFuncs &&
|
||||
SafeToPushdownWindowFunction(originalQuery, &errorMessage))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -850,8 +859,8 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi
|
|||
* We support window functions when the window function
|
||||
* is partitioned on distribution column.
|
||||
*/
|
||||
if (subqueryTree->windowClause && !SafeToPushdownWindowFunction(subqueryTree,
|
||||
&errorInfo))
|
||||
if (subqueryTree->hasWindowFuncs && !SafeToPushdownWindowFunction(subqueryTree,
|
||||
&errorInfo))
|
||||
{
|
||||
errorDetail = (char *) errorInfo->data;
|
||||
preconditionsSatisfied = false;
|
||||
|
@ -2079,6 +2088,7 @@ DeferErrorIfQueryNotSupported(Query *queryTree)
|
|||
bool hasComplexJoinOrder = false;
|
||||
bool hasComplexRangeTableType = false;
|
||||
bool preconditionsSatisfied = true;
|
||||
StringInfo errorInfo = NULL;
|
||||
const char *errorHint = NULL;
|
||||
const char *joinHint = "Consider joining tables on partition column and have "
|
||||
"equal filter on joining columns.";
|
||||
|
@ -2097,15 +2107,16 @@ DeferErrorIfQueryNotSupported(Query *queryTree)
|
|||
errorHint = filterHint;
|
||||
}
|
||||
|
||||
if (queryTree->hasWindowFuncs)
|
||||
if (queryTree->hasWindowFuncs &&
|
||||
!SafeToPushdownWindowFunction(queryTree, &errorInfo))
|
||||
{
|
||||
preconditionsSatisfied = false;
|
||||
errorMessage = "could not run distributed query because the window "
|
||||
"function that is used cannot be pushed down";
|
||||
errorHint = "Window functions are supported in two ways. Either add "
|
||||
"an equality filter on the distributed tables' partition "
|
||||
"column or use the window functions inside a subquery with "
|
||||
"a PARTITION BY clause containing the distribution column";
|
||||
"column or use the window functions with a PARTITION BY "
|
||||
"clause containing the distribution column";
|
||||
}
|
||||
|
||||
if (queryTree->setOperations)
|
||||
|
@ -3135,6 +3146,8 @@ MultiExtendedOpNode(Query *queryTree)
|
|||
extendedOpNode->havingQual = queryTree->havingQual;
|
||||
extendedOpNode->distinctClause = queryTree->distinctClause;
|
||||
extendedOpNode->hasDistinctOn = queryTree->hasDistinctOn;
|
||||
extendedOpNode->hasWindowFuncs = queryTree->hasWindowFuncs;
|
||||
extendedOpNode->windowClause = queryTree->windowClause;
|
||||
|
||||
return extendedOpNode;
|
||||
}
|
||||
|
@ -3396,7 +3409,8 @@ pull_var_clause_default(Node *node)
|
|||
* PVC_REJECT_PLACEHOLDERS is implicit if PVC_INCLUDE_PLACEHOLDERS
|
||||
* isn't specified.
|
||||
*/
|
||||
List *columnList = pull_var_clause(node, PVC_RECURSE_AGGREGATES);
|
||||
List *columnList = pull_var_clause(node, PVC_RECURSE_AGGREGATES |
|
||||
PVC_RECURSE_WINDOWFUNCS);
|
||||
|
||||
return columnList;
|
||||
}
|
||||
|
@ -3768,6 +3782,8 @@ SubqueryPushdownMultiNodeTree(Query *queryTree)
|
|||
havingClauseColumnList = pull_var_clause_default(queryTree->havingQual);
|
||||
columnList = list_concat(targetColumnList, havingClauseColumnList);
|
||||
|
||||
FlattenJoinVars(columnList, queryTree);
|
||||
|
||||
/* create a target entry for each unique column */
|
||||
subqueryTargetEntryList = CreateSubqueryTargetEntryList(columnList);
|
||||
|
||||
|
@ -3835,6 +3851,73 @@ SubqueryPushdownMultiNodeTree(Query *queryTree)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* FlattenJoinVars iterates over provided columnList to identify
|
||||
* Var's that are referenced from join RTE, and reverts back to their
|
||||
* original RTEs.
|
||||
*
|
||||
* This is required because Postgres allows columns to be referenced using
|
||||
* a join alias. Therefore the same column from a table could be referenced
|
||||
* twice using its absolute table name (t1.a), and without table name (a).
|
||||
* This is a problem when one of them is inside the group by clause and the
|
||||
* other is not. Postgres is smart about it to detect that both target columns
|
||||
* resolve to the same thing, and allows a single group by clause to cover
|
||||
* both target entries when standard planner is called. Since we operate on
|
||||
* the original query, we want to make sure we provide correct varno/varattno
|
||||
* values to Postgres so that it could produce valid query.
|
||||
*
|
||||
* Only exception is that, if a join is given an alias name, we do not want to
|
||||
* flatten those var's. If we do, deparsing fails since it expects to see a join
|
||||
* alias, and cannot access the RTE in the join tree by their names.
|
||||
*/
|
||||
static void
|
||||
FlattenJoinVars(List *columnList, Query *queryTree)
|
||||
{
|
||||
ListCell *columnCell = NULL;
|
||||
List *rteList = queryTree->rtable;
|
||||
|
||||
foreach(columnCell, columnList)
|
||||
{
|
||||
Var *column = (Var *) lfirst(columnCell);
|
||||
RangeTblEntry *columnRte = NULL;
|
||||
PlannerInfo *root = NULL;
|
||||
|
||||
Assert(IsA(column, Var));
|
||||
|
||||
/*
|
||||
* if join does not have an alias, it is copied over join rte.
|
||||
* There is no need to find the JoinExpr to check whether it has
|
||||
* an alias defined.
|
||||
*
|
||||
* We use the planner's flatten_join_alias_vars routine to do
|
||||
* the flattening; it wants a PlannerInfo root node, which
|
||||
* fortunately can be mostly dummy.
|
||||
*/
|
||||
columnRte = rt_fetch(column->varno, rteList);
|
||||
if (columnRte->rtekind == RTE_JOIN && columnRte->alias == NULL)
|
||||
{
|
||||
Var *normalizedVar = NULL;
|
||||
|
||||
if (root == NULL)
|
||||
{
|
||||
root = makeNode(PlannerInfo);
|
||||
root->parse = (queryTree);
|
||||
root->planner_cxt = CurrentMemoryContext;
|
||||
root->hasJoinRTEs = true;
|
||||
}
|
||||
|
||||
normalizedVar = (Var *) flatten_join_alias_vars(root, (Node *) column);
|
||||
|
||||
/*
|
||||
* We need to copy values over existing one to make sure it is updated on
|
||||
* respective places.
|
||||
*/
|
||||
memcpy(column, normalizedVar, sizeof(Var));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* CreateSubqueryTargetEntryList creates a target entry for each unique column
|
||||
* in the column list and returns the target entry list.
|
||||
|
|
|
@ -1415,6 +1415,8 @@ BuildSubqueryJobQuery(MultiNode *multiNode)
|
|||
bool hasAggregates = false;
|
||||
List *distinctClause = NIL;
|
||||
bool hasDistinctOn = false;
|
||||
bool hasWindowFuncs = false;
|
||||
List *windowClause = NIL;
|
||||
|
||||
/* we start building jobs from below the collect node */
|
||||
Assert(!CitusIsA(multiNode, MultiCollect));
|
||||
|
@ -1462,6 +1464,8 @@ BuildSubqueryJobQuery(MultiNode *multiNode)
|
|||
havingQual = extendedOp->havingQual;
|
||||
distinctClause = extendedOp->distinctClause;
|
||||
hasDistinctOn = extendedOp->hasDistinctOn;
|
||||
hasWindowFuncs = extendedOp->hasWindowFuncs;
|
||||
windowClause = extendedOp->windowClause;
|
||||
}
|
||||
|
||||
/* build group clauses */
|
||||
|
@ -1508,6 +1512,8 @@ BuildSubqueryJobQuery(MultiNode *multiNode)
|
|||
jobQuery->hasAggs = hasAggregates;
|
||||
jobQuery->hasDistinctOn = hasDistinctOn;
|
||||
jobQuery->distinctClause = distinctClause;
|
||||
jobQuery->hasWindowFuncs = hasWindowFuncs;
|
||||
jobQuery->windowClause = windowClause;
|
||||
|
||||
return jobQuery;
|
||||
}
|
||||
|
|
|
@ -176,6 +176,8 @@ typedef struct MultiExtendedOp
|
|||
Node *havingQual;
|
||||
List *distinctClause;
|
||||
bool hasDistinctOn;
|
||||
bool hasWindowFuncs;
|
||||
List *windowClause;
|
||||
} MultiExtendedOp;
|
||||
|
||||
|
||||
|
|
|
@ -654,7 +654,7 @@ ORDER BY
|
|||
LIMIT
|
||||
10;
|
||||
ERROR: could not run distributed query because the window function that is used cannot be pushed down
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
|
||||
-- user needs to supply partition by which should
|
||||
-- include the distribution key
|
||||
INSERT INTO agg_results_window (user_id, agg_time, value_2_agg)
|
||||
|
@ -673,7 +673,7 @@ ORDER BY
|
|||
LIMIT
|
||||
10;
|
||||
ERROR: could not run distributed query because the window function that is used cannot be pushed down
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
|
||||
-- user needs to supply partition by which should
|
||||
-- include the distribution key
|
||||
INSERT INTO agg_results_window (user_id, agg_time, value_2_agg)
|
||||
|
@ -692,7 +692,7 @@ ORDER BY
|
|||
LIMIT
|
||||
10;
|
||||
ERROR: could not run distributed query because the window function that is used cannot be pushed down
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
|
||||
-- w2 should not be pushed down
|
||||
INSERT INTO agg_results_window (user_id, value_1_agg, value_2_agg)
|
||||
SELECT * FROM
|
||||
|
@ -710,7 +710,7 @@ SELECT * FROM
|
|||
LIMIT
|
||||
10;
|
||||
ERROR: could not run distributed query because the window function that is used cannot be pushed down
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
|
||||
-- GROUP BY includes the partition key, but not the WINDOW function
|
||||
INSERT INTO agg_results_window (user_id, agg_time, value_2_agg)
|
||||
SELECT
|
||||
|
@ -728,7 +728,7 @@ FROM
|
|||
WHERE
|
||||
my_rank > 125;
|
||||
ERROR: could not run distributed query because the window function that is used cannot be pushed down
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
|
||||
-- GROUP BY includes the partition key, but not the WINDOW function
|
||||
INSERT INTO agg_results_window (user_id, agg_time, value_2_agg)
|
||||
SELECT
|
||||
|
@ -746,7 +746,7 @@ FROM
|
|||
WHERE
|
||||
my_rank > 125;
|
||||
ERROR: could not run distributed query because the window function that is used cannot be pushed down
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
|
||||
-- w2 should not be allowed
|
||||
INSERT INTO agg_results_window (user_id, value_2_agg, value_3_agg)
|
||||
SELECT * FROM
|
||||
|
@ -762,7 +762,7 @@ SELECT * FROM
|
|||
w2 AS (ORDER BY events_table.time)
|
||||
) as foo;
|
||||
ERROR: could not run distributed query because the window function that is used cannot be pushed down
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
|
||||
-- unsupported window function with an override
|
||||
INSERT INTO agg_results_window(user_id, agg_time, value_2_agg)
|
||||
SELECT * FROM (
|
||||
|
@ -780,7 +780,7 @@ SELECT * FROM (
|
|||
w2 as (PARTITION BY user_id, time)
|
||||
) a;
|
||||
ERROR: could not run distributed query because the window function that is used cannot be pushed down
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
|
||||
-- Subquery in where with unsupported window function
|
||||
INSERT INTO agg_results_window(user_id)
|
||||
SELECT
|
||||
|
@ -812,7 +812,7 @@ SELECT * FROM (
|
|||
user_id
|
||||
) a;
|
||||
ERROR: could not run distributed query because the window function that is used cannot be pushed down
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
|
||||
-- UNION with only one subquery which has a partition on non-distribution column should
|
||||
-- error out
|
||||
INSERT INTO agg_results_window(user_id, value_1_agg)
|
||||
|
@ -850,5 +850,5 @@ FROM (
|
|||
)
|
||||
) AS ftop;
|
||||
ERROR: could not run distributed query because the window function that is used cannot be pushed down
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
|
||||
DROP VIEW view_with_window_func;
|
||||
|
|
|
@ -1129,12 +1129,12 @@ SELECT id, MIN(id) over (order by word_count)
|
|||
FROM articles_hash_mx
|
||||
WHERE author_id = 1 or author_id = 2;
|
||||
ERROR: could not run distributed query because the window function that is used cannot be pushed down
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
|
||||
SELECT LAG(title, 1) over (ORDER BY word_count) prev, title, word_count
|
||||
FROM articles_hash_mx
|
||||
WHERE author_id = 5 or author_id = 2;
|
||||
ERROR: could not run distributed query because the window function that is used cannot be pushed down
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
|
||||
-- complex query hitting a single shard
|
||||
SELECT
|
||||
count(DISTINCT CASE
|
||||
|
|
|
@ -1306,12 +1306,12 @@ SELECT id, MIN(id) over (order by word_count)
|
|||
FROM articles_hash
|
||||
WHERE author_id = 1 or author_id = 2;
|
||||
ERROR: could not run distributed query because the window function that is used cannot be pushed down
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
|
||||
SELECT LAG(title, 1) over (ORDER BY word_count) prev, title, word_count
|
||||
FROM articles_hash
|
||||
WHERE author_id = 5 or author_id = 2;
|
||||
ERROR: could not run distributed query because the window function that is used cannot be pushed down
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
|
||||
-- where false queries are router plannable
|
||||
SELECT *
|
||||
FROM articles_hash
|
||||
|
|
|
@ -2581,3 +2581,112 @@ ORDER BY
|
|||
3 | 1
|
||||
(4 rows)
|
||||
|
||||
-- we've fixed a bug related to joins w/wout alias
|
||||
-- while implementing top window functions
|
||||
-- thus adding some tests related to that (i.e., next 3 tests)
|
||||
WITH users_events AS
|
||||
(
|
||||
SELECT
|
||||
user_id
|
||||
FROM
|
||||
users_table
|
||||
)
|
||||
SELECT
|
||||
uid,
|
||||
event_type,
|
||||
value_2,
|
||||
value_3
|
||||
FROM (
|
||||
(SELECT
|
||||
user_id as uid
|
||||
FROM
|
||||
users_events
|
||||
) users
|
||||
JOIN
|
||||
events_table
|
||||
ON
|
||||
users.uid = events_table.event_type
|
||||
) a
|
||||
ORDER BY
|
||||
1,2,3,4
|
||||
LIMIT 5;
|
||||
uid | event_type | value_2 | value_3
|
||||
-----+------------+---------+---------
|
||||
1 | 1 | 0 | 2
|
||||
1 | 1 | 0 | 2
|
||||
1 | 1 | 0 | 2
|
||||
1 | 1 | 0 | 2
|
||||
1 | 1 | 0 | 2
|
||||
(5 rows)
|
||||
|
||||
-- the following queries are almost the same,
|
||||
-- the only difference is the final GROUP BY
|
||||
SELECT a.user_id, avg(b.value_2) as subquery_avg
|
||||
FROM
|
||||
(SELECT
|
||||
user_id
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
(value_1 > 2)
|
||||
GROUP BY
|
||||
user_id
|
||||
HAVING
|
||||
count(distinct value_1) > 2
|
||||
) as a
|
||||
LEFT JOIN
|
||||
(SELECT
|
||||
DISTINCT ON (value_2) value_2 , user_id, value_3
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
(value_1 > 3)
|
||||
ORDER BY
|
||||
1,2,3
|
||||
) AS b
|
||||
USING (user_id)
|
||||
GROUP BY user_id
|
||||
ORDER BY 1;
|
||||
user_id | subquery_avg
|
||||
---------+------------------------
|
||||
1 | 2.3333333333333333
|
||||
3 | 5.0000000000000000
|
||||
4 | 1.00000000000000000000
|
||||
5 |
|
||||
(4 rows)
|
||||
|
||||
-- see the comment for the above query
|
||||
SELECT a.user_id, avg(b.value_2) as subquery_avg
|
||||
FROM
|
||||
(SELECT
|
||||
user_id
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
(value_1 > 2)
|
||||
GROUP BY
|
||||
user_id
|
||||
HAVING
|
||||
count(distinct value_1) > 2
|
||||
) as a
|
||||
LEFT JOIN
|
||||
(SELECT
|
||||
DISTINCT ON (value_2) value_2 , user_id, value_3
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
(value_1 > 3)
|
||||
ORDER BY
|
||||
1,2,3
|
||||
) AS b
|
||||
USING (user_id)
|
||||
GROUP BY a.user_id
|
||||
ORDER BY 1;
|
||||
user_id | subquery_avg
|
||||
---------+------------------------
|
||||
1 | 2.3333333333333333
|
||||
3 | 5.0000000000000000
|
||||
4 | 1.00000000000000000000
|
||||
5 |
|
||||
(4 rows)
|
||||
|
||||
|
|
|
@ -791,7 +791,7 @@ ORDER BY
|
|||
LIMIT
|
||||
10;
|
||||
ERROR: could not run distributed query because the window function that is used cannot be pushed down
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
|
||||
-- user needs to supply partition by which should
|
||||
-- include the distribution key
|
||||
SELECT
|
||||
|
@ -809,7 +809,7 @@ ORDER BY
|
|||
LIMIT
|
||||
10;
|
||||
ERROR: could not run distributed query because the window function that is used cannot be pushed down
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
|
||||
-- user needs to supply partition by which should
|
||||
-- include the distribution key
|
||||
SELECT
|
||||
|
@ -827,7 +827,7 @@ ORDER BY
|
|||
LIMIT
|
||||
10;
|
||||
ERROR: could not run distributed query because the window function that is used cannot be pushed down
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
|
||||
-- w2 should not be pushed down
|
||||
SELECT * FROM
|
||||
(
|
||||
|
@ -844,7 +844,7 @@ SELECT * FROM
|
|||
ORDER BY 3 DESC, 1 DESC, 2 DESC NULLS LAST
|
||||
LIMIT 10;
|
||||
ERROR: could not run distributed query because the window function that is used cannot be pushed down
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
|
||||
-- w2 should not be pushed down
|
||||
SELECT * FROM
|
||||
(
|
||||
|
@ -863,7 +863,7 @@ ORDER BY
|
|||
LIMIT
|
||||
10;
|
||||
ERROR: could not run distributed query because the window function that is used cannot be pushed down
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
|
||||
-- GROUP BY includes the partition key, but not the WINDOW function
|
||||
SELECT
|
||||
user_id, time, my_rank
|
||||
|
@ -884,7 +884,7 @@ ORDER BY
|
|||
LIMIT
|
||||
10;
|
||||
ERROR: could not run distributed query because the window function that is used cannot be pushed down
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
|
||||
-- GROUP BY includes the partition key, but not the WINDOW function
|
||||
SELECT
|
||||
user_id, time, my_rank
|
||||
|
@ -905,7 +905,7 @@ ORDER BY
|
|||
LIMIT
|
||||
10;
|
||||
ERROR: could not run distributed query because the window function that is used cannot be pushed down
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
|
||||
-- Overriding window function but not supported
|
||||
SELECT * FROM (
|
||||
SELECT
|
||||
|
@ -924,7 +924,7 @@ SELECT * FROM (
|
|||
ORDER BY
|
||||
1,2,3;
|
||||
ERROR: could not run distributed query because the window function that is used cannot be pushed down
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
|
||||
-- Aggregate function on distribution column should error out
|
||||
SELECT * FROM (
|
||||
SELECT
|
||||
|
@ -937,7 +937,7 @@ SELECT * FROM (
|
|||
ORDER BY
|
||||
1 DESC, 2 DESC;
|
||||
ERROR: could not run distributed query because the window function that is used cannot be pushed down
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
|
||||
-- test with reference table partitioned on only a column from reference table
|
||||
SELECT *
|
||||
FROM
|
||||
|
@ -952,7 +952,7 @@ ORDER BY
|
|||
LIMIT
|
||||
20;
|
||||
ERROR: could not run distributed query because the window function that is used cannot be pushed down
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
|
||||
-- UNION ALL with only one of them is not partitioned over distribution column which
|
||||
-- should not be allowed.
|
||||
SELECT
|
||||
|
@ -975,7 +975,7 @@ GROUP BY user_id
|
|||
ORDER BY 1 DESC
|
||||
LIMIT 5;
|
||||
ERROR: could not run distributed query because the window function that is used cannot be pushed down
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
|
||||
-- UNION with only one subquery which has a partition on non-distribution column should
|
||||
-- error out
|
||||
SELECT *
|
||||
|
@ -1012,5 +1012,5 @@ FROM (
|
|||
ORDER BY 2 DESC, 1 DESC
|
||||
LIMIT 5;
|
||||
ERROR: could not run distributed query because the window function that is used cannot be pushed down
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
|
||||
DROP VIEW subq;
|
||||
|
|
|
@ -587,7 +587,7 @@ SELECT x, y, rnk FROM (SELECT *, rank() OVER my_win as rnk FROM test WINDOW my_w
|
|||
UNION
|
||||
SELECT x, y, rnk FROM (SELECT *, rank() OVER my_win as rnk FROM test WINDOW my_win AS (PARTITION BY y ORDER BY x DESC)) as bar;
|
||||
ERROR: could not run distributed query because the window function that is used cannot be pushed down
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
|
||||
-- other set operations in joins also cannot be pushed down
|
||||
SELECT * FROM ((SELECT * FROM test) EXCEPT (SELECT * FROM test ORDER BY x LIMIT 1)) u JOIN test USING (x) ORDER BY 1,2;
|
||||
DEBUG: push down of limit count: 1
|
||||
|
|
|
@ -100,7 +100,7 @@ ORDER BY
|
|||
LIMIT
|
||||
10) as foo;
|
||||
ERROR: could not run distributed query because the window function that is used cannot be pushed down
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
|
||||
-- OUTER JOINs where the outer part is recursively planned and not the other way
|
||||
-- around is not supported
|
||||
SELECT
|
||||
|
|
|
@ -0,0 +1,915 @@
|
|||
-- ===================================================================
|
||||
-- test top level window functions that are pushdownable
|
||||
-- ===================================================================
|
||||
-- a very simple window function with an aggregate and a window function
|
||||
-- distribution column is on the partition by clause
|
||||
SELECT
|
||||
user_id, COUNT(*) OVER (PARTITION BY user_id),
|
||||
rank() OVER (PARTITION BY user_id)
|
||||
FROM
|
||||
users_table
|
||||
ORDER BY
|
||||
1 DESC, 2 DESC, 3 DESC
|
||||
LIMIT 5;
|
||||
user_id | count | rank
|
||||
---------+-------+------
|
||||
6 | 10 | 1
|
||||
6 | 10 | 1
|
||||
6 | 10 | 1
|
||||
6 | 10 | 1
|
||||
6 | 10 | 1
|
||||
(5 rows)
|
||||
|
||||
-- a more complicated window clause, including an aggregate
|
||||
-- in both the window clause and the target entry
|
||||
SELECT
|
||||
user_id, avg(avg(value_3)) OVER (PARTITION BY user_id, MIN(value_2))
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY
|
||||
1
|
||||
ORDER BY
|
||||
2 DESC NULLS LAST, 1 DESC;
|
||||
user_id | avg
|
||||
---------+------------------
|
||||
2 | 3
|
||||
4 | 2.82608695652174
|
||||
3 | 2.70588235294118
|
||||
6 | 2.6
|
||||
1 | 2.57142857142857
|
||||
5 | 2.46153846153846
|
||||
(6 rows)
|
||||
|
||||
-- window clause operates on the results of a subquery
|
||||
SELECT
|
||||
user_id, max(value_1) OVER (PARTITION BY user_id, MIN(value_2))
|
||||
FROM (
|
||||
SELECT
|
||||
DISTINCT us.user_id, us.value_2, value_1, random() as r1
|
||||
FROM
|
||||
users_table as us, events_table
|
||||
WHERE
|
||||
us.user_id = events_table.user_id AND event_type IN (1,2)
|
||||
ORDER BY
|
||||
user_id, value_2
|
||||
) s
|
||||
GROUP BY
|
||||
1, value_1
|
||||
ORDER BY
|
||||
2 DESC, 1;
|
||||
user_id | max
|
||||
---------+-----
|
||||
1 | 5
|
||||
3 | 5
|
||||
3 | 5
|
||||
4 | 5
|
||||
5 | 5
|
||||
5 | 5
|
||||
6 | 5
|
||||
6 | 5
|
||||
1 | 4
|
||||
2 | 4
|
||||
3 | 4
|
||||
3 | 4
|
||||
3 | 4
|
||||
4 | 4
|
||||
4 | 4
|
||||
5 | 4
|
||||
5 | 4
|
||||
1 | 3
|
||||
2 | 3
|
||||
2 | 3
|
||||
2 | 3
|
||||
6 | 3
|
||||
2 | 2
|
||||
4 | 2
|
||||
4 | 2
|
||||
4 | 2
|
||||
6 | 2
|
||||
1 | 1
|
||||
3 | 1
|
||||
5 | 1
|
||||
6 | 1
|
||||
5 | 0
|
||||
(32 rows)
|
||||
|
||||
-- window function operates on the results of
|
||||
-- a join
|
||||
SELECT
|
||||
us.user_id,
|
||||
SUM(us.value_1) OVER (PARTITION BY us.user_id)
|
||||
FROM
|
||||
users_table us
|
||||
JOIN
|
||||
events_table ev
|
||||
ON (us.user_id = ev.user_id)
|
||||
GROUP BY
|
||||
1,
|
||||
value_1
|
||||
ORDER BY
|
||||
1,
|
||||
2
|
||||
LIMIT 5;
|
||||
user_id | sum
|
||||
---------+-----
|
||||
1 | 13
|
||||
1 | 13
|
||||
1 | 13
|
||||
1 | 13
|
||||
2 | 10
|
||||
(5 rows)
|
||||
|
||||
-- the same query, but this time join with an alias
|
||||
SELECT
|
||||
user_id, value_1, SUM(j.value_1) OVER (PARTITION BY j.user_id)
|
||||
FROM
|
||||
(users_table us
|
||||
JOIN
|
||||
events_table ev
|
||||
USING (user_id )
|
||||
) j
|
||||
GROUP BY
|
||||
user_id,
|
||||
value_1
|
||||
ORDER BY
|
||||
3 DESC, 2 DESC, 1 DESC
|
||||
LIMIT 5;
|
||||
user_id | value_1 | sum
|
||||
---------+---------+-----
|
||||
5 | 5 | 15
|
||||
4 | 5 | 15
|
||||
3 | 5 | 15
|
||||
5 | 4 | 15
|
||||
4 | 4 | 15
|
||||
(5 rows)
|
||||
|
||||
-- querying views that have window functions should be ok
|
||||
CREATE VIEW window_view AS
|
||||
SELECT
|
||||
DISTINCT user_id, rank() OVER (PARTITION BY user_id ORDER BY value_1)
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY
|
||||
user_id, value_1
|
||||
HAVING count(*) > 1;
|
||||
-- Window function in View works
|
||||
SELECT *
|
||||
FROM
|
||||
window_view
|
||||
ORDER BY
|
||||
2 DESC, 1
|
||||
LIMIT 10;
|
||||
user_id | rank
|
||||
---------+------
|
||||
5 | 6
|
||||
2 | 5
|
||||
4 | 5
|
||||
5 | 5
|
||||
2 | 4
|
||||
3 | 4
|
||||
4 | 4
|
||||
5 | 4
|
||||
6 | 4
|
||||
2 | 3
|
||||
(10 rows)
|
||||
|
||||
-- the other way around also should work fine
|
||||
-- query a view using window functions
|
||||
CREATE VIEW users_view AS SELECT * FROM users_table;
|
||||
SELECT
|
||||
DISTINCT user_id, rank() OVER (PARTITION BY user_id ORDER BY value_1)
|
||||
FROM
|
||||
users_view
|
||||
GROUP BY
|
||||
user_id, value_1
|
||||
HAVING count(*) > 4
|
||||
ORDER BY
|
||||
2 DESC, 1;
|
||||
user_id | rank
|
||||
---------+------
|
||||
4 | 2
|
||||
5 | 2
|
||||
2 | 1
|
||||
3 | 1
|
||||
4 | 1
|
||||
5 | 1
|
||||
(6 rows)
|
||||
|
||||
DROP VIEW users_view, window_view;
|
||||
-- window function uses columns from two different tables
|
||||
SELECT
|
||||
DISTINCT ON (events_table.user_id, rnk) events_table.user_id, rank() OVER my_win AS rnk
|
||||
FROM
|
||||
events_table, users_table
|
||||
WHERE
|
||||
users_table.user_id = events_table.user_id
|
||||
WINDOW
|
||||
my_win AS (PARTITION BY events_table.user_id, users_table.value_1 ORDER BY events_table.time DESC)
|
||||
ORDER BY
|
||||
rnk DESC, 1 DESC
|
||||
LIMIT 10;
|
||||
user_id | rnk
|
||||
---------+-----
|
||||
3 | 121
|
||||
5 | 118
|
||||
2 | 116
|
||||
3 | 115
|
||||
4 | 113
|
||||
2 | 111
|
||||
5 | 109
|
||||
3 | 109
|
||||
4 | 106
|
||||
2 | 106
|
||||
(10 rows)
|
||||
|
||||
-- the same query with reference table column is also on the partition by clause
|
||||
SELECT
|
||||
DISTINCT ON (events_table.user_id, rnk) events_table.user_id, rank() OVER my_win AS rnk
|
||||
FROM
|
||||
events_table, users_ref_test_table uref
|
||||
WHERE
|
||||
uref.id = events_table.user_id
|
||||
WINDOW
|
||||
my_win AS (PARTITION BY events_table.user_id, uref.k_no ORDER BY events_table.time DESC)
|
||||
ORDER BY
|
||||
rnk DESC, 1 DESC
|
||||
LIMIT 10;
|
||||
user_id | rnk
|
||||
---------+-----
|
||||
2 | 24
|
||||
2 | 23
|
||||
2 | 22
|
||||
3 | 21
|
||||
2 | 21
|
||||
3 | 20
|
||||
2 | 20
|
||||
3 | 19
|
||||
2 | 19
|
||||
3 | 18
|
||||
(10 rows)
|
||||
|
||||
-- similar query with no distribution column is on the partition by clause
|
||||
-- is not supported
|
||||
SELECT
|
||||
DISTINCT ON (events_table.user_id, rnk) events_table.user_id, rank() OVER my_win AS rnk
|
||||
FROM
|
||||
events_table, users_ref_test_table uref
|
||||
WHERE
|
||||
uref.id = events_table.user_id
|
||||
WINDOW
|
||||
my_win AS (PARTITION BY events_table.value_2, uref.k_no ORDER BY events_table.time DESC)
|
||||
ORDER BY
|
||||
rnk DESC, 1 DESC
|
||||
LIMIT 10;
|
||||
ERROR: could not run distributed query because the window function that is used cannot be pushed down
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
|
||||
-- ORDER BY in the window function is an aggragate
|
||||
SELECT
|
||||
user_id, rank() OVER my_win as rnk, avg(value_2) as avg_val_2
|
||||
FROM
|
||||
events_table
|
||||
GROUP BY
|
||||
user_id, date_trunc('day', time)
|
||||
WINDOW
|
||||
my_win AS (PARTITION BY user_id ORDER BY avg(event_type) DESC)
|
||||
ORDER BY
|
||||
3 DESC, 2 DESC, 1 DESC;
|
||||
user_id | rnk | avg_val_2
|
||||
---------+-----+--------------------
|
||||
1 | 1 | 3.3750000000000000
|
||||
3 | 2 | 3.1666666666666667
|
||||
5 | 1 | 2.6666666666666667
|
||||
6 | 1 | 2.5000000000000000
|
||||
4 | 1 | 2.5000000000000000
|
||||
2 | 1 | 2.4736842105263158
|
||||
4 | 2 | 2.4000000000000000
|
||||
1 | 2 | 2.1428571428571429
|
||||
5 | 2 | 2.0909090909090909
|
||||
6 | 2 | 2.0000000000000000
|
||||
2 | 2 | 2.0000000000000000
|
||||
3 | 1 | 1.8000000000000000
|
||||
(12 rows)
|
||||
|
||||
-- lets push the limits of writing complex expressions aling with the window functions
|
||||
SELECT
|
||||
COUNT(*) OVER (PARTITION BY user_id, user_id + 1),
|
||||
rank() OVER (PARTITION BY user_id) as cnt1,
|
||||
COUNT(*) OVER (PARTITION BY user_id, abs(value_1 - value_2)) as cnt2,
|
||||
date_trunc('min', lag(time) OVER (PARTITION BY user_id)) as datee,
|
||||
rank() OVER my_win as rnnk,
|
||||
avg(CASE
|
||||
WHEN user_id > 4
|
||||
THEN value_1
|
||||
ELSE value_2
|
||||
END) FILTER (WHERE user_id > 2) OVER my_win_2 as filtered_count,
|
||||
sum(user_id * (5.0 / (value_1 + value_2 + 0.1)) * value_3) FILTER (WHERE value_1::text LIKE '%1%') OVER my_win_4 as cnt_with_filter_2
|
||||
FROM
|
||||
users_table
|
||||
WINDOW
|
||||
my_win AS (PARTITION BY user_id, (value_1%3)::int ORDER BY time DESC),
|
||||
my_win_2 AS (PARTITION BY user_id, (value_1)::int ORDER BY time DESC),
|
||||
my_win_3 AS (PARTITION BY user_id, date_trunc('min', time)),
|
||||
my_win_4 AS (my_win_3 ORDER BY value_2, value_3)
|
||||
ORDER BY
|
||||
cnt_with_filter_2 DESC NULLS LAST, filtered_count DESC NULLS LAST, datee DESC NULLS LAST, rnnk DESC, cnt2 DESC, cnt1 DESC, user_id DESC
|
||||
LIMIT 5;
|
||||
count | cnt1 | cnt2 | datee | rnnk | filtered_count | cnt_with_filter_2
|
||||
-------+------+------+--------------------------+------+------------------------+-------------------
|
||||
23 | 1 | 7 | Thu Nov 23 02:14:00 2017 | 6 | 0.00000000000000000000 | 72.7272727272727
|
||||
10 | 1 | 3 | Wed Nov 22 23:01:00 2017 | 1 | 1.00000000000000000000 | 57.1428571428571
|
||||
17 | 1 | 5 | Wed Nov 22 23:24:00 2017 | 8 | 3.0000000000000000 | 28.5714285714286
|
||||
17 | 1 | 5 | | 10 | 2.6666666666666667 | 28.5714285714286
|
||||
17 | 1 | 5 | Thu Nov 23 00:15:00 2017 | 7 | 3.6666666666666667 | 24.1935483870968
|
||||
(5 rows)
|
||||
|
||||
-- some tests with GROUP BY along with PARTITION BY
|
||||
SELECT
|
||||
user_id,
|
||||
rank() OVER my_win as my_rank,
|
||||
avg(avg(event_type)) OVER my_win_2 as avg,
|
||||
max(time) as mx_time
|
||||
FROM
|
||||
events_table
|
||||
GROUP BY
|
||||
user_id,
|
||||
value_2
|
||||
WINDOW
|
||||
my_win AS (PARTITION BY user_id, max(event_type) ORDER BY count(*) DESC),
|
||||
my_win_2 AS (PARTITION BY user_id, avg(user_id) ORDER BY count(*) DESC)
|
||||
ORDER BY
|
||||
avg DESC,
|
||||
mx_time DESC,
|
||||
my_rank DESC,
|
||||
user_id DESC;
|
||||
user_id | my_rank | avg | mx_time
|
||||
---------+---------+------------------------+---------------------------------
|
||||
6 | 1 | 3.0000000000000000 | Thu Nov 23 14:00:13.20013 2017
|
||||
6 | 2 | 3.0000000000000000 | Thu Nov 23 11:16:13.106691 2017
|
||||
6 | 1 | 3.0000000000000000 | Thu Nov 23 07:27:32.822068 2017
|
||||
3 | 1 | 2.9857142857142857 | Thu Nov 23 16:31:56.219594 2017
|
||||
4 | 2 | 2.9555555555555556 | Thu Nov 23 14:19:25.765876 2017
|
||||
4 | 1 | 2.9555555555555556 | Thu Nov 23 08:36:53.871919 2017
|
||||
1 | 4 | 2.8633333333333333 | Wed Nov 22 21:06:57.457147 2017
|
||||
1 | 1 | 2.8250000000000000 | Thu Nov 23 21:54:46.924477 2017
|
||||
2 | 2 | 2.7738095238095238 | Thu Nov 23 13:27:37.441959 2017
|
||||
1 | 2 | 2.7722222222222222 | Thu Nov 23 09:23:30.994345 2017
|
||||
3 | 1 | 2.7682539682539682 | Thu Nov 23 01:17:49.040685 2017
|
||||
2 | 1 | 2.7142857142857143 | Thu Nov 23 15:58:49.273421 2017
|
||||
1 | 3 | 2.5791666666666667 | Thu Nov 23 11:09:38.074595 2017
|
||||
3 | 1 | 2.5714285714285714 | Thu Nov 23 16:44:41.903713 2017
|
||||
2 | 1 | 2.5158730158730159 | Thu Nov 23 14:02:47.738901 2017
|
||||
4 | 1 | 2.47777777777777778333 | Thu Nov 23 16:20:33.264457 2017
|
||||
4 | 3 | 2.47777777777777778333 | Thu Nov 23 08:14:18.231273 2017
|
||||
4 | 3 | 2.47777777777777778333 | Thu Nov 23 07:32:45.521278 2017
|
||||
1 | 1 | 2.4000000000000000 | Thu Nov 23 10:23:27.617726 2017
|
||||
2 | 1 | 2.3869047619047619 | Thu Nov 23 17:26:14.563216 2017
|
||||
3 | 1 | 2.3841269841269841 | Thu Nov 23 18:08:26.550729 2017
|
||||
3 | 1 | 2.3841269841269841 | Thu Nov 23 09:38:45.338008 2017
|
||||
3 | 2 | 2.3841269841269841 | Thu Nov 23 06:44:50.887182 2017
|
||||
2 | 2 | 2.3095238095238095 | Thu Nov 23 04:05:16.217731 2017
|
||||
5 | 2 | 2.3000000000000000 | Thu Nov 23 14:28:51.833214 2017
|
||||
5 | 2 | 2.3000000000000000 | Thu Nov 23 14:23:09.889786 2017
|
||||
4 | 1 | 2.2000000000000000 | Thu Nov 23 18:10:21.338399 2017
|
||||
2 | 1 | 2.09126984126984126667 | Thu Nov 23 03:35:04.321504 2017
|
||||
5 | 1 | 2.0000000000000000 | Thu Nov 23 16:11:02.929469 2017
|
||||
5 | 1 | 2.0000000000000000 | Thu Nov 23 14:40:40.467511 2017
|
||||
5 | 1 | 2.0000000000000000 | Thu Nov 23 13:26:45.571108 2017
|
||||
(31 rows)
|
||||
|
||||
-- test for range and rows mode and different window functions
|
||||
-- mostly to make sure that deparsing works fine
|
||||
SELECT
|
||||
user_id,
|
||||
rank() OVER (PARTITION BY user_id ROWS BETWEEN
|
||||
UNBOUNDED PRECEDING AND CURRENT ROW),
|
||||
dense_rank() OVER (PARTITION BY user_id RANGE BETWEEN
|
||||
UNBOUNDED PRECEDING AND CURRENT ROW),
|
||||
CUME_DIST() OVER (PARTITION BY user_id RANGE BETWEEN
|
||||
UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING),
|
||||
PERCENT_RANK() OVER (PARTITION BY user_id ORDER BY avg(value_1) RANGE BETWEEN
|
||||
UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY
|
||||
1
|
||||
ORDER BY
|
||||
4 DESC,3 DESC,2 DESC ,1 DESC;
|
||||
user_id | rank | dense_rank | cume_dist | percent_rank
|
||||
---------+------+------------+-----------+--------------
|
||||
6 | 1 | 1 | 1 | 0
|
||||
5 | 1 | 1 | 1 | 0
|
||||
4 | 1 | 1 | 1 | 0
|
||||
3 | 1 | 1 | 1 | 0
|
||||
2 | 1 | 1 | 1 | 0
|
||||
1 | 1 | 1 | 1 | 0
|
||||
(6 rows)
|
||||
|
||||
-- some tests with GROUP BY, HAVING and LIMIT
|
||||
SELECT
|
||||
user_id, sum(event_type) OVER my_win , event_type
|
||||
FROM
|
||||
events_table
|
||||
GROUP BY
|
||||
user_id, event_type
|
||||
HAVING count(*) > 2
|
||||
WINDOW my_win AS (PARTITION BY user_id, max(event_type) ORDER BY count(*) DESC)
|
||||
ORDER BY
|
||||
2 DESC, 3 DESC, 1 DESC
|
||||
LIMIT
|
||||
5;
|
||||
user_id | sum | event_type
|
||||
---------+-----+------------
|
||||
4 | 4 | 4
|
||||
3 | 4 | 4
|
||||
2 | 4 | 4
|
||||
1 | 4 | 4
|
||||
5 | 3 | 3
|
||||
(5 rows)
|
||||
|
||||
-- Group by has more columns than partition by
|
||||
SELECT
|
||||
DISTINCT user_id, SUM(value_2) OVER (PARTITION BY user_id)
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY
|
||||
user_id, value_1, value_2
|
||||
HAVING count(*) > 2
|
||||
ORDER BY
|
||||
2 DESC, 1
|
||||
LIMIT
|
||||
10;
|
||||
user_id | sum
|
||||
---------+-----
|
||||
5 | 3
|
||||
4 | 2
|
||||
(2 rows)
|
||||
|
||||
SELECT
|
||||
DISTINCT ON (user_id) user_id, SUM(value_2) OVER (PARTITION BY user_id)
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY
|
||||
user_id, value_1, value_2
|
||||
HAVING count(*) > 2
|
||||
ORDER BY
|
||||
1, 2 DESC
|
||||
LIMIT
|
||||
10;
|
||||
user_id | sum
|
||||
---------+-----
|
||||
4 | 2
|
||||
5 | 3
|
||||
(2 rows)
|
||||
|
||||
SELECT
|
||||
DISTINCT ON (SUM(value_1) OVER (PARTITION BY user_id)) user_id, SUM(value_2) OVER (PARTITION BY user_id)
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY
|
||||
user_id, value_1, value_2
|
||||
HAVING count(*) > 2
|
||||
ORDER BY
|
||||
(SUM(value_1) OVER (PARTITION BY user_id)) , 2 DESC, 1
|
||||
LIMIT
|
||||
10;
|
||||
user_id | sum
|
||||
---------+-----
|
||||
5 | 3
|
||||
4 | 2
|
||||
(2 rows)
|
||||
|
||||
|
||||
-- not a meaningful query, with interesting syntax
|
||||
SELECT
|
||||
user_id,
|
||||
AVG(avg(value_1)) OVER (PARTITION BY user_id, max(user_id), MIN(value_2)),
|
||||
AVG(avg(user_id)) OVER (PARTITION BY user_id, min(user_id), AVG(value_1))
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY
|
||||
1
|
||||
ORDER BY
|
||||
3 DESC, 2 DESC, 1 DESC;
|
||||
user_id | avg | avg
|
||||
---------+--------------------+------------------------
|
||||
6 | 2.1000000000000000 | 6.0000000000000000
|
||||
5 | 2.6538461538461538 | 5.0000000000000000
|
||||
4 | 2.7391304347826087 | 4.0000000000000000
|
||||
3 | 2.3529411764705882 | 3.0000000000000000
|
||||
2 | 2.3333333333333333 | 2.0000000000000000
|
||||
1 | 3.2857142857142857 | 1.00000000000000000000
|
||||
(6 rows)
|
||||
|
||||
EXPLAIN
|
||||
SELECT
|
||||
user_id,
|
||||
AVG(avg(value_1)) OVER (PARTITION BY user_id, max(user_id), MIN(value_2)),
|
||||
AVG(avg(user_id)) OVER (PARTITION BY user_id, min(user_id), AVG(value_1))
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY
|
||||
1
|
||||
ORDER BY
|
||||
3 DESC, 2 DESC, 1 DESC;
|
||||
QUERY PLAN
|
||||
---------------------------------------------------------------------------------------------------------------------------------------
|
||||
Sort (cost=0.00..0.00 rows=0 width=0)
|
||||
Sort Key: remote_scan.avg_1 DESC, remote_scan.avg DESC, remote_scan.user_id DESC
|
||||
-> HashAggregate (cost=0.00..0.00 rows=0 width=0)
|
||||
Group Key: remote_scan.user_id
|
||||
-> Custom Scan (Citus Real-Time) (cost=0.00..0.00 rows=0 width=0)
|
||||
Task Count: 4
|
||||
Tasks Shown: One of 4
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> WindowAgg (cost=5.06..5.80 rows=33 width=128)
|
||||
-> Sort (cost=5.06..5.14 rows=33 width=128)
|
||||
Sort Key: users_table.user_id, (min(users_table.user_id)), (avg(users_table.value_1))
|
||||
-> WindowAgg (cost=3.48..4.22 rows=33 width=128)
|
||||
-> Sort (cost=3.48..3.56 rows=33 width=96)
|
||||
Sort Key: users_table.user_id, (max(users_table.user_id)), (min(users_table.value_2))
|
||||
-> HashAggregate (cost=2.07..2.65 rows=33 width=96)
|
||||
Group Key: users_table.user_id
|
||||
-> Seq Scan on users_table_1400000 users_table (cost=0.00..1.33 rows=33 width=12)
|
||||
(18 rows)
|
||||
|
||||
SELECT
|
||||
user_id,
|
||||
1 + sum(value_1),
|
||||
1 + AVG(value_2) OVER (partition by user_id)
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY
|
||||
user_id, value_2
|
||||
ORDER BY
|
||||
user_id, value_2;
|
||||
user_id | ?column? | ?column?
|
||||
---------+----------+--------------------
|
||||
1 | 5 | 3.2500000000000000
|
||||
1 | 4 | 3.2500000000000000
|
||||
1 | 6 | 3.2500000000000000
|
||||
1 | 12 | 3.2500000000000000
|
||||
2 | 3 | 3.5000000000000000
|
||||
2 | 5 | 3.5000000000000000
|
||||
2 | 13 | 3.5000000000000000
|
||||
2 | 6 | 3.5000000000000000
|
||||
2 | 17 | 3.5000000000000000
|
||||
2 | 4 | 3.5000000000000000
|
||||
3 | 3 | 4.0000000000000000
|
||||
3 | 13 | 4.0000000000000000
|
||||
3 | 10 | 4.0000000000000000
|
||||
3 | 2 | 4.0000000000000000
|
||||
3 | 17 | 4.0000000000000000
|
||||
4 | 4 | 3.5000000000000000
|
||||
4 | 28 | 3.5000000000000000
|
||||
4 | 1 | 3.5000000000000000
|
||||
4 | 11 | 3.5000000000000000
|
||||
4 | 17 | 3.5000000000000000
|
||||
4 | 8 | 3.5000000000000000
|
||||
5 | 7 | 3.5000000000000000
|
||||
5 | 17 | 3.5000000000000000
|
||||
5 | 24 | 3.5000000000000000
|
||||
5 | 9 | 3.5000000000000000
|
||||
5 | 8 | 3.5000000000000000
|
||||
5 | 10 | 3.5000000000000000
|
||||
6 | 6 | 3.0000000000000000
|
||||
6 | 3 | 3.0000000000000000
|
||||
6 | 9 | 3.0000000000000000
|
||||
6 | 3 | 3.0000000000000000
|
||||
6 | 5 | 3.0000000000000000
|
||||
(32 rows)
|
||||
|
||||
EXPLAIN
|
||||
SELECT
|
||||
user_id,
|
||||
1 + sum(value_1),
|
||||
1 + AVG(value_2) OVER (partition by user_id)
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY
|
||||
user_id, value_2
|
||||
ORDER BY
|
||||
user_id, value_2;
|
||||
QUERY PLAN
|
||||
---------------------------------------------------------------------------------------------------------------------------
|
||||
Sort (cost=0.00..0.00 rows=0 width=0)
|
||||
Sort Key: remote_scan.user_id, remote_scan.worker_column_4
|
||||
-> HashAggregate (cost=0.00..0.00 rows=0 width=0)
|
||||
Group Key: remote_scan.user_id, remote_scan.worker_column_4
|
||||
-> Custom Scan (Citus Real-Time) (cost=0.00..0.00 rows=0 width=0)
|
||||
Task Count: 4
|
||||
Tasks Shown: One of 4
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> WindowAgg (cost=2.16..3.40 rows=33 width=48)
|
||||
-> GroupAggregate (cost=2.16..2.82 rows=33 width=16)
|
||||
Group Key: users_table.user_id, users_table.value_2
|
||||
-> Sort (cost=2.16..2.24 rows=33 width=12)
|
||||
Sort Key: users_table.user_id, users_table.value_2
|
||||
-> Seq Scan on users_table_1400000 users_table (cost=0.00..1.33 rows=33 width=12)
|
||||
(15 rows)
|
||||
|
||||
SELECT
|
||||
user_id,
|
||||
1 + sum(value_1),
|
||||
1 + AVG(value_2) OVER (partition by user_id)
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY
|
||||
user_id, value_2
|
||||
ORDER BY
|
||||
2 DESC, 1
|
||||
LIMIT 5;
|
||||
user_id | ?column? | ?column?
|
||||
---------+----------+--------------------
|
||||
4 | 28 | 3.5000000000000000
|
||||
5 | 24 | 3.5000000000000000
|
||||
2 | 17 | 3.5000000000000000
|
||||
3 | 17 | 4.0000000000000000
|
||||
4 | 17 | 3.5000000000000000
|
||||
(5 rows)
|
||||
|
||||
-- rank and ordering in the reverse order
|
||||
SELECT
|
||||
user_id,
|
||||
avg(value_1),
|
||||
RANK() OVER (partition by user_id order by value_2)
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY user_id, value_2
|
||||
ORDER BY user_id, value_2 DESC;
|
||||
user_id | avg | rank
|
||||
---------+------------------------+------
|
||||
1 | 3.6666666666666667 | 4
|
||||
1 | 2.5000000000000000 | 3
|
||||
1 | 3.0000000000000000 | 2
|
||||
1 | 4.0000000000000000 | 1
|
||||
2 | 1.5000000000000000 | 6
|
||||
2 | 3.2000000000000000 | 5
|
||||
2 | 1.6666666666666667 | 4
|
||||
2 | 3.0000000000000000 | 3
|
||||
2 | 1.3333333333333333 | 2
|
||||
2 | 2.0000000000000000 | 1
|
||||
3 | 2.6666666666666667 | 5
|
||||
3 | 1.00000000000000000000 | 4
|
||||
3 | 3.0000000000000000 | 3
|
||||
3 | 2.4000000000000000 | 2
|
||||
3 | 1.00000000000000000000 | 1
|
||||
4 | 3.5000000000000000 | 6
|
||||
4 | 3.2000000000000000 | 5
|
||||
4 | 3.3333333333333333 | 4
|
||||
4 | 0.00000000000000000000 | 3
|
||||
4 | 3.0000000000000000 | 2
|
||||
4 | 1.00000000000000000000 | 1
|
||||
5 | 3.0000000000000000 | 6
|
||||
5 | 2.3333333333333333 | 5
|
||||
5 | 1.6000000000000000 | 4
|
||||
5 | 2.8750000000000000 | 3
|
||||
5 | 3.2000000000000000 | 2
|
||||
5 | 3.0000000000000000 | 1
|
||||
6 | 1.3333333333333333 | 5
|
||||
6 | 2.0000000000000000 | 4
|
||||
6 | 4.0000000000000000 | 3
|
||||
6 | 1.00000000000000000000 | 2
|
||||
6 | 2.5000000000000000 | 1
|
||||
(32 rows)
|
||||
|
||||
-- order by in the window function is same as avg(value_1) DESC
|
||||
SELECT
|
||||
user_id,
|
||||
avg(value_1),
|
||||
RANK() OVER (partition by user_id order by 1 / (1 + avg(value_1)))
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY user_id, value_2
|
||||
ORDER BY user_id, avg(value_1) DESC;
|
||||
user_id | avg | rank
|
||||
---------+------------------------+------
|
||||
1 | 4.0000000000000000 | 1
|
||||
1 | 3.6666666666666667 | 2
|
||||
1 | 3.0000000000000000 | 3
|
||||
1 | 2.5000000000000000 | 4
|
||||
2 | 3.2000000000000000 | 1
|
||||
2 | 3.0000000000000000 | 2
|
||||
2 | 2.0000000000000000 | 3
|
||||
2 | 1.6666666666666667 | 4
|
||||
2 | 1.5000000000000000 | 5
|
||||
2 | 1.3333333333333333 | 6
|
||||
3 | 3.0000000000000000 | 1
|
||||
3 | 2.6666666666666667 | 2
|
||||
3 | 2.4000000000000000 | 3
|
||||
3 | 1.00000000000000000000 | 4
|
||||
3 | 1.00000000000000000000 | 4
|
||||
4 | 3.5000000000000000 | 1
|
||||
4 | 3.3333333333333333 | 2
|
||||
4 | 3.2000000000000000 | 3
|
||||
4 | 3.0000000000000000 | 4
|
||||
4 | 1.00000000000000000000 | 5
|
||||
4 | 0.00000000000000000000 | 6
|
||||
5 | 3.2000000000000000 | 1
|
||||
5 | 3.0000000000000000 | 2
|
||||
5 | 3.0000000000000000 | 2
|
||||
5 | 2.8750000000000000 | 4
|
||||
5 | 2.3333333333333333 | 5
|
||||
5 | 1.6000000000000000 | 6
|
||||
6 | 4.0000000000000000 | 1
|
||||
6 | 2.5000000000000000 | 2
|
||||
6 | 2.0000000000000000 | 3
|
||||
6 | 1.3333333333333333 | 4
|
||||
6 | 1.00000000000000000000 | 5
|
||||
(32 rows)
|
||||
|
||||
EXPLAIN
|
||||
SELECT
|
||||
user_id,
|
||||
avg(value_1),
|
||||
RANK() OVER (partition by user_id order by 1 / (1 + avg(value_1)))
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY user_id, value_2
|
||||
ORDER BY user_id, avg(value_1) DESC;
|
||||
QUERY PLAN
|
||||
--------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
Sort (cost=0.00..0.00 rows=0 width=0)
|
||||
Sort Key: remote_scan.user_id, (pg_catalog.sum(((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))) / pg_catalog.sum(remote_scan.rank)) DESC
|
||||
-> HashAggregate (cost=0.00..0.00 rows=0 width=0)
|
||||
Group Key: remote_scan.user_id, remote_scan.worker_column_5
|
||||
-> Custom Scan (Citus Real-Time) (cost=0.00..0.00 rows=0 width=0)
|
||||
Task Count: 4
|
||||
Tasks Shown: One of 4
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> WindowAgg (cost=3.32..4.14 rows=33 width=80)
|
||||
-> Sort (cost=3.32..3.40 rows=33 width=56)
|
||||
Sort Key: users_table.user_id, (('1'::numeric / ('1'::numeric + avg(users_table.value_1))))
|
||||
-> HashAggregate (cost=1.91..2.49 rows=33 width=56)
|
||||
Group Key: users_table.user_id, users_table.value_2
|
||||
-> Seq Scan on users_table_1400000 users_table (cost=0.00..1.33 rows=33 width=12)
|
||||
(15 rows)
|
||||
|
||||
-- order by in the window function is same as avg(value_1) DESC
|
||||
SELECT
|
||||
user_id,
|
||||
avg(value_1),
|
||||
RANK() OVER (partition by user_id order by 1 / (1 + avg(value_1)))
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY user_id, value_2
|
||||
ORDER BY user_id, avg(value_1) DESC;
|
||||
user_id | avg | rank
|
||||
---------+------------------------+------
|
||||
1 | 4.0000000000000000 | 1
|
||||
1 | 3.6666666666666667 | 2
|
||||
1 | 3.0000000000000000 | 3
|
||||
1 | 2.5000000000000000 | 4
|
||||
2 | 3.2000000000000000 | 1
|
||||
2 | 3.0000000000000000 | 2
|
||||
2 | 2.0000000000000000 | 3
|
||||
2 | 1.6666666666666667 | 4
|
||||
2 | 1.5000000000000000 | 5
|
||||
2 | 1.3333333333333333 | 6
|
||||
3 | 3.0000000000000000 | 1
|
||||
3 | 2.6666666666666667 | 2
|
||||
3 | 2.4000000000000000 | 3
|
||||
3 | 1.00000000000000000000 | 4
|
||||
3 | 1.00000000000000000000 | 4
|
||||
4 | 3.5000000000000000 | 1
|
||||
4 | 3.3333333333333333 | 2
|
||||
4 | 3.2000000000000000 | 3
|
||||
4 | 3.0000000000000000 | 4
|
||||
4 | 1.00000000000000000000 | 5
|
||||
4 | 0.00000000000000000000 | 6
|
||||
5 | 3.2000000000000000 | 1
|
||||
5 | 3.0000000000000000 | 2
|
||||
5 | 3.0000000000000000 | 2
|
||||
5 | 2.8750000000000000 | 4
|
||||
5 | 2.3333333333333333 | 5
|
||||
5 | 1.6000000000000000 | 6
|
||||
6 | 4.0000000000000000 | 1
|
||||
6 | 2.5000000000000000 | 2
|
||||
6 | 2.0000000000000000 | 3
|
||||
6 | 1.3333333333333333 | 4
|
||||
6 | 1.00000000000000000000 | 5
|
||||
(32 rows)
|
||||
|
||||
-- limit is not pushed down to worker !!
|
||||
EXPLAIN
|
||||
SELECT
|
||||
user_id,
|
||||
avg(value_1),
|
||||
RANK() OVER (partition by user_id order by 1 / (1 + avg(value_1)))
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY user_id, value_2
|
||||
ORDER BY user_id, avg(value_1) DESC
|
||||
LIMIT 5;
|
||||
QUERY PLAN
|
||||
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
Limit (cost=0.00..0.00 rows=0 width=0)
|
||||
-> Sort (cost=0.00..0.00 rows=0 width=0)
|
||||
Sort Key: remote_scan.user_id, (pg_catalog.sum(((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))) / pg_catalog.sum(remote_scan.rank)) DESC
|
||||
-> HashAggregate (cost=0.00..0.00 rows=0 width=0)
|
||||
Group Key: remote_scan.user_id, remote_scan.worker_column_5
|
||||
-> Custom Scan (Citus Real-Time) (cost=0.00..0.00 rows=0 width=0)
|
||||
Task Count: 4
|
||||
Tasks Shown: One of 4
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> WindowAgg (cost=3.32..4.14 rows=33 width=80)
|
||||
-> Sort (cost=3.32..3.40 rows=33 width=56)
|
||||
Sort Key: users_table.user_id, (('1'::numeric / ('1'::numeric + avg(users_table.value_1))))
|
||||
-> HashAggregate (cost=1.91..2.49 rows=33 width=56)
|
||||
Group Key: users_table.user_id, users_table.value_2
|
||||
-> Seq Scan on users_table_1400000 users_table (cost=0.00..1.33 rows=33 width=12)
|
||||
(16 rows)
|
||||
|
||||
EXPLAIN
|
||||
SELECT
|
||||
user_id,
|
||||
avg(value_1),
|
||||
RANK() OVER (partition by user_id order by 1 / (1 + avg(value_1)))
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY user_id, value_2
|
||||
ORDER BY user_id, avg(value_1) DESC
|
||||
LIMIT 5;
|
||||
QUERY PLAN
|
||||
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
Limit (cost=0.00..0.00 rows=0 width=0)
|
||||
-> Sort (cost=0.00..0.00 rows=0 width=0)
|
||||
Sort Key: remote_scan.user_id, (pg_catalog.sum(((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))) / pg_catalog.sum(remote_scan.rank)) DESC
|
||||
-> HashAggregate (cost=0.00..0.00 rows=0 width=0)
|
||||
Group Key: remote_scan.user_id, remote_scan.worker_column_5
|
||||
-> Custom Scan (Citus Real-Time) (cost=0.00..0.00 rows=0 width=0)
|
||||
Task Count: 4
|
||||
Tasks Shown: One of 4
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> WindowAgg (cost=3.32..4.14 rows=33 width=80)
|
||||
-> Sort (cost=3.32..3.40 rows=33 width=56)
|
||||
Sort Key: users_table.user_id, (('1'::numeric / ('1'::numeric + avg(users_table.value_1))))
|
||||
-> HashAggregate (cost=1.91..2.49 rows=33 width=56)
|
||||
Group Key: users_table.user_id, users_table.value_2
|
||||
-> Seq Scan on users_table_1400000 users_table (cost=0.00..1.33 rows=33 width=12)
|
||||
(16 rows)
|
||||
|
||||
EXPLAIN
|
||||
SELECT
|
||||
user_id,
|
||||
avg(value_1),
|
||||
RANK() OVER (partition by user_id order by 1 / (1 + sum(value_2)))
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY user_id, value_2
|
||||
ORDER BY user_id, avg(value_1) DESC
|
||||
LIMIT 5;
|
||||
QUERY PLAN
|
||||
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
Limit (cost=0.00..0.00 rows=0 width=0)
|
||||
-> Sort (cost=0.00..0.00 rows=0 width=0)
|
||||
Sort Key: remote_scan.user_id, (pg_catalog.sum(((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))) / pg_catalog.sum(remote_scan.rank)) DESC
|
||||
-> HashAggregate (cost=0.00..0.00 rows=0 width=0)
|
||||
Group Key: remote_scan.user_id, remote_scan.worker_column_5
|
||||
-> Custom Scan (Citus Real-Time) (cost=0.00..0.00 rows=0 width=0)
|
||||
Task Count: 4
|
||||
Tasks Shown: One of 4
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> WindowAgg (cost=3.15..3.98 rows=33 width=48)
|
||||
-> Sort (cost=3.15..3.23 rows=33 width=40)
|
||||
Sort Key: users_table.user_id, ((1 / (1 + sum(users_table.value_2))))
|
||||
-> HashAggregate (cost=1.83..2.32 rows=33 width=40)
|
||||
Group Key: users_table.user_id, users_table.value_2
|
||||
-> Seq Scan on users_table_1400000 users_table (cost=0.00..1.33 rows=33 width=12)
|
||||
(16 rows)
|
||||
|
||||
EXPLAIN
|
||||
SELECT
|
||||
user_id,
|
||||
avg(value_1),
|
||||
RANK() OVER (partition by user_id order by sum(value_2))
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY user_id, value_2
|
||||
ORDER BY user_id, avg(value_1) DESC
|
||||
LIMIT 5;
|
||||
QUERY PLAN
|
||||
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
Limit (cost=0.00..0.00 rows=0 width=0)
|
||||
-> Sort (cost=0.00..0.00 rows=0 width=0)
|
||||
Sort Key: remote_scan.user_id, (pg_catalog.sum(((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))) / pg_catalog.sum(remote_scan.rank)) DESC
|
||||
-> HashAggregate (cost=0.00..0.00 rows=0 width=0)
|
||||
Group Key: remote_scan.user_id, remote_scan.worker_column_5
|
||||
-> Custom Scan (Citus Real-Time) (cost=0.00..0.00 rows=0 width=0)
|
||||
Task Count: 4
|
||||
Tasks Shown: One of 4
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> WindowAgg (cost=2.90..3.56 rows=33 width=40)
|
||||
-> Sort (cost=2.90..2.99 rows=33 width=32)
|
||||
Sort Key: users_table.user_id, (sum(users_table.value_2))
|
||||
-> HashAggregate (cost=1.74..2.07 rows=33 width=32)
|
||||
Group Key: users_table.user_id, users_table.value_2
|
||||
-> Seq Scan on users_table_1400000 users_table (cost=0.00..1.33 rows=33 width=12)
|
||||
(16 rows)
|
||||
|
|
@ -471,7 +471,7 @@ ORDER BY
|
|||
LIMIT
|
||||
5;
|
||||
ERROR: could not run distributed query because the window function that is used cannot be pushed down
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions inside a subquery with a PARTITION BY clause containing the distribution column
|
||||
HINT: Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
|
||||
-- Window functions that partition by the distribution column in subqueries in CTEs are ok
|
||||
WITH top_users AS
|
||||
(SELECT *
|
||||
|
|
|
@ -31,7 +31,8 @@ test: multi_read_from_secondaries
|
|||
# ----------
|
||||
test: multi_create_table
|
||||
test: multi_create_table_constraints multi_master_protocol multi_load_data multi_behavioral_analytics_create_table
|
||||
test: multi_behavioral_analytics_basics multi_behavioral_analytics_single_shard_queries multi_insert_select_non_pushable_queries multi_insert_select multi_insert_select_window multi_shard_update_delete
|
||||
test: multi_behavioral_analytics_basics multi_behavioral_analytics_single_shard_queries multi_insert_select_non_pushable_queries multi_insert_select
|
||||
test: multi_insert_select_window multi_shard_update_delete window_functions
|
||||
|
||||
# ----------
|
||||
# Tests for partitioning support
|
||||
|
|
|
@ -2313,3 +2313,90 @@ GROUP BY
|
|||
types
|
||||
ORDER BY
|
||||
types;
|
||||
|
||||
-- we've fixed a bug related to joins w/wout alias
|
||||
-- while implementing top window functions
|
||||
-- thus adding some tests related to that (i.e., next 3 tests)
|
||||
WITH users_events AS
|
||||
(
|
||||
SELECT
|
||||
user_id
|
||||
FROM
|
||||
users_table
|
||||
)
|
||||
SELECT
|
||||
uid,
|
||||
event_type,
|
||||
value_2,
|
||||
value_3
|
||||
FROM (
|
||||
(SELECT
|
||||
user_id as uid
|
||||
FROM
|
||||
users_events
|
||||
) users
|
||||
JOIN
|
||||
events_table
|
||||
ON
|
||||
users.uid = events_table.event_type
|
||||
) a
|
||||
ORDER BY
|
||||
1,2,3,4
|
||||
LIMIT 5;
|
||||
|
||||
-- the following queries are almost the same,
|
||||
-- the only difference is the final GROUP BY
|
||||
SELECT a.user_id, avg(b.value_2) as subquery_avg
|
||||
FROM
|
||||
(SELECT
|
||||
user_id
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
(value_1 > 2)
|
||||
GROUP BY
|
||||
user_id
|
||||
HAVING
|
||||
count(distinct value_1) > 2
|
||||
) as a
|
||||
LEFT JOIN
|
||||
(SELECT
|
||||
DISTINCT ON (value_2) value_2 , user_id, value_3
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
(value_1 > 3)
|
||||
ORDER BY
|
||||
1,2,3
|
||||
) AS b
|
||||
USING (user_id)
|
||||
GROUP BY user_id
|
||||
ORDER BY 1;
|
||||
|
||||
-- see the comment for the above query
|
||||
SELECT a.user_id, avg(b.value_2) as subquery_avg
|
||||
FROM
|
||||
(SELECT
|
||||
user_id
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
(value_1 > 2)
|
||||
GROUP BY
|
||||
user_id
|
||||
HAVING
|
||||
count(distinct value_1) > 2
|
||||
) as a
|
||||
LEFT JOIN
|
||||
(SELECT
|
||||
DISTINCT ON (value_2) value_2 , user_id, value_3
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
(value_1 > 3)
|
||||
ORDER BY
|
||||
1,2,3
|
||||
) AS b
|
||||
USING (user_id)
|
||||
GROUP BY a.user_id
|
||||
ORDER BY 1;
|
||||
|
|
|
@ -0,0 +1,421 @@
|
|||
-- ===================================================================
|
||||
-- test top level window functions that are pushdownable
|
||||
-- ===================================================================
|
||||
|
||||
-- a very simple window function with an aggregate and a window function
|
||||
-- distribution column is on the partition by clause
|
||||
SELECT
|
||||
user_id, COUNT(*) OVER (PARTITION BY user_id),
|
||||
rank() OVER (PARTITION BY user_id)
|
||||
FROM
|
||||
users_table
|
||||
ORDER BY
|
||||
1 DESC, 2 DESC, 3 DESC
|
||||
LIMIT 5;
|
||||
|
||||
-- a more complicated window clause, including an aggregate
|
||||
-- in both the window clause and the target entry
|
||||
SELECT
|
||||
user_id, avg(avg(value_3)) OVER (PARTITION BY user_id, MIN(value_2))
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY
|
||||
1
|
||||
ORDER BY
|
||||
2 DESC NULLS LAST, 1 DESC;
|
||||
|
||||
-- window clause operates on the results of a subquery
|
||||
SELECT
|
||||
user_id, max(value_1) OVER (PARTITION BY user_id, MIN(value_2))
|
||||
FROM (
|
||||
SELECT
|
||||
DISTINCT us.user_id, us.value_2, value_1, random() as r1
|
||||
FROM
|
||||
users_table as us, events_table
|
||||
WHERE
|
||||
us.user_id = events_table.user_id AND event_type IN (1,2)
|
||||
ORDER BY
|
||||
user_id, value_2
|
||||
) s
|
||||
GROUP BY
|
||||
1, value_1
|
||||
ORDER BY
|
||||
2 DESC, 1;
|
||||
|
||||
-- window function operates on the results of
|
||||
-- a join
|
||||
SELECT
|
||||
us.user_id,
|
||||
SUM(us.value_1) OVER (PARTITION BY us.user_id)
|
||||
FROM
|
||||
users_table us
|
||||
JOIN
|
||||
events_table ev
|
||||
ON (us.user_id = ev.user_id)
|
||||
GROUP BY
|
||||
1,
|
||||
value_1
|
||||
ORDER BY
|
||||
1,
|
||||
2
|
||||
LIMIT 5;
|
||||
|
||||
-- the same query, but this time join with an alias
|
||||
SELECT
|
||||
user_id, value_1, SUM(j.value_1) OVER (PARTITION BY j.user_id)
|
||||
FROM
|
||||
(users_table us
|
||||
JOIN
|
||||
events_table ev
|
||||
USING (user_id )
|
||||
) j
|
||||
GROUP BY
|
||||
user_id,
|
||||
value_1
|
||||
ORDER BY
|
||||
3 DESC, 2 DESC, 1 DESC
|
||||
LIMIT 5;
|
||||
|
||||
-- querying views that have window functions should be ok
|
||||
CREATE VIEW window_view AS
|
||||
SELECT
|
||||
DISTINCT user_id, rank() OVER (PARTITION BY user_id ORDER BY value_1)
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY
|
||||
user_id, value_1
|
||||
HAVING count(*) > 1;
|
||||
|
||||
-- Window function in View works
|
||||
SELECT *
|
||||
FROM
|
||||
window_view
|
||||
ORDER BY
|
||||
2 DESC, 1
|
||||
LIMIT 10;
|
||||
|
||||
-- the other way around also should work fine
|
||||
-- query a view using window functions
|
||||
CREATE VIEW users_view AS SELECT * FROM users_table;
|
||||
|
||||
SELECT
|
||||
DISTINCT user_id, rank() OVER (PARTITION BY user_id ORDER BY value_1)
|
||||
FROM
|
||||
users_view
|
||||
GROUP BY
|
||||
user_id, value_1
|
||||
HAVING count(*) > 4
|
||||
ORDER BY
|
||||
2 DESC, 1;
|
||||
|
||||
DROP VIEW users_view, window_view;
|
||||
|
||||
-- window function uses columns from two different tables
|
||||
SELECT
|
||||
DISTINCT ON (events_table.user_id, rnk) events_table.user_id, rank() OVER my_win AS rnk
|
||||
FROM
|
||||
events_table, users_table
|
||||
WHERE
|
||||
users_table.user_id = events_table.user_id
|
||||
WINDOW
|
||||
my_win AS (PARTITION BY events_table.user_id, users_table.value_1 ORDER BY events_table.time DESC)
|
||||
ORDER BY
|
||||
rnk DESC, 1 DESC
|
||||
LIMIT 10;
|
||||
|
||||
-- the same query with reference table column is also on the partition by clause
|
||||
SELECT
|
||||
DISTINCT ON (events_table.user_id, rnk) events_table.user_id, rank() OVER my_win AS rnk
|
||||
FROM
|
||||
events_table, users_ref_test_table uref
|
||||
WHERE
|
||||
uref.id = events_table.user_id
|
||||
WINDOW
|
||||
my_win AS (PARTITION BY events_table.user_id, uref.k_no ORDER BY events_table.time DESC)
|
||||
ORDER BY
|
||||
rnk DESC, 1 DESC
|
||||
LIMIT 10;
|
||||
|
||||
-- similar query with no distribution column is on the partition by clause
|
||||
-- is not supported
|
||||
SELECT
|
||||
DISTINCT ON (events_table.user_id, rnk) events_table.user_id, rank() OVER my_win AS rnk
|
||||
FROM
|
||||
events_table, users_ref_test_table uref
|
||||
WHERE
|
||||
uref.id = events_table.user_id
|
||||
WINDOW
|
||||
my_win AS (PARTITION BY events_table.value_2, uref.k_no ORDER BY events_table.time DESC)
|
||||
ORDER BY
|
||||
rnk DESC, 1 DESC
|
||||
LIMIT 10;
|
||||
|
||||
-- ORDER BY in the window function is an aggragate
|
||||
SELECT
|
||||
user_id, rank() OVER my_win as rnk, avg(value_2) as avg_val_2
|
||||
FROM
|
||||
events_table
|
||||
GROUP BY
|
||||
user_id, date_trunc('day', time)
|
||||
WINDOW
|
||||
my_win AS (PARTITION BY user_id ORDER BY avg(event_type) DESC)
|
||||
ORDER BY
|
||||
3 DESC, 2 DESC, 1 DESC;
|
||||
|
||||
-- lets push the limits of writing complex expressions aling with the window functions
|
||||
SELECT
|
||||
COUNT(*) OVER (PARTITION BY user_id, user_id + 1),
|
||||
rank() OVER (PARTITION BY user_id) as cnt1,
|
||||
COUNT(*) OVER (PARTITION BY user_id, abs(value_1 - value_2)) as cnt2,
|
||||
date_trunc('min', lag(time) OVER (PARTITION BY user_id)) as datee,
|
||||
rank() OVER my_win as rnnk,
|
||||
avg(CASE
|
||||
WHEN user_id > 4
|
||||
THEN value_1
|
||||
ELSE value_2
|
||||
END) FILTER (WHERE user_id > 2) OVER my_win_2 as filtered_count,
|
||||
sum(user_id * (5.0 / (value_1 + value_2 + 0.1)) * value_3) FILTER (WHERE value_1::text LIKE '%1%') OVER my_win_4 as cnt_with_filter_2
|
||||
FROM
|
||||
users_table
|
||||
WINDOW
|
||||
my_win AS (PARTITION BY user_id, (value_1%3)::int ORDER BY time DESC),
|
||||
my_win_2 AS (PARTITION BY user_id, (value_1)::int ORDER BY time DESC),
|
||||
my_win_3 AS (PARTITION BY user_id, date_trunc('min', time)),
|
||||
my_win_4 AS (my_win_3 ORDER BY value_2, value_3)
|
||||
ORDER BY
|
||||
cnt_with_filter_2 DESC NULLS LAST, filtered_count DESC NULLS LAST, datee DESC NULLS LAST, rnnk DESC, cnt2 DESC, cnt1 DESC, user_id DESC
|
||||
LIMIT 5;
|
||||
|
||||
-- some tests with GROUP BY along with PARTITION BY
|
||||
SELECT
|
||||
user_id,
|
||||
rank() OVER my_win as my_rank,
|
||||
avg(avg(event_type)) OVER my_win_2 as avg,
|
||||
max(time) as mx_time
|
||||
FROM
|
||||
events_table
|
||||
GROUP BY
|
||||
user_id,
|
||||
value_2
|
||||
WINDOW
|
||||
my_win AS (PARTITION BY user_id, max(event_type) ORDER BY count(*) DESC),
|
||||
my_win_2 AS (PARTITION BY user_id, avg(user_id) ORDER BY count(*) DESC)
|
||||
ORDER BY
|
||||
avg DESC,
|
||||
mx_time DESC,
|
||||
my_rank DESC,
|
||||
user_id DESC;
|
||||
|
||||
-- test for range and rows mode and different window functions
|
||||
-- mostly to make sure that deparsing works fine
|
||||
SELECT
|
||||
user_id,
|
||||
rank() OVER (PARTITION BY user_id ROWS BETWEEN
|
||||
UNBOUNDED PRECEDING AND CURRENT ROW),
|
||||
dense_rank() OVER (PARTITION BY user_id RANGE BETWEEN
|
||||
UNBOUNDED PRECEDING AND CURRENT ROW),
|
||||
CUME_DIST() OVER (PARTITION BY user_id RANGE BETWEEN
|
||||
UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING),
|
||||
PERCENT_RANK() OVER (PARTITION BY user_id ORDER BY avg(value_1) RANGE BETWEEN
|
||||
UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY
|
||||
1
|
||||
ORDER BY
|
||||
4 DESC,3 DESC,2 DESC ,1 DESC;
|
||||
|
||||
-- some tests with GROUP BY, HAVING and LIMIT
|
||||
SELECT
|
||||
user_id, sum(event_type) OVER my_win , event_type
|
||||
FROM
|
||||
events_table
|
||||
GROUP BY
|
||||
user_id, event_type
|
||||
HAVING count(*) > 2
|
||||
WINDOW my_win AS (PARTITION BY user_id, max(event_type) ORDER BY count(*) DESC)
|
||||
ORDER BY
|
||||
2 DESC, 3 DESC, 1 DESC
|
||||
LIMIT
|
||||
5;
|
||||
|
||||
-- Group by has more columns than partition by
|
||||
SELECT
|
||||
DISTINCT user_id, SUM(value_2) OVER (PARTITION BY user_id)
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY
|
||||
user_id, value_1, value_2
|
||||
HAVING count(*) > 2
|
||||
ORDER BY
|
||||
2 DESC, 1
|
||||
LIMIT
|
||||
10;
|
||||
|
||||
SELECT
|
||||
DISTINCT ON (user_id) user_id, SUM(value_2) OVER (PARTITION BY user_id)
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY
|
||||
user_id, value_1, value_2
|
||||
HAVING count(*) > 2
|
||||
ORDER BY
|
||||
1, 2 DESC
|
||||
LIMIT
|
||||
10;
|
||||
|
||||
SELECT
|
||||
DISTINCT ON (SUM(value_1) OVER (PARTITION BY user_id)) user_id, SUM(value_2) OVER (PARTITION BY user_id)
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY
|
||||
user_id, value_1, value_2
|
||||
HAVING count(*) > 2
|
||||
ORDER BY
|
||||
(SUM(value_1) OVER (PARTITION BY user_id)) , 2 DESC, 1
|
||||
LIMIT
|
||||
10;
|
||||
|
||||
-- not a meaningful query, with interesting syntax
|
||||
SELECT
|
||||
user_id,
|
||||
AVG(avg(value_1)) OVER (PARTITION BY user_id, max(user_id), MIN(value_2)),
|
||||
AVG(avg(user_id)) OVER (PARTITION BY user_id, min(user_id), AVG(value_1))
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY
|
||||
1
|
||||
ORDER BY
|
||||
3 DESC, 2 DESC, 1 DESC;
|
||||
|
||||
EXPLAIN
|
||||
SELECT
|
||||
user_id,
|
||||
AVG(avg(value_1)) OVER (PARTITION BY user_id, max(user_id), MIN(value_2)),
|
||||
AVG(avg(user_id)) OVER (PARTITION BY user_id, min(user_id), AVG(value_1))
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY
|
||||
1
|
||||
ORDER BY
|
||||
3 DESC, 2 DESC, 1 DESC;
|
||||
|
||||
SELECT
|
||||
user_id,
|
||||
1 + sum(value_1),
|
||||
1 + AVG(value_2) OVER (partition by user_id)
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY
|
||||
user_id, value_2
|
||||
ORDER BY
|
||||
user_id, value_2;
|
||||
|
||||
EXPLAIN
|
||||
SELECT
|
||||
user_id,
|
||||
1 + sum(value_1),
|
||||
1 + AVG(value_2) OVER (partition by user_id)
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY
|
||||
user_id, value_2
|
||||
ORDER BY
|
||||
user_id, value_2;
|
||||
|
||||
SELECT
|
||||
user_id,
|
||||
1 + sum(value_1),
|
||||
1 + AVG(value_2) OVER (partition by user_id)
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY
|
||||
user_id, value_2
|
||||
ORDER BY
|
||||
2 DESC, 1
|
||||
LIMIT 5;
|
||||
|
||||
-- rank and ordering in the reverse order
|
||||
SELECT
|
||||
user_id,
|
||||
avg(value_1),
|
||||
RANK() OVER (partition by user_id order by value_2)
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY user_id, value_2
|
||||
ORDER BY user_id, value_2 DESC;
|
||||
|
||||
-- order by in the window function is same as avg(value_1) DESC
|
||||
SELECT
|
||||
user_id,
|
||||
avg(value_1),
|
||||
RANK() OVER (partition by user_id order by 1 / (1 + avg(value_1)))
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY user_id, value_2
|
||||
ORDER BY user_id, avg(value_1) DESC;
|
||||
|
||||
EXPLAIN
|
||||
SELECT
|
||||
user_id,
|
||||
avg(value_1),
|
||||
RANK() OVER (partition by user_id order by 1 / (1 + avg(value_1)))
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY user_id, value_2
|
||||
ORDER BY user_id, avg(value_1) DESC;
|
||||
|
||||
-- order by in the window function is same as avg(value_1) DESC
|
||||
SELECT
|
||||
user_id,
|
||||
avg(value_1),
|
||||
RANK() OVER (partition by user_id order by 1 / (1 + avg(value_1)))
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY user_id, value_2
|
||||
ORDER BY user_id, avg(value_1) DESC;
|
||||
|
||||
-- limit is not pushed down to worker !!
|
||||
EXPLAIN
|
||||
SELECT
|
||||
user_id,
|
||||
avg(value_1),
|
||||
RANK() OVER (partition by user_id order by 1 / (1 + avg(value_1)))
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY user_id, value_2
|
||||
ORDER BY user_id, avg(value_1) DESC
|
||||
LIMIT 5;
|
||||
|
||||
EXPLAIN
|
||||
SELECT
|
||||
user_id,
|
||||
avg(value_1),
|
||||
RANK() OVER (partition by user_id order by 1 / (1 + avg(value_1)))
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY user_id, value_2
|
||||
ORDER BY user_id, avg(value_1) DESC
|
||||
LIMIT 5;
|
||||
|
||||
EXPLAIN
|
||||
SELECT
|
||||
user_id,
|
||||
avg(value_1),
|
||||
RANK() OVER (partition by user_id order by 1 / (1 + sum(value_2)))
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY user_id, value_2
|
||||
ORDER BY user_id, avg(value_1) DESC
|
||||
LIMIT 5;
|
||||
|
||||
EXPLAIN
|
||||
SELECT
|
||||
user_id,
|
||||
avg(value_1),
|
||||
RANK() OVER (partition by user_id order by sum(value_2))
|
||||
FROM
|
||||
users_table
|
||||
GROUP BY user_id, value_2
|
||||
ORDER BY user_id, avg(value_1) DESC
|
||||
LIMIT 5;
|
Loading…
Reference in New Issue