mirror of https://github.com/citusdata/citus.git
Merge pull request #1668 from citusdata/window_function_preliminary_implementation
Add window function support for SUBQUERY PUSHDOWN and INSERT INTO SELECT (pull/1699/head)
commit
e202c51fec
|
@ -776,6 +776,7 @@ MultiTaskRouterSelectQuerySupported(Query *query)
|
||||||
{
|
{
|
||||||
List *queryList = NIL;
|
List *queryList = NIL;
|
||||||
ListCell *queryCell = NULL;
|
ListCell *queryCell = NULL;
|
||||||
|
StringInfo errorDetail = NULL;
|
||||||
|
|
||||||
ExtractQueryWalker((Node *) query, &queryList);
|
ExtractQueryWalker((Node *) query, &queryList);
|
||||||
foreach(queryCell, queryList)
|
foreach(queryCell, queryList)
|
||||||
|
@ -797,7 +798,7 @@ MultiTaskRouterSelectQuerySupported(Query *query)
|
||||||
if (subquery->limitCount != NULL)
|
if (subquery->limitCount != NULL)
|
||||||
{
|
{
|
||||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||||
"LIMIT clauses are not allowed in distirbuted INSERT "
|
"LIMIT clauses are not allowed in distributed INSERT "
|
||||||
"... SELECT queries",
|
"... SELECT queries",
|
||||||
NULL, NULL);
|
NULL, NULL);
|
||||||
}
|
}
|
||||||
|
@ -811,17 +812,34 @@ MultiTaskRouterSelectQuerySupported(Query *query)
|
||||||
NULL, NULL);
|
NULL, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/* group clause list must include partition column */
|
||||||
* We could potentially support window clauses where the data is partitioned
|
if (subquery->groupClause)
|
||||||
* over distribution column. For simplicity, we currently do not support window
|
|
||||||
* clauses at all.
|
|
||||||
*/
|
|
||||||
if (subquery->windowClause != NULL)
|
|
||||||
{
|
{
|
||||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
List *groupClauseList = subquery->groupClause;
|
||||||
"window functions are not allowed in distributed "
|
List *targetEntryList = subquery->targetList;
|
||||||
"INSERT ... SELECT queries",
|
List *groupTargetEntryList = GroupTargetEntryList(groupClauseList,
|
||||||
NULL, NULL);
|
targetEntryList);
|
||||||
|
bool groupOnPartitionColumn = TargetListOnPartitionColumn(subquery,
|
||||||
|
groupTargetEntryList);
|
||||||
|
if (!groupOnPartitionColumn)
|
||||||
|
{
|
||||||
|
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||||
|
"Group by list without distribution column is "
|
||||||
|
"not allowed in distributed INSERT ... "
|
||||||
|
"SELECT queries",
|
||||||
|
NULL, NULL);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We support window functions when the window function
|
||||||
|
* is partitioned on distribution column.
|
||||||
|
*/
|
||||||
|
if (subquery->windowClause && !SafeToPushdownWindowFunction(subquery,
|
||||||
|
&errorDetail))
|
||||||
|
{
|
||||||
|
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, errorDetail->data, NULL,
|
||||||
|
NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (subquery->setOperations != NULL)
|
if (subquery->setOperations != NULL)
|
||||||
|
|
|
@ -83,7 +83,7 @@ static DeferredErrorMessage * DeferErrorIfUnsupportedUnionQuery(Query *queryTree
|
||||||
outerMostQueryHasLimit);
|
outerMostQueryHasLimit);
|
||||||
static bool ExtractSetOperationStatmentWalker(Node *node, List **setOperationList);
|
static bool ExtractSetOperationStatmentWalker(Node *node, List **setOperationList);
|
||||||
static DeferredErrorMessage * DeferErrorIfUnsupportedTableCombination(Query *queryTree);
|
static DeferredErrorMessage * DeferErrorIfUnsupportedTableCombination(Query *queryTree);
|
||||||
static bool TargetListOnPartitionColumn(Query *query, List *targetEntryList);
|
static bool WindowPartitionOnDistributionColumn(Query *query);
|
||||||
static FieldSelect * CompositeFieldRecursive(Expr *expression, Query *query);
|
static FieldSelect * CompositeFieldRecursive(Expr *expression, Query *query);
|
||||||
static bool FullCompositeFieldList(List *compositeFieldList);
|
static bool FullCompositeFieldList(List *compositeFieldList);
|
||||||
static MultiNode * MultiPlanTree(Query *queryTree);
|
static MultiNode * MultiPlanTree(Query *queryTree);
|
||||||
|
@ -447,6 +447,7 @@ MultiSubqueryPlanTree(Query *originalQuery, Query *queryTree,
|
||||||
* - Only a single RTE_RELATION exists, which means only a single table
|
* - Only a single RTE_RELATION exists, which means only a single table
|
||||||
* name is specified on the whole query
|
* name is specified on the whole query
|
||||||
* - No sublinks exists in the subquery
|
* - No sublinks exists in the subquery
|
||||||
|
* - No window functions in the subquery
|
||||||
*
|
*
|
||||||
* Note that the caller should still call DeferErrorIfUnsupportedSubqueryRepartition()
|
* Note that the caller should still call DeferErrorIfUnsupportedSubqueryRepartition()
|
||||||
* to ensure that Citus supports the subquery. Also, this function is designed to run
|
* to ensure that Citus supports the subquery. Also, this function is designed to run
|
||||||
|
@ -466,6 +467,12 @@ SingleRelationRepartitionSubquery(Query *queryTree)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* we don't support window functions */
|
||||||
|
if (queryTree->hasWindowFuncs)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Don't allow joins and set operations. If join appears in the queryTree, the
|
* Don't allow joins and set operations. If join appears in the queryTree, the
|
||||||
* length would be greater than 1. If only set operations exists, the length
|
* length would be greater than 1. If only set operations exists, the length
|
||||||
|
@ -782,6 +789,7 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi
|
||||||
{
|
{
|
||||||
bool preconditionsSatisfied = true;
|
bool preconditionsSatisfied = true;
|
||||||
char *errorDetail = NULL;
|
char *errorDetail = NULL;
|
||||||
|
StringInfo errorInfo = NULL;
|
||||||
DeferredErrorMessage *deferredError = NULL;
|
DeferredErrorMessage *deferredError = NULL;
|
||||||
|
|
||||||
deferredError = DeferErrorIfUnsupportedTableCombination(subqueryTree);
|
deferredError = DeferErrorIfUnsupportedTableCombination(subqueryTree);
|
||||||
|
@ -796,12 +804,6 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi
|
||||||
errorDetail = "Subqueries without relations are unsupported";
|
errorDetail = "Subqueries without relations are unsupported";
|
||||||
}
|
}
|
||||||
|
|
||||||
if (subqueryTree->hasWindowFuncs)
|
|
||||||
{
|
|
||||||
preconditionsSatisfied = false;
|
|
||||||
errorDetail = "Window functions are currently unsupported";
|
|
||||||
}
|
|
||||||
|
|
||||||
if (subqueryTree->limitOffset)
|
if (subqueryTree->limitOffset)
|
||||||
{
|
{
|
||||||
preconditionsSatisfied = false;
|
preconditionsSatisfied = false;
|
||||||
|
@ -871,6 +873,17 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We support window functions when the window function
|
||||||
|
* is partitioned on distribution column.
|
||||||
|
*/
|
||||||
|
if (subqueryTree->windowClause && !SafeToPushdownWindowFunction(subqueryTree,
|
||||||
|
&errorInfo))
|
||||||
|
{
|
||||||
|
errorDetail = (char *) errorInfo->data;
|
||||||
|
preconditionsSatisfied = false;
|
||||||
|
}
|
||||||
|
|
||||||
/* we don't support aggregates without group by */
|
/* we don't support aggregates without group by */
|
||||||
if (subqueryTree->hasAggs && (subqueryTree->groupClause == NULL))
|
if (subqueryTree->hasAggs && (subqueryTree->groupClause == NULL))
|
||||||
{
|
{
|
||||||
|
@ -1083,11 +1096,91 @@ DeferErrorIfUnsupportedTableCombination(Query *queryTree)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* SafeToPushdownWindowFunction checks if the query with window function is supported.
|
||||||
|
* It returns true when the window clauses pass the checks below; otherwise it returns false and sets *errorDetail to the reason.
|
||||||
|
*/
|
||||||
|
bool
|
||||||
|
SafeToPushdownWindowFunction(Query *query, StringInfo *errorDetail)
|
||||||
|
{
|
||||||
|
ListCell *windowClauseCell = NULL;
|
||||||
|
List *windowClauseList = query->windowClause;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We need to check each window clause separately if there is a partition by clause
|
||||||
|
* and if it is partitioned on the distribution column.
|
||||||
|
*/
|
||||||
|
foreach(windowClauseCell, windowClauseList)
|
||||||
|
{
|
||||||
|
WindowClause *windowClause = lfirst(windowClauseCell);
|
||||||
|
|
||||||
|
if (!windowClause->partitionClause)
|
||||||
|
{
|
||||||
|
*errorDetail = makeStringInfo();
|
||||||
|
appendStringInfoString(*errorDetail,
|
||||||
|
"Window functions without PARTITION BY on distribution "
|
||||||
|
"column is currently unsupported");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!WindowPartitionOnDistributionColumn(query))
|
||||||
|
{
|
||||||
|
*errorDetail = makeStringInfo();
|
||||||
|
appendStringInfoString(*errorDetail,
|
||||||
|
"Window functions with PARTITION BY list missing distribution "
|
||||||
|
"column is currently unsupported");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* WindowPartitionOnDistributionColumn checks whether every window clause of
|
||||||
|
* the given query is partitioned by the distribution column. The function
|
||||||
|
* returns false if any window clause does not have a partition by clause or
|
||||||
|
* its partition by clause does not include the distribution column.
|
||||||
|
*
|
||||||
|
* Please note that if the query does not have a window function, the function
|
||||||
|
* returns true.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
WindowPartitionOnDistributionColumn(Query *query)
|
||||||
|
{
|
||||||
|
List *windowClauseList = query->windowClause;
|
||||||
|
ListCell *windowClauseCell = NULL;
|
||||||
|
|
||||||
|
foreach(windowClauseCell, windowClauseList)
|
||||||
|
{
|
||||||
|
WindowClause *windowClause = lfirst(windowClauseCell);
|
||||||
|
List *groupTargetEntryList = NIL;
|
||||||
|
bool partitionOnDistributionColumn = false;
|
||||||
|
List *partitionClauseList = windowClause->partitionClause;
|
||||||
|
List *targetEntryList = query->targetList;
|
||||||
|
|
||||||
|
groupTargetEntryList =
|
||||||
|
GroupTargetEntryList(partitionClauseList, targetEntryList);
|
||||||
|
|
||||||
|
partitionOnDistributionColumn =
|
||||||
|
TargetListOnPartitionColumn(query, groupTargetEntryList);
|
||||||
|
|
||||||
|
if (!partitionOnDistributionColumn)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* TargetListOnPartitionColumn checks if at least one target list entry is on
|
* TargetListOnPartitionColumn checks if at least one target list entry is on
|
||||||
* partition column.
|
* partition column.
|
||||||
*/
|
*/
|
||||||
static bool
|
bool
|
||||||
TargetListOnPartitionColumn(Query *query, List *targetEntryList)
|
TargetListOnPartitionColumn(Query *query, List *targetEntryList)
|
||||||
{
|
{
|
||||||
bool targetListOnPartitionColumn = false;
|
bool targetListOnPartitionColumn = false;
|
||||||
|
|
|
@ -185,6 +185,8 @@ extern MultiTreeRoot * MultiLogicalPlanCreate(Query *originalQuery, Query *query
|
||||||
PlannerRestrictionContext *
|
PlannerRestrictionContext *
|
||||||
plannerRestrictionContext,
|
plannerRestrictionContext,
|
||||||
ParamListInfo boundParams);
|
ParamListInfo boundParams);
|
||||||
|
extern bool SafeToPushdownWindowFunction(Query *query, StringInfo *errorDetail);
|
||||||
|
extern bool TargetListOnPartitionColumn(Query *query, List *targetEntryList);
|
||||||
extern bool NeedsDistributedPlanning(Query *queryTree);
|
extern bool NeedsDistributedPlanning(Query *queryTree);
|
||||||
extern MultiNode * ParentNode(MultiNode *multiNode);
|
extern MultiNode * ParentNode(MultiNode *multiNode);
|
||||||
extern MultiNode * ChildNode(MultiUnaryNode *multiNode);
|
extern MultiNode * ChildNode(MultiUnaryNode *multiNode);
|
||||||
|
|
|
@ -1015,8 +1015,7 @@ FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
||||||
raw_events_second
|
raw_events_second
|
||||||
WHERE raw_events_first.user_id = raw_events_second.user_id
|
WHERE raw_events_first.user_id = raw_events_second.user_id
|
||||||
GROUP BY raw_events_second.value_3) AS foo;
|
GROUP BY raw_events_second.value_3) AS foo;
|
||||||
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
|
DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... SELECT queries
|
||||||
DETAIL: The data type of the target table's partition column should exactly match the data type of the corresponding simple column reference in the subquery.
|
|
||||||
DEBUG: Collecting INSERT ... SELECT results on coordinator
|
DEBUG: Collecting INSERT ... SELECT results on coordinator
|
||||||
ERROR: cannot push down this subquery
|
ERROR: cannot push down this subquery
|
||||||
DETAIL: Group by list without partition column is currently unsupported
|
DETAIL: Group by list without partition column is currently unsupported
|
||||||
|
@ -1133,9 +1132,7 @@ FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
||||||
GROUP BY raw_events_second.value_1
|
GROUP BY raw_events_second.value_1
|
||||||
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
||||||
ON (f.id = f2.id);
|
ON (f.id = f2.id);
|
||||||
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
|
DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... SELECT queries
|
||||||
DETAIL: Subquery contains an expression that is not a simple column reference in the same position as the target table's partition column.
|
|
||||||
HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery.
|
|
||||||
DEBUG: Collecting INSERT ... SELECT results on coordinator
|
DEBUG: Collecting INSERT ... SELECT results on coordinator
|
||||||
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
||||||
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
||||||
|
@ -1164,8 +1161,10 @@ FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
||||||
GROUP BY raw_events_second.value_1
|
GROUP BY raw_events_second.value_1
|
||||||
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
||||||
ON (f.id = f2.id);
|
ON (f.id = f2.id);
|
||||||
ERROR: cannot perform distributed planning for the given modification
|
DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... SELECT queries
|
||||||
DETAIL: Select query cannot be pushed down to the worker.
|
DEBUG: Collecting INSERT ... SELECT results on coordinator
|
||||||
|
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
||||||
|
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
||||||
-- cannot pushdown the query since the JOIN is not equi JOIN
|
-- cannot pushdown the query since the JOIN is not equi JOIN
|
||||||
INSERT INTO agg_events
|
INSERT INTO agg_events
|
||||||
(user_id, value_4_agg)
|
(user_id, value_4_agg)
|
||||||
|
|
|
@ -0,0 +1,855 @@
|
||||||
|
-- ===================================================================
|
||||||
|
-- test insert select functionality for window functions
|
||||||
|
-- ===================================================================
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, time, rnk
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, rank() OVER my_win as rnk
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
WINDOW my_win AS (PARTITION BY user_id ORDER BY time DESC)
|
||||||
|
) as foo;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
10001 | 101 | 49.5810418958104190
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- the same test with different syntax
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, time, rnk
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, rank() OVER (PARTITION BY user_id ORDER BY time DESC) as rnk
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
) as foo;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
10001 | 101 | 49.5810418958104190
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- similar test with lag
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg, value_3_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, time, lag_event_type, row_no
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, lag(event_type) OVER my_win as lag_event_type, row_number() OVER my_win as row_no
|
||||||
|
FROM
|
||||||
|
events_table WINDOW my_win AS (PARTITION BY user_id ORDER BY time DESC)
|
||||||
|
) as foo;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
10001 | 101 | 49.5810418958104190
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- simple window function, partitioned and grouped by on the distribution key
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, rnk, tme
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
user_id, rank() OVER my_win as rnk, avg(value_2) as tme
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, date_trunc('day', time)
|
||||||
|
WINDOW my_win AS (PARTITION BY user_id ORDER BY avg(event_type) DESC)
|
||||||
|
) as foo;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
1188 | 101 | 49.7895622895622896
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- top level query has a group by on the result of the window function
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
min(user_id), min(time), lag_event_type
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, lag(event_type) OVER my_win as lag_event_type
|
||||||
|
FROM
|
||||||
|
events_table WINDOW my_win AS (PARTITION BY user_id ORDER BY time DESC)
|
||||||
|
) as foo
|
||||||
|
GROUP BY
|
||||||
|
lag_event_type;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+--------------------
|
||||||
|
1002 | 50 | 9.7844311377245509
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- window functions should work along with joins as well
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg, value_2_agg)
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_table.user_id, lag(users_table.user_id) OVER w1, rank() OVER w1
|
||||||
|
FROM
|
||||||
|
users_table, events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id and
|
||||||
|
event_type < 25
|
||||||
|
WINDOW w1 AS (PARTITION BY users_table.user_id, events_table.event_type ORDER BY events_table.time)
|
||||||
|
) as foo;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
195 | 91 | 51.0205128205128205
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- two window functions in a single subquery should work fine as well
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg, value_2_agg)
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_table.user_id, lag(users_table.user_id) OVER w1, rank() OVER w2
|
||||||
|
FROM
|
||||||
|
users_table, events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id and
|
||||||
|
event_type < 25
|
||||||
|
WINDOW w1 AS (PARTITION BY users_table.user_id, events_table.event_type ORDER BY events_table.time),
|
||||||
|
w2 AS (PARTITION BY users_table.user_id, (events_table.value_2 % 25) ORDER BY events_table.time)
|
||||||
|
) as foo;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
202 | 91 | 50.2970297029702970
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- window functions should be fine within subquery joins
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg, value_2_agg, value_3_agg)
|
||||||
|
SELECT sub_1.user_id, max(lag_1), max(rank_1), max(rank_2) FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_table.user_id, lag(users_table.user_id) OVER w1 as lag_1, rank() OVER w2 as rank_1
|
||||||
|
FROM
|
||||||
|
users_table, events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id and
|
||||||
|
event_type < 25
|
||||||
|
WINDOW w1 AS (PARTITION BY users_table.user_id, events_table.event_type ORDER BY events_table.time),
|
||||||
|
w2 AS (PARTITION BY users_table.user_id, (events_table.value_2 % 25) ORDER BY events_table.time)
|
||||||
|
) as sub_1
|
||||||
|
JOIN
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_table.user_id, lag(users_table.user_id) OVER w1 as lag_2, rank() OVER w2 as rank_2
|
||||||
|
FROM
|
||||||
|
users_table, events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id and
|
||||||
|
event_type < 25
|
||||||
|
WINDOW w1 AS (PARTITION BY users_table.user_id, events_table.value_2 ORDER BY events_table.time),
|
||||||
|
w2 AS (PARTITION BY users_table.user_id, (events_table.value_2 % 50) ORDER BY events_table.time)
|
||||||
|
) as sub_2
|
||||||
|
ON(sub_1.user_id = sub_2.user_id)
|
||||||
|
GROUP BY
|
||||||
|
sub_1.user_id;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
91 | 91 | 50.2637362637362637
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- GROUP BYs and PARTITION BYs should work fine together
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
avg(user_id), max(time), my_rank
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
user_id, date_trunc('day', time) as time, rank() OVER my_win as my_rank
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, date_trunc('day', time)
|
||||||
|
WINDOW my_win AS (PARTITION BY user_id ORDER BY count(*) DESC)
|
||||||
|
) as foo
|
||||||
|
WHERE
|
||||||
|
my_rank > 5
|
||||||
|
GROUP BY
|
||||||
|
my_rank;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
7 | 6 | 50.0000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- aggregates in the PARTITION BY is also allows
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
avg(user_id), max(time), my_rank
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
user_id, date_trunc('day', time) as time, rank() OVER my_win as my_rank
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, date_trunc('day', time)
|
||||||
|
WINDOW my_win AS (PARTITION BY user_id, avg(event_type%10)::int ORDER BY count(*) DESC)
|
||||||
|
) as foo
|
||||||
|
WHERE
|
||||||
|
my_rank > 0
|
||||||
|
GROUP BY
|
||||||
|
my_rank;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
8 | 7 | 48.8750000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- GROUP BY should not necessarly be inclusive of partitioning
|
||||||
|
-- but this query doesn't make much sense
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg)
|
||||||
|
SELECT
|
||||||
|
avg(user_id), my_rank
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
user_id, rank() OVER my_win as my_rank
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
WINDOW my_win AS (PARTITION BY user_id, max(event_type) ORDER BY count(*) DESC)
|
||||||
|
) as foo
|
||||||
|
GROUP BY
|
||||||
|
my_rank;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
1 | 1 | 50.0000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- Group by has more columns than partition by which uses coordinator insert ... select
|
||||||
|
INSERT INTO agg_results(user_id, value_2_agg)
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id, SUM(value_2) OVER (PARTITION BY user_id)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_1, value_2
|
||||||
|
) a
|
||||||
|
ORDER BY
|
||||||
|
2 DESC, 1
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
10 | 10 | 49.1000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
INSERT INTO agg_results(user_id, value_2_agg)
|
||||||
|
SELECT user_id, max(sum) FROM (
|
||||||
|
SELECT
|
||||||
|
user_id, SUM(value_2) OVER (PARTITION BY user_id, value_1)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_1, value_2
|
||||||
|
) a
|
||||||
|
GROUP BY user_id;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
101 | 101 | 50.0000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- Subquery in where with window function
|
||||||
|
INSERT INTO agg_results(user_id)
|
||||||
|
SELECT
|
||||||
|
user_id
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WHERE
|
||||||
|
value_2 > 545 AND
|
||||||
|
value_2 < ALL (
|
||||||
|
SELECT
|
||||||
|
avg(value_3) OVER (PARTITION BY user_id)
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id
|
||||||
|
)
|
||||||
|
GROUP BY
|
||||||
|
user_id;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
4 | 4 | 35.2500000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- Partition by with aggregate functions. This query does not make much sense since the
|
||||||
|
-- result of aggregate function will be the same for every row in a partition and it is
|
||||||
|
-- not going to affect the group that the count function will work on.
|
||||||
|
INSERT INTO agg_results(user_id, value_2_agg)
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT
|
||||||
|
user_id, COUNT(*) OVER (PARTITION BY user_id, MIN(value_2))
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
1
|
||||||
|
) a;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
101 | 101 | 50.0000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- Some more nested queries
|
||||||
|
INSERT INTO agg_results(user_id, value_2_agg, value_3_agg, value_4_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, rank, SUM(ABS(value_2 - value_3)) AS difference, COUNT(*) AS distinct_users
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
*, rank() OVER (PARTITION BY user_id ORDER BY value_2 DESC)
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
user_id, value_2, sum(value_3) OVER (PARTITION BY user_id, value_2) as value_3
|
||||||
|
FROM users_table
|
||||||
|
) AS A
|
||||||
|
) AS A
|
||||||
|
GROUP BY
|
||||||
|
user_id, rank;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
9501 | 101 | 49.8461214608988528
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT DISTINCT
|
||||||
|
f3.user_id, ABS(f2.sum - f3.sum)
|
||||||
|
FROM (
|
||||||
|
SELECT DISTINCT
|
||||||
|
user_id, sum(value_3) OVER (PARTITION BY user_id)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_3
|
||||||
|
) f3,
|
||||||
|
(
|
||||||
|
SELECT DISTINCT
|
||||||
|
user_id, sum(value_2) OVER (PARTITION BY user_id)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_2
|
||||||
|
) f2
|
||||||
|
WHERE
|
||||||
|
f3.user_id=f2.user_id
|
||||||
|
) a;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
101 | 101 | 50.0000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- test with reference table partitioned on columns from both
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT *
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id, count(id) OVER (PARTITION BY user_id, id)
|
||||||
|
FROM
|
||||||
|
users_table, users_ref_test_table
|
||||||
|
) a;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
101 | 101 | 50.0000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- Window functions with HAVING clause
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg)
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id, rank() OVER (PARTITION BY user_id ORDER BY value_1)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_1 HAVING count(*) > 1
|
||||||
|
) a;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
437 | 100 | 49.9496567505720824
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- Window functions with HAVING clause which uses coordinator insert ... select
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg)
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id, rank() OVER (PARTITION BY user_id ORDER BY value_1)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_1 HAVING count(*) > 1
|
||||||
|
) a
|
||||||
|
ORDER BY
|
||||||
|
2 DESC, 1
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
10 | 5 | 32.4000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- Window function in View works
|
||||||
|
CREATE VIEW view_with_window_func AS
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id, rank() OVER (PARTITION BY user_id ORDER BY value_1)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_1
|
||||||
|
HAVING count(*) > 1;
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT *
|
||||||
|
FROM
|
||||||
|
view_with_window_func;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
437 | 100 | 49.9496567505720824
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- Window function in View works and the query uses coordinator insert ... select
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT *
|
||||||
|
FROM
|
||||||
|
view_with_window_func
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
-- since there is a limit but not order, we cannot run avg(user_id)
|
||||||
|
SELECT count(*) FROM agg_results;
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
10
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, max(avg)
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (1, 2, 3, 4, 5))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (6, 7, 8, 9, 10))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (11, 12, 13, 14, 15))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (16, 17, 18, 19, 20))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (21, 22, 23, 24, 25))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (26, 27, 28, 29, 30))
|
||||||
|
) b
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
LIMIT
|
||||||
|
5;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
-- since there is a limit but not order, we cannot test avg or distinct count
|
||||||
|
SELECT count(*) FROM agg_results;
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
5
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, max(avg)
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (1, 2, 3, 4, 5))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (6, 7, 8, 9, 10))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (11, 12, 13, 14, 15))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (16, 17, 18, 19, 20))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (21, 22, 23, 24, 25))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (26, 27, 28, 29, 30))
|
||||||
|
) b
|
||||||
|
GROUP BY
|
||||||
|
user_id;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
94 | 94 | 50.4787234042553191
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT *
|
||||||
|
FROM (
|
||||||
|
( SELECT user_id,
|
||||||
|
sum(counter)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
events_table) user_id_1
|
||||||
|
GROUP BY
|
||||||
|
user_id)
|
||||||
|
UNION
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(counter)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
events_table) user_id_2
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
)
|
||||||
|
) AS ftop
|
||||||
|
LIMIT
|
||||||
|
5;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
-- since there is a limit but not order, we cannot test avg or distinct count
|
||||||
|
SELECT count(*) FROM agg_results;
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
5
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT *
|
||||||
|
FROM (
|
||||||
|
( SELECT user_id,
|
||||||
|
sum(counter)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
events_table) user_id_1
|
||||||
|
GROUP BY
|
||||||
|
user_id)
|
||||||
|
UNION
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(counter)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
events_table) user_id_2
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
)
|
||||||
|
) AS ftop;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
101 | 101 | 50.0000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- lets have some queries that Citus shouldn't push down
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, time, rnk
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, rank() OVER my_win as rnk
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
WINDOW my_win AS (PARTITION BY event_type ORDER BY time DESC)
|
||||||
|
) as foo
|
||||||
|
ORDER BY
|
||||||
|
3 DESC, 1 DESC, 2 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Window functions with PARTITION BY list missing distribution column is currently unsupported
|
||||||
|
-- user needs to supply partition by which should
|
||||||
|
-- include the distribution key
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, time, rnk
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, rank() OVER my_win as rnk
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
WINDOW my_win AS ()
|
||||||
|
) as foo
|
||||||
|
ORDER BY
|
||||||
|
3 DESC, 1 DESC, 2 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Window functions without PARTITION BY on distribution column is currently unsupported
|
||||||
|
-- user needs to supply partition by which should
|
||||||
|
-- include the distribution key
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, time, rnk
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, rank() OVER my_win as rnk
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
WINDOW my_win AS (ORDER BY time DESC)
|
||||||
|
) as foo
|
||||||
|
ORDER BY
|
||||||
|
3 DESC, 1 DESC, 2 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Window functions without PARTITION BY on distribution column is currently unsupported
|
||||||
|
-- w2 should not be pushed down
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg, value_2_agg)
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_table.user_id, lag(users_table.user_id) OVER w1, rank() OVER w2
|
||||||
|
FROM
|
||||||
|
users_table, events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id and
|
||||||
|
event_type < 25
|
||||||
|
WINDOW w1 AS (PARTITION BY users_table.user_id, events_table.event_type ORDER BY events_table.time),
|
||||||
|
w2 AS (PARTITION BY users_table.user_id+1, (events_table.value_2 % 25) ORDER BY events_table.time)
|
||||||
|
) as foo
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Window functions with PARTITION BY list missing distribution column is currently unsupported
|
||||||
|
-- GROUP BY includes the partition key, but not the WINDOW function
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, time, my_rank
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
user_id, date_trunc('day', time) as time, rank() OVER my_win as my_rank
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, date_trunc('day', time)
|
||||||
|
WINDOW my_win AS (ORDER BY avg(event_type))
|
||||||
|
) as foo
|
||||||
|
WHERE
|
||||||
|
my_rank > 125;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Window functions without PARTITION BY on distribution column is currently unsupported
|
||||||
|
-- GROUP BY includes the partition key, but not the WINDOW function
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, time, my_rank
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
user_id, date_trunc('day', time) as time, rank() OVER my_win as my_rank
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, date_trunc('day', time)
|
||||||
|
WINDOW my_win AS (PARTITION BY date_trunc('day', time) ORDER BY avg(event_type))
|
||||||
|
) as foo
|
||||||
|
WHERE
|
||||||
|
my_rank > 125;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Window functions with PARTITION BY list missing distribution column is currently unsupported
|
||||||
|
-- w2 should not be allowed
|
||||||
|
INSERT INTO agg_results (user_id, value_2_agg, value_3_agg)
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_table.user_id, lag(users_table.user_id) OVER w1, rank() OVER w2
|
||||||
|
FROM
|
||||||
|
users_table, events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id and
|
||||||
|
event_type < 25
|
||||||
|
WINDOW w1 AS (PARTITION BY users_table.user_id, events_table.event_type ORDER BY events_table.time),
|
||||||
|
w2 AS (ORDER BY events_table.time)
|
||||||
|
) as foo;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Window functions without PARTITION BY on distribution column is currently unsupported
|
||||||
|
-- unsupported window function with an override
|
||||||
|
INSERT INTO agg_results(user_id, agg_time, value_2_agg)
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT
|
||||||
|
user_id, date_trunc('day', time) as time, sum(rank) OVER w2
|
||||||
|
FROM (
|
||||||
|
SELECT DISTINCT
|
||||||
|
user_id as user_id, time, rank() over w1
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WINDOW
|
||||||
|
w AS (PARTITION BY time), w1 AS (w ORDER BY value_2, value_3)
|
||||||
|
) fab
|
||||||
|
WINDOW
|
||||||
|
w2 as (PARTITION BY user_id, time)
|
||||||
|
) a;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Window functions with PARTITION BY list missing distribution column is currently unsupported
|
||||||
|
-- Subquery in where with unsupported window function
|
||||||
|
INSERT INTO agg_results(user_id)
|
||||||
|
SELECT
|
||||||
|
user_id
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WHERE
|
||||||
|
value_2 > 545 AND
|
||||||
|
value_2 < ALL (
|
||||||
|
SELECT
|
||||||
|
avg(value_3) OVER ()
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id
|
||||||
|
)
|
||||||
|
GROUP BY
|
||||||
|
user_id;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Window functions without PARTITION BY on distribution column is currently unsupported
|
||||||
|
-- Aggregate function on distribution column should error out
|
||||||
|
INSERT INTO agg_results(user_id, value_2_agg)
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT
|
||||||
|
user_id, COUNT(*) OVER (PARTITION BY sum(user_id), MIN(value_2))
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
) a;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Window functions with PARTITION BY list missing distribution column is currently unsupported
|
||||||
|
-- UNION with only one subquery which has a partition on non-distribution column should
|
||||||
|
-- error out
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT *
|
||||||
|
FROM (
|
||||||
|
( SELECT user_id,
|
||||||
|
sum(counter)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
events_table) user_id_1
|
||||||
|
GROUP BY
|
||||||
|
user_id)
|
||||||
|
UNION
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(counter)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_2) over (partition by event_type) AS counter
|
||||||
|
FROM
|
||||||
|
events_table) user_id_2
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
)
|
||||||
|
) AS ftop;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Window functions with PARTITION BY list missing distribution column is currently unsupported
|
||||||
|
DROP VIEW view_with_window_func;
|
|
@ -2381,19 +2381,6 @@ ORDER BY
|
||||||
types;
|
types;
|
||||||
ERROR: cannot push down this subquery
|
ERROR: cannot push down this subquery
|
||||||
DETAIL: Offset clause is currently unsupported
|
DETAIL: Offset clause is currently unsupported
|
||||||
-- not supported due to window functions
|
|
||||||
SELECT user_id,
|
|
||||||
some_vals
|
|
||||||
FROM (
|
|
||||||
SELECT * ,
|
|
||||||
Row_number() over (PARTITION BY "user_id" ORDER BY "user_id") AS "some_vals",
|
|
||||||
Random()
|
|
||||||
FROM users_table
|
|
||||||
) user_id
|
|
||||||
ORDER BY 1,
|
|
||||||
2 limit 10;
|
|
||||||
ERROR: cannot perform distributed planning on this query
|
|
||||||
DETAIL: Subqueries without group by clause are not supported yet
|
|
||||||
-- not supported due to non relation rte
|
-- not supported due to non relation rte
|
||||||
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
||||||
FROM
|
FROM
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -2,10 +2,8 @@
|
||||||
-- multi behavioral analytics
|
-- multi behavioral analytics
|
||||||
-- this file is intended to create the table requires for the tests
|
-- this file is intended to create the table requires for the tests
|
||||||
--
|
--
|
||||||
|
|
||||||
ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1400000;
|
ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1400000;
|
||||||
ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 1400000;
|
ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 1400000;
|
||||||
|
|
||||||
SET citus.shard_replication_factor = 1;
|
SET citus.shard_replication_factor = 1;
|
||||||
SET citus.shard_count = 4;
|
SET citus.shard_count = 4;
|
||||||
|
|
||||||
|
@ -30,10 +28,19 @@ SELECT create_distributed_table('agg_results_third', 'user_id');
|
||||||
CREATE TABLE agg_results_fourth (user_id int, value_1_agg int, value_2_agg int, value_3_agg float, value_4_agg bigint, agg_time timestamp);
|
CREATE TABLE agg_results_fourth (user_id int, value_1_agg int, value_2_agg int, value_3_agg float, value_4_agg bigint, agg_time timestamp);
|
||||||
SELECT create_distributed_table('agg_results_fourth', 'user_id');
|
SELECT create_distributed_table('agg_results_fourth', 'user_id');
|
||||||
|
|
||||||
|
CREATE TABLE users_ref_test_table(id int, it_name varchar(25), k_no int);
|
||||||
|
SELECT create_reference_table('users_ref_test_table');
|
||||||
|
INSERT INTO users_ref_test_table VALUES(1,'User_1',45);
|
||||||
|
INSERT INTO users_ref_test_table VALUES(2,'User_2',46);
|
||||||
|
INSERT INTO users_ref_test_table VALUES(3,'User_3',47);
|
||||||
|
INSERT INTO users_ref_test_table VALUES(4,'User_4',48);
|
||||||
|
INSERT INTO users_ref_test_table VALUES(5,'User_5',49);
|
||||||
|
INSERT INTO users_ref_test_table VALUES(6,'User_6',50);
|
||||||
|
|
||||||
COPY users_table FROM '@abs_srcdir@/data/users_table.data' WITH CSV;
|
COPY users_table FROM '@abs_srcdir@/data/users_table.data' WITH CSV;
|
||||||
COPY events_table FROM '@abs_srcdir@/data/events_table.data' WITH CSV;
|
COPY events_table FROM '@abs_srcdir@/data/events_table.data' WITH CSV;
|
||||||
|
|
||||||
-- create indexes for
|
-- create indexes for
|
||||||
CREATE INDEX is_index1 ON users_table(user_id);
|
CREATE INDEX is_index1 ON users_table(user_id);
|
||||||
CREATE INDEX is_index2 ON events_table(user_id);
|
CREATE INDEX is_index2 ON events_table(user_id);
|
||||||
CREATE INDEX is_index3 ON users_table(value_1);
|
CREATE INDEX is_index3 ON users_table(value_1);
|
||||||
|
@ -130,10 +137,10 @@ SELECT run_command_on_master_and_workers($f$
|
||||||
LEFTARG = user_composite_type,
|
LEFTARG = user_composite_type,
|
||||||
RIGHTARG = user_composite_type,
|
RIGHTARG = user_composite_type,
|
||||||
PROCEDURE = equal_user_composite_type_function,
|
PROCEDURE = equal_user_composite_type_function,
|
||||||
commutator = =,
|
commutator = =,
|
||||||
RESTRICT = eqsel,
|
RESTRICT = eqsel,
|
||||||
JOIN = eqjoinsel,
|
JOIN = eqjoinsel,
|
||||||
merges,
|
merges,
|
||||||
hashes
|
hashes
|
||||||
);
|
);
|
||||||
$f$);
|
$f$);
|
||||||
|
@ -187,12 +194,12 @@ SELECT run_command_on_master_and_workers($f$
|
||||||
OPERATOR 3 = (user_composite_type, user_composite_type),
|
OPERATOR 3 = (user_composite_type, user_composite_type),
|
||||||
OPERATOR 4 >= (user_composite_type, user_composite_type),
|
OPERATOR 4 >= (user_composite_type, user_composite_type),
|
||||||
OPERATOR 5 > (user_composite_type, user_composite_type),
|
OPERATOR 5 > (user_composite_type, user_composite_type),
|
||||||
|
|
||||||
FUNCTION 1 cmp_user_composite_type_function(user_composite_type, user_composite_type);
|
FUNCTION 1 cmp_user_composite_type_function(user_composite_type, user_composite_type);
|
||||||
$f$);
|
$f$);
|
||||||
|
|
||||||
SELECT run_command_on_master_and_workers($f$
|
SELECT run_command_on_master_and_workers($f$
|
||||||
|
|
||||||
CREATE OPERATOR CLASS cats_2_op_fam_class
|
CREATE OPERATOR CLASS cats_2_op_fam_class
|
||||||
DEFAULT FOR TYPE user_composite_type USING HASH AS
|
DEFAULT FOR TYPE user_composite_type USING HASH AS
|
||||||
OPERATOR 1 = (user_composite_type, user_composite_type),
|
OPERATOR 1 = (user_composite_type, user_composite_type),
|
||||||
|
@ -356,7 +363,7 @@ SET citus.shard_max_size TO "1MB";
|
||||||
|
|
||||||
CREATE TABLE events_reference_table (like events_table including all);
|
CREATE TABLE events_reference_table (like events_table including all);
|
||||||
SELECT create_reference_table('events_reference_table');
|
SELECT create_reference_table('events_reference_table');
|
||||||
CREATE INDEX events_ref_val2 on events_reference_table(value_2);
|
CREATE INDEX events_ref_val2 on events_reference_table(value_2);
|
||||||
INSERT INTO events_reference_table SELECT * FROM events_table;
|
INSERT INTO events_reference_table SELECT * FROM events_table;
|
||||||
|
|
||||||
CREATE TABLE users_reference_table (like users_table including all);
|
CREATE TABLE users_reference_table (like users_table including all);
|
||||||
|
|
|
@ -36,7 +36,7 @@ test: multi_load_data
|
||||||
|
|
||||||
test: multi_behavioral_analytics_create_table
|
test: multi_behavioral_analytics_create_table
|
||||||
test: multi_behavioral_analytics_basics multi_behavioral_analytics_single_shard_queries multi_insert_select_non_pushable_queries
|
test: multi_behavioral_analytics_basics multi_behavioral_analytics_single_shard_queries multi_insert_select_non_pushable_queries
|
||||||
test: multi_insert_select
|
test: multi_insert_select multi_insert_select_window
|
||||||
|
|
||||||
# ---
|
# ---
|
||||||
# Tests for partitioning support
|
# Tests for partitioning support
|
||||||
|
@ -51,7 +51,7 @@ test: multi_deparse_shard_query multi_distributed_transaction_id
|
||||||
test: multi_basic_queries multi_complex_expressions
|
test: multi_basic_queries multi_complex_expressions
|
||||||
test: multi_explain
|
test: multi_explain
|
||||||
test: multi_subquery multi_subquery_complex_queries multi_subquery_behavioral_analytics
|
test: multi_subquery multi_subquery_complex_queries multi_subquery_behavioral_analytics
|
||||||
test: multi_subquery_complex_reference_clause
|
test: multi_subquery_complex_reference_clause multi_subquery_window_functions
|
||||||
test: multi_subquery_in_where_reference_clause
|
test: multi_subquery_in_where_reference_clause
|
||||||
test: multi_subquery_union multi_subquery_in_where_clause multi_subquery_misc
|
test: multi_subquery_union multi_subquery_in_where_clause multi_subquery_misc
|
||||||
test: multi_reference_table
|
test: multi_reference_table
|
||||||
|
|
|
@ -51,9 +51,22 @@ SELECT create_distributed_table('agg_results_fourth', 'user_id');
|
||||||
|
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
CREATE TABLE users_ref_test_table(id int, it_name varchar(25), k_no int);
|
||||||
|
SELECT create_reference_table('users_ref_test_table');
|
||||||
|
create_reference_table
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
INSERT INTO users_ref_test_table VALUES(1,'User_1',45);
|
||||||
|
INSERT INTO users_ref_test_table VALUES(2,'User_2',46);
|
||||||
|
INSERT INTO users_ref_test_table VALUES(3,'User_3',47);
|
||||||
|
INSERT INTO users_ref_test_table VALUES(4,'User_4',48);
|
||||||
|
INSERT INTO users_ref_test_table VALUES(5,'User_5',49);
|
||||||
|
INSERT INTO users_ref_test_table VALUES(6,'User_6',50);
|
||||||
COPY users_table FROM '@abs_srcdir@/data/users_table.data' WITH CSV;
|
COPY users_table FROM '@abs_srcdir@/data/users_table.data' WITH CSV;
|
||||||
COPY events_table FROM '@abs_srcdir@/data/events_table.data' WITH CSV;
|
COPY events_table FROM '@abs_srcdir@/data/events_table.data' WITH CSV;
|
||||||
-- create indexes for
|
-- create indexes for
|
||||||
CREATE INDEX is_index1 ON users_table(user_id);
|
CREATE INDEX is_index1 ON users_table(user_id);
|
||||||
NOTICE: using one-phase commit for distributed DDL commands
|
NOTICE: using one-phase commit for distributed DDL commands
|
||||||
HINT: You can enable two-phase commit for extra safety with: SET citus.multi_shard_commit_protocol TO '2pc'
|
HINT: You can enable two-phase commit for extra safety with: SET citus.multi_shard_commit_protocol TO '2pc'
|
||||||
|
@ -187,10 +200,10 @@ SELECT run_command_on_master_and_workers($f$
|
||||||
LEFTARG = user_composite_type,
|
LEFTARG = user_composite_type,
|
||||||
RIGHTARG = user_composite_type,
|
RIGHTARG = user_composite_type,
|
||||||
PROCEDURE = equal_user_composite_type_function,
|
PROCEDURE = equal_user_composite_type_function,
|
||||||
commutator = =,
|
commutator = =,
|
||||||
RESTRICT = eqsel,
|
RESTRICT = eqsel,
|
||||||
JOIN = eqjoinsel,
|
JOIN = eqjoinsel,
|
||||||
merges,
|
merges,
|
||||||
hashes
|
hashes
|
||||||
);
|
);
|
||||||
$f$);
|
$f$);
|
||||||
|
@ -261,7 +274,7 @@ SELECT run_command_on_master_and_workers($f$
|
||||||
OPERATOR 3 = (user_composite_type, user_composite_type),
|
OPERATOR 3 = (user_composite_type, user_composite_type),
|
||||||
OPERATOR 4 >= (user_composite_type, user_composite_type),
|
OPERATOR 4 >= (user_composite_type, user_composite_type),
|
||||||
OPERATOR 5 > (user_composite_type, user_composite_type),
|
OPERATOR 5 > (user_composite_type, user_composite_type),
|
||||||
|
|
||||||
FUNCTION 1 cmp_user_composite_type_function(user_composite_type, user_composite_type);
|
FUNCTION 1 cmp_user_composite_type_function(user_composite_type, user_composite_type);
|
||||||
$f$);
|
$f$);
|
||||||
run_command_on_master_and_workers
|
run_command_on_master_and_workers
|
||||||
|
@ -270,7 +283,7 @@ $f$);
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
SELECT run_command_on_master_and_workers($f$
|
SELECT run_command_on_master_and_workers($f$
|
||||||
|
|
||||||
CREATE OPERATOR CLASS cats_2_op_fam_class
|
CREATE OPERATOR CLASS cats_2_op_fam_class
|
||||||
DEFAULT FOR TYPE user_composite_type USING HASH AS
|
DEFAULT FOR TYPE user_composite_type USING HASH AS
|
||||||
OPERATOR 1 = (user_composite_type, user_composite_type),
|
OPERATOR 1 = (user_composite_type, user_composite_type),
|
||||||
|
@ -428,7 +441,7 @@ SELECT create_reference_table('events_reference_table');
|
||||||
|
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
CREATE INDEX events_ref_val2 on events_reference_table(value_2);
|
CREATE INDEX events_ref_val2 on events_reference_table(value_2);
|
||||||
INSERT INTO events_reference_table SELECT * FROM events_table;
|
INSERT INTO events_reference_table SELECT * FROM events_table;
|
||||||
CREATE TABLE users_reference_table (like users_table including all);
|
CREATE TABLE users_reference_table (like users_table including all);
|
||||||
SELECT create_reference_table('users_reference_table');
|
SELECT create_reference_table('users_reference_table');
|
||||||
|
|
|
@ -0,0 +1,769 @@
|
||||||
|
-- ===================================================================
|
||||||
|
-- test insert select functionality for window functions
|
||||||
|
-- ===================================================================
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, time, rnk
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, rank() OVER my_win as rnk
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
WINDOW my_win AS (PARTITION BY user_id ORDER BY time DESC)
|
||||||
|
) as foo;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- the same test with different syntax
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, time, rnk
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, rank() OVER (PARTITION BY user_id ORDER BY time DESC) as rnk
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
) as foo;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- similar test with lag
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg, value_3_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, time, lag_event_type, row_no
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, lag(event_type) OVER my_win as lag_event_type, row_number() OVER my_win as row_no
|
||||||
|
FROM
|
||||||
|
events_table WINDOW my_win AS (PARTITION BY user_id ORDER BY time DESC)
|
||||||
|
) as foo;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- simple window function, partitioned by and grouped by the distribution key
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, rnk, tme
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
user_id, rank() OVER my_win as rnk, avg(value_2) as tme
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, date_trunc('day', time)
|
||||||
|
WINDOW my_win AS (PARTITION BY user_id ORDER BY avg(event_type) DESC)
|
||||||
|
) as foo;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- top level query has a group by on the result of the window function
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
min(user_id), min(time), lag_event_type
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, lag(event_type) OVER my_win as lag_event_type
|
||||||
|
FROM
|
||||||
|
events_table WINDOW my_win AS (PARTITION BY user_id ORDER BY time DESC)
|
||||||
|
) as foo
|
||||||
|
GROUP BY
|
||||||
|
lag_event_type;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- window functions should work along with joins as well
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg, value_2_agg)
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_table.user_id, lag(users_table.user_id) OVER w1, rank() OVER w1
|
||||||
|
FROM
|
||||||
|
users_table, events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id and
|
||||||
|
event_type < 25
|
||||||
|
WINDOW w1 AS (PARTITION BY users_table.user_id, events_table.event_type ORDER BY events_table.time)
|
||||||
|
) as foo;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- two window functions in a single subquery should work fine as well
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg, value_2_agg)
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_table.user_id, lag(users_table.user_id) OVER w1, rank() OVER w2
|
||||||
|
FROM
|
||||||
|
users_table, events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id and
|
||||||
|
event_type < 25
|
||||||
|
WINDOW w1 AS (PARTITION BY users_table.user_id, events_table.event_type ORDER BY events_table.time),
|
||||||
|
w2 AS (PARTITION BY users_table.user_id, (events_table.value_2 % 25) ORDER BY events_table.time)
|
||||||
|
) as foo;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- window functions should be fine within subquery joins
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg, value_2_agg, value_3_agg)
|
||||||
|
SELECT sub_1.user_id, max(lag_1), max(rank_1), max(rank_2) FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_table.user_id, lag(users_table.user_id) OVER w1 as lag_1, rank() OVER w2 as rank_1
|
||||||
|
FROM
|
||||||
|
users_table, events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id and
|
||||||
|
event_type < 25
|
||||||
|
WINDOW w1 AS (PARTITION BY users_table.user_id, events_table.event_type ORDER BY events_table.time),
|
||||||
|
w2 AS (PARTITION BY users_table.user_id, (events_table.value_2 % 25) ORDER BY events_table.time)
|
||||||
|
) as sub_1
|
||||||
|
JOIN
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_table.user_id, lag(users_table.user_id) OVER w1 as lag_2, rank() OVER w2 as rank_2
|
||||||
|
FROM
|
||||||
|
users_table, events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id and
|
||||||
|
event_type < 25
|
||||||
|
WINDOW w1 AS (PARTITION BY users_table.user_id, events_table.value_2 ORDER BY events_table.time),
|
||||||
|
w2 AS (PARTITION BY users_table.user_id, (events_table.value_2 % 50) ORDER BY events_table.time)
|
||||||
|
) as sub_2
|
||||||
|
ON(sub_1.user_id = sub_2.user_id)
|
||||||
|
GROUP BY
|
||||||
|
sub_1.user_id;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- GROUP BYs and PARTITION BYs should work fine together
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
avg(user_id), max(time), my_rank
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
user_id, date_trunc('day', time) as time, rank() OVER my_win as my_rank
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, date_trunc('day', time)
|
||||||
|
WINDOW my_win AS (PARTITION BY user_id ORDER BY count(*) DESC)
|
||||||
|
) as foo
|
||||||
|
WHERE
|
||||||
|
my_rank > 5
|
||||||
|
GROUP BY
|
||||||
|
my_rank;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- aggregates in the PARTITION BY are also allowed
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
avg(user_id), max(time), my_rank
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
user_id, date_trunc('day', time) as time, rank() OVER my_win as my_rank
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, date_trunc('day', time)
|
||||||
|
WINDOW my_win AS (PARTITION BY user_id, avg(event_type%10)::int ORDER BY count(*) DESC)
|
||||||
|
) as foo
|
||||||
|
WHERE
|
||||||
|
my_rank > 0
|
||||||
|
GROUP BY
|
||||||
|
my_rank;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- GROUP BY should not necessarily be inclusive of partitioning
|
||||||
|
-- but this query doesn't make much sense
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg)
|
||||||
|
SELECT
|
||||||
|
avg(user_id), my_rank
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
user_id, rank() OVER my_win as my_rank
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
WINDOW my_win AS (PARTITION BY user_id, max(event_type) ORDER BY count(*) DESC)
|
||||||
|
) as foo
|
||||||
|
GROUP BY
|
||||||
|
my_rank;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- Group by has more columns than partition by which uses coordinator insert ... select
|
||||||
|
INSERT INTO agg_results(user_id, value_2_agg)
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id, SUM(value_2) OVER (PARTITION BY user_id)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_1, value_2
|
||||||
|
) a
|
||||||
|
ORDER BY
|
||||||
|
2 DESC, 1
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
INSERT INTO agg_results(user_id, value_2_agg)
|
||||||
|
SELECT user_id, max(sum) FROM (
|
||||||
|
SELECT
|
||||||
|
user_id, SUM(value_2) OVER (PARTITION BY user_id, value_1)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_1, value_2
|
||||||
|
) a
|
||||||
|
GROUP BY user_id;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- Subquery in where with window function
|
||||||
|
INSERT INTO agg_results(user_id)
|
||||||
|
SELECT
|
||||||
|
user_id
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WHERE
|
||||||
|
value_2 > 545 AND
|
||||||
|
value_2 < ALL (
|
||||||
|
SELECT
|
||||||
|
avg(value_3) OVER (PARTITION BY user_id)
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id
|
||||||
|
)
|
||||||
|
GROUP BY
|
||||||
|
user_id;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- Partition by with aggregate functions. This query does not make much sense since the
|
||||||
|
-- result of aggregate function will be the same for every row in a partition and it is
|
||||||
|
-- not going to affect the group that the count function will work on.
|
||||||
|
INSERT INTO agg_results(user_id, value_2_agg)
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT
|
||||||
|
user_id, COUNT(*) OVER (PARTITION BY user_id, MIN(value_2))
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
1
|
||||||
|
) a;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- Some more nested queries
|
||||||
|
INSERT INTO agg_results(user_id, value_2_agg, value_3_agg, value_4_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, rank, SUM(ABS(value_2 - value_3)) AS difference, COUNT(*) AS distinct_users
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
*, rank() OVER (PARTITION BY user_id ORDER BY value_2 DESC)
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
user_id, value_2, sum(value_3) OVER (PARTITION BY user_id, value_2) as value_3
|
||||||
|
FROM users_table
|
||||||
|
) AS A
|
||||||
|
) AS A
|
||||||
|
GROUP BY
|
||||||
|
user_id, rank;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT DISTINCT
|
||||||
|
f3.user_id, ABS(f2.sum - f3.sum)
|
||||||
|
FROM (
|
||||||
|
SELECT DISTINCT
|
||||||
|
user_id, sum(value_3) OVER (PARTITION BY user_id)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_3
|
||||||
|
) f3,
|
||||||
|
(
|
||||||
|
SELECT DISTINCT
|
||||||
|
user_id, sum(value_2) OVER (PARTITION BY user_id)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_2
|
||||||
|
) f2
|
||||||
|
WHERE
|
||||||
|
f3.user_id=f2.user_id
|
||||||
|
) a;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- test with reference table partitioned on columns from both
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT *
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id, count(id) OVER (PARTITION BY user_id, id)
|
||||||
|
FROM
|
||||||
|
users_table, users_ref_test_table
|
||||||
|
) a;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- Window functions with HAVING clause
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg)
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id, rank() OVER (PARTITION BY user_id ORDER BY value_1)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_1 HAVING count(*) > 1
|
||||||
|
) a;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- Window functions with HAVING clause which uses coordinator insert ... select
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg)
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id, rank() OVER (PARTITION BY user_id ORDER BY value_1)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_1 HAVING count(*) > 1
|
||||||
|
) a
|
||||||
|
ORDER BY
|
||||||
|
2 DESC, 1
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- Window function in View works
|
||||||
|
CREATE VIEW view_with_window_func AS
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id, rank() OVER (PARTITION BY user_id ORDER BY value_1)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_1
|
||||||
|
HAVING count(*) > 1;
|
||||||
|
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT *
|
||||||
|
FROM
|
||||||
|
view_with_window_func;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- Window function in View works and the query uses coordinator insert ... select
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT *
|
||||||
|
FROM
|
||||||
|
view_with_window_func
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
-- since there is a LIMIT but no ORDER BY, we cannot run avg(user_id)
|
||||||
|
SELECT count(*) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, max(avg)
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (1, 2, 3, 4, 5))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (6, 7, 8, 9, 10))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (11, 12, 13, 14, 15))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (16, 17, 18, 19, 20))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (21, 22, 23, 24, 25))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (26, 27, 28, 29, 30))
|
||||||
|
) b
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
LIMIT
|
||||||
|
5;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
-- since there is a LIMIT but no ORDER BY, we cannot test avg or distinct count
|
||||||
|
SELECT count(*) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, max(avg)
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (1, 2, 3, 4, 5))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (6, 7, 8, 9, 10))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (11, 12, 13, 14, 15))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (16, 17, 18, 19, 20))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (21, 22, 23, 24, 25))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (26, 27, 28, 29, 30))
|
||||||
|
) b
|
||||||
|
GROUP BY
|
||||||
|
user_id;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT *
|
||||||
|
FROM (
|
||||||
|
( SELECT user_id,
|
||||||
|
sum(counter)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
events_table) user_id_1
|
||||||
|
GROUP BY
|
||||||
|
user_id)
|
||||||
|
UNION
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(counter)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
events_table) user_id_2
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
)
|
||||||
|
) AS ftop
|
||||||
|
LIMIT
|
||||||
|
5;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
-- since there is a LIMIT but no ORDER BY, we cannot test avg or distinct count
|
||||||
|
SELECT count(*) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT *
|
||||||
|
FROM (
|
||||||
|
( SELECT user_id,
|
||||||
|
sum(counter)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
events_table) user_id_1
|
||||||
|
GROUP BY
|
||||||
|
user_id)
|
||||||
|
UNION
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(counter)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
events_table) user_id_2
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
)
|
||||||
|
) AS ftop;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- let's have some queries that Citus shouldn't push down
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, time, rnk
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, rank() OVER my_win as rnk
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
WINDOW my_win AS (PARTITION BY event_type ORDER BY time DESC)
|
||||||
|
) as foo
|
||||||
|
ORDER BY
|
||||||
|
3 DESC, 1 DESC, 2 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- user needs to supply partition by which should
|
||||||
|
-- include the distribution key
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, time, rnk
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, rank() OVER my_win as rnk
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
WINDOW my_win AS ()
|
||||||
|
) as foo
|
||||||
|
ORDER BY
|
||||||
|
3 DESC, 1 DESC, 2 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- user needs to supply partition by which should
|
||||||
|
-- include the distribution key
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, time, rnk
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, rank() OVER my_win as rnk
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
WINDOW my_win AS (ORDER BY time DESC)
|
||||||
|
) as foo
|
||||||
|
ORDER BY
|
||||||
|
3 DESC, 1 DESC, 2 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- w2 should not be pushed down
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg, value_2_agg)
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_table.user_id, lag(users_table.user_id) OVER w1, rank() OVER w2
|
||||||
|
FROM
|
||||||
|
users_table, events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id and
|
||||||
|
event_type < 25
|
||||||
|
WINDOW w1 AS (PARTITION BY users_table.user_id, events_table.event_type ORDER BY events_table.time),
|
||||||
|
w2 AS (PARTITION BY users_table.user_id+1, (events_table.value_2 % 25) ORDER BY events_table.time)
|
||||||
|
) as foo
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- GROUP BY includes the partition key, but not the WINDOW function
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, time, my_rank
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
user_id, date_trunc('day', time) as time, rank() OVER my_win as my_rank
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, date_trunc('day', time)
|
||||||
|
WINDOW my_win AS (ORDER BY avg(event_type))
|
||||||
|
) as foo
|
||||||
|
WHERE
|
||||||
|
my_rank > 125;
|
||||||
|
|
||||||
|
-- GROUP BY includes the partition key, but not the WINDOW function
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, time, my_rank
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
user_id, date_trunc('day', time) as time, rank() OVER my_win as my_rank
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, date_trunc('day', time)
|
||||||
|
WINDOW my_win AS (PARTITION BY date_trunc('day', time) ORDER BY avg(event_type))
|
||||||
|
) as foo
|
||||||
|
WHERE
|
||||||
|
my_rank > 125;
|
||||||
|
|
||||||
|
-- w2 should not be allowed
|
||||||
|
INSERT INTO agg_results (user_id, value_2_agg, value_3_agg)
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_table.user_id, lag(users_table.user_id) OVER w1, rank() OVER w2
|
||||||
|
FROM
|
||||||
|
users_table, events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id and
|
||||||
|
event_type < 25
|
||||||
|
WINDOW w1 AS (PARTITION BY users_table.user_id, events_table.event_type ORDER BY events_table.time),
|
||||||
|
w2 AS (ORDER BY events_table.time)
|
||||||
|
) as foo;
|
||||||
|
|
||||||
|
-- unsupported window function with an override
|
||||||
|
INSERT INTO agg_results(user_id, agg_time, value_2_agg)
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT
|
||||||
|
user_id, date_trunc('day', time) as time, sum(rank) OVER w2
|
||||||
|
FROM (
|
||||||
|
SELECT DISTINCT
|
||||||
|
user_id as user_id, time, rank() over w1
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WINDOW
|
||||||
|
w AS (PARTITION BY time), w1 AS (w ORDER BY value_2, value_3)
|
||||||
|
) fab
|
||||||
|
WINDOW
|
||||||
|
w2 as (PARTITION BY user_id, time)
|
||||||
|
) a;
|
||||||
|
|
||||||
|
-- Subquery in where with unsupported window function
|
||||||
|
INSERT INTO agg_results(user_id)
|
||||||
|
SELECT
|
||||||
|
user_id
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WHERE
|
||||||
|
value_2 > 545 AND
|
||||||
|
value_2 < ALL (
|
||||||
|
SELECT
|
||||||
|
avg(value_3) OVER ()
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id
|
||||||
|
)
|
||||||
|
GROUP BY
|
||||||
|
user_id;
|
||||||
|
|
||||||
|
-- Aggregate function on distribution column should error out
|
||||||
|
INSERT INTO agg_results(user_id, value_2_agg)
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT
|
||||||
|
user_id, COUNT(*) OVER (PARTITION BY sum(user_id), MIN(value_2))
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
) a;
|
||||||
|
|
||||||
|
-- UNION with only one subquery which has a partition on non-distribution column should
|
||||||
|
-- error out
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT *
|
||||||
|
FROM (
|
||||||
|
( SELECT user_id,
|
||||||
|
sum(counter)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
events_table) user_id_1
|
||||||
|
GROUP BY
|
||||||
|
user_id)
|
||||||
|
UNION
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(counter)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_2) over (partition by event_type) AS counter
|
||||||
|
FROM
|
||||||
|
events_table) user_id_2
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
)
|
||||||
|
) AS ftop;
|
||||||
|
|
||||||
|
DROP VIEW view_with_window_func;
|
|
@ -2160,18 +2160,6 @@ GROUP BY
|
||||||
ORDER BY
|
ORDER BY
|
||||||
types;
|
types;
|
||||||
|
|
||||||
-- not supported due to window functions
|
|
||||||
SELECT user_id,
|
|
||||||
some_vals
|
|
||||||
FROM (
|
|
||||||
SELECT * ,
|
|
||||||
Row_number() over (PARTITION BY "user_id" ORDER BY "user_id") AS "some_vals",
|
|
||||||
Random()
|
|
||||||
FROM users_table
|
|
||||||
) user_id
|
|
||||||
ORDER BY 1,
|
|
||||||
2 limit 10;
|
|
||||||
|
|
||||||
-- not supported due to non relation rte
|
-- not supported due to non relation rte
|
||||||
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
||||||
FROM
|
FROM
|
||||||
|
|
|
@ -7,7 +7,7 @@
|
||||||
-- We don't need shard id sequence here, so commented out to prevent conflicts with concurrent tests
|
-- We don't need shard id sequence here, so commented out to prevent conflicts with concurrent tests
|
||||||
-- ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1400000;
|
-- ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1400000;
|
||||||
ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 1400000;
|
ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 1400000;
|
||||||
|
|
||||||
SET citus.enable_router_execution TO FALSE;
|
SET citus.enable_router_execution TO FALSE;
|
||||||
|
|
||||||
CREATE TABLE user_buy_test_table(user_id int, item_id int, buy_count int);
|
CREATE TABLE user_buy_test_table(user_id int, item_id int, buy_count int);
|
||||||
|
@ -23,15 +23,6 @@ INSERT INTO users_return_test_table VALUES(4,1,1);
|
||||||
INSERT INTO users_return_test_table VALUES(1,3,1);
|
INSERT INTO users_return_test_table VALUES(1,3,1);
|
||||||
INSERT INTO users_return_test_table VALUES(3,2,2);
|
INSERT INTO users_return_test_table VALUES(3,2,2);
|
||||||
|
|
||||||
CREATE TABLE users_ref_test_table(id int, it_name varchar(25), k_no int);
|
|
||||||
SELECT create_reference_table('users_ref_test_table');
|
|
||||||
INSERT INTO users_ref_test_table VALUES(1,'User_1',45);
|
|
||||||
INSERT INTO users_ref_test_table VALUES(2,'User_2',46);
|
|
||||||
INSERT INTO users_ref_test_table VALUES(3,'User_3',47);
|
|
||||||
INSERT INTO users_ref_test_table VALUES(4,'User_4',48);
|
|
||||||
INSERT INTO users_ref_test_table VALUES(5,'User_5',49);
|
|
||||||
INSERT INTO users_ref_test_table VALUES(6,'User_6',50);
|
|
||||||
|
|
||||||
-- Simple Join test with reference table
|
-- Simple Join test with reference table
|
||||||
SELECT count(*) FROM
|
SELECT count(*) FROM
|
||||||
(SELECT random() FROM user_buy_test_table JOIN users_ref_test_table
|
(SELECT random() FROM user_buy_test_table JOIN users_ref_test_table
|
||||||
|
@ -101,7 +92,7 @@ SELECT count(*) FROM
|
||||||
ON user_buy_test_table.user_id > users_ref_test_table.id AND users_ref_test_table.k_no > 44 AND user_buy_test_table.user_id > 44) subquery_2
|
ON user_buy_test_table.user_id > users_ref_test_table.id AND users_ref_test_table.k_no > 44 AND user_buy_test_table.user_id > 44) subquery_2
|
||||||
WHERE subquery_1.user_id = subquery_2.user_id ;
|
WHERE subquery_1.user_id = subquery_2.user_id ;
|
||||||
|
|
||||||
-- Should be able to push down since reference tables are inner joined
|
-- Should be able to push down since reference tables are inner joined
|
||||||
-- with hash distributed tables, the results of those joins are the parts of
|
-- with hash distributed tables, the results of those joins are the parts of
|
||||||
-- an outer join
|
-- an outer join
|
||||||
SELECT subquery_2.id FROM
|
SELECT subquery_2.id FROM
|
||||||
|
@ -122,75 +113,75 @@ SELECT * FROM
|
||||||
|
|
||||||
-- should be able to pushdown since reference table is in the
|
-- should be able to pushdown since reference table is in the
|
||||||
-- inner part of the left join
|
-- inner part of the left join
|
||||||
SELECT
|
SELECT
|
||||||
user_id, sum(value_1)
|
user_id, sum(value_1)
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
users_table.user_id, users_table.value_1, random()
|
users_table.user_id, users_table.value_1, random()
|
||||||
FROM
|
FROM
|
||||||
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
|
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
|
||||||
INNER JOIN events_reference_table ON (events_reference_table.value_2 = users_table.user_id)
|
INNER JOIN events_reference_table ON (events_reference_table.value_2 = users_table.user_id)
|
||||||
) as foo
|
) as foo
|
||||||
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||||
|
|
||||||
-- same query as above, reference table is wrapped into a subquery
|
-- same query as above, reference table is wrapped into a subquery
|
||||||
SELECT
|
SELECT
|
||||||
user_id, sum(value_1)
|
user_id, sum(value_1)
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
users_table.user_id, users_table.value_1, random()
|
users_table.user_id, users_table.value_1, random()
|
||||||
FROM
|
FROM
|
||||||
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
|
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
|
||||||
INNER JOIN (SELECT *, random() FROM events_reference_table) as ref_all ON (ref_all.value_2 = users_table.user_id)
|
INNER JOIN (SELECT *, random() FROM events_reference_table) as ref_all ON (ref_all.value_2 = users_table.user_id)
|
||||||
) as foo
|
) as foo
|
||||||
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||||
|
|
||||||
-- should be able to pushdown since reference table is in the
|
-- should be able to pushdown since reference table is in the
|
||||||
-- inner part of the left join
|
-- inner part of the left join
|
||||||
SELECT
|
SELECT
|
||||||
user_id, sum(value_1)
|
user_id, sum(value_1)
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
users_table.user_id, users_table.value_1, random()
|
users_table.user_id, users_table.value_1, random()
|
||||||
FROM
|
FROM
|
||||||
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
|
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
|
||||||
LEFT JOIN events_reference_table ON (events_reference_table.value_2 = users_table.user_id)
|
LEFT JOIN events_reference_table ON (events_reference_table.value_2 = users_table.user_id)
|
||||||
) as foo
|
) as foo
|
||||||
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||||
|
|
||||||
-- should not be able to pushdown since reference table is in the
|
-- should not be able to pushdown since reference table is in the
|
||||||
-- direct outer part of the left join
|
-- direct outer part of the left join
|
||||||
SELECT
|
SELECT
|
||||||
user_id, sum(value_1)
|
user_id, sum(value_1)
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
users_table.user_id, users_table.value_1, random()
|
users_table.user_id, users_table.value_1, random()
|
||||||
FROM
|
FROM
|
||||||
events_reference_table LEFT JOIN users_table ON (users_table.user_id = events_reference_table.value_2)
|
events_reference_table LEFT JOIN users_table ON (users_table.user_id = events_reference_table.value_2)
|
||||||
LEFT JOIN events_table ON (events_table.user_id = users_table.user_id)
|
LEFT JOIN events_table ON (events_table.user_id = users_table.user_id)
|
||||||
) as foo
|
) as foo
|
||||||
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||||
|
|
||||||
-- should not be able to pushdown since reference table is in the
|
-- should not be able to pushdown since reference table is in the
|
||||||
-- direct outer part of the left join wrapped into a subquery
|
-- direct outer part of the left join wrapped into a subquery
|
||||||
SELECT
|
SELECT
|
||||||
*
|
*
|
||||||
FROM
|
FROM
|
||||||
(SELECT *, random() FROM events_reference_table) as ref_all LEFT JOIN users_table
|
(SELECT *, random() FROM events_reference_table) as ref_all LEFT JOIN users_table
|
||||||
ON (users_table.user_id = ref_all.value_2);
|
ON (users_table.user_id = ref_all.value_2);
|
||||||
|
|
||||||
-- should not be able to pushdown since reference table is in the
|
-- should not be able to pushdown since reference table is in the
|
||||||
-- outer part of the left join
|
-- outer part of the left join
|
||||||
SELECT
|
SELECT
|
||||||
user_id, sum(value_1)
|
user_id, sum(value_1)
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
users_table.user_id, users_table.value_1, random()
|
users_table.user_id, users_table.value_1, random()
|
||||||
FROM
|
FROM
|
||||||
events_reference_table LEFT JOIN users_table ON (users_table.user_id = events_reference_table.value_2)
|
events_reference_table LEFT JOIN users_table ON (users_table.user_id = events_reference_table.value_2)
|
||||||
LEFT JOIN events_table ON (events_table.user_id = users_table.user_id)
|
LEFT JOIN events_table ON (events_table.user_id = users_table.user_id)
|
||||||
) as foo
|
) as foo
|
||||||
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||||
|
|
||||||
-- should be able to pushdown since reference table is in the
|
-- should be able to pushdown since reference table is in the
|
||||||
-- inner part of the left join
|
-- inner part of the left join
|
||||||
|
@ -198,18 +189,18 @@ SELECT * FROM
|
||||||
(
|
(
|
||||||
SELECT DISTINCT foo.user_id
|
SELECT DISTINCT foo.user_id
|
||||||
FROM
|
FROM
|
||||||
((SELECT
|
((SELECT
|
||||||
"events"."time", "events"."user_id" as event_user_id, value_2 as event_val_2, random()
|
"events"."time", "events"."user_id" as event_user_id, value_2 as event_val_2, random()
|
||||||
FROM
|
FROM
|
||||||
events_reference_table as "events"
|
events_reference_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
event_type > 80) as "temp_data_queries"
|
event_type > 80) as "temp_data_queries"
|
||||||
INNER JOIN
|
INNER JOIN
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id"
|
"users"."user_id"
|
||||||
FROM
|
FROM
|
||||||
users_table as "users"
|
users_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 80 and value_2 = 5) as foo_in ON (event_val_2 = user_id)) as foo LEFT JOIN
|
user_id > 80 and value_2 = 5) as foo_in ON (event_val_2 = user_id)) as foo LEFT JOIN
|
||||||
(SELECT user_id as user_user_id FROM users_table) as fooo ON (user_id = user_user_id)) as bar;
|
(SELECT user_id as user_user_id FROM users_table) as fooo ON (user_id = user_user_id)) as bar;
|
||||||
|
|
||||||
|
@ -218,23 +209,23 @@ SELECT * FROM
|
||||||
(
|
(
|
||||||
SELECT DISTINCT foo.user_id
|
SELECT DISTINCT foo.user_id
|
||||||
FROM
|
FROM
|
||||||
((SELECT
|
((SELECT
|
||||||
"events"."time", "events"."user_id" as event_user_id, value_2 as event_val_2, random()
|
"events"."time", "events"."user_id" as event_user_id, value_2 as event_val_2, random()
|
||||||
FROM
|
FROM
|
||||||
events_reference_table as "events"
|
events_reference_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
event_type > 80) as "temp_data_queries"
|
event_type > 80) as "temp_data_queries"
|
||||||
LEFT JOIN
|
LEFT JOIN
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id"
|
"users"."user_id"
|
||||||
FROM
|
FROM
|
||||||
users_table as "users"
|
users_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 80 and value_2 = 5) as foo_in ON (event_val_2 = user_id)) as foo LEFT JOIN
|
user_id > 80 and value_2 = 5) as foo_in ON (event_val_2 = user_id)) as foo LEFT JOIN
|
||||||
(SELECT user_id as user_user_id FROM users_table) as fooo ON (user_id = user_user_id)) as bar;
|
(SELECT user_id as user_user_id FROM users_table) as fooo ON (user_id = user_user_id)) as bar;
|
||||||
|
|
||||||
-- we could even suuport the following where the subquery
|
-- we could even suuport the following where the subquery
|
||||||
-- on the outer part of the left join contains a reference table
|
-- on the outer part of the left join contains a reference table
|
||||||
SELECT max(events_all.cnt), events_all.usr_id
|
SELECT max(events_all.cnt), events_all.usr_id
|
||||||
FROM
|
FROM
|
||||||
(SELECT users_table.user_id as usr_id,
|
(SELECT users_table.user_id as usr_id,
|
||||||
|
@ -246,7 +237,7 @@ LEFT JOIN events_table ON (events_all.usr_id = events_table.user_id) GROUP BY 2
|
||||||
-- but, we fail to pushdown the following query where join that reference table appears
|
-- but, we fail to pushdown the following query where join that reference table appears
|
||||||
-- wrapped into a subquery
|
-- wrapped into a subquery
|
||||||
SELECT max(events_all.cnt),
|
SELECT max(events_all.cnt),
|
||||||
events_all.usr_id
|
events_all.usr_id
|
||||||
FROM(
|
FROM(
|
||||||
SELECT *, random() FROM
|
SELECT *, random() FROM
|
||||||
(SELECT users_table.user_id AS usr_id, count(*) AS cnt
|
(SELECT users_table.user_id AS usr_id, count(*) AS cnt
|
||||||
|
@ -263,141 +254,141 @@ LIMIT 5;
|
||||||
SET citus.subquery_pushdown to ON;
|
SET citus.subquery_pushdown to ON;
|
||||||
SELECT user_id, lastseen
|
SELECT user_id, lastseen
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"some_users_data".user_id, lastseen
|
"some_users_data".user_id, lastseen
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
filter_users_1.user_id, time AS lastseen
|
filter_users_1.user_id, time AS lastseen
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
user_where_1_1.user_id
|
user_where_1_1.user_id
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id"
|
"users"."user_id"
|
||||||
FROM
|
FROM
|
||||||
users_reference_table as "users"
|
users_reference_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 12 and user_id < 16 and value_1 > 20) user_where_1_1
|
user_id > 12 and user_id < 16 and value_1 > 20) user_where_1_1
|
||||||
INNER JOIN
|
INNER JOIN
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id"
|
"users"."user_id"
|
||||||
FROM
|
FROM
|
||||||
users_reference_table as "users"
|
users_reference_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 12 and user_id < 16 and value_2 > 60) user_where_1_join_1
|
user_id > 12 and user_id < 16 and value_2 > 60) user_where_1_join_1
|
||||||
ON ("user_where_1_1".user_id = "user_where_1_join_1".user_id))
|
ON ("user_where_1_1".user_id = "user_where_1_join_1".user_id))
|
||||||
filter_users_1
|
filter_users_1
|
||||||
JOIN LATERAL
|
JOIN LATERAL
|
||||||
(SELECT
|
(SELECT
|
||||||
user_id, time
|
user_id, time
|
||||||
FROM
|
FROM
|
||||||
events_reference_table as "events"
|
events_reference_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 12 and user_id < 16 AND
|
user_id > 12 and user_id < 16 AND
|
||||||
user_id = filter_users_1.user_id
|
user_id = filter_users_1.user_id
|
||||||
ORDER BY
|
ORDER BY
|
||||||
time DESC
|
time DESC
|
||||||
LIMIT 1) "last_events_1"
|
LIMIT 1) "last_events_1"
|
||||||
ON TRUE
|
ON TRUE
|
||||||
ORDER BY
|
ORDER BY
|
||||||
time DESC
|
time DESC
|
||||||
LIMIT 10) "some_recent_users"
|
LIMIT 10) "some_recent_users"
|
||||||
JOIN LATERAL
|
JOIN LATERAL
|
||||||
(SELECT
|
(SELECT
|
||||||
"users".user_id
|
"users".user_id
|
||||||
FROM
|
FROM
|
||||||
users_reference_table as "users"
|
users_reference_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
"users"."user_id" = "some_recent_users"."user_id" AND
|
"users"."user_id" = "some_recent_users"."user_id" AND
|
||||||
"users"."value_2" > 70
|
"users"."value_2" > 70
|
||||||
LIMIT 1) "some_users_data"
|
LIMIT 1) "some_users_data"
|
||||||
ON TRUE
|
ON TRUE
|
||||||
ORDER BY
|
ORDER BY
|
||||||
lastseen DESC
|
lastseen DESC
|
||||||
LIMIT 10) "some_users"
|
LIMIT 10) "some_users"
|
||||||
ORDER BY
|
ORDER BY
|
||||||
user_id DESC
|
user_id DESC
|
||||||
LIMIT 10;
|
LIMIT 10;
|
||||||
SET citus.subquery_pushdown to OFF;
|
SET citus.subquery_pushdown to OFF;
|
||||||
|
|
||||||
-- NESTED INNER JOINs with reference tables
|
-- NESTED INNER JOINs with reference tables
|
||||||
SELECT
|
SELECT
|
||||||
count(*) AS value, "generated_group_field"
|
count(*) AS value, "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
DISTINCT "pushedDownQuery"."user_id", "generated_group_field"
|
DISTINCT "pushedDownQuery"."user_id", "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"eventQuery"."user_id", "eventQuery"."time", random(), ("eventQuery"."value_2") AS "generated_group_field"
|
"eventQuery"."user_id", "eventQuery"."time", random(), ("eventQuery"."value_2") AS "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
*
|
*
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."time", "events"."user_id", "events"."value_2"
|
"events"."time", "events"."user_id", "events"."value_2"
|
||||||
FROM
|
FROM
|
||||||
events_table as "events"
|
events_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 10 and user_id < 40 AND event_type IN (40, 41, 42, 43, 44, 45) ) "temp_data_queries"
|
user_id > 10 and user_id < 40 AND event_type IN (40, 41, 42, 43, 44, 45) ) "temp_data_queries"
|
||||||
INNER JOIN
|
INNER JOIN
|
||||||
(SELECT
|
(SELECT
|
||||||
user_where_1_1.real_user_id
|
user_where_1_1.real_user_id
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id" as real_user_id
|
"users"."user_id" as real_user_id
|
||||||
FROM
|
FROM
|
||||||
users_reference_table as "users"
|
users_reference_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 10 and user_id < 40 and value_2 > 50 ) user_where_1_1
|
user_id > 10 and user_id < 40 and value_2 > 50 ) user_where_1_1
|
||||||
INNER JOIN
|
INNER JOIN
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id"
|
"users"."user_id"
|
||||||
FROM
|
FROM
|
||||||
users_reference_table as "users"
|
users_reference_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 10 and user_id < 40 and value_3 > 50 ) user_where_1_join_1
|
user_id > 10 and user_id < 40 and value_3 > 50 ) user_where_1_join_1
|
||||||
ON ("user_where_1_1".real_user_id = "user_where_1_join_1".user_id)) "user_filters_1"
|
ON ("user_where_1_1".real_user_id = "user_where_1_join_1".user_id)) "user_filters_1"
|
||||||
ON ("temp_data_queries".user_id = "user_filters_1".real_user_id)) "eventQuery") "pushedDownQuery") "pushedDownQuery"
|
ON ("temp_data_queries".user_id = "user_filters_1".real_user_id)) "eventQuery") "pushedDownQuery") "pushedDownQuery"
|
||||||
GROUP BY
|
GROUP BY
|
||||||
"generated_group_field"
|
"generated_group_field"
|
||||||
ORDER BY
|
ORDER BY
|
||||||
generated_group_field DESC, value DESC;
|
generated_group_field DESC, value DESC;
|
||||||
|
|
||||||
-- single level inner joins with reference tables
|
-- single level inner joins with reference tables
|
||||||
SELECT
|
SELECT
|
||||||
"value_3", count(*) AS cnt
|
"value_3", count(*) AS cnt
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"value_3", "user_id", random()
|
"value_3", "user_id", random()
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
users_in_segment_1.user_id, value_3
|
users_in_segment_1.user_id, value_3
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
user_id, value_3 * 2 as value_3
|
user_id, value_3 * 2 as value_3
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
user_id, value_3
|
user_id, value_3
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id", value_3
|
"users"."user_id", value_3
|
||||||
FROM
|
FROM
|
||||||
users_reference_table as "users"
|
users_reference_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 10 and user_id < 40 and value_2 > 30
|
user_id > 10 and user_id < 40 and value_2 > 30
|
||||||
) simple_user_where_1
|
) simple_user_where_1
|
||||||
) all_buckets_1
|
) all_buckets_1
|
||||||
) users_in_segment_1
|
) users_in_segment_1
|
||||||
JOIN
|
JOIN
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id"
|
"users"."user_id"
|
||||||
FROM
|
FROM
|
||||||
users_reference_table as "users"
|
users_reference_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 10 and user_id < 40 and value_2 > 60
|
user_id > 10 and user_id < 40 and value_2 > 60
|
||||||
) some_users_data
|
) some_users_data
|
||||||
ON ("users_in_segment_1".user_id = "some_users_data".user_id)
|
ON ("users_in_segment_1".user_id = "some_users_data".user_id)
|
||||||
) segmentalias_1) "tempQuery"
|
) segmentalias_1) "tempQuery"
|
||||||
GROUP BY "value_3"
|
GROUP BY "value_3"
|
||||||
ORDER BY cnt, value_3 DESC LIMIT 10;
|
ORDER BY cnt, value_3 DESC LIMIT 10;
|
||||||
|
|
||||||
|
@ -407,42 +398,42 @@ SELECT *
|
||||||
FROM
|
FROM
|
||||||
(SELECT "some_users_data".user_id, "some_recent_users".value_3
|
(SELECT "some_users_data".user_id, "some_recent_users".value_3
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
filter_users_1.user_id, value_3
|
filter_users_1.user_id, value_3
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id"
|
"users"."user_id"
|
||||||
FROM
|
FROM
|
||||||
users_reference_table as "users"
|
users_reference_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 20 and user_id < 70 and users.value_2 = 200) filter_users_1
|
user_id > 20 and user_id < 70 and users.value_2 = 200) filter_users_1
|
||||||
JOIN LATERAL
|
JOIN LATERAL
|
||||||
(SELECT
|
(SELECT
|
||||||
user_id, value_3
|
user_id, value_3
|
||||||
FROM
|
FROM
|
||||||
events_reference_table as "events"
|
events_reference_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 20 and user_id < 70 AND
|
user_id > 20 and user_id < 70 AND
|
||||||
("events".user_id = "filter_users_1".user_id)
|
("events".user_id = "filter_users_1".user_id)
|
||||||
ORDER BY
|
ORDER BY
|
||||||
value_3 DESC
|
value_3 DESC
|
||||||
LIMIT 1) "last_events_1" ON true
|
LIMIT 1) "last_events_1" ON true
|
||||||
ORDER BY value_3 DESC
|
ORDER BY value_3 DESC
|
||||||
LIMIT 10) "some_recent_users"
|
LIMIT 10) "some_recent_users"
|
||||||
JOIN LATERAL
|
JOIN LATERAL
|
||||||
(SELECT
|
(SELECT
|
||||||
"users".user_id
|
"users".user_id
|
||||||
FROM
|
FROM
|
||||||
users_reference_table as "users"
|
users_reference_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
"users"."user_id" = "some_recent_users"."user_id" AND
|
"users"."user_id" = "some_recent_users"."user_id" AND
|
||||||
users.value_2 > 200
|
users.value_2 > 200
|
||||||
LIMIT 1) "some_users_data" ON true
|
LIMIT 1) "some_users_data" ON true
|
||||||
ORDER BY
|
ORDER BY
|
||||||
value_3 DESC
|
value_3 DESC
|
||||||
LIMIT 10) "some_users"
|
LIMIT 10) "some_users"
|
||||||
ORDER BY
|
ORDER BY
|
||||||
value_3 DESC
|
value_3 DESC
|
||||||
LIMIT 10;
|
LIMIT 10;
|
||||||
SET citus.subquery_pushdown to OFF;
|
SET citus.subquery_pushdown to OFF;
|
||||||
|
|
||||||
|
@ -451,37 +442,37 @@ SET citus.subquery_pushdown to OFF;
|
||||||
SELECT
|
SELECT
|
||||||
count(*) AS cnt, "generated_group_field"
|
count(*) AS cnt, "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"eventQuery"."user_id", random(), generated_group_field
|
"eventQuery"."user_id", random(), generated_group_field
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"multi_group_wrapper_1".*, generated_group_field, random()
|
"multi_group_wrapper_1".*, generated_group_field, random()
|
||||||
FROM
|
FROM
|
||||||
(SELECT *
|
(SELECT *
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."time", "events"."user_id" as event_user_id
|
"events"."time", "events"."user_id" as event_user_id
|
||||||
FROM
|
FROM
|
||||||
events_table as "events"
|
events_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 80) "temp_data_queries"
|
user_id > 80) "temp_data_queries"
|
||||||
INNER JOIN
|
INNER JOIN
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id"
|
"users"."user_id"
|
||||||
FROM
|
FROM
|
||||||
users_reference_table as "users"
|
users_reference_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 80 and value_2 = 5) "user_filters_1"
|
user_id > 80 and value_2 = 5) "user_filters_1"
|
||||||
ON ("temp_data_queries".event_user_id = "user_filters_1".user_id)) AS "multi_group_wrapper_1"
|
ON ("temp_data_queries".event_user_id = "user_filters_1".user_id)) AS "multi_group_wrapper_1"
|
||||||
LEFT JOIN
|
LEFT JOIN
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
|
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
users_table as "users") "left_group_by_1"
|
users_table as "users") "left_group_by_1"
|
||||||
ON ("left_group_by_1".user_id = "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
|
ON ("left_group_by_1".user_id = "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
|
||||||
group BY
|
group BY
|
||||||
"generated_group_field"
|
"generated_group_field"
|
||||||
ORDER BY
|
ORDER BY
|
||||||
cnt DESC, generated_group_field ASC
|
cnt DESC, generated_group_field ASC
|
||||||
LIMIT 10;
|
LIMIT 10;
|
||||||
|
|
||||||
|
@ -490,42 +481,42 @@ count(*) AS cnt, "generated_group_field"
|
||||||
SELECT
|
SELECT
|
||||||
count(*) AS cnt, "generated_group_field"
|
count(*) AS cnt, "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"eventQuery"."user_id", random(), generated_group_field
|
"eventQuery"."user_id", random(), generated_group_field
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"multi_group_wrapper_1".*, generated_group_field, random()
|
"multi_group_wrapper_1".*, generated_group_field, random()
|
||||||
FROM
|
FROM
|
||||||
(SELECT *
|
(SELECT *
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."time", "events"."user_id" as event_user_id
|
"events"."time", "events"."user_id" as event_user_id
|
||||||
FROM
|
FROM
|
||||||
events_table as "events"
|
events_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 80) "temp_data_queries"
|
user_id > 80) "temp_data_queries"
|
||||||
INNER JOIN
|
INNER JOIN
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id"
|
"users"."user_id"
|
||||||
FROM
|
FROM
|
||||||
users_table as "users"
|
users_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 80 and value_2 = 5) "user_filters_1"
|
user_id > 80 and value_2 = 5) "user_filters_1"
|
||||||
ON ("temp_data_queries".event_user_id = "user_filters_1".user_id)) AS "multi_group_wrapper_1"
|
ON ("temp_data_queries".event_user_id = "user_filters_1".user_id)) AS "multi_group_wrapper_1"
|
||||||
RIGHT JOIN
|
RIGHT JOIN
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
|
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
users_reference_table as "users") "right_group_by_1"
|
users_reference_table as "users") "right_group_by_1"
|
||||||
ON ("right_group_by_1".user_id = "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
|
ON ("right_group_by_1".user_id = "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
|
||||||
group BY
|
group BY
|
||||||
"generated_group_field"
|
"generated_group_field"
|
||||||
ORDER BY
|
ORDER BY
|
||||||
cnt DESC, generated_group_field ASC
|
cnt DESC, generated_group_field ASC
|
||||||
LIMIT 10;
|
LIMIT 10;
|
||||||
|
|
||||||
-- right join where the inner part of the join includes a reference table
|
-- right join where the inner part of the join includes a reference table
|
||||||
-- joined with hash partitioned table using non-equi join
|
-- joined with hash partitioned table using non-equi join
|
||||||
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event
|
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event
|
||||||
FROM (
|
FROM (
|
||||||
SELECT
|
SELECT
|
||||||
|
@ -585,42 +576,42 @@ ORDER BY user_id;
|
||||||
|
|
||||||
|
|
||||||
-- LEFT JOINs used with INNER JOINs
|
-- LEFT JOINs used with INNER JOINs
|
||||||
-- events_table and users_reference_table joined
|
-- events_table and users_reference_table joined
|
||||||
-- with event_table.non_part_key < reference_table.any_key
|
-- with event_table.non_part_key < reference_table.any_key
|
||||||
SELECT
|
SELECT
|
||||||
count(*) AS cnt, "generated_group_field"
|
count(*) AS cnt, "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"eventQuery"."user_id", random(), generated_group_field
|
"eventQuery"."user_id", random(), generated_group_field
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"multi_group_wrapper_1".*, generated_group_field, random()
|
"multi_group_wrapper_1".*, generated_group_field, random()
|
||||||
FROM
|
FROM
|
||||||
(SELECT *
|
(SELECT *
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."time", "events"."user_id" as event_user_id
|
"events"."time", "events"."user_id" as event_user_id
|
||||||
FROM
|
FROM
|
||||||
events_table as "events"
|
events_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 80) "temp_data_queries"
|
user_id > 80) "temp_data_queries"
|
||||||
INNER JOIN
|
INNER JOIN
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id"
|
"users"."user_id"
|
||||||
FROM
|
FROM
|
||||||
users_reference_table as "users"
|
users_reference_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 80 and value_2 = 5) "user_filters_1"
|
user_id > 80 and value_2 = 5) "user_filters_1"
|
||||||
ON ("temp_data_queries".event_user_id < "user_filters_1".user_id)) AS "multi_group_wrapper_1"
|
ON ("temp_data_queries".event_user_id < "user_filters_1".user_id)) AS "multi_group_wrapper_1"
|
||||||
RIGHT JOIN
|
RIGHT JOIN
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
|
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
users_table as "users") "left_group_by_1"
|
users_table as "users") "left_group_by_1"
|
||||||
ON ("left_group_by_1".user_id = "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
|
ON ("left_group_by_1".user_id = "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
|
||||||
group BY
|
group BY
|
||||||
"generated_group_field"
|
"generated_group_field"
|
||||||
ORDER BY
|
ORDER BY
|
||||||
cnt DESC, generated_group_field ASC
|
cnt DESC, generated_group_field ASC
|
||||||
LIMIT 10;
|
LIMIT 10;
|
||||||
|
|
||||||
|
@ -631,31 +622,31 @@ FROM
|
||||||
FROM
|
FROM
|
||||||
(SELECT user_id, time
|
(SELECT user_id, time
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
user_id, time
|
user_id, time
|
||||||
FROM
|
FROM
|
||||||
events_reference_table as "events"
|
events_reference_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 10 and user_id < 40) "events_1"
|
user_id > 10 and user_id < 40) "events_1"
|
||||||
ORDER BY
|
ORDER BY
|
||||||
time DESC) "recent_events_1"
|
time DESC) "recent_events_1"
|
||||||
GROUP BY
|
GROUP BY
|
||||||
user_id
|
user_id
|
||||||
ORDER BY
|
ORDER BY
|
||||||
max(TIME) DESC) "some_recent_users"
|
max(TIME) DESC) "some_recent_users"
|
||||||
FULL JOIN
|
FULL JOIN
|
||||||
(SELECT
|
(SELECT
|
||||||
"users".user_id
|
"users".user_id
|
||||||
FROM
|
FROM
|
||||||
users_table as "users"
|
users_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
users.value_2 > 50 and users.value_2 < 55) "some_users_data"
|
users.value_2 > 50 and users.value_2 < 55) "some_users_data"
|
||||||
ON "some_users_data"."user_id" = "some_recent_users"."user_id"
|
ON "some_users_data"."user_id" = "some_recent_users"."user_id"
|
||||||
ORDER BY
|
ORDER BY
|
||||||
user_id
|
user_id
|
||||||
limit 50;
|
limit 50;
|
||||||
|
|
||||||
--
|
--
|
||||||
-- UNIONs and JOINs with reference tables, should error out
|
-- UNIONs and JOINs with reference tables, should error out
|
||||||
--
|
--
|
||||||
SELECT ("final_query"."event_types") as types
|
SELECT ("final_query"."event_types") as types
|
||||||
|
@ -666,130 +657,130 @@ FROM
|
||||||
FROM
|
FROM
|
||||||
( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
||||||
FROM (
|
FROM (
|
||||||
(SELECT
|
(SELECT
|
||||||
*
|
*
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."user_id", "events"."time", 0 AS event
|
"events"."user_id", "events"."time", 0 AS event
|
||||||
FROM
|
FROM
|
||||||
events_table as "events"
|
events_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1)
|
event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1)
|
||||||
UNION
|
UNION
|
||||||
(SELECT
|
(SELECT
|
||||||
*
|
*
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."user_id", "events"."time", 1 AS event
|
"events"."user_id", "events"."time", 1 AS event
|
||||||
FROM
|
FROM
|
||||||
events_reference_table as "events"
|
events_reference_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
event_type IN (15, 16, 17, 18, 19) ) events_subquery_2)
|
event_type IN (15, 16, 17, 18, 19) ) events_subquery_2)
|
||||||
UNION
|
UNION
|
||||||
(SELECT
|
(SELECT
|
||||||
*
|
*
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."user_id", "events"."time", 2 AS event
|
"events"."user_id", "events"."time", 2 AS event
|
||||||
FROM
|
FROM
|
||||||
events_table as "events"
|
events_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3)
|
event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3)
|
||||||
UNION
|
UNION
|
||||||
(SELECT
|
(SELECT
|
||||||
*
|
*
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."user_id", "events"."time", 3 AS event
|
"events"."user_id", "events"."time", 3 AS event
|
||||||
FROM
|
FROM
|
||||||
events_table as "events"
|
events_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1
|
event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1
|
||||||
GROUP BY "t1"."user_id") AS t) "q"
|
GROUP BY "t1"."user_id") AS t) "q"
|
||||||
INNER JOIN
|
INNER JOIN
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id"
|
"users"."user_id"
|
||||||
FROM
|
FROM
|
||||||
users_table as "users"
|
users_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
value_1 > 50 and value_1 < 70) AS t
|
value_1 > 50 and value_1 < 70) AS t
|
||||||
ON (t.user_id = q.user_id)) as final_query
|
ON (t.user_id = q.user_id)) as final_query
|
||||||
ORDER BY
|
ORDER BY
|
||||||
types;
|
types;
|
||||||
|
|
||||||
-- reference table exist in the subquery of union, should error out
|
-- reference table exist in the subquery of union, should error out
|
||||||
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
||||||
FROM
|
FROM
|
||||||
( SELECT
|
( SELECT
|
||||||
*, random()
|
*, random()
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
"t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
||||||
FROM
|
FROM
|
||||||
( SELECT
|
( SELECT
|
||||||
"t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
"t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
||||||
FROM (
|
FROM (
|
||||||
(SELECT
|
(SELECT
|
||||||
*
|
*
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."time", 0 AS event, "events"."user_id"
|
"events"."time", 0 AS event, "events"."user_id"
|
||||||
FROM
|
FROM
|
||||||
events_table as "events"
|
events_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1)
|
event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1)
|
||||||
UNION
|
UNION
|
||||||
(SELECT *
|
(SELECT *
|
||||||
FROM
|
FROM
|
||||||
(
|
(
|
||||||
SELECT * FROM
|
SELECT * FROM
|
||||||
(
|
(
|
||||||
SELECT
|
SELECT
|
||||||
max("users"."time"),
|
max("users"."time"),
|
||||||
0 AS event,
|
0 AS event,
|
||||||
"users"."user_id"
|
"users"."user_id"
|
||||||
FROM
|
FROM
|
||||||
events_reference_table as "events", users_table as "users"
|
events_reference_table as "events", users_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
events.user_id = users.user_id AND
|
events.user_id = users.user_id AND
|
||||||
event_type IN (10, 11, 12, 13, 14, 15)
|
event_type IN (10, 11, 12, 13, 14, 15)
|
||||||
GROUP BY "users"."user_id"
|
GROUP BY "users"."user_id"
|
||||||
) as events_subquery_5
|
) as events_subquery_5
|
||||||
) events_subquery_2)
|
) events_subquery_2)
|
||||||
UNION
|
UNION
|
||||||
(SELECT *
|
(SELECT *
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."time", 2 AS event, "events"."user_id"
|
"events"."time", 2 AS event, "events"."user_id"
|
||||||
FROM
|
FROM
|
||||||
events_table as "events"
|
events_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3)
|
event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3)
|
||||||
UNION
|
UNION
|
||||||
(SELECT *
|
(SELECT *
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."time", 3 AS event, "events"."user_id"
|
"events"."time", 3 AS event, "events"."user_id"
|
||||||
FROM
|
FROM
|
||||||
events_table as "events"
|
events_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)
|
event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)
|
||||||
) t1
|
) t1
|
||||||
GROUP BY "t1"."user_id") AS t) "q"
|
GROUP BY "t1"."user_id") AS t) "q"
|
||||||
INNER JOIN
|
INNER JOIN
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id"
|
"users"."user_id"
|
||||||
FROM
|
FROM
|
||||||
users_table as "users"
|
users_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
value_1 > 50 and value_1 < 70) AS t
|
value_1 > 50 and value_1 < 70) AS t
|
||||||
ON (t.user_id = q.user_id)) as final_query
|
ON (t.user_id = q.user_id)) as final_query
|
||||||
GROUP BY
|
GROUP BY
|
||||||
types
|
types
|
||||||
ORDER BY
|
ORDER BY
|
||||||
types;
|
types;
|
||||||
|
|
||||||
--
|
--
|
||||||
-- Should error out with UNION ALL Queries on reference tables
|
-- Should error out with UNION ALL Queries on reference tables
|
||||||
--
|
--
|
||||||
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
||||||
|
@ -802,40 +793,40 @@ FROM
|
||||||
FROM (
|
FROM (
|
||||||
(SELECT *
|
(SELECT *
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."user_id", "events"."time", 0 AS event
|
"events"."user_id", "events"."time", 0 AS event
|
||||||
FROM
|
FROM
|
||||||
events_table as "events"
|
events_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1)
|
event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1)
|
||||||
UNION ALL
|
UNION ALL
|
||||||
(SELECT *
|
(SELECT *
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."user_id", "events"."time", 1 AS event
|
"events"."user_id", "events"."time", 1 AS event
|
||||||
FROM
|
FROM
|
||||||
events_table as "events"
|
events_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
event_type IN (15, 16, 17, 18, 19) ) events_subquery_2)
|
event_type IN (15, 16, 17, 18, 19) ) events_subquery_2)
|
||||||
UNION ALL
|
UNION ALL
|
||||||
(SELECT *
|
(SELECT *
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."user_id", "events"."time", 2 AS event
|
"events"."user_id", "events"."time", 2 AS event
|
||||||
FROM
|
FROM
|
||||||
events_reference_table as "events"
|
events_reference_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3)
|
event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3)
|
||||||
UNION ALL
|
UNION ALL
|
||||||
(SELECT *
|
(SELECT *
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."user_id", "events"."time", 3 AS event
|
"events"."user_id", "events"."time", 3 AS event
|
||||||
FROM
|
FROM
|
||||||
events_table as "events"
|
events_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1
|
event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1
|
||||||
GROUP BY "t1"."user_id") AS t) "q"
|
GROUP BY "t1"."user_id") AS t) "q"
|
||||||
INNER JOIN
|
INNER JOIN
|
||||||
(SELECT "users"."user_id"
|
(SELECT "users"."user_id"
|
||||||
FROM users_table as "users"
|
FROM users_table as "users"
|
||||||
|
@ -843,7 +834,7 @@ INNER JOIN
|
||||||
GROUP BY types
|
GROUP BY types
|
||||||
ORDER BY types;
|
ORDER BY types;
|
||||||
|
|
||||||
-- just a sanity check that we don't allow this if the reference table is on the
|
-- just a sanity check that we don't allow this if the reference table is on the
|
||||||
-- left part of the left join
|
-- left part of the left join
|
||||||
SELECT count(*) FROM
|
SELECT count(*) FROM
|
||||||
(SELECT random() FROM users_ref_test_table LEFT JOIN user_buy_test_table
|
(SELECT random() FROM users_ref_test_table LEFT JOIN user_buy_test_table
|
||||||
|
@ -862,58 +853,58 @@ WHERE subquery_1.user_id != subquery_2.user_id ;
|
||||||
SELECT
|
SELECT
|
||||||
count(*) AS cnt, "generated_group_field"
|
count(*) AS cnt, "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"eventQuery"."user_id", random(), generated_group_field
|
"eventQuery"."user_id", random(), generated_group_field
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"multi_group_wrapper_1".*, generated_group_field, random()
|
"multi_group_wrapper_1".*, generated_group_field, random()
|
||||||
FROM
|
FROM
|
||||||
(SELECT *
|
(SELECT *
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."time", "events"."user_id" as event_user_id
|
"events"."time", "events"."user_id" as event_user_id
|
||||||
FROM
|
FROM
|
||||||
events_table as "events"
|
events_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 80) "temp_data_queries"
|
user_id > 80) "temp_data_queries"
|
||||||
INNER JOIN
|
INNER JOIN
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id"
|
"users"."user_id"
|
||||||
FROM
|
FROM
|
||||||
users_reference_table as "users"
|
users_reference_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 80 and value_2 = 5) "user_filters_1"
|
user_id > 80 and value_2 = 5) "user_filters_1"
|
||||||
ON ("temp_data_queries".event_user_id < "user_filters_1".user_id)) AS "multi_group_wrapper_1"
|
ON ("temp_data_queries".event_user_id < "user_filters_1".user_id)) AS "multi_group_wrapper_1"
|
||||||
RIGHT JOIN
|
RIGHT JOIN
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
|
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
users_table as "users") "left_group_by_1"
|
users_table as "users") "left_group_by_1"
|
||||||
ON ("left_group_by_1".user_id > "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
|
ON ("left_group_by_1".user_id > "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
|
||||||
group BY
|
group BY
|
||||||
"generated_group_field"
|
"generated_group_field"
|
||||||
ORDER BY
|
ORDER BY
|
||||||
cnt DESC, generated_group_field ASC
|
cnt DESC, generated_group_field ASC
|
||||||
LIMIT 10;
|
LIMIT 10;
|
||||||
|
|
||||||
-- two hash partitioned relations are not joined
|
-- two hash partitioned relations are not joined
|
||||||
-- on partition keys although reference table is fine
|
-- on partition keys although reference table is fine
|
||||||
-- to push down
|
-- to push down
|
||||||
SELECT
|
SELECT
|
||||||
u1.user_id, count(*)
|
u1.user_id, count(*)
|
||||||
FROM
|
FROM
|
||||||
events_table as e1, users_table as u1
|
events_table as e1, users_table as u1
|
||||||
WHERE
|
WHERE
|
||||||
event_type IN
|
event_type IN
|
||||||
(SELECT
|
(SELECT
|
||||||
event_type
|
event_type
|
||||||
FROM
|
FROM
|
||||||
events_reference_table as e2
|
events_reference_table as e2
|
||||||
WHERE
|
WHERE
|
||||||
value_2 = 15 AND
|
value_2 = 15 AND
|
||||||
value_3 > 25 AND
|
value_3 > 25 AND
|
||||||
e1.value_2 > e2.value_2
|
e1.value_2 > e2.value_2
|
||||||
)
|
)
|
||||||
AND u1.user_id > e1.user_id
|
AND u1.user_id > e1.user_id
|
||||||
GROUP BY 1
|
GROUP BY 1
|
||||||
ORDER BY 2 DESC, 1 DESC
|
ORDER BY 2 DESC, 1 DESC
|
||||||
|
|
|
@ -0,0 +1,706 @@
|
||||||
|
-- ===================================================================
|
||||||
|
-- test multi subquery functionality for window functions
|
||||||
|
-- ===================================================================
|
||||||
|
|
||||||
|
CREATE VIEW subq AS
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id, rank() OVER (PARTITION BY user_id ORDER BY value_1)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_1
|
||||||
|
HAVING count(*) > 1;
|
||||||
|
|
||||||
|
SELECT
|
||||||
|
user_id, time, rnk
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, rank() OVER my_win as rnk
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
WINDOW my_win AS (PARTITION BY user_id ORDER BY time DESC)
|
||||||
|
) as foo
|
||||||
|
ORDER BY
|
||||||
|
3 DESC, 1 DESC, 2 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- the same test with different syntax
|
||||||
|
SELECT
|
||||||
|
user_id, time, rnk
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, rank() OVER (PARTITION BY user_id ORDER BY time DESC) as rnk
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
) as foo
|
||||||
|
ORDER BY
|
||||||
|
3 DESC, 1 DESC, 2 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- similar test with lag
|
||||||
|
SELECT
|
||||||
|
user_id, time, lag_event_type, row_no
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, lag(event_type) OVER my_win as lag_event_type, row_number() OVER my_win as row_no
|
||||||
|
FROM
|
||||||
|
events_table WINDOW my_win AS (PARTITION BY user_id ORDER BY time DESC)
|
||||||
|
) as foo
|
||||||
|
ORDER BY
|
||||||
|
4 DESC, 3 DESC NULLS LAST, 1 DESC, 2 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- simple window function, partitioned and grouped by on the distribution key
|
||||||
|
SELECT
|
||||||
|
user_id, rnk, avg_val_2
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
user_id, rank() OVER my_win as rnk, avg(value_2) as avg_val_2
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, date_trunc('day', time)
|
||||||
|
WINDOW my_win AS (PARTITION BY user_id ORDER BY avg(event_type) DESC)
|
||||||
|
) as foo
|
||||||
|
ORDER BY
|
||||||
|
2 DESC, 1 DESC, 3 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- top level query has a group by on the result of the window function
|
||||||
|
SELECT
|
||||||
|
min(user_id), min(time), lag_event_type, count(*)
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, lag(event_type) OVER my_win as lag_event_type
|
||||||
|
FROM
|
||||||
|
events_table WINDOW my_win AS (PARTITION BY user_id ORDER BY time DESC)
|
||||||
|
) as foo
|
||||||
|
GROUP BY
|
||||||
|
lag_event_type
|
||||||
|
ORDER BY
|
||||||
|
3 DESC NULLS LAST, 1 DESC, 2 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- window functions should work along with joins as well
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_table.user_id, lag(users_table.user_id) OVER w1, rank() OVER w1
|
||||||
|
FROM
|
||||||
|
users_table, events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id and
|
||||||
|
event_type < 25
|
||||||
|
WINDOW w1 AS (PARTITION BY users_table.user_id, events_table.event_type ORDER BY events_table.time)
|
||||||
|
) as foo
|
||||||
|
ORDER BY 3 DESC, 1 DESC, 2 DESC NULLS LAST
|
||||||
|
LIMIT 10;
|
||||||
|
|
||||||
|
-- two window functions in a single subquery should work fine as well
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_table.user_id, lag(users_table.user_id) OVER w1, rank() OVER w2
|
||||||
|
FROM
|
||||||
|
users_table, events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id and
|
||||||
|
event_type < 25
|
||||||
|
WINDOW w1 AS (PARTITION BY users_table.user_id, events_table.event_type ORDER BY events_table.time),
|
||||||
|
w2 AS (PARTITION BY users_table.user_id, (events_table.value_2 % 25) ORDER BY events_table.time)
|
||||||
|
) as foo
|
||||||
|
ORDER BY 3 DESC, 1 DESC, 2 DESC NULLS LAST
|
||||||
|
LIMIT 10;
|
||||||
|
|
||||||
|
-- window functions should be fine within subquery joins
|
||||||
|
SELECT sub_1.user_id, max(lag_1), max(rank_1), max(rank_2) FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_table.user_id, lag(users_table.user_id) OVER w1 as lag_1, rank() OVER w2 as rank_1
|
||||||
|
FROM
|
||||||
|
users_table, events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id and
|
||||||
|
event_type < 25
|
||||||
|
WINDOW w1 AS (PARTITION BY users_table.user_id, events_table.event_type ORDER BY events_table.time),
|
||||||
|
w2 AS (PARTITION BY users_table.user_id, (events_table.value_2 % 25) ORDER BY events_table.time)
|
||||||
|
) as sub_1
|
||||||
|
JOIN
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_table.user_id, lag(users_table.user_id) OVER w1 as lag_2, rank() OVER w2 as rank_2
|
||||||
|
FROM
|
||||||
|
users_table, events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id and
|
||||||
|
event_type < 25
|
||||||
|
WINDOW w1 AS (PARTITION BY users_table.user_id, events_table.value_2 ORDER BY events_table.time),
|
||||||
|
w2 AS (PARTITION BY users_table.user_id, (events_table.value_2 % 50) ORDER BY events_table.time)
|
||||||
|
) as sub_2
|
||||||
|
ON(sub_1.user_id = sub_2.user_id)
|
||||||
|
GROUP BY
|
||||||
|
sub_1.user_id
|
||||||
|
ORDER BY 3 DESC, 4 DESC, 1 DESC, 2 DESC NULLS LAST
|
||||||
|
LIMIT 10;
|
||||||
|
|
||||||
|
-- GROUP BYs and PARTITION BYs should work fine together
|
||||||
|
SELECT
|
||||||
|
avg(user_id), max(time), my_rank
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
user_id, date_trunc('day', time) as time, rank() OVER my_win as my_rank
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, date_trunc('day', time)
|
||||||
|
WINDOW my_win AS (PARTITION BY user_id ORDER BY count(*) DESC)
|
||||||
|
) as foo
|
||||||
|
WHERE
|
||||||
|
my_rank > 5
|
||||||
|
GROUP BY
|
||||||
|
my_rank
|
||||||
|
ORDER BY
|
||||||
|
3 DESC, 1 DESC,2 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- aggregates in the PARTITION BY are also allowed
|
||||||
|
SELECT
|
||||||
|
avg(user_id), max(time), my_rank
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
user_id, date_trunc('day', time) as time, rank() OVER my_win as my_rank
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, date_trunc('day', time)
|
||||||
|
WINDOW my_win AS (PARTITION BY user_id, avg(event_type%10)::int ORDER BY count(*) DESC)
|
||||||
|
) as foo
|
||||||
|
WHERE
|
||||||
|
my_rank > 0
|
||||||
|
GROUP BY
|
||||||
|
my_rank
|
||||||
|
ORDER BY
|
||||||
|
3 DESC, 1 DESC,2 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- GROUP BY should not necessarily be inclusive of partitioning
|
||||||
|
-- but this query doesn't make much sense
|
||||||
|
SELECT
|
||||||
|
avg(user_id), my_rank
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
user_id, rank() OVER my_win as my_rank
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
WINDOW my_win AS (PARTITION BY user_id, max(event_type) ORDER BY count(*) DESC)
|
||||||
|
) as foo
|
||||||
|
GROUP BY
|
||||||
|
my_rank
|
||||||
|
ORDER BY
|
||||||
|
2 DESC, 1 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- Using previously defined supported window function on distribution key
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT
|
||||||
|
user_id, date_trunc('day', time) as time, sum(rank) OVER w2
|
||||||
|
FROM (
|
||||||
|
SELECT DISTINCT
|
||||||
|
user_id as user_id, time, rank() over w1
|
||||||
|
FROM users_table
|
||||||
|
WINDOW
|
||||||
|
w AS (PARTITION BY user_id),
|
||||||
|
w1 AS (w ORDER BY value_2, value_3)
|
||||||
|
) fab
|
||||||
|
WINDOW
|
||||||
|
w2 as (PARTITION BY user_id, time)
|
||||||
|
) a
|
||||||
|
ORDER BY
|
||||||
|
1, 2, 3 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- test with reference table partitioned on columns from both
|
||||||
|
SELECT *
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id, it_name, count(id) OVER (PARTITION BY user_id, id)
|
||||||
|
FROM
|
||||||
|
users_table, users_ref_test_table
|
||||||
|
WHERE users_table.value_2=users_ref_test_table.k_no
|
||||||
|
) a
|
||||||
|
ORDER BY
|
||||||
|
1, 2, 3
|
||||||
|
LIMIT
|
||||||
|
20;
|
||||||
|
|
||||||
|
-- Group by has more columns than partition by
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id, SUM(value_2) OVER (PARTITION BY user_id)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_1, value_2
|
||||||
|
) a
|
||||||
|
ORDER BY
|
||||||
|
2 DESC, 1
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
SELECT user_id, max(sum) FROM (
|
||||||
|
SELECT
|
||||||
|
user_id, SUM(value_2) OVER (PARTITION BY user_id, value_1)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_1, value_2
|
||||||
|
) a
|
||||||
|
GROUP BY user_id ORDER BY
|
||||||
|
2 DESC,1
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- Window functions with HAVING clause
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id, rank() OVER (PARTITION BY user_id ORDER BY value_1)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_1 HAVING count(*) > 1
|
||||||
|
) a
|
||||||
|
ORDER BY
|
||||||
|
2 DESC, 1
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- Window function in View works
|
||||||
|
SELECT *
|
||||||
|
FROM
|
||||||
|
subq
|
||||||
|
ORDER BY
|
||||||
|
2 DESC, 1
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- Window functions with UNION/UNION ALL works
|
||||||
|
SELECT
|
||||||
|
max(avg)
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (1, 2, 3, 4, 5))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (6, 7, 8, 9, 10))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (11, 12, 13, 14, 15))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (16, 17, 18, 19, 20))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (21, 22, 23, 24, 25))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (26, 27, 28, 29, 30))
|
||||||
|
) b
|
||||||
|
GROUP BY user_id
|
||||||
|
ORDER BY 1 DESC
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
SELECT *
|
||||||
|
FROM (
|
||||||
|
( SELECT user_id,
|
||||||
|
sum(counter)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
events_table) user_id_1
|
||||||
|
GROUP BY
|
||||||
|
user_id)
|
||||||
|
UNION
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(counter)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
events_table) user_id_2
|
||||||
|
GROUP BY
|
||||||
|
user_id)) AS ftop
|
||||||
|
ORDER BY 2 DESC, 1 DESC
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
-- Subquery in where with window function
|
||||||
|
SELECT
|
||||||
|
user_id
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WHERE
|
||||||
|
value_2 > 545 AND
|
||||||
|
value_2 < ALL (
|
||||||
|
SELECT
|
||||||
|
avg(value_3) OVER (PARTITION BY user_id)
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id
|
||||||
|
)
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
ORDER BY
|
||||||
|
user_id DESC
|
||||||
|
LIMIT
|
||||||
|
3;
|
||||||
|
|
||||||
|
-- Some more nested queries
|
||||||
|
SELECT
|
||||||
|
user_id, rank, SUM(ABS(value_2 - value_3)) AS difference, COUNT(*) AS distinct_users
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
*, rank() OVER (PARTITION BY user_id ORDER BY value_2 DESC)
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
user_id, value_2, sum(value_3) OVER (PARTITION BY user_id, value_2) as value_3
|
||||||
|
FROM users_table
|
||||||
|
) AS A
|
||||||
|
) AS A
|
||||||
|
GROUP BY
|
||||||
|
user_id, rank
|
||||||
|
ORDER BY
|
||||||
|
difference DESC, rank DESC
|
||||||
|
LIMIT 20;
|
||||||
|
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT DISTINCT
|
||||||
|
f3.user_id, ABS(f2.sum - f3.sum)
|
||||||
|
FROM (
|
||||||
|
SELECT DISTINCT
|
||||||
|
user_id, sum(value_3) OVER (PARTITION BY user_id)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_3
|
||||||
|
) f3,
|
||||||
|
(
|
||||||
|
SELECT DISTINCT
|
||||||
|
user_id, sum(value_2) OVER (PARTITION BY user_id)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_2
|
||||||
|
) f2
|
||||||
|
WHERE
|
||||||
|
f3.user_id=f2.user_id
|
||||||
|
) a
|
||||||
|
ORDER BY
|
||||||
|
abs DESC
|
||||||
|
LIMIT 10;
|
||||||
|
|
||||||
|
|
||||||
|
-- Partition by with aggregate functions. This query does not make much sense since the
|
||||||
|
-- result of aggregate function will be the same for every row in a partition and it is
|
||||||
|
-- not going to affect the group that the count function will work on.
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT
|
||||||
|
user_id, COUNT(*) OVER (PARTITION BY user_id, MIN(value_2))
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
1
|
||||||
|
) a
|
||||||
|
ORDER BY
|
||||||
|
1 DESC
|
||||||
|
LIMIT
|
||||||
|
5;
|
||||||
|
|
||||||
|
EXPLAIN (COSTS FALSE, VERBOSE TRUE)
|
||||||
|
SELECT *
|
||||||
|
FROM (
|
||||||
|
( SELECT user_id,
|
||||||
|
sum(counter)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
events_table) user_id_1
|
||||||
|
GROUP BY
|
||||||
|
user_id)
|
||||||
|
UNION
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(counter)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
events_table) user_id_2
|
||||||
|
GROUP BY
|
||||||
|
user_id)) AS ftop
|
||||||
|
ORDER BY 2 DESC, 1 DESC
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
-- let's have some queries that Citus shouldn't push down
|
||||||
|
SELECT
|
||||||
|
user_id, time, rnk
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, rank() OVER my_win as rnk
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
WINDOW my_win AS (PARTITION BY event_type ORDER BY time DESC)
|
||||||
|
) as foo
|
||||||
|
ORDER BY
|
||||||
|
3 DESC, 1 DESC, 2 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- user needs to supply partition by which should
|
||||||
|
-- include the distribution key
|
||||||
|
SELECT
|
||||||
|
user_id, time, rnk
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, rank() OVER my_win as rnk
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
WINDOW my_win AS ()
|
||||||
|
) as foo
|
||||||
|
ORDER BY
|
||||||
|
3 DESC, 1 DESC, 2 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- user needs to supply partition by which should
|
||||||
|
-- include the distribution key
|
||||||
|
SELECT
|
||||||
|
user_id, time, rnk
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, rank() OVER my_win as rnk
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
WINDOW my_win AS (ORDER BY time DESC)
|
||||||
|
) as foo
|
||||||
|
ORDER BY
|
||||||
|
3 DESC, 1 DESC, 2 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- w2 should not be pushed down
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_table.user_id, lag(users_table.user_id) OVER w1, rank() OVER w2
|
||||||
|
FROM
|
||||||
|
users_table, events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id and
|
||||||
|
event_type < 25
|
||||||
|
WINDOW w1 AS (PARTITION BY users_table.user_id, events_table.event_type ORDER BY events_table.time),
|
||||||
|
w2 AS (PARTITION BY users_table.user_id+1, (events_table.value_2 % 25) ORDER BY events_table.time)
|
||||||
|
) as foo
|
||||||
|
ORDER BY 3 DESC, 1 DESC, 2 DESC NULLS LAST
|
||||||
|
LIMIT 10;
|
||||||
|
|
||||||
|
-- w2 should not be pushed down
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_table.user_id, lag(users_table.user_id) OVER w1, rank() OVER w2
|
||||||
|
FROM
|
||||||
|
users_table, events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id and
|
||||||
|
event_type < 25
|
||||||
|
WINDOW w1 AS (PARTITION BY users_table.user_id, events_table.event_type ORDER BY events_table.time),
|
||||||
|
w2 AS (ORDER BY events_table.time)
|
||||||
|
) as foo
|
||||||
|
ORDER BY
|
||||||
|
3 DESC, 1 DESC, 2 DESC NULLS LAST
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- GROUP BY includes the partition key, but not the WINDOW function
|
||||||
|
SELECT
|
||||||
|
user_id, time, my_rank
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
user_id, date_trunc('day', time) as time, rank() OVER my_win as my_rank
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, date_trunc('day', time)
|
||||||
|
WINDOW my_win AS (ORDER BY avg(event_type))
|
||||||
|
) as foo
|
||||||
|
WHERE
|
||||||
|
my_rank > 125
|
||||||
|
ORDER BY
|
||||||
|
3 DESC, 1 DESC,2 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- GROUP BY includes the partition key, but not the WINDOW function
|
||||||
|
SELECT
|
||||||
|
user_id, time, my_rank
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
user_id, date_trunc('day', time) as time, rank() OVER my_win as my_rank
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, date_trunc('day', time)
|
||||||
|
WINDOW my_win AS (PARTITION BY date_trunc('day', time) ORDER BY avg(event_type))
|
||||||
|
) as foo
|
||||||
|
WHERE
|
||||||
|
my_rank > 125
|
||||||
|
ORDER BY
|
||||||
|
3 DESC, 1 DESC,2 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- Overriding window function but not supported
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT
|
||||||
|
user_id, date_trunc('day', time) as time, sum(rank) OVER w2
|
||||||
|
FROM (
|
||||||
|
SELECT DISTINCT
|
||||||
|
user_id as user_id, time, rank() over w1
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WINDOW
|
||||||
|
w AS (PARTITION BY time), w1 AS (w ORDER BY value_2, value_3)
|
||||||
|
) fab
|
||||||
|
WINDOW
|
||||||
|
w2 as (PARTITION BY user_id, time)
|
||||||
|
) a
|
||||||
|
ORDER BY
|
||||||
|
1,2,3;
|
||||||
|
|
||||||
|
|
||||||
|
-- Aggregate function on distribution column should error out
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT
|
||||||
|
user_id, COUNT(*) OVER (PARTITION BY sum(user_id), MIN(value_2))
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
) a
|
||||||
|
ORDER BY
|
||||||
|
1 DESC, 2 DESC;
|
||||||
|
|
||||||
|
-- test with reference table partitioned on only a column from reference table
|
||||||
|
SELECT *
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id, it_name, count(id) OVER (PARTITION BY id)
|
||||||
|
FROM
|
||||||
|
users_table, users_ref_test_table
|
||||||
|
) a
|
||||||
|
ORDER BY
|
||||||
|
1, 2, 3
|
||||||
|
LIMIT
|
||||||
|
20;
|
||||||
|
|
||||||
|
-- UNION ALL where one of the subqueries is not partitioned over the distribution column,
|
||||||
|
-- which should not be allowed.
|
||||||
|
SELECT
|
||||||
|
max(avg)
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (1, 2, 3, 4, 5))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (6, 7, 8, 9, 10))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (11, 12, 13, 14, 15))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (16, 17, 18, 19, 20))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (21, 22, 23, 24, 25))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by event_type), user_id FROM events_table where event_type IN (26, 27, 28, 29, 30))
|
||||||
|
) b
|
||||||
|
GROUP BY user_id
|
||||||
|
ORDER BY 1 DESC
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
-- UNION with only one subquery which has a partition on non-distribution column should
|
||||||
|
-- error out
|
||||||
|
SELECT *
|
||||||
|
FROM (
|
||||||
|
( SELECT user_id,
|
||||||
|
sum(counter)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
events_table) user_id_1
|
||||||
|
GROUP BY
|
||||||
|
user_id)
|
||||||
|
UNION
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(counter)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_2) over (partition by event_type) AS counter
|
||||||
|
FROM
|
||||||
|
events_table) user_id_2
|
||||||
|
GROUP BY
|
||||||
|
user_id)) AS ftop
|
||||||
|
ORDER BY 2 DESC, 1 DESC
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
DROP VIEW subq;
|
Loading…
Reference in New Issue