mirror of https://github.com/citusdata/citus.git
Add window function support for SUBQUERY PUSHDOWN and INSERT INTO SELECT
This commit provides the support for window functions in subquery and insert into select queries. Note that our support for window functions is still limited because it must have a partition by clause on the distribution key. This commit makes changes in the files insert_select_planner and multi_logical_planner. The required tests are also added with files multi_subquery_window_functions.out and multi_insert_select_window.out.pull/1668/head
parent
f6b43d81ec
commit
498ac80d8b
|
@ -776,6 +776,7 @@ MultiTaskRouterSelectQuerySupported(Query *query)
|
||||||
{
|
{
|
||||||
List *queryList = NIL;
|
List *queryList = NIL;
|
||||||
ListCell *queryCell = NULL;
|
ListCell *queryCell = NULL;
|
||||||
|
StringInfo errorDetail = NULL;
|
||||||
|
|
||||||
ExtractQueryWalker((Node *) query, &queryList);
|
ExtractQueryWalker((Node *) query, &queryList);
|
||||||
foreach(queryCell, queryList)
|
foreach(queryCell, queryList)
|
||||||
|
@ -797,7 +798,7 @@ MultiTaskRouterSelectQuerySupported(Query *query)
|
||||||
if (subquery->limitCount != NULL)
|
if (subquery->limitCount != NULL)
|
||||||
{
|
{
|
||||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||||
"LIMIT clauses are not allowed in distirbuted INSERT "
|
"LIMIT clauses are not allowed in distributed INSERT "
|
||||||
"... SELECT queries",
|
"... SELECT queries",
|
||||||
NULL, NULL);
|
NULL, NULL);
|
||||||
}
|
}
|
||||||
|
@ -811,17 +812,34 @@ MultiTaskRouterSelectQuerySupported(Query *query)
|
||||||
NULL, NULL);
|
NULL, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/* group clause list must include partition column */
|
||||||
* We could potentially support window clauses where the data is partitioned
|
if (subquery->groupClause)
|
||||||
* over distribution column. For simplicity, we currently do not support window
|
|
||||||
* clauses at all.
|
|
||||||
*/
|
|
||||||
if (subquery->windowClause != NULL)
|
|
||||||
{
|
{
|
||||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
List *groupClauseList = subquery->groupClause;
|
||||||
"window functions are not allowed in distributed "
|
List *targetEntryList = subquery->targetList;
|
||||||
"INSERT ... SELECT queries",
|
List *groupTargetEntryList = GroupTargetEntryList(groupClauseList,
|
||||||
NULL, NULL);
|
targetEntryList);
|
||||||
|
bool groupOnPartitionColumn = TargetListOnPartitionColumn(subquery,
|
||||||
|
groupTargetEntryList);
|
||||||
|
if (!groupOnPartitionColumn)
|
||||||
|
{
|
||||||
|
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||||
|
"Group by list without distribution column is "
|
||||||
|
"not allowed in distributed INSERT ... "
|
||||||
|
"SELECT queries",
|
||||||
|
NULL, NULL);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We support window functions when the window function
|
||||||
|
* is partitioned on distribution column.
|
||||||
|
*/
|
||||||
|
if (subquery->windowClause && !SafeToPushdownWindowFunction(subquery,
|
||||||
|
&errorDetail))
|
||||||
|
{
|
||||||
|
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, errorDetail->data, NULL,
|
||||||
|
NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (subquery->setOperations != NULL)
|
if (subquery->setOperations != NULL)
|
||||||
|
|
|
@ -83,7 +83,7 @@ static DeferredErrorMessage * DeferErrorIfUnsupportedUnionQuery(Query *queryTree
|
||||||
outerMostQueryHasLimit);
|
outerMostQueryHasLimit);
|
||||||
static bool ExtractSetOperationStatmentWalker(Node *node, List **setOperationList);
|
static bool ExtractSetOperationStatmentWalker(Node *node, List **setOperationList);
|
||||||
static DeferredErrorMessage * DeferErrorIfUnsupportedTableCombination(Query *queryTree);
|
static DeferredErrorMessage * DeferErrorIfUnsupportedTableCombination(Query *queryTree);
|
||||||
static bool TargetListOnPartitionColumn(Query *query, List *targetEntryList);
|
static bool WindowPartitionOnDistributionColumn(Query *query);
|
||||||
static FieldSelect * CompositeFieldRecursive(Expr *expression, Query *query);
|
static FieldSelect * CompositeFieldRecursive(Expr *expression, Query *query);
|
||||||
static bool FullCompositeFieldList(List *compositeFieldList);
|
static bool FullCompositeFieldList(List *compositeFieldList);
|
||||||
static MultiNode * MultiPlanTree(Query *queryTree);
|
static MultiNode * MultiPlanTree(Query *queryTree);
|
||||||
|
@ -447,6 +447,7 @@ MultiSubqueryPlanTree(Query *originalQuery, Query *queryTree,
|
||||||
* - Only a single RTE_RELATION exists, which means only a single table
|
* - Only a single RTE_RELATION exists, which means only a single table
|
||||||
* name is specified on the whole query
|
* name is specified on the whole query
|
||||||
* - No sublinks exists in the subquery
|
* - No sublinks exists in the subquery
|
||||||
|
* - No window functions in the subquery
|
||||||
*
|
*
|
||||||
* Note that the caller should still call DeferErrorIfUnsupportedSubqueryRepartition()
|
* Note that the caller should still call DeferErrorIfUnsupportedSubqueryRepartition()
|
||||||
* to ensure that Citus supports the subquery. Also, this function is designed to run
|
* to ensure that Citus supports the subquery. Also, this function is designed to run
|
||||||
|
@ -466,6 +467,12 @@ SingleRelationRepartitionSubquery(Query *queryTree)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* we don't support window functions */
|
||||||
|
if (queryTree->hasWindowFuncs)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Don't allow joins and set operations. If join appears in the queryTree, the
|
* Don't allow joins and set operations. If join appears in the queryTree, the
|
||||||
* length would be greater than 1. If only set operations exists, the length
|
* length would be greater than 1. If only set operations exists, the length
|
||||||
|
@ -782,6 +789,7 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi
|
||||||
{
|
{
|
||||||
bool preconditionsSatisfied = true;
|
bool preconditionsSatisfied = true;
|
||||||
char *errorDetail = NULL;
|
char *errorDetail = NULL;
|
||||||
|
StringInfo errorInfo = NULL;
|
||||||
DeferredErrorMessage *deferredError = NULL;
|
DeferredErrorMessage *deferredError = NULL;
|
||||||
|
|
||||||
deferredError = DeferErrorIfUnsupportedTableCombination(subqueryTree);
|
deferredError = DeferErrorIfUnsupportedTableCombination(subqueryTree);
|
||||||
|
@ -796,12 +804,6 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi
|
||||||
errorDetail = "Subqueries without relations are unsupported";
|
errorDetail = "Subqueries without relations are unsupported";
|
||||||
}
|
}
|
||||||
|
|
||||||
if (subqueryTree->hasWindowFuncs)
|
|
||||||
{
|
|
||||||
preconditionsSatisfied = false;
|
|
||||||
errorDetail = "Window functions are currently unsupported";
|
|
||||||
}
|
|
||||||
|
|
||||||
if (subqueryTree->limitOffset)
|
if (subqueryTree->limitOffset)
|
||||||
{
|
{
|
||||||
preconditionsSatisfied = false;
|
preconditionsSatisfied = false;
|
||||||
|
@ -871,6 +873,17 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We support window functions when the window function
|
||||||
|
* is partitioned on distribution column.
|
||||||
|
*/
|
||||||
|
if (subqueryTree->windowClause && !SafeToPushdownWindowFunction(subqueryTree,
|
||||||
|
&errorInfo))
|
||||||
|
{
|
||||||
|
errorDetail = (char *) errorInfo->data;
|
||||||
|
preconditionsSatisfied = false;
|
||||||
|
}
|
||||||
|
|
||||||
/* we don't support aggregates without group by */
|
/* we don't support aggregates without group by */
|
||||||
if (subqueryTree->hasAggs && (subqueryTree->groupClause == NULL))
|
if (subqueryTree->hasAggs && (subqueryTree->groupClause == NULL))
|
||||||
{
|
{
|
||||||
|
@ -1083,11 +1096,91 @@ DeferErrorIfUnsupportedTableCombination(Query *queryTree)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* SafeToPushdownWindowFunction checks if the query with window function is supported.
|
||||||
|
* It returns the result accordingly and modifies the error detail.
|
||||||
|
*/
|
||||||
|
bool
|
||||||
|
SafeToPushdownWindowFunction(Query *query, StringInfo *errorDetail)
|
||||||
|
{
|
||||||
|
ListCell *windowClauseCell = NULL;
|
||||||
|
List *windowClauseList = query->windowClause;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We need to check each window clause separately if there is a partition by clause
|
||||||
|
* and if it is partitioned on the distribution column.
|
||||||
|
*/
|
||||||
|
foreach(windowClauseCell, windowClauseList)
|
||||||
|
{
|
||||||
|
WindowClause *windowClause = lfirst(windowClauseCell);
|
||||||
|
|
||||||
|
if (!windowClause->partitionClause)
|
||||||
|
{
|
||||||
|
*errorDetail = makeStringInfo();
|
||||||
|
appendStringInfoString(*errorDetail,
|
||||||
|
"Window functions without PARTITION BY on distribution "
|
||||||
|
"column is currently unsupported");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!WindowPartitionOnDistributionColumn(query))
|
||||||
|
{
|
||||||
|
*errorDetail = makeStringInfo();
|
||||||
|
appendStringInfoString(*errorDetail,
|
||||||
|
"Window functions with PARTITION BY list missing distribution "
|
||||||
|
"column is currently unsupported");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* WindowPartitionOnDistributionColumn checks if the given subquery has one
|
||||||
|
* or more window functions and at least one of them is not partitioned by
|
||||||
|
* distribution column. The function returns false if your window function does not
|
||||||
|
* have a partition by clause or it does not include the distribution column.
|
||||||
|
*
|
||||||
|
* Please note that if the query does not have a window function, the function
|
||||||
|
* returns true.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
WindowPartitionOnDistributionColumn(Query *query)
|
||||||
|
{
|
||||||
|
List *windowClauseList = query->windowClause;
|
||||||
|
ListCell *windowClauseCell = NULL;
|
||||||
|
|
||||||
|
foreach(windowClauseCell, windowClauseList)
|
||||||
|
{
|
||||||
|
WindowClause *windowClause = lfirst(windowClauseCell);
|
||||||
|
List *groupTargetEntryList = NIL;
|
||||||
|
bool partitionOnDistributionColumn = false;
|
||||||
|
List *partitionClauseList = windowClause->partitionClause;
|
||||||
|
List *targetEntryList = query->targetList;
|
||||||
|
|
||||||
|
groupTargetEntryList =
|
||||||
|
GroupTargetEntryList(partitionClauseList, targetEntryList);
|
||||||
|
|
||||||
|
partitionOnDistributionColumn =
|
||||||
|
TargetListOnPartitionColumn(query, groupTargetEntryList);
|
||||||
|
|
||||||
|
if (!partitionOnDistributionColumn)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* TargetListOnPartitionColumn checks if at least one target list entry is on
|
* TargetListOnPartitionColumn checks if at least one target list entry is on
|
||||||
* partition column.
|
* partition column.
|
||||||
*/
|
*/
|
||||||
static bool
|
bool
|
||||||
TargetListOnPartitionColumn(Query *query, List *targetEntryList)
|
TargetListOnPartitionColumn(Query *query, List *targetEntryList)
|
||||||
{
|
{
|
||||||
bool targetListOnPartitionColumn = false;
|
bool targetListOnPartitionColumn = false;
|
||||||
|
|
|
@ -185,6 +185,8 @@ extern MultiTreeRoot * MultiLogicalPlanCreate(Query *originalQuery, Query *query
|
||||||
PlannerRestrictionContext *
|
PlannerRestrictionContext *
|
||||||
plannerRestrictionContext,
|
plannerRestrictionContext,
|
||||||
ParamListInfo boundParams);
|
ParamListInfo boundParams);
|
||||||
|
extern bool SafeToPushdownWindowFunction(Query *query, StringInfo *errorDetail);
|
||||||
|
extern bool TargetListOnPartitionColumn(Query *query, List *targetEntryList);
|
||||||
extern bool NeedsDistributedPlanning(Query *queryTree);
|
extern bool NeedsDistributedPlanning(Query *queryTree);
|
||||||
extern MultiNode * ParentNode(MultiNode *multiNode);
|
extern MultiNode * ParentNode(MultiNode *multiNode);
|
||||||
extern MultiNode * ChildNode(MultiUnaryNode *multiNode);
|
extern MultiNode * ChildNode(MultiUnaryNode *multiNode);
|
||||||
|
|
|
@ -1015,8 +1015,7 @@ FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
||||||
raw_events_second
|
raw_events_second
|
||||||
WHERE raw_events_first.user_id = raw_events_second.user_id
|
WHERE raw_events_first.user_id = raw_events_second.user_id
|
||||||
GROUP BY raw_events_second.value_3) AS foo;
|
GROUP BY raw_events_second.value_3) AS foo;
|
||||||
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
|
DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... SELECT queries
|
||||||
DETAIL: The data type of the target table's partition column should exactly match the data type of the corresponding simple column reference in the subquery.
|
|
||||||
DEBUG: Collecting INSERT ... SELECT results on coordinator
|
DEBUG: Collecting INSERT ... SELECT results on coordinator
|
||||||
ERROR: cannot push down this subquery
|
ERROR: cannot push down this subquery
|
||||||
DETAIL: Group by list without partition column is currently unsupported
|
DETAIL: Group by list without partition column is currently unsupported
|
||||||
|
@ -1133,9 +1132,7 @@ FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
||||||
GROUP BY raw_events_second.value_1
|
GROUP BY raw_events_second.value_1
|
||||||
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
||||||
ON (f.id = f2.id);
|
ON (f.id = f2.id);
|
||||||
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
|
DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... SELECT queries
|
||||||
DETAIL: Subquery contains an expression that is not a simple column reference in the same position as the target table's partition column.
|
|
||||||
HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery.
|
|
||||||
DEBUG: Collecting INSERT ... SELECT results on coordinator
|
DEBUG: Collecting INSERT ... SELECT results on coordinator
|
||||||
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
||||||
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
||||||
|
@ -1164,8 +1161,10 @@ FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
||||||
GROUP BY raw_events_second.value_1
|
GROUP BY raw_events_second.value_1
|
||||||
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
||||||
ON (f.id = f2.id);
|
ON (f.id = f2.id);
|
||||||
ERROR: cannot perform distributed planning for the given modification
|
DEBUG: Group by list without distribution column is not allowed in distributed INSERT ... SELECT queries
|
||||||
DETAIL: Select query cannot be pushed down to the worker.
|
DEBUG: Collecting INSERT ... SELECT results on coordinator
|
||||||
|
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
||||||
|
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
||||||
-- cannot pushdown the query since the JOIN is not equi JOIN
|
-- cannot pushdown the query since the JOIN is not equi JOIN
|
||||||
INSERT INTO agg_events
|
INSERT INTO agg_events
|
||||||
(user_id, value_4_agg)
|
(user_id, value_4_agg)
|
||||||
|
|
|
@ -0,0 +1,855 @@
|
||||||
|
-- ===================================================================
|
||||||
|
-- test insert select functionality for window functions
|
||||||
|
-- ===================================================================
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, time, rnk
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, rank() OVER my_win as rnk
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
WINDOW my_win AS (PARTITION BY user_id ORDER BY time DESC)
|
||||||
|
) as foo;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
10001 | 101 | 49.5810418958104190
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- the same test with different syntax
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, time, rnk
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, rank() OVER (PARTITION BY user_id ORDER BY time DESC) as rnk
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
) as foo;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
10001 | 101 | 49.5810418958104190
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- similar test with lag
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg, value_3_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, time, lag_event_type, row_no
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, lag(event_type) OVER my_win as lag_event_type, row_number() OVER my_win as row_no
|
||||||
|
FROM
|
||||||
|
events_table WINDOW my_win AS (PARTITION BY user_id ORDER BY time DESC)
|
||||||
|
) as foo;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
10001 | 101 | 49.5810418958104190
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- simple window function, partitioned and grouped by on the distribution key
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, rnk, tme
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
user_id, rank() OVER my_win as rnk, avg(value_2) as tme
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, date_trunc('day', time)
|
||||||
|
WINDOW my_win AS (PARTITION BY user_id ORDER BY avg(event_type) DESC)
|
||||||
|
) as foo;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
1188 | 101 | 49.7895622895622896
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- top level query has a group by on the result of the window function
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
min(user_id), min(time), lag_event_type
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, lag(event_type) OVER my_win as lag_event_type
|
||||||
|
FROM
|
||||||
|
events_table WINDOW my_win AS (PARTITION BY user_id ORDER BY time DESC)
|
||||||
|
) as foo
|
||||||
|
GROUP BY
|
||||||
|
lag_event_type;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+--------------------
|
||||||
|
1002 | 50 | 9.7844311377245509
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- window functions should work along with joins as well
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg, value_2_agg)
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_table.user_id, lag(users_table.user_id) OVER w1, rank() OVER w1
|
||||||
|
FROM
|
||||||
|
users_table, events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id and
|
||||||
|
event_type < 25
|
||||||
|
WINDOW w1 AS (PARTITION BY users_table.user_id, events_table.event_type ORDER BY events_table.time)
|
||||||
|
) as foo;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
195 | 91 | 51.0205128205128205
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- two window functions in a single subquery should work fine as well
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg, value_2_agg)
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_table.user_id, lag(users_table.user_id) OVER w1, rank() OVER w2
|
||||||
|
FROM
|
||||||
|
users_table, events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id and
|
||||||
|
event_type < 25
|
||||||
|
WINDOW w1 AS (PARTITION BY users_table.user_id, events_table.event_type ORDER BY events_table.time),
|
||||||
|
w2 AS (PARTITION BY users_table.user_id, (events_table.value_2 % 25) ORDER BY events_table.time)
|
||||||
|
) as foo;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
202 | 91 | 50.2970297029702970
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- window functions should be fine within subquery joins
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg, value_2_agg, value_3_agg)
|
||||||
|
SELECT sub_1.user_id, max(lag_1), max(rank_1), max(rank_2) FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_table.user_id, lag(users_table.user_id) OVER w1 as lag_1, rank() OVER w2 as rank_1
|
||||||
|
FROM
|
||||||
|
users_table, events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id and
|
||||||
|
event_type < 25
|
||||||
|
WINDOW w1 AS (PARTITION BY users_table.user_id, events_table.event_type ORDER BY events_table.time),
|
||||||
|
w2 AS (PARTITION BY users_table.user_id, (events_table.value_2 % 25) ORDER BY events_table.time)
|
||||||
|
) as sub_1
|
||||||
|
JOIN
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_table.user_id, lag(users_table.user_id) OVER w1 as lag_2, rank() OVER w2 as rank_2
|
||||||
|
FROM
|
||||||
|
users_table, events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id and
|
||||||
|
event_type < 25
|
||||||
|
WINDOW w1 AS (PARTITION BY users_table.user_id, events_table.value_2 ORDER BY events_table.time),
|
||||||
|
w2 AS (PARTITION BY users_table.user_id, (events_table.value_2 % 50) ORDER BY events_table.time)
|
||||||
|
) as sub_2
|
||||||
|
ON(sub_1.user_id = sub_2.user_id)
|
||||||
|
GROUP BY
|
||||||
|
sub_1.user_id;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
91 | 91 | 50.2637362637362637
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- GROUP BYs and PARTITION BYs should work fine together
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
avg(user_id), max(time), my_rank
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
user_id, date_trunc('day', time) as time, rank() OVER my_win as my_rank
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, date_trunc('day', time)
|
||||||
|
WINDOW my_win AS (PARTITION BY user_id ORDER BY count(*) DESC)
|
||||||
|
) as foo
|
||||||
|
WHERE
|
||||||
|
my_rank > 5
|
||||||
|
GROUP BY
|
||||||
|
my_rank;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
7 | 6 | 50.0000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- aggregates in the PARTITION BY is also allows
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
avg(user_id), max(time), my_rank
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
user_id, date_trunc('day', time) as time, rank() OVER my_win as my_rank
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, date_trunc('day', time)
|
||||||
|
WINDOW my_win AS (PARTITION BY user_id, avg(event_type%10)::int ORDER BY count(*) DESC)
|
||||||
|
) as foo
|
||||||
|
WHERE
|
||||||
|
my_rank > 0
|
||||||
|
GROUP BY
|
||||||
|
my_rank;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
8 | 7 | 48.8750000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- GROUP BY should not necessarly be inclusive of partitioning
|
||||||
|
-- but this query doesn't make much sense
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg)
|
||||||
|
SELECT
|
||||||
|
avg(user_id), my_rank
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
user_id, rank() OVER my_win as my_rank
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
WINDOW my_win AS (PARTITION BY user_id, max(event_type) ORDER BY count(*) DESC)
|
||||||
|
) as foo
|
||||||
|
GROUP BY
|
||||||
|
my_rank;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
1 | 1 | 50.0000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- Group by has more columns than partition by which uses coordinator insert ... select
|
||||||
|
INSERT INTO agg_results(user_id, value_2_agg)
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id, SUM(value_2) OVER (PARTITION BY user_id)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_1, value_2
|
||||||
|
) a
|
||||||
|
ORDER BY
|
||||||
|
2 DESC, 1
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
10 | 10 | 49.1000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
INSERT INTO agg_results(user_id, value_2_agg)
|
||||||
|
SELECT user_id, max(sum) FROM (
|
||||||
|
SELECT
|
||||||
|
user_id, SUM(value_2) OVER (PARTITION BY user_id, value_1)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_1, value_2
|
||||||
|
) a
|
||||||
|
GROUP BY user_id;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
101 | 101 | 50.0000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- Subquery in where with window function
|
||||||
|
INSERT INTO agg_results(user_id)
|
||||||
|
SELECT
|
||||||
|
user_id
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WHERE
|
||||||
|
value_2 > 545 AND
|
||||||
|
value_2 < ALL (
|
||||||
|
SELECT
|
||||||
|
avg(value_3) OVER (PARTITION BY user_id)
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id
|
||||||
|
)
|
||||||
|
GROUP BY
|
||||||
|
user_id;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
4 | 4 | 35.2500000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- Partition by with aggregate functions. This query does not make much sense since the
|
||||||
|
-- result of aggregate function will be the same for every row in a partition and it is
|
||||||
|
-- not going to affect the group that the count function will work on.
|
||||||
|
INSERT INTO agg_results(user_id, value_2_agg)
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT
|
||||||
|
user_id, COUNT(*) OVER (PARTITION BY user_id, MIN(value_2))
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
1
|
||||||
|
) a;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
101 | 101 | 50.0000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- Some more nested queries
|
||||||
|
INSERT INTO agg_results(user_id, value_2_agg, value_3_agg, value_4_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, rank, SUM(ABS(value_2 - value_3)) AS difference, COUNT(*) AS distinct_users
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
*, rank() OVER (PARTITION BY user_id ORDER BY value_2 DESC)
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
user_id, value_2, sum(value_3) OVER (PARTITION BY user_id, value_2) as value_3
|
||||||
|
FROM users_table
|
||||||
|
) AS A
|
||||||
|
) AS A
|
||||||
|
GROUP BY
|
||||||
|
user_id, rank;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
9501 | 101 | 49.8461214608988528
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT DISTINCT
|
||||||
|
f3.user_id, ABS(f2.sum - f3.sum)
|
||||||
|
FROM (
|
||||||
|
SELECT DISTINCT
|
||||||
|
user_id, sum(value_3) OVER (PARTITION BY user_id)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_3
|
||||||
|
) f3,
|
||||||
|
(
|
||||||
|
SELECT DISTINCT
|
||||||
|
user_id, sum(value_2) OVER (PARTITION BY user_id)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_2
|
||||||
|
) f2
|
||||||
|
WHERE
|
||||||
|
f3.user_id=f2.user_id
|
||||||
|
) a;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
101 | 101 | 50.0000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- test with reference table partitioned on columns from both
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT *
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id, count(id) OVER (PARTITION BY user_id, id)
|
||||||
|
FROM
|
||||||
|
users_table, users_ref_test_table
|
||||||
|
) a;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
101 | 101 | 50.0000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- Window functions with HAVING clause
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg)
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id, rank() OVER (PARTITION BY user_id ORDER BY value_1)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_1 HAVING count(*) > 1
|
||||||
|
) a;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
437 | 100 | 49.9496567505720824
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- Window functions with HAVING clause which uses coordinator insert ... select
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg)
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id, rank() OVER (PARTITION BY user_id ORDER BY value_1)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_1 HAVING count(*) > 1
|
||||||
|
) a
|
||||||
|
ORDER BY
|
||||||
|
2 DESC, 1
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
10 | 5 | 32.4000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- Window function in View works
|
||||||
|
CREATE VIEW view_with_window_func AS
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id, rank() OVER (PARTITION BY user_id ORDER BY value_1)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_1
|
||||||
|
HAVING count(*) > 1;
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT *
|
||||||
|
FROM
|
||||||
|
view_with_window_func;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
437 | 100 | 49.9496567505720824
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- Window function in View works and the query uses coordinator insert ... select
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT *
|
||||||
|
FROM
|
||||||
|
view_with_window_func
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
-- since there is a limit but not order, we cannot run avg(user_id)
|
||||||
|
SELECT count(*) FROM agg_results;
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
10
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, max(avg)
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (1, 2, 3, 4, 5))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (6, 7, 8, 9, 10))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (11, 12, 13, 14, 15))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (16, 17, 18, 19, 20))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (21, 22, 23, 24, 25))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (26, 27, 28, 29, 30))
|
||||||
|
) b
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
LIMIT
|
||||||
|
5;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
-- since there is a limit but not order, we cannot test avg or distinct count
|
||||||
|
SELECT count(*) FROM agg_results;
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
5
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, max(avg)
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (1, 2, 3, 4, 5))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (6, 7, 8, 9, 10))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (11, 12, 13, 14, 15))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (16, 17, 18, 19, 20))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (21, 22, 23, 24, 25))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (26, 27, 28, 29, 30))
|
||||||
|
) b
|
||||||
|
GROUP BY
|
||||||
|
user_id;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
94 | 94 | 50.4787234042553191
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT *
|
||||||
|
FROM (
|
||||||
|
( SELECT user_id,
|
||||||
|
sum(counter)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
events_table) user_id_1
|
||||||
|
GROUP BY
|
||||||
|
user_id)
|
||||||
|
UNION
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(counter)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
events_table) user_id_2
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
)
|
||||||
|
) AS ftop
|
||||||
|
LIMIT
|
||||||
|
5;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
-- since there is a limit but not order, we cannot test avg or distinct count
|
||||||
|
SELECT count(*) FROM agg_results;
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
5
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT *
|
||||||
|
FROM (
|
||||||
|
( SELECT user_id,
|
||||||
|
sum(counter)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
events_table) user_id_1
|
||||||
|
GROUP BY
|
||||||
|
user_id)
|
||||||
|
UNION
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(counter)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
events_table) user_id_2
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
)
|
||||||
|
) AS ftop;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
101 | 101 | 50.0000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
-- lets have some queries that Citus shouldn't push down
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, time, rnk
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, rank() OVER my_win as rnk
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
WINDOW my_win AS (PARTITION BY event_type ORDER BY time DESC)
|
||||||
|
) as foo
|
||||||
|
ORDER BY
|
||||||
|
3 DESC, 1 DESC, 2 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Window functions with PARTITION BY list missing distribution column is currently unsupported
|
||||||
|
-- user needs to supply partition by which should
|
||||||
|
-- include the distribution key
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, time, rnk
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, rank() OVER my_win as rnk
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
WINDOW my_win AS ()
|
||||||
|
) as foo
|
||||||
|
ORDER BY
|
||||||
|
3 DESC, 1 DESC, 2 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Window functions without PARTITION BY on distribution column is currently unsupported
|
||||||
|
-- user needs to supply partition by which should
|
||||||
|
-- include the distribution key
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, time, rnk
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, rank() OVER my_win as rnk
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
WINDOW my_win AS (ORDER BY time DESC)
|
||||||
|
) as foo
|
||||||
|
ORDER BY
|
||||||
|
3 DESC, 1 DESC, 2 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Window functions without PARTITION BY on distribution column is currently unsupported
|
||||||
|
-- w2 should not be pushed down
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg, value_2_agg)
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_table.user_id, lag(users_table.user_id) OVER w1, rank() OVER w2
|
||||||
|
FROM
|
||||||
|
users_table, events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id and
|
||||||
|
event_type < 25
|
||||||
|
WINDOW w1 AS (PARTITION BY users_table.user_id, events_table.event_type ORDER BY events_table.time),
|
||||||
|
w2 AS (PARTITION BY users_table.user_id+1, (events_table.value_2 % 25) ORDER BY events_table.time)
|
||||||
|
) as foo
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Window functions with PARTITION BY list missing distribution column is currently unsupported
|
||||||
|
-- GROUP BY includes the partition key, but not the WINDOW function
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, time, my_rank
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
user_id, date_trunc('day', time) as time, rank() OVER my_win as my_rank
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, date_trunc('day', time)
|
||||||
|
WINDOW my_win AS (ORDER BY avg(event_type))
|
||||||
|
) as foo
|
||||||
|
WHERE
|
||||||
|
my_rank > 125;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Window functions without PARTITION BY on distribution column is currently unsupported
|
||||||
|
-- GROUP BY includes the partition key, but not the WINDOW function
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, time, my_rank
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
user_id, date_trunc('day', time) as time, rank() OVER my_win as my_rank
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, date_trunc('day', time)
|
||||||
|
WINDOW my_win AS (PARTITION BY date_trunc('day', time) ORDER BY avg(event_type))
|
||||||
|
) as foo
|
||||||
|
WHERE
|
||||||
|
my_rank > 125;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Window functions with PARTITION BY list missing distribution column is currently unsupported
|
||||||
|
-- w2 should not be allowed
|
||||||
|
INSERT INTO agg_results (user_id, value_2_agg, value_3_agg)
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_table.user_id, lag(users_table.user_id) OVER w1, rank() OVER w2
|
||||||
|
FROM
|
||||||
|
users_table, events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id and
|
||||||
|
event_type < 25
|
||||||
|
WINDOW w1 AS (PARTITION BY users_table.user_id, events_table.event_type ORDER BY events_table.time),
|
||||||
|
w2 AS (ORDER BY events_table.time)
|
||||||
|
) as foo;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Window functions without PARTITION BY on distribution column is currently unsupported
|
||||||
|
-- unsupported window function with an override
|
||||||
|
INSERT INTO agg_results(user_id, agg_time, value_2_agg)
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT
|
||||||
|
user_id, date_trunc('day', time) as time, sum(rank) OVER w2
|
||||||
|
FROM (
|
||||||
|
SELECT DISTINCT
|
||||||
|
user_id as user_id, time, rank() over w1
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WINDOW
|
||||||
|
w AS (PARTITION BY time), w1 AS (w ORDER BY value_2, value_3)
|
||||||
|
) fab
|
||||||
|
WINDOW
|
||||||
|
w2 as (PARTITION BY user_id, time)
|
||||||
|
) a;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Window functions with PARTITION BY list missing distribution column is currently unsupported
|
||||||
|
-- Subquery in where with unsupported window function
|
||||||
|
INSERT INTO agg_results(user_id)
|
||||||
|
SELECT
|
||||||
|
user_id
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WHERE
|
||||||
|
value_2 > 545 AND
|
||||||
|
value_2 < ALL (
|
||||||
|
SELECT
|
||||||
|
avg(value_3) OVER ()
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id
|
||||||
|
)
|
||||||
|
GROUP BY
|
||||||
|
user_id;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Window functions without PARTITION BY on distribution column is currently unsupported
|
||||||
|
-- Aggregate function on distribution column should error out
|
||||||
|
INSERT INTO agg_results(user_id, value_2_agg)
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT
|
||||||
|
user_id, COUNT(*) OVER (PARTITION BY sum(user_id), MIN(value_2))
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
) a;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Window functions with PARTITION BY list missing distribution column is currently unsupported
|
||||||
|
-- UNION with only one subquery which has a partition on non-distribution column should
|
||||||
|
-- error out
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT *
|
||||||
|
FROM (
|
||||||
|
( SELECT user_id,
|
||||||
|
sum(counter)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
events_table) user_id_1
|
||||||
|
GROUP BY
|
||||||
|
user_id)
|
||||||
|
UNION
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(counter)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_2) over (partition by event_type) AS counter
|
||||||
|
FROM
|
||||||
|
events_table) user_id_2
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
)
|
||||||
|
) AS ftop;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Window functions with PARTITION BY list missing distribution column is currently unsupported
|
||||||
|
DROP VIEW view_with_window_func;
|
|
@ -2381,19 +2381,6 @@ ORDER BY
|
||||||
types;
|
types;
|
||||||
ERROR: cannot push down this subquery
|
ERROR: cannot push down this subquery
|
||||||
DETAIL: Offset clause is currently unsupported
|
DETAIL: Offset clause is currently unsupported
|
||||||
-- not supported due to window functions
|
|
||||||
SELECT user_id,
|
|
||||||
some_vals
|
|
||||||
FROM (
|
|
||||||
SELECT * ,
|
|
||||||
Row_number() over (PARTITION BY "user_id" ORDER BY "user_id") AS "some_vals",
|
|
||||||
Random()
|
|
||||||
FROM users_table
|
|
||||||
) user_id
|
|
||||||
ORDER BY 1,
|
|
||||||
2 limit 10;
|
|
||||||
ERROR: cannot perform distributed planning on this query
|
|
||||||
DETAIL: Subqueries without group by clause are not supported yet
|
|
||||||
-- not supported due to non relation rte
|
-- not supported due to non relation rte
|
||||||
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
||||||
FROM
|
FROM
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -2,10 +2,8 @@
|
||||||
-- multi behavioral analytics
|
-- multi behavioral analytics
|
||||||
-- this file is intended to create the table requires for the tests
|
-- this file is intended to create the table requires for the tests
|
||||||
--
|
--
|
||||||
|
|
||||||
ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1400000;
|
ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1400000;
|
||||||
ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 1400000;
|
ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 1400000;
|
||||||
|
|
||||||
SET citus.shard_replication_factor = 1;
|
SET citus.shard_replication_factor = 1;
|
||||||
SET citus.shard_count = 4;
|
SET citus.shard_count = 4;
|
||||||
|
|
||||||
|
@ -30,10 +28,19 @@ SELECT create_distributed_table('agg_results_third', 'user_id');
|
||||||
CREATE TABLE agg_results_fourth (user_id int, value_1_agg int, value_2_agg int, value_3_agg float, value_4_agg bigint, agg_time timestamp);
|
CREATE TABLE agg_results_fourth (user_id int, value_1_agg int, value_2_agg int, value_3_agg float, value_4_agg bigint, agg_time timestamp);
|
||||||
SELECT create_distributed_table('agg_results_fourth', 'user_id');
|
SELECT create_distributed_table('agg_results_fourth', 'user_id');
|
||||||
|
|
||||||
|
CREATE TABLE users_ref_test_table(id int, it_name varchar(25), k_no int);
|
||||||
|
SELECT create_reference_table('users_ref_test_table');
|
||||||
|
INSERT INTO users_ref_test_table VALUES(1,'User_1',45);
|
||||||
|
INSERT INTO users_ref_test_table VALUES(2,'User_2',46);
|
||||||
|
INSERT INTO users_ref_test_table VALUES(3,'User_3',47);
|
||||||
|
INSERT INTO users_ref_test_table VALUES(4,'User_4',48);
|
||||||
|
INSERT INTO users_ref_test_table VALUES(5,'User_5',49);
|
||||||
|
INSERT INTO users_ref_test_table VALUES(6,'User_6',50);
|
||||||
|
|
||||||
COPY users_table FROM '@abs_srcdir@/data/users_table.data' WITH CSV;
|
COPY users_table FROM '@abs_srcdir@/data/users_table.data' WITH CSV;
|
||||||
COPY events_table FROM '@abs_srcdir@/data/events_table.data' WITH CSV;
|
COPY events_table FROM '@abs_srcdir@/data/events_table.data' WITH CSV;
|
||||||
|
|
||||||
-- create indexes for
|
-- create indexes for
|
||||||
CREATE INDEX is_index1 ON users_table(user_id);
|
CREATE INDEX is_index1 ON users_table(user_id);
|
||||||
CREATE INDEX is_index2 ON events_table(user_id);
|
CREATE INDEX is_index2 ON events_table(user_id);
|
||||||
CREATE INDEX is_index3 ON users_table(value_1);
|
CREATE INDEX is_index3 ON users_table(value_1);
|
||||||
|
@ -130,10 +137,10 @@ SELECT run_command_on_master_and_workers($f$
|
||||||
LEFTARG = user_composite_type,
|
LEFTARG = user_composite_type,
|
||||||
RIGHTARG = user_composite_type,
|
RIGHTARG = user_composite_type,
|
||||||
PROCEDURE = equal_user_composite_type_function,
|
PROCEDURE = equal_user_composite_type_function,
|
||||||
commutator = =,
|
commutator = =,
|
||||||
RESTRICT = eqsel,
|
RESTRICT = eqsel,
|
||||||
JOIN = eqjoinsel,
|
JOIN = eqjoinsel,
|
||||||
merges,
|
merges,
|
||||||
hashes
|
hashes
|
||||||
);
|
);
|
||||||
$f$);
|
$f$);
|
||||||
|
@ -187,12 +194,12 @@ SELECT run_command_on_master_and_workers($f$
|
||||||
OPERATOR 3 = (user_composite_type, user_composite_type),
|
OPERATOR 3 = (user_composite_type, user_composite_type),
|
||||||
OPERATOR 4 >= (user_composite_type, user_composite_type),
|
OPERATOR 4 >= (user_composite_type, user_composite_type),
|
||||||
OPERATOR 5 > (user_composite_type, user_composite_type),
|
OPERATOR 5 > (user_composite_type, user_composite_type),
|
||||||
|
|
||||||
FUNCTION 1 cmp_user_composite_type_function(user_composite_type, user_composite_type);
|
FUNCTION 1 cmp_user_composite_type_function(user_composite_type, user_composite_type);
|
||||||
$f$);
|
$f$);
|
||||||
|
|
||||||
SELECT run_command_on_master_and_workers($f$
|
SELECT run_command_on_master_and_workers($f$
|
||||||
|
|
||||||
CREATE OPERATOR CLASS cats_2_op_fam_class
|
CREATE OPERATOR CLASS cats_2_op_fam_class
|
||||||
DEFAULT FOR TYPE user_composite_type USING HASH AS
|
DEFAULT FOR TYPE user_composite_type USING HASH AS
|
||||||
OPERATOR 1 = (user_composite_type, user_composite_type),
|
OPERATOR 1 = (user_composite_type, user_composite_type),
|
||||||
|
@ -356,7 +363,7 @@ SET citus.shard_max_size TO "1MB";
|
||||||
|
|
||||||
CREATE TABLE events_reference_table (like events_table including all);
|
CREATE TABLE events_reference_table (like events_table including all);
|
||||||
SELECT create_reference_table('events_reference_table');
|
SELECT create_reference_table('events_reference_table');
|
||||||
CREATE INDEX events_ref_val2 on events_reference_table(value_2);
|
CREATE INDEX events_ref_val2 on events_reference_table(value_2);
|
||||||
INSERT INTO events_reference_table SELECT * FROM events_table;
|
INSERT INTO events_reference_table SELECT * FROM events_table;
|
||||||
|
|
||||||
CREATE TABLE users_reference_table (like users_table including all);
|
CREATE TABLE users_reference_table (like users_table including all);
|
||||||
|
|
|
@ -36,7 +36,7 @@ test: multi_load_data
|
||||||
|
|
||||||
test: multi_behavioral_analytics_create_table
|
test: multi_behavioral_analytics_create_table
|
||||||
test: multi_behavioral_analytics_basics multi_behavioral_analytics_single_shard_queries multi_insert_select_non_pushable_queries
|
test: multi_behavioral_analytics_basics multi_behavioral_analytics_single_shard_queries multi_insert_select_non_pushable_queries
|
||||||
test: multi_insert_select
|
test: multi_insert_select multi_insert_select_window
|
||||||
|
|
||||||
# ---
|
# ---
|
||||||
# Tests for partitioning support
|
# Tests for partitioning support
|
||||||
|
@ -51,7 +51,7 @@ test: multi_deparse_shard_query multi_distributed_transaction_id
|
||||||
test: multi_basic_queries multi_complex_expressions
|
test: multi_basic_queries multi_complex_expressions
|
||||||
test: multi_explain
|
test: multi_explain
|
||||||
test: multi_subquery multi_subquery_complex_queries multi_subquery_behavioral_analytics
|
test: multi_subquery multi_subquery_complex_queries multi_subquery_behavioral_analytics
|
||||||
test: multi_subquery_complex_reference_clause
|
test: multi_subquery_complex_reference_clause multi_subquery_window_functions
|
||||||
test: multi_subquery_in_where_reference_clause
|
test: multi_subquery_in_where_reference_clause
|
||||||
test: multi_subquery_union multi_subquery_in_where_clause multi_subquery_misc
|
test: multi_subquery_union multi_subquery_in_where_clause multi_subquery_misc
|
||||||
test: multi_reference_table
|
test: multi_reference_table
|
||||||
|
|
|
@ -51,9 +51,22 @@ SELECT create_distributed_table('agg_results_fourth', 'user_id');
|
||||||
|
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
CREATE TABLE users_ref_test_table(id int, it_name varchar(25), k_no int);
|
||||||
|
SELECT create_reference_table('users_ref_test_table');
|
||||||
|
create_reference_table
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
INSERT INTO users_ref_test_table VALUES(1,'User_1',45);
|
||||||
|
INSERT INTO users_ref_test_table VALUES(2,'User_2',46);
|
||||||
|
INSERT INTO users_ref_test_table VALUES(3,'User_3',47);
|
||||||
|
INSERT INTO users_ref_test_table VALUES(4,'User_4',48);
|
||||||
|
INSERT INTO users_ref_test_table VALUES(5,'User_5',49);
|
||||||
|
INSERT INTO users_ref_test_table VALUES(6,'User_6',50);
|
||||||
COPY users_table FROM '@abs_srcdir@/data/users_table.data' WITH CSV;
|
COPY users_table FROM '@abs_srcdir@/data/users_table.data' WITH CSV;
|
||||||
COPY events_table FROM '@abs_srcdir@/data/events_table.data' WITH CSV;
|
COPY events_table FROM '@abs_srcdir@/data/events_table.data' WITH CSV;
|
||||||
-- create indexes for
|
-- create indexes for
|
||||||
CREATE INDEX is_index1 ON users_table(user_id);
|
CREATE INDEX is_index1 ON users_table(user_id);
|
||||||
NOTICE: using one-phase commit for distributed DDL commands
|
NOTICE: using one-phase commit for distributed DDL commands
|
||||||
HINT: You can enable two-phase commit for extra safety with: SET citus.multi_shard_commit_protocol TO '2pc'
|
HINT: You can enable two-phase commit for extra safety with: SET citus.multi_shard_commit_protocol TO '2pc'
|
||||||
|
@ -187,10 +200,10 @@ SELECT run_command_on_master_and_workers($f$
|
||||||
LEFTARG = user_composite_type,
|
LEFTARG = user_composite_type,
|
||||||
RIGHTARG = user_composite_type,
|
RIGHTARG = user_composite_type,
|
||||||
PROCEDURE = equal_user_composite_type_function,
|
PROCEDURE = equal_user_composite_type_function,
|
||||||
commutator = =,
|
commutator = =,
|
||||||
RESTRICT = eqsel,
|
RESTRICT = eqsel,
|
||||||
JOIN = eqjoinsel,
|
JOIN = eqjoinsel,
|
||||||
merges,
|
merges,
|
||||||
hashes
|
hashes
|
||||||
);
|
);
|
||||||
$f$);
|
$f$);
|
||||||
|
@ -261,7 +274,7 @@ SELECT run_command_on_master_and_workers($f$
|
||||||
OPERATOR 3 = (user_composite_type, user_composite_type),
|
OPERATOR 3 = (user_composite_type, user_composite_type),
|
||||||
OPERATOR 4 >= (user_composite_type, user_composite_type),
|
OPERATOR 4 >= (user_composite_type, user_composite_type),
|
||||||
OPERATOR 5 > (user_composite_type, user_composite_type),
|
OPERATOR 5 > (user_composite_type, user_composite_type),
|
||||||
|
|
||||||
FUNCTION 1 cmp_user_composite_type_function(user_composite_type, user_composite_type);
|
FUNCTION 1 cmp_user_composite_type_function(user_composite_type, user_composite_type);
|
||||||
$f$);
|
$f$);
|
||||||
run_command_on_master_and_workers
|
run_command_on_master_and_workers
|
||||||
|
@ -270,7 +283,7 @@ $f$);
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
SELECT run_command_on_master_and_workers($f$
|
SELECT run_command_on_master_and_workers($f$
|
||||||
|
|
||||||
CREATE OPERATOR CLASS cats_2_op_fam_class
|
CREATE OPERATOR CLASS cats_2_op_fam_class
|
||||||
DEFAULT FOR TYPE user_composite_type USING HASH AS
|
DEFAULT FOR TYPE user_composite_type USING HASH AS
|
||||||
OPERATOR 1 = (user_composite_type, user_composite_type),
|
OPERATOR 1 = (user_composite_type, user_composite_type),
|
||||||
|
@ -428,7 +441,7 @@ SELECT create_reference_table('events_reference_table');
|
||||||
|
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
CREATE INDEX events_ref_val2 on events_reference_table(value_2);
|
CREATE INDEX events_ref_val2 on events_reference_table(value_2);
|
||||||
INSERT INTO events_reference_table SELECT * FROM events_table;
|
INSERT INTO events_reference_table SELECT * FROM events_table;
|
||||||
CREATE TABLE users_reference_table (like users_table including all);
|
CREATE TABLE users_reference_table (like users_table including all);
|
||||||
SELECT create_reference_table('users_reference_table');
|
SELECT create_reference_table('users_reference_table');
|
||||||
|
|
|
@ -0,0 +1,769 @@
|
||||||
|
-- ===================================================================
|
||||||
|
-- test insert select functionality for window functions
|
||||||
|
-- ===================================================================
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, time, rnk
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, rank() OVER my_win as rnk
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
WINDOW my_win AS (PARTITION BY user_id ORDER BY time DESC)
|
||||||
|
) as foo;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- the same test with different syntax
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, time, rnk
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, rank() OVER (PARTITION BY user_id ORDER BY time DESC) as rnk
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
) as foo;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- similar test with lag
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg, value_3_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, time, lag_event_type, row_no
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, lag(event_type) OVER my_win as lag_event_type, row_number() OVER my_win as row_no
|
||||||
|
FROM
|
||||||
|
events_table WINDOW my_win AS (PARTITION BY user_id ORDER BY time DESC)
|
||||||
|
) as foo;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- simple window function, partitioned and grouped by on the distribution key
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, rnk, tme
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
user_id, rank() OVER my_win as rnk, avg(value_2) as tme
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, date_trunc('day', time)
|
||||||
|
WINDOW my_win AS (PARTITION BY user_id ORDER BY avg(event_type) DESC)
|
||||||
|
) as foo;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- top level query has a group by on the result of the window function
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
min(user_id), min(time), lag_event_type
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, lag(event_type) OVER my_win as lag_event_type
|
||||||
|
FROM
|
||||||
|
events_table WINDOW my_win AS (PARTITION BY user_id ORDER BY time DESC)
|
||||||
|
) as foo
|
||||||
|
GROUP BY
|
||||||
|
lag_event_type;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- window functions should work along with joins as well
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg, value_2_agg)
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_table.user_id, lag(users_table.user_id) OVER w1, rank() OVER w1
|
||||||
|
FROM
|
||||||
|
users_table, events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id and
|
||||||
|
event_type < 25
|
||||||
|
WINDOW w1 AS (PARTITION BY users_table.user_id, events_table.event_type ORDER BY events_table.time)
|
||||||
|
) as foo;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- two window functions in a single subquery should work fine as well
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg, value_2_agg)
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_table.user_id, lag(users_table.user_id) OVER w1, rank() OVER w2
|
||||||
|
FROM
|
||||||
|
users_table, events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id and
|
||||||
|
event_type < 25
|
||||||
|
WINDOW w1 AS (PARTITION BY users_table.user_id, events_table.event_type ORDER BY events_table.time),
|
||||||
|
w2 AS (PARTITION BY users_table.user_id, (events_table.value_2 % 25) ORDER BY events_table.time)
|
||||||
|
) as foo;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- window functions should be fine within subquery joins
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg, value_2_agg, value_3_agg)
|
||||||
|
SELECT sub_1.user_id, max(lag_1), max(rank_1), max(rank_2) FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_table.user_id, lag(users_table.user_id) OVER w1 as lag_1, rank() OVER w2 as rank_1
|
||||||
|
FROM
|
||||||
|
users_table, events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id and
|
||||||
|
event_type < 25
|
||||||
|
WINDOW w1 AS (PARTITION BY users_table.user_id, events_table.event_type ORDER BY events_table.time),
|
||||||
|
w2 AS (PARTITION BY users_table.user_id, (events_table.value_2 % 25) ORDER BY events_table.time)
|
||||||
|
) as sub_1
|
||||||
|
JOIN
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_table.user_id, lag(users_table.user_id) OVER w1 as lag_2, rank() OVER w2 as rank_2
|
||||||
|
FROM
|
||||||
|
users_table, events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id and
|
||||||
|
event_type < 25
|
||||||
|
WINDOW w1 AS (PARTITION BY users_table.user_id, events_table.value_2 ORDER BY events_table.time),
|
||||||
|
w2 AS (PARTITION BY users_table.user_id, (events_table.value_2 % 50) ORDER BY events_table.time)
|
||||||
|
) as sub_2
|
||||||
|
ON(sub_1.user_id = sub_2.user_id)
|
||||||
|
GROUP BY
|
||||||
|
sub_1.user_id;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- GROUP BYs and PARTITION BYs should work fine together
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
avg(user_id), max(time), my_rank
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
user_id, date_trunc('day', time) as time, rank() OVER my_win as my_rank
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, date_trunc('day', time)
|
||||||
|
WINDOW my_win AS (PARTITION BY user_id ORDER BY count(*) DESC)
|
||||||
|
) as foo
|
||||||
|
WHERE
|
||||||
|
my_rank > 5
|
||||||
|
GROUP BY
|
||||||
|
my_rank;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- aggregates in the PARTITION BY is also allows
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
avg(user_id), max(time), my_rank
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
user_id, date_trunc('day', time) as time, rank() OVER my_win as my_rank
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, date_trunc('day', time)
|
||||||
|
WINDOW my_win AS (PARTITION BY user_id, avg(event_type%10)::int ORDER BY count(*) DESC)
|
||||||
|
) as foo
|
||||||
|
WHERE
|
||||||
|
my_rank > 0
|
||||||
|
GROUP BY
|
||||||
|
my_rank;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- GROUP BY should not necessarly be inclusive of partitioning
|
||||||
|
-- but this query doesn't make much sense
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg)
|
||||||
|
SELECT
|
||||||
|
avg(user_id), my_rank
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
user_id, rank() OVER my_win as my_rank
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
WINDOW my_win AS (PARTITION BY user_id, max(event_type) ORDER BY count(*) DESC)
|
||||||
|
) as foo
|
||||||
|
GROUP BY
|
||||||
|
my_rank;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- Group by has more columns than partition by which uses coordinator insert ... select
|
||||||
|
INSERT INTO agg_results(user_id, value_2_agg)
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id, SUM(value_2) OVER (PARTITION BY user_id)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_1, value_2
|
||||||
|
) a
|
||||||
|
ORDER BY
|
||||||
|
2 DESC, 1
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
INSERT INTO agg_results(user_id, value_2_agg)
|
||||||
|
SELECT user_id, max(sum) FROM (
|
||||||
|
SELECT
|
||||||
|
user_id, SUM(value_2) OVER (PARTITION BY user_id, value_1)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_1, value_2
|
||||||
|
) a
|
||||||
|
GROUP BY user_id;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- Subquery in where with window function
|
||||||
|
INSERT INTO agg_results(user_id)
|
||||||
|
SELECT
|
||||||
|
user_id
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WHERE
|
||||||
|
value_2 > 545 AND
|
||||||
|
value_2 < ALL (
|
||||||
|
SELECT
|
||||||
|
avg(value_3) OVER (PARTITION BY user_id)
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id
|
||||||
|
)
|
||||||
|
GROUP BY
|
||||||
|
user_id;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- Partition by with aggregate functions. This query does not make much sense since the
|
||||||
|
-- result of aggregate function will be the same for every row in a partition and it is
|
||||||
|
-- not going to affect the group that the count function will work on.
|
||||||
|
INSERT INTO agg_results(user_id, value_2_agg)
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT
|
||||||
|
user_id, COUNT(*) OVER (PARTITION BY user_id, MIN(value_2))
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
1
|
||||||
|
) a;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- Some more nested queries
|
||||||
|
INSERT INTO agg_results(user_id, value_2_agg, value_3_agg, value_4_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, rank, SUM(ABS(value_2 - value_3)) AS difference, COUNT(*) AS distinct_users
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
*, rank() OVER (PARTITION BY user_id ORDER BY value_2 DESC)
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
user_id, value_2, sum(value_3) OVER (PARTITION BY user_id, value_2) as value_3
|
||||||
|
FROM users_table
|
||||||
|
) AS A
|
||||||
|
) AS A
|
||||||
|
GROUP BY
|
||||||
|
user_id, rank;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT DISTINCT
|
||||||
|
f3.user_id, ABS(f2.sum - f3.sum)
|
||||||
|
FROM (
|
||||||
|
SELECT DISTINCT
|
||||||
|
user_id, sum(value_3) OVER (PARTITION BY user_id)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_3
|
||||||
|
) f3,
|
||||||
|
(
|
||||||
|
SELECT DISTINCT
|
||||||
|
user_id, sum(value_2) OVER (PARTITION BY user_id)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_2
|
||||||
|
) f2
|
||||||
|
WHERE
|
||||||
|
f3.user_id=f2.user_id
|
||||||
|
) a;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- test with reference table partitioned on columns from both
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT *
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id, count(id) OVER (PARTITION BY user_id, id)
|
||||||
|
FROM
|
||||||
|
users_table, users_ref_test_table
|
||||||
|
) a;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- Window functions with HAVING clause
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg)
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id, rank() OVER (PARTITION BY user_id ORDER BY value_1)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_1 HAVING count(*) > 1
|
||||||
|
) a;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- Window functions with HAVING clause which uses coordinator insert ... select
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg)
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id, rank() OVER (PARTITION BY user_id ORDER BY value_1)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_1 HAVING count(*) > 1
|
||||||
|
) a
|
||||||
|
ORDER BY
|
||||||
|
2 DESC, 1
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- Window function in View works
|
||||||
|
CREATE VIEW view_with_window_func AS
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id, rank() OVER (PARTITION BY user_id ORDER BY value_1)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_1
|
||||||
|
HAVING count(*) > 1;
|
||||||
|
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT *
|
||||||
|
FROM
|
||||||
|
view_with_window_func;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- Window function in View works and the query uses coordinator insert ... select
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT *
|
||||||
|
FROM
|
||||||
|
view_with_window_func
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
-- since there is a limit but not order, we cannot run avg(user_id)
|
||||||
|
SELECT count(*) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, max(avg)
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (1, 2, 3, 4, 5))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (6, 7, 8, 9, 10))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (11, 12, 13, 14, 15))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (16, 17, 18, 19, 20))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (21, 22, 23, 24, 25))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (26, 27, 28, 29, 30))
|
||||||
|
) b
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
LIMIT
|
||||||
|
5;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
-- since there is a limit but not order, we cannot test avg or distinct count
|
||||||
|
SELECT count(*) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, max(avg)
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (1, 2, 3, 4, 5))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (6, 7, 8, 9, 10))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (11, 12, 13, 14, 15))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (16, 17, 18, 19, 20))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (21, 22, 23, 24, 25))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (26, 27, 28, 29, 30))
|
||||||
|
) b
|
||||||
|
GROUP BY
|
||||||
|
user_id;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT *
|
||||||
|
FROM (
|
||||||
|
( SELECT user_id,
|
||||||
|
sum(counter)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
events_table) user_id_1
|
||||||
|
GROUP BY
|
||||||
|
user_id)
|
||||||
|
UNION
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(counter)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
events_table) user_id_2
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
)
|
||||||
|
) AS ftop
|
||||||
|
LIMIT
|
||||||
|
5;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
-- since there is a limit but not order, we cannot test avg or distinct count
|
||||||
|
SELECT count(*) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT *
|
||||||
|
FROM (
|
||||||
|
( SELECT user_id,
|
||||||
|
sum(counter)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
events_table) user_id_1
|
||||||
|
GROUP BY
|
||||||
|
user_id)
|
||||||
|
UNION
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(counter)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
events_table) user_id_2
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
)
|
||||||
|
) AS ftop;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
-- lets have some queries that Citus shouldn't push down
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, time, rnk
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, rank() OVER my_win as rnk
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
WINDOW my_win AS (PARTITION BY event_type ORDER BY time DESC)
|
||||||
|
) as foo
|
||||||
|
ORDER BY
|
||||||
|
3 DESC, 1 DESC, 2 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- user needs to supply partition by which should
|
||||||
|
-- include the distribution key
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, time, rnk
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, rank() OVER my_win as rnk
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
WINDOW my_win AS ()
|
||||||
|
) as foo
|
||||||
|
ORDER BY
|
||||||
|
3 DESC, 1 DESC, 2 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- user needs to supply partition by which should
|
||||||
|
-- include the distribution key
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, time, rnk
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, rank() OVER my_win as rnk
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
WINDOW my_win AS (ORDER BY time DESC)
|
||||||
|
) as foo
|
||||||
|
ORDER BY
|
||||||
|
3 DESC, 1 DESC, 2 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- w2 should not be pushed down
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg, value_2_agg)
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_table.user_id, lag(users_table.user_id) OVER w1, rank() OVER w2
|
||||||
|
FROM
|
||||||
|
users_table, events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id and
|
||||||
|
event_type < 25
|
||||||
|
WINDOW w1 AS (PARTITION BY users_table.user_id, events_table.event_type ORDER BY events_table.time),
|
||||||
|
w2 AS (PARTITION BY users_table.user_id+1, (events_table.value_2 % 25) ORDER BY events_table.time)
|
||||||
|
) as foo
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- GROUP BY includes the partition key, but not the WINDOW function
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, time, my_rank
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
user_id, date_trunc('day', time) as time, rank() OVER my_win as my_rank
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, date_trunc('day', time)
|
||||||
|
WINDOW my_win AS (ORDER BY avg(event_type))
|
||||||
|
) as foo
|
||||||
|
WHERE
|
||||||
|
my_rank > 125;
|
||||||
|
|
||||||
|
-- GROUP BY includes the partition key, but not the WINDOW function
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id, time, my_rank
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
user_id, date_trunc('day', time) as time, rank() OVER my_win as my_rank
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, date_trunc('day', time)
|
||||||
|
WINDOW my_win AS (PARTITION BY date_trunc('day', time) ORDER BY avg(event_type))
|
||||||
|
) as foo
|
||||||
|
WHERE
|
||||||
|
my_rank > 125;
|
||||||
|
|
||||||
|
-- w2 should not be allowed
|
||||||
|
INSERT INTO agg_results (user_id, value_2_agg, value_3_agg)
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_table.user_id, lag(users_table.user_id) OVER w1, rank() OVER w2
|
||||||
|
FROM
|
||||||
|
users_table, events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id and
|
||||||
|
event_type < 25
|
||||||
|
WINDOW w1 AS (PARTITION BY users_table.user_id, events_table.event_type ORDER BY events_table.time),
|
||||||
|
w2 AS (ORDER BY events_table.time)
|
||||||
|
) as foo;
|
||||||
|
|
||||||
|
-- unsupported window function with an override
|
||||||
|
INSERT INTO agg_results(user_id, agg_time, value_2_agg)
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT
|
||||||
|
user_id, date_trunc('day', time) as time, sum(rank) OVER w2
|
||||||
|
FROM (
|
||||||
|
SELECT DISTINCT
|
||||||
|
user_id as user_id, time, rank() over w1
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WINDOW
|
||||||
|
w AS (PARTITION BY time), w1 AS (w ORDER BY value_2, value_3)
|
||||||
|
) fab
|
||||||
|
WINDOW
|
||||||
|
w2 as (PARTITION BY user_id, time)
|
||||||
|
) a;
|
||||||
|
|
||||||
|
-- Subquery in where with unsupported window function
|
||||||
|
INSERT INTO agg_results(user_id)
|
||||||
|
SELECT
|
||||||
|
user_id
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WHERE
|
||||||
|
value_2 > 545 AND
|
||||||
|
value_2 < ALL (
|
||||||
|
SELECT
|
||||||
|
avg(value_3) OVER ()
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id
|
||||||
|
)
|
||||||
|
GROUP BY
|
||||||
|
user_id;
|
||||||
|
|
||||||
|
-- Aggregate function on distribution column should error out
|
||||||
|
INSERT INTO agg_results(user_id, value_2_agg)
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT
|
||||||
|
user_id, COUNT(*) OVER (PARTITION BY sum(user_id), MIN(value_2))
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
) a;
|
||||||
|
|
||||||
|
-- UNION with only one subquery which has a partition on non-distribution column should
|
||||||
|
-- error out
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT *
|
||||||
|
FROM (
|
||||||
|
( SELECT user_id,
|
||||||
|
sum(counter)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
events_table) user_id_1
|
||||||
|
GROUP BY
|
||||||
|
user_id)
|
||||||
|
UNION
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(counter)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_2) over (partition by event_type) AS counter
|
||||||
|
FROM
|
||||||
|
events_table) user_id_2
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
)
|
||||||
|
) AS ftop;
|
||||||
|
|
||||||
|
DROP VIEW view_with_window_func;
|
|
@ -2160,18 +2160,6 @@ GROUP BY
|
||||||
ORDER BY
|
ORDER BY
|
||||||
types;
|
types;
|
||||||
|
|
||||||
-- not supported due to window functions
|
|
||||||
SELECT user_id,
|
|
||||||
some_vals
|
|
||||||
FROM (
|
|
||||||
SELECT * ,
|
|
||||||
Row_number() over (PARTITION BY "user_id" ORDER BY "user_id") AS "some_vals",
|
|
||||||
Random()
|
|
||||||
FROM users_table
|
|
||||||
) user_id
|
|
||||||
ORDER BY 1,
|
|
||||||
2 limit 10;
|
|
||||||
|
|
||||||
-- not supported due to non relation rte
|
-- not supported due to non relation rte
|
||||||
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
||||||
FROM
|
FROM
|
||||||
|
|
|
@ -7,7 +7,7 @@
|
||||||
-- We don't need shard id sequence here, so commented out to prevent conflicts with concurrent tests
|
-- We don't need shard id sequence here, so commented out to prevent conflicts with concurrent tests
|
||||||
-- ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1400000;
|
-- ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1400000;
|
||||||
ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 1400000;
|
ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 1400000;
|
||||||
|
|
||||||
SET citus.enable_router_execution TO FALSE;
|
SET citus.enable_router_execution TO FALSE;
|
||||||
|
|
||||||
CREATE TABLE user_buy_test_table(user_id int, item_id int, buy_count int);
|
CREATE TABLE user_buy_test_table(user_id int, item_id int, buy_count int);
|
||||||
|
@ -23,15 +23,6 @@ INSERT INTO users_return_test_table VALUES(4,1,1);
|
||||||
INSERT INTO users_return_test_table VALUES(1,3,1);
|
INSERT INTO users_return_test_table VALUES(1,3,1);
|
||||||
INSERT INTO users_return_test_table VALUES(3,2,2);
|
INSERT INTO users_return_test_table VALUES(3,2,2);
|
||||||
|
|
||||||
CREATE TABLE users_ref_test_table(id int, it_name varchar(25), k_no int);
|
|
||||||
SELECT create_reference_table('users_ref_test_table');
|
|
||||||
INSERT INTO users_ref_test_table VALUES(1,'User_1',45);
|
|
||||||
INSERT INTO users_ref_test_table VALUES(2,'User_2',46);
|
|
||||||
INSERT INTO users_ref_test_table VALUES(3,'User_3',47);
|
|
||||||
INSERT INTO users_ref_test_table VALUES(4,'User_4',48);
|
|
||||||
INSERT INTO users_ref_test_table VALUES(5,'User_5',49);
|
|
||||||
INSERT INTO users_ref_test_table VALUES(6,'User_6',50);
|
|
||||||
|
|
||||||
-- Simple Join test with reference table
|
-- Simple Join test with reference table
|
||||||
SELECT count(*) FROM
|
SELECT count(*) FROM
|
||||||
(SELECT random() FROM user_buy_test_table JOIN users_ref_test_table
|
(SELECT random() FROM user_buy_test_table JOIN users_ref_test_table
|
||||||
|
@ -101,7 +92,7 @@ SELECT count(*) FROM
|
||||||
ON user_buy_test_table.user_id > users_ref_test_table.id AND users_ref_test_table.k_no > 44 AND user_buy_test_table.user_id > 44) subquery_2
|
ON user_buy_test_table.user_id > users_ref_test_table.id AND users_ref_test_table.k_no > 44 AND user_buy_test_table.user_id > 44) subquery_2
|
||||||
WHERE subquery_1.user_id = subquery_2.user_id ;
|
WHERE subquery_1.user_id = subquery_2.user_id ;
|
||||||
|
|
||||||
-- Should be able to push down since reference tables are inner joined
|
-- Should be able to push down since reference tables are inner joined
|
||||||
-- with hash distributed tables, the results of those joins are the parts of
|
-- with hash distributed tables, the results of those joins are the parts of
|
||||||
-- an outer join
|
-- an outer join
|
||||||
SELECT subquery_2.id FROM
|
SELECT subquery_2.id FROM
|
||||||
|
@ -122,75 +113,75 @@ SELECT * FROM
|
||||||
|
|
||||||
-- should be able to pushdown since reference table is in the
|
-- should be able to pushdown since reference table is in the
|
||||||
-- inner part of the left join
|
-- inner part of the left join
|
||||||
SELECT
|
SELECT
|
||||||
user_id, sum(value_1)
|
user_id, sum(value_1)
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
users_table.user_id, users_table.value_1, random()
|
users_table.user_id, users_table.value_1, random()
|
||||||
FROM
|
FROM
|
||||||
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
|
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
|
||||||
INNER JOIN events_reference_table ON (events_reference_table.value_2 = users_table.user_id)
|
INNER JOIN events_reference_table ON (events_reference_table.value_2 = users_table.user_id)
|
||||||
) as foo
|
) as foo
|
||||||
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||||
|
|
||||||
-- same query as above, reference table is wrapped into a subquery
|
-- same query as above, reference table is wrapped into a subquery
|
||||||
SELECT
|
SELECT
|
||||||
user_id, sum(value_1)
|
user_id, sum(value_1)
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
users_table.user_id, users_table.value_1, random()
|
users_table.user_id, users_table.value_1, random()
|
||||||
FROM
|
FROM
|
||||||
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
|
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
|
||||||
INNER JOIN (SELECT *, random() FROM events_reference_table) as ref_all ON (ref_all.value_2 = users_table.user_id)
|
INNER JOIN (SELECT *, random() FROM events_reference_table) as ref_all ON (ref_all.value_2 = users_table.user_id)
|
||||||
) as foo
|
) as foo
|
||||||
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||||
|
|
||||||
-- should be able to pushdown since reference table is in the
|
-- should be able to pushdown since reference table is in the
|
||||||
-- inner part of the left join
|
-- inner part of the left join
|
||||||
SELECT
|
SELECT
|
||||||
user_id, sum(value_1)
|
user_id, sum(value_1)
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
users_table.user_id, users_table.value_1, random()
|
users_table.user_id, users_table.value_1, random()
|
||||||
FROM
|
FROM
|
||||||
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
|
users_table LEFT JOIN events_table ON (users_table.user_id = events_table.user_id)
|
||||||
LEFT JOIN events_reference_table ON (events_reference_table.value_2 = users_table.user_id)
|
LEFT JOIN events_reference_table ON (events_reference_table.value_2 = users_table.user_id)
|
||||||
) as foo
|
) as foo
|
||||||
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||||
|
|
||||||
-- should not be able to pushdown since reference table is in the
|
-- should not be able to pushdown since reference table is in the
|
||||||
-- direct outer part of the left join
|
-- direct outer part of the left join
|
||||||
SELECT
|
SELECT
|
||||||
user_id, sum(value_1)
|
user_id, sum(value_1)
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
users_table.user_id, users_table.value_1, random()
|
users_table.user_id, users_table.value_1, random()
|
||||||
FROM
|
FROM
|
||||||
events_reference_table LEFT JOIN users_table ON (users_table.user_id = events_reference_table.value_2)
|
events_reference_table LEFT JOIN users_table ON (users_table.user_id = events_reference_table.value_2)
|
||||||
LEFT JOIN events_table ON (events_table.user_id = users_table.user_id)
|
LEFT JOIN events_table ON (events_table.user_id = users_table.user_id)
|
||||||
) as foo
|
) as foo
|
||||||
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||||
|
|
||||||
-- should not be able to pushdown since reference table is in the
|
-- should not be able to pushdown since reference table is in the
|
||||||
-- direct outer part of the left join wrapped into a subquery
|
-- direct outer part of the left join wrapped into a subquery
|
||||||
SELECT
|
SELECT
|
||||||
*
|
*
|
||||||
FROM
|
FROM
|
||||||
(SELECT *, random() FROM events_reference_table) as ref_all LEFT JOIN users_table
|
(SELECT *, random() FROM events_reference_table) as ref_all LEFT JOIN users_table
|
||||||
ON (users_table.user_id = ref_all.value_2);
|
ON (users_table.user_id = ref_all.value_2);
|
||||||
|
|
||||||
-- should not be able to pushdown since reference table is in the
|
-- should not be able to pushdown since reference table is in the
|
||||||
-- outer part of the left join
|
-- outer part of the left join
|
||||||
SELECT
|
SELECT
|
||||||
user_id, sum(value_1)
|
user_id, sum(value_1)
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
users_table.user_id, users_table.value_1, random()
|
users_table.user_id, users_table.value_1, random()
|
||||||
FROM
|
FROM
|
||||||
events_reference_table LEFT JOIN users_table ON (users_table.user_id = events_reference_table.value_2)
|
events_reference_table LEFT JOIN users_table ON (users_table.user_id = events_reference_table.value_2)
|
||||||
LEFT JOIN events_table ON (events_table.user_id = users_table.user_id)
|
LEFT JOIN events_table ON (events_table.user_id = users_table.user_id)
|
||||||
) as foo
|
) as foo
|
||||||
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
GROUP BY user_id ORDER BY 2 DESC LIMIT 10;
|
||||||
|
|
||||||
-- should be able to pushdown since reference table is in the
|
-- should be able to pushdown since reference table is in the
|
||||||
-- inner part of the left join
|
-- inner part of the left join
|
||||||
|
@ -198,18 +189,18 @@ SELECT * FROM
|
||||||
(
|
(
|
||||||
SELECT DISTINCT foo.user_id
|
SELECT DISTINCT foo.user_id
|
||||||
FROM
|
FROM
|
||||||
((SELECT
|
((SELECT
|
||||||
"events"."time", "events"."user_id" as event_user_id, value_2 as event_val_2, random()
|
"events"."time", "events"."user_id" as event_user_id, value_2 as event_val_2, random()
|
||||||
FROM
|
FROM
|
||||||
events_reference_table as "events"
|
events_reference_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
event_type > 80) as "temp_data_queries"
|
event_type > 80) as "temp_data_queries"
|
||||||
INNER JOIN
|
INNER JOIN
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id"
|
"users"."user_id"
|
||||||
FROM
|
FROM
|
||||||
users_table as "users"
|
users_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 80 and value_2 = 5) as foo_in ON (event_val_2 = user_id)) as foo LEFT JOIN
|
user_id > 80 and value_2 = 5) as foo_in ON (event_val_2 = user_id)) as foo LEFT JOIN
|
||||||
(SELECT user_id as user_user_id FROM users_table) as fooo ON (user_id = user_user_id)) as bar;
|
(SELECT user_id as user_user_id FROM users_table) as fooo ON (user_id = user_user_id)) as bar;
|
||||||
|
|
||||||
|
@ -218,23 +209,23 @@ SELECT * FROM
|
||||||
(
|
(
|
||||||
SELECT DISTINCT foo.user_id
|
SELECT DISTINCT foo.user_id
|
||||||
FROM
|
FROM
|
||||||
((SELECT
|
((SELECT
|
||||||
"events"."time", "events"."user_id" as event_user_id, value_2 as event_val_2, random()
|
"events"."time", "events"."user_id" as event_user_id, value_2 as event_val_2, random()
|
||||||
FROM
|
FROM
|
||||||
events_reference_table as "events"
|
events_reference_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
event_type > 80) as "temp_data_queries"
|
event_type > 80) as "temp_data_queries"
|
||||||
LEFT JOIN
|
LEFT JOIN
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id"
|
"users"."user_id"
|
||||||
FROM
|
FROM
|
||||||
users_table as "users"
|
users_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 80 and value_2 = 5) as foo_in ON (event_val_2 = user_id)) as foo LEFT JOIN
|
user_id > 80 and value_2 = 5) as foo_in ON (event_val_2 = user_id)) as foo LEFT JOIN
|
||||||
(SELECT user_id as user_user_id FROM users_table) as fooo ON (user_id = user_user_id)) as bar;
|
(SELECT user_id as user_user_id FROM users_table) as fooo ON (user_id = user_user_id)) as bar;
|
||||||
|
|
||||||
-- we could even suuport the following where the subquery
|
-- we could even suuport the following where the subquery
|
||||||
-- on the outer part of the left join contains a reference table
|
-- on the outer part of the left join contains a reference table
|
||||||
SELECT max(events_all.cnt), events_all.usr_id
|
SELECT max(events_all.cnt), events_all.usr_id
|
||||||
FROM
|
FROM
|
||||||
(SELECT users_table.user_id as usr_id,
|
(SELECT users_table.user_id as usr_id,
|
||||||
|
@ -246,7 +237,7 @@ LEFT JOIN events_table ON (events_all.usr_id = events_table.user_id) GROUP BY 2
|
||||||
-- but, we fail to pushdown the following query where join that reference table appears
|
-- but, we fail to pushdown the following query where join that reference table appears
|
||||||
-- wrapped into a subquery
|
-- wrapped into a subquery
|
||||||
SELECT max(events_all.cnt),
|
SELECT max(events_all.cnt),
|
||||||
events_all.usr_id
|
events_all.usr_id
|
||||||
FROM(
|
FROM(
|
||||||
SELECT *, random() FROM
|
SELECT *, random() FROM
|
||||||
(SELECT users_table.user_id AS usr_id, count(*) AS cnt
|
(SELECT users_table.user_id AS usr_id, count(*) AS cnt
|
||||||
|
@ -263,141 +254,141 @@ LIMIT 5;
|
||||||
SET citus.subquery_pushdown to ON;
|
SET citus.subquery_pushdown to ON;
|
||||||
SELECT user_id, lastseen
|
SELECT user_id, lastseen
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"some_users_data".user_id, lastseen
|
"some_users_data".user_id, lastseen
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
filter_users_1.user_id, time AS lastseen
|
filter_users_1.user_id, time AS lastseen
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
user_where_1_1.user_id
|
user_where_1_1.user_id
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id"
|
"users"."user_id"
|
||||||
FROM
|
FROM
|
||||||
users_reference_table as "users"
|
users_reference_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 12 and user_id < 16 and value_1 > 20) user_where_1_1
|
user_id > 12 and user_id < 16 and value_1 > 20) user_where_1_1
|
||||||
INNER JOIN
|
INNER JOIN
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id"
|
"users"."user_id"
|
||||||
FROM
|
FROM
|
||||||
users_reference_table as "users"
|
users_reference_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 12 and user_id < 16 and value_2 > 60) user_where_1_join_1
|
user_id > 12 and user_id < 16 and value_2 > 60) user_where_1_join_1
|
||||||
ON ("user_where_1_1".user_id = "user_where_1_join_1".user_id))
|
ON ("user_where_1_1".user_id = "user_where_1_join_1".user_id))
|
||||||
filter_users_1
|
filter_users_1
|
||||||
JOIN LATERAL
|
JOIN LATERAL
|
||||||
(SELECT
|
(SELECT
|
||||||
user_id, time
|
user_id, time
|
||||||
FROM
|
FROM
|
||||||
events_reference_table as "events"
|
events_reference_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 12 and user_id < 16 AND
|
user_id > 12 and user_id < 16 AND
|
||||||
user_id = filter_users_1.user_id
|
user_id = filter_users_1.user_id
|
||||||
ORDER BY
|
ORDER BY
|
||||||
time DESC
|
time DESC
|
||||||
LIMIT 1) "last_events_1"
|
LIMIT 1) "last_events_1"
|
||||||
ON TRUE
|
ON TRUE
|
||||||
ORDER BY
|
ORDER BY
|
||||||
time DESC
|
time DESC
|
||||||
LIMIT 10) "some_recent_users"
|
LIMIT 10) "some_recent_users"
|
||||||
JOIN LATERAL
|
JOIN LATERAL
|
||||||
(SELECT
|
(SELECT
|
||||||
"users".user_id
|
"users".user_id
|
||||||
FROM
|
FROM
|
||||||
users_reference_table as "users"
|
users_reference_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
"users"."user_id" = "some_recent_users"."user_id" AND
|
"users"."user_id" = "some_recent_users"."user_id" AND
|
||||||
"users"."value_2" > 70
|
"users"."value_2" > 70
|
||||||
LIMIT 1) "some_users_data"
|
LIMIT 1) "some_users_data"
|
||||||
ON TRUE
|
ON TRUE
|
||||||
ORDER BY
|
ORDER BY
|
||||||
lastseen DESC
|
lastseen DESC
|
||||||
LIMIT 10) "some_users"
|
LIMIT 10) "some_users"
|
||||||
ORDER BY
|
ORDER BY
|
||||||
user_id DESC
|
user_id DESC
|
||||||
LIMIT 10;
|
LIMIT 10;
|
||||||
SET citus.subquery_pushdown to OFF;
|
SET citus.subquery_pushdown to OFF;
|
||||||
|
|
||||||
-- NESTED INNER JOINs with reference tables
|
-- NESTED INNER JOINs with reference tables
|
||||||
SELECT
|
SELECT
|
||||||
count(*) AS value, "generated_group_field"
|
count(*) AS value, "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
DISTINCT "pushedDownQuery"."user_id", "generated_group_field"
|
DISTINCT "pushedDownQuery"."user_id", "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"eventQuery"."user_id", "eventQuery"."time", random(), ("eventQuery"."value_2") AS "generated_group_field"
|
"eventQuery"."user_id", "eventQuery"."time", random(), ("eventQuery"."value_2") AS "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
*
|
*
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."time", "events"."user_id", "events"."value_2"
|
"events"."time", "events"."user_id", "events"."value_2"
|
||||||
FROM
|
FROM
|
||||||
events_table as "events"
|
events_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 10 and user_id < 40 AND event_type IN (40, 41, 42, 43, 44, 45) ) "temp_data_queries"
|
user_id > 10 and user_id < 40 AND event_type IN (40, 41, 42, 43, 44, 45) ) "temp_data_queries"
|
||||||
INNER JOIN
|
INNER JOIN
|
||||||
(SELECT
|
(SELECT
|
||||||
user_where_1_1.real_user_id
|
user_where_1_1.real_user_id
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id" as real_user_id
|
"users"."user_id" as real_user_id
|
||||||
FROM
|
FROM
|
||||||
users_reference_table as "users"
|
users_reference_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 10 and user_id < 40 and value_2 > 50 ) user_where_1_1
|
user_id > 10 and user_id < 40 and value_2 > 50 ) user_where_1_1
|
||||||
INNER JOIN
|
INNER JOIN
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id"
|
"users"."user_id"
|
||||||
FROM
|
FROM
|
||||||
users_reference_table as "users"
|
users_reference_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 10 and user_id < 40 and value_3 > 50 ) user_where_1_join_1
|
user_id > 10 and user_id < 40 and value_3 > 50 ) user_where_1_join_1
|
||||||
ON ("user_where_1_1".real_user_id = "user_where_1_join_1".user_id)) "user_filters_1"
|
ON ("user_where_1_1".real_user_id = "user_where_1_join_1".user_id)) "user_filters_1"
|
||||||
ON ("temp_data_queries".user_id = "user_filters_1".real_user_id)) "eventQuery") "pushedDownQuery") "pushedDownQuery"
|
ON ("temp_data_queries".user_id = "user_filters_1".real_user_id)) "eventQuery") "pushedDownQuery") "pushedDownQuery"
|
||||||
GROUP BY
|
GROUP BY
|
||||||
"generated_group_field"
|
"generated_group_field"
|
||||||
ORDER BY
|
ORDER BY
|
||||||
generated_group_field DESC, value DESC;
|
generated_group_field DESC, value DESC;
|
||||||
|
|
||||||
-- single level inner joins with reference tables
|
-- single level inner joins with reference tables
|
||||||
SELECT
|
SELECT
|
||||||
"value_3", count(*) AS cnt
|
"value_3", count(*) AS cnt
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"value_3", "user_id", random()
|
"value_3", "user_id", random()
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
users_in_segment_1.user_id, value_3
|
users_in_segment_1.user_id, value_3
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
user_id, value_3 * 2 as value_3
|
user_id, value_3 * 2 as value_3
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
user_id, value_3
|
user_id, value_3
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id", value_3
|
"users"."user_id", value_3
|
||||||
FROM
|
FROM
|
||||||
users_reference_table as "users"
|
users_reference_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 10 and user_id < 40 and value_2 > 30
|
user_id > 10 and user_id < 40 and value_2 > 30
|
||||||
) simple_user_where_1
|
) simple_user_where_1
|
||||||
) all_buckets_1
|
) all_buckets_1
|
||||||
) users_in_segment_1
|
) users_in_segment_1
|
||||||
JOIN
|
JOIN
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id"
|
"users"."user_id"
|
||||||
FROM
|
FROM
|
||||||
users_reference_table as "users"
|
users_reference_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 10 and user_id < 40 and value_2 > 60
|
user_id > 10 and user_id < 40 and value_2 > 60
|
||||||
) some_users_data
|
) some_users_data
|
||||||
ON ("users_in_segment_1".user_id = "some_users_data".user_id)
|
ON ("users_in_segment_1".user_id = "some_users_data".user_id)
|
||||||
) segmentalias_1) "tempQuery"
|
) segmentalias_1) "tempQuery"
|
||||||
GROUP BY "value_3"
|
GROUP BY "value_3"
|
||||||
ORDER BY cnt, value_3 DESC LIMIT 10;
|
ORDER BY cnt, value_3 DESC LIMIT 10;
|
||||||
|
|
||||||
|
@ -407,42 +398,42 @@ SELECT *
|
||||||
FROM
|
FROM
|
||||||
(SELECT "some_users_data".user_id, "some_recent_users".value_3
|
(SELECT "some_users_data".user_id, "some_recent_users".value_3
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
filter_users_1.user_id, value_3
|
filter_users_1.user_id, value_3
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id"
|
"users"."user_id"
|
||||||
FROM
|
FROM
|
||||||
users_reference_table as "users"
|
users_reference_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 20 and user_id < 70 and users.value_2 = 200) filter_users_1
|
user_id > 20 and user_id < 70 and users.value_2 = 200) filter_users_1
|
||||||
JOIN LATERAL
|
JOIN LATERAL
|
||||||
(SELECT
|
(SELECT
|
||||||
user_id, value_3
|
user_id, value_3
|
||||||
FROM
|
FROM
|
||||||
events_reference_table as "events"
|
events_reference_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 20 and user_id < 70 AND
|
user_id > 20 and user_id < 70 AND
|
||||||
("events".user_id = "filter_users_1".user_id)
|
("events".user_id = "filter_users_1".user_id)
|
||||||
ORDER BY
|
ORDER BY
|
||||||
value_3 DESC
|
value_3 DESC
|
||||||
LIMIT 1) "last_events_1" ON true
|
LIMIT 1) "last_events_1" ON true
|
||||||
ORDER BY value_3 DESC
|
ORDER BY value_3 DESC
|
||||||
LIMIT 10) "some_recent_users"
|
LIMIT 10) "some_recent_users"
|
||||||
JOIN LATERAL
|
JOIN LATERAL
|
||||||
(SELECT
|
(SELECT
|
||||||
"users".user_id
|
"users".user_id
|
||||||
FROM
|
FROM
|
||||||
users_reference_table as "users"
|
users_reference_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
"users"."user_id" = "some_recent_users"."user_id" AND
|
"users"."user_id" = "some_recent_users"."user_id" AND
|
||||||
users.value_2 > 200
|
users.value_2 > 200
|
||||||
LIMIT 1) "some_users_data" ON true
|
LIMIT 1) "some_users_data" ON true
|
||||||
ORDER BY
|
ORDER BY
|
||||||
value_3 DESC
|
value_3 DESC
|
||||||
LIMIT 10) "some_users"
|
LIMIT 10) "some_users"
|
||||||
ORDER BY
|
ORDER BY
|
||||||
value_3 DESC
|
value_3 DESC
|
||||||
LIMIT 10;
|
LIMIT 10;
|
||||||
SET citus.subquery_pushdown to OFF;
|
SET citus.subquery_pushdown to OFF;
|
||||||
|
|
||||||
|
@ -451,37 +442,37 @@ SET citus.subquery_pushdown to OFF;
|
||||||
SELECT
|
SELECT
|
||||||
count(*) AS cnt, "generated_group_field"
|
count(*) AS cnt, "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"eventQuery"."user_id", random(), generated_group_field
|
"eventQuery"."user_id", random(), generated_group_field
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"multi_group_wrapper_1".*, generated_group_field, random()
|
"multi_group_wrapper_1".*, generated_group_field, random()
|
||||||
FROM
|
FROM
|
||||||
(SELECT *
|
(SELECT *
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."time", "events"."user_id" as event_user_id
|
"events"."time", "events"."user_id" as event_user_id
|
||||||
FROM
|
FROM
|
||||||
events_table as "events"
|
events_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 80) "temp_data_queries"
|
user_id > 80) "temp_data_queries"
|
||||||
INNER JOIN
|
INNER JOIN
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id"
|
"users"."user_id"
|
||||||
FROM
|
FROM
|
||||||
users_reference_table as "users"
|
users_reference_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 80 and value_2 = 5) "user_filters_1"
|
user_id > 80 and value_2 = 5) "user_filters_1"
|
||||||
ON ("temp_data_queries".event_user_id = "user_filters_1".user_id)) AS "multi_group_wrapper_1"
|
ON ("temp_data_queries".event_user_id = "user_filters_1".user_id)) AS "multi_group_wrapper_1"
|
||||||
LEFT JOIN
|
LEFT JOIN
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
|
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
users_table as "users") "left_group_by_1"
|
users_table as "users") "left_group_by_1"
|
||||||
ON ("left_group_by_1".user_id = "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
|
ON ("left_group_by_1".user_id = "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
|
||||||
group BY
|
group BY
|
||||||
"generated_group_field"
|
"generated_group_field"
|
||||||
ORDER BY
|
ORDER BY
|
||||||
cnt DESC, generated_group_field ASC
|
cnt DESC, generated_group_field ASC
|
||||||
LIMIT 10;
|
LIMIT 10;
|
||||||
|
|
||||||
|
@ -490,42 +481,42 @@ count(*) AS cnt, "generated_group_field"
|
||||||
SELECT
|
SELECT
|
||||||
count(*) AS cnt, "generated_group_field"
|
count(*) AS cnt, "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"eventQuery"."user_id", random(), generated_group_field
|
"eventQuery"."user_id", random(), generated_group_field
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"multi_group_wrapper_1".*, generated_group_field, random()
|
"multi_group_wrapper_1".*, generated_group_field, random()
|
||||||
FROM
|
FROM
|
||||||
(SELECT *
|
(SELECT *
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."time", "events"."user_id" as event_user_id
|
"events"."time", "events"."user_id" as event_user_id
|
||||||
FROM
|
FROM
|
||||||
events_table as "events"
|
events_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 80) "temp_data_queries"
|
user_id > 80) "temp_data_queries"
|
||||||
INNER JOIN
|
INNER JOIN
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id"
|
"users"."user_id"
|
||||||
FROM
|
FROM
|
||||||
users_table as "users"
|
users_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 80 and value_2 = 5) "user_filters_1"
|
user_id > 80 and value_2 = 5) "user_filters_1"
|
||||||
ON ("temp_data_queries".event_user_id = "user_filters_1".user_id)) AS "multi_group_wrapper_1"
|
ON ("temp_data_queries".event_user_id = "user_filters_1".user_id)) AS "multi_group_wrapper_1"
|
||||||
RIGHT JOIN
|
RIGHT JOIN
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
|
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
users_reference_table as "users") "right_group_by_1"
|
users_reference_table as "users") "right_group_by_1"
|
||||||
ON ("right_group_by_1".user_id = "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
|
ON ("right_group_by_1".user_id = "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
|
||||||
group BY
|
group BY
|
||||||
"generated_group_field"
|
"generated_group_field"
|
||||||
ORDER BY
|
ORDER BY
|
||||||
cnt DESC, generated_group_field ASC
|
cnt DESC, generated_group_field ASC
|
||||||
LIMIT 10;
|
LIMIT 10;
|
||||||
|
|
||||||
-- right join where the inner part of the join includes a reference table
|
-- right join where the inner part of the join includes a reference table
|
||||||
-- joined with hash partitioned table using non-equi join
|
-- joined with hash partitioned table using non-equi join
|
||||||
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event
|
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event
|
||||||
FROM (
|
FROM (
|
||||||
SELECT
|
SELECT
|
||||||
|
@ -585,42 +576,42 @@ ORDER BY user_id;
|
||||||
|
|
||||||
|
|
||||||
-- LEFT JOINs used with INNER JOINs
|
-- LEFT JOINs used with INNER JOINs
|
||||||
-- events_table and users_reference_table joined
|
-- events_table and users_reference_table joined
|
||||||
-- with event_table.non_part_key < reference_table.any_key
|
-- with event_table.non_part_key < reference_table.any_key
|
||||||
SELECT
|
SELECT
|
||||||
count(*) AS cnt, "generated_group_field"
|
count(*) AS cnt, "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"eventQuery"."user_id", random(), generated_group_field
|
"eventQuery"."user_id", random(), generated_group_field
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"multi_group_wrapper_1".*, generated_group_field, random()
|
"multi_group_wrapper_1".*, generated_group_field, random()
|
||||||
FROM
|
FROM
|
||||||
(SELECT *
|
(SELECT *
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."time", "events"."user_id" as event_user_id
|
"events"."time", "events"."user_id" as event_user_id
|
||||||
FROM
|
FROM
|
||||||
events_table as "events"
|
events_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 80) "temp_data_queries"
|
user_id > 80) "temp_data_queries"
|
||||||
INNER JOIN
|
INNER JOIN
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id"
|
"users"."user_id"
|
||||||
FROM
|
FROM
|
||||||
users_reference_table as "users"
|
users_reference_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 80 and value_2 = 5) "user_filters_1"
|
user_id > 80 and value_2 = 5) "user_filters_1"
|
||||||
ON ("temp_data_queries".event_user_id < "user_filters_1".user_id)) AS "multi_group_wrapper_1"
|
ON ("temp_data_queries".event_user_id < "user_filters_1".user_id)) AS "multi_group_wrapper_1"
|
||||||
RIGHT JOIN
|
RIGHT JOIN
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
|
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
users_table as "users") "left_group_by_1"
|
users_table as "users") "left_group_by_1"
|
||||||
ON ("left_group_by_1".user_id = "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
|
ON ("left_group_by_1".user_id = "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
|
||||||
group BY
|
group BY
|
||||||
"generated_group_field"
|
"generated_group_field"
|
||||||
ORDER BY
|
ORDER BY
|
||||||
cnt DESC, generated_group_field ASC
|
cnt DESC, generated_group_field ASC
|
||||||
LIMIT 10;
|
LIMIT 10;
|
||||||
|
|
||||||
|
@ -631,31 +622,31 @@ FROM
|
||||||
FROM
|
FROM
|
||||||
(SELECT user_id, time
|
(SELECT user_id, time
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
user_id, time
|
user_id, time
|
||||||
FROM
|
FROM
|
||||||
events_reference_table as "events"
|
events_reference_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 10 and user_id < 40) "events_1"
|
user_id > 10 and user_id < 40) "events_1"
|
||||||
ORDER BY
|
ORDER BY
|
||||||
time DESC) "recent_events_1"
|
time DESC) "recent_events_1"
|
||||||
GROUP BY
|
GROUP BY
|
||||||
user_id
|
user_id
|
||||||
ORDER BY
|
ORDER BY
|
||||||
max(TIME) DESC) "some_recent_users"
|
max(TIME) DESC) "some_recent_users"
|
||||||
FULL JOIN
|
FULL JOIN
|
||||||
(SELECT
|
(SELECT
|
||||||
"users".user_id
|
"users".user_id
|
||||||
FROM
|
FROM
|
||||||
users_table as "users"
|
users_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
users.value_2 > 50 and users.value_2 < 55) "some_users_data"
|
users.value_2 > 50 and users.value_2 < 55) "some_users_data"
|
||||||
ON "some_users_data"."user_id" = "some_recent_users"."user_id"
|
ON "some_users_data"."user_id" = "some_recent_users"."user_id"
|
||||||
ORDER BY
|
ORDER BY
|
||||||
user_id
|
user_id
|
||||||
limit 50;
|
limit 50;
|
||||||
|
|
||||||
--
|
--
|
||||||
-- UNIONs and JOINs with reference tables, should error out
|
-- UNIONs and JOINs with reference tables, should error out
|
||||||
--
|
--
|
||||||
SELECT ("final_query"."event_types") as types
|
SELECT ("final_query"."event_types") as types
|
||||||
|
@ -666,130 +657,130 @@ FROM
|
||||||
FROM
|
FROM
|
||||||
( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
||||||
FROM (
|
FROM (
|
||||||
(SELECT
|
(SELECT
|
||||||
*
|
*
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."user_id", "events"."time", 0 AS event
|
"events"."user_id", "events"."time", 0 AS event
|
||||||
FROM
|
FROM
|
||||||
events_table as "events"
|
events_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1)
|
event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1)
|
||||||
UNION
|
UNION
|
||||||
(SELECT
|
(SELECT
|
||||||
*
|
*
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."user_id", "events"."time", 1 AS event
|
"events"."user_id", "events"."time", 1 AS event
|
||||||
FROM
|
FROM
|
||||||
events_reference_table as "events"
|
events_reference_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
event_type IN (15, 16, 17, 18, 19) ) events_subquery_2)
|
event_type IN (15, 16, 17, 18, 19) ) events_subquery_2)
|
||||||
UNION
|
UNION
|
||||||
(SELECT
|
(SELECT
|
||||||
*
|
*
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."user_id", "events"."time", 2 AS event
|
"events"."user_id", "events"."time", 2 AS event
|
||||||
FROM
|
FROM
|
||||||
events_table as "events"
|
events_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3)
|
event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3)
|
||||||
UNION
|
UNION
|
||||||
(SELECT
|
(SELECT
|
||||||
*
|
*
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."user_id", "events"."time", 3 AS event
|
"events"."user_id", "events"."time", 3 AS event
|
||||||
FROM
|
FROM
|
||||||
events_table as "events"
|
events_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1
|
event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1
|
||||||
GROUP BY "t1"."user_id") AS t) "q"
|
GROUP BY "t1"."user_id") AS t) "q"
|
||||||
INNER JOIN
|
INNER JOIN
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id"
|
"users"."user_id"
|
||||||
FROM
|
FROM
|
||||||
users_table as "users"
|
users_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
value_1 > 50 and value_1 < 70) AS t
|
value_1 > 50 and value_1 < 70) AS t
|
||||||
ON (t.user_id = q.user_id)) as final_query
|
ON (t.user_id = q.user_id)) as final_query
|
||||||
ORDER BY
|
ORDER BY
|
||||||
types;
|
types;
|
||||||
|
|
||||||
-- reference table exist in the subquery of union, should error out
|
-- reference table exist in the subquery of union, should error out
|
||||||
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
||||||
FROM
|
FROM
|
||||||
( SELECT
|
( SELECT
|
||||||
*, random()
|
*, random()
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
"t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types"
|
||||||
FROM
|
FROM
|
||||||
( SELECT
|
( SELECT
|
||||||
"t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
"t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events
|
||||||
FROM (
|
FROM (
|
||||||
(SELECT
|
(SELECT
|
||||||
*
|
*
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."time", 0 AS event, "events"."user_id"
|
"events"."time", 0 AS event, "events"."user_id"
|
||||||
FROM
|
FROM
|
||||||
events_table as "events"
|
events_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1)
|
event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1)
|
||||||
UNION
|
UNION
|
||||||
(SELECT *
|
(SELECT *
|
||||||
FROM
|
FROM
|
||||||
(
|
(
|
||||||
SELECT * FROM
|
SELECT * FROM
|
||||||
(
|
(
|
||||||
SELECT
|
SELECT
|
||||||
max("users"."time"),
|
max("users"."time"),
|
||||||
0 AS event,
|
0 AS event,
|
||||||
"users"."user_id"
|
"users"."user_id"
|
||||||
FROM
|
FROM
|
||||||
events_reference_table as "events", users_table as "users"
|
events_reference_table as "events", users_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
events.user_id = users.user_id AND
|
events.user_id = users.user_id AND
|
||||||
event_type IN (10, 11, 12, 13, 14, 15)
|
event_type IN (10, 11, 12, 13, 14, 15)
|
||||||
GROUP BY "users"."user_id"
|
GROUP BY "users"."user_id"
|
||||||
) as events_subquery_5
|
) as events_subquery_5
|
||||||
) events_subquery_2)
|
) events_subquery_2)
|
||||||
UNION
|
UNION
|
||||||
(SELECT *
|
(SELECT *
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."time", 2 AS event, "events"."user_id"
|
"events"."time", 2 AS event, "events"."user_id"
|
||||||
FROM
|
FROM
|
||||||
events_table as "events"
|
events_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3)
|
event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3)
|
||||||
UNION
|
UNION
|
||||||
(SELECT *
|
(SELECT *
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."time", 3 AS event, "events"."user_id"
|
"events"."time", 3 AS event, "events"."user_id"
|
||||||
FROM
|
FROM
|
||||||
events_table as "events"
|
events_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)
|
event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)
|
||||||
) t1
|
) t1
|
||||||
GROUP BY "t1"."user_id") AS t) "q"
|
GROUP BY "t1"."user_id") AS t) "q"
|
||||||
INNER JOIN
|
INNER JOIN
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id"
|
"users"."user_id"
|
||||||
FROM
|
FROM
|
||||||
users_table as "users"
|
users_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
value_1 > 50 and value_1 < 70) AS t
|
value_1 > 50 and value_1 < 70) AS t
|
||||||
ON (t.user_id = q.user_id)) as final_query
|
ON (t.user_id = q.user_id)) as final_query
|
||||||
GROUP BY
|
GROUP BY
|
||||||
types
|
types
|
||||||
ORDER BY
|
ORDER BY
|
||||||
types;
|
types;
|
||||||
|
|
||||||
--
|
--
|
||||||
-- Should error out with UNION ALL Queries on reference tables
|
-- Should error out with UNION ALL Queries on reference tables
|
||||||
--
|
--
|
||||||
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
|
||||||
|
@ -802,40 +793,40 @@ FROM
|
||||||
FROM (
|
FROM (
|
||||||
(SELECT *
|
(SELECT *
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."user_id", "events"."time", 0 AS event
|
"events"."user_id", "events"."time", 0 AS event
|
||||||
FROM
|
FROM
|
||||||
events_table as "events"
|
events_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1)
|
event_type IN (10, 11, 12, 13, 14, 15) ) events_subquery_1)
|
||||||
UNION ALL
|
UNION ALL
|
||||||
(SELECT *
|
(SELECT *
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."user_id", "events"."time", 1 AS event
|
"events"."user_id", "events"."time", 1 AS event
|
||||||
FROM
|
FROM
|
||||||
events_table as "events"
|
events_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
event_type IN (15, 16, 17, 18, 19) ) events_subquery_2)
|
event_type IN (15, 16, 17, 18, 19) ) events_subquery_2)
|
||||||
UNION ALL
|
UNION ALL
|
||||||
(SELECT *
|
(SELECT *
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."user_id", "events"."time", 2 AS event
|
"events"."user_id", "events"."time", 2 AS event
|
||||||
FROM
|
FROM
|
||||||
events_reference_table as "events"
|
events_reference_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3)
|
event_type IN (20, 21, 22, 23, 24, 25) ) events_subquery_3)
|
||||||
UNION ALL
|
UNION ALL
|
||||||
(SELECT *
|
(SELECT *
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."user_id", "events"."time", 3 AS event
|
"events"."user_id", "events"."time", 3 AS event
|
||||||
FROM
|
FROM
|
||||||
events_table as "events"
|
events_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1
|
event_type IN (26, 27, 28, 29, 30, 13)) events_subquery_4)) t1
|
||||||
GROUP BY "t1"."user_id") AS t) "q"
|
GROUP BY "t1"."user_id") AS t) "q"
|
||||||
INNER JOIN
|
INNER JOIN
|
||||||
(SELECT "users"."user_id"
|
(SELECT "users"."user_id"
|
||||||
FROM users_table as "users"
|
FROM users_table as "users"
|
||||||
|
@ -843,7 +834,7 @@ INNER JOIN
|
||||||
GROUP BY types
|
GROUP BY types
|
||||||
ORDER BY types;
|
ORDER BY types;
|
||||||
|
|
||||||
-- just a sanity check that we don't allow this if the reference table is on the
|
-- just a sanity check that we don't allow this if the reference table is on the
|
||||||
-- left part of the left join
|
-- left part of the left join
|
||||||
SELECT count(*) FROM
|
SELECT count(*) FROM
|
||||||
(SELECT random() FROM users_ref_test_table LEFT JOIN user_buy_test_table
|
(SELECT random() FROM users_ref_test_table LEFT JOIN user_buy_test_table
|
||||||
|
@ -862,58 +853,58 @@ WHERE subquery_1.user_id != subquery_2.user_id ;
|
||||||
SELECT
|
SELECT
|
||||||
count(*) AS cnt, "generated_group_field"
|
count(*) AS cnt, "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"eventQuery"."user_id", random(), generated_group_field
|
"eventQuery"."user_id", random(), generated_group_field
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"multi_group_wrapper_1".*, generated_group_field, random()
|
"multi_group_wrapper_1".*, generated_group_field, random()
|
||||||
FROM
|
FROM
|
||||||
(SELECT *
|
(SELECT *
|
||||||
FROM
|
FROM
|
||||||
(SELECT
|
(SELECT
|
||||||
"events"."time", "events"."user_id" as event_user_id
|
"events"."time", "events"."user_id" as event_user_id
|
||||||
FROM
|
FROM
|
||||||
events_table as "events"
|
events_table as "events"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 80) "temp_data_queries"
|
user_id > 80) "temp_data_queries"
|
||||||
INNER JOIN
|
INNER JOIN
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id"
|
"users"."user_id"
|
||||||
FROM
|
FROM
|
||||||
users_reference_table as "users"
|
users_reference_table as "users"
|
||||||
WHERE
|
WHERE
|
||||||
user_id > 80 and value_2 = 5) "user_filters_1"
|
user_id > 80 and value_2 = 5) "user_filters_1"
|
||||||
ON ("temp_data_queries".event_user_id < "user_filters_1".user_id)) AS "multi_group_wrapper_1"
|
ON ("temp_data_queries".event_user_id < "user_filters_1".user_id)) AS "multi_group_wrapper_1"
|
||||||
RIGHT JOIN
|
RIGHT JOIN
|
||||||
(SELECT
|
(SELECT
|
||||||
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
|
"users"."user_id" AS "user_id", value_2 AS "generated_group_field"
|
||||||
FROM
|
FROM
|
||||||
users_table as "users") "left_group_by_1"
|
users_table as "users") "left_group_by_1"
|
||||||
ON ("left_group_by_1".user_id > "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
|
ON ("left_group_by_1".user_id > "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery"
|
||||||
group BY
|
group BY
|
||||||
"generated_group_field"
|
"generated_group_field"
|
||||||
ORDER BY
|
ORDER BY
|
||||||
cnt DESC, generated_group_field ASC
|
cnt DESC, generated_group_field ASC
|
||||||
LIMIT 10;
|
LIMIT 10;
|
||||||
|
|
||||||
-- two hash partitioned relations are not joined
|
-- two hash partitioned relations are not joined
|
||||||
-- on partiton keys although reference table is fine
|
-- on partiton keys although reference table is fine
|
||||||
-- to push down
|
-- to push down
|
||||||
SELECT
|
SELECT
|
||||||
u1.user_id, count(*)
|
u1.user_id, count(*)
|
||||||
FROM
|
FROM
|
||||||
events_table as e1, users_table as u1
|
events_table as e1, users_table as u1
|
||||||
WHERE
|
WHERE
|
||||||
event_type IN
|
event_type IN
|
||||||
(SELECT
|
(SELECT
|
||||||
event_type
|
event_type
|
||||||
FROM
|
FROM
|
||||||
events_reference_table as e2
|
events_reference_table as e2
|
||||||
WHERE
|
WHERE
|
||||||
value_2 = 15 AND
|
value_2 = 15 AND
|
||||||
value_3 > 25 AND
|
value_3 > 25 AND
|
||||||
e1.value_2 > e2.value_2
|
e1.value_2 > e2.value_2
|
||||||
)
|
)
|
||||||
AND u1.user_id > e1.user_id
|
AND u1.user_id > e1.user_id
|
||||||
GROUP BY 1
|
GROUP BY 1
|
||||||
ORDER BY 2 DESC, 1 DESC
|
ORDER BY 2 DESC, 1 DESC
|
||||||
|
|
|
@ -0,0 +1,706 @@
|
||||||
|
-- ===================================================================
|
||||||
|
-- test multi subquery functionality for window functions
|
||||||
|
-- ===================================================================
|
||||||
|
|
||||||
|
CREATE VIEW subq AS
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id, rank() OVER (PARTITION BY user_id ORDER BY value_1)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_1
|
||||||
|
HAVING count(*) > 1;
|
||||||
|
|
||||||
|
SELECT
|
||||||
|
user_id, time, rnk
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, rank() OVER my_win as rnk
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
WINDOW my_win AS (PARTITION BY user_id ORDER BY time DESC)
|
||||||
|
) as foo
|
||||||
|
ORDER BY
|
||||||
|
3 DESC, 1 DESC, 2 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- the same test with different syntax
|
||||||
|
SELECT
|
||||||
|
user_id, time, rnk
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, rank() OVER (PARTITION BY user_id ORDER BY time DESC) as rnk
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
) as foo
|
||||||
|
ORDER BY
|
||||||
|
3 DESC, 1 DESC, 2 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- similar test with lag
|
||||||
|
SELECT
|
||||||
|
user_id, time, lag_event_type, row_no
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, lag(event_type) OVER my_win as lag_event_type, row_number() OVER my_win as row_no
|
||||||
|
FROM
|
||||||
|
events_table WINDOW my_win AS (PARTITION BY user_id ORDER BY time DESC)
|
||||||
|
) as foo
|
||||||
|
ORDER BY
|
||||||
|
4 DESC, 3 DESC NULLS LAST, 1 DESC, 2 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- simple window function, partitioned and grouped by on the distribution key
|
||||||
|
SELECT
|
||||||
|
user_id, rnk, avg_val_2
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
user_id, rank() OVER my_win as rnk, avg(value_2) as avg_val_2
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, date_trunc('day', time)
|
||||||
|
WINDOW my_win AS (PARTITION BY user_id ORDER BY avg(event_type) DESC)
|
||||||
|
) as foo
|
||||||
|
ORDER BY
|
||||||
|
2 DESC, 1 DESC, 3 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- top level query has a group by on the result of the window function
|
||||||
|
SELECT
|
||||||
|
min(user_id), min(time), lag_event_type, count(*)
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, lag(event_type) OVER my_win as lag_event_type
|
||||||
|
FROM
|
||||||
|
events_table WINDOW my_win AS (PARTITION BY user_id ORDER BY time DESC)
|
||||||
|
) as foo
|
||||||
|
GROUP BY
|
||||||
|
lag_event_type
|
||||||
|
ORDER BY
|
||||||
|
3 DESC NULLS LAST, 1 DESC, 2 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- window functions should work along with joins as well
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_table.user_id, lag(users_table.user_id) OVER w1, rank() OVER w1
|
||||||
|
FROM
|
||||||
|
users_table, events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id and
|
||||||
|
event_type < 25
|
||||||
|
WINDOW w1 AS (PARTITION BY users_table.user_id, events_table.event_type ORDER BY events_table.time)
|
||||||
|
) as foo
|
||||||
|
ORDER BY 3 DESC, 1 DESC, 2 DESC NULLS LAST
|
||||||
|
LIMIT 10;
|
||||||
|
|
||||||
|
-- two window functions in a single subquery should work fine as well
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_table.user_id, lag(users_table.user_id) OVER w1, rank() OVER w2
|
||||||
|
FROM
|
||||||
|
users_table, events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id and
|
||||||
|
event_type < 25
|
||||||
|
WINDOW w1 AS (PARTITION BY users_table.user_id, events_table.event_type ORDER BY events_table.time),
|
||||||
|
w2 AS (PARTITION BY users_table.user_id, (events_table.value_2 % 25) ORDER BY events_table.time)
|
||||||
|
) as foo
|
||||||
|
ORDER BY 3 DESC, 1 DESC, 2 DESC NULLS LAST
|
||||||
|
LIMIT 10;
|
||||||
|
|
||||||
|
-- window functions should be fine within subquery joins
|
||||||
|
SELECT sub_1.user_id, max(lag_1), max(rank_1), max(rank_2) FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_table.user_id, lag(users_table.user_id) OVER w1 as lag_1, rank() OVER w2 as rank_1
|
||||||
|
FROM
|
||||||
|
users_table, events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id and
|
||||||
|
event_type < 25
|
||||||
|
WINDOW w1 AS (PARTITION BY users_table.user_id, events_table.event_type ORDER BY events_table.time),
|
||||||
|
w2 AS (PARTITION BY users_table.user_id, (events_table.value_2 % 25) ORDER BY events_table.time)
|
||||||
|
) as sub_1
|
||||||
|
JOIN
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_table.user_id, lag(users_table.user_id) OVER w1 as lag_2, rank() OVER w2 as rank_2
|
||||||
|
FROM
|
||||||
|
users_table, events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id and
|
||||||
|
event_type < 25
|
||||||
|
WINDOW w1 AS (PARTITION BY users_table.user_id, events_table.value_2 ORDER BY events_table.time),
|
||||||
|
w2 AS (PARTITION BY users_table.user_id, (events_table.value_2 % 50) ORDER BY events_table.time)
|
||||||
|
) as sub_2
|
||||||
|
ON(sub_1.user_id = sub_2.user_id)
|
||||||
|
GROUP BY
|
||||||
|
sub_1.user_id
|
||||||
|
ORDER BY 3 DESC, 4 DESC, 1 DESC, 2 DESC NULLS LAST
|
||||||
|
LIMIT 10;
|
||||||
|
|
||||||
|
-- GROUP BYs and PARTITION BYs should work fine together
|
||||||
|
SELECT
|
||||||
|
avg(user_id), max(time), my_rank
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
user_id, date_trunc('day', time) as time, rank() OVER my_win as my_rank
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, date_trunc('day', time)
|
||||||
|
WINDOW my_win AS (PARTITION BY user_id ORDER BY count(*) DESC)
|
||||||
|
) as foo
|
||||||
|
WHERE
|
||||||
|
my_rank > 5
|
||||||
|
GROUP BY
|
||||||
|
my_rank
|
||||||
|
ORDER BY
|
||||||
|
3 DESC, 1 DESC,2 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- aggregates in the PARTITION BY is also allows
|
||||||
|
SELECT
|
||||||
|
avg(user_id), max(time), my_rank
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
user_id, date_trunc('day', time) as time, rank() OVER my_win as my_rank
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, date_trunc('day', time)
|
||||||
|
WINDOW my_win AS (PARTITION BY user_id, avg(event_type%10)::int ORDER BY count(*) DESC)
|
||||||
|
) as foo
|
||||||
|
WHERE
|
||||||
|
my_rank > 0
|
||||||
|
GROUP BY
|
||||||
|
my_rank
|
||||||
|
ORDER BY
|
||||||
|
3 DESC, 1 DESC,2 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- GROUP BY should not necessarly be inclusive of partitioning
|
||||||
|
-- but this query doesn't make much sense
|
||||||
|
SELECT
|
||||||
|
avg(user_id), my_rank
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
user_id, rank() OVER my_win as my_rank
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
WINDOW my_win AS (PARTITION BY user_id, max(event_type) ORDER BY count(*) DESC)
|
||||||
|
) as foo
|
||||||
|
GROUP BY
|
||||||
|
my_rank
|
||||||
|
ORDER BY
|
||||||
|
2 DESC, 1 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- Using previously defined supported window function on distribution key
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT
|
||||||
|
user_id, date_trunc('day', time) as time, sum(rank) OVER w2
|
||||||
|
FROM (
|
||||||
|
SELECT DISTINCT
|
||||||
|
user_id as user_id, time, rank() over w1
|
||||||
|
FROM users_table
|
||||||
|
WINDOW
|
||||||
|
w AS (PARTITION BY user_id),
|
||||||
|
w1 AS (w ORDER BY value_2, value_3)
|
||||||
|
) fab
|
||||||
|
WINDOW
|
||||||
|
w2 as (PARTITION BY user_id, time)
|
||||||
|
) a
|
||||||
|
ORDER BY
|
||||||
|
1, 2, 3 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- test with reference table partitioned on columns from both
|
||||||
|
SELECT *
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id, it_name, count(id) OVER (PARTITION BY user_id, id)
|
||||||
|
FROM
|
||||||
|
users_table, users_ref_test_table
|
||||||
|
WHERE users_table.value_2=users_ref_test_table.k_no
|
||||||
|
) a
|
||||||
|
ORDER BY
|
||||||
|
1, 2, 3
|
||||||
|
LIMIT
|
||||||
|
20;
|
||||||
|
|
||||||
|
-- Group by has more columns than partition by
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id, SUM(value_2) OVER (PARTITION BY user_id)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_1, value_2
|
||||||
|
) a
|
||||||
|
ORDER BY
|
||||||
|
2 DESC, 1
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
SELECT user_id, max(sum) FROM (
|
||||||
|
SELECT
|
||||||
|
user_id, SUM(value_2) OVER (PARTITION BY user_id, value_1)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_1, value_2
|
||||||
|
) a
|
||||||
|
GROUP BY user_id ORDER BY
|
||||||
|
2 DESC,1
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- Window functions with HAVING clause
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id, rank() OVER (PARTITION BY user_id ORDER BY value_1)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_1 HAVING count(*) > 1
|
||||||
|
) a
|
||||||
|
ORDER BY
|
||||||
|
2 DESC, 1
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- Window function in View works
|
||||||
|
SELECT *
|
||||||
|
FROM
|
||||||
|
subq
|
||||||
|
ORDER BY
|
||||||
|
2 DESC, 1
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- Window functions with UNION/UNION ALL works
|
||||||
|
SELECT
|
||||||
|
max(avg)
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (1, 2, 3, 4, 5))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (6, 7, 8, 9, 10))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (11, 12, 13, 14, 15))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (16, 17, 18, 19, 20))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (21, 22, 23, 24, 25))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (26, 27, 28, 29, 30))
|
||||||
|
) b
|
||||||
|
GROUP BY user_id
|
||||||
|
ORDER BY 1 DESC
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
SELECT *
|
||||||
|
FROM (
|
||||||
|
( SELECT user_id,
|
||||||
|
sum(counter)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
events_table) user_id_1
|
||||||
|
GROUP BY
|
||||||
|
user_id)
|
||||||
|
UNION
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(counter)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
events_table) user_id_2
|
||||||
|
GROUP BY
|
||||||
|
user_id)) AS ftop
|
||||||
|
ORDER BY 2 DESC, 1 DESC
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
-- Subquery in where with window function
|
||||||
|
SELECT
|
||||||
|
user_id
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WHERE
|
||||||
|
value_2 > 545 AND
|
||||||
|
value_2 < ALL (
|
||||||
|
SELECT
|
||||||
|
avg(value_3) OVER (PARTITION BY user_id)
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id
|
||||||
|
)
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
ORDER BY
|
||||||
|
user_id DESC
|
||||||
|
LIMIT
|
||||||
|
3;
|
||||||
|
|
||||||
|
-- Some more nested queries
|
||||||
|
SELECT
|
||||||
|
user_id, rank, SUM(ABS(value_2 - value_3)) AS difference, COUNT(*) AS distinct_users
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
*, rank() OVER (PARTITION BY user_id ORDER BY value_2 DESC)
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
user_id, value_2, sum(value_3) OVER (PARTITION BY user_id, value_2) as value_3
|
||||||
|
FROM users_table
|
||||||
|
) AS A
|
||||||
|
) AS A
|
||||||
|
GROUP BY
|
||||||
|
user_id, rank
|
||||||
|
ORDER BY
|
||||||
|
difference DESC, rank DESC
|
||||||
|
LIMIT 20;
|
||||||
|
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT DISTINCT
|
||||||
|
f3.user_id, ABS(f2.sum - f3.sum)
|
||||||
|
FROM (
|
||||||
|
SELECT DISTINCT
|
||||||
|
user_id, sum(value_3) OVER (PARTITION BY user_id)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_3
|
||||||
|
) f3,
|
||||||
|
(
|
||||||
|
SELECT DISTINCT
|
||||||
|
user_id, sum(value_2) OVER (PARTITION BY user_id)
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, value_2
|
||||||
|
) f2
|
||||||
|
WHERE
|
||||||
|
f3.user_id=f2.user_id
|
||||||
|
) a
|
||||||
|
ORDER BY
|
||||||
|
abs DESC
|
||||||
|
LIMIT 10;
|
||||||
|
|
||||||
|
|
||||||
|
-- Partition by with aggregate functions. This query does not make much sense since the
|
||||||
|
-- result of aggregate function will be the same for every row in a partition and it is
|
||||||
|
-- not going to affect the group that the count function will work on.
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT
|
||||||
|
user_id, COUNT(*) OVER (PARTITION BY user_id, MIN(value_2))
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
1
|
||||||
|
) a
|
||||||
|
ORDER BY
|
||||||
|
1 DESC
|
||||||
|
LIMIT
|
||||||
|
5;
|
||||||
|
|
||||||
|
EXPLAIN (COSTS FALSE, VERBOSE TRUE)
|
||||||
|
SELECT *
|
||||||
|
FROM (
|
||||||
|
( SELECT user_id,
|
||||||
|
sum(counter)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
events_table) user_id_1
|
||||||
|
GROUP BY
|
||||||
|
user_id)
|
||||||
|
UNION
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(counter)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
events_table) user_id_2
|
||||||
|
GROUP BY
|
||||||
|
user_id)) AS ftop
|
||||||
|
ORDER BY 2 DESC, 1 DESC
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
-- lets have some queries that Citus shouldn't push down
|
||||||
|
SELECT
|
||||||
|
user_id, time, rnk
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, rank() OVER my_win as rnk
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
WINDOW my_win AS (PARTITION BY event_type ORDER BY time DESC)
|
||||||
|
) as foo
|
||||||
|
ORDER BY
|
||||||
|
3 DESC, 1 DESC, 2 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- user needs to supply partition by which should
|
||||||
|
-- include the distribution key
|
||||||
|
SELECT
|
||||||
|
user_id, time, rnk
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, rank() OVER my_win as rnk
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
WINDOW my_win AS ()
|
||||||
|
) as foo
|
||||||
|
ORDER BY
|
||||||
|
3 DESC, 1 DESC, 2 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- user needs to supply partition by which should
|
||||||
|
-- include the distribution key
|
||||||
|
SELECT
|
||||||
|
user_id, time, rnk
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
*, rank() OVER my_win as rnk
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
WINDOW my_win AS (ORDER BY time DESC)
|
||||||
|
) as foo
|
||||||
|
ORDER BY
|
||||||
|
3 DESC, 1 DESC, 2 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- w2 should not be pushed down
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_table.user_id, lag(users_table.user_id) OVER w1, rank() OVER w2
|
||||||
|
FROM
|
||||||
|
users_table, events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id and
|
||||||
|
event_type < 25
|
||||||
|
WINDOW w1 AS (PARTITION BY users_table.user_id, events_table.event_type ORDER BY events_table.time),
|
||||||
|
w2 AS (PARTITION BY users_table.user_id+1, (events_table.value_2 % 25) ORDER BY events_table.time)
|
||||||
|
) as foo
|
||||||
|
ORDER BY 3 DESC, 1 DESC, 2 DESC NULLS LAST
|
||||||
|
LIMIT 10;
|
||||||
|
|
||||||
|
-- w2 should not be pushed down
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_table.user_id, lag(users_table.user_id) OVER w1, rank() OVER w2
|
||||||
|
FROM
|
||||||
|
users_table, events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id and
|
||||||
|
event_type < 25
|
||||||
|
WINDOW w1 AS (PARTITION BY users_table.user_id, events_table.event_type ORDER BY events_table.time),
|
||||||
|
w2 AS (ORDER BY events_table.time)
|
||||||
|
) as foo
|
||||||
|
ORDER BY
|
||||||
|
3 DESC, 1 DESC, 2 DESC NULLS LAST
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- GROUP BY includes the partition key, but not the WINDOW function
|
||||||
|
SELECT
|
||||||
|
user_id, time, my_rank
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
user_id, date_trunc('day', time) as time, rank() OVER my_win as my_rank
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, date_trunc('day', time)
|
||||||
|
WINDOW my_win AS (ORDER BY avg(event_type))
|
||||||
|
) as foo
|
||||||
|
WHERE
|
||||||
|
my_rank > 125
|
||||||
|
ORDER BY
|
||||||
|
3 DESC, 1 DESC,2 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- GROUP BY includes the partition key, but not the WINDOW function
|
||||||
|
SELECT
|
||||||
|
user_id, time, my_rank
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
user_id, date_trunc('day', time) as time, rank() OVER my_win as my_rank
|
||||||
|
FROM
|
||||||
|
events_table
|
||||||
|
GROUP BY
|
||||||
|
user_id, date_trunc('day', time)
|
||||||
|
WINDOW my_win AS (PARTITION BY date_trunc('day', time) ORDER BY avg(event_type))
|
||||||
|
) as foo
|
||||||
|
WHERE
|
||||||
|
my_rank > 125
|
||||||
|
ORDER BY
|
||||||
|
3 DESC, 1 DESC,2 DESC
|
||||||
|
LIMIT
|
||||||
|
10;
|
||||||
|
|
||||||
|
-- Overriding window function but not supported
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT
|
||||||
|
user_id, date_trunc('day', time) as time, sum(rank) OVER w2
|
||||||
|
FROM (
|
||||||
|
SELECT DISTINCT
|
||||||
|
user_id as user_id, time, rank() over w1
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WINDOW
|
||||||
|
w AS (PARTITION BY time), w1 AS (w ORDER BY value_2, value_3)
|
||||||
|
) fab
|
||||||
|
WINDOW
|
||||||
|
w2 as (PARTITION BY user_id, time)
|
||||||
|
) a
|
||||||
|
ORDER BY
|
||||||
|
1,2,3;
|
||||||
|
|
||||||
|
|
||||||
|
-- Aggregate function on distribution column should error out
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT
|
||||||
|
user_id, COUNT(*) OVER (PARTITION BY sum(user_id), MIN(value_2))
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
) a
|
||||||
|
ORDER BY
|
||||||
|
1 DESC, 2 DESC;
|
||||||
|
|
||||||
|
-- test with reference table partitioned on only a column from reference table
|
||||||
|
SELECT *
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id, it_name, count(id) OVER (PARTITION BY id)
|
||||||
|
FROM
|
||||||
|
users_table, users_ref_test_table
|
||||||
|
) a
|
||||||
|
ORDER BY
|
||||||
|
1, 2, 3
|
||||||
|
LIMIT
|
||||||
|
20;
|
||||||
|
|
||||||
|
-- UNION ALL with only one of them is not partitioned over distribution column which
|
||||||
|
-- should not be allowed.
|
||||||
|
SELECT
|
||||||
|
max(avg)
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (1, 2, 3, 4, 5))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (6, 7, 8, 9, 10))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (11, 12, 13, 14, 15))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (16, 17, 18, 19, 20))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by user_id), user_id FROM events_table where event_type IN (21, 22, 23, 24, 25))
|
||||||
|
UNION ALL
|
||||||
|
(SELECT avg(value_3) over (partition by event_type), user_id FROM events_table where event_type IN (26, 27, 28, 29, 30))
|
||||||
|
) b
|
||||||
|
GROUP BY user_id
|
||||||
|
ORDER BY 1 DESC
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
-- UNION with only one subquery which has a partition on non-distribution column should
|
||||||
|
-- error out
|
||||||
|
SELECT *
|
||||||
|
FROM (
|
||||||
|
( SELECT user_id,
|
||||||
|
sum(counter)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
events_table) user_id_1
|
||||||
|
GROUP BY
|
||||||
|
user_id)
|
||||||
|
UNION
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(counter)
|
||||||
|
FROM
|
||||||
|
(SELECT
|
||||||
|
user_id, sum(value_2) over (partition by user_id) AS counter
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
user_id, sum(value_2) over (partition by event_type) AS counter
|
||||||
|
FROM
|
||||||
|
events_table) user_id_2
|
||||||
|
GROUP BY
|
||||||
|
user_id)) AS ftop
|
||||||
|
ORDER BY 2 DESC, 1 DESC
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
DROP VIEW subq;
|
Loading…
Reference in New Issue