mirror of https://github.com/citusdata/citus.git
Merge pull request #1852 from citusdata/group_by_on_function
Treat recurring tuples as reference tables for GROUP BY checks
pull/1874/head
commit
b5784ca03a
|
@ -109,7 +109,8 @@ static bool ExtractSetOperationStatmentWalker(Node *node, List **setOperationLis
|
||||||
static DeferredErrorMessage * DeferErrorIfUnsupportedTableCombination(Query *queryTree);
|
static DeferredErrorMessage * DeferErrorIfUnsupportedTableCombination(Query *queryTree);
|
||||||
static bool WindowPartitionOnDistributionColumn(Query *query);
|
static bool WindowPartitionOnDistributionColumn(Query *query);
|
||||||
static bool AllTargetExpressionsAreColumnReferences(List *targetEntryList);
|
static bool AllTargetExpressionsAreColumnReferences(List *targetEntryList);
|
||||||
static bool RangeTableListContainsOnlyReferenceTables(List *rangeTableList);
|
static bool FindNodeCheckInRangeTableList(List *rtable, bool (*check)(Node *));
|
||||||
|
static bool IsDistributedTableRTE(Node *node);
|
||||||
static FieldSelect * CompositeFieldRecursive(Expr *expression, Query *query);
|
static FieldSelect * CompositeFieldRecursive(Expr *expression, Query *query);
|
||||||
static bool FullCompositeFieldList(List *compositeFieldList);
|
static bool FullCompositeFieldList(List *compositeFieldList);
|
||||||
static MultiNode * MultiNodeTree(Query *queryTree);
|
static MultiNode * MultiNodeTree(Query *queryTree);
|
||||||
|
@ -1510,12 +1511,12 @@ TargetListOnPartitionColumn(Query *query, List *targetEntryList)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We could still behave as if the target list is on partition column if
|
* We could still behave as if the target list is on partition column if
|
||||||
* all range table entries are reference tables and all target expressions
|
* all range table entries are reference tables or intermediate results,
|
||||||
* are column references to the given query level.
|
* and all target expressions are column references to the given query level.
|
||||||
*/
|
*/
|
||||||
if (!targetListOnPartitionColumn)
|
if (!targetListOnPartitionColumn)
|
||||||
{
|
{
|
||||||
if (RangeTableListContainsOnlyReferenceTables(query->rtable) &&
|
if (!FindNodeCheckInRangeTableList(query->rtable, IsDistributedTableRTE) &&
|
||||||
AllTargetExpressionsAreColumnReferences(targetEntryList))
|
AllTargetExpressionsAreColumnReferences(targetEntryList))
|
||||||
{
|
{
|
||||||
targetListOnPartitionColumn = true;
|
targetListOnPartitionColumn = true;
|
||||||
|
@ -1580,40 +1581,51 @@ AllTargetExpressionsAreColumnReferences(List *targetEntryList)
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* RangeTableListContainsOnlyReferenceTables returns true if all range table
|
* FindNodeCheckInRangeTableList finds a node for which the check
|
||||||
* entries are reference tables.
|
* function returns true.
|
||||||
*
|
*
|
||||||
* The function returns false for range table entries that are not relations.
|
* FindNodeCheckInRangeTableList relies on FindNodeCheck() but only
|
||||||
*
|
* considers the range table entries.
|
||||||
* Note that the function doesn't recurse into subqueries, returns false when
|
|
||||||
* a subquery is found.
|
|
||||||
*/
|
*/
|
||||||
static bool
|
static bool
|
||||||
RangeTableListContainsOnlyReferenceTables(List *rangeTableList)
|
FindNodeCheckInRangeTableList(List *rtable, bool (*check)(Node *))
|
||||||
{
|
{
|
||||||
ListCell *rangeTableCell = NULL;
|
return range_table_walker(rtable, FindNodeCheck, check, QTW_EXAMINE_RTES);
|
||||||
foreach(rangeTableCell, rangeTableList)
|
}
|
||||||
{
|
|
||||||
RangeTblEntry *rangeTableEntry = (RangeTblEntry *) lfirst(rangeTableCell);
|
|
||||||
|
|
||||||
if (rangeTableEntry->rtekind == RTE_RELATION)
|
|
||||||
{
|
|
||||||
Oid relationId = rangeTableEntry->relid;
|
|
||||||
|
|
||||||
if (!IsDistributedTable(relationId))
|
/*
|
||||||
|
* IsDistributedTableRTE gets a node and returns true if the node
|
||||||
|
* is a range table relation entry that points to a distributed
|
||||||
|
* relation (i.e., excluding reference tables).
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
IsDistributedTableRTE(Node *node)
|
||||||
|
{
|
||||||
|
RangeTblEntry *rangeTableEntry = NULL;
|
||||||
|
Oid relationId = InvalidOid;
|
||||||
|
|
||||||
|
if (node == NULL)
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (PartitionMethod(relationId) != DISTRIBUTE_BY_NONE)
|
if (!IsA(node, RangeTblEntry))
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
else
|
rangeTableEntry = (RangeTblEntry *) node;
|
||||||
|
if (rangeTableEntry->rtekind != RTE_RELATION)
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
relationId = rangeTableEntry->relid;
|
||||||
|
if (!IsDistributedTable(relationId) ||
|
||||||
|
PartitionMethod(relationId) == DISTRIBUTE_BY_NONE)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -1361,6 +1361,186 @@ ORDER BY 1 DESC
|
||||||
LIMIT 4;
|
LIMIT 4;
|
||||||
ERROR: cannot push down this subquery
|
ERROR: cannot push down this subquery
|
||||||
DETAIL: Distinct on columns without partition column is currently unsupported
|
DETAIL: Distinct on columns without partition column is currently unsupported
|
||||||
|
-- test the read_intermediate_result() for GROUP BYs
|
||||||
|
BEGIN;
|
||||||
|
|
||||||
|
SELECT broadcast_intermediate_result('squares', 'SELECT s, s*s FROM generate_series(1,200) s');
|
||||||
|
broadcast_intermediate_result
|
||||||
|
-------------------------------
|
||||||
|
200
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- single appearance of read_intermediate_result
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
JOIN
|
||||||
|
(SELECT
|
||||||
|
max(res.val) as mx
|
||||||
|
FROM
|
||||||
|
read_intermediate_result('squares', 'binary') AS res (val int, val_square int)
|
||||||
|
GROUP BY res.val_square) squares
|
||||||
|
ON (mx = user_id)
|
||||||
|
ORDER BY 1
|
||||||
|
LIMIT 5;
|
||||||
|
user_id
|
||||||
|
---------
|
||||||
|
1
|
||||||
|
2
|
||||||
|
3
|
||||||
|
4
|
||||||
|
5
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
-- similar to the above, with DISTINCT on intermediate result
|
||||||
|
SELECT DISTINCT user_id
|
||||||
|
FROM users_table
|
||||||
|
JOIN
|
||||||
|
(SELECT DISTINCT res.val AS mx
|
||||||
|
FROM read_intermediate_result('squares', 'binary') AS res (val int, val_square int)) squares ON (mx = user_id)
|
||||||
|
ORDER BY 1
|
||||||
|
LIMIT 5;
|
||||||
|
user_id
|
||||||
|
---------
|
||||||
|
1
|
||||||
|
2
|
||||||
|
3
|
||||||
|
4
|
||||||
|
5
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
-- single appearance of read_intermediate_result but inside a subquery
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
JOIN (
|
||||||
|
SELECT *,random() FROM (SELECT
|
||||||
|
max(res.val) as mx
|
||||||
|
FROM
|
||||||
|
(SELECT val, val_square FROM read_intermediate_result('squares', 'binary') AS res (val int, val_square int)) res
|
||||||
|
GROUP BY res.val_square) foo)
|
||||||
|
squares
|
||||||
|
ON (mx = user_id)
|
||||||
|
ORDER BY 1
|
||||||
|
LIMIT 5;
|
||||||
|
user_id
|
||||||
|
---------
|
||||||
|
1
|
||||||
|
2
|
||||||
|
3
|
||||||
|
4
|
||||||
|
5
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
-- multiple read_intermediate_results in the same subquery is OK
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
JOIN
|
||||||
|
(SELECT
|
||||||
|
max(res.val) as mx
|
||||||
|
FROM
|
||||||
|
read_intermediate_result('squares', 'binary') AS res (val int, val_square int),
|
||||||
|
read_intermediate_result('squares', 'binary') AS res2 (val int, val_square int)
|
||||||
|
WHERE res.val = res2.val_square
|
||||||
|
GROUP BY res2.val_square) squares
|
||||||
|
ON (mx = user_id)
|
||||||
|
ORDER BY 1
|
||||||
|
LIMIT 5;
|
||||||
|
user_id
|
||||||
|
---------
|
||||||
|
1
|
||||||
|
4
|
||||||
|
(2 rows)
|
||||||
|
|
||||||
|
-- mixed recurring tuples should be supported
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
JOIN
|
||||||
|
(SELECT
|
||||||
|
max(res.val) as mx
|
||||||
|
FROM
|
||||||
|
read_intermediate_result('squares', 'binary') AS res (val int, val_square int),
|
||||||
|
generate_series(0, 10) i
|
||||||
|
WHERE
|
||||||
|
res.val = i
|
||||||
|
GROUP BY
|
||||||
|
i) squares
|
||||||
|
ON (mx = user_id)
|
||||||
|
ORDER BY 1
|
||||||
|
LIMIT 5;
|
||||||
|
user_id
|
||||||
|
---------
|
||||||
|
1
|
||||||
|
2
|
||||||
|
3
|
||||||
|
4
|
||||||
|
5
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
-- should error out since there is a distributed table and
|
||||||
|
-- there are no columns on the GROUP BY from the distributed table
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id
|
||||||
|
FROM
|
||||||
|
users_reference_table
|
||||||
|
JOIN
|
||||||
|
(SELECT
|
||||||
|
max(val_square) as mx
|
||||||
|
FROM
|
||||||
|
read_intermediate_result('squares', 'binary') AS res (val int, val_square int), events_table
|
||||||
|
WHERE
|
||||||
|
events_table.user_id = res.val GROUP BY res.val) squares
|
||||||
|
ON (mx = user_id)
|
||||||
|
ORDER BY 1
|
||||||
|
LIMIT 5;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Group by list without partition column is currently unsupported
|
||||||
|
ROLLBACK;
|
||||||
|
-- should work since we're using an immutable function as recurring tuple
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
JOIN
|
||||||
|
(SELECT
|
||||||
|
max(i+5)as mx
|
||||||
|
FROM
|
||||||
|
generate_series(0, 10) as i GROUP BY i) squares
|
||||||
|
ON (mx = user_id)
|
||||||
|
ORDER BY 1
|
||||||
|
LIMIT 5;
|
||||||
|
user_id
|
||||||
|
---------
|
||||||
|
5
|
||||||
|
6
|
||||||
|
(2 rows)
|
||||||
|
|
||||||
|
-- should not work since we're
|
||||||
|
-- using an immutable function as recurring tuple
|
||||||
|
-- along with a distributed table, where GROUP BY is
|
||||||
|
-- on the recurring tuple
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id
|
||||||
|
FROM
|
||||||
|
users_reference_table
|
||||||
|
JOIN
|
||||||
|
(SELECT
|
||||||
|
max(i+5)as mx
|
||||||
|
FROM
|
||||||
|
generate_series(0, 10) as i, events_table
|
||||||
|
WHERE
|
||||||
|
events_table.user_id = i GROUP BY i) squares
|
||||||
|
ON (mx = user_id)
|
||||||
|
ORDER BY 1
|
||||||
|
LIMIT 5;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Group by list without partition column is currently unsupported
|
||||||
DROP TABLE user_buy_test_table;
|
DROP TABLE user_buy_test_table;
|
||||||
DROP TABLE users_ref_test_table;
|
DROP TABLE users_ref_test_table;
|
||||||
DROP TABLE users_return_test_table;
|
DROP TABLE users_return_test_table;
|
||||||
|
|
|
@ -1089,6 +1089,142 @@ SELECT * FROM
|
||||||
ORDER BY 1 DESC
|
ORDER BY 1 DESC
|
||||||
LIMIT 4;
|
LIMIT 4;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-- test the read_intermediate_result() for GROUP BYs
|
||||||
|
BEGIN;
|
||||||
|
|
||||||
|
SELECT broadcast_intermediate_result('squares', 'SELECT s, s*s FROM generate_series(1,200) s');
|
||||||
|
|
||||||
|
-- single appearance of read_intermediate_result
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
JOIN
|
||||||
|
(SELECT
|
||||||
|
max(res.val) as mx
|
||||||
|
FROM
|
||||||
|
read_intermediate_result('squares', 'binary') AS res (val int, val_square int)
|
||||||
|
GROUP BY res.val_square) squares
|
||||||
|
ON (mx = user_id)
|
||||||
|
ORDER BY 1
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
-- similar to the above, with DISTINCT on intermediate result
|
||||||
|
SELECT DISTINCT user_id
|
||||||
|
FROM users_table
|
||||||
|
JOIN
|
||||||
|
(SELECT DISTINCT res.val AS mx
|
||||||
|
FROM read_intermediate_result('squares', 'binary') AS res (val int, val_square int)) squares ON (mx = user_id)
|
||||||
|
ORDER BY 1
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
-- single appearance of read_intermediate_result but inside a subquery
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
JOIN (
|
||||||
|
SELECT *,random() FROM (SELECT
|
||||||
|
max(res.val) as mx
|
||||||
|
FROM
|
||||||
|
(SELECT val, val_square FROM read_intermediate_result('squares', 'binary') AS res (val int, val_square int)) res
|
||||||
|
GROUP BY res.val_square) foo)
|
||||||
|
squares
|
||||||
|
ON (mx = user_id)
|
||||||
|
ORDER BY 1
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
-- multiple read_intermediate_results in the same subquery is OK
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
JOIN
|
||||||
|
(SELECT
|
||||||
|
max(res.val) as mx
|
||||||
|
FROM
|
||||||
|
read_intermediate_result('squares', 'binary') AS res (val int, val_square int),
|
||||||
|
read_intermediate_result('squares', 'binary') AS res2 (val int, val_square int)
|
||||||
|
WHERE res.val = res2.val_square
|
||||||
|
GROUP BY res2.val_square) squares
|
||||||
|
ON (mx = user_id)
|
||||||
|
ORDER BY 1
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
-- mixed recurring tuples should be supported
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
JOIN
|
||||||
|
(SELECT
|
||||||
|
max(res.val) as mx
|
||||||
|
FROM
|
||||||
|
read_intermediate_result('squares', 'binary') AS res (val int, val_square int),
|
||||||
|
generate_series(0, 10) i
|
||||||
|
WHERE
|
||||||
|
res.val = i
|
||||||
|
GROUP BY
|
||||||
|
i) squares
|
||||||
|
ON (mx = user_id)
|
||||||
|
ORDER BY 1
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
-- should error out since there is a distributed table and
|
||||||
|
-- there are no columns on the GROUP BY from the distributed table
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id
|
||||||
|
FROM
|
||||||
|
users_reference_table
|
||||||
|
JOIN
|
||||||
|
(SELECT
|
||||||
|
max(val_square) as mx
|
||||||
|
FROM
|
||||||
|
read_intermediate_result('squares', 'binary') AS res (val int, val_square int), events_table
|
||||||
|
WHERE
|
||||||
|
events_table.user_id = res.val GROUP BY res.val) squares
|
||||||
|
ON (mx = user_id)
|
||||||
|
ORDER BY 1
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
ROLLBACK;
|
||||||
|
|
||||||
|
-- should work since we're using an immutable function as recurring tuple
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
JOIN
|
||||||
|
(SELECT
|
||||||
|
max(i+5)as mx
|
||||||
|
FROM
|
||||||
|
generate_series(0, 10) as i GROUP BY i) squares
|
||||||
|
ON (mx = user_id)
|
||||||
|
ORDER BY 1
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
|
||||||
|
-- should not work since we're
|
||||||
|
-- using an immutable function as recurring tuple
|
||||||
|
-- along with a distributed table, where GROUP BY is
|
||||||
|
-- on the recurring tuple
|
||||||
|
SELECT
|
||||||
|
DISTINCT user_id
|
||||||
|
FROM
|
||||||
|
users_reference_table
|
||||||
|
JOIN
|
||||||
|
(SELECT
|
||||||
|
max(i+5)as mx
|
||||||
|
FROM
|
||||||
|
generate_series(0, 10) as i, events_table
|
||||||
|
WHERE
|
||||||
|
events_table.user_id = i GROUP BY i) squares
|
||||||
|
ON (mx = user_id)
|
||||||
|
ORDER BY 1
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
DROP TABLE user_buy_test_table;
|
DROP TABLE user_buy_test_table;
|
||||||
DROP TABLE users_ref_test_table;
|
DROP TABLE users_ref_test_table;
|
||||||
DROP TABLE users_return_test_table;
|
DROP TABLE users_return_test_table;
|
||||||
|
|
Loading…
Reference in New Issue