mirror of https://github.com/citusdata/citus.git
Merge pull request #1852 from citusdata/group_by_on_function
Treat recurring tuples as reference tables for GROUP BY checkspull/1874/head
commit
b5784ca03a
|
@ -109,7 +109,8 @@ static bool ExtractSetOperationStatmentWalker(Node *node, List **setOperationLis
|
|||
static DeferredErrorMessage * DeferErrorIfUnsupportedTableCombination(Query *queryTree);
|
||||
static bool WindowPartitionOnDistributionColumn(Query *query);
|
||||
static bool AllTargetExpressionsAreColumnReferences(List *targetEntryList);
|
||||
static bool RangeTableListContainsOnlyReferenceTables(List *rangeTableList);
|
||||
static bool FindNodeCheckInRangeTableList(List *rtable, bool (*check)(Node *));
|
||||
static bool IsDistributedTableRTE(Node *node);
|
||||
static FieldSelect * CompositeFieldRecursive(Expr *expression, Query *query);
|
||||
static bool FullCompositeFieldList(List *compositeFieldList);
|
||||
static MultiNode * MultiNodeTree(Query *queryTree);
|
||||
|
@ -1510,12 +1511,12 @@ TargetListOnPartitionColumn(Query *query, List *targetEntryList)
|
|||
|
||||
/*
|
||||
* We could still behave as if the target list is on partition column if
|
||||
* all range table entries are reference tables and all target expressions
|
||||
* are column references to the given query level.
|
||||
* all range table entries are reference tables or intermediate results,
|
||||
* and all target expressions are column references to the given query level.
|
||||
*/
|
||||
if (!targetListOnPartitionColumn)
|
||||
{
|
||||
if (RangeTableListContainsOnlyReferenceTables(query->rtable) &&
|
||||
if (!FindNodeCheckInRangeTableList(query->rtable, IsDistributedTableRTE) &&
|
||||
AllTargetExpressionsAreColumnReferences(targetEntryList))
|
||||
{
|
||||
targetListOnPartitionColumn = true;
|
||||
|
@ -1580,40 +1581,51 @@ AllTargetExpressionsAreColumnReferences(List *targetEntryList)
|
|||
|
||||
|
||||
/*
|
||||
* RangeTableListContainsOnlyReferenceTables returns true if all range table
|
||||
* entries are reference tables.
|
||||
* FindNodeCheckInRangeTableList finds a node for which the check
|
||||
* function returns true.
|
||||
*
|
||||
* The function returns false for range table entries that are not relations.
|
||||
*
|
||||
* Note that the function doesn't recurse into subqueries, returns false when
|
||||
* a subquery is found.
|
||||
* FindNodeCheckInRangeTableList relies on FindNodeCheck() but only
|
||||
* considers the range table entries.
|
||||
*/
|
||||
static bool
|
||||
RangeTableListContainsOnlyReferenceTables(List *rangeTableList)
|
||||
FindNodeCheckInRangeTableList(List *rtable, bool (*check)(Node *))
|
||||
{
|
||||
ListCell *rangeTableCell = NULL;
|
||||
foreach(rangeTableCell, rangeTableList)
|
||||
{
|
||||
RangeTblEntry *rangeTableEntry = (RangeTblEntry *) lfirst(rangeTableCell);
|
||||
return range_table_walker(rtable, FindNodeCheck, check, QTW_EXAMINE_RTES);
|
||||
}
|
||||
|
||||
if (rangeTableEntry->rtekind == RTE_RELATION)
|
||||
{
|
||||
Oid relationId = rangeTableEntry->relid;
|
||||
|
||||
if (!IsDistributedTable(relationId))
|
||||
/*
|
||||
* IsDistributedTableRTE gets a node and returns true if the node
|
||||
* is a range table relation entry that points to a distributed
|
||||
* relation (i.e., excluding reference tables).
|
||||
*/
|
||||
static bool
|
||||
IsDistributedTableRTE(Node *node)
|
||||
{
|
||||
RangeTblEntry *rangeTableEntry = NULL;
|
||||
Oid relationId = InvalidOid;
|
||||
|
||||
if (node == NULL)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (PartitionMethod(relationId) != DISTRIBUTE_BY_NONE)
|
||||
if (!IsA(node, RangeTblEntry))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else
|
||||
|
||||
rangeTableEntry = (RangeTblEntry *) node;
|
||||
if (rangeTableEntry->rtekind != RTE_RELATION)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
relationId = rangeTableEntry->relid;
|
||||
if (!IsDistributedTable(relationId) ||
|
||||
PartitionMethod(relationId) == DISTRIBUTE_BY_NONE)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
|
@ -1361,6 +1361,186 @@ ORDER BY 1 DESC
|
|||
LIMIT 4;
|
||||
ERROR: cannot push down this subquery
|
||||
DETAIL: Distinct on columns without partition column is currently unsupported
|
||||
-- test the read_intermediate_result() for GROUP BYs
|
||||
BEGIN;
|
||||
|
||||
SELECT broadcast_intermediate_result('squares', 'SELECT s, s*s FROM generate_series(1,200) s');
|
||||
broadcast_intermediate_result
|
||||
-------------------------------
|
||||
200
|
||||
(1 row)
|
||||
|
||||
-- single appereance of read_intermediate_result
|
||||
SELECT
|
||||
DISTINCT user_id
|
||||
FROM
|
||||
users_table
|
||||
JOIN
|
||||
(SELECT
|
||||
max(res.val) as mx
|
||||
FROM
|
||||
read_intermediate_result('squares', 'binary') AS res (val int, val_square int)
|
||||
GROUP BY res.val_square) squares
|
||||
ON (mx = user_id)
|
||||
ORDER BY 1
|
||||
LIMIT 5;
|
||||
user_id
|
||||
---------
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
(5 rows)
|
||||
|
||||
-- similar to the above, with DISTINCT on intermediate result
|
||||
SELECT DISTINCT user_id
|
||||
FROM users_table
|
||||
JOIN
|
||||
(SELECT DISTINCT res.val AS mx
|
||||
FROM read_intermediate_result('squares', 'binary') AS res (val int, val_square int)) squares ON (mx = user_id)
|
||||
ORDER BY 1
|
||||
LIMIT 5;
|
||||
user_id
|
||||
---------
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
(5 rows)
|
||||
|
||||
-- single appereance of read_intermediate_result but inside a subquery
|
||||
SELECT
|
||||
DISTINCT user_id
|
||||
FROM
|
||||
users_table
|
||||
JOIN (
|
||||
SELECT *,random() FROM (SELECT
|
||||
max(res.val) as mx
|
||||
FROM
|
||||
(SELECT val, val_square FROM read_intermediate_result('squares', 'binary') AS res (val int, val_square int)) res
|
||||
GROUP BY res.val_square) foo)
|
||||
squares
|
||||
ON (mx = user_id)
|
||||
ORDER BY 1
|
||||
LIMIT 5;
|
||||
user_id
|
||||
---------
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
(5 rows)
|
||||
|
||||
-- multiple read_intermediate_results in the same subquery is OK
|
||||
SELECT
|
||||
DISTINCT user_id
|
||||
FROM
|
||||
users_table
|
||||
JOIN
|
||||
(SELECT
|
||||
max(res.val) as mx
|
||||
FROM
|
||||
read_intermediate_result('squares', 'binary') AS res (val int, val_square int),
|
||||
read_intermediate_result('squares', 'binary') AS res2 (val int, val_square int)
|
||||
WHERE res.val = res2.val_square
|
||||
GROUP BY res2.val_square) squares
|
||||
ON (mx = user_id)
|
||||
ORDER BY 1
|
||||
LIMIT 5;
|
||||
user_id
|
||||
---------
|
||||
1
|
||||
4
|
||||
(2 rows)
|
||||
|
||||
-- mixed recurring tuples should be supported
|
||||
SELECT
|
||||
DISTINCT user_id
|
||||
FROM
|
||||
users_table
|
||||
JOIN
|
||||
(SELECT
|
||||
max(res.val) as mx
|
||||
FROM
|
||||
read_intermediate_result('squares', 'binary') AS res (val int, val_square int),
|
||||
generate_series(0, 10) i
|
||||
WHERE
|
||||
res.val = i
|
||||
GROUP BY
|
||||
i) squares
|
||||
ON (mx = user_id)
|
||||
ORDER BY 1
|
||||
LIMIT 5;
|
||||
user_id
|
||||
---------
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
(5 rows)
|
||||
|
||||
-- should error out since there is a distributed table and
|
||||
-- there are no columns on the GROUP BY from the distributed table
|
||||
SELECT
|
||||
DISTINCT user_id
|
||||
FROM
|
||||
users_reference_table
|
||||
JOIN
|
||||
(SELECT
|
||||
max(val_square) as mx
|
||||
FROM
|
||||
read_intermediate_result('squares', 'binary') AS res (val int, val_square int), events_table
|
||||
WHERE
|
||||
events_table.user_id = res.val GROUP BY res.val) squares
|
||||
ON (mx = user_id)
|
||||
ORDER BY 1
|
||||
LIMIT 5;
|
||||
ERROR: cannot push down this subquery
|
||||
DETAIL: Group by list without partition column is currently unsupported
|
||||
ROLLBACK;
|
||||
-- should work since we're using an immutable function as recurring tuple
|
||||
SELECT
|
||||
DISTINCT user_id
|
||||
FROM
|
||||
users_table
|
||||
JOIN
|
||||
(SELECT
|
||||
max(i+5)as mx
|
||||
FROM
|
||||
generate_series(0, 10) as i GROUP BY i) squares
|
||||
ON (mx = user_id)
|
||||
ORDER BY 1
|
||||
LIMIT 5;
|
||||
user_id
|
||||
---------
|
||||
5
|
||||
6
|
||||
(2 rows)
|
||||
|
||||
-- should not work since we're
|
||||
-- using an immutable function as recurring tuple
|
||||
-- along with a distributed table, where GROUP BY is
|
||||
-- on the recurring tuple
|
||||
SELECT
|
||||
DISTINCT user_id
|
||||
FROM
|
||||
users_reference_table
|
||||
JOIN
|
||||
(SELECT
|
||||
max(i+5)as mx
|
||||
FROM
|
||||
generate_series(0, 10) as i, events_table
|
||||
WHERE
|
||||
events_table.user_id = i GROUP BY i) squares
|
||||
ON (mx = user_id)
|
||||
ORDER BY 1
|
||||
LIMIT 5;
|
||||
ERROR: cannot push down this subquery
|
||||
DETAIL: Group by list without partition column is currently unsupported
|
||||
DROP TABLE user_buy_test_table;
|
||||
DROP TABLE users_ref_test_table;
|
||||
DROP TABLE users_return_test_table;
|
||||
|
|
|
@ -1089,6 +1089,142 @@ SELECT * FROM
|
|||
ORDER BY 1 DESC
|
||||
LIMIT 4;
|
||||
|
||||
|
||||
|
||||
-- test the read_intermediate_result() for GROUP BYs
|
||||
BEGIN;
|
||||
|
||||
SELECT broadcast_intermediate_result('squares', 'SELECT s, s*s FROM generate_series(1,200) s');
|
||||
|
||||
-- single appereance of read_intermediate_result
|
||||
SELECT
|
||||
DISTINCT user_id
|
||||
FROM
|
||||
users_table
|
||||
JOIN
|
||||
(SELECT
|
||||
max(res.val) as mx
|
||||
FROM
|
||||
read_intermediate_result('squares', 'binary') AS res (val int, val_square int)
|
||||
GROUP BY res.val_square) squares
|
||||
ON (mx = user_id)
|
||||
ORDER BY 1
|
||||
LIMIT 5;
|
||||
|
||||
-- similar to the above, with DISTINCT on intermediate result
|
||||
SELECT DISTINCT user_id
|
||||
FROM users_table
|
||||
JOIN
|
||||
(SELECT DISTINCT res.val AS mx
|
||||
FROM read_intermediate_result('squares', 'binary') AS res (val int, val_square int)) squares ON (mx = user_id)
|
||||
ORDER BY 1
|
||||
LIMIT 5;
|
||||
|
||||
-- single appereance of read_intermediate_result but inside a subquery
|
||||
SELECT
|
||||
DISTINCT user_id
|
||||
FROM
|
||||
users_table
|
||||
JOIN (
|
||||
SELECT *,random() FROM (SELECT
|
||||
max(res.val) as mx
|
||||
FROM
|
||||
(SELECT val, val_square FROM read_intermediate_result('squares', 'binary') AS res (val int, val_square int)) res
|
||||
GROUP BY res.val_square) foo)
|
||||
squares
|
||||
ON (mx = user_id)
|
||||
ORDER BY 1
|
||||
LIMIT 5;
|
||||
|
||||
-- multiple read_intermediate_results in the same subquery is OK
|
||||
SELECT
|
||||
DISTINCT user_id
|
||||
FROM
|
||||
users_table
|
||||
JOIN
|
||||
(SELECT
|
||||
max(res.val) as mx
|
||||
FROM
|
||||
read_intermediate_result('squares', 'binary') AS res (val int, val_square int),
|
||||
read_intermediate_result('squares', 'binary') AS res2 (val int, val_square int)
|
||||
WHERE res.val = res2.val_square
|
||||
GROUP BY res2.val_square) squares
|
||||
ON (mx = user_id)
|
||||
ORDER BY 1
|
||||
LIMIT 5;
|
||||
|
||||
-- mixed recurring tuples should be supported
|
||||
SELECT
|
||||
DISTINCT user_id
|
||||
FROM
|
||||
users_table
|
||||
JOIN
|
||||
(SELECT
|
||||
max(res.val) as mx
|
||||
FROM
|
||||
read_intermediate_result('squares', 'binary') AS res (val int, val_square int),
|
||||
generate_series(0, 10) i
|
||||
WHERE
|
||||
res.val = i
|
||||
GROUP BY
|
||||
i) squares
|
||||
ON (mx = user_id)
|
||||
ORDER BY 1
|
||||
LIMIT 5;
|
||||
|
||||
-- should error out since there is a distributed table and
|
||||
-- there are no columns on the GROUP BY from the distributed table
|
||||
SELECT
|
||||
DISTINCT user_id
|
||||
FROM
|
||||
users_reference_table
|
||||
JOIN
|
||||
(SELECT
|
||||
max(val_square) as mx
|
||||
FROM
|
||||
read_intermediate_result('squares', 'binary') AS res (val int, val_square int), events_table
|
||||
WHERE
|
||||
events_table.user_id = res.val GROUP BY res.val) squares
|
||||
ON (mx = user_id)
|
||||
ORDER BY 1
|
||||
LIMIT 5;
|
||||
|
||||
ROLLBACK;
|
||||
|
||||
-- should work since we're using an immutable function as recurring tuple
|
||||
SELECT
|
||||
DISTINCT user_id
|
||||
FROM
|
||||
users_table
|
||||
JOIN
|
||||
(SELECT
|
||||
max(i+5)as mx
|
||||
FROM
|
||||
generate_series(0, 10) as i GROUP BY i) squares
|
||||
ON (mx = user_id)
|
||||
ORDER BY 1
|
||||
LIMIT 5;
|
||||
|
||||
|
||||
-- should not work since we're
|
||||
-- using an immutable function as recurring tuple
|
||||
-- along with a distributed table, where GROUP BY is
|
||||
-- on the recurring tuple
|
||||
SELECT
|
||||
DISTINCT user_id
|
||||
FROM
|
||||
users_reference_table
|
||||
JOIN
|
||||
(SELECT
|
||||
max(i+5)as mx
|
||||
FROM
|
||||
generate_series(0, 10) as i, events_table
|
||||
WHERE
|
||||
events_table.user_id = i GROUP BY i) squares
|
||||
ON (mx = user_id)
|
||||
ORDER BY 1
|
||||
LIMIT 5;
|
||||
|
||||
DROP TABLE user_buy_test_table;
|
||||
DROP TABLE users_ref_test_table;
|
||||
DROP TABLE users_return_test_table;
|
||||
|
|
Loading…
Reference in New Issue