mirror of https://github.com/citusdata/citus.git
Merge pull request #1816 from citusdata/fix_reference_regression
Relax the checks for ensuring distribution columns in the target listpull/1805/head
commit
46c9922def
|
@ -84,6 +84,8 @@ static DeferredErrorMessage * DeferErrorIfUnsupportedUnionQuery(Query *queryTree
|
||||||
static bool ExtractSetOperationStatmentWalker(Node *node, List **setOperationList);
|
static bool ExtractSetOperationStatmentWalker(Node *node, List **setOperationList);
|
||||||
static DeferredErrorMessage * DeferErrorIfUnsupportedTableCombination(Query *queryTree);
|
static DeferredErrorMessage * DeferErrorIfUnsupportedTableCombination(Query *queryTree);
|
||||||
static bool WindowPartitionOnDistributionColumn(Query *query);
|
static bool WindowPartitionOnDistributionColumn(Query *query);
|
||||||
|
static bool AllTargetExpressionsAreColumnReferences(List *targetEntryList);
|
||||||
|
static bool RangeTableListContainsOnlyReferenceTables(List *rangeTableList);
|
||||||
static FieldSelect * CompositeFieldRecursive(Expr *expression, Query *query);
|
static FieldSelect * CompositeFieldRecursive(Expr *expression, Query *query);
|
||||||
static bool FullCompositeFieldList(List *compositeFieldList);
|
static bool FullCompositeFieldList(List *compositeFieldList);
|
||||||
static MultiNode * MultiPlanTree(Query *queryTree);
|
static MultiNode * MultiPlanTree(Query *queryTree);
|
||||||
|
@ -1234,10 +1236,118 @@ TargetListOnPartitionColumn(Query *query, List *targetEntryList)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We could still behave as if the target list is on partition column if
|
||||||
|
* all range table entries are reference tables and all target expressions
|
||||||
|
* are column references to the given query level.
|
||||||
|
*/
|
||||||
|
if (!targetListOnPartitionColumn)
|
||||||
|
{
|
||||||
|
if (RangeTableListContainsOnlyReferenceTables(query->rtable) &&
|
||||||
|
AllTargetExpressionsAreColumnReferences(targetEntryList))
|
||||||
|
{
|
||||||
|
targetListOnPartitionColumn = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return targetListOnPartitionColumn;
|
return targetListOnPartitionColumn;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* AllTargetExpressionsAreColumnReferences returns true if non of the
|
||||||
|
* elements in the target entry list belong to an outer query (for
|
||||||
|
* example the query is a sublink and references to another query
|
||||||
|
* in the from list).
|
||||||
|
*
|
||||||
|
* The function also returns true if any of the target entries is not
|
||||||
|
* a column itself. This might be too restrictive, but, given that we're
|
||||||
|
* handling a very specific type of queries, that seems acceptable for now.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
AllTargetExpressionsAreColumnReferences(List *targetEntryList)
|
||||||
|
{
|
||||||
|
ListCell *targetEntryCell = NULL;
|
||||||
|
|
||||||
|
foreach(targetEntryCell, targetEntryList)
|
||||||
|
{
|
||||||
|
TargetEntry *targetEntry = lfirst(targetEntryCell);
|
||||||
|
Var *candidateColumn = NULL;
|
||||||
|
Expr *strippedColumnExpression = (Expr *) strip_implicit_coercions(
|
||||||
|
(Node *) targetEntry->expr);
|
||||||
|
|
||||||
|
if (IsA(strippedColumnExpression, Var))
|
||||||
|
{
|
||||||
|
candidateColumn = (Var *) strippedColumnExpression;
|
||||||
|
}
|
||||||
|
else if (IsA(strippedColumnExpression, FieldSelect))
|
||||||
|
{
|
||||||
|
FieldSelect *compositeField = (FieldSelect *) strippedColumnExpression;
|
||||||
|
Expr *fieldExpression = compositeField->arg;
|
||||||
|
|
||||||
|
if (IsA(fieldExpression, Var))
|
||||||
|
{
|
||||||
|
candidateColumn = (Var *) fieldExpression;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* we don't support target entries that are not columns */
|
||||||
|
if (candidateColumn == NULL)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (candidateColumn->varlevelsup > 0)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* RangeTableListContainsOnlyReferenceTables returns true if all range table
|
||||||
|
* entries are reference tables.
|
||||||
|
*
|
||||||
|
* The function returns false for range table entries that are not relations.
|
||||||
|
*
|
||||||
|
* Note that the function doesn't recurse into subqueries, returns false when
|
||||||
|
* a subquery is found.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
RangeTableListContainsOnlyReferenceTables(List *rangeTableList)
|
||||||
|
{
|
||||||
|
ListCell *rangeTableCell = NULL;
|
||||||
|
foreach(rangeTableCell, rangeTableList)
|
||||||
|
{
|
||||||
|
RangeTblEntry *rangeTableEntry = (RangeTblEntry *) lfirst(rangeTableCell);
|
||||||
|
|
||||||
|
if (rangeTableEntry->rtekind == RTE_RELATION)
|
||||||
|
{
|
||||||
|
Oid relationId = rangeTableEntry->relid;
|
||||||
|
|
||||||
|
if (!IsDistributedTable(relationId))
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (PartitionMethod(relationId) != DISTRIBUTE_BY_NONE)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* FullCompositeFieldList gets a composite field list, and checks if all fields
|
* FullCompositeFieldList gets a composite field list, and checks if all fields
|
||||||
* of composite type are used in the list.
|
* of composite type are used in the list.
|
||||||
|
|
|
@ -1193,6 +1193,116 @@ ORDER BY 1 LIMIT 3;
|
||||||
2
|
2
|
||||||
(3 rows)
|
(3 rows)
|
||||||
|
|
||||||
|
-- should be able to pushdown since one of the subqueries has distinct on reference tables
|
||||||
|
-- and there is only reference table in that subquery
|
||||||
|
SELECT
|
||||||
|
distinct_users, event_type, time
|
||||||
|
FROM
|
||||||
|
(SELECT user_id, time, event_type FROM events_table) as events_dist INNER JOIN
|
||||||
|
(SELECT DISTINCT user_id as distinct_users FROM users_reference_table) users_ref ON (events_dist.user_id = users_ref.distinct_users)
|
||||||
|
ORDER BY time DESC
|
||||||
|
LIMIT 5
|
||||||
|
OFFSET 0;
|
||||||
|
distinct_users | event_type | time
|
||||||
|
----------------+------------+---------------------------------
|
||||||
|
78 | 815 | Tue Jan 21 05:59:54.833395 2014
|
||||||
|
92 | 826 | Tue Jan 21 05:57:26.643861 2014
|
||||||
|
65 | 241 | Tue Jan 21 05:56:52.624231 2014
|
||||||
|
23 | 573 | Tue Jan 21 05:55:28.796818 2014
|
||||||
|
98 | 23 | Tue Jan 21 05:54:57.987456 2014
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
-- the same query wuth multiple reference tables in the subquery
|
||||||
|
SELECT
|
||||||
|
distinct_users, event_type, time
|
||||||
|
FROM
|
||||||
|
(SELECT user_id, time, event_type FROM events_table) as events_dist INNER JOIN
|
||||||
|
(SELECT DISTINCT users_reference_table.user_id as distinct_users FROM users_reference_table, events_reference_table
|
||||||
|
WHERE events_reference_table.user_id = users_reference_table.user_id AND events_reference_table.event_type IN (1,2,3,4)) users_ref
|
||||||
|
ON (events_dist.user_id = users_ref.distinct_users)
|
||||||
|
ORDER BY time DESC
|
||||||
|
LIMIT 5
|
||||||
|
OFFSET 0;
|
||||||
|
distinct_users | event_type | time
|
||||||
|
----------------+------------+---------------------------------
|
||||||
|
65 | 241 | Tue Jan 21 05:56:52.624231 2014
|
||||||
|
98 | 23 | Tue Jan 21 05:54:57.987456 2014
|
||||||
|
26 | 957 | Tue Jan 21 05:43:16.99674 2014
|
||||||
|
44 | 682 | Tue Jan 21 05:43:00.838945 2014
|
||||||
|
81 | 852 | Tue Jan 21 05:34:56.310878 2014
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
-- similar query as the above, but with group bys
|
||||||
|
SELECT
|
||||||
|
distinct_users, event_type, time
|
||||||
|
FROM
|
||||||
|
(SELECT user_id, time, event_type FROM events_table) as events_dist INNER JOIN
|
||||||
|
(SELECT user_id as distinct_users FROM users_reference_table GROUP BY distinct_users) users_ref ON (events_dist.user_id = users_ref.distinct_users)
|
||||||
|
ORDER BY time DESC
|
||||||
|
LIMIT 5
|
||||||
|
OFFSET 0;
|
||||||
|
distinct_users | event_type | time
|
||||||
|
----------------+------------+---------------------------------
|
||||||
|
78 | 815 | Tue Jan 21 05:59:54.833395 2014
|
||||||
|
92 | 826 | Tue Jan 21 05:57:26.643861 2014
|
||||||
|
65 | 241 | Tue Jan 21 05:56:52.624231 2014
|
||||||
|
23 | 573 | Tue Jan 21 05:55:28.796818 2014
|
||||||
|
98 | 23 | Tue Jan 21 05:54:57.987456 2014
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
-- should not push down this query since there is a distributed table (i.e., events_table)
|
||||||
|
-- which is not in the DISTINCT clause
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT DISTINCT users_reference_table.user_id FROM users_reference_table, events_table WHERE users_reference_table.user_id = events_table.value_4
|
||||||
|
) as foo;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Distinct on columns without partition column is currently unsupported
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT users_reference_table.user_id FROM users_reference_table, events_table WHERE users_reference_table.user_id = events_table.value_4
|
||||||
|
GROUP BY 1
|
||||||
|
) as foo;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Group by list without partition column is currently unsupported
|
||||||
|
-- similiar to the above examples, this time there is a subquery
|
||||||
|
-- whose output is not in the DISTINCT clause
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT DISTINCT users_reference_table.user_id FROM users_reference_table, (SELECT user_id, random() FROM events_table) as us_events WHERE users_reference_table.user_id = us_events.user_id
|
||||||
|
) as foo;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Distinct on columns without partition column is currently unsupported
|
||||||
|
-- the following query is safe to push down since the DISTINCT clause include distribution column
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT DISTINCT users_reference_table.user_id, us_events.user_id FROM users_reference_table, (SELECT user_id, random() FROM events_table WHERE event_type IN (2,3)) as us_events WHERE users_reference_table.user_id = us_events.user_id
|
||||||
|
) as foo
|
||||||
|
ORDER BY 1 DESC
|
||||||
|
LIMIT 4;
|
||||||
|
user_id | user_id
|
||||||
|
---------+---------
|
||||||
|
98 | 98
|
||||||
|
96 | 96
|
||||||
|
90 | 90
|
||||||
|
81 | 81
|
||||||
|
(4 rows)
|
||||||
|
|
||||||
|
-- should not pushdown since there is a non partition column on the DISTINCT clause
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_reference_table.user_id, us_events.value_4
|
||||||
|
FROM
|
||||||
|
users_reference_table,
|
||||||
|
(SELECT user_id, value_4, random() FROM events_table WHERE event_type IN (2,3)) as us_events
|
||||||
|
WHERE
|
||||||
|
users_reference_table.user_id = us_events.user_id
|
||||||
|
) as foo
|
||||||
|
ORDER BY 1 DESC
|
||||||
|
LIMIT 4;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Distinct on columns without partition column is currently unsupported
|
||||||
DROP TABLE user_buy_test_table;
|
DROP TABLE user_buy_test_table;
|
||||||
DROP TABLE users_ref_test_table;
|
DROP TABLE users_ref_test_table;
|
||||||
DROP TABLE users_return_test_table;
|
DROP TABLE users_return_test_table;
|
||||||
|
|
|
@ -343,3 +343,26 @@ ORDER BY user_id
|
||||||
LIMIT 5;
|
LIMIT 5;
|
||||||
ERROR: cannot push down this subquery
|
ERROR: cannot push down this subquery
|
||||||
DETAIL: Group by list without partition column is currently unsupported
|
DETAIL: Group by list without partition column is currently unsupported
|
||||||
|
-- similar query with slightly more complex group by
|
||||||
|
-- though the error message is a bit confusing
|
||||||
|
SELECT
|
||||||
|
user_id
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WHERE
|
||||||
|
value_2 >
|
||||||
|
(SELECT
|
||||||
|
max(value_2)
|
||||||
|
FROM
|
||||||
|
events_reference_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_reference_table.user_id AND event_type = 50
|
||||||
|
GROUP BY
|
||||||
|
(users_table.user_id * 2)
|
||||||
|
)
|
||||||
|
GROUP BY user_id
|
||||||
|
HAVING count(*) > 66
|
||||||
|
ORDER BY user_id
|
||||||
|
LIMIT 5;
|
||||||
|
ERROR: cannot push down this subquery
|
||||||
|
DETAIL: Group by list without partition column is currently unsupported
|
||||||
|
|
|
@ -950,6 +950,81 @@ SELECT foo.user_id FROM
|
||||||
) as foo
|
) as foo
|
||||||
ORDER BY 1 LIMIT 3;
|
ORDER BY 1 LIMIT 3;
|
||||||
|
|
||||||
|
-- should be able to pushdown since one of the subqueries has distinct on reference tables
|
||||||
|
-- and there is only reference table in that subquery
|
||||||
|
SELECT
|
||||||
|
distinct_users, event_type, time
|
||||||
|
FROM
|
||||||
|
(SELECT user_id, time, event_type FROM events_table) as events_dist INNER JOIN
|
||||||
|
(SELECT DISTINCT user_id as distinct_users FROM users_reference_table) users_ref ON (events_dist.user_id = users_ref.distinct_users)
|
||||||
|
ORDER BY time DESC
|
||||||
|
LIMIT 5
|
||||||
|
OFFSET 0;
|
||||||
|
|
||||||
|
-- the same query wuth multiple reference tables in the subquery
|
||||||
|
SELECT
|
||||||
|
distinct_users, event_type, time
|
||||||
|
FROM
|
||||||
|
(SELECT user_id, time, event_type FROM events_table) as events_dist INNER JOIN
|
||||||
|
(SELECT DISTINCT users_reference_table.user_id as distinct_users FROM users_reference_table, events_reference_table
|
||||||
|
WHERE events_reference_table.user_id = users_reference_table.user_id AND events_reference_table.event_type IN (1,2,3,4)) users_ref
|
||||||
|
ON (events_dist.user_id = users_ref.distinct_users)
|
||||||
|
ORDER BY time DESC
|
||||||
|
LIMIT 5
|
||||||
|
OFFSET 0;
|
||||||
|
|
||||||
|
-- similar query as the above, but with group bys
|
||||||
|
SELECT
|
||||||
|
distinct_users, event_type, time
|
||||||
|
FROM
|
||||||
|
(SELECT user_id, time, event_type FROM events_table) as events_dist INNER JOIN
|
||||||
|
(SELECT user_id as distinct_users FROM users_reference_table GROUP BY distinct_users) users_ref ON (events_dist.user_id = users_ref.distinct_users)
|
||||||
|
ORDER BY time DESC
|
||||||
|
LIMIT 5
|
||||||
|
OFFSET 0;
|
||||||
|
|
||||||
|
-- should not push down this query since there is a distributed table (i.e., events_table)
|
||||||
|
-- which is not in the DISTINCT clause
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT DISTINCT users_reference_table.user_id FROM users_reference_table, events_table WHERE users_reference_table.user_id = events_table.value_4
|
||||||
|
) as foo;
|
||||||
|
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT users_reference_table.user_id FROM users_reference_table, events_table WHERE users_reference_table.user_id = events_table.value_4
|
||||||
|
GROUP BY 1
|
||||||
|
) as foo;
|
||||||
|
|
||||||
|
-- similiar to the above examples, this time there is a subquery
|
||||||
|
-- whose output is not in the DISTINCT clause
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT DISTINCT users_reference_table.user_id FROM users_reference_table, (SELECT user_id, random() FROM events_table) as us_events WHERE users_reference_table.user_id = us_events.user_id
|
||||||
|
) as foo;
|
||||||
|
|
||||||
|
-- the following query is safe to push down since the DISTINCT clause include distribution column
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT DISTINCT users_reference_table.user_id, us_events.user_id FROM users_reference_table, (SELECT user_id, random() FROM events_table WHERE event_type IN (2,3)) as us_events WHERE users_reference_table.user_id = us_events.user_id
|
||||||
|
) as foo
|
||||||
|
ORDER BY 1 DESC
|
||||||
|
LIMIT 4;
|
||||||
|
|
||||||
|
-- should not pushdown since there is a non partition column on the DISTINCT clause
|
||||||
|
SELECT * FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
DISTINCT users_reference_table.user_id, us_events.value_4
|
||||||
|
FROM
|
||||||
|
users_reference_table,
|
||||||
|
(SELECT user_id, value_4, random() FROM events_table WHERE event_type IN (2,3)) as us_events
|
||||||
|
WHERE
|
||||||
|
users_reference_table.user_id = us_events.user_id
|
||||||
|
) as foo
|
||||||
|
ORDER BY 1 DESC
|
||||||
|
LIMIT 4;
|
||||||
|
|
||||||
DROP TABLE user_buy_test_table;
|
DROP TABLE user_buy_test_table;
|
||||||
DROP TABLE users_ref_test_table;
|
DROP TABLE users_ref_test_table;
|
||||||
DROP TABLE users_return_test_table;
|
DROP TABLE users_return_test_table;
|
||||||
|
|
|
@ -297,3 +297,25 @@ GROUP BY user_id
|
||||||
HAVING count(*) > 66
|
HAVING count(*) > 66
|
||||||
ORDER BY user_id
|
ORDER BY user_id
|
||||||
LIMIT 5;
|
LIMIT 5;
|
||||||
|
|
||||||
|
-- similar query with slightly more complex group by
|
||||||
|
-- though the error message is a bit confusing
|
||||||
|
SELECT
|
||||||
|
user_id
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WHERE
|
||||||
|
value_2 >
|
||||||
|
(SELECT
|
||||||
|
max(value_2)
|
||||||
|
FROM
|
||||||
|
events_reference_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_reference_table.user_id AND event_type = 50
|
||||||
|
GROUP BY
|
||||||
|
(users_table.user_id * 2)
|
||||||
|
)
|
||||||
|
GROUP BY user_id
|
||||||
|
HAVING count(*) > 66
|
||||||
|
ORDER BY user_id
|
||||||
|
LIMIT 5;
|
||||||
|
|
Loading…
Reference in New Issue