mirror of https://github.com/citusdata/citus.git
Merge pull request #1816 from citusdata/fix_reference_regression
Relax the checks for ensuring distribution columns in the target listpull/1805/head
commit
46c9922def
|
@ -84,6 +84,8 @@ static DeferredErrorMessage * DeferErrorIfUnsupportedUnionQuery(Query *queryTree
|
|||
static bool ExtractSetOperationStatmentWalker(Node *node, List **setOperationList);
|
||||
static DeferredErrorMessage * DeferErrorIfUnsupportedTableCombination(Query *queryTree);
|
||||
static bool WindowPartitionOnDistributionColumn(Query *query);
|
||||
static bool AllTargetExpressionsAreColumnReferences(List *targetEntryList);
|
||||
static bool RangeTableListContainsOnlyReferenceTables(List *rangeTableList);
|
||||
static FieldSelect * CompositeFieldRecursive(Expr *expression, Query *query);
|
||||
static bool FullCompositeFieldList(List *compositeFieldList);
|
||||
static MultiNode * MultiPlanTree(Query *queryTree);
|
||||
|
@ -1234,10 +1236,118 @@ TargetListOnPartitionColumn(Query *query, List *targetEntryList)
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We could still behave as if the target list is on partition column if
|
||||
* all range table entries are reference tables and all target expressions
|
||||
* are column references to the given query level.
|
||||
*/
|
||||
if (!targetListOnPartitionColumn)
|
||||
{
|
||||
if (RangeTableListContainsOnlyReferenceTables(query->rtable) &&
|
||||
AllTargetExpressionsAreColumnReferences(targetEntryList))
|
||||
{
|
||||
targetListOnPartitionColumn = true;
|
||||
}
|
||||
}
|
||||
|
||||
return targetListOnPartitionColumn;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* AllTargetExpressionsAreColumnReferences returns true if non of the
|
||||
* elements in the target entry list belong to an outer query (for
|
||||
* example the query is a sublink and references to another query
|
||||
* in the from list).
|
||||
*
|
||||
* The function also returns true if any of the target entries is not
|
||||
* a column itself. This might be too restrictive, but, given that we're
|
||||
* handling a very specific type of queries, that seems acceptable for now.
|
||||
*/
|
||||
static bool
|
||||
AllTargetExpressionsAreColumnReferences(List *targetEntryList)
|
||||
{
|
||||
ListCell *targetEntryCell = NULL;
|
||||
|
||||
foreach(targetEntryCell, targetEntryList)
|
||||
{
|
||||
TargetEntry *targetEntry = lfirst(targetEntryCell);
|
||||
Var *candidateColumn = NULL;
|
||||
Expr *strippedColumnExpression = (Expr *) strip_implicit_coercions(
|
||||
(Node *) targetEntry->expr);
|
||||
|
||||
if (IsA(strippedColumnExpression, Var))
|
||||
{
|
||||
candidateColumn = (Var *) strippedColumnExpression;
|
||||
}
|
||||
else if (IsA(strippedColumnExpression, FieldSelect))
|
||||
{
|
||||
FieldSelect *compositeField = (FieldSelect *) strippedColumnExpression;
|
||||
Expr *fieldExpression = compositeField->arg;
|
||||
|
||||
if (IsA(fieldExpression, Var))
|
||||
{
|
||||
candidateColumn = (Var *) fieldExpression;
|
||||
}
|
||||
}
|
||||
|
||||
/* we don't support target entries that are not columns */
|
||||
if (candidateColumn == NULL)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (candidateColumn->varlevelsup > 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* RangeTableListContainsOnlyReferenceTables returns true if all range table
|
||||
* entries are reference tables.
|
||||
*
|
||||
* The function returns false for range table entries that are not relations.
|
||||
*
|
||||
* Note that the function doesn't recurse into subqueries, returns false when
|
||||
* a subquery is found.
|
||||
*/
|
||||
static bool
|
||||
RangeTableListContainsOnlyReferenceTables(List *rangeTableList)
|
||||
{
|
||||
ListCell *rangeTableCell = NULL;
|
||||
foreach(rangeTableCell, rangeTableList)
|
||||
{
|
||||
RangeTblEntry *rangeTableEntry = (RangeTblEntry *) lfirst(rangeTableCell);
|
||||
|
||||
if (rangeTableEntry->rtekind == RTE_RELATION)
|
||||
{
|
||||
Oid relationId = rangeTableEntry->relid;
|
||||
|
||||
if (!IsDistributedTable(relationId))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (PartitionMethod(relationId) != DISTRIBUTE_BY_NONE)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* FullCompositeFieldList gets a composite field list, and checks if all fields
|
||||
* of composite type are used in the list.
|
||||
|
|
|
@ -1193,6 +1193,116 @@ ORDER BY 1 LIMIT 3;
|
|||
2
|
||||
(3 rows)
|
||||
|
||||
-- should be able to pushdown since one of the subqueries has distinct on reference tables
|
||||
-- and there is only reference table in that subquery
|
||||
SELECT
|
||||
distinct_users, event_type, time
|
||||
FROM
|
||||
(SELECT user_id, time, event_type FROM events_table) as events_dist INNER JOIN
|
||||
(SELECT DISTINCT user_id as distinct_users FROM users_reference_table) users_ref ON (events_dist.user_id = users_ref.distinct_users)
|
||||
ORDER BY time DESC
|
||||
LIMIT 5
|
||||
OFFSET 0;
|
||||
distinct_users | event_type | time
|
||||
----------------+------------+---------------------------------
|
||||
78 | 815 | Tue Jan 21 05:59:54.833395 2014
|
||||
92 | 826 | Tue Jan 21 05:57:26.643861 2014
|
||||
65 | 241 | Tue Jan 21 05:56:52.624231 2014
|
||||
23 | 573 | Tue Jan 21 05:55:28.796818 2014
|
||||
98 | 23 | Tue Jan 21 05:54:57.987456 2014
|
||||
(5 rows)
|
||||
|
||||
-- the same query wuth multiple reference tables in the subquery
|
||||
SELECT
|
||||
distinct_users, event_type, time
|
||||
FROM
|
||||
(SELECT user_id, time, event_type FROM events_table) as events_dist INNER JOIN
|
||||
(SELECT DISTINCT users_reference_table.user_id as distinct_users FROM users_reference_table, events_reference_table
|
||||
WHERE events_reference_table.user_id = users_reference_table.user_id AND events_reference_table.event_type IN (1,2,3,4)) users_ref
|
||||
ON (events_dist.user_id = users_ref.distinct_users)
|
||||
ORDER BY time DESC
|
||||
LIMIT 5
|
||||
OFFSET 0;
|
||||
distinct_users | event_type | time
|
||||
----------------+------------+---------------------------------
|
||||
65 | 241 | Tue Jan 21 05:56:52.624231 2014
|
||||
98 | 23 | Tue Jan 21 05:54:57.987456 2014
|
||||
26 | 957 | Tue Jan 21 05:43:16.99674 2014
|
||||
44 | 682 | Tue Jan 21 05:43:00.838945 2014
|
||||
81 | 852 | Tue Jan 21 05:34:56.310878 2014
|
||||
(5 rows)
|
||||
|
||||
-- similar query as the above, but with group bys
|
||||
SELECT
|
||||
distinct_users, event_type, time
|
||||
FROM
|
||||
(SELECT user_id, time, event_type FROM events_table) as events_dist INNER JOIN
|
||||
(SELECT user_id as distinct_users FROM users_reference_table GROUP BY distinct_users) users_ref ON (events_dist.user_id = users_ref.distinct_users)
|
||||
ORDER BY time DESC
|
||||
LIMIT 5
|
||||
OFFSET 0;
|
||||
distinct_users | event_type | time
|
||||
----------------+------------+---------------------------------
|
||||
78 | 815 | Tue Jan 21 05:59:54.833395 2014
|
||||
92 | 826 | Tue Jan 21 05:57:26.643861 2014
|
||||
65 | 241 | Tue Jan 21 05:56:52.624231 2014
|
||||
23 | 573 | Tue Jan 21 05:55:28.796818 2014
|
||||
98 | 23 | Tue Jan 21 05:54:57.987456 2014
|
||||
(5 rows)
|
||||
|
||||
-- should not push down this query since there is a distributed table (i.e., events_table)
|
||||
-- which is not in the DISTINCT clause
|
||||
SELECT * FROM
|
||||
(
|
||||
SELECT DISTINCT users_reference_table.user_id FROM users_reference_table, events_table WHERE users_reference_table.user_id = events_table.value_4
|
||||
) as foo;
|
||||
ERROR: cannot push down this subquery
|
||||
DETAIL: Distinct on columns without partition column is currently unsupported
|
||||
SELECT * FROM
|
||||
(
|
||||
SELECT users_reference_table.user_id FROM users_reference_table, events_table WHERE users_reference_table.user_id = events_table.value_4
|
||||
GROUP BY 1
|
||||
) as foo;
|
||||
ERROR: cannot push down this subquery
|
||||
DETAIL: Group by list without partition column is currently unsupported
|
||||
-- similiar to the above examples, this time there is a subquery
|
||||
-- whose output is not in the DISTINCT clause
|
||||
SELECT * FROM
|
||||
(
|
||||
SELECT DISTINCT users_reference_table.user_id FROM users_reference_table, (SELECT user_id, random() FROM events_table) as us_events WHERE users_reference_table.user_id = us_events.user_id
|
||||
) as foo;
|
||||
ERROR: cannot push down this subquery
|
||||
DETAIL: Distinct on columns without partition column is currently unsupported
|
||||
-- the following query is safe to push down since the DISTINCT clause include distribution column
|
||||
SELECT * FROM
|
||||
(
|
||||
SELECT DISTINCT users_reference_table.user_id, us_events.user_id FROM users_reference_table, (SELECT user_id, random() FROM events_table WHERE event_type IN (2,3)) as us_events WHERE users_reference_table.user_id = us_events.user_id
|
||||
) as foo
|
||||
ORDER BY 1 DESC
|
||||
LIMIT 4;
|
||||
user_id | user_id
|
||||
---------+---------
|
||||
98 | 98
|
||||
96 | 96
|
||||
90 | 90
|
||||
81 | 81
|
||||
(4 rows)
|
||||
|
||||
-- should not pushdown since there is a non partition column on the DISTINCT clause
|
||||
SELECT * FROM
|
||||
(
|
||||
SELECT
|
||||
DISTINCT users_reference_table.user_id, us_events.value_4
|
||||
FROM
|
||||
users_reference_table,
|
||||
(SELECT user_id, value_4, random() FROM events_table WHERE event_type IN (2,3)) as us_events
|
||||
WHERE
|
||||
users_reference_table.user_id = us_events.user_id
|
||||
) as foo
|
||||
ORDER BY 1 DESC
|
||||
LIMIT 4;
|
||||
ERROR: cannot push down this subquery
|
||||
DETAIL: Distinct on columns without partition column is currently unsupported
|
||||
DROP TABLE user_buy_test_table;
|
||||
DROP TABLE users_ref_test_table;
|
||||
DROP TABLE users_return_test_table;
|
||||
|
|
|
@ -343,3 +343,26 @@ ORDER BY user_id
|
|||
LIMIT 5;
|
||||
ERROR: cannot push down this subquery
|
||||
DETAIL: Group by list without partition column is currently unsupported
|
||||
-- similar query with slightly more complex group by
|
||||
-- though the error message is a bit confusing
|
||||
SELECT
|
||||
user_id
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
value_2 >
|
||||
(SELECT
|
||||
max(value_2)
|
||||
FROM
|
||||
events_reference_table
|
||||
WHERE
|
||||
users_table.user_id = events_reference_table.user_id AND event_type = 50
|
||||
GROUP BY
|
||||
(users_table.user_id * 2)
|
||||
)
|
||||
GROUP BY user_id
|
||||
HAVING count(*) > 66
|
||||
ORDER BY user_id
|
||||
LIMIT 5;
|
||||
ERROR: cannot push down this subquery
|
||||
DETAIL: Group by list without partition column is currently unsupported
|
||||
|
|
|
@ -950,6 +950,81 @@ SELECT foo.user_id FROM
|
|||
) as foo
|
||||
ORDER BY 1 LIMIT 3;
|
||||
|
||||
-- should be able to pushdown since one of the subqueries has distinct on reference tables
|
||||
-- and there is only reference table in that subquery
|
||||
SELECT
|
||||
distinct_users, event_type, time
|
||||
FROM
|
||||
(SELECT user_id, time, event_type FROM events_table) as events_dist INNER JOIN
|
||||
(SELECT DISTINCT user_id as distinct_users FROM users_reference_table) users_ref ON (events_dist.user_id = users_ref.distinct_users)
|
||||
ORDER BY time DESC
|
||||
LIMIT 5
|
||||
OFFSET 0;
|
||||
|
||||
-- the same query wuth multiple reference tables in the subquery
|
||||
SELECT
|
||||
distinct_users, event_type, time
|
||||
FROM
|
||||
(SELECT user_id, time, event_type FROM events_table) as events_dist INNER JOIN
|
||||
(SELECT DISTINCT users_reference_table.user_id as distinct_users FROM users_reference_table, events_reference_table
|
||||
WHERE events_reference_table.user_id = users_reference_table.user_id AND events_reference_table.event_type IN (1,2,3,4)) users_ref
|
||||
ON (events_dist.user_id = users_ref.distinct_users)
|
||||
ORDER BY time DESC
|
||||
LIMIT 5
|
||||
OFFSET 0;
|
||||
|
||||
-- similar query as the above, but with group bys
|
||||
SELECT
|
||||
distinct_users, event_type, time
|
||||
FROM
|
||||
(SELECT user_id, time, event_type FROM events_table) as events_dist INNER JOIN
|
||||
(SELECT user_id as distinct_users FROM users_reference_table GROUP BY distinct_users) users_ref ON (events_dist.user_id = users_ref.distinct_users)
|
||||
ORDER BY time DESC
|
||||
LIMIT 5
|
||||
OFFSET 0;
|
||||
|
||||
-- should not push down this query since there is a distributed table (i.e., events_table)
|
||||
-- which is not in the DISTINCT clause
|
||||
SELECT * FROM
|
||||
(
|
||||
SELECT DISTINCT users_reference_table.user_id FROM users_reference_table, events_table WHERE users_reference_table.user_id = events_table.value_4
|
||||
) as foo;
|
||||
|
||||
SELECT * FROM
|
||||
(
|
||||
SELECT users_reference_table.user_id FROM users_reference_table, events_table WHERE users_reference_table.user_id = events_table.value_4
|
||||
GROUP BY 1
|
||||
) as foo;
|
||||
|
||||
-- similiar to the above examples, this time there is a subquery
|
||||
-- whose output is not in the DISTINCT clause
|
||||
SELECT * FROM
|
||||
(
|
||||
SELECT DISTINCT users_reference_table.user_id FROM users_reference_table, (SELECT user_id, random() FROM events_table) as us_events WHERE users_reference_table.user_id = us_events.user_id
|
||||
) as foo;
|
||||
|
||||
-- the following query is safe to push down since the DISTINCT clause include distribution column
|
||||
SELECT * FROM
|
||||
(
|
||||
SELECT DISTINCT users_reference_table.user_id, us_events.user_id FROM users_reference_table, (SELECT user_id, random() FROM events_table WHERE event_type IN (2,3)) as us_events WHERE users_reference_table.user_id = us_events.user_id
|
||||
) as foo
|
||||
ORDER BY 1 DESC
|
||||
LIMIT 4;
|
||||
|
||||
-- should not pushdown since there is a non partition column on the DISTINCT clause
|
||||
SELECT * FROM
|
||||
(
|
||||
SELECT
|
||||
DISTINCT users_reference_table.user_id, us_events.value_4
|
||||
FROM
|
||||
users_reference_table,
|
||||
(SELECT user_id, value_4, random() FROM events_table WHERE event_type IN (2,3)) as us_events
|
||||
WHERE
|
||||
users_reference_table.user_id = us_events.user_id
|
||||
) as foo
|
||||
ORDER BY 1 DESC
|
||||
LIMIT 4;
|
||||
|
||||
DROP TABLE user_buy_test_table;
|
||||
DROP TABLE users_ref_test_table;
|
||||
DROP TABLE users_return_test_table;
|
||||
|
|
|
@ -297,3 +297,25 @@ GROUP BY user_id
|
|||
HAVING count(*) > 66
|
||||
ORDER BY user_id
|
||||
LIMIT 5;
|
||||
|
||||
-- similar query with slightly more complex group by
|
||||
-- though the error message is a bit confusing
|
||||
SELECT
|
||||
user_id
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
value_2 >
|
||||
(SELECT
|
||||
max(value_2)
|
||||
FROM
|
||||
events_reference_table
|
||||
WHERE
|
||||
users_table.user_id = events_reference_table.user_id AND event_type = 50
|
||||
GROUP BY
|
||||
(users_table.user_id * 2)
|
||||
)
|
||||
GROUP BY user_id
|
||||
HAVING count(*) > 66
|
||||
ORDER BY user_id
|
||||
LIMIT 5;
|
||||
|
|
Loading…
Reference in New Issue