Improve definition of RelationInfoContainsOnlyRecurringTuples

Before this commit, we considered !ContainsRecurringRTE() enough
for NotContainsOnlyRecurringTuples. However, instead, we can check
for existince of any distributed table.

DESCRIPTION: Fixes a bug that causes wrong results with complex outer joins
pull/3578/head
Onder Kalaci 2020-03-06 11:08:18 +01:00
parent 321d0152c1
commit 2ed19181fe
3 changed files with 463 additions and 18 deletions

View File

@ -81,9 +81,9 @@ static bool ShouldRecurseForRecurringTuplesJoinChecks(RelOptInfo *relOptInfo);
static bool RelationInfoContainsRecurringTuples(PlannerInfo *plannerInfo,
RelOptInfo *relationInfo,
RecurringTuplesType *recurType);
static bool IsRecurringRTE(RangeTblEntry *rangeTableEntry,
RecurringTuplesType *recurType);
static bool IsRecurringRangeTable(List *rangeTable, RecurringTuplesType *recurType);
static bool ContainsRecurringRTE(RangeTblEntry *rangeTableEntry,
RecurringTuplesType *recurType);
static bool ContainsRecurringRangeTable(List *rangeTable, RecurringTuplesType *recurType);
static bool HasRecurringTuples(Node *node, RecurringTuplesType *recurType);
static MultiNode * SubqueryPushdownMultiNodeTree(Query *queryTree);
static List * FlattenJoinVars(List *columnList, Query *queryTree);
@ -751,7 +751,7 @@ FromClauseRecurringTupleType(Query *queryTree)
* Try to figure out which type of recurring tuples we have to produce a
* relevant error message. If there are several we'll pick the first one.
*/
IsRecurringRangeTable(queryTree->rtable, &recurType);
ContainsRecurringRangeTable(queryTree->rtable, &recurType);
return recurType;
}
@ -1336,7 +1336,6 @@ static bool
RelationInfoContainsOnlyRecurringTuples(PlannerInfo *plannerInfo,
RelOptInfo *relationInfo)
{
RecurringTuplesType recurType;
Relids relids = bms_copy(relationInfo->relids);
int relationId = -1;
@ -1344,11 +1343,19 @@ RelationInfoContainsOnlyRecurringTuples(PlannerInfo *plannerInfo,
{
RangeTblEntry *rangeTableEntry = plannerInfo->simple_rte_array[relationId];
/* relationInfo has this range table entry */
if (!IsRecurringRTE(rangeTableEntry, &recurType))
if (FindNodeCheckInRangeTableList(list_make1(rangeTableEntry),
IsDistributedTableRTE))
{
/* we already found a distributed table, no need to check further */
return false;
}
/*
* If there are no distributed tables, there should be at least
* one recurring rte.
*/
RecurringTuplesType recurType PG_USED_FOR_ASSERTS_ONLY;
Assert(ContainsRecurringRTE(rangeTableEntry, &recurType));
}
return true;
@ -1376,7 +1383,7 @@ RelationInfoContainsRecurringTuples(PlannerInfo *plannerInfo, RelOptInfo *relati
RangeTblEntry *rangeTableEntry = plannerInfo->simple_rte_array[relationId];
/* relationInfo has this range table entry */
if (IsRecurringRTE(rangeTableEntry, recurType))
if (ContainsRecurringRTE(rangeTableEntry, recurType))
{
return true;
}
@ -1387,24 +1394,24 @@ RelationInfoContainsRecurringTuples(PlannerInfo *plannerInfo, RelOptInfo *relati
/*
* IsRecurringRTE returns whether the range table entry will generate
* the same set of tuples when repeating it in a query on different
* shards.
* ContainsRecurringRTE returns whether the range table entry contains
* any entry that generates the same set of tuples when repeating it in
* a query on different shards.
*/
static bool
IsRecurringRTE(RangeTblEntry *rangeTableEntry, RecurringTuplesType *recurType)
ContainsRecurringRTE(RangeTblEntry *rangeTableEntry, RecurringTuplesType *recurType)
{
return IsRecurringRangeTable(list_make1(rangeTableEntry), recurType);
return ContainsRecurringRangeTable(list_make1(rangeTableEntry), recurType);
}
/*
* IsRecurringRangeTable returns whether the range table will generate
* the same set of tuples when repeating it in a query on different
* shards.
* ContainsRecurringRangeTable returns whether the range table list contains
* any entry that generates the same set of tuples when repeating it in
* a query on different shards.
*/
static bool
IsRecurringRangeTable(List *rangeTable, RecurringTuplesType *recurType)
ContainsRecurringRangeTable(List *rangeTable, RecurringTuplesType *recurType)
{
return range_table_walker(rangeTable, HasRecurringTuples, recurType,
QTW_EXAMINE_RTES_BEFORE);

View File

@ -762,10 +762,255 @@ SELECT user_id, value_1 FROM cte_user_with_view ORDER BY 1, 2 LIMIT 10 OFFSET 2;
---------------------------------------------------------------------
(0 rows)
-- test case added for https://github.com/citusdata/citus/issues/3565
CREATE TABLE test_cte
(
user_id varchar
);
INSERT INTO test_cte
SELECT *
FROM (VALUES ('1'), ('1'), ('2'), ('2'), ('3'), ('4'), ('5'), ('6'), ('7'), ('8')) AS foo;
CREATE TABLE test_cte_distributed
(
user_id varchar
);
SELECT *
FROM create_distributed_table('test_cte_distributed', 'user_id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
INSERT INTO test_cte_distributed
SELECT *
FROM (VALUES ('1'), ('3'), ('3'), ('5'), ('8')) AS foo;
WITH distinct_undistribured AS (
SELECT DISTINCT user_id
FROM test_cte
),
exsist_in_distributed AS (
SELECT DISTINCT user_id
FROM test_cte_distributed
WHERE EXISTS(SELECT *
FROM distinct_undistribured
WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)
)
SELECT
*
FROM
distinct_undistribured LEFT JOIN exsist_in_distributed
ON distinct_undistribured.user_id = exsist_in_distributed.user_id
ORDER BY 2 DESC, 1 DESC;
user_id | user_id
---------------------------------------------------------------------
7 |
6 |
4 |
2 |
8 | 8
5 | 5
3 | 3
1 | 1
(8 rows)
-- same query, but the CTE is written as subquery
WITH distinct_undistribured AS
(SELECT DISTINCT user_id
FROM test_cte)
SELECT *
FROM distinct_undistribured
LEFT JOIN
(SELECT DISTINCT user_id
FROM test_cte_distributed
WHERE EXISTS
(SELECT *
FROM distinct_undistribured
WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed
ON distinct_undistribured.user_id = exsist_in_distributed.user_id
ORDER BY 2 DESC, 1 DESC;
ERROR: cannot pushdown the subquery
DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join
-- similar query as the above, but this time
-- use NOT EXITS, which is pretty common struct
WITH distinct_undistribured AS
(SELECT DISTINCT user_id
FROM test_cte)
SELECT *
FROM distinct_undistribured
LEFT JOIN
(SELECT DISTINCT user_id
FROM test_cte_distributed
WHERE NOT EXISTS
(SELECT NULL
FROM distinct_undistribured
WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed ON distinct_undistribured.user_id = exsist_in_distributed.user_id;
ERROR: cannot pushdown the subquery
DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join
-- same NOT EXISTS struct, but with CTE
-- so should work
WITH distinct_undistribured AS (
SELECT DISTINCT user_id
FROM test_cte
),
not_exsist_in_distributed AS (
SELECT DISTINCT user_id
FROM test_cte_distributed
WHERE NOT EXISTS(SELECT NULL
FROM distinct_undistribured
WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)
)
SELECT
*
FROM
distinct_undistribured LEFT JOIN not_exsist_in_distributed
ON distinct_undistribured.user_id = not_exsist_in_distributed.user_id
ORDER BY 2 DESC, 1 DESC;
user_id | user_id
---------------------------------------------------------------------
8 |
7 |
6 |
5 |
4 |
3 |
2 |
1 |
(8 rows)
-- similar query, but this time the second
-- part of the query is not inside a CTE
WITH distinct_undistribured AS (
SELECT DISTINCT user_id
FROM test_cte
)
SELECT count(*)
FROM distinct_undistribured
LEFT JOIN
(SELECT *,
random()
FROM test_cte_distributed d1
WHERE NOT EXISTS
(SELECT NULL
FROM distinct_undistribured d2
WHERE d1.user_id = d2.user_id )) AS bar USING (user_id);
ERROR: cannot pushdown the subquery
DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join
-- should work fine with cte inlinig disabled
SET citus.enable_cte_inlining TO false;
WITH distinct_undistribured AS (
SELECT DISTINCT user_id
FROM test_cte
),
exsist_in_distributed AS (
SELECT DISTINCT user_id
FROM test_cte_distributed
WHERE EXISTS(SELECT *
FROM distinct_undistribured
WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)
)
SELECT
*
FROM
distinct_undistribured LEFT JOIN exsist_in_distributed
ON distinct_undistribured.user_id = exsist_in_distributed.user_id
ORDER BY 2 DESC, 1 DESC;
user_id | user_id
---------------------------------------------------------------------
7 |
6 |
4 |
2 |
8 | 8
5 | 5
3 | 3
1 | 1
(8 rows)
WITH distinct_undistribured AS
(SELECT DISTINCT user_id
FROM test_cte)
SELECT *
FROM distinct_undistribured
LEFT JOIN
(SELECT DISTINCT user_id
FROM test_cte_distributed
WHERE EXISTS
(SELECT *
FROM distinct_undistribured
WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed
ON distinct_undistribured.user_id = exsist_in_distributed.user_id
ORDER BY 2 DESC, 1 DESC;
ERROR: cannot pushdown the subquery
DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join
WITH distinct_undistribured AS
(SELECT DISTINCT user_id
FROM test_cte)
SELECT *
FROM distinct_undistribured
LEFT JOIN
(SELECT DISTINCT user_id
FROM test_cte_distributed
WHERE NOT EXISTS
(SELECT NULL
FROM distinct_undistribured
WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed ON distinct_undistribured.user_id = exsist_in_distributed.user_id;
ERROR: cannot pushdown the subquery
DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join
-- NOT EXISTS struct, with cte inlining disabled
WITH distinct_undistribured AS (
SELECT DISTINCT user_id
FROM test_cte
),
not_exsist_in_distributed AS (
SELECT DISTINCT user_id
FROM test_cte_distributed
WHERE NOT EXISTS(SELECT NULL
FROM distinct_undistribured
WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)
)
SELECT
*
FROM
distinct_undistribured LEFT JOIN not_exsist_in_distributed
ON distinct_undistribured.user_id = not_exsist_in_distributed.user_id
ORDER BY 2 DESC, 1 DESC;
user_id | user_id
---------------------------------------------------------------------
8 |
7 |
6 |
5 |
4 |
3 |
2 |
1 |
(8 rows)
-- similar query, but this time the second
-- part of the query is not inside a CTE
WITH distinct_undistribured AS (
SELECT DISTINCT user_id
FROM test_cte
)
SELECT count(*)
FROM distinct_undistribured
LEFT JOIN
(SELECT *,
random()
FROM test_cte_distributed d1
WHERE NOT EXISTS
(SELECT NULL
FROM distinct_undistribured d2
WHERE d1.user_id = d2.user_id )) AS bar USING (user_id);
ERROR: cannot pushdown the subquery
DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join
RESET citus.enable_cte_inlining;
DROP VIEW basic_view;
DROP VIEW cte_view;
DROP SCHEMA with_basics CASCADE;
NOTICE: drop cascades to 3 other objects
NOTICE: drop cascades to 5 other objects
DETAIL: drop cascades to table users_table
drop cascades to table events_table
drop cascades to type xy
drop cascades to table test_cte
drop cascades to table test_cte_distributed

View File

@ -505,6 +505,199 @@ WITH cte_user_with_view AS
)
SELECT user_id, value_1 FROM cte_user_with_view ORDER BY 1, 2 LIMIT 10 OFFSET 2;
-- test case added for https://github.com/citusdata/citus/issues/3565
CREATE TABLE test_cte
(
user_id varchar
);
INSERT INTO test_cte
SELECT *
FROM (VALUES ('1'), ('1'), ('2'), ('2'), ('3'), ('4'), ('5'), ('6'), ('7'), ('8')) AS foo;
CREATE TABLE test_cte_distributed
(
user_id varchar
);
SELECT *
FROM create_distributed_table('test_cte_distributed', 'user_id');
INSERT INTO test_cte_distributed
SELECT *
FROM (VALUES ('1'), ('3'), ('3'), ('5'), ('8')) AS foo;
WITH distinct_undistribured AS (
SELECT DISTINCT user_id
FROM test_cte
),
exsist_in_distributed AS (
SELECT DISTINCT user_id
FROM test_cte_distributed
WHERE EXISTS(SELECT *
FROM distinct_undistribured
WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)
)
SELECT
*
FROM
distinct_undistribured LEFT JOIN exsist_in_distributed
ON distinct_undistribured.user_id = exsist_in_distributed.user_id
ORDER BY 2 DESC, 1 DESC;
-- same query, but the CTE is written as subquery
WITH distinct_undistribured AS
(SELECT DISTINCT user_id
FROM test_cte)
SELECT *
FROM distinct_undistribured
LEFT JOIN
(SELECT DISTINCT user_id
FROM test_cte_distributed
WHERE EXISTS
(SELECT *
FROM distinct_undistribured
WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed
ON distinct_undistribured.user_id = exsist_in_distributed.user_id
ORDER BY 2 DESC, 1 DESC;
-- similar query as the above, but this time
-- use NOT EXITS, which is pretty common struct
WITH distinct_undistribured AS
(SELECT DISTINCT user_id
FROM test_cte)
SELECT *
FROM distinct_undistribured
LEFT JOIN
(SELECT DISTINCT user_id
FROM test_cte_distributed
WHERE NOT EXISTS
(SELECT NULL
FROM distinct_undistribured
WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed ON distinct_undistribured.user_id = exsist_in_distributed.user_id;
-- same NOT EXISTS struct, but with CTE
-- so should work
WITH distinct_undistribured AS (
SELECT DISTINCT user_id
FROM test_cte
),
not_exsist_in_distributed AS (
SELECT DISTINCT user_id
FROM test_cte_distributed
WHERE NOT EXISTS(SELECT NULL
FROM distinct_undistribured
WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)
)
SELECT
*
FROM
distinct_undistribured LEFT JOIN not_exsist_in_distributed
ON distinct_undistribured.user_id = not_exsist_in_distributed.user_id
ORDER BY 2 DESC, 1 DESC;
-- similar query, but this time the second
-- part of the query is not inside a CTE
WITH distinct_undistribured AS (
SELECT DISTINCT user_id
FROM test_cte
)
SELECT count(*)
FROM distinct_undistribured
LEFT JOIN
(SELECT *,
random()
FROM test_cte_distributed d1
WHERE NOT EXISTS
(SELECT NULL
FROM distinct_undistribured d2
WHERE d1.user_id = d2.user_id )) AS bar USING (user_id);
-- should work fine with cte inlinig disabled
SET citus.enable_cte_inlining TO false;
WITH distinct_undistribured AS (
SELECT DISTINCT user_id
FROM test_cte
),
exsist_in_distributed AS (
SELECT DISTINCT user_id
FROM test_cte_distributed
WHERE EXISTS(SELECT *
FROM distinct_undistribured
WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)
)
SELECT
*
FROM
distinct_undistribured LEFT JOIN exsist_in_distributed
ON distinct_undistribured.user_id = exsist_in_distributed.user_id
ORDER BY 2 DESC, 1 DESC;
WITH distinct_undistribured AS
(SELECT DISTINCT user_id
FROM test_cte)
SELECT *
FROM distinct_undistribured
LEFT JOIN
(SELECT DISTINCT user_id
FROM test_cte_distributed
WHERE EXISTS
(SELECT *
FROM distinct_undistribured
WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed
ON distinct_undistribured.user_id = exsist_in_distributed.user_id
ORDER BY 2 DESC, 1 DESC;
WITH distinct_undistribured AS
(SELECT DISTINCT user_id
FROM test_cte)
SELECT *
FROM distinct_undistribured
LEFT JOIN
(SELECT DISTINCT user_id
FROM test_cte_distributed
WHERE NOT EXISTS
(SELECT NULL
FROM distinct_undistribured
WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed ON distinct_undistribured.user_id = exsist_in_distributed.user_id;
-- NOT EXISTS struct, with cte inlining disabled
WITH distinct_undistribured AS (
SELECT DISTINCT user_id
FROM test_cte
),
not_exsist_in_distributed AS (
SELECT DISTINCT user_id
FROM test_cte_distributed
WHERE NOT EXISTS(SELECT NULL
FROM distinct_undistribured
WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)
)
SELECT
*
FROM
distinct_undistribured LEFT JOIN not_exsist_in_distributed
ON distinct_undistribured.user_id = not_exsist_in_distributed.user_id
ORDER BY 2 DESC, 1 DESC;
-- similar query, but this time the second
-- part of the query is not inside a CTE
WITH distinct_undistribured AS (
SELECT DISTINCT user_id
FROM test_cte
)
SELECT count(*)
FROM distinct_undistribured
LEFT JOIN
(SELECT *,
random()
FROM test_cte_distributed d1
WHERE NOT EXISTS
(SELECT NULL
FROM distinct_undistribured d2
WHERE d1.user_id = d2.user_id )) AS bar USING (user_id);
RESET citus.enable_cte_inlining;
DROP VIEW basic_view;
DROP VIEW cte_view;
DROP SCHEMA with_basics CASCADE;