diff --git a/src/backend/distributed/planner/query_pushdown_planning.c b/src/backend/distributed/planner/query_pushdown_planning.c index 4653c6e37..d1f031e1b 100644 --- a/src/backend/distributed/planner/query_pushdown_planning.c +++ b/src/backend/distributed/planner/query_pushdown_planning.c @@ -81,9 +81,9 @@ static bool ShouldRecurseForRecurringTuplesJoinChecks(RelOptInfo *relOptInfo); static bool RelationInfoContainsRecurringTuples(PlannerInfo *plannerInfo, RelOptInfo *relationInfo, RecurringTuplesType *recurType); -static bool IsRecurringRTE(RangeTblEntry *rangeTableEntry, - RecurringTuplesType *recurType); -static bool IsRecurringRangeTable(List *rangeTable, RecurringTuplesType *recurType); +static bool ContainsRecurringRTE(RangeTblEntry *rangeTableEntry, + RecurringTuplesType *recurType); +static bool ContainsRecurringRangeTable(List *rangeTable, RecurringTuplesType *recurType); static bool HasRecurringTuples(Node *node, RecurringTuplesType *recurType); static MultiNode * SubqueryPushdownMultiNodeTree(Query *queryTree); static List * FlattenJoinVars(List *columnList, Query *queryTree); @@ -751,7 +751,7 @@ FromClauseRecurringTupleType(Query *queryTree) * Try to figure out which type of recurring tuples we have to produce a * relevant error message. If there are several we'll pick the first one. */ - IsRecurringRangeTable(queryTree->rtable, &recurType); + ContainsRecurringRangeTable(queryTree->rtable, &recurType); return recurType; } @@ -1336,7 +1336,6 @@ static bool RelationInfoContainsOnlyRecurringTuples(PlannerInfo *plannerInfo, RelOptInfo *relationInfo) { - RecurringTuplesType recurType; Relids relids = bms_copy(relationInfo->relids); int relationId = -1; @@ -1344,11 +1343,19 @@ RelationInfoContainsOnlyRecurringTuples(PlannerInfo *plannerInfo, { RangeTblEntry *rangeTableEntry = plannerInfo->simple_rte_array[relationId]; - /* relationInfo has this range table entry */ - if (!IsRecurringRTE(rangeTableEntry, &recurType)) + if (FindNodeCheckInRangeTableList(list_make1(rangeTableEntry), + IsDistributedTableRTE)) { + /* we already found a distributed table, no need to check further */ return false; } + + /* + * If there are no distributed tables, there should be at least + * one recurring rte. + */ + RecurringTuplesType recurType PG_USED_FOR_ASSERTS_ONLY; + Assert(ContainsRecurringRTE(rangeTableEntry, &recurType)); } return true; @@ -1376,7 +1383,7 @@ RelationInfoContainsRecurringTuples(PlannerInfo *plannerInfo, RelOptInfo *relati RangeTblEntry *rangeTableEntry = plannerInfo->simple_rte_array[relationId]; /* relationInfo has this range table entry */ - if (IsRecurringRTE(rangeTableEntry, recurType)) + if (ContainsRecurringRTE(rangeTableEntry, recurType)) { return true; } @@ -1387,24 +1394,24 @@ RelationInfoContainsRecurringTuples(PlannerInfo *plannerInfo, RelOptInfo *relati /* - * IsRecurringRTE returns whether the range table entry will generate - * the same set of tuples when repeating it in a query on different - * shards. + * ContainsRecurringRTE returns whether the range table entry contains + * any entry that generates the same set of tuples when repeating it in + * a query on different shards. */ static bool -IsRecurringRTE(RangeTblEntry *rangeTableEntry, RecurringTuplesType *recurType) +ContainsRecurringRTE(RangeTblEntry *rangeTableEntry, RecurringTuplesType *recurType) { - return IsRecurringRangeTable(list_make1(rangeTableEntry), recurType); + return ContainsRecurringRangeTable(list_make1(rangeTableEntry), recurType); } /* - * IsRecurringRangeTable returns whether the range table will generate - * the same set of tuples when repeating it in a query on different - * shards. + * ContainsRecurringRangeTable returns whether the range table list contains + * any entry that generates the same set of tuples when repeating it in + * a query on different shards. */ static bool -IsRecurringRangeTable(List *rangeTable, RecurringTuplesType *recurType) +ContainsRecurringRangeTable(List *rangeTable, RecurringTuplesType *recurType) { return range_table_walker(rangeTable, HasRecurringTuples, recurType, QTW_EXAMINE_RTES_BEFORE); diff --git a/src/test/regress/expected/with_basics.out b/src/test/regress/expected/with_basics.out index 153ec2651..8c7fce459 100644 --- a/src/test/regress/expected/with_basics.out +++ b/src/test/regress/expected/with_basics.out @@ -762,10 +762,255 @@ SELECT user_id, value_1 FROM cte_user_with_view ORDER BY 1, 2 LIMIT 10 OFFSET 2; --------------------------------------------------------------------- (0 rows) +-- test case added for https://github.com/citusdata/citus/issues/3565 +CREATE TABLE test_cte +( + user_id varchar +); +INSERT INTO test_cte +SELECT * +FROM (VALUES ('1'), ('1'), ('2'), ('2'), ('3'), ('4'), ('5'), ('6'), ('7'), ('8')) AS foo; +CREATE TABLE test_cte_distributed +( + user_id varchar +); +SELECT * +FROM create_distributed_table('test_cte_distributed', 'user_id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO test_cte_distributed +SELECT * +FROM (VALUES ('1'), ('3'), ('3'), ('5'), ('8')) AS foo; +WITH distinct_undistribured AS ( + SELECT DISTINCT user_id + FROM test_cte +), + exsist_in_distributed AS ( + SELECT DISTINCT user_id + FROM test_cte_distributed + WHERE EXISTS(SELECT * + FROM distinct_undistribured + WHERE distinct_undistribured.user_id = test_cte_distributed.user_id) + ) +SELECT + * +FROM + distinct_undistribured LEFT JOIN exsist_in_distributed + ON distinct_undistribured.user_id = exsist_in_distributed.user_id +ORDER BY 2 DESC, 1 DESC; + user_id | user_id +--------------------------------------------------------------------- + 7 | + 6 | + 4 | + 2 | + 8 | 8 + 5 | 5 + 3 | 3 + 1 | 1 +(8 rows) + +-- same query, but the CTE is written as subquery +WITH distinct_undistribured AS + (SELECT DISTINCT user_id + FROM test_cte) +SELECT * +FROM distinct_undistribured +LEFT JOIN + (SELECT DISTINCT user_id + FROM test_cte_distributed + WHERE EXISTS + (SELECT * + FROM distinct_undistribured + WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed + ON distinct_undistribured.user_id = exsist_in_distributed.user_id +ORDER BY 2 DESC, 1 DESC; +ERROR: cannot pushdown the subquery +DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join +-- similar query as the above, but this time +-- use NOT EXITS, which is pretty common struct +WITH distinct_undistribured AS + (SELECT DISTINCT user_id + FROM test_cte) +SELECT * +FROM distinct_undistribured +LEFT JOIN + (SELECT DISTINCT user_id + FROM test_cte_distributed + WHERE NOT EXISTS + (SELECT NULL + FROM distinct_undistribured + WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed ON distinct_undistribured.user_id = exsist_in_distributed.user_id; +ERROR: cannot pushdown the subquery +DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join +-- same NOT EXISTS struct, but with CTE +-- so should work +WITH distinct_undistribured AS ( + SELECT DISTINCT user_id + FROM test_cte +), + not_exsist_in_distributed AS ( + SELECT DISTINCT user_id + FROM test_cte_distributed + WHERE NOT EXISTS(SELECT NULL + FROM distinct_undistribured + WHERE distinct_undistribured.user_id = test_cte_distributed.user_id) + ) +SELECT + * +FROM + distinct_undistribured LEFT JOIN not_exsist_in_distributed + ON distinct_undistribured.user_id = not_exsist_in_distributed.user_id +ORDER BY 2 DESC, 1 DESC; + user_id | user_id +--------------------------------------------------------------------- + 8 | + 7 | + 6 | + 5 | + 4 | + 3 | + 2 | + 1 | +(8 rows) + +-- similar query, but this time the second +-- part of the query is not inside a CTE +WITH distinct_undistribured AS ( + SELECT DISTINCT user_id + FROM test_cte +) +SELECT count(*) +FROM distinct_undistribured +LEFT JOIN + (SELECT *, + random() + FROM test_cte_distributed d1 + WHERE NOT EXISTS + (SELECT NULL + FROM distinct_undistribured d2 + WHERE d1.user_id = d2.user_id )) AS bar USING (user_id); +ERROR: cannot pushdown the subquery +DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join +-- should work fine with cte inlinig disabled +SET citus.enable_cte_inlining TO false; +WITH distinct_undistribured AS ( + SELECT DISTINCT user_id + FROM test_cte +), + exsist_in_distributed AS ( + SELECT DISTINCT user_id + FROM test_cte_distributed + WHERE EXISTS(SELECT * + FROM distinct_undistribured + WHERE distinct_undistribured.user_id = test_cte_distributed.user_id) + ) +SELECT + * +FROM + distinct_undistribured LEFT JOIN exsist_in_distributed + ON distinct_undistribured.user_id = exsist_in_distributed.user_id +ORDER BY 2 DESC, 1 DESC; + user_id | user_id +--------------------------------------------------------------------- + 7 | + 6 | + 4 | + 2 | + 8 | 8 + 5 | 5 + 3 | 3 + 1 | 1 +(8 rows) + +WITH distinct_undistribured AS + (SELECT DISTINCT user_id + FROM test_cte) +SELECT * +FROM distinct_undistribured +LEFT JOIN + (SELECT DISTINCT user_id + FROM test_cte_distributed + WHERE EXISTS + (SELECT * + FROM distinct_undistribured + WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed + ON distinct_undistribured.user_id = exsist_in_distributed.user_id +ORDER BY 2 DESC, 1 DESC; +ERROR: cannot pushdown the subquery +DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join +WITH distinct_undistribured AS + (SELECT DISTINCT user_id + FROM test_cte) +SELECT * +FROM distinct_undistribured +LEFT JOIN + (SELECT DISTINCT user_id + FROM test_cte_distributed + WHERE NOT EXISTS + (SELECT NULL + FROM distinct_undistribured + WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed ON distinct_undistribured.user_id = exsist_in_distributed.user_id; +ERROR: cannot pushdown the subquery +DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join +-- NOT EXISTS struct, with cte inlining disabled +WITH distinct_undistribured AS ( + SELECT DISTINCT user_id + FROM test_cte +), + not_exsist_in_distributed AS ( + SELECT DISTINCT user_id + FROM test_cte_distributed + WHERE NOT EXISTS(SELECT NULL + FROM distinct_undistribured + WHERE distinct_undistribured.user_id = test_cte_distributed.user_id) + ) +SELECT + * +FROM + distinct_undistribured LEFT JOIN not_exsist_in_distributed + ON distinct_undistribured.user_id = not_exsist_in_distributed.user_id +ORDER BY 2 DESC, 1 DESC; + user_id | user_id +--------------------------------------------------------------------- + 8 | + 7 | + 6 | + 5 | + 4 | + 3 | + 2 | + 1 | +(8 rows) + +-- similar query, but this time the second +-- part of the query is not inside a CTE +WITH distinct_undistribured AS ( + SELECT DISTINCT user_id + FROM test_cte +) +SELECT count(*) +FROM distinct_undistribured +LEFT JOIN + (SELECT *, + random() + FROM test_cte_distributed d1 + WHERE NOT EXISTS + (SELECT NULL + FROM distinct_undistribured d2 + WHERE d1.user_id = d2.user_id )) AS bar USING (user_id); +ERROR: cannot pushdown the subquery +DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join +RESET citus.enable_cte_inlining; DROP VIEW basic_view; DROP VIEW cte_view; DROP SCHEMA with_basics CASCADE; -NOTICE: drop cascades to 3 other objects +NOTICE: drop cascades to 5 other objects DETAIL: drop cascades to table users_table drop cascades to table events_table drop cascades to type xy +drop cascades to table test_cte +drop cascades to table test_cte_distributed diff --git a/src/test/regress/sql/with_basics.sql b/src/test/regress/sql/with_basics.sql index ac556f9cd..88e048a02 100644 --- a/src/test/regress/sql/with_basics.sql +++ b/src/test/regress/sql/with_basics.sql @@ -505,6 +505,199 @@ WITH cte_user_with_view AS ) SELECT user_id, value_1 FROM cte_user_with_view ORDER BY 1, 2 LIMIT 10 OFFSET 2; + +-- test case added for https://github.com/citusdata/citus/issues/3565 +CREATE TABLE test_cte +( + user_id varchar +); +INSERT INTO test_cte +SELECT * +FROM (VALUES ('1'), ('1'), ('2'), ('2'), ('3'), ('4'), ('5'), ('6'), ('7'), ('8')) AS foo; +CREATE TABLE test_cte_distributed +( + user_id varchar +); +SELECT * +FROM create_distributed_table('test_cte_distributed', 'user_id'); +INSERT INTO test_cte_distributed +SELECT * +FROM (VALUES ('1'), ('3'), ('3'), ('5'), ('8')) AS foo; + +WITH distinct_undistribured AS ( + SELECT DISTINCT user_id + FROM test_cte +), + exsist_in_distributed AS ( + SELECT DISTINCT user_id + FROM test_cte_distributed + WHERE EXISTS(SELECT * + FROM distinct_undistribured + WHERE distinct_undistribured.user_id = test_cte_distributed.user_id) + ) +SELECT + * +FROM + distinct_undistribured LEFT JOIN exsist_in_distributed + ON distinct_undistribured.user_id = exsist_in_distributed.user_id +ORDER BY 2 DESC, 1 DESC; + +-- same query, but the CTE is written as subquery +WITH distinct_undistribured AS + (SELECT DISTINCT user_id + FROM test_cte) +SELECT * +FROM distinct_undistribured +LEFT JOIN + (SELECT DISTINCT user_id + FROM test_cte_distributed + WHERE EXISTS + (SELECT * + FROM distinct_undistribured + WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed + ON distinct_undistribured.user_id = exsist_in_distributed.user_id +ORDER BY 2 DESC, 1 DESC; + +-- similar query as the above, but this time +-- use NOT EXITS, which is pretty common struct +WITH distinct_undistribured AS + (SELECT DISTINCT user_id + FROM test_cte) +SELECT * +FROM distinct_undistribured +LEFT JOIN + (SELECT DISTINCT user_id + FROM test_cte_distributed + WHERE NOT EXISTS + (SELECT NULL + FROM distinct_undistribured + WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed ON distinct_undistribured.user_id = exsist_in_distributed.user_id; + +-- same NOT EXISTS struct, but with CTE +-- so should work +WITH distinct_undistribured AS ( + SELECT DISTINCT user_id + FROM test_cte +), + not_exsist_in_distributed AS ( + SELECT DISTINCT user_id + FROM test_cte_distributed + WHERE NOT EXISTS(SELECT NULL + FROM distinct_undistribured + WHERE distinct_undistribured.user_id = test_cte_distributed.user_id) + ) +SELECT + * +FROM + distinct_undistribured LEFT JOIN not_exsist_in_distributed + ON distinct_undistribured.user_id = not_exsist_in_distributed.user_id +ORDER BY 2 DESC, 1 DESC; + +-- similar query, but this time the second +-- part of the query is not inside a CTE +WITH distinct_undistribured AS ( + SELECT DISTINCT user_id + FROM test_cte +) +SELECT count(*) +FROM distinct_undistribured +LEFT JOIN + (SELECT *, + random() + FROM test_cte_distributed d1 + WHERE NOT EXISTS + (SELECT NULL + FROM distinct_undistribured d2 + WHERE d1.user_id = d2.user_id )) AS bar USING (user_id); + + +-- should work fine with cte inlinig disabled +SET citus.enable_cte_inlining TO false; +WITH distinct_undistribured AS ( + SELECT DISTINCT user_id + FROM test_cte +), + exsist_in_distributed AS ( + SELECT DISTINCT user_id + FROM test_cte_distributed + WHERE EXISTS(SELECT * + FROM distinct_undistribured + WHERE distinct_undistribured.user_id = test_cte_distributed.user_id) + ) +SELECT + * +FROM + distinct_undistribured LEFT JOIN exsist_in_distributed + ON distinct_undistribured.user_id = exsist_in_distributed.user_id +ORDER BY 2 DESC, 1 DESC; + +WITH distinct_undistribured AS + (SELECT DISTINCT user_id + FROM test_cte) +SELECT * +FROM distinct_undistribured +LEFT JOIN + (SELECT DISTINCT user_id + FROM test_cte_distributed + WHERE EXISTS + (SELECT * + FROM distinct_undistribured + WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed + ON distinct_undistribured.user_id = exsist_in_distributed.user_id +ORDER BY 2 DESC, 1 DESC; + +WITH distinct_undistribured AS + (SELECT DISTINCT user_id + FROM test_cte) +SELECT * +FROM distinct_undistribured +LEFT JOIN + (SELECT DISTINCT user_id + FROM test_cte_distributed + WHERE NOT EXISTS + (SELECT NULL + FROM distinct_undistribured + WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed ON distinct_undistribured.user_id = exsist_in_distributed.user_id; + +-- NOT EXISTS struct, with cte inlining disabled +WITH distinct_undistribured AS ( + SELECT DISTINCT user_id + FROM test_cte +), + not_exsist_in_distributed AS ( + SELECT DISTINCT user_id + FROM test_cte_distributed + WHERE NOT EXISTS(SELECT NULL + FROM distinct_undistribured + WHERE distinct_undistribured.user_id = test_cte_distributed.user_id) + ) +SELECT + * +FROM + distinct_undistribured LEFT JOIN not_exsist_in_distributed + ON distinct_undistribured.user_id = not_exsist_in_distributed.user_id +ORDER BY 2 DESC, 1 DESC; + +-- similar query, but this time the second +-- part of the query is not inside a CTE +WITH distinct_undistribured AS ( + SELECT DISTINCT user_id + FROM test_cte +) +SELECT count(*) +FROM distinct_undistribured +LEFT JOIN + (SELECT *, + random() + FROM test_cte_distributed d1 + WHERE NOT EXISTS + (SELECT NULL + FROM distinct_undistribured d2 + WHERE d1.user_id = d2.user_id )) AS bar USING (user_id); + +RESET citus.enable_cte_inlining; + + DROP VIEW basic_view; DROP VIEW cte_view; DROP SCHEMA with_basics CASCADE;