diff --git a/src/backend/distributed/planner/recursive_planning.c b/src/backend/distributed/planner/recursive_planning.c index 93e6e95ca..d17102117 100644 --- a/src/backend/distributed/planner/recursive_planning.c +++ b/src/backend/distributed/planner/recursive_planning.c @@ -852,16 +852,23 @@ RecursivelyPlanDistributedJoinNode(Node *distributedNode, Query *query, * parts with the nonrecurring part consisting of INNER JOIN. * * left join ( INNER JOIN ) - * We should recursively plan nonrecurring part i.e. (dist INNER JOIN dist) as a whole. + * We should recursively plan the nonrecurring part, i.e. (dist INNER JOIN dist). */ JoinExpr *joinExpr = (JoinExpr *) distributedNode; Node *leftNode = joinExpr->larg; Node *rightNode = joinExpr->rarg; - RecursivelyPlanDistributedJoinNode(leftNode, query, - recursivePlanningContext); - RecursivelyPlanDistributedJoinNode(rightNode, query, - recursivePlanningContext); + if (!IsJoinNodeRecurring(leftNode, query)) + { + RecursivelyPlanDistributedJoinNode(leftNode, query, + recursivePlanningContext); + } + + if (!IsJoinNodeRecurring(rightNode, query)) + { + RecursivelyPlanDistributedJoinNode(rightNode, query, + recursivePlanningContext); + } return; } diff --git a/src/test/regress/expected/multi_outer_join.out b/src/test/regress/expected/multi_outer_join.out index 1748a0dfc..e373e8674 100644 --- a/src/test/regress/expected/multi_outer_join.out +++ b/src/test/regress/expected/multi_outer_join.out @@ -414,8 +414,42 @@ FROM LEFT JOIN multi_outer_join_right r1 ON (l1.l_custkey = r1.r_custkey) RIGHT JOIN multi_outer_join_third_reference t1 ON (r1.r_custkey = t1.t_custkey) ORDER BY l_custkey, r_custkey, t_custkey; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join +LOG: join order: [ "multi_outer_join_left" ] +LOG: join order: [ "multi_outer_join_right" ] + l_custkey | r_custkey | t_custkey +--------------------------------------------------------------------- + 11 | 11 | 11 + 12 | 
12 | 12 + 14 | 14 | 14 + 16 | 16 | 16 + 17 | 17 | 17 + 18 | 18 | 18 + 20 | 20 | 20 + 21 | 21 | 21 + 22 | 22 | 22 + 24 | 24 | 24 + 26 | 26 | 26 + 27 | 27 | 27 + 28 | 28 | 28 + 30 | 30 | 30 + | | 1 + | | 2 + | | 3 + | | 4 + | | 5 + | | 6 + | | 7 + | | 8 + | | 9 + | | 10 + | | 13 + | | 15 + | | 19 + | | 23 + | | 25 + | | 29 +(30 rows) + -- Right join with single shard left most table should work SELECT t_custkey, r_custkey, l_custkey diff --git a/src/test/regress/expected/multi_outer_join_reference.out b/src/test/regress/expected/multi_outer_join_reference.out index 1e705d14d..d74ca59a8 100644 --- a/src/test/regress/expected/multi_outer_join_reference.out +++ b/src/test/regress/expected/multi_outer_join_reference.out @@ -408,8 +408,42 @@ FROM multi_outer_join_left_hash l1 LEFT JOIN multi_outer_join_right_hash r1 ON (l1.l_custkey = r1.r_custkey) RIGHT JOIN multi_outer_join_third_reference t1 ON (r1.r_custkey = t1.t_custkey); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join +LOG: join order: [ "multi_outer_join_left_hash" ] +LOG: join order: [ "multi_outer_join_right_hash" ] + l_custkey | r_custkey | t_custkey +--------------------------------------------------------------------- + 15 | 15 | 15 + 24 | 24 | 24 + 25 | 25 | 25 + 26 | 26 | 26 + 14 | 14 | 14 + 13 | 13 | 13 + 21 | 21 | 21 + 28 | 28 | 28 + 11 | 11 | 11 + 12 | 12 | 12 + 22 | 22 | 22 + 23 | 23 | 23 + 27 | 27 | 27 + 29 | 29 | 29 + 30 | 30 | 30 + | | 20 + | | 17 + | | 10 + | | 18 + | | 2 + | | 5 + | | 19 + | | 8 + | | 6 + | | 16 + | | 4 + | | 1 + | | 3 + | | 9 + | | 7 +(30 rows) + -- Right join with single shard left most table should work SELECT t_custkey, r_custkey, l_custkey diff --git a/src/test/regress/expected/sqlancer_failures.out b/src/test/regress/expected/sqlancer_failures.out index 207e71d56..e78442978 100644 --- a/src/test/regress/expected/sqlancer_failures.out +++ b/src/test/regress/expected/sqlancer_failures.out @@ -178,23 +178,35 @@ 
SELECT create_distributed_table('distributed_table', 'user_id'); SELECT count(*) FROM distributed_table a LEFT JOIN reference_table b ON (true) RIGHT JOIN reference_table c ON (true); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 0 +(1 row) + SELECT count(*) FROM distributed_table a LEFT JOIN (SELECT * FROM reference_table OFFSET 0) b ON (true) RIGHT JOIN (SELECT * FROM reference_table OFFSET 0) c ON (true); -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table + count +--------------------------------------------------------------------- + 0 +(1 row) + SELECT count(*) FROM distributed_table a LEFT JOIN reference_table b ON (true) RIGHT JOIN reference_table c ON (c.id > 0); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 0 +(1 row) + SELECT count(*) FROM distributed_table a LEFT JOIN (SELECT * FROM reference_table OFFSET 0) b ON (true) RIGHT JOIN (SELECT * FROM reference_table OFFSET 0) c ON (c.id > 0); -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table + count +--------------------------------------------------------------------- + 0 +(1 row) + -- drop existing sqlancer tables before next tests DROP TABLE t0, t1, t2, t3, t4 CASCADE; CREATE TABLE tbl1(a REAL, b FLOAT, c money); @@ -264,8 +276,11 @@ SELECT ALL t4.c1, t0.c0, t0.c1 FROM ONLY t0 LEFT OUTER JOIN t4 ON CAST(masklen('142.158.96.44') AS BOOLEAN) RIGHT OUTER JOIN t1 ON ((0.024767844)::MONEY) BETWEEN (t1.c1) AND (CAST(0.0602135 AS MONEY)) ) AS foo; -ERROR: cannot pushdown the subquery -DETAIL: There exist a 
reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 0 +(1 row) + -- first subquery has the same join tree as above, so we should error out SELECT count(*) FROM ( SELECT ALL t4.c1, t0.c0, t0.c1 FROM ONLY t0 @@ -281,21 +296,29 @@ UNION ALL SELECT ALL t4.c1, t0.c0, t0.c1 FROM ONLY t0 RIGHT OUTER JOIN t1 ON ((0.024767844)::MONEY) BETWEEN (t1.c1) AND ((0.0602135)::MONEY) WHERE (NOT (((t0.c0)LIKE((t4.c0))))) ISNULL ) AS foo; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 0 +(1 row) + -- unsupported outer JOIN inside a subquery in WHERE clause SELECT * FROM distributed_table WHERE buy_count > ( SELECT count(*) FROM distributed_table a LEFT JOIN reference_table b ON (true) RIGHT JOIN reference_table c ON (false)); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + user_id | item_id | buy_count +--------------------------------------------------------------------- +(0 rows) + -- unsupported outer JOIN via subqueries SELECT count(*) FROM (SELECT *, random() FROM distributed_table) AS a LEFT JOIN (SELECT *, random() FROM reference_table) AS b ON (true) RIGHT JOIN (SELECT *, random() FROM reference_table) AS c ON (false); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 0 +(1 row) + -- unsupported outer JOIN in a sublevel subquery SELECT count(*) @@ -310,8 +333,11 @@ JOIN RIGHT JOIN reference_table c ON (true) ) AS unsupported_join ON (true); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 0 +(1 row) + SELECT 
count(*) FROM @@ -325,8 +351,11 @@ JOIN RIGHT JOIN (SELECT * FROM reference_table OFFSET 0) c ON (true) ) AS unsupported_join ON (true); -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table + count +--------------------------------------------------------------------- + 0 +(1 row) + -- unsupported outer JOIN in a sublevel INNER JOIN SELECT unsupported_join.* @@ -336,8 +365,10 @@ FROM RIGHT JOIN reference_table c ON (true)) as unsupported_join (x,y,z,t,e,f,q) JOIN (reference_table d JOIN reference_table e ON(true)) ON (true); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + x | y | z | t | e | f | q | it_name | k_no +--------------------------------------------------------------------- +(0 rows) + -- unsupported outer JOIN in a sublevel LEFT JOIN SELECT unsupported_join.* @@ -347,8 +378,10 @@ FROM RIGHT JOIN reference_table c ON (true)) as unsupported_join LEFT JOIN (reference_table d JOIN reference_table e ON(true)) ON (true); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + user_id | item_id | buy_count | id | it_name | k_no | id | it_name | k_no +--------------------------------------------------------------------- +(0 rows) + SELECT unsupported_join.* FROM @@ -363,8 +396,10 @@ LEFT JOIN ON(true) ) ON (true); -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table + user_id | item_id | buy_count | id | it_name | k_no | id | it_name | k_no +--------------------------------------------------------------------- +(0 rows) + -- unsupported outer JOIN in a sublevel RIGHT JOIN SELECT unsupported_join.* @@ -374,8 +409,10 @@ FROM RIGHT JOIN reference_table c ON (false)) as unsupported_join RIGHT JOIN (reference_table d JOIN reference_table e 
ON(true)) ON (true); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + user_id | item_id | buy_count | id | it_name | k_no | id | it_name | k_no +--------------------------------------------------------------------- +(0 rows) + SELECT unsupported_join.* FROM @@ -390,8 +427,10 @@ RIGHT JOIN ON(true) ) ON (true); -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table + user_id | item_id | buy_count | id | it_name | k_no | id | it_name | k_no +--------------------------------------------------------------------- +(0 rows) + EXPLAIN SELECT unsupported_join.* FROM @@ -400,7 +439,44 @@ FROM RIGHT JOIN reference_table c ON (true)) as unsupported_join (x,y,z,t,e,f,q) JOIN (reference_table d JOIN reference_table e ON(true)) ON (d.id > 0); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + QUERY PLAN +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0) + -> Distributed Subplan XXX_1 + -> Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=100000 width=12) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on distributed_table_92862439 a (cost=0.00..30.40 rows=2040 width=12) + -> Distributed Subplan XXX_2 + -> Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0) + Task Count: 1 + Tasks Shown: All + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on reference_table_92862438 b (cost=0.00..17.80 rows=780 width=76) + Task Count: 1 + Tasks Shown: All + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Nested Loop (cost=0.01..1979855436727.95 rows=158184000000000 width=164) + -> Nested Loop (cost=0.01..2555436708.20 rows=202800000000 width=164) + -> Nested Loop 
Left Join (cost=0.01..20436687.80 rows=780000000 width=164) + -> Seq Scan on reference_table_92862438 c (cost=0.00..17.80 rows=780 width=76) + -> Materialize (cost=0.01..38682.00 rows=1000000 width=88) + -> Nested Loop Left Join (cost=0.01..20010.01 rows=1000000 width=88) + -> Function Scan on read_intermediate_result intermediate_result (cost=0.00..10.00 rows=1000 width=12) + -> Function Scan on read_intermediate_result intermediate_result_1 (cost=0.00..10.00 rows=1000 width=76) + -> Materialize (cost=0.00..21.05 rows=260 width=0) + -> Seq Scan on reference_table_92862438 d (cost=0.00..19.75 rows=260 width=0) + Filter: (id > 0) + -> Materialize (cost=0.00..21.70 rows=780 width=0) + -> Seq Scan on reference_table_92862438 e (cost=0.00..17.80 rows=780 width=0) + JIT: + Functions: 16 + Options: Inlining true, Optimization true, Expressions true, Deforming true +(35 rows) + SET client_min_messages TO WARNING; DROP SCHEMA sqlancer_failures CASCADE;