diff --git a/src/backend/distributed/planner/multi_logical_planner.c b/src/backend/distributed/planner/multi_logical_planner.c index a3933807e..ca53b4b86 100644 --- a/src/backend/distributed/planner/multi_logical_planner.c +++ b/src/backend/distributed/planner/multi_logical_planner.c @@ -85,8 +85,7 @@ static bool ExtractFromExpressionWalker(Node *node, QualifierWalkerContext *walkerContext); static List * MultiTableNodeList(List *tableEntryList, List *rangeTableList); static List * AddMultiCollectNodes(List *tableNodeList); -static MultiNode * MultiJoinTree(List *joinOrderList, List *collectTableList, - List *joinClauseList); +static MultiNode * MultiJoinTree(List *joinOrderList, List *collectTableList); static MultiCollect * CollectNodeForTable(List *collectTableList, uint32 rangeTableId); static MultiSelect * MultiSelectNode(List *whereClauseList); static bool IsSelectClause(Node *clause); @@ -681,7 +680,7 @@ MultiNodeTree(Query *queryTree) } /* build join tree using the join order and collected tables */ - joinTreeNode = MultiJoinTree(joinOrderList, collectTableList, joinClauseList); + joinTreeNode = MultiJoinTree(joinOrderList, collectTableList); currentTopNode = joinTreeNode; } @@ -1569,7 +1568,7 @@ AddMultiCollectNodes(List *tableNodeList) * this tree after every table in the list has been joined. */ static MultiNode * -MultiJoinTree(List *joinOrderList, List *collectTableList, List *joinWhereClauseList) +MultiJoinTree(List *joinOrderList, List *collectTableList) { MultiNode *currentTopNode = NULL; ListCell *joinOrderCell = NULL; diff --git a/src/backend/distributed/planner/multi_physical_planner.c b/src/backend/distributed/planner/multi_physical_planner.c index a2590d48d..7fdbfa829 100644 --- a/src/backend/distributed/planner/multi_physical_planner.c +++ b/src/backend/distributed/planner/multi_physical_planner.c @@ -1654,8 +1654,8 @@ UpdateAllColumnAttributes(Node *columnContainer, List *rangeTableList, static void UpdateColumnAttributes(Var *column, List *rangeTableList, List *dependentJobList) { - Index originalTableId = column->varnosyn; - AttrNumber originalColumnId = column->varattnosyn; + Index originalTableId = column->varno; + AttrNumber originalColumnId = column->varattno; /* find the new table identifier */ Index newTableId = NewTableId(originalTableId, rangeTableList); diff --git a/src/backend/distributed/planner/query_pushdown_planning.c b/src/backend/distributed/planner/query_pushdown_planning.c index 75ded42bc..5caeb49c2 100644 --- a/src/backend/distributed/planner/query_pushdown_planning.c +++ b/src/backend/distributed/planner/query_pushdown_planning.c @@ -174,12 +174,6 @@ ShouldUseSubqueryPushDown(Query *originalQuery, Query *rewrittenQuery, return true; } - /* if there is right recursive join, fix join order can not handle it */ - if (HasRightRecursiveJoin(rewrittenQuery->jointree)) - { - return true; - } - /* * We process function and VALUES RTEs as subqueries, since the join order planner * does not know how to handle them. @@ -189,6 +183,12 @@ ShouldUseSubqueryPushDown(Query *originalQuery, Query *rewrittenQuery, return true; } + /* if there is right recursive join, fix join order can not handle it */ + if (HasRightRecursiveJoin(rewrittenQuery->jointree)) + { + return true; + } + /* * Some unsupported join clauses in logical planner * may be supported by subquery pushdown planner. diff --git a/src/include/distributed/multi_join_order.h b/src/include/distributed/multi_join_order.h index c6ca6b0ca..459146266 100644 --- a/src/include/distributed/multi_join_order.h +++ b/src/include/distributed/multi_join_order.h @@ -19,7 +19,6 @@ #include "nodes/pg_list.h" #include "nodes/primnodes.h" -#include "server/distributed/distributed_planner.h" /* diff --git a/src/test/regress/expected/cross_join.out b/src/test/regress/expected/cross_join.out index 5887a9ae3..c1101e482 100644 --- a/src/test/regress/expected/cross_join.out +++ b/src/test/regress/expected/cross_join.out @@ -4,11 +4,8 @@ -- a distributed table can be cross joined with a reference table -- and the CROSS JOIN can be in the outer part of an outer JOIN SELECT count(*) FROM events_reference_table e1 CROSS JOIN events_table e2 LEFT JOIN users_table u ON (e2.user_id = u.user_id); - count ---------------------------------------------------------------------- - 176649 -(1 row) - +ERROR: cannot perform distributed planning on this query +DETAIL: Cartesian products are currently unsupported -- two distributed tables cannot be cross joined -- as it lacks distribution key equality SELECT count(*) FROM events_reference_table e1 CROSS JOIN events_table e2 CROSS JOIN users_table u; @@ -33,17 +30,11 @@ SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table r (1 row) SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id CROSS JOIN users_table; - count ---------------------------------------------------------------------- - 606 -(1 row) - +ERROR: cannot perform distributed planning on this query +DETAIL: Cartesian products are currently unsupported SELECT count(*) FROM users_ref_test_table ref1 RIGHT JOIN users_ref_test_table ref2 on ref1.id = ref2.id CROSS JOIN users_table; - count ---------------------------------------------------------------------- - 606 -(1 row) - +ERROR: cannot perform distributed planning on this query +DETAIL: Cartesian products are currently unsupported SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 CROSS JOIN users_table; count --------------------------------------------------------------------- @@ -162,9 +153,11 @@ SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 RIGHT J -- a reference tables CROSS JOINed with a distribted table, and later JOINED with distributed tables on reference table column -- so not safe to pushdown SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table u2 ON (ref2.id = u2.user_id); -ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +ERROR: the query contains a join that requires repartitioning +HINT: Set citus.enable_repartition_joins to on to enable repartitioning SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 FULL JOIN users_table u2 ON (ref2.id = u2.user_id); -ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +ERROR: the query contains a join that requires repartitioning +HINT: Set citus.enable_repartition_joins to on to enable repartitioning SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table u2 ON (ref2.id = u2.user_id); ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -- via repartitioning, Citus can handle this query as the result of "u1 CROSS JOIN ref2" diff --git a/src/test/regress/expected/cte_inline.out b/src/test/regress/expected/cte_inline.out index 39d48e915..b479ad7ac 100644 --- a/src/test/regress/expected/cte_inline.out +++ b/src/test/regress/expected/cte_inline.out @@ -779,6 +779,18 @@ FROM cte LEFT JOIN test_table USING (key); DEBUG: CTE cte is going to be inlined via distributed planning DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1] +DEBUG: join prunable for intervals [-2147483648,-1073741825] and [0,1073741823] +DEBUG: join prunable for intervals [-2147483648,-1073741825] and [1073741824,2147483647] +DEBUG: join prunable for intervals [-1073741824,-1] and [-2147483648,-1073741825] +DEBUG: join prunable for intervals [-1073741824,-1] and [0,1073741823] +DEBUG: join prunable for intervals [-1073741824,-1] and [1073741824,2147483647] +DEBUG: join prunable for intervals [0,1073741823] and [-2147483648,-1073741825] +DEBUG: join prunable for intervals [0,1073741823] and [-1073741824,-1] +DEBUG: join prunable for intervals [0,1073741823] and [1073741824,2147483647] +DEBUG: join prunable for intervals [1073741824,2147483647] and [-2147483648,-1073741825] +DEBUG: join prunable for intervals [1073741824,2147483647] and [-1073741824,-1] +DEBUG: join prunable for intervals [1073741824,2147483647] and [0,1073741823] count --------------------------------------------------------------------- 1021 diff --git a/src/test/regress/expected/join_pushdown.out b/src/test/regress/expected/join_pushdown.out index c71478d30..3bb4f0981 100644 --- a/src/test/regress/expected/join_pushdown.out +++ b/src/test/regress/expected/join_pushdown.out @@ -151,8 +151,7 @@ ORDER BY 1; -- Full outer join with different distribution column types, should error out SELECT * FROM test_table_1 full join test_table_2 using(id); -ERROR: cannot push down this subquery -DETAIL: Shards of relations in subquery need to have 1-to-1 shard partitioning +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- Test when the non-distributed column has the value of NULL INSERT INTO test_table_1 VALUES(7, NULL); INSERT INTO test_table_2 VALUES(7, NULL); diff --git a/src/test/regress/expected/multi_insert_select.out b/src/test/regress/expected/multi_insert_select.out index 0e2db7f93..43f02fffb 100644 --- a/src/test/regress/expected/multi_insert_select.out +++ b/src/test/regress/expected/multi_insert_select.out @@ -845,7 +845,6 @@ SET client_min_messages TO WARNING; raw_events_first.user_id FROM raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1; -ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -- same as the above with INNER JOIN INSERT INTO agg_events (user_id) SELECT @@ -867,7 +866,6 @@ DETAIL: Cartesian products are currently unsupported raw_events_first.user_id FROM raw_events_first LEFT JOIN raw_events_second ON raw_events_first.value_1 = raw_events_second.value_1; -ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -- same as the above with INNER JOIN -- we support this with route to coordinator SELECT coordinator_plan($Q$ @@ -903,7 +901,6 @@ FROM raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1 WHERE raw_events_first.user_id = 10; -ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -- same as the above with INNER JOIN -- we support this with route to coordinator SELECT coordinator_plan($Q$ diff --git a/src/test/regress/expected/multi_insert_select_non_pushable_queries.out b/src/test/regress/expected/multi_insert_select_non_pushable_queries.out index fc3f62385..507ba3b8d 100644 --- a/src/test/regress/expected/multi_insert_select_non_pushable_queries.out +++ b/src/test/regress/expected/multi_insert_select_non_pushable_queries.out @@ -354,7 +354,7 @@ FROM ( GROUP BY user_id ) AS shard_union ORDER BY user_lastseen DESC; -ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- not pushable since lateral join is not on the partition key INSERT INTO agg_results_third (user_id, agg_time, value_2_agg) SELECT @@ -382,7 +382,8 @@ FROM ( GROUP BY user_id ) AS shard_union ORDER BY user_lastseen DESC; -ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +ERROR: the query contains a join that requires repartitioning +HINT: Set citus.enable_repartition_joins to on to enable repartitioning -- not pushable since lateral join is not on the partition key INSERT INTO agg_results_third (user_id, agg_time, value_2_agg) SELECT @@ -410,7 +411,7 @@ FROM ( GROUP BY user_id ) AS shard_union ORDER BY user_lastseen DESC; -ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator --------------------------------------------------------------------- --------------------------------------------------------------------- -- Count the number of distinct users_table who are in segment X and Y and Z @@ -514,14 +515,15 @@ SELECT user_id, value_2 FROM users_table WHERE value_1 = 101 AND value_2 >= 5 AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND user_id!=users_table.user_id); -ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- not pushable since the join is not the partition key INSERT INTO agg_results_third(user_id, value_2_agg) SELECT user_id, value_2 FROM users_table WHERE value_1 = 101 AND value_2 >= 5 AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND event_type=users_table.user_id); -ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +ERROR: the query contains a join that requires repartitioning +HINT: Set citus.enable_repartition_joins to on to enable repartitioning --------------------------------------------------------------------- --------------------------------------------------------------------- -- Customers who have done X and Y, and satisfy other customer specific criteria diff --git a/src/test/regress/expected/multi_subquery.out b/src/test/regress/expected/multi_subquery.out index f4c4ccc21..a0cebb69a 100644 --- a/src/test/regress/expected/multi_subquery.out +++ b/src/test/regress/expected/multi_subquery.out @@ -425,7 +425,7 @@ FROM events_table t1 LEFT JOIN users_table t2 ON t1.user_id > t2.user_id ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC LIMIT 5; -ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- outer joins on reference tables with expressions should work SELECT DISTINCT ON (t1.user_id) t1.user_id, t2.value_1, t2.value_2, t2.value_3 FROM events_table t1 @@ -466,7 +466,7 @@ SELECT DISTINCT ON (t1.user_id) t1.user_id, t2.value_1, t2.value_2, t2.value_3 LEFT JOIN users_reference_table t2 ON t1.user_id = trunc(t2.user_id) ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC LIMIT 5; -ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- outer joins as subqueries should work -- https://github.com/citusdata/citus/issues/2739 SELECT user_id, value_1, event_type diff --git a/src/test/regress/expected/multi_subquery_behavioral_analytics.out b/src/test/regress/expected/multi_subquery_behavioral_analytics.out index 9284b7f92..171dc731d 100644 --- a/src/test/regress/expected/multi_subquery_behavioral_analytics.out +++ b/src/test/regress/expected/multi_subquery_behavioral_analytics.out @@ -561,9 +561,9 @@ ORDER BY 1 DESC, 2 DESC LIMIT 3; user_id | value_2 --------------------------------------------------------------------- - 5 | 5 - 5 | 5 - 5 | 2 + 6 | 4 + 6 | 4 + 6 | 4 (3 rows) --------------------------------------------------------------------- diff --git a/src/test/regress/expected/multi_subquery_in_where_clause.out b/src/test/regress/expected/multi_subquery_in_where_clause.out index 834cef505..9dfb163af 100644 --- a/src/test/regress/expected/multi_subquery_in_where_clause.out +++ b/src/test/regress/expected/multi_subquery_in_where_clause.out @@ -682,5 +682,5 @@ SELECT user_id, value_2 FROM users_table WHERE AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=1 AND value_3 > 1 AND test_join_function(events_table.user_id, users_table.user_id)) ORDER BY 1 DESC, 2 DESC LIMIT 3; -ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator DROP FUNCTION test_join_function(int,int); diff --git a/src/test/regress/expected/multi_subquery_in_where_reference_clause.out b/src/test/regress/expected/multi_subquery_in_where_reference_clause.out index 52cbe3917..7bf59629d 100644 --- a/src/test/regress/expected/multi_subquery_in_where_reference_clause.out +++ b/src/test/regress/expected/multi_subquery_in_where_reference_clause.out @@ -44,7 +44,10 @@ ORDER BY user_id LIMIT 3; user_id --------------------------------------------------------------------- -(0 rows) + 1 + 2 + 3 +(3 rows) -- subqueries in WHERE with NOT EXISTS operator, should not work since -- there is a correlated subquery in WHERE clause @@ -62,7 +65,8 @@ WHERE users_reference_table.user_id = events_table.user_id ) LIMIT 3; -ERROR: correlated subqueries are not supported when the FROM clause contains a reference table +ERROR: the query contains a join that requires repartitioning +HINT: Set citus.enable_repartition_joins to on to enable repartitioning -- immutable functions are also treated as reference tables, query should not -- work since there is a correlated subquery in the WHERE clause SELECT diff --git a/src/test/regress/expected/multi_view.out b/src/test/regress/expected/multi_view.out index 11f78ea34..1bedf32c8 100644 --- a/src/test/regress/expected/multi_view.out +++ b/src/test/regress/expected/multi_view.out @@ -143,7 +143,7 @@ SELECT distinct(o_orderkey) FROM priority_orders join air_shipped_lineitems SELECT o_orderkey, l_orderkey FROM priority_orders left join air_shipped_lineitems ON (o_orderkey = l_orderkey) ORDER BY o_orderkey LIMIT 1; o_orderkey | l_orderkey --------------------------------------------------------------------- - 2 | + 32 | 32 (1 row) -- however, this works @@ -157,7 +157,7 @@ SELECT count(*) FROM priority_orders left join lineitem_hash_part ON (o_orderkey SELECT count(*) FROM priority_orders right join lineitem_hash_part ON (o_orderkey = l_orderkey) WHERE l_shipmode ='AIR'; count --------------------------------------------------------------------- - 1706 + 700 (1 row) -- view on the outer side is supported diff --git a/src/test/regress/expected/non_colocated_subquery_joins.out b/src/test/regress/expected/non_colocated_subquery_joins.out index d03d4ecf3..bb6ea90dd 100644 --- a/src/test/regress/expected/non_colocated_subquery_joins.out +++ b/src/test/regress/expected/non_colocated_subquery_joins.out @@ -989,7 +989,31 @@ DEBUG: Router planner cannot handle multi-shard select queries DEBUG: skipping recursive planning for the subquery since it contains references to outer queries DEBUG: skipping recursive planning for the subquery since it contains references to outer queries DEBUG: skipping recursive planning for the subquery since it contains references to outer queries -ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +DEBUG: join prunable for task partitionId 0 and 1 +DEBUG: join prunable for task partitionId 0 and 2 +DEBUG: join prunable for task partitionId 0 and 3 +DEBUG: join prunable for task partitionId 1 and 0 +DEBUG: join prunable for task partitionId 1 and 2 +DEBUG: join prunable for task partitionId 1 and 3 +DEBUG: join prunable for task partitionId 2 and 0 +DEBUG: join prunable for task partitionId 2 and 1 +DEBUG: join prunable for task partitionId 2 and 3 +DEBUG: join prunable for task partitionId 3 and 0 +DEBUG: join prunable for task partitionId 3 and 1 +DEBUG: join prunable for task partitionId 3 and 2 +DEBUG: pruning merge fetch taskId 1 +DEBUG: pruning merge fetch taskId 2 +DEBUG: pruning merge fetch taskId 4 +DEBUG: pruning merge fetch taskId 5 +DEBUG: pruning merge fetch taskId 7 +DEBUG: pruning merge fetch taskId 8 +DEBUG: pruning merge fetch taskId 10 +DEBUG: pruning merge fetch taskId 11 + valid +--------------------------------------------------------------------- + t +(1 row) + -- similar to the above, make sure that we skip recursive planning when -- the subquery contains only intermediate results SELECT COUNT(*) = 176 diff --git a/src/test/regress/expected/subquery_in_where.out b/src/test/regress/expected/subquery_in_where.out index eb56acd87..7e1360db1 100644 --- a/src/test/regress/expected/subquery_in_where.out +++ b/src/test/regress/expected/subquery_in_where.out @@ -1018,7 +1018,7 @@ WHERE NOT EXISTS WHERE u.user_id = e.user_id and e.value_2 > 10000 LIMIT 1); avg --------------------------------------------------------------------- - 2.5544554455445545 + 2.5385934819897084 (1 row) -- a [correlated] lateral join can also be pushed down even if the subquery