fix tests

outer-join-noncolocated-dist-tables
aykutbozkurt 2022-12-19 19:45:37 +03:00
parent be39d028c7
commit 8951e99d39
16 changed files with 81 additions and 52 deletions

View File

@ -85,8 +85,7 @@ static bool ExtractFromExpressionWalker(Node *node,
QualifierWalkerContext *walkerContext);
static List * MultiTableNodeList(List *tableEntryList, List *rangeTableList);
static List * AddMultiCollectNodes(List *tableNodeList);
static MultiNode * MultiJoinTree(List *joinOrderList, List *collectTableList,
List *joinClauseList);
static MultiNode * MultiJoinTree(List *joinOrderList, List *collectTableList);
static MultiCollect * CollectNodeForTable(List *collectTableList, uint32 rangeTableId);
static MultiSelect * MultiSelectNode(List *whereClauseList);
static bool IsSelectClause(Node *clause);
@ -681,7 +680,7 @@ MultiNodeTree(Query *queryTree)
}
/* build join tree using the join order and collected tables */
joinTreeNode = MultiJoinTree(joinOrderList, collectTableList, joinClauseList);
joinTreeNode = MultiJoinTree(joinOrderList, collectTableList);
currentTopNode = joinTreeNode;
}
@ -1569,7 +1568,7 @@ AddMultiCollectNodes(List *tableNodeList)
* this tree after every table in the list has been joined.
*/
static MultiNode *
MultiJoinTree(List *joinOrderList, List *collectTableList, List *joinWhereClauseList)
MultiJoinTree(List *joinOrderList, List *collectTableList)
{
MultiNode *currentTopNode = NULL;
ListCell *joinOrderCell = NULL;

View File

@ -1654,8 +1654,8 @@ UpdateAllColumnAttributes(Node *columnContainer, List *rangeTableList,
static void
UpdateColumnAttributes(Var *column, List *rangeTableList, List *dependentJobList)
{
Index originalTableId = column->varnosyn;
AttrNumber originalColumnId = column->varattnosyn;
Index originalTableId = column->varno;
AttrNumber originalColumnId = column->varattno;
/* find the new table identifier */
Index newTableId = NewTableId(originalTableId, rangeTableList);

View File

@ -174,12 +174,6 @@ ShouldUseSubqueryPushDown(Query *originalQuery, Query *rewrittenQuery,
return true;
}
/* if there is right recursive join, fix join order can not handle it */
if (HasRightRecursiveJoin(rewrittenQuery->jointree))
{
return true;
}
/*
* We process function and VALUES RTEs as subqueries, since the join order planner
* does not know how to handle them.
@ -189,6 +183,12 @@ ShouldUseSubqueryPushDown(Query *originalQuery, Query *rewrittenQuery,
return true;
}
/* if there is right recursive join, fix join order can not handle it */
if (HasRightRecursiveJoin(rewrittenQuery->jointree))
{
return true;
}
/*
* Some unsupported join clauses in logical planner
* may be supported by subquery pushdown planner.

View File

@ -19,7 +19,6 @@
#include "nodes/pg_list.h"
#include "nodes/primnodes.h"
#include "server/distributed/distributed_planner.h"
/*

View File

@ -4,11 +4,8 @@
-- a distributed table can be cross joined with a reference table
-- and the CROSS JOIN can be in the outer part of an outer JOIN
SELECT count(*) FROM events_reference_table e1 CROSS JOIN events_table e2 LEFT JOIN users_table u ON (e2.user_id = u.user_id);
count
---------------------------------------------------------------------
176649
(1 row)
ERROR: cannot perform distributed planning on this query
DETAIL: Cartesian products are currently unsupported
-- two distributed tables cannot be cross joined
-- as it lacks distribution key equality
SELECT count(*) FROM events_reference_table e1 CROSS JOIN events_table e2 CROSS JOIN users_table u;
@ -33,17 +30,11 @@ SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table r
(1 row)
SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id CROSS JOIN users_table;
count
---------------------------------------------------------------------
606
(1 row)
ERROR: cannot perform distributed planning on this query
DETAIL: Cartesian products are currently unsupported
SELECT count(*) FROM users_ref_test_table ref1 RIGHT JOIN users_ref_test_table ref2 on ref1.id = ref2.id CROSS JOIN users_table;
count
---------------------------------------------------------------------
606
(1 row)
ERROR: cannot perform distributed planning on this query
DETAIL: Cartesian products are currently unsupported
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 CROSS JOIN users_table;
count
---------------------------------------------------------------------
@ -162,9 +153,11 @@ SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 RIGHT J
-- a reference tables CROSS JOINed with a distribted table, and later JOINED with distributed tables on reference table column
-- so not safe to pushdown
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table u2 ON (ref2.id = u2.user_id);
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
ERROR: the query contains a join that requires repartitioning
HINT: Set citus.enable_repartition_joins to on to enable repartitioning
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 FULL JOIN users_table u2 ON (ref2.id = u2.user_id);
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
ERROR: the query contains a join that requires repartitioning
HINT: Set citus.enable_repartition_joins to on to enable repartitioning
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table u2 ON (ref2.id = u2.user_id);
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- via repartitioning, Citus can handle this query as the result of "u1 CROSS JOIN ref2"

View File

@ -779,6 +779,18 @@ FROM
cte LEFT JOIN test_table USING (key);
DEBUG: CTE cte is going to be inlined via distributed planning
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1]
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [0,1073741823]
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [1073741824,2147483647]
DEBUG: join prunable for intervals [-1073741824,-1] and [-2147483648,-1073741825]
DEBUG: join prunable for intervals [-1073741824,-1] and [0,1073741823]
DEBUG: join prunable for intervals [-1073741824,-1] and [1073741824,2147483647]
DEBUG: join prunable for intervals [0,1073741823] and [-2147483648,-1073741825]
DEBUG: join prunable for intervals [0,1073741823] and [-1073741824,-1]
DEBUG: join prunable for intervals [0,1073741823] and [1073741824,2147483647]
DEBUG: join prunable for intervals [1073741824,2147483647] and [-2147483648,-1073741825]
DEBUG: join prunable for intervals [1073741824,2147483647] and [-1073741824,-1]
DEBUG: join prunable for intervals [1073741824,2147483647] and [0,1073741823]
count
---------------------------------------------------------------------
1021

View File

@ -151,8 +151,7 @@ ORDER BY 1;
-- Full outer join with different distribution column types, should error out
SELECT * FROM test_table_1 full join test_table_2 using(id);
ERROR: cannot push down this subquery
DETAIL: Shards of relations in subquery need to have 1-to-1 shard partitioning
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
-- Test when the non-distributed column has the value of NULL
INSERT INTO test_table_1 VALUES(7, NULL);
INSERT INTO test_table_2 VALUES(7, NULL);

View File

@ -845,7 +845,6 @@ SET client_min_messages TO WARNING;
raw_events_first.user_id
FROM
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1;
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- same as the above with INNER JOIN
INSERT INTO agg_events (user_id)
SELECT
@ -867,7 +866,6 @@ DETAIL: Cartesian products are currently unsupported
raw_events_first.user_id
FROM
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.value_1 = raw_events_second.value_1;
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- same as the above with INNER JOIN
-- we support this with route to coordinator
SELECT coordinator_plan($Q$
@ -903,7 +901,6 @@ FROM
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1
WHERE
raw_events_first.user_id = 10;
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- same as the above with INNER JOIN
-- we support this with route to coordinator
SELECT coordinator_plan($Q$

View File

@ -354,7 +354,7 @@ FROM (
GROUP BY user_id
) AS shard_union
ORDER BY user_lastseen DESC;
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
-- not pushable since lateral join is not on the partition key
INSERT INTO agg_results_third (user_id, agg_time, value_2_agg)
SELECT
@ -382,7 +382,8 @@ FROM (
GROUP BY user_id
) AS shard_union
ORDER BY user_lastseen DESC;
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
ERROR: the query contains a join that requires repartitioning
HINT: Set citus.enable_repartition_joins to on to enable repartitioning
-- not pushable since lateral join is not on the partition key
INSERT INTO agg_results_third (user_id, agg_time, value_2_agg)
SELECT
@ -410,7 +411,7 @@ FROM (
GROUP BY user_id
) AS shard_union
ORDER BY user_lastseen DESC;
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
---------------------------------------------------------------------
---------------------------------------------------------------------
-- Count the number of distinct users_table who are in segment X and Y and Z
@ -514,14 +515,15 @@ SELECT user_id, value_2 FROM users_table WHERE
value_1 = 101
AND value_2 >= 5
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND user_id!=users_table.user_id);
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
-- not pushable since the join is not the partition key
INSERT INTO agg_results_third(user_id, value_2_agg)
SELECT user_id, value_2 FROM users_table WHERE
value_1 = 101
AND value_2 >= 5
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND event_type=users_table.user_id);
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
ERROR: the query contains a join that requires repartitioning
HINT: Set citus.enable_repartition_joins to on to enable repartitioning
---------------------------------------------------------------------
---------------------------------------------------------------------
-- Customers who have done X and Y, and satisfy other customer specific criteria

View File

@ -425,7 +425,7 @@ FROM events_table t1
LEFT JOIN users_table t2 ON t1.user_id > t2.user_id
ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC
LIMIT 5;
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
-- outer joins on reference tables with expressions should work
SELECT DISTINCT ON (t1.user_id) t1.user_id, t2.value_1, t2.value_2, t2.value_3
FROM events_table t1
@ -466,7 +466,7 @@ SELECT DISTINCT ON (t1.user_id) t1.user_id, t2.value_1, t2.value_2, t2.value_3
LEFT JOIN users_reference_table t2 ON t1.user_id = trunc(t2.user_id)
ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC
LIMIT 5;
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
-- outer joins as subqueries should work
-- https://github.com/citusdata/citus/issues/2739
SELECT user_id, value_1, event_type

View File

@ -561,9 +561,9 @@ ORDER BY 1 DESC, 2 DESC
LIMIT 3;
user_id | value_2
---------------------------------------------------------------------
5 | 5
5 | 5
5 | 2
6 | 4
6 | 4
6 | 4
(3 rows)
---------------------------------------------------------------------

View File

@ -682,5 +682,5 @@ SELECT user_id, value_2 FROM users_table WHERE
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=1 AND value_3 > 1 AND test_join_function(events_table.user_id, users_table.user_id))
ORDER BY 1 DESC, 2 DESC
LIMIT 3;
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
DROP FUNCTION test_join_function(int,int);

View File

@ -44,7 +44,10 @@ ORDER BY user_id
LIMIT 3;
user_id
---------------------------------------------------------------------
(0 rows)
1
2
3
(3 rows)
-- subqueries in WHERE with NOT EXISTS operator, should not work since
-- there is a correlated subquery in WHERE clause
@ -62,7 +65,8 @@ WHERE
users_reference_table.user_id = events_table.user_id
)
LIMIT 3;
ERROR: correlated subqueries are not supported when the FROM clause contains a reference table
ERROR: the query contains a join that requires repartitioning
HINT: Set citus.enable_repartition_joins to on to enable repartitioning
-- immutable functions are also treated as reference tables, query should not
-- work since there is a correlated subquery in the WHERE clause
SELECT

View File

@ -143,7 +143,7 @@ SELECT distinct(o_orderkey) FROM priority_orders join air_shipped_lineitems
SELECT o_orderkey, l_orderkey FROM priority_orders left join air_shipped_lineitems ON (o_orderkey = l_orderkey) ORDER BY o_orderkey LIMIT 1;
o_orderkey | l_orderkey
---------------------------------------------------------------------
2 |
32 | 32
(1 row)
-- however, this works
@ -157,7 +157,7 @@ SELECT count(*) FROM priority_orders left join lineitem_hash_part ON (o_orderkey
SELECT count(*) FROM priority_orders right join lineitem_hash_part ON (o_orderkey = l_orderkey) WHERE l_shipmode ='AIR';
count
---------------------------------------------------------------------
1706
700
(1 row)
-- view on the outer side is supported

View File

@ -989,7 +989,31 @@ DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: skipping recursive planning for the subquery since it contains references to outer queries
DEBUG: skipping recursive planning for the subquery since it contains references to outer queries
DEBUG: skipping recursive planning for the subquery since it contains references to outer queries
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: join prunable for task partitionId 0 and 1
DEBUG: join prunable for task partitionId 0 and 2
DEBUG: join prunable for task partitionId 0 and 3
DEBUG: join prunable for task partitionId 1 and 0
DEBUG: join prunable for task partitionId 1 and 2
DEBUG: join prunable for task partitionId 1 and 3
DEBUG: join prunable for task partitionId 2 and 0
DEBUG: join prunable for task partitionId 2 and 1
DEBUG: join prunable for task partitionId 2 and 3
DEBUG: join prunable for task partitionId 3 and 0
DEBUG: join prunable for task partitionId 3 and 1
DEBUG: join prunable for task partitionId 3 and 2
DEBUG: pruning merge fetch taskId 1
DEBUG: pruning merge fetch taskId 2
DEBUG: pruning merge fetch taskId 4
DEBUG: pruning merge fetch taskId 5
DEBUG: pruning merge fetch taskId 7
DEBUG: pruning merge fetch taskId 8
DEBUG: pruning merge fetch taskId 10
DEBUG: pruning merge fetch taskId 11
valid
---------------------------------------------------------------------
t
(1 row)
-- similar to the above, make sure that we skip recursive planning when
-- the subquery contains only intermediate results
SELECT COUNT(*) = 176

View File

@ -1018,7 +1018,7 @@ WHERE NOT EXISTS
WHERE u.user_id = e.user_id and e.value_2 > 10000 LIMIT 1);
avg
---------------------------------------------------------------------
2.5544554455445545
2.5385934819897084
(1 row)
-- a [correlated] lateral join can also be pushed down even if the subquery