mirror of https://github.com/citusdata/citus.git
Disallow outer joins `ON TRUE` with ref & dist tables when ref table is outer relation (#4255)
Disallow `ON TRUE` outer joins with reference & distributed tables when reference table is outer relation by fixing the logic bug made when calling `LeftListIsSubset` function. Also, be more defensive when removing duplicate join restrictions when join clause is empty for non-inner joins as they might still contain useful information for non-inner joins.pull/4243/head
parent
6e493624af
commit
c49077d594
|
@ -1744,6 +1744,8 @@ multi_join_restriction_hook(PlannerInfo *root,
|
|||
*/
|
||||
joinRestrictionContext->hasSemiJoin = joinRestrictionContext->hasSemiJoin ||
|
||||
extra->sjinfo->jointype == JOIN_SEMI;
|
||||
joinRestrictionContext->hasOnlyInnerJoin = joinRestrictionContext->hasOnlyInnerJoin &&
|
||||
extra->sjinfo->jointype == JOIN_INNER;
|
||||
|
||||
MemoryContextSwitchTo(oldMemoryContext);
|
||||
}
|
||||
|
@ -2133,6 +2135,7 @@ CreateAndPushPlannerRestrictionContext(void)
|
|||
|
||||
/* we'll apply logical AND as we add tables */
|
||||
plannerRestrictionContext->relationRestrictionContext->allReferenceTables = true;
|
||||
plannerRestrictionContext->joinRestrictionContext->hasOnlyInnerJoin = true;
|
||||
|
||||
plannerRestrictionContextList = lcons(plannerRestrictionContext,
|
||||
plannerRestrictionContextList);
|
||||
|
|
|
@ -1906,7 +1906,12 @@ FilterJoinRestrictionContext(JoinRestrictionContext *joinRestrictionContext, Rel
|
|||
}
|
||||
}
|
||||
|
||||
/* the filtered restriction might not have semiJoin, but it is OK for now */
|
||||
/*
|
||||
* No need to re calculate has join fields as we are still operating on
|
||||
* the same query and as these values are calculated per-query basis.
|
||||
*/
|
||||
filtererdJoinRestrictionContext->hasOnlyInnerJoin =
|
||||
joinRestrictionContext->hasOnlyInnerJoin;
|
||||
filtererdJoinRestrictionContext->hasSemiJoin = joinRestrictionContext->hasSemiJoin;
|
||||
|
||||
return filtererdJoinRestrictionContext;
|
||||
|
@ -2028,7 +2033,11 @@ RemoveDuplicateJoinRestrictions(JoinRestrictionContext *joinRestrictionContext)
|
|||
lappend(filteredContext->joinRestrictionList, joinRestriction);
|
||||
}
|
||||
|
||||
/* the filtered restriction might not have semiJoin, but it is OK for now */
|
||||
/*
|
||||
* No need to re calculate has join fields as we are still operating on
|
||||
* the same query and as these values are calculated per-query basis.
|
||||
*/
|
||||
filteredContext->hasOnlyInnerJoin = joinRestrictionContext->hasOnlyInnerJoin;
|
||||
filteredContext->hasSemiJoin = joinRestrictionContext->hasSemiJoin;
|
||||
|
||||
return filteredContext;
|
||||
|
@ -2062,21 +2071,30 @@ ContextCoversJoinRestriction(JoinRestrictionContext *joinRestrictionContext,
|
|||
continue;
|
||||
}
|
||||
|
||||
List *joinRestrictInfoListInTest =
|
||||
joinRestrictionInTest->joinRestrictInfoList;
|
||||
bool hasJoinRestriction = list_length(joinRestrictInfoListInTest) > 0;
|
||||
bool hasOnlyInnerJoin = joinRestrictionContext->hasOnlyInnerJoin;
|
||||
if (!hasOnlyInnerJoin && !hasJoinRestriction)
|
||||
{
|
||||
/*
|
||||
* If join doesn't have a restriction (e.g., ON (true)) and planner
|
||||
* is aware of at least one non-inner JOIN (e.g., outer/semi joins),
|
||||
* we should not eliminiate joinRestrictionInTest. It can still be
|
||||
* useful for detecting not supported outer-join checks even if it
|
||||
* doesn't help for colocation checks.
|
||||
*/
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* We check whether the restrictions in joinRestrictionInTest is a subset
|
||||
* of the restrictions in joinRestrictionInContext in the sense that all the
|
||||
* restrictions in the latter already exists in the former.
|
||||
*
|
||||
* Also, note that list_difference() returns a list that contains all the
|
||||
* cells in joinRestrictInfoList that are not in inputJoinRestrictInfoList.
|
||||
* Finally, each element in these lists is a pointer to RestrictInfo
|
||||
* structure, where equal() function is implemented for the struct.
|
||||
*/
|
||||
List *joinRestrictInfoListInContext =
|
||||
joinRestrictionInContext->joinRestrictInfoList;
|
||||
List *joinRestrictInfoListInTest =
|
||||
joinRestrictionInTest->joinRestrictInfoList;
|
||||
if (LeftListIsSubset(joinRestrictInfoListInContext, joinRestrictInfoListInTest))
|
||||
if (LeftListIsSubset(joinRestrictInfoListInTest, joinRestrictInfoListInContext))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -76,6 +76,7 @@ typedef struct JoinRestrictionContext
|
|||
{
|
||||
List *joinRestrictionList;
|
||||
bool hasSemiJoin;
|
||||
bool hasOnlyInnerJoin;
|
||||
} JoinRestrictionContext;
|
||||
|
||||
typedef struct JoinRestriction
|
||||
|
|
|
@ -151,5 +151,176 @@ SELECT ALL t7.c1, t7.c0, t8.c1, t10.c1, t8.c0 FROM t7 CROSS JOIN t10 FULL OUTER
|
|||
) AS foo;
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
CREATE TABLE reference_table(id int, it_name varchar(25), k_no int);
|
||||
SELECT create_reference_table('reference_table');
|
||||
create_reference_table
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
CREATE TABLE distributed_table(user_id int, item_id int, buy_count int);
|
||||
SELECT create_distributed_table('distributed_table', 'user_id');
|
||||
create_distributed_table
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
-- postgres plans below queries by evaluating joins as below:
|
||||
-- L
|
||||
-- / \
|
||||
-- ref L
|
||||
-- / \
|
||||
-- dist ref
|
||||
-- so we should error out as reference table is in the outer part of the top level (left) outer join
|
||||
SELECT count(*) FROM distributed_table a
|
||||
LEFT JOIN reference_table b ON (true)
|
||||
RIGHT JOIN reference_table c ON (true);
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
SELECT count(*) FROM distributed_table a
|
||||
LEFT JOIN reference_table b ON (true)
|
||||
RIGHT JOIN reference_table c ON (c.id > 0);
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
-- drop existing sqlancer tables before next tests
|
||||
DROP TABLE t0, t1, t2, t3, t4 CASCADE;
|
||||
CREATE TABLE IF NOT EXISTS t0(c0 TEXT CHECK (TRUE), c1 money ) WITH (autovacuum_vacuum_threshold=1180014707, autovacuum_freeze_table_age=13771154, autovacuum_vacuum_cost_delay=23, autovacuum_analyze_threshold=1935153914, autovacuum_freeze_min_age=721733768, autovacuum_enabled=0, autovacuum_vacuum_cost_limit=9983);
|
||||
CREATE UNLOGGED TABLE IF NOT EXISTS t1(LIKE t0);
|
||||
CREATE TABLE t2(LIKE t0 INCLUDING INDEXES);
|
||||
CREATE UNLOGGED TABLE t3(LIKE t0 EXCLUDING STATISTICS);
|
||||
CREATE TABLE t4(LIKE t1);
|
||||
SELECT create_distributed_table('t0', 'c0');
|
||||
create_distributed_table
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT create_reference_table('t1');
|
||||
create_reference_table
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT create_distributed_table('t2', 'c0');
|
||||
create_distributed_table
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT create_distributed_table('t3', 'c0');
|
||||
create_distributed_table
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT create_reference_table('t4');
|
||||
create_reference_table
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
-- whole join tree for below query is:
|
||||
-- L
|
||||
-- / \
|
||||
-- t1(ref) L
|
||||
-- / \
|
||||
-- t0(dist) t4(ref)
|
||||
-- -- so we should error out
|
||||
SELECT count(*) FROM (
|
||||
SELECT ALL t4.c1, t0.c0, t0.c1 FROM ONLY t0
|
||||
LEFT OUTER JOIN t4 ON CAST(masklen('142.158.96.44') AS BOOLEAN)
|
||||
RIGHT OUTER JOIN t1 ON ((0.024767844)::MONEY) BETWEEN (t1.c1) AND (CAST(0.0602135 AS MONEY))
|
||||
) AS foo;
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
-- first subquery has the same join tree as above, so we should error out
|
||||
SELECT count(*) FROM (
|
||||
SELECT ALL t4.c1, t0.c0, t0.c1 FROM ONLY t0
|
||||
LEFT OUTER JOIN t4 ON CAST(masklen('142.158.96.44') AS BOOLEAN)
|
||||
RIGHT OUTER JOIN t1 ON (CAST(0.024767844 AS MONEY)) BETWEEN (t1.c1) AND (CAST(0.0602135 AS MONEY))
|
||||
WHERE NOT (((t0.c0)LIKE((t4.c0))))
|
||||
UNION ALL SELECT t4.c1, t0.c0, t0.c1 FROM ONLY t0
|
||||
LEFT OUTER JOIN t4 ON CAST(masklen('142.158.96.44') AS BOOLEAN)
|
||||
RIGHT OUTER JOIN t1 ON ((0.024767844)::MONEY) BETWEEN (t1.c1) AND (CAST(0.0602135 AS MONEY))
|
||||
WHERE NOT (NOT (((t0.c0)LIKE((t4.c0)))))
|
||||
UNION ALL SELECT ALL t4.c1, t0.c0, t0.c1 FROM ONLY t0
|
||||
LEFT OUTER JOIN t4 ON (masklen('142.158.96.44'))::BOOLEAN
|
||||
RIGHT OUTER JOIN t1 ON ((0.024767844)::MONEY) BETWEEN (t1.c1) AND ((0.0602135)::MONEY)
|
||||
WHERE (NOT (((t0.c0)LIKE((t4.c0))))) ISNULL
|
||||
) AS foo;
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
-- unsupported outer JOIN inside a subquery in WHERE clause
|
||||
SELECT * FROM distributed_table WHERE buy_count > (
|
||||
SELECT count(*) FROM distributed_table a
|
||||
LEFT JOIN reference_table b ON (true)
|
||||
RIGHT JOIN reference_table c ON (false));
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
-- unsupported outer JOIN via subqueries
|
||||
SELECT count(*) FROM (SELECT *, random() FROM distributed_table) AS a
|
||||
LEFT JOIN (SELECT *, random() FROM reference_table) AS b ON (true)
|
||||
RIGHT JOIN (SELECT *, random() FROM reference_table) AS c ON (false);
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
-- unsupported outer JOIN in a sublevel subquery
|
||||
SELECT
|
||||
count(*)
|
||||
FROM
|
||||
(
|
||||
SELECT a.* FROM distributed_table a JOIN distributed_table b USING (user_id)
|
||||
) AS bar
|
||||
JOIN
|
||||
(
|
||||
SELECT a.* FROM distributed_table a
|
||||
LEFT JOIN reference_table b ON (true)
|
||||
RIGHT JOIN reference_table c ON (true)
|
||||
) AS unsupported_join
|
||||
ON (true);
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
-- unsupported outer JOIN in a sublevel INNER JOIN
|
||||
SELECT
|
||||
unsupported_join.*
|
||||
FROM
|
||||
(distributed_table a
|
||||
LEFT JOIN reference_table b ON (true)
|
||||
RIGHT JOIN reference_table c ON (true)) as unsupported_join (x,y,z,t,e,f,q)
|
||||
JOIN
|
||||
(reference_table d JOIN reference_table e ON(true)) ON (true);
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
-- unsupported outer JOIN in a sublevel LEFT JOIN
|
||||
SELECT
|
||||
unsupported_join.*
|
||||
FROM
|
||||
(distributed_table a
|
||||
LEFT JOIN reference_table b ON (true)
|
||||
RIGHT JOIN reference_table c ON (true)) as unsupported_join
|
||||
LEFT JOIN
|
||||
(reference_table d JOIN reference_table e ON(true)) ON (true);
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
-- unsupported outer JOIN in a sublevel RIGHT JOIN
|
||||
SELECT
|
||||
unsupported_join.*
|
||||
FROM
|
||||
(distributed_table a
|
||||
LEFT JOIN reference_table b ON (true)
|
||||
RIGHT JOIN reference_table c ON (false)) as unsupported_join
|
||||
RIGHT JOIN
|
||||
(reference_table d JOIN reference_table e ON(true)) ON (true);
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
EXPLAIN SELECT
|
||||
unsupported_join.*
|
||||
FROM
|
||||
(distributed_table a
|
||||
LEFT JOIN reference_table b ON (true)
|
||||
RIGHT JOIN reference_table c ON (true)) as unsupported_join (x,y,z,t,e,f,q)
|
||||
JOIN
|
||||
(reference_table d JOIN reference_table e ON(true)) ON (d.id > 0);
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
SET client_min_messages TO WARNING;
|
||||
DROP SCHEMA sqlancer_failures CASCADE;
|
||||
|
|
|
@ -58,5 +58,136 @@ SELECT count(*) FROM (
|
|||
SELECT ALL t7.c1, t7.c0, t8.c1, t10.c1, t8.c0 FROM t7 CROSS JOIN t10 FULL OUTER JOIN t8 ON (((((((('[832125354,1134163512)'::int4range)*('(0,2106623281)'::int4range)))-('(-600267905,509840582]'::int4range)))*('(-365203965,1662828182)'::int4range)))&<((((((('(-1286467417,697584012]'::int4range)*('[-1691485781,1341103963)'::int4range)))*((('(-1768368435,1719707648)'::int4range)*('(139536997,1275813540]'::int4range)))))*((((('[-2103910157,-1961746758)'::int4range)*('[-834534078,533073939)'::int4range)))*((('[-1030552151,552856781]'::int4range)*('[-1109419376,1205173697]'::int4range))))))))
|
||||
) AS foo;
|
||||
|
||||
CREATE TABLE reference_table(id int, it_name varchar(25), k_no int);
|
||||
SELECT create_reference_table('reference_table');
|
||||
|
||||
CREATE TABLE distributed_table(user_id int, item_id int, buy_count int);
|
||||
SELECT create_distributed_table('distributed_table', 'user_id');
|
||||
|
||||
-- postgres plans below queries by evaluating joins as below:
|
||||
-- L
|
||||
-- / \
|
||||
-- ref L
|
||||
-- / \
|
||||
-- dist ref
|
||||
-- so we should error out as reference table is in the outer part of the top level (left) outer join
|
||||
|
||||
SELECT count(*) FROM distributed_table a
|
||||
LEFT JOIN reference_table b ON (true)
|
||||
RIGHT JOIN reference_table c ON (true);
|
||||
|
||||
SELECT count(*) FROM distributed_table a
|
||||
LEFT JOIN reference_table b ON (true)
|
||||
RIGHT JOIN reference_table c ON (c.id > 0);
|
||||
|
||||
-- drop existing sqlancer tables before next tests
|
||||
DROP TABLE t0, t1, t2, t3, t4 CASCADE;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS t0(c0 TEXT CHECK (TRUE), c1 money ) WITH (autovacuum_vacuum_threshold=1180014707, autovacuum_freeze_table_age=13771154, autovacuum_vacuum_cost_delay=23, autovacuum_analyze_threshold=1935153914, autovacuum_freeze_min_age=721733768, autovacuum_enabled=0, autovacuum_vacuum_cost_limit=9983);
|
||||
CREATE UNLOGGED TABLE IF NOT EXISTS t1(LIKE t0);
|
||||
CREATE TABLE t2(LIKE t0 INCLUDING INDEXES);
|
||||
CREATE UNLOGGED TABLE t3(LIKE t0 EXCLUDING STATISTICS);
|
||||
CREATE TABLE t4(LIKE t1);
|
||||
|
||||
SELECT create_distributed_table('t0', 'c0');
|
||||
SELECT create_reference_table('t1');
|
||||
SELECT create_distributed_table('t2', 'c0');
|
||||
SELECT create_distributed_table('t3', 'c0');
|
||||
SELECT create_reference_table('t4');
|
||||
|
||||
-- whole join tree for below query is:
|
||||
-- L
|
||||
-- / \
|
||||
-- t1(ref) L
|
||||
-- / \
|
||||
-- t0(dist) t4(ref)
|
||||
-- -- so we should error out
|
||||
SELECT count(*) FROM (
|
||||
SELECT ALL t4.c1, t0.c0, t0.c1 FROM ONLY t0
|
||||
LEFT OUTER JOIN t4 ON CAST(masklen('142.158.96.44') AS BOOLEAN)
|
||||
RIGHT OUTER JOIN t1 ON ((0.024767844)::MONEY) BETWEEN (t1.c1) AND (CAST(0.0602135 AS MONEY))
|
||||
) AS foo;
|
||||
|
||||
-- first subquery has the same join tree as above, so we should error out
|
||||
SELECT count(*) FROM (
|
||||
SELECT ALL t4.c1, t0.c0, t0.c1 FROM ONLY t0
|
||||
LEFT OUTER JOIN t4 ON CAST(masklen('142.158.96.44') AS BOOLEAN)
|
||||
RIGHT OUTER JOIN t1 ON (CAST(0.024767844 AS MONEY)) BETWEEN (t1.c1) AND (CAST(0.0602135 AS MONEY))
|
||||
WHERE NOT (((t0.c0)LIKE((t4.c0))))
|
||||
UNION ALL SELECT t4.c1, t0.c0, t0.c1 FROM ONLY t0
|
||||
LEFT OUTER JOIN t4 ON CAST(masklen('142.158.96.44') AS BOOLEAN)
|
||||
RIGHT OUTER JOIN t1 ON ((0.024767844)::MONEY) BETWEEN (t1.c1) AND (CAST(0.0602135 AS MONEY))
|
||||
WHERE NOT (NOT (((t0.c0)LIKE((t4.c0)))))
|
||||
UNION ALL SELECT ALL t4.c1, t0.c0, t0.c1 FROM ONLY t0
|
||||
LEFT OUTER JOIN t4 ON (masklen('142.158.96.44'))::BOOLEAN
|
||||
RIGHT OUTER JOIN t1 ON ((0.024767844)::MONEY) BETWEEN (t1.c1) AND ((0.0602135)::MONEY)
|
||||
WHERE (NOT (((t0.c0)LIKE((t4.c0))))) ISNULL
|
||||
) AS foo;
|
||||
|
||||
-- unsupported outer JOIN inside a subquery in WHERE clause
|
||||
SELECT * FROM distributed_table WHERE buy_count > (
|
||||
SELECT count(*) FROM distributed_table a
|
||||
LEFT JOIN reference_table b ON (true)
|
||||
RIGHT JOIN reference_table c ON (false));
|
||||
|
||||
-- unsupported outer JOIN via subqueries
|
||||
SELECT count(*) FROM (SELECT *, random() FROM distributed_table) AS a
|
||||
LEFT JOIN (SELECT *, random() FROM reference_table) AS b ON (true)
|
||||
RIGHT JOIN (SELECT *, random() FROM reference_table) AS c ON (false);
|
||||
|
||||
-- unsupported outer JOIN in a sublevel subquery
|
||||
SELECT
|
||||
count(*)
|
||||
FROM
|
||||
(
|
||||
SELECT a.* FROM distributed_table a JOIN distributed_table b USING (user_id)
|
||||
) AS bar
|
||||
JOIN
|
||||
(
|
||||
SELECT a.* FROM distributed_table a
|
||||
LEFT JOIN reference_table b ON (true)
|
||||
RIGHT JOIN reference_table c ON (true)
|
||||
) AS unsupported_join
|
||||
ON (true);
|
||||
|
||||
-- unsupported outer JOIN in a sublevel INNER JOIN
|
||||
SELECT
|
||||
unsupported_join.*
|
||||
FROM
|
||||
(distributed_table a
|
||||
LEFT JOIN reference_table b ON (true)
|
||||
RIGHT JOIN reference_table c ON (true)) as unsupported_join (x,y,z,t,e,f,q)
|
||||
JOIN
|
||||
(reference_table d JOIN reference_table e ON(true)) ON (true);
|
||||
|
||||
-- unsupported outer JOIN in a sublevel LEFT JOIN
|
||||
SELECT
|
||||
unsupported_join.*
|
||||
FROM
|
||||
(distributed_table a
|
||||
LEFT JOIN reference_table b ON (true)
|
||||
RIGHT JOIN reference_table c ON (true)) as unsupported_join
|
||||
LEFT JOIN
|
||||
(reference_table d JOIN reference_table e ON(true)) ON (true);
|
||||
|
||||
-- unsupported outer JOIN in a sublevel RIGHT JOIN
|
||||
SELECT
|
||||
unsupported_join.*
|
||||
FROM
|
||||
(distributed_table a
|
||||
LEFT JOIN reference_table b ON (true)
|
||||
RIGHT JOIN reference_table c ON (false)) as unsupported_join
|
||||
RIGHT JOIN
|
||||
(reference_table d JOIN reference_table e ON(true)) ON (true);
|
||||
|
||||
EXPLAIN SELECT
|
||||
unsupported_join.*
|
||||
FROM
|
||||
(distributed_table a
|
||||
LEFT JOIN reference_table b ON (true)
|
||||
RIGHT JOIN reference_table c ON (true)) as unsupported_join (x,y,z,t,e,f,q)
|
||||
JOIN
|
||||
(reference_table d JOIN reference_table e ON(true)) ON (d.id > 0);
|
||||
|
||||
SET client_min_messages TO WARNING;
|
||||
DROP SCHEMA sqlancer_failures CASCADE;
|
||||
|
|
Loading…
Reference in New Issue