Disallow outer joins `ON TRUE` with ref & dist tables when ref table is outer relation (#4255)

Disallow `ON TRUE` outer joins with reference & distributed tables
when reference table is outer relation by fixing the logic bug made
when calling `LeftListIsSubset` function.

Also, be more defensive when removing duplicate join restrictions
when join clause is empty for non-inner joins as they might still
contain useful information for non-inner joins.
pull/4243/head
Onur Tirtir 2020-10-19 16:58:11 +03:00 committed by GitHub
parent 6e493624af
commit c49077d594
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 334 additions and 10 deletions

View File

@ -1744,6 +1744,8 @@ multi_join_restriction_hook(PlannerInfo *root,
*/
joinRestrictionContext->hasSemiJoin = joinRestrictionContext->hasSemiJoin ||
extra->sjinfo->jointype == JOIN_SEMI;
joinRestrictionContext->hasOnlyInnerJoin = joinRestrictionContext->hasOnlyInnerJoin &&
extra->sjinfo->jointype == JOIN_INNER;
MemoryContextSwitchTo(oldMemoryContext);
}
@ -2133,6 +2135,7 @@ CreateAndPushPlannerRestrictionContext(void)
/* we'll apply logical AND as we add tables */
plannerRestrictionContext->relationRestrictionContext->allReferenceTables = true;
plannerRestrictionContext->joinRestrictionContext->hasOnlyInnerJoin = true;
plannerRestrictionContextList = lcons(plannerRestrictionContext,
plannerRestrictionContextList);

View File

@ -1906,7 +1906,12 @@ FilterJoinRestrictionContext(JoinRestrictionContext *joinRestrictionContext, Rel
}
}
/* the filtered restriction might not have semiJoin, but it is OK for now */
/*
* No need to re calculate has join fields as we are still operating on
* the same query and as these values are calculated per-query basis.
*/
filtererdJoinRestrictionContext->hasOnlyInnerJoin =
joinRestrictionContext->hasOnlyInnerJoin;
filtererdJoinRestrictionContext->hasSemiJoin = joinRestrictionContext->hasSemiJoin;
return filtererdJoinRestrictionContext;
@ -2028,7 +2033,11 @@ RemoveDuplicateJoinRestrictions(JoinRestrictionContext *joinRestrictionContext)
lappend(filteredContext->joinRestrictionList, joinRestriction);
}
/* the filtered restriction might not have semiJoin, but it is OK for now */
/*
* No need to re calculate has join fields as we are still operating on
* the same query and as these values are calculated per-query basis.
*/
filteredContext->hasOnlyInnerJoin = joinRestrictionContext->hasOnlyInnerJoin;
filteredContext->hasSemiJoin = joinRestrictionContext->hasSemiJoin;
return filteredContext;
@ -2062,21 +2071,30 @@ ContextCoversJoinRestriction(JoinRestrictionContext *joinRestrictionContext,
continue;
}
List *joinRestrictInfoListInTest =
joinRestrictionInTest->joinRestrictInfoList;
bool hasJoinRestriction = list_length(joinRestrictInfoListInTest) > 0;
bool hasOnlyInnerJoin = joinRestrictionContext->hasOnlyInnerJoin;
if (!hasOnlyInnerJoin && !hasJoinRestriction)
{
/*
* If join doesn't have a restriction (e.g., ON (true)) and planner
* is aware of at least one non-inner JOIN (e.g., outer/semi joins),
* we should not eliminiate joinRestrictionInTest. It can still be
* useful for detecting not supported outer-join checks even if it
* doesn't help for colocation checks.
*/
continue;
}
/*
* We check whether the restrictions in joinRestrictionInTest is a subset
* of the restrictions in joinRestrictionInContext in the sense that all the
* restrictions in the latter already exists in the former.
*
* Also, note that list_difference() returns a list that contains all the
* cells in joinRestrictInfoList that are not in inputJoinRestrictInfoList.
* Finally, each element in these lists is a pointer to RestrictInfo
* structure, where equal() function is implemented for the struct.
*/
List *joinRestrictInfoListInContext =
joinRestrictionInContext->joinRestrictInfoList;
List *joinRestrictInfoListInTest =
joinRestrictionInTest->joinRestrictInfoList;
if (LeftListIsSubset(joinRestrictInfoListInContext, joinRestrictInfoListInTest))
if (LeftListIsSubset(joinRestrictInfoListInTest, joinRestrictInfoListInContext))
{
return true;
}

View File

@ -76,6 +76,7 @@ typedef struct JoinRestrictionContext
{
List *joinRestrictionList;
bool hasSemiJoin;
bool hasOnlyInnerJoin;
} JoinRestrictionContext;
typedef struct JoinRestriction

View File

@ -151,5 +151,176 @@ SELECT ALL t7.c1, t7.c0, t8.c1, t10.c1, t8.c0 FROM t7 CROSS JOIN t10 FULL OUTER
) AS foo;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
CREATE TABLE reference_table(id int, it_name varchar(25), k_no int);
SELECT create_reference_table('reference_table');
create_reference_table
---------------------------------------------------------------------
(1 row)
CREATE TABLE distributed_table(user_id int, item_id int, buy_count int);
SELECT create_distributed_table('distributed_table', 'user_id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- postgres plans below queries by evaluating joins as below:
-- L
-- / \
-- ref L
-- / \
-- dist ref
-- so we should error out as reference table is in the outer part of the top level (left) outer join
SELECT count(*) FROM distributed_table a
LEFT JOIN reference_table b ON (true)
RIGHT JOIN reference_table c ON (true);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM distributed_table a
LEFT JOIN reference_table b ON (true)
RIGHT JOIN reference_table c ON (c.id > 0);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- drop existing sqlancer tables before next tests
DROP TABLE t0, t1, t2, t3, t4 CASCADE;
CREATE TABLE IF NOT EXISTS t0(c0 TEXT CHECK (TRUE), c1 money ) WITH (autovacuum_vacuum_threshold=1180014707, autovacuum_freeze_table_age=13771154, autovacuum_vacuum_cost_delay=23, autovacuum_analyze_threshold=1935153914, autovacuum_freeze_min_age=721733768, autovacuum_enabled=0, autovacuum_vacuum_cost_limit=9983);
CREATE UNLOGGED TABLE IF NOT EXISTS t1(LIKE t0);
CREATE TABLE t2(LIKE t0 INCLUDING INDEXES);
CREATE UNLOGGED TABLE t3(LIKE t0 EXCLUDING STATISTICS);
CREATE TABLE t4(LIKE t1);
SELECT create_distributed_table('t0', 'c0');
create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT create_reference_table('t1');
create_reference_table
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('t2', 'c0');
create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('t3', 'c0');
create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT create_reference_table('t4');
create_reference_table
---------------------------------------------------------------------
(1 row)
-- whole join tree for below query is:
-- L
-- / \
-- t1(ref) L
-- / \
-- t0(dist) t4(ref)
-- -- so we should error out
SELECT count(*) FROM (
SELECT ALL t4.c1, t0.c0, t0.c1 FROM ONLY t0
LEFT OUTER JOIN t4 ON CAST(masklen('142.158.96.44') AS BOOLEAN)
RIGHT OUTER JOIN t1 ON ((0.024767844)::MONEY) BETWEEN (t1.c1) AND (CAST(0.0602135 AS MONEY))
) AS foo;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- first subquery has the same join tree as above, so we should error out
SELECT count(*) FROM (
SELECT ALL t4.c1, t0.c0, t0.c1 FROM ONLY t0
LEFT OUTER JOIN t4 ON CAST(masklen('142.158.96.44') AS BOOLEAN)
RIGHT OUTER JOIN t1 ON (CAST(0.024767844 AS MONEY)) BETWEEN (t1.c1) AND (CAST(0.0602135 AS MONEY))
WHERE NOT (((t0.c0)LIKE((t4.c0))))
UNION ALL SELECT t4.c1, t0.c0, t0.c1 FROM ONLY t0
LEFT OUTER JOIN t4 ON CAST(masklen('142.158.96.44') AS BOOLEAN)
RIGHT OUTER JOIN t1 ON ((0.024767844)::MONEY) BETWEEN (t1.c1) AND (CAST(0.0602135 AS MONEY))
WHERE NOT (NOT (((t0.c0)LIKE((t4.c0)))))
UNION ALL SELECT ALL t4.c1, t0.c0, t0.c1 FROM ONLY t0
LEFT OUTER JOIN t4 ON (masklen('142.158.96.44'))::BOOLEAN
RIGHT OUTER JOIN t1 ON ((0.024767844)::MONEY) BETWEEN (t1.c1) AND ((0.0602135)::MONEY)
WHERE (NOT (((t0.c0)LIKE((t4.c0))))) ISNULL
) AS foo;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- unsupported outer JOIN inside a subquery in WHERE clause
SELECT * FROM distributed_table WHERE buy_count > (
SELECT count(*) FROM distributed_table a
LEFT JOIN reference_table b ON (true)
RIGHT JOIN reference_table c ON (false));
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- unsupported outer JOIN via subqueries
SELECT count(*) FROM (SELECT *, random() FROM distributed_table) AS a
LEFT JOIN (SELECT *, random() FROM reference_table) AS b ON (true)
RIGHT JOIN (SELECT *, random() FROM reference_table) AS c ON (false);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- unsupported outer JOIN in a sublevel subquery
SELECT
count(*)
FROM
(
SELECT a.* FROM distributed_table a JOIN distributed_table b USING (user_id)
) AS bar
JOIN
(
SELECT a.* FROM distributed_table a
LEFT JOIN reference_table b ON (true)
RIGHT JOIN reference_table c ON (true)
) AS unsupported_join
ON (true);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- unsupported outer JOIN in a sublevel INNER JOIN
SELECT
unsupported_join.*
FROM
(distributed_table a
LEFT JOIN reference_table b ON (true)
RIGHT JOIN reference_table c ON (true)) as unsupported_join (x,y,z,t,e,f,q)
JOIN
(reference_table d JOIN reference_table e ON(true)) ON (true);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- unsupported outer JOIN in a sublevel LEFT JOIN
SELECT
unsupported_join.*
FROM
(distributed_table a
LEFT JOIN reference_table b ON (true)
RIGHT JOIN reference_table c ON (true)) as unsupported_join
LEFT JOIN
(reference_table d JOIN reference_table e ON(true)) ON (true);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- unsupported outer JOIN in a sublevel RIGHT JOIN
SELECT
unsupported_join.*
FROM
(distributed_table a
LEFT JOIN reference_table b ON (true)
RIGHT JOIN reference_table c ON (false)) as unsupported_join
RIGHT JOIN
(reference_table d JOIN reference_table e ON(true)) ON (true);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
EXPLAIN SELECT
unsupported_join.*
FROM
(distributed_table a
LEFT JOIN reference_table b ON (true)
RIGHT JOIN reference_table c ON (true)) as unsupported_join (x,y,z,t,e,f,q)
JOIN
(reference_table d JOIN reference_table e ON(true)) ON (d.id > 0);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SET client_min_messages TO WARNING;
DROP SCHEMA sqlancer_failures CASCADE;

View File

@ -58,5 +58,136 @@ SELECT count(*) FROM (
SELECT ALL t7.c1, t7.c0, t8.c1, t10.c1, t8.c0 FROM t7 CROSS JOIN t10 FULL OUTER JOIN t8 ON (((((((('[832125354,1134163512)'::int4range)*('(0,2106623281)'::int4range)))-('(-600267905,509840582]'::int4range)))*('(-365203965,1662828182)'::int4range)))&<((((((('(-1286467417,697584012]'::int4range)*('[-1691485781,1341103963)'::int4range)))*((('(-1768368435,1719707648)'::int4range)*('(139536997,1275813540]'::int4range)))))*((((('[-2103910157,-1961746758)'::int4range)*('[-834534078,533073939)'::int4range)))*((('[-1030552151,552856781]'::int4range)*('[-1109419376,1205173697]'::int4range))))))))
) AS foo;
CREATE TABLE reference_table(id int, it_name varchar(25), k_no int);
SELECT create_reference_table('reference_table');
CREATE TABLE distributed_table(user_id int, item_id int, buy_count int);
SELECT create_distributed_table('distributed_table', 'user_id');
-- postgres plans below queries by evaluating joins as below:
-- L
-- / \
-- ref L
-- / \
-- dist ref
-- so we should error out as reference table is in the outer part of the top level (left) outer join
SELECT count(*) FROM distributed_table a
LEFT JOIN reference_table b ON (true)
RIGHT JOIN reference_table c ON (true);
SELECT count(*) FROM distributed_table a
LEFT JOIN reference_table b ON (true)
RIGHT JOIN reference_table c ON (c.id > 0);
-- drop existing sqlancer tables before next tests
DROP TABLE t0, t1, t2, t3, t4 CASCADE;
CREATE TABLE IF NOT EXISTS t0(c0 TEXT CHECK (TRUE), c1 money ) WITH (autovacuum_vacuum_threshold=1180014707, autovacuum_freeze_table_age=13771154, autovacuum_vacuum_cost_delay=23, autovacuum_analyze_threshold=1935153914, autovacuum_freeze_min_age=721733768, autovacuum_enabled=0, autovacuum_vacuum_cost_limit=9983);
CREATE UNLOGGED TABLE IF NOT EXISTS t1(LIKE t0);
CREATE TABLE t2(LIKE t0 INCLUDING INDEXES);
CREATE UNLOGGED TABLE t3(LIKE t0 EXCLUDING STATISTICS);
CREATE TABLE t4(LIKE t1);
SELECT create_distributed_table('t0', 'c0');
SELECT create_reference_table('t1');
SELECT create_distributed_table('t2', 'c0');
SELECT create_distributed_table('t3', 'c0');
SELECT create_reference_table('t4');
-- whole join tree for below query is:
-- L
-- / \
-- t1(ref) L
-- / \
-- t0(dist) t4(ref)
-- -- so we should error out
SELECT count(*) FROM (
SELECT ALL t4.c1, t0.c0, t0.c1 FROM ONLY t0
LEFT OUTER JOIN t4 ON CAST(masklen('142.158.96.44') AS BOOLEAN)
RIGHT OUTER JOIN t1 ON ((0.024767844)::MONEY) BETWEEN (t1.c1) AND (CAST(0.0602135 AS MONEY))
) AS foo;
-- first subquery has the same join tree as above, so we should error out
SELECT count(*) FROM (
SELECT ALL t4.c1, t0.c0, t0.c1 FROM ONLY t0
LEFT OUTER JOIN t4 ON CAST(masklen('142.158.96.44') AS BOOLEAN)
RIGHT OUTER JOIN t1 ON (CAST(0.024767844 AS MONEY)) BETWEEN (t1.c1) AND (CAST(0.0602135 AS MONEY))
WHERE NOT (((t0.c0)LIKE((t4.c0))))
UNION ALL SELECT t4.c1, t0.c0, t0.c1 FROM ONLY t0
LEFT OUTER JOIN t4 ON CAST(masklen('142.158.96.44') AS BOOLEAN)
RIGHT OUTER JOIN t1 ON ((0.024767844)::MONEY) BETWEEN (t1.c1) AND (CAST(0.0602135 AS MONEY))
WHERE NOT (NOT (((t0.c0)LIKE((t4.c0)))))
UNION ALL SELECT ALL t4.c1, t0.c0, t0.c1 FROM ONLY t0
LEFT OUTER JOIN t4 ON (masklen('142.158.96.44'))::BOOLEAN
RIGHT OUTER JOIN t1 ON ((0.024767844)::MONEY) BETWEEN (t1.c1) AND ((0.0602135)::MONEY)
WHERE (NOT (((t0.c0)LIKE((t4.c0))))) ISNULL
) AS foo;
-- unsupported outer JOIN inside a subquery in WHERE clause
SELECT * FROM distributed_table WHERE buy_count > (
SELECT count(*) FROM distributed_table a
LEFT JOIN reference_table b ON (true)
RIGHT JOIN reference_table c ON (false));
-- unsupported outer JOIN via subqueries
SELECT count(*) FROM (SELECT *, random() FROM distributed_table) AS a
LEFT JOIN (SELECT *, random() FROM reference_table) AS b ON (true)
RIGHT JOIN (SELECT *, random() FROM reference_table) AS c ON (false);
-- unsupported outer JOIN in a sublevel subquery
SELECT
count(*)
FROM
(
SELECT a.* FROM distributed_table a JOIN distributed_table b USING (user_id)
) AS bar
JOIN
(
SELECT a.* FROM distributed_table a
LEFT JOIN reference_table b ON (true)
RIGHT JOIN reference_table c ON (true)
) AS unsupported_join
ON (true);
-- unsupported outer JOIN in a sublevel INNER JOIN
SELECT
unsupported_join.*
FROM
(distributed_table a
LEFT JOIN reference_table b ON (true)
RIGHT JOIN reference_table c ON (true)) as unsupported_join (x,y,z,t,e,f,q)
JOIN
(reference_table d JOIN reference_table e ON(true)) ON (true);
-- unsupported outer JOIN in a sublevel LEFT JOIN
SELECT
unsupported_join.*
FROM
(distributed_table a
LEFT JOIN reference_table b ON (true)
RIGHT JOIN reference_table c ON (true)) as unsupported_join
LEFT JOIN
(reference_table d JOIN reference_table e ON(true)) ON (true);
-- unsupported outer JOIN in a sublevel RIGHT JOIN
SELECT
unsupported_join.*
FROM
(distributed_table a
LEFT JOIN reference_table b ON (true)
RIGHT JOIN reference_table c ON (false)) as unsupported_join
RIGHT JOIN
(reference_table d JOIN reference_table e ON(true)) ON (true);
EXPLAIN SELECT
unsupported_join.*
FROM
(distributed_table a
LEFT JOIN reference_table b ON (true)
RIGHT JOIN reference_table c ON (true)) as unsupported_join (x,y,z,t,e,f,q)
JOIN
(reference_table d JOIN reference_table e ON(true)) ON (d.id > 0);
SET client_min_messages TO WARNING;
DROP SCHEMA sqlancer_failures CASCADE;