Merge pull request #3597 from citusdata/refactor_outer_join_tests

Refactor outer join checks
pull/4247/head
Önder Kalacı 2020-10-14 15:23:55 +02:00 committed by GitHub
commit 291154665f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 881 additions and 97 deletions

View File

@ -79,10 +79,8 @@ static DeferredErrorMessage * DeferredErrorIfUnsupportedRecurringTuplesJoin(
PlannerRestrictionContext *plannerRestrictionContext);
static DeferredErrorMessage * DeferErrorIfUnsupportedTableCombination(Query *queryTree);
static bool ExtractSetOperationStatmentWalker(Node *node, List **setOperationList);
static bool ShouldRecurseForRecurringTuplesJoinChecks(RelOptInfo *relOptInfo);
static bool RelationInfoContainsRecurringTuples(PlannerInfo *plannerInfo,
RelOptInfo *relationInfo,
RecurringTuplesType *recurType);
static RecurringTuplesType FetchFirstRecurType(PlannerInfo *plannerInfo, RelOptInfo *
relationInfo);
static bool ContainsRecurringRTE(RangeTblEntry *rangeTableEntry,
RecurringTuplesType *recurType);
static bool ContainsRecurringRangeTable(List *rangeTable, RecurringTuplesType *recurType);
@ -776,7 +774,6 @@ DeferredErrorIfUnsupportedRecurringTuplesJoin(
plannerRestrictionContext->joinRestrictionContext->joinRestrictionList;
ListCell *joinRestrictionCell = NULL;
RecurringTuplesType recurType = RECURRING_TUPLES_INVALID;
foreach(joinRestrictionCell, joinRestrictionList)
{
JoinRestriction *joinRestriction = (JoinRestriction *) lfirst(
@ -799,20 +796,45 @@ DeferredErrorIfUnsupportedRecurringTuplesJoin(
continue;
}
if (ShouldRecurseForRecurringTuplesJoinChecks(outerrel) &&
RelationInfoContainsRecurringTuples(plannerInfo, outerrel, &recurType))
/*
* If the outer side of the join doesn't have any distributed tables
* (e.g., contains only recurring tuples), Citus should not pushdown
* the query. The reason is that recurring tuples on every shard would
* be added to the result, which is wrong.
*/
if (RelationInfoContainsOnlyRecurringTuples(plannerInfo, outerrel))
{
/*
* Find the first (or only) recurring RTE to give a meaningful
* error to the user.
*/
recurType = FetchFirstRecurType(plannerInfo, outerrel);
break;
}
}
else if (joinType == JOIN_FULL)
{
if ((ShouldRecurseForRecurringTuplesJoinChecks(innerrel) &&
RelationInfoContainsRecurringTuples(plannerInfo, innerrel,
&recurType)) ||
(ShouldRecurseForRecurringTuplesJoinChecks(outerrel) &&
RelationInfoContainsRecurringTuples(plannerInfo, outerrel, &recurType)))
if (RelationInfoContainsOnlyRecurringTuples(plannerInfo, innerrel))
{
/*
* Find the first (or only) recurring RTE to give a meaningful
* error to the user.
*/
recurType = FetchFirstRecurType(plannerInfo, innerrel);
break;
}
if (RelationInfoContainsOnlyRecurringTuples(plannerInfo, outerrel))
{
/*
* Find the first (or only) recurring RTE to give a meaningful
* error to the user.
*/
recurType = FetchFirstRecurType(plannerInfo, outerrel);
break;
}
}
@ -846,6 +868,7 @@ DeferredErrorIfUnsupportedRecurringTuplesJoin(
"Complex subqueries and CTEs cannot be in the outer "
"part of the outer join", NULL);
}
return NULL;
}
@ -1071,7 +1094,8 @@ DeferErrorIfUnsupportedTableCombination(Query *queryTree)
* Extract all range table indexes from the join tree. Note that sub-queries
* that get pulled up by PostgreSQL don't appear in this join tree.
*/
ExtractRangeTableIndexWalker((Node *) queryTree->jointree, &joinTreeTableIndexList);
ExtractRangeTableIndexWalker((Node *) queryTree->jointree,
&joinTreeTableIndexList);
foreach_int(joinTreeTableIndex, joinTreeTableIndexList)
{
@ -1180,13 +1204,15 @@ DeferErrorIfUnsupportedUnionQuery(Query *subqueryTree)
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"cannot push down this subquery",
"Intersect and Except are currently unsupported", NULL);
"Intersect and Except are currently unsupported",
NULL);
}
if (IsA(leftArg, RangeTblRef))
{
leftArgRTI = ((RangeTblRef *) leftArg)->rtindex;
Query *leftArgSubquery = rt_fetch(leftArgRTI, subqueryTree->rtable)->subquery;
Query *leftArgSubquery = rt_fetch(leftArgRTI,
subqueryTree->rtable)->subquery;
recurType = FromClauseRecurringTupleType(leftArgSubquery);
if (recurType != RECURRING_TUPLES_INVALID)
{
@ -1260,81 +1286,14 @@ ExtractSetOperationStatmentWalker(Node *node, List **setOperationList)
(*setOperationList) = lappend(*setOperationList, setOperation);
}
bool walkerResult = expression_tree_walker(node, ExtractSetOperationStatmentWalker,
bool walkerResult = expression_tree_walker(node,
ExtractSetOperationStatmentWalker,
setOperationList);
return walkerResult;
}
/*
* ShouldRecurseForRecurringTuplesJoinChecks is a helper function for deciding
* on whether the input relOptInfo should be checked for table expressions that
* generate the same tuples in every query on a shard. We use this to avoid
* redundant checks and false positives in complex join trees.
*/
static bool
ShouldRecurseForRecurringTuplesJoinChecks(RelOptInfo *relOptInfo)
{
bool shouldRecurse = true;
/*
* We shouldn't recursively go down for joins since we're already
* going to process each join seperately. Otherwise we'd restrict
* the coverage. See the below sketch where (h) denotes a hash
* distributed relation, (r) denotes a reference table, (L) denotes
* LEFT JOIN and (I) denotes INNER JOIN. If we're to recurse into
* the inner join, we'd be preventing to push down the following
* join tree, which is actually safe to push down.
*
* (L)
* / \
* (I) h
* / \
* r h
*/
if (relOptInfo->reloptkind == RELOPT_JOINREL)
{
return false;
}
/*
* Note that we treat the same query where relations appear in subqueries
* differently. (i.e., use SELECT * FROM r; instead of r)
*
* In that case, to relax some restrictions, we do the following optimization:
* If the subplan (i.e., plannerInfo corresponding to the subquery) contains any
* joins, we skip reference table checks keeping in mind that the join is already
* going to be processed seperately. This optimization should suffice for many
* use cases.
*/
if (relOptInfo->reloptkind == RELOPT_BASEREL && relOptInfo->subroot != NULL)
{
PlannerInfo *subroot = relOptInfo->subroot;
if (list_length(subroot->join_rel_list) > 0)
{
RelOptInfo *subqueryJoin = linitial(subroot->join_rel_list);
/*
* Subqueries without relations (e.g. SELECT 1) are a little funny.
* They are treated as having a join, but the join is between 0
* relations and won't be in the join restriction list and therefore
* won't be revisited in DeferredErrorIfUnsupportedRecurringTuplesJoin.
*
* We therefore only skip joins with >0 relations.
*/
if (bms_num_members(subqueryJoin->relids) > 0)
{
shouldRecurse = false;
}
}
}
return shouldRecurse;
}
/*
* RelationInfoContainsOnlyRecurringTuples returns false if any of the relations in
* a RelOptInfo is not recurring.
@ -1370,19 +1329,20 @@ RelationInfoContainsOnlyRecurringTuples(PlannerInfo *plannerInfo,
/*
* RelationInfoContainsRecurringTuples checks whether the relationInfo
* FetchFirstRecurType checks whether the relationInfo
* contains any recurring table expression, namely a reference table,
* or immutable function. If found, RelationInfoContainsRecurringTuples
* or immutable function. If found, FetchFirstRecurType
* returns true.
*
* Note that since relation ids of relationInfo indexes to the range
* table entry list of planner info, planner info is also passed.
*/
static bool
RelationInfoContainsRecurringTuples(PlannerInfo *plannerInfo, RelOptInfo *relationInfo,
RecurringTuplesType *recurType)
static RecurringTuplesType
FetchFirstRecurType(PlannerInfo *plannerInfo, RelOptInfo *
relationInfo)
{
Relids relids = relationInfo->relids;
RecurringTuplesType recurType = RECURRING_TUPLES_INVALID;
int relationId = -1;
while ((relationId = bms_next_member(relids, relationId)) >= 0)
@ -1390,13 +1350,13 @@ RelationInfoContainsRecurringTuples(PlannerInfo *plannerInfo, RelOptInfo *relati
RangeTblEntry *rangeTableEntry = plannerInfo->simple_rte_array[relationId];
/* relationInfo has this range table entry */
if (ContainsRecurringRTE(rangeTableEntry, recurType))
if (ContainsRecurringRTE(rangeTableEntry, &recurType))
{
return true;
return recurType;
}
}
return false;
return recurType;
}

View File

@ -0,0 +1,253 @@
-- this test file relies on multi_behavioral_analytics_create_table
-- and aims to have variety of tests covering CROSS JOINs
-- "t1 CROSS JOIN t2" is equivalent of "t1 JOIN t2 ON true"
-- a distributed table can be cross joined with a reference table
-- and the CROSS JOIN can be in the outer part of an outer JOIN
SELECT count(*) FROM events_reference_table e1 CROSS JOIN events_table e2 LEFT JOIN users_table u ON (e2.user_id = u.user_id);
count
---------------------------------------------------------------------
176649
(1 row)
-- two distributed tables cannot be cross joined
-- as it lacks distribution key equality
SELECT count(*) FROM events_reference_table e1 CROSS JOIN events_table e2 CROSS JOIN users_table u;
ERROR: cannot perform distributed planning on this query
DETAIL: Cartesian products are currently unsupported
SELECT count(*) FROM events_reference_table e1, events_table e2, users_table u;
ERROR: cannot perform distributed planning on this query
DETAIL: Cartesian products are currently unsupported
-- we can provide the distribution key equality via WHERE clause
SELECT count(*) FROM events_reference_table e1 CROSS JOIN events_table e2 CROSS JOIN users_table u WHERE u.user_id = e2.user_id;
count
---------------------------------------------------------------------
176649
(1 row)
-- two reference tables are JOINed, and later CROSS JOINed with a distributed table
-- it is safe to pushdown
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id CROSS JOIN users_table;
count
---------------------------------------------------------------------
606
(1 row)
SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id CROSS JOIN users_table;
count
---------------------------------------------------------------------
606
(1 row)
SELECT count(*) FROM users_ref_test_table ref1 RIGHT JOIN users_ref_test_table ref2 on ref1.id = ref2.id CROSS JOIN users_table;
count
---------------------------------------------------------------------
606
(1 row)
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 CROSS JOIN users_table;
count
---------------------------------------------------------------------
3636
(1 row)
-- two reference tables CROSS JOINNed, and later JOINED with distributed tables
-- it is safe to pushdown
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table ON false;
count
---------------------------------------------------------------------
101
(1 row)
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 JOIN users_table ON false;
count
---------------------------------------------------------------------
0
(1 row)
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 JOIN users_table ON true;
count
---------------------------------------------------------------------
3636
(1 row)
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table ON true;
count
---------------------------------------------------------------------
3636
(1 row)
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table ON (ref1.id = users_table.user_id);
count
---------------------------------------------------------------------
606
(1 row)
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 JOIN users_table ON (ref1.id = users_table.user_id);
count
---------------------------------------------------------------------
606
(1 row)
-- two reference tables CROSS JOINNed, and later JOINED with distributed tables
-- but the reference table CROSS JOIN is in the outer side of the JOIN with the distributed table
-- so we cannot pushdown
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id = users_table.user_id);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 FULL JOIN users_table ON (ref1.id = users_table.user_id);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id != users_table.user_id);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id > 0);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (users_table.user_id > 0);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON true;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON false;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- a reference tables CROSS JOINed with a distribted table, and later JOINED with distributed tables on distribution keys
-- so safe to pushdown
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 JOIN users_table u2 ON (u1.user_id = u2.user_id);
count
---------------------------------------------------------------------
11802
(1 row)
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table u2 ON (u1.user_id = u2.user_id);
count
---------------------------------------------------------------------
11802
(1 row)
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 FULL JOIN users_table u2 ON (u1.user_id = u2.user_id);
count
---------------------------------------------------------------------
11802
(1 row)
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table u2 ON (u1.user_id = u2.user_id);
count
---------------------------------------------------------------------
11802
(1 row)
-- a reference tables CROSS JOINed with a distribted table, and later JOINED with distributed tables on reference table column
-- so not safe to pushdown
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table u2 ON (ref2.id = u2.user_id);
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 FULL JOIN users_table u2 ON (ref2.id = u2.user_id);
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table u2 ON (ref2.id = u2.user_id);
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- via repartitioning, Citus can handle this query as the result of "u1 CROSS JOIN ref2"
-- can be repartitioned on ref2.id
Set citus.enable_repartition_joins to on;
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 JOIN users_table u2 ON (ref2.id = u2.user_id);
count
---------------------------------------------------------------------
10201
(1 row)
reset citus.enable_repartition_joins;
-- although the following has the "ref LEFT JOIN dist" type of query, the LEFT JOIN is eliminated by Postgres
-- because the INNER JOIN eliminates the LEFT JOIN
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id = users_table.user_id) JOIN users_table u2 ON (u2.user_id = users_table.user_id);
count
---------------------------------------------------------------------
11802
(1 row)
-- this is the same query as the above, but this time the outer query is also LEFT JOIN, meaning that Postgres
-- cannot eliminate the outer join
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id = users_table.user_id) LEFT JOIN users_table u2 ON (u2.user_id = users_table.user_id);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- cross join that goes through non-colocated subquery logic
-- for the "events_table" subquery as both distributed tables
-- do not have JOIN on the distribution key
SELECT max(events_all.cnt),
events_all.usr_id
FROM
(SELECT *, random() FROM
(SELECT *, random()
FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt
FROM (SELECT *,random FROM (SELECT *, random() FROM events_reference_table) as events_reference_table) as events_reference_table
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
GROUP BY users_table.user_id) AS events_all_inner
) AS events_all
) AS events_all
CROSS JOIN (SELECT *,random() FROM (SELECT *, random() FROM events_table) as events_table) as events_table
GROUP BY 2
ORDER BY 1 DESC,
2 DESC
LIMIT 5;
max | usr_id
---------------------------------------------------------------------
432 | 2
391 | 4
364 | 5
357 | 3
105 | 1
(5 rows)
-- cross join that goes through non-colocated subquery logic
-- for the "events_all" subquery as both distributed tables
-- do not have JOIN on the distribution key
SELECT max(events_all.cnt),
events_all.usr_id
FROM events_table
CROSS JOIN (SELECT *, random()
FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt
FROM events_reference_table
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
GROUP BY users_table.user_id) AS events_all_inner
) AS events_all
GROUP BY 2
ORDER BY 1 DESC,
2 DESC
LIMIT 5;
max | usr_id
---------------------------------------------------------------------
432 | 2
391 | 4
364 | 5
357 | 3
105 | 1
(5 rows)
-- cross join is between a reference table and distributed table, and
-- deep inside a subquery. The subquery can be in the outer part of the LEFT JOIN
SELECT
users_table.*
FROM
(SELECT
events_all.*, random()
FROM
events_reference_table JOIN users_table USING(user_id)
JOIN
(SELECT *, random()
FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt
FROM (SELECT *,random FROM (SELECT *, random() FROM events_reference_table) as events_reference_table) as events_reference_table
CROSS JOIN users_table
GROUP BY users_table.user_id) AS events_all_inner
) AS events_all ON (user_id = usr_id)
) AS events_all
LEFT JOIN (SELECT *,random() FROM (SELECT *, random() FROM events_table) as events_table) as events_table ON (events_all.usr_id = events_table.user_id)
LEFT JOIN users_table USING (user_id)
ORDER BY 1,2,3,4 LIMIT 5;
user_id | time | value_1 | value_2 | value_3 | value_4
---------------------------------------------------------------------
1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 |
1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 |
1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 |
1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 |
1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 |
(5 rows)

View File

@ -189,8 +189,13 @@ SELECT subquery_1.user_id
ON tt1.user_id = ref.id) subquery_2_inner) subquery_2
ON subquery_1.user_id = subquery_2.user_id
ORDER BY 1;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
user_id
---------------------------------------------------------------------
1
3
(3 rows)
-- should be able to pushdown since reference table is in the
-- inner part of the left join
SELECT
@ -646,8 +651,141 @@ GROUP BY 2
ORDER BY 1 DESC,
2 DESC
LIMIT 5;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
max | usr_id
---------------------------------------------------------------------
432 | 2
391 | 4
364 | 5
357 | 3
105 | 1
(5 rows)
-- should be fine even if the tables are deep inside subqueries
SELECT max(events_all.cnt),
events_all.usr_id
FROM
(SELECT *, random() FROM
(SELECT *, random()
FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt
FROM (SELECT *,random FROM (SELECT *, random() FROM events_reference_table) as events_reference_table) as events_reference_table
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
GROUP BY users_table.user_id) AS events_all_inner
) AS events_all
) AS events_all
LEFT JOIN (SELECT *,random() FROM (SELECT *, random() FROM events_table) as events_table) as events_table ON (events_all.usr_id = events_table.user_id)
GROUP BY 2
ORDER BY 1 DESC,
2 DESC
LIMIT 5;
max | usr_id
---------------------------------------------------------------------
432 | 2
391 | 4
364 | 5
357 | 3
105 | 1
(5 rows)
-- should be fine with FULL join as well
SELECT max(events_all.cnt),
events_all.usr_id
FROM events_table
FULL JOIN (SELECT *, random()
FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt
FROM events_reference_table
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
GROUP BY users_table.user_id) AS events_all_inner
) AS events_all ON (events_all.usr_id = events_table.user_id)
GROUP BY 2
ORDER BY 1 DESC,
2 DESC
LIMIT 5;
max | usr_id
---------------------------------------------------------------------
432 | 2
391 | 4
364 | 5
357 | 3
105 | 1
(5 rows)
-- two levels of "(ref_table JOIN dist_table) LEFT JOIN"
-- should be fine as well
SELECT
users_table.*
FROM
(SELECT
events_all.*, random()
FROM
events_reference_table JOIN users_table USING(user_id)
JOIN
(SELECT *, random()
FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt
FROM (SELECT *,random FROM (SELECT *, random() FROM events_reference_table) as events_reference_table) as events_reference_table
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
GROUP BY users_table.user_id) AS events_all_inner
) AS events_all ON (user_id = usr_id)
) AS events_all
LEFT JOIN (SELECT *,random() FROM (SELECT *, random() FROM events_table) as events_table) as events_table ON (events_all.usr_id = events_table.user_id)
LEFT JOIN users_table USING (user_id)
ORDER BY 1,2,3,4 LIMIT 5;
user_id | time | value_1 | value_2 | value_3 | value_4
---------------------------------------------------------------------
1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 |
1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 |
1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 |
1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 |
1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 |
(5 rows)
-- we should be able to support OUTER joins involving
-- reference tables even if the subquery is in WHERE clause
SELECT count(*)
FROM events_table
WHERE user_id IN
(SELECT subquery_1.user_id
FROM
(SELECT tt1.user_id,
random()
FROM users_table AS tt1
JOIN events_table AS tt2 ON tt1.user_id = tt2.user_id) subquery_1
RIGHT JOIN
(SELECT *,
random()
FROM
(SELECT tt1.user_id,
random()
FROM users_table AS tt1
JOIN events_reference_table AS REF ON tt1.user_id = ref.user_id) subquery_2_inner) subquery_2 ON subquery_1.user_id = subquery_2.user_id);
count
---------------------------------------------------------------------
101
(1 row)
-- we should be able to support OUTER joins involving
-- reference tables even if the subquery is in the outer part of a JOIN
SELECT count(*)
FROM users_table
RIGHT JOIN
(SELECT subquery_1.user_id
FROM
(SELECT tt1.user_id,
random()
FROM users_table AS tt1
JOIN events_table AS tt2 ON tt1.user_id = tt2.user_id) subquery_1
RIGHT JOIN
(SELECT *,
random()
FROM
(SELECT tt1.user_id,
random()
FROM users_table AS tt1
JOIN events_reference_table AS REF ON tt1.user_id = ref.user_id) subquery_2_inner) subquery_2 ON subquery_1.user_id = subquery_2.user_id) AS foo USING (user_id);
count
---------------------------------------------------------------------
12664199
(1 row)
-- LATERAL JOINs used with INNER JOINs with reference tables
SET citus.subquery_pushdown to ON;
NOTICE: Setting citus.subquery_pushdown flag is discouraged becuase it forces the planner to pushdown certain queries, skipping relevant correctness checks.
@ -1823,6 +1961,104 @@ LIMIT 5;
6
(1 row)
-- outer part of the LEFT JOIN consists only reference tables, so we cannot push down
-- we have different combinations for ON condition, true/false/two column join/single column filter
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON true;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON true;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON false;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON false;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (ref1.id > 5);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (user_buy_test_table.user_id > 5);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (ref1.id = user_buy_test_table.user_id);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (ref2.id = user_buy_test_table.user_id);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (ref1.id = user_buy_test_table.user_id);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (ref2.id = user_buy_test_table.user_id);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- outer part of the LEFT JOIN consists only reference tables within a subquery, so we cannot push down
-- we have different combinations for ON condition, true/false/two column join/single column filter
SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON true;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON true;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON false;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON false;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON (foo.id > 5);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON (user_buy_test_table.user_id > 19);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON (foo.id = user_buy_test_table.user_id);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- one example where unsupported outer join is deep inside a subquery
SELECT *, random() FROM (
SELECT *,random() FROM user_buy_test_table WHERE user_id > (
SELECT count(*) FROM (SELECT *,random() FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as bar) as foo LEFT JOIN (SELECT *, random() FROM (SELECT *,random() FROM user_buy_test_table d1 JOIN user_buy_test_table d2 USING (user_id)) as bar_inner ) as bar ON true)) as boo;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- In theory, we should be able to pushdown this query
-- however, as the LEFT JOIN condition is between a reference table and the distributed table
-- Postgres generates a LEFT JOIN alternative among those tables
SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN user_buy_test_table u1 on ref1.id = u1.user_id) as foo LEFT JOIN user_buy_test_table ON (foo.id = user_buy_test_table.user_id);
ERROR: cannot pushdown the subquery
DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join
-- same as the above query, but this time LEFT JOIN condition is between distributed tables
-- so Postgres doesn't generate join restriction between reference and distributed tables
SELECT count(*) FROM (SELECT u1.*, random() FROM users_ref_test_table ref1 INNER JOIN user_buy_test_table u1 on ref1.id = u1.user_id) as foo LEFT JOIN user_buy_test_table ON (foo.user_id = user_buy_test_table.user_id);
count
---------------------------------------------------------------------
3
(1 row)
-- outer part of the LEFT JOIN consists only intermediate result due to LIMIT, so we cannot push down
SELECT count(*) FROM (SELECT ref1.* FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LIMIT 5) as foo LEFT JOIN user_buy_test_table ON true;
ERROR: cannot pushdown the subquery
DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join
-- should be fine as OUTER part is the distributed table
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id RIGHT JOIN user_buy_test_table ON true;
count
---------------------------------------------------------------------
24
(1 row)
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id RIGHT JOIN user_buy_test_table ON false;
count
---------------------------------------------------------------------
4
(1 row)
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id RIGHT JOIN user_buy_test_table ON (ref1.id = user_id);
count
---------------------------------------------------------------------
4
(1 row)
DROP TABLE user_buy_test_table;
DROP TABLE users_ref_test_table;
DROP TABLE users_return_test_table;

View File

@ -108,5 +108,48 @@ EXPLAIN (COSTS FALSE) SELECT FROM t5 WHERE c0 = CASE WHEN 2 BETWEEN 1 AND 3 THEN
Filter: (c0 = 2)
(7 rows)
CREATE TABLE IF NOT EXISTS t6(c0 TEXT CHECK (TRUE), c1 money ) WITH (autovacuum_vacuum_threshold=1180014707, autovacuum_freeze_table_age=13771154, autovacuum_vacuum_cost_delay=23, autovacuum_analyze_threshold=1935153914, autovacuum_freeze_min_age=721733768, autovacuum_enabled=0, autovacuum_vacuum_cost_limit=9983);
CREATE UNLOGGED TABLE IF NOT EXISTS t7(LIKE t6);
CREATE TABLE t8(LIKE t6 INCLUDING INDEXES);
CREATE UNLOGGED TABLE t9(LIKE t6 EXCLUDING STATISTICS);
CREATE TABLE t10(LIKE t7);
SELECT create_distributed_table('t6', 'c0');
create_distributed_table
---------------------------------------------------------------------
(1 row)
ALTER TABLE t6 ALTER COLUMN c0 SET NOT NULL;
SELECT create_reference_table('t7');
create_reference_table
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('t8', 'c0');
create_distributed_table
---------------------------------------------------------------------
(1 row)
ALTER TABLE t8 ALTER COLUMN c0 SET NOT NULL;
SELECT create_distributed_table('t9', 'c0');
create_distributed_table
---------------------------------------------------------------------
(1 row)
ALTER TABLE t9 ALTER COLUMN c0 SET NOT NULL;
SELECT create_reference_table('t10');
create_reference_table
---------------------------------------------------------------------
(1 row)
SELECT count(*) FROM (
SELECT ALL t7.c1, t7.c0, t8.c1, t10.c1, t8.c0 FROM t7 CROSS JOIN t10 FULL OUTER JOIN t8 ON (((((((('[832125354,1134163512)'::int4range)*('(0,2106623281)'::int4range)))-('(-600267905,509840582]'::int4range)))*('(-365203965,1662828182)'::int4range)))&<((((((('(-1286467417,697584012]'::int4range)*('[-1691485781,1341103963)'::int4range)))*((('(-1768368435,1719707648)'::int4range)*('(139536997,1275813540]'::int4range)))))*((((('[-2103910157,-1961746758)'::int4range)*('[-834534078,533073939)'::int4range)))*((('[-1030552151,552856781]'::int4range)*('[-1109419376,1205173697]'::int4range))))))))
) AS foo;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SET client_min_messages TO WARNING;
DROP SCHEMA sqlancer_failures CASCADE;

View File

@ -88,7 +88,7 @@ test: subquery_prepared_statements pg12 cte_inline pg13
# ----------
test: multi_deparse_shard_query multi_distributed_transaction_id intermediate_results limit_intermediate_size rollback_to_savepoint
test: multi_explain hyperscale_tutorial partitioned_intermediate_results distributed_intermediate_results multi_real_time_transaction
test: multi_basic_queries multi_complex_expressions multi_subquery multi_subquery_complex_queries multi_subquery_behavioral_analytics
test: multi_basic_queries cross_join multi_complex_expressions multi_subquery multi_subquery_complex_queries multi_subquery_behavioral_analytics
test: multi_subquery_complex_reference_clause multi_subquery_window_functions multi_view multi_sql_function multi_prepare_sql
test: sql_procedure multi_function_in_join row_types materialized_view undistribute_table
test: multi_subquery_in_where_reference_clause full_join adaptive_executor propagate_set_commands

View File

@ -0,0 +1,131 @@
-- this test file relies on multi_behavioral_analytics_create_table
-- and aims to have variety of tests covering CROSS JOINs
-- "t1 CROSS JOIN t2" is equivalent of "t1 JOIN t2 ON true"
-- a distributed table can be cross joined with a reference table
-- and the CROSS JOIN can be in the outer part of an outer JOIN
SELECT count(*) FROM events_reference_table e1 CROSS JOIN events_table e2 LEFT JOIN users_table u ON (e2.user_id = u.user_id);
-- two distributed tables cannot be cross joined
-- as it lacks distribution key equality
SELECT count(*) FROM events_reference_table e1 CROSS JOIN events_table e2 CROSS JOIN users_table u;
SELECT count(*) FROM events_reference_table e1, events_table e2, users_table u;
-- we can provide the distribution key equality via WHERE clause
SELECT count(*) FROM events_reference_table e1 CROSS JOIN events_table e2 CROSS JOIN users_table u WHERE u.user_id = e2.user_id;
-- two reference tables are JOINed, and later CROSS JOINed with a distributed table
-- it is safe to pushdown
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id CROSS JOIN users_table;
SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id CROSS JOIN users_table;
SELECT count(*) FROM users_ref_test_table ref1 RIGHT JOIN users_ref_test_table ref2 on ref1.id = ref2.id CROSS JOIN users_table;
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 CROSS JOIN users_table;
-- two reference tables CROSS JOINNed, and later JOINED with distributed tables
-- it is safe to pushdown
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table ON false;
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 JOIN users_table ON false;
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 JOIN users_table ON true;
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table ON true;
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table ON (ref1.id = users_table.user_id);
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 JOIN users_table ON (ref1.id = users_table.user_id);
-- two reference tables CROSS JOINNed, and later JOINED with distributed tables
-- but the reference table CROSS JOIN is in the outer side of the JOIN with the distributed table
-- so we cannot pushdown
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id = users_table.user_id);
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 FULL JOIN users_table ON (ref1.id = users_table.user_id);
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id != users_table.user_id);
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id > 0);
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (users_table.user_id > 0);
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON true;
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON false;
-- a reference tables CROSS JOINed with a distribted table, and later JOINED with distributed tables on distribution keys
-- so safe to pushdown
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 JOIN users_table u2 ON (u1.user_id = u2.user_id);
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table u2 ON (u1.user_id = u2.user_id);
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 FULL JOIN users_table u2 ON (u1.user_id = u2.user_id);
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table u2 ON (u1.user_id = u2.user_id);
-- a reference tables CROSS JOINed with a distribted table, and later JOINED with distributed tables on reference table column
-- so not safe to pushdown
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table u2 ON (ref2.id = u2.user_id);
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 FULL JOIN users_table u2 ON (ref2.id = u2.user_id);
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table u2 ON (ref2.id = u2.user_id);
-- via repartitioning, Citus can handle this query as the result of "u1 CROSS JOIN ref2"
-- can be repartitioned on ref2.id
Set citus.enable_repartition_joins to on;
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 JOIN users_table u2 ON (ref2.id = u2.user_id);
reset citus.enable_repartition_joins;
-- although the following has the "ref LEFT JOIN dist" type of query, the LEFT JOIN is eliminated by Postgres
-- because the INNER JOIN eliminates the LEFT JOIN
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id = users_table.user_id) JOIN users_table u2 ON (u2.user_id = users_table.user_id);
-- this is the same query as the above, but this time the outer query is also LEFT JOIN, meaning that Postgres
-- cannot eliminate the outer join
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id = users_table.user_id) LEFT JOIN users_table u2 ON (u2.user_id = users_table.user_id);
-- cross join that goes through non-colocated subquery logic
-- for the "events_table" subquery as both distributed tables
-- do not have JOIN on the distribution key
SELECT max(events_all.cnt),
events_all.usr_id
FROM
(SELECT *, random() FROM
(SELECT *, random()
FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt
FROM (SELECT *,random FROM (SELECT *, random() FROM events_reference_table) as events_reference_table) as events_reference_table
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
GROUP BY users_table.user_id) AS events_all_inner
) AS events_all
) AS events_all
CROSS JOIN (SELECT *,random() FROM (SELECT *, random() FROM events_table) as events_table) as events_table
GROUP BY 2
ORDER BY 1 DESC,
2 DESC
LIMIT 5;
-- cross join that goes through non-colocated subquery logic
-- for the "events_all" subquery as both distributed tables
-- do not have JOIN on the distribution key
SELECT max(events_all.cnt),
events_all.usr_id
FROM events_table
CROSS JOIN (SELECT *, random()
FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt
FROM events_reference_table
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
GROUP BY users_table.user_id) AS events_all_inner
) AS events_all
GROUP BY 2
ORDER BY 1 DESC,
2 DESC
LIMIT 5;
-- cross join is between a reference table and distributed table, and
-- deep inside a subquery. The subquery can be in the outer part of the LEFT JOIN
SELECT
users_table.*
FROM
(SELECT
events_all.*, random()
FROM
events_reference_table JOIN users_table USING(user_id)
JOIN
(SELECT *, random()
FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt
FROM (SELECT *,random FROM (SELECT *, random() FROM events_reference_table) as events_reference_table) as events_reference_table
CROSS JOIN users_table
GROUP BY users_table.user_id) AS events_all_inner
) AS events_all ON (user_id = usr_id)
) AS events_all
LEFT JOIN (SELECT *,random() FROM (SELECT *, random() FROM events_table) as events_table) as events_table ON (events_all.usr_id = events_table.user_id)
LEFT JOIN users_table USING (user_id)
ORDER BY 1,2,3,4 LIMIT 5;

View File

@ -401,6 +401,102 @@ ORDER BY 1 DESC,
2 DESC
LIMIT 5;
-- should be fine even if the tables are deep inside subqueries
SELECT max(events_all.cnt),
events_all.usr_id
FROM
(SELECT *, random() FROM
(SELECT *, random()
FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt
FROM (SELECT *,random FROM (SELECT *, random() FROM events_reference_table) as events_reference_table) as events_reference_table
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
GROUP BY users_table.user_id) AS events_all_inner
) AS events_all
) AS events_all
LEFT JOIN (SELECT *,random() FROM (SELECT *, random() FROM events_table) as events_table) as events_table ON (events_all.usr_id = events_table.user_id)
GROUP BY 2
ORDER BY 1 DESC,
2 DESC
LIMIT 5;
-- should be fine with FULL join as well
SELECT max(events_all.cnt),
events_all.usr_id
FROM events_table
FULL JOIN (SELECT *, random()
FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt
FROM events_reference_table
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
GROUP BY users_table.user_id) AS events_all_inner
) AS events_all ON (events_all.usr_id = events_table.user_id)
GROUP BY 2
ORDER BY 1 DESC,
2 DESC
LIMIT 5;
-- two levels of "(ref_table JOIN dist_table) LEFT JOIN"
-- should be fine as well
SELECT
users_table.*
FROM
(SELECT
events_all.*, random()
FROM
events_reference_table JOIN users_table USING(user_id)
JOIN
(SELECT *, random()
FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt
FROM (SELECT *,random FROM (SELECT *, random() FROM events_reference_table) as events_reference_table) as events_reference_table
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
GROUP BY users_table.user_id) AS events_all_inner
) AS events_all ON (user_id = usr_id)
) AS events_all
LEFT JOIN (SELECT *,random() FROM (SELECT *, random() FROM events_table) as events_table) as events_table ON (events_all.usr_id = events_table.user_id)
LEFT JOIN users_table USING (user_id)
ORDER BY 1,2,3,4 LIMIT 5;
-- we should be able to support OUTER joins involving
-- reference tables even if the subquery is in WHERE clause
SELECT count(*)
FROM events_table
WHERE user_id IN
(SELECT subquery_1.user_id
FROM
(SELECT tt1.user_id,
random()
FROM users_table AS tt1
JOIN events_table AS tt2 ON tt1.user_id = tt2.user_id) subquery_1
RIGHT JOIN
(SELECT *,
random()
FROM
(SELECT tt1.user_id,
random()
FROM users_table AS tt1
JOIN events_reference_table AS REF ON tt1.user_id = ref.user_id) subquery_2_inner) subquery_2 ON subquery_1.user_id = subquery_2.user_id);
-- we should be able to support OUTER joins involving
-- reference tables even if the subquery is in the outer part of a JOIN
SELECT count(*)
FROM users_table
RIGHT JOIN
(SELECT subquery_1.user_id
FROM
(SELECT tt1.user_id,
random()
FROM users_table AS tt1
JOIN events_table AS tt2 ON tt1.user_id = tt2.user_id) subquery_1
RIGHT JOIN
(SELECT *,
random()
FROM
(SELECT tt1.user_id,
random()
FROM users_table AS tt1
JOIN events_reference_table AS REF ON tt1.user_id = ref.user_id) subquery_2_inner) subquery_2 ON subquery_1.user_id = subquery_2.user_id) AS foo USING (user_id);
-- LATERAL JOINs used with INNER JOINs with reference tables
SET citus.subquery_pushdown to ON;
SELECT user_id, lastseen
@ -1329,6 +1425,52 @@ JOIN
ORDER BY 1
LIMIT 5;
-- outer part of the LEFT JOIN consists only reference tables, so we cannot push down
-- we have different combinations for ON condition, true/false/two column join/single column filter
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON true;
SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON true;
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON false;
SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON false;
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (ref1.id > 5);
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (user_buy_test_table.user_id > 5);
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (ref1.id = user_buy_test_table.user_id);
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (ref2.id = user_buy_test_table.user_id);
SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (ref1.id = user_buy_test_table.user_id);
SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (ref2.id = user_buy_test_table.user_id);
-- outer part of the LEFT JOIN consists only reference tables within a subquery, so we cannot push down
-- we have different combinations for ON condition, true/false/two column join/single column filter
SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON true;
SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON true;
SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON false;
SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON false;
SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON (foo.id > 5);
SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON (user_buy_test_table.user_id > 19);
SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON (foo.id = user_buy_test_table.user_id);
-- one example where unsupported outer join is deep inside a subquery
SELECT *, random() FROM (
SELECT *,random() FROM user_buy_test_table WHERE user_id > (
SELECT count(*) FROM (SELECT *,random() FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as bar) as foo LEFT JOIN (SELECT *, random() FROM (SELECT *,random() FROM user_buy_test_table d1 JOIN user_buy_test_table d2 USING (user_id)) as bar_inner ) as bar ON true)) as boo;
-- In theory, we should be able to pushdown this query
-- however, as the LEFT JOIN condition is between a reference table and the distributed table
-- Postgres generates a LEFT JOIN alternative among those tables
SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN user_buy_test_table u1 on ref1.id = u1.user_id) as foo LEFT JOIN user_buy_test_table ON (foo.id = user_buy_test_table.user_id);
-- same as the above query, but this time LEFT JOIN condition is between distributed tables
-- so Postgres doesn't generate join restriction between reference and distributed tables
SELECT count(*) FROM (SELECT u1.*, random() FROM users_ref_test_table ref1 INNER JOIN user_buy_test_table u1 on ref1.id = u1.user_id) as foo LEFT JOIN user_buy_test_table ON (foo.user_id = user_buy_test_table.user_id);
-- outer part of the LEFT JOIN consists only intermediate result due to LIMIT, so we cannot push down
SELECT count(*) FROM (SELECT ref1.* FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LIMIT 5) as foo LEFT JOIN user_buy_test_table ON true;
-- should be fine as OUTER part is the distributed table
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id RIGHT JOIN user_buy_test_table ON true;
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id RIGHT JOIN user_buy_test_table ON false;
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id RIGHT JOIN user_buy_test_table ON (ref1.id = user_id);
DROP TABLE user_buy_test_table;
DROP TABLE users_ref_test_table;
DROP TABLE users_return_test_table;

View File

@ -39,5 +39,24 @@ INSERT INTO t5 VALUES (CASE WHEN 2 BETWEEN 1 AND 3 THEN 2 ELSE 1 END);
EXPLAIN (COSTS FALSE) SELECT FROM t5 WHERE c0 = 2;
EXPLAIN (COSTS FALSE) SELECT FROM t5 WHERE c0 = CASE WHEN 2 BETWEEN 1 AND 3 THEN 2 ELSE 1 END;
CREATE TABLE IF NOT EXISTS t6(c0 TEXT CHECK (TRUE), c1 money ) WITH (autovacuum_vacuum_threshold=1180014707, autovacuum_freeze_table_age=13771154, autovacuum_vacuum_cost_delay=23, autovacuum_analyze_threshold=1935153914, autovacuum_freeze_min_age=721733768, autovacuum_enabled=0, autovacuum_vacuum_cost_limit=9983);
CREATE UNLOGGED TABLE IF NOT EXISTS t7(LIKE t6);
CREATE TABLE t8(LIKE t6 INCLUDING INDEXES);
CREATE UNLOGGED TABLE t9(LIKE t6 EXCLUDING STATISTICS);
CREATE TABLE t10(LIKE t7);
SELECT create_distributed_table('t6', 'c0');
ALTER TABLE t6 ALTER COLUMN c0 SET NOT NULL;
SELECT create_reference_table('t7');
SELECT create_distributed_table('t8', 'c0');
ALTER TABLE t8 ALTER COLUMN c0 SET NOT NULL;
SELECT create_distributed_table('t9', 'c0');
ALTER TABLE t9 ALTER COLUMN c0 SET NOT NULL;
SELECT create_reference_table('t10');
SELECT count(*) FROM (
SELECT ALL t7.c1, t7.c0, t8.c1, t10.c1, t8.c0 FROM t7 CROSS JOIN t10 FULL OUTER JOIN t8 ON (((((((('[832125354,1134163512)'::int4range)*('(0,2106623281)'::int4range)))-('(-600267905,509840582]'::int4range)))*('(-365203965,1662828182)'::int4range)))&<((((((('(-1286467417,697584012]'::int4range)*('[-1691485781,1341103963)'::int4range)))*((('(-1768368435,1719707648)'::int4range)*('(139536997,1275813540]'::int4range)))))*((((('[-2103910157,-1961746758)'::int4range)*('[-834534078,533073939)'::int4range)))*((('[-1030552151,552856781]'::int4range)*('[-1109419376,1205173697]'::int4range))))))))
) AS foo;
SET client_min_messages TO WARNING;
DROP SCHEMA sqlancer_failures CASCADE;