Improve outer join checks

Before this commit, the logic was:
    - As long as the outer side of the JOIN is not a JOIN (e.g., relation
      or subquery etc.), we check for the existence of any recurring
      tuples. There were two implications of this decision.

      First, even if a subquery which is on the outer side contains
      distributed table JOIN reference table, Citus would unnecessarily throw
      an error. Note that, the JOIN inside the subquery would already
      be going to be tested recursively. But, as long as that check
      passes, there is no reason for the upper JOIN to fail. An example, which
      used to fail and now works:

	SELECT * FROM (SELECT * FROM dist JOIN ref) as foo LEFT JOIN dist;

      Second, certain JOINs, especially with ON (true) conditions were not
      represented as Citus expects the JOINs to be in the format
      DeferredErrorIfUnsupportedRecurringTuplesJoin().
pull/3597/head
Onder Kalaci 2020-03-09 11:57:03 +01:00
parent 1a28858c47
commit de33079065
3 changed files with 211 additions and 96 deletions

View File

@ -79,10 +79,8 @@ static DeferredErrorMessage * DeferredErrorIfUnsupportedRecurringTuplesJoin(
PlannerRestrictionContext *plannerRestrictionContext);
static DeferredErrorMessage * DeferErrorIfUnsupportedTableCombination(Query *queryTree);
static bool ExtractSetOperationStatmentWalker(Node *node, List **setOperationList);
static bool ShouldRecurseForRecurringTuplesJoinChecks(RelOptInfo *relOptInfo);
static bool RelationInfoContainsRecurringTuples(PlannerInfo *plannerInfo,
RelOptInfo *relationInfo,
RecurringTuplesType *recurType);
static RecurringTuplesType FetchFirstRecurType(PlannerInfo *plannerInfo, RelOptInfo *
relationInfo);
static bool ContainsRecurringRTE(RangeTblEntry *rangeTableEntry,
RecurringTuplesType *recurType);
static bool ContainsRecurringRangeTable(List *rangeTable, RecurringTuplesType *recurType);
@ -776,7 +774,6 @@ DeferredErrorIfUnsupportedRecurringTuplesJoin(
plannerRestrictionContext->joinRestrictionContext->joinRestrictionList;
ListCell *joinRestrictionCell = NULL;
RecurringTuplesType recurType = RECURRING_TUPLES_INVALID;
foreach(joinRestrictionCell, joinRestrictionList)
{
JoinRestriction *joinRestriction = (JoinRestriction *) lfirst(
@ -799,20 +796,33 @@ DeferredErrorIfUnsupportedRecurringTuplesJoin(
continue;
}
if (ShouldRecurseForRecurringTuplesJoinChecks(outerrel) &&
RelationInfoContainsRecurringTuples(plannerInfo, outerrel, &recurType))
/*
* If the outer side of the join doesn't have any distributed tables
* (e.g., contains only recurring tuples), Citus should not pushdown
* the query. The reason is that recurring tuples on every shard would
* be added to the result, which is wrong.
*/
if (RelationInfoContainsOnlyRecurringTuples(plannerInfo, outerrel))
{
recurType = FetchFirstRecurType(plannerInfo, outerrel);
break;
}
}
else if (joinType == JOIN_FULL)
{
if ((ShouldRecurseForRecurringTuplesJoinChecks(innerrel) &&
RelationInfoContainsRecurringTuples(plannerInfo, innerrel,
&recurType)) ||
(ShouldRecurseForRecurringTuplesJoinChecks(outerrel) &&
RelationInfoContainsRecurringTuples(plannerInfo, outerrel, &recurType)))
if (RelationInfoContainsOnlyRecurringTuples(plannerInfo, innerrel))
{
recurType = FetchFirstRecurType(plannerInfo, innerrel);
break;
}
if (RelationInfoContainsOnlyRecurringTuples(plannerInfo, outerrel))
{
recurType = FetchFirstRecurType(plannerInfo, outerrel);
break;
}
}
@ -846,6 +856,7 @@ DeferredErrorIfUnsupportedRecurringTuplesJoin(
"Complex subqueries and CTEs cannot be in the outer "
"part of the outer join", NULL);
}
return NULL;
}
@ -1071,7 +1082,8 @@ DeferErrorIfUnsupportedTableCombination(Query *queryTree)
* Extract all range table indexes from the join tree. Note that sub-queries
* that get pulled up by PostgreSQL don't appear in this join tree.
*/
ExtractRangeTableIndexWalker((Node *) queryTree->jointree, &joinTreeTableIndexList);
ExtractRangeTableIndexWalker((Node *) queryTree->jointree,
&joinTreeTableIndexList);
foreach_int(joinTreeTableIndex, joinTreeTableIndexList)
{
@ -1180,13 +1192,15 @@ DeferErrorIfUnsupportedUnionQuery(Query *subqueryTree)
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"cannot push down this subquery",
"Intersect and Except are currently unsupported", NULL);
"Intersect and Except are currently unsupported",
NULL);
}
if (IsA(leftArg, RangeTblRef))
{
leftArgRTI = ((RangeTblRef *) leftArg)->rtindex;
Query *leftArgSubquery = rt_fetch(leftArgRTI, subqueryTree->rtable)->subquery;
Query *leftArgSubquery = rt_fetch(leftArgRTI,
subqueryTree->rtable)->subquery;
recurType = FromClauseRecurringTupleType(leftArgSubquery);
if (recurType != RECURRING_TUPLES_INVALID)
{
@ -1260,81 +1274,14 @@ ExtractSetOperationStatmentWalker(Node *node, List **setOperationList)
(*setOperationList) = lappend(*setOperationList, setOperation);
}
bool walkerResult = expression_tree_walker(node, ExtractSetOperationStatmentWalker,
bool walkerResult = expression_tree_walker(node,
ExtractSetOperationStatmentWalker,
setOperationList);
return walkerResult;
}
/*
* ShouldRecurseForRecurringTuplesJoinChecks is a helper function for deciding
* on whether the input relOptInfo should be checked for table expressions that
* generate the same tuples in every query on a shard. We use this to avoid
* redundant checks and false positives in complex join trees.
*/
static bool
ShouldRecurseForRecurringTuplesJoinChecks(RelOptInfo *relOptInfo)
{
bool shouldRecurse = true;
/*
* We shouldn't recursively go down for joins since we're already
* going to process each join seperately. Otherwise we'd restrict
* the coverage. See the below sketch where (h) denotes a hash
* distributed relation, (r) denotes a reference table, (L) denotes
* LEFT JOIN and (I) denotes INNER JOIN. If we're to recurse into
* the inner join, we'd be preventing to push down the following
* join tree, which is actually safe to push down.
*
* (L)
* / \
* (I) h
* / \
* r h
*/
if (relOptInfo->reloptkind == RELOPT_JOINREL)
{
return false;
}
/*
* Note that we treat the same query where relations appear in subqueries
* differently. (i.e., use SELECT * FROM r; instead of r)
*
* In that case, to relax some restrictions, we do the following optimization:
* If the subplan (i.e., plannerInfo corresponding to the subquery) contains any
* joins, we skip reference table checks keeping in mind that the join is already
* going to be processed seperately. This optimization should suffice for many
* use cases.
*/
if (relOptInfo->reloptkind == RELOPT_BASEREL && relOptInfo->subroot != NULL)
{
PlannerInfo *subroot = relOptInfo->subroot;
if (list_length(subroot->join_rel_list) > 0)
{
RelOptInfo *subqueryJoin = linitial(subroot->join_rel_list);
/*
* Subqueries without relations (e.g. SELECT 1) are a little funny.
* They are treated as having a join, but the join is between 0
* relations and won't be in the join restriction list and therefore
* won't be revisited in DeferredErrorIfUnsupportedRecurringTuplesJoin.
*
* We therefore only skip joins with >0 relations.
*/
if (bms_num_members(subqueryJoin->relids) > 0)
{
shouldRecurse = false;
}
}
}
return shouldRecurse;
}
/*
* RelationInfoContainsOnlyRecurringTuples returns false if any of the relations in
* a RelOptInfo is not recurring.
@ -1370,19 +1317,20 @@ RelationInfoContainsOnlyRecurringTuples(PlannerInfo *plannerInfo,
/*
* RelationInfoContainsRecurringTuples checks whether the relationInfo
* FetchFirstRecurType checks whether the relationInfo
* contains any recurring table expression, namely a reference table,
* or immutable function. If found, RelationInfoContainsRecurringTuples
* or immutable function. If found, FetchFirstRecurType
* returns true.
*
* Note that since relation ids of relationInfo indexes to the range
* table entry list of planner info, planner info is also passed.
*/
static bool
RelationInfoContainsRecurringTuples(PlannerInfo *plannerInfo, RelOptInfo *relationInfo,
RecurringTuplesType *recurType)
static RecurringTuplesType
FetchFirstRecurType(PlannerInfo *plannerInfo, RelOptInfo *
relationInfo)
{
Relids relids = relationInfo->relids;
RecurringTuplesType recurType = RECURRING_TUPLES_INVALID;
int relationId = -1;
while ((relationId = bms_next_member(relids, relationId)) >= 0)
@ -1390,13 +1338,13 @@ RelationInfoContainsRecurringTuples(PlannerInfo *plannerInfo, RelOptInfo *relati
RangeTblEntry *rangeTableEntry = plannerInfo->simple_rte_array[relationId];
/* relationInfo has this range table entry */
if (ContainsRecurringRTE(rangeTableEntry, recurType))
if (ContainsRecurringRTE(rangeTableEntry, &recurType))
{
return true;
return recurType;
}
}
return false;
return recurType;
}

View File

@ -189,8 +189,13 @@ SELECT subquery_1.user_id
ON tt1.user_id = ref.id) subquery_2_inner) subquery_2
ON subquery_1.user_id = subquery_2.user_id
ORDER BY 1;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
user_id
---------------------------------------------------------------------
1
3
(3 rows)
-- should be able to pushdown since reference table is in the
-- inner part of the left join
SELECT
@ -646,8 +651,15 @@ GROUP BY 2
ORDER BY 1 DESC,
2 DESC
LIMIT 5;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
max | usr_id
---------------------------------------------------------------------
432 | 2
391 | 4
364 | 5
357 | 3
105 | 1
(5 rows)
-- LATERAL JOINs used with INNER JOINs with reference tables
SET citus.subquery_pushdown to ON;
NOTICE: Setting citus.subquery_pushdown flag is discouraged becuase it forces the planner to pushdown certain queries, skipping relevant correctness checks.
@ -1823,6 +1835,110 @@ LIMIT 5;
6
(1 row)
-- outer part of the LEFT JOIN consists only reference tables, so we cannot push down
-- we have different combinations for ON condition, true/false/two column join/single column filter
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON true;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON true;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON false;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON false;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (ref1.id > 5);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (user_buy_test_table.user_id > 5);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (ref1.id = user_buy_test_table.user_id);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (ref2.id = user_buy_test_table.user_id);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (ref1.id = user_buy_test_table.user_id);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (ref2.id = user_buy_test_table.user_id);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- outer part of the LEFT JOIN consists only reference tables within a subquery, so we cannot push down
-- we have different combinations for ON condition, true/false/two column join/single column filter
SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON true;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON true;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON false;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON false;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON (foo.id > 5);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON (user_buy_test_table.user_id > 19);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON (foo.id = user_buy_test_table.user_id);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- one example where unsupported outer join is deep inside a subquery
SELECT *, random() FROM (
SELECT *,random() FROM user_buy_test_table WHERE user_id > (
SELECT count(*) FROM (SELECT *,random() FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as bar) as foo LEFT JOIN (SELECT *, random() FROM (SELECT *,random() FROM user_buy_test_table d1 JOIN user_buy_test_table d2 USING (user_id)) as bar_inner ) as bar ON true)) as boo;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- In theory, we should be able to pushdown this query
-- however, as the LEFT JOIN condition is between a reference table and the distributed table
-- Postgres generates a LEFT JOIN alternative among those tables
SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN user_buy_test_table u1 on ref1.id = u1.user_id) as foo LEFT JOIN user_buy_test_table ON (foo.id = user_buy_test_table.user_id);
ERROR: cannot pushdown the subquery
DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join
-- same as the above query, but this time LEFT JOIN condition is between distributed tables
-- so Postgres doesn't generate join restriction between reference and distributed tables
SELECT count(*) FROM (SELECT u1.*, random() FROM users_ref_test_table ref1 INNER JOIN user_buy_test_table u1 on ref1.id = u1.user_id) as foo LEFT JOIN user_buy_test_table ON (foo.user_id = user_buy_test_table.user_id);
count
---------------------------------------------------------------------
3
(1 row)
-- again, in theory should be OK to pushdown but
-- Postgres generates join restriction between reference and distributed tables
-- in one of the cases
SELECT count(*) FROM user_buy_test_table a LEFT JOIN users_ref_test_table b ON (true) RIGHT JOIN users_ref_test_table c ON (b.id = c.id);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- outer part of the LEFT JOIN consists only intermediate result due to LIMIT, so we cannot push down
SELECT count(*) FROM (SELECT ref1.* FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LIMIT 5) as foo LEFT JOIN user_buy_test_table ON true;
ERROR: cannot pushdown the subquery
DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join
-- should be fine as OUTER part is the distributed table
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id RIGHT JOIN user_buy_test_table ON true;
count
---------------------------------------------------------------------
24
(1 row)
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id RIGHT JOIN user_buy_test_table ON false;
count
---------------------------------------------------------------------
4
(1 row)
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id RIGHT JOIN user_buy_test_table ON (ref1.id = user_id);
count
---------------------------------------------------------------------
4
(1 row)
DROP TABLE user_buy_test_table;
DROP TABLE users_ref_test_table;
DROP TABLE users_return_test_table;

View File

@ -1329,6 +1329,57 @@ JOIN
ORDER BY 1
LIMIT 5;
-- outer part of the LEFT JOIN consists only reference tables, so we cannot push down
-- we have different combinations for ON condition, true/false/two column join/single column filter
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON true;
SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON true;
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON false;
SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON false;
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (ref1.id > 5);
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (user_buy_test_table.user_id > 5);
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (ref1.id = user_buy_test_table.user_id);
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (ref2.id = user_buy_test_table.user_id);
SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (ref1.id = user_buy_test_table.user_id);
SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (ref2.id = user_buy_test_table.user_id);
-- outer part of the LEFT JOIN consists only reference tables within a subquery, so we cannot push down
-- we have different combinations for ON condition, true/false/two column join/single column filter
SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON true;
SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON true;
SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON false;
SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON false;
SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON (foo.id > 5);
SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON (user_buy_test_table.user_id > 19);
SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON (foo.id = user_buy_test_table.user_id);
-- one example where unsupported outer join is deep inside a subquery
SELECT *, random() FROM (
SELECT *,random() FROM user_buy_test_table WHERE user_id > (
SELECT count(*) FROM (SELECT *,random() FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as bar) as foo LEFT JOIN (SELECT *, random() FROM (SELECT *,random() FROM user_buy_test_table d1 JOIN user_buy_test_table d2 USING (user_id)) as bar_inner ) as bar ON true)) as boo;
-- In theory, we should be able to pushdown this query
-- however, as the LEFT JOIN condition is between a reference table and the distributed table
-- Postgres generates a LEFT JOIN alternative among those tables
SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN user_buy_test_table u1 on ref1.id = u1.user_id) as foo LEFT JOIN user_buy_test_table ON (foo.id = user_buy_test_table.user_id);
-- same as the above query, but this time LEFT JOIN condition is between distributed tables
-- so Postgres doesn't generate join restriction between reference and distributed tables
SELECT count(*) FROM (SELECT u1.*, random() FROM users_ref_test_table ref1 INNER JOIN user_buy_test_table u1 on ref1.id = u1.user_id) as foo LEFT JOIN user_buy_test_table ON (foo.user_id = user_buy_test_table.user_id);
-- again, in theory should be OK to pushdown but
-- Postgres generates join restriction between reference and distributed tables
-- in one of the cases
SELECT count(*) FROM user_buy_test_table a LEFT JOIN users_ref_test_table b ON (true) RIGHT JOIN users_ref_test_table c ON (b.id = c.id);
-- outer part of the LEFT JOIN consists only intermediate result due to LIMIT, so we cannot push down
SELECT count(*) FROM (SELECT ref1.* FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LIMIT 5) as foo LEFT JOIN user_buy_test_table ON true;
-- should be fine as OUTER part is the distributed table
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id RIGHT JOIN user_buy_test_table ON true;
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id RIGHT JOIN user_buy_test_table ON false;
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id RIGHT JOIN user_buy_test_table ON (ref1.id = user_id);
DROP TABLE user_buy_test_table;
DROP TABLE users_ref_test_table;
DROP TABLE users_return_test_table;