diff --git a/src/backend/distributed/planner/query_pushdown_planning.c b/src/backend/distributed/planner/query_pushdown_planning.c index b26b4053f..4bc5589f8 100644 --- a/src/backend/distributed/planner/query_pushdown_planning.c +++ b/src/backend/distributed/planner/query_pushdown_planning.c @@ -79,10 +79,8 @@ static DeferredErrorMessage * DeferredErrorIfUnsupportedRecurringTuplesJoin( PlannerRestrictionContext *plannerRestrictionContext); static DeferredErrorMessage * DeferErrorIfUnsupportedTableCombination(Query *queryTree); static bool ExtractSetOperationStatmentWalker(Node *node, List **setOperationList); -static bool ShouldRecurseForRecurringTuplesJoinChecks(RelOptInfo *relOptInfo); -static bool RelationInfoContainsRecurringTuples(PlannerInfo *plannerInfo, - RelOptInfo *relationInfo, - RecurringTuplesType *recurType); +static RecurringTuplesType FetchFirstRecurType(PlannerInfo *plannerInfo, RelOptInfo * + relationInfo); static bool ContainsRecurringRTE(RangeTblEntry *rangeTableEntry, RecurringTuplesType *recurType); static bool ContainsRecurringRangeTable(List *rangeTable, RecurringTuplesType *recurType); @@ -776,7 +774,6 @@ DeferredErrorIfUnsupportedRecurringTuplesJoin( plannerRestrictionContext->joinRestrictionContext->joinRestrictionList; ListCell *joinRestrictionCell = NULL; RecurringTuplesType recurType = RECURRING_TUPLES_INVALID; - foreach(joinRestrictionCell, joinRestrictionList) { JoinRestriction *joinRestriction = (JoinRestriction *) lfirst( @@ -799,20 +796,33 @@ DeferredErrorIfUnsupportedRecurringTuplesJoin( continue; } - if (ShouldRecurseForRecurringTuplesJoinChecks(outerrel) && - RelationInfoContainsRecurringTuples(plannerInfo, outerrel, &recurType)) + + /* + * If the outer side of the join doesn't have any distributed tables + * (e.g., contains only recurring tuples), Citus should not pushdown + * the query. The reason is that recurring tuples on every shard would + * be added to the result, which is wrong. + */ + if (RelationInfoContainsOnlyRecurringTuples(plannerInfo, outerrel)) { + recurType = FetchFirstRecurType(plannerInfo, outerrel); + break; } } else if (joinType == JOIN_FULL) { - if ((ShouldRecurseForRecurringTuplesJoinChecks(innerrel) && - RelationInfoContainsRecurringTuples(plannerInfo, innerrel, - &recurType)) || - (ShouldRecurseForRecurringTuplesJoinChecks(outerrel) && - RelationInfoContainsRecurringTuples(plannerInfo, outerrel, &recurType))) + if (RelationInfoContainsOnlyRecurringTuples(plannerInfo, innerrel)) { + recurType = FetchFirstRecurType(plannerInfo, innerrel); + + break; + } + + if (RelationInfoContainsOnlyRecurringTuples(plannerInfo, outerrel)) + { + recurType = FetchFirstRecurType(plannerInfo, outerrel); + break; } } @@ -846,6 +856,7 @@ DeferredErrorIfUnsupportedRecurringTuplesJoin( "Complex subqueries and CTEs cannot be in the outer " "part of the outer join", NULL); } + return NULL; } @@ -1071,7 +1082,8 @@ DeferErrorIfUnsupportedTableCombination(Query *queryTree) * Extract all range table indexes from the join tree. Note that sub-queries * that get pulled up by PostgreSQL don't appear in this join tree. */ - ExtractRangeTableIndexWalker((Node *) queryTree->jointree, &joinTreeTableIndexList); + ExtractRangeTableIndexWalker((Node *) queryTree->jointree, + &joinTreeTableIndexList); foreach_int(joinTreeTableIndex, joinTreeTableIndexList) { @@ -1180,13 +1192,15 @@ DeferErrorIfUnsupportedUnionQuery(Query *subqueryTree) { return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, "cannot push down this subquery", - "Intersect and Except are currently unsupported", NULL); + "Intersect and Except are currently unsupported", + NULL); } if (IsA(leftArg, RangeTblRef)) { leftArgRTI = ((RangeTblRef *) leftArg)->rtindex; - Query *leftArgSubquery = rt_fetch(leftArgRTI, subqueryTree->rtable)->subquery; + Query *leftArgSubquery = rt_fetch(leftArgRTI, + subqueryTree->rtable)->subquery; recurType = FromClauseRecurringTupleType(leftArgSubquery); if (recurType != RECURRING_TUPLES_INVALID) { @@ -1260,81 +1274,14 @@ ExtractSetOperationStatmentWalker(Node *node, List **setOperationList) (*setOperationList) = lappend(*setOperationList, setOperation); } - bool walkerResult = expression_tree_walker(node, ExtractSetOperationStatmentWalker, + bool walkerResult = expression_tree_walker(node, + ExtractSetOperationStatmentWalker, setOperationList); return walkerResult; } -/* - * ShouldRecurseForRecurringTuplesJoinChecks is a helper function for deciding - * on whether the input relOptInfo should be checked for table expressions that - * generate the same tuples in every query on a shard. We use this to avoid - * redundant checks and false positives in complex join trees. - */ -static bool -ShouldRecurseForRecurringTuplesJoinChecks(RelOptInfo *relOptInfo) -{ - bool shouldRecurse = true; - - /* - * We shouldn't recursively go down for joins since we're already - * going to process each join seperately. Otherwise we'd restrict - * the coverage. See the below sketch where (h) denotes a hash - * distributed relation, (r) denotes a reference table, (L) denotes - * LEFT JOIN and (I) denotes INNER JOIN. If we're to recurse into - * the inner join, we'd be preventing to push down the following - * join tree, which is actually safe to push down. - * - * (L) - * / \ - * (I) h - * / \ - * r h - */ - if (relOptInfo->reloptkind == RELOPT_JOINREL) - { - return false; - } - - /* - * Note that we treat the same query where relations appear in subqueries - * differently. (i.e., use SELECT * FROM r; instead of r) - * - * In that case, to relax some restrictions, we do the following optimization: - * If the subplan (i.e., plannerInfo corresponding to the subquery) contains any - * joins, we skip reference table checks keeping in mind that the join is already - * going to be processed seperately. This optimization should suffice for many - * use cases. - */ - if (relOptInfo->reloptkind == RELOPT_BASEREL && relOptInfo->subroot != NULL) - { - PlannerInfo *subroot = relOptInfo->subroot; - - if (list_length(subroot->join_rel_list) > 0) - { - RelOptInfo *subqueryJoin = linitial(subroot->join_rel_list); - - /* - * Subqueries without relations (e.g. SELECT 1) are a little funny. - * They are treated as having a join, but the join is between 0 - * relations and won't be in the join restriction list and therefore - * won't be revisited in DeferredErrorIfUnsupportedRecurringTuplesJoin. - * - * We therefore only skip joins with >0 relations. - */ - if (bms_num_members(subqueryJoin->relids) > 0) - { - shouldRecurse = false; - } - } - } - - return shouldRecurse; -} - - /* * RelationInfoContainsOnlyRecurringTuples returns false if any of the relations in * a RelOptInfo is not recurring. @@ -1370,19 +1317,20 @@ RelationInfoContainsOnlyRecurringTuples(PlannerInfo *plannerInfo, /* - * RelationInfoContainsRecurringTuples checks whether the relationInfo + * FetchFirstRecurType checks whether the relationInfo * contains any recurring table expression, namely a reference table, - * or immutable function. If found, RelationInfoContainsRecurringTuples + * or immutable function. If found, FetchFirstRecurType * returns true. * * Note that since relation ids of relationInfo indexes to the range * table entry list of planner info, planner info is also passed. */ -static bool -RelationInfoContainsRecurringTuples(PlannerInfo *plannerInfo, RelOptInfo *relationInfo, - RecurringTuplesType *recurType) +static RecurringTuplesType +FetchFirstRecurType(PlannerInfo *plannerInfo, RelOptInfo * + relationInfo) { Relids relids = relationInfo->relids; + RecurringTuplesType recurType = RECURRING_TUPLES_INVALID; int relationId = -1; while ((relationId = bms_next_member(relids, relationId)) >= 0) @@ -1390,13 +1338,13 @@ RelationInfoContainsRecurringTuples(PlannerInfo *plannerInfo, RelOptInfo *relati RangeTblEntry *rangeTableEntry = plannerInfo->simple_rte_array[relationId]; /* relationInfo has this range table entry */ - if (ContainsRecurringRTE(rangeTableEntry, recurType)) + if (ContainsRecurringRTE(rangeTableEntry, &recurType)) { - return true; + return recurType; } } - return false; + return recurType; } diff --git a/src/test/regress/expected/multi_subquery_complex_reference_clause.out b/src/test/regress/expected/multi_subquery_complex_reference_clause.out index 0a743e9ab..a40a6221c 100644 --- a/src/test/regress/expected/multi_subquery_complex_reference_clause.out +++ b/src/test/regress/expected/multi_subquery_complex_reference_clause.out @@ -189,8 +189,13 @@ SELECT subquery_1.user_id ON tt1.user_id = ref.id) subquery_2_inner) subquery_2 ON subquery_1.user_id = subquery_2.user_id ORDER BY 1; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + user_id +--------------------------------------------------------------------- + 1 + 3 + +(3 rows) + -- should be able to pushdown since reference table is in the -- inner part of the left join SELECT @@ -646,8 +651,15 @@ GROUP BY 2 ORDER BY 1 DESC, 2 DESC LIMIT 5; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + max | usr_id +--------------------------------------------------------------------- + 432 | 2 + 391 | 4 + 364 | 5 + 357 | 3 + 105 | 1 +(5 rows) + -- LATERAL JOINs used with INNER JOINs with reference tables SET citus.subquery_pushdown to ON; NOTICE: Setting citus.subquery_pushdown flag is discouraged becuase it forces the planner to pushdown certain queries, skipping relevant correctness checks. @@ -1823,6 +1835,110 @@ LIMIT 5; 6 (1 row) +-- outer part of the LEFT JOIN consists only reference tables, so we cannot push down +-- we have different combinations for ON condition, true/false/two column join/single column filter +SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON true; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON true; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON false; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON false; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (ref1.id > 5); +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (user_buy_test_table.user_id > 5); +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (ref1.id = user_buy_test_table.user_id); +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (ref2.id = user_buy_test_table.user_id); +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (ref1.id = user_buy_test_table.user_id); +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (ref2.id = user_buy_test_table.user_id); +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +-- outer part of the LEFT JOIN consists only reference tables within a subquery, so we cannot push down +-- we have different combinations for ON condition, true/false/two column join/single column filter +SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON true; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON true; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON false; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON false; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON (foo.id > 5); +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON (user_buy_test_table.user_id > 19); +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON (foo.id = user_buy_test_table.user_id); +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +-- one example where unsupported outer join is deep inside a subquery +SELECT *, random() FROM ( +SELECT *,random() FROM user_buy_test_table WHERE user_id > ( +SELECT count(*) FROM (SELECT *,random() FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as bar) as foo LEFT JOIN (SELECT *, random() FROM (SELECT *,random() FROM user_buy_test_table d1 JOIN user_buy_test_table d2 USING (user_id)) as bar_inner ) as bar ON true)) as boo; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +-- In theory, we should be able to pushdown this query +-- however, as the LEFT JOIN condition is between a reference table and the distributed table +-- Postgres generates a LEFT JOIN alternative among those tables +SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN user_buy_test_table u1 on ref1.id = u1.user_id) as foo LEFT JOIN user_buy_test_table ON (foo.id = user_buy_test_table.user_id); +ERROR: cannot pushdown the subquery +DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join +-- same as the above query, but this time LEFT JOIN condition is between distributed tables +-- so Postgres doesn't generate join restriction between reference and distributed tables +SELECT count(*) FROM (SELECT u1.*, random() FROM users_ref_test_table ref1 INNER JOIN user_buy_test_table u1 on ref1.id = u1.user_id) as foo LEFT JOIN user_buy_test_table ON (foo.user_id = user_buy_test_table.user_id); + count +--------------------------------------------------------------------- + 3 +(1 row) + +-- again, in theory should be OK to pushdown but +-- Postgres generates join restriction between reference and distributed tables +-- in one of the cases +SELECT count(*) FROM user_buy_test_table a LEFT JOIN users_ref_test_table b ON (true) RIGHT JOIN users_ref_test_table c ON (b.id = c.id); +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +-- outer part of the LEFT JOIN consists only intermediate result due to LIMIT, so we cannot push down +SELECT count(*) FROM (SELECT ref1.* FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LIMIT 5) as foo LEFT JOIN user_buy_test_table ON true; +ERROR: cannot pushdown the subquery +DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join +-- should be fine as OUTER part is the distributed table +SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id RIGHT JOIN user_buy_test_table ON true; + count +--------------------------------------------------------------------- + 24 +(1 row) + +SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id RIGHT JOIN user_buy_test_table ON false; + count +--------------------------------------------------------------------- + 4 +(1 row) + +SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id RIGHT JOIN user_buy_test_table ON (ref1.id = user_id); + count +--------------------------------------------------------------------- + 4 +(1 row) + DROP TABLE user_buy_test_table; DROP TABLE users_ref_test_table; DROP TABLE users_return_test_table; diff --git a/src/test/regress/sql/multi_subquery_complex_reference_clause.sql b/src/test/regress/sql/multi_subquery_complex_reference_clause.sql index ec37df5ae..2704e37db 100644 --- a/src/test/regress/sql/multi_subquery_complex_reference_clause.sql +++ b/src/test/regress/sql/multi_subquery_complex_reference_clause.sql @@ -1329,6 +1329,57 @@ JOIN ORDER BY 1 LIMIT 5; +-- outer part of the LEFT JOIN consists only reference tables, so we cannot push down +-- we have different combinations for ON condition, true/false/two column join/single column filter +SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON true; +SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON true; +SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON false; +SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON false; +SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (ref1.id > 5); +SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (user_buy_test_table.user_id > 5); +SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (ref1.id = user_buy_test_table.user_id); +SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (ref2.id = user_buy_test_table.user_id); +SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (ref1.id = user_buy_test_table.user_id); +SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (ref2.id = user_buy_test_table.user_id); + + +-- outer part of the LEFT JOIN consists only reference tables within a subquery, so we cannot push down +-- we have different combinations for ON condition, true/false/two column join/single column filter +SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON true; +SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON true; +SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON false; +SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON false; +SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON (foo.id > 5); +SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON (user_buy_test_table.user_id > 19); +SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON (foo.id = user_buy_test_table.user_id); + +-- one example where unsupported outer join is deep inside a subquery +SELECT *, random() FROM ( +SELECT *,random() FROM user_buy_test_table WHERE user_id > ( +SELECT count(*) FROM (SELECT *,random() FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as bar) as foo LEFT JOIN (SELECT *, random() FROM (SELECT *,random() FROM user_buy_test_table d1 JOIN user_buy_test_table d2 USING (user_id)) as bar_inner ) as bar ON true)) as boo; + +-- In theory, we should be able to pushdown this query +-- however, as the LEFT JOIN condition is between a reference table and the distributed table +-- Postgres generates a LEFT JOIN alternative among those tables +SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN user_buy_test_table u1 on ref1.id = u1.user_id) as foo LEFT JOIN user_buy_test_table ON (foo.id = user_buy_test_table.user_id); + +-- same as the above query, but this time LEFT JOIN condition is between distributed tables +-- so Postgres doesn't generate join restriction between reference and distributed tables +SELECT count(*) FROM (SELECT u1.*, random() FROM users_ref_test_table ref1 INNER JOIN user_buy_test_table u1 on ref1.id = u1.user_id) as foo LEFT JOIN user_buy_test_table ON (foo.user_id = user_buy_test_table.user_id); + +-- again, in theory should be OK to pushdown but +-- Postgres generates join restriction between reference and distributed tables +-- in one of the cases +SELECT count(*) FROM user_buy_test_table a LEFT JOIN users_ref_test_table b ON (true) RIGHT JOIN users_ref_test_table c ON (b.id = c.id); + +-- outer part of the LEFT JOIN consists only intermediate result due to LIMIT, so we cannot push down +SELECT count(*) FROM (SELECT ref1.* FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LIMIT 5) as foo LEFT JOIN user_buy_test_table ON true; + +-- should be fine as OUTER part is the distributed table +SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id RIGHT JOIN user_buy_test_table ON true; +SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id RIGHT JOIN user_buy_test_table ON false; +SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id RIGHT JOIN user_buy_test_table ON (ref1.id = user_id); + DROP TABLE user_buy_test_table; DROP TABLE users_ref_test_table; DROP TABLE users_return_test_table;