From b36b59dd4f40464abb7460e63975dadfe93f57ab Mon Sep 17 00:00:00 2001 From: Murat Tuncer Date: Tue, 15 Jan 2019 11:13:30 +0300 Subject: [PATCH] Relax reference table restrictions in subquery union pushdowns We used to error out if there is a reference table in the query participating a union. This has caused pushdownable queries to be evaluated in coordinator. Now we let reference tables inside union queries as long as there is a distributed table in from clause. Existing join checks (reference table on the outer part) sufficient enought that we do not need check the join relation of reference tables. --- .../planner/query_pushdown_planning.c | 86 ++++--- ...ulti_subquery_complex_reference_clause.out | 217 +++++++++++++++--- ...ulti_subquery_complex_reference_clause.sql | 138 ++++++++--- 3 files changed, 351 insertions(+), 90 deletions(-) diff --git a/src/backend/distributed/planner/query_pushdown_planning.c b/src/backend/distributed/planner/query_pushdown_planning.c index b8314cfb5..4b439a528 100644 --- a/src/backend/distributed/planner/query_pushdown_planning.c +++ b/src/backend/distributed/planner/query_pushdown_planning.c @@ -65,6 +65,7 @@ static bool IsNodeQuery(Node *node); static bool IsOuterJoinExpr(Node *node); static bool WindowPartitionOnDistributionColumn(Query *query); static DeferredErrorMessage * DeferErrorIfFromClauseRecurs(Query *queryTree); +static RecurringTuplesType FromClauseRecurringTupleType(Query *queryTree); static DeferredErrorMessage * DeferredErrorIfUnsupportedRecurringTuplesJoin( PlannerRestrictionContext *plannerRestrictionContext); static DeferredErrorMessage * DeferErrorIfUnsupportedTableCombination(Query *queryTree); @@ -572,30 +573,7 @@ DeferErrorIfFromClauseRecurs(Query *queryTree) return NULL; } - if (FindNodeCheckInRangeTableList(queryTree->rtable, IsDistributedTableRTE)) - { - /* - * There is a distributed table somewhere in the FROM clause. - * - * In the typical case this means that the query does not recur, - * but there are two exceptions: - * - * - outer joins such as reference_table LEFT JOIN distributed_table - * - FROM reference_table WHERE .. (SELECT .. FROM distributed_table) .. - * - * However, we check all subqueries and joins separately, so we would - * find such conditions in other calls. - */ - return NULL; - } - - - /* - * Try to figure out which type of recurring tuples we have to produce a - * relevant error message. If there are several we'll pick the first one. - */ - IsRecurringRangeTable(queryTree->rtable, &recurType); - + recurType = FromClauseRecurringTupleType(queryTree); if (recurType == RECURRING_TUPLES_REFERENCE_TABLE) { return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, @@ -644,6 +622,50 @@ DeferErrorIfFromClauseRecurs(Query *queryTree) } +/* + * FromClauseRecurringTupleType returns tuple recurrence information + * in query result based on range table entries in from clause. + * + * Returned information is used to prepare appropriate deferred error + * message for subquery pushdown checks. + */ +static RecurringTuplesType +FromClauseRecurringTupleType(Query *queryTree) +{ + RecurringTuplesType recurType = RECURRING_TUPLES_INVALID; + + if (queryTree->rtable == NIL) + { + return RECURRING_TUPLES_EMPTY_JOIN_TREE; + } + + if (FindNodeCheckInRangeTableList(queryTree->rtable, IsDistributedTableRTE)) + { + /* + * There is a distributed table somewhere in the FROM clause. + * + * In the typical case this means that the query does not recur, + * but there are two exceptions: + * + * - outer joins such as reference_table LEFT JOIN distributed_table + * - FROM reference_table WHERE .. (SELECT .. FROM distributed_table) .. + * + * However, we check all subqueries and joins separately, so we would + * find such conditions in other calls. + */ + return RECURRING_TUPLES_INVALID; + } + + /* + * Try to figure out which type of recurring tuples we have to produce a + * relevant error message. If there are several we'll pick the first one. + */ + IsRecurringRangeTable(queryTree->rtable, &recurType); + + return recurType; +} + + /* * DeferredErrorIfUnsupportedRecurringTuplesJoin returns true if there exists a outer join * between reference table and distributed tables which does not follow @@ -1062,11 +1084,11 @@ DeferErrorIfUnsupportedUnionQuery(Query *subqueryTree) if (IsA(leftArg, RangeTblRef)) { - Node *leftArgSubquery = NULL; + Query *leftArgSubquery = NULL; leftArgRTI = ((RangeTblRef *) leftArg)->rtindex; - leftArgSubquery = (Node *) rt_fetch(leftArgRTI, - subqueryTree->rtable)->subquery; - if (HasRecurringTuples(leftArgSubquery, &recurType)) + leftArgSubquery = rt_fetch(leftArgRTI, subqueryTree->rtable)->subquery; + recurType = FromClauseRecurringTupleType(leftArgSubquery); + if (recurType != RECURRING_TUPLES_INVALID) { break; } @@ -1074,11 +1096,11 @@ DeferErrorIfUnsupportedUnionQuery(Query *subqueryTree) if (IsA(rightArg, RangeTblRef)) { - Node *rightArgSubquery = NULL; + Query *rightArgSubquery = NULL; rightArgRTI = ((RangeTblRef *) rightArg)->rtindex; - rightArgSubquery = (Node *) rt_fetch(rightArgRTI, - subqueryTree->rtable)->subquery; - if (HasRecurringTuples(rightArgSubquery, &recurType)) + rightArgSubquery = rt_fetch(rightArgRTI, subqueryTree->rtable)->subquery; + recurType = FromClauseRecurringTupleType(rightArgSubquery); + if (recurType != RECURRING_TUPLES_INVALID) { break; } diff --git a/src/test/regress/expected/multi_subquery_complex_reference_clause.out b/src/test/regress/expected/multi_subquery_complex_reference_clause.out index 876f51cce..20b689a61 100644 --- a/src/test/regress/expected/multi_subquery_complex_reference_clause.out +++ b/src/test/regress/expected/multi_subquery_complex_reference_clause.out @@ -150,15 +150,22 @@ WHERE subquery_1.user_id = subquery_2.user_id ; 4 (1 row) - -- Should be able to push down since reference tables are inner joined - -- with hash distributed tables, the results of those joins are the parts of - -- an outer join -SELECT subquery_2.id FROM - (SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2 - ON tt1.user_id = tt2.user_id) subquery_1 - RIGHT JOIN - (SELECT tt1.user_id, ref.id, random() FROM user_buy_test_table as tt1 JOIN users_ref_test_table as ref - ON tt1.user_id = ref.id) subquery_2 ON subquery_1.user_id = subquery_2.user_id ORDER BY 1 DESC LIMIT 5; +-- Should be able to push down since reference tables are inner joined +-- with hash distributed tables, the results of those joins are the parts of +-- an outer join +SELECT subquery_2.id + FROM + (SELECT tt1.user_id, random() + FROM user_buy_test_table AS tt1 + JOIN users_return_test_table as tt2 + ON tt1.user_id = tt2.user_id) subquery_1 + RIGHT JOIN + (SELECT tt1.user_id, ref.id, random() + FROM user_buy_test_table as tt1 + JOIN users_ref_test_table as ref + ON tt1.user_id = ref.id) subquery_2 + ON subquery_1.user_id = subquery_2.user_id + ORDER BY 1 DESC LIMIT 5; id ---- 3 @@ -166,14 +173,23 @@ SELECT subquery_2.id FROM 1 (3 rows) --- the same query as the above, but this Citus fails to pushdown the query --- since the outer part of the right join doesn't include any joins -SELECT * FROM - (SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2 - ON tt1.user_id = tt2.user_id) subquery_1 - RIGHT JOIN - (SELECT *, random() FROM (SELECT tt1.user_id, random() FROM user_buy_test_table as tt1 JOIN users_ref_test_table as ref - ON tt1.user_id = ref.id) subquery_2_inner) subquery_2 ON subquery_1.user_id = subquery_2.user_id; +-- almost the same query as the above, but reference table join wrapped with +-- another subquery +SELECT subquery_1.user_id + FROM + (SELECT tt1.user_id, random() + FROM user_buy_test_table AS tt1 + JOIN users_return_test_table as tt2 + ON tt1.user_id = tt2.user_id) subquery_1 + RIGHT JOIN + (SELECT *, random() + FROM + (SELECT tt1.user_id, random() + FROM user_buy_test_table as tt1 + JOIN users_ref_test_table as ref + ON tt1.user_id = ref.id) subquery_2_inner) subquery_2 + ON subquery_1.user_id = subquery_2.user_id + ORDER BY 1; ERROR: cannot pushdown the subquery DETAIL: There exist a reference table in the outer part of the outer join -- should be able to pushdown since reference table is in the @@ -337,15 +353,155 @@ ON user_buy_test_table.item_id = users_ref_test_table.id; ERROR: cannot pushdown the subquery DETAIL: There exist a subquery without FROM in the outer part of the outer join -- can perform a union with subquery without FROM +-- with pulling data to coordinator +SET client_min_messages TO DEBUG; SELECT count(*) FROM (SELECT user_id FROM user_buy_test_table UNION ALL SELECT id FROM (SELECT 5 AS id) users_ref_test_table) subquery_1; +DEBUG: generating subplan 41_1 for subquery SELECT user_id FROM public.user_buy_test_table +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 41_2 for subquery SELECT intermediate_result.user_id FROM read_intermediate_result('41_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) UNION ALL SELECT users_ref_test_table.id FROM (SELECT 5 AS id) users_ref_test_table +DEBUG: Plan 41 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('41_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) subquery_1 +DEBUG: Creating router plan +DEBUG: Plan is router executable count ------- 5 (1 row) + +-- union involving reference table and distributed table subqueries +-- is supported with pulling data to coordinator +SELECT * FROM + (SELECT id FROM users_ref_test_table + UNION + SELECT user_id FROM user_buy_test_table) sub +ORDER BY 1 DESC; +DEBUG: generating subplan 44_1 for subquery SELECT user_id FROM public.user_buy_test_table +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 44_2 for subquery SELECT users_ref_test_table.id FROM public.users_ref_test_table UNION SELECT intermediate_result.user_id FROM read_intermediate_result('44_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) +DEBUG: Plan 44 query after replacing subqueries and CTEs: SELECT id FROM (SELECT intermediate_result.id FROM read_intermediate_result('44_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) sub ORDER BY id DESC +DEBUG: Creating router plan +DEBUG: Plan is router executable + id +---- + 7 + 6 + 5 + 4 + 3 + 2 + 1 +(7 rows) + +-- same query but inner query has wrapped into another subquery +SELECT * FROM + (SELECT id, random() * 0 FROM (SELECT id FROM users_ref_test_table) sub1 + UNION + SELECT user_id, random() * 0 FROM (SELECT user_id FROM user_buy_test_table) sub2) sub +ORDER BY 1 DESC; +DEBUG: generating subplan 47_1 for subquery SELECT user_id, (random() OPERATOR(pg_catalog.*) (0)::double precision) FROM (SELECT user_buy_test_table.user_id FROM public.user_buy_test_table) sub2 +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 47_2 for subquery SELECT sub1.id, (random() OPERATOR(pg_catalog.*) (0)::double precision) FROM (SELECT users_ref_test_table.id FROM public.users_ref_test_table) sub1 UNION SELECT intermediate_result.user_id, intermediate_result."?column?" FROM read_intermediate_result('47_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "?column?" double precision) +DEBUG: Plan 47 query after replacing subqueries and CTEs: SELECT id, "?column?" FROM (SELECT intermediate_result.id, intermediate_result."?column?" FROM read_intermediate_result('47_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer, "?column?" double precision)) sub ORDER BY id DESC +DEBUG: Creating router plan +DEBUG: Plan is router executable + id | ?column? +----+---------- + 7 | 0 + 6 | 0 + 5 | 0 + 4 | 0 + 3 | 0 + 2 | 0 + 1 | 0 +(7 rows) + +-- query can be pushed down when a reference table inside union query is +-- joined with a distributed table +SELECT * FROM + (SELECT user_id FROM users_ref_test_table ref JOIN user_buy_test_table dis + on (ref.id = dis.user_id) + UNION + SELECT user_id FROM user_buy_test_table) sub +ORDER BY 1 DESC; + user_id +--------- + 7 + 3 + 2 + 1 +(4 rows) + +-- same query with inner query wrapped with a subquery +SELECT * FROM + (SELECT user_id, random() * 0 FROM (SELECT dis.user_id FROM users_ref_test_table ref join user_buy_test_table dis + on (ref.id = dis.user_id)) sub1 + UNION + SELECT user_id, random() * 0 FROM (SELECT user_id FROM user_buy_test_table) sub2) sub +ORDER BY 1 DESC; + user_id | ?column? +---------+---------- + 7 | 0 + 3 | 0 + 2 | 0 + 1 | 0 +(4 rows) + +SELECT * FROM + (SELECT user_id FROM users_ref_test_table ref JOIN user_buy_test_table dis + on (ref.id = dis.user_id) WHERE id in (SELECT user_id from user_buy_test_table) + UNION + SELECT user_id FROM user_buy_test_table) sub +ORDER BY 1 DESC; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT * FROM + (SELECT user_id FROM users_ref_test_table ref JOIN user_buy_test_table dis + on (ref.id = dis.user_id) + UNION + SELECT user_id FROM user_buy_test_table WHERE user_id in (select id from users_ref_test_table)) sub +ORDER BY 1 DESC; + user_id +--------- + 3 + 2 + 1 +(3 rows) + +-- query can be pushed down when a reference table inside union query is +-- joined with a distributed table. outer join is also supported +-- if reference table is in the inner part +SELECT * FROM + (SELECT user_id FROM users_ref_test_table ref RIGHT JOIN user_buy_test_table dis + on (ref.id = dis.user_id) + UNION + SELECT user_id FROM user_buy_test_table) sub +ORDER BY 1 DESC; + user_id +--------- + 7 + 3 + 2 + 1 +(4 rows) + +-- query can be pushed down when a reference table inside union query is +-- joined with a distributed table. reference table cannot be at +-- the outer part. +SELECT * FROM + (SELECT user_id FROM users_ref_test_table ref LEFT JOIN user_buy_test_table dis + on (ref.id = dis.user_id) + UNION + SELECT user_id FROM user_buy_test_table) sub +ORDER BY 1 DESC; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +RESET client_min_messages; -- should be able to pushdown since reference table is in the -- inner part of the left join SELECT @@ -453,7 +609,7 @@ SELECT * FROM (SELECT user_id as user_user_id FROM users_table) as fooo ON (user_id = user_user_id)) as bar; ERROR: cannot pushdown the subquery DETAIL: There exist a reference table in the outer part of the outer join --- we could even suuport the following where the subquery +-- we could even support the following where the subquery -- on the outer part of the left join contains a reference table SELECT max(events_all.cnt), events_all.usr_id FROM @@ -471,17 +627,16 @@ LEFT JOIN events_table ON (events_all.usr_id = events_table.user_id) GROUP BY 2 105 | 1 (5 rows) --- but, we fail to pushdown the following query where join that reference table appears --- wrapped into a subquery +-- we still support the query when the join involving reference table is wrapped with a subquery SELECT max(events_all.cnt), events_all.usr_id - FROM( -SELECT *, random() FROM - (SELECT users_table.user_id AS usr_id, count(*) AS cnt - FROM events_reference_table - INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id) - GROUP BY users_table.user_id) AS events_all_inner) AS events_all -LEFT JOIN events_table ON (events_all.usr_id = events_table.user_id) + FROM (SELECT *, random() + FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt + FROM events_reference_table + INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id) + GROUP BY users_table.user_id) AS events_all_inner + ) AS events_all + LEFT JOIN events_table ON (events_all.usr_id = events_table.user_id) GROUP BY 2 ORDER BY 1 DESC, 2 DESC @@ -1179,8 +1334,8 @@ SELECT count(*) FROM (SELECT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_2 WHERE subquery_1.user_id != subquery_2.user_id ; -DEBUG: generating subplan 79_1 for subquery SELECT user_buy_test_table.user_id, random() AS random FROM (public.user_buy_test_table LEFT JOIN public.users_ref_test_table ON ((user_buy_test_table.user_id OPERATOR(pg_catalog.>) users_ref_test_table.id))) -DEBUG: Plan 79 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT user_buy_test_table.user_id, random() AS random FROM (public.user_buy_test_table LEFT JOIN public.users_ref_test_table ON ((user_buy_test_table.item_id OPERATOR(pg_catalog.>) users_ref_test_table.id)))) subquery_1, (SELECT intermediate_result.user_id, intermediate_result.random FROM read_intermediate_result('79_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, random double precision)) subquery_2 WHERE (subquery_1.user_id OPERATOR(pg_catalog.<>) subquery_2.user_id) +DEBUG: generating subplan 86_1 for subquery SELECT user_buy_test_table.user_id, random() AS random FROM (public.user_buy_test_table LEFT JOIN public.users_ref_test_table ON ((user_buy_test_table.user_id OPERATOR(pg_catalog.>) users_ref_test_table.id))) +DEBUG: Plan 86 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT user_buy_test_table.user_id, random() AS random FROM (public.user_buy_test_table LEFT JOIN public.users_ref_test_table ON ((user_buy_test_table.item_id OPERATOR(pg_catalog.>) users_ref_test_table.id)))) subquery_1, (SELECT intermediate_result.user_id, intermediate_result.random FROM read_intermediate_result('86_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, random double precision)) subquery_2 WHERE (subquery_1.user_id OPERATOR(pg_catalog.<>) subquery_2.user_id) count ------- 67 @@ -1225,8 +1380,8 @@ count(*) AS cnt, "generated_group_field" ORDER BY cnt DESC, generated_group_field ASC LIMIT 10; -DEBUG: generating subplan 81_1 for subquery SELECT user_id, value_2 AS generated_group_field FROM public.users_table users -DEBUG: Plan 81 query after replacing subqueries and CTEs: SELECT count(*) AS cnt, generated_group_field FROM (SELECT "eventQuery".user_id, random() AS random, "eventQuery".generated_group_field FROM (SELECT multi_group_wrapper_1."time", multi_group_wrapper_1.event_user_id, multi_group_wrapper_1.user_id, left_group_by_1.generated_group_field, random() AS random FROM ((SELECT temp_data_queries."time", temp_data_queries.event_user_id, user_filters_1.user_id FROM ((SELECT events."time", events.user_id AS event_user_id FROM public.events_table events WHERE (events.user_id OPERATOR(pg_catalog.>) 2)) temp_data_queries JOIN (SELECT users.user_id FROM public.users_reference_table users WHERE ((users.user_id OPERATOR(pg_catalog.>) 2) AND (users.value_2 OPERATOR(pg_catalog.=) 5))) user_filters_1 ON ((temp_data_queries.event_user_id OPERATOR(pg_catalog.<) user_filters_1.user_id)))) multi_group_wrapper_1 RIGHT JOIN (SELECT intermediate_result.user_id, intermediate_result.generated_group_field FROM read_intermediate_result('81_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, generated_group_field integer)) left_group_by_1 ON ((left_group_by_1.user_id OPERATOR(pg_catalog.>) multi_group_wrapper_1.event_user_id)))) "eventQuery") "pushedDownQuery" GROUP BY generated_group_field ORDER BY (count(*)) DESC, generated_group_field LIMIT 10 +DEBUG: generating subplan 88_1 for subquery SELECT user_id, value_2 AS generated_group_field FROM public.users_table users +DEBUG: Plan 88 query after replacing subqueries and CTEs: SELECT count(*) AS cnt, generated_group_field FROM (SELECT "eventQuery".user_id, random() AS random, "eventQuery".generated_group_field FROM (SELECT multi_group_wrapper_1."time", multi_group_wrapper_1.event_user_id, multi_group_wrapper_1.user_id, left_group_by_1.generated_group_field, random() AS random FROM ((SELECT temp_data_queries."time", temp_data_queries.event_user_id, user_filters_1.user_id FROM ((SELECT events."time", events.user_id AS event_user_id FROM public.events_table events WHERE (events.user_id OPERATOR(pg_catalog.>) 2)) temp_data_queries JOIN (SELECT users.user_id FROM public.users_reference_table users WHERE ((users.user_id OPERATOR(pg_catalog.>) 2) AND (users.value_2 OPERATOR(pg_catalog.=) 5))) user_filters_1 ON ((temp_data_queries.event_user_id OPERATOR(pg_catalog.<) user_filters_1.user_id)))) multi_group_wrapper_1 RIGHT JOIN (SELECT intermediate_result.user_id, intermediate_result.generated_group_field FROM read_intermediate_result('88_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, generated_group_field integer)) left_group_by_1 ON ((left_group_by_1.user_id OPERATOR(pg_catalog.>) multi_group_wrapper_1.event_user_id)))) "eventQuery") "pushedDownQuery" GROUP BY generated_group_field ORDER BY (count(*)) DESC, generated_group_field LIMIT 10 ERROR: cannot pushdown the subquery DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join RESET client_min_messages; diff --git a/src/test/regress/sql/multi_subquery_complex_reference_clause.sql b/src/test/regress/sql/multi_subquery_complex_reference_clause.sql index 24aedcef8..169143725 100644 --- a/src/test/regress/sql/multi_subquery_complex_reference_clause.sql +++ b/src/test/regress/sql/multi_subquery_complex_reference_clause.sql @@ -90,24 +90,40 @@ SELECT count(*) FROM ON user_buy_test_table.user_id > users_ref_test_table.id AND users_ref_test_table.k_no > 44 AND user_buy_test_table.user_id > 44) subquery_2 WHERE subquery_1.user_id = subquery_2.user_id ; - -- Should be able to push down since reference tables are inner joined - -- with hash distributed tables, the results of those joins are the parts of - -- an outer join -SELECT subquery_2.id FROM - (SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2 - ON tt1.user_id = tt2.user_id) subquery_1 - RIGHT JOIN - (SELECT tt1.user_id, ref.id, random() FROM user_buy_test_table as tt1 JOIN users_ref_test_table as ref - ON tt1.user_id = ref.id) subquery_2 ON subquery_1.user_id = subquery_2.user_id ORDER BY 1 DESC LIMIT 5; +-- Should be able to push down since reference tables are inner joined +-- with hash distributed tables, the results of those joins are the parts of +-- an outer join +SELECT subquery_2.id + FROM + (SELECT tt1.user_id, random() + FROM user_buy_test_table AS tt1 + JOIN users_return_test_table as tt2 + ON tt1.user_id = tt2.user_id) subquery_1 + RIGHT JOIN + (SELECT tt1.user_id, ref.id, random() + FROM user_buy_test_table as tt1 + JOIN users_ref_test_table as ref + ON tt1.user_id = ref.id) subquery_2 + ON subquery_1.user_id = subquery_2.user_id + ORDER BY 1 DESC LIMIT 5; --- the same query as the above, but this Citus fails to pushdown the query --- since the outer part of the right join doesn't include any joins -SELECT * FROM - (SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2 - ON tt1.user_id = tt2.user_id) subquery_1 - RIGHT JOIN - (SELECT *, random() FROM (SELECT tt1.user_id, random() FROM user_buy_test_table as tt1 JOIN users_ref_test_table as ref - ON tt1.user_id = ref.id) subquery_2_inner) subquery_2 ON subquery_1.user_id = subquery_2.user_id; +-- almost the same query as the above, but reference table join wrapped with +-- another subquery +SELECT subquery_1.user_id + FROM + (SELECT tt1.user_id, random() + FROM user_buy_test_table AS tt1 + JOIN users_return_test_table as tt2 + ON tt1.user_id = tt2.user_id) subquery_1 + RIGHT JOIN + (SELECT *, random() + FROM + (SELECT tt1.user_id, random() + FROM user_buy_test_table as tt1 + JOIN users_ref_test_table as ref + ON tt1.user_id = ref.id) subquery_2_inner) subquery_2 + ON subquery_1.user_id = subquery_2.user_id + ORDER BY 1; -- should be able to pushdown since reference table is in the -- inner part of the left join @@ -198,10 +214,79 @@ SELECT count(*) FROM user_buy_test_table RIGHT JOIN (SELECT 5 AS id) users_ref_t ON user_buy_test_table.item_id = users_ref_test_table.id; -- can perform a union with subquery without FROM +-- with pulling data to coordinator +SET client_min_messages TO DEBUG; SELECT count(*) FROM (SELECT user_id FROM user_buy_test_table UNION ALL SELECT id FROM (SELECT 5 AS id) users_ref_test_table) subquery_1; + +-- union involving reference table and distributed table subqueries +-- is supported with pulling data to coordinator +SELECT * FROM + (SELECT id FROM users_ref_test_table + UNION + SELECT user_id FROM user_buy_test_table) sub +ORDER BY 1 DESC; + +-- same query but inner query has wrapped into another subquery +SELECT * FROM + (SELECT id, random() * 0 FROM (SELECT id FROM users_ref_test_table) sub1 + UNION + SELECT user_id, random() * 0 FROM (SELECT user_id FROM user_buy_test_table) sub2) sub +ORDER BY 1 DESC; + +-- query can be pushed down when a reference table inside union query is +-- joined with a distributed table +SELECT * FROM + (SELECT user_id FROM users_ref_test_table ref JOIN user_buy_test_table dis + on (ref.id = dis.user_id) + UNION + SELECT user_id FROM user_buy_test_table) sub +ORDER BY 1 DESC; + +-- same query with inner query wrapped with a subquery +SELECT * FROM + (SELECT user_id, random() * 0 FROM (SELECT dis.user_id FROM users_ref_test_table ref join user_buy_test_table dis + on (ref.id = dis.user_id)) sub1 + UNION + SELECT user_id, random() * 0 FROM (SELECT user_id FROM user_buy_test_table) sub2) sub +ORDER BY 1 DESC; + +SELECT * FROM + (SELECT user_id FROM users_ref_test_table ref JOIN user_buy_test_table dis + on (ref.id = dis.user_id) WHERE id in (SELECT user_id from user_buy_test_table) + UNION + SELECT user_id FROM user_buy_test_table) sub +ORDER BY 1 DESC; + +SELECT * FROM + (SELECT user_id FROM users_ref_test_table ref JOIN user_buy_test_table dis + on (ref.id = dis.user_id) + UNION + SELECT user_id FROM user_buy_test_table WHERE user_id in (select id from users_ref_test_table)) sub +ORDER BY 1 DESC; + +-- query can be pushed down when a reference table inside union query is +-- joined with a distributed table. outer join is also supported +-- if reference table is in the inner part +SELECT * FROM + (SELECT user_id FROM users_ref_test_table ref RIGHT JOIN user_buy_test_table dis + on (ref.id = dis.user_id) + UNION + SELECT user_id FROM user_buy_test_table) sub +ORDER BY 1 DESC; + +-- query can be pushed down when a reference table inside union query is +-- joined with a distributed table. reference table cannot be at +-- the outer part. +SELECT * FROM + (SELECT user_id FROM users_ref_test_table ref LEFT JOIN user_buy_test_table dis + on (ref.id = dis.user_id) + UNION + SELECT user_id FROM user_buy_test_table) sub +ORDER BY 1 DESC; +RESET client_min_messages; -- should be able to pushdown since reference table is in the -- inner part of the left join @@ -291,7 +376,7 @@ SELECT * FROM user_id > 2 and value_2 = 1) as foo_in ON (event_val_2 = user_id)) as foo LEFT JOIN (SELECT user_id as user_user_id FROM users_table) as fooo ON (user_id = user_user_id)) as bar; --- we could even suuport the following where the subquery +-- we could even support the following where the subquery -- on the outer part of the left join contains a reference table SELECT max(events_all.cnt), events_all.usr_id FROM @@ -301,17 +386,16 @@ FROM INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id) GROUP BY users_table.user_id) AS events_all LEFT JOIN events_table ON (events_all.usr_id = events_table.user_id) GROUP BY 2 ORDER BY 1 DESC, 2 DESC LIMIT 5; --- but, we fail to pushdown the following query where join that reference table appears --- wrapped into a subquery +-- we still support the query when the join involving reference table is wrapped with a subquery SELECT max(events_all.cnt), events_all.usr_id - FROM( -SELECT *, random() FROM - (SELECT users_table.user_id AS usr_id, count(*) AS cnt - FROM events_reference_table - INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id) - GROUP BY users_table.user_id) AS events_all_inner) AS events_all -LEFT JOIN events_table ON (events_all.usr_id = events_table.user_id) + FROM (SELECT *, random() + FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt + FROM events_reference_table + INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id) + GROUP BY users_table.user_id) AS events_all_inner + ) AS events_all + LEFT JOIN events_table ON (events_all.usr_id = events_table.user_id) GROUP BY 2 ORDER BY 1 DESC, 2 DESC