From b0efffae1cb63683a7e9ede5b068122cbc7b9808 Mon Sep 17 00:00:00 2001 From: velioglu Date: Wed, 9 Aug 2017 17:57:57 +0300 Subject: [PATCH] Correct planner and add more tests --- .../planner/multi_logical_planner.c | 99 +++++----------- .../planner/multi_physical_planner.c | 15 ++- .../relation_restriction_equivalence.c | 3 +- .../distributed/multi_logical_planner.h | 3 +- src/test/regress/expected/multi_explain.out | 61 +++++----- src/test/regress/expected/multi_explain_0.out | 61 +++++----- .../expected/multi_reference_table.out | 3 +- ...ulti_subquery_complex_reference_clause.out | 111 ++++++++++++++++-- ...lti_subquery_in_where_reference_clause.out | 85 +++++++++++++- .../regress/expected/multi_subquery_union.out | 95 +++++++++++++++ ...i_behavioral_analytics_create_table.source | 8 ++ ...i_behavioral_analytics_create_table.source | 16 +++ .../regress/sql/multi_reference_table.sql | 3 +- ...ulti_subquery_complex_reference_clause.sql | 81 ++++++++++++- ...lti_subquery_in_where_reference_clause.sql | 75 +++++++++++- src/test/regress/sql/multi_subquery_union.sql | 91 ++++++++++++++ 16 files changed, 649 insertions(+), 161 deletions(-) diff --git a/src/backend/distributed/planner/multi_logical_planner.c b/src/backend/distributed/planner/multi_logical_planner.c index 16fb25336..f1da91072 100644 --- a/src/backend/distributed/planner/multi_logical_planner.c +++ b/src/backend/distributed/planner/multi_logical_planner.c @@ -546,7 +546,8 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery, { return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, "cannot pushdown the subquery", - "There exist a reference table in the outer part of the outer join", + "There exist a reference table in the outer part of the " + "outer join", NULL); } @@ -1426,18 +1427,19 @@ MultiPlanTree(Query *queryTree) /* * HasUnsupportedReferenceTableJoin returns true if there exists a outer join - * exist between reference table and distributed tables which does not obey the - * rules : - * - Reference tables can not be located in the outer part of the semi join (or - * the inner part of the anti join). Otherwise, we may have duplicate results. - * Although getting duplicate results is not possible by checking the equality - * on the column of the reference table and partition column of distributed table, - * we still keep these checks. Because, using the reference table in the outer - * part of the semi join is not very common. - * - Reference tables can not be located in the outer part of the left join and - * inner part of the right join. Otherwise we will definitely have duplicate rows. - * Beside, reference tables can not be used with full outer joins because of the - * same reason. + * between reference table and distributed tables which does not follow + * the rules : + * - Reference tables can not be located in the outer part of the semi join or the + * anti join. Otherwise, we may have duplicate results. Although getting duplicate + * results is not possible by checking the equality on the column of the reference + * table and partition column of distributed table, we still keep these checks. + * Because, using the reference table in the outer part of the semi join or anti + * join is not very common. + * - Reference tables can not be located in the outer part of the left join + * (Note that PostgreSQL converts right joins to left joins. While converting + * join types, innerrel and outerrel are also switched.) Otherwise we will + * definitely have duplicate rows. Beside, reference tables can not be used + * with full outer joins because of the same reason. */ static bool HasUnsupportedReferenceTableJoin(PlannerRestrictionContext *plannerRestrictionContext) @@ -1455,58 +1457,20 @@ HasUnsupportedReferenceTableJoin(PlannerRestrictionContext *plannerRestrictionCo RelOptInfo *innerrel = joinRestriction->innerrel; RelOptInfo *outerrel = joinRestriction->outerrel; - switch (joinType) + if (joinType == JOIN_SEMI || joinType == JOIN_ANTI || joinType == JOIN_LEFT) { - case JOIN_SEMI: + if (RelationInfoHasReferenceTable(plannerInfo, outerrel)) { - if (RelationInfoHasReferenceTable(plannerInfo, outerrel)) - { - return true; - } + return true; } - break; - - case JOIN_ANTI: + } + else if (joinType == JOIN_FULL) + { + if (RelationInfoHasReferenceTable(plannerInfo, innerrel) || + RelationInfoHasReferenceTable(plannerInfo, outerrel)) { - if (RelationInfoHasReferenceTable(plannerInfo, innerrel)) - { - return true; - } + return true; } - break; - - case JOIN_LEFT: - { - if (RelationInfoHasReferenceTable(plannerInfo, outerrel)) - { - return true; - } - } - break; - - case JOIN_RIGHT: - { - if (RelationInfoHasReferenceTable(plannerInfo, innerrel)) - { - return true; - } - } - break; - - case JOIN_FULL: - { - if (RelationInfoHasReferenceTable(plannerInfo, innerrel) || - RelationInfoHasReferenceTable( - plannerInfo, outerrel)) - { - return true; - } - } - break; - - default: - { } - break; } } @@ -1515,7 +1479,7 @@ HasUnsupportedReferenceTableJoin(PlannerRestrictionContext *plannerRestrictionCo /* - * ReferenceTableExist check whether the relationInfo has reference table. + * RelationInfoHasReferenceTable check whether the relationInfo has reference table. * Since relation ids of relationInfo indexes to the range table entry list of * planner info, planner info is also passed. */ @@ -1549,7 +1513,7 @@ HasReferenceTable(Node *node) { List *relationList = NIL; ListCell *relationCell = NULL; - ExtractRangeTableRelationWalkerInRTE(node, &relationList); + ExtractRangeTableRelationWalkerWithRTEExpand(node, &relationList); foreach(relationCell, relationList) { @@ -2933,13 +2897,13 @@ ExtractRangeTableRelationWalker(Node *node, List **rangeTableRelationList) /* - * ExtractRangeTableRelationWalkerInRTE obtains the list of relations from the - * given node. Note that the difference between this function and + * ExtractRangeTableRelationWalkerWithRTEExpand obtains the list of relations + * from the given node. Note that the difference between this function and * ExtractRangeTableRelationWalker is that this one recursively * walk into range table entries if it can. */ bool -ExtractRangeTableRelationWalkerInRTE(Node *node, List **rangeTableRelationList) +ExtractRangeTableRelationWalkerWithRTEExpand(Node *node, List **rangeTableRelationList) { bool walkIsComplete = false; @@ -2950,8 +2914,7 @@ ExtractRangeTableRelationWalkerInRTE(Node *node, List **rangeTableRelationList) else if (IsA(node, RangeTblEntry)) { RangeTblEntry *rangeTableEntry = (RangeTblEntry *) node; - List *rangeTableList = NIL; - rangeTableList = lappend(rangeTableList, rangeTableEntry); + List *rangeTableList = list_make1(rangeTableEntry); if (rangeTableEntry->rtekind == RTE_RELATION) { @@ -2960,7 +2923,7 @@ ExtractRangeTableRelationWalkerInRTE(Node *node, List **rangeTableRelationList) else { walkIsComplete = range_table_walker(rangeTableList, - ExtractRangeTableRelationWalkerInRTE, + ExtractRangeTableRelationWalkerWithRTEExpand, rangeTableRelationList, 0); } } diff --git a/src/backend/distributed/planner/multi_physical_planner.c b/src/backend/distributed/planner/multi_physical_planner.c index 851597393..9c0d1341f 100644 --- a/src/backend/distributed/planner/multi_physical_planner.c +++ b/src/backend/distributed/planner/multi_physical_planner.c @@ -2111,7 +2111,7 @@ ErrorIfUnsupportedShardDistribution(Query *query) { Oid firstTableRelationId = InvalidOid; List *relationIdList = RelationIdList(query); - List *distributedRelationIdList = NIL; + List *nonReferenceRelations = NIL; ListCell *relationIdCell = NULL; uint32 relationIndex = 0; uint32 rangeDistributedRelationCount = 0; @@ -2121,18 +2121,17 @@ ErrorIfUnsupportedShardDistribution(Query *query) { Oid relationId = lfirst_oid(relationIdCell); char partitionMethod = PartitionMethod(relationId); - if (partitionMethod == DISTRIBUTE_BY_RANGE) { rangeDistributedRelationCount++; - distributedRelationIdList = lappend_oid(distributedRelationIdList, - relationId); + nonReferenceRelations = lappend_oid(nonReferenceRelations, + relationId); } else if (partitionMethod == DISTRIBUTE_BY_HASH) { hashDistributedRelationCount++; - distributedRelationIdList = lappend_oid(distributedRelationIdList, - relationId); + nonReferenceRelations = lappend_oid(nonReferenceRelations, + relationId); } else if (partitionMethod == DISTRIBUTE_BY_NONE) { @@ -2142,7 +2141,7 @@ ErrorIfUnsupportedShardDistribution(Query *query) else { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot pushdown this subquery"), + errmsg("cannot push down this subquery"), errdetail("Currently append partitioned relations " "are not supported"))); } @@ -2156,7 +2155,7 @@ ErrorIfUnsupportedShardDistribution(Query *query) "partitioned relations are unsupported"))); } - foreach(relationIdCell, distributedRelationIdList) + foreach(relationIdCell, nonReferenceRelations) { Oid relationId = lfirst_oid(relationIdCell); bool coPartitionedTables = false; diff --git a/src/backend/distributed/planner/relation_restriction_equivalence.c b/src/backend/distributed/planner/relation_restriction_equivalence.c index 1fa38f8d6..b8a61a00e 100644 --- a/src/backend/distributed/planner/relation_restriction_equivalence.c +++ b/src/backend/distributed/planner/relation_restriction_equivalence.c @@ -124,7 +124,8 @@ static Index RelationRestrictionPartitionKeyIndex(RelationRestriction * /* * SafeToPushdownUnionSubquery returns true if all the relations are returns - * partition keys in the same ordinal position. + * partition keys in the same ordinal position and there is no reference table + * exists. * * Note that the function expects (and asserts) the input query to be a top * level union query defined by TopLevelUnionQuery(). diff --git a/src/include/distributed/multi_logical_planner.h b/src/include/distributed/multi_logical_planner.h index bc4795d65..6ebebc80f 100644 --- a/src/include/distributed/multi_logical_planner.h +++ b/src/include/distributed/multi_logical_planner.h @@ -206,7 +206,8 @@ extern List * TableEntryList(List *rangeTableList); extern List * UsedTableEntryList(Query *query); extern bool ExtractRangeTableRelationWalker(Node *node, List **rangeTableList); extern bool ExtractRangeTableEntryWalker(Node *node, List **rangeTableList); -extern bool ExtractRangeTableRelationWalkerInRTE(Node *node, List **rangeTableList); +extern bool ExtractRangeTableRelationWalkerWithRTEExpand(Node *node, + List **rangeTableList); extern List * pull_var_clause_default(Node *node); extern bool OperatorImplementsEquality(Oid opno); diff --git a/src/test/regress/expected/multi_explain.out b/src/test/regress/expected/multi_explain.out index e78b1b1eb..4ce4b795d 100644 --- a/src/test/regress/expected/multi_explain.out +++ b/src/test/regress/expected/multi_explain.out @@ -446,15 +446,16 @@ Aggregate Node: host=localhost port=57637 dbname=regression -> Aggregate -> GroupAggregate - Group Key: (((NULL::user_composite_type)).tenant_id), (((NULL::user_composite_type)).user_id) + Group Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id) -> Sort - Sort Key: (((NULL::user_composite_type)).tenant_id), (((NULL::user_composite_type)).user_id) - -> Nested Loop - Join Filter: ((NULL::user_composite_type) = events.composite_id) - -> Result - One-Time Filter: false - -> Seq Scan on events_1400027 events - Filter: ((event_type)::text = ANY ('{click,submit,pay}'::text[])) + Sort Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id) + -> Hash Join + Hash Cond: (users.composite_id = events.composite_id) + -> Seq Scan on users_1400029 users + Filter: ((composite_id >= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type)) + -> Hash + -> Seq Scan on events_1400025 events + Filter: ((event_type)::text = ANY ('{click,submit,pay}'::text[])) -- Union and left join subquery pushdown EXPLAIN (COSTS OFF) SELECT @@ -531,33 +532,34 @@ HashAggregate Sort Key: subquery_top.hasdone -> Subquery Scan on subquery_top -> GroupAggregate - Group Key: (((NULL::user_composite_type)).tenant_id), (((NULL::user_composite_type)).user_id), subquery_2.hasdone + Group Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id), subquery_2.hasdone -> Sort - Sort Key: (((NULL::user_composite_type)).tenant_id), (((NULL::user_composite_type)).user_id), subquery_2.hasdone + Sort Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id), subquery_2.hasdone -> Hash Left Join - Hash Cond: ((NULL::user_composite_type) = subquery_2.composite_id) - -> Unique - -> Sort - Sort Key: (((NULL::user_composite_type)).tenant_id), (((NULL::user_composite_type)).user_id), (NULL::user_composite_type), ('action=>1'::text), events.event_time - -> Append - -> Nested Loop - Join Filter: ((NULL::user_composite_type) = events.composite_id) - -> Result - One-Time Filter: false - -> Seq Scan on events_1400027 events + Hash Cond: (users.composite_id = subquery_2.composite_id) + -> HashAggregate + Group Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id), users.composite_id, ('action=>1'::text), events.event_time + -> Append + -> Hash Join + Hash Cond: (users.composite_id = events.composite_id) + -> Seq Scan on users_1400029 users + Filter: ((composite_id >= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type)) + -> Hash + -> Seq Scan on events_1400025 events Filter: ((event_type)::text = 'click'::text) - -> Nested Loop - Join Filter: ((NULL::user_composite_type) = events_1.composite_id) - -> Result - One-Time Filter: false - -> Seq Scan on events_1400027 events_1 + -> Hash Join + Hash Cond: (users_1.composite_id = events_1.composite_id) + -> Seq Scan on users_1400029 users_1 + Filter: ((composite_id >= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type)) + -> Hash + -> Seq Scan on events_1400025 events_1 Filter: ((event_type)::text = 'submit'::text) -> Hash -> Subquery Scan on subquery_2 -> Unique -> Sort Sort Key: ((events_2.composite_id).tenant_id), ((events_2.composite_id).user_id) - -> Seq Scan on events_1400027 events_2 + -> Seq Scan on events_1400025 events_2 Filter: ((composite_id >= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type) AND ((event_type)::text = 'pay'::text)) -- Union, left join and having subquery pushdown EXPLAIN (COSTS OFF) @@ -703,13 +705,12 @@ Limit -> Limit -> Sort Sort Key: users.lastseen DESC - -> Subquery Scan on users - -> Result - One-Time Filter: false + -> Seq Scan on users_1400029 users + Filter: ((composite_id >= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type)) -> Limit -> Sort Sort Key: events.event_time DESC - -> Seq Scan on events_1400027 events + -> Seq Scan on events_1400025 events Filter: (composite_id = users.composite_id) -- Test all tasks output SET citus.explain_all_tasks TO on; diff --git a/src/test/regress/expected/multi_explain_0.out b/src/test/regress/expected/multi_explain_0.out index 998b97578..f44ac8d9c 100644 --- a/src/test/regress/expected/multi_explain_0.out +++ b/src/test/regress/expected/multi_explain_0.out @@ -446,15 +446,16 @@ Aggregate Node: host=localhost port=57637 dbname=regression -> Aggregate -> GroupAggregate - Group Key: (((NULL::user_composite_type)).tenant_id), (((NULL::user_composite_type)).user_id) + Group Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id) -> Sort - Sort Key: (((NULL::user_composite_type)).tenant_id), (((NULL::user_composite_type)).user_id) - -> Nested Loop - Join Filter: ((NULL::user_composite_type) = events.composite_id) - -> Result - One-Time Filter: false - -> Seq Scan on events_1400027 events - Filter: ((event_type)::text = ANY ('{click,submit,pay}'::text[])) + Sort Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id) + -> Hash Join + Hash Cond: (users.composite_id = events.composite_id) + -> Seq Scan on users_1400029 users + Filter: ((composite_id >= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type)) + -> Hash + -> Seq Scan on events_1400025 events + Filter: ((event_type)::text = ANY ('{click,submit,pay}'::text[])) -- Union and left join subquery pushdown EXPLAIN (COSTS OFF) SELECT @@ -531,33 +532,34 @@ HashAggregate Sort Key: subquery_top.hasdone -> Subquery Scan on subquery_top -> GroupAggregate - Group Key: (((NULL::user_composite_type)).tenant_id), (((NULL::user_composite_type)).user_id), subquery_2.hasdone + Group Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id), subquery_2.hasdone -> Sort - Sort Key: (((NULL::user_composite_type)).tenant_id), (((NULL::user_composite_type)).user_id), subquery_2.hasdone + Sort Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id), subquery_2.hasdone -> Hash Left Join - Hash Cond: ((NULL::user_composite_type) = subquery_2.composite_id) - -> Unique - -> Sort - Sort Key: (((NULL::user_composite_type)).tenant_id), (((NULL::user_composite_type)).user_id), (NULL::user_composite_type), ('action=>1'::text), events.event_time - -> Append - -> Nested Loop - Join Filter: ((NULL::user_composite_type) = events.composite_id) - -> Result - One-Time Filter: false - -> Seq Scan on events_1400027 events + Hash Cond: (users.composite_id = subquery_2.composite_id) + -> HashAggregate + Group Key: ((users.composite_id).tenant_id), ((users.composite_id).user_id), users.composite_id, ('action=>1'::text), events.event_time + -> Append + -> Hash Join + Hash Cond: (users.composite_id = events.composite_id) + -> Seq Scan on users_1400029 users + Filter: ((composite_id >= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type)) + -> Hash + -> Seq Scan on events_1400025 events Filter: ((event_type)::text = 'click'::text) - -> Nested Loop - Join Filter: ((NULL::user_composite_type) = events_1.composite_id) - -> Result - One-Time Filter: false - -> Seq Scan on events_1400027 events_1 + -> Hash Join + Hash Cond: (users_1.composite_id = events_1.composite_id) + -> Seq Scan on users_1400029 users_1 + Filter: ((composite_id >= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type)) + -> Hash + -> Seq Scan on events_1400025 events_1 Filter: ((event_type)::text = 'submit'::text) -> Hash -> Subquery Scan on subquery_2 -> Unique -> Sort Sort Key: ((events_2.composite_id).tenant_id), ((events_2.composite_id).user_id) - -> Seq Scan on events_1400027 events_2 + -> Seq Scan on events_1400025 events_2 Filter: ((composite_id >= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type) AND ((event_type)::text = 'pay'::text)) -- Union, left join and having subquery pushdown EXPLAIN (COSTS OFF) @@ -703,13 +705,12 @@ Limit -> Limit -> Sort Sort Key: users.lastseen DESC - -> Subquery Scan on users - -> Result - One-Time Filter: false + -> Seq Scan on users_1400029 users + Filter: ((composite_id >= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type)) -> Limit -> Sort Sort Key: events.event_time DESC - -> Seq Scan on events_1400027 events + -> Seq Scan on events_1400025 events Filter: (composite_id = users.composite_id) -- Test all tasks output SET citus.explain_all_tasks TO on; diff --git a/src/test/regress/expected/multi_reference_table.out b/src/test/regress/expected/multi_reference_table.out index fcd5f7c2c..6e6b45c59 100644 --- a/src/test/regress/expected/multi_reference_table.out +++ b/src/test/regress/expected/multi_reference_table.out @@ -1122,8 +1122,7 @@ WHERE colocated_table_test.value_1 = reference_table_test.value_1; DEBUG: only reference tables may be queried when targeting a reference table with distributed INSERT ... SELECT DEBUG: Collecting INSERT ... SELECT results on coordinator --- now, insert into the hash partitioned table and use reference --- tables in the SELECT queries +-- not pushable due to lack of equality between partition column and column of reference table INSERT INTO colocated_table_test (value_1, value_2) SELECT diff --git a/src/test/regress/expected/multi_subquery_complex_reference_clause.out b/src/test/regress/expected/multi_subquery_complex_reference_clause.out index 865d44e8e..ec0931d12 100644 --- a/src/test/regress/expected/multi_subquery_complex_reference_clause.out +++ b/src/test/regress/expected/multi_subquery_complex_reference_clause.out @@ -8,22 +8,112 @@ ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 1400000; SET citus.enable_router_execution TO FALSE; -CREATE TABLE events_reference_table as SELECT * FROM events_table; -CREATE TABLE users_reference_table as SELECT * FROM users_table; -SELECT create_reference_table('events_reference_table'); -NOTICE: Copying data from local table... +CREATE TABLE user_buy_test_table(user_id int, item_id int, buy_count int); +SELECT create_distributed_table('user_buy_test_table', 'user_id'); + create_distributed_table +-------------------------- + +(1 row) + +INSERT INTO user_buy_test_table VALUES(1,2,1); +INSERT INTO user_buy_test_table VALUES(2,3,4); +INSERT INTO user_buy_test_table VALUES(3,4,2); +INSERT INTO user_buy_test_table VALUES(7,5,2); +CREATE TABLE users_return_test_table(user_id int, item_id int, buy_count int); +SELECT create_distributed_table('users_return_test_table', 'user_id'); + create_distributed_table +-------------------------- + +(1 row) + +INSERT INTO users_return_test_table VALUES(4,1,1); +INSERT INTO users_return_test_table VALUES(1,3,1); +INSERT INTO users_return_test_table VALUES(3,2,2); +CREATE TABLE users_ref_test_table(id int, it_name varchar(25), k_no int); +SELECT create_reference_table('users_ref_test_table'); create_reference_table ------------------------ (1 row) -SELECT create_reference_table('users_reference_table'); -NOTICE: Copying data from local table... - create_reference_table ------------------------- - +INSERT INTO users_ref_test_table VALUES(1,'User_1',45); +INSERT INTO users_ref_test_table VALUES(2,'User_2',46); +INSERT INTO users_ref_test_table VALUES(3,'User_3',47); +INSERT INTO users_ref_test_table VALUES(4,'User_4',48); +INSERT INTO users_ref_test_table VALUES(5,'User_5',49); +INSERT INTO users_ref_test_table VALUES(6,'User_6',50); +-- Simple Join test with reference table +SELECT count(*) FROM + (SELECT random() FROM user_buy_test_table JOIN users_ref_test_table + ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1; + count +------- + 3 (1 row) +-- Should work, reference table at the inner side is allowed +SELECT count(*) FROM + (SELECT random(), k_no FROM user_buy_test_table LEFT JOIN users_ref_test_table + ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1 WHERE k_no = 47; + count +------- + 1 +(1 row) + +-- Should not work, no equality between partition column and reference table +SELECT * FROM + (SELECT random() FROM user_buy_test_table LEFT JOIN users_ref_test_table + ON user_buy_test_table.item_id = users_ref_test_table.id) subquery_1; +ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys +DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +-- Should not work, no equality between partition column and reference table +SELECT * FROM + (SELECT random() FROM user_buy_test_table LEFT JOIN users_ref_test_table + ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_1; +ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys +DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +-- Shouldn't work, reference table at the outer side is not allowed +SELECT * FROM + (SELECT random() FROM users_ref_test_table LEFT JOIN user_buy_test_table + ON users_ref_test_table.id = user_buy_test_table.user_id) subquery_1; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +-- Should work, reference table at the inner side is allowed +SELECT count(*) FROM + (SELECT random() FROM users_ref_test_table RIGHT JOIN user_buy_test_table + ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1; + count +------- + 4 +(1 row) + +-- Shouldn't work, reference table at the outer side is not allowed +SELECT * FROM + (SELECT random() FROM user_buy_test_table RIGHT JOIN users_ref_test_table + ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +-- Should pass since reference table locates in the inner part of each left join +SELECT count(*) FROM + (SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2 + ON tt1.user_id = tt2.user_id) subquery_1 + LEFT JOIN + (SELECT tt1.user_id, random() FROM user_buy_test_table as tt1 LEFT JOIN users_ref_test_table as ref + ON tt1.user_id = ref.id) subquery_2 ON subquery_1.user_id = subquery_2.user_id; + count +------- + 2 +(1 row) + + -- Should not pass since reference table locates in the outer part of right join +SELECT * FROM + (SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2 + ON tt1.user_id = tt2.user_id) subquery_1 + RIGHT JOIN + (SELECT tt1.user_id, random() FROM user_buy_test_table as tt1 JOIN users_ref_test_table as ref + ON tt1.user_id = ref.id) subquery_2 ON subquery_1.user_id = subquery_2.user_id; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join -- LATERAL JOINs used with INNER JOINs with reference tables SET citus.subquery_pushdown to ON; SELECT user_id, lastseen @@ -570,3 +660,6 @@ GROUP BY types ORDER BY types; ERROR: cannot push down this subquery DETAIL: Reference tables are not supported with union operator +DROP TABLE user_buy_test_table; +DROP TABLE users_ref_test_table; +DROP TABLE users_return_test_table; diff --git a/src/test/regress/expected/multi_subquery_in_where_reference_clause.out b/src/test/regress/expected/multi_subquery_in_where_reference_clause.out index d45ec005f..a0e2bf7f0 100644 --- a/src/test/regress/expected/multi_subquery_in_where_reference_clause.out +++ b/src/test/regress/expected/multi_subquery_in_where_reference_clause.out @@ -28,6 +28,89 @@ LIMIT 5; 63 (4 rows) +-- subqueries in WHERE with IN operator +SELECT + user_id +FROM + users_table +WHERE + value_2 IN + (SELECT + value_2 + FROM + events_reference_table + WHERE + users_table.user_id = events_reference_table.user_id + ) +GROUP BY user_id +ORDER BY user_id +LIMIT 3; + user_id +--------- + 0 + 1 + 2 +(3 rows) + +-- subqueries in WHERE with NOT EXISTS operator, should work since +-- reference table in the inner part of the join +SELECT + user_id +FROM + users_table +WHERE + NOT EXISTS + (SELECT + value_2 + FROM + events_reference_table + WHERE + users_table.user_id = events_reference_table.user_id + ) +GROUP BY user_id +ORDER BY user_id +LIMIT 3; + user_id +--------- +(0 rows) + +-- subqueries in WHERE with NOT EXISTS operator, should not work +-- there is a reference table in the outer part of the join +SELECT + user_id +FROM + users_reference_table +WHERE + NOT EXISTS + (SELECT + value_2 + FROM + events_table + WHERE + users_reference_table.user_id = events_table.user_id + ) +LIMIT 3; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +-- subqueries in WHERE with IN operator without equality +SELECT + user_id +FROM + users_table +WHERE + value_2 IN + (SELECT + value_2 + FROM + events_reference_table + WHERE + users_table.user_id > events_reference_table.user_id + ) +GROUP BY user_id +ORDER BY user_id +LIMIT 3; +ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys +DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. -- have reference table without any equality, should error out SELECT user_id @@ -141,5 +224,3 @@ SELECT user_id, value_2 FROM users_table WHERE ORDER BY 1, 2; ERROR: cannot pushdown the subquery DETAIL: There exist a reference table in the outer part of the outer join -DROP TABLE events_reference_table; -DROP TABLE users_reference_table; diff --git a/src/test/regress/expected/multi_subquery_union.out b/src/test/regress/expected/multi_subquery_union.out index 0a858baf7..3a5e00400 100644 --- a/src/test/regress/expected/multi_subquery_union.out +++ b/src/test/regress/expected/multi_subquery_union.out @@ -23,6 +23,17 @@ LIMIT 5; 20 | 9 (5 rows) +-- a very simple union query with reference table +SELECT user_id, counter +FROM ( + SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (1, 2, 3, 4, 5) + UNION + SELECT user_id, value_2 % 10 AS counter FROM events_reference_table WHERE event_type IN (5, 6, 7, 8, 9, 10) +) user_id +ORDER BY 2 DESC,1 +LIMIT 5; +ERROR: cannot pushdown this query +DETAIL: Reference tables are not allowed with set operations -- the same query with union all SELECT user_id, counter FROM ( @@ -41,6 +52,17 @@ LIMIT 5; 15 | 9 (5 rows) +-- the same query with union all and reference table +SELECT user_id, counter +FROM ( + SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (1, 2, 3, 4, 5) + UNION ALL + SELECT user_id, value_2 % 10 AS counter FROM events_reference_table WHERE event_type IN (5, 6, 7, 8, 9, 10) +) user_id +ORDER BY 2 DESC,1 +LIMIT 5; +ERROR: cannot pushdown this query +DETAIL: Reference tables are not allowed with set operations -- the same query with group by SELECT user_id, sum(counter) FROM ( @@ -162,6 +184,22 @@ GROUP BY user_id ORDER BY 1 DESC LIMIT 5; 23508 (5 rows) +-- similar query this time more subqueries with reference table and target list contains a resjunk entry +SELECT sum(counter) +FROM ( + SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 20 GROUP BY user_id HAVING sum(value_2) > 500 + UNION + SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 40 and value_1 < 60 GROUP BY user_id HAVING sum(value_2) > 500 + UNION + SELECT user_id, sum(value_2) AS counter FROM users_reference_table where value_1 < 60 and value_1 < 80 GROUP BY user_id HAVING sum(value_2) > 500 + UNION + SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 80 and value_1 < 100 GROUP BY user_id HAVING sum(value_2) > 500 + UNION + SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 100 and value_1 < 120 GROUP BY user_id HAVING sum(value_2) > 500 +) user_id +GROUP BY user_id ORDER BY 1 DESC LIMIT 5; +ERROR: cannot pushdown this query +DETAIL: Reference tables are not allowed with set operations -- similar query as above, with UNION ALL SELECT sum(counter) FROM ( @@ -236,6 +274,50 @@ LIMIT 5; 90 | 115843 (5 rows) +-- unions within unions with reference table +SELECT * +FROM ( + ( SELECT user_id, + sum(counter) + FROM + (SELECT + user_id, sum(value_2) AS counter + FROM + users_table + GROUP BY + user_id + UNION + SELECT + user_id, sum(value_2) AS counter + FROM + events_reference_table + GROUP BY + user_id) user_id_1 + GROUP BY + user_id) + UNION + (SELECT + user_id, sum(counter) + FROM + (SELECT + user_id, sum(value_2) AS counter + FROM + users_table + GROUP BY + user_id + UNION + SELECT + user_id, sum(value_2) AS counter + FROM + events_table + GROUP BY + user_id) user_id_2 + GROUP BY + user_id)) AS ftop +ORDER BY 2 DESC, 1 DESC +LIMIT 5; +ERROR: cannot pushdown this query +DETAIL: Reference tables are not allowed with set operations -- top level unions are wrapped into top level aggregations SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM @@ -493,6 +575,17 @@ FROM 20002 (1 row) +-- some UNION ALL queries that are going to be pulled up with reference table +SELECT + count(*) +FROM +( + (SELECT user_id FROM users_table) + UNION ALL + (SELECT user_id FROM events_reference_table) +) b; +ERROR: cannot pushdown this query +DETAIL: Reference tables are not allowed with set operations -- similar query without top level agg SELECT user_id @@ -899,3 +992,5 @@ ORDER BY types; ERROR: cannot push down this subquery DETAIL: Subqueries without relations are unsupported SET citus.enable_router_execution TO true; +DROP TABLE events_reference_table; +DROP TABLE users_reference_table; diff --git a/src/test/regress/input/multi_behavioral_analytics_create_table.source b/src/test/regress/input/multi_behavioral_analytics_create_table.source index fcb1ade20..c83673a2d 100644 --- a/src/test/regress/input/multi_behavioral_analytics_create_table.source +++ b/src/test/regress/input/multi_behavioral_analytics_create_table.source @@ -353,3 +353,11 @@ SET citus.shard_max_size TO "1MB"; \copy orders_subquery FROM '@abs_srcdir@/data/orders.1.data' with delimiter '|' \copy orders_subquery FROM '@abs_srcdir@/data/orders.2.data' with delimiter '|' + +CREATE TABLE events_reference_table (like events_table including all); +SELECT create_reference_table('events_reference_table'); +INSERT INTO events_reference_table SELECT * FROM events_table; + +CREATE TABLE users_reference_table (like users_table including all); +SELECT create_reference_table('users_reference_table'); +INSERT INTO users_reference_table SELECT * FROM users_table; diff --git a/src/test/regress/output/multi_behavioral_analytics_create_table.source b/src/test/regress/output/multi_behavioral_analytics_create_table.source index 093bc1d15..81b996c0d 100644 --- a/src/test/regress/output/multi_behavioral_analytics_create_table.source +++ b/src/test/regress/output/multi_behavioral_analytics_create_table.source @@ -421,3 +421,19 @@ SET citus.shard_max_size TO "1MB"; \copy lineitem_subquery FROM '@abs_srcdir@/data/lineitem.2.data' with delimiter '|' \copy orders_subquery FROM '@abs_srcdir@/data/orders.1.data' with delimiter '|' \copy orders_subquery FROM '@abs_srcdir@/data/orders.2.data' with delimiter '|' +CREATE TABLE events_reference_table (like events_table including all); +SELECT create_reference_table('events_reference_table'); + create_reference_table +------------------------ + +(1 row) + +INSERT INTO events_reference_table SELECT * FROM events_table; +CREATE TABLE users_reference_table (like users_table including all); +SELECT create_reference_table('users_reference_table'); + create_reference_table +------------------------ + +(1 row) + +INSERT INTO users_reference_table SELECT * FROM users_table; diff --git a/src/test/regress/sql/multi_reference_table.sql b/src/test/regress/sql/multi_reference_table.sql index 336b38bee..8c498ba42 100644 --- a/src/test/regress/sql/multi_reference_table.sql +++ b/src/test/regress/sql/multi_reference_table.sql @@ -707,8 +707,7 @@ FROM WHERE colocated_table_test.value_1 = reference_table_test.value_1; --- now, insert into the hash partitioned table and use reference --- tables in the SELECT queries +-- not pushable due to lack of equality between partition column and column of reference table INSERT INTO colocated_table_test (value_1, value_2) SELECT diff --git a/src/test/regress/sql/multi_subquery_complex_reference_clause.sql b/src/test/regress/sql/multi_subquery_complex_reference_clause.sql index 103a3dbbe..fc4ffef7b 100644 --- a/src/test/regress/sql/multi_subquery_complex_reference_clause.sql +++ b/src/test/regress/sql/multi_subquery_complex_reference_clause.sql @@ -10,11 +10,78 @@ ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 1400000; SET citus.enable_router_execution TO FALSE; -CREATE TABLE events_reference_table as SELECT * FROM events_table; -CREATE TABLE users_reference_table as SELECT * FROM users_table; +CREATE TABLE user_buy_test_table(user_id int, item_id int, buy_count int); +SELECT create_distributed_table('user_buy_test_table', 'user_id'); +INSERT INTO user_buy_test_table VALUES(1,2,1); +INSERT INTO user_buy_test_table VALUES(2,3,4); +INSERT INTO user_buy_test_table VALUES(3,4,2); +INSERT INTO user_buy_test_table VALUES(7,5,2); -SELECT create_reference_table('events_reference_table'); -SELECT create_reference_table('users_reference_table'); +CREATE TABLE users_return_test_table(user_id int, item_id int, buy_count int); +SELECT create_distributed_table('users_return_test_table', 'user_id'); +INSERT INTO users_return_test_table VALUES(4,1,1); +INSERT INTO users_return_test_table VALUES(1,3,1); +INSERT INTO users_return_test_table VALUES(3,2,2); + +CREATE TABLE users_ref_test_table(id int, it_name varchar(25), k_no int); +SELECT create_reference_table('users_ref_test_table'); +INSERT INTO users_ref_test_table VALUES(1,'User_1',45); +INSERT INTO users_ref_test_table VALUES(2,'User_2',46); +INSERT INTO users_ref_test_table VALUES(3,'User_3',47); +INSERT INTO users_ref_test_table VALUES(4,'User_4',48); +INSERT INTO users_ref_test_table VALUES(5,'User_5',49); +INSERT INTO users_ref_test_table VALUES(6,'User_6',50); + +-- Simple Join test with reference table +SELECT count(*) FROM + (SELECT random() FROM user_buy_test_table JOIN users_ref_test_table + ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1; + +-- Should work, reference table at the inner side is allowed +SELECT count(*) FROM + (SELECT random(), k_no FROM user_buy_test_table LEFT JOIN users_ref_test_table + ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1 WHERE k_no = 47; + +-- Should not work, no equality between partition column and reference table +SELECT * FROM + (SELECT random() FROM user_buy_test_table LEFT JOIN users_ref_test_table + ON user_buy_test_table.item_id = users_ref_test_table.id) subquery_1; + +-- Should not work, no equality between partition column and reference table +SELECT * FROM + (SELECT random() FROM user_buy_test_table LEFT JOIN users_ref_test_table + ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_1; + +-- Shouldn't work, reference table at the outer side is not allowed +SELECT * FROM + (SELECT random() FROM users_ref_test_table LEFT JOIN user_buy_test_table + ON users_ref_test_table.id = user_buy_test_table.user_id) subquery_1; + +-- Should work, reference table at the inner side is allowed +SELECT count(*) FROM + (SELECT random() FROM users_ref_test_table RIGHT JOIN user_buy_test_table + ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1; + +-- Shouldn't work, reference table at the outer side is not allowed +SELECT * FROM + (SELECT random() FROM user_buy_test_table RIGHT JOIN users_ref_test_table + ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1; + +-- Should pass since reference table locates in the inner part of each left join +SELECT count(*) FROM + (SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2 + ON tt1.user_id = tt2.user_id) subquery_1 + LEFT JOIN + (SELECT tt1.user_id, random() FROM user_buy_test_table as tt1 LEFT JOIN users_ref_test_table as ref + ON tt1.user_id = ref.id) subquery_2 ON subquery_1.user_id = subquery_2.user_id; + + -- Should not pass since reference table locates in the outer part of right join +SELECT * FROM + (SELECT tt1.user_id, random() FROM user_buy_test_table AS tt1 JOIN users_return_test_table as tt2 + ON tt1.user_id = tt2.user_id) subquery_1 + RIGHT JOIN + (SELECT tt1.user_id, random() FROM user_buy_test_table as tt1 JOIN users_ref_test_table as ref + ON tt1.user_id = ref.id) subquery_2 ON subquery_1.user_id = subquery_2.user_id; -- LATERAL JOINs used with INNER JOINs with reference tables SET citus.subquery_pushdown to ON; @@ -500,4 +567,8 @@ INNER JOIN FROM users_table as "users" WHERE value_1 > 50 and value_1 < 70) AS t ON (t.user_id = q.user_id)) as final_query GROUP BY types -ORDER BY types; \ No newline at end of file +ORDER BY types; + +DROP TABLE user_buy_test_table; +DROP TABLE users_ref_test_table; +DROP TABLE users_return_test_table; diff --git a/src/test/regress/sql/multi_subquery_in_where_reference_clause.sql b/src/test/regress/sql/multi_subquery_in_where_reference_clause.sql index 0126f362d..54266b085 100644 --- a/src/test/regress/sql/multi_subquery_in_where_reference_clause.sql +++ b/src/test/regress/sql/multi_subquery_in_where_reference_clause.sql @@ -22,6 +22,78 @@ HAVING count(*) > 66 ORDER BY user_id LIMIT 5; +-- subqueries in WHERE with IN operator +SELECT + user_id +FROM + users_table +WHERE + value_2 IN + (SELECT + value_2 + FROM + events_reference_table + WHERE + users_table.user_id = events_reference_table.user_id + ) +GROUP BY user_id +ORDER BY user_id +LIMIT 3; + +-- subqueries in WHERE with NOT EXISTS operator, should work since +-- reference table in the inner part of the join +SELECT + user_id +FROM + users_table +WHERE + NOT EXISTS + (SELECT + value_2 + FROM + events_reference_table + WHERE + users_table.user_id = events_reference_table.user_id + ) +GROUP BY user_id +ORDER BY user_id +LIMIT 3; + +-- subqueries in WHERE with NOT EXISTS operator, should not work +-- there is a reference table in the outer part of the join +SELECT + user_id +FROM + users_reference_table +WHERE + NOT EXISTS + (SELECT + value_2 + FROM + events_table + WHERE + users_reference_table.user_id = events_table.user_id + ) +LIMIT 3; + +-- subqueries in WHERE with IN operator without equality +SELECT + user_id +FROM + users_table +WHERE + value_2 IN + (SELECT + value_2 + FROM + events_reference_table + WHERE + users_table.user_id > events_reference_table.user_id + ) +GROUP BY user_id +ORDER BY user_id +LIMIT 3; + -- have reference table without any equality, should error out SELECT user_id @@ -125,6 +197,3 @@ SELECT user_id, value_2 FROM users_table WHERE HAVING sum(submit_card_info) > 0 ) ORDER BY 1, 2; - -DROP TABLE events_reference_table; -DROP TABLE users_reference_table; \ No newline at end of file diff --git a/src/test/regress/sql/multi_subquery_union.sql b/src/test/regress/sql/multi_subquery_union.sql index ef960e49e..f48d13603 100644 --- a/src/test/regress/sql/multi_subquery_union.sql +++ b/src/test/regress/sql/multi_subquery_union.sql @@ -17,6 +17,16 @@ FROM ( ORDER BY 2 DESC,1 LIMIT 5; +-- a very simple union query with reference table +SELECT user_id, counter +FROM ( + SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (1, 2, 3, 4, 5) + UNION + SELECT user_id, value_2 % 10 AS counter FROM events_reference_table WHERE event_type IN (5, 6, 7, 8, 9, 10) +) user_id +ORDER BY 2 DESC,1 +LIMIT 5; + -- the same query with union all SELECT user_id, counter FROM ( @@ -27,6 +37,16 @@ FROM ( ORDER BY 2 DESC,1 LIMIT 5; +-- the same query with union all and reference table +SELECT user_id, counter +FROM ( + SELECT user_id, value_2 % 10 AS counter FROM events_table WHERE event_type IN (1, 2, 3, 4, 5) + UNION ALL + SELECT user_id, value_2 % 10 AS counter FROM events_reference_table WHERE event_type IN (5, 6, 7, 8, 9, 10) +) user_id +ORDER BY 2 DESC,1 +LIMIT 5; + -- the same query with group by SELECT user_id, sum(counter) FROM ( @@ -102,6 +122,21 @@ FROM ( ) user_id GROUP BY user_id ORDER BY 1 DESC LIMIT 5; +-- similar query this time more subqueries with reference table and target list contains a resjunk entry +SELECT sum(counter) +FROM ( + SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 20 GROUP BY user_id HAVING sum(value_2) > 500 + UNION + SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 40 and value_1 < 60 GROUP BY user_id HAVING sum(value_2) > 500 + UNION + SELECT user_id, sum(value_2) AS counter FROM users_reference_table where value_1 < 60 and value_1 < 80 GROUP BY user_id HAVING sum(value_2) > 500 + UNION + SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 80 and value_1 < 100 GROUP BY user_id HAVING sum(value_2) > 500 + UNION + SELECT user_id, sum(value_2) AS counter FROM users_table where value_1 < 100 and value_1 < 120 GROUP BY user_id HAVING sum(value_2) > 500 +) user_id +GROUP BY user_id ORDER BY 1 DESC LIMIT 5; + -- similar query as above, with UNION ALL SELECT sum(counter) FROM ( @@ -160,6 +195,49 @@ FROM ( ORDER BY 2 DESC, 1 DESC LIMIT 5; +-- unions within unions with reference table +SELECT * +FROM ( + ( SELECT user_id, + sum(counter) + FROM + (SELECT + user_id, sum(value_2) AS counter + FROM + users_table + GROUP BY + user_id + UNION + SELECT + user_id, sum(value_2) AS counter + FROM + events_reference_table + GROUP BY + user_id) user_id_1 + GROUP BY + user_id) + UNION + (SELECT + user_id, sum(counter) + FROM + (SELECT + user_id, sum(value_2) AS counter + FROM + users_table + GROUP BY + user_id + UNION + SELECT + user_id, sum(value_2) AS counter + FROM + events_table + GROUP BY + user_id) user_id_2 + GROUP BY + user_id)) AS ftop +ORDER BY 2 DESC, 1 DESC +LIMIT 5; + -- top level unions are wrapped into top level aggregations SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM @@ -377,6 +455,16 @@ FROM (SELECT user_id FROM events_table) ) b; +-- some UNION ALL queries that are going to be pulled up with reference table +SELECT + count(*) +FROM +( + (SELECT user_id FROM users_table) + UNION ALL + (SELECT user_id FROM events_reference_table) +) b; + -- similar query without top level agg SELECT user_id @@ -724,3 +812,6 @@ GROUP BY types ORDER BY types; SET citus.enable_router_execution TO true; + +DROP TABLE events_reference_table; +DROP TABLE users_reference_table;