diff --git a/src/backend/distributed/planner/multi_logical_planner.c b/src/backend/distributed/planner/multi_logical_planner.c index 1f6cc9eb5..fe3428324 100644 --- a/src/backend/distributed/planner/multi_logical_planner.c +++ b/src/backend/distributed/planner/multi_logical_planner.c @@ -128,7 +128,7 @@ static bool IsRecurringRTE(RangeTblEntry *rangeTableEntry, static bool IsRecurringRangeTable(List *rangeTable, RecurringTuplesType *recurType); static bool HasRecurringTuples(Node *node, RecurringTuplesType *recurType); static bool IsReadIntermediateResultFunction(Node *node); -static void ValidateClauseList(List *clauseList); +static DeferredErrorMessage * DeferErrorIfUnsupportedClause(List *clauseList); static bool ExtractFromExpressionWalker(Node *node, QualifierWalkerContext *walkerContext); static List * MultiTableNodeList(List *tableEntryList, List *rangeTableList); @@ -227,6 +227,8 @@ MultiLogicalPlanCreate(Query *originalQuery, Query *queryTree, static bool ShouldUseSubqueryPushDown(Query *originalQuery, Query *rewrittenQuery) { + List *whereClauseList = NIL; + /* * We check the existence of subqueries in FROM clause on the modified query * given that if postgres already flattened the subqueries, MultiPlanTree() @@ -257,6 +259,16 @@ ShouldUseSubqueryPushDown(Query *originalQuery, Query *rewrittenQuery) return true; } + /* + * Some unsupported join clauses in logical planner + * may be supported by subquery pushdown planner. + */ + whereClauseList = WhereClauseList(rewrittenQuery->jointree); + if (DeferErrorIfUnsupportedClause(whereClauseList) != NULL) + { + return true; + } + return false; } @@ -560,11 +572,9 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery, else if (!RestrictionEquivalenceForPartitionKeys(plannerRestrictionContext)) { return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "cannot pushdown the subquery since all relations are not " - "joined using distribution keys", - "Each relation should be joined with at least " - "one another relation using distribution keys and " - "equality operator.", NULL); + "complex joins are only supported when all distributed tables are " + "joined on their distribution columns with equal operator", + NULL, NULL); } /* we shouldn't allow reference tables in the FROM clause when the query has sublinks */ @@ -1770,7 +1780,11 @@ MultiNodeTree(Query *queryTree) /* extract where clause qualifiers and verify we can plan for them */ whereClauseList = WhereClauseList(queryTree->jointree); - ValidateClauseList(whereClauseList); + unsupportedQueryError = DeferErrorIfUnsupportedClause(whereClauseList); + if (unsupportedQueryError) + { + RaiseDeferredErrorInternal(unsupportedQueryError, ERROR); + } /* * If we have a subquery, build a multi table node for the subquery and @@ -2742,12 +2756,15 @@ QualifierList(FromExpr *fromExpr) /* - * ValidateClauseList walks over the given list of clauses, and checks that we - * can recognize all the clauses. This function ensures that we do not drop an - * unsupported clause type on the floor, and thus prevents erroneous results. + * DeferErrorIfUnsupportedClause walks over the given list of clauses, and + * checks that we can recognize all the clauses. This function ensures that + * we do not drop an unsupported clause type on the floor, and thus prevents + * erroneous results. + * + * Returns a deferred error, caller is responsible for raising the error. */ -static void -ValidateClauseList(List *clauseList) +static DeferredErrorMessage * +DeferErrorIfUnsupportedClause(List *clauseList) { ListCell *clauseCell = NULL; foreach(clauseCell, clauseList) @@ -2756,10 +2773,11 @@ ValidateClauseList(List *clauseList) if (!(IsSelectClause(clause) || IsJoinClause(clause) || or_clause(clause))) { - ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("unsupported clause type"))); + return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, + "unsupported clause type", NULL, NULL); } } + return NULL; } @@ -2907,8 +2925,8 @@ IsJoinClause(Node *clause) List *argumentList = NIL; Node *leftArgument = NULL; Node *rightArgument = NULL; - List *leftColumnList = NIL; - List *rightColumnList = NIL; + Node *strippedLeftArgument = NULL; + Node *strippedRightArgument = NULL; if (!IsA(clause, OpExpr)) { @@ -2928,14 +2946,14 @@ IsJoinClause(Node *clause) leftArgument = (Node *) linitial(argumentList); rightArgument = (Node *) lsecond(argumentList); - leftColumnList = pull_var_clause_default(leftArgument); - rightColumnList = pull_var_clause_default(rightArgument); + strippedLeftArgument = strip_implicit_coercions(leftArgument); + strippedRightArgument = strip_implicit_coercions(rightArgument); /* each side of the expression should have only one column */ - if ((list_length(leftColumnList) == 1) && (list_length(rightColumnList) == 1)) + if (IsA(strippedLeftArgument, Var) && IsA(strippedRightArgument, Var)) { - Var *leftColumn = (Var *) linitial(leftColumnList); - Var *rightColumn = (Var *) linitial(rightColumnList); + Var *leftColumn = (Var *) strippedLeftArgument; + Var *rightColumn = (Var *) strippedRightArgument; bool equiJoin = false; bool joinBetweenDifferentTables = false; diff --git a/src/test/regress/expected/multi_complex_expressions.out b/src/test/regress/expected/multi_complex_expressions.out index 3049fa848..5cdcd384b 100644 --- a/src/test/regress/expected/multi_complex_expressions.out +++ b/src/test/regress/expected/multi_complex_expressions.out @@ -343,8 +343,7 @@ SELECT count(*) FROM lineitem JOIN orders ON l_orderkey = o_orderkey -- Check that we make sure local joins are between columns only. SELECT count(*) FROM lineitem, orders WHERE l_orderkey + 1 = o_orderkey; -ERROR: cannot perform local joins that involve expressions -DETAIL: local joins can be performed between columns only +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- Check that we can issue limit/offset queries -- the subquery is recursively planned since it contains OFFSET, which is not pushdownable SELECT * FROM (SELECT o_custkey FROM orders GROUP BY o_custkey ORDER BY o_custkey OFFSET 20) sq ORDER BY 1 LIMIT 5; diff --git a/src/test/regress/expected/multi_complex_expressions_0.out b/src/test/regress/expected/multi_complex_expressions_0.out index 270bb0b11..0a8a0ef66 100644 --- a/src/test/regress/expected/multi_complex_expressions_0.out +++ b/src/test/regress/expected/multi_complex_expressions_0.out @@ -343,8 +343,7 @@ SELECT count(*) FROM lineitem JOIN orders ON l_orderkey = o_orderkey -- Check that we make sure local joins are between columns only. SELECT count(*) FROM lineitem, orders WHERE l_orderkey + 1 = o_orderkey; -ERROR: cannot perform local joins that involve expressions -DETAIL: local joins can be performed between columns only +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- Check that we can issue limit/offset queries -- the subquery is recursively planned since it contains OFFSET, which is not pushdownable SELECT * FROM (SELECT o_custkey FROM orders GROUP BY o_custkey ORDER BY o_custkey OFFSET 20) sq ORDER BY 1 LIMIT 5; diff --git a/src/test/regress/expected/multi_insert_select_non_pushable_queries.out b/src/test/regress/expected/multi_insert_select_non_pushable_queries.out index 55d18f229..b42c1b99a 100644 --- a/src/test/regress/expected/multi_insert_select_non_pushable_queries.out +++ b/src/test/regress/expected/multi_insert_select_non_pushable_queries.out @@ -77,8 +77,7 @@ FROM ( ) t2 ON (t1.user_id = t2.user_id) GROUP BY t1.user_id, hasdone_event ) t GROUP BY user_id, hasdone_event; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- not pushable since the JOIN is not an equi join right part of the UNION -- is not joined on the partition key INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg ) @@ -119,8 +118,7 @@ FROM ( ) t2 ON (t1.user_id = t2.user_id) GROUP BY t1.user_id, hasdone_event ) t GROUP BY user_id, hasdone_event; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- the LEFT JOIN conditon is not on the partition column (i.e., is it part_key divided by 2) INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg ) SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event) @@ -160,8 +158,7 @@ FROM ( ) t2 ON (t1.user_id = (t2.user_id)/2) GROUP BY t1.user_id, hasdone_event ) t GROUP BY user_id, hasdone_event; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator ------------------------------------ ------------------------------------ -- Funnel, grouped by the number of times a user has done an event @@ -234,8 +231,7 @@ GROUP BY count_pay, user_id ORDER BY count_pay; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- not pushable since the JOIN condition is not equi JOIN -- (subquery_1 JOIN subquery_2) INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg) @@ -303,8 +299,7 @@ GROUP BY count_pay, user_id ORDER BY count_pay; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator ------------------------------------ ------------------------------------ -- Most recently seen users_table events_table diff --git a/src/test/regress/expected/multi_subquery.out b/src/test/regress/expected/multi_subquery.out index ec961077d..8c5f18bfc 100644 --- a/src/test/regress/expected/multi_subquery.out +++ b/src/test/regress/expected/multi_subquery.out @@ -52,8 +52,7 @@ FROM orders_subquery GROUP BY l_orderkey) AS unit_prices; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator SELECT avg(unit_price) FROM @@ -67,8 +66,7 @@ FROM l_orderkey = o_custkey GROUP BY l_orderkey) AS unit_prices; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- Subqueries without relation with a volatile functions (non-constant) are planned recursively SELECT count(*) FROM ( SELECT l_orderkey FROM lineitem_subquery JOIN (SELECT random()::int r) sub ON (l_orderkey = r) WHERE r > 10 @@ -156,6 +154,318 @@ ERROR: cannot push down this subquery DETAIL: Limit in subquery without limit in the outermost query is unsupported -- reset the flag for next query SET citus.subquery_pushdown to OFF; +-- some queries without a subquery uses subquery planner +SELECT l_orderkey +FROM + lineitem_subquery l +JOIN + orders_subquery o +ON (l_orderkey = o_orderkey) +WHERE + (o_orderkey < l_quantity) +ORDER BY l_orderkey DESC +LIMIT 10; + l_orderkey +------------ + 39 + 39 + 39 + 39 + 38 + 37 + 37 + 37 + 36 + 33 +(10 rows) + +-- query is still supported if contains additional join +-- clauses that includes arithmetic expressions +SELECT l_orderkey +FROM + lineitem_subquery l +JOIN + orders_subquery o +ON (l_orderkey = o_orderkey) +WHERE + (o_orderkey < l_quantity + 3) +ORDER BY l_orderkey DESC +LIMIT 10; + l_orderkey +------------ + 39 + 39 + 39 + 39 + 38 + 37 + 37 + 37 + 36 + 35 +(10 rows) + +-- implicit typecasts in joins is supported +SELECT l_orderkey +FROM + lineitem_subquery l +JOIN + orders_subquery o +ON (l_orderkey::int8 = o_orderkey::int8) +WHERE + (o_orderkey < l_quantity + 3) +ORDER BY l_orderkey DESC +LIMIT 10; + l_orderkey +------------ + 39 + 39 + 39 + 39 + 38 + 37 + 37 + 37 + 36 + 35 +(10 rows) + +-- non-implicit typecasts in joins is not supported +SELECT l_orderkey +FROM + lineitem_subquery l +JOIN + orders_subquery o +ON (l_orderkey::int8 = o_orderkey::int4) +WHERE + (o_orderkey < l_quantity + 3) +ORDER BY l_orderkey DESC +LIMIT 10; +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +-- implicit typecast supported in equi-join +SELECT l_orderkey +FROM + lineitem_subquery l +JOIN + orders_subquery o +ON (l_orderkey::int8 = o_orderkey::int8) +ORDER BY l_orderkey DESC +LIMIT 10; + l_orderkey +------------ + 14946 + 14946 + 14945 + 14945 + 14945 + 14945 + 14945 + 14945 + 14944 + 14944 +(10 rows) + +-- non-implicit typecast is not supported in equi-join +SELECT l_orderkey +FROM + lineitem_subquery l +JOIN + orders_subquery o +ON (l_orderkey::int4 = o_orderkey::int8) +ORDER BY l_orderkey DESC +LIMIT 10; +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +-- type casts in filters are supported as long as +-- a valid equi-join exists +SELECT l_orderkey +FROM + lineitem_subquery l +JOIN + orders_subquery o +ON (l_orderkey = o_orderkey) +WHERE + (o_orderkey::int8 < l_quantity::int8 + 3) +ORDER BY l_orderkey DESC +LIMIT 10; + l_orderkey +------------ + 39 + 39 + 39 + 39 + 38 + 37 + 37 + 37 + 36 + 35 +(10 rows) + +-- even if type cast is non-implicit +SELECT l_orderkey +FROM + lineitem_subquery l +JOIN + orders_subquery o +ON (l_orderkey = o_orderkey) +WHERE + (o_orderkey::int4 < l_quantity::int8 + 3) +ORDER BY l_orderkey DESC +LIMIT 10; + l_orderkey +------------ + 39 + 39 + 39 + 39 + 38 + 37 + 37 + 37 + 36 + 35 +(10 rows) + +-- query is not supported if contains an partition column +-- equi join that includes arithmetic expressions +SELECT l_orderkey +FROM + lineitem_subquery l +JOIN + orders_subquery o +ON (l_orderkey = o_orderkey + 1) +WHERE + (o_orderkey < l_quantity) +ORDER BY l_orderkey DESC +LIMIT 10; +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +-- query is not supported if there is a single +-- join clause with arithmetic expression. It fails +-- with a different error message +SELECT l_orderkey +FROM + lineitem_subquery l +JOIN + orders_subquery o +ON (l_orderkey = o_orderkey + 1) +ORDER BY l_orderkey DESC +LIMIT 10; +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +-- query is not supported if does not have equi-join clause +SELECT l_orderkey +FROM + lineitem_subquery l +JOIN + orders_subquery o +ON (l_orderkey < o_orderkey) +WHERE + (o_orderkey < l_quantity) +ORDER BY l_orderkey DESC +LIMIT 10; +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +-- distinct queries work +SELECT DISTINCT l_orderkey +FROM + lineitem_subquery l +JOIN + orders_subquery o +ON (l_orderkey = o_orderkey) +WHERE + (o_orderkey < l_quantity) +ORDER BY l_orderkey DESC +LIMIT 10; + l_orderkey +------------ + 39 + 38 + 37 + 36 + 33 + 32 + 7 + 6 + 5 + 4 +(10 rows) + +-- count(distinct) queries work +SELECT COUNT(DISTINCT l_orderkey) +FROM + lineitem_subquery l +JOIN + orders_subquery o +ON (l_orderkey = o_orderkey) +WHERE + (o_orderkey < l_quantity); + count +------- + 13 +(1 row) + +-- the same queries returning a non-partition column +SELECT l_quantity +FROM + lineitem_subquery l +JOIN + orders_subquery o +ON (l_orderkey = o_orderkey) +WHERE + (o_orderkey < l_quantity) +ORDER BY l_quantity DESC +LIMIT 10; + l_quantity +------------ + 50.00 + 49.00 + 46.00 + 46.00 + 45.00 + 44.00 + 44.00 + 44.00 + 43.00 + 43.00 +(10 rows) + +-- distinct queries work +SELECT DISTINCT l_quantity +FROM + lineitem_subquery l +JOIN + orders_subquery o +ON (l_orderkey = o_orderkey) +WHERE + (o_orderkey < l_quantity) +ORDER BY l_quantity DESC +LIMIT 10; + l_quantity +------------ + 50.00 + 49.00 + 46.00 + 45.00 + 44.00 + 43.00 + 42.00 + 41.00 + 40.00 + 39.00 +(10 rows) + +-- count(distinct) queries work +SELECT COUNT(DISTINCT l_quantity) +FROM + lineitem_subquery l +JOIN + orders_subquery o +ON (l_orderkey = o_orderkey) +WHERE + (o_orderkey < l_quantity); + count +------- + 25 +(1 row) + -- Check that we support count distinct with a subquery SELECT count(DISTINCT a) diff --git a/src/test/regress/expected/multi_subquery_behavioral_analytics.out b/src/test/regress/expected/multi_subquery_behavioral_analytics.out index c6ba5fcc3..f919fc19b 100644 --- a/src/test/regress/expected/multi_subquery_behavioral_analytics.out +++ b/src/test/regress/expected/multi_subquery_behavioral_analytics.out @@ -794,8 +794,7 @@ SELECT count(*) FROM HAVING count(distinct value_1) = 2 ) as foo; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- previous push down query SELECT subquery_count FROM (SELECT count(*) as subquery_count FROM @@ -2004,11 +2003,10 @@ FROM ( GROUP BY user_id ) q ORDER BY 2 DESC, 1; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. --- note that the following query has joins on the partition keys --- however we fail to push down it due to the function call on the --- where clause. We probably need to relax that check +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +-- note that the following query has both equi-joins on the partition keys +-- and non-equi-joins on other columns. We now support query filters +-- having non-equi-joins as long as they have equi-joins on partition keys. SELECT users_table.user_id, users_table.value_1, prob FROM @@ -2024,7 +2022,10 @@ FROM ON users_table.user_id = temp.user_id WHERE users_table.value_1 < 3 AND test_join_function_2(users_table.user_id, temp.user_id); -ERROR: unsupported clause type + user_id | value_1 | prob +---------+---------+------ +(0 rows) + -- we do support the following since there is already an equality on the partition -- key and we have an additional join via a function SELECT @@ -2091,8 +2092,7 @@ FROM events_table.time = users_table.time AND events_table.value_2 IN (0, 4) ) as foo; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- we can even allow that on top level joins SELECT count(*) @@ -2142,8 +2142,7 @@ FROM events_table.value_2 IN (1, 5) ) as bar WHERE foo.event_type = bar.event_type; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- DISTINCT in the outer query and DISTINCT in the subquery SELECT DISTINCT users_ids.user_id diff --git a/src/test/regress/expected/multi_subquery_complex_queries.out b/src/test/regress/expected/multi_subquery_complex_queries.out index ab19dbcc4..0f2e103cb 100644 --- a/src/test/regress/expected/multi_subquery_complex_queries.out +++ b/src/test/regress/expected/multi_subquery_complex_queries.out @@ -202,8 +202,7 @@ GROUP BY types ORDER BY types; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- not supported since events_subquery_2 doesn't have partition key on the target list SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM @@ -261,8 +260,7 @@ GROUP BY types ORDER BY types; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- we can support arbitrary subqueries within UNIONs SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM @@ -412,8 +410,7 @@ GROUP BY types ORDER BY types; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- not supported since the join is not equi join SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM @@ -475,8 +472,7 @@ GROUP BY types ORDER BY types; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- not supported since subquery 3 includes a JOIN with non-equi join SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM @@ -538,8 +534,7 @@ GROUP BY types ORDER BY types; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- similar query with more union statements (to enable UNION tree become larger) SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM @@ -805,8 +800,7 @@ INNER JOIN WHERE value_1 > 0 and value_1 < 4) AS t ON (t.user_id = q.user_id)) as final_query GROUP BY types ORDER BY types; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- not supported since events_subquery_4 does not have partition key on the -- target list SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType @@ -863,8 +857,7 @@ INNER JOIN ON (t.user_id = q.user_id)) as final_query GROUP BY types ORDER BY types; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- union all with inner and left joins SELECT user_id, count(*) as cnt FROM @@ -1016,8 +1009,7 @@ INNER JOIN GROUP BY user_id ORDER BY cnt DESC, user_id DESC LIMIT 10; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- -- Union, inner join and left join -- @@ -1441,8 +1433,7 @@ FROM ORDER BY user_id DESC LIMIT 10; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- not supported since the inner JOIN is not on the partition key SELECT user_id, lastseen FROM @@ -1496,8 +1487,7 @@ FROM ORDER BY user_id DESC LIMIT 10; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- not supported since upper LATERAL JOIN is not equi join SELECT user_id, lastseen FROM @@ -1551,8 +1541,7 @@ FROM ORDER BY user_id DESC LIMIT 10; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- not pushdownable since lower LATERAL JOIN is not on the partition key -- not recursively plannable due to LATERAL join where there is a reference -- from an outer query @@ -1702,8 +1691,7 @@ GROUP BY "generated_group_field" ORDER BY generated_group_field DESC, value DESC; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- not supported since the first inner join is not an equi join SELECT count(*) AS value, "generated_group_field" @@ -1746,8 +1734,7 @@ GROUP BY "generated_group_field" ORDER BY generated_group_field DESC, value DESC; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- single level inner joins SELECT "value_3", count(*) AS cnt @@ -1832,8 +1819,7 @@ FROM ) segmentalias_1) "tempQuery" GROUP BY "value_3" ORDER BY cnt, value_3 DESC LIMIT 10; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- nested LATERAL JOINs SET citus.subquery_pushdown to ON; SELECT * diff --git a/src/test/regress/expected/multi_subquery_complex_reference_clause.out b/src/test/regress/expected/multi_subquery_complex_reference_clause.out index 86e47c0f3..81d3d371e 100644 --- a/src/test/regress/expected/multi_subquery_complex_reference_clause.out +++ b/src/test/regress/expected/multi_subquery_complex_reference_clause.out @@ -1128,8 +1128,7 @@ SELECT count(*) FROM (SELECT user_buy_test_table.user_id, random() FROM user_buy_test_table LEFT JOIN users_ref_test_table ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_2 WHERE subquery_1.user_id != subquery_2.user_id ; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- we cannot push this query since hash partitioned tables -- are not joined on partition keys with equality SELECT @@ -1168,8 +1167,7 @@ count(*) AS cnt, "generated_group_field" ORDER BY cnt DESC, generated_group_field ASC LIMIT 10; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- two hash partitioned relations are not joined -- on partiton keys although reference table is fine -- to push down @@ -1192,8 +1190,7 @@ WHERE GROUP BY 1 ORDER BY 2 DESC, 1 DESC LIMIT 5; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator SELECT foo.user_id FROM ( SELECT m.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id) @@ -1210,7 +1207,7 @@ SELECT foo.user_id FROM SELECT r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id) GROUP BY r.user_id ) as foo; -ERROR: unsupported clause type +ERROR: cannot handle complex subqueries when the router executor is disabled -- not pushdownable since the group by contains at least one distributed table -- recursively planned, but hits unsupported clause type error on the top level query SELECT foo.user_id FROM @@ -1232,13 +1229,13 @@ SELECT foo.user_id FROM ( SELECT DISTINCT r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id) ) as foo; -ERROR: unsupported clause type +ERROR: cannot handle complex subqueries when the router executor is disabled -- not supported since distinct on is on the reference table column SELECT foo.user_id FROM ( SELECT DISTINCT ON(r.user_id) r.user_id, random() FROM users_table m JOIN events_reference_table r ON int4eq(m.user_id, r.user_id) ) as foo; -ERROR: unsupported clause type +ERROR: cannot handle complex subqueries when the router executor is disabled -- supported since the distinct on contains at least one distributed table SELECT foo.user_id FROM ( diff --git a/src/test/regress/expected/multi_subquery_in_where_clause.out b/src/test/regress/expected/multi_subquery_in_where_clause.out index e209c5f15..22d93f460 100644 --- a/src/test/regress/expected/multi_subquery_in_where_clause.out +++ b/src/test/regress/expected/multi_subquery_in_where_clause.out @@ -80,8 +80,7 @@ GROUP BY user_id HAVING count(*) > 1 ORDER BY user_id LIMIT 5; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- subqueries in where with ALL operator SELECT user_id @@ -466,8 +465,7 @@ SELECT user_id, value_2 FROM users_table WHERE group by e1.user_id HAVING sum(submit_card_info) > 0 ); -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- left leaf query does not return partition key SELECT user_id @@ -536,8 +534,7 @@ WHERE GROUP BY user_id HAVING count(*) > 1 AND sum(value_2) > 29 ORDER BY 1; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- NOT EXISTS query has non-equi join SELECT user_id, array_length(events_table, 1) FROM ( @@ -563,8 +560,7 @@ FROM ( GROUP BY user_id ) q ORDER BY 2 DESC, 1; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- subquery in where clause doesn't have a relation, but is constant SELECT user_id @@ -655,8 +651,7 @@ FROM users_table WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 1 AND value_1 <= 2) AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 3 AND value_1 <= 4) AND value_2 IN (SELECT user_id FROM users_table WHERE value_1 >= 5 AND value_1 <= 6); -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator CREATE FUNCTION test_join_function(integer, integer) RETURNS bool AS 'select $1 > $2;' LANGUAGE SQL @@ -669,6 +664,5 @@ SELECT user_id, value_2 FROM users_table WHERE AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=1 AND value_3 > 1 AND test_join_function(events_table.user_id, users_table.user_id)) ORDER BY 1 DESC, 2 DESC LIMIT 3; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator DROP FUNCTION test_join_function(int,int); diff --git a/src/test/regress/expected/multi_view.out b/src/test/regress/expected/multi_view.out index 0401f0c9f..d997e68f2 100644 --- a/src/test/regress/expected/multi_view.out +++ b/src/test/regress/expected/multi_view.out @@ -419,8 +419,7 @@ SELECT * FROM GROUP BY ru.user_id ORDER BY 2 DESC, 1) s1 ORDER BY 2 DESC, 1; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- join between views -- recent users who has an event in recent events SELECT ru.user_id FROM recent_users ru JOIN recent_events re USING(user_id) GROUP BY ru.user_id ORDER BY ru.user_id; @@ -514,8 +513,7 @@ SELECT * FROM ON(ru.user_id = et.event_type) ) s1 ORDER BY 2 DESC, 1; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- create a select only view CREATE VIEW selected_users AS SELECT * FROM users_table WHERE value_1 >= 1 and value_1 <3; CREATE VIEW recent_selected_users AS SELECT su.* FROM selected_users su JOIN recent_users ru USING(user_id); diff --git a/src/test/regress/expected/subqueries_not_supported.out b/src/test/regress/expected/subqueries_not_supported.out index 8c9cfcb1d..191d6221c 100644 --- a/src/test/regress/expected/subqueries_not_supported.out +++ b/src/test/regress/expected/subqueries_not_supported.out @@ -126,8 +126,7 @@ FROM (SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar WHERE foo.value_2 = bar.value_2; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- OUTER JOINs where the outer part is recursively planned and not the other way -- around is not supported SELECT diff --git a/src/test/regress/expected/subquery_executors.out b/src/test/regress/expected/subquery_executors.out index 9d672c63c..188bb1ac4 100644 --- a/src/test/regress/expected/subquery_executors.out +++ b/src/test/regress/expected/subquery_executors.out @@ -34,8 +34,7 @@ FROM SELECT user_id FROM users_table ) as bar WHERE foo.counter = bar.user_id; -ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys -DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- subquery with real-time query SELECT count(*) diff --git a/src/test/regress/expected/with_basics.out b/src/test/regress/expected/with_basics.out index 00efb1ec0..5a4596c72 100644 --- a/src/test/regress/expected/with_basics.out +++ b/src/test/regress/expected/with_basics.out @@ -407,7 +407,7 @@ LIMIT 6 (5 rows) --- Unsupported join in CTE +-- non-equi joins in CTE are supported if accompanied with an equi-join WITH top_users AS ( SELECT DISTINCT e.user_id FROM users_table u JOIN events_table e ON (u.user_id = e.user_id AND u.value_1 > e.value_2) ) @@ -421,7 +421,15 @@ ORDER BY user_id LIMIT 5; -ERROR: unsupported clause type + user_id +--------- + 6 + 6 + 6 + 6 + 6 +(5 rows) + -- Join can be supported with another CTE WITH events_table AS ( SELECT * FROM events_table diff --git a/src/test/regress/sql/multi_subquery.sql b/src/test/regress/sql/multi_subquery.sql index bdceb4d83..d789774af 100644 --- a/src/test/regress/sql/multi_subquery.sql +++ b/src/test/regress/sql/multi_subquery.sql @@ -145,6 +145,194 @@ FROM -- reset the flag for next query SET citus.subquery_pushdown to OFF; +-- some queries without a subquery uses subquery planner +SELECT l_orderkey +FROM + lineitem_subquery l +JOIN + orders_subquery o +ON (l_orderkey = o_orderkey) +WHERE + (o_orderkey < l_quantity) +ORDER BY l_orderkey DESC +LIMIT 10; + +-- query is still supported if contains additional join +-- clauses that includes arithmetic expressions +SELECT l_orderkey +FROM + lineitem_subquery l +JOIN + orders_subquery o +ON (l_orderkey = o_orderkey) +WHERE + (o_orderkey < l_quantity + 3) +ORDER BY l_orderkey DESC +LIMIT 10; + +-- implicit typecasts in joins is supported +SELECT l_orderkey +FROM + lineitem_subquery l +JOIN + orders_subquery o +ON (l_orderkey::int8 = o_orderkey::int8) +WHERE + (o_orderkey < l_quantity + 3) +ORDER BY l_orderkey DESC +LIMIT 10; + +-- non-implicit typecasts in joins is not supported +SELECT l_orderkey +FROM + lineitem_subquery l +JOIN + orders_subquery o +ON (l_orderkey::int8 = o_orderkey::int4) +WHERE + (o_orderkey < l_quantity + 3) +ORDER BY l_orderkey DESC +LIMIT 10; + +-- implicit typecast supported in equi-join +SELECT l_orderkey +FROM + lineitem_subquery l +JOIN + orders_subquery o +ON (l_orderkey::int8 = o_orderkey::int8) +ORDER BY l_orderkey DESC +LIMIT 10; + +-- non-implicit typecast is not supported in equi-join +SELECT l_orderkey +FROM + lineitem_subquery l +JOIN + orders_subquery o +ON (l_orderkey::int4 = o_orderkey::int8) +ORDER BY l_orderkey DESC +LIMIT 10; + +-- type casts in filters are supported as long as +-- a valid equi-join exists +SELECT l_orderkey +FROM + lineitem_subquery l +JOIN + orders_subquery o +ON (l_orderkey = o_orderkey) +WHERE + (o_orderkey::int8 < l_quantity::int8 + 3) +ORDER BY l_orderkey DESC +LIMIT 10; + +-- even if type cast is non-implicit +SELECT l_orderkey +FROM + lineitem_subquery l +JOIN + orders_subquery o +ON (l_orderkey = o_orderkey) +WHERE + (o_orderkey::int4 < l_quantity::int8 + 3) +ORDER BY l_orderkey DESC +LIMIT 10; + +-- query is not supported if contains an partition column +-- equi join that includes arithmetic expressions +SELECT l_orderkey +FROM + lineitem_subquery l +JOIN + orders_subquery o +ON (l_orderkey = o_orderkey + 1) +WHERE + (o_orderkey < l_quantity) +ORDER BY l_orderkey DESC +LIMIT 10; + +-- query is not supported if there is a single +-- join clause with arithmetic expression. It fails +-- with a different error message +SELECT l_orderkey +FROM + lineitem_subquery l +JOIN + orders_subquery o +ON (l_orderkey = o_orderkey + 1) +ORDER BY l_orderkey DESC +LIMIT 10; + +-- query is not supported if does not have equi-join clause +SELECT l_orderkey +FROM + lineitem_subquery l +JOIN + orders_subquery o +ON (l_orderkey < o_orderkey) +WHERE + (o_orderkey < l_quantity) +ORDER BY l_orderkey DESC +LIMIT 10; + +-- distinct queries work +SELECT DISTINCT l_orderkey +FROM + lineitem_subquery l +JOIN + orders_subquery o +ON (l_orderkey = o_orderkey) +WHERE + (o_orderkey < l_quantity) +ORDER BY l_orderkey DESC +LIMIT 10; + +-- count(distinct) queries work +SELECT COUNT(DISTINCT l_orderkey) +FROM + lineitem_subquery l +JOIN + orders_subquery o +ON (l_orderkey = o_orderkey) +WHERE + (o_orderkey < l_quantity); + +-- the same queries returning a non-partition column +SELECT l_quantity +FROM + lineitem_subquery l +JOIN + orders_subquery o +ON (l_orderkey = o_orderkey) +WHERE + (o_orderkey < l_quantity) +ORDER BY l_quantity DESC +LIMIT 10; + +-- distinct queries work +SELECT DISTINCT l_quantity +FROM + lineitem_subquery l +JOIN + orders_subquery o +ON (l_orderkey = o_orderkey) +WHERE + (o_orderkey < l_quantity) +ORDER BY l_quantity DESC +LIMIT 10; + +-- count(distinct) queries work +SELECT COUNT(DISTINCT l_quantity) +FROM + lineitem_subquery l +JOIN + orders_subquery o +ON (l_orderkey = o_orderkey) +WHERE + (o_orderkey < l_quantity); + + -- Check that we support count distinct with a subquery SELECT diff --git a/src/test/regress/sql/multi_subquery_behavioral_analytics.sql b/src/test/regress/sql/multi_subquery_behavioral_analytics.sql index 824604e25..7b9091ca1 100644 --- a/src/test/regress/sql/multi_subquery_behavioral_analytics.sql +++ b/src/test/regress/sql/multi_subquery_behavioral_analytics.sql @@ -1650,9 +1650,9 @@ FROM ( ) q ORDER BY 2 DESC, 1; --- note that the following query has joins on the partition keys --- however we fail to push down it due to the function call on the --- where clause. We probably need to relax that check +-- note that the following query has both equi-joins on the partition keys +-- and non-equi-joins on other columns. We now support query filters +-- having non-equi-joins as long as they have equi-joins on partition keys. SELECT users_table.user_id, users_table.value_1, prob FROM diff --git a/src/test/regress/sql/with_basics.sql b/src/test/regress/sql/with_basics.sql index ca96a8075..b60d51b6b 100644 --- a/src/test/regress/sql/with_basics.sql +++ b/src/test/regress/sql/with_basics.sql @@ -239,7 +239,7 @@ ORDER BY LIMIT 5; --- Unsupported join in CTE +-- non-equi joins in CTE are supported if accompanied with an equi-join WITH top_users AS ( SELECT DISTINCT e.user_id FROM users_table u JOIN events_table e ON (u.user_id = e.user_id AND u.value_1 > e.value_2) )