diff --git a/src/backend/distributed/planner/multi_logical_optimizer.c b/src/backend/distributed/planner/multi_logical_optimizer.c index 9133cda61..6bd750e51 100644 --- a/src/backend/distributed/planner/multi_logical_optimizer.c +++ b/src/backend/distributed/planner/multi_logical_optimizer.c @@ -3213,6 +3213,24 @@ WorkerLimitCount(MultiExtendedOp *originalOpNode) return NULL; } + /* + * During subquery pushdown planning original query is used. In that case, + * certain expressions such as parameters are not evaluated and converted + * into Consts on the op node. + */ + if (!IsA(originalOpNode->limitCount, Const)) + { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unsupported limit clause"))); + } + + /* same as the above but this time for OFFSET clause */ + if (originalOpNode->limitOffset && !IsA(originalOpNode->limitOffset, Const)) + { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unsupported offset clause"))); + } + /* * If we don't have group by clauses, or if we have order by clauses without * aggregates, we can push down the original limit. Else if we have order by diff --git a/src/backend/distributed/planner/multi_logical_planner.c b/src/backend/distributed/planner/multi_logical_planner.c index a25fcb149..f0c87c6de 100644 --- a/src/backend/distributed/planner/multi_logical_planner.c +++ b/src/backend/distributed/planner/multi_logical_planner.c @@ -19,6 +19,7 @@ #include "catalog/pg_am.h" #include "catalog/pg_class.h" #include "commands/defrem.h" +#include "distributed/citus_clauses.h" #include "distributed/colocation_utils.h" #include "distributed/metadata_cache.h" #include "distributed/multi_logical_optimizer.h" @@ -2992,6 +2993,17 @@ SubqueryPushdownMultiPlanTree(Query *queryTree) (Node *) make_ands_implicit((Expr *) extendedOpNode->havingQual); } + /* + * Postgres standard planner evaluates expressions in the LIMIT/OFFSET clauses. + * Since we're using original query here, we should manually evaluate the + * expression on the LIMIT and OFFSET clauses. Note that logical optimizer + * expects those clauses to be already evaluated. + */ + extendedOpNode->limitCount = + PartiallyEvaluateExpression(extendedOpNode->limitCount, NULL); + extendedOpNode->limitOffset = + PartiallyEvaluateExpression(extendedOpNode->limitOffset, NULL); + SetChild((MultiUnaryNode *) extendedOpNode, currentTopNode); currentTopNode = (MultiNode *) extendedOpNode; diff --git a/src/backend/distributed/utils/citus_clauses.c b/src/backend/distributed/utils/citus_clauses.c index 91f2a0db2..0016499a3 100644 --- a/src/backend/distributed/utils/citus_clauses.c +++ b/src/backend/distributed/utils/citus_clauses.c @@ -31,7 +31,6 @@ typedef struct FunctionEvaluationContext /* private function declarations */ -static Node * PartiallyEvaluateExpression(Node *expression, PlanState *planState); static Node * EvaluateNodeIfReferencesFunction(Node *expression, PlanState *planState); static Node * PartiallyEvaluateExpressionMutator(Node *expression, FunctionEvaluationContext *context); @@ -162,7 +161,7 @@ ExecuteMasterEvaluableFunctions(Query *query, PlanState *planState) * Walks the expression evaluating any node which invokes a function as long as a Var * doesn't show up in the parameter list. */ -static Node * +Node * PartiallyEvaluateExpression(Node *expression, PlanState *planState) { FunctionEvaluationContext globalContext = { planState, false }; diff --git a/src/include/distributed/citus_clauses.h b/src/include/distributed/citus_clauses.h index 5612fc3b2..4bd619000 100644 --- a/src/include/distributed/citus_clauses.h +++ b/src/include/distributed/citus_clauses.h @@ -17,5 +17,6 @@ extern bool RequiresMasterEvaluation(Query *query); extern void ExecuteMasterEvaluableFunctions(Query *query, PlanState *planState); +extern Node * PartiallyEvaluateExpression(Node *expression, PlanState *planState); #endif /* CITUS_CLAUSES_H */ diff --git a/src/test/regress/expected/multi_subquery_behavioral_analytics.out b/src/test/regress/expected/multi_subquery_behavioral_analytics.out index 5d2a93480..f510949ef 100644 --- a/src/test/regress/expected/multi_subquery_behavioral_analytics.out +++ b/src/test/regress/expected/multi_subquery_behavioral_analytics.out @@ -1672,4 +1672,147 @@ WHERE b.user_id IS NULL GROUP BY a.user_id; ERROR: cannot push down this subquery DETAIL: Table expressions other than simple relations and subqueries are currently unsupported +-- same query without LIMIT/OFFSET returns 30 rows +SET client_min_messages TO DEBUG1; +-- now, lets use a simple expression on the LIMIT and explicit coercion on the OFFSET +SELECT user_id, array_length(events_table, 1) +FROM ( + SELECT user_id, array_agg(event ORDER BY time) AS events_table + FROM ( + SELECT u.user_id, e.event_type::text AS event, e.time + FROM users_table AS u, + events_table AS e + WHERE u.user_id = e.user_id + AND e.event_type IN (100, 101, 102) + ) t + GROUP BY user_id +) q +ORDER BY 2 DESC, 1 +LIMIT 3+3 OFFSET 5::smallint; +DEBUG: push down of limit count: 11 + user_id | array_length +---------+-------------- + 23 | 115 + 46 | 115 + 10 | 114 + 96 | 113 + 73 | 111 + 91 | 107 +(6 rows) + +-- now, lets use implicit coersion in LIMIT and a simple expressions on OFFSET +SELECT user_id, array_length(events_table, 1) +FROM ( + SELECT user_id, array_agg(event ORDER BY time) AS events_table + FROM ( + SELECT u.user_id, e.event_type::text AS event, e.time + FROM users_table AS u, + events_table AS e + WHERE u.user_id = e.user_id + AND e.event_type IN (100, 101, 102) + ) t + GROUP BY user_id +) q +ORDER BY 2 DESC, 1 +LIMIT '3' OFFSET 27+2; +DEBUG: push down of limit count: 32 + user_id | array_length +---------+-------------- + 0 | 54 +(1 row) + +-- create a test function which is marked as volatile +CREATE OR REPLACE FUNCTION volatile_func_test() + RETURNS INT AS $$ + SELECT 5; + $$ LANGUAGE sql VOLATILE; +-- Citus should be able to evalute functions/row comparisons on the LIMIT/OFFSET +SELECT user_id, array_length(events_table, 1) +FROM ( + SELECT user_id, array_agg(event ORDER BY time) AS events_table + FROM ( + SELECT u.user_id, e.event_type::text AS event, e.time + FROM users_table AS u, + events_table AS e + WHERE u.user_id = e.user_id + AND e.event_type IN (100, 101, 102) + ) t + GROUP BY user_id +) q +ORDER BY 2 DESC, 1 +LIMIT volatile_func_test() + (ROW(1,2,NULL) < ROW(1,3,0))::int OFFSET volatile_func_test() + volatile_func_test(); +DEBUG: push down of limit count: 16 + user_id | array_length +---------+-------------- + 91 | 107 + 69 | 103 + 67 | 101 + 35 | 100 + 80 | 100 + 86 | 100 +(6 rows) + +-- now, lets use expressions on both the LIMIT and OFFSET +SELECT user_id, array_length(events_table, 1) +FROM ( + SELECT user_id, array_agg(event ORDER BY time) AS events_table + FROM ( + SELECT u.user_id, e.event_type::text AS event, e.time + FROM users_table AS u, + events_table AS e + WHERE u.user_id = e.user_id + AND e.event_type IN (100, 101, 102) + ) t + GROUP BY user_id +) q +ORDER BY 2 DESC, 1 +LIMIT (5 > 4)::int OFFSET + CASE + WHEN 5 != 5 THEN 27 + WHEN 1 > 5 THEN 28 + ELSE 29 + END; +DEBUG: push down of limit count: 30 + user_id | array_length +---------+-------------- + 0 | 54 +(1 row) + +-- we don't allow parameters on the LIMIT/OFFSET clauses +PREPARE parametrized_limit AS +SELECT user_id, array_length(events_table, 1) +FROM ( + SELECT user_id, array_agg(event ORDER BY time) AS events_table + FROM ( + SELECT u.user_id, e.event_type::text AS event, e.time + FROM users_table AS u, + events_table AS e + WHERE u.user_id = e.user_id + AND e.event_type IN (100, 101, 102) + ) t + GROUP BY user_id + ) q + ORDER BY 2 DESC, 1 + LIMIT $1 OFFSET $2; + EXECUTE parametrized_limit(3,3); +ERROR: unsupported limit clause +PREPARE parametrized_offset AS +SELECT user_id, array_length(events_table, 1) +FROM ( + SELECT user_id, array_agg(event ORDER BY time) AS events_table + FROM ( + SELECT u.user_id, e.event_type::text AS event, e.time + FROM users_table AS u, + events_table AS e + WHERE u.user_id = e.user_id + AND e.event_type IN (100, 101, 102) + ) t + GROUP BY user_id + ) q + ORDER BY 2 DESC, 1 + LIMIT 3 OFFSET $1; + EXECUTE parametrized_offset(3); +ERROR: unsupported offset clause +SET client_min_messages TO DEFAULT; +DROP FUNCTION volatile_func_test(); SET citus.enable_router_execution TO TRUE; diff --git a/src/test/regress/sql/multi_subquery_behavioral_analytics.sql b/src/test/regress/sql/multi_subquery_behavioral_analytics.sql index 14fe41709..49ec30154 100644 --- a/src/test/regress/sql/multi_subquery_behavioral_analytics.sql +++ b/src/test/regress/sql/multi_subquery_behavioral_analytics.sql @@ -1345,4 +1345,123 @@ FROM (SELECT WHERE b.user_id IS NULL GROUP BY a.user_id; + +-- same query without LIMIT/OFFSET returns 30 rows + +SET client_min_messages TO DEBUG1; +-- now, lets use a simple expression on the LIMIT and explicit coercion on the OFFSET +SELECT user_id, array_length(events_table, 1) +FROM ( + SELECT user_id, array_agg(event ORDER BY time) AS events_table + FROM ( + SELECT u.user_id, e.event_type::text AS event, e.time + FROM users_table AS u, + events_table AS e + WHERE u.user_id = e.user_id + AND e.event_type IN (100, 101, 102) + ) t + GROUP BY user_id +) q +ORDER BY 2 DESC, 1 +LIMIT 3+3 OFFSET 5::smallint; + +-- now, lets use implicit coersion in LIMIT and a simple expressions on OFFSET +SELECT user_id, array_length(events_table, 1) +FROM ( + SELECT user_id, array_agg(event ORDER BY time) AS events_table + FROM ( + SELECT u.user_id, e.event_type::text AS event, e.time + FROM users_table AS u, + events_table AS e + WHERE u.user_id = e.user_id + AND e.event_type IN (100, 101, 102) + ) t + GROUP BY user_id +) q +ORDER BY 2 DESC, 1 +LIMIT '3' OFFSET 27+2; + +-- create a test function which is marked as volatile +CREATE OR REPLACE FUNCTION volatile_func_test() + RETURNS INT AS $$ + SELECT 5; + $$ LANGUAGE sql VOLATILE; + +-- Citus should be able to evalute functions/row comparisons on the LIMIT/OFFSET +SELECT user_id, array_length(events_table, 1) +FROM ( + SELECT user_id, array_agg(event ORDER BY time) AS events_table + FROM ( + SELECT u.user_id, e.event_type::text AS event, e.time + FROM users_table AS u, + events_table AS e + WHERE u.user_id = e.user_id + AND e.event_type IN (100, 101, 102) + ) t + GROUP BY user_id +) q +ORDER BY 2 DESC, 1 +LIMIT volatile_func_test() + (ROW(1,2,NULL) < ROW(1,3,0))::int OFFSET volatile_func_test() + volatile_func_test(); + +-- now, lets use expressions on both the LIMIT and OFFSET +SELECT user_id, array_length(events_table, 1) +FROM ( + SELECT user_id, array_agg(event ORDER BY time) AS events_table + FROM ( + SELECT u.user_id, e.event_type::text AS event, e.time + FROM users_table AS u, + events_table AS e + WHERE u.user_id = e.user_id + AND e.event_type IN (100, 101, 102) + ) t + GROUP BY user_id +) q +ORDER BY 2 DESC, 1 +LIMIT (5 > 4)::int OFFSET + CASE + WHEN 5 != 5 THEN 27 + WHEN 1 > 5 THEN 28 + ELSE 29 + END; + +-- we don't allow parameters on the LIMIT/OFFSET clauses +PREPARE parametrized_limit AS +SELECT user_id, array_length(events_table, 1) +FROM ( + SELECT user_id, array_agg(event ORDER BY time) AS events_table + FROM ( + SELECT u.user_id, e.event_type::text AS event, e.time + FROM users_table AS u, + events_table AS e + WHERE u.user_id = e.user_id + AND e.event_type IN (100, 101, 102) + ) t + GROUP BY user_id + ) q + ORDER BY 2 DESC, 1 + LIMIT $1 OFFSET $2; + + EXECUTE parametrized_limit(3,3); + +PREPARE parametrized_offset AS +SELECT user_id, array_length(events_table, 1) +FROM ( + SELECT user_id, array_agg(event ORDER BY time) AS events_table + FROM ( + SELECT u.user_id, e.event_type::text AS event, e.time + FROM users_table AS u, + events_table AS e + WHERE u.user_id = e.user_id + AND e.event_type IN (100, 101, 102) + ) t + GROUP BY user_id + ) q + ORDER BY 2 DESC, 1 + LIMIT 3 OFFSET $1; + + EXECUTE parametrized_offset(3); + +SET client_min_messages TO DEFAULT; +DROP FUNCTION volatile_func_test(); + SET citus.enable_router_execution TO TRUE;