diff --git a/src/test/regress/expected/multi_subquery_misc.out b/src/test/regress/expected/multi_subquery_misc.out index 32f5ab801..fb8ac1d39 100644 --- a/src/test/regress/expected/multi_subquery_misc.out +++ b/src/test/regress/expected/multi_subquery_misc.out @@ -270,12 +270,15 @@ CREATE FUNCTION sql_subquery_test(int, int) RETURNS bigint AS $$ WHERE users_table.value_1 < $2; $$ LANGUAGE SQL; --- should error out +-- this fails pg <18 +-- succeeds in pg18 because of pg18 commit 0dca5d68d +-- -> Change SQL-language functions to use the plan cache SELECT sql_subquery_test(1,1); -ERROR: could not create distributed plan -DETAIL: Possibly this is caused by the use of parameters in SQL functions, which is not supported in Citus. -HINT: Consider using PL/pgSQL functions instead. -CONTEXT: SQL function "sql_subquery_test" statement 1 + sql_subquery_test +--------------------------------------------------------------------- + 307 +(1 row) + -- the joins are actually removed since they are -- not needed by PostgreSQL planner (e.g., target list -- doesn't contain anything from there) diff --git a/src/test/regress/expected/multi_subquery_misc_0.out b/src/test/regress/expected/multi_subquery_misc_0.out new file mode 100644 index 000000000..8fa135abc --- /dev/null +++ b/src/test/regress/expected/multi_subquery_misc_0.out @@ -0,0 +1,390 @@ +-- multi subquery pushdown misc aims to test subquery pushdown queries with +-- (i) Prepared statements +-- (ii) PL/PGSQL functions +-- (iii) SQL functions +-- the tables that are used depends to multi_behavioral_analytics_create_table.sql +-- We don't need shard id sequence here, so commented out to prevent conflicts with concurrent tests +SET citus.enable_router_execution TO false; +PREPARE prepared_subquery_1 AS +SELECT + user_id, + user_lastseen, + array_length(event_array, 1) +FROM ( + SELECT + user_id, + max(u.time) as user_lastseen, + array_agg(event_type ORDER BY u.time) AS event_array + FROM ( + SELECT user_id, time + FROM users_table + WHERE + user_id >= 1 AND + user_id <= 3 AND + users_table.value_1 > 1 AND users_table.value_1 < 3 + ) u LEFT JOIN LATERAL ( + SELECT event_type, time + FROM events_table + WHERE user_id = u.user_id AND + events_table.event_type > 1 AND events_table.event_type < 3 + ) t ON true + GROUP BY user_id +) AS shard_union +ORDER BY user_lastseen DESC, user_id; +EXECUTE prepared_subquery_1; + user_id | user_lastseen | array_length +--------------------------------------------------------------------- + 2 | Thu Nov 23 11:47:26.900284 2017 | 12 + 3 | Thu Nov 23 11:18:53.114408 2017 | 14 +(2 rows) + +PREPARE prepared_subquery_2(int, int) AS +SELECT + user_id, + user_lastseen, + array_length(event_array, 1) +FROM ( + SELECT + user_id, + max(u.time) as user_lastseen, + array_agg(event_type ORDER BY u.time) AS event_array + FROM ( + SELECT user_id, time + FROM users_table + WHERE + user_id >= $1 AND + user_id <= $2 AND + users_table.value_1 > 1 AND users_table.value_1 < 3 + ) u LEFT JOIN LATERAL ( + SELECT event_type, time + FROM events_table + WHERE user_id = u.user_id AND + events_table.event_type > 1 AND events_table.event_type < 3 + ) t ON true + GROUP BY user_id +) AS shard_union +ORDER BY user_lastseen DESC, user_id; +-- should be fine with more than five executions +EXECUTE prepared_subquery_2(1, 3); + user_id | user_lastseen | array_length +--------------------------------------------------------------------- + 2 | Thu Nov 23 11:47:26.900284 2017 | 12 + 3 | Thu Nov 23 11:18:53.114408 2017 | 14 +(2 rows) + +EXECUTE prepared_subquery_2(1, 3); + user_id | user_lastseen | array_length +--------------------------------------------------------------------- + 2 | Thu Nov 23 11:47:26.900284 2017 | 12 + 3 | Thu Nov 23 11:18:53.114408 2017 | 14 +(2 rows) + +EXECUTE prepared_subquery_2(1, 3); + user_id | user_lastseen | array_length +--------------------------------------------------------------------- + 2 | Thu Nov 23 11:47:26.900284 2017 | 12 + 3 | Thu Nov 23 11:18:53.114408 2017 | 14 +(2 rows) + +EXECUTE prepared_subquery_2(1, 3); + user_id | user_lastseen | array_length +--------------------------------------------------------------------- + 2 | Thu Nov 23 11:47:26.900284 2017 | 12 + 3 | Thu Nov 23 11:18:53.114408 2017 | 14 +(2 rows) + +EXECUTE prepared_subquery_2(1, 3); + user_id | user_lastseen | array_length +--------------------------------------------------------------------- + 2 | Thu Nov 23 11:47:26.900284 2017 | 12 + 3 | Thu Nov 23 11:18:53.114408 2017 | 14 +(2 rows) + +EXECUTE prepared_subquery_2(1, 3); + user_id | user_lastseen | array_length +--------------------------------------------------------------------- + 2 | Thu Nov 23 11:47:26.900284 2017 | 12 + 3 | Thu Nov 23 11:18:53.114408 2017 | 14 +(2 rows) + +EXECUTE prepared_subquery_2(1, 3); + user_id | user_lastseen | array_length +--------------------------------------------------------------------- + 2 | Thu Nov 23 11:47:26.900284 2017 | 12 + 3 | Thu Nov 23 11:18:53.114408 2017 | 14 +(2 rows) + +-- prepared statements with subqueries in WHERE clause +PREPARE prepared_subquery_3(int, int, int, int, int, int) AS +SELECT user_id +FROM users_table +WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= $4 AND value_1 <= $3) + AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= $5 AND value_1 <= $6) + AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= $1 AND value_1 <= $2) +GROUP BY + user_id +ORDER BY + user_id DESC + LIMIT 5; +-- enough times (6+) to actually use prepared statements +EXECUTE prepared_subquery_3(4, 5, 1, 0, 2, 3); + user_id +--------------------------------------------------------------------- + 6 + 5 + 4 + 3 + 2 +(5 rows) + +EXECUTE prepared_subquery_3(4, 5, 1, 0, 2, 3); + user_id +--------------------------------------------------------------------- + 6 + 5 + 4 + 3 + 2 +(5 rows) + +EXECUTE prepared_subquery_3(4, 5, 1, 0, 2, 3); + user_id +--------------------------------------------------------------------- + 6 + 5 + 4 + 3 + 2 +(5 rows) + +EXECUTE prepared_subquery_3(4, 5, 1, 0, 2, 3); + user_id +--------------------------------------------------------------------- + 6 + 5 + 4 + 3 + 2 +(5 rows) + +EXECUTE prepared_subquery_3(4, 5, 1, 0, 2, 3); + user_id +--------------------------------------------------------------------- + 6 + 5 + 4 + 3 + 2 +(5 rows) + +EXECUTE prepared_subquery_3(4, 5, 1, 0, 2, 3); + user_id +--------------------------------------------------------------------- + 6 + 5 + 4 + 3 + 2 +(5 rows) + +CREATE FUNCTION plpgsql_subquery_test(int, int) RETURNS TABLE(count bigint) AS $$ +DECLARE +BEGIN + RETURN QUERY + SELECT + count(*) + FROM + users_table + JOIN + (SELECT + ma.user_id, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob + FROM + users_table AS ma, events_table as short_list + WHERE + short_list.user_id = ma.user_id and ma.value_1 < $1 and short_list.event_type < 3 + ) temp + ON users_table.user_id = temp.user_id + WHERE + users_table.value_1 < $2; + +END; +$$ LANGUAGE plpgsql; +-- enough times (6+) to actually use prepared statements +SELECT plpgsql_subquery_test(1, 2); + plpgsql_subquery_test +--------------------------------------------------------------------- + 539 +(1 row) + +SELECT plpgsql_subquery_test(1, 2); + plpgsql_subquery_test +--------------------------------------------------------------------- + 539 +(1 row) + +SELECT plpgsql_subquery_test(1, 2); + plpgsql_subquery_test +--------------------------------------------------------------------- + 539 +(1 row) + +SELECT plpgsql_subquery_test(1, 2); + plpgsql_subquery_test +--------------------------------------------------------------------- + 539 +(1 row) + +SELECT plpgsql_subquery_test(1, 2); + plpgsql_subquery_test +--------------------------------------------------------------------- + 539 +(1 row) + +SELECT plpgsql_subquery_test(1, 2); + plpgsql_subquery_test +--------------------------------------------------------------------- + 539 +(1 row) + +-- this should also work, but should return 0 given that int = NULL is always returns false +SELECT plpgsql_subquery_test(1, NULL); + plpgsql_subquery_test +--------------------------------------------------------------------- + 0 +(1 row) + +CREATE FUNCTION sql_subquery_test(int, int) RETURNS bigint AS $$ + SELECT + count(*) + FROM + users_table + JOIN + (SELECT + ma.user_id, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob + FROM + users_table AS ma, events_table as short_list + WHERE + short_list.user_id = ma.user_id and ma.value_1 < $1 and short_list.event_type < 3 + ) temp + ON users_table.user_id = temp.user_id + WHERE + users_table.value_1 < $2; +$$ LANGUAGE SQL; +-- this fails pg <18 +-- succeeds in pg18 because of pg18 commit 0dca5d68d +-- -> Change SQL-language functions to use the plan cache +SELECT sql_subquery_test(1,1); +ERROR: could not create distributed plan +DETAIL: Possibly this is caused by the use of parameters in SQL functions, which is not supported in Citus. +HINT: Consider using PL/pgSQL functions instead. +CONTEXT: SQL function "sql_subquery_test" statement 1 +-- the joins are actually removed since they are +-- not needed by PostgreSQL planner (e.g., target list +-- doesn't contain anything from there) +-- but Citus can still pushdown this query +SELECT + t1.user_id, count(*) +FROM users_table t1 +LEFT JOIN ( + SELECT + user_id + FROM + users_table + UNION + SELECT + user_id + FROM + events_table +) t2 ON t1.user_id = t2.user_id +INNER JOIN ( + SELECT + user_id + FROM + users_table +) t3 ON t1.user_id = t3.user_id +GROUP BY 1 +ORDER BY 2 DESC; + user_id | count +--------------------------------------------------------------------- + 5 | 676 + 4 | 529 + 2 | 324 + 3 | 289 + 6 | 100 + 1 | 49 +(6 rows) + +-- the joins are actually removed since they are +-- not needed by PostgreSQL planner (e.g., target list +-- doesn't contain anything from there) +-- but Citus can still plan this query even though the query +-- is not safe to pushdown +SELECT + t1.user_id, count(*) +FROM users_table t1 +LEFT JOIN ( + SELECT + user_id + FROM + users_table + UNION + SELECT + value_2 + FROM + events_table +) t2 ON t1.user_id = t2.user_id +INNER JOIN ( + SELECT + user_id + FROM + users_table +) t3 ON t1.user_id = t3.user_id +GROUP BY 1 +ORDER BY 2 DESC; + user_id | count +--------------------------------------------------------------------- + 5 | 676 + 4 | 529 + 2 | 324 + 3 | 289 + 6 | 100 + 1 | 49 +(6 rows) + +-- Similar to the above queries, but +-- this time the joins are not removed because +-- target list contains all the entries +SET citus.enable_router_execution TO true; +SELECT + * +FROM users_table t1 +LEFT JOIN ( + SELECT + user_id + FROM + users_table + UNION + SELECT + user_id + FROM + events_table +) t2 ON t1.user_id = t2.user_id +INNER JOIN ( + SELECT + user_id + FROM + users_table +) t3 ON t1.user_id = t3.user_id +ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC, 5 DESC, 6 DESC, 7 DESC, 8 DESC +LIMIT 5; + user_id | time | value_1 | value_2 | value_3 | value_4 | user_id | user_id +--------------------------------------------------------------------- + 6 | Thu Nov 23 14:43:18.024104 2017 | 3 | 2 | 5 | | 6 | 6 + 6 | Thu Nov 23 14:43:18.024104 2017 | 3 | 2 | 5 | | 6 | 6 + 6 | Thu Nov 23 14:43:18.024104 2017 | 3 | 2 | 5 | | 6 | 6 + 6 | Thu Nov 23 14:43:18.024104 2017 | 3 | 2 | 5 | | 6 | 6 + 6 | Thu Nov 23 14:43:18.024104 2017 | 3 | 2 | 5 | | 6 | 6 +(5 rows) + +DROP FUNCTION plpgsql_subquery_test(int, int); +DROP FUNCTION sql_subquery_test(int, int); diff --git a/src/test/regress/sql/multi_subquery_misc.sql b/src/test/regress/sql/multi_subquery_misc.sql index 2cdcc810a..e39c7073a 100644 --- a/src/test/regress/sql/multi_subquery_misc.sql +++ b/src/test/regress/sql/multi_subquery_misc.sql @@ -148,7 +148,9 @@ CREATE FUNCTION sql_subquery_test(int, int) RETURNS bigint AS $$ users_table.value_1 < $2; $$ LANGUAGE SQL; --- should error out +-- this fails pg <18 +-- succeeds in pg18 because of pg18 commit 0dca5d68d +-- -> Change SQL-language functions to use the plan cache SELECT sql_subquery_test(1,1);