Merge pull request #3397 from citusdata/cte_inline_pg_11

Fix issues for CTE inlining on Postgres 11
pull/3399/head
Önder Kalacı 2020-01-17 14:39:21 +01:00 committed by GitHub
commit 4b5241c7b2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 834 additions and 653 deletions

View File

@ -768,6 +768,24 @@ InlineCtesAndCreateDistributedPlannedStmt(uint64 planId,
/* after inlining, we shouldn't have any inlinable CTEs */ /* after inlining, we shouldn't have any inlinable CTEs */
Assert(!QueryTreeContainsInlinableCTE(copyOfOriginalQuery)); Assert(!QueryTreeContainsInlinableCTE(copyOfOriginalQuery));
#if PG_VERSION_NUM < 120000
Query *query = planContext->query;
/*
* We had to implement this workaround because on Postgres 11 and below, the
* originalQuery and the query would differ significantly in terms of CTEs: the CTEs
* would not be inlined in the query (as standard_planner() does not inline CTEs
* on PG 11 and below).
*
* Instead, we prefer to pass the inlined query to the distributed planning. We rely
* on the fact that the query includes subqueries, so it will definitely go through
* query pushdown planning. During query pushdown planning, the only relevant query
* tree is the original query.
*/
planContext->query = copyObject(copyOfOriginalQuery);
#endif
/* simply recurse into CreateDistributedPlannedStmt() in a PG_TRY() block */ /* simply recurse into CreateDistributedPlannedStmt() in a PG_TRY() block */
PlannedStmt *result = TryCreateDistributedPlannedStmt(planContext->plan, PlannedStmt *result = TryCreateDistributedPlannedStmt(planContext->plan,
copyOfOriginalQuery, copyOfOriginalQuery,
@ -776,6 +794,15 @@ InlineCtesAndCreateDistributedPlannedStmt(uint64 planId,
planContext-> planContext->
plannerRestrictionContext); plannerRestrictionContext);
#if PG_VERSION_NUM < 120000
/*
* Set back the original query, in case the planning failed and we need to go
* into distributed planning again.
*/
planContext->query = query;
#endif
return result; return result;
} }

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -4,6 +4,8 @@ SET citus.next_shard_id TO 1960000;
CREATE TABLE test_table (key int, value text, other_value jsonb); CREATE TABLE test_table (key int, value text, other_value jsonb);
SELECT create_distributed_table ('test_table', 'key'); SELECT create_distributed_table ('test_table', 'key');
INSERT INTO test_table SELECT i % 10, 'test' || i, row_to_json(row(i, i*18, 'test' || i)) FROM generate_series (0, 100) i;
-- server version because CTE inlining might produce -- server version because CTE inlining might produce
-- different debug messages in PG 11 vs PG 12 -- different debug messages in PG 11 vs PG 12
SHOW server_version \gset SHOW server_version \gset
@ -17,14 +19,16 @@ WITH cte_1 AS (SELECT * FROM test_table)
SELECT SELECT
*, (SELECT 1) *, (SELECT 1)
FROM FROM
cte_1; cte_1
ORDER BY 1 DESC LIMIT 3;
-- Should still not be inlined even if NOT MATERIALIZED is passed -- Should still not be inlined even if NOT MATERIALIZED is passed
WITH cte_1 AS NOT MATERIALIZED (SELECT * FROM test_table) WITH cte_1 AS NOT MATERIALIZED (SELECT * FROM test_table)
SELECT SELECT
*, (SELECT 1) *, (SELECT 1)
FROM FROM
cte_1; cte_1
ORDER BY 2 DESC LIMIT 1;
-- the cte can be inlined because the unsupported -- the cte can be inlined because the unsupported
-- part of the query (subquery in WHERE clause) -- part of the query (subquery in WHERE clause)
@ -68,7 +72,8 @@ WITH cte_1 AS
SELECT *, (SELECT 1) SELECT *, (SELECT 1)
FROM FROM
(SELECT * (SELECT *
FROM cte_1) AS foo; FROM cte_1) AS foo
ORDER BY 2 DESC LIMIT 1;
-- a little more complicated query tree -- a little more complicated query tree
-- Citus does the inlining, the planning fails -- Citus does the inlining, the planning fails
@ -76,7 +81,7 @@ FROM
WITH top_cte AS WITH top_cte AS
(SELECT * (SELECT *
FROM test_table) FROM test_table)
SELECT * SELECT count(*)
FROM top_cte, FROM top_cte,
(WITH cte_1 AS (WITH cte_1 AS
(SELECT * (SELECT *
@ -114,7 +119,7 @@ WITH cte_1 AS
SELECT (SELECT 1) AS KEY FROM ( SELECT (SELECT 1) AS KEY FROM (
WITH cte_2 AS (SELECT *, random() WITH cte_2 AS (SELECT *, random()
FROM (SELECT *,random() FROM cte_1) as foo) FROM (SELECT *,random() FROM cte_1) as foo)
SELECT *, random() FROM cte_2) as bar; SELECT *, random() FROM cte_2) as bar ORDER BY 1 DESC LIMIT 3;
-- in this example, cte_2 can be inlined, because it is not used -- in this example, cte_2 can be inlined, because it is not used
-- on any query that Citus cannot plan. However, cte_1 should not be -- on any query that Citus cannot plan. However, cte_1 should not be
@ -122,7 +127,7 @@ SELECT *, random() FROM cte_2) as bar;
WITH cte_1 AS (SELECT * FROM test_table), WITH cte_1 AS (SELECT * FROM test_table),
cte_2 AS (select * from test_table) cte_2 AS (select * from test_table)
SELECT SELECT
* count(*)
FROM FROM
(SELECT *, (SELECT 1) FROM cte_1) as foo (SELECT *, (SELECT 1) FROM cte_1) as foo
JOIN JOIN
@ -137,7 +142,9 @@ SELECT
FROM FROM
test_table test_table
WHERE WHERE
key = 1; key = 1
ORDER BY 3 DESC
LIMIT 5;
-- router queries are affected by the distributed -- router queries are affected by the distributed
-- cte inlining -- cte inlining
@ -147,7 +154,9 @@ SELECT
FROM FROM
a a
WHERE WHERE
key = 1; key = 1
ORDER BY 1 DESC
LIMIT 5;
-- non router queries are affected by the distributed -- non router queries are affected by the distributed
-- cte inlining as well -- cte inlining as well
@ -240,14 +249,15 @@ FROM
-- inlined -- inlined
WITH cte_1 AS (SELECT *, random() FROM test_table) WITH cte_1 AS (SELECT *, random() FROM test_table)
SELECT SELECT
* key, value
FROM FROM
cte_1; cte_1
ORDER BY 2 DESC LIMIT 1;
-- even with NOT MATERIALIZED volatile functions should not be inlined -- even with NOT MATERIALIZED volatile functions should not be inlined
WITH cte_1 AS NOT MATERIALIZED (SELECT *, random() FROM test_table) WITH cte_1 AS NOT MATERIALIZED (SELECT *, random() FROM test_table)
SELECT SELECT
* count(*)
FROM FROM
cte_1; cte_1;
@ -256,7 +266,7 @@ FROM
-- it is used one level below -- it is used one level below
WITH cte_1 AS (SELECT * FROM test_table) WITH cte_1 AS (SELECT * FROM test_table)
SELECT SELECT
* count(*)
FROM FROM
( (
WITH ct2 AS (SELECT * FROM cte_1) WITH ct2 AS (SELECT * FROM cte_1)
@ -268,7 +278,7 @@ FROM
-- CTE -- CTE
WITH cte_1 AS (SELECT * FROM test_table) WITH cte_1 AS (SELECT * FROM test_table)
SELECT SELECT
* count(DISTINCT key)
FROM FROM
( (
WITH cte_2 AS (SELECT * FROM cte_1), WITH cte_2 AS (SELECT * FROM cte_1),
@ -279,7 +289,7 @@ FROM
-- inlined CTE contains a reference to outer query -- inlined CTE contains a reference to outer query
-- should be fine (because we pushdown the whole query) -- should be fine (because we pushdown the whole query)
SELECT * SELECT count(*)
FROM FROM
(SELECT * (SELECT *
FROM test_table) AS test_table_cte FROM test_table) AS test_table_cte
@ -295,7 +305,7 @@ SELECT *
-- inlined CTE contains a reference to outer query -- inlined CTE contains a reference to outer query
-- should be fine (even if the recursive planning fails -- should be fine (even if the recursive planning fails
-- to recursively plan the query) -- to recursively plan the query)
SELECT * SELECT count(*)
FROM FROM
(SELECT * (SELECT *
FROM test_table) AS test_table_cte FROM test_table) AS test_table_cte
@ -317,9 +327,9 @@ SELECT
* *
FROM FROM
( (
WITH ct2 AS (SELECT * FROM cte_1 LIMIT 5) WITH ct2 AS (SELECT * FROM cte_1 ORDER BY 1, 2, 3 LIMIT 5)
SELECT * FROM ct2 SELECT * FROM ct2
) as foo; ) as foo ORDER BY 1 DESC, 2 DESC, 3 DESC LIMIT 5;
-- all nested CTEs can be inlined -- all nested CTEs can be inlined
WITH cte_1 AS ( WITH cte_1 AS (
@ -343,13 +353,14 @@ SELECT * FROM cte_1 WHERE key = 6;
-- in set operations -- in set operations
WITH cte_1 AS (SELECT * FROM test_table), WITH cte_1 AS (SELECT * FROM test_table),
cte_2 AS (SELECT * FROM test_table) cte_2 AS (SELECT * FROM test_table)
SELECT count(*) FROM (
(SELECT * FROM cte_1 EXCEPT SELECT * FROM test_table) (SELECT * FROM cte_1 EXCEPT SELECT * FROM test_table)
UNION UNION
(SELECT * FROM cte_2); (SELECT * FROM cte_2)) as foo;
-- cte_1 is going to be inlined even inside another set operation -- cte_1 is going to be inlined even inside another set operation
WITH cte_1 AS (SELECT * FROM test_table), WITH cte_1 AS (SELECT * FROM test_table),
cte_2 AS (SELECT * FROM test_table) cte_2 AS (SELECT * FROM test_table ORDER BY 1 DESC LIMIT 3)
(SELECT *, (SELECT 1) FROM cte_1 EXCEPT SELECT *, 1 FROM test_table) (SELECT *, (SELECT 1) FROM cte_1 EXCEPT SELECT *, 1 FROM test_table)
UNION UNION
(SELECT *, 1 FROM cte_2); (SELECT *, 1 FROM cte_2);
@ -369,7 +380,7 @@ UNION
-- subquery pushdown with set operations -- subquery pushdown with set operations
WITH cte_1 AS (SELECT * FROM test_table), WITH cte_1 AS (SELECT * FROM test_table),
cte_2 AS (SELECT * FROM test_table) cte_2 AS (SELECT * FROM test_table)
SELECT * FROM SELECT max(key) FROM
( (
SELECT * FROM cte_1 SELECT * FROM cte_1
UNION UNION
@ -435,19 +446,21 @@ WITH cte_1 AS NOT MATERIALIZED (SELECT * FROM test_table)
DELETE FROM test_table WHERE key NOT IN (SELECT key FROM cte_1); DELETE FROM test_table WHERE key NOT IN (SELECT key FROM cte_1);
-- we don't inline CTEs if they are modifying CTEs -- we don't inline CTEs if they are modifying CTEs
WITH cte_1 AS (DELETE FROM test_table RETURNING key) WITH cte_1 AS (DELETE FROM test_table WHERE key % 3 = 1 RETURNING key)
SELECT * FROM cte_1; SELECT * FROM cte_1 ORDER BY 1 DESC LIMIT 3;
-- NOT MATERIALIZED should not affect modifying CTEs -- NOT MATERIALIZED should not affect modifying CTEs
WITH cte_1 AS NOT MATERIALIZED (DELETE FROM test_table RETURNING key) WITH cte_1 AS NOT MATERIALIZED (DELETE FROM test_table WHERE key % 3 = 0 RETURNING key)
SELECT * FROM cte_1; SELECT count(*) FROM cte_1;
-- cte with column aliases -- cte with column aliases
SELECT * FROM test_table, SELECT * FROM test_table,
(WITH cte_1 (x,y) AS (SELECT * FROM test_table), (WITH cte_1 (x,y) AS (SELECT * FROM test_table),
cte_2 (z,y) AS (SELECT value, other_value, key FROM test_table), cte_2 (z,y) AS (SELECT value, other_value, key FROM test_table),
cte_3 (t,m) AS (SELECT z, y, key as cte_2_key FROM cte_2) cte_3 (t,m) AS (SELECT z, y, key as cte_2_key FROM cte_2)
SELECT * FROM cte_2, cte_3) as bar; SELECT * FROM cte_2, cte_3) as bar
ORDER BY value, other_value, z, y, t, m, cte_2_key
LIMIT 5;
-- cte used in HAVING subquery just works fine -- cte used in HAVING subquery just works fine
-- even if it is inlined -- even if it is inlined
@ -459,7 +472,9 @@ FROM
GROUP BY GROUP BY
key key
HAVING HAVING
(count(*) > (SELECT max FROM cte_1)); (count(*) > (SELECT max FROM cte_1))
ORDER BY 2 DESC
LIMIT 5;
-- cte used in ORDER BY just works fine -- cte used in ORDER BY just works fine
-- even if it is inlined -- even if it is inlined
@ -469,32 +484,33 @@ SELECT
FROM FROM
test_table JOIN cte_1 ON (key = max) test_table JOIN cte_1 ON (key = max)
ORDER BY ORDER BY
cte_1.max; cte_1.max
LIMIT 3;
PREPARE inlined_cte_without_params AS PREPARE inlined_cte_without_params AS
WITH cte_1 AS (SELECT count(*) FROM test_table GROUP BY key) WITH cte_1 AS (SELECT count(*) FROM test_table GROUP BY key)
SELECT * FROM cte_1; SELECT * FROM cte_1 ORDER BY 1 DESC LIMIT 3;
PREPARE non_inlined_cte_without_params AS PREPARE non_inlined_cte_without_params AS
WITH cte_1 AS (SELECT * FROM test_table) WITH cte_1 AS (SELECT * FROM test_table)
SELECT SELECT
*, (SELECT 1) *, (SELECT 1)
FROM FROM
cte_1; cte_1 ORDER BY 1 DESC LIMIT 3;
PREPARE inlined_cte_has_parameter_on_non_dist_key(int) AS PREPARE inlined_cte_has_parameter_on_non_dist_key(text) AS
WITH cte_1 AS (SELECT count(*) FROM test_table WHERE value::int = $1 GROUP BY key) WITH cte_1 AS (SELECT count(*) FROM test_table WHERE value = $1 GROUP BY key)
SELECT * FROM cte_1; SELECT * FROM cte_1 ORDER BY 1 DESC LIMIT 3;
PREPARE inlined_cte_has_parameter_on_dist_key(int) AS PREPARE inlined_cte_has_parameter_on_dist_key(int) AS
WITH cte_1 AS (SELECT count(*) FROM test_table WHERE key > $1 GROUP BY key) WITH cte_1 AS (SELECT count(*) FROM test_table WHERE key > $1 GROUP BY key)
SELECT * FROM cte_1; SELECT * FROM cte_1 ORDER BY 1 DESC LIMIT 3;
PREPARE non_inlined_cte_has_parameter_on_dist_key(int) AS PREPARE non_inlined_cte_has_parameter_on_dist_key(int) AS
WITH cte_1 AS (SELECT * FROM test_table where key > $1) WITH cte_1 AS (SELECT * FROM test_table where key > $1)
SELECT SELECT
*, (SELECT 1) *, (SELECT 1)
FROM FROM
cte_1; cte_1 ORDER BY 1 DESC, 2 DESC, 3 DESC LIMIT 3;
PREPARE retry_planning(int) AS PREPARE retry_planning(int) AS
WITH cte_1 AS (SELECT * FROM test_table WHERE key > $1) WITH cte_1 AS (SELECT * FROM test_table WHERE key > $1)
SELECT json_object_agg(DISTINCT key, value) FROM cte_1; SELECT json_object_agg(DISTINCT key, value) FROM cte_1 ORDER BY max(key), min(value) DESC LIMIT 3;
EXECUTE inlined_cte_without_params; EXECUTE inlined_cte_without_params;
@ -511,12 +527,12 @@ EXECUTE non_inlined_cte_without_params;
EXECUTE non_inlined_cte_without_params; EXECUTE non_inlined_cte_without_params;
EXECUTE non_inlined_cte_without_params; EXECUTE non_inlined_cte_without_params;
EXECUTE inlined_cte_has_parameter_on_non_dist_key(1); EXECUTE inlined_cte_has_parameter_on_non_dist_key('test1');
EXECUTE inlined_cte_has_parameter_on_non_dist_key(2); EXECUTE inlined_cte_has_parameter_on_non_dist_key('test2');
EXECUTE inlined_cte_has_parameter_on_non_dist_key(3); EXECUTE inlined_cte_has_parameter_on_non_dist_key('test3');
EXECUTE inlined_cte_has_parameter_on_non_dist_key(4); EXECUTE inlined_cte_has_parameter_on_non_dist_key('test4');
EXECUTE inlined_cte_has_parameter_on_non_dist_key(5); EXECUTE inlined_cte_has_parameter_on_non_dist_key('test5');
EXECUTE inlined_cte_has_parameter_on_non_dist_key(6); EXECUTE inlined_cte_has_parameter_on_non_dist_key('test6');
EXECUTE inlined_cte_has_parameter_on_dist_key(1); EXECUTE inlined_cte_has_parameter_on_dist_key(1);
EXECUTE inlined_cte_has_parameter_on_dist_key(2); EXECUTE inlined_cte_has_parameter_on_dist_key(2);
@ -542,31 +558,31 @@ EXECUTE retry_planning(6);
-- this test can only work if the CTE is recursively -- this test can only work if the CTE is recursively
-- planned -- planned
WITH b AS (SELECT * FROM test_table) WITH b AS (SELECT * FROM test_table)
SELECT * FROM (SELECT key as x FROM test_table OFFSET 0) as ref LEFT JOIN b ON (ref.x = b.key); SELECT count(*) FROM (SELECT key as x FROM test_table OFFSET 0) as ref LEFT JOIN b ON (ref.x = b.key);
-- this becomes a non-colocated subquery join -- this becomes a non-colocated subquery join
-- because after the CTEs are inlined the joins -- because after the CTEs are inlined the joins
-- become a non-colocated subquery join -- become a non-colocated subquery join
WITH a AS (SELECT * FROM test_table), WITH a AS (SELECT * FROM test_table),
b AS (SELECT * FROM test_table) b AS (SELECT * FROM test_table)
SELECT * FROM a LEFT JOIN b ON (a.value = b.value); SELECT count(*) FROM a LEFT JOIN b ON (a.value = b.value);
-- cte a has to be recursively planned because of OFFSET 0 -- cte a has to be recursively planned because of OFFSET 0
-- after that, cte b also requires recursive planning -- after that, cte b also requires recursive planning
WITH a AS (SELECT * FROM test_table OFFSET 0), WITH a AS (SELECT * FROM test_table OFFSET 0),
b AS (SELECT * FROM test_table) b AS (SELECT * FROM test_table)
SELECT * FROM a LEFT JOIN b ON (a.value = b.value); SELECT min(a.key) FROM a LEFT JOIN b ON (a.value = b.value);
-- after both CTEs are inlined, this becomes non-colocated subquery join -- after both CTEs are inlined, this becomes non-colocated subquery join
WITH cte_1 AS (SELECT * FROM test_table), WITH cte_1 AS (SELECT * FROM test_table),
cte_2 AS (SELECT * FROM test_table) cte_2 AS (SELECT * FROM test_table)
SELECT * FROM cte_1 JOIN cte_2 ON (cte_1.value > cte_2.value); SELECT * FROM cte_1 JOIN cte_2 ON (cte_1.value > cte_2.value) ORDER BY 1,2,3,4,5,6 DESC LIMIT 3;;
-- full join is only supported when both sides are -- full join is only supported when both sides are
-- recursively planned -- recursively planned
WITH cte_1 AS (SELECT value FROM test_table WHERE key > 1), WITH cte_1 AS (SELECT value FROM test_table WHERE key > 1),
cte_2 AS (SELECT value FROM test_table WHERE key > 3) cte_2 AS (SELECT value FROM test_table WHERE key > 3)
SELECT * FROM cte_1 FULL JOIN cte_2 USING (value); SELECT * FROM cte_1 FULL JOIN cte_2 USING (value) ORDER BY 1 DESC LIMIT 3;;
-- an unsupported agg. for multi-shard queries -- an unsupported agg. for multi-shard queries
-- so CTE has to be recursively planned -- so CTE has to be recursively planned
@ -580,7 +596,7 @@ SELECT json_object_agg(DISTINCT key, value) FROM cte_1;
-- "some" of the CTEs -- "some" of the CTEs
WITH cte_1 AS (SELECT value FROM test_table WHERE key > 1), WITH cte_1 AS (SELECT value FROM test_table WHERE key > 1),
cte_2 AS (SELECT max(value) as value FROM test_table WHERE key > 3) cte_2 AS (SELECT max(value) as value FROM test_table WHERE key > 3)
SELECT * FROM cte_1 JOIN cte_2 USING (value); SELECT count(*) FROM cte_1 JOIN cte_2 USING (value);
-- prevent DROP CASCADE to give notices -- prevent DROP CASCADE to give notices