From e40261a1cd219ffe06fc587da2c7b584dab14fc6 Mon Sep 17 00:00:00 2001 From: naisila Date: Wed, 17 Aug 2022 13:15:02 +0300 Subject: [PATCH] Handles EXPLAIN output diffs in PG15, Hash Agg/Join leverage To handle differences in the use of GroupAggregate vs. HashAggregate or Merge Join vs. Hash Join in cases where this detail doesn't seem to matter, we use coordinator_plan(). - coordinator_plan() is updated to remove "Result" lines. There are some cases where we have subplans, so we add a new function that prints all Task Count lines as well. - coordinator_plan_with_subplans() We are still not sure which PG commit is responsible. It could be db0d67db2401eb6238ccc04c6407a4fd4f985832, but disabling enable_group_by_reordering didn't help. --- src/test/regress/expected/cte_inline.out | 24 +++------- .../expected/insert_select_repartition.out | 15 ++---- .../regress/expected/multi_test_helpers.out | 30 ++++++++++++ src/test/regress/expected/multi_view.out | 25 ++-------- src/test/regress/expected/subquery_view.out | 46 ++++--------------- src/test/regress/sql/cte_inline.sql | 7 +++ .../regress/sql/insert_select_repartition.sql | 2 + src/test/regress/sql/multi_test_helpers.sql | 31 +++++++++++++ src/test/regress/sql/multi_view.sql | 2 + src/test/regress/sql/subquery_view.sql | 4 ++ 10 files changed, 98 insertions(+), 88 deletions(-) diff --git a/src/test/regress/expected/cte_inline.out b/src/test/regress/expected/cte_inline.out index 072f076d6..46156ac93 100644 --- a/src/test/regress/expected/cte_inline.out +++ b/src/test/regress/expected/cte_inline.out @@ -423,6 +423,8 @@ DEBUG: join prunable for intervals [1073741824,2147483647] and [0,1073741823] (1 row) -- EXPLAIN should show the differences between MATERIALIZED and NOT MATERIALIZED +\set VERBOSITY terse +SELECT public.coordinator_plan_with_subplans($Q$ EXPLAIN (COSTS OFF) WITH cte_1 AS (SELECT * FROM test_table) SELECT count(*) @@ -431,36 +433,22 @@ FROM JOIN cte_1 as second_entry USING (key); +$Q$); DEBUG: Router planner cannot handle 
multi-shard select queries DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) first_entry JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) second_entry USING (key)) DEBUG: Creating router plan - QUERY PLAN + coordinator_plan_with_subplans --------------------------------------------------------------------- Custom Scan (Citus Adaptive) -> Distributed Subplan XXX_1 -> Custom Scan (Citus Adaptive) Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> Seq Scan on test_table_1960000 test_table Task Count: 1 - Tasks Shown: All - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> Aggregate - -> Merge Join - Merge Cond: (intermediate_result.key = intermediate_result_1.key) - -> Sort - Sort Key: intermediate_result.key - -> Function Scan on read_intermediate_result intermediate_result - -> Sort - Sort Key: intermediate_result_1.key - -> Function Scan on read_intermediate_result intermediate_result_1 -(21 rows) +(5 rows) +\set VERBOSITY default EXPLAIN (COSTS OFF) WITH cte_1 AS NOT MATERIALIZED (SELECT * FROM test_table) SELECT count(*) diff --git a/src/test/regress/expected/insert_select_repartition.out b/src/test/regress/expected/insert_select_repartition.out index 2a36f8fa8..f6cc3db97 100644 --- a/src/test/regress/expected/insert_select_repartition.out +++ 
b/src/test/regress/expected/insert_select_repartition.out @@ -702,23 +702,16 @@ PREPARE insert_plan AS INSERT INTO target_table SELECT a, max(b) FROM source_table WHERE a BETWEEN 1 AND 2 GROUP BY a; +SELECT public.coordinator_plan($Q$ EXPLAIN EXECUTE insert_plan; - QUERY PLAN +$Q$); + coordinator_plan --------------------------------------------------------------------- Custom Scan (Citus INSERT ... SELECT) (cost=0.00..0.00 rows=0 width=0) INSERT/SELECT method: repartition -> Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=100000 width=8) Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> GroupAggregate (cost=44.09..44.28 rows=11 width=8) - Group Key: a - -> Sort (cost=44.09..44.12 rows=11 width=8) - Sort Key: a - -> Seq Scan on source_table_4213606 source_table (cost=0.00..43.90 rows=11 width=8) - Filter: ((a >= 1) AND (a <= 2)) -(13 rows) +(4 rows) SET client_min_messages TO DEBUG1; EXECUTE insert_plan; diff --git a/src/test/regress/expected/multi_test_helpers.out b/src/test/regress/expected/multi_test_helpers.out index 45401c90f..118a052c2 100644 --- a/src/test/regress/expected/multi_test_helpers.out +++ b/src/test/regress/expected/multi_test_helpers.out @@ -17,10 +17,15 @@ BEGIN END; $$LANGUAGE plpgsql; -- Create a function to ignore worker plans in explain output +-- Also remove extra "-> Result" lines for PG15 support CREATE OR REPLACE FUNCTION coordinator_plan(explain_command text, out query_plan text) RETURNS SETOF TEXT AS $$ BEGIN FOR query_plan IN execute explain_command LOOP + IF (query_plan LIKE '%-> Result%' OR query_plan = 'Result') + THEN + CONTINUE; + END IF; RETURN next; IF query_plan LIKE '%Task Count:%' THEN @@ -29,6 +34,31 @@ BEGIN END LOOP; RETURN; END; $$ language plpgsql; +-- Create a function to ignore worker plans in explain output +-- Prints every line up to and including the first "Task Count:" line, and after that only the remaining "Task Count:" lines +-- Also remove extra "-> Result" lines for PG15 support +CREATE OR REPLACE FUNCTION 
coordinator_plan_with_subplans(explain_command text, out query_plan text) +RETURNS SETOF TEXT AS $$ +DECLARE + task_count_line_reached boolean := false; +BEGIN + FOR query_plan IN execute explain_command LOOP + IF (query_plan LIKE '%-> Result%' OR query_plan = 'Result') THEN + CONTINUE; + END IF; + IF NOT task_count_line_reached THEN + RETURN next; + END IF; + IF query_plan LIKE '%Task Count:%' THEN + IF NOT task_count_line_reached THEN + task_count_line_reached := true; + ELSE + RETURN next; + END IF; + END IF; + END LOOP; + RETURN; +END; $$ language plpgsql; -- Create a function to ignore "-> Result" lines for PG15 support -- In PG15 there are some extra "-> Result" lines CREATE OR REPLACE FUNCTION plan_without_result_lines(explain_command text, out query_plan text) diff --git a/src/test/regress/expected/multi_view.out b/src/test/regress/expected/multi_view.out index c7a6b44e0..70fa10874 100644 --- a/src/test/regress/expected/multi_view.out +++ b/src/test/regress/expected/multi_view.out @@ -785,6 +785,7 @@ EXPLAIN (COSTS FALSE) SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER Filter: ((value_1 >= 1) AND (value_1 < 3)) (19 rows) +SELECT public.coordinator_plan($Q$ EXPLAIN (COSTS FALSE) SELECT * FROM ( (SELECT user_id FROM recent_users) @@ -792,32 +793,14 @@ EXPLAIN (COSTS FALSE) SELECT * (SELECT user_id FROM selected_users) ) u WHERE user_id < 4 AND user_id > 1 ORDER BY user_id; - QUERY PLAN +$Q$); + coordinator_plan --------------------------------------------------------------------- Sort Sort Key: remote_scan.user_id -> Custom Scan (Citus Adaptive) Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> Unique - -> Sort - Sort Key: recent_users.user_id - -> Append - -> Subquery Scan on recent_users - -> Sort - Sort Key: (max(users_table."time")) DESC - -> GroupAggregate - Group Key: users_table.user_id - Filter: (max(users_table."time") > '2017-11-23 16:20:33.264457'::timestamp without time 
zone) - -> Sort - Sort Key: users_table.user_id - -> Seq Scan on users_table_1400256 users_table - Filter: ((user_id < 4) AND (user_id > 1)) - -> Seq Scan on users_table_1400256 users_table_1 - Filter: ((value_1 >= 1) AND (value_1 < 3) AND (user_id < 4) AND (user_id > 1)) -(23 rows) +(4 rows) EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10; QUERY PLAN diff --git a/src/test/regress/expected/subquery_view.out b/src/test/regress/expected/subquery_view.out index 535e356d5..32354e329 100644 --- a/src/test/regress/expected/subquery_view.out +++ b/src/test/regress/expected/subquery_view.out @@ -578,11 +578,13 @@ SELECT create_reference_table('reference_table'); (1 row) +SELECT public.coordinator_plan_with_subplans($Q$ EXPLAIN (COSTS OFF) WITH cte AS ( SELECT application_name AS text_col FROM pg_stat_activity ) SELECT * FROM reference_table JOIN cte USING (text_col); - QUERY PLAN +$Q$); + coordinator_plan_with_subplans --------------------------------------------------------------------- Custom Scan (Citus Adaptive) -> Distributed Subplan XXX_1 @@ -590,38 +592,17 @@ EXPLAIN (COSTS OFF) WITH cte AS ( -> Distributed Subplan XXX_2 -> Custom Scan (Citus Adaptive) Task Count: 1 - Tasks Shown: All - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> Hash Left Join - Hash Cond: (intermediate_result.usesysid = u.oid) - -> Hash Left Join - Hash Cond: (intermediate_result.datid = d.oid) - -> Function Scan on read_intermediate_result intermediate_result - -> Hash - -> Seq Scan on pg_database d - -> Hash - -> Seq Scan on pg_authid u Task Count: 1 - Tasks Shown: All - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> Merge Join - Merge Cond: (intermediate_result.application_name = reference_table.text_col) - -> Sort - Sort Key: intermediate_result.application_name - -> Function Scan on read_intermediate_result intermediate_result - -> Sort - Sort Key: reference_table.text_col - -> Seq 
Scan on reference_table_1512000 reference_table -(30 rows) +(7 rows) CREATE OR REPLACE VIEW view_on_views AS SELECT pg_stat_activity.application_name, pg_locks.pid FROM pg_stat_activity, pg_locks; +SELECT public.coordinator_plan_with_subplans($Q$ EXPLAIN (COSTS OFF) WITH cte AS ( SELECT application_name AS text_col FROM view_on_views ) SELECT * FROM reference_table JOIN cte USING (text_col); - QUERY PLAN +$Q$); + coordinator_plan_with_subplans --------------------------------------------------------------------- Custom Scan (Citus Adaptive) -> Distributed Subplan XXX_1 @@ -629,18 +610,7 @@ EXPLAIN (COSTS OFF) WITH cte AS ( -> Function Scan on pg_stat_get_activity s -> Function Scan on pg_lock_status l Task Count: 1 - Tasks Shown: All - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> Merge Join - Merge Cond: (intermediate_result.text_col = reference_table.text_col) - -> Sort - Sort Key: intermediate_result.text_col - -> Function Scan on read_intermediate_result intermediate_result - -> Sort - Sort Key: reference_table.text_col - -> Seq Scan on reference_table_1512000 reference_table -(17 rows) +(6 rows) DROP SCHEMA subquery_view CASCADE; NOTICE: drop cascades to 19 other objects diff --git a/src/test/regress/sql/cte_inline.sql b/src/test/regress/sql/cte_inline.sql index 28691e35a..8c70bc13f 100644 --- a/src/test/regress/sql/cte_inline.sql +++ b/src/test/regress/sql/cte_inline.sql @@ -220,6 +220,10 @@ FROM USING (key); -- EXPLAIN should show the differences between MATERIALIZED and NOT MATERIALIZED + +\set VERBOSITY terse + +SELECT public.coordinator_plan_with_subplans($Q$ EXPLAIN (COSTS OFF) WITH cte_1 AS (SELECT * FROM test_table) SELECT count(*) @@ -228,6 +232,9 @@ FROM JOIN cte_1 as second_entry USING (key); +$Q$); + +\set VERBOSITY default EXPLAIN (COSTS OFF) WITH cte_1 AS NOT MATERIALIZED (SELECT * FROM test_table) SELECT diff --git a/src/test/regress/sql/insert_select_repartition.sql b/src/test/regress/sql/insert_select_repartition.sql index 
b39efe03a..f88c67f50 100644 --- a/src/test/regress/sql/insert_select_repartition.sql +++ b/src/test/regress/sql/insert_select_repartition.sql @@ -354,7 +354,9 @@ INSERT INTO target_table SELECT a, max(b) FROM source_table WHERE a BETWEEN 1 AND 2 GROUP BY a; +SELECT public.coordinator_plan($Q$ EXPLAIN EXECUTE insert_plan; +$Q$); SET client_min_messages TO DEBUG1; EXECUTE insert_plan; diff --git a/src/test/regress/sql/multi_test_helpers.sql b/src/test/regress/sql/multi_test_helpers.sql index ccf33dda6..c1bff60c9 100644 --- a/src/test/regress/sql/multi_test_helpers.sql +++ b/src/test/regress/sql/multi_test_helpers.sql @@ -20,10 +20,15 @@ END; $$LANGUAGE plpgsql; -- Create a function to ignore worker plans in explain output +-- Also remove extra "-> Result" lines for PG15 support CREATE OR REPLACE FUNCTION coordinator_plan(explain_command text, out query_plan text) RETURNS SETOF TEXT AS $$ BEGIN FOR query_plan IN execute explain_command LOOP + IF (query_plan LIKE '%-> Result%' OR query_plan = 'Result') + THEN + CONTINUE; + END IF; RETURN next; IF query_plan LIKE '%Task Count:%' THEN @@ -33,6 +38,32 @@ BEGIN RETURN; END; $$ language plpgsql; +-- Create a function to ignore worker plans in explain output +-- Prints every line up to and including the first "Task Count:" line, and after that only the remaining "Task Count:" lines +-- Also remove extra "-> Result" lines for PG15 support +CREATE OR REPLACE FUNCTION coordinator_plan_with_subplans(explain_command text, out query_plan text) +RETURNS SETOF TEXT AS $$ +DECLARE + task_count_line_reached boolean := false; +BEGIN + FOR query_plan IN execute explain_command LOOP + IF (query_plan LIKE '%-> Result%' OR query_plan = 'Result') THEN + CONTINUE; + END IF; + IF NOT task_count_line_reached THEN + RETURN next; + END IF; + IF query_plan LIKE '%Task Count:%' THEN + IF NOT task_count_line_reached THEN + task_count_line_reached := true; + ELSE + RETURN next; + END IF; + END IF; + END LOOP; + RETURN; +END; $$ language plpgsql; + -- Create a function to ignore "-> Result" lines for PG15 support 
-- In PG15 there are some extra "-> Result" lines CREATE OR REPLACE FUNCTION plan_without_result_lines(explain_command text, out query_plan text) diff --git a/src/test/regress/sql/multi_view.sql b/src/test/regress/sql/multi_view.sql index 181fb4e70..193d669f6 100644 --- a/src/test/regress/sql/multi_view.sql +++ b/src/test/regress/sql/multi_view.sql @@ -374,6 +374,7 @@ VACUUM ANALYZE users_table; -- explain tests EXPLAIN (COSTS FALSE) SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER BY 1; +SELECT public.coordinator_plan($Q$ EXPLAIN (COSTS FALSE) SELECT * FROM ( (SELECT user_id FROM recent_users) @@ -381,6 +382,7 @@ EXPLAIN (COSTS FALSE) SELECT * (SELECT user_id FROM selected_users) ) u WHERE user_id < 4 AND user_id > 1 ORDER BY user_id; +$Q$); EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10; SET citus.subquery_pushdown to ON; diff --git a/src/test/regress/sql/subquery_view.sql b/src/test/regress/sql/subquery_view.sql index 40798fccd..8f57ef5a3 100644 --- a/src/test/regress/sql/subquery_view.sql +++ b/src/test/regress/sql/subquery_view.sql @@ -427,17 +427,21 @@ SET client_min_messages TO DEFAULT; CREATE TABLE reference_table (text_col text, int_col int); SELECT create_reference_table('reference_table'); +SELECT public.coordinator_plan_with_subplans($Q$ EXPLAIN (COSTS OFF) WITH cte AS ( SELECT application_name AS text_col FROM pg_stat_activity ) SELECT * FROM reference_table JOIN cte USING (text_col); +$Q$); CREATE OR REPLACE VIEW view_on_views AS SELECT pg_stat_activity.application_name, pg_locks.pid FROM pg_stat_activity, pg_locks; +SELECT public.coordinator_plan_with_subplans($Q$ EXPLAIN (COSTS OFF) WITH cte AS ( SELECT application_name AS text_col FROM view_on_views ) SELECT * FROM reference_table JOIN cte USING (text_col); +$Q$); DROP SCHEMA subquery_view CASCADE; SET search_path TO public;