mirror of https://github.com/citusdata/citus.git
Handles EXPLAIN output diffs in PG15, Hash Agg/Join leverage
To handle differences in the usage of GroupAggregate vs. HashAggregate, or Merge Join vs. Hash Join, in cases where this detail doesn't seem to matter, we use coordinator_plan(). coordinator_plan() is updated to remove "Result" lines. There are some cases where we have subplans, so we add a new function that prints all Task Count lines as well: coordinator_plan_with_subplans(). Still not sure of the relevant PG commit. It could be db0d67db2401eb6238ccc04c6407a4fd4f985832, but disabling enable_group_by_reordering didn't help.
naisila/failure_pg15
parent
df5f628175
commit
2aa07e37a4
|
@ -423,6 +423,8 @@ DEBUG: join prunable for intervals [1073741824,2147483647] and [0,1073741823]
|
|||
(1 row)
|
||||
|
||||
-- EXPLAIN should show the differences between MATERIALIZED and NOT MATERIALIZED
|
||||
\set VERBOSITY terse
|
||||
SELECT public.coordinator_plan_with_subplans($Q$
|
||||
EXPLAIN (COSTS OFF) WITH cte_1 AS (SELECT * FROM test_table)
|
||||
SELECT
|
||||
count(*)
|
||||
|
@ -431,36 +433,22 @@ FROM
|
|||
JOIN
|
||||
cte_1 as second_entry
|
||||
USING (key);
|
||||
$Q$);
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT key, value, other_value FROM cte_inline.test_table
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) first_entry JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) second_entry USING (key))
|
||||
DEBUG: Creating router plan
|
||||
QUERY PLAN
|
||||
coordinator_plan_with_subplans
|
||||
---------------------------------------------------------------------
|
||||
Custom Scan (Citus Adaptive)
|
||||
-> Distributed Subplan XXX_1
|
||||
-> Custom Scan (Citus Adaptive)
|
||||
Task Count: 4
|
||||
Tasks Shown: One of 4
|
||||
-> Task
|
||||
Node: host=localhost port=xxxxx dbname=regression
|
||||
-> Seq Scan on test_table_1960000 test_table
|
||||
Task Count: 1
|
||||
Tasks Shown: All
|
||||
-> Task
|
||||
Node: host=localhost port=xxxxx dbname=regression
|
||||
-> Aggregate
|
||||
-> Merge Join
|
||||
Merge Cond: (intermediate_result.key = intermediate_result_1.key)
|
||||
-> Sort
|
||||
Sort Key: intermediate_result.key
|
||||
-> Function Scan on read_intermediate_result intermediate_result
|
||||
-> Sort
|
||||
Sort Key: intermediate_result_1.key
|
||||
-> Function Scan on read_intermediate_result intermediate_result_1
|
||||
(21 rows)
|
||||
(5 rows)
|
||||
|
||||
\set VERBOSITY default
|
||||
EXPLAIN (COSTS OFF) WITH cte_1 AS NOT MATERIALIZED (SELECT * FROM test_table)
|
||||
SELECT
|
||||
count(*)
|
||||
|
|
|
@ -702,23 +702,16 @@ PREPARE insert_plan AS
|
|||
INSERT INTO target_table
|
||||
SELECT a, max(b) FROM source_table
|
||||
WHERE a BETWEEN 1 AND 2 GROUP BY a;
|
||||
SELECT public.coordinator_plan($Q$
|
||||
EXPLAIN EXECUTE insert_plan;
|
||||
QUERY PLAN
|
||||
$Q$);
|
||||
coordinator_plan
|
||||
---------------------------------------------------------------------
|
||||
Custom Scan (Citus INSERT ... SELECT) (cost=0.00..0.00 rows=0 width=0)
|
||||
INSERT/SELECT method: repartition
|
||||
-> Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=100000 width=8)
|
||||
Task Count: 4
|
||||
Tasks Shown: One of 4
|
||||
-> Task
|
||||
Node: host=localhost port=xxxxx dbname=regression
|
||||
-> GroupAggregate (cost=44.09..44.28 rows=11 width=8)
|
||||
Group Key: a
|
||||
-> Sort (cost=44.09..44.12 rows=11 width=8)
|
||||
Sort Key: a
|
||||
-> Seq Scan on source_table_4213606 source_table (cost=0.00..43.90 rows=11 width=8)
|
||||
Filter: ((a >= 1) AND (a <= 2))
|
||||
(13 rows)
|
||||
(4 rows)
|
||||
|
||||
SET client_min_messages TO DEBUG1;
|
||||
EXECUTE insert_plan;
|
||||
|
|
|
@ -17,10 +17,15 @@ BEGIN
|
|||
END;
|
||||
$$LANGUAGE plpgsql;
|
||||
-- Create a function to ignore worker plans in explain output
|
||||
-- Also remove extra "-> Result" lines for PG15 support
|
||||
CREATE OR REPLACE FUNCTION coordinator_plan(explain_command text, out query_plan text)
|
||||
RETURNS SETOF TEXT AS $$
|
||||
BEGIN
|
||||
FOR query_plan IN execute explain_command LOOP
|
||||
IF (query_plan LIKE '%-> Result%' OR query_plan = 'Result')
|
||||
THEN
|
||||
CONTINUE;
|
||||
END IF;
|
||||
RETURN next;
|
||||
IF query_plan LIKE '%Task Count:%'
|
||||
THEN
|
||||
|
@ -29,6 +34,31 @@ BEGIN
|
|||
END LOOP;
|
||||
RETURN;
|
||||
END; $$ language plpgsql;
|
||||
-- Run the given EXPLAIN command and keep only the leading part of its output
-- plus the task counts of the plan and its subplans.
-- Every plan line is returned up to and including the first "Task Count:"
-- line; after that point only further "Task Count:" lines are returned.
-- "Result" lines are always dropped, because PG15 emits extra "-> Result" lines.
CREATE OR REPLACE FUNCTION coordinator_plan_with_subplans(explain_command text, out query_plan text)
RETURNS SETOF TEXT AS $$
DECLARE
    first_task_count_seen boolean := false;
    is_task_count_line boolean;
BEGIN
    FOR query_plan IN EXECUTE explain_command LOOP
        -- drop "Result" nodes, whether nested or at the top of the plan
        CONTINUE WHEN query_plan LIKE '%-> Result%' OR query_plan = 'Result';
        is_task_count_line := query_plan LIKE '%Task Count:%';
        -- before the first task count everything is emitted; afterwards only task counts
        IF NOT first_task_count_seen OR is_task_count_line THEN
            RETURN NEXT;
        END IF;
        -- the first "Task Count:" line itself was already emitted above
        IF is_task_count_line THEN
            first_task_count_seen := true;
        END IF;
    END LOOP;
    RETURN;
END; $$ language plpgsql;
|
||||
-- Create a function to ignore "-> Result" lines for PG15 support
|
||||
-- In PG15 there are some extra "-> Result" lines
|
||||
CREATE OR REPLACE FUNCTION plan_without_result_lines(explain_command text, out query_plan text)
|
||||
|
|
|
@ -785,6 +785,7 @@ EXPLAIN (COSTS FALSE) SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER
|
|||
Filter: ((value_1 >= 1) AND (value_1 < 3))
|
||||
(19 rows)
|
||||
|
||||
SELECT public.coordinator_plan($Q$
|
||||
EXPLAIN (COSTS FALSE) SELECT *
|
||||
FROM (
|
||||
(SELECT user_id FROM recent_users)
|
||||
|
@ -792,32 +793,14 @@ EXPLAIN (COSTS FALSE) SELECT *
|
|||
(SELECT user_id FROM selected_users) ) u
|
||||
WHERE user_id < 4 AND user_id > 1
|
||||
ORDER BY user_id;
|
||||
QUERY PLAN
|
||||
$Q$);
|
||||
coordinator_plan
|
||||
---------------------------------------------------------------------
|
||||
Sort
|
||||
Sort Key: remote_scan.user_id
|
||||
-> Custom Scan (Citus Adaptive)
|
||||
Task Count: 4
|
||||
Tasks Shown: One of 4
|
||||
-> Task
|
||||
Node: host=localhost port=xxxxx dbname=regression
|
||||
-> Unique
|
||||
-> Sort
|
||||
Sort Key: recent_users.user_id
|
||||
-> Append
|
||||
-> Subquery Scan on recent_users
|
||||
-> Sort
|
||||
Sort Key: (max(users_table."time")) DESC
|
||||
-> GroupAggregate
|
||||
Group Key: users_table.user_id
|
||||
Filter: (max(users_table."time") > '2017-11-23 16:20:33.264457'::timestamp without time zone)
|
||||
-> Sort
|
||||
Sort Key: users_table.user_id
|
||||
-> Seq Scan on users_table_1400256 users_table
|
||||
Filter: ((user_id < 4) AND (user_id > 1))
|
||||
-> Seq Scan on users_table_1400256 users_table_1
|
||||
Filter: ((value_1 >= 1) AND (value_1 < 3) AND (user_id < 4) AND (user_id > 1))
|
||||
(23 rows)
|
||||
(4 rows)
|
||||
|
||||
EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10;
|
||||
QUERY PLAN
|
||||
|
|
|
@ -578,11 +578,13 @@ SELECT create_reference_table('reference_table');
|
|||
|
||||
(1 row)
|
||||
|
||||
SELECT public.coordinator_plan_with_subplans($Q$
|
||||
EXPLAIN (COSTS OFF) WITH cte AS (
|
||||
SELECT application_name AS text_col
|
||||
FROM pg_stat_activity
|
||||
) SELECT * FROM reference_table JOIN cte USING (text_col);
|
||||
QUERY PLAN
|
||||
$Q$);
|
||||
coordinator_plan_with_subplans
|
||||
---------------------------------------------------------------------
|
||||
Custom Scan (Citus Adaptive)
|
||||
-> Distributed Subplan XXX_1
|
||||
|
@ -590,38 +592,17 @@ EXPLAIN (COSTS OFF) WITH cte AS (
|
|||
-> Distributed Subplan XXX_2
|
||||
-> Custom Scan (Citus Adaptive)
|
||||
Task Count: 1
|
||||
Tasks Shown: All
|
||||
-> Task
|
||||
Node: host=localhost port=xxxxx dbname=regression
|
||||
-> Hash Left Join
|
||||
Hash Cond: (intermediate_result.usesysid = u.oid)
|
||||
-> Hash Left Join
|
||||
Hash Cond: (intermediate_result.datid = d.oid)
|
||||
-> Function Scan on read_intermediate_result intermediate_result
|
||||
-> Hash
|
||||
-> Seq Scan on pg_database d
|
||||
-> Hash
|
||||
-> Seq Scan on pg_authid u
|
||||
Task Count: 1
|
||||
Tasks Shown: All
|
||||
-> Task
|
||||
Node: host=localhost port=xxxxx dbname=regression
|
||||
-> Merge Join
|
||||
Merge Cond: (intermediate_result.application_name = reference_table.text_col)
|
||||
-> Sort
|
||||
Sort Key: intermediate_result.application_name
|
||||
-> Function Scan on read_intermediate_result intermediate_result
|
||||
-> Sort
|
||||
Sort Key: reference_table.text_col
|
||||
-> Seq Scan on reference_table_1512000 reference_table
|
||||
(30 rows)
|
||||
(7 rows)
|
||||
|
||||
CREATE OR REPLACE VIEW view_on_views AS SELECT pg_stat_activity.application_name, pg_locks.pid FROM pg_stat_activity, pg_locks;
|
||||
SELECT public.coordinator_plan_with_subplans($Q$
|
||||
EXPLAIN (COSTS OFF) WITH cte AS (
|
||||
SELECT application_name AS text_col
|
||||
FROM view_on_views
|
||||
) SELECT * FROM reference_table JOIN cte USING (text_col);
|
||||
QUERY PLAN
|
||||
$Q$);
|
||||
coordinator_plan_with_subplans
|
||||
---------------------------------------------------------------------
|
||||
Custom Scan (Citus Adaptive)
|
||||
-> Distributed Subplan XXX_1
|
||||
|
@ -629,18 +610,7 @@ EXPLAIN (COSTS OFF) WITH cte AS (
|
|||
-> Function Scan on pg_stat_get_activity s
|
||||
-> Function Scan on pg_lock_status l
|
||||
Task Count: 1
|
||||
Tasks Shown: All
|
||||
-> Task
|
||||
Node: host=localhost port=xxxxx dbname=regression
|
||||
-> Merge Join
|
||||
Merge Cond: (intermediate_result.text_col = reference_table.text_col)
|
||||
-> Sort
|
||||
Sort Key: intermediate_result.text_col
|
||||
-> Function Scan on read_intermediate_result intermediate_result
|
||||
-> Sort
|
||||
Sort Key: reference_table.text_col
|
||||
-> Seq Scan on reference_table_1512000 reference_table
|
||||
(17 rows)
|
||||
(6 rows)
|
||||
|
||||
DROP SCHEMA subquery_view CASCADE;
|
||||
NOTICE: drop cascades to 19 other objects
|
||||
|
|
|
@ -220,6 +220,10 @@ FROM
|
|||
USING (key);
|
||||
|
||||
-- EXPLAIN should show the differences between MATERIALIZED and NOT MATERIALIZED
|
||||
|
||||
\set VERBOSITY terse
|
||||
|
||||
SELECT public.coordinator_plan_with_subplans($Q$
|
||||
EXPLAIN (COSTS OFF) WITH cte_1 AS (SELECT * FROM test_table)
|
||||
SELECT
|
||||
count(*)
|
||||
|
@ -228,6 +232,9 @@ FROM
|
|||
JOIN
|
||||
cte_1 as second_entry
|
||||
USING (key);
|
||||
$Q$);
|
||||
|
||||
\set VERBOSITY default
|
||||
|
||||
EXPLAIN (COSTS OFF) WITH cte_1 AS NOT MATERIALIZED (SELECT * FROM test_table)
|
||||
SELECT
|
||||
|
|
|
@ -354,7 +354,9 @@ INSERT INTO target_table
|
|||
SELECT a, max(b) FROM source_table
|
||||
WHERE a BETWEEN 1 AND 2 GROUP BY a;
|
||||
|
||||
SELECT public.coordinator_plan($Q$
|
||||
EXPLAIN EXECUTE insert_plan;
|
||||
$Q$);
|
||||
|
||||
SET client_min_messages TO DEBUG1;
|
||||
EXECUTE insert_plan;
|
||||
|
|
|
@ -20,10 +20,15 @@ END;
|
|||
$$LANGUAGE plpgsql;
|
||||
|
||||
-- Create a function to ignore worker plans in explain output
|
||||
-- Also remove extra "-> Result" lines for PG15 support
|
||||
CREATE OR REPLACE FUNCTION coordinator_plan(explain_command text, out query_plan text)
|
||||
RETURNS SETOF TEXT AS $$
|
||||
BEGIN
|
||||
FOR query_plan IN execute explain_command LOOP
|
||||
IF (query_plan LIKE '%-> Result%' OR query_plan = 'Result')
|
||||
THEN
|
||||
CONTINUE;
|
||||
END IF;
|
||||
RETURN next;
|
||||
IF query_plan LIKE '%Task Count:%'
|
||||
THEN
|
||||
|
@ -33,6 +38,32 @@ BEGIN
|
|||
RETURN;
|
||||
END; $$ language plpgsql;
|
||||
|
||||
-- Run the given EXPLAIN command and keep only the leading part of its output
-- plus the task counts of the plan and its subplans.
-- Every plan line is returned up to and including the first "Task Count:"
-- line; after that point only further "Task Count:" lines are returned.
-- "Result" lines are always dropped, because PG15 emits extra "-> Result" lines.
CREATE OR REPLACE FUNCTION coordinator_plan_with_subplans(explain_command text, out query_plan text)
RETURNS SETOF TEXT AS $$
DECLARE
    first_task_count_seen boolean := false;
    is_task_count_line boolean;
BEGIN
    FOR query_plan IN EXECUTE explain_command LOOP
        -- drop "Result" nodes, whether nested or at the top of the plan
        CONTINUE WHEN query_plan LIKE '%-> Result%' OR query_plan = 'Result';
        is_task_count_line := query_plan LIKE '%Task Count:%';
        -- before the first task count everything is emitted; afterwards only task counts
        IF NOT first_task_count_seen OR is_task_count_line THEN
            RETURN NEXT;
        END IF;
        -- the first "Task Count:" line itself was already emitted above
        IF is_task_count_line THEN
            first_task_count_seen := true;
        END IF;
    END LOOP;
    RETURN;
END; $$ language plpgsql;
|
||||
|
||||
-- Create a function to ignore "-> Result" lines for PG15 support
|
||||
-- In PG15 there are some extra "-> Result" lines
|
||||
CREATE OR REPLACE FUNCTION plan_without_result_lines(explain_command text, out query_plan text)
|
||||
|
|
|
@ -374,6 +374,7 @@ VACUUM ANALYZE users_table;
|
|||
-- explain tests
|
||||
EXPLAIN (COSTS FALSE) SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER BY 1;
|
||||
|
||||
SELECT public.coordinator_plan($Q$
|
||||
EXPLAIN (COSTS FALSE) SELECT *
|
||||
FROM (
|
||||
(SELECT user_id FROM recent_users)
|
||||
|
@ -381,6 +382,7 @@ EXPLAIN (COSTS FALSE) SELECT *
|
|||
(SELECT user_id FROM selected_users) ) u
|
||||
WHERE user_id < 4 AND user_id > 1
|
||||
ORDER BY user_id;
|
||||
$Q$);
|
||||
|
||||
EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10;
|
||||
SET citus.subquery_pushdown to ON;
|
||||
|
|
|
@ -427,17 +427,21 @@ SET client_min_messages TO DEFAULT;
|
|||
CREATE TABLE reference_table (text_col text, int_col int);
|
||||
SELECT create_reference_table('reference_table');
|
||||
|
||||
SELECT public.coordinator_plan_with_subplans($Q$
|
||||
EXPLAIN (COSTS OFF) WITH cte AS (
|
||||
SELECT application_name AS text_col
|
||||
FROM pg_stat_activity
|
||||
) SELECT * FROM reference_table JOIN cte USING (text_col);
|
||||
$Q$);
|
||||
|
||||
CREATE OR REPLACE VIEW view_on_views AS SELECT pg_stat_activity.application_name, pg_locks.pid FROM pg_stat_activity, pg_locks;
|
||||
|
||||
SELECT public.coordinator_plan_with_subplans($Q$
|
||||
EXPLAIN (COSTS OFF) WITH cte AS (
|
||||
SELECT application_name AS text_col
|
||||
FROM view_on_views
|
||||
) SELECT * FROM reference_table JOIN cte USING (text_col);
|
||||
$Q$);
|
||||
|
||||
DROP SCHEMA subquery_view CASCADE;
|
||||
SET search_path TO public;
|
||||
|
|
Loading…
Reference in New Issue