From c32b2de1a794e8cd96528aefe70da0f82deea647 Mon Sep 17 00:00:00 2001 From: Onder Kalaci Date: Fri, 4 Mar 2022 17:22:28 +0100 Subject: [PATCH] Improve citus_lock_waits 1) Remove useless columns 2) Show backends that are blocked on a DDL even before gpid is assigned 3) One minor bugfix, where we clear distributedCommandOriginator properly. --- src/backend/distributed/shared_library_init.c | 13 ++ .../distributed/sql/citus--10.2-4--11.0-1.sql | 10 ++ .../sql/downgrades/citus--11.0-1--10.2-4.sql | 1 + .../sql/udfs/citus_lock_waits/11.0-1.sql | 47 +++++--- .../sql/udfs/citus_lock_waits/latest.sql | 47 +++++--- .../test/run_from_same_connection.c | 4 +- .../distributed/transaction/backend_data.c | 11 +- src/include/distributed/backend_data.h | 4 +- ...lation_get_distributed_wait_queries_mx.out | 114 +++++++++++++++--- src/test/regress/expected/multi_extension.out | 3 +- .../expected/upgrade_list_citus_objects.out | 3 +- ...ation_get_distributed_wait_queries_mx.spec | 68 +++++++++++ 12 files changed, 271 insertions(+), 54 deletions(-) diff --git a/src/backend/distributed/shared_library_init.c b/src/backend/distributed/shared_library_init.c index b5b8ac43f..5c319da53 100644 --- a/src/backend/distributed/shared_library_init.c +++ b/src/backend/distributed/shared_library_init.c @@ -2226,6 +2226,19 @@ CitusAuthHook(Port *port, int status) "regular client connections", MaxClientConnections))); } + + /* + * Right after this, before we assign global pid, this backend + * might get blocked by a DDL as that happens during parsing. + * + * That's why, lets mark the backend as an external backend + * which is likely to execute a distributed command. + * + * We do this so that this backend gets the chance to show + * up in citus_lock_waits. + */ + InitializeBackendData(); + SetBackendDataDistributedCommandOriginator(true); } /* let other authentication hooks to kick in first */ diff --git a/src/backend/distributed/sql/citus--10.2-4--11.0-1.sql b/src/backend/distributed/sql/citus--10.2-4--11.0-1.sql index 040f1a530..19ebefbfd 100644 --- a/src/backend/distributed/sql/citus--10.2-4--11.0-1.sql +++ b/src/backend/distributed/sql/citus--10.2-4--11.0-1.sql @@ -35,6 +35,16 @@ DROP FUNCTION IF EXISTS pg_catalog.citus_dist_stat_activity() CASCADE; DROP FUNCTION IF EXISTS pg_catalog.citus_worker_stat_activity() CASCADE; #include "udfs/citus_dist_stat_activity/11.0-1.sql" +-- a very simple helper function defined for citus_lock_waits +CREATE OR REPLACE FUNCTION get_nodeid_for_groupid(groupIdInput int) RETURNS int AS $$ +DECLARE + returnNodeNodeId int := 0; +begin + SELECT nodeId into returnNodeNodeId FROM pg_dist_node WHERE groupid = groupIdInput and nodecluster = current_setting('citus.cluster_name'); + RETURN returnNodeNodeId; +end +$$ LANGUAGE plpgsql; + #include "udfs/citus_lock_waits/11.0-1.sql" #include "udfs/pg_cancel_backend/11.0-1.sql" diff --git a/src/backend/distributed/sql/downgrades/citus--11.0-1--10.2-4.sql b/src/backend/distributed/sql/downgrades/citus--11.0-1--10.2-4.sql index 0e9e43757..3992dd62b 100644 --- a/src/backend/distributed/sql/downgrades/citus--11.0-1--10.2-4.sql +++ b/src/backend/distributed/sql/downgrades/citus--11.0-1--10.2-4.sql @@ -357,6 +357,7 @@ GRANT SELECT ON pg_catalog.citus_lock_waits TO PUBLIC; DROP FUNCTION pg_catalog.citus_finalize_upgrade_to_citus11(bool); DROP FUNCTION pg_catalog.citus_calculate_gpid(integer,integer); DROP FUNCTION pg_catalog.citus_backend_gpid(); +DROP FUNCTION get_nodeid_for_groupid(integer); RESET search_path; diff --git a/src/backend/distributed/sql/udfs/citus_lock_waits/11.0-1.sql b/src/backend/distributed/sql/udfs/citus_lock_waits/11.0-1.sql index 8770f2b1d..b3de12632 100644 --- a/src/backend/distributed/sql/udfs/citus_lock_waits/11.0-1.sql +++ b/src/backend/distributed/sql/udfs/citus_lock_waits/11.0-1.sql @@ -2,25 +2,44 @@ SET search_path = 'pg_catalog'; CREATE VIEW citus.citus_lock_waits AS WITH +unique_global_wait_edges_with_calculated_gpids AS ( +SELECT + -- if global_pid is NULL, it is most likely that a backend is blocked on a DDL + -- also for legacy reasons citus_internal_global_blocked_processes() returns groupId, we replace that with nodeIds + case WHEN waiting_global_pid !=0 THEN waiting_global_pid ELSE citus_calculate_gpid(get_nodeid_for_groupid(waiting_node_id), waiting_pid) END waiting_global_pid, + case WHEN blocking_global_pid !=0 THEN blocking_global_pid ELSE citus_calculate_gpid(get_nodeid_for_groupid(blocking_node_id), blocking_pid) END blocking_global_pid, + + -- citus_internal_global_blocked_processes returns groupId, we replace it here with actual + -- nodeId to be consisten with the other views + get_nodeid_for_groupid(blocking_node_id) as blocking_node_id, + get_nodeid_for_groupid(waiting_node_id) as waiting_node_id, + + blocking_transaction_waiting + + FROM citus_internal_global_blocked_processes() +), unique_global_wait_edges AS ( - SELECT DISTINCT ON(waiting_global_pid, blocking_global_pid) * FROM citus_internal_global_blocked_processes() + SELECT DISTINCT ON(waiting_global_pid, blocking_global_pid) * FROM unique_global_wait_edges_with_calculated_gpids +), +citus_dist_stat_activity_with_calculated_gpids AS +( + -- if global_pid is NULL, it is most likely that a backend is blocked on a DDL + SELECT CASE WHEN global_pid != 0 THEN global_pid ELSE citus_calculate_gpid(nodeid, pid) END global_pid, nodeid, pid, query FROM citus_dist_stat_activity ) SELECT - waiting.global_pid as waiting_gpid, - blocking.global_pid as blocking_gpid, - waiting.pid AS waiting_pid, - blocking.pid AS blocking_pid, - waiting.query AS blocked_statement, - blocking.query AS current_statement_in_blocking_process, - waiting.nodeid AS waiting_nodeid, - blocking.nodeid AS blocking_nodeid + waiting.global_pid as waiting_gpid, + blocking.global_pid as blocking_gpid, + waiting.query AS blocked_statement, + blocking.query AS current_statement_in_blocking_process, + waiting.nodeid AS waiting_nodeid, + blocking.nodeid AS blocking_nodeid FROM - unique_global_wait_edges -JOIN - citus_dist_stat_activity waiting ON (unique_global_wait_edges.waiting_global_pid = waiting.global_pid) -JOIN - citus_dist_stat_activity blocking ON (unique_global_wait_edges.blocking_global_pid = blocking.global_pid); + unique_global_wait_edges + JOIN + citus_dist_stat_activity_with_calculated_gpids waiting ON (unique_global_wait_edges.waiting_global_pid = waiting.global_pid) + JOIN + citus_dist_stat_activity_with_calculated_gpids blocking ON (unique_global_wait_edges.blocking_global_pid = blocking.global_pid); ALTER VIEW citus.citus_lock_waits SET SCHEMA pg_catalog; GRANT SELECT ON pg_catalog.citus_lock_waits TO PUBLIC; diff --git a/src/backend/distributed/sql/udfs/citus_lock_waits/latest.sql b/src/backend/distributed/sql/udfs/citus_lock_waits/latest.sql index 8770f2b1d..b3de12632 100644 --- a/src/backend/distributed/sql/udfs/citus_lock_waits/latest.sql +++ b/src/backend/distributed/sql/udfs/citus_lock_waits/latest.sql @@ -2,25 +2,44 @@ SET search_path = 'pg_catalog'; CREATE VIEW citus.citus_lock_waits AS WITH +unique_global_wait_edges_with_calculated_gpids AS ( +SELECT + -- if global_pid is NULL, it is most likely that a backend is blocked on a DDL + -- also for legacy reasons citus_internal_global_blocked_processes() returns groupId, we replace that with nodeIds + case WHEN waiting_global_pid !=0 THEN waiting_global_pid ELSE citus_calculate_gpid(get_nodeid_for_groupid(waiting_node_id), waiting_pid) END waiting_global_pid, + case WHEN blocking_global_pid !=0 THEN blocking_global_pid ELSE citus_calculate_gpid(get_nodeid_for_groupid(blocking_node_id), blocking_pid) END blocking_global_pid, + + -- citus_internal_global_blocked_processes returns groupId, we replace it here with actual + -- nodeId to be consisten with the other views + get_nodeid_for_groupid(blocking_node_id) as blocking_node_id, + get_nodeid_for_groupid(waiting_node_id) as waiting_node_id, + + blocking_transaction_waiting + + FROM citus_internal_global_blocked_processes() +), unique_global_wait_edges AS ( - SELECT DISTINCT ON(waiting_global_pid, blocking_global_pid) * FROM citus_internal_global_blocked_processes() + SELECT DISTINCT ON(waiting_global_pid, blocking_global_pid) * FROM unique_global_wait_edges_with_calculated_gpids +), +citus_dist_stat_activity_with_calculated_gpids AS +( + -- if global_pid is NULL, it is most likely that a backend is blocked on a DDL + SELECT CASE WHEN global_pid != 0 THEN global_pid ELSE citus_calculate_gpid(nodeid, pid) END global_pid, nodeid, pid, query FROM citus_dist_stat_activity ) SELECT - waiting.global_pid as waiting_gpid, - blocking.global_pid as blocking_gpid, - waiting.pid AS waiting_pid, - blocking.pid AS blocking_pid, - waiting.query AS blocked_statement, - blocking.query AS current_statement_in_blocking_process, - waiting.nodeid AS waiting_nodeid, - blocking.nodeid AS blocking_nodeid + waiting.global_pid as waiting_gpid, + blocking.global_pid as blocking_gpid, + waiting.query AS blocked_statement, + blocking.query AS current_statement_in_blocking_process, + waiting.nodeid AS waiting_nodeid, + blocking.nodeid AS blocking_nodeid FROM - unique_global_wait_edges -JOIN - citus_dist_stat_activity waiting ON (unique_global_wait_edges.waiting_global_pid = waiting.global_pid) -JOIN - citus_dist_stat_activity blocking ON (unique_global_wait_edges.blocking_global_pid = blocking.global_pid); + unique_global_wait_edges + JOIN + citus_dist_stat_activity_with_calculated_gpids waiting ON (unique_global_wait_edges.waiting_global_pid = waiting.global_pid) + JOIN + citus_dist_stat_activity_with_calculated_gpids blocking ON (unique_global_wait_edges.blocking_global_pid = blocking.global_pid); ALTER VIEW citus.citus_lock_waits SET SCHEMA pg_catalog; GRANT SELECT ON pg_catalog.citus_lock_waits TO PUBLIC; diff --git a/src/backend/distributed/test/run_from_same_connection.c b/src/backend/distributed/test/run_from_same_connection.c index ad39664bf..657c1fd82 100644 --- a/src/backend/distributed/test/run_from_same_connection.c +++ b/src/backend/distributed/test/run_from_same_connection.c @@ -188,7 +188,7 @@ run_commands_on_session_level_connection_to_node(PG_FUNCTION_ARGS) /* * override_backend_data_command_originator is a wrapper around - * OverrideBackendDataDistributedCommandOriginator(). + * SetBackendDataDistributedCommandOriginator(). */ Datum override_backend_data_command_originator(PG_FUNCTION_ARGS) @@ -197,7 +197,7 @@ override_backend_data_command_originator(PG_FUNCTION_ARGS) bool distributedCommandOriginator = PG_GETARG_BOOL(0); - OverrideBackendDataDistributedCommandOriginator(distributedCommandOriginator); + SetBackendDataDistributedCommandOriginator(distributedCommandOriginator); PG_RETURN_VOID(); } diff --git a/src/backend/distributed/transaction/backend_data.c b/src/backend/distributed/transaction/backend_data.c index 7cfe1fe51..fd482c7eb 100644 --- a/src/backend/distributed/transaction/backend_data.c +++ b/src/backend/distributed/transaction/backend_data.c @@ -721,6 +721,7 @@ UnSetGlobalPID(void) MyBackendData->globalPID = 0; MyBackendData->databaseId = 0; MyBackendData->userId = 0; + MyBackendData->distributedCommandOriginator = false; SpinLockRelease(&MyBackendData->mutex); } @@ -850,12 +851,16 @@ AssignGlobalPID(void) /* - * OverrideBackendDataDistributedCommandOriginator should only be used for isolation testing. - * See how it is used in the relevant functions. + * SetBackendDataDistributedCommandOriginator is used to set the distributedCommandOriginator + * field on MyBackendData. */ void -OverrideBackendDataDistributedCommandOriginator(bool distributedCommandOriginator) +SetBackendDataDistributedCommandOriginator(bool distributedCommandOriginator) { + if (!MyBackendData) + { + return; + } SpinLockAcquire(&MyBackendData->mutex); MyBackendData->distributedCommandOriginator = distributedCommandOriginator; diff --git a/src/include/distributed/backend_data.h b/src/include/distributed/backend_data.h index 74f86177e..ccb4da535 100644 --- a/src/include/distributed/backend_data.h +++ b/src/include/distributed/backend_data.h @@ -55,8 +55,8 @@ extern void UnSetGlobalPID(void); extern void AssignDistributedTransactionId(void); extern void AssignGlobalPID(void); extern uint64 GetGlobalPID(void); -extern void OverrideBackendDataDistributedCommandOriginator(bool - distributedCommandOriginator); +extern void SetBackendDataDistributedCommandOriginator(bool + distributedCommandOriginator); extern uint64 ExtractGlobalPID(char *applicationName); extern int ExtractNodeIdFromGlobalPID(uint64 globalPID, bool missingOk); extern int ExtractProcessIdFromGlobalPID(uint64 globalPID); diff --git a/src/test/regress/expected/isolation_get_distributed_wait_queries_mx.out b/src/test/regress/expected/isolation_get_distributed_wait_queries_mx.out index ce7e4f7fa..8cbedbf37 100644 --- a/src/test/regress/expected/isolation_get_distributed_wait_queries_mx.out +++ b/src/test/regress/expected/isolation_get_distributed_wait_queries_mx.out @@ -1,4 +1,4 @@ -Parsed test spec with 4 sessions +Parsed test spec with 8 sessions starting permutation: s1-begin s1-update-ref-table-from-coordinator s2-start-session-level-connection s2-begin-on-worker s2-update-ref-table s3-select-distributed-waiting-queries s1-commit s2-commit-worker s2-stop-connection step s1-begin: @@ -61,7 +61,7 @@ stop_session_level_connection_to_node (1 row) -restore_isolation_tester_func +citus_remove_node --------------------------------------------------------------------- (1 row) @@ -157,7 +157,7 @@ stop_session_level_connection_to_node (1 row) -restore_isolation_tester_func +citus_remove_node --------------------------------------------------------------------- (1 row) @@ -253,7 +253,7 @@ stop_session_level_connection_to_node (1 row) -restore_isolation_tester_func +citus_remove_node --------------------------------------------------------------------- (1 row) @@ -349,7 +349,7 @@ stop_session_level_connection_to_node (1 row) -restore_isolation_tester_func +citus_remove_node --------------------------------------------------------------------- (1 row) @@ -445,7 +445,7 @@ stop_session_level_connection_to_node (1 row) -restore_isolation_tester_func +citus_remove_node --------------------------------------------------------------------- (1 row) @@ -539,7 +539,7 @@ stop_session_level_connection_to_node (1 row) -restore_isolation_tester_func +citus_remove_node --------------------------------------------------------------------- (1 row) @@ -635,7 +635,7 @@ stop_session_level_connection_to_node (1 row) -restore_isolation_tester_func +citus_remove_node --------------------------------------------------------------------- (1 row) @@ -729,7 +729,7 @@ stop_session_level_connection_to_node (1 row) -restore_isolation_tester_func +citus_remove_node --------------------------------------------------------------------- (1 row) @@ -823,7 +823,7 @@ stop_session_level_connection_to_node (1 row) -restore_isolation_tester_func +citus_remove_node --------------------------------------------------------------------- (1 row) @@ -919,7 +919,7 @@ stop_session_level_connection_to_node (1 row) -restore_isolation_tester_func +citus_remove_node --------------------------------------------------------------------- (1 row) @@ -986,7 +986,7 @@ stop_session_level_connection_to_node (1 row) -restore_isolation_tester_func +citus_remove_node --------------------------------------------------------------------- (1 row) @@ -1018,7 +1018,7 @@ step s1-commit: COMMIT; step s2-update-on-the-coordinator: <... completed> -restore_isolation_tester_func +citus_remove_node --------------------------------------------------------------------- (1 row) @@ -1114,7 +1114,7 @@ stop_session_level_connection_to_node (1 row) -restore_isolation_tester_func +citus_remove_node --------------------------------------------------------------------- (1 row) @@ -1194,13 +1194,13 @@ stop_session_level_connection_to_node (1 row) -restore_isolation_tester_func +citus_remove_node --------------------------------------------------------------------- (1 row) -starting permutation: s1-begin s1-update-ref-table-from-coordinator s2-start-session-level-connection s2-update-ref-table s3-select-distributed-waiting-queries s1-commit s2-stop-connection +starting permutation: s1-begin s1-update-ref-table-from-coordinator s2-start-session-level-connection s2-update-ref-table s3-select-distributed-waiting-queries s1-commit s2-stop-connection s5-begin s5-alter s6-select s3-select-distributed-waiting-queries s3-show-actual-gpids s5-rollback s8-begin s8-select s7-alter s3-select-distributed-waiting-queries s3-show-actual-gpids s8-rollback step s1-begin: BEGIN; @@ -1245,7 +1245,87 @@ stop_session_level_connection_to_node (1 row) -restore_isolation_tester_func +step s5-begin: + BEGIN; + +step s5-alter: + ALTER TABLE tt1 ADD COLUMN new_column INT; + +step s6-select: + SELECT user_id FROM tt1 ORDER BY user_id DESC LIMIT 1; + +step s3-select-distributed-waiting-queries: + SELECT blocked_statement, current_statement_in_blocking_process FROM citus_lock_waits WHERE blocked_statement NOT ILIKE '%run_commands_on_session_level_connection_to_node%' AND current_statement_in_blocking_process NOT ILIKE '%run_commands_on_session_level_connection_to_node%'; + +blocked_statement |current_statement_in_blocking_process +--------------------------------------------------------------------- + + SELECT user_id FROM tt1 ORDER BY user_id DESC LIMIT 1; +| + ALTER TABLE tt1 ADD COLUMN new_column INT; + +(1 row) + +step s3-show-actual-gpids: + SELECT global_pid > 0 as gpid_exists, query FROM citus_stat_activity WHERE state = 'active' AND query IN (SELECT blocked_statement FROM citus_lock_waits UNION SELECT current_statement_in_blocking_process FROM citus_lock_waits) ORDER BY 1 DESC; + +gpid_exists|query +--------------------------------------------------------------------- +f | + SELECT user_id FROM tt1 ORDER BY user_id DESC LIMIT 1; + +(1 row) + +step s5-rollback: + ROLLBACK; + +step s6-select: <... completed> +user_id +--------------------------------------------------------------------- + 7 +(1 row) + +step s8-begin: + BEGIN; + +step s8-select: + SELECT user_id FROM tt1 ORDER BY user_id DESC LIMIT 1; + +user_id +--------------------------------------------------------------------- + 7 +(1 row) + +step s7-alter: + ALTER TABLE tt1 ADD COLUMN new_column INT; + +step s3-select-distributed-waiting-queries: + SELECT blocked_statement, current_statement_in_blocking_process FROM citus_lock_waits WHERE blocked_statement NOT ILIKE '%run_commands_on_session_level_connection_to_node%' AND current_statement_in_blocking_process NOT ILIKE '%run_commands_on_session_level_connection_to_node%'; + +blocked_statement |current_statement_in_blocking_process +--------------------------------------------------------------------- + + ALTER TABLE tt1 ADD COLUMN new_column INT; +| + SELECT user_id FROM tt1 ORDER BY user_id DESC LIMIT 1; + +(1 row) + +step s3-show-actual-gpids: + SELECT global_pid > 0 as gpid_exists, query FROM citus_stat_activity WHERE state = 'active' AND query IN (SELECT blocked_statement FROM citus_lock_waits UNION SELECT current_statement_in_blocking_process FROM citus_lock_waits) ORDER BY 1 DESC; + +gpid_exists|query +--------------------------------------------------------------------- +t | + ALTER TABLE tt1 ADD COLUMN new_column INT; + +(1 row) + +step s8-rollback: + ROLLBACK; + +step s7-alter: <... completed> +citus_remove_node --------------------------------------------------------------------- (1 row) diff --git a/src/test/regress/expected/multi_extension.out b/src/test/regress/expected/multi_extension.out index cc6dbefb9..bcd998ee4 100644 --- a/src/test/regress/expected/multi_extension.out +++ b/src/test/regress/expected/multi_extension.out @@ -1014,6 +1014,7 @@ SELECT * FROM multi_extension.print_extension_changes(); | function citus_shards_on_worker() SETOF record | function citus_stat_activity() SETOF record | function create_distributed_function(regprocedure,text,text,boolean) void + | function get_nodeid_for_groupid(integer) integer | function pg_cancel_backend(bigint) boolean | function pg_terminate_backend(bigint,bigint) boolean | function run_command_on_all_nodes(text,boolean,boolean) SETOF record @@ -1023,7 +1024,7 @@ SELECT * FROM multi_extension.print_extension_changes(); | function worker_partition_query_result(text,text,integer,citus.distribution_type,text[],text[],boolean,boolean,boolean) SETOF record | table pg_dist_object | view citus_stat_activity -(35 rows) +(36 rows) DROP TABLE multi_extension.prev_objects, multi_extension.extension_diff; -- show running version diff --git a/src/test/regress/expected/upgrade_list_citus_objects.out b/src/test/regress/expected/upgrade_list_citus_objects.out index 8dc1b418e..01dec9ef1 100644 --- a/src/test/regress/expected/upgrade_list_citus_objects.out +++ b/src/test/regress/expected/upgrade_list_citus_objects.out @@ -146,6 +146,7 @@ ORDER BY 1; function get_current_transaction_id() function get_global_active_transactions() function get_missing_time_partition_ranges(regclass,interval,timestamp with time zone,timestamp with time zone) + function get_nodeid_for_groupid(integer) function get_rebalance_progress() function get_rebalance_table_shards_plan(regclass,real,integer,bigint[],boolean,name,real) function get_shard_id_for_distribution_column(regclass,"any") @@ -277,5 +278,5 @@ ORDER BY 1; view citus_stat_statements view pg_dist_shard_placement view time_partitions -(261 rows) +(262 rows) diff --git a/src/test/regress/spec/isolation_get_distributed_wait_queries_mx.spec b/src/test/regress/spec/isolation_get_distributed_wait_queries_mx.spec index 5954f2979..dfe1587be 100644 --- a/src/test/regress/spec/isolation_get_distributed_wait_queries_mx.spec +++ b/src/test/regress/spec/isolation_get_distributed_wait_queries_mx.spec @@ -1,6 +1,7 @@ #include "isolation_mx_common.include.spec" setup { + SELECT citus_add_node('localhost', 57636, groupid:=0); CREATE TABLE ref_table(user_id int, value_1 int); SELECT create_reference_table('ref_table'); INSERT INTO ref_table VALUES (1, 11), (2, 21), (3, 31), (4, 41), (5, 51), (6, 61), (7, 71); @@ -17,6 +18,7 @@ teardown DROP TABLE ref_table; DROP TABLE tt1; SELECT citus_internal.restore_isolation_tester_func(); + SELECT citus_remove_node('localhost', 57636); } session "s1" @@ -162,6 +164,12 @@ step "s3-select-distributed-waiting-queries" SELECT blocked_statement, current_statement_in_blocking_process FROM citus_lock_waits WHERE blocked_statement NOT ILIKE '%run_commands_on_session_level_connection_to_node%' AND current_statement_in_blocking_process NOT ILIKE '%run_commands_on_session_level_connection_to_node%'; } +// only works for the coordinator +step "s3-show-actual-gpids" +{ + SELECT global_pid > 0 as gpid_exists, query FROM citus_stat_activity WHERE state = 'active' AND query IN (SELECT blocked_statement FROM citus_lock_waits UNION SELECT current_statement_in_blocking_process FROM citus_lock_waits) ORDER BY 1 DESC; +} + // session s1 and s4 executes the commands on the same worker node session "s4" @@ -189,6 +197,59 @@ step "s4-commit-worker" SELECT run_commands_on_session_level_connection_to_node('COMMIT'); } + + +// on the coordinator, show that even if a backend is blocked on a DDL as the first command +// (e.g., as of today global pid has not been assigned), we can still show the blocking activity +// we use the following 4 sessions 5,6,7,8 for this purpose +session "s5" + +step "s5-begin" +{ + BEGIN; +} + +step "s5-alter" +{ + ALTER TABLE tt1 ADD COLUMN new_column INT; +} + +step "s5-rollback" +{ + ROLLBACK; +} + +session "s6" + +step "s6-select" +{ + SELECT user_id FROM tt1 ORDER BY user_id DESC LIMIT 1; +} + +session "s7" + +step "s7-alter" +{ + ALTER TABLE tt1 ADD COLUMN new_column INT; +} + +session "s8" + +step "s8-begin" +{ + BEGIN; +} + +step "s8-select" +{ + SELECT user_id FROM tt1 ORDER BY user_id DESC LIMIT 1; +} + +step "s8-rollback" +{ + ROLLBACK; +} + permutation "s1-begin" "s1-update-ref-table-from-coordinator" "s2-start-session-level-connection" "s2-begin-on-worker" "s2-update-ref-table" "s3-select-distributed-waiting-queries" "s1-commit" "s2-commit-worker" "s2-stop-connection" permutation "s1-start-session-level-connection" "s1-begin-on-worker" "s1-update-ref-table" "s2-start-session-level-connection" "s2-begin-on-worker" "s2-update-ref-table" "s3-select-distributed-waiting-queries" "s1-commit-worker" "s2-commit-worker" "s1-stop-connection" "s2-stop-connection" permutation "s1-start-session-level-connection" "s1-begin-on-worker" "s1-update-dist-table" "s2-start-session-level-connection" "s2-begin-on-worker" "s2-update-dist-table" "s3-select-distributed-waiting-queries" "s1-commit-worker" "s2-commit-worker" "s1-stop-connection" "s2-stop-connection" @@ -212,3 +273,10 @@ permutation "s1-start-session-level-connection" "s1-begin-on-worker" "s1-update- // we can find the blocking relationship permutation "s1-start-session-level-connection" "s1-begin-on-worker" "s1-update-dist-table-id-1" "s2-start-session-level-connection" "s2-update-dist-table-id-1" "s3-select-distributed-waiting-queries" "s1-commit-worker" "s1-stop-connection" "s2-stop-connection" permutation "s1-begin" "s1-update-ref-table-from-coordinator" "s2-start-session-level-connection" "s2-update-ref-table" "s3-select-distributed-waiting-queries" "s1-commit" "s2-stop-connection" + +// show that we can see blocking activity even if these are the first commands in the sessions +// such that global_pids have not been assigned +// in the second permutation, s3-show-actual-gpids shows the gpid for ALTER TABLE +// because ALTER TABLE is not blocked on the parser but during the execution (hence gpid already asssigned) +"s5-begin" "s5-alter" "s6-select" "s3-select-distributed-waiting-queries" "s3-show-actual-gpids" "s5-rollback" +"s8-begin" "s8-select" "s7-alter" "s3-select-distributed-waiting-queries" "s3-show-actual-gpids" "s8-rollback"