From 32b5cf257c392fc94da1667fc86063051fdee2a1 Mon Sep 17 00:00:00 2001 From: Onder Kalaci Date: Wed, 9 Feb 2022 15:48:48 +0100 Subject: [PATCH] Dumping wait edges becomes optionally scan all backends Before this commit, dumping wait edges can only be used for distributed deadlock detection purposes. With this commit, we open the possibility that we can use it for any backend. --- .../distributed/sql/citus--10.2-4--11.0-1.sql | 3 ++ .../sql/downgrades/citus--11.0-1--10.2-4.sql | 35 +++++++++++++++++++ .../udfs/dump_global_wait_edges/11.0-1.sql | 17 +++++++++ .../udfs/dump_global_wait_edges/latest.sql | 17 +++++++++ .../sql/udfs/dump_local_wait_edges/11.0-1.sql | 17 +++++++++ .../sql/udfs/dump_local_wait_edges/latest.sql | 17 +++++++++ .../test/distributed_deadlock_detection.c | 4 ++- .../distributed_deadlock_detection.c | 3 +- .../distributed/transaction/lock_graph.c | 31 ++++++++++++---- src/include/distributed/lock_graph.h | 2 +- 10 files changed, 136 insertions(+), 10 deletions(-) create mode 100644 src/backend/distributed/sql/udfs/dump_global_wait_edges/11.0-1.sql create mode 100644 src/backend/distributed/sql/udfs/dump_global_wait_edges/latest.sql create mode 100644 src/backend/distributed/sql/udfs/dump_local_wait_edges/11.0-1.sql create mode 100644 src/backend/distributed/sql/udfs/dump_local_wait_edges/latest.sql diff --git a/src/backend/distributed/sql/citus--10.2-4--11.0-1.sql b/src/backend/distributed/sql/citus--10.2-4--11.0-1.sql index c3ffbb1cb..8df040a12 100644 --- a/src/backend/distributed/sql/citus--10.2-4--11.0-1.sql +++ b/src/backend/distributed/sql/citus--10.2-4--11.0-1.sql @@ -17,6 +17,9 @@ #include "udfs/get_all_active_transactions/11.0-1.sql" #include "udfs/get_global_active_transactions/11.0-1.sql" +#include "udfs/dump_local_wait_edges/11.0-1.sql" +#include "udfs/dump_global_wait_edges/11.0-1.sql" + #include "udfs/citus_worker_stat_activity/11.0-1.sql" CREATE VIEW citus.citus_worker_stat_activity AS diff --git a/src/backend/distributed/sql/downgrades/citus--11.0-1--10.2-4.sql b/src/backend/distributed/sql/downgrades/citus--11.0-1--10.2-4.sql index e94ed0bbf..690e79f3f 100644 --- a/src/backend/distributed/sql/downgrades/citus--11.0-1--10.2-4.sql +++ b/src/backend/distributed/sql/downgrades/citus--11.0-1--10.2-4.sql @@ -208,4 +208,39 @@ SELECT * FROM pg_catalog.citus_worker_stat_activity(); ALTER VIEW citus.citus_worker_stat_activity SET SCHEMA pg_catalog; GRANT SELECT ON pg_catalog.citus_worker_stat_activity TO PUBLIC; +DROP FUNCTION dump_global_wait_edges; +CREATE FUNCTION pg_catalog.dump_local_wait_edges( + OUT waiting_pid int4, + OUT waiting_node_id int4, + OUT waiting_transaction_num int8, + OUT waiting_transaction_stamp timestamptz, + OUT blocking_pid int4, + OUT blocking_node_id int4, + OUT blocking_transaction_num int8, + OUT blocking_transaction_stamp timestamptz, + OUT blocking_transaction_waiting bool) +RETURNS SETOF RECORD +LANGUAGE C STRICT +AS $$MODULE_PATHNAME$$, $$dump_local_wait_edges$$; +COMMENT ON FUNCTION pg_catalog.dump_local_wait_edges() +IS 'returns all local lock wait chains, that start from distributed transactions'; + +DROP FUNCTION dump_global_wait_edges; +CREATE FUNCTION pg_catalog.dump_global_wait_edges( + OUT waiting_pid int4, + OUT waiting_node_id int4, + OUT waiting_transaction_num int8, + OUT waiting_transaction_stamp timestamptz, + OUT blocking_pid int4, + OUT blocking_node_id int4, + OUT blocking_transaction_num int8, + OUT blocking_transaction_stamp timestamptz, + OUT blocking_transaction_waiting bool) +RETURNS SETOF RECORD +LANGUAGE 'c' STRICT +AS $$MODULE_PATHNAME$$, $$dump_global_wait_edges$$; +COMMENT ON FUNCTION pg_catalog.dump_global_wait_edges() +IS 'returns a global list of blocked transactions originating from this node'; + + RESET search_path; diff --git a/src/backend/distributed/sql/udfs/dump_global_wait_edges/11.0-1.sql b/src/backend/distributed/sql/udfs/dump_global_wait_edges/11.0-1.sql new file mode 100644 index 000000000..950d08ccc --- /dev/null +++ b/src/backend/distributed/sql/udfs/dump_global_wait_edges/11.0-1.sql @@ -0,0 +1,17 @@ +DROP FUNCTION pg_catalog.dump_global_wait_edges CASCADE; +CREATE OR REPLACE FUNCTION pg_catalog.dump_global_wait_edges( + distributed_tx_only boolean DEFAULT false, + OUT waiting_pid int4, + OUT waiting_node_id int4, + OUT waiting_transaction_num int8, + OUT waiting_transaction_stamp timestamptz, + OUT blocking_pid int4, + OUT blocking_node_id int4, + OUT blocking_transaction_num int8, + OUT blocking_transaction_stamp timestamptz, + OUT blocking_transaction_waiting bool) +RETURNS SETOF RECORD +LANGUAGE C STRICT +AS $$MODULE_PATHNAME$$, $$dump_global_wait_edges$$; +COMMENT ON FUNCTION pg_catalog.dump_global_wait_edges(bool) +IS 'returns a global list of blocked backends originating from this node'; diff --git a/src/backend/distributed/sql/udfs/dump_global_wait_edges/latest.sql b/src/backend/distributed/sql/udfs/dump_global_wait_edges/latest.sql new file mode 100644 index 000000000..950d08ccc --- /dev/null +++ b/src/backend/distributed/sql/udfs/dump_global_wait_edges/latest.sql @@ -0,0 +1,17 @@ +DROP FUNCTION pg_catalog.dump_global_wait_edges CASCADE; +CREATE OR REPLACE FUNCTION pg_catalog.dump_global_wait_edges( + distributed_tx_only boolean DEFAULT false, + OUT waiting_pid int4, + OUT waiting_node_id int4, + OUT waiting_transaction_num int8, + OUT waiting_transaction_stamp timestamptz, + OUT blocking_pid int4, + OUT blocking_node_id int4, + OUT blocking_transaction_num int8, + OUT blocking_transaction_stamp timestamptz, + OUT blocking_transaction_waiting bool) +RETURNS SETOF RECORD +LANGUAGE C STRICT +AS $$MODULE_PATHNAME$$, $$dump_global_wait_edges$$; +COMMENT ON FUNCTION pg_catalog.dump_global_wait_edges(bool) +IS 'returns a global list of blocked backends originating from this node'; diff --git a/src/backend/distributed/sql/udfs/dump_local_wait_edges/11.0-1.sql b/src/backend/distributed/sql/udfs/dump_local_wait_edges/11.0-1.sql new file mode 100644 index 000000000..705210f9d --- /dev/null +++ b/src/backend/distributed/sql/udfs/dump_local_wait_edges/11.0-1.sql @@ -0,0 +1,17 @@ +DROP FUNCTION pg_catalog.dump_local_wait_edges CASCADE; +CREATE OR REPLACE FUNCTION pg_catalog.dump_local_wait_edges( + distributed_tx_only boolean DEFAULT false, + OUT waiting_pid int4, + OUT waiting_node_id int4, + OUT waiting_transaction_num int8, + OUT waiting_transaction_stamp timestamptz, + OUT blocking_pid int4, + OUT blocking_node_id int4, + OUT blocking_transaction_num int8, + OUT blocking_transaction_stamp timestamptz, + OUT blocking_transaction_waiting bool) +RETURNS SETOF RECORD +LANGUAGE C STRICT +AS $$MODULE_PATHNAME$$, $$dump_local_wait_edges$$; +COMMENT ON FUNCTION pg_catalog.dump_local_wait_edges(bool) +IS 'returns all local lock wait chains, that start from any citus backend'; \ No newline at end of file diff --git a/src/backend/distributed/sql/udfs/dump_local_wait_edges/latest.sql b/src/backend/distributed/sql/udfs/dump_local_wait_edges/latest.sql new file mode 100644 index 000000000..705210f9d --- /dev/null +++ b/src/backend/distributed/sql/udfs/dump_local_wait_edges/latest.sql @@ -0,0 +1,17 @@ +DROP FUNCTION pg_catalog.dump_local_wait_edges CASCADE; +CREATE OR REPLACE FUNCTION pg_catalog.dump_local_wait_edges( + distributed_tx_only boolean DEFAULT false, + OUT waiting_pid int4, + OUT waiting_node_id int4, + OUT waiting_transaction_num int8, + OUT waiting_transaction_stamp timestamptz, + OUT blocking_pid int4, + OUT blocking_node_id int4, + OUT blocking_transaction_num int8, + OUT blocking_transaction_stamp timestamptz, + OUT blocking_transaction_waiting bool) +RETURNS SETOF RECORD +LANGUAGE C STRICT +AS $$MODULE_PATHNAME$$, $$dump_local_wait_edges$$; +COMMENT ON FUNCTION pg_catalog.dump_local_wait_edges(bool) +IS 'returns all local lock wait chains, that start from any citus backend'; \ No newline at end of file diff --git a/src/backend/distributed/test/distributed_deadlock_detection.c b/src/backend/distributed/test/distributed_deadlock_detection.c index 448228158..2a66ad2e0 100644 --- a/src/backend/distributed/test/distributed_deadlock_detection.c +++ b/src/backend/distributed/test/distributed_deadlock_detection.c @@ -50,7 +50,9 @@ get_adjacency_list_wait_graph(PG_FUNCTION_ARGS) bool isNulls[2]; Tuplestorestate *tupleStore = SetupTuplestore(fcinfo, &tupleDescriptor); - WaitGraph *waitGraph = BuildGlobalWaitGraph(); + + bool onlyDistributedTx = true; + WaitGraph *waitGraph = BuildGlobalWaitGraph(onlyDistributedTx); HTAB *adjacencyList = BuildAdjacencyListsForWaitGraph(waitGraph); /* iterate on all nodes */ diff --git a/src/backend/distributed/transaction/distributed_deadlock_detection.c b/src/backend/distributed/transaction/distributed_deadlock_detection.c index f9e4adca0..60a8519fe 100644 --- a/src/backend/distributed/transaction/distributed_deadlock_detection.c +++ b/src/backend/distributed/transaction/distributed_deadlock_detection.c @@ -119,7 +119,8 @@ CheckForDistributedDeadlocks(void) return false; } - WaitGraph *waitGraph = BuildGlobalWaitGraph(); + bool onlyDistributedTx = true; + WaitGraph *waitGraph = BuildGlobalWaitGraph(onlyDistributedTx); HTAB *adjacencyLists = BuildAdjacencyListsForWaitGraph(waitGraph); int edgeCount = waitGraph->edgeCount; diff --git a/src/backend/distributed/transaction/lock_graph.c b/src/backend/distributed/transaction/lock_graph.c index aa37e4371..33dc7bbcf 100644 --- a/src/backend/distributed/transaction/lock_graph.c +++ b/src/backend/distributed/transaction/lock_graph.c @@ -47,7 +47,7 @@ typedef struct PROCStack static void AddWaitEdgeFromResult(WaitGraph *waitGraph, PGresult *result, int rowIndex); static void ReturnWaitGraph(WaitGraph *waitGraph, FunctionCallInfo fcinfo); -static WaitGraph * BuildLocalWaitGraph(void); +static WaitGraph * BuildLocalWaitGraph(bool onlyDistributedTx); static bool IsProcessWaitingForSafeOperations(PGPROC *proc); static void LockLockData(void); static void UnlockLockData(void); @@ -74,7 +74,9 @@ PG_FUNCTION_INFO_V1(dump_global_wait_edges); Datum dump_global_wait_edges(PG_FUNCTION_ARGS) { - WaitGraph *waitGraph = BuildGlobalWaitGraph(); + uint64 onlyDistributedTx = PG_GETARG_BOOL(0); + + WaitGraph *waitGraph = BuildGlobalWaitGraph(onlyDistributedTx); ReturnWaitGraph(waitGraph, fcinfo); @@ -86,16 +88,23 @@ dump_global_wait_edges(PG_FUNCTION_ARGS) * BuildGlobalWaitGraph builds a wait graph for distributed transactions * that originate from this node, including edges from all (other) worker * nodes. + * + * + * If onlyDistributedTx is true, we only return distributed transactions + * (e.g., AssignDistributedTransaction() or assign_distributed_transactions()) + * has been called for the process. Distributed deadlock detection only + * interested in these processes. */ WaitGraph * -BuildGlobalWaitGraph(void) +BuildGlobalWaitGraph(bool onlyDistributedTx) { List *workerNodeList = ActiveReadableNodeList(); char *nodeUser = CitusExtensionOwnerName(); List *connectionList = NIL; int32 localGroupId = GetLocalGroupId(); - WaitGraph *waitGraph = BuildLocalWaitGraph(); + /* deadlock detection is only interested in */ + WaitGraph *waitGraph = BuildLocalWaitGraph(onlyDistributedTx); /* open connections in parallel */ WorkerNode *workerNode = NULL; @@ -256,7 +265,9 @@ ParseTimestampTzField(PGresult *result, int rowIndex, int colIndex) Datum dump_local_wait_edges(PG_FUNCTION_ARGS) { - WaitGraph *waitGraph = BuildLocalWaitGraph(); + uint64 onlyDistributedTx = PG_GETARG_BOOL(0); + + WaitGraph *waitGraph = BuildLocalWaitGraph(onlyDistributedTx); ReturnWaitGraph(waitGraph, fcinfo); return (Datum) 0; @@ -328,9 +339,14 @@ ReturnWaitGraph(WaitGraph *waitGraph, FunctionCallInfo fcinfo) /* * BuildLocalWaitGraph builds a wait graph for distributed transactions * that originate from the local node. + * + * If onlyDistributedTx is true, we only return distributed transactions + * (e.g., AssignDistributedTransaction() or assign_distributed_transactions()) + * has been called for the process. Distributed deadlock detection only + * interested in these processes. */ static WaitGraph * -BuildLocalWaitGraph(void) +BuildLocalWaitGraph(bool onlyDistributedTx) { PROCStack remaining; int totalProcs = TotalProcCount(); @@ -379,7 +395,8 @@ BuildLocalWaitGraph(void) * care about distributed transactions for the purpose of distributed * deadlock detection. */ - if (!IsInDistributedTransaction(¤tBackendData)) + if (onlyDistributedTx && + !IsInDistributedTransaction(¤tBackendData)) { continue; } diff --git a/src/include/distributed/lock_graph.h b/src/include/distributed/lock_graph.h index 46fec1dee..8668fe798 100644 --- a/src/include/distributed/lock_graph.h +++ b/src/include/distributed/lock_graph.h @@ -58,7 +58,7 @@ typedef struct WaitGraph } WaitGraph; -extern WaitGraph * BuildGlobalWaitGraph(void); +extern WaitGraph * BuildGlobalWaitGraph(bool onlyDistributedTx); extern bool IsProcessWaitingForLock(PGPROC *proc); extern bool IsInDistributedTransaction(BackendData *backendData); extern TimestampTz ParseTimestampTzField(PGresult *result, int rowIndex, int colIndex);