diff --git a/src/backend/distributed/sql/citus--10.2-4--11.0-1.sql b/src/backend/distributed/sql/citus--10.2-4--11.0-1.sql index c3ffbb1cb..8df040a12 100644 --- a/src/backend/distributed/sql/citus--10.2-4--11.0-1.sql +++ b/src/backend/distributed/sql/citus--10.2-4--11.0-1.sql @@ -17,6 +17,9 @@ #include "udfs/get_all_active_transactions/11.0-1.sql" #include "udfs/get_global_active_transactions/11.0-1.sql" +#include "udfs/dump_local_wait_edges/11.0-1.sql" +#include "udfs/dump_global_wait_edges/11.0-1.sql" + #include "udfs/citus_worker_stat_activity/11.0-1.sql" CREATE VIEW citus.citus_worker_stat_activity AS diff --git a/src/backend/distributed/sql/downgrades/citus--11.0-1--10.2-4.sql b/src/backend/distributed/sql/downgrades/citus--11.0-1--10.2-4.sql index e94ed0bbf..690e79f3f 100644 --- a/src/backend/distributed/sql/downgrades/citus--11.0-1--10.2-4.sql +++ b/src/backend/distributed/sql/downgrades/citus--11.0-1--10.2-4.sql @@ -208,4 +208,39 @@ SELECT * FROM pg_catalog.citus_worker_stat_activity(); ALTER VIEW citus.citus_worker_stat_activity SET SCHEMA pg_catalog; GRANT SELECT ON pg_catalog.citus_worker_stat_activity TO PUBLIC; +DROP FUNCTION dump_global_wait_edges; +CREATE FUNCTION pg_catalog.dump_local_wait_edges( + OUT waiting_pid int4, + OUT waiting_node_id int4, + OUT waiting_transaction_num int8, + OUT waiting_transaction_stamp timestamptz, + OUT blocking_pid int4, + OUT blocking_node_id int4, + OUT blocking_transaction_num int8, + OUT blocking_transaction_stamp timestamptz, + OUT blocking_transaction_waiting bool) +RETURNS SETOF RECORD +LANGUAGE C STRICT +AS $$MODULE_PATHNAME$$, $$dump_local_wait_edges$$; +COMMENT ON FUNCTION pg_catalog.dump_local_wait_edges() +IS 'returns all local lock wait chains, that start from distributed transactions'; + +DROP FUNCTION dump_global_wait_edges; +CREATE FUNCTION pg_catalog.dump_global_wait_edges( + OUT waiting_pid int4, + OUT waiting_node_id int4, + OUT waiting_transaction_num int8, + OUT waiting_transaction_stamp timestamptz, + OUT blocking_pid int4, + OUT blocking_node_id int4, + OUT blocking_transaction_num int8, + OUT blocking_transaction_stamp timestamptz, + OUT blocking_transaction_waiting bool) +RETURNS SETOF RECORD +LANGUAGE 'c' STRICT +AS $$MODULE_PATHNAME$$, $$dump_global_wait_edges$$; +COMMENT ON FUNCTION pg_catalog.dump_global_wait_edges() +IS 'returns a global list of blocked transactions originating from this node'; + + RESET search_path; diff --git a/src/backend/distributed/sql/udfs/dump_global_wait_edges/11.0-1.sql b/src/backend/distributed/sql/udfs/dump_global_wait_edges/11.0-1.sql new file mode 100644 index 000000000..950d08ccc --- /dev/null +++ b/src/backend/distributed/sql/udfs/dump_global_wait_edges/11.0-1.sql @@ -0,0 +1,17 @@ +DROP FUNCTION pg_catalog.dump_global_wait_edges CASCADE; +CREATE OR REPLACE FUNCTION pg_catalog.dump_global_wait_edges( + distributed_tx_only boolean DEFAULT false, + OUT waiting_pid int4, + OUT waiting_node_id int4, + OUT waiting_transaction_num int8, + OUT waiting_transaction_stamp timestamptz, + OUT blocking_pid int4, + OUT blocking_node_id int4, + OUT blocking_transaction_num int8, + OUT blocking_transaction_stamp timestamptz, + OUT blocking_transaction_waiting bool) +RETURNS SETOF RECORD +LANGUAGE C STRICT +AS $$MODULE_PATHNAME$$, $$dump_global_wait_edges$$; +COMMENT ON FUNCTION pg_catalog.dump_global_wait_edges(bool) +IS 'returns a global list of blocked backends originating from this node'; diff --git a/src/backend/distributed/sql/udfs/dump_global_wait_edges/latest.sql b/src/backend/distributed/sql/udfs/dump_global_wait_edges/latest.sql new file mode 100644 index 000000000..950d08ccc --- /dev/null +++ b/src/backend/distributed/sql/udfs/dump_global_wait_edges/latest.sql @@ -0,0 +1,17 @@ +DROP FUNCTION pg_catalog.dump_global_wait_edges CASCADE; +CREATE OR REPLACE FUNCTION pg_catalog.dump_global_wait_edges( + distributed_tx_only boolean DEFAULT false, + OUT waiting_pid int4, + OUT waiting_node_id int4, + OUT waiting_transaction_num int8, + OUT waiting_transaction_stamp timestamptz, + OUT blocking_pid int4, + OUT blocking_node_id int4, + OUT blocking_transaction_num int8, + OUT blocking_transaction_stamp timestamptz, + OUT blocking_transaction_waiting bool) +RETURNS SETOF RECORD +LANGUAGE C STRICT +AS $$MODULE_PATHNAME$$, $$dump_global_wait_edges$$; +COMMENT ON FUNCTION pg_catalog.dump_global_wait_edges(bool) +IS 'returns a global list of blocked backends originating from this node'; diff --git a/src/backend/distributed/sql/udfs/dump_local_wait_edges/11.0-1.sql b/src/backend/distributed/sql/udfs/dump_local_wait_edges/11.0-1.sql new file mode 100644 index 000000000..705210f9d --- /dev/null +++ b/src/backend/distributed/sql/udfs/dump_local_wait_edges/11.0-1.sql @@ -0,0 +1,17 @@ +DROP FUNCTION pg_catalog.dump_local_wait_edges CASCADE; +CREATE OR REPLACE FUNCTION pg_catalog.dump_local_wait_edges( + distributed_tx_only boolean DEFAULT false, + OUT waiting_pid int4, + OUT waiting_node_id int4, + OUT waiting_transaction_num int8, + OUT waiting_transaction_stamp timestamptz, + OUT blocking_pid int4, + OUT blocking_node_id int4, + OUT blocking_transaction_num int8, + OUT blocking_transaction_stamp timestamptz, + OUT blocking_transaction_waiting bool) +RETURNS SETOF RECORD +LANGUAGE C STRICT +AS $$MODULE_PATHNAME$$, $$dump_local_wait_edges$$; +COMMENT ON FUNCTION pg_catalog.dump_local_wait_edges(bool) +IS 'returns all local lock wait chains, that start from any citus backend'; \ No newline at end of file diff --git a/src/backend/distributed/sql/udfs/dump_local_wait_edges/latest.sql b/src/backend/distributed/sql/udfs/dump_local_wait_edges/latest.sql new file mode 100644 index 000000000..705210f9d --- /dev/null +++ b/src/backend/distributed/sql/udfs/dump_local_wait_edges/latest.sql @@ -0,0 +1,17 @@ +DROP FUNCTION pg_catalog.dump_local_wait_edges CASCADE; +CREATE OR REPLACE FUNCTION pg_catalog.dump_local_wait_edges( + distributed_tx_only boolean DEFAULT false, + OUT waiting_pid int4, + OUT waiting_node_id int4, + OUT waiting_transaction_num int8, + OUT waiting_transaction_stamp timestamptz, + OUT blocking_pid int4, + OUT blocking_node_id int4, + OUT blocking_transaction_num int8, + OUT blocking_transaction_stamp timestamptz, + OUT blocking_transaction_waiting bool) +RETURNS SETOF RECORD +LANGUAGE C STRICT +AS $$MODULE_PATHNAME$$, $$dump_local_wait_edges$$; +COMMENT ON FUNCTION pg_catalog.dump_local_wait_edges(bool) +IS 'returns all local lock wait chains, that start from any citus backend'; \ No newline at end of file diff --git a/src/backend/distributed/test/distributed_deadlock_detection.c b/src/backend/distributed/test/distributed_deadlock_detection.c index 448228158..2a66ad2e0 100644 --- a/src/backend/distributed/test/distributed_deadlock_detection.c +++ b/src/backend/distributed/test/distributed_deadlock_detection.c @@ -50,7 +50,9 @@ get_adjacency_list_wait_graph(PG_FUNCTION_ARGS) bool isNulls[2]; Tuplestorestate *tupleStore = SetupTuplestore(fcinfo, &tupleDescriptor); - WaitGraph *waitGraph = BuildGlobalWaitGraph(); + + bool onlyDistributedTx = true; + WaitGraph *waitGraph = BuildGlobalWaitGraph(onlyDistributedTx); HTAB *adjacencyList = BuildAdjacencyListsForWaitGraph(waitGraph); /* iterate on all nodes */ diff --git a/src/backend/distributed/transaction/distributed_deadlock_detection.c b/src/backend/distributed/transaction/distributed_deadlock_detection.c index f9e4adca0..60a8519fe 100644 --- a/src/backend/distributed/transaction/distributed_deadlock_detection.c +++ b/src/backend/distributed/transaction/distributed_deadlock_detection.c @@ -119,7 +119,8 @@ CheckForDistributedDeadlocks(void) return false; } - WaitGraph *waitGraph = BuildGlobalWaitGraph(); + bool onlyDistributedTx = true; + WaitGraph *waitGraph = BuildGlobalWaitGraph(onlyDistributedTx); HTAB *adjacencyLists = BuildAdjacencyListsForWaitGraph(waitGraph); int edgeCount = waitGraph->edgeCount; diff --git a/src/backend/distributed/transaction/lock_graph.c b/src/backend/distributed/transaction/lock_graph.c index aa37e4371..33dc7bbcf 100644 --- a/src/backend/distributed/transaction/lock_graph.c +++ b/src/backend/distributed/transaction/lock_graph.c @@ -47,7 +47,7 @@ typedef struct PROCStack static void AddWaitEdgeFromResult(WaitGraph *waitGraph, PGresult *result, int rowIndex); static void ReturnWaitGraph(WaitGraph *waitGraph, FunctionCallInfo fcinfo); -static WaitGraph * BuildLocalWaitGraph(void); +static WaitGraph * BuildLocalWaitGraph(bool onlyDistributedTx); static bool IsProcessWaitingForSafeOperations(PGPROC *proc); static void LockLockData(void); static void UnlockLockData(void); @@ -74,7 +74,9 @@ PG_FUNCTION_INFO_V1(dump_global_wait_edges); Datum dump_global_wait_edges(PG_FUNCTION_ARGS) { - WaitGraph *waitGraph = BuildGlobalWaitGraph(); + uint64 onlyDistributedTx = PG_GETARG_BOOL(0); + + WaitGraph *waitGraph = BuildGlobalWaitGraph(onlyDistributedTx); ReturnWaitGraph(waitGraph, fcinfo); @@ -86,16 +88,23 @@ dump_global_wait_edges(PG_FUNCTION_ARGS) * BuildGlobalWaitGraph builds a wait graph for distributed transactions * that originate from this node, including edges from all (other) worker * nodes. + * + * + * If onlyDistributedTx is true, we only return distributed transactions + * (e.g., AssignDistributedTransaction() or assign_distributed_transactions()) + * has been called for the process. Distributed deadlock detection only + * interested in these processes. */ WaitGraph * -BuildGlobalWaitGraph(void) +BuildGlobalWaitGraph(bool onlyDistributedTx) { List *workerNodeList = ActiveReadableNodeList(); char *nodeUser = CitusExtensionOwnerName(); List *connectionList = NIL; int32 localGroupId = GetLocalGroupId(); - WaitGraph *waitGraph = BuildLocalWaitGraph(); + /* deadlock detection is only interested in */ + WaitGraph *waitGraph = BuildLocalWaitGraph(onlyDistributedTx); /* open connections in parallel */ WorkerNode *workerNode = NULL; @@ -256,7 +265,9 @@ ParseTimestampTzField(PGresult *result, int rowIndex, int colIndex) Datum dump_local_wait_edges(PG_FUNCTION_ARGS) { - WaitGraph *waitGraph = BuildLocalWaitGraph(); + uint64 onlyDistributedTx = PG_GETARG_BOOL(0); + + WaitGraph *waitGraph = BuildLocalWaitGraph(onlyDistributedTx); ReturnWaitGraph(waitGraph, fcinfo); return (Datum) 0; @@ -328,9 +339,14 @@ ReturnWaitGraph(WaitGraph *waitGraph, FunctionCallInfo fcinfo) /* * BuildLocalWaitGraph builds a wait graph for distributed transactions * that originate from the local node. + * + * If onlyDistributedTx is true, we only return distributed transactions + * (e.g., AssignDistributedTransaction() or assign_distributed_transactions()) + * has been called for the process. Distributed deadlock detection only + * interested in these processes. */ static WaitGraph * -BuildLocalWaitGraph(void) +BuildLocalWaitGraph(bool onlyDistributedTx) { PROCStack remaining; int totalProcs = TotalProcCount(); @@ -379,7 +395,8 @@ BuildLocalWaitGraph(void) * care about distributed transactions for the purpose of distributed * deadlock detection. */ - if (!IsInDistributedTransaction(¤tBackendData)) + if (onlyDistributedTx && + !IsInDistributedTransaction(¤tBackendData)) { continue; } diff --git a/src/include/distributed/lock_graph.h b/src/include/distributed/lock_graph.h index 46fec1dee..8668fe798 100644 --- a/src/include/distributed/lock_graph.h +++ b/src/include/distributed/lock_graph.h @@ -58,7 +58,7 @@ typedef struct WaitGraph } WaitGraph; -extern WaitGraph * BuildGlobalWaitGraph(void); +extern WaitGraph * BuildGlobalWaitGraph(bool onlyDistributedTx); extern bool IsProcessWaitingForLock(PGPROC *proc); extern bool IsInDistributedTransaction(BackendData *backendData); extern TimestampTz ParseTimestampTzField(PGresult *result, int rowIndex, int colIndex);