From 9a89c0b4252d6db38e64334b23d745e9abe832ea Mon Sep 17 00:00:00 2001 From: Onder Kalaci Date: Fri, 19 Jan 2018 14:02:11 +0200 Subject: [PATCH 1/2] Fix bug while traversing the distributed deadlock graph With this fix, we traverse the graph with DFS which was originally intended. Note that, before the fix, we traverse the graph with BFS which might lead to killing some unrelated backend that is not involved in the distributed deadlock. --- .../distributed_deadlock_detection.c | 2 +- ...olation_distributed_deadlock_detection.out | 82 +++++++++++++++++++ ...lation_distributed_deadlock_detection.spec | 28 +++++++ 3 files changed, 111 insertions(+), 1 deletion(-) diff --git a/src/backend/distributed/transaction/distributed_deadlock_detection.c b/src/backend/distributed/transaction/distributed_deadlock_detection.c index 236942c70..0bf60c58d 100644 --- a/src/backend/distributed/transaction/distributed_deadlock_detection.c +++ b/src/backend/distributed/transaction/distributed_deadlock_detection.c @@ -307,7 +307,7 @@ PrependOutgoingNodesToQueue(TransactionNode *transactionNode, int currentStackDe queuedNode->transactionNode = waitForTransaction; queuedNode->currentStackDepth = currentStackDepth; - *toBeVisitedNodes = lappend(*toBeVisitedNodes, queuedNode); + *toBeVisitedNodes = lcons(queuedNode, *toBeVisitedNodes); } } diff --git a/src/test/regress/expected/isolation_distributed_deadlock_detection.out b/src/test/regress/expected/isolation_distributed_deadlock_detection.out index 5f2d7e8e9..6921743c6 100644 --- a/src/test/regress/expected/isolation_distributed_deadlock_detection.out +++ b/src/test/regress/expected/isolation_distributed_deadlock_detection.out @@ -874,3 +874,85 @@ step s1-finish: step s2-finish: COMMIT; + +starting permutation: s1-begin s2-begin s3-begin s4-begin s5-begin s1-update-1 s3-update-3 s2-update-4 s2-update-3 s4-update-2 s5-random-adv-lock s4-random-adv-lock s3-update-1 s1-update-2-4 deadlock-checker-call deadlock-checker-call s5-finish s4-finish s2-finish s1-finish s3-finish +step s1-begin: + BEGIN; + +step s2-begin: + BEGIN; + +step s3-begin: + BEGIN; + +step s4-begin: + BEGIN; + +step s5-begin: + BEGIN; + +step s1-update-1: + UPDATE deadlock_detection_test SET some_val = 1 WHERE user_id = 1; + +step s3-update-3: + UPDATE deadlock_detection_test SET some_val = 3 WHERE user_id = 3; + +step s2-update-4: + UPDATE deadlock_detection_test SET some_val = 2 WHERE user_id = 4; + +step s2-update-3: + UPDATE deadlock_detection_test SET some_val = 2 WHERE user_id = 3; + +step s4-update-2: + UPDATE deadlock_detection_test SET some_val = 4 WHERE user_id = 2; + +step s5-random-adv-lock: + SELECT pg_advisory_xact_lock(8765); + +pg_advisory_xact_lock + + +step s4-random-adv-lock: + SELECT pg_advisory_xact_lock(8765); + +step s3-update-1: + UPDATE deadlock_detection_test SET some_val = 3 WHERE user_id = 1; + +step s1-update-2-4: + UPDATE deadlock_detection_test SET some_val = 1 WHERE user_id = 2 OR user_id = 4; + +step deadlock-checker-call: + SELECT check_distributed_deadlocks(); + +check_distributed_deadlocks + +t +step s2-update-3: <... completed> +error in steps deadlock-checker-call s2-update-3: ERROR: canceling the transaction since it was involved in a distributed deadlock +step deadlock-checker-call: + SELECT check_distributed_deadlocks(); + +check_distributed_deadlocks + +f +step s5-finish: + COMMIT; + +step s4-random-adv-lock: <... completed> +pg_advisory_xact_lock + + +step s4-finish: + COMMIT; + +step s1-update-2-4: <... completed> +step s2-finish: + COMMIT; + +step s1-finish: + COMMIT; + +step s3-update-1: <... completed> +step s3-finish: + COMMIT; + diff --git a/src/test/regress/specs/isolation_distributed_deadlock_detection.spec b/src/test/regress/specs/isolation_distributed_deadlock_detection.spec index 9068133af..d860803ce 100644 --- a/src/test/regress/specs/isolation_distributed_deadlock_detection.spec +++ b/src/test/regress/specs/isolation_distributed_deadlock_detection.spec @@ -97,6 +97,11 @@ step "s1-insert-ref-11" INSERT INTO deadlock_detection_reference VALUES (11, 11); } +step "s1-update-2-4" +{ + UPDATE deadlock_detection_test SET some_val = 1 WHERE user_id = 2 OR user_id = 4; +} + step "s1-finish" { COMMIT; @@ -124,6 +129,11 @@ step "s2-update-3" UPDATE deadlock_detection_test SET some_val = 2 WHERE user_id = 3; } +step "s2-update-4" +{ + UPDATE deadlock_detection_test SET some_val = 2 WHERE user_id = 4; +} + step "s2-upsert-select-all" { INSERT INTO deadlock_detection_test SELECT * FROM deadlock_detection_test ON CONFLICT(user_id) DO UPDATE SET some_val = deadlock_detection_test.some_val + 5 RETURNING *; @@ -249,6 +259,11 @@ step "s4-update-7" UPDATE deadlock_detection_test SET some_val = 4 WHERE user_id = 7; } +step "s4-random-adv-lock" +{ + SELECT pg_advisory_xact_lock(8765); +} + step "s4-finish" { COMMIT; @@ -296,6 +311,11 @@ step "s5-update-7" UPDATE deadlock_detection_test SET some_val = 5 WHERE user_id = 7; } +step "s5-random-adv-lock" +{ + SELECT pg_advisory_xact_lock(8765); +} + step "s5-finish" { COMMIT; @@ -406,3 +426,11 @@ permutation "s1-begin" "s2-begin" "s3-begin" "s4-begin" "s5-begin" "s6-begin" "s # a larger graph where the deadlock starts from the last node permutation "s1-begin" "s2-begin" "s3-begin" "s4-begin" "s5-begin" "s6-begin" "s5-update-5" "s3-update-2" "s2-update-2" "s4-update-4" "s3-update-4" "s4-update-5" "s1-update-4" "deadlock-checker-call" "s6-update-6" "s5-update-6" "s6-update-5" "deadlock-checker-call" "s5-finish" "s6-finish" "s4-finish" "s3-finish" "s1-finish" "s2-finish" + +# a backend is blocked on multiple backends +# note that session 5 is not strictly necessary to simulate the deadlock +# we only added that such that session 4 waits on for that +# thus if any cancellation happens on session 4, we'd be able to +# observe it, otherwise cancelling idle backends has not affect +# (cancelling wrong backend used to be a bug and already fixed) +permutation "s1-begin" "s2-begin" "s3-begin" "s4-begin" "s5-begin" "s1-update-1" "s3-update-3" "s2-update-4" "s2-update-3" "s4-update-2" "s5-random-adv-lock" "s4-random-adv-lock" "s3-update-1" "s1-update-2-4" "deadlock-checker-call" "deadlock-checker-call" "s5-finish" "s4-finish" "s2-finish" "s1-finish" "s3-finish" From fbde87d2d074ef4e4d610a871c78c464f8119785 Mon Sep 17 00:00:00 2001 From: Onder Kalaci Date: Fri, 19 Jan 2018 14:06:09 +0200 Subject: [PATCH 2/2] Allocate enough space for transaction nodes This fix prevents any potential memory access that might occur while forming the deadlock path. --- .../distributed/transaction/distributed_deadlock_detection.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/distributed/transaction/distributed_deadlock_detection.c b/src/backend/distributed/transaction/distributed_deadlock_detection.c index 0bf60c58d..a2d847964 100644 --- a/src/backend/distributed/transaction/distributed_deadlock_detection.c +++ b/src/backend/distributed/transaction/distributed_deadlock_detection.c @@ -134,7 +134,7 @@ CheckForDistributedDeadlocks(void) { bool deadlockFound = false; List *deadlockPath = NIL; - TransactionNode *transactionNodeStack[edgeCount]; + TransactionNode *transactionNodeStack[edgeCount + 1]; /* we're only interested in finding deadlocks originating from this node */ if (transactionNode->transactionId.initiatorNodeIdentifier != localGroupId)