mirror of https://github.com/citusdata/citus.git
Fix bug while traversing the distributed deadlock graph
With this fix, we traverse the graph with DFS, as was originally intended. Note that before the fix we traversed the graph with BFS, which could lead to killing an unrelated backend that is not involved in the distributed deadlock. pull/1961/head
parent
bff44394fb
commit
9a89c0b425
|
@ -307,7 +307,7 @@ PrependOutgoingNodesToQueue(TransactionNode *transactionNode, int currentStackDe
|
||||||
queuedNode->transactionNode = waitForTransaction;
|
queuedNode->transactionNode = waitForTransaction;
|
||||||
queuedNode->currentStackDepth = currentStackDepth;
|
queuedNode->currentStackDepth = currentStackDepth;
|
||||||
|
|
||||||
*toBeVisitedNodes = lappend(*toBeVisitedNodes, queuedNode);
|
*toBeVisitedNodes = lcons(queuedNode, *toBeVisitedNodes);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -874,3 +874,85 @@ step s1-finish:
|
||||||
step s2-finish:
|
step s2-finish:
|
||||||
COMMIT;
|
COMMIT;
|
||||||
|
|
||||||
|
|
||||||
|
starting permutation: s1-begin s2-begin s3-begin s4-begin s5-begin s1-update-1 s3-update-3 s2-update-4 s2-update-3 s4-update-2 s5-random-adv-lock s4-random-adv-lock s3-update-1 s1-update-2-4 deadlock-checker-call deadlock-checker-call s5-finish s4-finish s2-finish s1-finish s3-finish
|
||||||
|
step s1-begin:
|
||||||
|
BEGIN;
|
||||||
|
|
||||||
|
step s2-begin:
|
||||||
|
BEGIN;
|
||||||
|
|
||||||
|
step s3-begin:
|
||||||
|
BEGIN;
|
||||||
|
|
||||||
|
step s4-begin:
|
||||||
|
BEGIN;
|
||||||
|
|
||||||
|
step s5-begin:
|
||||||
|
BEGIN;
|
||||||
|
|
||||||
|
step s1-update-1:
|
||||||
|
UPDATE deadlock_detection_test SET some_val = 1 WHERE user_id = 1;
|
||||||
|
|
||||||
|
step s3-update-3:
|
||||||
|
UPDATE deadlock_detection_test SET some_val = 3 WHERE user_id = 3;
|
||||||
|
|
||||||
|
step s2-update-4:
|
||||||
|
UPDATE deadlock_detection_test SET some_val = 2 WHERE user_id = 4;
|
||||||
|
|
||||||
|
step s2-update-3:
|
||||||
|
UPDATE deadlock_detection_test SET some_val = 2 WHERE user_id = 3;
|
||||||
|
<waiting ...>
|
||||||
|
step s4-update-2:
|
||||||
|
UPDATE deadlock_detection_test SET some_val = 4 WHERE user_id = 2;
|
||||||
|
|
||||||
|
step s5-random-adv-lock:
|
||||||
|
SELECT pg_advisory_xact_lock(8765);
|
||||||
|
|
||||||
|
pg_advisory_xact_lock
|
||||||
|
|
||||||
|
|
||||||
|
step s4-random-adv-lock:
|
||||||
|
SELECT pg_advisory_xact_lock(8765);
|
||||||
|
<waiting ...>
|
||||||
|
step s3-update-1:
|
||||||
|
UPDATE deadlock_detection_test SET some_val = 3 WHERE user_id = 1;
|
||||||
|
<waiting ...>
|
||||||
|
step s1-update-2-4:
|
||||||
|
UPDATE deadlock_detection_test SET some_val = 1 WHERE user_id = 2 OR user_id = 4;
|
||||||
|
<waiting ...>
|
||||||
|
step deadlock-checker-call:
|
||||||
|
SELECT check_distributed_deadlocks();
|
||||||
|
|
||||||
|
check_distributed_deadlocks
|
||||||
|
|
||||||
|
t
|
||||||
|
step s2-update-3: <... completed>
|
||||||
|
error in steps deadlock-checker-call s2-update-3: ERROR: canceling the transaction since it was involved in a distributed deadlock
|
||||||
|
step deadlock-checker-call:
|
||||||
|
SELECT check_distributed_deadlocks();
|
||||||
|
|
||||||
|
check_distributed_deadlocks
|
||||||
|
|
||||||
|
f
|
||||||
|
step s5-finish:
|
||||||
|
COMMIT;
|
||||||
|
|
||||||
|
step s4-random-adv-lock: <... completed>
|
||||||
|
pg_advisory_xact_lock
|
||||||
|
|
||||||
|
|
||||||
|
step s4-finish:
|
||||||
|
COMMIT;
|
||||||
|
|
||||||
|
step s1-update-2-4: <... completed>
|
||||||
|
step s2-finish:
|
||||||
|
COMMIT;
|
||||||
|
|
||||||
|
step s1-finish:
|
||||||
|
COMMIT;
|
||||||
|
|
||||||
|
step s3-update-1: <... completed>
|
||||||
|
step s3-finish:
|
||||||
|
COMMIT;
|
||||||
|
|
||||||
|
|
|
@ -97,6 +97,11 @@ step "s1-insert-ref-11"
|
||||||
INSERT INTO deadlock_detection_reference VALUES (11, 11);
|
INSERT INTO deadlock_detection_reference VALUES (11, 11);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
step "s1-update-2-4"
|
||||||
|
{
|
||||||
|
UPDATE deadlock_detection_test SET some_val = 1 WHERE user_id = 2 OR user_id = 4;
|
||||||
|
}
|
||||||
|
|
||||||
step "s1-finish"
|
step "s1-finish"
|
||||||
{
|
{
|
||||||
COMMIT;
|
COMMIT;
|
||||||
|
@ -124,6 +129,11 @@ step "s2-update-3"
|
||||||
UPDATE deadlock_detection_test SET some_val = 2 WHERE user_id = 3;
|
UPDATE deadlock_detection_test SET some_val = 2 WHERE user_id = 3;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
step "s2-update-4"
|
||||||
|
{
|
||||||
|
UPDATE deadlock_detection_test SET some_val = 2 WHERE user_id = 4;
|
||||||
|
}
|
||||||
|
|
||||||
step "s2-upsert-select-all"
|
step "s2-upsert-select-all"
|
||||||
{
|
{
|
||||||
INSERT INTO deadlock_detection_test SELECT * FROM deadlock_detection_test ON CONFLICT(user_id) DO UPDATE SET some_val = deadlock_detection_test.some_val + 5 RETURNING *;
|
INSERT INTO deadlock_detection_test SELECT * FROM deadlock_detection_test ON CONFLICT(user_id) DO UPDATE SET some_val = deadlock_detection_test.some_val + 5 RETURNING *;
|
||||||
|
@ -249,6 +259,11 @@ step "s4-update-7"
|
||||||
UPDATE deadlock_detection_test SET some_val = 4 WHERE user_id = 7;
|
UPDATE deadlock_detection_test SET some_val = 4 WHERE user_id = 7;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
step "s4-random-adv-lock"
|
||||||
|
{
|
||||||
|
SELECT pg_advisory_xact_lock(8765);
|
||||||
|
}
|
||||||
|
|
||||||
step "s4-finish"
|
step "s4-finish"
|
||||||
{
|
{
|
||||||
COMMIT;
|
COMMIT;
|
||||||
|
@ -296,6 +311,11 @@ step "s5-update-7"
|
||||||
UPDATE deadlock_detection_test SET some_val = 5 WHERE user_id = 7;
|
UPDATE deadlock_detection_test SET some_val = 5 WHERE user_id = 7;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
step "s5-random-adv-lock"
|
||||||
|
{
|
||||||
|
SELECT pg_advisory_xact_lock(8765);
|
||||||
|
}
|
||||||
|
|
||||||
step "s5-finish"
|
step "s5-finish"
|
||||||
{
|
{
|
||||||
COMMIT;
|
COMMIT;
|
||||||
|
@ -406,3 +426,11 @@ permutation "s1-begin" "s2-begin" "s3-begin" "s4-begin" "s5-begin" "s6-begin" "s
|
||||||
|
|
||||||
# a larger graph where the deadlock starts from the last node
|
# a larger graph where the deadlock starts from the last node
|
||||||
permutation "s1-begin" "s2-begin" "s3-begin" "s4-begin" "s5-begin" "s6-begin" "s5-update-5" "s3-update-2" "s2-update-2" "s4-update-4" "s3-update-4" "s4-update-5" "s1-update-4" "deadlock-checker-call" "s6-update-6" "s5-update-6" "s6-update-5" "deadlock-checker-call" "s5-finish" "s6-finish" "s4-finish" "s3-finish" "s1-finish" "s2-finish"
|
permutation "s1-begin" "s2-begin" "s3-begin" "s4-begin" "s5-begin" "s6-begin" "s5-update-5" "s3-update-2" "s2-update-2" "s4-update-4" "s3-update-4" "s4-update-5" "s1-update-4" "deadlock-checker-call" "s6-update-6" "s5-update-6" "s6-update-5" "deadlock-checker-call" "s5-finish" "s6-finish" "s4-finish" "s3-finish" "s1-finish" "s2-finish"
|
||||||
|
|
||||||
|
# a backend is blocked on multiple backends
|
||||||
|
# note that session 5 is not strictly necessary to simulate the deadlock
|
||||||
|
# we only added it so that session 4 waits on it
|
||||||
|
# thus if any cancellation happens on session 4, we'd be able to
|
||||||
|
# observe it; otherwise, cancelling idle backends has no effect
|
||||||
|
# (cancelling the wrong backend used to be a bug and has already been fixed)
|
||||||
|
permutation "s1-begin" "s2-begin" "s3-begin" "s4-begin" "s5-begin" "s1-update-1" "s3-update-3" "s2-update-4" "s2-update-3" "s4-update-2" "s5-random-adv-lock" "s4-random-adv-lock" "s3-update-1" "s1-update-2-4" "deadlock-checker-call" "deadlock-checker-call" "s5-finish" "s4-finish" "s2-finish" "s1-finish" "s3-finish"
|
||||||
|
|
Loading…
Reference in New Issue