mirror of https://github.com/citusdata/citus.git
Fix wrong cancellation message about distributed deadlocks (#3956)
parent
634d6cf9d7
commit
392c5e2c34
|
@ -355,10 +355,12 @@ multi_log_hook(ErrorData *edata)
|
|||
{
|
||||
/*
|
||||
* Show the user a meaningful error message when a backend is cancelled
|
||||
* by the distributed deadlock detection.
|
||||
* by the distributed deadlock detection. Also reset the state for this,
|
||||
* since the next cancelation of the backend might have another reason.
|
||||
*/
|
||||
bool clearState = true;
|
||||
if (edata->elevel == ERROR && edata->sqlerrcode == ERRCODE_QUERY_CANCELED &&
|
||||
MyBackendGotCancelledDueToDeadlock())
|
||||
MyBackendGotCancelledDueToDeadlock(clearState))
|
||||
{
|
||||
edata->sqlerrcode = ERRCODE_T_R_DEADLOCK_DETECTED;
|
||||
edata->message = "canceling the transaction since it was "
|
||||
|
|
|
@ -645,6 +645,7 @@ UnSetDistributedTransactionId(void)
|
|||
|
||||
MyBackendData->databaseId = 0;
|
||||
MyBackendData->userId = 0;
|
||||
MyBackendData->cancelledDueToDeadlock = false;
|
||||
MyBackendData->transactionId.initiatorNodeIdentifier = 0;
|
||||
MyBackendData->transactionId.transactionOriginator = false;
|
||||
MyBackendData->transactionId.transactionNumber = 0;
|
||||
|
@ -855,9 +856,13 @@ CancelTransactionDueToDeadlock(PGPROC *proc)
|
|||
* MyBackendGotCancelledDueToDeadlock returns whether the current distributed
|
||||
* transaction was cancelled due to a deadlock. If the backend is not in a
|
||||
* distributed transaction, the function returns false.
|
||||
* We keep some session level state to keep track of if we were cancelled
|
||||
* because of a distributed deadlock. When clearState is true, this function
|
||||
* also resets that state. So after calling this function with clearState true,
|
||||
* a second would always return false.
|
||||
*/
|
||||
bool
|
||||
MyBackendGotCancelledDueToDeadlock(void)
|
||||
MyBackendGotCancelledDueToDeadlock(bool clearState)
|
||||
{
|
||||
bool cancelledDueToDeadlock = false;
|
||||
|
||||
|
@ -873,6 +878,10 @@ MyBackendGotCancelledDueToDeadlock(void)
|
|||
{
|
||||
cancelledDueToDeadlock = MyBackendData->cancelledDueToDeadlock;
|
||||
}
|
||||
if (clearState)
|
||||
{
|
||||
MyBackendData->cancelledDueToDeadlock = false;
|
||||
}
|
||||
|
||||
SpinLockRelease(&MyBackendData->mutex);
|
||||
|
||||
|
|
|
@ -65,7 +65,7 @@ extern void AssignDistributedTransactionId(void);
|
|||
extern void MarkCitusInitiatedCoordinatorBackend(void);
|
||||
extern void GetBackendDataForProc(PGPROC *proc, BackendData *result);
|
||||
extern void CancelTransactionDueToDeadlock(PGPROC *proc);
|
||||
extern bool MyBackendGotCancelledDueToDeadlock(void);
|
||||
extern bool MyBackendGotCancelledDueToDeadlock(bool clearState);
|
||||
extern List * ActiveDistributedTransactionNumbers(void);
|
||||
LocalTransactionId GetMyProcLocalTransactionId(void);
|
||||
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
--- Regression test for this issue:
|
||||
--- https://github.com/citusdata/citus/issues/3954
|
||||
SET citus.shard_count = 4;
|
||||
SET citus.shard_replication_factor = 1;
|
||||
SET citus.next_shard_id TO 4954000;
|
||||
CREATE SCHEMA wrong_cancel_message;
|
||||
SET search_path TO wrong_cancel_message;
|
||||
SET citus.force_max_query_parallelization TO ON;
|
||||
CREATE TABLE t_lock(id int);
|
||||
SELECT create_distributed_table('t_lock','id');
|
||||
create_distributed_table
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
INSERT INTO t_lock VALUES (1), (2), (3), (4);
|
||||
BEGIN;
|
||||
SELECT id, pg_advisory_lock(15) FROM t_lock;
|
||||
ERROR: canceling the transaction since it was involved in a distributed deadlock
|
||||
-- ERROR: canceling the transaction since it was involved in a distributed deadlock
|
||||
-- This one is expected, with force_max_query_parallelization the connections will deadlock themselves
|
||||
ROLLBACK;
|
||||
CREATE TABLE t_unrelated(a int);
|
||||
SELECT create_distributed_table('t_unrelated', 'a');
|
||||
create_distributed_table
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
BEGIN;
|
||||
SET LOCAL statement_timeout = '1ms';
|
||||
-- Ignore WARNING about non closed temporary file
|
||||
SET LOCAL client_min_messages to ERROR;
|
||||
INSERT INTO t_unrelated SELECT i FROM generate_series(1, 10) i;
|
||||
ERROR: canceling statement due to statement timeout
|
||||
ROLLBACK;
|
||||
\set VERBOSITY terse
|
||||
DROP SCHEMA wrong_cancel_message CASCADE;
|
||||
NOTICE: drop cascades to 2 other objects
|
|
@ -90,7 +90,7 @@ test: multi_basic_queries multi_complex_expressions multi_subquery multi_subquer
|
|||
test: multi_subquery_complex_reference_clause multi_subquery_window_functions multi_view multi_sql_function multi_prepare_sql
|
||||
test: sql_procedure multi_function_in_join row_types materialized_view
|
||||
test: multi_subquery_in_where_reference_clause full_join adaptive_executor propagate_set_commands
|
||||
test: multi_subquery_union multi_subquery_in_where_clause multi_subquery_misc
|
||||
test: multi_subquery_union multi_subquery_in_where_clause multi_subquery_misc statement_cancel_error_message
|
||||
test: multi_agg_distinct multi_agg_approximate_distinct multi_limit_clause_approximate multi_outer_join_reference multi_single_relation_subquery multi_prepare_plsql set_role_in_transaction
|
||||
test: multi_reference_table multi_select_for_update relation_access_tracking
|
||||
test: custom_aggregate_support aggregate_support tdigest_aggregate_support
|
||||
|
|
|
@ -0,0 +1,30 @@
|
|||
--- Regression test for this issue:
|
||||
--- https://github.com/citusdata/citus/issues/3954
|
||||
SET citus.shard_count = 4;
|
||||
SET citus.shard_replication_factor = 1;
|
||||
SET citus.next_shard_id TO 4954000;
|
||||
CREATE SCHEMA wrong_cancel_message;
|
||||
SET search_path TO wrong_cancel_message;
|
||||
|
||||
SET citus.force_max_query_parallelization TO ON;
|
||||
CREATE TABLE t_lock(id int);
|
||||
SELECT create_distributed_table('t_lock','id');
|
||||
INSERT INTO t_lock VALUES (1), (2), (3), (4);
|
||||
BEGIN;
|
||||
SELECT id, pg_advisory_lock(15) FROM t_lock;
|
||||
-- ERROR: canceling the transaction since it was involved in a distributed deadlock
|
||||
-- This one is expected, with force_max_query_parallelization the connections will deadlock themselves
|
||||
ROLLBACK;
|
||||
|
||||
CREATE TABLE t_unrelated(a int);
|
||||
SELECT create_distributed_table('t_unrelated', 'a');
|
||||
|
||||
BEGIN;
|
||||
SET LOCAL statement_timeout = '1ms';
|
||||
-- Ignore WARNING about non closed temporary file
|
||||
SET LOCAL client_min_messages to ERROR;
|
||||
INSERT INTO t_unrelated SELECT i FROM generate_series(1, 10) i;
|
||||
ROLLBACK;
|
||||
|
||||
\set VERBOSITY terse
|
||||
DROP SCHEMA wrong_cancel_message CASCADE;
|
Loading…
Reference in New Issue