Fix assertion error when rolling back to savepoint (#3868)

It was possible to get an assertion error, if a DML command was
cancelled that opened a connection and then "ROLLBACK TO SAVEPOINT" was
used to continue the transaction. The reason for this was that canceling
the transaction might leave the `claimedExclusively` flag on for (some
of) it's connections.

This caused an assertion failure because `CanUseExistingConnection`
would return false and a new connection would be opened, and then there
would be two connections doing DML for the same placement. Which is
disallowed. That this situation caused an assertion failure instead of
an error, means that without asserts this could possibly result in some
visibility bugs, similar to the ones described
https://github.com/citusdata/citus/issues/3867
pull/3924/head
Jelte Fennema 2020-06-30 11:31:46 +02:00 committed by GitHub
parent e28683a025
commit 02fa942be1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 71 additions and 4 deletions

View File

@ -1119,6 +1119,22 @@ CoordinatedRemoteTransactionsSavepointRollback(SubTransactionId subId)
}
FinishRemoteTransactionSavepointRollback(connection, subId);
/*
* We unclaim the connection now so it can be used again when
* continuing after the ROLLBACK TO SAVEPOINT.
* XXX: We do not undo our hadDML/hadDDL flags. This could result in
* some queries not being allowed on Citus that would actually be fine
* to execute. Changing this would require us to keep track for each
* savepoint which placement connections had DDL/DML executed at that
* point and if they were already. We also do not call
* ResetShardPlacementAssociation. This might result in suboptimal
* parallelism, because of placement associations that are not really
* necessary anymore because of ROLLBACK TO SAVEPOINT. To change this
* we would need to keep track of when a connection becomes associated
* to a placement.
*/
UnclaimConnection(connection);
}
}

View File

@ -488,8 +488,8 @@ ResetShardPlacementTransactionState(void)
/*
* Subtransaction callback - currently only used to remember whether a
* savepoint has been rolled back, as we don't support that.
* CoordinatedSubTransactionCallback is the callback used to implement
* distributed ROLLBACK TO SAVEPOINT.
*/
static void
CoordinatedSubTransactionCallback(SubXactEvent event, SubTransactionId subId,

View File

@ -0,0 +1,27 @@
-- Regression test for this issue:
-- https://github.com/citusdata/citus/issues/3622
SET citus.shard_count = 4;
SET citus.next_shard_id TO 1954000;
CREATE SCHEMA rollback_to_savepoint;
SET search_path TO rollback_to_savepoint;
CREATE TABLE t(a int);
SELECT create_distributed_table('t', 'a');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- This timeout is chosen such that the INSERT with
-- generate_series(1, 100000000) is cancelled at the right time to trigger the
-- bug
SET statement_timeout = '2s';
BEGIN;
INSERT INTO t VALUES (4);
SAVEPOINT s1;
INSERT INTO t SELECT i FROM generate_series(1, 10000000) i;
ERROR: canceling statement due to statement timeout
ROLLBACK TO SAVEPOINT s1;
INSERT INTO t SELECT i FROM generate_series(1, 100) i;
ROLLBACK;
DROP SCHEMA rollback_to_savepoint CASCADE;
NOTICE: drop cascades to table t

View File

@ -84,8 +84,8 @@ test: subquery_prepared_statements pg12 cte_inline
# ----------
# Miscellaneous tests to check our query planning behavior
# ----------
test: multi_deparse_shard_query multi_distributed_transaction_id multi_real_time_transaction intermediate_results limit_intermediate_size
test: multi_explain hyperscale_tutorial partitioned_intermediate_results distributed_intermediate_results
test: multi_deparse_shard_query multi_distributed_transaction_id intermediate_results limit_intermediate_size rollback_to_savepoint
test: multi_explain hyperscale_tutorial partitioned_intermediate_results distributed_intermediate_results multi_real_time_transaction
test: multi_basic_queries multi_complex_expressions multi_subquery multi_subquery_complex_queries multi_subquery_behavioral_analytics
test: multi_subquery_complex_reference_clause multi_subquery_window_functions multi_view multi_sql_function multi_prepare_sql
test: sql_procedure multi_function_in_join row_types materialized_view

View File

@ -64,6 +64,7 @@ test: multi_basic_queries multi_complex_expressions multi_subquery_complex_queri
test: multi_subquery_complex_reference_clause multi_subquery_window_functions multi_sql_function
test: multi_function_in_join row_types
test: multi_subquery_in_where_reference_clause full_join adaptive_executor propagate_set_commands
test: rollback_to_savepoint
test: multi_subquery_union multi_subquery_in_where_clause multi_subquery_misc
test: multi_limit_clause_approximate multi_single_relation_subquery set_role_in_transaction
test: multi_select_for_update

View File

@ -0,0 +1,23 @@
-- Regression test for this issue:
-- https://github.com/citusdata/citus/issues/3622
SET citus.shard_count = 4;
SET citus.next_shard_id TO 1954000;
CREATE SCHEMA rollback_to_savepoint;
SET search_path TO rollback_to_savepoint;
CREATE TABLE t(a int);
SELECT create_distributed_table('t', 'a');
-- This timeout is chosen such that the INSERT with
-- generate_series(1, 100000000) is cancelled at the right time to trigger the
-- bug
SET statement_timeout = '2s';
BEGIN;
INSERT INTO t VALUES (4);
SAVEPOINT s1;
INSERT INTO t SELECT i FROM generate_series(1, 10000000) i;
ROLLBACK TO SAVEPOINT s1;
INSERT INTO t SELECT i FROM generate_series(1, 100) i;
ROLLBACK;
DROP SCHEMA rollback_to_savepoint CASCADE;