mirror of https://github.com/citusdata/citus.git
Flaky test: Fix recover_prepared_transactions (#3205)
Failed test: https://app.circleci.com/jobs/github/citusdata/citus/35994. We now always take a new connection.
pull/3201/head
parent
1ac96f228b
commit
1ed05be82c
|
@ -4,6 +4,7 @@
|
||||||
-- Replication factor, 1PC-2PC, sequential-parallel modes
|
-- Replication factor, 1PC-2PC, sequential-parallel modes
|
||||||
--
|
--
|
||||||
CREATE SCHEMA ddl_failure;
|
CREATE SCHEMA ddl_failure;
|
||||||
|
SET citus.force_max_query_parallelization TO ON;
|
||||||
SET search_path TO 'ddl_failure';
|
SET search_path TO 'ddl_failure';
|
||||||
-- do not cache any connections
|
-- do not cache any connections
|
||||||
SET citus.max_cached_conns_per_worker TO 0;
|
SET citus.max_cached_conns_per_worker TO 0;
|
||||||
|
|
|
@ -1,12 +1,13 @@
|
||||||
--
|
--
|
||||||
-- Test DDL command propagation failures
|
-- Test DDL command propagation failures
|
||||||
-- Different dimensions we're testing:
|
-- Different dimensions we're testing:
|
||||||
-- Replication factor, 1PC-2PC, sequential-parallel modes
|
-- Replication factor, 1PC-2PC, sequential-parallel modes
|
||||||
--
|
--
|
||||||
|
|
||||||
|
|
||||||
CREATE SCHEMA ddl_failure;
|
CREATE SCHEMA ddl_failure;
|
||||||
|
|
||||||
|
SET citus.force_max_query_parallelization TO ON;
|
||||||
SET search_path TO 'ddl_failure';
|
SET search_path TO 'ddl_failure';
|
||||||
|
|
||||||
-- do not cache any connections
|
-- do not cache any connections
|
||||||
|
@ -27,13 +28,13 @@ SET citus.shard_replication_factor = 1;
|
||||||
CREATE TABLE test_table (key int, value int);
|
CREATE TABLE test_table (key int, value int);
|
||||||
SELECT create_distributed_table('test_table', 'key');
|
SELECT create_distributed_table('test_table', 'key');
|
||||||
|
|
||||||
-- in the first test, kill just in the first
|
-- in the first test, kill just in the first
|
||||||
-- response we get from the worker
|
-- response we get from the worker
|
||||||
SELECT citus.mitmproxy('conn.onAuthenticationOk().kill()');
|
SELECT citus.mitmproxy('conn.onAuthenticationOk().kill()');
|
||||||
ALTER TABLE test_table ADD COLUMN new_column INT;
|
ALTER TABLE test_table ADD COLUMN new_column INT;
|
||||||
SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass;
|
SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass;
|
||||||
|
|
||||||
-- cancel just in the first
|
-- cancel just in the first
|
||||||
-- response we get from the worker
|
-- response we get from the worker
|
||||||
SELECT citus.mitmproxy('conn.onAuthenticationOk().cancel(' || pg_backend_pid() || ')');
|
SELECT citus.mitmproxy('conn.onAuthenticationOk().cancel(' || pg_backend_pid() || ')');
|
||||||
ALTER TABLE test_table ADD COLUMN new_column INT;
|
ALTER TABLE test_table ADD COLUMN new_column INT;
|
||||||
|
@ -71,7 +72,7 @@ SELECT citus.mitmproxy('conn.allow()');
|
||||||
-- since we've killed the connection just after
|
-- since we've killed the connection just after
|
||||||
-- the coordinator sends the COMMIT, the command should be applied
|
-- the coordinator sends the COMMIT, the command should be applied
|
||||||
-- to the distributed table and the shards on the other worker
|
-- to the distributed table and the shards on the other worker
|
||||||
-- however, there is no way to recover the failure on the shards
|
-- however, there is no way to recover the failure on the shards
|
||||||
-- that live in the failed worker, since we're running 1PC
|
-- that live in the failed worker, since we're running 1PC
|
||||||
SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass;
|
SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass;
|
||||||
SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1;
|
SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1;
|
||||||
|
@ -91,7 +92,7 @@ SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").cancel(' || pg_backend_pi
|
||||||
ALTER TABLE test_table ADD COLUMN new_column INT;
|
ALTER TABLE test_table ADD COLUMN new_column INT;
|
||||||
SELECT citus.mitmproxy('conn.allow()');
|
SELECT citus.mitmproxy('conn.allow()');
|
||||||
|
|
||||||
-- interrupts are held during COMMIT/ROLLBACK, so the command
|
-- interrupts are held during COMMIT/ROLLBACK, so the command
|
||||||
-- should have been applied without any issues since cancel is ignored
|
-- should have been applied without any issues since cancel is ignored
|
||||||
SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass;
|
SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass;
|
||||||
SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1;
|
SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1;
|
||||||
|
@ -99,7 +100,7 @@ SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORD
|
||||||
-- the following tests rely on the column not existing, so drop it manually
|
-- the following tests rely on the column not existing, so drop it manually
|
||||||
ALTER TABLE test_table DROP COLUMN new_column;
|
ALTER TABLE test_table DROP COLUMN new_column;
|
||||||
|
|
||||||
-- but now kill just after the worker sends response to
|
-- but now kill just after the worker sends response to
|
||||||
-- COMMIT command, so we'll have lots of warnings but the command
|
-- COMMIT command, so we'll have lots of warnings but the command
|
||||||
-- should have been committed both on the distributed table and the placements
|
-- should have been committed both on the distributed table and the placements
|
||||||
SET client_min_messages TO WARNING;
|
SET client_min_messages TO WARNING;
|
||||||
|
@ -112,7 +113,7 @@ SET client_min_messages TO ERROR;
|
||||||
SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass;
|
SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass;
|
||||||
SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1;
|
SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1;
|
||||||
|
|
||||||
-- now cancel just after the worker sends response to
|
-- now cancel just after the worker sends response to
|
||||||
-- but Postgres doesn't accept interrupts during COMMIT and ROLLBACK
|
-- but Postgres doesn't accept interrupts during COMMIT and ROLLBACK
|
||||||
-- so should not cancel at all, so not an effective test but adding in
|
-- so should not cancel at all, so not an effective test but adding in
|
||||||
-- case Citus messes up this behaviour
|
-- case Citus messes up this behaviour
|
||||||
|
@ -133,7 +134,7 @@ SET LOCAL client_min_messages TO WARNING;
|
||||||
ALTER TABLE test_table DROP COLUMN new_column;
|
ALTER TABLE test_table DROP COLUMN new_column;
|
||||||
ROLLBACK;
|
ROLLBACK;
|
||||||
|
|
||||||
-- now cancel just after the worker sends response to
|
-- now cancel just after the worker sends response to
|
||||||
-- but Postgres doesn't accept interrupts during COMMIT and ROLLBACK
|
-- but Postgres doesn't accept interrupts during COMMIT and ROLLBACK
|
||||||
-- so should not cancel at all, so not an effective test but adding in
|
-- so should not cancel at all, so not an effective test but adding in
|
||||||
-- case Citus messes up this behaviour
|
-- case Citus messes up this behaviour
|
||||||
|
@ -142,7 +143,7 @@ BEGIN;
|
||||||
ALTER TABLE test_table DROP COLUMN new_column;
|
ALTER TABLE test_table DROP COLUMN new_column;
|
||||||
ROLLBACK;
|
ROLLBACK;
|
||||||
|
|
||||||
-- but now kill just after the worker sends response to
|
-- but now kill just after the worker sends response to
|
||||||
-- ROLLBACK command, so we'll have lots of warnings but the command
|
-- ROLLBACK command, so we'll have lots of warnings but the command
|
||||||
-- should have been rolled back both on the distributed table and the placements
|
-- should have been rolled back both on the distributed table and the placements
|
||||||
SELECT citus.mitmproxy('conn.onCommandComplete(command="ROLLBACK").kill()');
|
SELECT citus.mitmproxy('conn.onCommandComplete(command="ROLLBACK").kill()');
|
||||||
|
@ -154,16 +155,16 @@ SELECT citus.mitmproxy('conn.allow()');
|
||||||
SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass;
|
SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass;
|
||||||
SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1;
|
SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1;
|
||||||
|
|
||||||
-- now, let's test with 2PC
|
-- now, let's test with 2PC
|
||||||
SET citus.multi_shard_commit_protocol TO '2pc';
|
SET citus.multi_shard_commit_protocol TO '2pc';
|
||||||
|
|
||||||
-- in the first test, kill just in the first
|
-- in the first test, kill just in the first
|
||||||
-- response we get from the worker
|
-- response we get from the worker
|
||||||
SELECT citus.mitmproxy('conn.onAuthenticationOk().kill()');
|
SELECT citus.mitmproxy('conn.onAuthenticationOk().kill()');
|
||||||
ALTER TABLE test_table DROP COLUMN new_column;
|
ALTER TABLE test_table DROP COLUMN new_column;
|
||||||
SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass;
|
SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass;
|
||||||
|
|
||||||
-- cancel just in the first
|
-- cancel just in the first
|
||||||
-- response we get from the worker
|
-- response we get from the worker
|
||||||
SELECT citus.mitmproxy('conn.onAuthenticationOk().cancel(' || pg_backend_pid() || ')');
|
SELECT citus.mitmproxy('conn.onAuthenticationOk().cancel(' || pg_backend_pid() || ')');
|
||||||
ALTER TABLE test_table DROP COLUMN new_column;
|
ALTER TABLE test_table DROP COLUMN new_column;
|
||||||
|
@ -257,7 +258,7 @@ SELECT citus.mitmproxy('conn.allow()');
|
||||||
SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass;
|
SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass;
|
||||||
SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1;
|
SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1;
|
||||||
|
|
||||||
-- but now kill just after the worker sends response to
|
-- but now kill just after the worker sends response to
|
||||||
-- ROLLBACK command, so we'll have lots of warnings but the command
|
-- ROLLBACK command, so we'll have lots of warnings but the command
|
||||||
-- should have been rolled back both on the distributed table and the placements
|
-- should have been rolled back both on the distributed table and the placements
|
||||||
SELECT citus.mitmproxy('conn.onCommandComplete(command="ROLLBACK").kill()');
|
SELECT citus.mitmproxy('conn.onCommandComplete(command="ROLLBACK").kill()');
|
||||||
|
@ -281,13 +282,13 @@ DROP TABLE test_table;
|
||||||
CREATE TABLE test_table (key int, value int);
|
CREATE TABLE test_table (key int, value int);
|
||||||
SELECT create_distributed_table('test_table', 'key');
|
SELECT create_distributed_table('test_table', 'key');
|
||||||
|
|
||||||
-- in the first test, kill just in the first
|
-- in the first test, kill just in the first
|
||||||
-- response we get from the worker
|
-- response we get from the worker
|
||||||
SELECT citus.mitmproxy('conn.onAuthenticationOk().kill()');
|
SELECT citus.mitmproxy('conn.onAuthenticationOk().kill()');
|
||||||
ALTER TABLE test_table ADD COLUMN new_column INT;
|
ALTER TABLE test_table ADD COLUMN new_column INT;
|
||||||
SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass;
|
SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass;
|
||||||
|
|
||||||
-- cancel just in the first
|
-- cancel just in the first
|
||||||
-- response we get from the worker
|
-- response we get from the worker
|
||||||
SELECT citus.mitmproxy('conn.onAuthenticationOk().cancel(' || pg_backend_pid() || ')');
|
SELECT citus.mitmproxy('conn.onAuthenticationOk().cancel(' || pg_backend_pid() || ')');
|
||||||
ALTER TABLE test_table ADD COLUMN new_column INT;
|
ALTER TABLE test_table ADD COLUMN new_column INT;
|
||||||
|
@ -321,7 +322,7 @@ SELECT citus.mitmproxy('conn.allow()');
|
||||||
-- we should be able to recover the transaction and
|
-- we should be able to recover the transaction and
|
||||||
-- see that the command is rolled back on all workers
|
-- see that the command is rolled back on all workers
|
||||||
-- note that in this case recover_prepared_transactions()
|
-- note that in this case recover_prepared_transactions()
|
||||||
-- sends ROLLBACK PREPARED to the workers given that
|
-- sends ROLLBACK PREPARED to the workers given that
|
||||||
-- the transaction has not been committed on any placement yet
|
-- the transaction has not been committed on any placement yet
|
||||||
SELECT recover_prepared_transactions();
|
SELECT recover_prepared_transactions();
|
||||||
SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1;
|
SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1;
|
||||||
|
@ -368,7 +369,7 @@ SELECT citus.mitmproxy('conn.allow()');
|
||||||
SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass;
|
SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass;
|
||||||
SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1;
|
SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1;
|
||||||
|
|
||||||
-- but now kill just after the worker sends response to
|
-- but now kill just after the worker sends response to
|
||||||
-- ROLLBACK command, so we'll have lots of warnings but the command
|
-- ROLLBACK command, so we'll have lots of warnings but the command
|
||||||
-- should have been rolled back both on the distributed table and the placements
|
-- should have been rolled back both on the distributed table and the placements
|
||||||
SELECT citus.mitmproxy('conn.onCommandComplete(command="ROLLBACK").kill()');
|
SELECT citus.mitmproxy('conn.onCommandComplete(command="ROLLBACK").kill()');
|
||||||
|
|
Loading…
Reference in New Issue