Adds test failure_pg15.sql for duplicate error message cases

Each of the following tests: failure_ddl.sql, failure_truncate.sql,
failure_multi_dml.sql, failure_vacuum.sql has a part whose output
changes on PG15, because PG15 removed duplicate error messages.
This test file has been created to avoid adding 4 alternative
output files, one per affected test.

Relevant PG commit: 618c16707a6d6e8f5c83ede2092975e4670201ad

Branch: version-15-socket
Author: naisila, 2022-08-07 22:29:25 +03:00
Parent: 2161b183dd
Commit: 4ad8376796
12 changed files with 848 additions and 271 deletions
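
For context, the four test fragments consolidated into failure_pg15.sql all share the same failure-injection pattern: citus.mitmproxy() (the proxy-based harness that the failure schedule runs against) is told to kill the worker connection right after the worker acknowledges COMMIT, and the test then checks that the command still committed on the distributed table and its placements. Below is a minimal sketch of that pattern, condensed from the test bodies in this commit; it is not an additional test, and it assumes the mitmproxy harness is already running.

-- minimal sketch, condensed from the tests below; assumes the mitmproxy harness
SET citus.max_cached_conns_per_worker TO 0;   -- force fresh worker connections
SET client_min_messages TO WARNING;
-- kill the worker connection right after the worker acks COMMIT
SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT").kill()');
ALTER TABLE test_table ADD COLUMN new_column INT;
-- PG14 prints extra duplicated "connection not open" warnings here that PG15
-- no longer emits, which is why these fragments get their own expected output
-- (with a single alternative for the new test) instead of four alternatives.
SELECT citus.mitmproxy('conn.allow()');       -- stop injecting failures
SELECT recover_prepared_transactions();       -- clean up leftover prepared xacts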


@@ -175,55 +175,29 @@ SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORD
 (localhost,57637,100803,t,"{key,new_column,value}")
 (4 rows)
--- the following tests rely the column not exists, so drop manually
-ALTER TABLE test_table DROP COLUMN new_column;
--- but now kill just after the worker sends response to
--- COMMIT command, so we'll have lots of warnings but the command
--- should have been committed both on the distributed table and the placements
-SET client_min_messages TO WARNING;
-SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT").kill()');
-mitmproxy
----------------------------------------------------------------------
-(1 row)
-ALTER TABLE test_table ADD COLUMN new_column INT;
-WARNING: connection not open
-CONTEXT: while executing command on localhost:xxxxx
-WARNING: failed to commit transaction on localhost:xxxxx
-WARNING: connection not open
-CONTEXT: while executing command on localhost:xxxxx
-WARNING: connection not open
-CONTEXT: while executing command on localhost:xxxxx
-WARNING: failed to commit transaction on localhost:xxxxx
-WARNING: connection not open
-CONTEXT: while executing command on localhost:xxxxx
-SELECT citus.mitmproxy('conn.allow()');
-mitmproxy
----------------------------------------------------------------------
-(1 row)
-SET client_min_messages TO ERROR;
-SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass;
-array_agg
----------------------------------------------------------------------
-{key,new_column,value}
-(1 row)
-SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1;
-run_command_on_placements
----------------------------------------------------------------------
-(localhost,9060,100800,t,"{key,new_column,value}")
-(localhost,9060,100802,t,"{key,new_column,value}")
-(localhost,57637,100801,t,"{key,new_column,value}")
-(localhost,57637,100803,t,"{key,new_column,value}")
-(4 rows)
+-- Commenting out the following test since it has an output with no
+-- duplicate error messages in PG15
+-- To avoid adding alternative output file for this test, this
+-- part is moved to failure_pg15.sql file.
+-- Uncomment the following part when we drop support for PG14
+-- and we delete failure_pg15.sql file.
+-- -- the following tests rely the column not exists, so drop manually
+-- ALTER TABLE test_table DROP COLUMN new_column;
+-- -- but now kill just after the worker sends response to
+-- -- COMMIT command, so we'll have lots of warnings but the command
+-- -- should have been committed both on the distributed table and the placements
+-- SET client_min_messages TO WARNING;
+-- SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT").kill()');
+-- ALTER TABLE test_table ADD COLUMN new_column INT;
+-- SELECT citus.mitmproxy('conn.allow()');
+-- SET client_min_messages TO ERROR;
+-- SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass;
+-- SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1;
 -- now cancel just after the worker sends response to
 -- but Postgres doesn't accepts interrupts during COMMIT and ROLLBACK
 -- so should not cancel at all, so not an effective test but adding in
 -- case Citus messes up this behaviour
+SET client_min_messages TO ERROR;
 SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT").cancel(' || pg_backend_pid() || ')');
 mitmproxy
 ---------------------------------------------------------------------


@@ -358,76 +358,44 @@ SELECT * FROM dml_test ORDER BY id ASC;
 5 | Epsilon
 (4 rows)
--- drop table and recreate with different replication/sharding
-DROP TABLE dml_test;
-SET citus.shard_count = 1;
-SET citus.shard_replication_factor = 2; -- two placements
-CREATE TABLE dml_test (id integer, name text);
-SELECT create_distributed_table('dml_test', 'id');
-create_distributed_table
----------------------------------------------------------------------
-(1 row)
-COPY dml_test FROM STDIN WITH CSV;
+-- Commenting out the following test since it has an output with no
+-- duplicate error messages in PG15
+-- To avoid adding alternative output file for this test, this
+-- part is moved to failure_pg15.sql file.
+-- Uncomment the following part when we drop support for PG14
+-- and we delete failure_pg15.sql file.
+-- -- drop table and recreate with different replication/sharding
+-- DROP TABLE dml_test;
+-- SET citus.shard_count = 1;
+-- SET citus.shard_replication_factor = 2; -- two placements
+-- CREATE TABLE dml_test (id integer, name text);
+-- SELECT create_distributed_table('dml_test', 'id');
+-- COPY dml_test FROM STDIN WITH CSV;
+-- 1,Alpha
+-- 2,Beta
+-- 3,Gamma
+-- 4,Delta
+-- \.
 -- -- test multiple statements against a single shard, but with two placements
--- fail at PREPARED COMMIT as we use 2PC
-SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").kill()');
-mitmproxy
----------------------------------------------------------------------
-(1 row)
-BEGIN;
-DELETE FROM dml_test WHERE id = 1;
-DELETE FROM dml_test WHERE id = 2;
-INSERT INTO dml_test VALUES (5, 'Epsilon');
-UPDATE dml_test SET name = 'alpha' WHERE id = 1;
-UPDATE dml_test SET name = 'gamma' WHERE id = 3;
-COMMIT;
-WARNING: connection not open
-CONTEXT: while executing command on localhost:xxxxx
-WARNING: failed to commit transaction on localhost:xxxxx
-WARNING: connection not open
-CONTEXT: while executing command on localhost:xxxxx
--- all changes should be committed because we injected
--- the failure on the COMMIT time. And, we should not
--- mark any placements as INVALID
-SELECT citus.mitmproxy('conn.allow()');
-mitmproxy
----------------------------------------------------------------------
-(1 row)
-SELECT recover_prepared_transactions();
-recover_prepared_transactions
----------------------------------------------------------------------
-1
-(1 row)
-SELECT shardid FROM pg_dist_shard_placement WHERE shardstate = 3;
-shardid
----------------------------------------------------------------------
-(0 rows)
-SET citus.task_assignment_policy TO "round-robin";
-SELECT * FROM dml_test ORDER BY id ASC;
-id | name
----------------------------------------------------------------------
-3 | gamma
-4 | Delta
-5 | Epsilon
-(3 rows)
-SELECT * FROM dml_test ORDER BY id ASC;
-id | name
----------------------------------------------------------------------
-3 | gamma
-4 | Delta
-5 | Epsilon
-(3 rows)
-RESET citus.task_assignment_policy;
+-- -- fail at PREPARED COMMIT as we use 2PC
+-- SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").kill()');
+-- BEGIN;
+-- DELETE FROM dml_test WHERE id = 1;
+-- DELETE FROM dml_test WHERE id = 2;
+-- INSERT INTO dml_test VALUES (5, 'Epsilon');
+-- UPDATE dml_test SET name = 'alpha' WHERE id = 1;
+-- UPDATE dml_test SET name = 'gamma' WHERE id = 3;
+-- COMMIT;
+-- -- all changes should be committed because we injected
+-- -- the failure on the COMMIT time. And, we should not
+-- -- mark any placements as INVALID
+-- SELECT citus.mitmproxy('conn.allow()');
+-- SELECT recover_prepared_transactions();
+-- SELECT shardid FROM pg_dist_shard_placement WHERE shardstate = 3;
+-- SET citus.task_assignment_policy TO "round-robin";
+-- SELECT * FROM dml_test ORDER BY id ASC;
+-- SELECT * FROM dml_test ORDER BY id ASC;
+-- RESET citus.task_assignment_policy;
 -- drop table and recreate as reference table
 DROP TABLE dml_test;
 SET citus.shard_count = 2;


@@ -0,0 +1,254 @@
--
-- FAILURE_PG15
--
-- Each of the following tests: failure_ddl.sql, failure_truncate.sql
-- failure_multi_dml.sql, failure_vacuum.sql
-- has a part with alternative output for PG15 resulting
-- from removal of duplicate error messages
-- Relevant PG commit: 618c16707a6d6e8f5c83ede2092975e4670201ad
-- This test file has been created to avoid 4 alternative output files
CREATE SCHEMA pg15_failure;
SET citus.force_max_query_parallelization TO ON;
SET search_path TO 'pg15_failure';
-- do not cache any connections
SET citus.max_cached_conns_per_worker TO 0;
-- we don't want to see the prepared transaction numbers in the warnings
SET client_min_messages TO WARNING;
SELECT citus.mitmproxy('conn.allow()');
mitmproxy
---------------------------------------------------------------------
(1 row)
SET citus.next_shard_id TO 100700;
-- we'll start with replication factor 1, 2PC and parallel mode
SET citus.shard_count = 4;
SET citus.shard_replication_factor = 1;
CREATE TABLE test_table (key int, value int);
SELECT create_distributed_table('test_table', 'key');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- from failure_ddl.sql
-- but now kill just after the worker sends response to
-- COMMIT command, so we'll have lots of warnings but the command
-- should have been committed both on the distributed table and the placements
SET client_min_messages TO WARNING;
SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT").kill()');
mitmproxy
---------------------------------------------------------------------
(1 row)
ALTER TABLE test_table ADD COLUMN new_column INT;
WARNING: connection not open
CONTEXT: while executing command on localhost:xxxxx
WARNING: failed to commit transaction on localhost:xxxxx
WARNING: connection not open
CONTEXT: while executing command on localhost:xxxxx
WARNING: failed to commit transaction on localhost:xxxxx
SELECT citus.mitmproxy('conn.allow()');
mitmproxy
---------------------------------------------------------------------
(1 row)
SET client_min_messages TO ERROR;
SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass;
array_agg
---------------------------------------------------------------------
{key,new_column,value}
(1 row)
SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1;
run_command_on_placements
---------------------------------------------------------------------
(localhost,9060,100700,t,"{key,new_column,value}")
(localhost,9060,100702,t,"{key,new_column,value}")
(localhost,57637,100701,t,"{key,new_column,value}")
(localhost,57637,100703,t,"{key,new_column,value}")
(4 rows)
-- the following tests rely the column not exists, so drop manually
ALTER TABLE test_table DROP COLUMN new_column;
-- from failure_truncate.sql
CREATE VIEW unhealthy_shard_count AS
SELECT count(*)
FROM pg_dist_shard_placement pdsp
JOIN
pg_dist_shard pds
ON pdsp.shardid=pds.shardid
WHERE logicalrelid='pg15_failure.test_table'::regclass AND shardstate != 1;
INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x);
SET client_min_messages TO WARNING;
-- now kill just after the worker sends response to
-- COMMIT command, so we'll have lots of warnings but the command
-- should have been committed both on the distributed table and the placements
SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT").kill()');
mitmproxy
---------------------------------------------------------------------
(1 row)
TRUNCATE test_table;
WARNING: connection not open
CONTEXT: while executing command on localhost:xxxxx
WARNING: failed to commit transaction on localhost:xxxxx
WARNING: connection not open
CONTEXT: while executing command on localhost:xxxxx
WARNING: failed to commit transaction on localhost:xxxxx
SELECT citus.mitmproxy('conn.allow()');
mitmproxy
---------------------------------------------------------------------
(1 row)
SELECT * FROM unhealthy_shard_count;
count
---------------------------------------------------------------------
0
(1 row)
SELECT count(*) FROM test_table;
count
---------------------------------------------------------------------
0
(1 row)
-- from failure_multi_dml.sql
SET citus.shard_count = 1;
SET citus.shard_replication_factor = 2; -- two placements
CREATE TABLE dml_test (id integer, name text);
SELECT create_distributed_table('dml_test', 'id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
COPY dml_test FROM STDIN WITH CSV;
---- test multiple statements against a single shard, but with two placements
-- fail at PREPARED COMMIT as we use 2PC
SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").kill()');
mitmproxy
---------------------------------------------------------------------
(1 row)
BEGIN;
DELETE FROM dml_test WHERE id = 1;
DELETE FROM dml_test WHERE id = 2;
INSERT INTO dml_test VALUES (5, 'Epsilon');
UPDATE dml_test SET name = 'alpha' WHERE id = 1;
UPDATE dml_test SET name = 'gamma' WHERE id = 3;
COMMIT;
WARNING: connection not open
CONTEXT: while executing command on localhost:xxxxx
WARNING: failed to commit transaction on localhost:xxxxx
-- all changes should be committed because we injected
-- the failure on the COMMIT time. And, we should not
-- mark any placements as INVALID
SELECT citus.mitmproxy('conn.allow()');
mitmproxy
---------------------------------------------------------------------
(1 row)
SELECT recover_prepared_transactions();
recover_prepared_transactions
---------------------------------------------------------------------
1
(1 row)
SELECT shardid FROM pg_dist_shard_placement WHERE shardstate = 3;
shardid
---------------------------------------------------------------------
(0 rows)
SET citus.task_assignment_policy TO "round-robin";
SELECT * FROM dml_test ORDER BY id ASC;
id | name
---------------------------------------------------------------------
3 | gamma
4 | Delta
5 | Epsilon
(3 rows)
SELECT * FROM dml_test ORDER BY id ASC;
id | name
---------------------------------------------------------------------
3 | gamma
4 | Delta
5 | Epsilon
(3 rows)
RESET citus.task_assignment_policy;
-- from failure_vacuum.sql
CREATE TABLE vacuum_test (key int, value int);
SELECT create_distributed_table('vacuum_test', 'key');
create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT citus.clear_network_traffic();
clear_network_traffic
---------------------------------------------------------------------
(1 row)
SELECT citus.mitmproxy('conn.onQuery(query="^VACUUM").kill()');
mitmproxy
---------------------------------------------------------------------
(1 row)
VACUUM vacuum_test;
ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open
SELECT citus.mitmproxy('conn.onQuery(query="^ANALYZE").kill()');
mitmproxy
---------------------------------------------------------------------
(1 row)
ANALYZE vacuum_test;
ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open
SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").kill()');
mitmproxy
---------------------------------------------------------------------
(1 row)
ANALYZE vacuum_test;
WARNING: connection not open
CONTEXT: while executing command on localhost:xxxxx
WARNING: failed to commit transaction on localhost:xxxxx
SELECT citus.mitmproxy('conn.allow()');
mitmproxy
---------------------------------------------------------------------
(1 row)
SELECT recover_prepared_transactions();
recover_prepared_transactions
---------------------------------------------------------------------
1
(1 row)
-- ANALYZE transactions being critical is an open question, see #2430
-- show that we never mark as INVALID on COMMIT FAILURE
SELECT shardid, shardstate FROM pg_dist_shard_placement where shardstate != 1 AND
shardid in ( SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'vacuum_test'::regclass);
shardid | shardstate
---------------------------------------------------------------------
(0 rows)
-- Clean up
SELECT citus.mitmproxy('conn.allow()');
mitmproxy
---------------------------------------------------------------------
(1 row)
DROP SCHEMA pg15_failure CASCADE;


@@ -0,0 +1,266 @@
--
-- FAILURE_PG15
--
-- Each of the following tests: failure_ddl.sql, failure_truncate.sql
-- failure_multi_dml.sql, failure_vacuum.sql
-- has a part with alternative output for PG15 resulting
-- from removal of duplicate error messages
-- Relevant PG commit: 618c16707a6d6e8f5c83ede2092975e4670201ad
-- This test file has been created to avoid 4 alternative output files
CREATE SCHEMA pg15_failure;
SET citus.force_max_query_parallelization TO ON;
SET search_path TO 'pg15_failure';
-- do not cache any connections
SET citus.max_cached_conns_per_worker TO 0;
-- we don't want to see the prepared transaction numbers in the warnings
SET client_min_messages TO WARNING;
SELECT citus.mitmproxy('conn.allow()');
mitmproxy
---------------------------------------------------------------------
(1 row)
SET citus.next_shard_id TO 100700;
-- we'll start with replication factor 1, 2PC and parallel mode
SET citus.shard_count = 4;
SET citus.shard_replication_factor = 1;
CREATE TABLE test_table (key int, value int);
SELECT create_distributed_table('test_table', 'key');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- from failure_ddl.sql
-- but now kill just after the worker sends response to
-- COMMIT command, so we'll have lots of warnings but the command
-- should have been committed both on the distributed table and the placements
SET client_min_messages TO WARNING;
SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT").kill()');
mitmproxy
---------------------------------------------------------------------
(1 row)
ALTER TABLE test_table ADD COLUMN new_column INT;
WARNING: connection not open
CONTEXT: while executing command on localhost:xxxxx
WARNING: failed to commit transaction on localhost:xxxxx
WARNING: connection not open
CONTEXT: while executing command on localhost:xxxxx
WARNING: connection not open
CONTEXT: while executing command on localhost:xxxxx
WARNING: failed to commit transaction on localhost:xxxxx
WARNING: connection not open
CONTEXT: while executing command on localhost:xxxxx
SELECT citus.mitmproxy('conn.allow()');
mitmproxy
---------------------------------------------------------------------
(1 row)
SET client_min_messages TO ERROR;
SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass;
array_agg
---------------------------------------------------------------------
{key,new_column,value}
(1 row)
SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1;
run_command_on_placements
---------------------------------------------------------------------
(localhost,9060,100700,t,"{key,new_column,value}")
(localhost,9060,100702,t,"{key,new_column,value}")
(localhost,57637,100701,t,"{key,new_column,value}")
(localhost,57637,100703,t,"{key,new_column,value}")
(4 rows)
-- the following tests rely the column not exists, so drop manually
ALTER TABLE test_table DROP COLUMN new_column;
-- from failure_truncate.sql
CREATE VIEW unhealthy_shard_count AS
SELECT count(*)
FROM pg_dist_shard_placement pdsp
JOIN
pg_dist_shard pds
ON pdsp.shardid=pds.shardid
WHERE logicalrelid='pg15_failure.test_table'::regclass AND shardstate != 1;
INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x);
SET client_min_messages TO WARNING;
-- now kill just after the worker sends response to
-- COMMIT command, so we'll have lots of warnings but the command
-- should have been committed both on the distributed table and the placements
SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT").kill()');
mitmproxy
---------------------------------------------------------------------
(1 row)
TRUNCATE test_table;
WARNING: connection not open
CONTEXT: while executing command on localhost:xxxxx
WARNING: failed to commit transaction on localhost:xxxxx
WARNING: connection not open
CONTEXT: while executing command on localhost:xxxxx
WARNING: connection not open
CONTEXT: while executing command on localhost:xxxxx
WARNING: failed to commit transaction on localhost:xxxxx
WARNING: connection not open
CONTEXT: while executing command on localhost:xxxxx
SELECT citus.mitmproxy('conn.allow()');
mitmproxy
---------------------------------------------------------------------
(1 row)
SELECT * FROM unhealthy_shard_count;
count
---------------------------------------------------------------------
0
(1 row)
SELECT count(*) FROM test_table;
count
---------------------------------------------------------------------
0
(1 row)
-- from failure_multi_dml.sql
SET citus.shard_count = 1;
SET citus.shard_replication_factor = 2; -- two placements
CREATE TABLE dml_test (id integer, name text);
SELECT create_distributed_table('dml_test', 'id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
COPY dml_test FROM STDIN WITH CSV;
---- test multiple statements against a single shard, but with two placements
-- fail at PREPARED COMMIT as we use 2PC
SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").kill()');
mitmproxy
---------------------------------------------------------------------
(1 row)
BEGIN;
DELETE FROM dml_test WHERE id = 1;
DELETE FROM dml_test WHERE id = 2;
INSERT INTO dml_test VALUES (5, 'Epsilon');
UPDATE dml_test SET name = 'alpha' WHERE id = 1;
UPDATE dml_test SET name = 'gamma' WHERE id = 3;
COMMIT;
WARNING: connection not open
CONTEXT: while executing command on localhost:xxxxx
WARNING: failed to commit transaction on localhost:xxxxx
WARNING: connection not open
CONTEXT: while executing command on localhost:xxxxx
-- all changes should be committed because we injected
-- the failure on the COMMIT time. And, we should not
-- mark any placements as INVALID
SELECT citus.mitmproxy('conn.allow()');
mitmproxy
---------------------------------------------------------------------
(1 row)
SELECT recover_prepared_transactions();
recover_prepared_transactions
---------------------------------------------------------------------
1
(1 row)
SELECT shardid FROM pg_dist_shard_placement WHERE shardstate = 3;
shardid
---------------------------------------------------------------------
(0 rows)
SET citus.task_assignment_policy TO "round-robin";
SELECT * FROM dml_test ORDER BY id ASC;
id | name
---------------------------------------------------------------------
3 | gamma
4 | Delta
5 | Epsilon
(3 rows)
SELECT * FROM dml_test ORDER BY id ASC;
id | name
---------------------------------------------------------------------
3 | gamma
4 | Delta
5 | Epsilon
(3 rows)
RESET citus.task_assignment_policy;
-- from failure_vacuum.sql
CREATE TABLE vacuum_test (key int, value int);
SELECT create_distributed_table('vacuum_test', 'key');
create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT citus.clear_network_traffic();
clear_network_traffic
---------------------------------------------------------------------
(1 row)
SELECT citus.mitmproxy('conn.onQuery(query="^VACUUM").kill()');
mitmproxy
---------------------------------------------------------------------
(1 row)
VACUUM vacuum_test;
ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open
SELECT citus.mitmproxy('conn.onQuery(query="^ANALYZE").kill()');
mitmproxy
---------------------------------------------------------------------
(1 row)
ANALYZE vacuum_test;
ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open
SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").kill()');
mitmproxy
---------------------------------------------------------------------
(1 row)
ANALYZE vacuum_test;
WARNING: connection not open
CONTEXT: while executing command on localhost:xxxxx
WARNING: failed to commit transaction on localhost:xxxxx
WARNING: connection not open
CONTEXT: while executing command on localhost:xxxxx
SELECT citus.mitmproxy('conn.allow()');
mitmproxy
---------------------------------------------------------------------
(1 row)
SELECT recover_prepared_transactions();
recover_prepared_transactions
---------------------------------------------------------------------
1
(1 row)
-- ANALYZE transactions being critical is an open question, see #2430
-- show that we never mark as INVALID on COMMIT FAILURE
SELECT shardid, shardstate FROM pg_dist_shard_placement where shardstate != 1 AND
shardid in ( SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'vacuum_test'::regclass);
shardid | shardstate
---------------------------------------------------------------------
(0 rows)
-- Clean up
SELECT citus.mitmproxy('conn.allow()');
mitmproxy
---------------------------------------------------------------------
(1 row)
DROP SCHEMA pg15_failure CASCADE;


@@ -266,47 +266,23 @@ SELECT count(*) FROM test_table;
 -- refill the table
 TRUNCATE test_table;
 INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x);
-SET client_min_messages TO WARNING;
--- now kill just after the worker sends response to
--- COMMIT command, so we'll have lots of warnings but the command
--- should have been committed both on the distributed table and the placements
-SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT").kill()');
-mitmproxy
----------------------------------------------------------------------
-(1 row)
-TRUNCATE test_table;
-WARNING: connection not open
-CONTEXT: while executing command on localhost:xxxxx
-WARNING: failed to commit transaction on localhost:xxxxx
-WARNING: connection not open
-CONTEXT: while executing command on localhost:xxxxx
-WARNING: connection not open
-CONTEXT: while executing command on localhost:xxxxx
-WARNING: failed to commit transaction on localhost:xxxxx
-WARNING: connection not open
-CONTEXT: while executing command on localhost:xxxxx
-SELECT citus.mitmproxy('conn.allow()');
-mitmproxy
----------------------------------------------------------------------
-(1 row)
-SELECT * FROM unhealthy_shard_count;
-count
----------------------------------------------------------------------
-0
-(1 row)
-SELECT count(*) FROM test_table;
-count
----------------------------------------------------------------------
-0
-(1 row)
-SET client_min_messages TO ERROR;
-INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x);
+-- Commenting out the following test since it has an output with no
+-- duplicate error messages in PG15
+-- To avoid adding alternative output file for this test, this
+-- part is moved to failure_pg15.sql file.
+-- Uncomment the following part when we drop support for PG14
+-- and we delete failure_pg15.sql file.
+-- SET client_min_messages TO WARNING;
+-- -- now kill just after the worker sends response to
+-- -- COMMIT command, so we'll have lots of warnings but the command
+-- -- should have been committed both on the distributed table and the placements
+-- SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT").kill()');
+-- TRUNCATE test_table;
+-- SELECT citus.mitmproxy('conn.allow()');
+-- SELECT * FROM unhealthy_shard_count;
+-- SELECT count(*) FROM test_table;
+-- SET client_min_messages TO ERROR;
+-- INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x);
 -- now cancel just after the worker sends response to
 -- but Postgres doesn't accept interrupts during COMMIT and ROLLBACK
 -- so should not cancel at all, so not an effective test but adding in


@@ -23,54 +23,24 @@ SELECT citus.clear_network_traffic();
 (1 row)
-SELECT citus.mitmproxy('conn.onQuery(query="^VACUUM").kill()');
-mitmproxy
----------------------------------------------------------------------
-(1 row)
-VACUUM vacuum_test;
-ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open
-SELECT citus.mitmproxy('conn.onQuery(query="^ANALYZE").kill()');
-mitmproxy
----------------------------------------------------------------------
-(1 row)
-ANALYZE vacuum_test;
-ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open
-SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").kill()');
-mitmproxy
----------------------------------------------------------------------
-(1 row)
-ANALYZE vacuum_test;
-WARNING: connection not open
-CONTEXT: while executing command on localhost:xxxxx
-WARNING: failed to commit transaction on localhost:xxxxx
-WARNING: connection not open
-CONTEXT: while executing command on localhost:xxxxx
-SELECT citus.mitmproxy('conn.allow()');
-mitmproxy
----------------------------------------------------------------------
-(1 row)
-SELECT recover_prepared_transactions();
-recover_prepared_transactions
----------------------------------------------------------------------
-1
-(1 row)
--- ANALYZE transactions being critical is an open question, see #2430
--- show that we never mark as INVALID on COMMIT FAILURE
-SELECT shardid, shardstate FROM pg_dist_shard_placement where shardstate != 1 AND
-shardid in ( SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'vacuum_test'::regclass);
-shardid | shardstate
----------------------------------------------------------------------
-(0 rows)
+-- Commenting out the following test since it has an output with no
+-- duplicate error messages in PG15
+-- To avoid adding alternative output file for this test, this
+-- part is moved to failure_pg15.sql file.
+-- Uncomment the following part when we drop support for PG14
+-- and we delete failure_pg15.sql file.
+-- SELECT citus.mitmproxy('conn.onQuery(query="^VACUUM").kill()');
+-- VACUUM vacuum_test;
+-- SELECT citus.mitmproxy('conn.onQuery(query="^ANALYZE").kill()');
+-- ANALYZE vacuum_test;
+-- SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").kill()');
+-- ANALYZE vacuum_test;
+-- SELECT citus.mitmproxy('conn.allow()');
+-- SELECT recover_prepared_transactions();
+-- -- ANALYZE transactions being critical is an open question, see #2430
+-- -- show that we never mark as INVALID on COMMIT FAILURE
+-- SELECT shardid, shardstate FROM pg_dist_shard_placement where shardstate != 1 AND
+-- shardid in ( SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'vacuum_test'::regclass);
 -- the same tests with cancel
 SELECT citus.mitmproxy('conn.onQuery(query="^VACUUM").cancel(' || pg_backend_pid() || ')');
 mitmproxy


@@ -25,6 +25,7 @@ test: failure_cte_subquery
 test: failure_insert_select_via_coordinator
 test: failure_multi_dml
 test: failure_vacuum
+test: failure_pg15
 test: failure_ref_tables
 test: failure_insert_select_pushdown
 test: failure_single_mod


@@ -83,26 +83,34 @@ SELECT citus.mitmproxy('conn.allow()');
 SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass;
 SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1;
--- the following tests rely the column not exists, so drop manually
-ALTER TABLE test_table DROP COLUMN new_column;
--- but now kill just after the worker sends response to
--- COMMIT command, so we'll have lots of warnings but the command
--- should have been committed both on the distributed table and the placements
-SET client_min_messages TO WARNING;
-SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT").kill()');
-ALTER TABLE test_table ADD COLUMN new_column INT;
-SELECT citus.mitmproxy('conn.allow()');
-SET client_min_messages TO ERROR;
-SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass;
-SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1;
+-- Commenting out the following test since it has an output with no
+-- duplicate error messages in PG15
+-- To avoid adding alternative output file for this test, this
+-- part is moved to failure_pg15.sql file.
+-- Uncomment the following part when we drop support for PG14
+-- and we delete failure_pg15.sql file.
+-- -- the following tests rely the column not exists, so drop manually
+-- ALTER TABLE test_table DROP COLUMN new_column;
+-- -- but now kill just after the worker sends response to
+-- -- COMMIT command, so we'll have lots of warnings but the command
+-- -- should have been committed both on the distributed table and the placements
+-- SET client_min_messages TO WARNING;
+-- SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT").kill()');
+-- ALTER TABLE test_table ADD COLUMN new_column INT;
+-- SELECT citus.mitmproxy('conn.allow()');
+-- SET client_min_messages TO ERROR;
+-- SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass;
+-- SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1;
 -- now cancel just after the worker sends response to
 -- but Postgres doesn't accepts interrupts during COMMIT and ROLLBACK
 -- so should not cancel at all, so not an effective test but adding in
 -- case Citus messes up this behaviour
+SET client_min_messages TO ERROR;
 SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT").cancel(' || pg_backend_pid() || ')');
 ALTER TABLE test_table DROP COLUMN new_column;
 SELECT citus.mitmproxy('conn.allow()');


@@ -192,46 +192,53 @@ COMMIT;
 -- should see changes, because cancellation is ignored
 SELECT * FROM dml_test ORDER BY id ASC;
--- drop table and recreate with different replication/sharding
-DROP TABLE dml_test;
-SET citus.shard_count = 1;
-SET citus.shard_replication_factor = 2; -- two placements
-CREATE TABLE dml_test (id integer, name text);
-SELECT create_distributed_table('dml_test', 'id');
-COPY dml_test FROM STDIN WITH CSV;
-1,Alpha
-2,Beta
-3,Gamma
-4,Delta
-\.
+-- Commenting out the following test since it has an output with no
+-- duplicate error messages in PG15
+-- To avoid adding alternative output file for this test, this
+-- part is moved to failure_pg15.sql file.
+-- Uncomment the following part when we drop support for PG14
+-- and we delete failure_pg15.sql file.
+-- -- drop table and recreate with different replication/sharding
+-- DROP TABLE dml_test;
+-- SET citus.shard_count = 1;
+-- SET citus.shard_replication_factor = 2; -- two placements
+-- CREATE TABLE dml_test (id integer, name text);
+-- SELECT create_distributed_table('dml_test', 'id');
+-- COPY dml_test FROM STDIN WITH CSV;
+-- 1,Alpha
+-- 2,Beta
+-- 3,Gamma
+-- 4,Delta
+-- \.
 -- -- test multiple statements against a single shard, but with two placements
--- fail at PREPARED COMMIT as we use 2PC
-SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").kill()');
-BEGIN;
-DELETE FROM dml_test WHERE id = 1;
-DELETE FROM dml_test WHERE id = 2;
-INSERT INTO dml_test VALUES (5, 'Epsilon');
-UPDATE dml_test SET name = 'alpha' WHERE id = 1;
-UPDATE dml_test SET name = 'gamma' WHERE id = 3;
-COMMIT;
--- all changes should be committed because we injected
--- the failure on the COMMIT time. And, we should not
--- mark any placements as INVALID
-SELECT citus.mitmproxy('conn.allow()');
-SELECT recover_prepared_transactions();
-SELECT shardid FROM pg_dist_shard_placement WHERE shardstate = 3;
-SET citus.task_assignment_policy TO "round-robin";
-SELECT * FROM dml_test ORDER BY id ASC;
-SELECT * FROM dml_test ORDER BY id ASC;
-RESET citus.task_assignment_policy;
+-- -- fail at PREPARED COMMIT as we use 2PC
+-- SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").kill()');
+-- BEGIN;
+-- DELETE FROM dml_test WHERE id = 1;
+-- DELETE FROM dml_test WHERE id = 2;
+-- INSERT INTO dml_test VALUES (5, 'Epsilon');
+-- UPDATE dml_test SET name = 'alpha' WHERE id = 1;
+-- UPDATE dml_test SET name = 'gamma' WHERE id = 3;
+-- COMMIT;
+-- -- all changes should be committed because we injected
+-- -- the failure on the COMMIT time. And, we should not
+-- -- mark any placements as INVALID
+-- SELECT citus.mitmproxy('conn.allow()');
+-- SELECT recover_prepared_transactions();
+-- SELECT shardid FROM pg_dist_shard_placement WHERE shardstate = 3;
+-- SET citus.task_assignment_policy TO "round-robin";
+-- SELECT * FROM dml_test ORDER BY id ASC;
+-- SELECT * FROM dml_test ORDER BY id ASC;
+-- RESET citus.task_assignment_policy;
 -- drop table and recreate as reference table
 DROP TABLE dml_test;


@@ -0,0 +1,139 @@
--
-- FAILURE_PG15
--
-- Each of the following tests: failure_ddl.sql, failure_truncate.sql
-- failure_multi_dml.sql, failure_vacuum.sql
-- has a part with alternative output for PG15 resulting
-- from removal of duplicate error messages
-- Relevant PG commit: 618c16707a6d6e8f5c83ede2092975e4670201ad
-- This test file has been created to avoid 4 alternative output files
CREATE SCHEMA pg15_failure;
SET citus.force_max_query_parallelization TO ON;
SET search_path TO 'pg15_failure';
-- do not cache any connections
SET citus.max_cached_conns_per_worker TO 0;
-- we don't want to see the prepared transaction numbers in the warnings
SET client_min_messages TO WARNING;
SELECT citus.mitmproxy('conn.allow()');
SET citus.next_shard_id TO 100700;
-- we'll start with replication factor 1, 2PC and parallel mode
SET citus.shard_count = 4;
SET citus.shard_replication_factor = 1;
CREATE TABLE test_table (key int, value int);
SELECT create_distributed_table('test_table', 'key');
-- from failure_ddl.sql
-- but now kill just after the worker sends response to
-- COMMIT command, so we'll have lots of warnings but the command
-- should have been committed both on the distributed table and the placements
SET client_min_messages TO WARNING;
SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT").kill()');
ALTER TABLE test_table ADD COLUMN new_column INT;
SELECT citus.mitmproxy('conn.allow()');
SET client_min_messages TO ERROR;
SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass;
SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1;
-- the following tests rely the column not exists, so drop manually
ALTER TABLE test_table DROP COLUMN new_column;
-- from failure_truncate.sql
CREATE VIEW unhealthy_shard_count AS
SELECT count(*)
FROM pg_dist_shard_placement pdsp
JOIN
pg_dist_shard pds
ON pdsp.shardid=pds.shardid
WHERE logicalrelid='pg15_failure.test_table'::regclass AND shardstate != 1;
INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x);
SET client_min_messages TO WARNING;
-- now kill just after the worker sends response to
-- COMMIT command, so we'll have lots of warnings but the command
-- should have been committed both on the distributed table and the placements
SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT").kill()');
TRUNCATE test_table;
SELECT citus.mitmproxy('conn.allow()');
SELECT * FROM unhealthy_shard_count;
SELECT count(*) FROM test_table;
-- from failure_multi_dml.sql
SET citus.shard_count = 1;
SET citus.shard_replication_factor = 2; -- two placements
CREATE TABLE dml_test (id integer, name text);
SELECT create_distributed_table('dml_test', 'id');
COPY dml_test FROM STDIN WITH CSV;
1,Alpha
2,Beta
3,Gamma
4,Delta
\.
---- test multiple statements against a single shard, but with two placements
-- fail at PREPARED COMMIT as we use 2PC
SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").kill()');
BEGIN;
DELETE FROM dml_test WHERE id = 1;
DELETE FROM dml_test WHERE id = 2;
INSERT INTO dml_test VALUES (5, 'Epsilon');
UPDATE dml_test SET name = 'alpha' WHERE id = 1;
UPDATE dml_test SET name = 'gamma' WHERE id = 3;
COMMIT;
-- all changes should be committed because we injected
-- the failure on the COMMIT time. And, we should not
-- mark any placements as INVALID
SELECT citus.mitmproxy('conn.allow()');
SELECT recover_prepared_transactions();
SELECT shardid FROM pg_dist_shard_placement WHERE shardstate = 3;
SET citus.task_assignment_policy TO "round-robin";
SELECT * FROM dml_test ORDER BY id ASC;
SELECT * FROM dml_test ORDER BY id ASC;
RESET citus.task_assignment_policy;
-- from failure_vacuum.sql
CREATE TABLE vacuum_test (key int, value int);
SELECT create_distributed_table('vacuum_test', 'key');
SELECT citus.clear_network_traffic();
SELECT citus.mitmproxy('conn.onQuery(query="^VACUUM").kill()');
VACUUM vacuum_test;
SELECT citus.mitmproxy('conn.onQuery(query="^ANALYZE").kill()');
ANALYZE vacuum_test;
SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").kill()');
ANALYZE vacuum_test;
SELECT citus.mitmproxy('conn.allow()');
SELECT recover_prepared_transactions();
-- ANALYZE transactions being critical is an open question, see #2430
-- show that we never mark as INVALID on COMMIT FAILURE
SELECT shardid, shardstate FROM pg_dist_shard_placement where shardstate != 1 AND
shardid in ( SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'vacuum_test'::regclass);
-- Clean up
SELECT citus.mitmproxy('conn.allow()');
DROP SCHEMA pg15_failure CASCADE;


@@ -103,18 +103,25 @@ SELECT count(*) FROM test_table;
 TRUNCATE test_table;
 INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x);
-SET client_min_messages TO WARNING;
--- now kill just after the worker sends response to
--- COMMIT command, so we'll have lots of warnings but the command
--- should have been committed both on the distributed table and the placements
-SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT").kill()');
-TRUNCATE test_table;
-SELECT citus.mitmproxy('conn.allow()');
-SELECT * FROM unhealthy_shard_count;
-SELECT count(*) FROM test_table;
-SET client_min_messages TO ERROR;
-INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x);
+-- Commenting out the following test since it has an output with no
+-- duplicate error messages in PG15
+-- To avoid adding alternative output file for this test, this
+-- part is moved to failure_pg15.sql file.
+-- Uncomment the following part when we drop support for PG14
+-- and we delete failure_pg15.sql file.
+-- SET client_min_messages TO WARNING;
+-- -- now kill just after the worker sends response to
+-- -- COMMIT command, so we'll have lots of warnings but the command
+-- -- should have been committed both on the distributed table and the placements
+-- SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT").kill()');
+-- TRUNCATE test_table;
+-- SELECT citus.mitmproxy('conn.allow()');
+-- SELECT * FROM unhealthy_shard_count;
+-- SELECT count(*) FROM test_table;
+-- SET client_min_messages TO ERROR;
+-- INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x);
 -- now cancel just after the worker sends response to
 -- but Postgres doesn't accept interrupts during COMMIT and ROLLBACK


@@ -14,22 +14,29 @@ SELECT create_distributed_table('vacuum_test', 'key');
 SELECT citus.clear_network_traffic();
-SELECT citus.mitmproxy('conn.onQuery(query="^VACUUM").kill()');
-VACUUM vacuum_test;
-SELECT citus.mitmproxy('conn.onQuery(query="^ANALYZE").kill()');
-ANALYZE vacuum_test;
-SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").kill()');
-ANALYZE vacuum_test;
-SELECT citus.mitmproxy('conn.allow()');
-SELECT recover_prepared_transactions();
--- ANALYZE transactions being critical is an open question, see #2430
--- show that we never mark as INVALID on COMMIT FAILURE
-SELECT shardid, shardstate FROM pg_dist_shard_placement where shardstate != 1 AND
-shardid in ( SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'vacuum_test'::regclass);
+-- Commenting out the following test since it has an output with no
+-- duplicate error messages in PG15
+-- To avoid adding alternative output file for this test, this
+-- part is moved to failure_pg15.sql file.
+-- Uncomment the following part when we drop support for PG14
+-- and we delete failure_pg15.sql file.
+-- SELECT citus.mitmproxy('conn.onQuery(query="^VACUUM").kill()');
+-- VACUUM vacuum_test;
+-- SELECT citus.mitmproxy('conn.onQuery(query="^ANALYZE").kill()');
+-- ANALYZE vacuum_test;
+-- SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").kill()');
+-- ANALYZE vacuum_test;
+-- SELECT citus.mitmproxy('conn.allow()');
+-- SELECT recover_prepared_transactions();
+-- -- ANALYZE transactions being critical is an open question, see #2430
+-- -- show that we never mark as INVALID on COMMIT FAILURE
+-- SELECT shardid, shardstate FROM pg_dist_shard_placement where shardstate != 1 AND
+-- shardid in ( SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'vacuum_test'::regclass);
 -- the same tests with cancel
 SELECT citus.mitmproxy('conn.onQuery(query="^VACUUM").cancel(' || pg_backend_pid() || ')');