Fix flakiness in failure_single_select (#6223)

The failure_single_select test would sometimes fail with an error that's
similar to this:
```diff
 -- cancel after first SELECT; txn should fail and nothing should be marked as invalid
 SELECT citus.mitmproxy('conn.onQuery(query="^SELECT").cancel(' ||  pg_backend_pid() || ')');
- mitmproxy
----------------------------------------------------------------------
-
-(1 row)
-
+ERROR:  canceling statement due to user request
+CONTEXT:  COPY mitmproxy_result, line 1: ""
+SQL statement "COPY mitmproxy_result FROM '/home/circleci/project/src/test/regress/tmp_check/mitmproxy.fifo'"
+PL/pgSQL function citus.mitmproxy(text) line 11 at EXECUTE
 BEGIN;
```

This error looked very similar to the one from #6217, and indeed the cause
turned out to be similar. Because we were canceling all SELECT queries, we
would sometimes actually cancel our own mitmproxy SELECT queries.

This puts some additional restrictions on the queries that we cancel.
Most importantly, a query must now contain the name of the table that
we're selecting from.

I was able to reproduce the original issue locally pretty reliably. With
the changes in this PR it didn't happen again.

In passing, this also changes one other failure test that was killing the
connection on all SELECTs, and puts similar additional restrictions on the
queries it matches.

Example of failed test in CI: https://app.circleci.com/pipelines/github/citusdata/citus/26305/workflows/4d942b91-f83c-453c-8d9a-ae22d608e756/jobs/745071
pull/6223/merge
Jelte Fennema 2022-08-22 20:06:33 +02:00 committed by GitHub
parent 28b04dc9f4
commit 506c16efdf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 20 additions and 20 deletions

View File

@ -47,7 +47,7 @@ BEGIN;
0
(1 row)
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT").after(1).kill()');
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT count").after(1).kill()');
mitmproxy
---------------------------------------------------------------------

View File

@ -23,7 +23,7 @@ SELECT create_distributed_table('select_test', 'key');
-- put data in shard for which mitm node is first placement
INSERT INTO select_test VALUES (3, 'test data');
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT").kill()');
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT.*select_test").kill()');
mitmproxy
---------------------------------------------------------------------
@ -45,7 +45,7 @@ WARNING: connection to the remote node localhost:xxxxx failed with the followin
-- kill after first SELECT; txn should fail as INSERT triggers
-- 2PC (and placementis not marked bad)
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT").kill()');
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT.*select_test").kill()');
mitmproxy
---------------------------------------------------------------------
@ -66,7 +66,7 @@ TRUNCATE select_test;
-- now the same tests with query cancellation
-- put data in shard for which mitm node is first placement
INSERT INTO select_test VALUES (3, 'test data');
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT").cancel(' || pg_backend_pid() || ')');
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT.*select_test").cancel(' || pg_backend_pid() || ')');
mitmproxy
---------------------------------------------------------------------
@ -77,7 +77,7 @@ ERROR: canceling statement due to user request
SELECT * FROM select_test WHERE key = 3;
ERROR: canceling statement due to user request
-- cancel after first SELECT; txn should fail and nothing should be marked as invalid
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT").cancel(' || pg_backend_pid() || ')');
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT.*select_test").cancel(' || pg_backend_pid() || ')');
mitmproxy
---------------------------------------------------------------------
@ -107,7 +107,7 @@ SELECT citus.mitmproxy('conn.allow()');
TRUNCATE select_test;
-- cancel the second query
-- error after second SELECT; txn should fail
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT").after(1).cancel(' || pg_backend_pid() || ')');
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT.*select_test").after(1).cancel(' || pg_backend_pid() || ')');
mitmproxy
---------------------------------------------------------------------
@ -126,7 +126,7 @@ SELECT * FROM select_test WHERE key = 3;
ERROR: canceling statement due to user request
COMMIT;
-- error after second SELECT; txn should fails the transaction
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT").after(1).reset()');
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT.*select_test").after(1).reset()');
mitmproxy
---------------------------------------------------------------------
@ -144,7 +144,7 @@ INSERT INTO select_test VALUES (3, 'even more data');
SELECT * FROM select_test WHERE key = 3;
ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open
COMMIT;
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT").after(2).kill()');
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT.*pg_prepared_xacts").after(2).kill()');
mitmproxy
---------------------------------------------------------------------
@ -173,7 +173,7 @@ SELECT create_distributed_table('select_test', 'key');
SET citus.max_cached_conns_per_worker TO 1; -- allow connection to be cached
INSERT INTO select_test VALUES (1, 'test data');
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT").after(1).kill()');
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT.*select_test").after(1).kill()');
mitmproxy
---------------------------------------------------------------------
@ -188,7 +188,7 @@ SELECT * FROM select_test WHERE key = 1;
SELECT * FROM select_test WHERE key = 1;
ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open
-- now the same test with query cancellation
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT").after(1).cancel(' || pg_backend_pid() || ')');
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT.*select_test").after(1).cancel(' || pg_backend_pid() || ')');
mitmproxy
---------------------------------------------------------------------

View File

@ -35,7 +35,7 @@ SET citus.force_max_query_parallelization TO ON;
BEGIN;
SELECT count(*) FROM distributed_table JOIN reference_table USING (key);
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT").after(1).kill()');
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT count").after(1).kill()');
-- this query should not fail because each placement should be acceessed
-- over a seperate connection

View File

@ -13,13 +13,13 @@ SELECT create_distributed_table('select_test', 'key');
-- put data in shard for which mitm node is first placement
INSERT INTO select_test VALUES (3, 'test data');
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT").kill()');
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT.*select_test").kill()');
SELECT * FROM select_test WHERE key = 3;
SELECT * FROM select_test WHERE key = 3;
-- kill after first SELECT; txn should fail as INSERT triggers
-- 2PC (and placementis not marked bad)
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT").kill()');
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT.*select_test").kill()');
BEGIN;
INSERT INTO select_test VALUES (3, 'more data');
@ -35,12 +35,12 @@ TRUNCATE select_test;
-- put data in shard for which mitm node is first placement
INSERT INTO select_test VALUES (3, 'test data');
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT").cancel(' || pg_backend_pid() || ')');
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT.*select_test").cancel(' || pg_backend_pid() || ')');
SELECT * FROM select_test WHERE key = 3;
SELECT * FROM select_test WHERE key = 3;
-- cancel after first SELECT; txn should fail and nothing should be marked as invalid
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT").cancel(' || pg_backend_pid() || ')');
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT.*select_test").cancel(' || pg_backend_pid() || ')');
BEGIN;
INSERT INTO select_test VALUES (3, 'more data');
@ -58,7 +58,7 @@ TRUNCATE select_test;
-- cancel the second query
-- error after second SELECT; txn should fail
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT").after(1).cancel(' || pg_backend_pid() || ')');
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT.*select_test").after(1).cancel(' || pg_backend_pid() || ')');
BEGIN;
INSERT INTO select_test VALUES (3, 'more data');
@ -68,7 +68,7 @@ SELECT * FROM select_test WHERE key = 3;
COMMIT;
-- error after second SELECT; txn should fails the transaction
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT").after(1).reset()');
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT.*select_test").after(1).reset()');
BEGIN;
INSERT INTO select_test VALUES (3, 'more data');
@ -77,7 +77,7 @@ INSERT INTO select_test VALUES (3, 'even more data');
SELECT * FROM select_test WHERE key = 3;
COMMIT;
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT").after(2).kill()');
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT.*pg_prepared_xacts").after(2).kill()');
SELECT recover_prepared_transactions();
SELECT recover_prepared_transactions();
@ -93,12 +93,12 @@ SELECT create_distributed_table('select_test', 'key');
SET citus.max_cached_conns_per_worker TO 1; -- allow connection to be cached
INSERT INTO select_test VALUES (1, 'test data');
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT").after(1).kill()');
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT.*select_test").after(1).kill()');
SELECT * FROM select_test WHERE key = 1;
SELECT * FROM select_test WHERE key = 1;
-- now the same test with query cancellation
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT").after(1).cancel(' || pg_backend_pid() || ')');
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT.*select_test").after(1).cancel(' || pg_backend_pid() || ')');
SELECT * FROM select_test WHERE key = 1;
SELECT * FROM select_test WHERE key = 1;