citus/src/test/regress/expected/detect_conn_close.out

--
-- This test is for PG15+ only, because WL_SOCKET_CLOSED is exposed starting with PG15
--
-- keep all test objects in a dedicated schema; it is dropped again at the end of the test
CREATE SCHEMA socket_close;
SET search_path TO socket_close;
-- kill_all_cached_internal_conns(gpid)
--
-- Runs a command on every worker (via run_command_on_workers) that calls
-- pg_terminate_backend() on each backend whose application_name matches
-- '%citus_internal gpid=<gpid>', excluding the backend that executes the
-- command itself.
--
-- Parameters:
--   gpid  the global pid whose internal worker connections should be terminated
--
-- Returns:
--   true  when at least one matching backend was found across all workers,
--   false otherwise (including when no worker returned a row at all).
--
-- Notes:
--   * The per-worker number is a count(*) over the matched backends, so it counts
--     backends that were signalled, regardless of the boolean that
--     pg_terminate_backend() returned for each of them.
--   * result is cast to int without checking the success column, so a worker
--     whose result is not an integer makes the function raise instead of being
--     silently ignored.
CREATE OR REPLACE FUNCTION kill_all_cached_internal_conns(gpid bigint)
RETURNS bool LANGUAGE plpgsql
AS $function$
DECLARE
  worker_command text;
  killed_backend_ct int;
BEGIN
  -- build the command that kills all the cached backends on a worker;
  -- %L quotes the ILIKE pattern as a literal instead of hand-doubling quotes,
  -- and 1000 is the timeout argument passed to pg_terminate_backend()
  worker_command := format(
    'SELECT count(*) FROM (SELECT pg_terminate_backend(pid, 1000) FROM pg_stat_activity WHERE application_name ILIKE %L AND pid !=pg_backend_pid()) as foo',
    '%citus_internal gpid=' || gpid::text);

  -- sum() over zero rows is NULL; coalesce so the function answers false
  -- rather than NULL when there is nothing to add up
  SELECT coalesce(sum(result::int), 0)
    INTO killed_backend_ct
    FROM run_command_on_workers(worker_command);

  RETURN killed_backend_ct > 0;
END;
$function$;
-- create a distributed table with 8 shards and a replication factor of 1
SET citus.shard_count TO 8;
SET citus.shard_replication_factor TO 1;
CREATE TABLE socket_test_table(id bigserial, value text);
SELECT create_distributed_table('socket_test_table', 'id');
 create_distributed_table
---------------------------------------------------------------------

(1 row)

-- load 101 rows with values '0' through '100'; id is filled in by the bigserial default
INSERT INTO socket_test_table (value) SELECT i::text FROM generate_series(0,100)i;
-- first, simulate that we only have one cached connection per node
SET citus.max_adaptive_executor_pool_size TO 1;
SET citus.max_cached_conns_per_worker TO 1;
-- run a query first so that connections to the workers exist (and are presumably cached)
SELECT count(*) FROM socket_test_table;
 count
---------------------------------------------------------------------
   101
(1 row)

-- kill all the cached backends on the workers initiated by the current gpid
select kill_all_cached_internal_conns(citus_backend_gpid());
 kill_all_cached_internal_conns
---------------------------------------------------------------------
 t
(1 row)

-- show that none remains: on each worker, count the other backends whose
-- query text in pg_stat_activity mentions socket_test_table (one result row per worker)
SELECT result FROM run_command_on_workers($$SELECT count(*) FROM (SELECT pid FROM pg_stat_activity WHERE query ilike '%socket_test_table%' AND pid !=pg_backend_pid()) as foo$$);
 result
---------------------------------------------------------------------
 0
 0
(2 rows)

-- even though the cached connections closed, the execution recovers and establishes new connections
SELECT count(*) FROM socket_test_table;
 count
---------------------------------------------------------------------
   101
(1 row)

-- now, use 16 connections per worker; we can still recover all connections
SET citus.max_adaptive_executor_pool_size TO 16;
SET citus.max_cached_conns_per_worker TO 16;
SET citus.force_max_query_parallelization  TO ON;
-- run the query once before killing the backends, as in the single-connection case
SELECT count(*) FROM socket_test_table;
 count
---------------------------------------------------------------------
   101
(1 row)

-- kill all the cached backends on the workers initiated by the current gpid
select kill_all_cached_internal_conns(citus_backend_gpid());
 kill_all_cached_internal_conns
---------------------------------------------------------------------
 t
(1 row)

-- the same query still returns the full count after the backends were killed
SELECT count(*) FROM socket_test_table;
 count
---------------------------------------------------------------------
   101
(1 row)

-- now, go back to a single connection per worker and turn forced parallelization off
SET citus.max_adaptive_executor_pool_size TO 1;
SET citus.max_cached_conns_per_worker TO 1;
SET citus.force_max_query_parallelization  TO OFF;
-- we can recover for modification queries as well
-- single row INSERT (positional VALUES: id = 1, value is left NULL)
INSERT INTO socket_test_table VALUES (1);
-- kill all the cached backends on the workers initiated by the current gpid
select kill_all_cached_internal_conns(citus_backend_gpid());
 kill_all_cached_internal_conns
---------------------------------------------------------------------
 t
(1 row)

-- the same INSERT succeeds again after the backends were killed
INSERT INTO socket_test_table VALUES (1);
-- single row UPDATE
UPDATE socket_test_table SET value = 15 WHERE id = 1;
-- kill all the cached backends on the workers initiated by the current gpid
select kill_all_cached_internal_conns(citus_backend_gpid());
 kill_all_cached_internal_conns
---------------------------------------------------------------------
 t
(1 row)

-- the same UPDATE succeeds again after the backends were killed
UPDATE socket_test_table SET value = 15 WHERE id = 1;
-- we cannot recover in a transaction block
BEGIN;
  -- 103 rows: the 101 loaded rows plus the two single-row INSERTs above
  SELECT count(*) FROM socket_test_table;
 count
---------------------------------------------------------------------
   103
(1 row)

  -- kill all the cached backends on the workers initiated by the current gpid
  select kill_all_cached_internal_conns(citus_backend_gpid());
 kill_all_cached_internal_conns
---------------------------------------------------------------------
 t
(1 row)

  -- inside the transaction the next query fails instead of reconnecting
  SELECT count(*) FROM socket_test_table;
ERROR:  connection to the remote node postgres@localhost:xxxxx failed with the following error: connection not open
ROLLBACK;
-- repartition joins can also recover
SET citus.enable_repartition_joins TO on;
SET citus.max_adaptive_executor_pool_size TO 1;
SET citus.max_cached_conns_per_worker TO 1;
-- self-join on the non-distribution column "value"; 115 is consistent with
-- 99 values that occur once plus the value '15' occurring four times (99 + 4 * 4)
SELECT count(*) FROM socket_test_table t1 JOIN socket_test_table t2 USING(value);
 count
---------------------------------------------------------------------
   115
(1 row)

-- kill all the cached backends on the workers initiated by the current gpid
select kill_all_cached_internal_conns(citus_backend_gpid());
 kill_all_cached_internal_conns
---------------------------------------------------------------------
 t
(1 row)

-- even though the cached connections closed, the execution recovers and establishes new connections
SELECT count(*) FROM socket_test_table t1 JOIN socket_test_table t2 USING(value);
 count
---------------------------------------------------------------------
   115
(1 row)

-- also, recover insert .. select repartitioning
-- (the new id is value::bigint; each run of this statement doubles the row count)
INSERT INTO socket_test_table SELECT value::bigint, value FROM socket_test_table;
-- kill all the cached backends on the workers initiated by the current gpid
select kill_all_cached_internal_conns(citus_backend_gpid());
 kill_all_cached_internal_conns
---------------------------------------------------------------------
 t
(1 row)

-- even though the cached connections closed, the execution recovers and establishes new connections
INSERT INTO socket_test_table SELECT value::bigint, value FROM socket_test_table;
-- also, recover with intermediate results
-- (the CTE is limited to one row, so the outer count is always 1)
WITH cte_1 AS (SELECT * FROM socket_test_table LIMIT 1) SELECT count(*) FROM cte_1;
 count
---------------------------------------------------------------------
     1
(1 row)

-- kill all the cached backends on the workers initiated by the current gpid
select kill_all_cached_internal_conns(citus_backend_gpid());
 kill_all_cached_internal_conns
---------------------------------------------------------------------
 t
(1 row)

-- even though the cached connections closed, the execution recovers and establishes new connections
WITH cte_1 AS (SELECT * FROM socket_test_table LIMIT 1) SELECT count(*) FROM cte_1;
 count
---------------------------------------------------------------------
     1
(1 row)

-- although it should make no difference, show that we can also recover
-- when the queries are run while connected to a worker node
\c - - - :worker_1_port
-- this is a new session, so the search_path and the citus settings are set again
SET search_path TO socket_close;
SET citus.max_adaptive_executor_pool_size TO 1;
SET citus.max_cached_conns_per_worker TO 1;
SET citus.force_max_query_parallelization  TO ON;
-- 412 rows: the 103 rows doubled twice by the two INSERT .. SELECT statements
SELECT count(*) FROM socket_test_table;
 count
---------------------------------------------------------------------
   412
(1 row)

-- kill all the cached backends on the workers initiated by the current gpid
select kill_all_cached_internal_conns(citus_backend_gpid());
 kill_all_cached_internal_conns
---------------------------------------------------------------------
 t
(1 row)

-- the same query still returns the full count after the backends were killed
SELECT count(*) FROM socket_test_table;
 count
---------------------------------------------------------------------
   412
(1 row)

-- reconnect using the master port and drop the test schema;
-- client_min_messages is raised to ERROR so lower-severity messages from the
-- cascading drop are not printed
\c - - - :master_port
SET client_min_messages TO ERROR;
DROP SCHEMA socket_close CASCADE;