From 8ed792cdb468bfbe9afd9292aba95a072921a504 Mon Sep 17 00:00:00 2001 From: Jelte Fennema Date: Tue, 24 Mar 2020 15:31:41 +0100 Subject: [PATCH] =?UTF-8?q?Mark=20all=20connections=20for=20shutdown=20whe?= =?UTF-8?q?n=20citus.node=5Fconninfo=20chan=E2=80=A6=20(#3642)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We cache connections between nodes in our connection management code. This is good for speed. For security this can be a problem though. If the user changes settings related to TLS encryption they want those to be applied to future queries. This is especially important when they did not have TLS enabled before and now they want to enable it. This can normally be achieved by changing citus.node_conninfo. However, because connections are not reopened there will still be old connections that might not be encrypted at all. This commit changes that by marking all connections to be shutdown at the end of their current transaction. This way running transactions will succeed, even if placement requires connections to be reused for this transaction. But after this transaction completes any future statements will use a connection created with the new connection options. If a connection is requested and a connection is found that is marked for shutdown, then we don't return this connection. Instead a new one is created. This is needed to make sure that if there are no running transactions, then the next statement will not use an old cached connection, since connections are only actually shutdown at the end of a transaction. (cherry picked from commit 2aabe3e2efe13897bec723d07427e8088e8b03f3) --- .../connection/connection_management.c | 43 +- src/backend/distributed/shared_library_init.c | 14 + .../distributed/connection_management.h | 3 + .../regress/expected/node_conninfo_reload.out | 469 ++++++++++++++++++ src/test/regress/multi_schedule | 5 + src/test/regress/sql/node_conninfo_reload.sql | 181 +++++++ 6 files changed, 714 insertions(+), 1 deletion(-) create mode 100644 src/test/regress/expected/node_conninfo_reload.out create mode 100644 src/test/regress/sql/node_conninfo_reload.sql diff --git a/src/backend/distributed/connection/connection_management.c b/src/backend/distributed/connection/connection_management.c index db53b2c84..b328b0f9e 100644 --- a/src/backend/distributed/connection/connection_management.c +++ b/src/backend/distributed/connection/connection_management.c @@ -371,6 +371,16 @@ FindAvailableConnection(dlist_head *connections, uint32 flags) continue; } + if (connection->forceCloseAtTransactionEnd) + { + /* + * This is a connection that should be closed, probabably because + * of old connection options. So we ignore it. It will + * automatically be closed at the end of the transaction. + */ + continue; + } + if ((flags & REQUIRE_SIDECHANNEL) != 0) { if (connection->purpose == CONNECTION_PURPOSE_SIDECHANNEL || @@ -421,6 +431,37 @@ GivePurposeToConnection(MultiConnection *connection, int flags) } +/* + * CloseAllConnectionsAfterTransaction sets the forceClose flag of all the + * connections. This is mainly done when citus.node_conninfo changes. + */ +void +CloseAllConnectionsAfterTransaction(void) +{ + if (ConnectionHash == NULL) + { + return; + } + HASH_SEQ_STATUS status; + ConnectionHashEntry *entry; + + hash_seq_init(&status, ConnectionHash); + while ((entry = (ConnectionHashEntry *) hash_seq_search(&status)) != 0) + { + dlist_iter iter; + + dlist_head *connections = entry->connections; + dlist_foreach(iter, connections) + { + MultiConnection *connection = + dlist_container(MultiConnection, connectionNode, iter.cur); + + connection->forceCloseAtTransactionEnd = true; + } + } +} + + /* * CloseNodeConnectionsAfterTransaction sets the forceClose flag of the connections * to a particular node as true such that the connections are no longer cached. This @@ -1110,7 +1151,7 @@ AfterXactHostConnectionHandling(ConnectionHashEntry *entry, bool isCommit) /* * ShouldShutdownConnection returns true if either one of the followings is true: * - The connection is citus initiated. - * - Current cached connections is already at MaxCachedConnectionPerWorker + * - Current cached connections is already at MaxCachedConnectionsPerWorker * - Connection is forced to close at the end of transaction * - Connection is not in OK state * - A transaction is still in progress (usually because we are cancelling a distributed transaction) diff --git a/src/backend/distributed/shared_library_init.c b/src/backend/distributed/shared_library_init.c index b445b00eb..f9c22d216 100644 --- a/src/backend/distributed/shared_library_init.c +++ b/src/backend/distributed/shared_library_init.c @@ -1455,6 +1455,12 @@ NodeConninfoGucAssignHook(const char *newval, void *extra) newval = ""; } + if (strcmp(newval, NodeConninfo) == 0) + { + /* It did not change, no need to do anything */ + return; + } + PQconninfoOption *optionArray = PQconninfoParse(newval, NULL); if (optionArray == NULL) { @@ -1476,6 +1482,14 @@ NodeConninfoGucAssignHook(const char *newval, void *extra) } PQconninfoFree(optionArray); + + /* + * Mark all connections for shutdown, since they have been opened using old + * connection settings. This is mostly important when changing SSL + * parameters, otherwise these would not be applied and connections could + * be unencrypted when the user doesn't want that. + */ + CloseAllConnectionsAfterTransaction(); } diff --git a/src/include/distributed/connection_management.h b/src/include/distributed/connection_management.h index 2424dfd13..43a2fbc90 100644 --- a/src/include/distributed/connection_management.h +++ b/src/include/distributed/connection_management.h @@ -11,6 +11,8 @@ #ifndef CONNECTION_MANAGMENT_H #define CONNECTION_MANAGMENT_H +#include "postgres.h" + #include "distributed/transaction_management.h" #include "distributed/remote_transaction.h" #include "lib/ilist.h" @@ -218,6 +220,7 @@ extern MultiConnection * StartNodeUserDatabaseConnection(uint32 flags, int32 port, const char *user, const char *database); +extern void CloseAllConnectionsAfterTransaction(void); extern void CloseNodeConnectionsAfterTransaction(char *nodeName, int nodePort); extern void CloseConnection(MultiConnection *connection); extern void ShutdownConnection(MultiConnection *connection); diff --git a/src/test/regress/expected/node_conninfo_reload.out b/src/test/regress/expected/node_conninfo_reload.out new file mode 100644 index 000000000..c0049b5e5 --- /dev/null +++ b/src/test/regress/expected/node_conninfo_reload.out @@ -0,0 +1,469 @@ +-- Make sure changes citus.node_conninfo shutdown connections with old settings +CREATE SCHEMA node_conninfo_reload; +SET search_path TO node_conninfo_reload; +SET citus.shard_count TO 4; +SET citus.shard_replication_factor TO 1; +SET citus.force_max_query_parallelization TO ON; +create table test(a int); +select create_distributed_table('test', 'a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- Make sure a connection is opened and cached +select count(*) from test where a = 0; + count +--------------------------------------------------------------------- + 0 +(1 row) + +show citus.node_conninfo; + citus.node_conninfo +--------------------------------------------------------------------- + sslmode=require +(1 row) + +-- Set sslmode to something that does not work when connecting +ALTER SYSTEM SET citus.node_conninfo = 'sslmode=doesnotexist'; +select pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +select pg_sleep(0.1); -- wait for config reload to apply + pg_sleep +--------------------------------------------------------------------- + +(1 row) + +show citus.node_conninfo; + citus.node_conninfo +--------------------------------------------------------------------- + sslmode=doesnotexist +(1 row) + +-- Should give a connection error because of bad sslmode +select count(*) from test where a = 0; +ERROR: connection error: localhost:xxxxx +DETAIL: invalid sslmode value: "doesnotexist" +-- Reset it again +ALTER SYSTEM RESET citus.node_conninfo; +select pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +select pg_sleep(0.1); -- wait for config reload to apply + pg_sleep +--------------------------------------------------------------------- + +(1 row) + +show citus.node_conninfo; + citus.node_conninfo +--------------------------------------------------------------------- + sslmode=require +(1 row) + +-- Should work again +select count(*) from test where a = 0; + count +--------------------------------------------------------------------- + 0 +(1 row) + +ALTER SYSTEM SET citus.node_conninfo = 'sslmode=doesnotexist'; +BEGIN; +-- Should still work (no SIGHUP yet); +select count(*) from test where a = 0; + count +--------------------------------------------------------------------- + 0 +(1 row) + +select pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +select pg_sleep(0.1); -- wait for config reload to apply + pg_sleep +--------------------------------------------------------------------- + +(1 row) + +show citus.node_conninfo; + citus.node_conninfo +--------------------------------------------------------------------- + sslmode=doesnotexist +(1 row) + +-- Should work since a connection was already taken from pool for this shard, +-- since the same placement is accessed it will reuse that connection for this +-- query +select count(*) from test where a = 0; + count +--------------------------------------------------------------------- + 0 +(1 row) + +COMMIT; +-- Should fail now with connection error, when transaction is finished +select count(*) from test where a = 0; +ERROR: connection error: localhost:xxxxx +DETAIL: invalid sslmode value: "doesnotexist" +-- Reset it again +ALTER SYSTEM RESET citus.node_conninfo; +select pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +select pg_sleep(0.1); -- wait for config reload to apply + pg_sleep +--------------------------------------------------------------------- + +(1 row) + +show citus.node_conninfo; + citus.node_conninfo +--------------------------------------------------------------------- + sslmode=require +(1 row) + +-- Should work again +select count(*) from test where a = 0; + count +--------------------------------------------------------------------- + 0 +(1 row) + +ALTER SYSTEM SET citus.node_conninfo = 'sslmode=doesnotexist'; +BEGIN; +-- Should still work (no SIGHUP yet); +INSERT INTO test VALUES(0); +select pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +select pg_sleep(0.1); -- wait for config reload to apply + pg_sleep +--------------------------------------------------------------------- + +(1 row) + +show citus.node_conninfo; + citus.node_conninfo +--------------------------------------------------------------------- + sslmode=doesnotexist +(1 row) + +-- Should work since a connection was already taken from pool for this shard, +-- since the same placement is accessed it will reuse that connection for this +-- query +select count(*) from test where a = 0; + count +--------------------------------------------------------------------- + 1 +(1 row) + +COMMIT; +-- Should fail now, when transaction is finished +select count(*) from test where a = 0; +ERROR: connection error: localhost:xxxxx +DETAIL: invalid sslmode value: "doesnotexist" +-- Reset it again +ALTER SYSTEM RESET citus.node_conninfo; +select pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +select pg_sleep(0.1); -- wait for config reload to apply + pg_sleep +--------------------------------------------------------------------- + +(1 row) + +show citus.node_conninfo; + citus.node_conninfo +--------------------------------------------------------------------- + sslmode=require +(1 row) + +-- Should work again +select count(*) from test where a = 0; + count +--------------------------------------------------------------------- + 1 +(1 row) + +ALTER SYSTEM SET citus.node_conninfo = 'sslmode=doesnotexist'; +BEGIN; +-- Should still work (no SIGHUP yet); +INSERT INTO test VALUES(1); +select pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +select pg_sleep(0.1); -- wait for config reload to apply + pg_sleep +--------------------------------------------------------------------- + +(1 row) + +show citus.node_conninfo; + citus.node_conninfo +--------------------------------------------------------------------- + sslmode=doesnotexist +(1 row) + +-- Should fail since a different shard is accessed and thus a new connection +-- will to be created. +select count(*) from test where a = 0; +ERROR: connection error: localhost:xxxxx +DETAIL: invalid sslmode value: "doesnotexist" +COMMIT; +-- Should still fail now, when transaction is finished +select count(*) from test where a = 0; +ERROR: connection error: localhost:xxxxx +DETAIL: invalid sslmode value: "doesnotexist" +-- Reset it again +ALTER SYSTEM RESET citus.node_conninfo; +select pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +select pg_sleep(0.1); -- wait for config reload to apply + pg_sleep +--------------------------------------------------------------------- + +(1 row) + +show citus.node_conninfo; + citus.node_conninfo +--------------------------------------------------------------------- + sslmode=require +(1 row) + +-- Should work again +select count(*) from test where a = 0; + count +--------------------------------------------------------------------- + 1 +(1 row) + +ALTER SYSTEM SET citus.node_conninfo = 'sslmode=doesnotexist'; +BEGIN; +-- Should still work (no SIGHUP yet); +TRUNCATE test; +select pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +select pg_sleep(0.1); -- wait for config reload to apply + pg_sleep +--------------------------------------------------------------------- + +(1 row) + +show citus.node_conninfo; + citus.node_conninfo +--------------------------------------------------------------------- + sslmode=doesnotexist +(1 row) + +-- Should work since truncate grabbed connections for all shards and these are +-- reused +select count(*) from test; + count +--------------------------------------------------------------------- + 0 +(1 row) + +COMMIT; +-- Should fail now, when transaction is finished +select count(*) from test; +ERROR: connection error: localhost:xxxxx +DETAIL: invalid sslmode value: "doesnotexist" +-- Reset it again +ALTER SYSTEM RESET citus.node_conninfo; +select pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +select pg_sleep(0.1); -- wait for config reload to apply + pg_sleep +--------------------------------------------------------------------- + +(1 row) + +show citus.node_conninfo; + citus.node_conninfo +--------------------------------------------------------------------- + sslmode=require +(1 row) + +-- Should work again +select count(*) from test where a = 0; + count +--------------------------------------------------------------------- + 0 +(1 row) + +ALTER SYSTEM SET citus.node_conninfo = 'sslmode=doesnotexist'; +BEGIN; +-- Should still work (no SIGHUP yet); +TRUNCATE test; +select pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +select pg_sleep(0.1); -- wait for config reload to apply + pg_sleep +--------------------------------------------------------------------- + +(1 row) + +show citus.node_conninfo; + citus.node_conninfo +--------------------------------------------------------------------- + sslmode=doesnotexist +(1 row) + +-- Should fail because of divede by 0 on the coordinator. +select count(*)/0 from test; +ERROR: division by zero +ROLLBACK; +-- Should fail now, when transaction is finished +select count(*) from test; +ERROR: connection error: localhost:xxxxx +DETAIL: invalid sslmode value: "doesnotexist" +-- Reset it again +ALTER SYSTEM RESET citus.node_conninfo; +select pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +select pg_sleep(0.1); -- wait for config reload to apply + pg_sleep +--------------------------------------------------------------------- + +(1 row) + +show citus.node_conninfo; + citus.node_conninfo +--------------------------------------------------------------------- + sslmode=require +(1 row) + +-- Should work again +select count(*) from test where a = 0; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Set sslmode to something that does work when connecting +ALTER SYSTEM SET citus.node_conninfo = 'sslmode=allow'; +select pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +select pg_sleep(0.1); -- wait for config reload to apply + pg_sleep +--------------------------------------------------------------------- + +(1 row) + +show citus.node_conninfo; + citus.node_conninfo +--------------------------------------------------------------------- + sslmode=allow +(1 row) + +-- Should still work, since sslmode=allow is valid +select count(*) from test where a = 0; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Set sslmode to the same again (to get more coverage) +ALTER SYSTEM SET citus.node_conninfo = 'sslmode=allow'; +select pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +select pg_sleep(0.1); -- wait for config reload to apply + pg_sleep +--------------------------------------------------------------------- + +(1 row) + +show citus.node_conninfo; + citus.node_conninfo +--------------------------------------------------------------------- + sslmode=allow +(1 row) + +-- Should still work +select count(*) from test where a = 0; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Reset it again +ALTER SYSTEM RESET citus.node_conninfo; +select pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +select pg_sleep(0.1); -- wait for config reload to apply + pg_sleep +--------------------------------------------------------------------- + +(1 row) + +show citus.node_conninfo; + citus.node_conninfo +--------------------------------------------------------------------- + sslmode=require +(1 row) + +-- Should work +select count(*) from test where a = 0; + count +--------------------------------------------------------------------- + 0 +(1 row) + +DROP SCHEMA node_conninfo_reload CASCADE; +NOTICE: drop cascades to table test diff --git a/src/test/regress/multi_schedule b/src/test/regress/multi_schedule index 6eb075b36..304e0aa37 100644 --- a/src/test/regress/multi_schedule +++ b/src/test/regress/multi_schedule @@ -264,6 +264,11 @@ test: multi_colocated_shard_transfer # ---------- test: multi_citus_tools +# ---------- +# node_conninfo_reload tests that node_conninfo changes take effect +# ---------- +test: node_conninfo_reload + # ---------- # multi_foreign_key tests foreign key push down on distributed tables # ---------- diff --git a/src/test/regress/sql/node_conninfo_reload.sql b/src/test/regress/sql/node_conninfo_reload.sql new file mode 100644 index 000000000..dda8dae21 --- /dev/null +++ b/src/test/regress/sql/node_conninfo_reload.sql @@ -0,0 +1,181 @@ +-- Make sure changes citus.node_conninfo shutdown connections with old settings +CREATE SCHEMA node_conninfo_reload; +SET search_path TO node_conninfo_reload; +SET citus.shard_count TO 4; +SET citus.shard_replication_factor TO 1; +SET citus.force_max_query_parallelization TO ON; + +create table test(a int); +select create_distributed_table('test', 'a'); + +-- Make sure a connection is opened and cached +select count(*) from test where a = 0; + +show citus.node_conninfo; + +-- Set sslmode to something that does not work when connecting +ALTER SYSTEM SET citus.node_conninfo = 'sslmode=doesnotexist'; +select pg_reload_conf(); +select pg_sleep(0.1); -- wait for config reload to apply +show citus.node_conninfo; + +-- Should give a connection error because of bad sslmode +select count(*) from test where a = 0; + +-- Reset it again +ALTER SYSTEM RESET citus.node_conninfo; +select pg_reload_conf(); +select pg_sleep(0.1); -- wait for config reload to apply +show citus.node_conninfo; + +-- Should work again +select count(*) from test where a = 0; + +ALTER SYSTEM SET citus.node_conninfo = 'sslmode=doesnotexist'; +BEGIN; +-- Should still work (no SIGHUP yet); +select count(*) from test where a = 0; +select pg_reload_conf(); +select pg_sleep(0.1); -- wait for config reload to apply +show citus.node_conninfo; +-- Should work since a connection was already taken from pool for this shard, +-- since the same placement is accessed it will reuse that connection for this +-- query +select count(*) from test where a = 0; +COMMIT; + +-- Should fail now with connection error, when transaction is finished +select count(*) from test where a = 0; + +-- Reset it again +ALTER SYSTEM RESET citus.node_conninfo; +select pg_reload_conf(); +select pg_sleep(0.1); -- wait for config reload to apply +show citus.node_conninfo; + +-- Should work again +select count(*) from test where a = 0; + +ALTER SYSTEM SET citus.node_conninfo = 'sslmode=doesnotexist'; +BEGIN; +-- Should still work (no SIGHUP yet); +INSERT INTO test VALUES(0); +select pg_reload_conf(); +select pg_sleep(0.1); -- wait for config reload to apply +show citus.node_conninfo; +-- Should work since a connection was already taken from pool for this shard, +-- since the same placement is accessed it will reuse that connection for this +-- query +select count(*) from test where a = 0; +COMMIT; + +-- Should fail now, when transaction is finished +select count(*) from test where a = 0; + +-- Reset it again +ALTER SYSTEM RESET citus.node_conninfo; +select pg_reload_conf(); +select pg_sleep(0.1); -- wait for config reload to apply +show citus.node_conninfo; + +-- Should work again +select count(*) from test where a = 0; + +ALTER SYSTEM SET citus.node_conninfo = 'sslmode=doesnotexist'; +BEGIN; +-- Should still work (no SIGHUP yet); +INSERT INTO test VALUES(1); +select pg_reload_conf(); +select pg_sleep(0.1); -- wait for config reload to apply +show citus.node_conninfo; +-- Should fail since a different shard is accessed and thus a new connection +-- will to be created. +select count(*) from test where a = 0; +COMMIT; + +-- Should still fail now, when transaction is finished +select count(*) from test where a = 0; + +-- Reset it again +ALTER SYSTEM RESET citus.node_conninfo; +select pg_reload_conf(); +select pg_sleep(0.1); -- wait for config reload to apply +show citus.node_conninfo; + +-- Should work again +select count(*) from test where a = 0; + +ALTER SYSTEM SET citus.node_conninfo = 'sslmode=doesnotexist'; +BEGIN; +-- Should still work (no SIGHUP yet); +TRUNCATE test; +select pg_reload_conf(); +select pg_sleep(0.1); -- wait for config reload to apply +show citus.node_conninfo; +-- Should work since truncate grabbed connections for all shards and these are +-- reused +select count(*) from test; +COMMIT; + +-- Should fail now, when transaction is finished +select count(*) from test; + +-- Reset it again +ALTER SYSTEM RESET citus.node_conninfo; +select pg_reload_conf(); +select pg_sleep(0.1); -- wait for config reload to apply +show citus.node_conninfo; + +-- Should work again +select count(*) from test where a = 0; + +ALTER SYSTEM SET citus.node_conninfo = 'sslmode=doesnotexist'; +BEGIN; +-- Should still work (no SIGHUP yet); +TRUNCATE test; +select pg_reload_conf(); +select pg_sleep(0.1); -- wait for config reload to apply +show citus.node_conninfo; +-- Should fail because of divede by 0 on the coordinator. +select count(*)/0 from test; +ROLLBACK; + +-- Should fail now, when transaction is finished +select count(*) from test; + +-- Reset it again +ALTER SYSTEM RESET citus.node_conninfo; +select pg_reload_conf(); +select pg_sleep(0.1); -- wait for config reload to apply +show citus.node_conninfo; + +-- Should work again +select count(*) from test where a = 0; +-- Set sslmode to something that does work when connecting +ALTER SYSTEM SET citus.node_conninfo = 'sslmode=allow'; +select pg_reload_conf(); +select pg_sleep(0.1); -- wait for config reload to apply +show citus.node_conninfo; + +-- Should still work, since sslmode=allow is valid +select count(*) from test where a = 0; + +-- Set sslmode to the same again (to get more coverage) +ALTER SYSTEM SET citus.node_conninfo = 'sslmode=allow'; +select pg_reload_conf(); +select pg_sleep(0.1); -- wait for config reload to apply +show citus.node_conninfo; + +-- Should still work +select count(*) from test where a = 0; + +-- Reset it again +ALTER SYSTEM RESET citus.node_conninfo; +select pg_reload_conf(); +select pg_sleep(0.1); -- wait for config reload to apply +show citus.node_conninfo; + +-- Should work +select count(*) from test where a = 0; + +DROP SCHEMA node_conninfo_reload CASCADE;