Mark all connections for shutdown when citus.node_conninfo chan… (#3642)

We cache connections between nodes in our connection management code.
This is good for speed. For security this can be a problem though. If
the user changes settings related to TLS encryption they want those to
be applied to future queries. This is especially important when they did
not have TLS enabled before and now they want to enable it. This can
normally be achieved by changing citus.node_conninfo.  However, because
connections are not reopened there will still be old connections that
might not be encrypted at all.

This commit changes that by marking all connections to be shutdown at
the end of their current transaction. This way running transactions will
succeed, even if placement requires connections to be reused for this
transaction. But after this transaction completes any future statements
will use a connection created with the new connection options.

If a connection is requested and a connection is found that is marked
for shutdown, then we don't return this connection. Instead a new one is
created. This is needed to make sure that if there are no running
transactions, then the next statement will not use an old cached
connection, since connections are only actually shutdown at the end of a
transaction.
pull/3639/head^2
Jelte Fennema 2020-03-24 15:31:41 +01:00 committed by GitHub
parent b166105f16
commit 2aabe3e2ef
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 714 additions and 1 deletions

View File

@ -372,6 +372,16 @@ FindAvailableConnection(dlist_head *connections, uint32 flags)
continue;
}
if (connection->forceCloseAtTransactionEnd)
{
/*
* This is a connection that should be closed, probabably because
* of old connection options. So we ignore it. It will
* automatically be closed at the end of the transaction.
*/
continue;
}
if ((flags & REQUIRE_SIDECHANNEL) != 0)
{
if (connection->purpose == CONNECTION_PURPOSE_SIDECHANNEL ||
@ -422,6 +432,37 @@ GivePurposeToConnection(MultiConnection *connection, int flags)
}
/*
* CloseAllConnectionsAfterTransaction sets the forceClose flag of all the
* connections. This is mainly done when citus.node_conninfo changes.
*/
void
CloseAllConnectionsAfterTransaction(void)
{
if (ConnectionHash == NULL)
{
return;
}
HASH_SEQ_STATUS status;
ConnectionHashEntry *entry;
hash_seq_init(&status, ConnectionHash);
while ((entry = (ConnectionHashEntry *) hash_seq_search(&status)) != 0)
{
dlist_iter iter;
dlist_head *connections = entry->connections;
dlist_foreach(iter, connections)
{
MultiConnection *connection =
dlist_container(MultiConnection, connectionNode, iter.cur);
connection->forceCloseAtTransactionEnd = true;
}
}
}
/*
* CloseNodeConnectionsAfterTransaction sets the forceClose flag of the connections
* to a particular node as true such that the connections are no longer cached. This
@ -1145,7 +1186,7 @@ AfterXactHostConnectionHandling(ConnectionHashEntry *entry, bool isCommit)
/*
* ShouldShutdownConnection returns true if either one of the followings is true:
* - The connection is citus initiated.
* - Current cached connections is already at MaxCachedConnectionPerWorker
* - Current cached connections is already at MaxCachedConnectionsPerWorker
* - Connection is forced to close at the end of transaction
* - Connection is not in OK state
* - A transaction is still in progress (usually because we are cancelling a distributed transaction)

View File

@ -1471,6 +1471,12 @@ NodeConninfoGucAssignHook(const char *newval, void *extra)
newval = "";
}
if (strcmp(newval, NodeConninfo) == 0)
{
/* It did not change, no need to do anything */
return;
}
PQconninfoOption *optionArray = PQconninfoParse(newval, NULL);
if (optionArray == NULL)
{
@ -1492,6 +1498,14 @@ NodeConninfoGucAssignHook(const char *newval, void *extra)
}
PQconninfoFree(optionArray);
/*
* Mark all connections for shutdown, since they have been opened using old
* connection settings. This is mostly important when changing SSL
* parameters, otherwise these would not be applied and connections could
* be unencrypted when the user doesn't want that.
*/
CloseAllConnectionsAfterTransaction();
}

View File

@ -11,6 +11,8 @@
#ifndef CONNECTION_MANAGMENT_H
#define CONNECTION_MANAGMENT_H
#include "postgres.h"
#include "distributed/transaction_management.h"
#include "distributed/remote_transaction.h"
#include "lib/ilist.h"
@ -219,6 +221,7 @@ extern MultiConnection * StartNodeUserDatabaseConnection(uint32 flags,
int32 port,
const char *user,
const char *database);
extern void CloseAllConnectionsAfterTransaction(void);
extern void CloseNodeConnectionsAfterTransaction(char *nodeName, int nodePort);
extern void CloseConnection(MultiConnection *connection);
extern void ShutdownAllConnections(void);

View File

@ -0,0 +1,469 @@
-- Make sure changes citus.node_conninfo shutdown connections with old settings
CREATE SCHEMA node_conninfo_reload;
SET search_path TO node_conninfo_reload;
SET citus.shard_count TO 4;
SET citus.shard_replication_factor TO 1;
SET citus.force_max_query_parallelization TO ON;
create table test(a int);
select create_distributed_table('test', 'a');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- Make sure a connection is opened and cached
select count(*) from test where a = 0;
count
---------------------------------------------------------------------
0
(1 row)
show citus.node_conninfo;
citus.node_conninfo
---------------------------------------------------------------------
sslmode=require
(1 row)
-- Set sslmode to something that does not work when connecting
ALTER SYSTEM SET citus.node_conninfo = 'sslmode=doesnotexist';
select pg_reload_conf();
pg_reload_conf
---------------------------------------------------------------------
t
(1 row)
select pg_sleep(0.1); -- wait for config reload to apply
pg_sleep
---------------------------------------------------------------------
(1 row)
show citus.node_conninfo;
citus.node_conninfo
---------------------------------------------------------------------
sslmode=doesnotexist
(1 row)
-- Should give a connection error because of bad sslmode
select count(*) from test where a = 0;
ERROR: connection error: localhost:xxxxx
DETAIL: invalid sslmode value: "doesnotexist"
-- Reset it again
ALTER SYSTEM RESET citus.node_conninfo;
select pg_reload_conf();
pg_reload_conf
---------------------------------------------------------------------
t
(1 row)
select pg_sleep(0.1); -- wait for config reload to apply
pg_sleep
---------------------------------------------------------------------
(1 row)
show citus.node_conninfo;
citus.node_conninfo
---------------------------------------------------------------------
sslmode=require
(1 row)
-- Should work again
select count(*) from test where a = 0;
count
---------------------------------------------------------------------
0
(1 row)
ALTER SYSTEM SET citus.node_conninfo = 'sslmode=doesnotexist';
BEGIN;
-- Should still work (no SIGHUP yet);
select count(*) from test where a = 0;
count
---------------------------------------------------------------------
0
(1 row)
select pg_reload_conf();
pg_reload_conf
---------------------------------------------------------------------
t
(1 row)
select pg_sleep(0.1); -- wait for config reload to apply
pg_sleep
---------------------------------------------------------------------
(1 row)
show citus.node_conninfo;
citus.node_conninfo
---------------------------------------------------------------------
sslmode=doesnotexist
(1 row)
-- Should work since a connection was already taken from pool for this shard,
-- since the same placement is accessed it will reuse that connection for this
-- query
select count(*) from test where a = 0;
count
---------------------------------------------------------------------
0
(1 row)
COMMIT;
-- Should fail now with connection error, when transaction is finished
select count(*) from test where a = 0;
ERROR: connection error: localhost:xxxxx
DETAIL: invalid sslmode value: "doesnotexist"
-- Reset it again
ALTER SYSTEM RESET citus.node_conninfo;
select pg_reload_conf();
pg_reload_conf
---------------------------------------------------------------------
t
(1 row)
select pg_sleep(0.1); -- wait for config reload to apply
pg_sleep
---------------------------------------------------------------------
(1 row)
show citus.node_conninfo;
citus.node_conninfo
---------------------------------------------------------------------
sslmode=require
(1 row)
-- Should work again
select count(*) from test where a = 0;
count
---------------------------------------------------------------------
0
(1 row)
ALTER SYSTEM SET citus.node_conninfo = 'sslmode=doesnotexist';
BEGIN;
-- Should still work (no SIGHUP yet);
INSERT INTO test VALUES(0);
select pg_reload_conf();
pg_reload_conf
---------------------------------------------------------------------
t
(1 row)
select pg_sleep(0.1); -- wait for config reload to apply
pg_sleep
---------------------------------------------------------------------
(1 row)
show citus.node_conninfo;
citus.node_conninfo
---------------------------------------------------------------------
sslmode=doesnotexist
(1 row)
-- Should work since a connection was already taken from pool for this shard,
-- since the same placement is accessed it will reuse that connection for this
-- query
select count(*) from test where a = 0;
count
---------------------------------------------------------------------
1
(1 row)
COMMIT;
-- Should fail now, when transaction is finished
select count(*) from test where a = 0;
ERROR: connection error: localhost:xxxxx
DETAIL: invalid sslmode value: "doesnotexist"
-- Reset it again
ALTER SYSTEM RESET citus.node_conninfo;
select pg_reload_conf();
pg_reload_conf
---------------------------------------------------------------------
t
(1 row)
select pg_sleep(0.1); -- wait for config reload to apply
pg_sleep
---------------------------------------------------------------------
(1 row)
show citus.node_conninfo;
citus.node_conninfo
---------------------------------------------------------------------
sslmode=require
(1 row)
-- Should work again
select count(*) from test where a = 0;
count
---------------------------------------------------------------------
1
(1 row)
ALTER SYSTEM SET citus.node_conninfo = 'sslmode=doesnotexist';
BEGIN;
-- Should still work (no SIGHUP yet);
INSERT INTO test VALUES(1);
select pg_reload_conf();
pg_reload_conf
---------------------------------------------------------------------
t
(1 row)
select pg_sleep(0.1); -- wait for config reload to apply
pg_sleep
---------------------------------------------------------------------
(1 row)
show citus.node_conninfo;
citus.node_conninfo
---------------------------------------------------------------------
sslmode=doesnotexist
(1 row)
-- Should fail since a different shard is accessed and thus a new connection
-- will to be created.
select count(*) from test where a = 0;
ERROR: connection error: localhost:xxxxx
DETAIL: invalid sslmode value: "doesnotexist"
COMMIT;
-- Should still fail now, when transaction is finished
select count(*) from test where a = 0;
ERROR: connection error: localhost:xxxxx
DETAIL: invalid sslmode value: "doesnotexist"
-- Reset it again
ALTER SYSTEM RESET citus.node_conninfo;
select pg_reload_conf();
pg_reload_conf
---------------------------------------------------------------------
t
(1 row)
select pg_sleep(0.1); -- wait for config reload to apply
pg_sleep
---------------------------------------------------------------------
(1 row)
show citus.node_conninfo;
citus.node_conninfo
---------------------------------------------------------------------
sslmode=require
(1 row)
-- Should work again
select count(*) from test where a = 0;
count
---------------------------------------------------------------------
1
(1 row)
ALTER SYSTEM SET citus.node_conninfo = 'sslmode=doesnotexist';
BEGIN;
-- Should still work (no SIGHUP yet);
TRUNCATE test;
select pg_reload_conf();
pg_reload_conf
---------------------------------------------------------------------
t
(1 row)
select pg_sleep(0.1); -- wait for config reload to apply
pg_sleep
---------------------------------------------------------------------
(1 row)
show citus.node_conninfo;
citus.node_conninfo
---------------------------------------------------------------------
sslmode=doesnotexist
(1 row)
-- Should work since truncate grabbed connections for all shards and these are
-- reused
select count(*) from test;
count
---------------------------------------------------------------------
0
(1 row)
COMMIT;
-- Should fail now, when transaction is finished
select count(*) from test;
ERROR: connection error: localhost:xxxxx
DETAIL: invalid sslmode value: "doesnotexist"
-- Reset it again
ALTER SYSTEM RESET citus.node_conninfo;
select pg_reload_conf();
pg_reload_conf
---------------------------------------------------------------------
t
(1 row)
select pg_sleep(0.1); -- wait for config reload to apply
pg_sleep
---------------------------------------------------------------------
(1 row)
show citus.node_conninfo;
citus.node_conninfo
---------------------------------------------------------------------
sslmode=require
(1 row)
-- Should work again
select count(*) from test where a = 0;
count
---------------------------------------------------------------------
0
(1 row)
ALTER SYSTEM SET citus.node_conninfo = 'sslmode=doesnotexist';
BEGIN;
-- Should still work (no SIGHUP yet);
TRUNCATE test;
select pg_reload_conf();
pg_reload_conf
---------------------------------------------------------------------
t
(1 row)
select pg_sleep(0.1); -- wait for config reload to apply
pg_sleep
---------------------------------------------------------------------
(1 row)
show citus.node_conninfo;
citus.node_conninfo
---------------------------------------------------------------------
sslmode=doesnotexist
(1 row)
-- Should fail because of divede by 0 on the coordinator.
select count(*)/0 from test;
ERROR: division by zero
ROLLBACK;
-- Should fail now, when transaction is finished
select count(*) from test;
ERROR: connection error: localhost:xxxxx
DETAIL: invalid sslmode value: "doesnotexist"
-- Reset it again
ALTER SYSTEM RESET citus.node_conninfo;
select pg_reload_conf();
pg_reload_conf
---------------------------------------------------------------------
t
(1 row)
select pg_sleep(0.1); -- wait for config reload to apply
pg_sleep
---------------------------------------------------------------------
(1 row)
show citus.node_conninfo;
citus.node_conninfo
---------------------------------------------------------------------
sslmode=require
(1 row)
-- Should work again
select count(*) from test where a = 0;
count
---------------------------------------------------------------------
0
(1 row)
-- Set sslmode to something that does work when connecting
ALTER SYSTEM SET citus.node_conninfo = 'sslmode=allow';
select pg_reload_conf();
pg_reload_conf
---------------------------------------------------------------------
t
(1 row)
select pg_sleep(0.1); -- wait for config reload to apply
pg_sleep
---------------------------------------------------------------------
(1 row)
show citus.node_conninfo;
citus.node_conninfo
---------------------------------------------------------------------
sslmode=allow
(1 row)
-- Should still work, since sslmode=allow is valid
select count(*) from test where a = 0;
count
---------------------------------------------------------------------
0
(1 row)
-- Set sslmode to the same again (to get more coverage)
ALTER SYSTEM SET citus.node_conninfo = 'sslmode=allow';
select pg_reload_conf();
pg_reload_conf
---------------------------------------------------------------------
t
(1 row)
select pg_sleep(0.1); -- wait for config reload to apply
pg_sleep
---------------------------------------------------------------------
(1 row)
show citus.node_conninfo;
citus.node_conninfo
---------------------------------------------------------------------
sslmode=allow
(1 row)
-- Should still work
select count(*) from test where a = 0;
count
---------------------------------------------------------------------
0
(1 row)
-- Reset it again
ALTER SYSTEM RESET citus.node_conninfo;
select pg_reload_conf();
pg_reload_conf
---------------------------------------------------------------------
t
(1 row)
select pg_sleep(0.1); -- wait for config reload to apply
pg_sleep
---------------------------------------------------------------------
(1 row)
show citus.node_conninfo;
citus.node_conninfo
---------------------------------------------------------------------
sslmode=require
(1 row)
-- Should work
select count(*) from test where a = 0;
count
---------------------------------------------------------------------
0
(1 row)
DROP SCHEMA node_conninfo_reload CASCADE;
NOTICE: drop cascades to table test

View File

@ -267,6 +267,11 @@ test: multi_colocated_shard_transfer
# ----------
test: multi_citus_tools
# ----------
# node_conninfo_reload tests that node_conninfo changes take effect
# ----------
test: node_conninfo_reload
# ----------
# multi_foreign_key tests foreign key push down on distributed tables
# ----------

View File

@ -0,0 +1,181 @@
-- Make sure changes citus.node_conninfo shutdown connections with old settings
CREATE SCHEMA node_conninfo_reload;
SET search_path TO node_conninfo_reload;
SET citus.shard_count TO 4;
SET citus.shard_replication_factor TO 1;
SET citus.force_max_query_parallelization TO ON;
create table test(a int);
select create_distributed_table('test', 'a');
-- Make sure a connection is opened and cached
select count(*) from test where a = 0;
show citus.node_conninfo;
-- Set sslmode to something that does not work when connecting
ALTER SYSTEM SET citus.node_conninfo = 'sslmode=doesnotexist';
select pg_reload_conf();
select pg_sleep(0.1); -- wait for config reload to apply
show citus.node_conninfo;
-- Should give a connection error because of bad sslmode
select count(*) from test where a = 0;
-- Reset it again
ALTER SYSTEM RESET citus.node_conninfo;
select pg_reload_conf();
select pg_sleep(0.1); -- wait for config reload to apply
show citus.node_conninfo;
-- Should work again
select count(*) from test where a = 0;
ALTER SYSTEM SET citus.node_conninfo = 'sslmode=doesnotexist';
BEGIN;
-- Should still work (no SIGHUP yet);
select count(*) from test where a = 0;
select pg_reload_conf();
select pg_sleep(0.1); -- wait for config reload to apply
show citus.node_conninfo;
-- Should work since a connection was already taken from pool for this shard,
-- since the same placement is accessed it will reuse that connection for this
-- query
select count(*) from test where a = 0;
COMMIT;
-- Should fail now with connection error, when transaction is finished
select count(*) from test where a = 0;
-- Reset it again
ALTER SYSTEM RESET citus.node_conninfo;
select pg_reload_conf();
select pg_sleep(0.1); -- wait for config reload to apply
show citus.node_conninfo;
-- Should work again
select count(*) from test where a = 0;
ALTER SYSTEM SET citus.node_conninfo = 'sslmode=doesnotexist';
BEGIN;
-- Should still work (no SIGHUP yet);
INSERT INTO test VALUES(0);
select pg_reload_conf();
select pg_sleep(0.1); -- wait for config reload to apply
show citus.node_conninfo;
-- Should work since a connection was already taken from pool for this shard,
-- since the same placement is accessed it will reuse that connection for this
-- query
select count(*) from test where a = 0;
COMMIT;
-- Should fail now, when transaction is finished
select count(*) from test where a = 0;
-- Reset it again
ALTER SYSTEM RESET citus.node_conninfo;
select pg_reload_conf();
select pg_sleep(0.1); -- wait for config reload to apply
show citus.node_conninfo;
-- Should work again
select count(*) from test where a = 0;
ALTER SYSTEM SET citus.node_conninfo = 'sslmode=doesnotexist';
BEGIN;
-- Should still work (no SIGHUP yet);
INSERT INTO test VALUES(1);
select pg_reload_conf();
select pg_sleep(0.1); -- wait for config reload to apply
show citus.node_conninfo;
-- Should fail since a different shard is accessed and thus a new connection
-- will to be created.
select count(*) from test where a = 0;
COMMIT;
-- Should still fail now, when transaction is finished
select count(*) from test where a = 0;
-- Reset it again
ALTER SYSTEM RESET citus.node_conninfo;
select pg_reload_conf();
select pg_sleep(0.1); -- wait for config reload to apply
show citus.node_conninfo;
-- Should work again
select count(*) from test where a = 0;
ALTER SYSTEM SET citus.node_conninfo = 'sslmode=doesnotexist';
BEGIN;
-- Should still work (no SIGHUP yet);
TRUNCATE test;
select pg_reload_conf();
select pg_sleep(0.1); -- wait for config reload to apply
show citus.node_conninfo;
-- Should work since truncate grabbed connections for all shards and these are
-- reused
select count(*) from test;
COMMIT;
-- Should fail now, when transaction is finished
select count(*) from test;
-- Reset it again
ALTER SYSTEM RESET citus.node_conninfo;
select pg_reload_conf();
select pg_sleep(0.1); -- wait for config reload to apply
show citus.node_conninfo;
-- Should work again
select count(*) from test where a = 0;
ALTER SYSTEM SET citus.node_conninfo = 'sslmode=doesnotexist';
BEGIN;
-- Should still work (no SIGHUP yet);
TRUNCATE test;
select pg_reload_conf();
select pg_sleep(0.1); -- wait for config reload to apply
show citus.node_conninfo;
-- Should fail because of divede by 0 on the coordinator.
select count(*)/0 from test;
ROLLBACK;
-- Should fail now, when transaction is finished
select count(*) from test;
-- Reset it again
ALTER SYSTEM RESET citus.node_conninfo;
select pg_reload_conf();
select pg_sleep(0.1); -- wait for config reload to apply
show citus.node_conninfo;
-- Should work again
select count(*) from test where a = 0;
-- Set sslmode to something that does work when connecting
ALTER SYSTEM SET citus.node_conninfo = 'sslmode=allow';
select pg_reload_conf();
select pg_sleep(0.1); -- wait for config reload to apply
show citus.node_conninfo;
-- Should still work, since sslmode=allow is valid
select count(*) from test where a = 0;
-- Set sslmode to the same again (to get more coverage)
ALTER SYSTEM SET citus.node_conninfo = 'sslmode=allow';
select pg_reload_conf();
select pg_sleep(0.1); -- wait for config reload to apply
show citus.node_conninfo;
-- Should still work
select count(*) from test where a = 0;
-- Reset it again
ALTER SYSTEM RESET citus.node_conninfo;
select pg_reload_conf();
select pg_sleep(0.1); -- wait for config reload to apply
show citus.node_conninfo;
-- Should work
select count(*) from test where a = 0;
DROP SCHEMA node_conninfo_reload CASCADE;