Merge pull request #1543 from citusdata/test-follower-cluster

Add make target for testing follower clusters
pull/1551/head
Marco Slot 2017-08-12 12:18:56 +02:00 committed by GitHub
commit d19818de21
12 changed files with 399 additions and 9 deletions

View File

@ -27,6 +27,7 @@
#include "commands/dbcommands.h"
#include "distributed/citus_nodes.h"
#include "distributed/connection_management.h"
#include "distributed/metadata_cache.h"
#include "distributed/multi_client_executor.h"
#include "distributed/multi_physical_planner.h"
#include "distributed/multi_server_executor.h"
@ -163,6 +164,13 @@ MultiTaskTrackerExecute(Job *job)
const char *transmitTrackerHashName = "Transmit Tracker Hash";
List *jobIdList = NIL;
if (ReadFromSecondaries == USE_SECONDARY_NODES_ALWAYS)
{
ereport(ERROR, (errmsg("task tracker queries are not allowed while "
"citus.use_secondary_nodes is 'always'"),
errhint("try setting citus.task_executor_type TO 'real-time'")));
}
/*
* We walk over the task tree, and create a task execution struct for each
* task. We then associate the task with its execution and get back a list.
@ -190,7 +198,7 @@ MultiTaskTrackerExecute(Job *job)
* assigning and checking the status of tasks. The second (temporary) hash
* helps us in fetching results data from worker nodes to the master node.
*/
workerNodeList = ActiveReadableNodeList();
workerNodeList = ActivePrimaryNodeList();
taskTrackerCount = (uint32) list_length(workerNodeList);
taskTrackerHash = TrackerHash(taskTrackerHashName, workerNodeList);

View File

@ -37,7 +37,7 @@ output_files := $(patsubst $(citus_abs_srcdir)/output/%.source,expected/%.out, $
# intermediate, for muscle memory backward compatibility.
check: check-full
# check-full triggers all tests that ought to be run routinely
check-full: check-multi check-multi-mx check-multi-task-tracker-extra check-multi-binary check-worker
check-full: check-multi check-multi-mx check-multi-task-tracker-extra check-multi-binary check-worker check-follower-cluster
# using pg_regress_multi_check unnecessarily starts up multiple nodes, which isn't needed
# for check-worker. But that's harmless besides a few cycles.
@ -76,6 +76,10 @@ check-multi-binary: all tempinstall-main
--server-option=citus.binary_worker_copy_format=on \
-- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/multi_binary_schedule $(EXTRA_TESTS)
# check-follower-cluster builds `all`, then runs the tests listed in
# multi_follower_schedule through pg_regress_multi_check. The
# --follower-cluster flag is passed to the harness in addition to the usual
# --load-extension=citus; anything in EXTRA_TESTS is appended to the run.
check-follower-cluster: all
$(pg_regress_multi_check) --load-extension=citus --follower-cluster \
-- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/multi_follower_schedule $(EXTRA_TESTS)
clean distclean maintainer-clean:
rm -f $(output_files) $(input_files)
rm -rf tmp_check/

View File

@ -0,0 +1,29 @@
-- prepare for future tests by configuring all the follower nodes
\c - - - :follower_master_port
ALTER SYSTEM SET citus.use_secondary_nodes TO 'always';
ALTER SYSTEM SET citus.cluster_name TO 'second-cluster';
SELECT pg_reload_conf();
pg_reload_conf
----------------
t
(1 row)
-- also configure the workers, they'll run queries when MX is enabled
\c - - - :follower_worker_1_port
ALTER SYSTEM SET citus.use_secondary_nodes TO 'always';
ALTER SYSTEM SET citus.cluster_name TO 'second-cluster';
SELECT pg_reload_conf();
pg_reload_conf
----------------
t
(1 row)
\c - - - :follower_worker_2_port
ALTER SYSTEM SET citus.use_secondary_nodes TO 'always';
ALTER SYSTEM SET citus.cluster_name TO 'second-cluster';
SELECT pg_reload_conf();
pg_reload_conf
----------------
t
(1 row)

View File

@ -0,0 +1,10 @@
-- check that the nodes are all in read-only mode and rejecting write queries
\c - - - :follower_master_port
CREATE TABLE tab (a int);
ERROR: cannot execute CREATE TABLE in a read-only transaction
\c - - - :follower_worker_1_port
CREATE TABLE tab (a int);
ERROR: cannot execute CREATE TABLE in a read-only transaction
\c - - - :follower_worker_2_port
CREATE TABLE tab (a int);
ERROR: cannot execute CREATE TABLE in a read-only transaction

View File

@ -0,0 +1,87 @@
\c - - - :master_port
-- do some setup
SELECT 1 FROM master_add_node('localhost', :worker_1_port);
?column?
----------
1
(1 row)
SELECT 1 FROM master_add_node('localhost', :worker_2_port);
?column?
----------
1
(1 row)
CREATE TABLE the_table (a int, b int);
SELECT create_distributed_table('the_table', 'a');
create_distributed_table
--------------------------
(1 row)
INSERT INTO the_table (a, b) VALUES (1, 1);
INSERT INTO the_table (a, b) VALUES (1, 2);
-- connect to the follower and check that a simple select query works, the follower
-- is still in the default cluster and will send queries to the primary nodes
\c - - - :follower_master_port
SELECT * FROM the_table;
a | b
---+---
1 | 1
1 | 2
(2 rows)
-- now, connect to the follower but tell it to use secondary nodes. There are no
-- secondary nodes so this should fail.
-- (this is :follower_master_port but substitution doesn't work here)
\c "port=9070 dbname=regression options='-c\ citus.use_secondary_nodes=always'"
SELECT * FROM the_table;
ERROR: node group 2 does not have a secondary node
-- add the secondary nodes and try again, the SELECT statement should work this time
\c - - - :master_port
SELECT 1 FROM master_add_node('localhost', :follower_worker_1_port,
groupid => (SELECT groupid FROM pg_dist_node WHERE nodeport = :worker_1_port),
noderole => 'secondary');
?column?
----------
1
(1 row)
SELECT 1 FROM master_add_node('localhost', :follower_worker_2_port,
groupid => (SELECT groupid FROM pg_dist_node WHERE nodeport = :worker_2_port),
noderole => 'secondary');
?column?
----------
1
(1 row)
\c "port=9070 dbname=regression options='-c\ citus.use_secondary_nodes=always'"
-- now that we've added secondaries this should work
SELECT * FROM the_table;
a | b
---+---
1 | 1
1 | 2
(2 rows)
-- okay, now let's play with nodecluster. If we change the cluster of our follower node
-- queries should start failing again, since there are no worker nodes in the new cluster
\c "port=9070 dbname=regression options='-c\ citus.use_secondary_nodes=always\ -c\ citus.cluster_name=second-cluster'"
-- there are no secondary nodes in this cluster, so this should fail!
SELECT * FROM the_table;
ERROR: there is a shard placement in node group 2 but there are no nodes in that group
-- now move the secondary nodes into the new cluster and see that the follower, finally
-- correctly configured, can run select queries involving them
\c - - - :master_port
UPDATE pg_dist_node SET nodecluster = 'second-cluster' WHERE noderole = 'secondary';
\c "port=9070 dbname=regression options='-c\ citus.use_secondary_nodes=always\ -c\ citus.cluster_name=second-cluster'"
SELECT * FROM the_table;
a | b
---+---
1 | 1
1 | 2
(2 rows)
-- clean up after ourselves
\c - - - :master_port
DROP TABLE the_table;

View File

@ -0,0 +1,27 @@
\c - - - :master_port
-- do some setup
CREATE TABLE tab(a int, b int);
SELECT create_distributed_table('tab', 'a');
create_distributed_table
--------------------------
(1 row)
INSERT INTO tab (a, b) VALUES (1, 1);
INSERT INTO tab (a, b) VALUES (1, 2);
\c - - - :follower_master_port
SET citus.task_executor_type TO 'real-time';
SELECT * FROM tab;
a | b
---+---
1 | 1
1 | 2
(2 rows)
SET citus.task_executor_type TO 'task-tracker';
SELECT * FROM tab;
ERROR: task tracker queries are not allowed while citus.use_secondary_nodes is 'always'
HINT: try setting citus.task_executor_type TO 'real-time'
-- clean up
\c - - - :master_port
DROP TABLE tab;

View File

@ -0,0 +1,4 @@
test: multi_follower_sanity_check
test: multi_follower_select_statements
test: multi_follower_configure_followers
test: multi_follower_task_tracker

View File

@ -46,6 +46,7 @@ sub Usage()
# Option parsing
my $isolationtester = 0;
my $vanillatest = 0;
my $followercluster = 0;
my $bindir = "";
my $libdir = undef;
my $pgxsdir = "";
@ -70,6 +71,7 @@ my $serversAreShutdown = "TRUE";
GetOptions(
'isolationtester' => \$isolationtester,
'vanillatest' => \$vanillatest,
'follower-cluster' => \$followercluster,
'bindir=s' => \$bindir,
'libdir=s' => \$libdir,
'pgxsdir=s' => \$pgxsdir,
@ -216,6 +218,13 @@ for (my $workerIndex = 1; $workerIndex <= $workerCount; $workerIndex++) {
push(@workerPorts, $workerPort);
}
# The follower coordinator uses a fixed port; follower worker N is assigned
# the port N above it, with one follower port per configured worker.
my $followerCoordPort = 9070;
my @followerWorkerPorts = map { $followerCoordPort + $_ } (1 .. $workerCount);
my $host = "localhost";
my $user = "postgres";
my @pgOptions = ();
@ -237,6 +246,13 @@ push(@pgOptions, '-c', "citus.remote_task_check_interval=1ms");
push(@pgOptions, '-c', "citus.shard_replication_factor=2");
push(@pgOptions, '-c', "citus.node_connection_timeout=${connectionTimeout}");
# When a follower cluster was requested, add the replication-related server
# settings to the options that every node is started with.
if ($followercluster)
{
    for my $replicationSetting ("max_wal_senders=10", "hot_standby=on", "wal_level=replica")
    {
        push(@pgOptions, '-c', $replicationSetting);
    }
}
# Add externally added options last, so they overwrite the default ones above
for my $option (@userPgOptions)
{
@ -272,6 +288,12 @@ for my $port (@workerPorts)
system("rm", ('-rf', "tmp_check/worker.$port")) == 0 or die "Could not remove worker directory";
}
# Clear out follower directories left over from an earlier run: first the
# follower of the master, then one directory per follower worker port.
system("rm", '-rf', 'tmp_check/master-follower') == 0
    or die "Could not remove master directory";
foreach my $followerPort (@followerWorkerPorts)
{
    system("rm", '-rf', "tmp_check/follower.$followerPort") == 0
        or die "Could not remove worker directory";
}
# Prepare directory in which 'psql' has some helpful variables for locating the workers
system("mkdir", ('-p', "tmp_check/tmp-bin")) == 0
or die "Could not create tmp-bin directory";
@ -280,6 +302,7 @@ sysopen my $fh, "tmp_check/tmp-bin/psql", O_CREAT|O_TRUNC|O_RDWR, 0700
print $fh "#!/bin/bash\n";
print $fh "exec psql ";
print $fh "--variable=master_port=$masterPort ";
print $fh "--variable=follower_master_port=$followerCoordPort ";
print $fh "--variable=default_user=$user ";
print $fh "--variable=SHOW_CONTEXT=always ";
for my $workeroff (0 .. $#workerPorts)
@ -287,6 +310,11 @@ for my $workeroff (0 .. $#workerPorts)
my $port = $workerPorts[$workeroff];
print $fh "--variable=worker_".($workeroff+1)."_port=$port ";
}
# Expose each follower worker port to the tests as the psql variable
# follower_worker_<N>_port, numbering the workers from 1.
for my $followerNumber (1 .. scalar(@followerWorkerPorts))
{
    my $followerPort = $followerWorkerPorts[$followerNumber - 1];
    print $fh "--variable=follower_worker_${followerNumber}_port=$followerPort ";
}
print $fh "\"\$@\"\n"; # pass on the commandline arguments
close $fh;
@ -297,10 +325,29 @@ for my $port (@workerPorts)
or die "Could not create worker directory";
}
# Follower nodes only exist in follower-cluster runs; create their log
# directories (and, implicitly, their parent directories) up front.
if ($followercluster)
{
    system("mkdir", '-p', 'tmp_check/master-follower/log') == 0
        or die "Could not create follower directory";

    foreach my $followerPort (@followerWorkerPorts)
    {
        system("mkdir", '-p', "tmp_check/follower.$followerPort/log") == 0
            or die "Could not create worker directory";
    }
}
# Create new data directories, copy workers for speed
system("$bindir/initdb", ("--nosync", "-U", $user, "tmp_check/master/data")) == 0
or die "Could not create master data directory";
if ($followercluster)
{
    # Allow replication connections from 127.0.0.1 for the same role that
    # initdb created and that pg_basebackup connects as ($user), instead of a
    # hard-coded role name. The entry is appended to the master's data
    # directory before that directory is copied for the workers, so the
    # copies pick it up as well.
    #
    # This is only necessary on PG 9.6 but it doesn't hurt PG 10
    open(my $fd, ">>", "tmp_check/master/data/pg_hba.conf")
        or die "could not open pg_hba.conf";

    # Leading newline: start on a fresh line even if the file does not end
    # with one. Trailing newline: leave a well-formed last line behind.
    print $fd "\nhost replication $user 127.0.0.1/32 trust\n";

    # Buffered write errors are only reported when the handle is closed.
    close($fd)
        or die "could not write pg_hba.conf: $!";
}
for my $port (@workerPorts)
{
system("cp -a tmp_check/master/data tmp_check/worker.$port/data") == 0
@ -322,6 +369,20 @@ sub ShutdownServers()
('stop', '-w', '-D', "tmp_check/worker.$port/data")) == 0
or warn "Could not shutdown worker server";
}
# Stop the follower nodes as well when a follower cluster was started.
# Failures only warn (they do not die) so that the remaining servers still
# get a shutdown attempt. The messages name the node kind that failed, in
# line with the corresponding start-up error messages.
if ($followercluster)
{
    system("$bindir/pg_ctl",
           ('stop', '-w', '-D', 'tmp_check/master-follower/data')) == 0
        or warn "Could not shutdown master follower server";

    for my $port (@followerWorkerPorts)
    {
        system("$bindir/pg_ctl",
               ('stop', '-w', '-D', "tmp_check/follower.$port/data")) == 0
            or warn "Could not shutdown follower server";
    }
}
$serversAreShutdown = "TRUE";
}
}
@ -357,20 +418,69 @@ if ($valgrind)
$serversAreShutdown = "FALSE";
# Start servers
system("$bindir/pg_ctl",
if(system("$bindir/pg_ctl",
('start', '-w',
'-o', join(" ", @pgOptions)." -c port=$masterPort",
'-D', 'tmp_check/master/data', '-l', 'tmp_check/master/log/postmaster.log')) == 0
or die "Could not start master server";
'-D', 'tmp_check/master/data', '-l', 'tmp_check/master/log/postmaster.log')) != 0)
{
system("tail", ("-n20", "tmp_check/master/log/postmaster.log"));
die "Could not start master server";
}
for my $port (@workerPorts)
{
system("$bindir/pg_ctl",
if(system("$bindir/pg_ctl",
('start', '-w',
'-o', join(" ", @pgOptions)." -c port=$port",
'-D', "tmp_check/worker.$port/data",
'-l', "tmp_check/worker.$port/log/postmaster.log")) == 0
or die "Could not start worker server";
'-l', "tmp_check/worker.$port/log/postmaster.log")) != 0)
{
system("tail", ("-n20", "tmp_check/worker.$port/log/postmaster.log"));
die "Could not start worker server";
}
}
# Setup the follower nodes: clone every running node with pg_basebackup, then
# start each clone on its own follower port. This runs after the master and
# the workers have been started, since the base backups connect to them.
if ($followercluster)
{
# This test would run faster on PG10 if we could pass --no-sync here but that flag
# isn't supported on PG 9.6. In a year when we drop support for PG9.6 add that flag!
# Clone the master into tmp_check/master-follower/data. Per the pg_basebackup
# documentation, -R writes the recovery configuration needed to run the copy
# as a standby, and -X stream streams the WAL while the backup is taken.
system("$bindir/pg_basebackup",
("-D", "tmp_check/master-follower/data", "--host=$host", "--port=$masterPort",
"--username=$user", "-R", "-X", "stream")) == 0
or die 'could not take basebackup';
# Clone each worker the same way; @workerPorts and @followerWorkerPorts are
# paired by index, so worker N is copied into the directory of follower N.
for my $offset (0 .. $#workerPorts)
{
my $workerPort = $workerPorts[$offset];
my $followerPort = $followerWorkerPorts[$offset];
system("$bindir/pg_basebackup",
("-D", "tmp_check/follower.$followerPort/data", "--host=$host", "--port=$workerPort",
"--username=$user", "-R", "-X", "stream")) == 0
or die "Could not take basebackup";
}
# Start the follower of the master with the shared @pgOptions plus its own
# port. On failure, print the last 20 lines of its log before dying.
if(system("$bindir/pg_ctl",
('start', '-w',
'-o', join(" ", @pgOptions)." -c port=$followerCoordPort",
'-D', 'tmp_check/master-follower/data', '-l', 'tmp_check/master-follower/log/postmaster.log')) != 0)
{
system("tail", ("-n20", "tmp_check/master-follower/log/postmaster.log"));
die "Could not start master follower server";
}
# Start every follower worker on its follower port, with the same
# log-tail-then-die handling when pg_ctl reports a failure.
for my $port (@followerWorkerPorts)
{
if(system("$bindir/pg_ctl",
('start', '-w',
'-o', join(" ", @pgOptions)." -c port=$port",
'-D', "tmp_check/follower.$port/data",
'-l', "tmp_check/follower.$port/log/postmaster.log")) != 0)
{
system("tail", ("-n20", "tmp_check/follower.$port/log/postmaster.log"));
die "Could not start follower server";
}
}
}
###
@ -466,7 +576,7 @@ elsif ($isolationtester)
{
push(@arguments, "--dbname=regression");
system("$isolationRegress", @arguments) == 0
or die "Could not run isolation tests";
or die "Could not run isolation tests";
}
else
{

View File

@ -0,0 +1,18 @@
-- prepare for future tests by configuring all the follower nodes
\c - - - :follower_master_port
ALTER SYSTEM SET citus.use_secondary_nodes TO 'always';
ALTER SYSTEM SET citus.cluster_name TO 'second-cluster';
SELECT pg_reload_conf();
-- also configure the workers, they'll run queries when MX is enabled
\c - - - :follower_worker_1_port
ALTER SYSTEM SET citus.use_secondary_nodes TO 'always';
ALTER SYSTEM SET citus.cluster_name TO 'second-cluster';
SELECT pg_reload_conf();
\c - - - :follower_worker_2_port
ALTER SYSTEM SET citus.use_secondary_nodes TO 'always';
ALTER SYSTEM SET citus.cluster_name TO 'second-cluster';
SELECT pg_reload_conf();

View File

@ -0,0 +1,8 @@
-- check that the nodes are all in read-only mode and rejecting write queries
\c - - - :follower_master_port
CREATE TABLE tab (a int);
\c - - - :follower_worker_1_port
CREATE TABLE tab (a int);
\c - - - :follower_worker_2_port
CREATE TABLE tab (a int);

View File

@ -0,0 +1,63 @@
\c - - - :master_port
-- do some setup
SELECT 1 FROM master_add_node('localhost', :worker_1_port);
SELECT 1 FROM master_add_node('localhost', :worker_2_port);
CREATE TABLE the_table (a int, b int);
SELECT create_distributed_table('the_table', 'a');
INSERT INTO the_table (a, b) VALUES (1, 1);
INSERT INTO the_table (a, b) VALUES (1, 2);
-- connect to the follower and check that a simple select query works, the follower
-- is still in the default cluster and will send queries to the primary nodes
\c - - - :follower_master_port
SELECT * FROM the_table;
-- now, connect to the follower but tell it to use secondary nodes. There are no
-- secondary nodes so this should fail.
-- (this is :follower_master_port but substitution doesn't work here)
\c "port=9070 dbname=regression options='-c\ citus.use_secondary_nodes=always'"
SELECT * FROM the_table;
-- add the secondary nodes and try again, the SELECT statement should work this time
\c - - - :master_port
SELECT 1 FROM master_add_node('localhost', :follower_worker_1_port,
groupid => (SELECT groupid FROM pg_dist_node WHERE nodeport = :worker_1_port),
noderole => 'secondary');
SELECT 1 FROM master_add_node('localhost', :follower_worker_2_port,
groupid => (SELECT groupid FROM pg_dist_node WHERE nodeport = :worker_2_port),
noderole => 'secondary');
\c "port=9070 dbname=regression options='-c\ citus.use_secondary_nodes=always'"
-- now that we've added secondaries this should work
SELECT * FROM the_table;
-- okay, now let's play with nodecluster. If we change the cluster of our follower node
-- queries should start failing again, since there are no worker nodes in the new cluster
\c "port=9070 dbname=regression options='-c\ citus.use_secondary_nodes=always\ -c\ citus.cluster_name=second-cluster'"
-- there are no secondary nodes in this cluster, so this should fail!
SELECT * FROM the_table;
-- now move the secondary nodes into the new cluster and see that the follower, finally
-- correctly configured, can run select queries involving them
\c - - - :master_port
UPDATE pg_dist_node SET nodecluster = 'second-cluster' WHERE noderole = 'secondary';
\c "port=9070 dbname=regression options='-c\ citus.use_secondary_nodes=always\ -c\ citus.cluster_name=second-cluster'"
SELECT * FROM the_table;
-- clean up after ourselves
\c - - - :master_port
DROP TABLE the_table;

View File

@ -0,0 +1,22 @@
\c - - - :master_port
-- do some setup
CREATE TABLE tab(a int, b int);
SELECT create_distributed_table('tab', 'a');
INSERT INTO tab (a, b) VALUES (1, 1);
INSERT INTO tab (a, b) VALUES (1, 2);
\c - - - :follower_master_port
SET citus.task_executor_type TO 'real-time';
SELECT * FROM tab;
SET citus.task_executor_type TO 'task-tracker';
SELECT * FROM tab;
-- clean up
\c - - - :master_port
DROP TABLE tab;