From 59e626d158cd6edee2d121c3e1ab6f29d3583676 Mon Sep 17 00:00:00 2001 From: Marco Slot Date: Sat, 12 Aug 2017 11:47:51 +0200 Subject: [PATCH] Add regression tests for follower clusters --- src/test/regress/Makefile | 6 +- .../multi_follower_configure_followers.out | 29 ++++ .../expected/multi_follower_sanity_check.out | 10 ++ .../multi_follower_select_statements.out | 87 ++++++++++++ .../expected/multi_follower_task_tracker.out | 27 ++++ src/test/regress/multi_follower_schedule | 4 + src/test/regress/pg_regress_multi.pl | 124 +++++++++++++++++- .../multi_follower_configure_followers.sql | 18 +++ .../sql/multi_follower_sanity_check.sql | 8 ++ .../sql/multi_follower_select_statements.sql | 63 +++++++++ .../sql/multi_follower_task_tracker.sql | 22 ++++ 11 files changed, 390 insertions(+), 8 deletions(-) create mode 100644 src/test/regress/expected/multi_follower_configure_followers.out create mode 100644 src/test/regress/expected/multi_follower_sanity_check.out create mode 100644 src/test/regress/expected/multi_follower_select_statements.out create mode 100644 src/test/regress/expected/multi_follower_task_tracker.out create mode 100644 src/test/regress/multi_follower_schedule create mode 100644 src/test/regress/sql/multi_follower_configure_followers.sql create mode 100644 src/test/regress/sql/multi_follower_sanity_check.sql create mode 100644 src/test/regress/sql/multi_follower_select_statements.sql create mode 100644 src/test/regress/sql/multi_follower_task_tracker.sql diff --git a/src/test/regress/Makefile b/src/test/regress/Makefile index 488043dd1..7bda66c3e 100644 --- a/src/test/regress/Makefile +++ b/src/test/regress/Makefile @@ -37,7 +37,7 @@ output_files := $(patsubst $(citus_abs_srcdir)/output/%.source,expected/%.out, $ # intermediate, for muscle memory backward compatibility. 
check: check-full # check-full triggers all tests that ought to be run routinely -check-full: check-multi check-multi-mx check-multi-task-tracker-extra check-multi-binary check-worker +check-full: check-multi check-multi-mx check-multi-task-tracker-extra check-multi-binary check-worker check-follower-cluster # using pg_regress_multi_check unnecessarily starts up multiple nodes, which isn't needed # for check-worker. But that's harmless besides a few cycles. @@ -76,6 +76,10 @@ check-multi-binary: all tempinstall-main --server-option=citus.binary_worker_copy_format=on \ -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/multi_binary_schedule $(EXTRA_TESTS) +check-follower-cluster: all + $(pg_regress_multi_check) --load-extension=citus --follower-cluster \ + -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/multi_follower_schedule $(EXTRA_TESTS) + clean distclean maintainer-clean: rm -f $(output_files) $(input_files) rm -rf tmp_check/ diff --git a/src/test/regress/expected/multi_follower_configure_followers.out b/src/test/regress/expected/multi_follower_configure_followers.out new file mode 100644 index 000000000..27d157ef2 --- /dev/null +++ b/src/test/regress/expected/multi_follower_configure_followers.out @@ -0,0 +1,29 @@ +-- prepare for future tests by configuring all the follower nodes +\c - - - :follower_master_port +ALTER SYSTEM SET citus.use_secondary_nodes TO 'always'; +ALTER SYSTEM SET citus.cluster_name TO 'second-cluster'; +SELECT pg_reload_conf(); + pg_reload_conf +---------------- + t +(1 row) + +-- also configure the workers, they'll run queries when MX is enabled +\c - - - :follower_worker_1_port +ALTER SYSTEM SET citus.use_secondary_nodes TO 'always'; +ALTER SYSTEM SET citus.cluster_name TO 'second-cluster'; +SELECT pg_reload_conf(); + pg_reload_conf +---------------- + t +(1 row) + +\c - - - :follower_worker_2_port +ALTER SYSTEM SET citus.use_secondary_nodes TO 'always'; +ALTER SYSTEM SET citus.cluster_name TO 'second-cluster'; +SELECT 
pg_reload_conf(); + pg_reload_conf +---------------- + t +(1 row) + diff --git a/src/test/regress/expected/multi_follower_sanity_check.out b/src/test/regress/expected/multi_follower_sanity_check.out new file mode 100644 index 000000000..6d5a0e1f6 --- /dev/null +++ b/src/test/regress/expected/multi_follower_sanity_check.out @@ -0,0 +1,10 @@ +-- check that the nodes are all in read-only mode and rejecting write queries +\c - - - :follower_master_port +CREATE TABLE tab (a int); +ERROR: cannot execute CREATE TABLE in a read-only transaction +\c - - - :follower_worker_1_port +CREATE TABLE tab (a int); +ERROR: cannot execute CREATE TABLE in a read-only transaction +\c - - - :follower_worker_2_port +CREATE TABLE tab (a int); +ERROR: cannot execute CREATE TABLE in a read-only transaction diff --git a/src/test/regress/expected/multi_follower_select_statements.out b/src/test/regress/expected/multi_follower_select_statements.out new file mode 100644 index 000000000..1452ec3cc --- /dev/null +++ b/src/test/regress/expected/multi_follower_select_statements.out @@ -0,0 +1,87 @@ +\c - - - :master_port +-- do some setup +SELECT 1 FROM master_add_node('localhost', :worker_1_port); + ?column? +---------- + 1 +(1 row) + +SELECT 1 FROM master_add_node('localhost', :worker_2_port); + ?column? +---------- + 1 +(1 row) + +CREATE TABLE the_table (a int, b int); +SELECT create_distributed_table('the_table', 'a'); + create_distributed_table +-------------------------- + +(1 row) + +INSERT INTO the_table (a, b) VALUES (1, 1); +INSERT INTO the_table (a, b) VALUES (1, 2); +-- connect to the follower and check that a simple select query works, the follower +-- is still in the default cluster and will send queries to the primary nodes +\c - - - :follower_master_port +SELECT * FROM the_table; + a | b +---+--- + 1 | 1 + 1 | 2 +(2 rows) + +-- now, connect to the follower but tell it to use secondary nodes. There are no +-- secondary nodes so this should fail. 
+-- (this is :follower_master_port but substitution doesn't work here) +\c "port=9070 dbname=regression options='-c\ citus.use_secondary_nodes=always'" +SELECT * FROM the_table; +ERROR: node group 2 does not have a secondary node +-- add the secondary nodes and try again, the SELECT statement should work this time +\c - - - :master_port +SELECT 1 FROM master_add_node('localhost', :follower_worker_1_port, + groupid => (SELECT groupid FROM pg_dist_node WHERE nodeport = :worker_1_port), + noderole => 'secondary'); + ?column? +---------- + 1 +(1 row) + +SELECT 1 FROM master_add_node('localhost', :follower_worker_2_port, + groupid => (SELECT groupid FROM pg_dist_node WHERE nodeport = :worker_2_port), + noderole => 'secondary'); + ?column? +---------- + 1 +(1 row) + +\c "port=9070 dbname=regression options='-c\ citus.use_secondary_nodes=always'" +-- now that we've added secondaries this should work +SELECT * FROM the_table; + a | b +---+--- + 1 | 1 + 1 | 2 +(2 rows) + +-- okay, now let's play with nodecluster. If we change the cluster of our follower node +-- queries should stat failing again, since there are no worker nodes in the new cluster +\c "port=9070 dbname=regression options='-c\ citus.use_secondary_nodes=always\ -c\ citus.cluster_name=second-cluster'" +-- there are no secondary nodes in this cluster, so this should fail! 
+SELECT * FROM the_table; +ERROR: there is a shard placement in node group 2 but there are no nodes in that group +-- now move the secondary nodes into the new cluster and see that the follower, finally +-- correctly configured, can run select queries involving them +\c - - - :master_port +UPDATE pg_dist_node SET nodecluster = 'second-cluster' WHERE noderole = 'secondary'; +\c "port=9070 dbname=regression options='-c\ citus.use_secondary_nodes=always\ -c\ citus.cluster_name=second-cluster'" +SELECT * FROM the_table; + a | b +---+--- + 1 | 1 + 1 | 2 +(2 rows) + +-- clean up after ourselves +\c - - - :master_port +DROP TABLE the_table; diff --git a/src/test/regress/expected/multi_follower_task_tracker.out b/src/test/regress/expected/multi_follower_task_tracker.out new file mode 100644 index 000000000..d60e39187 --- /dev/null +++ b/src/test/regress/expected/multi_follower_task_tracker.out @@ -0,0 +1,27 @@ +\c - - - :master_port +-- do some setup +CREATE TABLE tab(a int, b int); +SELECT create_distributed_table('tab', 'a'); + create_distributed_table +-------------------------- + +(1 row) + +INSERT INTO tab (a, b) VALUES (1, 1); +INSERT INTO tab (a, b) VALUES (1, 2); +\c - - - :follower_master_port +SET citus.task_executor_type TO 'real-time'; +SELECT * FROM tab; + a | b +---+--- + 1 | 1 + 1 | 2 +(2 rows) + +SET citus.task_executor_type TO 'task-tracker'; +SELECT * FROM tab; +ERROR: task tracker queries are not allowed while citus.use_secondary_nodes is 'always' +HINT: try setting citus.task_executor_type TO 'real-time' +-- clean up +\c - - - :master_port +DROP TABLE tab; diff --git a/src/test/regress/multi_follower_schedule b/src/test/regress/multi_follower_schedule new file mode 100644 index 000000000..1d55732f6 --- /dev/null +++ b/src/test/regress/multi_follower_schedule @@ -0,0 +1,4 @@ +test: multi_follower_sanity_check +test: multi_follower_select_statements +test: multi_follower_configure_followers +test: multi_follower_task_tracker diff --git 
a/src/test/regress/pg_regress_multi.pl b/src/test/regress/pg_regress_multi.pl index cd3f03286..1a503427d 100755 --- a/src/test/regress/pg_regress_multi.pl +++ b/src/test/regress/pg_regress_multi.pl @@ -46,6 +46,7 @@ sub Usage() # Option parsing my $isolationtester = 0; my $vanillatest = 0; +my $followercluster = 0; my $bindir = ""; my $libdir = undef; my $pgxsdir = ""; @@ -70,6 +71,7 @@ my $serversAreShutdown = "TRUE"; GetOptions( 'isolationtester' => \$isolationtester, 'vanillatest' => \$vanillatest, + 'follower-cluster' => \$followercluster, 'bindir=s' => \$bindir, 'libdir=s' => \$libdir, 'pgxsdir=s' => \$pgxsdir, @@ -216,6 +218,13 @@ for (my $workerIndex = 1; $workerIndex <= $workerCount; $workerIndex++) { push(@workerPorts, $workerPort); } +my $followerCoordPort = 9070; +my @followerWorkerPorts = (); +for (my $workerIndex = 1; $workerIndex <= $workerCount; $workerIndex++) { + my $workerPort = $followerCoordPort + $workerIndex; + push(@followerWorkerPorts, $workerPort); +} + my $host = "localhost"; my $user = "postgres"; my @pgOptions = (); @@ -237,6 +246,13 @@ push(@pgOptions, '-c', "citus.remote_task_check_interval=1ms"); push(@pgOptions, '-c', "citus.shard_replication_factor=2"); push(@pgOptions, '-c', "citus.node_connection_timeout=${connectionTimeout}"); +if ($followercluster) +{ + push(@pgOptions, '-c', "max_wal_senders=10"); + push(@pgOptions, '-c', "hot_standby=on"); + push(@pgOptions, '-c', "wal_level=replica"); +} + # Add externally added options last, so they overwrite the default ones above for my $option (@userPgOptions) { @@ -272,6 +288,12 @@ for my $port (@workerPorts) system("rm", ('-rf', "tmp_check/worker.$port")) == 0 or die "Could not remove worker directory"; } +system("rm", ('-rf', 'tmp_check/master-follower')) == 0 or die "Could not remove master follower directory"; +for my $port (@followerWorkerPorts) +{ + system("rm", ('-rf', "tmp_check/follower.$port")) == 0 or die "Could not remove follower directory"; +} + # Prepare directory in which 'psql' has 
some helpful variables for locating the workers system("mkdir", ('-p', "tmp_check/tmp-bin")) == 0 or die "Could not create tmp-bin directory"; @@ -280,6 +302,7 @@ sysopen my $fh, "tmp_check/tmp-bin/psql", O_CREAT|O_TRUNC|O_RDWR, 0700 print $fh "#!/bin/bash\n"; print $fh "exec psql "; print $fh "--variable=master_port=$masterPort "; +print $fh "--variable=follower_master_port=$followerCoordPort "; print $fh "--variable=default_user=$user "; print $fh "--variable=SHOW_CONTEXT=always "; for my $workeroff (0 .. $#workerPorts) @@ -287,6 +310,11 @@ for my $workeroff (0 .. $#workerPorts) my $port = $workerPorts[$workeroff]; print $fh "--variable=worker_".($workeroff+1)."_port=$port "; } +for my $workeroff (0 .. $#followerWorkerPorts) +{ + my $port = $followerWorkerPorts[$workeroff]; + print $fh "--variable=follower_worker_".($workeroff+1)."_port=$port "; +} print $fh "\"\$@\"\n"; # pass on the commandline arguments close $fh; @@ -297,10 +325,29 @@ for my $port (@workerPorts) or die "Could not create worker directory"; } +if ($followercluster) +{ + system("mkdir", ('-p', 'tmp_check/master-follower/log')) == 0 or die "Could not create master follower directory"; + for my $port (@followerWorkerPorts) + { + system("mkdir", ('-p', "tmp_check/follower.$port/log")) == 0 + or die "Could not create follower directory"; + } +} + # Create new data directories, copy workers for speed system("$bindir/initdb", ("--nosync", "-U", $user, "tmp_check/master/data")) == 0 or die "Could not create master data directory"; +if ($followercluster) +{ + # This is only necessary on PG 9.6 but it doesn't hurt PG 10; trust both IPv4 and IPv6 loopback because localhost may resolve to either + open(my $fd, ">>", "tmp_check/master/data/pg_hba.conf") + or die "could not open pg_hba.conf"; + print $fd "\nhost replication postgres 127.0.0.1/32 trust\nhost replication postgres ::1/128 trust\n"; + close $fd; +} + for my $port (@workerPorts) { system("cp -a tmp_check/master/data tmp_check/worker.$port/data") == 0 @@ -322,6 +369,20 @@ sub ShutdownServers() ('stop', '-w', '-D', "tmp_check/worker.$port/data")) == 0 or warn "Could not 
shutdown worker server"; } + + if ($followercluster) + { + system("$bindir/pg_ctl", + ('stop', '-w', '-D', 'tmp_check/master-follower/data')) == 0 + or warn "Could not shutdown master follower server"; + + for my $port (@followerWorkerPorts) + { + system("$bindir/pg_ctl", + ('stop', '-w', '-D', "tmp_check/follower.$port/data")) == 0 + or warn "Could not shutdown follower server"; + } + } $serversAreShutdown = "TRUE"; } } @@ -357,20 +418,69 @@ if ($valgrind) $serversAreShutdown = "FALSE"; # Start servers -system("$bindir/pg_ctl", +if(system("$bindir/pg_ctl", ('start', '-w', '-o', join(" ", @pgOptions)." -c port=$masterPort", - '-D', 'tmp_check/master/data', '-l', 'tmp_check/master/log/postmaster.log')) == 0 - or die "Could not start master server"; + '-D', 'tmp_check/master/data', '-l', 'tmp_check/master/log/postmaster.log')) != 0) +{ + system("tail", ("-n20", "tmp_check/master/log/postmaster.log")); + die "Could not start master server"; +} for my $port (@workerPorts) { - system("$bindir/pg_ctl", + if(system("$bindir/pg_ctl", ('start', '-w', '-o', join(" ", @pgOptions)." -c port=$port", '-D', "tmp_check/worker.$port/data", - '-l', "tmp_check/worker.$port/log/postmaster.log")) == 0 - or die "Could not start worker server"; + '-l', "tmp_check/worker.$port/log/postmaster.log")) != 0) + { + system("tail", ("-n20", "tmp_check/worker.$port/log/postmaster.log")); + die "Could not start worker server"; + } +} + +# Setup the follower nodes +if ($followercluster) +{ + # This test would run faster on PG10 if we could pass --no-sync here but that flag + # isn't supported on PG 9.6. In a year when we drop support for PG9.6 add that flag! + system("$bindir/pg_basebackup", + ("-D", "tmp_check/master-follower/data", "--host=$host", "--port=$masterPort", + "--username=$user", "-R", "-X", "stream")) == 0 + or die "Could not take basebackup"; + + for my $offset (0 .. 
$#workerPorts) + { + my $workerPort = $workerPorts[$offset]; + my $followerPort = $followerWorkerPorts[$offset]; + system("$bindir/pg_basebackup", + ("-D", "tmp_check/follower.$followerPort/data", "--host=$host", "--port=$workerPort", + "--username=$user", "-R", "-X", "stream")) == 0 + or die "Could not take basebackup"; + } + + if(system("$bindir/pg_ctl", + ('start', '-w', + '-o', join(" ", @pgOptions)." -c port=$followerCoordPort", + '-D', 'tmp_check/master-follower/data', '-l', 'tmp_check/master-follower/log/postmaster.log')) != 0) + { + system("tail", ("-n20", "tmp_check/master-follower/log/postmaster.log")); + die "Could not start master follower server"; + } + + for my $port (@followerWorkerPorts) + { + if(system("$bindir/pg_ctl", + ('start', '-w', + '-o', join(" ", @pgOptions)." -c port=$port", + '-D', "tmp_check/follower.$port/data", + '-l', "tmp_check/follower.$port/log/postmaster.log")) != 0) + { + system("tail", ("-n20", "tmp_check/follower.$port/log/postmaster.log")); + die "Could not start follower server"; + } + } } ### @@ -466,7 +576,7 @@ elsif ($isolationtester) { push(@arguments, "--dbname=regression"); system("$isolationRegress", @arguments) == 0 - or die "Could not run isolation tests"; + or die "Could not run isolation tests"; } else { diff --git a/src/test/regress/sql/multi_follower_configure_followers.sql b/src/test/regress/sql/multi_follower_configure_followers.sql new file mode 100644 index 000000000..e5a6b7b15 --- /dev/null +++ b/src/test/regress/sql/multi_follower_configure_followers.sql @@ -0,0 +1,18 @@ +-- prepare for future tests by configuring all the follower nodes + +\c - - - :follower_master_port +ALTER SYSTEM SET citus.use_secondary_nodes TO 'always'; +ALTER SYSTEM SET citus.cluster_name TO 'second-cluster'; +SELECT pg_reload_conf(); + +-- also configure the workers, they'll run queries when MX is enabled + +\c - - - :follower_worker_1_port +ALTER SYSTEM SET citus.use_secondary_nodes TO 'always'; +ALTER SYSTEM SET 
citus.cluster_name TO 'second-cluster'; +SELECT pg_reload_conf(); + +\c - - - :follower_worker_2_port +ALTER SYSTEM SET citus.use_secondary_nodes TO 'always'; +ALTER SYSTEM SET citus.cluster_name TO 'second-cluster'; +SELECT pg_reload_conf(); diff --git a/src/test/regress/sql/multi_follower_sanity_check.sql b/src/test/regress/sql/multi_follower_sanity_check.sql new file mode 100644 index 000000000..3059b9f9f --- /dev/null +++ b/src/test/regress/sql/multi_follower_sanity_check.sql @@ -0,0 +1,8 @@ +-- check that the nodes are all in read-only mode and rejecting write queries + +\c - - - :follower_master_port +CREATE TABLE tab (a int); +\c - - - :follower_worker_1_port +CREATE TABLE tab (a int); +\c - - - :follower_worker_2_port +CREATE TABLE tab (a int); diff --git a/src/test/regress/sql/multi_follower_select_statements.sql b/src/test/regress/sql/multi_follower_select_statements.sql new file mode 100644 index 000000000..90d0009cb --- /dev/null +++ b/src/test/regress/sql/multi_follower_select_statements.sql @@ -0,0 +1,63 @@ +\c - - - :master_port + +-- do some setup + +SELECT 1 FROM master_add_node('localhost', :worker_1_port); +SELECT 1 FROM master_add_node('localhost', :worker_2_port); + +CREATE TABLE the_table (a int, b int); +SELECT create_distributed_table('the_table', 'a'); + +INSERT INTO the_table (a, b) VALUES (1, 1); +INSERT INTO the_table (a, b) VALUES (1, 2); + +-- connect to the follower and check that a simple select query works, the follower +-- is still in the default cluster and will send queries to the primary nodes + +\c - - - :follower_master_port + +SELECT * FROM the_table; + +-- now, connect to the follower but tell it to use secondary nodes. There are no +-- secondary nodes so this should fail. 
+ +-- (this is :follower_master_port but substitution doesn't work here) +\c "port=9070 dbname=regression options='-c\ citus.use_secondary_nodes=always'" + +SELECT * FROM the_table; + +-- add the secondary nodes and try again, the SELECT statement should work this time + +\c - - - :master_port + +SELECT 1 FROM master_add_node('localhost', :follower_worker_1_port, + groupid => (SELECT groupid FROM pg_dist_node WHERE nodeport = :worker_1_port), + noderole => 'secondary'); +SELECT 1 FROM master_add_node('localhost', :follower_worker_2_port, + groupid => (SELECT groupid FROM pg_dist_node WHERE nodeport = :worker_2_port), + noderole => 'secondary'); + +\c "port=9070 dbname=regression options='-c\ citus.use_secondary_nodes=always'" + +-- now that we've added secondaries this should work +SELECT * FROM the_table; + +-- okay, now let's play with nodecluster. If we change the cluster of our follower node +-- queries should stat failing again, since there are no worker nodes in the new cluster + +\c "port=9070 dbname=regression options='-c\ citus.use_secondary_nodes=always\ -c\ citus.cluster_name=second-cluster'" + +-- there are no secondary nodes in this cluster, so this should fail! 
+SELECT * FROM the_table; + +-- now move the secondary nodes into the new cluster and see that the follower, finally +-- correctly configured, can run select queries involving them + +\c - - - :master_port +UPDATE pg_dist_node SET nodecluster = 'second-cluster' WHERE noderole = 'secondary'; +\c "port=9070 dbname=regression options='-c\ citus.use_secondary_nodes=always\ -c\ citus.cluster_name=second-cluster'" +SELECT * FROM the_table; + +-- clean up after ourselves +\c - - - :master_port +DROP TABLE the_table; diff --git a/src/test/regress/sql/multi_follower_task_tracker.sql b/src/test/regress/sql/multi_follower_task_tracker.sql new file mode 100644 index 000000000..3a6fe215f --- /dev/null +++ b/src/test/regress/sql/multi_follower_task_tracker.sql @@ -0,0 +1,22 @@ +\c - - - :master_port + +-- do some setup + +CREATE TABLE tab(a int, b int); +SELECT create_distributed_table('tab', 'a'); + +INSERT INTO tab (a, b) VALUES (1, 1); +INSERT INTO tab (a, b) VALUES (1, 2); + +\c - - - :follower_master_port + +SET citus.task_executor_type TO 'real-time'; +SELECT * FROM tab; +SET citus.task_executor_type TO 'task-tracker'; +SELECT * FROM tab; + +-- clean up + +\c - - - :master_port + +DROP TABLE tab;