-- Transcript of a psql session: each statement is followed by the output it
-- produced. This section creates the hash-distributed tables used below
-- (researchers, labs) and exercises commit, abort and savepoint handling in
-- transactions that modify them.
--
-- Restart the id sequences at a fixed value; shard-suffixed object names that
-- show up later in the output (e.g. avoid_name_confusion_idx_1200000) carry
-- ids starting from this number.
ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1200000;
ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 1200000;
-- ===================================================================
-- test end-to-end modification functionality
-- ===================================================================
CREATE TABLE researchers (
    id bigint NOT NULL,
    lab_id int NOT NULL,
    name text NOT NULL
);
CREATE TABLE labs (
    id bigint NOT NULL,
    name text NOT NULL
);
-- researchers is distributed by lab_id, so every row of one lab is addressed
-- through the same distribution-column value
SELECT master_create_distributed_table('researchers', 'lab_id', 'hash');
 master_create_distributed_table 
---------------------------------
 
(1 row)

SELECT master_create_worker_shards('researchers', 2, 2);
 master_create_worker_shards 
-----------------------------
 
(1 row)

SELECT master_create_distributed_table('labs', 'id', 'hash');
 master_create_distributed_table 
---------------------------------
 
(1 row)

SELECT master_create_worker_shards('labs', 1, 1);
 master_create_worker_shards 
-----------------------------
 
(1 row)

-- might be confusing to have two people in the same lab with the same name
CREATE UNIQUE INDEX avoid_name_confusion_idx ON researchers (lab_id, name);
NOTICE:  using one-phase commit for distributed DDL commands
HINT:  You can enable two-phase commit for extra safety with: SET citus.multi_shard_commit_protocol TO '2pc'
-- add some data
INSERT INTO researchers VALUES (1, 1, 'Donald Knuth');
INSERT INTO researchers VALUES (2, 1, 'Niklaus Wirth');
INSERT INTO researchers VALUES (3, 2, 'Tony Hoare');
INSERT INTO researchers VALUES (4, 2, 'Kenneth Iverson');
-- replace a researcher, reusing their id
BEGIN;
DELETE FROM researchers WHERE lab_id = 1 AND id = 2;
INSERT INTO researchers VALUES (2, 1, 'John Backus');
COMMIT;
SELECT name FROM researchers WHERE lab_id = 1 AND id = 2;
    name     
-------------
 John Backus
(1 row)

-- abort a modification
BEGIN;
DELETE FROM researchers WHERE lab_id = 1 AND id = 1;
ABORT;
-- the row deleted inside the aborted transaction must still be visible
SELECT name FROM researchers WHERE lab_id = 1 AND id = 1;
     name     
--------------
 Donald Knuth
(1 row)

-- trigger a unique constraint violation
-- ('John Backus' already exists in lab 1 after the replacement above)
BEGIN;
UPDATE researchers SET name = 'John Backus' WHERE id = 1 AND lab_id = 1;
ERROR:  duplicate key value violates unique constraint "avoid_name_confusion_idx_1200000"
DETAIL:  Key (lab_id, name)=(1, John Backus) already exists.
CONTEXT:  while executing command on localhost:57637
ABORT;
-- creating savepoints should work...
BEGIN;
INSERT INTO researchers VALUES (5, 3, 'Dennis Ritchie');
SAVEPOINT hire_thompson;
INSERT INTO researchers VALUES (6, 3, 'Ken Thompson');
COMMIT;
SELECT name FROM researchers WHERE lab_id = 3 AND id = 6;
     name     
--------------
 Ken Thompson
(1 row)

-- even if created by PL/pgSQL...
-- (the EXCEPTION clause is present but never fires here: no NOTICE is printed
-- and the COMMIT succeeds)
\set VERBOSITY terse
BEGIN;
DO $$
BEGIN
    INSERT INTO researchers VALUES (10, 10, 'Edsger Dijkstra');
EXCEPTION
    WHEN not_null_violation THEN
        RAISE NOTICE 'caught not_null_violation';
END $$;
COMMIT;
-- but rollback should not
-- (an explicit ROLLBACK TO a savepoint is reported as an error at COMMIT,
-- and the SELECT below shows that nothing from the transaction was kept)
BEGIN;
INSERT INTO researchers VALUES (7, 4, 'Jim Gray');
SAVEPOINT hire_engelbart;
INSERT INTO researchers VALUES (8, 4, 'Douglas Engelbart');
ROLLBACK TO hire_engelbart;
COMMIT;
ERROR:  cannot ROLLBACK TO SAVEPOINT in transactions which modify distributed tables
SELECT name FROM researchers WHERE lab_id = 4;
 name 
------
(0 rows)

-- same restriction when the rollback comes from a PL/pgSQL exception handler:
-- the NULL id raises not_null_violation, the handler runs (see NOTICE), and
-- the COMMIT then fails with the same error as above
BEGIN;
DO $$
BEGIN
    INSERT INTO researchers VALUES (11, 11, 'Whitfield Diffie');
    INSERT INTO researchers VALUES (NULL, 10, 'Edsger Dijkstra');
EXCEPTION
    WHEN not_null_violation THEN
        RAISE NOTICE 'caught not_null_violation';
END $$;
NOTICE:  caught not_null_violation
COMMIT;
ERROR:  cannot ROLLBACK TO SAVEPOINT in transactions which modify distributed tables
\set VERBOSITY default
-- should be valid to edit labs after researchers...
-- This section checks which combinations of statements may share one
-- transaction: modifications across tables, router SELECTs, distributed DDL
-- and \copy, followed by a primary-key violation on a replicated table.
BEGIN;
INSERT INTO researchers VALUES (8, 5, 'Douglas Engelbart');
INSERT INTO labs VALUES (5, 'Los Alamos');
COMMIT;
SELECT * FROM researchers, labs WHERE labs.id = researchers.lab_id;
 id | lab_id |       name        | id |    name    
----+--------+-------------------+----+------------
  8 |      5 | Douglas Engelbart |  5 | Los Alamos
(1 row)

-- but not the other way around (would require expanding xact participants)...
BEGIN;
INSERT INTO labs VALUES (6, 'Bell Labs');
INSERT INTO researchers VALUES (9, 6, 'Leslie Lamport');
ERROR:  no transaction participant matches localhost:57638
DETAIL:  Transactions which modify distributed tables may only target nodes affected by the modification command which began the transaction.
COMMIT;
-- this logic even applies to router SELECTs occurring after a modification:
-- selecting from the modified node is fine...
BEGIN;
INSERT INTO labs VALUES (6, 'Bell Labs');
SELECT count(*) FROM researchers WHERE lab_id = 6;
 count 
-------
     0
(1 row)

ABORT;
-- but if a SELECT needs to go to new node, that's a problem...
BEGIN;
-- set the researchers placements on worker_1 to shardstate 3 (other parts of
-- this test treat 1 as healthy and 3 as bad); the change is made inside the
-- transaction and is undone by the ABORT below
UPDATE pg_dist_shard_placement AS sp SET shardstate = 3
FROM   pg_dist_shard AS s
WHERE  sp.shardid = s.shardid
AND    sp.nodename = 'localhost'
AND    sp.nodeport = :worker_1_port
AND    s.logicalrelid = 'researchers'::regclass;
INSERT INTO labs VALUES (6, 'Bell Labs');
SELECT count(*) FROM researchers WHERE lab_id = 6;
ERROR:  no transaction participant matches localhost:57638
DETAIL:  Transactions which modify distributed tables may only target nodes affected by the modification command which began the transaction.
ABORT;
-- applies to DDL, too
BEGIN;
INSERT INTO labs VALUES (6, 'Bell Labs');
ALTER TABLE labs ADD COLUMN motto text;
ERROR:  distributed DDL commands must not appear within transaction blocks containing data modifications
COMMIT;
-- whether it occurs first or second
BEGIN;
ALTER TABLE labs ADD COLUMN motto text;
INSERT INTO labs VALUES (6, 'Bell Labs');
ERROR:  distributed data modifications must not appear in transaction blocks which contain distributed DDL commands
COMMIT;
-- but the DDL should correctly roll back
-- (no motto column in the table description, and no id 6 row)
\d labs
     Table "public.labs"
 Column |  Type  | Modifiers 
--------+--------+-----------
 id     | bigint | not null
 name   | text   | not null

SELECT * FROM labs WHERE id = 6;
 id | name 
----+------
(0 rows)

-- COPY can't happen second,
-- (the rows fed to each \copy on stdin are not echoed in this transcript; the
-- CONTEXT lines show the first data line of the rejected copies)
BEGIN;
INSERT INTO labs VALUES (6, 'Bell Labs');
\copy labs from stdin delimiter ','
ERROR:  distributed copy operations must not appear in transaction blocks containing other distributed modifications
CONTEXT:  COPY labs, line 1: "10,Weyland-Yutani"
COMMIT;
-- though it will work if before any modifications
BEGIN;
\copy labs from stdin delimiter ','
SELECT name FROM labs WHERE id = 10;
      name      
----------------
 Weyland-Yutani
(1 row)

INSERT INTO labs VALUES (6, 'Bell Labs');
COMMIT;
-- but a double-copy isn't allowed (the first will persist)
BEGIN;
\copy labs from stdin delimiter ','
\copy labs from stdin delimiter ','
ERROR:  distributed copy operations must not appear in transaction blocks containing other distributed modifications
CONTEXT:  COPY labs, line 1: "12,fsociety"
COMMIT;
-- id 11 is returned here, i.e. the row from the first copy was kept
SELECT name FROM labs WHERE id = 11;
      name      
----------------
 Planet Express
(1 row)

-- finally, ALTER and copy aren't compatible
BEGIN;
ALTER TABLE labs ADD COLUMN motto text;
\copy labs from stdin delimiter ','
ERROR:  distributed copy operations must not appear in transaction blocks containing other distributed modifications
CONTEXT:  COPY labs, line 1: "12,fsociety,lol"
COMMIT;
-- but the DDL should correctly roll back
\d labs
     Table "public.labs"
 Column |  Type  | Modifiers 
--------+--------+-----------
 id     | bigint | not null
 name   | text   | not null

SELECT * FROM labs WHERE id = 12;
 id | name 
----+------
(0 rows)

-- and if the copy is before the ALTER...
BEGIN;
\copy labs from stdin delimiter ','
ALTER TABLE labs ADD COLUMN motto text;
ERROR:  distributed DDL commands must not appear within transaction blocks containing data modifications
COMMIT;
-- the DDL fails, but copy persists
\d labs
     Table "public.labs"
 Column |  Type  | Modifiers 
--------+--------+-----------
 id     | bigint | not null
 name   | text   | not null

SELECT * FROM labs WHERE id = 12;
 id |   name   
----+----------
 12 | fsociety
(1 row)

-- now, for some special failures...
CREATE TABLE objects (
    id bigint PRIMARY KEY,
    name text NOT NULL
);
SELECT master_create_distributed_table('objects', 'id', 'hash');
 master_create_distributed_table 
---------------------------------
 
(1 row)

-- arguments (1, 2): the placement-count checks later in this test expect two
-- placements for objects
SELECT master_create_worker_shards('objects', 1, 2);
 master_create_worker_shards 
-----------------------------
 
(1 row)

-- test primary key violations
BEGIN;
INSERT INTO objects VALUES (1, 'apple');
INSERT INTO objects VALUES (1, 'orange');
ERROR:  duplicate key value violates unique constraint "objects_pkey_1200003"
DETAIL:  Key (id)=(1) already exists.
CONTEXT:  while executing command on localhost:57637
COMMIT;
-- data shouldn't have persisted...
SELECT * FROM objects WHERE id = 1;
 id | name 
----+------
(0 rows)

-- and placements should still be healthy...
-- Counts the objects placements whose shardstate is 1 (the value this test
-- treats as healthy); both placements are expected to qualify.
SELECT count(*)
FROM   pg_dist_shard_placement AS sp,
       pg_dist_shard           AS s
WHERE  sp.shardid = s.shardid
AND    sp.shardstate = 1
AND    s.logicalrelid = 'objects'::regclass;
 count 
-------
     2
(1 row)

-- create trigger on one worker to reject certain values
-- (connect to worker_2 and attach the trigger directly to the shard table
-- objects_1200003; the function raises 'illegal value' when NEW.name = 'BAD')
\c - - - :worker_2_port
CREATE FUNCTION reject_bad() RETURNS trigger AS $rb$
    BEGIN
        IF (NEW.name = 'BAD') THEN
            RAISE 'illegal value';
        END IF;

        RETURN NEW;
    END;
$rb$ LANGUAGE plpgsql;
-- INITIALLY IMMEDIATE: the check runs at the end of each INSERT statement
CREATE CONSTRAINT TRIGGER reject_bad
AFTER INSERT ON objects_1200003
DEFERRABLE INITIALLY IMMEDIATE
FOR EACH ROW EXECUTE PROCEDURE reject_bad();
\c - - - :master_port
-- test partial failure; worker_1 succeeds, 2 fails
\set VERBOSITY terse
BEGIN;
INSERT INTO objects VALUES (1, 'apple');
INSERT INTO objects VALUES (2, 'BAD');
WARNING:  illegal value
INSERT INTO labs VALUES (7, 'E Corp');
COMMIT;
-- data should be persisted
SELECT * FROM objects WHERE id = 2;
 id | name 
----+------
  2 | BAD
(1 row)

SELECT * FROM labs WHERE id = 7;
 id |  name  
----+--------
  7 | E Corp
(1 row)

-- but one placement should be bad
-- (exactly one objects placement on worker_2 is now in shardstate 3)
SELECT count(*)
FROM   pg_dist_shard_placement AS sp,
       pg_dist_shard           AS s
WHERE  sp.shardid = s.shardid
AND    sp.nodename = 'localhost'
AND    sp.nodeport = :worker_2_port
AND    sp.shardstate = 3
AND    s.logicalrelid = 'objects'::regclass;
 count 
-------
     1
(1 row)

-- unfiltered on purpose: resets the table for the next scenario; note that it
-- runs before the placements are marked healthy again
DELETE FROM objects;
-- mark shards as healthy again; delete all data
UPDATE pg_dist_shard_placement AS sp SET shardstate = 1
FROM   pg_dist_shard AS s
WHERE  sp.shardid = s.shardid
AND    s.logicalrelid = 'objects'::regclass;
-- what if there are errors on different shards at different times?
-- Installs the same 'BAD'-rejecting trigger on worker_1, this time on the
-- labs shard table labs_1200002, then runs a transaction whose labs insert is
-- rejected and which ends with "could not modify any active placements".
\c - - - :worker_1_port
CREATE FUNCTION reject_bad() RETURNS trigger AS $rb$
    BEGIN
        IF (NEW.name = 'BAD') THEN
            RAISE 'illegal value';
        END IF;

        RETURN NEW;
    END;
$rb$ LANGUAGE plpgsql;
-- INITIALLY IMMEDIATE: the check runs at the end of each INSERT statement
CREATE CONSTRAINT TRIGGER reject_bad
AFTER INSERT ON labs_1200002
DEFERRABLE INITIALLY IMMEDIATE
FOR EACH ROW EXECUTE PROCEDURE reject_bad();
\c - - - :master_port
BEGIN;
INSERT INTO objects VALUES (1, 'apple');
-- first failure: only a WARNING is reported for this objects insert
INSERT INTO objects VALUES (2, 'BAD');
WARNING:  illegal value
INSERT INTO labs VALUES (8, 'Aperture Science');
-- second failure: this labs insert is reported as an ERROR
INSERT INTO labs VALUES (9, 'BAD');
WARNING:  illegal value
ERROR:  could not modify any active placements
COMMIT;
-- data should NOT be persisted
SELECT * FROM objects WHERE id = 1;
 id | name 
----+------
(0 rows)

SELECT * FROM labs WHERE id = 8;
 id | name 
----+------
(0 rows)

-- all placements should remain healthy
-- (3 placements in shardstate 1 across objects and labs)
SELECT count(*)
FROM   pg_dist_shard_placement AS sp,
       pg_dist_shard           AS s
WHERE  sp.shardid = s.shardid
AND    sp.shardstate = 1
AND    (s.logicalrelid = 'objects'::regclass OR
        s.logicalrelid = 'labs'::regclass);
 count 
-------
     3
(1 row)

-- what if the failures happen at COMMIT time?
-- Partial failure detected at COMMIT time: the reject_bad trigger on the
-- objects shard table objects_1200003 (worker_2) is recreated as INITIALLY
-- DEFERRED, so the 'BAD' row is rejected when the transaction commits instead
-- of when it is inserted. The WARNING therefore appears after COMMIT.
\c - - - :worker_2_port
DROP TRIGGER reject_bad ON objects_1200003;
CREATE CONSTRAINT TRIGGER reject_bad
AFTER INSERT ON objects_1200003
DEFERRABLE INITIALLY DEFERRED
FOR EACH ROW EXECUTE PROCEDURE reject_bad();
\c - - - :master_port
-- should be the same story as before, just at COMMIT time
BEGIN;
INSERT INTO objects VALUES (1, 'apple');
INSERT INTO objects VALUES (2, 'BAD');
INSERT INTO labs VALUES (9, 'Umbrella Corporation');
COMMIT;
WARNING:  illegal value
-- data should be persisted
SELECT * FROM objects WHERE id = 2;
 id | name 
----+------
  2 | BAD
(1 row)

-- check the labs row written by THIS transaction (id 9); querying id 7 would
-- only find a row left behind by an earlier test and prove nothing here
-- NOTE(review): keep the script that generates this transcript in sync
SELECT * FROM labs WHERE id = 9;
 id |         name         
----+----------------------
  9 | Umbrella Corporation
(1 row)

-- but one placement should be bad
-- (exactly one objects placement on worker_2 is in shardstate 3)
SELECT count(*)
FROM   pg_dist_shard_placement AS sp,
       pg_dist_shard           AS s
WHERE  sp.shardid = s.shardid
AND    sp.nodename = 'localhost'
AND    sp.nodeport = :worker_2_port
AND    sp.shardstate = 3
AND    s.logicalrelid = 'objects'::regclass;
 count 
-------
     1
(1 row)

-- unfiltered on purpose: resets the table for the next scenario; note that it
-- runs before the placements are marked healthy again
DELETE FROM objects;
-- mark shards as healthy again; delete all data
UPDATE pg_dist_shard_placement AS sp SET shardstate = 1
FROM   pg_dist_shard AS s
WHERE  sp.shardid = s.shardid
AND    s.logicalrelid = 'objects'::regclass;
-- what if all nodes have failures at COMMIT time?
-- Recreates the labs trigger on worker_1 as INITIALLY DEFERRED so that its
-- rejection is also raised at COMMIT, then covers two scenarios: a commit
-- that fails with "could not commit transaction on any active nodes", and a
-- commit where the objects write is kept while the labs writes are lost.
\c - - - :worker_1_port
DROP TRIGGER reject_bad ON labs_1200002;
CREATE CONSTRAINT TRIGGER reject_bad
AFTER INSERT ON labs_1200002
DEFERRABLE INITIALLY DEFERRED
FOR EACH ROW EXECUTE PROCEDURE reject_bad();
\c - - - :master_port
BEGIN;
INSERT INTO objects VALUES (1, 'apple');
INSERT INTO objects VALUES (2, 'BAD');
INSERT INTO labs VALUES (8, 'Aperture Science');
INSERT INTO labs VALUES (9, 'BAD');
COMMIT;
WARNING:  illegal value
WARNING:  illegal value
ERROR:  could not commit transaction on any active nodes
-- data should NOT be persisted
SELECT * FROM objects WHERE id = 1;
 id | name 
----+------
(0 rows)

SELECT * FROM labs WHERE id = 8;
 id | name 
----+------
(0 rows)

-- all placements should remain healthy
-- (3 placements in shardstate 1 across objects and labs)
SELECT count(*)
FROM   pg_dist_shard_placement AS sp,
       pg_dist_shard           AS s
WHERE  sp.shardid = s.shardid
AND    sp.shardstate = 1
AND    (s.logicalrelid = 'objects'::regclass OR
        s.logicalrelid = 'labs'::regclass);
 count 
-------
     3
(1 row)

-- what if one shard (objects) succeeds but another (labs) completely fails?
-- (remove the objects trigger on worker_2; the deferred labs trigger on
-- worker_1 created above stays in place)
\c - - - :worker_2_port
DROP TRIGGER reject_bad ON objects_1200003;
\c - - - :master_port
BEGIN;
INSERT INTO objects VALUES (1, 'apple');
INSERT INTO labs VALUES (8, 'Aperture Science');
INSERT INTO labs VALUES (9, 'BAD');
COMMIT;
WARNING:  illegal value
\set VERBOSITY default
-- data to objects should be persisted, but labs should not...
-- Verifies the outcome of the preceding mixed-result commit (objects row
-- kept, labs row absent, one objects placement in shardstate 3), then runs a
-- few transaction checks against an append-distributed copy of researchers.
SELECT * FROM objects WHERE id = 1;
 id | name  
----+-------
  1 | apple
(1 row)

SELECT * FROM labs WHERE id = 8;
 id | name 
----+------
(0 rows)

-- labs should be healthy, but one object placement shouldn't be
-- (placements are counted per table and shardstate; ORDER BY keeps the
-- output row order stable)
SELECT   s.logicalrelid::regclass::text, sp.shardstate, count(*)
FROM     pg_dist_shard_placement AS sp,
         pg_dist_shard           AS s
WHERE    sp.shardid = s.shardid
AND      (s.logicalrelid = 'objects'::regclass OR
          s.logicalrelid = 'labs'::regclass)
GROUP BY s.logicalrelid, sp.shardstate
ORDER BY s.logicalrelid, sp.shardstate;
 logicalrelid | shardstate | count 
--------------+------------+-------
 labs         |          1 |     1
 objects      |          1 |     1
 objects      |          3 |     1
(3 rows)

-- some append-partitioned tests for good measure
CREATE TABLE append_researchers ( LIKE researchers );
SELECT master_create_distributed_table('append_researchers', 'id', 'append');
 master_create_distributed_table 
---------------------------------
 
(1 row)

-- create two empty shards with replication factor 1; \gset stores each new
-- shard id in the psql variable new_shard_id, which the following UPDATE
-- uses to assign that shard's min/max values
SET citus.shard_replication_factor TO 1;
SELECT master_create_empty_shard('append_researchers') AS new_shard_id
\gset
UPDATE pg_dist_shard SET shardminvalue = 0, shardmaxvalue = 500000
WHERE shardid = :new_shard_id;
SELECT master_create_empty_shard('append_researchers') AS new_shard_id
\gset
UPDATE pg_dist_shard SET shardminvalue = 500000, shardmaxvalue = 1000000
WHERE shardid = :new_shard_id;
SET citus.shard_replication_factor TO DEFAULT;
-- try single-shard INSERT
BEGIN;
INSERT INTO append_researchers VALUES (0, 0, 'John Backus');
COMMIT;
SELECT * FROM append_researchers WHERE id = 0;
 id | lab_id |    name     
----+--------+-------------
  0 |      0 | John Backus
(1 row)

-- try rollback
BEGIN;
DELETE FROM append_researchers WHERE id = 0;
ROLLBACK;
SELECT * FROM append_researchers WHERE id = 0;
 id | lab_id |    name     
----+--------+-------------
  0 |      0 | John Backus
(1 row)

-- try hitting shard on other node
-- NOTE(review): 500000 is both the max value of the first shard and the min
-- value of the second, which presumably is what produces the "exactly one
-- shard" error below rather than a node mismatch -- confirm this is intended
BEGIN;
INSERT INTO append_researchers VALUES (1, 1, 'John McCarthy');
INSERT INTO append_researchers VALUES (500000, 500000, 'Tony Hoare');
ERROR:  distributed modifications must target exactly one shard
ROLLBACK;
-- only the row committed earlier remains; id 1 was rolled back
SELECT * FROM append_researchers;
 id | lab_id |    name     
----+--------+-------------
  0 |      0 | John Backus
(1 row)
