From 850c51947afcbaa5e5ad5837b463c1474de1d21d Mon Sep 17 00:00:00 2001 From: Jason Petersen Date: Thu, 4 Aug 2016 16:14:46 -0700 Subject: [PATCH] Re-permit DDL in transactions, selectively Recent changes to DDL and transaction logic resulted in a "regression" from the viewpoint of users. Previously, DDL commands were allowed in multi-command transaction blocks, though they were not processed in any actual transactional manner. We improved the atomicity of our DDL code, but added a restriction that DDL commands themselves must not occur in any BEGIN/END transaction block. To give users back the original functionality (and improved atomicity) we now keep track of whether a multi-command transaction has modified data (DML) or schema (DDL). Interleaving the two modification types in a single transaction is disallowed. This first step simply permits a single DDL command in such a block, admittedly an incomplete solution, but one which will permit us to add full multi-DDL command support in a subsequent commit. --- src/backend/distributed/commands/multi_copy.c | 2 + .../executor/multi_client_executor.c | 4 +- .../executor/multi_router_executor.c | 19 ++++- .../distributed/executor/multi_utility.c | 10 ++- .../transaction/multi_shard_transaction.c | 1 + .../distributed/utils/connection_cache.c | 6 +- src/include/distributed/connection_cache.h | 13 ++- .../expected/multi_modifying_xacts.out | 84 ++++++++++++++++++- .../input/multi_alter_table_statements.source | 23 ++++- .../multi_alter_table_statements.source | 38 ++++++++- .../regress/sql/multi_modifying_xacts.sql | 54 +++++++++++- 11 files changed, 232 insertions(+), 22 deletions(-) diff --git a/src/backend/distributed/commands/multi_copy.c b/src/backend/distributed/commands/multi_copy.c index adfe5791d..a7858d4c2 100644 --- a/src/backend/distributed/commands/multi_copy.c +++ b/src/backend/distributed/commands/multi_copy.c @@ -233,6 +233,8 @@ CitusCopyFrom(CopyStmt *copyStatement, char *completionTag) errmsg("unsupported partition method"))); } } + + XactModificationLevel = XACT_MODIFICATION_DATA; } diff --git a/src/backend/distributed/executor/multi_client_executor.c b/src/backend/distributed/executor/multi_client_executor.c index 88b9f4572..f5f0df1cc 100644 --- a/src/backend/distributed/executor/multi_client_executor.c +++ b/src/backend/distributed/executor/multi_client_executor.c @@ -94,7 +94,7 @@ MultiClientConnect(const char *nodeName, uint32 nodePort, const char *nodeDataba char *effectiveDatabaseName = NULL; char *effectiveUserName = NULL; - if (IsModifyingTransaction) + if (XactModificationLevel > XACT_MODIFICATION_NONE) { ereport(ERROR, (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION), errmsg("cannot open new connections after the first modification " @@ -181,7 +181,7 @@ MultiClientConnectStart(const char *nodeName, uint32 nodePort, const char *nodeD return connectionId; } - if (IsModifyingTransaction) + if (XactModificationLevel > XACT_MODIFICATION_NONE) { ereport(ERROR, (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION), errmsg("cannot open new connections after the first modification " diff --git a/src/backend/distributed/executor/multi_router_executor.c b/src/backend/distributed/executor/multi_router_executor.c index ab8c239b6..60105db69 100644 --- a/src/backend/distributed/executor/multi_router_executor.c +++ b/src/backend/distributed/executor/multi_router_executor.c @@ -138,6 +138,14 @@ RouterExecutorStart(QueryDesc *queryDesc, int eflags, Task *task) { eflags |= EXEC_FLAG_SKIP_TRIGGERS; + if (XactModificationLevel == XACT_MODIFICATION_SCHEMA) + { + ereport(ERROR, (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION), + errmsg("distributed data modifications must not appear in " + "transaction blocks which contain distributed DDL " + "commands"))); + } + /* * We could naturally handle function-based transactions (i.e. those * using PL/pgSQL or similar) by checking the type of queryDesc->dest, @@ -228,7 +236,7 @@ InitTransactionStateForTask(Task *task) participantEntry->connection = connection; } - IsModifyingTransaction = true; + XactModificationLevel = XACT_MODIFICATION_DATA; } @@ -1178,7 +1186,7 @@ RegisterRouterExecutorXactCallbacks(void) static void RouterTransactionCallback(XactEvent event, void *arg) { - if (xactParticipantHash == NULL) + if (XactModificationLevel != XACT_MODIFICATION_DATA) { return; } @@ -1235,7 +1243,7 @@ RouterTransactionCallback(XactEvent event, void *arg) } /* reset transaction state */ - IsModifyingTransaction = false; + XactModificationLevel = XACT_MODIFICATION_NONE; xactParticipantHash = NULL; xactShardConnSetList = NIL; subXactAbortAttempted = false; @@ -1275,6 +1283,11 @@ ExecuteTransactionEnd(bool commit) NodeConnectionEntry *participant; bool completed = !commit; /* aborts are assumed completed */ + if (xactParticipantHash == NULL) + { + return; + } + hash_seq_init(&scan, xactParticipantHash); while ((participant = (NodeConnectionEntry *) hash_seq_search(&scan))) { diff --git a/src/backend/distributed/executor/multi_utility.c b/src/backend/distributed/executor/multi_utility.c index 59aa01d17..1f7b0f206 100644 --- a/src/backend/distributed/executor/multi_utility.c +++ b/src/backend/distributed/executor/multi_utility.c @@ -1255,7 +1255,13 @@ ExecuteDistributedDDLCommand(Oid relationId, const char *ddlCommandString, { bool executionOK = false; - PreventTransactionChain(isTopLevel, "distributed DDL commands"); + if (XactModificationLevel > XACT_MODIFICATION_NONE) + { + ereport(ERROR, (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION), + errmsg("distributed DDL commands must not appear within " + "transaction blocks containing other modifications"))); + } + ShowNoticeIfNotUsing2PC(); executionOK = ExecuteCommandOnWorkerShards(relationId, ddlCommandString); @@ -1265,6 +1271,8 @@ ExecuteDistributedDDLCommand(Oid relationId, const char *ddlCommandString, { ereport(ERROR, (errmsg("could not execute DDL command on worker node shards"))); } + + XactModificationLevel = XACT_MODIFICATION_SCHEMA; } diff --git a/src/backend/distributed/transaction/multi_shard_transaction.c b/src/backend/distributed/transaction/multi_shard_transaction.c index 3ec4ee618..57a22adfe 100644 --- a/src/backend/distributed/transaction/multi_shard_transaction.c +++ b/src/backend/distributed/transaction/multi_shard_transaction.c @@ -283,6 +283,7 @@ CompleteShardPlacementTransactions(XactEvent event, void *arg) CloseConnections(shardPlacementConnectionList); shardPlacementConnectionList = NIL; + XactModificationLevel = XACT_MODIFICATION_NONE; } diff --git a/src/backend/distributed/utils/connection_cache.c b/src/backend/distributed/utils/connection_cache.c index 766a5ade2..22644a890 100644 --- a/src/backend/distributed/utils/connection_cache.c +++ b/src/backend/distributed/utils/connection_cache.c @@ -30,8 +30,8 @@ #include "utils/palloc.h" -/* state needed to prevent new connections during modifying transactions */ -bool IsModifyingTransaction = false; +/* state needed to keep track of operations used during a transaction */ +XactModificationType XactModificationLevel = XACT_MODIFICATION_NONE; /* * NodeConnectionHash is the connection hash itself. It begins uninitialized. @@ -377,7 +377,7 @@ ConnectToNode(char *nodeName, int32 nodePort, char *nodeUser) sprintf(nodePortString, "%d", nodePort); - if (IsModifyingTransaction) + if (XactModificationLevel > XACT_MODIFICATION_NONE) { ereport(ERROR, (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION), errmsg("cannot open new connections after the first modification " diff --git a/src/include/distributed/connection_cache.h b/src/include/distributed/connection_cache.h index f335e0735..0e2c0af76 100644 --- a/src/include/distributed/connection_cache.h +++ b/src/include/distributed/connection_cache.h @@ -32,7 +32,6 @@ /* SQL statement for testing */ #define TEST_SQL "DO $$ BEGIN RAISE EXCEPTION 'Raised remotely!'; END $$" - /* * NodeConnectionKey acts as the key to index into the (process-local) hash * keeping track of open connections. Node name and port are sufficient. @@ -53,8 +52,18 @@ typedef struct NodeConnectionEntry } NodeConnectionEntry; +/* describes what kind of modifications have occurred in the current transaction */ +typedef enum +{ + XACT_MODIFICATION_INVALID = 0, /* placeholder initial value */ + XACT_MODIFICATION_NONE, /* no modifications have taken place */ + XACT_MODIFICATION_DATA, /* data modifications (DML) have occurred */ + XACT_MODIFICATION_SCHEMA /* schema modifications (DDL) have occurred */ +} XactModificationType; + + /* state needed to prevent new connections during modifying transactions */ -extern bool IsModifyingTransaction; +extern XactModificationType XactModificationLevel; /* function declarations for obtaining and using a connection */ diff --git a/src/test/regress/expected/multi_modifying_xacts.out b/src/test/regress/expected/multi_modifying_xacts.out index e2b736f2f..328bc1233 100644 --- a/src/test/regress/expected/multi_modifying_xacts.out +++ b/src/test/regress/expected/multi_modifying_xacts.out @@ -100,6 +100,7 @@ SELECT name FROM researchers WHERE lab_id = 4; BEGIN; DO $$ BEGIN + INSERT INTO researchers VALUES (11, 11, 'Whitfield Diffie'); INSERT INTO researchers VALUES (NULL, 10, 'Edsger Dijkstra'); EXCEPTION WHEN not_null_violation THEN @@ -150,19 +151,41 @@ SELECT count(*) FROM researchers WHERE lab_id = 6; ERROR: no transaction participant matches localhost:57638 DETAIL: Transactions which modify distributed tables may only target nodes affected by the modification command which began the transaction. ABORT; --- applies to DDL or COPY, too +-- applies to DDL, too BEGIN; INSERT INTO labs VALUES (6, 'Bell Labs'); -ALTER TABLE labs ADD COLUMN text motto; -ERROR: distributed DDL commands cannot run inside a transaction block +ALTER TABLE labs ADD COLUMN motto text; +ERROR: distributed DDL commands must not appear within transaction blocks containing other modifications COMMIT; +-- whether it occurs first or second +BEGIN; +ALTER TABLE labs ADD COLUMN motto text; +NOTICE: using one-phase commit for distributed DDL commands +HINT: You can enable two-phase commit for extra safety with: SET citus.multi_shard_commit_protocol TO '2pc' +INSERT INTO labs VALUES (6, 'Bell Labs'); +ERROR: distributed data modifications must not appear in transaction blocks which contain distributed DDL commands +COMMIT; +-- but the DDL should correctly roll back +\d labs + Table "public.labs" + Column | Type | Modifiers +--------+--------+----------- + id | bigint | not null + name | text | not null + +SELECT * FROM labs WHERE id = 6; + id | name +----+------ +(0 rows) + +-- COPY can't happen second, BEGIN; INSERT INTO labs VALUES (6, 'Bell Labs'); \copy labs from stdin delimiter ',' ERROR: cannot open new connections after the first modification command within a transaction CONTEXT: COPY labs, line 1: "10,Weyland-Yutani" COMMIT; --- though the copy will work if before any modifications +-- though it will work if before any modifications BEGIN; \copy labs from stdin delimiter ',' SELECT name FROM labs WHERE id = 10; @@ -173,6 +196,59 @@ SELECT name FROM labs WHERE id = 10; INSERT INTO labs VALUES (6, 'Bell Labs'); COMMIT; +-- but a double-copy isn't allowed (the first will persist) +BEGIN; +\copy labs from stdin delimiter ',' +\copy labs from stdin delimiter ',' +ERROR: cannot open new connections after the first modification command within a transaction +CONTEXT: COPY labs, line 1: "12,fsociety" +COMMIT; +SELECT name FROM labs WHERE id = 11; + name +---------------- + Planet Express +(1 row) + +-- finally, ALTER and copy aren't compatible +BEGIN; +ALTER TABLE labs ADD COLUMN motto text; +\copy labs from stdin delimiter ',' +ERROR: cannot open new connections after the first modification command within a transaction +CONTEXT: COPY labs, line 1: "12,fsociety,lol" +COMMIT; +-- but the DDL should correctly roll back +\d labs + Table "public.labs" + Column | Type | Modifiers +--------+--------+----------- + id | bigint | not null + name | text | not null + +SELECT * FROM labs WHERE id = 12; + id | name +----+------ +(0 rows) + +-- and if the copy is before the ALTER... +BEGIN; +\copy labs from stdin delimiter ',' +ALTER TABLE labs ADD COLUMN motto text; +ERROR: distributed DDL commands must not appear within transaction blocks containing other modifications +COMMIT; +-- the DDL fails, but copy persists +\d labs + Table "public.labs" + Column | Type | Modifiers +--------+--------+----------- + id | bigint | not null + name | text | not null + +SELECT * FROM labs WHERE id = 12; + id | name +----+---------- + 12 | fsociety +(1 row) + -- now, for some special failures... CREATE TABLE objects ( id bigint PRIMARY KEY, diff --git a/src/test/regress/input/multi_alter_table_statements.source b/src/test/regress/input/multi_alter_table_statements.source index d0bb6f605..a50b384fb 100644 --- a/src/test/regress/input/multi_alter_table_statements.source +++ b/src/test/regress/input/multi_alter_table_statements.source @@ -165,7 +165,7 @@ COMMIT; SELECT indexname, tablename FROM pg_indexes WHERE tablename = 'lineitem_alter'; DROP INDEX temp_index_1; --- verify that distributed ddl commands are not allowed inside a transaction block +-- verify that single distributed ddl commands are allowed inside a transaction block SET citus.enable_ddl_propagation to true; BEGIN; CREATE INDEX temp_index_2 ON lineitem_alter(l_orderkey); @@ -173,6 +173,27 @@ COMMIT; SELECT indexname, tablename FROM pg_indexes WHERE tablename = 'lineitem_alter'; DROP INDEX temp_index_2; +-- but that multiple ddl statements in a block results in ROLLBACK +BEGIN; +CREATE INDEX temp_index_2 ON lineitem_alter(l_orderkey); +ALTER TABLE lineitem_alter ADD COLUMN first integer; +COMMIT; +SELECT indexname, tablename FROM pg_indexes WHERE tablename = 'lineitem_alter'; + +-- and distributed SELECTs cannot appear after ALTER +BEGIN; +CREATE INDEX temp_index_2 ON lineitem_alter(l_orderkey); +SELECT l_orderkey FROM lineitem_alter LIMIT 0; +COMMIT; + +-- but are allowed before +BEGIN; +SELECT l_orderkey FROM lineitem_alter LIMIT 0; +CREATE INDEX temp_index_2 ON lineitem_alter(l_orderkey); +COMMIT; +SELECT indexname, tablename FROM pg_indexes WHERE tablename = 'lineitem_alter'; +DROP INDEX temp_index_2; + --- verify that distributed ddl commands can be used with 2pc SET citus.multi_shard_commit_protocol TO '2pc'; CREATE INDEX temp_index_3 ON lineitem_alter(l_orderkey); diff --git a/src/test/regress/output/multi_alter_table_statements.source b/src/test/regress/output/multi_alter_table_statements.source index 4231a6cac..d00052881 100644 --- a/src/test/regress/output/multi_alter_table_statements.source +++ b/src/test/regress/output/multi_alter_table_statements.source @@ -424,19 +424,51 @@ SELECT indexname, tablename FROM pg_indexes WHERE tablename = 'lineitem_alter'; (1 row) DROP INDEX temp_index_1; --- verify that distributed ddl commands are not allowed inside a transaction block +-- verify that single distributed ddl commands are allowed inside a transaction block SET citus.enable_ddl_propagation to true; BEGIN; CREATE INDEX temp_index_2 ON lineitem_alter(l_orderkey); -ERROR: distributed DDL commands cannot run inside a transaction block +COMMIT; +SELECT indexname, tablename FROM pg_indexes WHERE tablename = 'lineitem_alter'; + indexname | tablename +--------------+---------------- + temp_index_2 | lineitem_alter +(1 row) + +DROP INDEX temp_index_2; +-- but that multiple ddl statements in a block results in ROLLBACK +BEGIN; +CREATE INDEX temp_index_2 ON lineitem_alter(l_orderkey); +ALTER TABLE lineitem_alter ADD COLUMN first integer; +ERROR: distributed DDL commands must not appear within transaction blocks containing other modifications COMMIT; SELECT indexname, tablename FROM pg_indexes WHERE tablename = 'lineitem_alter'; indexname | tablename -----------+----------- (0 rows) +-- and distributed SELECTs cannot appear after ALTER +BEGIN; +CREATE INDEX temp_index_2 ON lineitem_alter(l_orderkey); +SELECT l_orderkey FROM lineitem_alter LIMIT 0; +ERROR: cannot open new connections after the first modification command within a transaction +COMMIT; +-- but are allowed before +BEGIN; +SELECT l_orderkey FROM lineitem_alter LIMIT 0; + l_orderkey +------------ +(0 rows) + +CREATE INDEX temp_index_2 ON lineitem_alter(l_orderkey); +COMMIT; +SELECT indexname, tablename FROM pg_indexes WHERE tablename = 'lineitem_alter'; + indexname | tablename +--------------+---------------- + temp_index_2 | lineitem_alter +(1 row) + DROP INDEX temp_index_2; -ERROR: index "temp_index_2" does not exist --- verify that distributed ddl commands can be used with 2pc SET citus.multi_shard_commit_protocol TO '2pc'; CREATE INDEX temp_index_3 ON lineitem_alter(l_orderkey); diff --git a/src/test/regress/sql/multi_modifying_xacts.sql b/src/test/regress/sql/multi_modifying_xacts.sql index c875e4c15..109d371c2 100644 --- a/src/test/regress/sql/multi_modifying_xacts.sql +++ b/src/test/regress/sql/multi_modifying_xacts.sql @@ -78,6 +78,7 @@ SELECT name FROM researchers WHERE lab_id = 4; BEGIN; DO $$ BEGIN + INSERT INTO researchers VALUES (11, 11, 'Whitfield Diffie'); INSERT INTO researchers VALUES (NULL, 10, 'Edsger Dijkstra'); EXCEPTION WHEN not_null_violation THEN @@ -122,12 +123,23 @@ INSERT INTO labs VALUES (6, 'Bell Labs'); SELECT count(*) FROM researchers WHERE lab_id = 6; ABORT; --- applies to DDL or COPY, too +-- applies to DDL, too BEGIN; INSERT INTO labs VALUES (6, 'Bell Labs'); -ALTER TABLE labs ADD COLUMN text motto; +ALTER TABLE labs ADD COLUMN motto text; COMMIT; +-- whether it occurs first or second +BEGIN; +ALTER TABLE labs ADD COLUMN motto text; +INSERT INTO labs VALUES (6, 'Bell Labs'); +COMMIT; + +-- but the DDL should correctly roll back +\d labs +SELECT * FROM labs WHERE id = 6; + +-- COPY can't happen second, BEGIN; INSERT INTO labs VALUES (6, 'Bell Labs'); \copy labs from stdin delimiter ',' @@ -135,7 +147,7 @@ INSERT INTO labs VALUES (6, 'Bell Labs'); \. COMMIT; --- though the copy will work if before any modifications +-- though it will work if before any modifications BEGIN; \copy labs from stdin delimiter ',' 10,Weyland-Yutani @@ -144,6 +156,42 @@ SELECT name FROM labs WHERE id = 10; INSERT INTO labs VALUES (6, 'Bell Labs'); COMMIT; +-- but a double-copy isn't allowed (the first will persist) +BEGIN; +\copy labs from stdin delimiter ',' +11,Planet Express +\. +\copy labs from stdin delimiter ',' +12,fsociety +\. +COMMIT; + +SELECT name FROM labs WHERE id = 11; + +-- finally, ALTER and copy aren't compatible +BEGIN; +ALTER TABLE labs ADD COLUMN motto text; +\copy labs from stdin delimiter ',' +12,fsociety,lol +\. +COMMIT; + +-- but the DDL should correctly roll back +\d labs +SELECT * FROM labs WHERE id = 12; + +-- and if the copy is before the ALTER... +BEGIN; +\copy labs from stdin delimiter ',' +12,fsociety +\. +ALTER TABLE labs ADD COLUMN motto text; +COMMIT; + +-- the DDL fails, but copy persists +\d labs +SELECT * FROM labs WHERE id = 12; + -- now, for some special failures... CREATE TABLE objects ( id bigint PRIMARY KEY,