From 15eed396b3fc9d73839773d6e403b5c803873db9 Mon Sep 17 00:00:00 2001 From: Metin Doslu Date: Tue, 7 Jun 2016 14:22:10 +0300 Subject: [PATCH 1/5] Update ereport format --- .../distributed/utils/connection_cache.c | 37 ++++++++++++------- .../expected/multi_connection_cache.out | 2 +- .../expected/multi_index_statements.out | 9 +++-- .../regress/expected/multi_modifications.out | 5 ++- .../multi_alter_table_statements.source | 16 ++++---- src/test/regress/output/multi_copy.source | 4 +- 6 files changed, 42 insertions(+), 31 deletions(-) diff --git a/src/backend/distributed/utils/connection_cache.c b/src/backend/distributed/utils/connection_cache.c index 3f05a0ce4..ce592d8b5 100644 --- a/src/backend/distributed/utils/connection_cache.c +++ b/src/backend/distributed/utils/connection_cache.c @@ -198,34 +198,31 @@ void ReportRemoteError(PGconn *connection, PGresult *result) { char *sqlStateString = PQresultErrorField(result, PG_DIAG_SQLSTATE); - char *remoteMessage = PQresultErrorField(result, PG_DIAG_MESSAGE_PRIMARY); + char *messagePrimary = PQresultErrorField(result, PG_DIAG_MESSAGE_PRIMARY); + char *messageDetail = PQresultErrorField(result, PG_DIAG_MESSAGE_DETAIL); + char *messageHint = PQresultErrorField(result, PG_DIAG_MESSAGE_HINT); + char *messageContext = PQresultErrorField(result, PG_DIAG_CONTEXT); + char *nodeName = ConnectionGetOptionValue(connection, "host"); char *nodePort = ConnectionGetOptionValue(connection, "port"); - char *errorPrefix = "Connection failed to"; int sqlState = ERRCODE_CONNECTION_FAILURE; if (sqlStateString != NULL) { sqlState = MAKE_SQLSTATE(sqlStateString[0], sqlStateString[1], sqlStateString[2], sqlStateString[3], sqlStateString[4]); - - /* use more specific error prefix for result failures */ - if (sqlState != ERRCODE_CONNECTION_FAILURE) - { - errorPrefix = "Bad result from"; - } } /* * If the PGresult did not contain a message, the connection may provide a * suitable top level one. At worst, this is an empty string. */ - if (remoteMessage == NULL) + if (messagePrimary == NULL) { char *lastNewlineIndex = NULL; - remoteMessage = PQerrorMessage(connection); - lastNewlineIndex = strrchr(remoteMessage, '\n'); + messagePrimary = PQerrorMessage(connection); + lastNewlineIndex = strrchr(messagePrimary, '\n'); /* trim trailing newline, if any */ if (lastNewlineIndex != NULL) @@ -234,9 +231,21 @@ ReportRemoteError(PGconn *connection, PGresult *result) } } - ereport(WARNING, (errcode(sqlState), - errmsg("%s %s:%s", errorPrefix, nodeName, nodePort), - errdetail("Remote message: %s", remoteMessage))); + if (sqlState == ERRCODE_CONNECTION_FAILURE) + { + ereport(WARNING, (errcode(sqlState), + errmsg("connection failed to %s:%s", nodeName, nodePort), + errdetail("%s", messagePrimary))); + } + else + { + ereport(WARNING, (errcode(sqlState), errmsg("%s", messagePrimary), + messageDetail ? errdetail("%s", messageDetail) : 0, + messageHint ? errhint("%s", messageHint) : 0, + messageContext ? 
errcontext("%s", messageContext) : 0, + errcontext("Error occurred on remote connection to %s:%s.", + nodeName, nodePort))); + } } diff --git a/src/test/regress/expected/multi_connection_cache.out b/src/test/regress/expected/multi_connection_cache.out index 84e927bbe..fd17f3845 100644 --- a/src/test/regress/expected/multi_connection_cache.out +++ b/src/test/regress/expected/multi_connection_cache.out @@ -28,7 +28,7 @@ CREATE FUNCTION set_connection_status_bad(cstring, integer) \set VERBOSITY terse -- connect to non-existent host SELECT initialize_remote_temp_table('dummy-host-name', 12345); -WARNING: Connection failed to dummy-host-name:12345 +WARNING: connection failed to dummy-host-name:12345 initialize_remote_temp_table ------------------------------ f diff --git a/src/test/regress/expected/multi_index_statements.out b/src/test/regress/expected/multi_index_statements.out index b8258596f..5f115f0f4 100644 --- a/src/test/regress/expected/multi_index_statements.out +++ b/src/test/regress/expected/multi_index_statements.out @@ -139,12 +139,13 @@ ERROR: creating unique indexes on append-partitioned tables is currently unsupp CREATE INDEX lineitem_orderkey_index ON lineitem (l_orderkey); ERROR: relation "lineitem_orderkey_index" already exists CREATE INDEX try_index ON lineitem USING gist (l_orderkey); -WARNING: Bad result from localhost:57638 -DETAIL: Remote message: data type bigint has no default operator class for access method "gist" +WARNING: data type bigint has no default operator class for access method "gist" +HINT: You must specify an operator class for the index or define a default operator class for the data type. +CONTEXT: Error occurred on remote connection to localhost:57638. ERROR: could not execute DDL command on worker node shards CREATE INDEX try_index ON lineitem (non_existent_column); -WARNING: Bad result from localhost:57638 -DETAIL: Remote message: column "non_existent_column" does not exist +WARNING: column "non_existent_column" does not exist +CONTEXT: Error occurred on remote connection to localhost:57638. ERROR: could not execute DDL command on worker node shards CREATE INDEX ON lineitem (l_orderkey); ERROR: creating index without a name on a distributed table is currently unsupported diff --git a/src/test/regress/expected/multi_modifications.out b/src/test/regress/expected/multi_modifications.out index 4a707f733..5d0dcdaee 100644 --- a/src/test/regress/expected/multi_modifications.out +++ b/src/test/regress/expected/multi_modifications.out @@ -276,8 +276,9 @@ WHERE nodename = 'localhost' AND nodeport = :worker_1_port; -- Fourth: Perform the same INSERT (primary key violation) INSERT INTO limit_orders VALUES (275, 'ADR', 140, '2007-07-02 16:32:15', 'sell', 43.67); -WARNING: Bad result from localhost:57638 -DETAIL: Remote message: duplicate key value violates unique constraint "limit_orders_pkey_750001" +WARNING: duplicate key value violates unique constraint "limit_orders_pkey_750001" +DETAIL: Key (id)=(275) already exists. +CONTEXT: Error occurred on remote connection to localhost:57638. 
-- Last: Verify the insert worked but the placement with the PK violation is now unhealthy SELECT count(*) FROM limit_orders WHERE id = 275; count diff --git a/src/test/regress/output/multi_alter_table_statements.source b/src/test/regress/output/multi_alter_table_statements.source index 467793f28..7d209e9c5 100644 --- a/src/test/regress/output/multi_alter_table_statements.source +++ b/src/test/regress/output/multi_alter_table_statements.source @@ -261,8 +261,8 @@ ALTER TABLE IF EXISTS non_existent_table ADD COLUMN new_column INTEGER; NOTICE: relation "non_existent_table" does not exist, skipping ALTER TABLE IF EXISTS lineitem_alter ALTER COLUMN int_column2 SET DATA TYPE INTEGER; ALTER TABLE lineitem_alter DROP COLUMN non_existent_column; -WARNING: Bad result from localhost:57638 -DETAIL: Remote message: column "non_existent_column" of relation "lineitem_alter_220000" does not exist +WARNING: column "non_existent_column" of relation "lineitem_alter_220000" does not exist +CONTEXT: Error occurred on remote connection to localhost:57638. ERROR: could not execute DDL command on worker node shards ALTER TABLE lineitem_alter DROP COLUMN IF EXISTS non_existent_column; NOTICE: column "non_existent_column" of relation "lineitem_alter" does not exist, skipping @@ -361,16 +361,16 @@ DETAIL: Only ADD|DROP COLUMN, SET|DROP NOT NULL, SET|DROP DEFAULT and TYPE subc -- Verify that we error out in case of postgres errors on supported statement -- types ALTER TABLE lineitem_alter ADD COLUMN new_column non_existent_type; -WARNING: Bad result from localhost:57638 -DETAIL: Remote message: type "non_existent_type" does not exist +WARNING: type "non_existent_type" does not exist +CONTEXT: Error occurred on remote connection to localhost:57638. ERROR: could not execute DDL command on worker node shards ALTER TABLE lineitem_alter ALTER COLUMN null_column SET NOT NULL; -WARNING: Bad result from localhost:57638 -DETAIL: Remote message: column "null_column" contains null values +WARNING: column "null_column" contains null values +CONTEXT: Error occurred on remote connection to localhost:57638. ERROR: could not execute DDL command on worker node shards ALTER TABLE lineitem_alter ALTER COLUMN l_partkey SET DEFAULT 'a'; -WARNING: Bad result from localhost:57638 -DETAIL: Remote message: invalid input syntax for integer: "a" +WARNING: invalid input syntax for integer: "a" +CONTEXT: Error occurred on remote connection to localhost:57638. ERROR: could not execute DDL command on worker node shards -- Verify that we error out on statements involving RENAME ALTER TABLE lineitem_alter RENAME TO lineitem_renamed; diff --git a/src/test/regress/output/multi_copy.source b/src/test/regress/output/multi_copy.source index c7021550d..eca01bb4e 100644 --- a/src/test/regress/output/multi_copy.source +++ b/src/test/regress/output/multi_copy.source @@ -455,8 +455,8 @@ COPY customer_worker_copy_append FROM '@abs_srcdir@/data/customer.1.data' with ( COPY customer_worker_copy_append FROM '@abs_srcdir@/data/customer.2.data' with (delimiter '|', master_host 'localhost', master_port 57636); -- Test if there is no relation to copy data with the worker copy COPY lineitem_copy_none FROM '@abs_srcdir@/data/lineitem.1.data' with (delimiter '|', master_host 'localhost', master_port 57636); -WARNING: Bad result from localhost:57636 -DETAIL: Remote message: relation "lineitem_copy_none" does not exist +WARNING: relation "lineitem_copy_none" does not exist +CONTEXT: Error occurred on remote connection to localhost:57636. 
ERROR: could not run copy from the worker node -- Connect back to the master node \c - - - 57636 From 7d0c90b39831aa05798d840e765eb03a39229447 Mon Sep 17 00:00:00 2001 From: Metin Doslu Date: Tue, 7 Jun 2016 17:57:42 +0300 Subject: [PATCH 2/5] Fail fast on constraint violations in router executor --- src/backend/distributed/commands/multi_copy.c | 8 +-- .../executor/multi_client_executor.c | 16 +++--- .../executor/multi_router_executor.c | 20 +++++-- .../master/master_modify_multiple_shards.c | 4 +- .../distributed/test/connection_cache.c | 4 +- .../distributed/utils/connection_cache.c | 54 ++++++++++++++----- .../distributed/utils/multi_transaction.c | 2 +- src/include/distributed/connection_cache.h | 3 +- .../expected/multi_create_insert_proxy.out | 2 +- .../regress/expected/multi_modifications.out | 39 +++++++------- src/test/regress/sql/multi_modifications.sql | 30 +++++------ 11 files changed, 113 insertions(+), 69 deletions(-) diff --git a/src/backend/distributed/commands/multi_copy.c b/src/backend/distributed/commands/multi_copy.c index f858bb660..cbfb62027 100644 --- a/src/backend/distributed/commands/multi_copy.c +++ b/src/backend/distributed/commands/multi_copy.c @@ -827,7 +827,7 @@ MasterPartitionMethod(RangeVar *relation) } else { - ReportRemoteError(masterConnection, queryResult); + ReportRemoteError(masterConnection, queryResult, false); ereport(ERROR, (errmsg("could not get the partition method of the " "distributed table"))); } @@ -924,7 +924,7 @@ OpenCopyTransactions(CopyStmt *copyStatement, ShardConnections *shardConnections result = PQexec(connection, "BEGIN"); if (PQresultStatus(result) != PGRES_COMMAND_OK) { - ReportRemoteError(connection, result); + ReportRemoteError(connection, result, false); failedPlacementList = lappend(failedPlacementList, placement); PQclear(result); @@ -937,7 +937,7 @@ OpenCopyTransactions(CopyStmt *copyStatement, ShardConnections *shardConnections result = PQexec(connection, copyCommand->data); if (PQresultStatus(result) != PGRES_COPY_IN) { - ReportRemoteError(connection, result); + ReportRemoteError(connection, result, false); failedPlacementList = lappend(failedPlacementList, placement); PQclear(result); @@ -1504,7 +1504,7 @@ RemoteCreateEmptyShard(char *relationName) } else { - ReportRemoteError(masterConnection, queryResult); + ReportRemoteError(masterConnection, queryResult, false); ereport(ERROR, (errmsg("could not create a new empty shard on the remote node"))); } diff --git a/src/backend/distributed/executor/multi_client_executor.c b/src/backend/distributed/executor/multi_client_executor.c index 0cccff038..241e62daa 100644 --- a/src/backend/distributed/executor/multi_client_executor.c +++ b/src/backend/distributed/executor/multi_client_executor.c @@ -141,7 +141,7 @@ MultiClientConnect(const char *nodeName, uint32 nodePort, const char *nodeDataba } else { - ReportRemoteError(connection, NULL); + ReportRemoteError(connection, NULL, false); PQfinish(connection); connectionId = INVALID_CONNECTION_ID; @@ -194,7 +194,7 @@ MultiClientConnectStart(const char *nodeName, uint32 nodePort, const char *nodeD } else { - ReportRemoteError(connection, NULL); + ReportRemoteError(connection, NULL, false); PQfinish(connection); connectionId = INVALID_CONNECTION_ID; @@ -249,7 +249,7 @@ MultiClientConnectPoll(int32 connectionId) } else if (pollingStatus == PGRES_POLLING_FAILED) { - ReportRemoteError(connection, NULL); + ReportRemoteError(connection, NULL, false); connectStatus = CLIENT_CONNECTION_BAD; } @@ -433,7 +433,7 @@ MultiClientQueryResult(int32 
connectionId, void **queryResult, int *rowCount, } else { - ReportRemoteError(connection, result); + ReportRemoteError(connection, result, false); PQclear(result); } @@ -500,7 +500,7 @@ MultiClientBatchResult(int32 connectionId, void **queryResult, int *rowCount, } else { - ReportRemoteError(connection, result); + ReportRemoteError(connection, result, false); PQclear(result); queryStatus = CLIENT_BATCH_QUERY_FAILED; } @@ -585,7 +585,7 @@ MultiClientQueryStatus(int32 connectionId) copyResults = true; } - ReportRemoteError(connection, result); + ReportRemoteError(connection, result, false); } /* clear the result object */ @@ -675,7 +675,7 @@ MultiClientCopyData(int32 connectionId, int32 fileDescriptor) { copyStatus = CLIENT_COPY_FAILED; - ReportRemoteError(connection, result); + ReportRemoteError(connection, result, false); } PQclear(result); @@ -685,7 +685,7 @@ MultiClientCopyData(int32 connectionId, int32 fileDescriptor) /* received an error */ copyStatus = CLIENT_COPY_FAILED; - ReportRemoteError(connection, NULL); + ReportRemoteError(connection, NULL, false); } /* if copy out completed, make sure we drain all results from libpq */ diff --git a/src/backend/distributed/executor/multi_router_executor.c b/src/backend/distributed/executor/multi_router_executor.c index 46b7b9b7d..5720ab63d 100644 --- a/src/backend/distributed/executor/multi_router_executor.c +++ b/src/backend/distributed/executor/multi_router_executor.c @@ -267,7 +267,19 @@ ExecuteDistributedModify(Task *task) result = PQexec(connection, task->queryString); if (PQresultStatus(result) != PGRES_COMMAND_OK) { - ReportRemoteError(connection, result); + char *sqlStateString = PQresultErrorField(result, PG_DIAG_SQLSTATE); + int category = 0; + bool raiseError = false; + + /* + * If the error code is in constraint violation class, we want to + * fail fast because we must get the same error from all shard + * placements. 
+ */ + category = ERRCODE_TO_CATEGORY(ERRCODE_INTEGRITY_CONSTRAINT_VIOLATION); + raiseError = SqlStateMatchesCategory(sqlStateString, category); + + ReportRemoteError(connection, result, raiseError); PQclear(result); failedPlacementList = lappend(failedPlacementList, taskPlacement); @@ -451,14 +463,14 @@ SendQueryInSingleRowMode(PGconn *connection, char *query) querySent = PQsendQuery(connection, query); if (querySent == 0) { - ReportRemoteError(connection, NULL); + ReportRemoteError(connection, NULL, false); return false; } singleRowMode = PQsetSingleRowMode(connection); if (singleRowMode == 0) { - ReportRemoteError(connection, NULL); + ReportRemoteError(connection, NULL, false); return false; } @@ -505,7 +517,7 @@ StoreQueryResult(PGconn *connection, TupleDesc tupleDescriptor, resultStatus = PQresultStatus(result); if ((resultStatus != PGRES_SINGLE_TUPLE) && (resultStatus != PGRES_TUPLES_OK)) { - ReportRemoteError(connection, result); + ReportRemoteError(connection, result, false); PQclear(result); return false; diff --git a/src/backend/distributed/master/master_modify_multiple_shards.c b/src/backend/distributed/master/master_modify_multiple_shards.c index 65aef5aa8..3933eaae1 100644 --- a/src/backend/distributed/master/master_modify_multiple_shards.c +++ b/src/backend/distributed/master/master_modify_multiple_shards.c @@ -371,14 +371,14 @@ SendQueryToPlacements(char *shardQueryString, ShardConnections *shardConnections result = PQexec(connection, "BEGIN"); if (PQresultStatus(result) != PGRES_COMMAND_OK) { - ReportRemoteError(connection, result); + ReportRemoteError(connection, result, false); ereport(ERROR, (errmsg("could not send query to shard placement"))); } result = PQexec(connection, shardQueryString); if (PQresultStatus(result) != PGRES_COMMAND_OK) { - ReportRemoteError(connection, result); + ReportRemoteError(connection, result, false); ereport(ERROR, (errmsg("could not send query to shard placement"))); } diff --git a/src/backend/distributed/test/connection_cache.c b/src/backend/distributed/test/connection_cache.c index 8ce7a2cce..1e99836a1 100644 --- a/src/backend/distributed/test/connection_cache.c +++ b/src/backend/distributed/test/connection_cache.c @@ -59,7 +59,7 @@ initialize_remote_temp_table(PG_FUNCTION_ARGS) result = PQexec(connection, POPULATE_TEMP_TABLE); if (PQresultStatus(result) != PGRES_COMMAND_OK) { - ReportRemoteError(connection, result); + ReportRemoteError(connection, result, false); } PQclear(result); @@ -90,7 +90,7 @@ count_remote_temp_table_rows(PG_FUNCTION_ARGS) result = PQexec(connection, COUNT_TEMP_TABLE); if (PQresultStatus(result) != PGRES_TUPLES_OK) { - ReportRemoteError(connection, result); + ReportRemoteError(connection, result, false); } else { diff --git a/src/backend/distributed/utils/connection_cache.c b/src/backend/distributed/utils/connection_cache.c index ce592d8b5..eeca2d8f7 100644 --- a/src/backend/distributed/utils/connection_cache.c +++ b/src/backend/distributed/utils/connection_cache.c @@ -190,12 +190,36 @@ PurgeConnection(PGconn *connection) } +/* + * SqlStateMatchesCategory returns true if the given sql state is in the given + * error category. Note that we use ERRCODE_TO_CATEGORY macro to determine error + * category of the sql state and expect the caller to use the same macro for the + * error category. 
+ */ +bool +SqlStateMatchesCategory(char *sqlStateString, int category) +{ + bool sqlStateMatchesCategory = false; + int sqlState = MAKE_SQLSTATE(sqlStateString[0], sqlStateString[1], sqlStateString[2], + sqlStateString[3], sqlStateString[4]); + + int sqlStateCategory = ERRCODE_TO_CATEGORY(sqlState); + if (sqlStateCategory == category) + { + sqlStateMatchesCategory = true; + } + + return sqlStateMatchesCategory; +} + + /* * ReportRemoteError retrieves various error fields from the a remote result and - * produces an error report at the WARNING level. + * produces an error report at the WARNING level or at the ERROR level if raise + * error is set. */ void -ReportRemoteError(PGconn *connection, PGresult *result) +ReportRemoteError(PGconn *connection, PGresult *result, bool raiseError) { char *sqlStateString = PQresultErrorField(result, PG_DIAG_SQLSTATE); char *messagePrimary = PQresultErrorField(result, PG_DIAG_MESSAGE_PRIMARY); @@ -206,6 +230,7 @@ ReportRemoteError(PGconn *connection, PGresult *result) char *nodeName = ConnectionGetOptionValue(connection, "host"); char *nodePort = ConnectionGetOptionValue(connection, "port"); int sqlState = ERRCODE_CONNECTION_FAILURE; + int errorLevel = WARNING; if (sqlStateString != NULL) { @@ -213,6 +238,11 @@ ReportRemoteError(PGconn *connection, PGresult *result) sqlStateString[3], sqlStateString[4]); } + if (raiseError) + { + errorLevel = ERROR; + } + /* * If the PGresult did not contain a message, the connection may provide a * suitable top level one. At worst, this is an empty string. @@ -233,18 +263,18 @@ ReportRemoteError(PGconn *connection, PGresult *result) if (sqlState == ERRCODE_CONNECTION_FAILURE) { - ereport(WARNING, (errcode(sqlState), - errmsg("connection failed to %s:%s", nodeName, nodePort), - errdetail("%s", messagePrimary))); + ereport(errorLevel, (errcode(sqlState), + errmsg("connection failed to %s:%s", nodeName, nodePort), + errdetail("%s", messagePrimary))); } else { - ereport(WARNING, (errcode(sqlState), errmsg("%s", messagePrimary), - messageDetail ? errdetail("%s", messageDetail) : 0, - messageHint ? errhint("%s", messageHint) : 0, - messageContext ? errcontext("%s", messageContext) : 0, - errcontext("Error occurred on remote connection to %s:%s.", - nodeName, nodePort))); + ereport(errorLevel, (errcode(sqlState), errmsg("%s", messagePrimary), + messageDetail ? errdetail("%s", messageDetail) : 0, + messageHint ? errhint("%s", messageHint) : 0, + messageContext ? 
errcontext("%s", messageContext) : 0, + errcontext("Error occurred on remote connection to %s:%s.", + nodeName, nodePort))); } } @@ -316,7 +346,7 @@ ConnectToNode(char *nodeName, int32 nodePort, char *nodeUser) /* warn if still erroring on final attempt */ if (attemptIndex == MAX_CONNECT_ATTEMPTS - 1) { - ReportRemoteError(connection, NULL); + ReportRemoteError(connection, NULL, false); } PQfinish(connection); diff --git a/src/backend/distributed/utils/multi_transaction.c b/src/backend/distributed/utils/multi_transaction.c index f9885f4e0..d9e48b270 100644 --- a/src/backend/distributed/utils/multi_transaction.c +++ b/src/backend/distributed/utils/multi_transaction.c @@ -75,7 +75,7 @@ PrepareRemoteTransactions(List *connectionList) /* a failure to prepare is an implicit rollback */ transactionConnection->transactionState = TRANSACTION_STATE_CLOSED; - ReportRemoteError(connection, result); + ReportRemoteError(connection, result, false); PQclear(result); ereport(ERROR, (errcode(ERRCODE_IO_ERROR), diff --git a/src/include/distributed/connection_cache.h b/src/include/distributed/connection_cache.h index fb4f01a89..9d8492794 100644 --- a/src/include/distributed/connection_cache.h +++ b/src/include/distributed/connection_cache.h @@ -56,7 +56,8 @@ typedef struct NodeConnectionEntry /* function declarations for obtaining and using a connection */ extern PGconn * GetOrEstablishConnection(char *nodeName, int32 nodePort); extern void PurgeConnection(PGconn *connection); -extern void ReportRemoteError(PGconn *connection, PGresult *result); +extern bool SqlStateMatchesCategory(char *sqlStateString, int category); +extern void ReportRemoteError(PGconn *connection, PGresult *result, bool raiseError); extern PGconn * ConnectToNode(char *nodeName, int nodePort, char *nodeUser); extern char * ConnectionGetOptionValue(PGconn *connection, char *optionKeyword); #endif /* CONNECTION_CACHE_H */ diff --git a/src/test/regress/expected/multi_create_insert_proxy.out b/src/test/regress/expected/multi_create_insert_proxy.out index 93faa5e71..a18205995 100644 --- a/src/test/regress/expected/multi_create_insert_proxy.out +++ b/src/test/regress/expected/multi_create_insert_proxy.out @@ -68,7 +68,7 @@ INSERT INTO pg_temp.:"proxy_tablename" (id) VALUES (1); -- test copy with bad row in middle \set VERBOSITY terse COPY pg_temp.:"proxy_tablename" FROM stdin; -ERROR: could not modify any active placements +ERROR: null value in column "data" violates not-null constraint \set VERBOSITY default -- verify rows were copied to distributed table SELECT * FROM insert_target ORDER BY id ASC; diff --git a/src/test/regress/expected/multi_modifications.out b/src/test/regress/expected/multi_modifications.out index 5d0dcdaee..52d725c31 100644 --- a/src/test/regress/expected/multi_modifications.out +++ b/src/test/regress/expected/multi_modifications.out @@ -149,10 +149,14 @@ ERROR: cannot plan INSERT using row with NULL value in partition column -- INSERT violating column constraint INSERT INTO limit_orders VALUES (18811, 'BUD', 14962, '2014-04-05 08:32:16', 'sell', -5.00); -ERROR: could not modify any active placements +ERROR: new row for relation "limit_orders_750000" violates check constraint "limit_orders_limit_price_check" +DETAIL: Failing row contains (18811, BUD, 14962, 2014-04-05 08:32:16, sell, -5.00). +CONTEXT: Error occurred on remote connection to localhost:57637. 
-- INSERT violating primary key constraint
 INSERT INTO limit_orders VALUES (32743, 'LUV', 5994, '2001-04-16 03:37:28', 'buy', 0.58);
-ERROR: could not modify any active placements
+ERROR: duplicate key value violates unique constraint "limit_orders_pkey_750001"
+DETAIL: Key (id)=(32743) already exists.
+CONTEXT: Error occurred on remote connection to localhost:57638.
 SET client_min_messages TO DEFAULT;
 -- commands with non-constant partition values are unsupported
 INSERT INTO limit_orders VALUES (random() * 100, 'ORCL', 152, '2011-08-25 11:50:45',
@@ -261,26 +265,25 @@ SELECT kind, limit_price FROM limit_orders WHERE id = 246;
  buy | 0.00
 (1 row)
 
--- Test that shards which miss a modification are marked unhealthy
--- First: Mark all placements for a node as inactive
-UPDATE pg_dist_shard_placement
-SET shardstate = 3
-WHERE nodename = 'localhost' AND
-      nodeport = :worker_1_port;
--- Second: Perform an INSERT to the remaining node
+-- Test that on unique constraint violations, we fail fast
 INSERT INTO limit_orders VALUES (275, 'ADR', 140, '2007-07-02 16:32:15', 'sell', 43.67);
--- Third: Mark the original placements as healthy again
-UPDATE pg_dist_shard_placement
-SET shardstate = 1
-WHERE nodename = 'localhost' AND
-      nodeport = :worker_1_port;
--- Fourth: Perform the same INSERT (primary key violation)
 INSERT INTO limit_orders VALUES (275, 'ADR', 140, '2007-07-02 16:32:15', 'sell', 43.67);
-WARNING: duplicate key value violates unique constraint "limit_orders_pkey_750001"
+ERROR: duplicate key value violates unique constraint "limit_orders_pkey_750001"
 DETAIL: Key (id)=(275) already exists.
 CONTEXT: Error occurred on remote connection to localhost:57638.
--- Last: Verify the insert worked but the placement with the PK violation is now unhealthy
-SELECT count(*) FROM limit_orders WHERE id = 275;
+-- Test that shards which miss a modification are marked unhealthy
+-- First: Connect to the second worker node
+\c - - - :worker_2_port
+-- Second: Drop limit_orders shard on the second worker node
+DROP TABLE limit_orders_750000;
+-- Third: Connect back to master node
+\c - - - :master_port
+-- Fourth: Perform an INSERT on the remaining node
+INSERT INTO limit_orders VALUES (276, 'ADR', 140, '2007-07-02 16:32:15', 'sell', 43.67);
+WARNING: relation "limit_orders_750000" does not exist
+CONTEXT: Error occurred on remote connection to localhost:57638.
+-- Last: Verify the insert worked but the deleted placement is now unhealthy
-SELECT count(*) FROM limit_orders WHERE id = 275;
+SELECT count(*) FROM limit_orders WHERE id = 276;
  count
 -------
      1
diff --git a/src/test/regress/sql/multi_modifications.sql b/src/test/regress/sql/multi_modifications.sql
index 238fa57d2..15434dbb5 100644
--- a/src/test/regress/sql/multi_modifications.sql
+++ b/src/test/regress/sql/multi_modifications.sql
@@ -190,28 +190,26 @@ SELECT bidder_id FROM limit_orders WHERE id = 246;
 UPDATE limit_orders SET (kind, limit_price) = ('buy', DEFAULT) WHERE id = 246;
 SELECT kind, limit_price FROM limit_orders WHERE id = 246;
 
+-- Test that on unique constraint violations, we fail fast
+INSERT INTO limit_orders VALUES (275, 'ADR', 140, '2007-07-02 16:32:15', 'sell', 43.67);
+INSERT INTO limit_orders VALUES (275, 'ADR', 140, '2007-07-02 16:32:15', 'sell', 43.67);
+
 -- Test that shards which miss a modification are marked unhealthy
--- First: Mark all placements for a node as inactive
-UPDATE pg_dist_shard_placement
-SET shardstate = 3
-WHERE nodename = 'localhost' AND
-      nodeport = :worker_1_port;
+-- First: Connect to the second worker node
+\c - - - :worker_2_port
 
--- Second: Perform an INSERT to the remaining node
-INSERT INTO limit_orders VALUES (275, 'ADR', 140, '2007-07-02 16:32:15', 'sell', 43.67);
+-- Second: Drop limit_orders shard on the second worker node
+DROP TABLE limit_orders_750000;
 
--- Third: Mark the original placements as healthy again
-UPDATE pg_dist_shard_placement
-SET shardstate = 1
-WHERE nodename = 'localhost' AND
-      nodeport = :worker_1_port;
+-- Third: Connect back to master node
+\c - - - :master_port
 
--- Fourth: Perform the same INSERT (primary key violation)
-INSERT INTO limit_orders VALUES (275, 'ADR', 140, '2007-07-02 16:32:15', 'sell', 43.67);
+-- Fourth: Perform an INSERT on the remaining node
+INSERT INTO limit_orders VALUES (276, 'ADR', 140, '2007-07-02 16:32:15', 'sell', 43.67);
 
--- Last: Verify the insert worked but the placement with the PK violation is now unhealthy
-SELECT count(*) FROM limit_orders WHERE id = 275;
+-- Last: Verify the insert worked but the deleted placement is now unhealthy
+SELECT count(*) FROM limit_orders WHERE id = 276;
 SELECT count(*)
 FROM pg_dist_shard_placement AS sp,
      pg_dist_shard AS s

From 9ba02928ac12d3b93b52c17bb1f8f08adacc3ded Mon Sep 17 00:00:00 2001
From: Jason Petersen
Date: Tue, 7 Jun 2016 12:12:31 -0600
Subject: [PATCH 3/5] Refactor ReportRemoteError to remove boolean arg

Broke it into two explicitly-named functions instead: WarnRemoteError
and ReraiseRemoteError.
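
At call sites that need the fail-fast behavior, the resulting pattern
(condensed from the multi_router_executor.c hunk below, with sqlStateString
and category computed as in that hunk) is roughly:

    if (SqlStateMatchesCategory(sqlStateString, category))
    {
        /* constraint violations re-raise the remote error and abort */
        ReraiseRemoteError(connection, result);
    }
    else
    {
        /* other failures only warn; the placement is added to the failed list */
        WarnRemoteError(connection, result);
    }

Both wrappers delegate to ReportRemoteError, which is now static and keeps
the shared field-extraction logic in one place.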
--- src/backend/distributed/commands/multi_copy.c | 8 ++--- .../executor/multi_client_executor.c | 16 ++++----- .../executor/multi_router_executor.c | 15 ++++++--- .../master/master_modify_multiple_shards.c | 4 +-- .../distributed/test/connection_cache.c | 4 +-- .../distributed/utils/connection_cache.c | 33 ++++++++++++++++--- .../distributed/utils/multi_transaction.c | 2 +- src/include/distributed/connection_cache.h | 3 +- 8 files changed, 59 insertions(+), 26 deletions(-) diff --git a/src/backend/distributed/commands/multi_copy.c b/src/backend/distributed/commands/multi_copy.c index cbfb62027..1dd545c97 100644 --- a/src/backend/distributed/commands/multi_copy.c +++ b/src/backend/distributed/commands/multi_copy.c @@ -827,7 +827,7 @@ MasterPartitionMethod(RangeVar *relation) } else { - ReportRemoteError(masterConnection, queryResult, false); + WarnRemoteError(masterConnection, queryResult); ereport(ERROR, (errmsg("could not get the partition method of the " "distributed table"))); } @@ -924,7 +924,7 @@ OpenCopyTransactions(CopyStmt *copyStatement, ShardConnections *shardConnections result = PQexec(connection, "BEGIN"); if (PQresultStatus(result) != PGRES_COMMAND_OK) { - ReportRemoteError(connection, result, false); + WarnRemoteError(connection, result); failedPlacementList = lappend(failedPlacementList, placement); PQclear(result); @@ -937,7 +937,7 @@ OpenCopyTransactions(CopyStmt *copyStatement, ShardConnections *shardConnections result = PQexec(connection, copyCommand->data); if (PQresultStatus(result) != PGRES_COPY_IN) { - ReportRemoteError(connection, result, false); + WarnRemoteError(connection, result); failedPlacementList = lappend(failedPlacementList, placement); PQclear(result); @@ -1504,7 +1504,7 @@ RemoteCreateEmptyShard(char *relationName) } else { - ReportRemoteError(masterConnection, queryResult, false); + WarnRemoteError(masterConnection, queryResult); ereport(ERROR, (errmsg("could not create a new empty shard on the remote node"))); } diff --git a/src/backend/distributed/executor/multi_client_executor.c b/src/backend/distributed/executor/multi_client_executor.c index 241e62daa..c51c4d79b 100644 --- a/src/backend/distributed/executor/multi_client_executor.c +++ b/src/backend/distributed/executor/multi_client_executor.c @@ -141,7 +141,7 @@ MultiClientConnect(const char *nodeName, uint32 nodePort, const char *nodeDataba } else { - ReportRemoteError(connection, NULL, false); + WarnRemoteError(connection, NULL); PQfinish(connection); connectionId = INVALID_CONNECTION_ID; @@ -194,7 +194,7 @@ MultiClientConnectStart(const char *nodeName, uint32 nodePort, const char *nodeD } else { - ReportRemoteError(connection, NULL, false); + WarnRemoteError(connection, NULL); PQfinish(connection); connectionId = INVALID_CONNECTION_ID; @@ -249,7 +249,7 @@ MultiClientConnectPoll(int32 connectionId) } else if (pollingStatus == PGRES_POLLING_FAILED) { - ReportRemoteError(connection, NULL, false); + WarnRemoteError(connection, NULL); connectStatus = CLIENT_CONNECTION_BAD; } @@ -433,7 +433,7 @@ MultiClientQueryResult(int32 connectionId, void **queryResult, int *rowCount, } else { - ReportRemoteError(connection, result, false); + WarnRemoteError(connection, result); PQclear(result); } @@ -500,7 +500,7 @@ MultiClientBatchResult(int32 connectionId, void **queryResult, int *rowCount, } else { - ReportRemoteError(connection, result, false); + WarnRemoteError(connection, result); PQclear(result); queryStatus = CLIENT_BATCH_QUERY_FAILED; } @@ -585,7 +585,7 @@ MultiClientQueryStatus(int32 connectionId) 
copyResults = true; } - ReportRemoteError(connection, result, false); + WarnRemoteError(connection, result); } /* clear the result object */ @@ -675,7 +675,7 @@ MultiClientCopyData(int32 connectionId, int32 fileDescriptor) { copyStatus = CLIENT_COPY_FAILED; - ReportRemoteError(connection, result, false); + WarnRemoteError(connection, result); } PQclear(result); @@ -685,7 +685,7 @@ MultiClientCopyData(int32 connectionId, int32 fileDescriptor) /* received an error */ copyStatus = CLIENT_COPY_FAILED; - ReportRemoteError(connection, NULL, false); + WarnRemoteError(connection, NULL); } /* if copy out completed, make sure we drain all results from libpq */ diff --git a/src/backend/distributed/executor/multi_router_executor.c b/src/backend/distributed/executor/multi_router_executor.c index 5720ab63d..3758d3134 100644 --- a/src/backend/distributed/executor/multi_router_executor.c +++ b/src/backend/distributed/executor/multi_router_executor.c @@ -279,7 +279,14 @@ ExecuteDistributedModify(Task *task) category = ERRCODE_TO_CATEGORY(ERRCODE_INTEGRITY_CONSTRAINT_VIOLATION); raiseError = SqlStateMatchesCategory(sqlStateString, category); - ReportRemoteError(connection, result, raiseError); + if (raiseError) + { + ReraiseRemoteError(connection, result); + } + else + { + WarnRemoteError(connection, result); + } PQclear(result); failedPlacementList = lappend(failedPlacementList, taskPlacement); @@ -463,14 +470,14 @@ SendQueryInSingleRowMode(PGconn *connection, char *query) querySent = PQsendQuery(connection, query); if (querySent == 0) { - ReportRemoteError(connection, NULL, false); + WarnRemoteError(connection, NULL); return false; } singleRowMode = PQsetSingleRowMode(connection); if (singleRowMode == 0) { - ReportRemoteError(connection, NULL, false); + WarnRemoteError(connection, NULL); return false; } @@ -517,7 +524,7 @@ StoreQueryResult(PGconn *connection, TupleDesc tupleDescriptor, resultStatus = PQresultStatus(result); if ((resultStatus != PGRES_SINGLE_TUPLE) && (resultStatus != PGRES_TUPLES_OK)) { - ReportRemoteError(connection, result, false); + WarnRemoteError(connection, result); PQclear(result); return false; diff --git a/src/backend/distributed/master/master_modify_multiple_shards.c b/src/backend/distributed/master/master_modify_multiple_shards.c index 3933eaae1..0ca5afc52 100644 --- a/src/backend/distributed/master/master_modify_multiple_shards.c +++ b/src/backend/distributed/master/master_modify_multiple_shards.c @@ -371,14 +371,14 @@ SendQueryToPlacements(char *shardQueryString, ShardConnections *shardConnections result = PQexec(connection, "BEGIN"); if (PQresultStatus(result) != PGRES_COMMAND_OK) { - ReportRemoteError(connection, result, false); + WarnRemoteError(connection, result); ereport(ERROR, (errmsg("could not send query to shard placement"))); } result = PQexec(connection, shardQueryString); if (PQresultStatus(result) != PGRES_COMMAND_OK) { - ReportRemoteError(connection, result, false); + WarnRemoteError(connection, result); ereport(ERROR, (errmsg("could not send query to shard placement"))); } diff --git a/src/backend/distributed/test/connection_cache.c b/src/backend/distributed/test/connection_cache.c index 1e99836a1..3d641d837 100644 --- a/src/backend/distributed/test/connection_cache.c +++ b/src/backend/distributed/test/connection_cache.c @@ -59,7 +59,7 @@ initialize_remote_temp_table(PG_FUNCTION_ARGS) result = PQexec(connection, POPULATE_TEMP_TABLE); if (PQresultStatus(result) != PGRES_COMMAND_OK) { - ReportRemoteError(connection, result, false); + WarnRemoteError(connection, 
result); } PQclear(result); @@ -90,7 +90,7 @@ count_remote_temp_table_rows(PG_FUNCTION_ARGS) result = PQexec(connection, COUNT_TEMP_TABLE); if (PQresultStatus(result) != PGRES_TUPLES_OK) { - ReportRemoteError(connection, result, false); + WarnRemoteError(connection, result); } else { diff --git a/src/backend/distributed/utils/connection_cache.c b/src/backend/distributed/utils/connection_cache.c index eeca2d8f7..07ae1fe2f 100644 --- a/src/backend/distributed/utils/connection_cache.c +++ b/src/backend/distributed/utils/connection_cache.c @@ -41,6 +41,7 @@ static HTAB *NodeConnectionHash = NULL; /* local function forward declarations */ static HTAB * CreateNodeConnectionHash(void); +static void ReportRemoteError(PGconn *connection, PGresult *result, bool raiseError); /* @@ -214,11 +215,35 @@ SqlStateMatchesCategory(char *sqlStateString, int category) /* - * ReportRemoteError retrieves various error fields from the a remote result and - * produces an error report at the WARNING level or at the ERROR level if raise - * error is set. + * WarnRemoteError retrieves error fields from a remote result and produces an + * error report at the WARNING level after amending the error with a CONTEXT + * field containing the remote node host and port information. */ void +WarnRemoteError(PGconn *connection, PGresult *result) +{ + ReportRemoteError(connection, result, false); +} + + +/* + * ReraiseRemoteError retrieves error fields from a remote result and re-raises + * the error after amending it with a CONTEXT field containing the remote node + * host and port information. + */ +void +ReraiseRemoteError(PGconn *connection, PGresult *result) +{ + ReportRemoteError(connection, result, true); +} + + +/* + * ReportRemoteError is an internal helper function which implements logic + * needed by both WarnRemoteError and ReraiseRemoteError. They wrap this + * function to provide explicit names for the possible behaviors. 
+ */ +static void ReportRemoteError(PGconn *connection, PGresult *result, bool raiseError) { char *sqlStateString = PQresultErrorField(result, PG_DIAG_SQLSTATE); @@ -346,7 +371,7 @@ ConnectToNode(char *nodeName, int32 nodePort, char *nodeUser) /* warn if still erroring on final attempt */ if (attemptIndex == MAX_CONNECT_ATTEMPTS - 1) { - ReportRemoteError(connection, NULL, false); + WarnRemoteError(connection, NULL); } PQfinish(connection); diff --git a/src/backend/distributed/utils/multi_transaction.c b/src/backend/distributed/utils/multi_transaction.c index d9e48b270..7204dfd92 100644 --- a/src/backend/distributed/utils/multi_transaction.c +++ b/src/backend/distributed/utils/multi_transaction.c @@ -75,7 +75,7 @@ PrepareRemoteTransactions(List *connectionList) /* a failure to prepare is an implicit rollback */ transactionConnection->transactionState = TRANSACTION_STATE_CLOSED; - ReportRemoteError(connection, result, false); + WarnRemoteError(connection, result); PQclear(result); ereport(ERROR, (errcode(ERRCODE_IO_ERROR), diff --git a/src/include/distributed/connection_cache.h b/src/include/distributed/connection_cache.h index 9d8492794..326e229c2 100644 --- a/src/include/distributed/connection_cache.h +++ b/src/include/distributed/connection_cache.h @@ -57,7 +57,8 @@ typedef struct NodeConnectionEntry extern PGconn * GetOrEstablishConnection(char *nodeName, int32 nodePort); extern void PurgeConnection(PGconn *connection); extern bool SqlStateMatchesCategory(char *sqlStateString, int category); -extern void ReportRemoteError(PGconn *connection, PGresult *result, bool raiseError); +extern void WarnRemoteError(PGconn *connection, PGresult *result); +extern void ReraiseRemoteError(PGconn *connection, PGresult *result); extern PGconn * ConnectToNode(char *nodeName, int nodePort, char *nodeUser); extern char * ConnectionGetOptionValue(PGconn *connection, char *optionKeyword); #endif /* CONNECTION_CACHE_H */ From 48f4e5d1a56b655757fb18f78aebf8c798d0e2b2 Mon Sep 17 00:00:00 2001 From: Jason Petersen Date: Tue, 7 Jun 2016 12:28:29 -0600 Subject: [PATCH 4/5] Make ReportRemoteError's CONTEXT style-compliant There's not a ton of documentation about what CONTEXT lines should look like, but this seems like the most dominant pattern. Similarly, users should expect lowercase, non-period strings. --- src/backend/distributed/utils/connection_cache.c | 2 +- src/test/regress/expected/multi_index_statements.out | 4 ++-- src/test/regress/expected/multi_modifications.out | 8 ++++---- .../regress/output/multi_alter_table_statements.source | 8 ++++---- src/test/regress/output/multi_copy.source | 2 +- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/backend/distributed/utils/connection_cache.c b/src/backend/distributed/utils/connection_cache.c index 07ae1fe2f..c30b6594a 100644 --- a/src/backend/distributed/utils/connection_cache.c +++ b/src/backend/distributed/utils/connection_cache.c @@ -298,7 +298,7 @@ ReportRemoteError(PGconn *connection, PGresult *result, bool raiseError) messageDetail ? errdetail("%s", messageDetail) : 0, messageHint ? errhint("%s", messageHint) : 0, messageContext ? 
errcontext("%s", messageContext) : 0, - errcontext("Error occurred on remote connection to %s:%s.", + errcontext("while executing command on %s:%s", nodeName, nodePort))); } } diff --git a/src/test/regress/expected/multi_index_statements.out b/src/test/regress/expected/multi_index_statements.out index 5f115f0f4..85d593bf3 100644 --- a/src/test/regress/expected/multi_index_statements.out +++ b/src/test/regress/expected/multi_index_statements.out @@ -141,11 +141,11 @@ ERROR: relation "lineitem_orderkey_index" already exists CREATE INDEX try_index ON lineitem USING gist (l_orderkey); WARNING: data type bigint has no default operator class for access method "gist" HINT: You must specify an operator class for the index or define a default operator class for the data type. -CONTEXT: Error occurred on remote connection to localhost:57638. +CONTEXT: while executing command on localhost:57638 ERROR: could not execute DDL command on worker node shards CREATE INDEX try_index ON lineitem (non_existent_column); WARNING: column "non_existent_column" does not exist -CONTEXT: Error occurred on remote connection to localhost:57638. +CONTEXT: while executing command on localhost:57638 ERROR: could not execute DDL command on worker node shards CREATE INDEX ON lineitem (l_orderkey); ERROR: creating index without a name on a distributed table is currently unsupported diff --git a/src/test/regress/expected/multi_modifications.out b/src/test/regress/expected/multi_modifications.out index 52d725c31..07399b6e8 100644 --- a/src/test/regress/expected/multi_modifications.out +++ b/src/test/regress/expected/multi_modifications.out @@ -151,12 +151,12 @@ INSERT INTO limit_orders VALUES (18811, 'BUD', 14962, '2014-04-05 08:32:16', 'se -5.00); ERROR: new row for relation "limit_orders_750000" violates check constraint "limit_orders_limit_price_check" DETAIL: Failing row contains (18811, BUD, 14962, 2014-04-05 08:32:16, sell, -5.00). -CONTEXT: Error occurred on remote connection to localhost:57637. +CONTEXT: while executing command on localhost:57637 -- INSERT violating primary key constraint INSERT INTO limit_orders VALUES (32743, 'LUV', 5994, '2001-04-16 03:37:28', 'buy', 0.58); ERROR: duplicate key value violates unique constraint "limit_orders_pkey_750001" DETAIL: Key (id)=(32743) already exists. -CONTEXT: Error occurred on remote connection to localhost:57638. +CONTEXT: while executing command on localhost:57638 SET client_min_messages TO DEFAULT; -- commands with non-constant partition values are unsupported INSERT INTO limit_orders VALUES (random() * 100, 'ORCL', 152, '2011-08-25 11:50:45', @@ -270,7 +270,7 @@ INSERT INTO limit_orders VALUES (275, 'ADR', 140, '2007-07-02 16:32:15', 'sell', INSERT INTO limit_orders VALUES (275, 'ADR', 140, '2007-07-02 16:32:15', 'sell', 43.67); ERROR: duplicate key value violates unique constraint "limit_orders_pkey_750001" DETAIL: Key (id)=(275) already exists. -CONTEXT: Error occurred on remote connection to localhost:57638. +CONTEXT: while executing command on localhost:57638 -- Test that shards which miss a modification are marked unhealthy -- First: Connect to the second worker node \c - - - :worker_2_port @@ -281,7 +281,7 @@ DROP TABLE limit_orders_750000; -- Fourth: Perform an INSERT on the remaining node INSERT INTO limit_orders VALUES (276, 'ADR', 140, '2007-07-02 16:32:15', 'sell', 43.67); WARNING: relation "limit_orders_750000" does not exist -CONTEXT: Error occurred on remote connection to localhost:57638. 
+CONTEXT: while executing command on localhost:57638 -- Last: Verify the insert worked but the deleted placement is now unhealthy SELECT count(*) FROM limit_orders WHERE id = 276; count diff --git a/src/test/regress/output/multi_alter_table_statements.source b/src/test/regress/output/multi_alter_table_statements.source index 7d209e9c5..2688ce896 100644 --- a/src/test/regress/output/multi_alter_table_statements.source +++ b/src/test/regress/output/multi_alter_table_statements.source @@ -262,7 +262,7 @@ NOTICE: relation "non_existent_table" does not exist, skipping ALTER TABLE IF EXISTS lineitem_alter ALTER COLUMN int_column2 SET DATA TYPE INTEGER; ALTER TABLE lineitem_alter DROP COLUMN non_existent_column; WARNING: column "non_existent_column" of relation "lineitem_alter_220000" does not exist -CONTEXT: Error occurred on remote connection to localhost:57638. +CONTEXT: while executing command on localhost:57638 ERROR: could not execute DDL command on worker node shards ALTER TABLE lineitem_alter DROP COLUMN IF EXISTS non_existent_column; NOTICE: column "non_existent_column" of relation "lineitem_alter" does not exist, skipping @@ -362,15 +362,15 @@ DETAIL: Only ADD|DROP COLUMN, SET|DROP NOT NULL, SET|DROP DEFAULT and TYPE subc -- types ALTER TABLE lineitem_alter ADD COLUMN new_column non_existent_type; WARNING: type "non_existent_type" does not exist -CONTEXT: Error occurred on remote connection to localhost:57638. +CONTEXT: while executing command on localhost:57638 ERROR: could not execute DDL command on worker node shards ALTER TABLE lineitem_alter ALTER COLUMN null_column SET NOT NULL; WARNING: column "null_column" contains null values -CONTEXT: Error occurred on remote connection to localhost:57638. +CONTEXT: while executing command on localhost:57638 ERROR: could not execute DDL command on worker node shards ALTER TABLE lineitem_alter ALTER COLUMN l_partkey SET DEFAULT 'a'; WARNING: invalid input syntax for integer: "a" -CONTEXT: Error occurred on remote connection to localhost:57638. +CONTEXT: while executing command on localhost:57638 ERROR: could not execute DDL command on worker node shards -- Verify that we error out on statements involving RENAME ALTER TABLE lineitem_alter RENAME TO lineitem_renamed; diff --git a/src/test/regress/output/multi_copy.source b/src/test/regress/output/multi_copy.source index eca01bb4e..31db8b841 100644 --- a/src/test/regress/output/multi_copy.source +++ b/src/test/regress/output/multi_copy.source @@ -456,7 +456,7 @@ COPY customer_worker_copy_append FROM '@abs_srcdir@/data/customer.2.data' with ( -- Test if there is no relation to copy data with the worker copy COPY lineitem_copy_none FROM '@abs_srcdir@/data/lineitem.1.data' with (delimiter '|', master_host 'localhost', master_port 57636); WARNING: relation "lineitem_copy_none" does not exist -CONTEXT: Error occurred on remote connection to localhost:57636. +CONTEXT: while executing command on localhost:57636 ERROR: could not run copy from the worker node -- Connect back to the master node \c - - - 57636 From a19520b9bd4927d6707dad7485cbc4237d713a5f Mon Sep 17 00:00:00 2001 From: Jason Petersen Date: Tue, 7 Jun 2016 13:21:23 -0600 Subject: [PATCH 5/5] Add back test for INSERT where all placements fail Since we now short-circuit on certain remote errors, we want to ensure we preserve the old behavior of not modifying any placement states if a non-short-circuiting error occurs on all placements. 
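
In executor terms, the invariant being pinned down looks roughly like the
sketch below. The names here are partly hypothetical (UpdateShardPlacementState
stands in for the actual metadata update, successfulPlacementCount for the
actual success check); the control flow follows the earlier patches in this
series:

    /*
     * Constraint violations already ereport(ERROR) inside the placement
     * loop, before any placement state is modified.
     */
    if (successfulPlacementCount == 0)
    {
        /*
         * All placements failed with ordinary errors: bail out without
         * touching any placement states.
         */
        ereport(ERROR, (errmsg("could not modify any active placements")));
    }

    /* at least one placement succeeded: mark only the failed ones unhealthy */
    foreach(failedPlacementCell, failedPlacementList)
    {
        ShardPlacement *failedPlacement =
            (ShardPlacement *) lfirst(failedPlacementCell);

        UpdateShardPlacementState(failedPlacement, FILE_INACTIVE); /* hypothetical */
    }

The test below renames the sole healthy shard placement away, so the INSERT
fails everywhere with a non-constraint error, and then verifies that the
placement row still reports shardstate = 1.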
---
 .../regress/expected/multi_modifications.out | 37 ++++++++++++++++-
 src/test/regress/sql/multi_modifications.sql | 40 ++++++++++++++++++-
 2 files changed, 73 insertions(+), 4 deletions(-)

diff --git a/src/test/regress/expected/multi_modifications.out b/src/test/regress/expected/multi_modifications.out
index 07399b6e8..33d64ae68 100644
--- a/src/test/regress/expected/multi_modifications.out
+++ b/src/test/regress/expected/multi_modifications.out
@@ -274,8 +274,8 @@ CONTEXT: while executing command on localhost:57638
 -- Test that shards which miss a modification are marked unhealthy
 -- First: Connect to the second worker node
 \c - - - :worker_2_port
--- Second: Drop limit_orders shard on the second worker node
-DROP TABLE limit_orders_750000;
+-- Second: Move aside limit_orders shard on the second worker node
+ALTER TABLE limit_orders_750000 RENAME TO renamed_orders;
 -- Third: Connect back to master node
 \c - - - :master_port
 -- Fourth: Perform an INSERT on the remaining node
@@ -302,6 +302,39 @@ AND s.logicalrelid = 'limit_orders'::regclass;
      1
 (1 row)
 
+-- Test that if all shards miss a modification, no state change occurs
+-- First: Connect to the first worker node
+\c - - - :worker_1_port
+-- Second: Move aside limit_orders shard on the first worker node
+ALTER TABLE limit_orders_750000 RENAME TO renamed_orders;
+-- Third: Connect back to master node
+\c - - - :master_port
+-- Fourth: Perform an INSERT on the remaining node
+INSERT INTO limit_orders VALUES (276, 'ADR', 140, '2007-07-02 16:32:15', 'sell', 43.67);
+WARNING: relation "limit_orders_750000" does not exist
+CONTEXT: while executing command on localhost:57637
+ERROR: could not modify any active placements
+-- Last: Verify worker is still healthy
+SELECT count(*)
+FROM pg_dist_shard_placement AS sp,
+     pg_dist_shard AS s
+WHERE sp.shardid = s.shardid
+AND sp.nodename = 'localhost'
+AND sp.nodeport = :worker_1_port
+AND sp.shardstate = 1
+AND s.logicalrelid = 'limit_orders'::regclass;
+ count
+-------
+     2
+(1 row)
+
+-- Undo our change...
+-- First: Connect to the first worker node
+\c - - - :worker_1_port
+-- Second: Move limit_orders shard back into place on the first worker node
+ALTER TABLE renamed_orders RENAME TO limit_orders_750000;
+-- Third: Connect back to master node
+\c - - - :master_port
 -- commands with no constraints on the partition key are not supported
 UPDATE limit_orders SET limit_price = 0.00;
 ERROR: distributed modifications must target exactly one shard
diff --git a/src/test/regress/sql/multi_modifications.sql b/src/test/regress/sql/multi_modifications.sql
index 15434dbb5..2adf977bd 100644
--- a/src/test/regress/sql/multi_modifications.sql
+++ b/src/test/regress/sql/multi_modifications.sql
@@ -199,8 +199,8 @@ INSERT INTO limit_orders VALUES (275, 'ADR', 140, '2007-07-02 16:32:15', 'sell',
 -- First: Connect to the second worker node
 \c - - - :worker_2_port
 
--- Second: Drop limit_orders shard on the second worker node
-DROP TABLE limit_orders_750000;
+-- Second: Move aside limit_orders shard on the second worker node
+ALTER TABLE limit_orders_750000 RENAME TO renamed_orders;
 
 -- Third: Connect back to master node
 \c - - - :master_port
@@ -210,6 +210,7 @@ INSERT INTO limit_orders VALUES (276, 'ADR', 140, '2007-07-02 16:32:15', 'sell',
 
 -- Last: Verify the insert worked but the deleted placement is now unhealthy
 SELECT count(*) FROM limit_orders WHERE id = 276;
+
 SELECT count(*)
 FROM pg_dist_shard_placement AS sp,
      pg_dist_shard AS s
@@ -219,6 +220,41 @@ AND sp.nodeport = :worker_2_port
 AND sp.shardstate = 3
 AND s.logicalrelid = 'limit_orders'::regclass;
 
+-- Test that if all shards miss a modification, no state change occurs
+
+-- First: Connect to the first worker node
+\c - - - :worker_1_port
+
+-- Second: Move aside limit_orders shard on the first worker node
+ALTER TABLE limit_orders_750000 RENAME TO renamed_orders;
+
+-- Third: Connect back to master node
+\c - - - :master_port
+
+-- Fourth: Perform an INSERT on the remaining node
+INSERT INTO limit_orders VALUES (276, 'ADR', 140, '2007-07-02 16:32:15', 'sell', 43.67);
+
+-- Last: Verify worker is still healthy
+SELECT count(*)
+FROM pg_dist_shard_placement AS sp,
+     pg_dist_shard AS s
+WHERE sp.shardid = s.shardid
+AND sp.nodename = 'localhost'
+AND sp.nodeport = :worker_1_port
+AND sp.shardstate = 1
+AND s.logicalrelid = 'limit_orders'::regclass;
+
+-- Undo our change...
+
+-- First: Connect to the first worker node
+\c - - - :worker_1_port
+
+-- Second: Move limit_orders shard back into place on the first worker node
+ALTER TABLE renamed_orders RENAME TO limit_orders_750000;
+
+-- Third: Connect back to master node
+\c - - - :master_port
+
 -- commands with no constraints on the partition key are not supported
 UPDATE limit_orders SET limit_price = 0.00;