From 3b7b64a8b6a2db55533977c5cdd7258c4ab674f8 Mon Sep 17 00:00:00 2001 From: Metin Doslu Date: Tue, 13 Mar 2018 10:33:27 +0200 Subject: [PATCH] Remove skip_jsonb_validation_in_copy GUC --- src/backend/distributed/commands/multi_copy.c | 146 ------------------ src/backend/distributed/shared_library_init.c | 16 -- src/include/distributed/multi_copy.h | 4 - 3 files changed, 166 deletions(-) diff --git a/src/backend/distributed/commands/multi_copy.c b/src/backend/distributed/commands/multi_copy.c index 69702ebbe..370bb366c 100644 --- a/src/backend/distributed/commands/multi_copy.c +++ b/src/backend/distributed/commands/multi_copy.c @@ -59,11 +59,9 @@ #include "catalog/pg_type.h" #include "commands/copy.h" #include "commands/defrem.h" -#include "distributed/listutils.h" #include "distributed/master_protocol.h" #include "distributed/metadata_cache.h" #include "distributed/multi_copy.h" -#include "distributed/multi_executor.h" #include "distributed/multi_partitioning_utils.h" #include "distributed/multi_physical_planner.h" #include "distributed/multi_shard_transaction.h" @@ -89,16 +87,10 @@ static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0"; /* use a global connection to the master node in order to skip passing it around */ static MultiConnection *masterConnection = NULL; -/* if true, skip validation of JSONB columns during COPY */ -bool SkipJsonbValidationInCopy = true; - /* Local functions forward declarations */ static void CopyFromWorkerNode(CopyStmt *copyStatement, char *completionTag); static void CopyToExistingShards(CopyStmt *copyStatement, char *completionTag); -static bool IsCopyInBinaryFormat(CopyStmt *copyStatement); -static List * FindJsonbInputColumns(TupleDesc tupleDescriptor, - List *inputColumnNameList); static void CopyToNewShards(CopyStmt *copyStatement, char *completionTag, Oid relationId); static char MasterPartitionMethod(RangeVar *relation); static void RemoveMasterOptions(CopyStmt *copyStatement); @@ -326,7 +318,6 @@ CopyToExistingShards(CopyStmt *copyStatement, char *completionTag) char partitionMethod = 0; bool stopOnFailure = false; - bool isInputFormatBinary = IsCopyInBinaryFormat(copyStatement); CopyState copyState = NULL; uint64 processedRowCount = 0; @@ -414,66 +405,6 @@ CopyToExistingShards(CopyStmt *copyStatement, char *completionTag) copiedDistributedRelationTuple->relkind = RELKIND_RELATION; } - /* - * We make an optimisation to skip JSON parsing for JSONB columns, because many - * Citus users have large objects in this column and parsing it on the coordinator - * causes significant CPU overhead. We do this by forcing BeginCopyFrom and - * NextCopyFrom to parse the column as text and then encoding it as JSON again - * by using citus_text_send_as_jsonb as the binary output function. - * - * The main downside of enabling this optimisation is that it defers validation - * until the object is parsed by the worker, which is unable to give an accurate - * line number. - */ - if (SkipJsonbValidationInCopy && !isInputFormatBinary) - { - CopyOutState copyOutState = copyDest->copyOutState; - List *jsonbColumnIndexList = NIL; - ListCell *jsonbColumnIndexCell = NULL; - - /* get the column indices for all JSONB columns that appear in the input */ - jsonbColumnIndexList = FindJsonbInputColumns(copiedDistributedRelation->rd_att, - copyStatement->attlist); - - foreach(jsonbColumnIndexCell, jsonbColumnIndexList) - { - int columnIndex = lfirst_int(jsonbColumnIndexCell); - Form_pg_attribute currentColumn = - TupleDescAttr(copiedDistributedRelation->rd_att, columnIndex); - - if (columnIndex == partitionColumnIndex) - { - /* - * In the curious case of using a JSONB column as partition column, - * we leave it as is because we want to make sure the hashing works - * correctly. - */ - continue; - } - - ereport(DEBUG1, (errmsg("parsing JSONB column %s as text", - NameStr(currentColumn->attname)))); - - /* parse the column as text instead of JSONB */ - currentColumn->atttypid = TEXTOID; - - if (copyOutState->binary) - { - Oid textSendAsJsonbFunctionId = CitusTextSendAsJsonbFunctionId(); - - /* - * If we're using binary encoding between coordinator and workers - * then we should honour the format expected by jsonb_recv, which - * is a version number followed by text. We therefore use an output - * function which sends the text as if it were jsonb, namely by - * prepending a version number. - */ - fmgr_info(textSendAsJsonbFunctionId, - ©Dest->columnOutputFunctions[columnIndex]); - } - } - } - /* initialize copy state to read from COPY data source */ #if (PG_VERSION_NUM >= 100000) copyState = BeginCopyFrom(NULL, @@ -550,83 +481,6 @@ CopyToExistingShards(CopyStmt *copyStatement, char *completionTag) } -/* - * IsCopyInBinaryFormat determines whether the given COPY statement has the - * WITH (format binary) option. - */ -static bool -IsCopyInBinaryFormat(CopyStmt *copyStatement) -{ - ListCell *optionCell = NULL; - - foreach(optionCell, copyStatement->options) - { - DefElem *defel = lfirst_node(DefElem, optionCell); - if (strcmp(defel->defname, "format") == 0 && - strcmp(defGetString(defel), "binary") == 0) - { - return true; - } - } - - return false; -} - - -/* - * FindJsonbInputColumns finds columns in the tuple descriptor that have - * the JSONB type and appear in inputColumnNameList. If the list is empty then - * all JSONB columns are returned. - */ -static List * -FindJsonbInputColumns(TupleDesc tupleDescriptor, List *inputColumnNameList) -{ - List *jsonbColumnIndexList = NIL; - int columnCount = tupleDescriptor->natts; - int columnIndex = 0; - - for (columnIndex = 0; columnIndex < columnCount; columnIndex++) - { - Form_pg_attribute currentColumn = TupleDescAttr(tupleDescriptor, columnIndex); - if (currentColumn->attisdropped) - { - continue; - } - - if (currentColumn->atttypid != JSONBOID) - { - continue; - } - - if (inputColumnNameList != NIL) - { - ListCell *inputColumnCell = NULL; - bool isInputColumn = false; - - foreach(inputColumnCell, inputColumnNameList) - { - char *inputColumnName = strVal(lfirst(inputColumnCell)); - - if (namestrcmp(¤tColumn->attname, inputColumnName) == 0) - { - isInputColumn = true; - break; - } - } - - if (!isInputColumn) - { - continue; - } - } - - jsonbColumnIndexList = lappend_int(jsonbColumnIndexList, columnIndex); - } - - return jsonbColumnIndexList; -} - - /* * CopyToNewShards implements the COPY table_name FROM ... for append-partitioned * tables where we create new shards into which to copy rows. diff --git a/src/backend/distributed/shared_library_init.c b/src/backend/distributed/shared_library_init.c index b6e941a8f..fe852ff1b 100644 --- a/src/backend/distributed/shared_library_init.c +++ b/src/backend/distributed/shared_library_init.c @@ -508,22 +508,6 @@ RegisterCitusConfigVariables(void) GUC_NO_SHOW_ALL, NULL, NULL, NULL); - DefineCustomBoolVariable( - "citus.skip_jsonb_validation_in_copy", - gettext_noop("Skip validation of JSONB columns on the coordinator during COPY " - "into a distributed table"), - gettext_noop("Parsing large JSON objects may incur significant CPU overhead, " - "which can lower COPY throughput. If this GUC is set (the default), " - "JSON parsing is skipped on the coordinator, which means you cannot " - "see the line number in case of malformed JSON, but throughput will " - "be higher. This setting does not apply if the input format is " - "binary."), - &SkipJsonbValidationInCopy, - false, - PGC_USERSET, - GUC_NO_SHOW_ALL, - NULL, NULL, NULL); - DefineCustomIntVariable( "citus.shard_count", gettext_noop("Sets the number of shards for a new hash-partitioned table" diff --git a/src/include/distributed/multi_copy.h b/src/include/distributed/multi_copy.h index fa05b2518..5269a61f1 100644 --- a/src/include/distributed/multi_copy.h +++ b/src/include/distributed/multi_copy.h @@ -108,10 +108,6 @@ typedef struct CitusCopyDestReceiver } CitusCopyDestReceiver; -/* GUCs */ -extern bool SkipJsonbValidationInCopy; - - /* function declarations for copying into a distributed table */ extern CitusCopyDestReceiver * CreateCitusCopyDestReceiver(Oid relationId, List *columnNameList,