Consider dropped columns that precede the partition column in COPY

pull/1606/head
Marco Slot 2017-08-21 22:38:14 +02:00
parent 15af3c5445
commit cf375d6a66
10 changed files with 158 additions and 53 deletions
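In short: when a dropped column precedes the distribution (partition) column, the partition column's position in the table's tuple descriptor no longer matches its position in the COPY column name list, so the COPY destination receiver could read the partition value from the wrong position. The scenario looks roughly like this (a minimal sketch mirroring the regression test added in this commit):

    -- col1 is dropped, so it no longer appears in the COPY column list,
    -- but it still occupies a position in the tuple descriptor
    CREATE TABLE drop_copy_test_table (col1 int, col2 int, col3 int, col4 int);
    SELECT create_distributed_table('drop_copy_test_table', 'col3');
    ALTER TABLE drop_copy_test_table DROP COLUMN col1;
    COPY drop_copy_test_table (col2, col3, col4) FROM STDIN WITH CSV;

With this change, callers of CreateCitusCopyDestReceiver pass the partition column index themselves (for the COPY paths, partitionColumn->varattno - 1 in the tuple descriptor) instead of having the receiver derive it from the column name list, which omits dropped columns.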

View File

@@ -1149,6 +1149,8 @@ CopyLocalDataIntoShards(Oid distributedRelationId)
     List *columnNameList = NIL;
     Relation distributedRelation = NULL;
     TupleDesc tupleDescriptor = NULL;
+    Var *partitionColumn = NULL;
+    int partitionColumnIndex = INVALID_PARTITION_COLUMN_INDEX;
     bool stopOnFailure = true;
     EState *estate = NULL;
@@ -1189,6 +1191,13 @@ CopyLocalDataIntoShards(Oid distributedRelationId)
     slot = MakeSingleTupleTableSlot(tupleDescriptor);
     columnNameList = TupleDescColumnNameList(tupleDescriptor);
+    /* determine the partition column in the tuple descriptor */
+    partitionColumn = PartitionColumn(distributedRelationId, 0);
+    if (partitionColumn != NULL)
+    {
+        partitionColumnIndex = partitionColumn->varattno - 1;
+    }
     /* initialise per-tuple memory context */
     estate = CreateExecutorState();
     econtext = GetPerTupleExprContext(estate);
@@ -1196,8 +1205,9 @@ CopyLocalDataIntoShards(Oid distributedRelationId)
     copyDest =
         (DestReceiver *) CreateCitusCopyDestReceiver(distributedRelationId,
-                                                     columnNameList, estate,
-                                                     stopOnFailure);
+                                                     columnNameList,
+                                                     partitionColumnIndex,
+                                                     estate, stopOnFailure);
     /* initialise state for writing to shards, we'll open connections on demand */
     copyDest->rStartup(copyDest, 0, tupleDescriptor);
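The same situation arises when create_distributed_table copies existing local data into shards through the code path above; the data_load_test case updated in this commit exercises it roughly as follows (a minimal sketch based on that test, omitting its extra quoted column):

    CREATE TABLE data_load_test (col1 int, col2 text, col3 text);
    INSERT INTO data_load_test VALUES (132, 'hello', 'world');
    -- dropping col1 shifts col3's position in the column name list,
    -- but not its attribute number in the tuple descriptor
    ALTER TABLE data_load_test DROP COLUMN col1;
    SELECT create_distributed_table('data_load_test', 'col3');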

View File

@@ -295,6 +295,8 @@ CopyToExistingShards(CopyStmt *copyStatement, char *completionTag)
     bool *columnNulls = NULL;
     int columnIndex = 0;
     List *columnNameList = NIL;
+    Var *partitionColumn = NULL;
+    int partitionColumnIndex = INVALID_PARTITION_COLUMN_INDEX;
     TupleTableSlot *tupleTableSlot = NULL;
     EState *executorState = NULL;
@@ -322,6 +324,14 @@ CopyToExistingShards(CopyStmt *copyStatement, char *completionTag)
     tupleTableSlot->tts_values = columnValues;
     tupleTableSlot->tts_isnull = columnNulls;
+    /* determine the partition column index in the tuple descriptor */
+    partitionColumn = PartitionColumn(tableId, 0);
+    if (partitionColumn != NULL)
+    {
+        partitionColumnIndex = partitionColumn->varattno - 1;
+    }
+    /* build the list of column names for remote COPY statements */
     for (columnIndex = 0; columnIndex < columnCount; columnIndex++)
     {
         Form_pg_attribute currentColumn = tupleDescriptor->attrs[columnIndex];
@@ -346,8 +356,8 @@ CopyToExistingShards(CopyStmt *copyStatement, char *completionTag)
     }
     /* set up the destination for the COPY */
-    copyDest = CreateCitusCopyDestReceiver(tableId, columnNameList, executorState,
-                                           stopOnFailure);
+    copyDest = CreateCitusCopyDestReceiver(tableId, columnNameList, partitionColumnIndex,
+                                           executorState, stopOnFailure);
     dest = (DestReceiver *) copyDest;
     dest->rStartup(dest, 0, tupleDescriptor);
@@ -1714,10 +1724,14 @@ CopyFlushOutput(CopyOutState cstate, char *start, char *pointer)
 /*
  * CreateCitusCopyDestReceiver creates a DestReceiver that copies into
  * a distributed table.
+ *
+ * The caller should provide the list of column names to use in the
+ * remote COPY statement, and the partition column index in the tuple
+ * descriptor (*not* the column name list).
  */
 CitusCopyDestReceiver *
-CreateCitusCopyDestReceiver(Oid tableId, List *columnNameList, EState *executorState,
-                            bool stopOnFailure)
+CreateCitusCopyDestReceiver(Oid tableId, List *columnNameList, int partitionColumnIndex,
+                            EState *executorState, bool stopOnFailure)
 {
     CitusCopyDestReceiver *copyDest = NULL;
@@ -1733,6 +1747,7 @@ CreateCitusCopyDestReceiver(Oid tableId, List *columnNameList, EState *executorS
     /* set up output parameters */
     copyDest->distributedRelationId = tableId;
     copyDest->columnNameList = columnNameList;
+    copyDest->partitionColumnIndex = partitionColumnIndex;
     copyDest->executorState = executorState;
     copyDest->stopOnFailure = stopOnFailure;
     copyDest->memoryContext = CurrentMemoryContext;
@@ -1758,15 +1773,12 @@ CitusCopyDestReceiverStartup(DestReceiver *dest, int operation,
     char *schemaName = get_namespace_name(schemaOid);
     Relation distributedRelation = NULL;
-    int columnIndex = 0;
     List *columnNameList = copyDest->columnNameList;
     List *quotedColumnNameList = NIL;
     ListCell *columnNameCell = NULL;
     char partitionMethod = '\0';
-    Var *partitionColumn = PartitionColumn(tableId, 0);
-    int partitionColumnIndex = -1;
     DistTableCacheEntry *cacheEntry = NULL;
     CopyStmt *copyStatement = NULL;
@@ -1853,37 +1865,23 @@ CitusCopyDestReceiverStartup(DestReceiver *dest, int operation,
     copyDest->columnOutputFunctions =
         ColumnOutputFunctions(inputTupleDescriptor, copyOutState->binary);
-    /* find the partition column index in the column list */
+    /* ensure the column names are properly quoted in the COPY statement */
     foreach(columnNameCell, columnNameList)
     {
         char *columnName = (char *) lfirst(columnNameCell);
         char *quotedColumnName = (char *) quote_identifier(columnName);
-        /* load the column information from pg_attribute */
-        AttrNumber attrNumber = get_attnum(tableId, columnName);
-        /* check whether this is the partition column */
-        if (partitionColumn != NULL && attrNumber == partitionColumn->varattno)
-        {
-            Assert(partitionColumnIndex == -1);
-            partitionColumnIndex = columnIndex;
-        }
-        columnIndex++;
         quotedColumnNameList = lappend(quotedColumnNameList, quotedColumnName);
     }
-    if (partitionMethod != DISTRIBUTE_BY_NONE && partitionColumnIndex == -1)
+    if (partitionMethod != DISTRIBUTE_BY_NONE &&
+        copyDest->partitionColumnIndex == INVALID_PARTITION_COLUMN_INDEX)
     {
         ereport(ERROR, (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
                         errmsg("the partition column of table %s should have a value",
                                quote_qualified_identifier(schemaName, relationName))));
     }
-    copyDest->partitionColumnIndex = partitionColumnIndex;
     /* define the template for the COPY statement that is sent to workers */
     copyStatement = makeNode(CopyStmt);
     copyStatement->relation = makeRangeVar(schemaName, relationName, -1);
@@ -1945,7 +1943,7 @@ CitusCopyDestReceiverReceive(TupleTableSlot *slot, DestReceiver *dest)
      * tables. Note that, reference tables has NULL partition column values so
      * skip the check.
      */
-    if (partitionColumnIndex >= 0)
+    if (partitionColumnIndex != INVALID_PARTITION_COLUMN_INDEX)
    {
        if (columnNulls[partitionColumnIndex])
        {

View File

@@ -99,6 +99,8 @@ ExecuteSelectIntoRelation(Oid targetRelationId, List *insertTargetList,
     List *columnNameList = NIL;
     bool stopOnFailure = false;
     char partitionMethod = 0;
+    Var *partitionColumn = NULL;
+    int partitionColumnIndex = -1;
     CitusCopyDestReceiver *copyDest = NULL;
@@ -108,17 +110,32 @@ ExecuteSelectIntoRelation(Oid targetRelationId, List *insertTargetList,
         stopOnFailure = true;
     }
+    partitionColumn = PartitionColumn(targetRelationId, 0);
     /* build the list of column names for the COPY statement */
     foreach(insertTargetCell, insertTargetList)
     {
         TargetEntry *insertTargetEntry = (TargetEntry *) lfirst(insertTargetCell);
+        char *columnName = insertTargetEntry->resname;
+        /* load the column information from pg_attribute */
+        AttrNumber attrNumber = get_attnum(targetRelationId, columnName);
+        /* check whether this is the partition column */
+        if (partitionColumn != NULL && attrNumber == partitionColumn->varattno)
+        {
+            Assert(partitionColumnIndex == -1);
+            partitionColumnIndex = list_length(columnNameList);
+        }
         columnNameList = lappend(columnNameList, insertTargetEntry->resname);
     }
     /* set up a DestReceiver that copies into the distributed table */
     copyDest = CreateCitusCopyDestReceiver(targetRelationId, columnNameList,
-                                           executorState, stopOnFailure);
+                                           partitionColumnIndex, executorState,
+                                           stopOnFailure);
     ExecuteIntoDestReceiver(selectQuery, paramListInfo, (DestReceiver *) copyDest);
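For the INSERT INTO ... SELECT path above, the partition column index is taken from the insert target list (list_length(columnNameList) at the moment the partition column is appended), presumably because the tuples handed to the destination receiver are shaped by that target list rather than by the table's physical tuple descriptor. The INSERT ... SELECT regression test updated in this commit covers it roughly as follows (a minimal sketch based on that test; raw_events_second is a pre-existing distributed table in the test suite):

    CREATE TABLE drop_col_table (col1 text, col2 text, col3 text);
    SELECT create_distributed_table('drop_col_table', 'col2');
    ALTER TABLE drop_col_table DROP COLUMN col1;
    -- col2 is the partition column; the dropped col1 must not shift its index
    INSERT INTO drop_col_table (col3, col2)
    SELECT value_4, user_id FROM raw_events_second LIMIT 5;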

View File

@@ -20,6 +20,9 @@
 #include "tcop/dest.h"
+#define INVALID_PARTITION_COLUMN_INDEX -1
 /*
  * A smaller version of copy.c's CopyStateData, trimmed to the elements
  * necessary to copy out results. While it'd be a bit nicer to share code,
@@ -93,6 +96,7 @@ typedef struct CitusCopyDestReceiver
 /* function declarations for copying into a distributed table */
 extern CitusCopyDestReceiver * CreateCitusCopyDestReceiver(Oid relationId,
                                                            List *columnNameList,
+                                                           int partitionColumnIndex,
                                                            EState *executorState,
                                                            bool stopOnFailure);
 extern FmgrInfo * ColumnOutputFunctions(TupleDesc rowDescriptor, bool binaryFormat);

View File

@@ -533,21 +533,28 @@ END;
 CREATE TABLE data_load_test (col1 int, col2 text, col3 text, "CoL4"")" int);
 INSERT INTO data_load_test VALUES (132, 'hello', 'world');
 INSERT INTO data_load_test VALUES (243, 'world', 'hello');
-ALTER TABLE data_load_test DROP COLUMN col2;
-SELECT create_distributed_table('data_load_test', 'col1');
+ALTER TABLE data_load_test DROP COLUMN col1;
+SELECT create_distributed_table('data_load_test', 'col3');
 NOTICE: Copying data from local table...
 create_distributed_table
 --------------------------
 (1 row)
-SELECT * FROM data_load_test;
-col1 | col3 | CoL4")
-------+-------+--------
-132 | world |
-243 | hello |
+SELECT * FROM data_load_test ORDER BY col2;
+col2 | col3 | CoL4")
+-------+-------+--------
+hello | world |
+world | hello |
 (2 rows)
+-- make sure the tuple went to the right shard
+SELECT * FROM data_load_test WHERE col3 = 'world';
+col2 | col3 | CoL4")
+-------+-------+--------
+hello | world |
+(1 row)
 DROP TABLE data_load_test;
 SET citus.shard_replication_factor TO default;
 SET citus.shard_count to 4;

View File

@@ -2401,21 +2401,35 @@ SELECT user_id, value_4 FROM test_view ORDER BY user_id, value_4;
 -- Drop the view now, because the column we are about to drop depends on it
 DROP VIEW test_view;
 -- Make sure we handle dropped columns correctly
-TRUNCATE raw_events_first;
-ALTER TABLE raw_events_first DROP COLUMN value_1;
-INSERT INTO raw_events_first (user_id, value_4)
+CREATE TABLE drop_col_table (col1 text, col2 text, col3 text);
+SELECT create_distributed_table('drop_col_table', 'col2');
+create_distributed_table
+--------------------------
+(1 row)
+ALTER TABLE drop_col_table DROP COLUMN col1;
+INSERT INTO drop_col_table (col3, col2)
 SELECT value_4, user_id FROM raw_events_second LIMIT 5;
-SELECT user_id, value_4 FROM raw_events_first ORDER BY user_id;
-user_id | value_4
----------+---------
-3 | 1
-6 | 2
-9 | 3
-12 | 4
-15 | 5
+SELECT * FROM drop_col_table ORDER BY col2, col3;
+col2 | col3
+------+------
+1 | 3
+2 | 6
+3 | 9
+4 | 12
+5 | 15
 (5 rows)
+-- make sure the tuple went to the right shard
+SELECT * FROM drop_col_table WHERE col2 = '1';
+col2 | col3
+------+------
+1 | 3
+(1 row)
 RESET client_min_messages;
+DROP TABLE drop_col_table;
 DROP TABLE raw_table;
 DROP TABLE summary_table;
 DROP TABLE raw_events_first CASCADE;

View File

@@ -781,6 +781,26 @@ SELECT create_distributed_table('tt1','id');
 DROP TABLE tt1;
 END;
+-- Test dropping a column in front of the partition column
+CREATE TABLE drop_copy_test_table (col1 int, col2 int, col3 int, col4 int);
+SELECT create_distributed_table('drop_copy_test_table','col3');
+ALTER TABLE drop_copy_test_table drop column col1;
+COPY drop_copy_test_table (col2,col3,col4) from STDIN with CSV;
+,1,
+,2,
+\.
+SELECT * FROM drop_copy_test_table WHERE col3 = 1;
+ALTER TABLE drop_copy_test_table drop column col4;
+COPY drop_copy_test_table (col2,col3) from STDIN with CSV;
+,1
+,2
+\.
+SELECT * FROM drop_copy_test_table WHERE col3 = 1;
+DROP TABLE drop_copy_test_table;
 -- There should be no "tt1" shard on the worker nodes
 \c - - - :worker_1_port
 SELECT relname FROM pg_class WHERE relname LIKE 'tt1%';

View File

@@ -1050,6 +1050,34 @@ SELECT create_distributed_table('tt1','id');
 \copy tt1 from STDIN;
 DROP TABLE tt1;
 END;
+-- Test dropping a column in front of the partition column
+CREATE TABLE drop_copy_test_table (col1 int, col2 int, col3 int, col4 int);
+SELECT create_distributed_table('drop_copy_test_table','col3');
+create_distributed_table
+--------------------------
+(1 row)
+ALTER TABLE drop_copy_test_table drop column col1;
+NOTICE: using one-phase commit for distributed DDL commands
+HINT: You can enable two-phase commit for extra safety with: SET citus.multi_shard_commit_protocol TO '2pc'
+COPY drop_copy_test_table (col2,col3,col4) from STDIN with CSV;
+SELECT * FROM drop_copy_test_table WHERE col3 = 1;
+col2 | col3 | col4
+------+------+------
+| 1 |
+(1 row)
+ALTER TABLE drop_copy_test_table drop column col4;
+COPY drop_copy_test_table (col2,col3) from STDIN with CSV;
+SELECT * FROM drop_copy_test_table WHERE col3 = 1;
+col2 | col3
+------+------
+| 1
+| 1
+(2 rows)
+DROP TABLE drop_copy_test_table;
 -- There should be no "tt1" shard on the worker nodes
 \c - - - :worker_1_port
 SELECT relname FROM pg_class WHERE relname LIKE 'tt1%';

View File

@@ -284,9 +284,11 @@ END;
 CREATE TABLE data_load_test (col1 int, col2 text, col3 text, "CoL4"")" int);
 INSERT INTO data_load_test VALUES (132, 'hello', 'world');
 INSERT INTO data_load_test VALUES (243, 'world', 'hello');
-ALTER TABLE data_load_test DROP COLUMN col2;
-SELECT create_distributed_table('data_load_test', 'col1');
-SELECT * FROM data_load_test;
+ALTER TABLE data_load_test DROP COLUMN col1;
+SELECT create_distributed_table('data_load_test', 'col3');
+SELECT * FROM data_load_test ORDER BY col2;
+-- make sure the tuple went to the right shard
+SELECT * FROM data_load_test WHERE col3 = 'world';
 DROP TABLE data_load_test;
 SET citus.shard_replication_factor TO default;

View File

@@ -1903,17 +1903,22 @@ SELECT user_id, value_4 FROM test_view ORDER BY user_id, value_4;
 DROP VIEW test_view;
 -- Make sure we handle dropped columns correctly
-TRUNCATE raw_events_first;
+CREATE TABLE drop_col_table (col1 text, col2 text, col3 text);
+SELECT create_distributed_table('drop_col_table', 'col2');
-ALTER TABLE raw_events_first DROP COLUMN value_1;
+ALTER TABLE drop_col_table DROP COLUMN col1;
-INSERT INTO raw_events_first (user_id, value_4)
+INSERT INTO drop_col_table (col3, col2)
 SELECT value_4, user_id FROM raw_events_second LIMIT 5;
-SELECT user_id, value_4 FROM raw_events_first ORDER BY user_id;
+SELECT * FROM drop_col_table ORDER BY col2, col3;
+-- make sure the tuple went to the right shard
+SELECT * FROM drop_col_table WHERE col2 = '1';
 RESET client_min_messages;
+DROP TABLE drop_col_table;
 DROP TABLE raw_table;
 DROP TABLE summary_table;
 DROP TABLE raw_events_first CASCADE;