mirror of https://github.com/citusdata/citus.git
Address review feedback in create_distributed_table data loading
parent
db98c28354
commit
56d4d375c2
|
@ -73,13 +73,14 @@ int ReplicationModel = REPLICATION_MODEL_COORDINATOR;
|
||||||
|
|
||||||
|
|
||||||
/* local function forward declarations */
|
/* local function forward declarations */
|
||||||
static void CreateReferenceTable(Oid relationId);
|
static void CreateReferenceTable(Oid distributedRelationId);
|
||||||
static void ConvertToDistributedTable(Oid relationId, char *distributionColumnName,
|
static void ConvertToDistributedTable(Oid relationId, char *distributionColumnName,
|
||||||
char distributionMethod, char replicationModel,
|
char distributionMethod, char replicationModel,
|
||||||
uint32 colocationId);
|
uint32 colocationId, bool allowEmpty);
|
||||||
static char LookupDistributionMethod(Oid distributionMethodOid);
|
static char LookupDistributionMethod(Oid distributionMethodOid);
|
||||||
static Oid SupportFunctionForColumn(Var *partitionColumn, Oid accessMethodId,
|
static Oid SupportFunctionForColumn(Var *partitionColumn, Oid accessMethodId,
|
||||||
int16 supportFunctionNumber);
|
int16 supportFunctionNumber);
|
||||||
|
static bool LocalTableEmpty(Oid tableId);
|
||||||
static void ErrorIfNotSupportedConstraint(Relation relation, char distributionMethod,
|
static void ErrorIfNotSupportedConstraint(Relation relation, char distributionMethod,
|
||||||
Var *distributionColumn, uint32 colocationId);
|
Var *distributionColumn, uint32 colocationId);
|
||||||
static void ErrorIfNotSupportedForeignConstraint(Relation relation,
|
static void ErrorIfNotSupportedForeignConstraint(Relation relation,
|
||||||
|
@ -90,7 +91,8 @@ static void CreateHashDistributedTable(Oid relationId, char *distributionColumnN
|
||||||
char *colocateWithTableName,
|
char *colocateWithTableName,
|
||||||
int shardCount, int replicationFactor);
|
int shardCount, int replicationFactor);
|
||||||
static Oid ColumnType(Oid relationId, char *columnName);
|
static Oid ColumnType(Oid relationId, char *columnName);
|
||||||
static void CopyLocalData(Oid relationId);
|
static void CopyLocalDataIntoShards(Oid relationId);
|
||||||
|
static List * TupleDescColumnNameList(TupleDesc tupleDescriptor);
|
||||||
|
|
||||||
/* exports for SQL callable functions */
|
/* exports for SQL callable functions */
|
||||||
PG_FUNCTION_INFO_V1(master_create_distributed_table);
|
PG_FUNCTION_INFO_V1(master_create_distributed_table);
|
||||||
|
@ -114,6 +116,7 @@ master_create_distributed_table(PG_FUNCTION_ARGS)
|
||||||
|
|
||||||
char *distributionColumnName = text_to_cstring(distributionColumnText);
|
char *distributionColumnName = text_to_cstring(distributionColumnText);
|
||||||
char distributionMethod = LookupDistributionMethod(distributionMethodOid);
|
char distributionMethod = LookupDistributionMethod(distributionMethodOid);
|
||||||
|
bool allowEmpty = false;
|
||||||
|
|
||||||
EnsureCoordinator();
|
EnsureCoordinator();
|
||||||
|
|
||||||
|
@ -129,7 +132,7 @@ master_create_distributed_table(PG_FUNCTION_ARGS)
|
||||||
|
|
||||||
ConvertToDistributedTable(distributedRelationId, distributionColumnName,
|
ConvertToDistributedTable(distributedRelationId, distributionColumnName,
|
||||||
distributionMethod, REPLICATION_MODEL_COORDINATOR,
|
distributionMethod, REPLICATION_MODEL_COORDINATOR,
|
||||||
INVALID_COLOCATION_ID);
|
INVALID_COLOCATION_ID, allowEmpty);
|
||||||
|
|
||||||
PG_RETURN_VOID();
|
PG_RETURN_VOID();
|
||||||
}
|
}
|
||||||
|
@ -151,7 +154,6 @@ create_distributed_table(PG_FUNCTION_ARGS)
|
||||||
char distributionMethod = LookupDistributionMethod(distributionMethodOid);
|
char distributionMethod = LookupDistributionMethod(distributionMethodOid);
|
||||||
text *colocateWithTableNameText = NULL;
|
text *colocateWithTableNameText = NULL;
|
||||||
char *colocateWithTableName = NULL;
|
char *colocateWithTableName = NULL;
|
||||||
char relationKind = 0;
|
|
||||||
|
|
||||||
EnsureCoordinator();
|
EnsureCoordinator();
|
||||||
|
|
||||||
|
@ -186,6 +188,8 @@ create_distributed_table(PG_FUNCTION_ARGS)
|
||||||
/* if distribution method is not hash, just create partition metadata */
|
/* if distribution method is not hash, just create partition metadata */
|
||||||
if (distributionMethod != DISTRIBUTE_BY_HASH)
|
if (distributionMethod != DISTRIBUTE_BY_HASH)
|
||||||
{
|
{
|
||||||
|
bool allowEmpty = false;
|
||||||
|
|
||||||
if (ReplicationModel != REPLICATION_MODEL_COORDINATOR)
|
if (ReplicationModel != REPLICATION_MODEL_COORDINATOR)
|
||||||
{
|
{
|
||||||
ereport(NOTICE, (errmsg("using statement-based replication"),
|
ereport(NOTICE, (errmsg("using statement-based replication"),
|
||||||
|
@ -195,7 +199,7 @@ create_distributed_table(PG_FUNCTION_ARGS)
|
||||||
|
|
||||||
ConvertToDistributedTable(relationId, distributionColumnName,
|
ConvertToDistributedTable(relationId, distributionColumnName,
|
||||||
distributionMethod, REPLICATION_MODEL_COORDINATOR,
|
distributionMethod, REPLICATION_MODEL_COORDINATOR,
|
||||||
INVALID_COLOCATION_ID);
|
INVALID_COLOCATION_ID, allowEmpty);
|
||||||
PG_RETURN_VOID();
|
PG_RETURN_VOID();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -204,13 +208,6 @@ create_distributed_table(PG_FUNCTION_ARGS)
|
||||||
colocateWithTableName, ShardCount,
|
colocateWithTableName, ShardCount,
|
||||||
ShardReplicationFactor);
|
ShardReplicationFactor);
|
||||||
|
|
||||||
/* copy over data from regular relations */
|
|
||||||
relationKind = get_rel_relkind(relationId);
|
|
||||||
if (relationKind == RELKIND_RELATION)
|
|
||||||
{
|
|
||||||
CopyLocalData(relationId);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ShouldSyncTableMetadata(relationId))
|
if (ShouldSyncTableMetadata(relationId))
|
||||||
{
|
{
|
||||||
CreateTableMetadataOnWorkers(relationId);
|
CreateTableMetadataOnWorkers(relationId);
|
||||||
|
@ -229,17 +226,9 @@ Datum
|
||||||
create_reference_table(PG_FUNCTION_ARGS)
|
create_reference_table(PG_FUNCTION_ARGS)
|
||||||
{
|
{
|
||||||
Oid relationId = PG_GETARG_OID(0);
|
Oid relationId = PG_GETARG_OID(0);
|
||||||
char relationKind = 0;
|
|
||||||
|
|
||||||
CreateReferenceTable(relationId);
|
CreateReferenceTable(relationId);
|
||||||
|
|
||||||
/* copy over data from regular relations */
|
|
||||||
relationKind = get_rel_relkind(relationId);
|
|
||||||
if (relationKind == RELKIND_RELATION)
|
|
||||||
{
|
|
||||||
CopyLocalData(relationId);
|
|
||||||
}
|
|
||||||
|
|
||||||
PG_RETURN_VOID();
|
PG_RETURN_VOID();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -256,6 +245,8 @@ CreateReferenceTable(Oid relationId)
|
||||||
List *workerNodeList = WorkerNodeList();
|
List *workerNodeList = WorkerNodeList();
|
||||||
int replicationFactor = list_length(workerNodeList);
|
int replicationFactor = list_length(workerNodeList);
|
||||||
char *distributionColumnName = NULL;
|
char *distributionColumnName = NULL;
|
||||||
|
bool canLoadData = false;
|
||||||
|
char relationKind = 0;
|
||||||
|
|
||||||
EnsureCoordinator();
|
EnsureCoordinator();
|
||||||
|
|
||||||
|
@ -269,16 +260,30 @@ CreateReferenceTable(Oid relationId)
|
||||||
errdetail("There are no active worker nodes.")));
|
errdetail("There are no active worker nodes.")));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* we only support data loading for regular (non-foreign) relations */
|
||||||
|
relationKind = get_rel_relkind(relationId);
|
||||||
|
if (relationKind == RELKIND_RELATION)
|
||||||
|
{
|
||||||
|
canLoadData = true;
|
||||||
|
}
|
||||||
|
|
||||||
colocationId = CreateReferenceTableColocationId();
|
colocationId = CreateReferenceTableColocationId();
|
||||||
|
|
||||||
/* first, convert the relation into distributed relation */
|
/* first, convert the relation into distributed relation */
|
||||||
ConvertToDistributedTable(relationId, distributionColumnName,
|
ConvertToDistributedTable(relationId, distributionColumnName,
|
||||||
DISTRIBUTE_BY_NONE, REPLICATION_MODEL_2PC, colocationId);
|
DISTRIBUTE_BY_NONE, REPLICATION_MODEL_2PC, colocationId,
|
||||||
|
canLoadData);
|
||||||
|
|
||||||
/* now, create the single shard replicated to all nodes */
|
/* now, create the single shard replicated to all nodes */
|
||||||
CreateReferenceTableShard(relationId);
|
CreateReferenceTableShard(relationId);
|
||||||
|
|
||||||
CreateTableMetadataOnWorkers(relationId);
|
CreateTableMetadataOnWorkers(relationId);
|
||||||
|
|
||||||
|
/* copy over data from regular relations */
|
||||||
|
if (canLoadData)
|
||||||
|
{
|
||||||
|
CopyLocalDataIntoShards(relationId);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -296,7 +301,7 @@ CreateReferenceTable(Oid relationId)
|
||||||
static void
|
static void
|
||||||
ConvertToDistributedTable(Oid relationId, char *distributionColumnName,
|
ConvertToDistributedTable(Oid relationId, char *distributionColumnName,
|
||||||
char distributionMethod, char replicationModel,
|
char distributionMethod, char replicationModel,
|
||||||
uint32 colocationId)
|
uint32 colocationId, bool allowEmpty)
|
||||||
{
|
{
|
||||||
Relation relation = NULL;
|
Relation relation = NULL;
|
||||||
TupleDesc relationDesc = NULL;
|
TupleDesc relationDesc = NULL;
|
||||||
|
@ -346,6 +351,17 @@ ConvertToDistributedTable(Oid relationId, char *distributionColumnName,
|
||||||
"foreign tables.")));
|
"foreign tables.")));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* check that the relation does not contain any rows */
|
||||||
|
if (!allowEmpty && !LocalTableEmpty(relationId))
|
||||||
|
{
|
||||||
|
ereport(ERROR, (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
|
||||||
|
errmsg("cannot distribute relation \"%s\"",
|
||||||
|
relationName),
|
||||||
|
errdetail("Relation \"%s\" contains data.",
|
||||||
|
relationName),
|
||||||
|
errhint("Empty your table before distributing it.")));
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Distribution column returns NULL for reference tables,
|
* Distribution column returns NULL for reference tables,
|
||||||
* but it is not used below for reference tables.
|
* but it is not used below for reference tables.
|
||||||
|
@ -829,6 +845,62 @@ SupportFunctionForColumn(Var *partitionColumn, Oid accessMethodId,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* LocalTableEmpty checks whether the given local table contains any rows and
|
||||||
|
* returns false if there is any data. This function is only for local tables and
|
||||||
|
* should not be called for distributed tables.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
LocalTableEmpty(Oid tableId)
|
||||||
|
{
|
||||||
|
Oid schemaId = get_rel_namespace(tableId);
|
||||||
|
char *schemaName = get_namespace_name(schemaId);
|
||||||
|
char *tableName = get_rel_name(tableId);
|
||||||
|
char *tableQualifiedName = quote_qualified_identifier(schemaName, tableName);
|
||||||
|
|
||||||
|
int spiConnectionResult = 0;
|
||||||
|
int spiQueryResult = 0;
|
||||||
|
StringInfo selectExistQueryString = makeStringInfo();
|
||||||
|
|
||||||
|
HeapTuple tuple = NULL;
|
||||||
|
Datum hasDataDatum = 0;
|
||||||
|
bool localTableEmpty = false;
|
||||||
|
bool columnNull = false;
|
||||||
|
bool readOnly = true;
|
||||||
|
|
||||||
|
int rowId = 0;
|
||||||
|
int attributeId = 1;
|
||||||
|
|
||||||
|
AssertArg(!IsDistributedTable(tableId));
|
||||||
|
|
||||||
|
spiConnectionResult = SPI_connect();
|
||||||
|
if (spiConnectionResult != SPI_OK_CONNECT)
|
||||||
|
{
|
||||||
|
ereport(ERROR, (errmsg("could not connect to SPI manager")));
|
||||||
|
}
|
||||||
|
|
||||||
|
appendStringInfo(selectExistQueryString, SELECT_EXIST_QUERY, tableQualifiedName);
|
||||||
|
|
||||||
|
spiQueryResult = SPI_execute(selectExistQueryString->data, readOnly, 0);
|
||||||
|
if (spiQueryResult != SPI_OK_SELECT)
|
||||||
|
{
|
||||||
|
ereport(ERROR, (errmsg("execution was not successful \"%s\"",
|
||||||
|
selectExistQueryString->data)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* we expect the SELECT EXISTS query to return a single value in a single row */
|
||||||
|
Assert(SPI_processed == 1);
|
||||||
|
|
||||||
|
tuple = SPI_tuptable->vals[rowId];
|
||||||
|
hasDataDatum = SPI_getbinval(tuple, SPI_tuptable->tupdesc, attributeId, &columnNull);
|
||||||
|
localTableEmpty = !DatumGetBool(hasDataDatum);
|
||||||
|
|
||||||
|
SPI_finish();
|
||||||
|
|
||||||
|
return localTableEmpty;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* CreateTruncateTrigger creates a truncate trigger on table identified by relationId
|
* CreateTruncateTrigger creates a truncate trigger on table identified by relationId
|
||||||
* and assigns citus_truncate_trigger() as handler.
|
* and assigns citus_truncate_trigger() as handler.
|
||||||
|
@ -872,6 +944,8 @@ CreateHashDistributedTable(Oid relationId, char *distributionColumnName,
|
||||||
uint32 colocationId = INVALID_COLOCATION_ID;
|
uint32 colocationId = INVALID_COLOCATION_ID;
|
||||||
Oid sourceRelationId = InvalidOid;
|
Oid sourceRelationId = InvalidOid;
|
||||||
Oid distributionColumnType = InvalidOid;
|
Oid distributionColumnType = InvalidOid;
|
||||||
|
bool canLoadData = false;
|
||||||
|
char relationKind = 0;
|
||||||
|
|
||||||
/* get an access lock on the relation to prevent DROP TABLE and ALTER TABLE */
|
/* get an access lock on the relation to prevent DROP TABLE and ALTER TABLE */
|
||||||
distributedRelation = relation_open(relationId, AccessShareLock);
|
distributedRelation = relation_open(relationId, AccessShareLock);
|
||||||
|
@ -914,9 +988,16 @@ CreateHashDistributedTable(Oid relationId, char *distributionColumnName,
|
||||||
colocationId = TableColocationId(sourceRelationId);
|
colocationId = TableColocationId(sourceRelationId);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* we only support data loading for regular (non-foreign) relations */
|
||||||
|
relationKind = get_rel_relkind(relationId);
|
||||||
|
if (relationKind == RELKIND_RELATION)
|
||||||
|
{
|
||||||
|
canLoadData = true;
|
||||||
|
}
|
||||||
|
|
||||||
/* create distributed table metadata */
|
/* create distributed table metadata */
|
||||||
ConvertToDistributedTable(relationId, distributionColumnName, DISTRIBUTE_BY_HASH,
|
ConvertToDistributedTable(relationId, distributionColumnName, DISTRIBUTE_BY_HASH,
|
||||||
ReplicationModel, colocationId);
|
ReplicationModel, colocationId, canLoadData);
|
||||||
|
|
||||||
/* create shards */
|
/* create shards */
|
||||||
if (sourceRelationId != InvalidOid)
|
if (sourceRelationId != InvalidOid)
|
||||||
|
@ -933,6 +1014,12 @@ CreateHashDistributedTable(Oid relationId, char *distributionColumnName,
|
||||||
CreateShardsWithRoundRobinPolicy(relationId, shardCount, replicationFactor);
|
CreateShardsWithRoundRobinPolicy(relationId, shardCount, replicationFactor);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* copy over data from regular relations */
|
||||||
|
if (canLoadData)
|
||||||
|
{
|
||||||
|
CopyLocalDataIntoShards(relationId);
|
||||||
|
}
|
||||||
|
|
||||||
heap_close(pgDistColocation, NoLock);
|
heap_close(pgDistColocation, NoLock);
|
||||||
relation_close(distributedRelation, NoLock);
|
relation_close(distributedRelation, NoLock);
|
||||||
}
|
}
|
||||||
|
@ -981,16 +1068,37 @@ EnsureReplicationSettings(Oid relationId, char replicationModel)
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* CopyLocalData copies local data into the shards.
|
* CopyLocalDataIntoShards copies data from the local table, which is hidden
|
||||||
|
* after converting it to a distributed table, into the shards of the distributed
|
||||||
|
* table.
|
||||||
|
*
|
||||||
|
* This function uses CitusCopyDestReceiver to invoke the distributed COPY logic.
|
||||||
|
* We cannot use a regular COPY here since that cannot read from a table. Instead
|
||||||
|
* we read from the table and pass each tuple to the CitusCopyDestReceiver which
|
||||||
|
* opens a connection and starts a COPY for each shard placement that will have
|
||||||
|
* data.
|
||||||
|
*
|
||||||
|
* We could call the planner and executor here and send the output to the
|
||||||
|
* DestReceiver, but we are in a tricky spot here since Citus is already
|
||||||
|
* intercepting queries on this table in the planner and executor hooks and we
|
||||||
|
* want to read from the local table. To keep it simple, we perform a heap scan
|
||||||
|
* directly on the table.
|
||||||
|
*
|
||||||
|
* Any writes on the table that are started during this operation will be handled
|
||||||
|
* as distributed queries once the current transaction commits. SELECTs will
|
||||||
|
* continue to read from the local table until the current transaction commits,
|
||||||
|
* after which new SELECTs will be handled as distributed queries.
|
||||||
|
*
|
||||||
|
* After copying local data into the distributed table, the local data remains
|
||||||
|
* in place and should be truncated at a later time.
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
CopyLocalData(Oid relationId)
|
CopyLocalDataIntoShards(Oid distributedRelationId)
|
||||||
{
|
{
|
||||||
DestReceiver *copyDest = NULL;
|
DestReceiver *copyDest = NULL;
|
||||||
List *columnNameList = NIL;
|
List *columnNameList = NIL;
|
||||||
Relation distributedRelation = NULL;
|
Relation distributedRelation = NULL;
|
||||||
TupleDesc tupleDescriptor = NULL;
|
TupleDesc tupleDescriptor = NULL;
|
||||||
int columnIndex = 0;
|
|
||||||
bool stopOnFailure = true;
|
bool stopOnFailure = true;
|
||||||
|
|
||||||
EState *estate = NULL;
|
EState *estate = NULL;
|
||||||
|
@ -1001,9 +1109,86 @@ CopyLocalData(Oid relationId)
|
||||||
TupleTableSlot *slot = NULL;
|
TupleTableSlot *slot = NULL;
|
||||||
uint64 rowsCopied = 0;
|
uint64 rowsCopied = 0;
|
||||||
|
|
||||||
distributedRelation = heap_open(relationId, ExclusiveLock);
|
/* take an ExclusiveLock to block all operations except SELECT */
|
||||||
|
distributedRelation = heap_open(distributedRelationId, ExclusiveLock);
|
||||||
|
|
||||||
|
/* get the table columns */
|
||||||
tupleDescriptor = RelationGetDescr(distributedRelation);
|
tupleDescriptor = RelationGetDescr(distributedRelation);
|
||||||
slot = MakeSingleTupleTableSlot(tupleDescriptor);
|
slot = MakeSingleTupleTableSlot(tupleDescriptor);
|
||||||
|
columnNameList = TupleDescColumnNameList(tupleDescriptor);
|
||||||
|
|
||||||
|
/* initialise per-tuple memory context */
|
||||||
|
estate = CreateExecutorState();
|
||||||
|
econtext = GetPerTupleExprContext(estate);
|
||||||
|
econtext->ecxt_scantuple = slot;
|
||||||
|
|
||||||
|
copyDest =
|
||||||
|
(DestReceiver *) CreateCitusCopyDestReceiver(distributedRelationId,
|
||||||
|
columnNameList, estate,
|
||||||
|
stopOnFailure);
|
||||||
|
|
||||||
|
/* initialise state for writing to shards; connections are opened on demand */
|
||||||
|
copyDest->rStartup(copyDest, 0, tupleDescriptor);
|
||||||
|
|
||||||
|
/* begin reading from local table */
|
||||||
|
scan = heap_beginscan(distributedRelation, GetActiveSnapshot(), 0, NULL);
|
||||||
|
|
||||||
|
oldContext = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
|
||||||
|
|
||||||
|
while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
|
||||||
|
{
|
||||||
|
/* materialize tuple and send it to a shard */
|
||||||
|
ExecStoreTuple(tuple, slot, InvalidBuffer, false);
|
||||||
|
copyDest->receiveSlot(slot, copyDest);
|
||||||
|
|
||||||
|
/* clear tuple memory */
|
||||||
|
ResetPerTupleExprContext(estate);
|
||||||
|
|
||||||
|
/* make sure we roll back on cancellation */
|
||||||
|
CHECK_FOR_INTERRUPTS();
|
||||||
|
|
||||||
|
if (rowsCopied == 0)
|
||||||
|
{
|
||||||
|
ereport(NOTICE, (errmsg("Copying data from local table...")));
|
||||||
|
}
|
||||||
|
|
||||||
|
rowsCopied++;
|
||||||
|
|
||||||
|
if (rowsCopied % 1000000 == 0)
|
||||||
|
{
|
||||||
|
ereport(DEBUG1, (errmsg("Copied %ld rows", rowsCopied)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rowsCopied % 1000000 != 0)
|
||||||
|
{
|
||||||
|
ereport(DEBUG1, (errmsg("Copied %ld rows", rowsCopied)));
|
||||||
|
}
|
||||||
|
|
||||||
|
MemoryContextSwitchTo(oldContext);
|
||||||
|
|
||||||
|
/* finish reading from the local table */
|
||||||
|
heap_endscan(scan);
|
||||||
|
|
||||||
|
/* finish writing into the shards */
|
||||||
|
copyDest->rShutdown(copyDest);
|
||||||
|
|
||||||
|
/* free memory and close the relation */
|
||||||
|
ExecDropSingleTupleTableSlot(slot);
|
||||||
|
FreeExecutorState(estate);
|
||||||
|
heap_close(distributedRelation, NoLock);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* TupleDescColumnNameList returns a list of column names for the given tuple
|
||||||
|
* descriptor as plain strings.
|
||||||
|
*/
|
||||||
|
static List *
|
||||||
|
TupleDescColumnNameList(TupleDesc tupleDescriptor)
|
||||||
|
{
|
||||||
|
List *columnNameList = NIL;
|
||||||
|
int columnIndex = 0;
|
||||||
|
|
||||||
for (columnIndex = 0; columnIndex < tupleDescriptor->natts; columnIndex++)
|
for (columnIndex = 0; columnIndex < tupleDescriptor->natts; columnIndex++)
|
||||||
{
|
{
|
||||||
|
@ -1018,52 +1203,5 @@ CopyLocalData(Oid relationId)
|
||||||
columnNameList = lappend(columnNameList, columnName);
|
columnNameList = lappend(columnNameList, columnName);
|
||||||
}
|
}
|
||||||
|
|
||||||
estate = CreateExecutorState();
|
return columnNameList;
|
||||||
econtext = GetPerTupleExprContext(estate);
|
|
||||||
econtext->ecxt_scantuple = slot;
|
|
||||||
|
|
||||||
copyDest =
|
|
||||||
(DestReceiver *) CreateCitusCopyDestReceiver(relationId, columnNameList,
|
|
||||||
estate, stopOnFailure);
|
|
||||||
|
|
||||||
copyDest->rStartup(copyDest, 0, tupleDescriptor);
|
|
||||||
|
|
||||||
scan = heap_beginscan(distributedRelation, GetActiveSnapshot(), 0, NULL);
|
|
||||||
|
|
||||||
oldContext = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
|
|
||||||
|
|
||||||
while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
|
|
||||||
{
|
|
||||||
ExecStoreTuple(tuple, slot, InvalidBuffer, false);
|
|
||||||
|
|
||||||
copyDest->receiveSlot(slot, copyDest);
|
|
||||||
|
|
||||||
CHECK_FOR_INTERRUPTS();
|
|
||||||
|
|
||||||
ResetPerTupleExprContext(estate);
|
|
||||||
|
|
||||||
if (rowsCopied == 0)
|
|
||||||
{
|
|
||||||
ereport(NOTICE, (errmsg("Copying data from local table...")));
|
|
||||||
}
|
|
||||||
|
|
||||||
rowsCopied++;
|
|
||||||
|
|
||||||
if (rowsCopied % 1000000 == 0)
|
|
||||||
{
|
|
||||||
ereport(NOTICE, (errmsg("Copied %ld rows", rowsCopied)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (rowsCopied % 1000000 != 0)
|
|
||||||
{
|
|
||||||
ereport(NOTICE, (errmsg("Copied %ld rows", rowsCopied)));
|
|
||||||
}
|
|
||||||
|
|
||||||
MemoryContextSwitchTo(oldContext);
|
|
||||||
heap_endscan(scan);
|
|
||||||
copyDest->rShutdown(copyDest);
|
|
||||||
ExecDropSingleTupleTableSlot(slot);
|
|
||||||
FreeExecutorState(estate);
|
|
||||||
heap_close(distributedRelation, NoLock);
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -355,26 +355,134 @@ SELECT repmodel FROM pg_dist_partition WHERE logicalrelid='repmodel_test'::regcl
|
||||||
DROP TABLE repmodel_test;
|
DROP TABLE repmodel_test;
|
||||||
RESET citus.replication_model;
|
RESET citus.replication_model;
|
||||||
-- Test initial data loading
|
-- Test initial data loading
|
||||||
CREATE TABLE data_load_test (col1 int, col2 text);
|
CREATE TABLE data_load_test (col1 int, col2 text, col3 serial);
|
||||||
INSERT INTO data_load_test VALUES (132, 'hello');
|
INSERT INTO data_load_test VALUES (132, 'hello');
|
||||||
INSERT INTO data_load_test VALUES (243, 'world');
|
INSERT INTO data_load_test VALUES (243, 'world');
|
||||||
-- create_distributed_table copies data into the distributed table
|
-- table must be empty when using append- or range-partitioning
|
||||||
|
SELECT create_distributed_table('data_load_test', 'col1', 'append');
|
||||||
|
ERROR: cannot distribute relation "data_load_test"
|
||||||
|
DETAIL: Relation "data_load_test" contains data.
|
||||||
|
HINT: Empty your table before distributing it.
|
||||||
|
SELECT create_distributed_table('data_load_test', 'col1', 'range');
|
||||||
|
ERROR: cannot distribute relation "data_load_test"
|
||||||
|
DETAIL: Relation "data_load_test" contains data.
|
||||||
|
HINT: Empty your table before distributing it.
|
||||||
|
-- table must be empty when using master_create_distributed_table (no shards created)
|
||||||
|
SELECT master_create_distributed_table('data_load_test', 'col1', 'hash');
|
||||||
|
ERROR: cannot distribute relation "data_load_test"
|
||||||
|
DETAIL: Relation "data_load_test" contains data.
|
||||||
|
HINT: Empty your table before distributing it.
|
||||||
|
-- create_distributed_table creates shards and copies data into the distributed table
|
||||||
SELECT create_distributed_table('data_load_test', 'col1');
|
SELECT create_distributed_table('data_load_test', 'col1');
|
||||||
NOTICE: Copying data from local table...
|
NOTICE: Copying data from local table...
|
||||||
NOTICE: Copied 2 rows
|
|
||||||
create_distributed_table
|
create_distributed_table
|
||||||
--------------------------
|
--------------------------
|
||||||
|
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
SELECT * FROM data_load_test;
|
SELECT * FROM data_load_test ORDER BY col1;
|
||||||
col1 | col2
|
col1 | col2 | col3
|
||||||
------+-------
|
------+-------+------
|
||||||
132 | hello
|
132 | hello | 1
|
||||||
243 | world
|
243 | world | 2
|
||||||
(2 rows)
|
(2 rows)
|
||||||
|
|
||||||
DROP TABLE data_load_test;
|
DROP TABLE data_load_test;
|
||||||
|
-- ensure writes in the same transaction as create_distributed_table are visible
|
||||||
|
BEGIN;
|
||||||
|
CREATE TABLE data_load_test (col1 int, col2 text, col3 serial);
|
||||||
|
INSERT INTO data_load_test VALUES (132, 'hello');
|
||||||
|
SELECT create_distributed_table('data_load_test', 'col1');
|
||||||
|
NOTICE: Copying data from local table...
|
||||||
|
create_distributed_table
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
INSERT INTO data_load_test VALUES (243, 'world');
|
||||||
|
END;
|
||||||
|
SELECT * FROM data_load_test ORDER BY col1;
|
||||||
|
col1 | col2 | col3
|
||||||
|
------+-------+------
|
||||||
|
132 | hello | 1
|
||||||
|
243 | world | 2
|
||||||
|
(2 rows)
|
||||||
|
|
||||||
|
DROP TABLE data_load_test;
|
||||||
|
-- creating co-located distributed tables in the same transaction works
|
||||||
|
BEGIN;
|
||||||
|
CREATE TABLE data_load_test1 (col1 int, col2 text, col3 serial);
|
||||||
|
INSERT INTO data_load_test1 VALUES (132, 'hello');
|
||||||
|
SELECT create_distributed_table('data_load_test1', 'col1');
|
||||||
|
NOTICE: Copying data from local table...
|
||||||
|
create_distributed_table
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
CREATE TABLE data_load_test2 (col1 int, col2 text, col3 serial);
|
||||||
|
INSERT INTO data_load_test2 VALUES (132, 'world');
|
||||||
|
SELECT create_distributed_table('data_load_test2', 'col1');
|
||||||
|
NOTICE: Copying data from local table...
|
||||||
|
create_distributed_table
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT a.col2 ||' '|| b.col2
|
||||||
|
FROM data_load_test1 a JOIN data_load_test2 b USING (col1)
|
||||||
|
WHERE col1 = 132;
|
||||||
|
?column?
|
||||||
|
-------------
|
||||||
|
hello world
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
DROP TABLE data_load_test1, data_load_test2;
|
||||||
|
END;
|
||||||
|
-- creating an index after loading data works
|
||||||
|
BEGIN;
|
||||||
|
CREATE TABLE data_load_test (col1 int, col2 text, col3 serial);
|
||||||
|
INSERT INTO data_load_test VALUES (132, 'hello');
|
||||||
|
SELECT create_distributed_table('data_load_test', 'col1');
|
||||||
|
NOTICE: Copying data from local table...
|
||||||
|
create_distributed_table
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
CREATE INDEX data_load_test_idx ON data_load_test (col2);
|
||||||
|
END;
|
||||||
|
DROP TABLE data_load_test;
|
||||||
|
-- popping in and out of existence in the same transaction works
|
||||||
|
BEGIN;
|
||||||
|
CREATE TABLE data_load_test (col1 int, col2 text, col3 serial);
|
||||||
|
INSERT INTO data_load_test VALUES (132, 'hello');
|
||||||
|
SELECT create_distributed_table('data_load_test', 'col1');
|
||||||
|
NOTICE: Copying data from local table...
|
||||||
|
create_distributed_table
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
DROP TABLE data_load_test;
|
||||||
|
END;
|
||||||
|
-- but dropping after a write on the distributed table is currently disallowed
|
||||||
|
BEGIN;
|
||||||
|
CREATE TABLE data_load_test (col1 int, col2 text, col3 serial);
|
||||||
|
INSERT INTO data_load_test VALUES (132, 'hello');
|
||||||
|
SELECT create_distributed_table('data_load_test', 'col1');
|
||||||
|
NOTICE: Copying data from local table...
|
||||||
|
create_distributed_table
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
INSERT INTO data_load_test VALUES (243, 'world');
|
||||||
|
DROP TABLE data_load_test;
|
||||||
|
ERROR: shard drop operations must not appear in transaction blocks containing other distributed modifications
|
||||||
|
CONTEXT: SQL statement "SELECT master_drop_all_shards(v_obj.objid, v_obj.schema_name, v_obj.object_name)"
|
||||||
|
PL/pgSQL function citus_drop_trigger() line 21 at PERFORM
|
||||||
|
END;
|
||||||
-- Test data loading after dropping a column
|
-- Test data loading after dropping a column
|
||||||
CREATE TABLE data_load_test (col1 int, col2 text, col3 text);
|
CREATE TABLE data_load_test (col1 int, col2 text, col3 text);
|
||||||
INSERT INTO data_load_test VALUES (132, 'hello', 'world');
|
INSERT INTO data_load_test VALUES (132, 'hello', 'world');
|
||||||
|
@ -382,7 +490,6 @@ INSERT INTO data_load_test VALUES (243, 'world', 'hello');
|
||||||
ALTER TABLE data_load_test DROP COLUMN col2;
|
ALTER TABLE data_load_test DROP COLUMN col2;
|
||||||
SELECT create_distributed_table('data_load_test', 'col1');
|
SELECT create_distributed_table('data_load_test', 'col1');
|
||||||
NOTICE: Copying data from local table...
|
NOTICE: Copying data from local table...
|
||||||
NOTICE: Copied 2 rows
|
|
||||||
create_distributed_table
|
create_distributed_table
|
||||||
--------------------------
|
--------------------------
|
||||||
|
|
||||||
|
|
|
@ -6,7 +6,6 @@ INSERT INTO reference_table_test VALUES (1, 1.0, '1', '2016-12-01');
|
||||||
-- create the reference table
|
-- create the reference table
|
||||||
SELECT create_reference_table('reference_table_test');
|
SELECT create_reference_table('reference_table_test');
|
||||||
NOTICE: Copying data from local table...
|
NOTICE: Copying data from local table...
|
||||||
NOTICE: Copied 1 rows
|
|
||||||
create_reference_table
|
create_reference_table
|
||||||
------------------------
|
------------------------
|
||||||
|
|
||||||
|
|
|
@ -191,15 +191,75 @@ DROP TABLE repmodel_test;
|
||||||
RESET citus.replication_model;
|
RESET citus.replication_model;
|
||||||
|
|
||||||
-- Test initial data loading
|
-- Test initial data loading
|
||||||
CREATE TABLE data_load_test (col1 int, col2 text);
|
CREATE TABLE data_load_test (col1 int, col2 text, col3 serial);
|
||||||
INSERT INTO data_load_test VALUES (132, 'hello');
|
INSERT INTO data_load_test VALUES (132, 'hello');
|
||||||
INSERT INTO data_load_test VALUES (243, 'world');
|
INSERT INTO data_load_test VALUES (243, 'world');
|
||||||
|
|
||||||
-- create_distributed_table copies data into the distributed table
|
-- table must be empty when using append- or range-partitioning
|
||||||
|
SELECT create_distributed_table('data_load_test', 'col1', 'append');
|
||||||
|
SELECT create_distributed_table('data_load_test', 'col1', 'range');
|
||||||
|
|
||||||
|
-- table must be empty when using master_create_distributed_table (no shards created)
|
||||||
|
SELECT master_create_distributed_table('data_load_test', 'col1', 'hash');
|
||||||
|
|
||||||
|
-- create_distributed_table creates shards and copies data into the distributed table
|
||||||
SELECT create_distributed_table('data_load_test', 'col1');
|
SELECT create_distributed_table('data_load_test', 'col1');
|
||||||
SELECT * FROM data_load_test;
|
SELECT * FROM data_load_test ORDER BY col1;
|
||||||
DROP TABLE data_load_test;
|
DROP TABLE data_load_test;
|
||||||
|
|
||||||
|
-- ensure writes in the same transaction as create_distributed_table are visible
|
||||||
|
BEGIN;
|
||||||
|
CREATE TABLE data_load_test (col1 int, col2 text, col3 serial);
|
||||||
|
INSERT INTO data_load_test VALUES (132, 'hello');
|
||||||
|
SELECT create_distributed_table('data_load_test', 'col1');
|
||||||
|
INSERT INTO data_load_test VALUES (243, 'world');
|
||||||
|
END;
|
||||||
|
SELECT * FROM data_load_test ORDER BY col1;
|
||||||
|
DROP TABLE data_load_test;
|
||||||
|
|
||||||
|
-- creating co-located distributed tables in the same transaction works
|
||||||
|
BEGIN;
|
||||||
|
CREATE TABLE data_load_test1 (col1 int, col2 text, col3 serial);
|
||||||
|
INSERT INTO data_load_test1 VALUES (132, 'hello');
|
||||||
|
SELECT create_distributed_table('data_load_test1', 'col1');
|
||||||
|
|
||||||
|
CREATE TABLE data_load_test2 (col1 int, col2 text, col3 serial);
|
||||||
|
INSERT INTO data_load_test2 VALUES (132, 'world');
|
||||||
|
SELECT create_distributed_table('data_load_test2', 'col1');
|
||||||
|
|
||||||
|
SELECT a.col2 ||' '|| b.col2
|
||||||
|
FROM data_load_test1 a JOIN data_load_test2 b USING (col1)
|
||||||
|
WHERE col1 = 132;
|
||||||
|
|
||||||
|
DROP TABLE data_load_test1, data_load_test2;
|
||||||
|
END;
|
||||||
|
|
||||||
|
-- creating an index after loading data works
|
||||||
|
BEGIN;
|
||||||
|
CREATE TABLE data_load_test (col1 int, col2 text, col3 serial);
|
||||||
|
INSERT INTO data_load_test VALUES (132, 'hello');
|
||||||
|
SELECT create_distributed_table('data_load_test', 'col1');
|
||||||
|
CREATE INDEX data_load_test_idx ON data_load_test (col2);
|
||||||
|
END;
|
||||||
|
DROP TABLE data_load_test;
|
||||||
|
|
||||||
|
-- popping in and out of existence in the same transaction works
|
||||||
|
BEGIN;
|
||||||
|
CREATE TABLE data_load_test (col1 int, col2 text, col3 serial);
|
||||||
|
INSERT INTO data_load_test VALUES (132, 'hello');
|
||||||
|
SELECT create_distributed_table('data_load_test', 'col1');
|
||||||
|
DROP TABLE data_load_test;
|
||||||
|
END;
|
||||||
|
|
||||||
|
-- but dropping after a write on the distributed table is currently disallowed
|
||||||
|
BEGIN;
|
||||||
|
CREATE TABLE data_load_test (col1 int, col2 text, col3 serial);
|
||||||
|
INSERT INTO data_load_test VALUES (132, 'hello');
|
||||||
|
SELECT create_distributed_table('data_load_test', 'col1');
|
||||||
|
INSERT INTO data_load_test VALUES (243, 'world');
|
||||||
|
DROP TABLE data_load_test;
|
||||||
|
END;
|
||||||
|
|
||||||
-- Test data loading after dropping a column
|
-- Test data loading after dropping a column
|
||||||
CREATE TABLE data_load_test (col1 int, col2 text, col3 text);
|
CREATE TABLE data_load_test (col1 int, col2 text, col3 text);
|
||||||
INSERT INTO data_load_test VALUES (132, 'hello', 'world');
|
INSERT INTO data_load_test VALUES (132, 'hello', 'world');
|
||||||
|
|
Loading…
Reference in New Issue