Columnar Store Storage Id

cstore_storageid
Hadi Moshayedi 2020-11-23 23:27:31 -08:00
parent fc0ef8abba
commit 0a9fc2148c
18 changed files with 391 additions and 175 deletions

View File

@ -215,12 +215,18 @@ static Cost
CStoreScanCost(RangeTblEntry *rte)
{
Relation rel = RelationIdGetRelation(rte->relid);
DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node.relNode, false);
if (UninitializedDatafile(rel->rd_node))
{
RelationClose(rel);
return 0;
}
DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node, false);
RelationClose(rel);
uint32 maxColumnCount = 0;
uint64 totalStripeSize = 0;
ListCell *stripeMetadataCell = NULL;
RelationClose(rel);
rel = NULL;
foreach(stripeMetadataCell, metadata->stripeMetadataList)

View File

@ -41,6 +41,23 @@
#include "utils/memutils.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
#include "utils/relfilenodemap.h"
/*
* Content of the first page in main fork, which stores metadata at file
* level.
*/
typedef struct ColumnarMetapage
{
/*
* Each of the metadata table rows are identified by a storageId.
* We store it also in the main fork so we can link metadata rows
* with data files.
*/
uint64 storageId;
} ColumnarMetapage;
typedef struct
{
@ -48,11 +65,11 @@ typedef struct
EState *estate;
} ModifyState;
static void InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe);
static void GetHighestUsedAddressAndId(Oid relfilenode,
static void InsertStripeMetadataRow(uint64 storageId, StripeMetadata *stripe);
static void GetHighestUsedAddressAndId(uint64 storageId,
uint64 *highestUsedAddress,
uint64 *highestUsedId);
static List * ReadDataFileStripeList(Oid relfilenode, Snapshot snapshot);
static List * ReadDataFileStripeList(uint64 storageId, Snapshot snapshot);
static Oid CStoreStripesRelationId(void);
static Oid CStoreStripesIndexRelationId(void);
static Oid CStoreDataFilesRelationId(void);
@ -60,7 +77,7 @@ static Oid CStoreDataFilesIndexRelationId(void);
static Oid CStoreSkipNodesRelationId(void);
static Oid CStoreSkipNodesIndexRelationId(void);
static Oid CStoreNamespaceId(void);
static bool ReadCStoreDataFiles(Oid relfilenode, DataFileMetadata *metadata);
static bool ReadCStoreDataFiles(uint64 storageId, DataFileMetadata *metadata);
static ModifyState * StartModifyRelation(Relation rel);
static void InsertTupleAndEnforceConstraints(ModifyState *state, Datum *values,
bool *nulls);
@ -69,10 +86,16 @@ static void FinishModifyRelation(ModifyState *state);
static EState * create_estate_for_relation(Relation rel);
static bytea * DatumToBytea(Datum value, Form_pg_attribute attrForm);
static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm);
static ColumnarMetapage InitMetapage(Relation relation);
static bool ReadMetapage(RelFileNode relfilenode, ColumnarMetapage *metapage);
static uint64 GetNextStorageId(void);
PG_FUNCTION_INFO_V1(columnar_relation_storageid);
/* constants for cstore_table */
#define Natts_cstore_data_files 6
#define Anum_cstore_data_files_relfilenode 1
#define Anum_cstore_data_files_storageid 1
#define Anum_cstore_data_files_block_row_count 2
#define Anum_cstore_data_files_stripe_row_count 3
#define Anum_cstore_data_files_compression 4
@ -85,7 +108,7 @@ static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm);
*/
typedef struct FormData_cstore_data_files
{
Oid relfilenode;
uint64 storageid;
int32 block_row_count;
int32 stripe_row_count;
NameData compression;
@ -99,7 +122,7 @@ typedef FormData_cstore_data_files *Form_cstore_data_files;
/* constants for cstore_stripe */
#define Natts_cstore_stripes 8
#define Anum_cstore_stripes_relfilenode 1
#define Anum_cstore_stripes_storageid 1
#define Anum_cstore_stripes_stripe 2
#define Anum_cstore_stripes_file_offset 3
#define Anum_cstore_stripes_data_length 4
@ -110,7 +133,7 @@ typedef FormData_cstore_data_files *Form_cstore_data_files;
/* constants for cstore_skipnodes */
#define Natts_cstore_skipnodes 12
#define Anum_cstore_skipnodes_relfilenode 1
#define Anum_cstore_skipnodes_storageid 1
#define Anum_cstore_skipnodes_stripe 2
#define Anum_cstore_skipnodes_attr 3
#define Anum_cstore_skipnodes_block 4
@ -125,18 +148,19 @@ typedef FormData_cstore_data_files *Form_cstore_data_files;
/*
* InitCStoreDataFileMetadata adds a record for the given relfilenode
* InitCStoreDataFileMetadata adds a record for the given relation
* in cstore_data_files.
*/
void
InitCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int stripeRowCount,
InitCStoreDataFileMetadata(Relation relation, int blockRowCount,
int stripeRowCount,
CompressionType compression)
{
NameData compressionName = { 0 };
ColumnarMetapage metapage = InitMetapage(relation);
bool nulls[Natts_cstore_data_files] = { 0 };
Datum values[Natts_cstore_data_files] = {
ObjectIdGetDatum(relfilenode),
UInt64GetDatum(metapage.storageId),
Int32GetDatum(blockRowCount),
Int32GetDatum(stripeRowCount),
0, /* to be filled below */
@ -144,10 +168,11 @@ InitCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int stripeRowCoun
Int32GetDatum(CSTORE_VERSION_MINOR)
};
NameData compressionName = { 0 };
namestrcpy(&compressionName, CompressionTypeStr(compression));
values[Anum_cstore_data_files_compression - 1] = NameGetDatum(&compressionName);
DeleteDataFileMetadataRowIfExists(relfilenode);
DeleteDataFileMetadataRowIfExists(relation->rd_node);
Oid cstoreDataFilesOid = CStoreDataFilesRelationId();
Relation cstoreDataFiles = heap_open(cstoreDataFilesOid, RowExclusiveLock);
@ -163,7 +188,8 @@ InitCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int stripeRowCoun
void
UpdateCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int stripeRowCount,
UpdateCStoreDataFileMetadata(RelFileNode relfilenode, int blockRowCount, int
stripeRowCount,
CompressionType compression)
{
const int scanKeyCount = 1;
@ -174,11 +200,17 @@ UpdateCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int stripeRowCo
bool replace[Natts_cstore_data_files] = { 0 };
bool changed = false;
ColumnarMetapage metapage;
if (!ReadMetapage(relfilenode, &metapage))
{
elog(ERROR, "metapage was not found");
}
Relation cstoreDataFiles = heap_open(CStoreDataFilesRelationId(), RowExclusiveLock);
TupleDesc tupleDescriptor = RelationGetDescr(cstoreDataFiles);
ScanKeyInit(&scanKey[0], Anum_cstore_data_files_relfilenode, BTEqualStrategyNumber,
F_INT8EQ, ObjectIdGetDatum(relfilenode));
ScanKeyInit(&scanKey[0], Anum_cstore_data_files_storageid, BTEqualStrategyNumber,
F_INT8EQ, UInt64GetDatum(metapage.storageId));
SysScanDesc scanDescriptor = systable_beginscan(cstoreDataFiles,
CStoreDataFilesIndexRelationId(),
@ -189,7 +221,7 @@ UpdateCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int stripeRowCo
if (heapTuple == NULL)
{
ereport(ERROR, (errmsg("relfilenode %d doesn't belong to a cstore table",
relfilenode)));
relfilenode.relNode)));
}
Form_cstore_data_files metadata = (Form_cstore_data_files) GETSTRUCT(heapTuple);
@ -242,13 +274,19 @@ UpdateCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int stripeRowCo
* of cstore_skipnodes.
*/
void
SaveStripeSkipList(Oid relfilenode, uint64 stripe, StripeSkipList *stripeSkipList,
SaveStripeSkipList(RelFileNode relfilenode, uint64 stripe, StripeSkipList *stripeSkipList,
TupleDesc tupleDescriptor)
{
uint32 columnIndex = 0;
uint32 blockIndex = 0;
uint32 columnCount = stripeSkipList->columnCount;
ColumnarMetapage metapage;
if (!ReadMetapage(relfilenode, &metapage))
{
elog(WARNING, "metapage was not found");
}
Oid cstoreSkipNodesOid = CStoreSkipNodesRelationId();
Relation cstoreSkipNodes = heap_open(cstoreSkipNodesOid, RowExclusiveLock);
ModifyState *modifyState = StartModifyRelation(cstoreSkipNodes);
@ -261,7 +299,7 @@ SaveStripeSkipList(Oid relfilenode, uint64 stripe, StripeSkipList *stripeSkipLis
&stripeSkipList->blockSkipNodeArray[columnIndex][blockIndex];
Datum values[Natts_cstore_skipnodes] = {
ObjectIdGetDatum(relfilenode),
UInt64GetDatum(metapage.storageId),
Int64GetDatum(stripe),
Int32GetDatum(columnIndex + 1),
Int32GetDatum(blockIndex),
@ -307,7 +345,7 @@ SaveStripeSkipList(Oid relfilenode, uint64 stripe, StripeSkipList *stripeSkipLis
* ReadStripeSkipList fetches StripeSkipList for a given stripe.
*/
StripeSkipList *
ReadStripeSkipList(Oid relfilenode, uint64 stripe, TupleDesc tupleDescriptor,
ReadStripeSkipList(RelFileNode relfilenode, uint64 stripe, TupleDesc tupleDescriptor,
uint32 blockCount)
{
int32 columnIndex = 0;
@ -315,12 +353,18 @@ ReadStripeSkipList(Oid relfilenode, uint64 stripe, TupleDesc tupleDescriptor,
uint32 columnCount = tupleDescriptor->natts;
ScanKeyData scanKey[2];
ColumnarMetapage metapage;
if (!ReadMetapage(relfilenode, &metapage))
{
elog(WARNING, "metapage was not found");
}
Oid cstoreSkipNodesOid = CStoreSkipNodesRelationId();
Relation cstoreSkipNodes = heap_open(cstoreSkipNodesOid, AccessShareLock);
Relation index = index_open(CStoreSkipNodesIndexRelationId(), AccessShareLock);
ScanKeyInit(&scanKey[0], Anum_cstore_skipnodes_relfilenode,
BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode));
ScanKeyInit(&scanKey[0], Anum_cstore_skipnodes_storageid,
BTEqualStrategyNumber, F_OIDEQ, UInt64GetDatum(metapage.storageId));
ScanKeyInit(&scanKey[1], Anum_cstore_skipnodes_stripe,
BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(stripe));
@ -410,11 +454,11 @@ ReadStripeSkipList(Oid relfilenode, uint64 stripe, TupleDesc tupleDescriptor,
* InsertStripeMetadataRow adds a row to cstore_stripes.
*/
static void
InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe)
InsertStripeMetadataRow(uint64 storageId, StripeMetadata *stripe)
{
bool nulls[Natts_cstore_stripes] = { 0 };
Datum values[Natts_cstore_stripes] = {
ObjectIdGetDatum(relfilenode),
UInt64GetDatum(storageId),
Int64GetDatum(stripe->id),
Int64GetDatum(stripe->fileOffset),
Int64GetDatum(stripe->dataLength),
@ -444,16 +488,23 @@ InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe)
* from cstore_data_files and cstore_stripes.
*/
DataFileMetadata *
ReadDataFileMetadata(Oid relfilenode, bool missingOk)
ReadDataFileMetadata(RelFileNode relfilenode, bool missingOk)
{
ColumnarMetapage metapage;
DataFileMetadata *datafileMetadata = palloc0(sizeof(DataFileMetadata));
bool found = ReadCStoreDataFiles(relfilenode, datafileMetadata);
bool found = ReadMetapage(relfilenode, &metapage);
if (found)
{
found = ReadCStoreDataFiles(metapage.storageId, datafileMetadata);
}
if (!found)
{
if (!missingOk)
{
ereport(ERROR, (errmsg("Relfilenode %d doesn't belong to a cstore table.",
relfilenode)));
relfilenode.relNode)));
}
else
{
@ -462,7 +513,7 @@ ReadDataFileMetadata(Oid relfilenode, bool missingOk)
}
datafileMetadata->stripeMetadataList =
ReadDataFileStripeList(relfilenode, GetTransactionSnapshot());
ReadDataFileStripeList(metapage.storageId, GetTransactionSnapshot());
return datafileMetadata;
}
@ -473,12 +524,17 @@ ReadDataFileMetadata(Oid relfilenode, bool missingOk)
* relfilenode across all active and inactive transactions.
*/
uint64
GetHighestUsedAddress(Oid relfilenode)
GetHighestUsedAddress(RelFileNode relfilenode)
{
uint64 highestUsedAddress = 0;
uint64 highestUsedId = 0;
ColumnarMetapage metapage;
if (!ReadMetapage(relfilenode, &metapage))
{
elog(ERROR, "metapage was not found");
}
GetHighestUsedAddressAndId(relfilenode, &highestUsedAddress, &highestUsedId);
GetHighestUsedAddressAndId(metapage.storageId, &highestUsedAddress, &highestUsedId);
return highestUsedAddress;
}
@ -489,7 +545,7 @@ GetHighestUsedAddress(Oid relfilenode)
* the given relfilenode across all active and inactive transactions.
*/
static void
GetHighestUsedAddressAndId(Oid relfilenode,
GetHighestUsedAddressAndId(uint64 storageId,
uint64 *highestUsedAddress,
uint64 *highestUsedId)
{
@ -498,10 +554,12 @@ GetHighestUsedAddressAndId(Oid relfilenode,
SnapshotData SnapshotDirty;
InitDirtySnapshot(SnapshotDirty);
List *stripeMetadataList = ReadDataFileStripeList(relfilenode, &SnapshotDirty);
List *stripeMetadataList = ReadDataFileStripeList(storageId, &SnapshotDirty);
*highestUsedId = 0;
*highestUsedAddress = 0;
/* file starts with metapage */
*highestUsedAddress = sizeof(ColumnarMetapage);
foreach(stripeMetadataCell, stripeMetadataList)
{
@ -535,8 +593,14 @@ ReserveStripe(Relation rel, uint64 sizeBytes,
*/
LockRelation(rel, ShareUpdateExclusiveLock);
Oid relfilenode = rel->rd_node.relNode;
GetHighestUsedAddressAndId(relfilenode, &currLogicalHigh, &highestId);
RelFileNode relfilenode = rel->rd_node;
ColumnarMetapage metapage;
if (!ReadMetapage(relfilenode, &metapage))
{
elog(WARNING, "metapage was not found");
}
GetHighestUsedAddressAndId(metapage.storageId, &currLogicalHigh, &highestId);
SmgrAddr currSmgrHigh = logical_to_smgr(currLogicalHigh);
SmgrAddr resSmgrStart = next_block_start(currSmgrHigh);
@ -565,7 +629,7 @@ ReserveStripe(Relation rel, uint64 sizeBytes,
stripe.rowCount = rowCount;
stripe.id = highestId + 1;
InsertStripeMetadataRow(relfilenode, &stripe);
InsertStripeMetadataRow(metapage.storageId, &stripe);
UnlockRelation(rel, ShareUpdateExclusiveLock);
@ -574,18 +638,18 @@ ReserveStripe(Relation rel, uint64 sizeBytes,
/*
* ReadDataFileStripeList reads the stripe list for a given relfilenode
* ReadDataFileStripeList reads the stripe list for a given storageId
* in the given snapshot.
*/
static List *
ReadDataFileStripeList(Oid relfilenode, Snapshot snapshot)
ReadDataFileStripeList(uint64 storageId, Snapshot snapshot)
{
List *stripeMetadataList = NIL;
ScanKeyData scanKey[1];
HeapTuple heapTuple;
ScanKeyInit(&scanKey[0], Anum_cstore_stripes_relfilenode,
BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode));
ScanKeyInit(&scanKey[0], Anum_cstore_stripes_storageid,
BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(storageId));
Oid cstoreStripesOid = CStoreStripesRelationId();
Relation cstoreStripes = heap_open(cstoreStripesOid, AccessShareLock);
@ -634,13 +698,13 @@ ReadDataFileStripeList(Oid relfilenode, Snapshot snapshot)
* false if table was not found in cstore_data_files.
*/
static bool
ReadCStoreDataFiles(Oid relfilenode, DataFileMetadata *metadata)
ReadCStoreDataFiles(uint64 storageid, DataFileMetadata *metadata)
{
bool found = false;
ScanKeyData scanKey[1];
ScanKeyInit(&scanKey[0], Anum_cstore_data_files_relfilenode,
BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode));
ScanKeyInit(&scanKey[0], Anum_cstore_data_files_storageid,
BTEqualStrategyNumber, F_OIDEQ, UInt64GetDatum(storageid));
Oid cstoreDataFilesOid = CStoreDataFilesRelationId();
Relation cstoreDataFiles = try_relation_open(cstoreDataFilesOid, AccessShareLock);
@ -684,6 +748,7 @@ ReadCStoreDataFiles(Oid relfilenode, DataFileMetadata *metadata)
datumArray[Anum_cstore_data_files_compression - 1]);
metadata->compression = ParseCompressionType(NameStr(*compressionName));
}
found = true;
}
@ -699,7 +764,7 @@ ReadCStoreDataFiles(Oid relfilenode, DataFileMetadata *metadata)
* DeleteDataFileMetadataRowIfExists removes the row with given relfilenode from cstore_stripes.
*/
void
DeleteDataFileMetadataRowIfExists(Oid relfilenode)
DeleteDataFileMetadataRowIfExists(RelFileNode relfilenode)
{
ScanKeyData scanKey[1];
@ -712,8 +777,14 @@ DeleteDataFileMetadataRowIfExists(Oid relfilenode)
return;
}
ScanKeyInit(&scanKey[0], Anum_cstore_data_files_relfilenode,
BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode));
ColumnarMetapage metapage;
if (!ReadMetapage(relfilenode, &metapage))
{
return;
}
ScanKeyInit(&scanKey[0], Anum_cstore_data_files_storageid,
BTEqualStrategyNumber, F_INT8EQ, UInt64GetDatum(metapage.storageId));
Oid cstoreDataFilesOid = CStoreDataFilesRelationId();
Relation cstoreDataFiles = try_relation_open(cstoreDataFilesOid, AccessShareLock);
@ -996,3 +1067,90 @@ CStoreNamespaceId(void)
{
return get_namespace_oid("cstore", false);
}
static bool
ReadMetapage(RelFileNode relfilenode, ColumnarMetapage *metapage)
{
Oid relationId = RelidByRelfilenode(relfilenode.spcNode,
relfilenode.relNode);
if (!OidIsValid(relationId))
{
return false;
}
Relation relation = relation_open(relationId, NoLock);
RelationOpenSmgr(relation);
int nblocks = smgrnblocks(relation->rd_smgr, MAIN_FORKNUM);
RelationCloseSmgr(relation);
if (nblocks == 0)
{
relation_close(relation, NoLock);
return false;
}
StringInfo metapageBuffer = ReadFromSmgr(relation, 0, sizeof(ColumnarMetapage));
relation_close(relation, NoLock);
memcpy((void *) metapage, metapageBuffer->data, sizeof(ColumnarMetapage));
return true;
}
static ColumnarMetapage
InitMetapage(Relation relation)
{
ColumnarMetapage metapage;
metapage.storageId = GetNextStorageId();
/* create the first block */
Buffer newBuffer = ReadBuffer(relation, P_NEW);
ReleaseBuffer(newBuffer);
Assert(sizeof(ColumnarMetapage) <= BLCKSZ - SizeOfPageHeaderData);
WriteToSmgr(relation, 0, (char *) &metapage, sizeof(ColumnarMetapage));
return metapage;
}
static uint64
GetNextStorageId(void)
{
Oid sequenceId = get_relname_relid("cstore_storageid_seq", CStoreNamespaceId());
Datum sequenceIdDatum = ObjectIdGetDatum(sequenceId);
/*
* Generate new and unique storage id from sequence.
* TODO: should we restrict access to the sequence, which might require
* switching security context here?
*/
Datum storageIdDatum = DirectFunctionCall1(nextval_oid, sequenceIdDatum);
uint64 storageId = DatumGetInt64(storageIdDatum);
return storageId;
}
Datum
columnar_relation_storageid(PG_FUNCTION_ARGS)
{
Oid relationId = PG_GETARG_OID(0);
uint64 storageId = -1;
Relation relation = relation_open(relationId, AccessShareLock);
ColumnarMetapage metadata;
if (IsCStoreTableAmTable(relationId) &&
ReadMetapage(relation->rd_node, &metadata))
{
storageId = metadata.storageId;
}
relation_close(relation, AccessShareLock);
PG_RETURN_INT64(storageId);
}

View File

@ -76,7 +76,6 @@ static BlockData * DeserializeBlockData(StripeBuffers *stripeBuffers, uint64 blo
List *projectedColumnList);
static Datum ColumnDefaultValue(TupleConstr *tupleConstraints,
Form_pg_attribute attributeForm);
static StringInfo ReadFromSmgr(Relation rel, uint64 offset, uint32 size);
/*
* CStoreBeginRead initializes a cstore read operation. This function returns a
@ -86,9 +85,7 @@ TableReadState *
CStoreBeginRead(Relation relation, TupleDesc tupleDescriptor,
List *projectedColumnList, List *whereClauseList)
{
Oid relNode = relation->rd_node.relNode;
DataFileMetadata *datafileMetadata = ReadDataFileMetadata(relNode, false);
DataFileMetadata *datafileMetadata = ReadDataFileMetadata(relation->rd_node, true);
/*
* We allocate all stripe specific data in the stripeReadContext, and reset
@ -127,6 +124,11 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu
StripeMetadata *stripeMetadata = readState->currentStripeMetadata;
MemoryContext oldContext = NULL;
if (readState->datafileMetadata == NULL)
{
return false;
}
/*
* If no stripes are loaded, load the next non-empty stripe. Note that when
* loading stripes, we skip over blocks whose contents can be filtered with
@ -238,8 +240,13 @@ void
CStoreEndRead(TableReadState *readState)
{
MemoryContextDelete(readState->stripeReadContext);
list_free_deep(readState->datafileMetadata->stripeMetadataList);
pfree(readState->datafileMetadata);
if (readState->datafileMetadata)
{
list_free_deep(readState->datafileMetadata->stripeMetadataList);
pfree(readState->datafileMetadata);
}
pfree(readState);
}
@ -317,7 +324,12 @@ CStoreTableRowCount(Relation relation)
ListCell *stripeMetadataCell = NULL;
uint64 totalRowCount = 0;
DataFileMetadata *datafileMetadata = ReadDataFileMetadata(relation->rd_node.relNode,
if (UninitializedDatafile(relation->rd_node))
{
return 0;
}
DataFileMetadata *datafileMetadata = ReadDataFileMetadata(relation->rd_node,
false);
foreach(stripeMetadataCell, datafileMetadata->stripeMetadataList)
@ -345,7 +357,7 @@ LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata,
bool *projectedColumnMask = ProjectedColumnMask(columnCount, projectedColumnList);
StripeSkipList *stripeSkipList = ReadStripeSkipList(relation->rd_node.relNode,
StripeSkipList *stripeSkipList = ReadStripeSkipList(relation->rd_node,
stripeMetadata->id,
tupleDescriptor,
stripeMetadata->blockCount);
@ -1009,7 +1021,7 @@ ColumnDefaultValue(TupleConstr *tupleConstraints, Form_pg_attribute attributeFor
}
static StringInfo
StringInfo
ReadFromSmgr(Relation rel, uint64 offset, uint32 size)
{
StringInfo resultBuffer = makeStringInfo();

View File

@ -106,7 +106,6 @@ static void CStoreTableAMProcessUtility(PlannedStmt *plannedStatement,
char *completionTag);
#endif
static bool IsCStoreTableAmTable(Oid relationId);
static bool ConditionalLockRelationWithTimeout(Relation rel, LOCKMODE lockMode,
int timeout, int retryInterval);
static void LogRelationStats(Relation rel, int elevel);
@ -117,8 +116,8 @@ static void TruncateCStore(Relation rel, int elevel);
* CStoreTableAMDefaultOptions returns the default options for a cstore table am table.
* These options are based on the GUC's controlling the defaults.
*/
static CStoreOptions *
CStoreTableAMDefaultOptions()
CStoreOptions *
CStoreTableAMDefaultOptions(void)
{
CStoreOptions *cstoreOptions = palloc0(sizeof(CStoreOptions));
cstoreOptions->compressionType = cstore_compression;
@ -133,11 +132,14 @@ CStoreTableAMDefaultOptions()
* relation is a cstore table am table, if not it will raise an error
*/
CStoreOptions *
CStoreTableAMGetOptions(Oid relfilenode)
CStoreTableAMGetOptions(RelFileNode relfilenode)
{
Assert(OidIsValid(relfilenode));
CStoreOptions *cstoreOptions = palloc0(sizeof(CStoreOptions));
if (UninitializedDatafile(relfilenode))
{
return CStoreTableAMDefaultOptions();
}
DataFileMetadata *metadata = ReadDataFileMetadata(relfilenode, false);
cstoreOptions->compressionType = metadata->compression;
cstoreOptions->stripeRowCount = metadata->stripeRowCount;
@ -570,50 +572,32 @@ cstore_relation_set_new_filenode(Relation rel,
TransactionId *freezeXid,
MultiXactId *minmulti)
{
DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node.relNode, true);
uint64 blockRowCount = 0;
uint64 stripeRowCount = 0;
CompressionType compression = 0;
Oid oldRelfilenode = rel->rd_node.relNode;
MarkRelfilenodeDropped(oldRelfilenode, GetCurrentSubTransactionId());
if (metadata != NULL)
{
/* existing table (e.g. TRUNCATE), use existing blockRowCount */
blockRowCount = metadata->blockRowCount;
stripeRowCount = metadata->stripeRowCount;
compression = metadata->compression;
}
else
{
/* new table, use options */
CStoreOptions *options = CStoreTableAMDefaultOptions();
blockRowCount = options->blockRowCount;
stripeRowCount = options->stripeRowCount;
compression = options->compressionType;
}
/* delete old relfilenode metadata */
DeleteDataFileMetadataRowIfExists(rel->rd_node.relNode);
DeleteDataFileMetadataRowIfExists(rel->rd_node);
Assert(persistence == RELPERSISTENCE_PERMANENT);
*freezeXid = RecentXmin;
*minmulti = GetOldestMultiXactId();
SMgrRelation srel = RelationCreateStorage(*newrnode, persistence);
InitCStoreDataFileMetadata(newrnode->relNode, blockRowCount, stripeRowCount,
compression);
smgrclose(srel);
/* we will lazily initialize metadata in first write */
}
static void
cstore_relation_nontransactional_truncate(Relation rel)
{
Oid relfilenode = rel->rd_node.relNode;
DataFileMetadata *metadata = ReadDataFileMetadata(relfilenode, false);
RelFileNode relfilenode = rel->rd_node;
NonTransactionDropWriteState(relfilenode);
NonTransactionDropWriteState(relfilenode.relNode);
/* Delete old relfilenode metadata and recreate it */
DeleteDataFileMetadataRowIfExists(relfilenode);
/*
* No need to set new relfilenode, since the table was created in this
@ -624,10 +608,7 @@ cstore_relation_nontransactional_truncate(Relation rel)
*/
RelationTruncate(rel, 0);
/* Delete old relfilenode metadata and recreate it */
DeleteDataFileMetadataRowIfExists(rel->rd_node.relNode);
InitCStoreDataFileMetadata(rel->rd_node.relNode, metadata->blockRowCount,
metadata->stripeRowCount, metadata->compression);
/* we will lazily initialize new metadata in first write */
}
@ -675,14 +656,9 @@ cstore_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
* relation first.
*/
CStoreOptions *cstoreOptions = CStoreTableAMGetOptions(OldHeap->rd_node.relNode);
CStoreOptions *cstoreOptions = CStoreTableAMGetOptions(OldHeap->rd_node);
UpdateCStoreDataFileMetadata(NewHeap->rd_node.relNode,
cstoreOptions->blockRowCount,
cstoreOptions->stripeRowCount,
cstoreOptions->compressionType);
cstoreOptions = CStoreTableAMGetOptions(NewHeap->rd_node.relNode);
cstoreOptions = CStoreTableAMGetOptions(NewHeap->rd_node);
TableWriteState *writeState = CStoreBeginWrite(NewHeap->rd_node,
cstoreOptions->compressionType,
@ -740,7 +716,7 @@ static void
LogRelationStats(Relation rel, int elevel)
{
ListCell *stripeMetadataCell = NULL;
Oid relfilenode = rel->rd_node.relNode;
RelFileNode relfilenode = rel->rd_node;
StringInfo infoBuf = makeStringInfo();
int compressionStats[COMPRESSION_COUNT] = { 0 };
@ -866,7 +842,7 @@ TruncateCStore(Relation rel, int elevel)
* we're truncating.
*/
SmgrAddr highestPhysicalAddress =
logical_to_smgr(GetHighestUsedAddress(rel->rd_node.relNode));
logical_to_smgr(GetHighestUsedAddress(rel->rd_node));
BlockNumber new_rel_pages = highestPhysicalAddress.blockno + 1;
if (new_rel_pages == old_rel_pages)
@ -1234,10 +1210,10 @@ CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId, Oid objectId
* tableam tables storage is managed by postgres.
*/
Relation rel = table_open(objectId, AccessExclusiveLock);
Oid relfilenode = rel->rd_node.relNode;
RelFileNode relfilenode = rel->rd_node;
DeleteDataFileMetadataRowIfExists(relfilenode);
MarkRelfilenodeDropped(relfilenode, GetCurrentSubTransactionId());
MarkRelfilenodeDropped(relfilenode.relNode, GetCurrentSubTransactionId());
/* keep the lock since we did physical changes to the relation */
table_close(rel, NoLock);
@ -1249,7 +1225,7 @@ CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId, Oid objectId
* IsCStoreTableAmTable returns true if relation has cstore_tableam
* access method. This can be called before extension creation.
*/
static bool
bool
IsCStoreTableAmTable(Oid relationId)
{
if (!OidIsValid(relationId))
@ -1367,7 +1343,7 @@ alter_columnar_table_set(PG_FUNCTION_ARGS)
Oid relationId = PG_GETARG_OID(0);
Relation rel = table_open(relationId, AccessExclusiveLock); /* ALTER TABLE LOCK */
DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node.relNode, true);
DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node, true);
if (!metadata)
{
ereport(ERROR, (errmsg("table %s is not a cstore table",
@ -1406,7 +1382,7 @@ alter_columnar_table_set(PG_FUNCTION_ARGS)
CompressionTypeStr(compression))));
}
UpdateCStoreDataFileMetadata(rel->rd_node.relNode, blockRowCount, stripeRowCount,
UpdateCStoreDataFileMetadata(rel->rd_node, blockRowCount, stripeRowCount,
compression);
table_close(rel, NoLock);
@ -1422,7 +1398,7 @@ alter_columnar_table_reset(PG_FUNCTION_ARGS)
Oid relationId = PG_GETARG_OID(0);
Relation rel = table_open(relationId, AccessExclusiveLock); /* ALTER TABLE LOCK */
DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node.relNode, true);
DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node, true);
if (!metadata)
{
ereport(ERROR, (errmsg("table %s is not a cstore table",
@ -1455,7 +1431,7 @@ alter_columnar_table_reset(PG_FUNCTION_ARGS)
CompressionTypeStr(compression))));
}
UpdateCStoreDataFileMetadata(rel->rd_node.relNode, blockRowCount, stripeRowCount,
UpdateCStoreDataFileMetadata(rel->rd_node, blockRowCount, stripeRowCount,
compression);
table_close(rel, NoLock);

View File

@ -51,6 +51,7 @@ static void UpdateBlockSkipNodeMinMax(ColumnBlockSkipNode *blockSkipNode,
FmgrInfo *comparisonFunction);
static Datum DatumCopy(Datum datum, bool datumTypeByValue, int datumTypeLength);
static StringInfo CopyStringInfo(StringInfo sourceString);
static void InitializeMetadata(RelFileNode relfilenode);
/*
* CStoreBeginWrite initializes a cstore data load operation and returns a table
@ -65,12 +66,16 @@ CStoreBeginWrite(RelFileNode relfilenode,
uint64 stripeMaxRowCount, uint32 blockRowCount,
TupleDesc tupleDescriptor)
{
uint32 columnIndex = 0;
/* initialize metadata if this is the first write */
if (UninitializedDatafile(relfilenode))
{
InitializeMetadata(relfilenode);
}
/* get comparison function pointers for each of the columns */
uint32 columnCount = tupleDescriptor->natts;
FmgrInfo **comparisonFunctionArray = palloc0(columnCount * sizeof(FmgrInfo *));
for (columnIndex = 0; columnIndex < columnCount; columnIndex++)
for (uint32 columnIndex = 0; columnIndex < columnCount; columnIndex++)
{
FmgrInfo *comparisonFunction = NULL;
FormData_pg_attribute *attributeForm = TupleDescAttr(tupleDescriptor,
@ -122,6 +127,34 @@ CStoreBeginWrite(RelFileNode relfilenode,
}
/*
* UninitializedDatafile returns true if metadata for the
* given relfilenode hasn't been initialized.
*/
bool
UninitializedDatafile(RelFileNode relfilenode)
{
return ReadDataFileMetadata(relfilenode, true) == NULL;
}
static void
InitializeMetadata(RelFileNode relfilenode)
{
Oid relationId = RelidByRelfilenode(relfilenode.spcNode, relfilenode.relNode);
Relation relation = relation_open(relationId, NoLock);
CStoreOptions *options = CStoreTableAMDefaultOptions();
InitCStoreDataFileMetadata(relation,
options->blockRowCount,
options->stripeRowCount,
options->compressionType);
relation_close(relation, NoLock);
}
/*
* CStoreWriteRow adds a row to the cstore file. If the stripe is not initialized,
* we create structures to hold stripe data and skip list. Then, we serialize and
@ -323,8 +356,9 @@ CreateEmptyStripeSkipList(uint32 stripeMaxRowCount, uint32 blockRowCount,
}
static void
WriteToSmgr(Relation rel, uint64 logicalOffset, char *data, uint32 dataLength)
void
WriteToSmgr(Relation relation, uint64 logicalOffset, char *data,
uint32 dataLength)
{
uint64 remaining = dataLength;
Buffer buffer;
@ -333,13 +367,13 @@ WriteToSmgr(Relation rel, uint64 logicalOffset, char *data, uint32 dataLength)
{
SmgrAddr addr = logical_to_smgr(logicalOffset);
RelationOpenSmgr(rel);
BlockNumber nblocks = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM);
RelationOpenSmgr(relation);
BlockNumber nblocks = smgrnblocks(relation->rd_smgr, MAIN_FORKNUM);
Assert(addr.blockno < nblocks);
(void) nblocks; /* keep compiler quiet */
RelationCloseSmgr(rel);
RelationCloseSmgr(relation);
buffer = ReadBuffer(rel, addr.blockno);
buffer = ReadBuffer(relation, addr.blockno);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
Page page = BufferGetPage(buffer);
@ -372,7 +406,7 @@ WriteToSmgr(Relation rel, uint64 logicalOffset, char *data, uint32 dataLength)
MarkBufferDirty(buffer);
if (RelationNeedsWAL(rel))
if (RelationNeedsWAL(relation))
{
XLogBeginInsert();
@ -521,7 +555,7 @@ FlushStripe(TableWriteState *writeState)
}
/* create skip list and footer buffers */
SaveStripeSkipList(relation->rd_node.relNode,
SaveStripeSkipList(writeState->relfilenode,
stripeMetadata.id,
stripeSkipList, tupleDescriptor);

View File

@ -3,20 +3,22 @@
CREATE SCHEMA cstore;
SET search_path TO cstore;
CREATE SEQUENCE cstore_storageid_seq NO CYCLE;
CREATE TABLE cstore_data_files (
relfilenode oid NOT NULL,
storageid bigint NOT NULL,
block_row_count int NOT NULL,
stripe_row_count int NOT NULL,
compression name NOT NULL,
version_major bigint NOT NULL,
version_minor bigint NOT NULL,
PRIMARY KEY (relfilenode)
PRIMARY KEY (storageid)
) WITH (user_catalog_table = true);
COMMENT ON TABLE cstore_data_files IS 'CStore data file wide metadata';
CREATE TABLE cstore_stripes (
relfilenode oid NOT NULL,
storageid bigint NOT NULL,
stripe bigint NOT NULL,
file_offset bigint NOT NULL,
data_length bigint NOT NULL,
@ -24,14 +26,14 @@ CREATE TABLE cstore_stripes (
block_count int NOT NULL,
block_row_count int NOT NULL,
row_count bigint NOT NULL,
PRIMARY KEY (relfilenode, stripe),
FOREIGN KEY (relfilenode) REFERENCES cstore_data_files(relfilenode) ON DELETE CASCADE INITIALLY DEFERRED
PRIMARY KEY (storageid, stripe),
FOREIGN KEY (storageid) REFERENCES cstore_data_files(storageid) ON DELETE CASCADE INITIALLY DEFERRED
) WITH (user_catalog_table = true);
COMMENT ON TABLE cstore_stripes IS 'CStore per stripe metadata';
CREATE TABLE cstore_skipnodes (
relfilenode oid NOT NULL,
storageid bigint NOT NULL,
stripe bigint NOT NULL,
attr int NOT NULL,
block int NOT NULL,
@ -43,19 +45,23 @@ CREATE TABLE cstore_skipnodes (
exists_stream_offset bigint NOT NULL,
exists_stream_length bigint NOT NULL,
value_compression_type int NOT NULL,
PRIMARY KEY (relfilenode, stripe, attr, block),
FOREIGN KEY (relfilenode, stripe) REFERENCES cstore_stripes(relfilenode, stripe) ON DELETE CASCADE INITIALLY DEFERRED
PRIMARY KEY (storageid, stripe, attr, block),
FOREIGN KEY (storageid, stripe) REFERENCES cstore_stripes(storageid, stripe) ON DELETE CASCADE INITIALLY DEFERRED
) WITH (user_catalog_table = true);
COMMENT ON TABLE cstore_skipnodes IS 'CStore per block metadata';
CREATE FUNCTION columnar_relation_storageid(relid oid) RETURNS bigint
LANGUAGE C STABLE STRICT
AS 'MODULE_PATHNAME', $$columnar_relation_storageid$$;
CREATE VIEW columnar_options AS
SELECT c.oid::regclass regclass,
d.block_row_count,
d.stripe_row_count,
d.compression
FROM pg_class c
JOIN cstore.cstore_data_files d USING(relfilenode);
FROM pg_class c, cstore.cstore_data_files d
WHERE d.storageid=columnar_relation_storageid(c.oid);
COMMENT ON VIEW columnar_options IS 'CStore per table settings';

View File

@ -31,6 +31,7 @@ DROP VIEW columnar_options;
DROP TABLE cstore_skipnodes;
DROP TABLE cstore_stripes;
DROP TABLE cstore_data_files;
DROP SEQUENCE cstore_storageid_seq;
DROP FUNCTION citus_internal.cstore_ensure_objects_exist();

View File

@ -178,7 +178,7 @@ cstore_init_write_state(RelFileNode relfilenode, TupleDesc tupdesc,
*/
MemoryContext oldContext = MemoryContextSwitchTo(WriteStateContext);
CStoreOptions *cstoreOptions = CStoreTableAMGetOptions(relfilenode.relNode);
CStoreOptions *cstoreOptions = CStoreTableAMGetOptions(relfilenode);
SubXidWriteState *stackEntry = palloc0(sizeof(SubXidWriteState));
stackEntry->writeState = CStoreBeginWrite(relfilenode,
cstoreOptions->compressionType,

View File

@ -263,6 +263,7 @@ extern void CStoreWriteRow(TableWriteState *state, Datum *columnValues,
extern void CStoreFlushPendingWrites(TableWriteState *state);
extern void CStoreEndWrite(TableWriteState *state);
extern bool ContainsPendingWrites(TableWriteState *state);
extern bool UninitializedDatafile(RelFileNode relfilenode);
/* Function declarations for reading from a cstore file */
extern TableReadState * CStoreBeginRead(Relation relation,
@ -285,23 +286,29 @@ extern bool CompressBuffer(StringInfo inputBuffer, StringInfo outputBuffer,
CompressionType compressionType);
extern StringInfo DecompressBuffer(StringInfo buffer, CompressionType compressionType);
extern char * CompressionTypeStr(CompressionType type);
extern CStoreOptions * CStoreTableAMGetOptions(Oid relfilenode);
extern CStoreOptions * CStoreTableAMGetOptions(RelFileNode relfilenode);
extern void WriteToSmgr(Relation relation, uint64 logicalOffset,
char *data, uint32 dataLength);
extern StringInfo ReadFromSmgr(Relation rel, uint64 offset, uint32 size);
extern bool IsCStoreTableAmTable(Oid relationId);
extern CStoreOptions * CStoreTableAMDefaultOptions(void);
/* cstore_metadata_tables.c */
extern void DeleteDataFileMetadataRowIfExists(Oid relfilenode);
extern void InitCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int
stripeRowCount, CompressionType compression);
extern void UpdateCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int
extern void DeleteDataFileMetadataRowIfExists(RelFileNode relfilenode);
extern void InitCStoreDataFileMetadata(Relation relation,
int blockRowCount, int stripeRowCount,
CompressionType compression);
extern void UpdateCStoreDataFileMetadata(RelFileNode relfilenode, int blockRowCount, int
stripeRowCount, CompressionType compression);
extern DataFileMetadata * ReadDataFileMetadata(Oid relfilenode, bool missingOk);
extern uint64 GetHighestUsedAddress(Oid relfilenode);
extern DataFileMetadata * ReadDataFileMetadata(RelFileNode relfilenode, bool missingOk);
extern uint64 GetHighestUsedAddress(RelFileNode relfilenode);
extern StripeMetadata ReserveStripe(Relation rel, uint64 size,
uint64 rowCount, uint64 columnCount,
uint64 blockCount, uint64 blockRowCount);
extern void SaveStripeSkipList(Oid relfilenode, uint64 stripe,
extern void SaveStripeSkipList(RelFileNode relfilenode, uint64 stripe,
StripeSkipList *stripeSkipList,
TupleDesc tupleDescriptor);
extern StripeSkipList * ReadStripeSkipList(Oid relfilenode, uint64 stripe,
extern StripeSkipList * ReadStripeSkipList(RelFileNode relfilenode, uint64 stripe,
TupleDesc tupleDescriptor,
uint32 blockCount);

View File

@ -26,6 +26,7 @@ SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files;
-- Create a cstore_fdw table under a schema and drop it.
CREATE SCHEMA test_schema;
CREATE TABLE test_schema.test_table(data int) USING columnar;
INSERT INTO test_schema.test_table VALUES (1);
SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset
DROP SCHEMA test_schema CASCADE;
NOTICE: drop cascades to table test_schema.test_table

View File

@ -35,20 +35,21 @@ SELECT * FROM t_view a ORDER BY a;
(6 rows)
-- verify that we have created metadata entries for the materialized view
SELECT relfilenode FROM pg_class WHERE relname='t_view' \gset
SELECT count(*) FROM cstore.cstore_data_files WHERE relfilenode=:relfilenode;
SELECT cstore.columnar_relation_storageid(oid) AS storageid
FROM pg_class WHERE relname='t_view' \gset
SELECT count(*) FROM cstore.cstore_data_files WHERE storageid=:storageid;
count
---------------------------------------------------------------------
1
(1 row)
SELECT count(*) FROM cstore.cstore_stripes WHERE relfilenode=:relfilenode;
SELECT count(*) FROM cstore.cstore_stripes WHERE storageid=:storageid;
count
---------------------------------------------------------------------
1
(1 row)
SELECT count(*) FROM cstore.cstore_skipnodes WHERE relfilenode=:relfilenode;
SELECT count(*) FROM cstore.cstore_skipnodes WHERE storageid=:storageid;
count
---------------------------------------------------------------------
3
@ -57,19 +58,19 @@ SELECT count(*) FROM cstore.cstore_skipnodes WHERE relfilenode=:relfilenode;
DROP TABLE t CASCADE;
NOTICE: drop cascades to materialized view t_view
-- dropping must remove metadata
SELECT count(*) FROM cstore.cstore_data_files WHERE relfilenode=:relfilenode;
SELECT count(*) FROM cstore.cstore_data_files WHERE storageid=:storageid;
count
---------------------------------------------------------------------
0
(1 row)
SELECT count(*) FROM cstore.cstore_stripes WHERE relfilenode=:relfilenode;
SELECT count(*) FROM cstore.cstore_stripes WHERE storageid=:storageid;
count
---------------------------------------------------------------------
0
(1 row)
SELECT count(*) FROM cstore.cstore_skipnodes WHERE relfilenode=:relfilenode;
SELECT count(*) FROM cstore.cstore_skipnodes WHERE storageid=:storageid;
count
---------------------------------------------------------------------
0

View File

@ -12,7 +12,7 @@ INSERT INTO t2 SELECT i, f(i) FROM generate_series(1, 5) i;
-- there are no subtransactions, so above statement should batch
-- INSERTs inside the UDF and create on stripe per table.
SELECT relname, count(*) FROM cstore.cstore_stripes a, pg_class b
WHERE a.relfilenode=b.relfilenode AND relname IN ('t1', 't2')
WHERE cstore.columnar_relation_storageid(b.oid)=a.storageid AND relname IN ('t1', 't2')
GROUP BY relname
ORDER BY relname;
relname | count

View File

@ -2,6 +2,9 @@
-- Testing we handle rollbacks properly
--
CREATE TABLE t(a int, b int) USING columnar;
CREATE VIEW t_stripes AS
SELECT * FROM cstore.cstore_stripes a, pg_class b
WHERE a.storageid = cstore.columnar_relation_storageid(b.oid) AND b.relname = 't';
BEGIN;
INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i;
ROLLBACK;
@ -12,8 +15,7 @@ SELECT count(*) FROM t;
(1 row)
-- check stripe metadata also have been rolled-back
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b
WHERE a.relfilenode = b.relfilenode AND b.relname = 't';
SELECT count(*) FROM t_stripes;
count
---------------------------------------------------------------------
0
@ -26,8 +28,7 @@ SELECT count(*) FROM t;
10
(1 row)
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b
WHERE a.relfilenode = b.relfilenode AND b.relname = 't';
SELECT count(*) FROM t_stripes;
count
---------------------------------------------------------------------
1
@ -73,11 +74,11 @@ SELECT count(*) FROM t;
20
(1 row)
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b
WHERE a.relfilenode = b.relfilenode AND b.relname = 't';
SELECT count(*) FROM t_stripes;
count
---------------------------------------------------------------------
2
(1 row)
DROP TABLE t;
DROP VIEW t_stripes;

View File

@ -27,6 +27,7 @@ SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files;
-- Create a cstore_fdw table under a schema and drop it.
CREATE SCHEMA test_schema;
CREATE TABLE test_schema.test_table(data int) USING columnar;
INSERT INTO test_schema.test_table VALUES (1);
SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset
DROP SCHEMA test_schema CASCADE;

View File

@ -20,15 +20,16 @@ REFRESH MATERIALIZED VIEW t_view;
SELECT * FROM t_view a ORDER BY a;
-- verify that we have created metadata entries for the materialized view
SELECT relfilenode FROM pg_class WHERE relname='t_view' \gset
SELECT cstore.columnar_relation_storageid(oid) AS storageid
FROM pg_class WHERE relname='t_view' \gset
SELECT count(*) FROM cstore.cstore_data_files WHERE relfilenode=:relfilenode;
SELECT count(*) FROM cstore.cstore_stripes WHERE relfilenode=:relfilenode;
SELECT count(*) FROM cstore.cstore_skipnodes WHERE relfilenode=:relfilenode;
SELECT count(*) FROM cstore.cstore_data_files WHERE storageid=:storageid;
SELECT count(*) FROM cstore.cstore_stripes WHERE storageid=:storageid;
SELECT count(*) FROM cstore.cstore_skipnodes WHERE storageid=:storageid;
DROP TABLE t CASCADE;
-- dropping must remove metadata
SELECT count(*) FROM cstore.cstore_data_files WHERE relfilenode=:relfilenode;
SELECT count(*) FROM cstore.cstore_stripes WHERE relfilenode=:relfilenode;
SELECT count(*) FROM cstore.cstore_skipnodes WHERE relfilenode=:relfilenode;
SELECT count(*) FROM cstore.cstore_data_files WHERE storageid=:storageid;
SELECT count(*) FROM cstore.cstore_stripes WHERE storageid=:storageid;
SELECT count(*) FROM cstore.cstore_skipnodes WHERE storageid=:storageid;

View File

@ -16,7 +16,7 @@ INSERT INTO t2 SELECT i, f(i) FROM generate_series(1, 5) i;
-- there are no subtransactions, so above statement should batch
-- INSERTs inside the UDF and create on stripe per table.
SELECT relname, count(*) FROM cstore.cstore_stripes a, pg_class b
WHERE a.relfilenode=b.relfilenode AND relname IN ('t1', 't2')
WHERE cstore.columnar_relation_storageid(b.oid)=a.storageid AND relname IN ('t1', 't2')
GROUP BY relname
ORDER BY relname;

View File

@ -4,20 +4,22 @@
CREATE TABLE t(a int, b int) USING columnar;
CREATE VIEW t_stripes AS
SELECT * FROM cstore.cstore_stripes a, pg_class b
WHERE a.storageid = cstore.columnar_relation_storageid(b.oid) AND b.relname = 't';
BEGIN;
INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i;
ROLLBACK;
SELECT count(*) FROM t;
-- check stripe metadata also have been rolled-back
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b
WHERE a.relfilenode = b.relfilenode AND b.relname = 't';
SELECT count(*) FROM t_stripes;
INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i;
SELECT count(*) FROM t;
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b
WHERE a.relfilenode = b.relfilenode AND b.relname = 't';
SELECT count(*) FROM t_stripes;
-- savepoint rollback
BEGIN;
@ -36,7 +38,7 @@ COMMIT;
SELECT count(*) FROM t;
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b
WHERE a.relfilenode = b.relfilenode AND b.relname = 't';
SELECT count(*) FROM t_stripes;
DROP TABLE t;
DROP VIEW t_stripes;

View File

@ -2,41 +2,49 @@ SELECT count(*) AS columnar_table_count FROM cstore.cstore_data_files \gset
CREATE TABLE t(a int, b int) USING columnar;
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t';
CREATE VIEW t_stripes AS
SELECT * FROM cstore.cstore_stripes a, pg_class b
WHERE a.storageid = cstore.columnar_relation_storageid(b.oid) AND b.relname='t';
SELECT count(*) FROM t_stripes;
INSERT INTO t SELECT i, i * i FROM generate_series(1, 10) i;
INSERT INTO t SELECT i, i * i FROM generate_series(11, 20) i;
INSERT INTO t SELECT i, i * i FROM generate_series(21, 30) i;
SELECT sum(a), sum(b) FROM t;
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t';
SELECT count(*) FROM t_stripes;
-- vacuum full should merge stripes together
VACUUM FULL t;
SELECT sum(a), sum(b) FROM t;
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t';
SELECT count(*) FROM t_stripes;
-- test the case when all data cannot fit into a single stripe
SELECT alter_columnar_table_set('t', stripe_row_count => 1000);
INSERT INTO t SELECT i, 2 * i FROM generate_series(1,2500) i;
SELECT sum(a), sum(b) FROM t;
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t';
SELECT count(*) FROM t_stripes;
VACUUM FULL t;
SELECT sum(a), sum(b) FROM t;
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t';
SELECT count(*) FROM t_stripes;
-- VACUUM FULL doesn't reclaim dropped columns, but converts them to NULLs
ALTER TABLE t DROP COLUMN a;
SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' ORDER BY 1, 2, 3;
SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL
FROM cstore.cstore_skipnodes a, pg_class b
WHERE a.storageid = cstore.columnar_relation_storageid(b.oid) AND b.relname='t' ORDER BY 1, 2, 3;
VACUUM FULL t;
SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' ORDER BY 1, 2, 3;
SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL
FROM cstore.cstore_skipnodes a, pg_class b
WHERE a.storageid = cstore.columnar_relation_storageid(b.oid) AND b.relname='t' ORDER BY 1, 2, 3;
-- Make sure we cleaned-up the transient table metadata after VACUUM FULL commands
SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files;
@ -99,6 +107,7 @@ VACUUM FULL t;
VACUUM VERBOSE t;
DROP TABLE t;
DROP VIEW t_stripes;
-- Make sure we cleaned the metadata for t too
SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files;