Associate column store metadata with storage id (#4347)

pull/4371/head
Hadi Moshayedi 2020-11-30 18:01:43 -08:00 committed by GitHub
parent de22b633cb
commit a94e8c9cda
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
30 changed files with 414 additions and 345 deletions

View File

@ -215,15 +215,15 @@ static Cost
CStoreScanCost(RangeTblEntry *rte)
{
Relation rel = RelationIdGetRelation(rte->relid);
DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node.relNode, false);
List *stripeList = StripesForRelfilenode(rel->rd_node);
RelationClose(rel);
uint32 maxColumnCount = 0;
uint64 totalStripeSize = 0;
ListCell *stripeMetadataCell = NULL;
RelationClose(rel);
rel = NULL;
foreach(stripeMetadataCell, metadata->stripeMetadataList)
foreach(stripeMetadataCell, stripeList)
{
StripeMetadata *stripeMetadata = (StripeMetadata *) lfirst(stripeMetadataCell);
totalStripeSize += stripeMetadata->dataLength;

View File

@ -12,6 +12,7 @@
#include "safe_lib.h"
#include "citus_version.h"
#include "columnar/cstore.h"
#include "columnar/cstore_version_compat.h"
@ -27,6 +28,7 @@
#include "catalog/namespace.h"
#include "commands/defrem.h"
#include "commands/trigger.h"
#include "distributed/metadata_cache.h"
#include "executor/executor.h"
#include "executor/spi.h"
#include "miscadmin.h"
@ -41,6 +43,30 @@
#include "utils/memutils.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
#include "utils/relfilenodemap.h"
/*
* Content of the first page in main fork, which stores metadata at file
* level.
*/
typedef struct ColumnarMetapage
{
/*
* Store version of file format used, so we can detect files from
* previous versions if we change file format.
*/
int versionMajor;
int versionMinor;
/*
* Each of the metadata table rows are identified by a storageId.
* We store it also in the main fork so we can link metadata rows
* with data files.
*/
uint64 storageId;
} ColumnarMetapage;
typedef struct
{
@ -48,21 +74,18 @@ typedef struct
EState *estate;
} ModifyState;
static void InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe);
static void GetHighestUsedAddressAndId(Oid relfilenode,
static void InsertStripeMetadataRow(uint64 storageId, StripeMetadata *stripe);
static void GetHighestUsedAddressAndId(uint64 storageId,
uint64 *highestUsedAddress,
uint64 *highestUsedId);
static List * ReadDataFileStripeList(Oid relfilenode, Snapshot snapshot);
static List * ReadDataFileStripeList(uint64 storageId, Snapshot snapshot);
static Oid CStoreStripesRelationId(void);
static Oid CStoreStripesIndexRelationId(void);
static Oid ColumnarOptionsRelationId(void);
static Oid ColumnarOptionsIndexRegclass(void);
static Oid CStoreDataFilesRelationId(void);
static Oid CStoreDataFilesIndexRelationId(void);
static Oid CStoreSkipNodesRelationId(void);
static Oid CStoreSkipNodesIndexRelationId(void);
static Oid CStoreNamespaceId(void);
static bool ReadCStoreDataFiles(Oid relfilenode, DataFileMetadata *metadata);
static ModifyState * StartModifyRelation(Relation rel);
static void InsertTupleAndEnforceConstraints(ModifyState *state, Datum *values,
bool *nulls);
@ -71,9 +94,13 @@ static void FinishModifyRelation(ModifyState *state);
static EState * create_estate_for_relation(Relation rel);
static bytea * DatumToBytea(Datum value, Form_pg_attribute attrForm);
static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm);
static ColumnarMetapage * InitMetapage(Relation relation);
static ColumnarMetapage * ReadMetapage(RelFileNode relfilenode, bool missingOk);
static uint64 GetNextStorageId(void);
static bool WriteColumnarOptions(Oid regclass, ColumnarOptions *options, bool overwrite);
PG_FUNCTION_INFO_V1(columnar_relation_storageid);
/* constants for cstore.options */
#define Natts_cstore_options 4
#define Anum_cstore_options_regclass 1
@ -97,33 +124,10 @@ typedef struct FormData_cstore_options
} FormData_cstore_options;
typedef FormData_cstore_options *Form_cstore_options;
/* constants for cstore_table */
#define Natts_cstore_data_files 3
#define Anum_cstore_data_files_relfilenode 1
#define Anum_cstore_data_files_version_major 2
#define Anum_cstore_data_files_version_minor 3
/* ----------------
* cstore.cstore_data_files definition.
* ----------------
*/
typedef struct FormData_cstore_data_files
{
Oid relfilenode;
int32 block_row_count;
int32 stripe_row_count;
NameData compression;
int64 version_major;
int64 version_minor;
#ifdef CATALOG_VARLEN /* variable-length fields start here */
#endif
} FormData_cstore_data_files;
typedef FormData_cstore_data_files *Form_cstore_data_files;
/* constants for cstore_stripe */
#define Natts_cstore_stripes 8
#define Anum_cstore_stripes_relfilenode 1
#define Anum_cstore_stripes_storageid 1
#define Anum_cstore_stripes_stripe 2
#define Anum_cstore_stripes_file_offset 3
#define Anum_cstore_stripes_data_length 4
@ -134,7 +138,7 @@ typedef FormData_cstore_data_files *Form_cstore_data_files;
/* constants for cstore_skipnodes */
#define Natts_cstore_skipnodes 12
#define Anum_cstore_skipnodes_relfilenode 1
#define Anum_cstore_skipnodes_storageid 1
#define Anum_cstore_skipnodes_stripe 2
#define Anum_cstore_skipnodes_attr 3
#define Anum_cstore_skipnodes_block 4
@ -357,47 +361,19 @@ ReadColumnarOptions(Oid regclass, ColumnarOptions *options)
}
/*
* InitCStoreDataFileMetadata adds a record for the given relfilenode
* in cstore_data_files.
*/
void
InitCStoreDataFileMetadata(Oid relfilenode)
{
bool nulls[Natts_cstore_data_files] = { 0 };
Datum values[Natts_cstore_data_files] = {
ObjectIdGetDatum(relfilenode),
Int32GetDatum(CSTORE_VERSION_MAJOR),
Int32GetDatum(CSTORE_VERSION_MINOR)
};
DeleteDataFileMetadataRowIfExists(relfilenode);
Oid cstoreDataFilesOid = CStoreDataFilesRelationId();
Relation cstoreDataFiles = heap_open(cstoreDataFilesOid, RowExclusiveLock);
ModifyState *modifyState = StartModifyRelation(cstoreDataFiles);
InsertTupleAndEnforceConstraints(modifyState, values, nulls);
FinishModifyRelation(modifyState);
CommandCounterIncrement();
heap_close(cstoreDataFiles, NoLock);
}
/*
* SaveStripeSkipList saves StripeSkipList for a given stripe as rows
* of cstore_skipnodes.
*/
void
SaveStripeSkipList(Oid relfilenode, uint64 stripe, StripeSkipList *stripeSkipList,
SaveStripeSkipList(RelFileNode relfilenode, uint64 stripe, StripeSkipList *stripeSkipList,
TupleDesc tupleDescriptor)
{
uint32 columnIndex = 0;
uint32 blockIndex = 0;
uint32 columnCount = stripeSkipList->columnCount;
ColumnarMetapage *metapage = ReadMetapage(relfilenode, false);
Oid cstoreSkipNodesOid = CStoreSkipNodesRelationId();
Relation cstoreSkipNodes = heap_open(cstoreSkipNodesOid, RowExclusiveLock);
ModifyState *modifyState = StartModifyRelation(cstoreSkipNodes);
@ -410,7 +386,7 @@ SaveStripeSkipList(Oid relfilenode, uint64 stripe, StripeSkipList *stripeSkipLis
&stripeSkipList->blockSkipNodeArray[columnIndex][blockIndex];
Datum values[Natts_cstore_skipnodes] = {
ObjectIdGetDatum(relfilenode),
UInt64GetDatum(metapage->storageId),
Int64GetDatum(stripe),
Int32GetDatum(columnIndex + 1),
Int32GetDatum(blockIndex),
@ -456,7 +432,7 @@ SaveStripeSkipList(Oid relfilenode, uint64 stripe, StripeSkipList *stripeSkipLis
* ReadStripeSkipList fetches StripeSkipList for a given stripe.
*/
StripeSkipList *
ReadStripeSkipList(Oid relfilenode, uint64 stripe, TupleDesc tupleDescriptor,
ReadStripeSkipList(RelFileNode relfilenode, uint64 stripe, TupleDesc tupleDescriptor,
uint32 blockCount)
{
int32 columnIndex = 0;
@ -464,12 +440,14 @@ ReadStripeSkipList(Oid relfilenode, uint64 stripe, TupleDesc tupleDescriptor,
uint32 columnCount = tupleDescriptor->natts;
ScanKeyData scanKey[2];
ColumnarMetapage *metapage = ReadMetapage(relfilenode, false);
Oid cstoreSkipNodesOid = CStoreSkipNodesRelationId();
Relation cstoreSkipNodes = heap_open(cstoreSkipNodesOid, AccessShareLock);
Relation index = index_open(CStoreSkipNodesIndexRelationId(), AccessShareLock);
ScanKeyInit(&scanKey[0], Anum_cstore_skipnodes_relfilenode,
BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode));
ScanKeyInit(&scanKey[0], Anum_cstore_skipnodes_storageid,
BTEqualStrategyNumber, F_OIDEQ, UInt64GetDatum(metapage->storageId));
ScanKeyInit(&scanKey[1], Anum_cstore_skipnodes_stripe,
BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(stripe));
@ -559,11 +537,11 @@ ReadStripeSkipList(Oid relfilenode, uint64 stripe, TupleDesc tupleDescriptor,
* InsertStripeMetadataRow adds a row to cstore_stripes.
*/
static void
InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe)
InsertStripeMetadataRow(uint64 storageId, StripeMetadata *stripe)
{
bool nulls[Natts_cstore_stripes] = { 0 };
Datum values[Natts_cstore_stripes] = {
ObjectIdGetDatum(relfilenode),
UInt64GetDatum(storageId),
Int64GetDatum(stripe->id),
Int64GetDatum(stripe->fileOffset),
Int64GetDatum(stripe->dataLength),
@ -589,31 +567,21 @@ InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe)
/*
* ReadDataFileMetadata constructs DataFileMetadata for a given relfilenode by reading
* from cstore_data_files and cstore_stripes.
* StripesForRelfilenode returns a list of StripeMetadata for stripes
* of the given relfilenode.
*/
DataFileMetadata *
ReadDataFileMetadata(Oid relfilenode, bool missingOk)
List *
StripesForRelfilenode(RelFileNode relfilenode)
{
DataFileMetadata *datafileMetadata = palloc0(sizeof(DataFileMetadata));
bool found = ReadCStoreDataFiles(relfilenode, datafileMetadata);
if (!found)
ColumnarMetapage *metapage = ReadMetapage(relfilenode, true);
if (metapage == NULL)
{
if (!missingOk)
{
ereport(ERROR, (errmsg("Relfilenode %d doesn't belong to a cstore table.",
relfilenode)));
}
else
{
return NULL;
}
/* empty relation */
return NIL;
}
datafileMetadata->stripeMetadataList =
ReadDataFileStripeList(relfilenode, GetTransactionSnapshot());
return datafileMetadata;
return ReadDataFileStripeList(metapage->storageId, GetTransactionSnapshot());
}
@ -622,12 +590,13 @@ ReadDataFileMetadata(Oid relfilenode, bool missingOk)
* relfilenode across all active and inactive transactions.
*/
uint64
GetHighestUsedAddress(Oid relfilenode)
GetHighestUsedAddress(RelFileNode relfilenode)
{
uint64 highestUsedAddress = 0;
uint64 highestUsedId = 0;
ColumnarMetapage *metapage = ReadMetapage(relfilenode, false);
GetHighestUsedAddressAndId(relfilenode, &highestUsedAddress, &highestUsedId);
GetHighestUsedAddressAndId(metapage->storageId, &highestUsedAddress, &highestUsedId);
return highestUsedAddress;
}
@ -638,7 +607,7 @@ GetHighestUsedAddress(Oid relfilenode)
* the given relfilenode across all active and inactive transactions.
*/
static void
GetHighestUsedAddressAndId(Oid relfilenode,
GetHighestUsedAddressAndId(uint64 storageId,
uint64 *highestUsedAddress,
uint64 *highestUsedId)
{
@ -647,10 +616,12 @@ GetHighestUsedAddressAndId(Oid relfilenode,
SnapshotData SnapshotDirty;
InitDirtySnapshot(SnapshotDirty);
List *stripeMetadataList = ReadDataFileStripeList(relfilenode, &SnapshotDirty);
List *stripeMetadataList = ReadDataFileStripeList(storageId, &SnapshotDirty);
*highestUsedId = 0;
*highestUsedAddress = 0;
/* file starts with metapage */
*highestUsedAddress = CSTORE_BYTES_PER_PAGE;
foreach(stripeMetadataCell, stripeMetadataList)
{
@ -684,8 +655,20 @@ ReserveStripe(Relation rel, uint64 sizeBytes,
*/
LockRelation(rel, ShareUpdateExclusiveLock);
Oid relfilenode = rel->rd_node.relNode;
GetHighestUsedAddressAndId(relfilenode, &currLogicalHigh, &highestId);
RelFileNode relfilenode = rel->rd_node;
/*
* If this is the first stripe for this relation, initialize the
* metapage, otherwise use the previously initialized metapage.
*/
ColumnarMetapage *metapage = ReadMetapage(relfilenode, true);
if (metapage == NULL)
{
metapage = InitMetapage(rel);
}
GetHighestUsedAddressAndId(metapage->storageId, &currLogicalHigh, &highestId);
SmgrAddr currSmgrHigh = logical_to_smgr(currLogicalHigh);
SmgrAddr resSmgrStart = next_block_start(currSmgrHigh);
@ -714,7 +697,7 @@ ReserveStripe(Relation rel, uint64 sizeBytes,
stripe.rowCount = rowCount;
stripe.id = highestId + 1;
InsertStripeMetadataRow(relfilenode, &stripe);
InsertStripeMetadataRow(metapage->storageId, &stripe);
UnlockRelation(rel, ShareUpdateExclusiveLock);
@ -723,18 +706,18 @@ ReserveStripe(Relation rel, uint64 sizeBytes,
/*
* ReadDataFileStripeList reads the stripe list for a given relfilenode
* ReadDataFileStripeList reads the stripe list for a given storageId
* in the given snapshot.
*/
static List *
ReadDataFileStripeList(Oid relfilenode, Snapshot snapshot)
ReadDataFileStripeList(uint64 storageId, Snapshot snapshot)
{
List *stripeMetadataList = NIL;
ScanKeyData scanKey[1];
HeapTuple heapTuple;
ScanKeyInit(&scanKey[0], Anum_cstore_stripes_relfilenode,
BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode));
ScanKeyInit(&scanKey[0], Anum_cstore_stripes_storageid,
BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(storageId));
Oid cstoreStripesOid = CStoreStripesRelationId();
Relation cstoreStripes = heap_open(cstoreStripesOid, AccessShareLock);
@ -779,60 +762,10 @@ ReadDataFileStripeList(Oid relfilenode, Snapshot snapshot)
/*
* ReadCStoreDataFiles reads corresponding record from cstore_data_files. Returns
* false if table was not found in cstore_data_files.
*/
static bool
ReadCStoreDataFiles(Oid relfilenode, DataFileMetadata *metadata)
{
bool found = false;
ScanKeyData scanKey[1];
ScanKeyInit(&scanKey[0], Anum_cstore_data_files_relfilenode,
BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode));
Oid cstoreDataFilesOid = CStoreDataFilesRelationId();
Relation cstoreDataFiles = try_relation_open(cstoreDataFilesOid, AccessShareLock);
if (cstoreDataFiles == NULL)
{
/*
* Extension has been dropped. This can be called while
* dropping extension or database via ObjectAccess().
*/
return false;
}
Relation index = try_relation_open(CStoreDataFilesIndexRelationId(), AccessShareLock);
if (index == NULL)
{
heap_close(cstoreDataFiles, NoLock);
/* extension has been dropped */
return false;
}
SysScanDesc scanDescriptor = systable_beginscan_ordered(cstoreDataFiles, index, NULL,
1, scanKey);
HeapTuple heapTuple = systable_getnext(scanDescriptor);
if (HeapTupleIsValid(heapTuple))
{
found = true;
}
systable_endscan_ordered(scanDescriptor);
index_close(index, NoLock);
heap_close(cstoreDataFiles, NoLock);
return found;
}
/*
* DeleteDataFileMetadataRowIfExists removes the row with given relfilenode from cstore_stripes.
* DeleteMetadataRows removes the rows with given relfilenode from cstore_stripes.
*/
void
DeleteDataFileMetadataRowIfExists(Oid relfilenode)
DeleteMetadataRows(RelFileNode relfilenode)
{
ScanKeyData scanKey[1];
@ -845,33 +778,46 @@ DeleteDataFileMetadataRowIfExists(Oid relfilenode)
return;
}
ScanKeyInit(&scanKey[0], Anum_cstore_data_files_relfilenode,
BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode));
ColumnarMetapage *metapage = ReadMetapage(relfilenode, true);
if (metapage == NULL)
{
/*
* No data has been written to this storage yet, so there is no
* associated metadata yet.
*/
return;
}
Oid cstoreDataFilesOid = CStoreDataFilesRelationId();
Relation cstoreDataFiles = try_relation_open(cstoreDataFilesOid, AccessShareLock);
if (cstoreDataFiles == NULL)
ScanKeyInit(&scanKey[0], Anum_cstore_stripes_storageid,
BTEqualStrategyNumber, F_INT8EQ, UInt64GetDatum(metapage->storageId));
Oid cstoreStripesOid = CStoreStripesRelationId();
Relation cstoreStripes = try_relation_open(cstoreStripesOid, AccessShareLock);
if (cstoreStripes == NULL)
{
/* extension has been dropped */
return;
}
Relation index = index_open(CStoreDataFilesIndexRelationId(), AccessShareLock);
Relation index = index_open(CStoreStripesIndexRelationId(), AccessShareLock);
SysScanDesc scanDescriptor = systable_beginscan_ordered(cstoreDataFiles, index, NULL,
SysScanDesc scanDescriptor = systable_beginscan_ordered(cstoreStripes, index, NULL,
1, scanKey);
ModifyState *modifyState = StartModifyRelation(cstoreStripes);
HeapTuple heapTuple = systable_getnext(scanDescriptor);
if (HeapTupleIsValid(heapTuple))
while (HeapTupleIsValid(heapTuple))
{
ModifyState *modifyState = StartModifyRelation(cstoreDataFiles);
DeleteTupleAndEnforceConstraints(modifyState, heapTuple);
FinishModifyRelation(modifyState);
heapTuple = systable_getnext(scanDescriptor);
}
FinishModifyRelation(modifyState);
systable_endscan_ordered(scanDescriptor);
index_close(index, NoLock);
heap_close(cstoreDataFiles, NoLock);
heap_close(cstoreStripes, NoLock);
}
@ -1096,28 +1042,6 @@ ColumnarOptionsIndexRegclass(void)
}
/*
* CStoreDataFilesRelationId returns relation id of cstore_data_files.
* TODO: should we cache this similar to citus?
*/
static Oid
CStoreDataFilesRelationId(void)
{
return get_relname_relid("cstore_data_files", CStoreNamespaceId());
}
/*
* CStoreDataFilesIndexRelationId returns relation id of cstore_data_files_pkey.
* TODO: should we cache this similar to citus?
*/
static Oid
CStoreDataFilesIndexRelationId(void)
{
return get_relname_relid("cstore_data_files_pkey", CStoreNamespaceId());
}
/*
* CStoreSkipNodesRelationId returns relation id of cstore_skipnodes.
* TODO: should we cache this similar to citus?
@ -1149,3 +1073,128 @@ CStoreNamespaceId(void)
{
return get_namespace_oid("cstore", false);
}
/*
* ReadMetapage reads metapage for the given relfilenode. It returns
* false if the relation doesn't have a meta page yet.
*/
static ColumnarMetapage *
ReadMetapage(RelFileNode relfilenode, bool missingOk)
{
StringInfo metapageBuffer = NULL;
Oid relationId = RelidByRelfilenode(relfilenode.spcNode,
relfilenode.relNode);
if (OidIsValid(relationId))
{
Relation relation = relation_open(relationId, NoLock);
RelationOpenSmgr(relation);
int nblocks = smgrnblocks(relation->rd_smgr, MAIN_FORKNUM);
RelationCloseSmgr(relation);
if (nblocks != 0)
{
metapageBuffer = ReadFromSmgr(relation, 0, sizeof(ColumnarMetapage));
}
relation_close(relation, NoLock);
}
if (metapageBuffer == NULL)
{
if (!missingOk)
{
elog(ERROR, "columnar metapage was not found");
}
return NULL;
}
ColumnarMetapage *metapage = palloc0(sizeof(ColumnarMetapage));
memcpy_s((void *) metapage, sizeof(ColumnarMetapage),
metapageBuffer->data, sizeof(ColumnarMetapage));
return metapage;
}
/*
* InitMetapage initializes metapage for the given relation.
*/
static ColumnarMetapage *
InitMetapage(Relation relation)
{
ColumnarMetapage *metapage = palloc0(sizeof(ColumnarMetapage));
metapage->storageId = GetNextStorageId();
metapage->versionMajor = CSTORE_VERSION_MAJOR;
metapage->versionMinor = CSTORE_VERSION_MINOR;
/* create the first block */
Buffer newBuffer = ReadBuffer(relation, P_NEW);
ReleaseBuffer(newBuffer);
Assert(sizeof(ColumnarMetapage) <= BLCKSZ - SizeOfPageHeaderData);
WriteToSmgr(relation, 0, (char *) metapage, sizeof(ColumnarMetapage));
return metapage;
}
/*
* GetNextStorageId returns the next value from the storage id sequence.
*/
static uint64
GetNextStorageId(void)
{
Oid savedUserId = InvalidOid;
int savedSecurityContext = 0;
Oid sequenceId = get_relname_relid("storageid_seq", CStoreNamespaceId());
Datum sequenceIdDatum = ObjectIdGetDatum(sequenceId);
/*
* Not all users have update access to the sequence, so switch
* security context.
*/
GetUserIdAndSecContext(&savedUserId, &savedSecurityContext);
SetUserIdAndSecContext(CitusExtensionOwner(), SECURITY_LOCAL_USERID_CHANGE);
/*
* Generate new and unique storage id from sequence.
*/
Datum storageIdDatum = DirectFunctionCall1(nextval_oid, sequenceIdDatum);
SetUserIdAndSecContext(savedUserId, savedSecurityContext);
uint64 storageId = DatumGetInt64(storageIdDatum);
return storageId;
}
/*
* columnar_relation_storageid returns storage id associated with the
* given relation id, or -1 if there is no associated storage id yet.
*/
Datum
columnar_relation_storageid(PG_FUNCTION_ARGS)
{
uint64 storageId = -1;
#if HAS_TABLEAM
Oid relationId = PG_GETARG_OID(0);
Relation relation = relation_open(relationId, AccessShareLock);
if (IsCStoreTableAmTable(relationId))
{
ColumnarMetapage *metadata = ReadMetapage(relation->rd_node, true);
if (metadata != NULL)
{
storageId = metadata->storageId;
}
}
relation_close(relation, AccessShareLock);
#endif
PG_RETURN_INT64(storageId);
}

View File

@ -76,7 +76,6 @@ static BlockData * DeserializeBlockData(StripeBuffers *stripeBuffers, uint64 blo
List *projectedColumnList);
static Datum ColumnDefaultValue(TupleConstr *tupleConstraints,
Form_pg_attribute attributeForm);
static StringInfo ReadFromSmgr(Relation rel, uint64 offset, uint32 size);
/*
* CStoreBeginRead initializes a cstore read operation. This function returns a
@ -86,9 +85,7 @@ TableReadState *
CStoreBeginRead(Relation relation, TupleDesc tupleDescriptor,
List *projectedColumnList, List *whereClauseList)
{
Oid relNode = relation->rd_node.relNode;
DataFileMetadata *datafileMetadata = ReadDataFileMetadata(relNode, false);
List *stripeList = StripesForRelfilenode(relation->rd_node);
/*
* We allocate all stripe specific data in the stripeReadContext, and reset
@ -101,7 +98,7 @@ CStoreBeginRead(Relation relation, TupleDesc tupleDescriptor,
TableReadState *readState = palloc0(sizeof(TableReadState));
readState->relation = relation;
readState->datafileMetadata = datafileMetadata;
readState->stripeList = stripeList;
readState->projectedColumnList = projectedColumnList;
readState->whereClauseList = whereClauseList;
readState->stripeBuffers = NULL;
@ -135,7 +132,7 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu
*/
while (readState->stripeBuffers == NULL)
{
List *stripeMetadataList = readState->datafileMetadata->stripeMetadataList;
List *stripeMetadataList = readState->stripeList;
uint32 stripeCount = list_length(stripeMetadataList);
/* if we have read all stripes, return false */
@ -238,8 +235,7 @@ void
CStoreEndRead(TableReadState *readState)
{
MemoryContextDelete(readState->stripeReadContext);
list_free_deep(readState->datafileMetadata->stripeMetadataList);
pfree(readState->datafileMetadata);
list_free_deep(readState->stripeList);
pfree(readState);
}
@ -316,11 +312,9 @@ CStoreTableRowCount(Relation relation)
{
ListCell *stripeMetadataCell = NULL;
uint64 totalRowCount = 0;
List *stripeList = StripesForRelfilenode(relation->rd_node);
DataFileMetadata *datafileMetadata = ReadDataFileMetadata(relation->rd_node.relNode,
false);
foreach(stripeMetadataCell, datafileMetadata->stripeMetadataList)
foreach(stripeMetadataCell, stripeList)
{
StripeMetadata *stripeMetadata = (StripeMetadata *) lfirst(stripeMetadataCell);
totalRowCount += stripeMetadata->rowCount;
@ -345,7 +339,7 @@ LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata,
bool *projectedColumnMask = ProjectedColumnMask(columnCount, projectedColumnList);
StripeSkipList *stripeSkipList = ReadStripeSkipList(relation->rd_node.relNode,
StripeSkipList *stripeSkipList = ReadStripeSkipList(relation->rd_node,
stripeMetadata->id,
tupleDescriptor,
stripeMetadata->blockCount);
@ -1009,7 +1003,7 @@ ColumnDefaultValue(TupleConstr *tupleConstraints, Form_pg_attribute attributeFor
}
static StringInfo
StringInfo
ReadFromSmgr(Relation rel, uint64 offset, uint32 size)
{
StringInfo resultBuffer = makeStringInfo();

View File

@ -106,7 +106,6 @@ static void CStoreTableAMProcessUtility(PlannedStmt *plannedStatement,
char *completionTag);
#endif
static bool IsCStoreTableAmTable(Oid relationId);
static bool ConditionalLockRelationWithTimeout(Relation rel, LOCKMODE lockMode,
int timeout, int retryInterval);
static void LogRelationStats(Relation rel, int elevel);
@ -542,26 +541,30 @@ cstore_relation_set_new_filenode(Relation rel,
MarkRelfilenodeDropped(oldRelfilenode, GetCurrentSubTransactionId());
/* delete old relfilenode metadata */
DeleteDataFileMetadataRowIfExists(rel->rd_node.relNode);
DeleteMetadataRows(rel->rd_node);
Assert(persistence == RELPERSISTENCE_PERMANENT);
*freezeXid = RecentXmin;
*minmulti = GetOldestMultiXactId();
SMgrRelation srel = RelationCreateStorage(*newrnode, persistence);
InitCStoreDataFileMetadata(newrnode->relNode);
InitColumnarOptions(rel->rd_id);
smgrclose(srel);
/* we will lazily initialize metadata in first stripe reservation */
}
static void
cstore_relation_nontransactional_truncate(Relation rel)
{
Oid relfilenode = rel->rd_node.relNode;
RelFileNode relfilenode = rel->rd_node;
NonTransactionDropWriteState(relfilenode);
NonTransactionDropWriteState(relfilenode.relNode);
/* Delete old relfilenode metadata */
DeleteMetadataRows(relfilenode);
/*
* No need to set new relfilenode, since the table was created in this
@ -572,9 +575,7 @@ cstore_relation_nontransactional_truncate(Relation rel)
*/
RelationTruncate(rel, 0);
/* Delete old relfilenode metadata and recreate it */
DeleteDataFileMetadataRowIfExists(rel->rd_node.relNode);
InitCStoreDataFileMetadata(rel->rd_node.relNode);
/* we will lazily initialize new metadata in first stripe reservation */
}
@ -673,11 +674,14 @@ cstore_vacuum_rel(Relation rel, VacuumParams *params,
}
/*
* LogRelationStats logs statistics as the output of the VACUUM VERBOSE.
*/
static void
LogRelationStats(Relation rel, int elevel)
{
ListCell *stripeMetadataCell = NULL;
Oid relfilenode = rel->rd_node.relNode;
RelFileNode relfilenode = rel->rd_node;
StringInfo infoBuf = makeStringInfo();
int compressionStats[COMPRESSION_COUNT] = { 0 };
@ -687,10 +691,10 @@ LogRelationStats(Relation rel, int elevel)
TupleDesc tupdesc = RelationGetDescr(rel);
uint64 droppedBlocksWithData = 0;
DataFileMetadata *datafileMetadata = ReadDataFileMetadata(relfilenode, false);
int stripeCount = list_length(datafileMetadata->stripeMetadataList);
List *stripeList = StripesForRelfilenode(relfilenode);
int stripeCount = list_length(stripeList);
foreach(stripeMetadataCell, datafileMetadata->stripeMetadataList)
foreach(stripeMetadataCell, stripeList)
{
StripeMetadata *stripe = lfirst(stripeMetadataCell);
StripeSkipList *skiplist = ReadStripeSkipList(relfilenode, stripe->id,
@ -726,6 +730,10 @@ LogRelationStats(Relation rel, int elevel)
uint64 relPages = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM);
RelationCloseSmgr(rel);
Datum storageId = DirectFunctionCall1(columnar_relation_storageid,
ObjectIdGetDatum(RelationGetRelid(rel)));
appendStringInfo(infoBuf, "storage id: %ld\n", DatumGetInt64(storageId));
appendStringInfo(infoBuf, "total file size: %ld, total data size: %ld\n",
relPages * BLCKSZ, totalStripeLength);
appendStringInfo(infoBuf,
@ -803,7 +811,7 @@ TruncateCStore(Relation rel, int elevel)
* we're truncating.
*/
SmgrAddr highestPhysicalAddress =
logical_to_smgr(GetHighestUsedAddress(rel->rd_node.relNode));
logical_to_smgr(GetHighestUsedAddress(rel->rd_node));
BlockNumber new_rel_pages = highestPhysicalAddress.blockno + 1;
if (new_rel_pages == old_rel_pages)
@ -1171,11 +1179,11 @@ CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId, Oid objectId
* tableam tables storage is managed by postgres.
*/
Relation rel = table_open(objectId, AccessExclusiveLock);
Oid relfilenode = rel->rd_node.relNode;
DeleteDataFileMetadataRowIfExists(relfilenode);
RelFileNode relfilenode = rel->rd_node;
DeleteMetadataRows(relfilenode);
DeleteColumnarTableOptions(rel->rd_id, true);
MarkRelfilenodeDropped(relfilenode, GetCurrentSubTransactionId());
MarkRelfilenodeDropped(relfilenode.relNode, GetCurrentSubTransactionId());
/* keep the lock since we did physical changes to the relation */
table_close(rel, NoLock);
@ -1187,7 +1195,7 @@ CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId, Oid objectId
* IsCStoreTableAmTable returns true if relation has cstore_tableam
* access method. This can be called before extension creation.
*/
static bool
bool
IsCStoreTableAmTable(Oid relationId)
{
if (!OidIsValid(relationId))

View File

@ -65,12 +65,10 @@ CStoreBeginWrite(RelFileNode relfilenode,
uint64 stripeMaxRowCount, uint32 blockRowCount,
TupleDesc tupleDescriptor)
{
uint32 columnIndex = 0;
/* get comparison function pointers for each of the columns */
uint32 columnCount = tupleDescriptor->natts;
FmgrInfo **comparisonFunctionArray = palloc0(columnCount * sizeof(FmgrInfo *));
for (columnIndex = 0; columnIndex < columnCount; columnIndex++)
for (uint32 columnIndex = 0; columnIndex < columnCount; columnIndex++)
{
FmgrInfo *comparisonFunction = NULL;
FormData_pg_attribute *attributeForm = TupleDescAttr(tupleDescriptor,
@ -323,7 +321,7 @@ CreateEmptyStripeSkipList(uint32 stripeMaxRowCount, uint32 blockRowCount,
}
static void
void
WriteToSmgr(Relation rel, uint64 logicalOffset, char *data, uint32 dataLength)
{
uint64 remaining = dataLength;
@ -521,7 +519,7 @@ FlushStripe(TableWriteState *writeState)
}
/* create skip list and footer buffers */
SaveStripeSkipList(relation->rd_node.relNode,
SaveStripeSkipList(writeState->relfilenode,
stripeMetadata.id,
stripeSkipList, tupleDescriptor);

View File

@ -3,6 +3,8 @@
CREATE SCHEMA cstore;
SET search_path TO cstore;
CREATE SEQUENCE storageid_seq MINVALUE 10000000000 NO CYCLE;
CREATE TABLE options (
regclass regclass NOT NULL PRIMARY KEY,
block_row_count int NOT NULL,
@ -12,17 +14,8 @@ CREATE TABLE options (
COMMENT ON TABLE options IS 'columnar table specific options, maintained by alter_columnar_table_set';
CREATE TABLE cstore_data_files (
relfilenode oid NOT NULL,
version_major bigint NOT NULL,
version_minor bigint NOT NULL,
PRIMARY KEY (relfilenode)
) WITH (user_catalog_table = true);
COMMENT ON TABLE cstore_data_files IS 'CStore data file wide metadata';
CREATE TABLE cstore_stripes (
relfilenode oid NOT NULL,
storageid bigint NOT NULL,
stripe bigint NOT NULL,
file_offset bigint NOT NULL,
data_length bigint NOT NULL,
@ -30,14 +23,13 @@ CREATE TABLE cstore_stripes (
block_count int NOT NULL,
block_row_count int NOT NULL,
row_count bigint NOT NULL,
PRIMARY KEY (relfilenode, stripe),
FOREIGN KEY (relfilenode) REFERENCES cstore_data_files(relfilenode) ON DELETE CASCADE INITIALLY DEFERRED
PRIMARY KEY (storageid, stripe)
) WITH (user_catalog_table = true);
COMMENT ON TABLE cstore_stripes IS 'CStore per stripe metadata';
CREATE TABLE cstore_skipnodes (
relfilenode oid NOT NULL,
storageid bigint NOT NULL,
stripe bigint NOT NULL,
attr int NOT NULL,
block int NOT NULL,
@ -49,8 +41,8 @@ CREATE TABLE cstore_skipnodes (
exists_stream_offset bigint NOT NULL,
exists_stream_length bigint NOT NULL,
value_compression_type int NOT NULL,
PRIMARY KEY (relfilenode, stripe, attr, block),
FOREIGN KEY (relfilenode, stripe) REFERENCES cstore_stripes(relfilenode, stripe) ON DELETE CASCADE INITIALLY DEFERRED
PRIMARY KEY (storageid, stripe, attr, block),
FOREIGN KEY (storageid, stripe) REFERENCES cstore_stripes(storageid, stripe) ON DELETE CASCADE INITIALLY DEFERRED
) WITH (user_catalog_table = true);
COMMENT ON TABLE cstore_skipnodes IS 'CStore per block metadata';

View File

@ -29,8 +29,8 @@ END$proc$;
DROP TABLE cstore_skipnodes;
DROP TABLE cstore_stripes;
DROP TABLE cstore_data_files;
DROP TABLE options;
DROP SEQUENCE storageid_seq;
DROP FUNCTION citus_internal.cstore_ensure_objects_exist();

View File

@ -49,6 +49,7 @@
#define CSTORE_TUPLE_COST_MULTIPLIER 10
#define CSTORE_POSTSCRIPT_SIZE_LENGTH 1
#define CSTORE_POSTSCRIPT_SIZE_MAX 256
#define CSTORE_BYTES_PER_PAGE (BLCKSZ - SizeOfPageHeaderData)
/* Enumaration for cstore file's compression method */
typedef enum
@ -195,7 +196,7 @@ typedef struct StripeBuffers
/* TableReadState represents state of a cstore file read operation. */
typedef struct TableReadState
{
DataFileMetadata *datafileMetadata;
List *stripeList;
StripeMetadata *currentStripeMetadata;
TupleDesc tupleDescriptor;
Relation relation;
@ -289,21 +290,25 @@ extern bool InitColumnarOptions(Oid regclass);
extern void SetColumnarOptions(Oid regclass, ColumnarOptions *options);
extern bool DeleteColumnarTableOptions(Oid regclass, bool missingOk);
extern bool ReadColumnarOptions(Oid regclass, ColumnarOptions *options);
extern void DeleteDataFileMetadataRowIfExists(Oid relfilenode);
extern void InitCStoreDataFileMetadata(Oid relfilenode);
extern void UpdateCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int
stripeRowCount, CompressionType compression);
extern DataFileMetadata * ReadDataFileMetadata(Oid relfilenode, bool missingOk);
extern uint64 GetHighestUsedAddress(Oid relfilenode);
extern void WriteToSmgr(Relation relation, uint64 logicalOffset,
char *data, uint32 dataLength);
extern StringInfo ReadFromSmgr(Relation rel, uint64 offset, uint32 size);
extern bool IsCStoreTableAmTable(Oid relationId);
/* cstore_metadata_tables.c */
extern void DeleteMetadataRows(RelFileNode relfilenode);
extern List * StripesForRelfilenode(RelFileNode relfilenode);
extern uint64 GetHighestUsedAddress(RelFileNode relfilenode);
extern StripeMetadata ReserveStripe(Relation rel, uint64 size,
uint64 rowCount, uint64 columnCount,
uint64 blockCount, uint64 blockRowCount);
extern void SaveStripeSkipList(Oid relfilenode, uint64 stripe,
extern void SaveStripeSkipList(RelFileNode relfilenode, uint64 stripe,
StripeSkipList *stripeSkipList,
TupleDesc tupleDescriptor);
extern StripeSkipList * ReadStripeSkipList(Oid relfilenode, uint64 stripe,
extern StripeSkipList * ReadStripeSkipList(RelFileNode relfilenode, uint64 stripe,
TupleDesc tupleDescriptor,
uint32 blockCount);
extern Datum columnar_relation_storageid(PG_FUNCTION_ARGS);
/* write_state_management.c */
@ -335,11 +340,10 @@ typedef struct SmgrAddr
static inline SmgrAddr
logical_to_smgr(uint64 logicalOffset)
{
uint64 bytes_per_page = BLCKSZ - SizeOfPageHeaderData;
SmgrAddr addr;
addr.blockno = logicalOffset / bytes_per_page;
addr.offset = SizeOfPageHeaderData + (logicalOffset % bytes_per_page);
addr.blockno = logicalOffset / CSTORE_BYTES_PER_PAGE;
addr.offset = SizeOfPageHeaderData + (logicalOffset % CSTORE_BYTES_PER_PAGE);
return addr;
}
@ -351,8 +355,7 @@ logical_to_smgr(uint64 logicalOffset)
static inline uint64
smgr_to_logical(SmgrAddr addr)
{
uint64 bytes_per_page = BLCKSZ - SizeOfPageHeaderData;
return bytes_per_page * addr.blockno + addr.offset - SizeOfPageHeaderData;
return CSTORE_BYTES_PER_PAGE * addr.blockno + addr.offset - SizeOfPageHeaderData;
}

View File

@ -190,3 +190,6 @@ s/relation with OID [0-9]+ does not exist/relation with OID XXXX does not exist/
# ignore timing statistics for VACUUM VERBOSE
/CPU: user: .*s, system: .*s, elapsed: .*s/d
# normalize storage id of columnar tables
s/^storage id: [0-9]+$/storage id: xxxxx/g

View File

@ -23,7 +23,6 @@
/hyperscale_tutorial.out
/am_block_filtering.out
/am_copyto.out
/am_create.out
/am_data_types.out
/am_load.out
/fdw_block_filtering.out

View File

@ -13,8 +13,12 @@ CREATE TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT,
-- Test that querying an empty table works
ANALYZE contestant;
SELECT count(*) FROM contestant;
count
-------
count
---------------------------------------------------------------------
0
(1 row)
-- Utility functions to be used throughout tests
CREATE FUNCTION columnar_relation_storageid(relid oid) RETURNS bigint
LANGUAGE C STABLE STRICT
AS 'citus', $$columnar_relation_storageid$$;

View File

@ -12,12 +12,12 @@
-- 'postgres' directory is excluded from comparison to have the same result.
-- store postgres database oid
SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset
SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset
SELECT count(distinct storageid) AS cstore_stripes_before_drop FROM cstore.cstore_stripes \gset
-- DROP cstore_fdw tables
DROP TABLE contestant;
DROP TABLE contestant_compressed;
-- make sure DROP deletes metadata
SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files;
SELECT :cstore_stripes_before_drop - count(distinct storageid) FROM cstore.cstore_stripes;
?column?
---------------------------------------------------------------------
2
@ -26,10 +26,11 @@ SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files;
-- Create a cstore_fdw table under a schema and drop it.
CREATE SCHEMA test_schema;
CREATE TABLE test_schema.test_table(data int) USING columnar;
SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset
INSERT INTO test_schema.test_table VALUES (1);
SELECT count(*) AS cstore_stripes_before_drop FROM cstore.cstore_stripes \gset
DROP SCHEMA test_schema CASCADE;
NOTICE: drop cascades to table test_schema.test_table
SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files;
SELECT :cstore_stripes_before_drop - count(distinct storageid) FROM cstore.cstore_stripes;
?column?
---------------------------------------------------------------------
1

View File

@ -65,20 +65,15 @@ SELECT * FROM t_view a ORDER BY a;
(6 rows)
-- verify that we have created metadata entries for the materialized view
SELECT relfilenode FROM pg_class WHERE relname='t_view' \gset
SELECT count(*) FROM cstore.cstore_data_files WHERE relfilenode=:relfilenode;
SELECT columnar_relation_storageid(oid) AS storageid
FROM pg_class WHERE relname='t_view' \gset
SELECT count(*) FROM cstore.cstore_stripes WHERE storageid=:storageid;
count
---------------------------------------------------------------------
1
(1 row)
SELECT count(*) FROM cstore.cstore_stripes WHERE relfilenode=:relfilenode;
count
---------------------------------------------------------------------
1
(1 row)
SELECT count(*) FROM cstore.cstore_skipnodes WHERE relfilenode=:relfilenode;
SELECT count(*) FROM cstore.cstore_skipnodes WHERE storageid=:storageid;
count
---------------------------------------------------------------------
3
@ -87,19 +82,13 @@ SELECT count(*) FROM cstore.cstore_skipnodes WHERE relfilenode=:relfilenode;
DROP TABLE t CASCADE;
NOTICE: drop cascades to materialized view t_view
-- dropping must remove metadata
SELECT count(*) FROM cstore.cstore_data_files WHERE relfilenode=:relfilenode;
SELECT count(*) FROM cstore.cstore_stripes WHERE storageid=:storageid;
count
---------------------------------------------------------------------
0
(1 row)
SELECT count(*) FROM cstore.cstore_stripes WHERE relfilenode=:relfilenode;
count
---------------------------------------------------------------------
0
(1 row)
SELECT count(*) FROM cstore.cstore_skipnodes WHERE relfilenode=:relfilenode;
SELECT count(*) FROM cstore.cstore_skipnodes WHERE storageid=:storageid;
count
---------------------------------------------------------------------
0

View File

@ -12,7 +12,7 @@ INSERT INTO t2 SELECT i, f(i) FROM generate_series(1, 5) i;
-- there are no subtransactions, so above statement should batch
-- INSERTs inside the UDF and create on stripe per table.
SELECT relname, count(*) FROM cstore.cstore_stripes a, pg_class b
WHERE a.relfilenode=b.relfilenode AND relname IN ('t1', 't2')
WHERE columnar_relation_storageid(b.oid)=a.storageid AND relname IN ('t1', 't2')
GROUP BY relname
ORDER BY relname;
relname | count

View File

@ -2,6 +2,9 @@
-- Testing we handle rollbacks properly
--
CREATE TABLE t(a int, b int) USING columnar;
CREATE VIEW t_stripes AS
SELECT * FROM cstore.cstore_stripes a, pg_class b
WHERE a.storageid = columnar_relation_storageid(b.oid) AND b.relname = 't';
BEGIN;
INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i;
ROLLBACK;
@ -12,8 +15,7 @@ SELECT count(*) FROM t;
(1 row)
-- check stripe metadata also have been rolled-back
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b
WHERE a.relfilenode = b.relfilenode AND b.relname = 't';
SELECT count(*) FROM t_stripes;
count
---------------------------------------------------------------------
0
@ -26,8 +28,7 @@ SELECT count(*) FROM t;
10
(1 row)
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b
WHERE a.relfilenode = b.relfilenode AND b.relname = 't';
SELECT count(*) FROM t_stripes;
count
---------------------------------------------------------------------
1
@ -73,11 +74,11 @@ SELECT count(*) FROM t;
20
(1 row)
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b
WHERE a.relfilenode = b.relfilenode AND b.relname = 't';
SELECT count(*) FROM t_stripes;
count
---------------------------------------------------------------------
2
(1 row)
DROP TABLE t;
DROP VIEW t_stripes;

View File

@ -15,7 +15,7 @@ CREATE TABLE cstore_truncate_test_second (a int, b int) USING columnar;
-- COMPRESSED
CREATE TABLE cstore_truncate_test_compressed (a int, b int) USING columnar;
CREATE TABLE cstore_truncate_test_regular (a int, b int);
SELECT count(*) AS cstore_data_files_before_truncate FROM cstore.cstore_data_files \gset
SELECT count(distinct storageid) AS cstore_data_files_before_truncate FROM cstore.cstore_stripes \gset
INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a;
set cstore.compression = 'pglz';
INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a;
@ -147,7 +147,7 @@ SELECT * from cstore_truncate_test;
(0 rows)
-- make sure TRUNATE deletes metadata for old relfilenode
SELECT :cstore_data_files_before_truncate - count(*) FROM cstore.cstore_data_files;
SELECT :cstore_data_files_before_truncate - count(distinct storageid) FROM cstore.cstore_stripes;
?column?
---------------------------------------------------------------------
0
@ -161,7 +161,7 @@ TRUNCATE cstore_same_transaction_truncate;
INSERT INTO cstore_same_transaction_truncate SELECT * FROM generate_series(20, 23);
COMMIT;
-- should output "1" for the newly created relation
SELECT count(*) - :cstore_data_files_before_truncate FROM cstore.cstore_data_files;
SELECT count(distinct storageid) - :cstore_data_files_before_truncate FROM cstore.cstore_stripes;
?column?
---------------------------------------------------------------------
1

View File

@ -1,6 +1,9 @@
SELECT count(*) AS columnar_table_count FROM cstore.cstore_data_files \gset
SELECT count(distinct storageid) AS columnar_table_count FROM cstore.cstore_stripes \gset
CREATE TABLE t(a int, b int) USING columnar;
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t';
CREATE VIEW t_stripes AS
SELECT * FROM cstore.cstore_stripes a, pg_class b
WHERE a.storageid = columnar_relation_storageid(b.oid) AND b.relname='t';
SELECT count(*) FROM t_stripes;
count
---------------------------------------------------------------------
0
@ -15,7 +18,7 @@ SELECT sum(a), sum(b) FROM t;
465 | 9455
(1 row)
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t';
SELECT count(*) FROM t_stripes;
count
---------------------------------------------------------------------
3
@ -29,7 +32,7 @@ SELECT sum(a), sum(b) FROM t;
465 | 9455
(1 row)
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t';
SELECT count(*) FROM t_stripes;
count
---------------------------------------------------------------------
1
@ -49,7 +52,7 @@ SELECT sum(a), sum(b) FROM t;
3126715 | 6261955
(1 row)
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t';
SELECT count(*) FROM t_stripes;
count
---------------------------------------------------------------------
4
@ -62,7 +65,7 @@ SELECT sum(a), sum(b) FROM t;
3126715 | 6261955
(1 row)
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t';
SELECT count(*) FROM t_stripes;
count
---------------------------------------------------------------------
3
@ -70,7 +73,9 @@ SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.r
-- VACUUM FULL doesn't reclaim dropped columns, but converts them to NULLs
ALTER TABLE t DROP COLUMN a;
SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' ORDER BY 1, 2, 3;
SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL
FROM cstore.cstore_skipnodes a, pg_class b
WHERE a.storageid = columnar_relation_storageid(b.oid) AND b.relname='t' ORDER BY 1, 2, 3;
stripe | attr | block | ?column? | ?column?
---------------------------------------------------------------------
1 | 1 | 0 | f | f
@ -82,7 +87,9 @@ SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cs
(6 rows)
VACUUM FULL t;
SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' ORDER BY 1, 2, 3;
SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL
FROM cstore.cstore_skipnodes a, pg_class b
WHERE a.storageid = columnar_relation_storageid(b.oid) AND b.relname='t' ORDER BY 1, 2, 3;
stripe | attr | block | ?column? | ?column?
---------------------------------------------------------------------
1 | 1 | 0 | t | t
@ -94,7 +101,7 @@ SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cs
(6 rows)
-- Make sure we cleaned-up the transient table metadata after VACUUM FULL commands
SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files;
SELECT count(distinct storageid) - :columnar_table_count FROM cstore.cstore_stripes;
?column?
---------------------------------------------------------------------
1
@ -112,14 +119,14 @@ SELECT count(*) FROM t;
SELECT pg_size_pretty(pg_relation_size('t'));
pg_size_pretty
---------------------------------------------------------------------
32 kB
40 kB
(1 row)
INSERT INTO t SELECT i FROM generate_series(1, 10000) i;
SELECT pg_size_pretty(pg_relation_size('t'));
pg_size_pretty
---------------------------------------------------------------------
112 kB
120 kB
(1 row)
SELECT count(*) FROM t;
@ -134,22 +141,23 @@ ROLLBACK TO SAVEPOINT s1;
SELECT pg_size_pretty(pg_relation_size('t'));
pg_size_pretty
---------------------------------------------------------------------
112 kB
120 kB
(1 row)
COMMIT;
-- vacuum should truncate the relation to the usable space
VACUUM VERBOSE t;
INFO: statistics for "t":
total file size: 114688, total data size: 10754
storage id: xxxxx
total file size: 122880, total data size: 10754
total row count: 2530, stripe count: 3, average rows per stripe: 843
block count: 3, containing data for dropped columns: 0, none compressed: 3, pglz compressed: 0
INFO: "t": truncated 14 to 4 pages
INFO: "t": truncated 15 to 5 pages
SELECT pg_size_pretty(pg_relation_size('t'));
pg_size_pretty
---------------------------------------------------------------------
32 kB
40 kB
(1 row)
SELECT count(*) FROM t;
@ -187,7 +195,8 @@ INSERT INTO t SELECT i / 5 FROM generate_series(1, 1500) i;
COMMIT;
VACUUM VERBOSE t;
INFO: statistics for "t":
total file size: 49152, total data size: 18808
storage id: xxxxx
total file size: 57344, total data size: 18808
total row count: 5530, stripe count: 5, average rows per stripe: 1106
block count: 7, containing data for dropped columns: 0, none compressed: 5, pglz compressed: 2
@ -203,7 +212,8 @@ INSERT INTO t SELECT 1, i / 5 FROM generate_series(1, 1500) i;
ALTER TABLE t DROP COLUMN c;
VACUUM VERBOSE t;
INFO: statistics for "t":
total file size: 65536, total data size: 31372
storage id: xxxxx
total file size: 73728, total data size: 31372
total row count: 7030, stripe count: 6, average rows per stripe: 1171
block count: 11, containing data for dropped columns: 2, none compressed: 9, pglz compressed: 2
@ -219,13 +229,15 @@ SELECT alter_columnar_table_set('t', compression => 'pglz');
VACUUM FULL t;
VACUUM VERBOSE t;
INFO: statistics for "t":
total file size: 49152, total data size: 15728
storage id: xxxxx
total file size: 57344, total data size: 15728
total row count: 7030, stripe count: 4, average rows per stripe: 1757
block count: 8, containing data for dropped columns: 0, none compressed: 2, pglz compressed: 6
DROP TABLE t;
DROP VIEW t_stripes;
-- Make sure we cleaned the metadata for t too
SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files;
SELECT count(distinct storageid) - :columnar_table_count FROM cstore.cstore_stripes;
?column?
---------------------------------------------------------------------
0

View File

@ -11,7 +11,8 @@ step s1-insert:
INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i;
s2: INFO: statistics for "test_vacuum_vs_insert":
total file size: 16384, total data size: 26
storage id: xxxxx
total file size: 24576, total data size: 26
total row count: 3, stripe count: 1, average rows per stripe: 3
block count: 2, containing data for dropped columns: 0, none compressed: 2, pglz compressed: 0
@ -51,7 +52,7 @@ step s1-commit:
COMMIT;
s2: INFO: vacuuming "public.test_vacuum_vs_insert"
s2: INFO: "test_vacuum_vs_insert": found 0 removable, 6 nonremovable row versions in 3 pages
s2: INFO: "test_vacuum_vs_insert": found 0 removable, 6 nonremovable row versions in 4 pages
DETAIL: 0 dead row versions cannot be removed yet.
step s2-vacuum-full: <... completed>
step s2-select:

View File

@ -483,7 +483,7 @@ SELECT * FROM print_extension_changes();
| function citus_internal.cstore_ensure_objects_exist()
| function cstore.columnar_handler(internal)
| schema cstore
| table cstore.cstore_data_files
| sequence cstore.storageid_seq
| table cstore.cstore_skipnodes
| table cstore.cstore_stripes
| table cstore.options

View File

@ -479,7 +479,7 @@ SELECT * FROM print_extension_changes();
---------------------------------------------------------------------
| function citus_internal.cstore_ensure_objects_exist()
| schema cstore
| table cstore.cstore_data_files
| sequence cstore.storageid_seq
| table cstore.cstore_skipnodes
| table cstore.cstore_stripes
| table cstore.options

View File

@ -185,13 +185,13 @@ ORDER BY 1;
schema citus
schema citus_internal
schema cstore
sequence cstore.storageid_seq
sequence pg_dist_colocationid_seq
sequence pg_dist_groupid_seq
sequence pg_dist_node_nodeid_seq
sequence pg_dist_placement_placementid_seq
sequence pg_dist_shardid_seq
table citus.pg_dist_object
table cstore.cstore_data_files
table cstore.cstore_skipnodes
table cstore.cstore_stripes
table cstore.options

View File

@ -181,13 +181,13 @@ ORDER BY 1;
schema citus
schema citus_internal
schema cstore
sequence cstore.storageid_seq
sequence pg_dist_colocationid_seq
sequence pg_dist_groupid_seq
sequence pg_dist_node_nodeid_seq
sequence pg_dist_placement_placementid_seq
sequence pg_dist_shardid_seq
table citus.pg_dist_object
table cstore.cstore_data_files
table cstore.cstore_skipnodes
table cstore.cstore_stripes
table cstore.options

View File

@ -22,7 +22,6 @@
/hyperscale_tutorial.sql
/am_block_filtering.sql
/am_copyto.sql
/am_create.sql
/am_data_types.sql
/am_load.sql
/fdw_block_filtering.sql

View File

@ -18,3 +18,8 @@ CREATE TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT,
-- Test that querying an empty table works
ANALYZE contestant;
SELECT count(*) FROM contestant;
-- Utility functions to be used throughout tests
CREATE FUNCTION columnar_relation_storageid(relid oid) RETURNS bigint
LANGUAGE C STABLE STRICT
AS 'citus', $$columnar_relation_storageid$$;

View File

@ -15,22 +15,23 @@
-- store postgres database oid
SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset
SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset
SELECT count(distinct storageid) AS cstore_stripes_before_drop FROM cstore.cstore_stripes \gset
-- DROP cstore_fdw tables
DROP TABLE contestant;
DROP TABLE contestant_compressed;
-- make sure DROP deletes metadata
SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files;
SELECT :cstore_stripes_before_drop - count(distinct storageid) FROM cstore.cstore_stripes;
-- Create a cstore_fdw table under a schema and drop it.
CREATE SCHEMA test_schema;
CREATE TABLE test_schema.test_table(data int) USING columnar;
INSERT INTO test_schema.test_table VALUES (1);
SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset
SELECT count(*) AS cstore_stripes_before_drop FROM cstore.cstore_stripes \gset
DROP SCHEMA test_schema CASCADE;
SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files;
SELECT :cstore_stripes_before_drop - count(distinct storageid) FROM cstore.cstore_stripes;
SELECT current_database() datname \gset

View File

@ -33,15 +33,14 @@ WHERE regclass = 't_view'::regclass;
SELECT * FROM t_view a ORDER BY a;
-- verify that we have created metadata entries for the materialized view
SELECT relfilenode FROM pg_class WHERE relname='t_view' \gset
SELECT columnar_relation_storageid(oid) AS storageid
FROM pg_class WHERE relname='t_view' \gset
SELECT count(*) FROM cstore.cstore_data_files WHERE relfilenode=:relfilenode;
SELECT count(*) FROM cstore.cstore_stripes WHERE relfilenode=:relfilenode;
SELECT count(*) FROM cstore.cstore_skipnodes WHERE relfilenode=:relfilenode;
SELECT count(*) FROM cstore.cstore_stripes WHERE storageid=:storageid;
SELECT count(*) FROM cstore.cstore_skipnodes WHERE storageid=:storageid;
DROP TABLE t CASCADE;
-- dropping must remove metadata
SELECT count(*) FROM cstore.cstore_data_files WHERE relfilenode=:relfilenode;
SELECT count(*) FROM cstore.cstore_stripes WHERE relfilenode=:relfilenode;
SELECT count(*) FROM cstore.cstore_skipnodes WHERE relfilenode=:relfilenode;
SELECT count(*) FROM cstore.cstore_stripes WHERE storageid=:storageid;
SELECT count(*) FROM cstore.cstore_skipnodes WHERE storageid=:storageid;

View File

@ -16,7 +16,7 @@ INSERT INTO t2 SELECT i, f(i) FROM generate_series(1, 5) i;
-- there are no subtransactions, so above statement should batch
-- INSERTs inside the UDF and create on stripe per table.
SELECT relname, count(*) FROM cstore.cstore_stripes a, pg_class b
WHERE a.relfilenode=b.relfilenode AND relname IN ('t1', 't2')
WHERE columnar_relation_storageid(b.oid)=a.storageid AND relname IN ('t1', 't2')
GROUP BY relname
ORDER BY relname;

View File

@ -4,20 +4,22 @@
CREATE TABLE t(a int, b int) USING columnar;
CREATE VIEW t_stripes AS
SELECT * FROM cstore.cstore_stripes a, pg_class b
WHERE a.storageid = columnar_relation_storageid(b.oid) AND b.relname = 't';
BEGIN;
INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i;
ROLLBACK;
SELECT count(*) FROM t;
-- check stripe metadata also have been rolled-back
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b
WHERE a.relfilenode = b.relfilenode AND b.relname = 't';
SELECT count(*) FROM t_stripes;
INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i;
SELECT count(*) FROM t;
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b
WHERE a.relfilenode = b.relfilenode AND b.relname = 't';
SELECT count(*) FROM t_stripes;
-- savepoint rollback
BEGIN;
@ -36,7 +38,7 @@ COMMIT;
SELECT count(*) FROM t;
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b
WHERE a.relfilenode = b.relfilenode AND b.relname = 't';
SELECT count(*) FROM t_stripes;
DROP TABLE t;
DROP VIEW t_stripes;

View File

@ -13,7 +13,7 @@ CREATE TABLE cstore_truncate_test_second (a int, b int) USING columnar;
CREATE TABLE cstore_truncate_test_compressed (a int, b int) USING columnar;
CREATE TABLE cstore_truncate_test_regular (a int, b int);
SELECT count(*) AS cstore_data_files_before_truncate FROM cstore.cstore_data_files \gset
SELECT count(distinct storageid) AS cstore_data_files_before_truncate FROM cstore.cstore_stripes \gset
INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a;
@ -63,7 +63,7 @@ TRUNCATE TABLE cstore_truncate_test;
SELECT * from cstore_truncate_test;
-- make sure TRUNATE deletes metadata for old relfilenode
SELECT :cstore_data_files_before_truncate - count(*) FROM cstore.cstore_data_files;
SELECT :cstore_data_files_before_truncate - count(distinct storageid) FROM cstore.cstore_stripes;
-- test if truncation in the same transaction that created the table works properly
BEGIN;
@ -74,7 +74,7 @@ INSERT INTO cstore_same_transaction_truncate SELECT * FROM generate_series(20, 2
COMMIT;
-- should output "1" for the newly created relation
SELECT count(*) - :cstore_data_files_before_truncate FROM cstore.cstore_data_files;
SELECT count(distinct storageid) - :cstore_data_files_before_truncate FROM cstore.cstore_stripes;
SELECT * FROM cstore_same_transaction_truncate;
DROP TABLE cstore_same_transaction_truncate;

View File

@ -1,45 +1,53 @@
SELECT count(*) AS columnar_table_count FROM cstore.cstore_data_files \gset
SELECT count(distinct storageid) AS columnar_table_count FROM cstore.cstore_stripes \gset
CREATE TABLE t(a int, b int) USING columnar;
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t';
CREATE VIEW t_stripes AS
SELECT * FROM cstore.cstore_stripes a, pg_class b
WHERE a.storageid = columnar_relation_storageid(b.oid) AND b.relname='t';
SELECT count(*) FROM t_stripes;
INSERT INTO t SELECT i, i * i FROM generate_series(1, 10) i;
INSERT INTO t SELECT i, i * i FROM generate_series(11, 20) i;
INSERT INTO t SELECT i, i * i FROM generate_series(21, 30) i;
SELECT sum(a), sum(b) FROM t;
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t';
SELECT count(*) FROM t_stripes;
-- vacuum full should merge stripes together
VACUUM FULL t;
SELECT sum(a), sum(b) FROM t;
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t';
SELECT count(*) FROM t_stripes;
-- test the case when all data cannot fit into a single stripe
SELECT alter_columnar_table_set('t', stripe_row_count => 1000);
INSERT INTO t SELECT i, 2 * i FROM generate_series(1,2500) i;
SELECT sum(a), sum(b) FROM t;
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t';
SELECT count(*) FROM t_stripes;
VACUUM FULL t;
SELECT sum(a), sum(b) FROM t;
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t';
SELECT count(*) FROM t_stripes;
-- VACUUM FULL doesn't reclaim dropped columns, but converts them to NULLs
ALTER TABLE t DROP COLUMN a;
SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' ORDER BY 1, 2, 3;
SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL
FROM cstore.cstore_skipnodes a, pg_class b
WHERE a.storageid = columnar_relation_storageid(b.oid) AND b.relname='t' ORDER BY 1, 2, 3;
VACUUM FULL t;
SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' ORDER BY 1, 2, 3;
SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL
FROM cstore.cstore_skipnodes a, pg_class b
WHERE a.storageid = columnar_relation_storageid(b.oid) AND b.relname='t' ORDER BY 1, 2, 3;
-- Make sure we cleaned-up the transient table metadata after VACUUM FULL commands
SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files;
SELECT count(distinct storageid) - :columnar_table_count FROM cstore.cstore_stripes;
-- do this in a transaction so concurrent autovacuum doesn't interfere with results
BEGIN;
@ -99,6 +107,7 @@ VACUUM FULL t;
VACUUM VERBOSE t;
DROP TABLE t;
DROP VIEW t_stripes;
-- Make sure we cleaned the metadata for t too
SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files;
SELECT count(distinct storageid) - :columnar_table_count FROM cstore.cstore_stripes;