Associate column store metadata with storage id (#4347)

pull/4371/head
Hadi Moshayedi 2020-11-30 18:01:43 -08:00 committed by GitHub
parent de22b633cb
commit a94e8c9cda
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
30 changed files with 414 additions and 345 deletions

View File

@ -215,15 +215,15 @@ static Cost
CStoreScanCost(RangeTblEntry *rte) CStoreScanCost(RangeTblEntry *rte)
{ {
Relation rel = RelationIdGetRelation(rte->relid); Relation rel = RelationIdGetRelation(rte->relid);
DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node.relNode, false); List *stripeList = StripesForRelfilenode(rel->rd_node);
RelationClose(rel);
uint32 maxColumnCount = 0; uint32 maxColumnCount = 0;
uint64 totalStripeSize = 0; uint64 totalStripeSize = 0;
ListCell *stripeMetadataCell = NULL; ListCell *stripeMetadataCell = NULL;
RelationClose(rel);
rel = NULL; rel = NULL;
foreach(stripeMetadataCell, metadata->stripeMetadataList) foreach(stripeMetadataCell, stripeList)
{ {
StripeMetadata *stripeMetadata = (StripeMetadata *) lfirst(stripeMetadataCell); StripeMetadata *stripeMetadata = (StripeMetadata *) lfirst(stripeMetadataCell);
totalStripeSize += stripeMetadata->dataLength; totalStripeSize += stripeMetadata->dataLength;

View File

@ -12,6 +12,7 @@
#include "safe_lib.h" #include "safe_lib.h"
#include "citus_version.h"
#include "columnar/cstore.h" #include "columnar/cstore.h"
#include "columnar/cstore_version_compat.h" #include "columnar/cstore_version_compat.h"
@ -27,6 +28,7 @@
#include "catalog/namespace.h" #include "catalog/namespace.h"
#include "commands/defrem.h" #include "commands/defrem.h"
#include "commands/trigger.h" #include "commands/trigger.h"
#include "distributed/metadata_cache.h"
#include "executor/executor.h" #include "executor/executor.h"
#include "executor/spi.h" #include "executor/spi.h"
#include "miscadmin.h" #include "miscadmin.h"
@ -41,6 +43,30 @@
#include "utils/memutils.h" #include "utils/memutils.h"
#include "utils/lsyscache.h" #include "utils/lsyscache.h"
#include "utils/rel.h" #include "utils/rel.h"
#include "utils/relfilenodemap.h"
/*
* Content of the first page in main fork, which stores metadata at file
* level.
*/
typedef struct ColumnarMetapage
{
/*
* Store version of file format used, so we can detect files from
* previous versions if we change file format.
*/
int versionMajor;
int versionMinor;
/*
* Each of the metadata table rows are identified by a storageId.
* We store it also in the main fork so we can link metadata rows
* with data files.
*/
uint64 storageId;
} ColumnarMetapage;
typedef struct typedef struct
{ {
@ -48,21 +74,18 @@ typedef struct
EState *estate; EState *estate;
} ModifyState; } ModifyState;
static void InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe); static void InsertStripeMetadataRow(uint64 storageId, StripeMetadata *stripe);
static void GetHighestUsedAddressAndId(Oid relfilenode, static void GetHighestUsedAddressAndId(uint64 storageId,
uint64 *highestUsedAddress, uint64 *highestUsedAddress,
uint64 *highestUsedId); uint64 *highestUsedId);
static List * ReadDataFileStripeList(Oid relfilenode, Snapshot snapshot); static List * ReadDataFileStripeList(uint64 storageId, Snapshot snapshot);
static Oid CStoreStripesRelationId(void); static Oid CStoreStripesRelationId(void);
static Oid CStoreStripesIndexRelationId(void); static Oid CStoreStripesIndexRelationId(void);
static Oid ColumnarOptionsRelationId(void); static Oid ColumnarOptionsRelationId(void);
static Oid ColumnarOptionsIndexRegclass(void); static Oid ColumnarOptionsIndexRegclass(void);
static Oid CStoreDataFilesRelationId(void);
static Oid CStoreDataFilesIndexRelationId(void);
static Oid CStoreSkipNodesRelationId(void); static Oid CStoreSkipNodesRelationId(void);
static Oid CStoreSkipNodesIndexRelationId(void); static Oid CStoreSkipNodesIndexRelationId(void);
static Oid CStoreNamespaceId(void); static Oid CStoreNamespaceId(void);
static bool ReadCStoreDataFiles(Oid relfilenode, DataFileMetadata *metadata);
static ModifyState * StartModifyRelation(Relation rel); static ModifyState * StartModifyRelation(Relation rel);
static void InsertTupleAndEnforceConstraints(ModifyState *state, Datum *values, static void InsertTupleAndEnforceConstraints(ModifyState *state, Datum *values,
bool *nulls); bool *nulls);
@ -71,9 +94,13 @@ static void FinishModifyRelation(ModifyState *state);
static EState * create_estate_for_relation(Relation rel); static EState * create_estate_for_relation(Relation rel);
static bytea * DatumToBytea(Datum value, Form_pg_attribute attrForm); static bytea * DatumToBytea(Datum value, Form_pg_attribute attrForm);
static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm); static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm);
static ColumnarMetapage * InitMetapage(Relation relation);
static ColumnarMetapage * ReadMetapage(RelFileNode relfilenode, bool missingOk);
static uint64 GetNextStorageId(void);
static bool WriteColumnarOptions(Oid regclass, ColumnarOptions *options, bool overwrite); static bool WriteColumnarOptions(Oid regclass, ColumnarOptions *options, bool overwrite);
PG_FUNCTION_INFO_V1(columnar_relation_storageid);
/* constants for cstore.options */ /* constants for cstore.options */
#define Natts_cstore_options 4 #define Natts_cstore_options 4
#define Anum_cstore_options_regclass 1 #define Anum_cstore_options_regclass 1
@ -97,33 +124,10 @@ typedef struct FormData_cstore_options
} FormData_cstore_options; } FormData_cstore_options;
typedef FormData_cstore_options *Form_cstore_options; typedef FormData_cstore_options *Form_cstore_options;
/* constants for cstore_table */
#define Natts_cstore_data_files 3
#define Anum_cstore_data_files_relfilenode 1
#define Anum_cstore_data_files_version_major 2
#define Anum_cstore_data_files_version_minor 3
/* ----------------
* cstore.cstore_data_files definition.
* ----------------
*/
typedef struct FormData_cstore_data_files
{
Oid relfilenode;
int32 block_row_count;
int32 stripe_row_count;
NameData compression;
int64 version_major;
int64 version_minor;
#ifdef CATALOG_VARLEN /* variable-length fields start here */
#endif
} FormData_cstore_data_files;
typedef FormData_cstore_data_files *Form_cstore_data_files;
/* constants for cstore_stripe */ /* constants for cstore_stripe */
#define Natts_cstore_stripes 8 #define Natts_cstore_stripes 8
#define Anum_cstore_stripes_relfilenode 1 #define Anum_cstore_stripes_storageid 1
#define Anum_cstore_stripes_stripe 2 #define Anum_cstore_stripes_stripe 2
#define Anum_cstore_stripes_file_offset 3 #define Anum_cstore_stripes_file_offset 3
#define Anum_cstore_stripes_data_length 4 #define Anum_cstore_stripes_data_length 4
@ -134,7 +138,7 @@ typedef FormData_cstore_data_files *Form_cstore_data_files;
/* constants for cstore_skipnodes */ /* constants for cstore_skipnodes */
#define Natts_cstore_skipnodes 12 #define Natts_cstore_skipnodes 12
#define Anum_cstore_skipnodes_relfilenode 1 #define Anum_cstore_skipnodes_storageid 1
#define Anum_cstore_skipnodes_stripe 2 #define Anum_cstore_skipnodes_stripe 2
#define Anum_cstore_skipnodes_attr 3 #define Anum_cstore_skipnodes_attr 3
#define Anum_cstore_skipnodes_block 4 #define Anum_cstore_skipnodes_block 4
@ -357,47 +361,19 @@ ReadColumnarOptions(Oid regclass, ColumnarOptions *options)
} }
/*
* InitCStoreDataFileMetadata adds a record for the given relfilenode
* in cstore_data_files.
*/
void
InitCStoreDataFileMetadata(Oid relfilenode)
{
bool nulls[Natts_cstore_data_files] = { 0 };
Datum values[Natts_cstore_data_files] = {
ObjectIdGetDatum(relfilenode),
Int32GetDatum(CSTORE_VERSION_MAJOR),
Int32GetDatum(CSTORE_VERSION_MINOR)
};
DeleteDataFileMetadataRowIfExists(relfilenode);
Oid cstoreDataFilesOid = CStoreDataFilesRelationId();
Relation cstoreDataFiles = heap_open(cstoreDataFilesOid, RowExclusiveLock);
ModifyState *modifyState = StartModifyRelation(cstoreDataFiles);
InsertTupleAndEnforceConstraints(modifyState, values, nulls);
FinishModifyRelation(modifyState);
CommandCounterIncrement();
heap_close(cstoreDataFiles, NoLock);
}
/* /*
* SaveStripeSkipList saves StripeSkipList for a given stripe as rows * SaveStripeSkipList saves StripeSkipList for a given stripe as rows
* of cstore_skipnodes. * of cstore_skipnodes.
*/ */
void void
SaveStripeSkipList(Oid relfilenode, uint64 stripe, StripeSkipList *stripeSkipList, SaveStripeSkipList(RelFileNode relfilenode, uint64 stripe, StripeSkipList *stripeSkipList,
TupleDesc tupleDescriptor) TupleDesc tupleDescriptor)
{ {
uint32 columnIndex = 0; uint32 columnIndex = 0;
uint32 blockIndex = 0; uint32 blockIndex = 0;
uint32 columnCount = stripeSkipList->columnCount; uint32 columnCount = stripeSkipList->columnCount;
ColumnarMetapage *metapage = ReadMetapage(relfilenode, false);
Oid cstoreSkipNodesOid = CStoreSkipNodesRelationId(); Oid cstoreSkipNodesOid = CStoreSkipNodesRelationId();
Relation cstoreSkipNodes = heap_open(cstoreSkipNodesOid, RowExclusiveLock); Relation cstoreSkipNodes = heap_open(cstoreSkipNodesOid, RowExclusiveLock);
ModifyState *modifyState = StartModifyRelation(cstoreSkipNodes); ModifyState *modifyState = StartModifyRelation(cstoreSkipNodes);
@ -410,7 +386,7 @@ SaveStripeSkipList(Oid relfilenode, uint64 stripe, StripeSkipList *stripeSkipLis
&stripeSkipList->blockSkipNodeArray[columnIndex][blockIndex]; &stripeSkipList->blockSkipNodeArray[columnIndex][blockIndex];
Datum values[Natts_cstore_skipnodes] = { Datum values[Natts_cstore_skipnodes] = {
ObjectIdGetDatum(relfilenode), UInt64GetDatum(metapage->storageId),
Int64GetDatum(stripe), Int64GetDatum(stripe),
Int32GetDatum(columnIndex + 1), Int32GetDatum(columnIndex + 1),
Int32GetDatum(blockIndex), Int32GetDatum(blockIndex),
@ -456,7 +432,7 @@ SaveStripeSkipList(Oid relfilenode, uint64 stripe, StripeSkipList *stripeSkipLis
* ReadStripeSkipList fetches StripeSkipList for a given stripe. * ReadStripeSkipList fetches StripeSkipList for a given stripe.
*/ */
StripeSkipList * StripeSkipList *
ReadStripeSkipList(Oid relfilenode, uint64 stripe, TupleDesc tupleDescriptor, ReadStripeSkipList(RelFileNode relfilenode, uint64 stripe, TupleDesc tupleDescriptor,
uint32 blockCount) uint32 blockCount)
{ {
int32 columnIndex = 0; int32 columnIndex = 0;
@ -464,12 +440,14 @@ ReadStripeSkipList(Oid relfilenode, uint64 stripe, TupleDesc tupleDescriptor,
uint32 columnCount = tupleDescriptor->natts; uint32 columnCount = tupleDescriptor->natts;
ScanKeyData scanKey[2]; ScanKeyData scanKey[2];
ColumnarMetapage *metapage = ReadMetapage(relfilenode, false);
Oid cstoreSkipNodesOid = CStoreSkipNodesRelationId(); Oid cstoreSkipNodesOid = CStoreSkipNodesRelationId();
Relation cstoreSkipNodes = heap_open(cstoreSkipNodesOid, AccessShareLock); Relation cstoreSkipNodes = heap_open(cstoreSkipNodesOid, AccessShareLock);
Relation index = index_open(CStoreSkipNodesIndexRelationId(), AccessShareLock); Relation index = index_open(CStoreSkipNodesIndexRelationId(), AccessShareLock);
ScanKeyInit(&scanKey[0], Anum_cstore_skipnodes_relfilenode, ScanKeyInit(&scanKey[0], Anum_cstore_skipnodes_storageid,
BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode)); BTEqualStrategyNumber, F_OIDEQ, UInt64GetDatum(metapage->storageId));
ScanKeyInit(&scanKey[1], Anum_cstore_skipnodes_stripe, ScanKeyInit(&scanKey[1], Anum_cstore_skipnodes_stripe,
BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(stripe)); BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(stripe));
@ -559,11 +537,11 @@ ReadStripeSkipList(Oid relfilenode, uint64 stripe, TupleDesc tupleDescriptor,
* InsertStripeMetadataRow adds a row to cstore_stripes. * InsertStripeMetadataRow adds a row to cstore_stripes.
*/ */
static void static void
InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe) InsertStripeMetadataRow(uint64 storageId, StripeMetadata *stripe)
{ {
bool nulls[Natts_cstore_stripes] = { 0 }; bool nulls[Natts_cstore_stripes] = { 0 };
Datum values[Natts_cstore_stripes] = { Datum values[Natts_cstore_stripes] = {
ObjectIdGetDatum(relfilenode), UInt64GetDatum(storageId),
Int64GetDatum(stripe->id), Int64GetDatum(stripe->id),
Int64GetDatum(stripe->fileOffset), Int64GetDatum(stripe->fileOffset),
Int64GetDatum(stripe->dataLength), Int64GetDatum(stripe->dataLength),
@ -589,31 +567,21 @@ InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe)
/* /*
* ReadDataFileMetadata constructs DataFileMetadata for a given relfilenode by reading * StripesForRelfilenode returns a list of StripeMetadata for stripes
* from cstore_data_files and cstore_stripes. * of the given relfilenode.
*/ */
DataFileMetadata * List *
ReadDataFileMetadata(Oid relfilenode, bool missingOk) StripesForRelfilenode(RelFileNode relfilenode)
{ {
DataFileMetadata *datafileMetadata = palloc0(sizeof(DataFileMetadata)); ColumnarMetapage *metapage = ReadMetapage(relfilenode, true);
bool found = ReadCStoreDataFiles(relfilenode, datafileMetadata); if (metapage == NULL)
if (!found)
{ {
if (!missingOk) /* empty relation */
{ return NIL;
ereport(ERROR, (errmsg("Relfilenode %d doesn't belong to a cstore table.",
relfilenode)));
}
else
{
return NULL;
}
} }
datafileMetadata->stripeMetadataList =
ReadDataFileStripeList(relfilenode, GetTransactionSnapshot());
return datafileMetadata; return ReadDataFileStripeList(metapage->storageId, GetTransactionSnapshot());
} }
@ -622,12 +590,13 @@ ReadDataFileMetadata(Oid relfilenode, bool missingOk)
* relfilenode across all active and inactive transactions. * relfilenode across all active and inactive transactions.
*/ */
uint64 uint64
GetHighestUsedAddress(Oid relfilenode) GetHighestUsedAddress(RelFileNode relfilenode)
{ {
uint64 highestUsedAddress = 0; uint64 highestUsedAddress = 0;
uint64 highestUsedId = 0; uint64 highestUsedId = 0;
ColumnarMetapage *metapage = ReadMetapage(relfilenode, false);
GetHighestUsedAddressAndId(relfilenode, &highestUsedAddress, &highestUsedId); GetHighestUsedAddressAndId(metapage->storageId, &highestUsedAddress, &highestUsedId);
return highestUsedAddress; return highestUsedAddress;
} }
@ -638,7 +607,7 @@ GetHighestUsedAddress(Oid relfilenode)
* the given relfilenode across all active and inactive transactions. * the given relfilenode across all active and inactive transactions.
*/ */
static void static void
GetHighestUsedAddressAndId(Oid relfilenode, GetHighestUsedAddressAndId(uint64 storageId,
uint64 *highestUsedAddress, uint64 *highestUsedAddress,
uint64 *highestUsedId) uint64 *highestUsedId)
{ {
@ -647,10 +616,12 @@ GetHighestUsedAddressAndId(Oid relfilenode,
SnapshotData SnapshotDirty; SnapshotData SnapshotDirty;
InitDirtySnapshot(SnapshotDirty); InitDirtySnapshot(SnapshotDirty);
List *stripeMetadataList = ReadDataFileStripeList(relfilenode, &SnapshotDirty); List *stripeMetadataList = ReadDataFileStripeList(storageId, &SnapshotDirty);
*highestUsedId = 0; *highestUsedId = 0;
*highestUsedAddress = 0;
/* file starts with metapage */
*highestUsedAddress = CSTORE_BYTES_PER_PAGE;
foreach(stripeMetadataCell, stripeMetadataList) foreach(stripeMetadataCell, stripeMetadataList)
{ {
@ -684,8 +655,20 @@ ReserveStripe(Relation rel, uint64 sizeBytes,
*/ */
LockRelation(rel, ShareUpdateExclusiveLock); LockRelation(rel, ShareUpdateExclusiveLock);
Oid relfilenode = rel->rd_node.relNode; RelFileNode relfilenode = rel->rd_node;
GetHighestUsedAddressAndId(relfilenode, &currLogicalHigh, &highestId);
/*
* If this is the first stripe for this relation, initialize the
* metapage, otherwise use the previously initialized metapage.
*/
ColumnarMetapage *metapage = ReadMetapage(relfilenode, true);
if (metapage == NULL)
{
metapage = InitMetapage(rel);
}
GetHighestUsedAddressAndId(metapage->storageId, &currLogicalHigh, &highestId);
SmgrAddr currSmgrHigh = logical_to_smgr(currLogicalHigh); SmgrAddr currSmgrHigh = logical_to_smgr(currLogicalHigh);
SmgrAddr resSmgrStart = next_block_start(currSmgrHigh); SmgrAddr resSmgrStart = next_block_start(currSmgrHigh);
@ -714,7 +697,7 @@ ReserveStripe(Relation rel, uint64 sizeBytes,
stripe.rowCount = rowCount; stripe.rowCount = rowCount;
stripe.id = highestId + 1; stripe.id = highestId + 1;
InsertStripeMetadataRow(relfilenode, &stripe); InsertStripeMetadataRow(metapage->storageId, &stripe);
UnlockRelation(rel, ShareUpdateExclusiveLock); UnlockRelation(rel, ShareUpdateExclusiveLock);
@ -723,18 +706,18 @@ ReserveStripe(Relation rel, uint64 sizeBytes,
/* /*
* ReadDataFileStripeList reads the stripe list for a given relfilenode * ReadDataFileStripeList reads the stripe list for a given storageId
* in the given snapshot. * in the given snapshot.
*/ */
static List * static List *
ReadDataFileStripeList(Oid relfilenode, Snapshot snapshot) ReadDataFileStripeList(uint64 storageId, Snapshot snapshot)
{ {
List *stripeMetadataList = NIL; List *stripeMetadataList = NIL;
ScanKeyData scanKey[1]; ScanKeyData scanKey[1];
HeapTuple heapTuple; HeapTuple heapTuple;
ScanKeyInit(&scanKey[0], Anum_cstore_stripes_relfilenode, ScanKeyInit(&scanKey[0], Anum_cstore_stripes_storageid,
BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode)); BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(storageId));
Oid cstoreStripesOid = CStoreStripesRelationId(); Oid cstoreStripesOid = CStoreStripesRelationId();
Relation cstoreStripes = heap_open(cstoreStripesOid, AccessShareLock); Relation cstoreStripes = heap_open(cstoreStripesOid, AccessShareLock);
@ -779,60 +762,10 @@ ReadDataFileStripeList(Oid relfilenode, Snapshot snapshot)
/* /*
* ReadCStoreDataFiles reads corresponding record from cstore_data_files. Returns * DeleteMetadataRows removes the rows with given relfilenode from cstore_stripes.
* false if table was not found in cstore_data_files.
*/
static bool
ReadCStoreDataFiles(Oid relfilenode, DataFileMetadata *metadata)
{
bool found = false;
ScanKeyData scanKey[1];
ScanKeyInit(&scanKey[0], Anum_cstore_data_files_relfilenode,
BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode));
Oid cstoreDataFilesOid = CStoreDataFilesRelationId();
Relation cstoreDataFiles = try_relation_open(cstoreDataFilesOid, AccessShareLock);
if (cstoreDataFiles == NULL)
{
/*
* Extension has been dropped. This can be called while
* dropping extension or database via ObjectAccess().
*/
return false;
}
Relation index = try_relation_open(CStoreDataFilesIndexRelationId(), AccessShareLock);
if (index == NULL)
{
heap_close(cstoreDataFiles, NoLock);
/* extension has been dropped */
return false;
}
SysScanDesc scanDescriptor = systable_beginscan_ordered(cstoreDataFiles, index, NULL,
1, scanKey);
HeapTuple heapTuple = systable_getnext(scanDescriptor);
if (HeapTupleIsValid(heapTuple))
{
found = true;
}
systable_endscan_ordered(scanDescriptor);
index_close(index, NoLock);
heap_close(cstoreDataFiles, NoLock);
return found;
}
/*
* DeleteDataFileMetadataRowIfExists removes the row with given relfilenode from cstore_stripes.
*/ */
void void
DeleteDataFileMetadataRowIfExists(Oid relfilenode) DeleteMetadataRows(RelFileNode relfilenode)
{ {
ScanKeyData scanKey[1]; ScanKeyData scanKey[1];
@ -845,33 +778,46 @@ DeleteDataFileMetadataRowIfExists(Oid relfilenode)
return; return;
} }
ScanKeyInit(&scanKey[0], Anum_cstore_data_files_relfilenode, ColumnarMetapage *metapage = ReadMetapage(relfilenode, true);
BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode)); if (metapage == NULL)
{
/*
* No data has been written to this storage yet, so there is no
* associated metadata yet.
*/
return;
}
Oid cstoreDataFilesOid = CStoreDataFilesRelationId(); ScanKeyInit(&scanKey[0], Anum_cstore_stripes_storageid,
Relation cstoreDataFiles = try_relation_open(cstoreDataFilesOid, AccessShareLock); BTEqualStrategyNumber, F_INT8EQ, UInt64GetDatum(metapage->storageId));
if (cstoreDataFiles == NULL)
Oid cstoreStripesOid = CStoreStripesRelationId();
Relation cstoreStripes = try_relation_open(cstoreStripesOid, AccessShareLock);
if (cstoreStripes == NULL)
{ {
/* extension has been dropped */ /* extension has been dropped */
return; return;
} }
Relation index = index_open(CStoreDataFilesIndexRelationId(), AccessShareLock); Relation index = index_open(CStoreStripesIndexRelationId(), AccessShareLock);
SysScanDesc scanDescriptor = systable_beginscan_ordered(cstoreDataFiles, index, NULL, SysScanDesc scanDescriptor = systable_beginscan_ordered(cstoreStripes, index, NULL,
1, scanKey); 1, scanKey);
ModifyState *modifyState = StartModifyRelation(cstoreStripes);
HeapTuple heapTuple = systable_getnext(scanDescriptor); HeapTuple heapTuple = systable_getnext(scanDescriptor);
if (HeapTupleIsValid(heapTuple)) while (HeapTupleIsValid(heapTuple))
{ {
ModifyState *modifyState = StartModifyRelation(cstoreDataFiles);
DeleteTupleAndEnforceConstraints(modifyState, heapTuple); DeleteTupleAndEnforceConstraints(modifyState, heapTuple);
FinishModifyRelation(modifyState); heapTuple = systable_getnext(scanDescriptor);
} }
FinishModifyRelation(modifyState);
systable_endscan_ordered(scanDescriptor); systable_endscan_ordered(scanDescriptor);
index_close(index, NoLock); index_close(index, NoLock);
heap_close(cstoreDataFiles, NoLock); heap_close(cstoreStripes, NoLock);
} }
@ -1096,28 +1042,6 @@ ColumnarOptionsIndexRegclass(void)
} }
/*
* CStoreDataFilesRelationId returns relation id of cstore_data_files.
* TODO: should we cache this similar to citus?
*/
static Oid
CStoreDataFilesRelationId(void)
{
return get_relname_relid("cstore_data_files", CStoreNamespaceId());
}
/*
* CStoreDataFilesIndexRelationId returns relation id of cstore_data_files_pkey.
* TODO: should we cache this similar to citus?
*/
static Oid
CStoreDataFilesIndexRelationId(void)
{
return get_relname_relid("cstore_data_files_pkey", CStoreNamespaceId());
}
/* /*
* CStoreSkipNodesRelationId returns relation id of cstore_skipnodes. * CStoreSkipNodesRelationId returns relation id of cstore_skipnodes.
* TODO: should we cache this similar to citus? * TODO: should we cache this similar to citus?
@ -1149,3 +1073,128 @@ CStoreNamespaceId(void)
{ {
return get_namespace_oid("cstore", false); return get_namespace_oid("cstore", false);
} }
/*
* ReadMetapage reads metapage for the given relfilenode. It returns
* false if the relation doesn't have a meta page yet.
*/
static ColumnarMetapage *
ReadMetapage(RelFileNode relfilenode, bool missingOk)
{
StringInfo metapageBuffer = NULL;
Oid relationId = RelidByRelfilenode(relfilenode.spcNode,
relfilenode.relNode);
if (OidIsValid(relationId))
{
Relation relation = relation_open(relationId, NoLock);
RelationOpenSmgr(relation);
int nblocks = smgrnblocks(relation->rd_smgr, MAIN_FORKNUM);
RelationCloseSmgr(relation);
if (nblocks != 0)
{
metapageBuffer = ReadFromSmgr(relation, 0, sizeof(ColumnarMetapage));
}
relation_close(relation, NoLock);
}
if (metapageBuffer == NULL)
{
if (!missingOk)
{
elog(ERROR, "columnar metapage was not found");
}
return NULL;
}
ColumnarMetapage *metapage = palloc0(sizeof(ColumnarMetapage));
memcpy_s((void *) metapage, sizeof(ColumnarMetapage),
metapageBuffer->data, sizeof(ColumnarMetapage));
return metapage;
}
/*
* InitMetapage initializes metapage for the given relation.
*/
static ColumnarMetapage *
InitMetapage(Relation relation)
{
ColumnarMetapage *metapage = palloc0(sizeof(ColumnarMetapage));
metapage->storageId = GetNextStorageId();
metapage->versionMajor = CSTORE_VERSION_MAJOR;
metapage->versionMinor = CSTORE_VERSION_MINOR;
/* create the first block */
Buffer newBuffer = ReadBuffer(relation, P_NEW);
ReleaseBuffer(newBuffer);
Assert(sizeof(ColumnarMetapage) <= BLCKSZ - SizeOfPageHeaderData);
WriteToSmgr(relation, 0, (char *) metapage, sizeof(ColumnarMetapage));
return metapage;
}
/*
* GetNextStorageId returns the next value from the storage id sequence.
*/
static uint64
GetNextStorageId(void)
{
Oid savedUserId = InvalidOid;
int savedSecurityContext = 0;
Oid sequenceId = get_relname_relid("storageid_seq", CStoreNamespaceId());
Datum sequenceIdDatum = ObjectIdGetDatum(sequenceId);
/*
* Not all users have update access to the sequence, so switch
* security context.
*/
GetUserIdAndSecContext(&savedUserId, &savedSecurityContext);
SetUserIdAndSecContext(CitusExtensionOwner(), SECURITY_LOCAL_USERID_CHANGE);
/*
* Generate new and unique storage id from sequence.
*/
Datum storageIdDatum = DirectFunctionCall1(nextval_oid, sequenceIdDatum);
SetUserIdAndSecContext(savedUserId, savedSecurityContext);
uint64 storageId = DatumGetInt64(storageIdDatum);
return storageId;
}
/*
* columnar_relation_storageid returns storage id associated with the
* given relation id, or -1 if there is no associated storage id yet.
*/
Datum
columnar_relation_storageid(PG_FUNCTION_ARGS)
{
uint64 storageId = -1;
#if HAS_TABLEAM
Oid relationId = PG_GETARG_OID(0);
Relation relation = relation_open(relationId, AccessShareLock);
if (IsCStoreTableAmTable(relationId))
{
ColumnarMetapage *metadata = ReadMetapage(relation->rd_node, true);
if (metadata != NULL)
{
storageId = metadata->storageId;
}
}
relation_close(relation, AccessShareLock);
#endif
PG_RETURN_INT64(storageId);
}

View File

@ -76,7 +76,6 @@ static BlockData * DeserializeBlockData(StripeBuffers *stripeBuffers, uint64 blo
List *projectedColumnList); List *projectedColumnList);
static Datum ColumnDefaultValue(TupleConstr *tupleConstraints, static Datum ColumnDefaultValue(TupleConstr *tupleConstraints,
Form_pg_attribute attributeForm); Form_pg_attribute attributeForm);
static StringInfo ReadFromSmgr(Relation rel, uint64 offset, uint32 size);
/* /*
* CStoreBeginRead initializes a cstore read operation. This function returns a * CStoreBeginRead initializes a cstore read operation. This function returns a
@ -86,9 +85,7 @@ TableReadState *
CStoreBeginRead(Relation relation, TupleDesc tupleDescriptor, CStoreBeginRead(Relation relation, TupleDesc tupleDescriptor,
List *projectedColumnList, List *whereClauseList) List *projectedColumnList, List *whereClauseList)
{ {
Oid relNode = relation->rd_node.relNode; List *stripeList = StripesForRelfilenode(relation->rd_node);
DataFileMetadata *datafileMetadata = ReadDataFileMetadata(relNode, false);
/* /*
* We allocate all stripe specific data in the stripeReadContext, and reset * We allocate all stripe specific data in the stripeReadContext, and reset
@ -101,7 +98,7 @@ CStoreBeginRead(Relation relation, TupleDesc tupleDescriptor,
TableReadState *readState = palloc0(sizeof(TableReadState)); TableReadState *readState = palloc0(sizeof(TableReadState));
readState->relation = relation; readState->relation = relation;
readState->datafileMetadata = datafileMetadata; readState->stripeList = stripeList;
readState->projectedColumnList = projectedColumnList; readState->projectedColumnList = projectedColumnList;
readState->whereClauseList = whereClauseList; readState->whereClauseList = whereClauseList;
readState->stripeBuffers = NULL; readState->stripeBuffers = NULL;
@ -135,7 +132,7 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu
*/ */
while (readState->stripeBuffers == NULL) while (readState->stripeBuffers == NULL)
{ {
List *stripeMetadataList = readState->datafileMetadata->stripeMetadataList; List *stripeMetadataList = readState->stripeList;
uint32 stripeCount = list_length(stripeMetadataList); uint32 stripeCount = list_length(stripeMetadataList);
/* if we have read all stripes, return false */ /* if we have read all stripes, return false */
@ -238,8 +235,7 @@ void
CStoreEndRead(TableReadState *readState) CStoreEndRead(TableReadState *readState)
{ {
MemoryContextDelete(readState->stripeReadContext); MemoryContextDelete(readState->stripeReadContext);
list_free_deep(readState->datafileMetadata->stripeMetadataList); list_free_deep(readState->stripeList);
pfree(readState->datafileMetadata);
pfree(readState); pfree(readState);
} }
@ -316,11 +312,9 @@ CStoreTableRowCount(Relation relation)
{ {
ListCell *stripeMetadataCell = NULL; ListCell *stripeMetadataCell = NULL;
uint64 totalRowCount = 0; uint64 totalRowCount = 0;
List *stripeList = StripesForRelfilenode(relation->rd_node);
DataFileMetadata *datafileMetadata = ReadDataFileMetadata(relation->rd_node.relNode, foreach(stripeMetadataCell, stripeList)
false);
foreach(stripeMetadataCell, datafileMetadata->stripeMetadataList)
{ {
StripeMetadata *stripeMetadata = (StripeMetadata *) lfirst(stripeMetadataCell); StripeMetadata *stripeMetadata = (StripeMetadata *) lfirst(stripeMetadataCell);
totalRowCount += stripeMetadata->rowCount; totalRowCount += stripeMetadata->rowCount;
@ -345,7 +339,7 @@ LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata,
bool *projectedColumnMask = ProjectedColumnMask(columnCount, projectedColumnList); bool *projectedColumnMask = ProjectedColumnMask(columnCount, projectedColumnList);
StripeSkipList *stripeSkipList = ReadStripeSkipList(relation->rd_node.relNode, StripeSkipList *stripeSkipList = ReadStripeSkipList(relation->rd_node,
stripeMetadata->id, stripeMetadata->id,
tupleDescriptor, tupleDescriptor,
stripeMetadata->blockCount); stripeMetadata->blockCount);
@ -1009,7 +1003,7 @@ ColumnDefaultValue(TupleConstr *tupleConstraints, Form_pg_attribute attributeFor
} }
static StringInfo StringInfo
ReadFromSmgr(Relation rel, uint64 offset, uint32 size) ReadFromSmgr(Relation rel, uint64 offset, uint32 size)
{ {
StringInfo resultBuffer = makeStringInfo(); StringInfo resultBuffer = makeStringInfo();

View File

@ -106,7 +106,6 @@ static void CStoreTableAMProcessUtility(PlannedStmt *plannedStatement,
char *completionTag); char *completionTag);
#endif #endif
static bool IsCStoreTableAmTable(Oid relationId);
static bool ConditionalLockRelationWithTimeout(Relation rel, LOCKMODE lockMode, static bool ConditionalLockRelationWithTimeout(Relation rel, LOCKMODE lockMode,
int timeout, int retryInterval); int timeout, int retryInterval);
static void LogRelationStats(Relation rel, int elevel); static void LogRelationStats(Relation rel, int elevel);
@ -542,26 +541,30 @@ cstore_relation_set_new_filenode(Relation rel,
MarkRelfilenodeDropped(oldRelfilenode, GetCurrentSubTransactionId()); MarkRelfilenodeDropped(oldRelfilenode, GetCurrentSubTransactionId());
/* delete old relfilenode metadata */ /* delete old relfilenode metadata */
DeleteDataFileMetadataRowIfExists(rel->rd_node.relNode); DeleteMetadataRows(rel->rd_node);
Assert(persistence == RELPERSISTENCE_PERMANENT); Assert(persistence == RELPERSISTENCE_PERMANENT);
*freezeXid = RecentXmin; *freezeXid = RecentXmin;
*minmulti = GetOldestMultiXactId(); *minmulti = GetOldestMultiXactId();
SMgrRelation srel = RelationCreateStorage(*newrnode, persistence); SMgrRelation srel = RelationCreateStorage(*newrnode, persistence);
InitCStoreDataFileMetadata(newrnode->relNode);
InitColumnarOptions(rel->rd_id); InitColumnarOptions(rel->rd_id);
smgrclose(srel); smgrclose(srel);
/* we will lazily initialize metadata in first stripe reservation */
} }
static void static void
cstore_relation_nontransactional_truncate(Relation rel) cstore_relation_nontransactional_truncate(Relation rel)
{ {
Oid relfilenode = rel->rd_node.relNode; RelFileNode relfilenode = rel->rd_node;
NonTransactionDropWriteState(relfilenode); NonTransactionDropWriteState(relfilenode.relNode);
/* Delete old relfilenode metadata */
DeleteMetadataRows(relfilenode);
/* /*
* No need to set new relfilenode, since the table was created in this * No need to set new relfilenode, since the table was created in this
@ -572,9 +575,7 @@ cstore_relation_nontransactional_truncate(Relation rel)
*/ */
RelationTruncate(rel, 0); RelationTruncate(rel, 0);
/* Delete old relfilenode metadata and recreate it */ /* we will lazily initialize new metadata in first stripe reservation */
DeleteDataFileMetadataRowIfExists(rel->rd_node.relNode);
InitCStoreDataFileMetadata(rel->rd_node.relNode);
} }
@ -673,11 +674,14 @@ cstore_vacuum_rel(Relation rel, VacuumParams *params,
} }
/*
* LogRelationStats logs statistics as the output of the VACUUM VERBOSE.
*/
static void static void
LogRelationStats(Relation rel, int elevel) LogRelationStats(Relation rel, int elevel)
{ {
ListCell *stripeMetadataCell = NULL; ListCell *stripeMetadataCell = NULL;
Oid relfilenode = rel->rd_node.relNode; RelFileNode relfilenode = rel->rd_node;
StringInfo infoBuf = makeStringInfo(); StringInfo infoBuf = makeStringInfo();
int compressionStats[COMPRESSION_COUNT] = { 0 }; int compressionStats[COMPRESSION_COUNT] = { 0 };
@ -687,10 +691,10 @@ LogRelationStats(Relation rel, int elevel)
TupleDesc tupdesc = RelationGetDescr(rel); TupleDesc tupdesc = RelationGetDescr(rel);
uint64 droppedBlocksWithData = 0; uint64 droppedBlocksWithData = 0;
DataFileMetadata *datafileMetadata = ReadDataFileMetadata(relfilenode, false); List *stripeList = StripesForRelfilenode(relfilenode);
int stripeCount = list_length(datafileMetadata->stripeMetadataList); int stripeCount = list_length(stripeList);
foreach(stripeMetadataCell, datafileMetadata->stripeMetadataList) foreach(stripeMetadataCell, stripeList)
{ {
StripeMetadata *stripe = lfirst(stripeMetadataCell); StripeMetadata *stripe = lfirst(stripeMetadataCell);
StripeSkipList *skiplist = ReadStripeSkipList(relfilenode, stripe->id, StripeSkipList *skiplist = ReadStripeSkipList(relfilenode, stripe->id,
@ -726,6 +730,10 @@ LogRelationStats(Relation rel, int elevel)
uint64 relPages = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); uint64 relPages = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM);
RelationCloseSmgr(rel); RelationCloseSmgr(rel);
Datum storageId = DirectFunctionCall1(columnar_relation_storageid,
ObjectIdGetDatum(RelationGetRelid(rel)));
appendStringInfo(infoBuf, "storage id: %ld\n", DatumGetInt64(storageId));
appendStringInfo(infoBuf, "total file size: %ld, total data size: %ld\n", appendStringInfo(infoBuf, "total file size: %ld, total data size: %ld\n",
relPages * BLCKSZ, totalStripeLength); relPages * BLCKSZ, totalStripeLength);
appendStringInfo(infoBuf, appendStringInfo(infoBuf,
@ -803,7 +811,7 @@ TruncateCStore(Relation rel, int elevel)
* we're truncating. * we're truncating.
*/ */
SmgrAddr highestPhysicalAddress = SmgrAddr highestPhysicalAddress =
logical_to_smgr(GetHighestUsedAddress(rel->rd_node.relNode)); logical_to_smgr(GetHighestUsedAddress(rel->rd_node));
BlockNumber new_rel_pages = highestPhysicalAddress.blockno + 1; BlockNumber new_rel_pages = highestPhysicalAddress.blockno + 1;
if (new_rel_pages == old_rel_pages) if (new_rel_pages == old_rel_pages)
@ -1171,11 +1179,11 @@ CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId, Oid objectId
* tableam tables storage is managed by postgres. * tableam tables storage is managed by postgres.
*/ */
Relation rel = table_open(objectId, AccessExclusiveLock); Relation rel = table_open(objectId, AccessExclusiveLock);
Oid relfilenode = rel->rd_node.relNode; RelFileNode relfilenode = rel->rd_node;
DeleteDataFileMetadataRowIfExists(relfilenode); DeleteMetadataRows(relfilenode);
DeleteColumnarTableOptions(rel->rd_id, true); DeleteColumnarTableOptions(rel->rd_id, true);
MarkRelfilenodeDropped(relfilenode, GetCurrentSubTransactionId()); MarkRelfilenodeDropped(relfilenode.relNode, GetCurrentSubTransactionId());
/* keep the lock since we did physical changes to the relation */ /* keep the lock since we did physical changes to the relation */
table_close(rel, NoLock); table_close(rel, NoLock);
@ -1187,7 +1195,7 @@ CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId, Oid objectId
* IsCStoreTableAmTable returns true if relation has cstore_tableam * IsCStoreTableAmTable returns true if relation has cstore_tableam
* access method. This can be called before extension creation. * access method. This can be called before extension creation.
*/ */
static bool bool
IsCStoreTableAmTable(Oid relationId) IsCStoreTableAmTable(Oid relationId)
{ {
if (!OidIsValid(relationId)) if (!OidIsValid(relationId))

View File

@ -65,12 +65,10 @@ CStoreBeginWrite(RelFileNode relfilenode,
uint64 stripeMaxRowCount, uint32 blockRowCount, uint64 stripeMaxRowCount, uint32 blockRowCount,
TupleDesc tupleDescriptor) TupleDesc tupleDescriptor)
{ {
uint32 columnIndex = 0;
/* get comparison function pointers for each of the columns */ /* get comparison function pointers for each of the columns */
uint32 columnCount = tupleDescriptor->natts; uint32 columnCount = tupleDescriptor->natts;
FmgrInfo **comparisonFunctionArray = palloc0(columnCount * sizeof(FmgrInfo *)); FmgrInfo **comparisonFunctionArray = palloc0(columnCount * sizeof(FmgrInfo *));
for (columnIndex = 0; columnIndex < columnCount; columnIndex++) for (uint32 columnIndex = 0; columnIndex < columnCount; columnIndex++)
{ {
FmgrInfo *comparisonFunction = NULL; FmgrInfo *comparisonFunction = NULL;
FormData_pg_attribute *attributeForm = TupleDescAttr(tupleDescriptor, FormData_pg_attribute *attributeForm = TupleDescAttr(tupleDescriptor,
@ -323,7 +321,7 @@ CreateEmptyStripeSkipList(uint32 stripeMaxRowCount, uint32 blockRowCount,
} }
static void void
WriteToSmgr(Relation rel, uint64 logicalOffset, char *data, uint32 dataLength) WriteToSmgr(Relation rel, uint64 logicalOffset, char *data, uint32 dataLength)
{ {
uint64 remaining = dataLength; uint64 remaining = dataLength;
@ -521,7 +519,7 @@ FlushStripe(TableWriteState *writeState)
} }
/* create skip list and footer buffers */ /* create skip list and footer buffers */
SaveStripeSkipList(relation->rd_node.relNode, SaveStripeSkipList(writeState->relfilenode,
stripeMetadata.id, stripeMetadata.id,
stripeSkipList, tupleDescriptor); stripeSkipList, tupleDescriptor);

View File

@ -3,6 +3,8 @@
CREATE SCHEMA cstore; CREATE SCHEMA cstore;
SET search_path TO cstore; SET search_path TO cstore;
CREATE SEQUENCE storageid_seq MINVALUE 10000000000 NO CYCLE;
CREATE TABLE options ( CREATE TABLE options (
regclass regclass NOT NULL PRIMARY KEY, regclass regclass NOT NULL PRIMARY KEY,
block_row_count int NOT NULL, block_row_count int NOT NULL,
@ -12,17 +14,8 @@ CREATE TABLE options (
COMMENT ON TABLE options IS 'columnar table specific options, maintained by alter_columnar_table_set'; COMMENT ON TABLE options IS 'columnar table specific options, maintained by alter_columnar_table_set';
CREATE TABLE cstore_data_files (
relfilenode oid NOT NULL,
version_major bigint NOT NULL,
version_minor bigint NOT NULL,
PRIMARY KEY (relfilenode)
) WITH (user_catalog_table = true);
COMMENT ON TABLE cstore_data_files IS 'CStore data file wide metadata';
CREATE TABLE cstore_stripes ( CREATE TABLE cstore_stripes (
relfilenode oid NOT NULL, storageid bigint NOT NULL,
stripe bigint NOT NULL, stripe bigint NOT NULL,
file_offset bigint NOT NULL, file_offset bigint NOT NULL,
data_length bigint NOT NULL, data_length bigint NOT NULL,
@ -30,14 +23,13 @@ CREATE TABLE cstore_stripes (
block_count int NOT NULL, block_count int NOT NULL,
block_row_count int NOT NULL, block_row_count int NOT NULL,
row_count bigint NOT NULL, row_count bigint NOT NULL,
PRIMARY KEY (relfilenode, stripe), PRIMARY KEY (storageid, stripe)
FOREIGN KEY (relfilenode) REFERENCES cstore_data_files(relfilenode) ON DELETE CASCADE INITIALLY DEFERRED
) WITH (user_catalog_table = true); ) WITH (user_catalog_table = true);
COMMENT ON TABLE cstore_stripes IS 'CStore per stripe metadata'; COMMENT ON TABLE cstore_stripes IS 'CStore per stripe metadata';
CREATE TABLE cstore_skipnodes ( CREATE TABLE cstore_skipnodes (
relfilenode oid NOT NULL, storageid bigint NOT NULL,
stripe bigint NOT NULL, stripe bigint NOT NULL,
attr int NOT NULL, attr int NOT NULL,
block int NOT NULL, block int NOT NULL,
@ -49,8 +41,8 @@ CREATE TABLE cstore_skipnodes (
exists_stream_offset bigint NOT NULL, exists_stream_offset bigint NOT NULL,
exists_stream_length bigint NOT NULL, exists_stream_length bigint NOT NULL,
value_compression_type int NOT NULL, value_compression_type int NOT NULL,
PRIMARY KEY (relfilenode, stripe, attr, block), PRIMARY KEY (storageid, stripe, attr, block),
FOREIGN KEY (relfilenode, stripe) REFERENCES cstore_stripes(relfilenode, stripe) ON DELETE CASCADE INITIALLY DEFERRED FOREIGN KEY (storageid, stripe) REFERENCES cstore_stripes(storageid, stripe) ON DELETE CASCADE INITIALLY DEFERRED
) WITH (user_catalog_table = true); ) WITH (user_catalog_table = true);
COMMENT ON TABLE cstore_skipnodes IS 'CStore per block metadata'; COMMENT ON TABLE cstore_skipnodes IS 'CStore per block metadata';

View File

@ -29,8 +29,8 @@ END$proc$;
DROP TABLE cstore_skipnodes; DROP TABLE cstore_skipnodes;
DROP TABLE cstore_stripes; DROP TABLE cstore_stripes;
DROP TABLE cstore_data_files;
DROP TABLE options; DROP TABLE options;
DROP SEQUENCE storageid_seq;
DROP FUNCTION citus_internal.cstore_ensure_objects_exist(); DROP FUNCTION citus_internal.cstore_ensure_objects_exist();

View File

@ -49,6 +49,7 @@
#define CSTORE_TUPLE_COST_MULTIPLIER 10 #define CSTORE_TUPLE_COST_MULTIPLIER 10
#define CSTORE_POSTSCRIPT_SIZE_LENGTH 1 #define CSTORE_POSTSCRIPT_SIZE_LENGTH 1
#define CSTORE_POSTSCRIPT_SIZE_MAX 256 #define CSTORE_POSTSCRIPT_SIZE_MAX 256
#define CSTORE_BYTES_PER_PAGE (BLCKSZ - SizeOfPageHeaderData)
/* Enumaration for cstore file's compression method */ /* Enumaration for cstore file's compression method */
typedef enum typedef enum
@ -195,7 +196,7 @@ typedef struct StripeBuffers
/* TableReadState represents state of a cstore file read operation. */ /* TableReadState represents state of a cstore file read operation. */
typedef struct TableReadState typedef struct TableReadState
{ {
DataFileMetadata *datafileMetadata; List *stripeList;
StripeMetadata *currentStripeMetadata; StripeMetadata *currentStripeMetadata;
TupleDesc tupleDescriptor; TupleDesc tupleDescriptor;
Relation relation; Relation relation;
@ -289,21 +290,25 @@ extern bool InitColumnarOptions(Oid regclass);
extern void SetColumnarOptions(Oid regclass, ColumnarOptions *options); extern void SetColumnarOptions(Oid regclass, ColumnarOptions *options);
extern bool DeleteColumnarTableOptions(Oid regclass, bool missingOk); extern bool DeleteColumnarTableOptions(Oid regclass, bool missingOk);
extern bool ReadColumnarOptions(Oid regclass, ColumnarOptions *options); extern bool ReadColumnarOptions(Oid regclass, ColumnarOptions *options);
extern void DeleteDataFileMetadataRowIfExists(Oid relfilenode); extern void WriteToSmgr(Relation relation, uint64 logicalOffset,
extern void InitCStoreDataFileMetadata(Oid relfilenode); char *data, uint32 dataLength);
extern void UpdateCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int extern StringInfo ReadFromSmgr(Relation rel, uint64 offset, uint32 size);
stripeRowCount, CompressionType compression); extern bool IsCStoreTableAmTable(Oid relationId);
extern DataFileMetadata * ReadDataFileMetadata(Oid relfilenode, bool missingOk);
extern uint64 GetHighestUsedAddress(Oid relfilenode); /* cstore_metadata_tables.c */
extern void DeleteMetadataRows(RelFileNode relfilenode);
extern List * StripesForRelfilenode(RelFileNode relfilenode);
extern uint64 GetHighestUsedAddress(RelFileNode relfilenode);
extern StripeMetadata ReserveStripe(Relation rel, uint64 size, extern StripeMetadata ReserveStripe(Relation rel, uint64 size,
uint64 rowCount, uint64 columnCount, uint64 rowCount, uint64 columnCount,
uint64 blockCount, uint64 blockRowCount); uint64 blockCount, uint64 blockRowCount);
extern void SaveStripeSkipList(Oid relfilenode, uint64 stripe, extern void SaveStripeSkipList(RelFileNode relfilenode, uint64 stripe,
StripeSkipList *stripeSkipList, StripeSkipList *stripeSkipList,
TupleDesc tupleDescriptor); TupleDesc tupleDescriptor);
extern StripeSkipList * ReadStripeSkipList(Oid relfilenode, uint64 stripe, extern StripeSkipList * ReadStripeSkipList(RelFileNode relfilenode, uint64 stripe,
TupleDesc tupleDescriptor, TupleDesc tupleDescriptor,
uint32 blockCount); uint32 blockCount);
extern Datum columnar_relation_storageid(PG_FUNCTION_ARGS);
/* write_state_management.c */ /* write_state_management.c */
@ -335,11 +340,10 @@ typedef struct SmgrAddr
static inline SmgrAddr static inline SmgrAddr
logical_to_smgr(uint64 logicalOffset) logical_to_smgr(uint64 logicalOffset)
{ {
uint64 bytes_per_page = BLCKSZ - SizeOfPageHeaderData;
SmgrAddr addr; SmgrAddr addr;
addr.blockno = logicalOffset / bytes_per_page; addr.blockno = logicalOffset / CSTORE_BYTES_PER_PAGE;
addr.offset = SizeOfPageHeaderData + (logicalOffset % bytes_per_page); addr.offset = SizeOfPageHeaderData + (logicalOffset % CSTORE_BYTES_PER_PAGE);
return addr; return addr;
} }
@ -351,8 +355,7 @@ logical_to_smgr(uint64 logicalOffset)
static inline uint64 static inline uint64
smgr_to_logical(SmgrAddr addr) smgr_to_logical(SmgrAddr addr)
{ {
uint64 bytes_per_page = BLCKSZ - SizeOfPageHeaderData; return CSTORE_BYTES_PER_PAGE * addr.blockno + addr.offset - SizeOfPageHeaderData;
return bytes_per_page * addr.blockno + addr.offset - SizeOfPageHeaderData;
} }

View File

@ -190,3 +190,6 @@ s/relation with OID [0-9]+ does not exist/relation with OID XXXX does not exist/
# ignore timing statistics for VACUUM VERBOSE # ignore timing statistics for VACUUM VERBOSE
/CPU: user: .*s, system: .*s, elapsed: .*s/d /CPU: user: .*s, system: .*s, elapsed: .*s/d
# normalize storage id of columnar tables
s/^storage id: [0-9]+$/storage id: xxxxx/g

View File

@ -23,7 +23,6 @@
/hyperscale_tutorial.out /hyperscale_tutorial.out
/am_block_filtering.out /am_block_filtering.out
/am_copyto.out /am_copyto.out
/am_create.out
/am_data_types.out /am_data_types.out
/am_load.out /am_load.out
/fdw_block_filtering.out /fdw_block_filtering.out

View File

@ -13,8 +13,12 @@ CREATE TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT,
-- Test that querying an empty table works -- Test that querying an empty table works
ANALYZE contestant; ANALYZE contestant;
SELECT count(*) FROM contestant; SELECT count(*) FROM contestant;
count count
------- ---------------------------------------------------------------------
0 0
(1 row) (1 row)
-- Utility functions to be used throughout tests
CREATE FUNCTION columnar_relation_storageid(relid oid) RETURNS bigint
LANGUAGE C STABLE STRICT
AS 'citus', $$columnar_relation_storageid$$;

View File

@ -12,12 +12,12 @@
-- 'postgres' directory is excluded from comparison to have the same result. -- 'postgres' directory is excluded from comparison to have the same result.
-- store postgres database oid -- store postgres database oid
SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset
SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset SELECT count(distinct storageid) AS cstore_stripes_before_drop FROM cstore.cstore_stripes \gset
-- DROP cstore_fdw tables -- DROP cstore_fdw tables
DROP TABLE contestant; DROP TABLE contestant;
DROP TABLE contestant_compressed; DROP TABLE contestant_compressed;
-- make sure DROP deletes metadata -- make sure DROP deletes metadata
SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; SELECT :cstore_stripes_before_drop - count(distinct storageid) FROM cstore.cstore_stripes;
?column? ?column?
--------------------------------------------------------------------- ---------------------------------------------------------------------
2 2
@ -26,10 +26,11 @@ SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files;
-- Create a cstore_fdw table under a schema and drop it. -- Create a cstore_fdw table under a schema and drop it.
CREATE SCHEMA test_schema; CREATE SCHEMA test_schema;
CREATE TABLE test_schema.test_table(data int) USING columnar; CREATE TABLE test_schema.test_table(data int) USING columnar;
SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset INSERT INTO test_schema.test_table VALUES (1);
SELECT count(*) AS cstore_stripes_before_drop FROM cstore.cstore_stripes \gset
DROP SCHEMA test_schema CASCADE; DROP SCHEMA test_schema CASCADE;
NOTICE: drop cascades to table test_schema.test_table NOTICE: drop cascades to table test_schema.test_table
SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; SELECT :cstore_stripes_before_drop - count(distinct storageid) FROM cstore.cstore_stripes;
?column? ?column?
--------------------------------------------------------------------- ---------------------------------------------------------------------
1 1

View File

@ -65,20 +65,15 @@ SELECT * FROM t_view a ORDER BY a;
(6 rows) (6 rows)
-- verify that we have created metadata entries for the materialized view -- verify that we have created metadata entries for the materialized view
SELECT relfilenode FROM pg_class WHERE relname='t_view' \gset SELECT columnar_relation_storageid(oid) AS storageid
SELECT count(*) FROM cstore.cstore_data_files WHERE relfilenode=:relfilenode; FROM pg_class WHERE relname='t_view' \gset
SELECT count(*) FROM cstore.cstore_stripes WHERE storageid=:storageid;
count count
--------------------------------------------------------------------- ---------------------------------------------------------------------
1 1
(1 row) (1 row)
SELECT count(*) FROM cstore.cstore_stripes WHERE relfilenode=:relfilenode; SELECT count(*) FROM cstore.cstore_skipnodes WHERE storageid=:storageid;
count
---------------------------------------------------------------------
1
(1 row)
SELECT count(*) FROM cstore.cstore_skipnodes WHERE relfilenode=:relfilenode;
count count
--------------------------------------------------------------------- ---------------------------------------------------------------------
3 3
@ -87,19 +82,13 @@ SELECT count(*) FROM cstore.cstore_skipnodes WHERE relfilenode=:relfilenode;
DROP TABLE t CASCADE; DROP TABLE t CASCADE;
NOTICE: drop cascades to materialized view t_view NOTICE: drop cascades to materialized view t_view
-- dropping must remove metadata -- dropping must remove metadata
SELECT count(*) FROM cstore.cstore_data_files WHERE relfilenode=:relfilenode; SELECT count(*) FROM cstore.cstore_stripes WHERE storageid=:storageid;
count count
--------------------------------------------------------------------- ---------------------------------------------------------------------
0 0
(1 row) (1 row)
SELECT count(*) FROM cstore.cstore_stripes WHERE relfilenode=:relfilenode; SELECT count(*) FROM cstore.cstore_skipnodes WHERE storageid=:storageid;
count
---------------------------------------------------------------------
0
(1 row)
SELECT count(*) FROM cstore.cstore_skipnodes WHERE relfilenode=:relfilenode;
count count
--------------------------------------------------------------------- ---------------------------------------------------------------------
0 0

View File

@ -12,7 +12,7 @@ INSERT INTO t2 SELECT i, f(i) FROM generate_series(1, 5) i;
-- there are no subtransactions, so above statement should batch -- there are no subtransactions, so above statement should batch
-- INSERTs inside the UDF and create on stripe per table. -- INSERTs inside the UDF and create on stripe per table.
SELECT relname, count(*) FROM cstore.cstore_stripes a, pg_class b SELECT relname, count(*) FROM cstore.cstore_stripes a, pg_class b
WHERE a.relfilenode=b.relfilenode AND relname IN ('t1', 't2') WHERE columnar_relation_storageid(b.oid)=a.storageid AND relname IN ('t1', 't2')
GROUP BY relname GROUP BY relname
ORDER BY relname; ORDER BY relname;
relname | count relname | count

View File

@ -2,6 +2,9 @@
-- Testing we handle rollbacks properly -- Testing we handle rollbacks properly
-- --
CREATE TABLE t(a int, b int) USING columnar; CREATE TABLE t(a int, b int) USING columnar;
CREATE VIEW t_stripes AS
SELECT * FROM cstore.cstore_stripes a, pg_class b
WHERE a.storageid = columnar_relation_storageid(b.oid) AND b.relname = 't';
BEGIN; BEGIN;
INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i;
ROLLBACK; ROLLBACK;
@ -12,8 +15,7 @@ SELECT count(*) FROM t;
(1 row) (1 row)
-- check stripe metadata also have been rolled-back -- check stripe metadata also have been rolled-back
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b SELECT count(*) FROM t_stripes;
WHERE a.relfilenode = b.relfilenode AND b.relname = 't';
count count
--------------------------------------------------------------------- ---------------------------------------------------------------------
0 0
@ -26,8 +28,7 @@ SELECT count(*) FROM t;
10 10
(1 row) (1 row)
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b SELECT count(*) FROM t_stripes;
WHERE a.relfilenode = b.relfilenode AND b.relname = 't';
count count
--------------------------------------------------------------------- ---------------------------------------------------------------------
1 1
@ -73,11 +74,11 @@ SELECT count(*) FROM t;
20 20
(1 row) (1 row)
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b SELECT count(*) FROM t_stripes;
WHERE a.relfilenode = b.relfilenode AND b.relname = 't';
count count
--------------------------------------------------------------------- ---------------------------------------------------------------------
2 2
(1 row) (1 row)
DROP TABLE t; DROP TABLE t;
DROP VIEW t_stripes;

View File

@ -15,7 +15,7 @@ CREATE TABLE cstore_truncate_test_second (a int, b int) USING columnar;
-- COMPRESSED -- COMPRESSED
CREATE TABLE cstore_truncate_test_compressed (a int, b int) USING columnar; CREATE TABLE cstore_truncate_test_compressed (a int, b int) USING columnar;
CREATE TABLE cstore_truncate_test_regular (a int, b int); CREATE TABLE cstore_truncate_test_regular (a int, b int);
SELECT count(*) AS cstore_data_files_before_truncate FROM cstore.cstore_data_files \gset SELECT count(distinct storageid) AS cstore_data_files_before_truncate FROM cstore.cstore_stripes \gset
INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a;
set cstore.compression = 'pglz'; set cstore.compression = 'pglz';
INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a;
@ -147,7 +147,7 @@ SELECT * from cstore_truncate_test;
(0 rows) (0 rows)
-- make sure TRUNATE deletes metadata for old relfilenode -- make sure TRUNATE deletes metadata for old relfilenode
SELECT :cstore_data_files_before_truncate - count(*) FROM cstore.cstore_data_files; SELECT :cstore_data_files_before_truncate - count(distinct storageid) FROM cstore.cstore_stripes;
?column? ?column?
--------------------------------------------------------------------- ---------------------------------------------------------------------
0 0
@ -161,7 +161,7 @@ TRUNCATE cstore_same_transaction_truncate;
INSERT INTO cstore_same_transaction_truncate SELECT * FROM generate_series(20, 23); INSERT INTO cstore_same_transaction_truncate SELECT * FROM generate_series(20, 23);
COMMIT; COMMIT;
-- should output "1" for the newly created relation -- should output "1" for the newly created relation
SELECT count(*) - :cstore_data_files_before_truncate FROM cstore.cstore_data_files; SELECT count(distinct storageid) - :cstore_data_files_before_truncate FROM cstore.cstore_stripes;
?column? ?column?
--------------------------------------------------------------------- ---------------------------------------------------------------------
1 1

View File

@ -1,6 +1,9 @@
SELECT count(*) AS columnar_table_count FROM cstore.cstore_data_files \gset SELECT count(distinct storageid) AS columnar_table_count FROM cstore.cstore_stripes \gset
CREATE TABLE t(a int, b int) USING columnar; CREATE TABLE t(a int, b int) USING columnar;
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; CREATE VIEW t_stripes AS
SELECT * FROM cstore.cstore_stripes a, pg_class b
WHERE a.storageid = columnar_relation_storageid(b.oid) AND b.relname='t';
SELECT count(*) FROM t_stripes;
count count
--------------------------------------------------------------------- ---------------------------------------------------------------------
0 0
@ -15,7 +18,7 @@ SELECT sum(a), sum(b) FROM t;
465 | 9455 465 | 9455
(1 row) (1 row)
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; SELECT count(*) FROM t_stripes;
count count
--------------------------------------------------------------------- ---------------------------------------------------------------------
3 3
@ -29,7 +32,7 @@ SELECT sum(a), sum(b) FROM t;
465 | 9455 465 | 9455
(1 row) (1 row)
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; SELECT count(*) FROM t_stripes;
count count
--------------------------------------------------------------------- ---------------------------------------------------------------------
1 1
@ -49,7 +52,7 @@ SELECT sum(a), sum(b) FROM t;
3126715 | 6261955 3126715 | 6261955
(1 row) (1 row)
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; SELECT count(*) FROM t_stripes;
count count
--------------------------------------------------------------------- ---------------------------------------------------------------------
4 4
@ -62,7 +65,7 @@ SELECT sum(a), sum(b) FROM t;
3126715 | 6261955 3126715 | 6261955
(1 row) (1 row)
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; SELECT count(*) FROM t_stripes;
count count
--------------------------------------------------------------------- ---------------------------------------------------------------------
3 3
@ -70,7 +73,9 @@ SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.r
-- VACUUM FULL doesn't reclaim dropped columns, but converts them to NULLs -- VACUUM FULL doesn't reclaim dropped columns, but converts them to NULLs
ALTER TABLE t DROP COLUMN a; ALTER TABLE t DROP COLUMN a;
SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' ORDER BY 1, 2, 3; SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL
FROM cstore.cstore_skipnodes a, pg_class b
WHERE a.storageid = columnar_relation_storageid(b.oid) AND b.relname='t' ORDER BY 1, 2, 3;
stripe | attr | block | ?column? | ?column? stripe | attr | block | ?column? | ?column?
--------------------------------------------------------------------- ---------------------------------------------------------------------
1 | 1 | 0 | f | f 1 | 1 | 0 | f | f
@ -82,7 +87,9 @@ SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cs
(6 rows) (6 rows)
VACUUM FULL t; VACUUM FULL t;
SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' ORDER BY 1, 2, 3; SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL
FROM cstore.cstore_skipnodes a, pg_class b
WHERE a.storageid = columnar_relation_storageid(b.oid) AND b.relname='t' ORDER BY 1, 2, 3;
stripe | attr | block | ?column? | ?column? stripe | attr | block | ?column? | ?column?
--------------------------------------------------------------------- ---------------------------------------------------------------------
1 | 1 | 0 | t | t 1 | 1 | 0 | t | t
@ -94,7 +101,7 @@ SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cs
(6 rows) (6 rows)
-- Make sure we cleaned-up the transient table metadata after VACUUM FULL commands -- Make sure we cleaned-up the transient table metadata after VACUUM FULL commands
SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; SELECT count(distinct storageid) - :columnar_table_count FROM cstore.cstore_stripes;
?column? ?column?
--------------------------------------------------------------------- ---------------------------------------------------------------------
1 1
@ -112,14 +119,14 @@ SELECT count(*) FROM t;
SELECT pg_size_pretty(pg_relation_size('t')); SELECT pg_size_pretty(pg_relation_size('t'));
pg_size_pretty pg_size_pretty
--------------------------------------------------------------------- ---------------------------------------------------------------------
32 kB 40 kB
(1 row) (1 row)
INSERT INTO t SELECT i FROM generate_series(1, 10000) i; INSERT INTO t SELECT i FROM generate_series(1, 10000) i;
SELECT pg_size_pretty(pg_relation_size('t')); SELECT pg_size_pretty(pg_relation_size('t'));
pg_size_pretty pg_size_pretty
--------------------------------------------------------------------- ---------------------------------------------------------------------
112 kB 120 kB
(1 row) (1 row)
SELECT count(*) FROM t; SELECT count(*) FROM t;
@ -134,22 +141,23 @@ ROLLBACK TO SAVEPOINT s1;
SELECT pg_size_pretty(pg_relation_size('t')); SELECT pg_size_pretty(pg_relation_size('t'));
pg_size_pretty pg_size_pretty
--------------------------------------------------------------------- ---------------------------------------------------------------------
112 kB 120 kB
(1 row) (1 row)
COMMIT; COMMIT;
-- vacuum should truncate the relation to the usable space -- vacuum should truncate the relation to the usable space
VACUUM VERBOSE t; VACUUM VERBOSE t;
INFO: statistics for "t": INFO: statistics for "t":
total file size: 114688, total data size: 10754 storage id: xxxxx
total file size: 122880, total data size: 10754
total row count: 2530, stripe count: 3, average rows per stripe: 843 total row count: 2530, stripe count: 3, average rows per stripe: 843
block count: 3, containing data for dropped columns: 0, none compressed: 3, pglz compressed: 0 block count: 3, containing data for dropped columns: 0, none compressed: 3, pglz compressed: 0
INFO: "t": truncated 14 to 4 pages INFO: "t": truncated 15 to 5 pages
SELECT pg_size_pretty(pg_relation_size('t')); SELECT pg_size_pretty(pg_relation_size('t'));
pg_size_pretty pg_size_pretty
--------------------------------------------------------------------- ---------------------------------------------------------------------
32 kB 40 kB
(1 row) (1 row)
SELECT count(*) FROM t; SELECT count(*) FROM t;
@ -187,7 +195,8 @@ INSERT INTO t SELECT i / 5 FROM generate_series(1, 1500) i;
COMMIT; COMMIT;
VACUUM VERBOSE t; VACUUM VERBOSE t;
INFO: statistics for "t": INFO: statistics for "t":
total file size: 49152, total data size: 18808 storage id: xxxxx
total file size: 57344, total data size: 18808
total row count: 5530, stripe count: 5, average rows per stripe: 1106 total row count: 5530, stripe count: 5, average rows per stripe: 1106
block count: 7, containing data for dropped columns: 0, none compressed: 5, pglz compressed: 2 block count: 7, containing data for dropped columns: 0, none compressed: 5, pglz compressed: 2
@ -203,7 +212,8 @@ INSERT INTO t SELECT 1, i / 5 FROM generate_series(1, 1500) i;
ALTER TABLE t DROP COLUMN c; ALTER TABLE t DROP COLUMN c;
VACUUM VERBOSE t; VACUUM VERBOSE t;
INFO: statistics for "t": INFO: statistics for "t":
total file size: 65536, total data size: 31372 storage id: xxxxx
total file size: 73728, total data size: 31372
total row count: 7030, stripe count: 6, average rows per stripe: 1171 total row count: 7030, stripe count: 6, average rows per stripe: 1171
block count: 11, containing data for dropped columns: 2, none compressed: 9, pglz compressed: 2 block count: 11, containing data for dropped columns: 2, none compressed: 9, pglz compressed: 2
@ -219,13 +229,15 @@ SELECT alter_columnar_table_set('t', compression => 'pglz');
VACUUM FULL t; VACUUM FULL t;
VACUUM VERBOSE t; VACUUM VERBOSE t;
INFO: statistics for "t": INFO: statistics for "t":
total file size: 49152, total data size: 15728 storage id: xxxxx
total file size: 57344, total data size: 15728
total row count: 7030, stripe count: 4, average rows per stripe: 1757 total row count: 7030, stripe count: 4, average rows per stripe: 1757
block count: 8, containing data for dropped columns: 0, none compressed: 2, pglz compressed: 6 block count: 8, containing data for dropped columns: 0, none compressed: 2, pglz compressed: 6
DROP TABLE t; DROP TABLE t;
DROP VIEW t_stripes;
-- Make sure we cleaned the metadata for t too -- Make sure we cleaned the metadata for t too
SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; SELECT count(distinct storageid) - :columnar_table_count FROM cstore.cstore_stripes;
?column? ?column?
--------------------------------------------------------------------- ---------------------------------------------------------------------
0 0

View File

@ -11,7 +11,8 @@ step s1-insert:
INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i;
s2: INFO: statistics for "test_vacuum_vs_insert": s2: INFO: statistics for "test_vacuum_vs_insert":
total file size: 16384, total data size: 26 storage id: xxxxx
total file size: 24576, total data size: 26
total row count: 3, stripe count: 1, average rows per stripe: 3 total row count: 3, stripe count: 1, average rows per stripe: 3
block count: 2, containing data for dropped columns: 0, none compressed: 2, pglz compressed: 0 block count: 2, containing data for dropped columns: 0, none compressed: 2, pglz compressed: 0
@ -51,7 +52,7 @@ step s1-commit:
COMMIT; COMMIT;
s2: INFO: vacuuming "public.test_vacuum_vs_insert" s2: INFO: vacuuming "public.test_vacuum_vs_insert"
s2: INFO: "test_vacuum_vs_insert": found 0 removable, 6 nonremovable row versions in 3 pages s2: INFO: "test_vacuum_vs_insert": found 0 removable, 6 nonremovable row versions in 4 pages
DETAIL: 0 dead row versions cannot be removed yet. DETAIL: 0 dead row versions cannot be removed yet.
step s2-vacuum-full: <... completed> step s2-vacuum-full: <... completed>
step s2-select: step s2-select:

View File

@ -483,7 +483,7 @@ SELECT * FROM print_extension_changes();
| function citus_internal.cstore_ensure_objects_exist() | function citus_internal.cstore_ensure_objects_exist()
| function cstore.columnar_handler(internal) | function cstore.columnar_handler(internal)
| schema cstore | schema cstore
| table cstore.cstore_data_files | sequence cstore.storageid_seq
| table cstore.cstore_skipnodes | table cstore.cstore_skipnodes
| table cstore.cstore_stripes | table cstore.cstore_stripes
| table cstore.options | table cstore.options

View File

@ -479,7 +479,7 @@ SELECT * FROM print_extension_changes();
--------------------------------------------------------------------- ---------------------------------------------------------------------
| function citus_internal.cstore_ensure_objects_exist() | function citus_internal.cstore_ensure_objects_exist()
| schema cstore | schema cstore
| table cstore.cstore_data_files | sequence cstore.storageid_seq
| table cstore.cstore_skipnodes | table cstore.cstore_skipnodes
| table cstore.cstore_stripes | table cstore.cstore_stripes
| table cstore.options | table cstore.options

View File

@ -185,13 +185,13 @@ ORDER BY 1;
schema citus schema citus
schema citus_internal schema citus_internal
schema cstore schema cstore
sequence cstore.storageid_seq
sequence pg_dist_colocationid_seq sequence pg_dist_colocationid_seq
sequence pg_dist_groupid_seq sequence pg_dist_groupid_seq
sequence pg_dist_node_nodeid_seq sequence pg_dist_node_nodeid_seq
sequence pg_dist_placement_placementid_seq sequence pg_dist_placement_placementid_seq
sequence pg_dist_shardid_seq sequence pg_dist_shardid_seq
table citus.pg_dist_object table citus.pg_dist_object
table cstore.cstore_data_files
table cstore.cstore_skipnodes table cstore.cstore_skipnodes
table cstore.cstore_stripes table cstore.cstore_stripes
table cstore.options table cstore.options

View File

@ -181,13 +181,13 @@ ORDER BY 1;
schema citus schema citus
schema citus_internal schema citus_internal
schema cstore schema cstore
sequence cstore.storageid_seq
sequence pg_dist_colocationid_seq sequence pg_dist_colocationid_seq
sequence pg_dist_groupid_seq sequence pg_dist_groupid_seq
sequence pg_dist_node_nodeid_seq sequence pg_dist_node_nodeid_seq
sequence pg_dist_placement_placementid_seq sequence pg_dist_placement_placementid_seq
sequence pg_dist_shardid_seq sequence pg_dist_shardid_seq
table citus.pg_dist_object table citus.pg_dist_object
table cstore.cstore_data_files
table cstore.cstore_skipnodes table cstore.cstore_skipnodes
table cstore.cstore_stripes table cstore.cstore_stripes
table cstore.options table cstore.options

View File

@ -22,7 +22,6 @@
/hyperscale_tutorial.sql /hyperscale_tutorial.sql
/am_block_filtering.sql /am_block_filtering.sql
/am_copyto.sql /am_copyto.sql
/am_create.sql
/am_data_types.sql /am_data_types.sql
/am_load.sql /am_load.sql
/fdw_block_filtering.sql /fdw_block_filtering.sql

View File

@ -18,3 +18,8 @@ CREATE TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT,
-- Test that querying an empty table works -- Test that querying an empty table works
ANALYZE contestant; ANALYZE contestant;
SELECT count(*) FROM contestant; SELECT count(*) FROM contestant;
-- Utility functions to be used throughout tests
CREATE FUNCTION columnar_relation_storageid(relid oid) RETURNS bigint
LANGUAGE C STABLE STRICT
AS 'citus', $$columnar_relation_storageid$$;

View File

@ -15,22 +15,23 @@
-- store postgres database oid -- store postgres database oid
SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset
SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset SELECT count(distinct storageid) AS cstore_stripes_before_drop FROM cstore.cstore_stripes \gset
-- DROP cstore_fdw tables -- DROP cstore_fdw tables
DROP TABLE contestant; DROP TABLE contestant;
DROP TABLE contestant_compressed; DROP TABLE contestant_compressed;
-- make sure DROP deletes metadata -- make sure DROP deletes metadata
SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; SELECT :cstore_stripes_before_drop - count(distinct storageid) FROM cstore.cstore_stripes;
-- Create a cstore_fdw table under a schema and drop it. -- Create a cstore_fdw table under a schema and drop it.
CREATE SCHEMA test_schema; CREATE SCHEMA test_schema;
CREATE TABLE test_schema.test_table(data int) USING columnar; CREATE TABLE test_schema.test_table(data int) USING columnar;
INSERT INTO test_schema.test_table VALUES (1);
SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset SELECT count(*) AS cstore_stripes_before_drop FROM cstore.cstore_stripes \gset
DROP SCHEMA test_schema CASCADE; DROP SCHEMA test_schema CASCADE;
SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; SELECT :cstore_stripes_before_drop - count(distinct storageid) FROM cstore.cstore_stripes;
SELECT current_database() datname \gset SELECT current_database() datname \gset

View File

@ -33,15 +33,14 @@ WHERE regclass = 't_view'::regclass;
SELECT * FROM t_view a ORDER BY a; SELECT * FROM t_view a ORDER BY a;
-- verify that we have created metadata entries for the materialized view -- verify that we have created metadata entries for the materialized view
SELECT relfilenode FROM pg_class WHERE relname='t_view' \gset SELECT columnar_relation_storageid(oid) AS storageid
FROM pg_class WHERE relname='t_view' \gset
SELECT count(*) FROM cstore.cstore_data_files WHERE relfilenode=:relfilenode; SELECT count(*) FROM cstore.cstore_stripes WHERE storageid=:storageid;
SELECT count(*) FROM cstore.cstore_stripes WHERE relfilenode=:relfilenode; SELECT count(*) FROM cstore.cstore_skipnodes WHERE storageid=:storageid;
SELECT count(*) FROM cstore.cstore_skipnodes WHERE relfilenode=:relfilenode;
DROP TABLE t CASCADE; DROP TABLE t CASCADE;
-- dropping must remove metadata -- dropping must remove metadata
SELECT count(*) FROM cstore.cstore_data_files WHERE relfilenode=:relfilenode; SELECT count(*) FROM cstore.cstore_stripes WHERE storageid=:storageid;
SELECT count(*) FROM cstore.cstore_stripes WHERE relfilenode=:relfilenode; SELECT count(*) FROM cstore.cstore_skipnodes WHERE storageid=:storageid;
SELECT count(*) FROM cstore.cstore_skipnodes WHERE relfilenode=:relfilenode;

View File

@ -16,7 +16,7 @@ INSERT INTO t2 SELECT i, f(i) FROM generate_series(1, 5) i;
-- there are no subtransactions, so above statement should batch -- there are no subtransactions, so above statement should batch
-- INSERTs inside the UDF and create on stripe per table. -- INSERTs inside the UDF and create on stripe per table.
SELECT relname, count(*) FROM cstore.cstore_stripes a, pg_class b SELECT relname, count(*) FROM cstore.cstore_stripes a, pg_class b
WHERE a.relfilenode=b.relfilenode AND relname IN ('t1', 't2') WHERE columnar_relation_storageid(b.oid)=a.storageid AND relname IN ('t1', 't2')
GROUP BY relname GROUP BY relname
ORDER BY relname; ORDER BY relname;

View File

@ -4,20 +4,22 @@
CREATE TABLE t(a int, b int) USING columnar; CREATE TABLE t(a int, b int) USING columnar;
CREATE VIEW t_stripes AS
SELECT * FROM cstore.cstore_stripes a, pg_class b
WHERE a.storageid = columnar_relation_storageid(b.oid) AND b.relname = 't';
BEGIN; BEGIN;
INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i;
ROLLBACK; ROLLBACK;
SELECT count(*) FROM t; SELECT count(*) FROM t;
-- check stripe metadata also have been rolled-back -- check stripe metadata also have been rolled-back
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b SELECT count(*) FROM t_stripes;
WHERE a.relfilenode = b.relfilenode AND b.relname = 't';
INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i;
SELECT count(*) FROM t; SELECT count(*) FROM t;
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b SELECT count(*) FROM t_stripes;
WHERE a.relfilenode = b.relfilenode AND b.relname = 't';
-- savepoint rollback -- savepoint rollback
BEGIN; BEGIN;
@ -36,7 +38,7 @@ COMMIT;
SELECT count(*) FROM t; SELECT count(*) FROM t;
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b SELECT count(*) FROM t_stripes;
WHERE a.relfilenode = b.relfilenode AND b.relname = 't';
DROP TABLE t; DROP TABLE t;
DROP VIEW t_stripes;

View File

@ -13,7 +13,7 @@ CREATE TABLE cstore_truncate_test_second (a int, b int) USING columnar;
CREATE TABLE cstore_truncate_test_compressed (a int, b int) USING columnar; CREATE TABLE cstore_truncate_test_compressed (a int, b int) USING columnar;
CREATE TABLE cstore_truncate_test_regular (a int, b int); CREATE TABLE cstore_truncate_test_regular (a int, b int);
SELECT count(*) AS cstore_data_files_before_truncate FROM cstore.cstore_data_files \gset SELECT count(distinct storageid) AS cstore_data_files_before_truncate FROM cstore.cstore_stripes \gset
INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a;
@ -63,7 +63,7 @@ TRUNCATE TABLE cstore_truncate_test;
SELECT * from cstore_truncate_test; SELECT * from cstore_truncate_test;
-- make sure TRUNATE deletes metadata for old relfilenode -- make sure TRUNATE deletes metadata for old relfilenode
SELECT :cstore_data_files_before_truncate - count(*) FROM cstore.cstore_data_files; SELECT :cstore_data_files_before_truncate - count(distinct storageid) FROM cstore.cstore_stripes;
-- test if truncation in the same transaction that created the table works properly -- test if truncation in the same transaction that created the table works properly
BEGIN; BEGIN;
@ -74,7 +74,7 @@ INSERT INTO cstore_same_transaction_truncate SELECT * FROM generate_series(20, 2
COMMIT; COMMIT;
-- should output "1" for the newly created relation -- should output "1" for the newly created relation
SELECT count(*) - :cstore_data_files_before_truncate FROM cstore.cstore_data_files; SELECT count(distinct storageid) - :cstore_data_files_before_truncate FROM cstore.cstore_stripes;
SELECT * FROM cstore_same_transaction_truncate; SELECT * FROM cstore_same_transaction_truncate;
DROP TABLE cstore_same_transaction_truncate; DROP TABLE cstore_same_transaction_truncate;

View File

@ -1,45 +1,53 @@
SELECT count(*) AS columnar_table_count FROM cstore.cstore_data_files \gset SELECT count(distinct storageid) AS columnar_table_count FROM cstore.cstore_stripes \gset
CREATE TABLE t(a int, b int) USING columnar; CREATE TABLE t(a int, b int) USING columnar;
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; CREATE VIEW t_stripes AS
SELECT * FROM cstore.cstore_stripes a, pg_class b
WHERE a.storageid = columnar_relation_storageid(b.oid) AND b.relname='t';
SELECT count(*) FROM t_stripes;
INSERT INTO t SELECT i, i * i FROM generate_series(1, 10) i; INSERT INTO t SELECT i, i * i FROM generate_series(1, 10) i;
INSERT INTO t SELECT i, i * i FROM generate_series(11, 20) i; INSERT INTO t SELECT i, i * i FROM generate_series(11, 20) i;
INSERT INTO t SELECT i, i * i FROM generate_series(21, 30) i; INSERT INTO t SELECT i, i * i FROM generate_series(21, 30) i;
SELECT sum(a), sum(b) FROM t; SELECT sum(a), sum(b) FROM t;
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; SELECT count(*) FROM t_stripes;
-- vacuum full should merge stripes together -- vacuum full should merge stripes together
VACUUM FULL t; VACUUM FULL t;
SELECT sum(a), sum(b) FROM t; SELECT sum(a), sum(b) FROM t;
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; SELECT count(*) FROM t_stripes;
-- test the case when all data cannot fit into a single stripe -- test the case when all data cannot fit into a single stripe
SELECT alter_columnar_table_set('t', stripe_row_count => 1000); SELECT alter_columnar_table_set('t', stripe_row_count => 1000);
INSERT INTO t SELECT i, 2 * i FROM generate_series(1,2500) i; INSERT INTO t SELECT i, 2 * i FROM generate_series(1,2500) i;
SELECT sum(a), sum(b) FROM t; SELECT sum(a), sum(b) FROM t;
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; SELECT count(*) FROM t_stripes;
VACUUM FULL t; VACUUM FULL t;
SELECT sum(a), sum(b) FROM t; SELECT sum(a), sum(b) FROM t;
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; SELECT count(*) FROM t_stripes;
-- VACUUM FULL doesn't reclaim dropped columns, but converts them to NULLs -- VACUUM FULL doesn't reclaim dropped columns, but converts them to NULLs
ALTER TABLE t DROP COLUMN a; ALTER TABLE t DROP COLUMN a;
SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' ORDER BY 1, 2, 3; SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL
FROM cstore.cstore_skipnodes a, pg_class b
WHERE a.storageid = columnar_relation_storageid(b.oid) AND b.relname='t' ORDER BY 1, 2, 3;
VACUUM FULL t; VACUUM FULL t;
SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' ORDER BY 1, 2, 3; SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL
FROM cstore.cstore_skipnodes a, pg_class b
WHERE a.storageid = columnar_relation_storageid(b.oid) AND b.relname='t' ORDER BY 1, 2, 3;
-- Make sure we cleaned-up the transient table metadata after VACUUM FULL commands -- Make sure we cleaned-up the transient table metadata after VACUUM FULL commands
SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; SELECT count(distinct storageid) - :columnar_table_count FROM cstore.cstore_stripes;
-- do this in a transaction so concurrent autovacuum doesn't interfere with results -- do this in a transaction so concurrent autovacuum doesn't interfere with results
BEGIN; BEGIN;
@ -99,6 +107,7 @@ VACUUM FULL t;
VACUUM VERBOSE t; VACUUM VERBOSE t;
DROP TABLE t; DROP TABLE t;
DROP VIEW t_stripes;
-- Make sure we cleaned the metadata for t too -- Make sure we cleaned the metadata for t too
SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; SELECT count(distinct storageid) - :columnar_table_count FROM cstore.cstore_stripes;