mirror of https://github.com/citusdata/citus.git
Columnar Store Storage Id
parent
fc0ef8abba
commit
0a9fc2148c
|
@ -215,12 +215,18 @@ static Cost
|
|||
CStoreScanCost(RangeTblEntry *rte)
|
||||
{
|
||||
Relation rel = RelationIdGetRelation(rte->relid);
|
||||
DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node.relNode, false);
|
||||
if (UninitializedDatafile(rel->rd_node))
|
||||
{
|
||||
RelationClose(rel);
|
||||
return 0;
|
||||
}
|
||||
|
||||
DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node, false);
|
||||
RelationClose(rel);
|
||||
|
||||
uint32 maxColumnCount = 0;
|
||||
uint64 totalStripeSize = 0;
|
||||
ListCell *stripeMetadataCell = NULL;
|
||||
|
||||
RelationClose(rel);
|
||||
rel = NULL;
|
||||
|
||||
foreach(stripeMetadataCell, metadata->stripeMetadataList)
|
||||
|
|
|
@ -41,6 +41,23 @@
|
|||
#include "utils/memutils.h"
|
||||
#include "utils/lsyscache.h"
|
||||
#include "utils/rel.h"
|
||||
#include "utils/relfilenodemap.h"
|
||||
|
||||
|
||||
/*
|
||||
* Content of the first page in main fork, which stores metadata at file
|
||||
* level.
|
||||
*/
|
||||
typedef struct ColumnarMetapage
|
||||
{
|
||||
/*
|
||||
* Each of the metadata table rows are identified by a storageId.
|
||||
* We store it also in the main fork so we can link metadata rows
|
||||
* with data files.
|
||||
*/
|
||||
uint64 storageId;
|
||||
} ColumnarMetapage;
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
|
@ -48,11 +65,11 @@ typedef struct
|
|||
EState *estate;
|
||||
} ModifyState;
|
||||
|
||||
static void InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe);
|
||||
static void GetHighestUsedAddressAndId(Oid relfilenode,
|
||||
static void InsertStripeMetadataRow(uint64 storageId, StripeMetadata *stripe);
|
||||
static void GetHighestUsedAddressAndId(uint64 storageId,
|
||||
uint64 *highestUsedAddress,
|
||||
uint64 *highestUsedId);
|
||||
static List * ReadDataFileStripeList(Oid relfilenode, Snapshot snapshot);
|
||||
static List * ReadDataFileStripeList(uint64 storageId, Snapshot snapshot);
|
||||
static Oid CStoreStripesRelationId(void);
|
||||
static Oid CStoreStripesIndexRelationId(void);
|
||||
static Oid CStoreDataFilesRelationId(void);
|
||||
|
@ -60,7 +77,7 @@ static Oid CStoreDataFilesIndexRelationId(void);
|
|||
static Oid CStoreSkipNodesRelationId(void);
|
||||
static Oid CStoreSkipNodesIndexRelationId(void);
|
||||
static Oid CStoreNamespaceId(void);
|
||||
static bool ReadCStoreDataFiles(Oid relfilenode, DataFileMetadata *metadata);
|
||||
static bool ReadCStoreDataFiles(uint64 storageId, DataFileMetadata *metadata);
|
||||
static ModifyState * StartModifyRelation(Relation rel);
|
||||
static void InsertTupleAndEnforceConstraints(ModifyState *state, Datum *values,
|
||||
bool *nulls);
|
||||
|
@ -69,10 +86,16 @@ static void FinishModifyRelation(ModifyState *state);
|
|||
static EState * create_estate_for_relation(Relation rel);
|
||||
static bytea * DatumToBytea(Datum value, Form_pg_attribute attrForm);
|
||||
static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm);
|
||||
static ColumnarMetapage InitMetapage(Relation relation);
|
||||
static bool ReadMetapage(RelFileNode relfilenode, ColumnarMetapage *metapage);
|
||||
static uint64 GetNextStorageId(void);
|
||||
|
||||
PG_FUNCTION_INFO_V1(columnar_relation_storageid);
|
||||
|
||||
|
||||
/* constants for cstore_table */
|
||||
#define Natts_cstore_data_files 6
|
||||
#define Anum_cstore_data_files_relfilenode 1
|
||||
#define Anum_cstore_data_files_storageid 1
|
||||
#define Anum_cstore_data_files_block_row_count 2
|
||||
#define Anum_cstore_data_files_stripe_row_count 3
|
||||
#define Anum_cstore_data_files_compression 4
|
||||
|
@ -85,7 +108,7 @@ static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm);
|
|||
*/
|
||||
typedef struct FormData_cstore_data_files
|
||||
{
|
||||
Oid relfilenode;
|
||||
uint64 storageid;
|
||||
int32 block_row_count;
|
||||
int32 stripe_row_count;
|
||||
NameData compression;
|
||||
|
@ -99,7 +122,7 @@ typedef FormData_cstore_data_files *Form_cstore_data_files;
|
|||
|
||||
/* constants for cstore_stripe */
|
||||
#define Natts_cstore_stripes 8
|
||||
#define Anum_cstore_stripes_relfilenode 1
|
||||
#define Anum_cstore_stripes_storageid 1
|
||||
#define Anum_cstore_stripes_stripe 2
|
||||
#define Anum_cstore_stripes_file_offset 3
|
||||
#define Anum_cstore_stripes_data_length 4
|
||||
|
@ -110,7 +133,7 @@ typedef FormData_cstore_data_files *Form_cstore_data_files;
|
|||
|
||||
/* constants for cstore_skipnodes */
|
||||
#define Natts_cstore_skipnodes 12
|
||||
#define Anum_cstore_skipnodes_relfilenode 1
|
||||
#define Anum_cstore_skipnodes_storageid 1
|
||||
#define Anum_cstore_skipnodes_stripe 2
|
||||
#define Anum_cstore_skipnodes_attr 3
|
||||
#define Anum_cstore_skipnodes_block 4
|
||||
|
@ -125,18 +148,19 @@ typedef FormData_cstore_data_files *Form_cstore_data_files;
|
|||
|
||||
|
||||
/*
|
||||
* InitCStoreDataFileMetadata adds a record for the given relfilenode
|
||||
* InitCStoreDataFileMetadata adds a record for the given relation
|
||||
* in cstore_data_files.
|
||||
*/
|
||||
void
|
||||
InitCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int stripeRowCount,
|
||||
InitCStoreDataFileMetadata(Relation relation, int blockRowCount,
|
||||
int stripeRowCount,
|
||||
CompressionType compression)
|
||||
{
|
||||
NameData compressionName = { 0 };
|
||||
ColumnarMetapage metapage = InitMetapage(relation);
|
||||
|
||||
bool nulls[Natts_cstore_data_files] = { 0 };
|
||||
Datum values[Natts_cstore_data_files] = {
|
||||
ObjectIdGetDatum(relfilenode),
|
||||
UInt64GetDatum(metapage.storageId),
|
||||
Int32GetDatum(blockRowCount),
|
||||
Int32GetDatum(stripeRowCount),
|
||||
0, /* to be filled below */
|
||||
|
@ -144,10 +168,11 @@ InitCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int stripeRowCoun
|
|||
Int32GetDatum(CSTORE_VERSION_MINOR)
|
||||
};
|
||||
|
||||
NameData compressionName = { 0 };
|
||||
namestrcpy(&compressionName, CompressionTypeStr(compression));
|
||||
values[Anum_cstore_data_files_compression - 1] = NameGetDatum(&compressionName);
|
||||
|
||||
DeleteDataFileMetadataRowIfExists(relfilenode);
|
||||
DeleteDataFileMetadataRowIfExists(relation->rd_node);
|
||||
|
||||
Oid cstoreDataFilesOid = CStoreDataFilesRelationId();
|
||||
Relation cstoreDataFiles = heap_open(cstoreDataFilesOid, RowExclusiveLock);
|
||||
|
@ -163,7 +188,8 @@ InitCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int stripeRowCoun
|
|||
|
||||
|
||||
void
|
||||
UpdateCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int stripeRowCount,
|
||||
UpdateCStoreDataFileMetadata(RelFileNode relfilenode, int blockRowCount, int
|
||||
stripeRowCount,
|
||||
CompressionType compression)
|
||||
{
|
||||
const int scanKeyCount = 1;
|
||||
|
@ -174,11 +200,17 @@ UpdateCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int stripeRowCo
|
|||
bool replace[Natts_cstore_data_files] = { 0 };
|
||||
bool changed = false;
|
||||
|
||||
ColumnarMetapage metapage;
|
||||
if (!ReadMetapage(relfilenode, &metapage))
|
||||
{
|
||||
elog(ERROR, "metapage was not found");
|
||||
}
|
||||
|
||||
Relation cstoreDataFiles = heap_open(CStoreDataFilesRelationId(), RowExclusiveLock);
|
||||
TupleDesc tupleDescriptor = RelationGetDescr(cstoreDataFiles);
|
||||
|
||||
ScanKeyInit(&scanKey[0], Anum_cstore_data_files_relfilenode, BTEqualStrategyNumber,
|
||||
F_INT8EQ, ObjectIdGetDatum(relfilenode));
|
||||
ScanKeyInit(&scanKey[0], Anum_cstore_data_files_storageid, BTEqualStrategyNumber,
|
||||
F_INT8EQ, UInt64GetDatum(metapage.storageId));
|
||||
|
||||
SysScanDesc scanDescriptor = systable_beginscan(cstoreDataFiles,
|
||||
CStoreDataFilesIndexRelationId(),
|
||||
|
@ -189,7 +221,7 @@ UpdateCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int stripeRowCo
|
|||
if (heapTuple == NULL)
|
||||
{
|
||||
ereport(ERROR, (errmsg("relfilenode %d doesn't belong to a cstore table",
|
||||
relfilenode)));
|
||||
relfilenode.relNode)));
|
||||
}
|
||||
|
||||
Form_cstore_data_files metadata = (Form_cstore_data_files) GETSTRUCT(heapTuple);
|
||||
|
@ -242,13 +274,19 @@ UpdateCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int stripeRowCo
|
|||
* of cstore_skipnodes.
|
||||
*/
|
||||
void
|
||||
SaveStripeSkipList(Oid relfilenode, uint64 stripe, StripeSkipList *stripeSkipList,
|
||||
SaveStripeSkipList(RelFileNode relfilenode, uint64 stripe, StripeSkipList *stripeSkipList,
|
||||
TupleDesc tupleDescriptor)
|
||||
{
|
||||
uint32 columnIndex = 0;
|
||||
uint32 blockIndex = 0;
|
||||
uint32 columnCount = stripeSkipList->columnCount;
|
||||
|
||||
ColumnarMetapage metapage;
|
||||
if (!ReadMetapage(relfilenode, &metapage))
|
||||
{
|
||||
elog(WARNING, "metapage was not found");
|
||||
}
|
||||
|
||||
Oid cstoreSkipNodesOid = CStoreSkipNodesRelationId();
|
||||
Relation cstoreSkipNodes = heap_open(cstoreSkipNodesOid, RowExclusiveLock);
|
||||
ModifyState *modifyState = StartModifyRelation(cstoreSkipNodes);
|
||||
|
@ -261,7 +299,7 @@ SaveStripeSkipList(Oid relfilenode, uint64 stripe, StripeSkipList *stripeSkipLis
|
|||
&stripeSkipList->blockSkipNodeArray[columnIndex][blockIndex];
|
||||
|
||||
Datum values[Natts_cstore_skipnodes] = {
|
||||
ObjectIdGetDatum(relfilenode),
|
||||
UInt64GetDatum(metapage.storageId),
|
||||
Int64GetDatum(stripe),
|
||||
Int32GetDatum(columnIndex + 1),
|
||||
Int32GetDatum(blockIndex),
|
||||
|
@ -307,7 +345,7 @@ SaveStripeSkipList(Oid relfilenode, uint64 stripe, StripeSkipList *stripeSkipLis
|
|||
* ReadStripeSkipList fetches StripeSkipList for a given stripe.
|
||||
*/
|
||||
StripeSkipList *
|
||||
ReadStripeSkipList(Oid relfilenode, uint64 stripe, TupleDesc tupleDescriptor,
|
||||
ReadStripeSkipList(RelFileNode relfilenode, uint64 stripe, TupleDesc tupleDescriptor,
|
||||
uint32 blockCount)
|
||||
{
|
||||
int32 columnIndex = 0;
|
||||
|
@ -315,12 +353,18 @@ ReadStripeSkipList(Oid relfilenode, uint64 stripe, TupleDesc tupleDescriptor,
|
|||
uint32 columnCount = tupleDescriptor->natts;
|
||||
ScanKeyData scanKey[2];
|
||||
|
||||
ColumnarMetapage metapage;
|
||||
if (!ReadMetapage(relfilenode, &metapage))
|
||||
{
|
||||
elog(WARNING, "metapage was not found");
|
||||
}
|
||||
|
||||
Oid cstoreSkipNodesOid = CStoreSkipNodesRelationId();
|
||||
Relation cstoreSkipNodes = heap_open(cstoreSkipNodesOid, AccessShareLock);
|
||||
Relation index = index_open(CStoreSkipNodesIndexRelationId(), AccessShareLock);
|
||||
|
||||
ScanKeyInit(&scanKey[0], Anum_cstore_skipnodes_relfilenode,
|
||||
BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode));
|
||||
ScanKeyInit(&scanKey[0], Anum_cstore_skipnodes_storageid,
|
||||
BTEqualStrategyNumber, F_OIDEQ, UInt64GetDatum(metapage.storageId));
|
||||
ScanKeyInit(&scanKey[1], Anum_cstore_skipnodes_stripe,
|
||||
BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(stripe));
|
||||
|
||||
|
@ -410,11 +454,11 @@ ReadStripeSkipList(Oid relfilenode, uint64 stripe, TupleDesc tupleDescriptor,
|
|||
* InsertStripeMetadataRow adds a row to cstore_stripes.
|
||||
*/
|
||||
static void
|
||||
InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe)
|
||||
InsertStripeMetadataRow(uint64 storageId, StripeMetadata *stripe)
|
||||
{
|
||||
bool nulls[Natts_cstore_stripes] = { 0 };
|
||||
Datum values[Natts_cstore_stripes] = {
|
||||
ObjectIdGetDatum(relfilenode),
|
||||
UInt64GetDatum(storageId),
|
||||
Int64GetDatum(stripe->id),
|
||||
Int64GetDatum(stripe->fileOffset),
|
||||
Int64GetDatum(stripe->dataLength),
|
||||
|
@ -444,16 +488,23 @@ InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe)
|
|||
* from cstore_data_files and cstore_stripes.
|
||||
*/
|
||||
DataFileMetadata *
|
||||
ReadDataFileMetadata(Oid relfilenode, bool missingOk)
|
||||
ReadDataFileMetadata(RelFileNode relfilenode, bool missingOk)
|
||||
{
|
||||
ColumnarMetapage metapage;
|
||||
DataFileMetadata *datafileMetadata = palloc0(sizeof(DataFileMetadata));
|
||||
bool found = ReadCStoreDataFiles(relfilenode, datafileMetadata);
|
||||
|
||||
bool found = ReadMetapage(relfilenode, &metapage);
|
||||
if (found)
|
||||
{
|
||||
found = ReadCStoreDataFiles(metapage.storageId, datafileMetadata);
|
||||
}
|
||||
|
||||
if (!found)
|
||||
{
|
||||
if (!missingOk)
|
||||
{
|
||||
ereport(ERROR, (errmsg("Relfilenode %d doesn't belong to a cstore table.",
|
||||
relfilenode)));
|
||||
relfilenode.relNode)));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -462,7 +513,7 @@ ReadDataFileMetadata(Oid relfilenode, bool missingOk)
|
|||
}
|
||||
|
||||
datafileMetadata->stripeMetadataList =
|
||||
ReadDataFileStripeList(relfilenode, GetTransactionSnapshot());
|
||||
ReadDataFileStripeList(metapage.storageId, GetTransactionSnapshot());
|
||||
|
||||
return datafileMetadata;
|
||||
}
|
||||
|
@ -473,12 +524,17 @@ ReadDataFileMetadata(Oid relfilenode, bool missingOk)
|
|||
* relfilenode across all active and inactive transactions.
|
||||
*/
|
||||
uint64
|
||||
GetHighestUsedAddress(Oid relfilenode)
|
||||
GetHighestUsedAddress(RelFileNode relfilenode)
|
||||
{
|
||||
uint64 highestUsedAddress = 0;
|
||||
uint64 highestUsedId = 0;
|
||||
ColumnarMetapage metapage;
|
||||
if (!ReadMetapage(relfilenode, &metapage))
|
||||
{
|
||||
elog(ERROR, "metapage was not found");
|
||||
}
|
||||
|
||||
GetHighestUsedAddressAndId(relfilenode, &highestUsedAddress, &highestUsedId);
|
||||
GetHighestUsedAddressAndId(metapage.storageId, &highestUsedAddress, &highestUsedId);
|
||||
|
||||
return highestUsedAddress;
|
||||
}
|
||||
|
@ -489,7 +545,7 @@ GetHighestUsedAddress(Oid relfilenode)
|
|||
* the given relfilenode across all active and inactive transactions.
|
||||
*/
|
||||
static void
|
||||
GetHighestUsedAddressAndId(Oid relfilenode,
|
||||
GetHighestUsedAddressAndId(uint64 storageId,
|
||||
uint64 *highestUsedAddress,
|
||||
uint64 *highestUsedId)
|
||||
{
|
||||
|
@ -498,10 +554,12 @@ GetHighestUsedAddressAndId(Oid relfilenode,
|
|||
SnapshotData SnapshotDirty;
|
||||
InitDirtySnapshot(SnapshotDirty);
|
||||
|
||||
List *stripeMetadataList = ReadDataFileStripeList(relfilenode, &SnapshotDirty);
|
||||
List *stripeMetadataList = ReadDataFileStripeList(storageId, &SnapshotDirty);
|
||||
|
||||
*highestUsedId = 0;
|
||||
*highestUsedAddress = 0;
|
||||
|
||||
/* file starts with metapage */
|
||||
*highestUsedAddress = sizeof(ColumnarMetapage);
|
||||
|
||||
foreach(stripeMetadataCell, stripeMetadataList)
|
||||
{
|
||||
|
@ -535,8 +593,14 @@ ReserveStripe(Relation rel, uint64 sizeBytes,
|
|||
*/
|
||||
LockRelation(rel, ShareUpdateExclusiveLock);
|
||||
|
||||
Oid relfilenode = rel->rd_node.relNode;
|
||||
GetHighestUsedAddressAndId(relfilenode, &currLogicalHigh, &highestId);
|
||||
RelFileNode relfilenode = rel->rd_node;
|
||||
ColumnarMetapage metapage;
|
||||
if (!ReadMetapage(relfilenode, &metapage))
|
||||
{
|
||||
elog(WARNING, "metapage was not found");
|
||||
}
|
||||
|
||||
GetHighestUsedAddressAndId(metapage.storageId, &currLogicalHigh, &highestId);
|
||||
SmgrAddr currSmgrHigh = logical_to_smgr(currLogicalHigh);
|
||||
|
||||
SmgrAddr resSmgrStart = next_block_start(currSmgrHigh);
|
||||
|
@ -565,7 +629,7 @@ ReserveStripe(Relation rel, uint64 sizeBytes,
|
|||
stripe.rowCount = rowCount;
|
||||
stripe.id = highestId + 1;
|
||||
|
||||
InsertStripeMetadataRow(relfilenode, &stripe);
|
||||
InsertStripeMetadataRow(metapage.storageId, &stripe);
|
||||
|
||||
UnlockRelation(rel, ShareUpdateExclusiveLock);
|
||||
|
||||
|
@ -574,18 +638,18 @@ ReserveStripe(Relation rel, uint64 sizeBytes,
|
|||
|
||||
|
||||
/*
|
||||
* ReadDataFileStripeList reads the stripe list for a given relfilenode
|
||||
* ReadDataFileStripeList reads the stripe list for a given storageId
|
||||
* in the given snapshot.
|
||||
*/
|
||||
static List *
|
||||
ReadDataFileStripeList(Oid relfilenode, Snapshot snapshot)
|
||||
ReadDataFileStripeList(uint64 storageId, Snapshot snapshot)
|
||||
{
|
||||
List *stripeMetadataList = NIL;
|
||||
ScanKeyData scanKey[1];
|
||||
HeapTuple heapTuple;
|
||||
|
||||
ScanKeyInit(&scanKey[0], Anum_cstore_stripes_relfilenode,
|
||||
BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode));
|
||||
ScanKeyInit(&scanKey[0], Anum_cstore_stripes_storageid,
|
||||
BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(storageId));
|
||||
|
||||
Oid cstoreStripesOid = CStoreStripesRelationId();
|
||||
Relation cstoreStripes = heap_open(cstoreStripesOid, AccessShareLock);
|
||||
|
@ -634,13 +698,13 @@ ReadDataFileStripeList(Oid relfilenode, Snapshot snapshot)
|
|||
* false if table was not found in cstore_data_files.
|
||||
*/
|
||||
static bool
|
||||
ReadCStoreDataFiles(Oid relfilenode, DataFileMetadata *metadata)
|
||||
ReadCStoreDataFiles(uint64 storageid, DataFileMetadata *metadata)
|
||||
{
|
||||
bool found = false;
|
||||
ScanKeyData scanKey[1];
|
||||
|
||||
ScanKeyInit(&scanKey[0], Anum_cstore_data_files_relfilenode,
|
||||
BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode));
|
||||
ScanKeyInit(&scanKey[0], Anum_cstore_data_files_storageid,
|
||||
BTEqualStrategyNumber, F_OIDEQ, UInt64GetDatum(storageid));
|
||||
|
||||
Oid cstoreDataFilesOid = CStoreDataFilesRelationId();
|
||||
Relation cstoreDataFiles = try_relation_open(cstoreDataFilesOid, AccessShareLock);
|
||||
|
@ -684,6 +748,7 @@ ReadCStoreDataFiles(Oid relfilenode, DataFileMetadata *metadata)
|
|||
datumArray[Anum_cstore_data_files_compression - 1]);
|
||||
metadata->compression = ParseCompressionType(NameStr(*compressionName));
|
||||
}
|
||||
|
||||
found = true;
|
||||
}
|
||||
|
||||
|
@ -699,7 +764,7 @@ ReadCStoreDataFiles(Oid relfilenode, DataFileMetadata *metadata)
|
|||
* DeleteDataFileMetadataRowIfExists removes the row with given relfilenode from cstore_stripes.
|
||||
*/
|
||||
void
|
||||
DeleteDataFileMetadataRowIfExists(Oid relfilenode)
|
||||
DeleteDataFileMetadataRowIfExists(RelFileNode relfilenode)
|
||||
{
|
||||
ScanKeyData scanKey[1];
|
||||
|
||||
|
@ -712,8 +777,14 @@ DeleteDataFileMetadataRowIfExists(Oid relfilenode)
|
|||
return;
|
||||
}
|
||||
|
||||
ScanKeyInit(&scanKey[0], Anum_cstore_data_files_relfilenode,
|
||||
BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode));
|
||||
ColumnarMetapage metapage;
|
||||
if (!ReadMetapage(relfilenode, &metapage))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
ScanKeyInit(&scanKey[0], Anum_cstore_data_files_storageid,
|
||||
BTEqualStrategyNumber, F_INT8EQ, UInt64GetDatum(metapage.storageId));
|
||||
|
||||
Oid cstoreDataFilesOid = CStoreDataFilesRelationId();
|
||||
Relation cstoreDataFiles = try_relation_open(cstoreDataFilesOid, AccessShareLock);
|
||||
|
@ -996,3 +1067,90 @@ CStoreNamespaceId(void)
|
|||
{
|
||||
return get_namespace_oid("cstore", false);
|
||||
}
|
||||
|
||||
|
||||
static bool
|
||||
ReadMetapage(RelFileNode relfilenode, ColumnarMetapage *metapage)
|
||||
{
|
||||
Oid relationId = RelidByRelfilenode(relfilenode.spcNode,
|
||||
relfilenode.relNode);
|
||||
if (!OidIsValid(relationId))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
Relation relation = relation_open(relationId, NoLock);
|
||||
|
||||
RelationOpenSmgr(relation);
|
||||
int nblocks = smgrnblocks(relation->rd_smgr, MAIN_FORKNUM);
|
||||
RelationCloseSmgr(relation);
|
||||
|
||||
if (nblocks == 0)
|
||||
{
|
||||
relation_close(relation, NoLock);
|
||||
return false;
|
||||
}
|
||||
|
||||
StringInfo metapageBuffer = ReadFromSmgr(relation, 0, sizeof(ColumnarMetapage));
|
||||
relation_close(relation, NoLock);
|
||||
|
||||
memcpy((void *) metapage, metapageBuffer->data, sizeof(ColumnarMetapage));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
static ColumnarMetapage
|
||||
InitMetapage(Relation relation)
|
||||
{
|
||||
ColumnarMetapage metapage;
|
||||
metapage.storageId = GetNextStorageId();
|
||||
|
||||
/* create the first block */
|
||||
Buffer newBuffer = ReadBuffer(relation, P_NEW);
|
||||
ReleaseBuffer(newBuffer);
|
||||
|
||||
Assert(sizeof(ColumnarMetapage) <= BLCKSZ - SizeOfPageHeaderData);
|
||||
WriteToSmgr(relation, 0, (char *) &metapage, sizeof(ColumnarMetapage));
|
||||
|
||||
return metapage;
|
||||
}
|
||||
|
||||
|
||||
static uint64
|
||||
GetNextStorageId(void)
|
||||
{
|
||||
Oid sequenceId = get_relname_relid("cstore_storageid_seq", CStoreNamespaceId());
|
||||
Datum sequenceIdDatum = ObjectIdGetDatum(sequenceId);
|
||||
|
||||
/*
|
||||
* Generate new and unique storage id from sequence.
|
||||
* TODO: should we restrict access to the sequence, which might require
|
||||
* switching security context here?
|
||||
*/
|
||||
Datum storageIdDatum = DirectFunctionCall1(nextval_oid, sequenceIdDatum);
|
||||
|
||||
uint64 storageId = DatumGetInt64(storageIdDatum);
|
||||
|
||||
return storageId;
|
||||
}
|
||||
|
||||
|
||||
Datum
|
||||
columnar_relation_storageid(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Oid relationId = PG_GETARG_OID(0);
|
||||
uint64 storageId = -1;
|
||||
|
||||
Relation relation = relation_open(relationId, AccessShareLock);
|
||||
ColumnarMetapage metadata;
|
||||
if (IsCStoreTableAmTable(relationId) &&
|
||||
ReadMetapage(relation->rd_node, &metadata))
|
||||
{
|
||||
storageId = metadata.storageId;
|
||||
}
|
||||
|
||||
relation_close(relation, AccessShareLock);
|
||||
|
||||
PG_RETURN_INT64(storageId);
|
||||
}
|
||||
|
|
|
@ -76,7 +76,6 @@ static BlockData * DeserializeBlockData(StripeBuffers *stripeBuffers, uint64 blo
|
|||
List *projectedColumnList);
|
||||
static Datum ColumnDefaultValue(TupleConstr *tupleConstraints,
|
||||
Form_pg_attribute attributeForm);
|
||||
static StringInfo ReadFromSmgr(Relation rel, uint64 offset, uint32 size);
|
||||
|
||||
/*
|
||||
* CStoreBeginRead initializes a cstore read operation. This function returns a
|
||||
|
@ -86,9 +85,7 @@ TableReadState *
|
|||
CStoreBeginRead(Relation relation, TupleDesc tupleDescriptor,
|
||||
List *projectedColumnList, List *whereClauseList)
|
||||
{
|
||||
Oid relNode = relation->rd_node.relNode;
|
||||
|
||||
DataFileMetadata *datafileMetadata = ReadDataFileMetadata(relNode, false);
|
||||
DataFileMetadata *datafileMetadata = ReadDataFileMetadata(relation->rd_node, true);
|
||||
|
||||
/*
|
||||
* We allocate all stripe specific data in the stripeReadContext, and reset
|
||||
|
@ -127,6 +124,11 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu
|
|||
StripeMetadata *stripeMetadata = readState->currentStripeMetadata;
|
||||
MemoryContext oldContext = NULL;
|
||||
|
||||
if (readState->datafileMetadata == NULL)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* If no stripes are loaded, load the next non-empty stripe. Note that when
|
||||
* loading stripes, we skip over blocks whose contents can be filtered with
|
||||
|
@ -238,8 +240,13 @@ void
|
|||
CStoreEndRead(TableReadState *readState)
|
||||
{
|
||||
MemoryContextDelete(readState->stripeReadContext);
|
||||
list_free_deep(readState->datafileMetadata->stripeMetadataList);
|
||||
pfree(readState->datafileMetadata);
|
||||
|
||||
if (readState->datafileMetadata)
|
||||
{
|
||||
list_free_deep(readState->datafileMetadata->stripeMetadataList);
|
||||
pfree(readState->datafileMetadata);
|
||||
}
|
||||
|
||||
pfree(readState);
|
||||
}
|
||||
|
||||
|
@ -317,7 +324,12 @@ CStoreTableRowCount(Relation relation)
|
|||
ListCell *stripeMetadataCell = NULL;
|
||||
uint64 totalRowCount = 0;
|
||||
|
||||
DataFileMetadata *datafileMetadata = ReadDataFileMetadata(relation->rd_node.relNode,
|
||||
if (UninitializedDatafile(relation->rd_node))
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
DataFileMetadata *datafileMetadata = ReadDataFileMetadata(relation->rd_node,
|
||||
false);
|
||||
|
||||
foreach(stripeMetadataCell, datafileMetadata->stripeMetadataList)
|
||||
|
@ -345,7 +357,7 @@ LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata,
|
|||
|
||||
bool *projectedColumnMask = ProjectedColumnMask(columnCount, projectedColumnList);
|
||||
|
||||
StripeSkipList *stripeSkipList = ReadStripeSkipList(relation->rd_node.relNode,
|
||||
StripeSkipList *stripeSkipList = ReadStripeSkipList(relation->rd_node,
|
||||
stripeMetadata->id,
|
||||
tupleDescriptor,
|
||||
stripeMetadata->blockCount);
|
||||
|
@ -1009,7 +1021,7 @@ ColumnDefaultValue(TupleConstr *tupleConstraints, Form_pg_attribute attributeFor
|
|||
}
|
||||
|
||||
|
||||
static StringInfo
|
||||
StringInfo
|
||||
ReadFromSmgr(Relation rel, uint64 offset, uint32 size)
|
||||
{
|
||||
StringInfo resultBuffer = makeStringInfo();
|
||||
|
|
|
@ -106,7 +106,6 @@ static void CStoreTableAMProcessUtility(PlannedStmt *plannedStatement,
|
|||
char *completionTag);
|
||||
#endif
|
||||
|
||||
static bool IsCStoreTableAmTable(Oid relationId);
|
||||
static bool ConditionalLockRelationWithTimeout(Relation rel, LOCKMODE lockMode,
|
||||
int timeout, int retryInterval);
|
||||
static void LogRelationStats(Relation rel, int elevel);
|
||||
|
@ -117,8 +116,8 @@ static void TruncateCStore(Relation rel, int elevel);
|
|||
* CStoreTableAMDefaultOptions returns the default options for a cstore table am table.
|
||||
* These options are based on the GUC's controlling the defaults.
|
||||
*/
|
||||
static CStoreOptions *
|
||||
CStoreTableAMDefaultOptions()
|
||||
CStoreOptions *
|
||||
CStoreTableAMDefaultOptions(void)
|
||||
{
|
||||
CStoreOptions *cstoreOptions = palloc0(sizeof(CStoreOptions));
|
||||
cstoreOptions->compressionType = cstore_compression;
|
||||
|
@ -133,11 +132,14 @@ CStoreTableAMDefaultOptions()
|
|||
* relation is a cstore table am table, if not it will raise an error
|
||||
*/
|
||||
CStoreOptions *
|
||||
CStoreTableAMGetOptions(Oid relfilenode)
|
||||
CStoreTableAMGetOptions(RelFileNode relfilenode)
|
||||
{
|
||||
Assert(OidIsValid(relfilenode));
|
||||
|
||||
CStoreOptions *cstoreOptions = palloc0(sizeof(CStoreOptions));
|
||||
if (UninitializedDatafile(relfilenode))
|
||||
{
|
||||
return CStoreTableAMDefaultOptions();
|
||||
}
|
||||
|
||||
DataFileMetadata *metadata = ReadDataFileMetadata(relfilenode, false);
|
||||
cstoreOptions->compressionType = metadata->compression;
|
||||
cstoreOptions->stripeRowCount = metadata->stripeRowCount;
|
||||
|
@ -570,50 +572,32 @@ cstore_relation_set_new_filenode(Relation rel,
|
|||
TransactionId *freezeXid,
|
||||
MultiXactId *minmulti)
|
||||
{
|
||||
DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node.relNode, true);
|
||||
uint64 blockRowCount = 0;
|
||||
uint64 stripeRowCount = 0;
|
||||
CompressionType compression = 0;
|
||||
Oid oldRelfilenode = rel->rd_node.relNode;
|
||||
|
||||
MarkRelfilenodeDropped(oldRelfilenode, GetCurrentSubTransactionId());
|
||||
|
||||
if (metadata != NULL)
|
||||
{
|
||||
/* existing table (e.g. TRUNCATE), use existing blockRowCount */
|
||||
blockRowCount = metadata->blockRowCount;
|
||||
stripeRowCount = metadata->stripeRowCount;
|
||||
compression = metadata->compression;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* new table, use options */
|
||||
CStoreOptions *options = CStoreTableAMDefaultOptions();
|
||||
blockRowCount = options->blockRowCount;
|
||||
stripeRowCount = options->stripeRowCount;
|
||||
compression = options->compressionType;
|
||||
}
|
||||
|
||||
/* delete old relfilenode metadata */
|
||||
DeleteDataFileMetadataRowIfExists(rel->rd_node.relNode);
|
||||
DeleteDataFileMetadataRowIfExists(rel->rd_node);
|
||||
|
||||
Assert(persistence == RELPERSISTENCE_PERMANENT);
|
||||
*freezeXid = RecentXmin;
|
||||
*minmulti = GetOldestMultiXactId();
|
||||
SMgrRelation srel = RelationCreateStorage(*newrnode, persistence);
|
||||
InitCStoreDataFileMetadata(newrnode->relNode, blockRowCount, stripeRowCount,
|
||||
compression);
|
||||
smgrclose(srel);
|
||||
|
||||
/* we will lazily initialize metadata in first write */
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
cstore_relation_nontransactional_truncate(Relation rel)
|
||||
{
|
||||
Oid relfilenode = rel->rd_node.relNode;
|
||||
DataFileMetadata *metadata = ReadDataFileMetadata(relfilenode, false);
|
||||
RelFileNode relfilenode = rel->rd_node;
|
||||
|
||||
NonTransactionDropWriteState(relfilenode);
|
||||
NonTransactionDropWriteState(relfilenode.relNode);
|
||||
|
||||
/* Delete old relfilenode metadata and recreate it */
|
||||
DeleteDataFileMetadataRowIfExists(relfilenode);
|
||||
|
||||
/*
|
||||
* No need to set new relfilenode, since the table was created in this
|
||||
|
@ -624,10 +608,7 @@ cstore_relation_nontransactional_truncate(Relation rel)
|
|||
*/
|
||||
RelationTruncate(rel, 0);
|
||||
|
||||
/* Delete old relfilenode metadata and recreate it */
|
||||
DeleteDataFileMetadataRowIfExists(rel->rd_node.relNode);
|
||||
InitCStoreDataFileMetadata(rel->rd_node.relNode, metadata->blockRowCount,
|
||||
metadata->stripeRowCount, metadata->compression);
|
||||
/* we will lazily initialize new metadata in first write */
|
||||
}
|
||||
|
||||
|
||||
|
@ -675,14 +656,9 @@ cstore_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
|
|||
* relation first.
|
||||
*/
|
||||
|
||||
CStoreOptions *cstoreOptions = CStoreTableAMGetOptions(OldHeap->rd_node.relNode);
|
||||
CStoreOptions *cstoreOptions = CStoreTableAMGetOptions(OldHeap->rd_node);
|
||||
|
||||
UpdateCStoreDataFileMetadata(NewHeap->rd_node.relNode,
|
||||
cstoreOptions->blockRowCount,
|
||||
cstoreOptions->stripeRowCount,
|
||||
cstoreOptions->compressionType);
|
||||
|
||||
cstoreOptions = CStoreTableAMGetOptions(NewHeap->rd_node.relNode);
|
||||
cstoreOptions = CStoreTableAMGetOptions(NewHeap->rd_node);
|
||||
|
||||
TableWriteState *writeState = CStoreBeginWrite(NewHeap->rd_node,
|
||||
cstoreOptions->compressionType,
|
||||
|
@ -740,7 +716,7 @@ static void
|
|||
LogRelationStats(Relation rel, int elevel)
|
||||
{
|
||||
ListCell *stripeMetadataCell = NULL;
|
||||
Oid relfilenode = rel->rd_node.relNode;
|
||||
RelFileNode relfilenode = rel->rd_node;
|
||||
StringInfo infoBuf = makeStringInfo();
|
||||
|
||||
int compressionStats[COMPRESSION_COUNT] = { 0 };
|
||||
|
@ -866,7 +842,7 @@ TruncateCStore(Relation rel, int elevel)
|
|||
* we're truncating.
|
||||
*/
|
||||
SmgrAddr highestPhysicalAddress =
|
||||
logical_to_smgr(GetHighestUsedAddress(rel->rd_node.relNode));
|
||||
logical_to_smgr(GetHighestUsedAddress(rel->rd_node));
|
||||
|
||||
BlockNumber new_rel_pages = highestPhysicalAddress.blockno + 1;
|
||||
if (new_rel_pages == old_rel_pages)
|
||||
|
@ -1234,10 +1210,10 @@ CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId, Oid objectId
|
|||
* tableam tables storage is managed by postgres.
|
||||
*/
|
||||
Relation rel = table_open(objectId, AccessExclusiveLock);
|
||||
Oid relfilenode = rel->rd_node.relNode;
|
||||
RelFileNode relfilenode = rel->rd_node;
|
||||
DeleteDataFileMetadataRowIfExists(relfilenode);
|
||||
|
||||
MarkRelfilenodeDropped(relfilenode, GetCurrentSubTransactionId());
|
||||
MarkRelfilenodeDropped(relfilenode.relNode, GetCurrentSubTransactionId());
|
||||
|
||||
/* keep the lock since we did physical changes to the relation */
|
||||
table_close(rel, NoLock);
|
||||
|
@ -1249,7 +1225,7 @@ CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId, Oid objectId
|
|||
* IsCStoreTableAmTable returns true if relation has cstore_tableam
|
||||
* access method. This can be called before extension creation.
|
||||
*/
|
||||
static bool
|
||||
bool
|
||||
IsCStoreTableAmTable(Oid relationId)
|
||||
{
|
||||
if (!OidIsValid(relationId))
|
||||
|
@ -1367,7 +1343,7 @@ alter_columnar_table_set(PG_FUNCTION_ARGS)
|
|||
Oid relationId = PG_GETARG_OID(0);
|
||||
|
||||
Relation rel = table_open(relationId, AccessExclusiveLock); /* ALTER TABLE LOCK */
|
||||
DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node.relNode, true);
|
||||
DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node, true);
|
||||
if (!metadata)
|
||||
{
|
||||
ereport(ERROR, (errmsg("table %s is not a cstore table",
|
||||
|
@ -1406,7 +1382,7 @@ alter_columnar_table_set(PG_FUNCTION_ARGS)
|
|||
CompressionTypeStr(compression))));
|
||||
}
|
||||
|
||||
UpdateCStoreDataFileMetadata(rel->rd_node.relNode, blockRowCount, stripeRowCount,
|
||||
UpdateCStoreDataFileMetadata(rel->rd_node, blockRowCount, stripeRowCount,
|
||||
compression);
|
||||
|
||||
table_close(rel, NoLock);
|
||||
|
@ -1422,7 +1398,7 @@ alter_columnar_table_reset(PG_FUNCTION_ARGS)
|
|||
Oid relationId = PG_GETARG_OID(0);
|
||||
|
||||
Relation rel = table_open(relationId, AccessExclusiveLock); /* ALTER TABLE LOCK */
|
||||
DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node.relNode, true);
|
||||
DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node, true);
|
||||
if (!metadata)
|
||||
{
|
||||
ereport(ERROR, (errmsg("table %s is not a cstore table",
|
||||
|
@ -1455,7 +1431,7 @@ alter_columnar_table_reset(PG_FUNCTION_ARGS)
|
|||
CompressionTypeStr(compression))));
|
||||
}
|
||||
|
||||
UpdateCStoreDataFileMetadata(rel->rd_node.relNode, blockRowCount, stripeRowCount,
|
||||
UpdateCStoreDataFileMetadata(rel->rd_node, blockRowCount, stripeRowCount,
|
||||
compression);
|
||||
|
||||
table_close(rel, NoLock);
|
||||
|
|
|
@ -51,6 +51,7 @@ static void UpdateBlockSkipNodeMinMax(ColumnBlockSkipNode *blockSkipNode,
|
|||
FmgrInfo *comparisonFunction);
|
||||
static Datum DatumCopy(Datum datum, bool datumTypeByValue, int datumTypeLength);
|
||||
static StringInfo CopyStringInfo(StringInfo sourceString);
|
||||
static void InitializeMetadata(RelFileNode relfilenode);
|
||||
|
||||
/*
|
||||
* CStoreBeginWrite initializes a cstore data load operation and returns a table
|
||||
|
@ -65,12 +66,16 @@ CStoreBeginWrite(RelFileNode relfilenode,
|
|||
uint64 stripeMaxRowCount, uint32 blockRowCount,
|
||||
TupleDesc tupleDescriptor)
|
||||
{
|
||||
uint32 columnIndex = 0;
|
||||
/* initialize metadata if this is the first write */
|
||||
if (UninitializedDatafile(relfilenode))
|
||||
{
|
||||
InitializeMetadata(relfilenode);
|
||||
}
|
||||
|
||||
/* get comparison function pointers for each of the columns */
|
||||
uint32 columnCount = tupleDescriptor->natts;
|
||||
FmgrInfo **comparisonFunctionArray = palloc0(columnCount * sizeof(FmgrInfo *));
|
||||
for (columnIndex = 0; columnIndex < columnCount; columnIndex++)
|
||||
for (uint32 columnIndex = 0; columnIndex < columnCount; columnIndex++)
|
||||
{
|
||||
FmgrInfo *comparisonFunction = NULL;
|
||||
FormData_pg_attribute *attributeForm = TupleDescAttr(tupleDescriptor,
|
||||
|
@ -122,6 +127,34 @@ CStoreBeginWrite(RelFileNode relfilenode,
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* UninitializedDatafile returns true if metadata for the
|
||||
* given relfilenode hasn't been initialized.
|
||||
*/
|
||||
bool
|
||||
UninitializedDatafile(RelFileNode relfilenode)
|
||||
{
|
||||
return ReadDataFileMetadata(relfilenode, true) == NULL;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
InitializeMetadata(RelFileNode relfilenode)
|
||||
{
|
||||
Oid relationId = RelidByRelfilenode(relfilenode.spcNode, relfilenode.relNode);
|
||||
Relation relation = relation_open(relationId, NoLock);
|
||||
|
||||
CStoreOptions *options = CStoreTableAMDefaultOptions();
|
||||
|
||||
InitCStoreDataFileMetadata(relation,
|
||||
options->blockRowCount,
|
||||
options->stripeRowCount,
|
||||
options->compressionType);
|
||||
|
||||
relation_close(relation, NoLock);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* CStoreWriteRow adds a row to the cstore file. If the stripe is not initialized,
|
||||
* we create structures to hold stripe data and skip list. Then, we serialize and
|
||||
|
@ -323,8 +356,9 @@ CreateEmptyStripeSkipList(uint32 stripeMaxRowCount, uint32 blockRowCount,
|
|||
}
|
||||
|
||||
|
||||
static void
|
||||
WriteToSmgr(Relation rel, uint64 logicalOffset, char *data, uint32 dataLength)
|
||||
void
|
||||
WriteToSmgr(Relation relation, uint64 logicalOffset, char *data,
|
||||
uint32 dataLength)
|
||||
{
|
||||
uint64 remaining = dataLength;
|
||||
Buffer buffer;
|
||||
|
@ -333,13 +367,13 @@ WriteToSmgr(Relation rel, uint64 logicalOffset, char *data, uint32 dataLength)
|
|||
{
|
||||
SmgrAddr addr = logical_to_smgr(logicalOffset);
|
||||
|
||||
RelationOpenSmgr(rel);
|
||||
BlockNumber nblocks = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM);
|
||||
RelationOpenSmgr(relation);
|
||||
BlockNumber nblocks = smgrnblocks(relation->rd_smgr, MAIN_FORKNUM);
|
||||
Assert(addr.blockno < nblocks);
|
||||
(void) nblocks; /* keep compiler quiet */
|
||||
RelationCloseSmgr(rel);
|
||||
RelationCloseSmgr(relation);
|
||||
|
||||
buffer = ReadBuffer(rel, addr.blockno);
|
||||
buffer = ReadBuffer(relation, addr.blockno);
|
||||
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
|
||||
|
||||
Page page = BufferGetPage(buffer);
|
||||
|
@ -372,7 +406,7 @@ WriteToSmgr(Relation rel, uint64 logicalOffset, char *data, uint32 dataLength)
|
|||
|
||||
MarkBufferDirty(buffer);
|
||||
|
||||
if (RelationNeedsWAL(rel))
|
||||
if (RelationNeedsWAL(relation))
|
||||
{
|
||||
XLogBeginInsert();
|
||||
|
||||
|
@ -521,7 +555,7 @@ FlushStripe(TableWriteState *writeState)
|
|||
}
|
||||
|
||||
/* create skip list and footer buffers */
|
||||
SaveStripeSkipList(relation->rd_node.relNode,
|
||||
SaveStripeSkipList(writeState->relfilenode,
|
||||
stripeMetadata.id,
|
||||
stripeSkipList, tupleDescriptor);
|
||||
|
||||
|
|
|
@ -3,20 +3,22 @@
|
|||
CREATE SCHEMA cstore;
|
||||
SET search_path TO cstore;
|
||||
|
||||
CREATE SEQUENCE cstore_storageid_seq NO CYCLE;
|
||||
|
||||
CREATE TABLE cstore_data_files (
|
||||
relfilenode oid NOT NULL,
|
||||
storageid bigint NOT NULL,
|
||||
block_row_count int NOT NULL,
|
||||
stripe_row_count int NOT NULL,
|
||||
compression name NOT NULL,
|
||||
version_major bigint NOT NULL,
|
||||
version_minor bigint NOT NULL,
|
||||
PRIMARY KEY (relfilenode)
|
||||
PRIMARY KEY (storageid)
|
||||
) WITH (user_catalog_table = true);
|
||||
|
||||
COMMENT ON TABLE cstore_data_files IS 'CStore data file wide metadata';
|
||||
|
||||
CREATE TABLE cstore_stripes (
|
||||
relfilenode oid NOT NULL,
|
||||
storageid bigint NOT NULL,
|
||||
stripe bigint NOT NULL,
|
||||
file_offset bigint NOT NULL,
|
||||
data_length bigint NOT NULL,
|
||||
|
@ -24,14 +26,14 @@ CREATE TABLE cstore_stripes (
|
|||
block_count int NOT NULL,
|
||||
block_row_count int NOT NULL,
|
||||
row_count bigint NOT NULL,
|
||||
PRIMARY KEY (relfilenode, stripe),
|
||||
FOREIGN KEY (relfilenode) REFERENCES cstore_data_files(relfilenode) ON DELETE CASCADE INITIALLY DEFERRED
|
||||
PRIMARY KEY (storageid, stripe),
|
||||
FOREIGN KEY (storageid) REFERENCES cstore_data_files(storageid) ON DELETE CASCADE INITIALLY DEFERRED
|
||||
) WITH (user_catalog_table = true);
|
||||
|
||||
COMMENT ON TABLE cstore_stripes IS 'CStore per stripe metadata';
|
||||
|
||||
CREATE TABLE cstore_skipnodes (
|
||||
relfilenode oid NOT NULL,
|
||||
storageid bigint NOT NULL,
|
||||
stripe bigint NOT NULL,
|
||||
attr int NOT NULL,
|
||||
block int NOT NULL,
|
||||
|
@ -43,19 +45,23 @@ CREATE TABLE cstore_skipnodes (
|
|||
exists_stream_offset bigint NOT NULL,
|
||||
exists_stream_length bigint NOT NULL,
|
||||
value_compression_type int NOT NULL,
|
||||
PRIMARY KEY (relfilenode, stripe, attr, block),
|
||||
FOREIGN KEY (relfilenode, stripe) REFERENCES cstore_stripes(relfilenode, stripe) ON DELETE CASCADE INITIALLY DEFERRED
|
||||
PRIMARY KEY (storageid, stripe, attr, block),
|
||||
FOREIGN KEY (storageid, stripe) REFERENCES cstore_stripes(storageid, stripe) ON DELETE CASCADE INITIALLY DEFERRED
|
||||
) WITH (user_catalog_table = true);
|
||||
|
||||
COMMENT ON TABLE cstore_skipnodes IS 'CStore per block metadata';
|
||||
|
||||
CREATE FUNCTION columnar_relation_storageid(relid oid) RETURNS bigint
|
||||
LANGUAGE C STABLE STRICT
|
||||
AS 'MODULE_PATHNAME', $$columnar_relation_storageid$$;
|
||||
|
||||
CREATE VIEW columnar_options AS
|
||||
SELECT c.oid::regclass regclass,
|
||||
d.block_row_count,
|
||||
d.stripe_row_count,
|
||||
d.compression
|
||||
FROM pg_class c
|
||||
JOIN cstore.cstore_data_files d USING(relfilenode);
|
||||
FROM pg_class c, cstore.cstore_data_files d
|
||||
WHERE d.storageid=columnar_relation_storageid(c.oid);
|
||||
|
||||
COMMENT ON VIEW columnar_options IS 'CStore per table settings';
|
||||
|
||||
|
|
|
@ -31,6 +31,7 @@ DROP VIEW columnar_options;
|
|||
DROP TABLE cstore_skipnodes;
|
||||
DROP TABLE cstore_stripes;
|
||||
DROP TABLE cstore_data_files;
|
||||
DROP SEQUENCE cstore_storageid_seq;
|
||||
|
||||
DROP FUNCTION citus_internal.cstore_ensure_objects_exist();
|
||||
|
||||
|
|
|
@ -178,7 +178,7 @@ cstore_init_write_state(RelFileNode relfilenode, TupleDesc tupdesc,
|
|||
*/
|
||||
MemoryContext oldContext = MemoryContextSwitchTo(WriteStateContext);
|
||||
|
||||
CStoreOptions *cstoreOptions = CStoreTableAMGetOptions(relfilenode.relNode);
|
||||
CStoreOptions *cstoreOptions = CStoreTableAMGetOptions(relfilenode);
|
||||
SubXidWriteState *stackEntry = palloc0(sizeof(SubXidWriteState));
|
||||
stackEntry->writeState = CStoreBeginWrite(relfilenode,
|
||||
cstoreOptions->compressionType,
|
||||
|
|
|
@ -263,6 +263,7 @@ extern void CStoreWriteRow(TableWriteState *state, Datum *columnValues,
|
|||
extern void CStoreFlushPendingWrites(TableWriteState *state);
|
||||
extern void CStoreEndWrite(TableWriteState *state);
|
||||
extern bool ContainsPendingWrites(TableWriteState *state);
|
||||
extern bool UninitializedDatafile(RelFileNode relfilenode);
|
||||
|
||||
/* Function declarations for reading from a cstore file */
|
||||
extern TableReadState * CStoreBeginRead(Relation relation,
|
||||
|
@ -285,23 +286,29 @@ extern bool CompressBuffer(StringInfo inputBuffer, StringInfo outputBuffer,
|
|||
CompressionType compressionType);
|
||||
extern StringInfo DecompressBuffer(StringInfo buffer, CompressionType compressionType);
|
||||
extern char * CompressionTypeStr(CompressionType type);
|
||||
extern CStoreOptions * CStoreTableAMGetOptions(Oid relfilenode);
|
||||
extern CStoreOptions * CStoreTableAMGetOptions(RelFileNode relfilenode);
|
||||
extern void WriteToSmgr(Relation relation, uint64 logicalOffset,
|
||||
char *data, uint32 dataLength);
|
||||
extern StringInfo ReadFromSmgr(Relation rel, uint64 offset, uint32 size);
|
||||
extern bool IsCStoreTableAmTable(Oid relationId);
|
||||
extern CStoreOptions * CStoreTableAMDefaultOptions(void);
|
||||
|
||||
/* cstore_metadata_tables.c */
|
||||
extern void DeleteDataFileMetadataRowIfExists(Oid relfilenode);
|
||||
extern void InitCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int
|
||||
stripeRowCount, CompressionType compression);
|
||||
extern void UpdateCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int
|
||||
extern void DeleteDataFileMetadataRowIfExists(RelFileNode relfilenode);
|
||||
extern void InitCStoreDataFileMetadata(Relation relation,
|
||||
int blockRowCount, int stripeRowCount,
|
||||
CompressionType compression);
|
||||
extern void UpdateCStoreDataFileMetadata(RelFileNode relfilenode, int blockRowCount, int
|
||||
stripeRowCount, CompressionType compression);
|
||||
extern DataFileMetadata * ReadDataFileMetadata(Oid relfilenode, bool missingOk);
|
||||
extern uint64 GetHighestUsedAddress(Oid relfilenode);
|
||||
extern DataFileMetadata * ReadDataFileMetadata(RelFileNode relfilenode, bool missingOk);
|
||||
extern uint64 GetHighestUsedAddress(RelFileNode relfilenode);
|
||||
extern StripeMetadata ReserveStripe(Relation rel, uint64 size,
|
||||
uint64 rowCount, uint64 columnCount,
|
||||
uint64 blockCount, uint64 blockRowCount);
|
||||
extern void SaveStripeSkipList(Oid relfilenode, uint64 stripe,
|
||||
extern void SaveStripeSkipList(RelFileNode relfilenode, uint64 stripe,
|
||||
StripeSkipList *stripeSkipList,
|
||||
TupleDesc tupleDescriptor);
|
||||
extern StripeSkipList * ReadStripeSkipList(Oid relfilenode, uint64 stripe,
|
||||
extern StripeSkipList * ReadStripeSkipList(RelFileNode relfilenode, uint64 stripe,
|
||||
TupleDesc tupleDescriptor,
|
||||
uint32 blockCount);
|
||||
|
||||
|
|
|
@ -26,6 +26,7 @@ SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files;
|
|||
-- Create a cstore_fdw table under a schema and drop it.
|
||||
CREATE SCHEMA test_schema;
|
||||
CREATE TABLE test_schema.test_table(data int) USING columnar;
|
||||
INSERT INTO test_schema.test_table VALUES (1);
|
||||
SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset
|
||||
DROP SCHEMA test_schema CASCADE;
|
||||
NOTICE: drop cascades to table test_schema.test_table
|
||||
|
|
|
@ -35,20 +35,21 @@ SELECT * FROM t_view a ORDER BY a;
|
|||
(6 rows)
|
||||
|
||||
-- verify that we have created metadata entries for the materialized view
|
||||
SELECT relfilenode FROM pg_class WHERE relname='t_view' \gset
|
||||
SELECT count(*) FROM cstore.cstore_data_files WHERE relfilenode=:relfilenode;
|
||||
SELECT cstore.columnar_relation_storageid(oid) AS storageid
|
||||
FROM pg_class WHERE relname='t_view' \gset
|
||||
SELECT count(*) FROM cstore.cstore_data_files WHERE storageid=:storageid;
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
1
|
||||
(1 row)
|
||||
|
||||
SELECT count(*) FROM cstore.cstore_stripes WHERE relfilenode=:relfilenode;
|
||||
SELECT count(*) FROM cstore.cstore_stripes WHERE storageid=:storageid;
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
1
|
||||
(1 row)
|
||||
|
||||
SELECT count(*) FROM cstore.cstore_skipnodes WHERE relfilenode=:relfilenode;
|
||||
SELECT count(*) FROM cstore.cstore_skipnodes WHERE storageid=:storageid;
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
3
|
||||
|
@ -57,19 +58,19 @@ SELECT count(*) FROM cstore.cstore_skipnodes WHERE relfilenode=:relfilenode;
|
|||
DROP TABLE t CASCADE;
|
||||
NOTICE: drop cascades to materialized view t_view
|
||||
-- dropping must remove metadata
|
||||
SELECT count(*) FROM cstore.cstore_data_files WHERE relfilenode=:relfilenode;
|
||||
SELECT count(*) FROM cstore.cstore_data_files WHERE storageid=:storageid;
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
0
|
||||
(1 row)
|
||||
|
||||
SELECT count(*) FROM cstore.cstore_stripes WHERE relfilenode=:relfilenode;
|
||||
SELECT count(*) FROM cstore.cstore_stripes WHERE storageid=:storageid;
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
0
|
||||
(1 row)
|
||||
|
||||
SELECT count(*) FROM cstore.cstore_skipnodes WHERE relfilenode=:relfilenode;
|
||||
SELECT count(*) FROM cstore.cstore_skipnodes WHERE storageid=:storageid;
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
0
|
||||
|
|
|
@ -12,7 +12,7 @@ INSERT INTO t2 SELECT i, f(i) FROM generate_series(1, 5) i;
|
|||
-- there are no subtransactions, so above statement should batch
|
||||
-- INSERTs inside the UDF and create on stripe per table.
|
||||
SELECT relname, count(*) FROM cstore.cstore_stripes a, pg_class b
|
||||
WHERE a.relfilenode=b.relfilenode AND relname IN ('t1', 't2')
|
||||
WHERE cstore.columnar_relation_storageid(b.oid)=a.storageid AND relname IN ('t1', 't2')
|
||||
GROUP BY relname
|
||||
ORDER BY relname;
|
||||
relname | count
|
||||
|
|
|
@ -2,6 +2,9 @@
|
|||
-- Testing we handle rollbacks properly
|
||||
--
|
||||
CREATE TABLE t(a int, b int) USING columnar;
|
||||
CREATE VIEW t_stripes AS
|
||||
SELECT * FROM cstore.cstore_stripes a, pg_class b
|
||||
WHERE a.storageid = cstore.columnar_relation_storageid(b.oid) AND b.relname = 't';
|
||||
BEGIN;
|
||||
INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i;
|
||||
ROLLBACK;
|
||||
|
@ -12,8 +15,7 @@ SELECT count(*) FROM t;
|
|||
(1 row)
|
||||
|
||||
-- check stripe metadata also have been rolled-back
|
||||
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b
|
||||
WHERE a.relfilenode = b.relfilenode AND b.relname = 't';
|
||||
SELECT count(*) FROM t_stripes;
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
0
|
||||
|
@ -26,8 +28,7 @@ SELECT count(*) FROM t;
|
|||
10
|
||||
(1 row)
|
||||
|
||||
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b
|
||||
WHERE a.relfilenode = b.relfilenode AND b.relname = 't';
|
||||
SELECT count(*) FROM t_stripes;
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
1
|
||||
|
@ -73,11 +74,11 @@ SELECT count(*) FROM t;
|
|||
20
|
||||
(1 row)
|
||||
|
||||
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b
|
||||
WHERE a.relfilenode = b.relfilenode AND b.relname = 't';
|
||||
SELECT count(*) FROM t_stripes;
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
2
|
||||
(1 row)
|
||||
|
||||
DROP TABLE t;
|
||||
DROP VIEW t_stripes;
|
||||
|
|
|
@ -27,6 +27,7 @@ SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files;
|
|||
-- Create a cstore_fdw table under a schema and drop it.
|
||||
CREATE SCHEMA test_schema;
|
||||
CREATE TABLE test_schema.test_table(data int) USING columnar;
|
||||
INSERT INTO test_schema.test_table VALUES (1);
|
||||
|
||||
SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset
|
||||
DROP SCHEMA test_schema CASCADE;
|
||||
|
|
|
@ -20,15 +20,16 @@ REFRESH MATERIALIZED VIEW t_view;
|
|||
SELECT * FROM t_view a ORDER BY a;
|
||||
|
||||
-- verify that we have created metadata entries for the materialized view
|
||||
SELECT relfilenode FROM pg_class WHERE relname='t_view' \gset
|
||||
SELECT cstore.columnar_relation_storageid(oid) AS storageid
|
||||
FROM pg_class WHERE relname='t_view' \gset
|
||||
|
||||
SELECT count(*) FROM cstore.cstore_data_files WHERE relfilenode=:relfilenode;
|
||||
SELECT count(*) FROM cstore.cstore_stripes WHERE relfilenode=:relfilenode;
|
||||
SELECT count(*) FROM cstore.cstore_skipnodes WHERE relfilenode=:relfilenode;
|
||||
SELECT count(*) FROM cstore.cstore_data_files WHERE storageid=:storageid;
|
||||
SELECT count(*) FROM cstore.cstore_stripes WHERE storageid=:storageid;
|
||||
SELECT count(*) FROM cstore.cstore_skipnodes WHERE storageid=:storageid;
|
||||
|
||||
DROP TABLE t CASCADE;
|
||||
|
||||
-- dropping must remove metadata
|
||||
SELECT count(*) FROM cstore.cstore_data_files WHERE relfilenode=:relfilenode;
|
||||
SELECT count(*) FROM cstore.cstore_stripes WHERE relfilenode=:relfilenode;
|
||||
SELECT count(*) FROM cstore.cstore_skipnodes WHERE relfilenode=:relfilenode;
|
||||
SELECT count(*) FROM cstore.cstore_data_files WHERE storageid=:storageid;
|
||||
SELECT count(*) FROM cstore.cstore_stripes WHERE storageid=:storageid;
|
||||
SELECT count(*) FROM cstore.cstore_skipnodes WHERE storageid=:storageid;
|
||||
|
|
|
@ -16,7 +16,7 @@ INSERT INTO t2 SELECT i, f(i) FROM generate_series(1, 5) i;
|
|||
-- there are no subtransactions, so above statement should batch
|
||||
-- INSERTs inside the UDF and create on stripe per table.
|
||||
SELECT relname, count(*) FROM cstore.cstore_stripes a, pg_class b
|
||||
WHERE a.relfilenode=b.relfilenode AND relname IN ('t1', 't2')
|
||||
WHERE cstore.columnar_relation_storageid(b.oid)=a.storageid AND relname IN ('t1', 't2')
|
||||
GROUP BY relname
|
||||
ORDER BY relname;
|
||||
|
||||
|
|
|
@ -4,20 +4,22 @@
|
|||
|
||||
CREATE TABLE t(a int, b int) USING columnar;
|
||||
|
||||
CREATE VIEW t_stripes AS
|
||||
SELECT * FROM cstore.cstore_stripes a, pg_class b
|
||||
WHERE a.storageid = cstore.columnar_relation_storageid(b.oid) AND b.relname = 't';
|
||||
|
||||
BEGIN;
|
||||
INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i;
|
||||
ROLLBACK;
|
||||
SELECT count(*) FROM t;
|
||||
|
||||
-- check stripe metadata also have been rolled-back
|
||||
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b
|
||||
WHERE a.relfilenode = b.relfilenode AND b.relname = 't';
|
||||
SELECT count(*) FROM t_stripes;
|
||||
|
||||
INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i;
|
||||
SELECT count(*) FROM t;
|
||||
|
||||
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b
|
||||
WHERE a.relfilenode = b.relfilenode AND b.relname = 't';
|
||||
SELECT count(*) FROM t_stripes;
|
||||
|
||||
-- savepoint rollback
|
||||
BEGIN;
|
||||
|
@ -36,7 +38,7 @@ COMMIT;
|
|||
|
||||
SELECT count(*) FROM t;
|
||||
|
||||
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b
|
||||
WHERE a.relfilenode = b.relfilenode AND b.relname = 't';
|
||||
SELECT count(*) FROM t_stripes;
|
||||
|
||||
DROP TABLE t;
|
||||
DROP VIEW t_stripes;
|
||||
|
|
|
@ -2,41 +2,49 @@ SELECT count(*) AS columnar_table_count FROM cstore.cstore_data_files \gset
|
|||
|
||||
CREATE TABLE t(a int, b int) USING columnar;
|
||||
|
||||
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t';
|
||||
CREATE VIEW t_stripes AS
|
||||
SELECT * FROM cstore.cstore_stripes a, pg_class b
|
||||
WHERE a.storageid = cstore.columnar_relation_storageid(b.oid) AND b.relname='t';
|
||||
|
||||
SELECT count(*) FROM t_stripes;
|
||||
|
||||
INSERT INTO t SELECT i, i * i FROM generate_series(1, 10) i;
|
||||
INSERT INTO t SELECT i, i * i FROM generate_series(11, 20) i;
|
||||
INSERT INTO t SELECT i, i * i FROM generate_series(21, 30) i;
|
||||
|
||||
SELECT sum(a), sum(b) FROM t;
|
||||
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t';
|
||||
SELECT count(*) FROM t_stripes;
|
||||
|
||||
-- vacuum full should merge stripes together
|
||||
VACUUM FULL t;
|
||||
|
||||
SELECT sum(a), sum(b) FROM t;
|
||||
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t';
|
||||
SELECT count(*) FROM t_stripes;
|
||||
|
||||
-- test the case when all data cannot fit into a single stripe
|
||||
SELECT alter_columnar_table_set('t', stripe_row_count => 1000);
|
||||
INSERT INTO t SELECT i, 2 * i FROM generate_series(1,2500) i;
|
||||
|
||||
SELECT sum(a), sum(b) FROM t;
|
||||
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t';
|
||||
SELECT count(*) FROM t_stripes;
|
||||
|
||||
VACUUM FULL t;
|
||||
|
||||
SELECT sum(a), sum(b) FROM t;
|
||||
SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t';
|
||||
SELECT count(*) FROM t_stripes;
|
||||
|
||||
-- VACUUM FULL doesn't reclaim dropped columns, but converts them to NULLs
|
||||
ALTER TABLE t DROP COLUMN a;
|
||||
|
||||
SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' ORDER BY 1, 2, 3;
|
||||
SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL
|
||||
FROM cstore.cstore_skipnodes a, pg_class b
|
||||
WHERE a.storageid = cstore.columnar_relation_storageid(b.oid) AND b.relname='t' ORDER BY 1, 2, 3;
|
||||
|
||||
VACUUM FULL t;
|
||||
|
||||
SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' ORDER BY 1, 2, 3;
|
||||
SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL
|
||||
FROM cstore.cstore_skipnodes a, pg_class b
|
||||
WHERE a.storageid = cstore.columnar_relation_storageid(b.oid) AND b.relname='t' ORDER BY 1, 2, 3;
|
||||
|
||||
-- Make sure we cleaned-up the transient table metadata after VACUUM FULL commands
|
||||
SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files;
|
||||
|
@ -99,6 +107,7 @@ VACUUM FULL t;
|
|||
VACUUM VERBOSE t;
|
||||
|
||||
DROP TABLE t;
|
||||
DROP VIEW t_stripes;
|
||||
|
||||
-- Make sure we cleaned the metadata for t too
|
||||
SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files;
|
||||
|
|
Loading…
Reference in New Issue