mirror of https://github.com/citusdata/citus.git
998 lines
28 KiB
C
998 lines
28 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* cstore_metadata_tables.c
|
|
*
|
|
* Copyright (c), Citus Data, Inc.
|
|
*
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
|
|
|
|
#include "postgres.h"
|
|
|
|
#include "safe_lib.h"
|
|
|
|
#include "columnar/cstore.h"
|
|
#include "columnar/cstore_version_compat.h"
|
|
|
|
#include <sys/stat.h>
|
|
#include "access/heapam.h"
|
|
#include "access/htup_details.h"
|
|
#include "access/nbtree.h"
|
|
#include "access/xact.h"
|
|
#include "catalog/indexing.h"
|
|
#include "catalog/pg_namespace.h"
|
|
#include "catalog/pg_collation.h"
|
|
#include "catalog/pg_type.h"
|
|
#include "catalog/namespace.h"
|
|
#include "commands/defrem.h"
|
|
#include "commands/trigger.h"
|
|
#include "executor/executor.h"
|
|
#include "executor/spi.h"
|
|
#include "miscadmin.h"
|
|
#include "nodes/execnodes.h"
|
|
#include "lib/stringinfo.h"
|
|
#include "port.h"
|
|
#include "storage/fd.h"
|
|
#include "storage/lmgr.h"
|
|
#include "storage/smgr.h"
|
|
#include "utils/builtins.h"
|
|
#include "utils/fmgroids.h"
|
|
#include "utils/memutils.h"
|
|
#include "utils/lsyscache.h"
|
|
#include "utils/rel.h"
|
|
|
|
typedef struct
|
|
{
|
|
Relation rel;
|
|
EState *estate;
|
|
} ModifyState;
|
|
|
|
static void InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe);
|
|
static void GetHighestUsedAddressAndId(Oid relfilenode,
|
|
uint64 *highestUsedAddress,
|
|
uint64 *highestUsedId);
|
|
static List * ReadDataFileStripeList(Oid relfilenode, Snapshot snapshot);
|
|
static Oid CStoreStripesRelationId(void);
|
|
static Oid CStoreStripesIndexRelationId(void);
|
|
static Oid CStoreDataFilesRelationId(void);
|
|
static Oid CStoreDataFilesIndexRelationId(void);
|
|
static Oid CStoreSkipNodesRelationId(void);
|
|
static Oid CStoreSkipNodesIndexRelationId(void);
|
|
static Oid CStoreNamespaceId(void);
|
|
static bool ReadCStoreDataFiles(Oid relfilenode, DataFileMetadata *metadata);
|
|
static ModifyState * StartModifyRelation(Relation rel);
|
|
static void InsertTupleAndEnforceConstraints(ModifyState *state, Datum *values,
|
|
bool *nulls);
|
|
static void DeleteTupleAndEnforceConstraints(ModifyState *state, HeapTuple heapTuple);
|
|
static void FinishModifyRelation(ModifyState *state);
|
|
static EState * create_estate_for_relation(Relation rel);
|
|
static bytea * DatumToBytea(Datum value, Form_pg_attribute attrForm);
|
|
static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm);
|
|
|
|
/* constants for cstore_table */
|
|
#define Natts_cstore_data_files 6
|
|
#define Anum_cstore_data_files_relfilenode 1
|
|
#define Anum_cstore_data_files_block_row_count 2
|
|
#define Anum_cstore_data_files_stripe_row_count 3
|
|
#define Anum_cstore_data_files_compression 4
|
|
#define Anum_cstore_data_files_version_major 5
|
|
#define Anum_cstore_data_files_version_minor 6
|
|
|
|
/* ----------------
|
|
* cstore.cstore_data_files definition.
|
|
* ----------------
|
|
*/
|
|
typedef struct FormData_cstore_data_files
|
|
{
|
|
Oid relfilenode;
|
|
int32 block_row_count;
|
|
int32 stripe_row_count;
|
|
NameData compression;
|
|
int64 version_major;
|
|
int64 version_minor;
|
|
|
|
#ifdef CATALOG_VARLEN /* variable-length fields start here */
|
|
#endif
|
|
} FormData_cstore_data_files;
|
|
typedef FormData_cstore_data_files *Form_cstore_data_files;
|
|
|
|
/* constants for cstore_stripe */
|
|
#define Natts_cstore_stripes 8
|
|
#define Anum_cstore_stripes_relfilenode 1
|
|
#define Anum_cstore_stripes_stripe 2
|
|
#define Anum_cstore_stripes_file_offset 3
|
|
#define Anum_cstore_stripes_data_length 4
|
|
#define Anum_cstore_stripes_column_count 5
|
|
#define Anum_cstore_stripes_block_count 6
|
|
#define Anum_cstore_stripes_block_row_count 7
|
|
#define Anum_cstore_stripes_row_count 8
|
|
|
|
/* constants for cstore_skipnodes */
|
|
#define Natts_cstore_skipnodes 12
|
|
#define Anum_cstore_skipnodes_relfilenode 1
|
|
#define Anum_cstore_skipnodes_stripe 2
|
|
#define Anum_cstore_skipnodes_attr 3
|
|
#define Anum_cstore_skipnodes_block 4
|
|
#define Anum_cstore_skipnodes_row_count 5
|
|
#define Anum_cstore_skipnodes_minimum_value 6
|
|
#define Anum_cstore_skipnodes_maximum_value 7
|
|
#define Anum_cstore_skipnodes_value_stream_offset 8
|
|
#define Anum_cstore_skipnodes_value_stream_length 9
|
|
#define Anum_cstore_skipnodes_exists_stream_offset 10
|
|
#define Anum_cstore_skipnodes_exists_stream_length 11
|
|
#define Anum_cstore_skipnodes_value_compression_type 12
|
|
|
|
|
|
/*
|
|
* InitCStoreDataFileMetadata adds a record for the given relfilenode
|
|
* in cstore_data_files.
|
|
*/
|
|
void
|
|
InitCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int stripeRowCount,
|
|
CompressionType compression)
|
|
{
|
|
NameData compressionName = { 0 };
|
|
|
|
namestrcpy(&compressionName, CompressionTypeStr(compression));
|
|
|
|
bool nulls[Natts_cstore_data_files] = { 0 };
|
|
Datum values[Natts_cstore_data_files] = {
|
|
ObjectIdGetDatum(relfilenode),
|
|
Int32GetDatum(blockRowCount),
|
|
Int32GetDatum(stripeRowCount),
|
|
NameGetDatum(&compressionName),
|
|
Int32GetDatum(CSTORE_VERSION_MAJOR),
|
|
Int32GetDatum(CSTORE_VERSION_MINOR)
|
|
};
|
|
|
|
DeleteDataFileMetadataRowIfExists(relfilenode);
|
|
|
|
Oid cstoreDataFilesOid = CStoreDataFilesRelationId();
|
|
Relation cstoreDataFiles = heap_open(cstoreDataFilesOid, RowExclusiveLock);
|
|
|
|
ModifyState *modifyState = StartModifyRelation(cstoreDataFiles);
|
|
InsertTupleAndEnforceConstraints(modifyState, values, nulls);
|
|
FinishModifyRelation(modifyState);
|
|
|
|
CommandCounterIncrement();
|
|
|
|
heap_close(cstoreDataFiles, NoLock);
|
|
}
|
|
|
|
|
|
void
|
|
UpdateCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int stripeRowCount,
|
|
CompressionType compression)
|
|
{
|
|
const int scanKeyCount = 1;
|
|
ScanKeyData scanKey[1];
|
|
bool indexOK = true;
|
|
Datum values[Natts_cstore_data_files] = { 0 };
|
|
bool isnull[Natts_cstore_data_files] = { 0 };
|
|
bool replace[Natts_cstore_data_files] = { 0 };
|
|
|
|
Relation cstoreDataFiles = heap_open(CStoreDataFilesRelationId(), RowExclusiveLock);
|
|
TupleDesc tupleDescriptor = RelationGetDescr(cstoreDataFiles);
|
|
|
|
ScanKeyInit(&scanKey[0], Anum_cstore_data_files_relfilenode, BTEqualStrategyNumber,
|
|
F_INT8EQ, ObjectIdGetDatum(relfilenode));
|
|
|
|
SysScanDesc scanDescriptor = systable_beginscan(cstoreDataFiles,
|
|
CStoreDataFilesIndexRelationId(),
|
|
indexOK,
|
|
NULL, scanKeyCount, scanKey);
|
|
|
|
HeapTuple heapTuple = systable_getnext(scanDescriptor);
|
|
if (heapTuple == NULL)
|
|
{
|
|
ereport(ERROR, (errmsg("relfilenode %d doesn't belong to a cstore table",
|
|
relfilenode)));
|
|
}
|
|
|
|
Form_cstore_data_files metadata = (Form_cstore_data_files) GETSTRUCT(heapTuple);
|
|
|
|
bool changed = false;
|
|
if (metadata->block_row_count != blockRowCount)
|
|
{
|
|
values[Anum_cstore_data_files_block_row_count - 1] = Int32GetDatum(blockRowCount);
|
|
isnull[Anum_cstore_data_files_block_row_count - 1] = false;
|
|
replace[Anum_cstore_data_files_block_row_count - 1] = true;
|
|
changed = true;
|
|
}
|
|
|
|
if (metadata->stripe_row_count != stripeRowCount)
|
|
{
|
|
values[Anum_cstore_data_files_stripe_row_count - 1] = Int32GetDatum(
|
|
stripeRowCount);
|
|
isnull[Anum_cstore_data_files_stripe_row_count - 1] = false;
|
|
replace[Anum_cstore_data_files_stripe_row_count - 1] = true;
|
|
changed = true;
|
|
}
|
|
|
|
if (ParseCompressionType(NameStr(metadata->compression)) != compression)
|
|
{
|
|
Name compressionName = palloc0(sizeof(NameData));
|
|
namestrcpy(compressionName, CompressionTypeStr(compression));
|
|
values[Anum_cstore_data_files_compression - 1] = NameGetDatum(compressionName);
|
|
isnull[Anum_cstore_data_files_compression - 1] = false;
|
|
replace[Anum_cstore_data_files_compression - 1] = true;
|
|
changed = true;
|
|
}
|
|
|
|
if (changed)
|
|
{
|
|
heapTuple = heap_modify_tuple(heapTuple, tupleDescriptor, values, isnull,
|
|
replace);
|
|
|
|
CatalogTupleUpdate(cstoreDataFiles, &heapTuple->t_self, heapTuple);
|
|
|
|
CommandCounterIncrement();
|
|
}
|
|
|
|
systable_endscan(scanDescriptor);
|
|
|
|
heap_close(cstoreDataFiles, NoLock);
|
|
}
|
|
|
|
|
|
/*
|
|
* SaveStripeSkipList saves StripeSkipList for a given stripe as rows
|
|
* of cstore_skipnodes.
|
|
*/
|
|
void
|
|
SaveStripeSkipList(Oid relfilenode, uint64 stripe, StripeSkipList *stripeSkipList,
|
|
TupleDesc tupleDescriptor)
|
|
{
|
|
uint32 columnIndex = 0;
|
|
uint32 blockIndex = 0;
|
|
uint32 columnCount = stripeSkipList->columnCount;
|
|
|
|
Oid cstoreSkipNodesOid = CStoreSkipNodesRelationId();
|
|
Relation cstoreSkipNodes = heap_open(cstoreSkipNodesOid, RowExclusiveLock);
|
|
ModifyState *modifyState = StartModifyRelation(cstoreSkipNodes);
|
|
|
|
for (columnIndex = 0; columnIndex < columnCount; columnIndex++)
|
|
{
|
|
for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++)
|
|
{
|
|
ColumnBlockSkipNode *skipNode =
|
|
&stripeSkipList->blockSkipNodeArray[columnIndex][blockIndex];
|
|
|
|
Datum values[Natts_cstore_skipnodes] = {
|
|
ObjectIdGetDatum(relfilenode),
|
|
Int64GetDatum(stripe),
|
|
Int32GetDatum(columnIndex + 1),
|
|
Int32GetDatum(blockIndex),
|
|
Int64GetDatum(skipNode->rowCount),
|
|
0, /* to be filled below */
|
|
0, /* to be filled below */
|
|
Int64GetDatum(skipNode->valueBlockOffset),
|
|
Int64GetDatum(skipNode->valueLength),
|
|
Int64GetDatum(skipNode->existsBlockOffset),
|
|
Int64GetDatum(skipNode->existsLength),
|
|
Int32GetDatum(skipNode->valueCompressionType)
|
|
};
|
|
|
|
bool nulls[Natts_cstore_skipnodes] = { false };
|
|
|
|
if (skipNode->hasMinMax)
|
|
{
|
|
values[Anum_cstore_skipnodes_minimum_value - 1] =
|
|
PointerGetDatum(DatumToBytea(skipNode->minimumValue,
|
|
&tupleDescriptor->attrs[columnIndex]));
|
|
values[Anum_cstore_skipnodes_maximum_value - 1] =
|
|
PointerGetDatum(DatumToBytea(skipNode->maximumValue,
|
|
&tupleDescriptor->attrs[columnIndex]));
|
|
}
|
|
else
|
|
{
|
|
nulls[Anum_cstore_skipnodes_minimum_value - 1] = true;
|
|
nulls[Anum_cstore_skipnodes_maximum_value - 1] = true;
|
|
}
|
|
|
|
InsertTupleAndEnforceConstraints(modifyState, values, nulls);
|
|
}
|
|
}
|
|
|
|
FinishModifyRelation(modifyState);
|
|
heap_close(cstoreSkipNodes, NoLock);
|
|
|
|
CommandCounterIncrement();
|
|
}
|
|
|
|
|
|
/*
|
|
* ReadStripeSkipList fetches StripeSkipList for a given stripe.
|
|
*/
|
|
StripeSkipList *
|
|
ReadStripeSkipList(Oid relfilenode, uint64 stripe, TupleDesc tupleDescriptor,
|
|
uint32 blockCount)
|
|
{
|
|
int32 columnIndex = 0;
|
|
HeapTuple heapTuple = NULL;
|
|
uint32 columnCount = tupleDescriptor->natts;
|
|
ScanKeyData scanKey[2];
|
|
|
|
Oid cstoreSkipNodesOid = CStoreSkipNodesRelationId();
|
|
Relation cstoreSkipNodes = heap_open(cstoreSkipNodesOid, AccessShareLock);
|
|
Relation index = index_open(CStoreSkipNodesIndexRelationId(), AccessShareLock);
|
|
|
|
ScanKeyInit(&scanKey[0], Anum_cstore_skipnodes_relfilenode,
|
|
BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode));
|
|
ScanKeyInit(&scanKey[1], Anum_cstore_skipnodes_stripe,
|
|
BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(stripe));
|
|
|
|
SysScanDesc scanDescriptor = systable_beginscan_ordered(cstoreSkipNodes, index, NULL,
|
|
2, scanKey);
|
|
|
|
StripeSkipList *skipList = palloc0(sizeof(StripeSkipList));
|
|
skipList->blockCount = blockCount;
|
|
skipList->columnCount = columnCount;
|
|
skipList->blockSkipNodeArray = palloc0(columnCount * sizeof(ColumnBlockSkipNode *));
|
|
for (columnIndex = 0; columnIndex < columnCount; columnIndex++)
|
|
{
|
|
skipList->blockSkipNodeArray[columnIndex] =
|
|
palloc0(blockCount * sizeof(ColumnBlockSkipNode));
|
|
}
|
|
|
|
while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor)))
|
|
{
|
|
Datum datumArray[Natts_cstore_skipnodes];
|
|
bool isNullArray[Natts_cstore_skipnodes];
|
|
|
|
heap_deform_tuple(heapTuple, RelationGetDescr(cstoreSkipNodes), datumArray,
|
|
isNullArray);
|
|
|
|
int32 attr = DatumGetInt32(datumArray[Anum_cstore_skipnodes_attr - 1]);
|
|
int32 blockIndex = DatumGetInt32(datumArray[Anum_cstore_skipnodes_block - 1]);
|
|
|
|
if (attr <= 0 || attr > columnCount)
|
|
{
|
|
ereport(ERROR, (errmsg("invalid stripe skipnode entry"),
|
|
errdetail("Attribute number out of range: %d", attr)));
|
|
}
|
|
|
|
if (blockIndex < 0 || blockIndex >= blockCount)
|
|
{
|
|
ereport(ERROR, (errmsg("invalid stripe skipnode entry"),
|
|
errdetail("Block number out of range: %d", blockIndex)));
|
|
}
|
|
|
|
columnIndex = attr - 1;
|
|
|
|
ColumnBlockSkipNode *skipNode =
|
|
&skipList->blockSkipNodeArray[columnIndex][blockIndex];
|
|
skipNode->rowCount = DatumGetInt64(datumArray[Anum_cstore_skipnodes_row_count -
|
|
1]);
|
|
skipNode->valueBlockOffset =
|
|
DatumGetInt64(datumArray[Anum_cstore_skipnodes_value_stream_offset - 1]);
|
|
skipNode->valueLength =
|
|
DatumGetInt64(datumArray[Anum_cstore_skipnodes_value_stream_length - 1]);
|
|
skipNode->existsBlockOffset =
|
|
DatumGetInt64(datumArray[Anum_cstore_skipnodes_exists_stream_offset - 1]);
|
|
skipNode->existsLength =
|
|
DatumGetInt64(datumArray[Anum_cstore_skipnodes_exists_stream_length - 1]);
|
|
skipNode->valueCompressionType =
|
|
DatumGetInt32(datumArray[Anum_cstore_skipnodes_value_compression_type - 1]);
|
|
|
|
if (isNullArray[Anum_cstore_skipnodes_minimum_value - 1] ||
|
|
isNullArray[Anum_cstore_skipnodes_maximum_value - 1])
|
|
{
|
|
skipNode->hasMinMax = false;
|
|
}
|
|
else
|
|
{
|
|
bytea *minValue = DatumGetByteaP(
|
|
datumArray[Anum_cstore_skipnodes_minimum_value - 1]);
|
|
bytea *maxValue = DatumGetByteaP(
|
|
datumArray[Anum_cstore_skipnodes_maximum_value - 1]);
|
|
|
|
skipNode->minimumValue =
|
|
ByteaToDatum(minValue, &tupleDescriptor->attrs[columnIndex]);
|
|
skipNode->maximumValue =
|
|
ByteaToDatum(maxValue, &tupleDescriptor->attrs[columnIndex]);
|
|
|
|
skipNode->hasMinMax = true;
|
|
}
|
|
}
|
|
|
|
systable_endscan_ordered(scanDescriptor);
|
|
index_close(index, NoLock);
|
|
heap_close(cstoreSkipNodes, NoLock);
|
|
|
|
return skipList;
|
|
}
|
|
|
|
|
|
/*
|
|
* InsertStripeMetadataRow adds a row to cstore_stripes.
|
|
*/
|
|
static void
|
|
InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe)
|
|
{
|
|
bool nulls[Natts_cstore_stripes] = { 0 };
|
|
Datum values[Natts_cstore_stripes] = {
|
|
ObjectIdGetDatum(relfilenode),
|
|
Int64GetDatum(stripe->id),
|
|
Int64GetDatum(stripe->fileOffset),
|
|
Int64GetDatum(stripe->dataLength),
|
|
Int32GetDatum(stripe->columnCount),
|
|
Int32GetDatum(stripe->blockCount),
|
|
Int32GetDatum(stripe->blockRowCount),
|
|
Int64GetDatum(stripe->rowCount)
|
|
};
|
|
|
|
Oid cstoreStripesOid = CStoreStripesRelationId();
|
|
Relation cstoreStripes = heap_open(cstoreStripesOid, RowExclusiveLock);
|
|
|
|
ModifyState *modifyState = StartModifyRelation(cstoreStripes);
|
|
|
|
InsertTupleAndEnforceConstraints(modifyState, values, nulls);
|
|
|
|
FinishModifyRelation(modifyState);
|
|
|
|
CommandCounterIncrement();
|
|
|
|
heap_close(cstoreStripes, NoLock);
|
|
}
|
|
|
|
|
|
/*
|
|
* ReadDataFileMetadata constructs DataFileMetadata for a given relfilenode by reading
|
|
* from cstore_data_files and cstore_stripes.
|
|
*/
|
|
DataFileMetadata *
|
|
ReadDataFileMetadata(Oid relfilenode, bool missingOk)
|
|
{
|
|
DataFileMetadata *datafileMetadata = palloc0(sizeof(DataFileMetadata));
|
|
bool found = ReadCStoreDataFiles(relfilenode, datafileMetadata);
|
|
if (!found)
|
|
{
|
|
if (!missingOk)
|
|
{
|
|
ereport(ERROR, (errmsg("Relfilenode %d doesn't belong to a cstore table.",
|
|
relfilenode)));
|
|
}
|
|
else
|
|
{
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
datafileMetadata->stripeMetadataList =
|
|
ReadDataFileStripeList(relfilenode, GetTransactionSnapshot());
|
|
|
|
return datafileMetadata;
|
|
}
|
|
|
|
|
|
/*
|
|
* GetHighestUsedAddress returns the highest used address for the given
|
|
* relfilenode across all active and inactive transactions.
|
|
*/
|
|
uint64
|
|
GetHighestUsedAddress(Oid relfilenode)
|
|
{
|
|
uint64 highestUsedAddress = 0;
|
|
uint64 highestUsedId = 0;
|
|
|
|
GetHighestUsedAddressAndId(relfilenode, &highestUsedAddress, &highestUsedId);
|
|
|
|
return highestUsedAddress;
|
|
}
|
|
|
|
|
|
/*
|
|
* GetHighestUsedAddressAndId returns the highest used address and id for
|
|
* the given relfilenode across all active and inactive transactions.
|
|
*/
|
|
static void
|
|
GetHighestUsedAddressAndId(Oid relfilenode,
|
|
uint64 *highestUsedAddress,
|
|
uint64 *highestUsedId)
|
|
{
|
|
ListCell *stripeMetadataCell = NULL;
|
|
|
|
SnapshotData SnapshotDirty;
|
|
InitDirtySnapshot(SnapshotDirty);
|
|
|
|
List *stripeMetadataList = ReadDataFileStripeList(relfilenode, &SnapshotDirty);
|
|
|
|
*highestUsedId = 0;
|
|
*highestUsedAddress = 0;
|
|
|
|
foreach(stripeMetadataCell, stripeMetadataList)
|
|
{
|
|
StripeMetadata *stripe = lfirst(stripeMetadataCell);
|
|
uint64 lastByte = stripe->fileOffset + stripe->dataLength - 1;
|
|
*highestUsedAddress = Max(*highestUsedAddress, lastByte);
|
|
*highestUsedId = Max(*highestUsedId, stripe->id);
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* ReserveStripe reserves and stripe of given size for the given relation,
|
|
* and inserts it into cstore_stripes. It is guaranteed that concurrent
|
|
* writes won't overwrite the returned stripe.
|
|
*/
|
|
StripeMetadata
|
|
ReserveStripe(Relation rel, uint64 sizeBytes,
|
|
uint64 rowCount, uint64 columnCount,
|
|
uint64 blockCount, uint64 blockRowCount)
|
|
{
|
|
StripeMetadata stripe = { 0 };
|
|
uint64 currLogicalHigh = 0;
|
|
uint64 highestId = 0;
|
|
|
|
/*
|
|
* We take ShareUpdateExclusiveLock here, so two space
|
|
* reservations conflict, space reservation <-> vacuum
|
|
* conflict, but space reservation doesn't conflict with
|
|
* reads & writes.
|
|
*/
|
|
LockRelation(rel, ShareUpdateExclusiveLock);
|
|
|
|
Oid relfilenode = rel->rd_node.relNode;
|
|
GetHighestUsedAddressAndId(relfilenode, &currLogicalHigh, &highestId);
|
|
SmgrAddr currSmgrHigh = logical_to_smgr(currLogicalHigh);
|
|
|
|
SmgrAddr resSmgrStart = next_block_start(currSmgrHigh);
|
|
uint64 resLogicalStart = smgr_to_logical(resSmgrStart);
|
|
|
|
uint64 resLogicalEnd = resLogicalStart + sizeBytes - 1;
|
|
SmgrAddr resSmgrEnd = logical_to_smgr(resLogicalEnd);
|
|
|
|
RelationOpenSmgr(rel);
|
|
uint64 nblocks = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM);
|
|
|
|
while (resSmgrEnd.blockno >= nblocks)
|
|
{
|
|
Buffer newBuffer = ReadBuffer(rel, P_NEW);
|
|
ReleaseBuffer(newBuffer);
|
|
nblocks = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM);
|
|
}
|
|
|
|
RelationCloseSmgr(rel);
|
|
|
|
stripe.fileOffset = resLogicalStart;
|
|
stripe.dataLength = sizeBytes;
|
|
stripe.blockCount = blockCount;
|
|
stripe.blockRowCount = blockRowCount;
|
|
stripe.columnCount = columnCount;
|
|
stripe.rowCount = rowCount;
|
|
stripe.id = highestId + 1;
|
|
|
|
InsertStripeMetadataRow(relfilenode, &stripe);
|
|
|
|
UnlockRelation(rel, ShareUpdateExclusiveLock);
|
|
|
|
return stripe;
|
|
}
|
|
|
|
|
|
/*
|
|
* ReadDataFileStripeList reads the stripe list for a given relfilenode
|
|
* in the given snapshot.
|
|
*/
|
|
static List *
|
|
ReadDataFileStripeList(Oid relfilenode, Snapshot snapshot)
|
|
{
|
|
List *stripeMetadataList = NIL;
|
|
ScanKeyData scanKey[1];
|
|
HeapTuple heapTuple;
|
|
|
|
ScanKeyInit(&scanKey[0], Anum_cstore_stripes_relfilenode,
|
|
BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode));
|
|
|
|
Oid cstoreStripesOid = CStoreStripesRelationId();
|
|
Relation cstoreStripes = heap_open(cstoreStripesOid, AccessShareLock);
|
|
Relation index = index_open(CStoreStripesIndexRelationId(), AccessShareLock);
|
|
TupleDesc tupleDescriptor = RelationGetDescr(cstoreStripes);
|
|
|
|
SysScanDesc scanDescriptor = systable_beginscan_ordered(cstoreStripes, index,
|
|
snapshot, 1,
|
|
scanKey);
|
|
|
|
while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor)))
|
|
{
|
|
Datum datumArray[Natts_cstore_stripes];
|
|
bool isNullArray[Natts_cstore_stripes];
|
|
|
|
heap_deform_tuple(heapTuple, tupleDescriptor, datumArray, isNullArray);
|
|
|
|
StripeMetadata *stripeMetadata = palloc0(sizeof(StripeMetadata));
|
|
stripeMetadata->id = DatumGetInt64(datumArray[Anum_cstore_stripes_stripe - 1]);
|
|
stripeMetadata->fileOffset = DatumGetInt64(
|
|
datumArray[Anum_cstore_stripes_file_offset - 1]);
|
|
stripeMetadata->dataLength = DatumGetInt64(
|
|
datumArray[Anum_cstore_stripes_data_length - 1]);
|
|
stripeMetadata->columnCount = DatumGetInt32(
|
|
datumArray[Anum_cstore_stripes_column_count - 1]);
|
|
stripeMetadata->blockCount = DatumGetInt32(
|
|
datumArray[Anum_cstore_stripes_block_count - 1]);
|
|
stripeMetadata->blockRowCount = DatumGetInt32(
|
|
datumArray[Anum_cstore_stripes_block_row_count - 1]);
|
|
stripeMetadata->rowCount = DatumGetInt64(
|
|
datumArray[Anum_cstore_stripes_row_count - 1]);
|
|
|
|
stripeMetadataList = lappend(stripeMetadataList, stripeMetadata);
|
|
}
|
|
|
|
systable_endscan_ordered(scanDescriptor);
|
|
index_close(index, NoLock);
|
|
heap_close(cstoreStripes, NoLock);
|
|
|
|
return stripeMetadataList;
|
|
}
|
|
|
|
|
|
/*
|
|
* ReadCStoreDataFiles reads corresponding record from cstore_data_files. Returns
|
|
* false if table was not found in cstore_data_files.
|
|
*/
|
|
static bool
|
|
ReadCStoreDataFiles(Oid relfilenode, DataFileMetadata *metadata)
|
|
{
|
|
bool found = false;
|
|
ScanKeyData scanKey[1];
|
|
|
|
ScanKeyInit(&scanKey[0], Anum_cstore_data_files_relfilenode,
|
|
BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode));
|
|
|
|
Oid cstoreDataFilesOid = CStoreDataFilesRelationId();
|
|
Relation cstoreDataFiles = try_relation_open(cstoreDataFilesOid, AccessShareLock);
|
|
if (cstoreDataFiles == NULL)
|
|
{
|
|
/*
|
|
* Extension has been dropped. This can be called while
|
|
* dropping extension or database via ObjectAccess().
|
|
*/
|
|
return false;
|
|
}
|
|
|
|
Relation index = try_relation_open(CStoreDataFilesIndexRelationId(), AccessShareLock);
|
|
if (index == NULL)
|
|
{
|
|
heap_close(cstoreDataFiles, NoLock);
|
|
|
|
/* extension has been dropped */
|
|
return false;
|
|
}
|
|
|
|
TupleDesc tupleDescriptor = RelationGetDescr(cstoreDataFiles);
|
|
|
|
SysScanDesc scanDescriptor = systable_beginscan_ordered(cstoreDataFiles, index, NULL,
|
|
1, scanKey);
|
|
|
|
HeapTuple heapTuple = systable_getnext(scanDescriptor);
|
|
if (HeapTupleIsValid(heapTuple))
|
|
{
|
|
Datum datumArray[Natts_cstore_data_files];
|
|
bool isNullArray[Natts_cstore_data_files];
|
|
heap_deform_tuple(heapTuple, tupleDescriptor, datumArray, isNullArray);
|
|
|
|
if (metadata)
|
|
{
|
|
metadata->blockRowCount = DatumGetInt32(
|
|
datumArray[Anum_cstore_data_files_block_row_count - 1]);
|
|
metadata->stripeRowCount = DatumGetInt32(
|
|
datumArray[Anum_cstore_data_files_stripe_row_count - 1]);
|
|
Name compressionName = DatumGetName(
|
|
datumArray[Anum_cstore_data_files_compression - 1]);
|
|
metadata->compression = ParseCompressionType(NameStr(*compressionName));
|
|
}
|
|
found = true;
|
|
}
|
|
|
|
systable_endscan_ordered(scanDescriptor);
|
|
index_close(index, NoLock);
|
|
heap_close(cstoreDataFiles, NoLock);
|
|
|
|
return found;
|
|
}
|
|
|
|
|
|
/*
|
|
* DeleteDataFileMetadataRowIfExists removes the row with given relfilenode from cstore_stripes.
|
|
*/
|
|
void
|
|
DeleteDataFileMetadataRowIfExists(Oid relfilenode)
|
|
{
|
|
ScanKeyData scanKey[1];
|
|
|
|
/*
|
|
* During a restore for binary upgrade, metadata tables and indexes may or
|
|
* may not exist.
|
|
*/
|
|
if (IsBinaryUpgrade)
|
|
{
|
|
return;
|
|
}
|
|
|
|
ScanKeyInit(&scanKey[0], Anum_cstore_data_files_relfilenode,
|
|
BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode));
|
|
|
|
Oid cstoreDataFilesOid = CStoreDataFilesRelationId();
|
|
Relation cstoreDataFiles = try_relation_open(cstoreDataFilesOid, AccessShareLock);
|
|
if (cstoreDataFiles == NULL)
|
|
{
|
|
/* extension has been dropped */
|
|
return;
|
|
}
|
|
|
|
Relation index = index_open(CStoreDataFilesIndexRelationId(), AccessShareLock);
|
|
|
|
SysScanDesc scanDescriptor = systable_beginscan_ordered(cstoreDataFiles, index, NULL,
|
|
1, scanKey);
|
|
|
|
HeapTuple heapTuple = systable_getnext(scanDescriptor);
|
|
if (HeapTupleIsValid(heapTuple))
|
|
{
|
|
ModifyState *modifyState = StartModifyRelation(cstoreDataFiles);
|
|
DeleteTupleAndEnforceConstraints(modifyState, heapTuple);
|
|
FinishModifyRelation(modifyState);
|
|
}
|
|
|
|
systable_endscan_ordered(scanDescriptor);
|
|
index_close(index, NoLock);
|
|
heap_close(cstoreDataFiles, NoLock);
|
|
}
|
|
|
|
|
|
/*
|
|
* StartModifyRelation allocates resources for modifications.
|
|
*/
|
|
static ModifyState *
|
|
StartModifyRelation(Relation rel)
|
|
{
|
|
EState *estate = create_estate_for_relation(rel);
|
|
|
|
/* ExecSimpleRelationInsert, ... require caller to open indexes */
|
|
ExecOpenIndices(estate->es_result_relation_info, false);
|
|
|
|
ModifyState *modifyState = palloc(sizeof(ModifyState));
|
|
modifyState->rel = rel;
|
|
modifyState->estate = estate;
|
|
|
|
return modifyState;
|
|
}
|
|
|
|
|
|
/*
|
|
* InsertTupleAndEnforceConstraints inserts a tuple into a relation and makes
|
|
* sure constraints are enforced and indexes are updated.
|
|
*/
|
|
static void
|
|
InsertTupleAndEnforceConstraints(ModifyState *state, Datum *values, bool *nulls)
|
|
{
|
|
TupleDesc tupleDescriptor = RelationGetDescr(state->rel);
|
|
HeapTuple tuple = heap_form_tuple(tupleDescriptor, values, nulls);
|
|
|
|
#if PG_VERSION_NUM >= 120000
|
|
TupleTableSlot *slot = ExecInitExtraTupleSlot(state->estate, tupleDescriptor,
|
|
&TTSOpsHeapTuple);
|
|
|
|
ExecStoreHeapTuple(tuple, slot, false);
|
|
#else
|
|
TupleTableSlot *slot = ExecInitExtraTupleSlot(state->estate, tupleDescriptor);
|
|
ExecStoreTuple(tuple, slot, InvalidBuffer, false);
|
|
#endif
|
|
|
|
/* use ExecSimpleRelationInsert to enforce constraints */
|
|
ExecSimpleRelationInsert(state->estate, slot);
|
|
}
|
|
|
|
|
|
/*
|
|
* DeleteTupleAndEnforceConstraints deletes a tuple from a relation and
|
|
* makes sure constraints (e.g. FK constraints) are enforced.
|
|
*/
|
|
static void
|
|
DeleteTupleAndEnforceConstraints(ModifyState *state, HeapTuple heapTuple)
|
|
{
|
|
EState *estate = state->estate;
|
|
ResultRelInfo *resultRelInfo = estate->es_result_relation_info;
|
|
|
|
ItemPointer tid = &(heapTuple->t_self);
|
|
simple_heap_delete(state->rel, tid);
|
|
|
|
/* execute AFTER ROW DELETE Triggers to enforce constraints */
|
|
ExecARDeleteTriggers(estate, resultRelInfo, tid, NULL, NULL);
|
|
}
|
|
|
|
|
|
/*
|
|
* FinishModifyRelation cleans up resources after modifications are done.
|
|
*/
|
|
static void
|
|
FinishModifyRelation(ModifyState *state)
|
|
{
|
|
ExecCloseIndices(state->estate->es_result_relation_info);
|
|
|
|
AfterTriggerEndQuery(state->estate);
|
|
ExecCleanUpTriggerState(state->estate);
|
|
ExecResetTupleTable(state->estate->es_tupleTable, false);
|
|
FreeExecutorState(state->estate);
|
|
}
|
|
|
|
|
|
/*
|
|
* Based on a similar function from
|
|
* postgres/src/backend/replication/logical/worker.c.
|
|
*
|
|
* Executor state preparation for evaluation of constraint expressions,
|
|
* indexes and triggers.
|
|
*
|
|
* This is based on similar code in copy.c
|
|
*/
|
|
static EState *
|
|
create_estate_for_relation(Relation rel)
|
|
{
|
|
ResultRelInfo *resultRelInfo;
|
|
|
|
EState *estate = CreateExecutorState();
|
|
|
|
RangeTblEntry *rte = makeNode(RangeTblEntry);
|
|
rte->rtekind = RTE_RELATION;
|
|
rte->relid = RelationGetRelid(rel);
|
|
rte->relkind = rel->rd_rel->relkind;
|
|
#if PG_VERSION_NUM >= 120000
|
|
rte->rellockmode = AccessShareLock;
|
|
ExecInitRangeTable(estate, list_make1(rte));
|
|
#endif
|
|
|
|
resultRelInfo = makeNode(ResultRelInfo);
|
|
InitResultRelInfo(resultRelInfo, rel, 1, NULL, 0);
|
|
|
|
estate->es_result_relations = resultRelInfo;
|
|
estate->es_num_result_relations = 1;
|
|
estate->es_result_relation_info = resultRelInfo;
|
|
|
|
estate->es_output_cid = GetCurrentCommandId(true);
|
|
|
|
#if PG_VERSION_NUM < 120000
|
|
|
|
/* Triggers might need a slot */
|
|
if (resultRelInfo->ri_TrigDesc)
|
|
{
|
|
estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(estate, NULL);
|
|
}
|
|
#endif
|
|
|
|
/* Prepare to catch AFTER triggers. */
|
|
AfterTriggerBeginQuery();
|
|
|
|
return estate;
|
|
}
|
|
|
|
|
|
/*
|
|
* DatumToBytea serializes a datum into a bytea value.
|
|
*/
|
|
static bytea *
|
|
DatumToBytea(Datum value, Form_pg_attribute attrForm)
|
|
{
|
|
int datumLength = att_addlength_datum(0, attrForm->attlen, value);
|
|
bytea *result = palloc0(datumLength + VARHDRSZ);
|
|
|
|
SET_VARSIZE(result, datumLength + VARHDRSZ);
|
|
|
|
if (attrForm->attlen > 0)
|
|
{
|
|
if (attrForm->attbyval)
|
|
{
|
|
store_att_byval(VARDATA(result), value, attrForm->attlen);
|
|
}
|
|
else
|
|
{
|
|
memcpy_s(VARDATA(result), datumLength + VARHDRSZ,
|
|
DatumGetPointer(value), attrForm->attlen);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
memcpy_s(VARDATA(result), datumLength + VARHDRSZ,
|
|
DatumGetPointer(value), datumLength);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
|
|
/*
|
|
* ByteaToDatum deserializes a value which was previously serialized using
|
|
* DatumToBytea.
|
|
*/
|
|
static Datum
|
|
ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm)
|
|
{
|
|
/*
|
|
* We copy the data so the result of this function lives even
|
|
* after the byteaDatum is freed.
|
|
*/
|
|
char *binaryDataCopy = palloc0(VARSIZE_ANY_EXHDR(bytes));
|
|
memcpy_s(binaryDataCopy, VARSIZE_ANY_EXHDR(bytes),
|
|
VARDATA_ANY(bytes), VARSIZE_ANY_EXHDR(bytes));
|
|
|
|
return fetch_att(binaryDataCopy, attrForm->attbyval, attrForm->attlen);
|
|
}
|
|
|
|
|
|
/*
|
|
* CStoreStripesRelationId returns relation id of cstore_stripes.
|
|
* TODO: should we cache this similar to citus?
|
|
*/
|
|
static Oid
|
|
CStoreStripesRelationId(void)
|
|
{
|
|
return get_relname_relid("cstore_stripes", CStoreNamespaceId());
|
|
}
|
|
|
|
|
|
/*
|
|
* CStoreStripesIndexRelationId returns relation id of cstore_stripes_idx.
|
|
* TODO: should we cache this similar to citus?
|
|
*/
|
|
static Oid
|
|
CStoreStripesIndexRelationId(void)
|
|
{
|
|
return get_relname_relid("cstore_stripes_pkey", CStoreNamespaceId());
|
|
}
|
|
|
|
|
|
/*
|
|
* CStoreDataFilesRelationId returns relation id of cstore_data_files.
|
|
* TODO: should we cache this similar to citus?
|
|
*/
|
|
static Oid
|
|
CStoreDataFilesRelationId(void)
|
|
{
|
|
return get_relname_relid("cstore_data_files", CStoreNamespaceId());
|
|
}
|
|
|
|
|
|
/*
|
|
* CStoreDataFilesIndexRelationId returns relation id of cstore_data_files_pkey.
|
|
* TODO: should we cache this similar to citus?
|
|
*/
|
|
static Oid
|
|
CStoreDataFilesIndexRelationId(void)
|
|
{
|
|
return get_relname_relid("cstore_data_files_pkey", CStoreNamespaceId());
|
|
}
|
|
|
|
|
|
/*
|
|
* CStoreSkipNodesRelationId returns relation id of cstore_skipnodes.
|
|
* TODO: should we cache this similar to citus?
|
|
*/
|
|
static Oid
|
|
CStoreSkipNodesRelationId(void)
|
|
{
|
|
return get_relname_relid("cstore_skipnodes", CStoreNamespaceId());
|
|
}
|
|
|
|
|
|
/*
|
|
* CStoreSkipNodesIndexRelationId returns relation id of cstore_skipnodes_pkey.
|
|
* TODO: should we cache this similar to citus?
|
|
*/
|
|
static Oid
|
|
CStoreSkipNodesIndexRelationId(void)
|
|
{
|
|
return get_relname_relid("cstore_skipnodes_pkey", CStoreNamespaceId());
|
|
}
|
|
|
|
|
|
/*
|
|
* CStoreNamespaceId returns namespace id of the schema we store cstore
|
|
* related tables.
|
|
*/
|
|
static Oid
|
|
CStoreNamespaceId(void)
|
|
{
|
|
return get_namespace_oid("cstore", false);
|
|
}
|