Merge pull request #5088 from citusdata/col/refactor-reader

Remove stripeList (list of StripeMetadata) & currentStripe (stripeList index of the
current stripe being read) from ColumnarReadState, introduce currentStripeMetadata.
pull/5092/head
Onur Tirtir 2021-07-07 11:21:16 +03:00 committed by GitHub
commit dfcfa18edc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 137 additions and 55 deletions

View File

@ -66,6 +66,21 @@ typedef struct
EState *estate; EState *estate;
} ModifyState; } ModifyState;
/*
 * RowNumberLookupMode selects which comparison StripeMetadataLookupRowNumber
 * applies between a stripe's firstRowNumber and the input rowNumber.
 */
typedef enum RowNumberLookupMode
{
	/* match the stripe whose firstRowNumber <= input rowNumber */
	FIND_LESS_OR_EQUAL = 0,

	/* match the stripe whose firstRowNumber > input rowNumber */
	FIND_GREATER = 1
} RowNumberLookupMode;
static void InsertStripeMetadataRow(uint64 storageId, StripeMetadata *stripe); static void InsertStripeMetadataRow(uint64 storageId, StripeMetadata *stripe);
static void GetHighestUsedAddressAndId(uint64 storageId, static void GetHighestUsedAddressAndId(uint64 storageId,
uint64 *highestUsedAddress, uint64 *highestUsedAddress,
@ -86,7 +101,7 @@ static Oid ColumnarChunkIndexRelationId(void);
static Oid ColumnarChunkGroupIndexRelationId(void); static Oid ColumnarChunkGroupIndexRelationId(void);
static Oid ColumnarNamespaceId(void); static Oid ColumnarNamespaceId(void);
static uint64 LookupStorageId(RelFileNode relfilenode); static uint64 LookupStorageId(RelFileNode relfilenode);
static uint64 GetHighestUsedFirstRowNumber(uint64 storageId); static uint64 GetHighestUsedRowNumber(uint64 storageId);
static void DeleteStorageFromColumnarMetadataTable(Oid metadataTableId, static void DeleteStorageFromColumnarMetadataTable(Oid metadataTableId,
AttrNumber storageIdAtrrNumber, AttrNumber storageIdAtrrNumber,
Oid storageIdIndexId, Oid storageIdIndexId,
@ -100,6 +115,9 @@ static EState * create_estate_for_relation(Relation rel);
static bytea * DatumToBytea(Datum value, Form_pg_attribute attrForm); static bytea * DatumToBytea(Datum value, Form_pg_attribute attrForm);
static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm); static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm);
static bool WriteColumnarOptions(Oid regclass, ColumnarOptions *options, bool overwrite); static bool WriteColumnarOptions(Oid regclass, ColumnarOptions *options, bool overwrite);
static StripeMetadata * StripeMetadataLookupRowNumber(Relation relation, uint64 rowNumber,
Snapshot snapshot,
RowNumberLookupMode lookupMode);
PG_FUNCTION_INFO_V1(columnar_relation_storageid); PG_FUNCTION_INFO_V1(columnar_relation_storageid);
@ -622,21 +640,87 @@ ReadStripeSkipList(RelFileNode relfilenode, uint64 stripe, TupleDesc tupleDescri
/* /*
* FindStripeByRowNumber returns StripeMetadata for the stripe that has the * FindNextStripeByRowNumber returns StripeMetadata for the stripe whose
* row with rowNumber by doing backward index scan on * firstRowNumber is greater than given rowNumber. If no such stripe
* stripe_first_row_number_idx. If no such row exists, then returns NULL. * exists, then returns NULL.
*/
StripeMetadata *
FindNextStripeByRowNumber(Relation relation, uint64 rowNumber, Snapshot snapshot)
{
	/* delegate to the shared lookup, asking for firstRowNumber > rowNumber */
	StripeMetadata *nextStripeMetadata =
		StripeMetadataLookupRowNumber(relation, rowNumber, snapshot, FIND_GREATER);

	return nextStripeMetadata;
}
/*
* FindStripeByRowNumber returns StripeMetadata for the stripe that contains
* the row with rowNumber. If no such stripe exists, then returns NULL.
*/ */
StripeMetadata * StripeMetadata *
FindStripeByRowNumber(Relation relation, uint64 rowNumber, Snapshot snapshot) FindStripeByRowNumber(Relation relation, uint64 rowNumber, Snapshot snapshot)
{ {
StripeMetadata *stripeMetadata =
StripeMetadataLookupRowNumber(relation, rowNumber,
snapshot, FIND_LESS_OR_EQUAL);
if (!stripeMetadata)
{
return NULL;
}
if (rowNumber > StripeGetHighestRowNumber(stripeMetadata))
{
return NULL;
}
return stripeMetadata;
}
/*
 * StripeGetHighestRowNumber computes the highest rowNumber that belongs to
 * the given stripe, i.e. firstRowNumber + rowCount - 1.
 */
uint64
StripeGetHighestRowNumber(StripeMetadata *stripeMetadata)
{
	uint64 highestRowNumber =
		stripeMetadata->firstRowNumber + stripeMetadata->rowCount - 1;

	return highestRowNumber;
}
/*
* StripeMetadataLookupRowNumber returns StripeMetadata for the stripe whose
* firstRowNumber is less than or equal to (FIND_LESS_OR_EQUAL), or is
* greater than (FIND_GREATER) given rowNumber by doing a backward
* (FIND_LESS_OR_EQUAL) or forward (FIND_GREATER) index scan on
* stripe_first_row_number_idx.
* If no such stripe exists, then returns NULL.
*/
static StripeMetadata *
StripeMetadataLookupRowNumber(Relation relation, uint64 rowNumber, Snapshot snapshot,
RowNumberLookupMode lookupMode)
{
Assert(lookupMode == FIND_LESS_OR_EQUAL || lookupMode == FIND_GREATER);
StripeMetadata *foundStripeMetadata = NULL; StripeMetadata *foundStripeMetadata = NULL;
uint64 storageId = ColumnarStorageGetStorageId(relation, false); uint64 storageId = ColumnarStorageGetStorageId(relation, false);
ScanKeyData scanKey[2]; ScanKeyData scanKey[2];
ScanKeyInit(&scanKey[0], Anum_columnar_stripe_storageid, ScanKeyInit(&scanKey[0], Anum_columnar_stripe_storageid,
BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(storageId)); BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(storageId));
StrategyNumber strategyNumber = InvalidStrategy;
RegProcedure procedure = InvalidOid;
if (lookupMode == FIND_LESS_OR_EQUAL)
{
strategyNumber = BTLessEqualStrategyNumber;
procedure = F_INT8LE;
}
else if (lookupMode == FIND_GREATER)
{
strategyNumber = BTGreaterStrategyNumber;
procedure = F_INT8GT;
}
ScanKeyInit(&scanKey[1], Anum_columnar_stripe_first_row_number, ScanKeyInit(&scanKey[1], Anum_columnar_stripe_first_row_number,
BTLessEqualStrategyNumber, F_INT8LE, UInt64GetDatum(rowNumber)); strategyNumber, procedure, UInt64GetDatum(rowNumber));
Relation columnarStripes = table_open(ColumnarStripeRelationId(), AccessShareLock); Relation columnarStripes = table_open(ColumnarStripeRelationId(), AccessShareLock);
Relation index = index_open(ColumnarStripeFirstRowNumberIndexRelationId(), Relation index = index_open(ColumnarStripeFirstRowNumberIndexRelationId(),
@ -645,7 +729,16 @@ FindStripeByRowNumber(Relation relation, uint64 rowNumber, Snapshot snapshot)
snapshot, 2, snapshot, 2,
scanKey); scanKey);
HeapTuple heapTuple = systable_getnext_ordered(scanDescriptor, BackwardScanDirection); ScanDirection scanDirection = NoMovementScanDirection;
if (lookupMode == FIND_LESS_OR_EQUAL)
{
scanDirection = BackwardScanDirection;
}
else if (lookupMode == FIND_GREATER)
{
scanDirection = ForwardScanDirection;
}
HeapTuple heapTuple = systable_getnext_ordered(scanDescriptor, scanDirection);
if (HeapTupleIsValid(heapTuple)) if (HeapTupleIsValid(heapTuple))
{ {
TupleDesc tupleDescriptor = RelationGetDescr(columnarStripes); TupleDesc tupleDescriptor = RelationGetDescr(columnarStripes);
@ -653,11 +746,7 @@ FindStripeByRowNumber(Relation relation, uint64 rowNumber, Snapshot snapshot)
bool isNullArray[Natts_columnar_stripe]; bool isNullArray[Natts_columnar_stripe];
heap_deform_tuple(heapTuple, tupleDescriptor, datumArray, isNullArray); heap_deform_tuple(heapTuple, tupleDescriptor, datumArray, isNullArray);
StripeMetadata *stripeMetadata = BuildStripeMetadata(datumArray); foundStripeMetadata = BuildStripeMetadata(datumArray);
if (rowNumber < stripeMetadata->firstRowNumber + stripeMetadata->rowCount)
{
foundStripeMetadata = stripeMetadata;
}
} }
systable_endscan_ordered(scanDescriptor); systable_endscan_ordered(scanDescriptor);
@ -1409,41 +1498,33 @@ ColumnarStorageUpdateIfNeeded(Relation rel, bool isUpgrade)
uint64 reservedStripeId = highestId + 1; uint64 reservedStripeId = highestId + 1;
uint64 reservedOffset = highestOffset + 1; uint64 reservedOffset = highestOffset + 1;
uint64 reservedRowNumber = GetHighestUsedFirstRowNumber(storageId) + 1; uint64 reservedRowNumber = GetHighestUsedRowNumber(storageId) + 1;
ColumnarStorageUpdateCurrent(rel, isUpgrade, reservedStripeId, ColumnarStorageUpdateCurrent(rel, isUpgrade, reservedStripeId,
reservedRowNumber, reservedOffset); reservedRowNumber, reservedOffset);
} }
/* /*
* GetHighestUsedFirstRowNumber returns the highest used first_row_number * GetHighestUsedRowNumber returns the highest used rowNumber for given
* for given storageId. Returns COLUMNAR_INVALID_ROW_NUMBER if storage with * storageId. Returns COLUMNAR_INVALID_ROW_NUMBER if storage with
* storageId has no stripes. * storageId has no stripes.
* Note that normally we would use ColumnarStorageGetReservedRowNumber * Note that normally we would use ColumnarStorageGetReservedRowNumber
* to decide that. However, this function is designed to be used when * to decide that. However, this function is designed to be used when
* building the metapage itself during upgrades. * building the metapage itself during upgrades.
*/ */
static uint64 static uint64
GetHighestUsedFirstRowNumber(uint64 storageId) GetHighestUsedRowNumber(uint64 storageId)
{ {
uint64 highestRowNumber = COLUMNAR_INVALID_ROW_NUMBER;
List *stripeMetadataList = ReadDataFileStripeList(storageId, List *stripeMetadataList = ReadDataFileStripeList(storageId,
GetTransactionSnapshot()); GetTransactionSnapshot());
if (list_length(stripeMetadataList) == 0)
{
return COLUMNAR_INVALID_ROW_NUMBER;
}
/* XXX: Better to have an invalid value for StripeMetadata.rowCount too */
uint64 stripeRowCount = -1;
uint64 highestFirstRowNumber = COLUMNAR_INVALID_ROW_NUMBER;
StripeMetadata *stripeMetadata = NULL; StripeMetadata *stripeMetadata = NULL;
foreach_ptr(stripeMetadata, stripeMetadataList) foreach_ptr(stripeMetadata, stripeMetadataList)
{ {
highestFirstRowNumber = Max(highestFirstRowNumber, highestRowNumber = Max(highestRowNumber,
stripeMetadata->firstRowNumber); StripeGetHighestRowNumber(stripeMetadata));
stripeRowCount = stripeMetadata->rowCount;
} }
return highestFirstRowNumber + stripeRowCount - 1; return highestRowNumber;
} }

View File

@ -64,11 +64,10 @@ typedef struct StripeReadState
struct ColumnarReadState struct ColumnarReadState
{ {
List *stripeList;
TupleDesc tupleDescriptor; TupleDesc tupleDescriptor;
Relation relation; Relation relation;
int64 currentStripe; /* index of current stripe */ StripeMetadata *currentStripeMetadata;
StripeReadState *stripeReadState; StripeReadState *stripeReadState;
/* /*
@ -159,15 +158,6 @@ ColumnarReadState *
ColumnarBeginRead(Relation relation, TupleDesc tupleDescriptor, ColumnarBeginRead(Relation relation, TupleDesc tupleDescriptor,
List *projectedColumnList, List *whereClauseList) List *projectedColumnList, List *whereClauseList)
{ {
List *stripeList = StripesForRelfilenode(relation->rd_node);
StripeMetadata *stripeMetadata = NULL;
uint64 totalRowCount = 0;
foreach_ptr(stripeMetadata, stripeList)
{
totalRowCount += stripeMetadata->rowCount;
}
/* /*
* We allocate all stripe specific data in the stripeReadContext, and reset * We allocate all stripe specific data in the stripeReadContext, and reset
* this memory context before loading a new stripe. This is to avoid memory * this memory context before loading a new stripe. This is to avoid memory
@ -177,7 +167,6 @@ ColumnarBeginRead(Relation relation, TupleDesc tupleDescriptor,
ColumnarReadState *readState = palloc0(sizeof(ColumnarReadState)); ColumnarReadState *readState = palloc0(sizeof(ColumnarReadState));
readState->relation = relation; readState->relation = relation;
readState->stripeList = stripeList;
readState->projectedColumnList = projectedColumnList; readState->projectedColumnList = projectedColumnList;
readState->whereClauseList = whereClauseList; readState->whereClauseList = whereClauseList;
readState->whereClauseVars = GetClauseVars(whereClauseList, tupleDescriptor->natts); readState->whereClauseVars = GetClauseVars(whereClauseList, tupleDescriptor->natts);
@ -185,6 +174,9 @@ ColumnarBeginRead(Relation relation, TupleDesc tupleDescriptor,
readState->tupleDescriptor = tupleDescriptor; readState->tupleDescriptor = tupleDescriptor;
readState->stripeReadContext = stripeReadContext; readState->stripeReadContext = stripeReadContext;
readState->stripeReadState = NULL; readState->stripeReadState = NULL;
readState->currentStripeMetadata = FindNextStripeByRowNumber(relation,
COLUMNAR_INVALID_ROW_NUMBER,
GetTransactionSnapshot());
return readState; return readState;
} }
@ -220,9 +212,7 @@ ColumnarReadNextRow(ColumnarReadState *readState, Datum *columnValues, bool *col
return false; return false;
} }
StripeMetadata *stripeMetadata = list_nth(readState->stripeList, readState->stripeReadState = BeginStripeRead(readState->currentStripeMetadata,
readState->currentStripe);
readState->stripeReadState = BeginStripeRead(stripeMetadata,
readState->relation, readState->relation,
readState->tupleDescriptor, readState->tupleDescriptor,
readState->projectedColumnList, readState->projectedColumnList,
@ -239,9 +229,7 @@ ColumnarReadNextRow(ColumnarReadState *readState, Datum *columnValues, bool *col
if (rowNumber) if (rowNumber)
{ {
StripeMetadata *stripeMetadata = list_nth(readState->stripeList, *rowNumber = readState->currentStripeMetadata->firstRowNumber +
readState->currentStripe);
*rowNumber = stripeMetadata->firstRowNumber +
readState->stripeReadState->currentRow - 1; readState->stripeReadState->currentRow - 1;
} }
@ -367,8 +355,7 @@ StripeReadInProgress(ColumnarReadState *readState)
static bool static bool
HasUnreadStripe(ColumnarReadState *readState) HasUnreadStripe(ColumnarReadState *readState)
{ {
uint32 stripeCount = list_length(readState->stripeList); return readState->currentStripeMetadata != NULL;
return readState->currentStripe < stripeCount;
} }
@ -380,7 +367,9 @@ void
ColumnarRescan(ColumnarReadState *readState) ColumnarRescan(ColumnarReadState *readState)
{ {
readState->stripeReadState = NULL; readState->stripeReadState = NULL;
readState->currentStripe = 0; readState->currentStripeMetadata = FindNextStripeByRowNumber(readState->relation,
COLUMNAR_INVALID_ROW_NUMBER,
GetTransactionSnapshot());
readState->chunkGroupsFiltered = 0; readState->chunkGroupsFiltered = 0;
} }
@ -392,7 +381,11 @@ void
ColumnarEndRead(ColumnarReadState *readState) ColumnarEndRead(ColumnarReadState *readState)
{ {
MemoryContextDelete(readState->stripeReadContext); MemoryContextDelete(readState->stripeReadContext);
list_free_deep(readState->stripeList); if (readState->currentStripeMetadata)
{
pfree(readState->currentStripeMetadata);
}
pfree(readState); pfree(readState);
} }
@ -445,17 +438,23 @@ EndStripeRead(StripeReadState *stripeReadState)
/* /*
* AdvanceStripeRead updates chunkGroupsFiltered and increments currentStripe * AdvanceStripeRead updates chunkGroupsFiltered and sets
* for next stripe read. * currentStripeMetadata for next stripe read.
*/ */
static void static void
AdvanceStripeRead(ColumnarReadState *readState) AdvanceStripeRead(ColumnarReadState *readState)
{ {
readState->chunkGroupsFiltered += readState->chunkGroupsFiltered +=
readState->stripeReadState->chunkGroupsFiltered; readState->stripeReadState->chunkGroupsFiltered;
uint64 lastReadRowNumber =
StripeGetHighestRowNumber(readState->currentStripeMetadata);
EndStripeRead(readState->stripeReadState); EndStripeRead(readState->stripeReadState);
readState->currentStripe++; readState->currentStripeMetadata = FindNextStripeByRowNumber(readState->relation,
lastReadRowNumber,
GetTransactionSnapshot());
readState->stripeReadState = NULL; readState->stripeReadState = NULL;
MemoryContextReset(readState->stripeReadContext); MemoryContextReset(readState->stripeReadContext);
} }

View File

@ -1295,8 +1295,7 @@ ColumnarGetHighestItemPointer(Relation relation, Snapshot snapshot)
return invalidItemPtr; return invalidItemPtr;
} }
uint64 highestRowNumber = stripeWithHighestRowNumber->firstRowNumber + uint64 highestRowNumber = StripeGetHighestRowNumber(stripeWithHighestRowNumber);
stripeWithHighestRowNumber->rowCount - 1;
return row_number_to_tid(highestRowNumber); return row_number_to_tid(highestRowNumber);
} }

View File

@ -254,8 +254,11 @@ extern void SaveChunkGroups(RelFileNode relfilenode, uint64 stripe,
extern StripeSkipList * ReadStripeSkipList(RelFileNode relfilenode, uint64 stripe, extern StripeSkipList * ReadStripeSkipList(RelFileNode relfilenode, uint64 stripe,
TupleDesc tupleDescriptor, TupleDesc tupleDescriptor,
uint32 chunkCount); uint32 chunkCount);
extern StripeMetadata * FindNextStripeByRowNumber(Relation relation, uint64 rowNumber,
Snapshot snapshot);
extern StripeMetadata * FindStripeByRowNumber(Relation relation, uint64 rowNumber, extern StripeMetadata * FindStripeByRowNumber(Relation relation, uint64 rowNumber,
Snapshot snapshot); Snapshot snapshot);
extern uint64 StripeGetHighestRowNumber(StripeMetadata *stripeMetadata);
extern StripeMetadata * FindStripeWithHighestRowNumber(Relation relation, extern StripeMetadata * FindStripeWithHighestRowNumber(Relation relation,
Snapshot snapshot); Snapshot snapshot);
extern Datum columnar_relation_storageid(PG_FUNCTION_ARGS); extern Datum columnar_relation_storageid(PG_FUNCTION_ARGS);