From 16dee73b1079bffdbd08bd661a14ae9f76d9a9d4 Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Fri, 2 Jul 2021 17:52:21 +0300 Subject: [PATCH 1/3] Refactor FindStripeByRowNumber into StripeMetadataLookupRowNumber Push most of the logic in FindStripeByRowNumber down to a helper function so that it can be reused in the next commit. --- src/backend/columnar/columnar_metadata.c | 73 ++++++++++++++++++++---- 1 file changed, 63 insertions(+), 10 deletions(-) diff --git a/src/backend/columnar/columnar_metadata.c b/src/backend/columnar/columnar_metadata.c index 0ebb533c7..df2946e6a 100644 --- a/src/backend/columnar/columnar_metadata.c +++ b/src/backend/columnar/columnar_metadata.c @@ -66,6 +66,16 @@ typedef struct EState *estate; } ModifyState; +/* RowNumberLookupMode to be used in StripeMetadataLookupRowNumber */ +typedef enum RowNumberLookupMode +{ + /* + * Find the stripe whose firstRowNumber is less than or equal to given + * input rowNumber. + */ + FIND_LESS_OR_EQUAL +} RowNumberLookupMode; + static void InsertStripeMetadataRow(uint64 storageId, StripeMetadata *stripe); static void GetHighestUsedAddressAndId(uint64 storageId, uint64 *highestUsedAddress, @@ -100,6 +110,9 @@ static EState * create_estate_for_relation(Relation rel); static bytea * DatumToBytea(Datum value, Form_pg_attribute attrForm); static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm); static bool WriteColumnarOptions(Oid regclass, ColumnarOptions *options, bool overwrite); +static StripeMetadata * StripeMetadataLookupRowNumber(Relation relation, uint64 rowNumber, + Snapshot snapshot, + RowNumberLookupMode lookupMode); PG_FUNCTION_INFO_V1(columnar_relation_storageid); @@ -622,21 +635,60 @@ ReadStripeSkipList(RelFileNode relfilenode, uint64 stripe, TupleDesc tupleDescri /* - * FindStripeByRowNumber returns StripeMetadata for the stripe that has the - * row with rowNumber by doing backward index scan on - * stripe_first_row_number_idx. If no such row exists, then returns NULL.
+ * FindStripeByRowNumber returns StripeMetadata for the stripe that contains + * the row with rowNumber. If no such stripe exists, then returns NULL. */ StripeMetadata * FindStripeByRowNumber(Relation relation, uint64 rowNumber, Snapshot snapshot) { + StripeMetadata *stripeMetadata = + StripeMetadataLookupRowNumber(relation, rowNumber, + snapshot, FIND_LESS_OR_EQUAL); + if (!stripeMetadata) + { + return NULL; + } + + uint64 stripeMaxRowNumber = stripeMetadata->firstRowNumber + + stripeMetadata->rowCount - 1; + if (rowNumber > stripeMaxRowNumber) + { + return NULL; + } + + return stripeMetadata; +} + + +/* + * StripeMetadataLookupRowNumber returns StripeMetadata for the stripe whose + * firstRowNumber is less than or equal to (FIND_LESS_OR_EQUAL) given rowNumber + * by doing backward index scan on stripe_first_row_number_idx. + * If no such stripe exists, then returns NULL. + */ +static StripeMetadata * +StripeMetadataLookupRowNumber(Relation relation, uint64 rowNumber, Snapshot snapshot, + RowNumberLookupMode lookupMode) +{ + Assert(lookupMode == FIND_LESS_OR_EQUAL); + StripeMetadata *foundStripeMetadata = NULL; uint64 storageId = ColumnarStorageGetStorageId(relation, false); ScanKeyData scanKey[2]; ScanKeyInit(&scanKey[0], Anum_columnar_stripe_storageid, BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(storageId)); + + StrategyNumber strategyNumber = InvalidStrategy; + RegProcedure procedure = InvalidOid; + if (lookupMode == FIND_LESS_OR_EQUAL) + { + strategyNumber = BTLessEqualStrategyNumber; + procedure = F_INT8LE; + } ScanKeyInit(&scanKey[1], Anum_columnar_stripe_first_row_number, - BTLessEqualStrategyNumber, F_INT8LE, UInt64GetDatum(rowNumber)); + strategyNumber, procedure, UInt64GetDatum(rowNumber)); + Relation columnarStripes = table_open(ColumnarStripeRelationId(), AccessShareLock); Relation index = index_open(ColumnarStripeFirstRowNumberIndexRelationId(), @@ -645,7 +697,12 @@ FindStripeByRowNumber(Relation relation, uint64 rowNumber, Snapshot snapshot) 
snapshot, 2, scanKey); - HeapTuple heapTuple = systable_getnext_ordered(scanDescriptor, BackwardScanDirection); + ScanDirection scanDirection = NoMovementScanDirection; + if (lookupMode == FIND_LESS_OR_EQUAL) + { + scanDirection = BackwardScanDirection; + } + HeapTuple heapTuple = systable_getnext_ordered(scanDescriptor, scanDirection); if (HeapTupleIsValid(heapTuple)) { TupleDesc tupleDescriptor = RelationGetDescr(columnarStripes); @@ -653,11 +710,7 @@ FindStripeByRowNumber(Relation relation, uint64 rowNumber, Snapshot snapshot) bool isNullArray[Natts_columnar_stripe]; heap_deform_tuple(heapTuple, tupleDescriptor, datumArray, isNullArray); - StripeMetadata *stripeMetadata = BuildStripeMetadata(datumArray); - if (rowNumber < stripeMetadata->firstRowNumber + stripeMetadata->rowCount) - { - foundStripeMetadata = stripeMetadata; - } + foundStripeMetadata = BuildStripeMetadata(datumArray); } systable_endscan_ordered(scanDescriptor); From 8942086506f85be36f561681b69d28ab73d765ff Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Fri, 2 Jul 2021 18:01:26 +0300 Subject: [PATCH 2/3] Remove stripeList & currentStripe from ColumnarReadState --- src/backend/columnar/columnar_metadata.c | 35 +++++++++++++++-- src/backend/columnar/columnar_reader.c | 49 ++++++++++++------------ src/include/columnar/columnar.h | 2 + 3 files changed, 57 insertions(+), 29 deletions(-) diff --git a/src/backend/columnar/columnar_metadata.c b/src/backend/columnar/columnar_metadata.c index df2946e6a..7f3efc588 100644 --- a/src/backend/columnar/columnar_metadata.c +++ b/src/backend/columnar/columnar_metadata.c @@ -73,7 +73,12 @@ typedef enum RowNumberLookupMode * Find the stripe whose firstRowNumber is less than or equal to given * input rowNumber. */ - FIND_LESS_OR_EQUAL + FIND_LESS_OR_EQUAL, + + /* + * Find the stripe whose firstRowNumber is greater than input rowNumber. 
+ */ + FIND_GREATER } RowNumberLookupMode; static void InsertStripeMetadataRow(uint64 storageId, StripeMetadata *stripe); @@ -634,6 +639,18 @@ ReadStripeSkipList(RelFileNode relfilenode, uint64 stripe, TupleDesc tupleDescri } +/* + * FindNextStripeByRowNumber returns StripeMetadata for the stripe whose + * firstRowNumber is greater than given rowNumber. If no such stripe + * exists, then returns NULL. + */ +StripeMetadata * +FindNextStripeByRowNumber(Relation relation, uint64 rowNumber, Snapshot snapshot) +{ + return StripeMetadataLookupRowNumber(relation, rowNumber, snapshot, FIND_GREATER); +} + + /* * FindStripeByRowNumber returns StripeMetadata for the stripe that contains * the row with rowNumber. If no such stripe exists, then returns NULL. @@ -662,15 +679,16 @@ FindStripeByRowNumber(Relation relation, uint64 rowNumber, Snapshot snapshot) /* * StripeMetadataLookupRowNumber returns StripeMetadata for the stripe whose - * firstRowNumber is less than or equal to (FIND_LESS_OR_EQUAL) given rowNumber - * by doing backward index scan on stripe_first_row_number_idx. + * firstRowNumber is less than or equal to (FIND_LESS_OR_EQUAL), or is + * greater than (FIND_GREATER) given rowNumber by doing backward or forward + * index scan (respectively) on stripe_first_row_number_idx. * If no such stripe exists, then returns NULL.
*/ static StripeMetadata * StripeMetadataLookupRowNumber(Relation relation, uint64 rowNumber, Snapshot snapshot, RowNumberLookupMode lookupMode) { - Assert(lookupMode == FIND_LESS_OR_EQUAL); + Assert(lookupMode == FIND_LESS_OR_EQUAL || lookupMode == FIND_GREATER); StripeMetadata *foundStripeMetadata = NULL; @@ -686,6 +704,11 @@ StripeMetadataLookupRowNumber(Relation relation, uint64 rowNumber, Snapshot snap strategyNumber = BTLessEqualStrategyNumber; procedure = F_INT8LE; } + else if (lookupMode == FIND_GREATER) + { + strategyNumber = BTGreaterStrategyNumber; + procedure = F_INT8GT; + } ScanKeyInit(&scanKey[1], Anum_columnar_stripe_first_row_number, strategyNumber, procedure, UInt64GetDatum(rowNumber)); @@ -702,6 +725,10 @@ StripeMetadataLookupRowNumber(Relation relation, uint64 rowNumber, Snapshot snap { scanDirection = BackwardScanDirection; } + else if (lookupMode == FIND_GREATER) + { + scanDirection = ForwardScanDirection; + } HeapTuple heapTuple = systable_getnext_ordered(scanDescriptor, scanDirection); if (HeapTupleIsValid(heapTuple)) { diff --git a/src/backend/columnar/columnar_reader.c b/src/backend/columnar/columnar_reader.c index 0f1a1767f..216d5472c 100644 --- a/src/backend/columnar/columnar_reader.c +++ b/src/backend/columnar/columnar_reader.c @@ -64,11 +64,10 @@ typedef struct StripeReadState struct ColumnarReadState { - List *stripeList; TupleDesc tupleDescriptor; Relation relation; - int64 currentStripe; /* index of current stripe */ + StripeMetadata *currentStripeMetadata; StripeReadState *stripeReadState; /* @@ -159,15 +158,6 @@ ColumnarReadState * ColumnarBeginRead(Relation relation, TupleDesc tupleDescriptor, List *projectedColumnList, List *whereClauseList) { - List *stripeList = StripesForRelfilenode(relation->rd_node); - StripeMetadata *stripeMetadata = NULL; - - uint64 totalRowCount = 0; - foreach_ptr(stripeMetadata, stripeList) - { - totalRowCount += stripeMetadata->rowCount; - } - /* * We allocate all stripe specific data in the 
stripeReadContext, and reset * this memory context before loading a new stripe. This is to avoid memory @@ -177,7 +167,6 @@ ColumnarBeginRead(Relation relation, TupleDesc tupleDescriptor, ColumnarReadState *readState = palloc0(sizeof(ColumnarReadState)); readState->relation = relation; - readState->stripeList = stripeList; readState->projectedColumnList = projectedColumnList; readState->whereClauseList = whereClauseList; readState->whereClauseVars = GetClauseVars(whereClauseList, tupleDescriptor->natts); @@ -185,6 +174,9 @@ ColumnarBeginRead(Relation relation, TupleDesc tupleDescriptor, readState->tupleDescriptor = tupleDescriptor; readState->stripeReadContext = stripeReadContext; readState->stripeReadState = NULL; + readState->currentStripeMetadata = FindNextStripeByRowNumber(relation, + COLUMNAR_INVALID_ROW_NUMBER, + GetTransactionSnapshot()); return readState; } @@ -220,9 +212,7 @@ ColumnarReadNextRow(ColumnarReadState *readState, Datum *columnValues, bool *col return false; } - StripeMetadata *stripeMetadata = list_nth(readState->stripeList, - readState->currentStripe); - readState->stripeReadState = BeginStripeRead(stripeMetadata, + readState->stripeReadState = BeginStripeRead(readState->currentStripeMetadata, readState->relation, readState->tupleDescriptor, readState->projectedColumnList, @@ -239,9 +229,7 @@ ColumnarReadNextRow(ColumnarReadState *readState, Datum *columnValues, bool *col if (rowNumber) { - StripeMetadata *stripeMetadata = list_nth(readState->stripeList, - readState->currentStripe); - *rowNumber = stripeMetadata->firstRowNumber + + *rowNumber = readState->currentStripeMetadata->firstRowNumber + readState->stripeReadState->currentRow - 1; } @@ -367,8 +355,7 @@ StripeReadInProgress(ColumnarReadState *readState) static bool HasUnreadStripe(ColumnarReadState *readState) { - uint32 stripeCount = list_length(readState->stripeList); - return readState->currentStripe < stripeCount; + return readState->currentStripeMetadata != NULL; } @@ -380,7 +367,9 
@@ void ColumnarRescan(ColumnarReadState *readState) { readState->stripeReadState = NULL; - readState->currentStripe = 0; + readState->currentStripeMetadata = FindNextStripeByRowNumber(readState->relation, + COLUMNAR_INVALID_ROW_NUMBER, + GetTransactionSnapshot()); readState->chunkGroupsFiltered = 0; } @@ -392,7 +381,11 @@ void ColumnarEndRead(ColumnarReadState *readState) { MemoryContextDelete(readState->stripeReadContext); - list_free_deep(readState->stripeList); + if (readState->currentStripeMetadata) + { + pfree(readState->currentStripeMetadata); + } + pfree(readState); } @@ -445,17 +438,23 @@ EndStripeRead(StripeReadState *stripeReadState) /* - * AdvanceStripeRead updates chunkGroupsFiltered and increments currentStripe - * for next stripe read. + * AdvanceStripeRead updates chunkGroupsFiltered and sets + * currentStripeMetadata for next stripe read. */ static void AdvanceStripeRead(ColumnarReadState *readState) { readState->chunkGroupsFiltered += readState->stripeReadState->chunkGroupsFiltered; + + uint64 lastReadRowNumber = readState->currentStripeMetadata->firstRowNumber + + readState->currentStripeMetadata->rowCount - 1; + EndStripeRead(readState->stripeReadState); - readState->currentStripe++; + readState->currentStripeMetadata = FindNextStripeByRowNumber(readState->relation, + lastReadRowNumber, + GetTransactionSnapshot()); readState->stripeReadState = NULL; MemoryContextReset(readState->stripeReadContext); } diff --git a/src/include/columnar/columnar.h b/src/include/columnar/columnar.h index e4770acc2..739a7a95b 100644 --- a/src/include/columnar/columnar.h +++ b/src/include/columnar/columnar.h @@ -254,6 +254,8 @@ extern void SaveChunkGroups(RelFileNode relfilenode, uint64 stripe, extern StripeSkipList * ReadStripeSkipList(RelFileNode relfilenode, uint64 stripe, TupleDesc tupleDescriptor, uint32 chunkCount); +extern StripeMetadata * FindNextStripeByRowNumber(Relation relation, uint64 rowNumber, + Snapshot snapshot); extern StripeMetadata * 
FindStripeByRowNumber(Relation relation, uint64 rowNumber, Snapshot snapshot); extern StripeMetadata * FindStripeWithHighestRowNumber(Relation relation, From 7bfd84bc705bb4cceecc9e2f6ce76defaa3e144d Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Mon, 5 Jul 2021 14:03:00 +0300 Subject: [PATCH 3/3] Introduce StripeGetHighestRowNumber --- src/backend/columnar/columnar_metadata.c | 43 ++++++++++++------------ src/backend/columnar/columnar_reader.c | 4 +-- src/backend/columnar/columnar_tableam.c | 3 +- src/include/columnar/columnar.h | 1 + 4 files changed, 26 insertions(+), 25 deletions(-) diff --git a/src/backend/columnar/columnar_metadata.c b/src/backend/columnar/columnar_metadata.c index 7f3efc588..c87ccd8fb 100644 --- a/src/backend/columnar/columnar_metadata.c +++ b/src/backend/columnar/columnar_metadata.c @@ -101,7 +101,7 @@ static Oid ColumnarChunkIndexRelationId(void); static Oid ColumnarChunkGroupIndexRelationId(void); static Oid ColumnarNamespaceId(void); static uint64 LookupStorageId(RelFileNode relfilenode); -static uint64 GetHighestUsedFirstRowNumber(uint64 storageId); +static uint64 GetHighestUsedRowNumber(uint64 storageId); static void DeleteStorageFromColumnarMetadataTable(Oid metadataTableId, AttrNumber storageIdAtrrNumber, Oid storageIdIndexId, @@ -666,9 +666,7 @@ FindStripeByRowNumber(Relation relation, uint64 rowNumber, Snapshot snapshot) return NULL; } - uint64 stripeMaxRowNumber = stripeMetadata->firstRowNumber + - stripeMetadata->rowCount - 1; - if (rowNumber > stripeMaxRowNumber) + if (rowNumber > StripeGetHighestRowNumber(stripeMetadata)) { return NULL; } @@ -677,6 +675,17 @@ FindStripeByRowNumber(Relation relation, uint64 rowNumber, Snapshot snapshot) } +/* + * StripeGetHighestRowNumber returns rowNumber of the row with highest + * rowNumber in given stripe. 
+ */ +uint64 +StripeGetHighestRowNumber(StripeMetadata *stripeMetadata) +{ + return stripeMetadata->firstRowNumber + stripeMetadata->rowCount - 1; +} + + /* * StripeMetadataLookupRowNumber returns StripeMetadata for the stripe whose * firstRowNumber is less than or equal to (FIND_LESS_OR_EQUAL), or is @@ -1489,41 +1498,33 @@ ColumnarStorageUpdateIfNeeded(Relation rel, bool isUpgrade) uint64 reservedStripeId = highestId + 1; uint64 reservedOffset = highestOffset + 1; - uint64 reservedRowNumber = GetHighestUsedFirstRowNumber(storageId) + 1; + uint64 reservedRowNumber = GetHighestUsedRowNumber(storageId) + 1; ColumnarStorageUpdateCurrent(rel, isUpgrade, reservedStripeId, reservedRowNumber, reservedOffset); } /* - * GetHighestUsedFirstRowNumber returns the highest used first_row_number - * for given storageId. Returns COLUMNAR_INVALID_ROW_NUMBER if storage with + * GetHighestUsedRowNumber returns the highest used rowNumber for given + * storageId. Returns COLUMNAR_INVALID_ROW_NUMBER if storage with * storageId has no stripes. * Note that normally we would use ColumnarStorageGetReservedRowNumber * to decide that. However, this function is designed to be used when * building the metapage itself during upgrades. 
*/ static uint64 -GetHighestUsedFirstRowNumber(uint64 storageId) +GetHighestUsedRowNumber(uint64 storageId) { + uint64 highestRowNumber = COLUMNAR_INVALID_ROW_NUMBER; + List *stripeMetadataList = ReadDataFileStripeList(storageId, GetTransactionSnapshot()); - if (list_length(stripeMetadataList) == 0) - { - return COLUMNAR_INVALID_ROW_NUMBER; - } - - /* XXX: Better to have an invalid value for StripeMetadata.rowCount too */ - uint64 stripeRowCount = -1; - uint64 highestFirstRowNumber = COLUMNAR_INVALID_ROW_NUMBER; - StripeMetadata *stripeMetadata = NULL; foreach_ptr(stripeMetadata, stripeMetadataList) { - highestFirstRowNumber = Max(highestFirstRowNumber, - stripeMetadata->firstRowNumber); - stripeRowCount = stripeMetadata->rowCount; + highestRowNumber = Max(highestRowNumber, + StripeGetHighestRowNumber(stripeMetadata)); } - return highestFirstRowNumber + stripeRowCount - 1; + return highestRowNumber; } diff --git a/src/backend/columnar/columnar_reader.c b/src/backend/columnar/columnar_reader.c index 216d5472c..685e3163d 100644 --- a/src/backend/columnar/columnar_reader.c +++ b/src/backend/columnar/columnar_reader.c @@ -447,8 +447,8 @@ AdvanceStripeRead(ColumnarReadState *readState) readState->chunkGroupsFiltered += readState->stripeReadState->chunkGroupsFiltered; - uint64 lastReadRowNumber = readState->currentStripeMetadata->firstRowNumber + - readState->currentStripeMetadata->rowCount - 1; + uint64 lastReadRowNumber = + StripeGetHighestRowNumber(readState->currentStripeMetadata); EndStripeRead(readState->stripeReadState); diff --git a/src/backend/columnar/columnar_tableam.c b/src/backend/columnar/columnar_tableam.c index 0e2099fb5..712932194 100644 --- a/src/backend/columnar/columnar_tableam.c +++ b/src/backend/columnar/columnar_tableam.c @@ -1295,8 +1295,7 @@ ColumnarGetHighestItemPointer(Relation relation, Snapshot snapshot) return invalidItemPtr; } - uint64 highestRowNumber = stripeWithHighestRowNumber->firstRowNumber + - stripeWithHighestRowNumber->rowCount - 
1; + uint64 highestRowNumber = StripeGetHighestRowNumber(stripeWithHighestRowNumber); return row_number_to_tid(highestRowNumber); } diff --git a/src/include/columnar/columnar.h b/src/include/columnar/columnar.h index 739a7a95b..24c96e419 100644 --- a/src/include/columnar/columnar.h +++ b/src/include/columnar/columnar.h @@ -258,6 +258,7 @@ extern StripeMetadata * FindNextStripeByRowNumber(Relation relation, uint64 rowN Snapshot snapshot); extern StripeMetadata * FindStripeByRowNumber(Relation relation, uint64 rowNumber, Snapshot snapshot); +extern uint64 StripeGetHighestRowNumber(StripeMetadata *stripeMetadata); extern StripeMetadata * FindStripeWithHighestRowNumber(Relation relation, Snapshot snapshot); extern Datum columnar_relation_storageid(PG_FUNCTION_ARGS);