diff --git a/src/backend/columnar/columnar_metadata.c b/src/backend/columnar/columnar_metadata.c index df2946e6a..7f3efc588 100644 --- a/src/backend/columnar/columnar_metadata.c +++ b/src/backend/columnar/columnar_metadata.c @@ -73,7 +73,12 @@ typedef enum RowNumberLookupMode * Find the stripe whose firstRowNumber is less than or equal to given * input rowNumber. */ - FIND_LESS_OR_EQUAL + FIND_LESS_OR_EQUAL, + + /* + * Find the stripe whose firstRowNumber is greater than input rowNumber. + */ + FIND_GREATER } RowNumberLookupMode; static void InsertStripeMetadataRow(uint64 storageId, StripeMetadata *stripe); @@ -634,6 +639,18 @@ ReadStripeSkipList(RelFileNode relfilenode, uint64 stripe, TupleDesc tupleDescri } +/* + * FindStripeByRowNumber returns StripeMetadata for the stripe whose + * firstRowNumber is greater than given rowNumber. If no such stripe + * exists, then returns NULL. + */ +StripeMetadata * +FindNextStripeByRowNumber(Relation relation, uint64 rowNumber, Snapshot snapshot) +{ + return StripeMetadataLookupRowNumber(relation, rowNumber, snapshot, FIND_GREATER); +} + + /* * FindStripeByRowNumber returns StripeMetadata for the stripe that contains * the row with rowNumber. If no such stripe exists, then returns NULL. @@ -662,15 +679,16 @@ FindStripeByRowNumber(Relation relation, uint64 rowNumber, Snapshot snapshot) /* * StripeMetadataLookupRowNumber returns StripeMetadata for the stripe whose - * firstRowNumber is less than or equal to (FIND_LESS_OR_EQUAL) given rowNumber - * by doing backward index scan on stripe_first_row_number_idx. + * firstRowNumber is less than or equal to (FIND_LESS_OR_EQUAL), or is + * greater than (FIND_GREATER) given rowNumber by doing backward index + * scan on stripe_first_row_number_idx. * If no such stripe exists, then returns NULL. */ static StripeMetadata * StripeMetadataLookupRowNumber(Relation relation, uint64 rowNumber, Snapshot snapshot, RowNumberLookupMode lookupMode) { - Assert(lookupMode == FIND_LESS_OR_EQUAL); + Assert(lookupMode == FIND_LESS_OR_EQUAL || lookupMode == FIND_GREATER); StripeMetadata *foundStripeMetadata = NULL; @@ -686,6 +704,11 @@ StripeMetadataLookupRowNumber(Relation relation, uint64 rowNumber, Snapshot snap strategyNumber = BTLessEqualStrategyNumber; procedure = F_INT8LE; } + else if (lookupMode == FIND_GREATER) + { + strategyNumber = BTGreaterStrategyNumber; + procedure = F_INT8GT; + } ScanKeyInit(&scanKey[1], Anum_columnar_stripe_first_row_number, strategyNumber, procedure, UInt64GetDatum(rowNumber)); @@ -702,6 +725,10 @@ StripeMetadataLookupRowNumber(Relation relation, uint64 rowNumber, Snapshot snap { scanDirection = BackwardScanDirection; } + else if (lookupMode == FIND_GREATER) + { + scanDirection = ForwardScanDirection; + } HeapTuple heapTuple = systable_getnext_ordered(scanDescriptor, scanDirection); if (HeapTupleIsValid(heapTuple)) { diff --git a/src/backend/columnar/columnar_reader.c b/src/backend/columnar/columnar_reader.c index 0f1a1767f..216d5472c 100644 --- a/src/backend/columnar/columnar_reader.c +++ b/src/backend/columnar/columnar_reader.c @@ -64,11 +64,10 @@ typedef struct StripeReadState struct ColumnarReadState { - List *stripeList; TupleDesc tupleDescriptor; Relation relation; - int64 currentStripe; /* index of current stripe */ + StripeMetadata *currentStripeMetadata; StripeReadState *stripeReadState; /* @@ -159,15 +158,6 @@ ColumnarReadState * ColumnarBeginRead(Relation relation, TupleDesc tupleDescriptor, List *projectedColumnList, List *whereClauseList) { - List *stripeList = StripesForRelfilenode(relation->rd_node); - StripeMetadata *stripeMetadata = NULL; - - uint64 totalRowCount = 0; - foreach_ptr(stripeMetadata, stripeList) - { - totalRowCount += stripeMetadata->rowCount; - } - /* * We allocate all stripe specific data in the stripeReadContext, and reset * this memory context before loading a new stripe. This is to avoid memory @@ -177,7 +167,6 @@ ColumnarBeginRead(Relation relation, TupleDesc tupleDescriptor, ColumnarReadState *readState = palloc0(sizeof(ColumnarReadState)); readState->relation = relation; - readState->stripeList = stripeList; readState->projectedColumnList = projectedColumnList; readState->whereClauseList = whereClauseList; readState->whereClauseVars = GetClauseVars(whereClauseList, tupleDescriptor->natts); @@ -185,6 +174,9 @@ ColumnarBeginRead(Relation relation, TupleDesc tupleDescriptor, readState->tupleDescriptor = tupleDescriptor; readState->stripeReadContext = stripeReadContext; readState->stripeReadState = NULL; + readState->currentStripeMetadata = FindNextStripeByRowNumber(relation, + COLUMNAR_INVALID_ROW_NUMBER, + GetTransactionSnapshot()); return readState; } @@ -220,9 +212,7 @@ ColumnarReadNextRow(ColumnarReadState *readState, Datum *columnValues, bool *col return false; } - StripeMetadata *stripeMetadata = list_nth(readState->stripeList, - readState->currentStripe); - readState->stripeReadState = BeginStripeRead(stripeMetadata, + readState->stripeReadState = BeginStripeRead(readState->currentStripeMetadata, readState->relation, readState->tupleDescriptor, readState->projectedColumnList, @@ -239,9 +229,7 @@ ColumnarReadNextRow(ColumnarReadState *readState, Datum *columnValues, bool *col if (rowNumber) { - StripeMetadata *stripeMetadata = list_nth(readState->stripeList, - readState->currentStripe); - *rowNumber = stripeMetadata->firstRowNumber + + *rowNumber = readState->currentStripeMetadata->firstRowNumber + readState->stripeReadState->currentRow - 1; } @@ -367,8 +355,7 @@ StripeReadInProgress(ColumnarReadState *readState) static bool HasUnreadStripe(ColumnarReadState *readState) { - uint32 stripeCount = list_length(readState->stripeList); - return readState->currentStripe < stripeCount; + return readState->currentStripeMetadata != NULL; } @@ -380,7 +367,9 @@ void ColumnarRescan(ColumnarReadState *readState) { readState->stripeReadState = NULL; - readState->currentStripe = 0; + readState->currentStripeMetadata = FindNextStripeByRowNumber(readState->relation, + COLUMNAR_INVALID_ROW_NUMBER, + GetTransactionSnapshot()); readState->chunkGroupsFiltered = 0; } @@ -392,7 +381,11 @@ void ColumnarEndRead(ColumnarReadState *readState) { MemoryContextDelete(readState->stripeReadContext); - list_free_deep(readState->stripeList); + if (readState->currentStripeMetadata) + { + pfree(readState->currentStripeMetadata); + } + pfree(readState); } @@ -445,17 +438,23 @@ EndStripeRead(StripeReadState *stripeReadState) /* - * AdvanceStripeRead updates chunkGroupsFiltered and increments currentStripe - * for next stripe read. + * AdvanceStripeRead updates chunkGroupsFiltered and sets + * currentStripeMetadata for next stripe read. */ static void AdvanceStripeRead(ColumnarReadState *readState) { readState->chunkGroupsFiltered += readState->stripeReadState->chunkGroupsFiltered; + + uint64 lastReadRowNumber = readState->currentStripeMetadata->firstRowNumber + + readState->currentStripeMetadata->rowCount - 1; + EndStripeRead(readState->stripeReadState); - readState->currentStripe++; + readState->currentStripeMetadata = FindNextStripeByRowNumber(readState->relation, + lastReadRowNumber, + GetTransactionSnapshot()); readState->stripeReadState = NULL; MemoryContextReset(readState->stripeReadContext); } diff --git a/src/include/columnar/columnar.h b/src/include/columnar/columnar.h index e4770acc2..739a7a95b 100644 --- a/src/include/columnar/columnar.h +++ b/src/include/columnar/columnar.h @@ -254,6 +254,8 @@ extern void SaveChunkGroups(RelFileNode relfilenode, uint64 stripe, extern StripeSkipList * ReadStripeSkipList(RelFileNode relfilenode, uint64 stripe, TupleDesc tupleDescriptor, uint32 chunkCount); +extern StripeMetadata * FindNextStripeByRowNumber(Relation relation, uint64 rowNumber, + Snapshot snapshot); extern StripeMetadata * FindStripeByRowNumber(Relation relation, uint64 rowNumber, Snapshot snapshot); extern StripeMetadata * FindStripeWithHighestRowNumber(Relation relation,