diff --git a/src/backend/columnar/columnar_reader.c b/src/backend/columnar/columnar_reader.c
index cc72fd77b..f90068c76 100644
--- a/src/backend/columnar/columnar_reader.c
+++ b/src/backend/columnar/columnar_reader.c
@@ -196,13 +196,13 @@ ColumnarBeginRead(Relation relation, TupleDesc tupleDescriptor,
 	readState->tupleDescriptor = tupleDescriptor;
 	readState->stripeReadContext = stripeReadContext;
 	readState->stripeReadState = NULL;
-	readState->currentStripeMetadata = FindNextStripeByRowNumber(relation,
-																  COLUMNAR_INVALID_ROW_NUMBER,
-																  snapshot);
 	readState->scanContext = scanContext;
 	readState->snapshot = snapshot;
 	readState->snapshotRegisteredByUs = snapshotRegisteredByUs;
 
+	/* set currentStripeMetadata for the first stripe to read */
+	AdvanceStripeRead(readState);
+
 	return readState;
 }
 
@@ -472,9 +472,10 @@ ColumnarRescan(ColumnarReadState *readState, List *scanQual)
 	MemoryContext oldContext = MemoryContextSwitchTo(readState->scanContext);
 
 	ColumnarResetRead(readState);
-	readState->currentStripeMetadata = FindNextStripeByRowNumber(readState->relation,
-																  COLUMNAR_INVALID_ROW_NUMBER,
-																  readState->snapshot);
+
+	/* set currentStripeMetadata for the first stripe to read */
+	AdvanceStripeRead(readState);
+
 	readState->chunkGroupsFiltered = 0;
 
 	readState->whereClauseList = copyObject(scanQual);
@@ -572,11 +573,17 @@ AdvanceStripeRead(ColumnarReadState *readState)
 {
 	MemoryContext oldContext = MemoryContextSwitchTo(readState->scanContext);
 
-	readState->chunkGroupsFiltered +=
-		readState->stripeReadState->chunkGroupsFiltered;
+	/* if not read any stripes yet, start from the first one .. */
+	uint64 lastReadRowNumber = COLUMNAR_INVALID_ROW_NUMBER;
+	if (StripeReadInProgress(readState))
+	{
+		/* .. otherwise, continue with the next stripe */
+		lastReadRowNumber = StripeGetHighestRowNumber(readState->currentStripeMetadata);
+
+		readState->chunkGroupsFiltered +=
+			readState->stripeReadState->chunkGroupsFiltered;
+	}
 
-	uint64 lastReadRowNumber =
-		StripeGetHighestRowNumber(readState->currentStripeMetadata);
 	readState->currentStripeMetadata = FindNextStripeByRowNumber(readState->relation,
 																  lastReadRowNumber,
 																  readState->snapshot);
diff --git a/src/test/regress/expected/columnar_indexes.out b/src/test/regress/expected/columnar_indexes.out
index f3cd5d2f3..1a39c781b 100644
--- a/src/test/regress/expected/columnar_indexes.out
+++ b/src/test/regress/expected/columnar_indexes.out
@@ -544,6 +544,29 @@ ERROR:  duplicate key value violates unique constraint "aborted_write_test_pkey"
 DETAIL:  Key (a)=(16999) already exists.
 -- since second INSERT already failed, should not throw a "duplicate key" error
 REINDEX TABLE aborted_write_test;
+BEGIN;
+  ALTER TABLE columnar.stripe SET (autovacuum_enabled = false);
+  ALTER TABLE columnar.chunk SET (autovacuum_enabled = false);
+  ALTER TABLE columnar.chunk_group SET (autovacuum_enabled = false);
+  DROP TABLE aborted_write_test;
+  TRUNCATE columnar.stripe, columnar.chunk, columnar.chunk_group;
+  CREATE TABLE aborted_write_test (a INT) USING columnar;
+  SAVEPOINT svpt;
+  INSERT INTO aborted_write_test SELECT i FROM generate_series(1, 2) i;
+  -- force flush write state
+  SELECT FROM aborted_write_test;
+--
+(2 rows)
+
+  ROLLBACK TO SAVEPOINT svpt;
+  -- Already disabled autovacuum for all three metadata tables.
+  -- Here we truncate columnar.chunk and columnar.chunk_group but not
+  -- columnar.stripe to make sure that we properly handle dead tuples
+  -- in columnar.stripe, i.e. stripe metadata entries for aborted
+  -- transactions.
+  TRUNCATE columnar.chunk, columnar.chunk_group;
+  CREATE INDEX ON aborted_write_test (a);
+ROLLBACK;
 create table events (event_id bigserial, event_time timestamptz default now(), payload text) using columnar;
 BEGIN;
 -- this wouldn't flush any data
diff --git a/src/test/regress/sql/columnar_indexes.sql b/src/test/regress/sql/columnar_indexes.sql
index f3a84c13a..67382f201 100644
--- a/src/test/regress/sql/columnar_indexes.sql
+++ b/src/test/regress/sql/columnar_indexes.sql
@@ -406,6 +406,32 @@ INSERT INTO aborted_write_test VALUES (16999);
 -- since second INSERT already failed, should not throw a "duplicate key" error
 REINDEX TABLE aborted_write_test;
 
+BEGIN;
+  ALTER TABLE columnar.stripe SET (autovacuum_enabled = false);
+  ALTER TABLE columnar.chunk SET (autovacuum_enabled = false);
+  ALTER TABLE columnar.chunk_group SET (autovacuum_enabled = false);
+
+  DROP TABLE aborted_write_test;
+  TRUNCATE columnar.stripe, columnar.chunk, columnar.chunk_group;
+
+  CREATE TABLE aborted_write_test (a INT) USING columnar;
+
+  SAVEPOINT svpt;
+  INSERT INTO aborted_write_test SELECT i FROM generate_series(1, 2) i;
+  -- force flush write state
+  SELECT FROM aborted_write_test;
+  ROLLBACK TO SAVEPOINT svpt;
+
+  -- Already disabled autovacuum for all three metadata tables.
+  -- Here we truncate columnar.chunk and columnar.chunk_group but not
+  -- columnar.stripe to make sure that we properly handle dead tuples
+  -- in columnar.stripe, i.e. stripe metadata entries for aborted
+  -- transactions.
+  TRUNCATE columnar.chunk, columnar.chunk_group;
+
+  CREATE INDEX ON aborted_write_test (a);
+ROLLBACK;
+
 create table events (event_id bigserial, event_time timestamptz default now(), payload text) using columnar;
 BEGIN;
 -- this wouldn't flush any data