mirror of https://github.com/citusdata/citus.git
Make sure to skip aborted writes when reading the first tuple (#5274)
With 5825c44d5f
, we made the changes to
skip aborted writes when scanning a columnar table.
However, it looks like we forgot to handle such cases for the very first
call made to columnar_getnextslot. That means that commit only
considered the intermediate stripe read operations.
However, the functions called by columnar_getnextslot to find the first
stripe to read (ColumnarBeginRead & ColumnarRescan) did not account for
those aborted writes.
To fix that, we teach AdvanceStripeRead to find the very first stripe
to read, and then start using it where we were blindly calling
FindNextStripeByRowNumber.
pull/5275/head
parent
531ad83b8c
commit
4ee0fb2758
|
@ -196,13 +196,13 @@ ColumnarBeginRead(Relation relation, TupleDesc tupleDescriptor,
|
|||
readState->tupleDescriptor = tupleDescriptor;
|
||||
readState->stripeReadContext = stripeReadContext;
|
||||
readState->stripeReadState = NULL;
|
||||
readState->currentStripeMetadata = FindNextStripeByRowNumber(relation,
|
||||
COLUMNAR_INVALID_ROW_NUMBER,
|
||||
snapshot);
|
||||
readState->scanContext = scanContext;
|
||||
readState->snapshot = snapshot;
|
||||
readState->snapshotRegisteredByUs = snapshotRegisteredByUs;
|
||||
|
||||
/* set currentStripeMetadata for the first stripe to read */
|
||||
AdvanceStripeRead(readState);
|
||||
|
||||
return readState;
|
||||
}
|
||||
|
||||
|
@ -472,9 +472,10 @@ ColumnarRescan(ColumnarReadState *readState, List *scanQual)
|
|||
MemoryContext oldContext = MemoryContextSwitchTo(readState->scanContext);
|
||||
|
||||
ColumnarResetRead(readState);
|
||||
readState->currentStripeMetadata = FindNextStripeByRowNumber(readState->relation,
|
||||
COLUMNAR_INVALID_ROW_NUMBER,
|
||||
readState->snapshot);
|
||||
|
||||
/* set currentStripeMetadata for the first stripe to read */
|
||||
AdvanceStripeRead(readState);
|
||||
|
||||
readState->chunkGroupsFiltered = 0;
|
||||
|
||||
readState->whereClauseList = copyObject(scanQual);
|
||||
|
@ -572,11 +573,17 @@ AdvanceStripeRead(ColumnarReadState *readState)
|
|||
{
|
||||
MemoryContext oldContext = MemoryContextSwitchTo(readState->scanContext);
|
||||
|
||||
/* if not read any stripes yet, start from the first one .. */
|
||||
uint64 lastReadRowNumber = COLUMNAR_INVALID_ROW_NUMBER;
|
||||
if (StripeReadInProgress(readState))
|
||||
{
|
||||
/* .. otherwise, continue with the next stripe */
|
||||
lastReadRowNumber = StripeGetHighestRowNumber(readState->currentStripeMetadata);
|
||||
|
||||
readState->chunkGroupsFiltered +=
|
||||
readState->stripeReadState->chunkGroupsFiltered;
|
||||
}
|
||||
|
||||
uint64 lastReadRowNumber =
|
||||
StripeGetHighestRowNumber(readState->currentStripeMetadata);
|
||||
readState->currentStripeMetadata = FindNextStripeByRowNumber(readState->relation,
|
||||
lastReadRowNumber,
|
||||
readState->snapshot);
|
||||
|
|
|
@ -544,6 +544,29 @@ ERROR: duplicate key value violates unique constraint "aborted_write_test_pkey"
|
|||
DETAIL: Key (a)=(16999) already exists.
|
||||
-- since second INSERT already failed, should not throw a "duplicate key" error
|
||||
REINDEX TABLE aborted_write_test;
|
||||
BEGIN;
|
||||
ALTER TABLE columnar.stripe SET (autovacuum_enabled = false);
|
||||
ALTER TABLE columnar.chunk SET (autovacuum_enabled = false);
|
||||
ALTER TABLE columnar.chunk_group SET (autovacuum_enabled = false);
|
||||
DROP TABLE aborted_write_test;
|
||||
TRUNCATE columnar.stripe, columnar.chunk, columnar.chunk_group;
|
||||
CREATE TABLE aborted_write_test (a INT) USING columnar;
|
||||
SAVEPOINT svpt;
|
||||
INSERT INTO aborted_write_test SELECT i FROM generate_series(1, 2) i;
|
||||
-- force flush write state
|
||||
SELECT FROM aborted_write_test;
|
||||
--
|
||||
(2 rows)
|
||||
|
||||
ROLLBACK TO SAVEPOINT svpt;
|
||||
-- Already disabled autovacuum for all three metadata tables.
|
||||
-- Here we truncate columnar.chunk and columnar.chunk_group but not
|
||||
-- columnar.stripe to make sure that we properly handle dead tuples
|
||||
-- in columnar.stripe, i.e. stripe metadata entries for aborted
|
||||
-- transactions.
|
||||
TRUNCATE columnar.chunk, columnar.chunk_group;
|
||||
CREATE INDEX ON aborted_write_test (a);
|
||||
ROLLBACK;
|
||||
create table events (event_id bigserial, event_time timestamptz default now(), payload text) using columnar;
|
||||
BEGIN;
|
||||
-- this wouldn't flush any data
|
||||
|
|
|
@ -406,6 +406,32 @@ INSERT INTO aborted_write_test VALUES (16999);
|
|||
-- since second INSERT already failed, should not throw a "duplicate key" error
|
||||
REINDEX TABLE aborted_write_test;
|
||||
|
||||
BEGIN;
|
||||
ALTER TABLE columnar.stripe SET (autovacuum_enabled = false);
|
||||
ALTER TABLE columnar.chunk SET (autovacuum_enabled = false);
|
||||
ALTER TABLE columnar.chunk_group SET (autovacuum_enabled = false);
|
||||
|
||||
DROP TABLE aborted_write_test;
|
||||
TRUNCATE columnar.stripe, columnar.chunk, columnar.chunk_group;
|
||||
|
||||
CREATE TABLE aborted_write_test (a INT) USING columnar;
|
||||
|
||||
SAVEPOINT svpt;
|
||||
INSERT INTO aborted_write_test SELECT i FROM generate_series(1, 2) i;
|
||||
-- force flush write state
|
||||
SELECT FROM aborted_write_test;
|
||||
ROLLBACK TO SAVEPOINT svpt;
|
||||
|
||||
-- Already disabled autovacuum for all three metadata tables.
|
||||
-- Here we truncate columnar.chunk and columnar.chunk_group but not
|
||||
-- columnar.stripe to make sure that we properly handle dead tuples
|
||||
-- in columnar.stripe, i.e. stripe metadata entries for aborted
|
||||
-- transactions.
|
||||
TRUNCATE columnar.chunk, columnar.chunk_group;
|
||||
|
||||
CREATE INDEX ON aborted_write_test (a);
|
||||
ROLLBACK;
|
||||
|
||||
create table events (event_id bigserial, event_time timestamptz default now(), payload text) using columnar;
|
||||
BEGIN;
|
||||
-- this wouldn't flush any data
|
||||
|
|
Loading…
Reference in New Issue