diff --git a/src/backend/columnar/cstore_reader.c b/src/backend/columnar/cstore_reader.c index 81b332e8b..884fd5544 100644 --- a/src/backend/columnar/cstore_reader.c +++ b/src/backend/columnar/cstore_reader.c @@ -178,18 +178,8 @@ ColumnarReadNextRow(TableReadState *readState, Datum *columnValues, bool *column if (chunkIndex != readState->deserializedChunkIndex) { - uint32 chunkRowCount = 0; - - uint32 stripeRowCount = stripeMetadata->rowCount; - uint32 lastChunkIndex = stripeRowCount / stripeMetadata->chunkRowCount; - if (chunkIndex == lastChunkIndex) - { - chunkRowCount = stripeRowCount % stripeMetadata->chunkRowCount; - } - else - { - chunkRowCount = stripeMetadata->chunkRowCount; - } + uint32 chunkRowCount = + readState->stripeBuffers->selectedChunkRowCount[chunkIndex]; oldContext = MemoryContextSwitchTo(readState->stripeReadContext); @@ -356,6 +346,14 @@ LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata, SelectedChunkSkipList(stripeSkipList, projectedColumnMask, selectedChunkMask); + uint32 selectedChunkCount = selectedChunkSkipList->chunkCount; + uint32 *selectedChunkRowCount = palloc0(selectedChunkCount * sizeof(uint32)); + for (int chunkIndex = 0; chunkIndex < selectedChunkCount; chunkIndex++) + { + selectedChunkRowCount[chunkIndex] = + selectedChunkSkipList->chunkSkipNodeArray[0][chunkIndex].rowCount; + } + /* load column data for projected columns */ ColumnBuffers **columnBuffersArray = palloc0(columnCount * sizeof(ColumnBuffers *)); @@ -381,6 +379,8 @@ LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata, stripeBuffers->columnCount = columnCount; stripeBuffers->rowCount = StripeSkipListRowCount(selectedChunkSkipList); stripeBuffers->columnBuffersArray = columnBuffersArray; + stripeBuffers->selectedChunks = selectedChunkCount; + stripeBuffers->selectedChunkRowCount = selectedChunkRowCount; return stripeBuffers; } diff --git a/src/include/columnar/columnar.h b/src/include/columnar/columnar.h index 5542034d5..f7514200f 100644 --- a/src/include/columnar/columnar.h +++ b/src/include/columnar/columnar.h @@ -201,6 +201,14 @@ typedef struct StripeBuffers uint32 columnCount; uint32 rowCount; ColumnBuffers **columnBuffersArray; + + /* + * We might skip reading some chunks because they're refuted by the + * WHERE clause. We keep number of selected chunks and number of rows + * in each of them. + */ + uint32 selectedChunks; + uint32 *selectedChunkRowCount; } StripeBuffers; diff --git a/src/test/regress/input/am_chunk_filtering.source b/src/test/regress/input/am_chunk_filtering.source index 41c098528..2b3e760a8 100644 --- a/src/test/regress/input/am_chunk_filtering.source +++ b/src/test/regress/input/am_chunk_filtering.source @@ -76,4 +76,12 @@ CREATE TABLE simple_chunk_filtering(i int) USING COLUMNAR; INSERT INTO simple_chunk_filtering SELECT generate_series(0,234567); EXPLAIN (analyze on, costs off, timing off, summary off) SELECT * FROM simple_chunk_filtering WHERE i > 123456; + +-- https://github.com/citusdata/citus/issues/4555 +TRUNCATE simple_chunk_filtering; +INSERT INTO simple_chunk_filtering SELECT generate_series(0,200000); +COPY (SELECT * FROM simple_chunk_filtering WHERE i > 180000) TO '/dev/null'; +EXPLAIN (analyze on, costs off, timing off, summary off) + SELECT * FROM simple_chunk_filtering WHERE i > 180000; + DROP TABLE simple_chunk_filtering; diff --git a/src/test/regress/output/am_chunk_filtering.source b/src/test/regress/output/am_chunk_filtering.source index 9a42d7230..032be36b8 100644 --- a/src/test/regress/output/am_chunk_filtering.source +++ b/src/test/regress/output/am_chunk_filtering.source @@ -130,4 +130,18 @@ EXPLAIN (analyze on, costs off, timing off, summary off) Columnar Chunks Removed by Filter: 12 (4 rows) +-- https://github.com/citusdata/citus/issues/4555 +TRUNCATE simple_chunk_filtering; +INSERT INTO simple_chunk_filtering SELECT generate_series(0,200000); +COPY (SELECT * FROM simple_chunk_filtering WHERE i > 180000) TO '/dev/null'; +EXPLAIN (analyze on, costs off, timing off, summary off) + SELECT * FROM simple_chunk_filtering WHERE i > 180000; + QUERY PLAN +--------------------------------------------------------------------- + Custom Scan (ColumnarScan) on simple_chunk_filtering (actual rows=20000 loops=1) + Filter: (i > 180000) + Rows Removed by Filter: 1 + Columnar Chunks Removed by Filter: 18 +(4 rows) + DROP TABLE simple_chunk_filtering;