Read chunk row count from catalog tables

pull/4565/head
Hadi Moshayedi 2021-01-22 16:07:29 -08:00
parent 690f54b4fd
commit 639952ffa8
4 changed files with 42 additions and 12 deletions

View File

@ -178,18 +178,8 @@ ColumnarReadNextRow(TableReadState *readState, Datum *columnValues, bool *column
if (chunkIndex != readState->deserializedChunkIndex) if (chunkIndex != readState->deserializedChunkIndex)
{ {
uint32 chunkRowCount = 0; uint32 chunkRowCount =
readState->stripeBuffers->selectedChunkRowCount[chunkIndex];
uint32 stripeRowCount = stripeMetadata->rowCount;
uint32 lastChunkIndex = stripeRowCount / stripeMetadata->chunkRowCount;
if (chunkIndex == lastChunkIndex)
{
chunkRowCount = stripeRowCount % stripeMetadata->chunkRowCount;
}
else
{
chunkRowCount = stripeMetadata->chunkRowCount;
}
oldContext = MemoryContextSwitchTo(readState->stripeReadContext); oldContext = MemoryContextSwitchTo(readState->stripeReadContext);
@ -356,6 +346,14 @@ LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata,
SelectedChunkSkipList(stripeSkipList, projectedColumnMask, SelectedChunkSkipList(stripeSkipList, projectedColumnMask,
selectedChunkMask); selectedChunkMask);
uint32 selectedChunkCount = selectedChunkSkipList->chunkCount;
uint32 *selectedChunkRowCount = palloc0(selectedChunkCount * sizeof(uint32));
for (int chunkIndex = 0; chunkIndex < selectedChunkCount; chunkIndex++)
{
selectedChunkRowCount[chunkIndex] =
selectedChunkSkipList->chunkSkipNodeArray[0][chunkIndex].rowCount;
}
/* load column data for projected columns */ /* load column data for projected columns */
ColumnBuffers **columnBuffersArray = palloc0(columnCount * sizeof(ColumnBuffers *)); ColumnBuffers **columnBuffersArray = palloc0(columnCount * sizeof(ColumnBuffers *));
@ -381,6 +379,8 @@ LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata,
stripeBuffers->columnCount = columnCount; stripeBuffers->columnCount = columnCount;
stripeBuffers->rowCount = StripeSkipListRowCount(selectedChunkSkipList); stripeBuffers->rowCount = StripeSkipListRowCount(selectedChunkSkipList);
stripeBuffers->columnBuffersArray = columnBuffersArray; stripeBuffers->columnBuffersArray = columnBuffersArray;
stripeBuffers->selectedChunks = selectedChunkCount;
stripeBuffers->selectedChunkRowCount = selectedChunkRowCount;
return stripeBuffers; return stripeBuffers;
} }

View File

@ -201,6 +201,14 @@ typedef struct StripeBuffers
uint32 columnCount; uint32 columnCount;
uint32 rowCount; uint32 rowCount;
ColumnBuffers **columnBuffersArray; ColumnBuffers **columnBuffersArray;
/*
* We might skip reading some chunks because they're refuted by the
* WHERE clause. We keep the number of selected chunks and the number
* of rows in each of them.
*/
uint32 selectedChunks;
uint32 *selectedChunkRowCount;
} StripeBuffers; } StripeBuffers;

View File

@ -76,4 +76,12 @@ CREATE TABLE simple_chunk_filtering(i int) USING COLUMNAR;
INSERT INTO simple_chunk_filtering SELECT generate_series(0,234567); INSERT INTO simple_chunk_filtering SELECT generate_series(0,234567);
EXPLAIN (analyze on, costs off, timing off, summary off) EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT * FROM simple_chunk_filtering WHERE i > 123456; SELECT * FROM simple_chunk_filtering WHERE i > 123456;
-- https://github.com/citusdata/citus/issues/4555
TRUNCATE simple_chunk_filtering;
INSERT INTO simple_chunk_filtering SELECT generate_series(0,200000);
COPY (SELECT * FROM simple_chunk_filtering WHERE i > 180000) TO '/dev/null';
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT * FROM simple_chunk_filtering WHERE i > 180000;
DROP TABLE simple_chunk_filtering; DROP TABLE simple_chunk_filtering;

View File

@ -130,4 +130,18 @@ EXPLAIN (analyze on, costs off, timing off, summary off)
Columnar Chunks Removed by Filter: 12 Columnar Chunks Removed by Filter: 12
(4 rows) (4 rows)
-- https://github.com/citusdata/citus/issues/4555
TRUNCATE simple_chunk_filtering;
INSERT INTO simple_chunk_filtering SELECT generate_series(0,200000);
COPY (SELECT * FROM simple_chunk_filtering WHERE i > 180000) TO '/dev/null';
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT * FROM simple_chunk_filtering WHERE i > 180000;
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (ColumnarScan) on simple_chunk_filtering (actual rows=20000 loops=1)
Filter: (i > 180000)
Rows Removed by Filter: 1
Columnar Chunks Removed by Filter: 18
(4 rows)
DROP TABLE simple_chunk_filtering; DROP TABLE simple_chunk_filtering;