Columnar: Fix ANALYZE for large number of rows.

pull/4407/head
Hadi Moshayedi 2020-12-10 09:48:10 -08:00
parent df1ff60754
commit 4dd22cc4e4
3 changed files with 66 additions and 1 deletions

View File

@ -81,6 +81,12 @@ typedef struct CStoreScanDescData
MemoryContext scanContext;
Bitmapset *attr_needed;
List *scanQual;
/*
* ANALYZE requires an item pointer for sorting. We keep track of row
* number so we can construct an item pointer based on that.
*/
int rowNumber;
} CStoreScanDescData;
typedef struct CStoreScanDescData *CStoreScanDesc;
@ -114,7 +120,10 @@ static bool ConditionalLockRelationWithTimeout(Relation rel, LOCKMODE lockMode,
int timeout, int retryInterval);
static void LogRelationStats(Relation rel, int elevel);
static void TruncateCStore(Relation rel, int elevel);
static HeapTuple ColumnarSlotCopyHeapTuple(TupleTableSlot *slot);
/* Custom tuple slot ops used for columnar. Initialized in cstore_tableam_init(). */
TupleTableSlotOps TTSOpsColumnar;
static List *
RelationColumnList(Relation rel)
@ -148,7 +157,7 @@ RelationColumnList(Relation rel)
/*
 * cstore_slot_callbacks returns the tuple slot operations used for tables of
 * this access method.
 *
 * It hands out TTSOpsColumnar, which cstore_tableam_init() sets up as a copy
 * of TTSOpsVirtual with copy_heap_tuple replaced by ColumnarSlotCopyHeapTuple,
 * so heap tuples copied out of the slot carry the slot's item pointer.
 *
 * The relation argument is not consulted.
 */
static const TupleTableSlotOps *
cstore_slot_callbacks(Relation relation)
{
	return &TTSOpsColumnar;
}
@ -313,6 +322,20 @@ cstore_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot
}
ExecStoreVirtualTuple(slot);
/*
* Set slot's item pointer block & offset to non-zero. These are
* used just for sorting in acquire_sample_rows(), so rowNumber
* is good enough. See ColumnarSlotCopyHeapTuple for more info.
*
* offset is 16-bits, so use the first 15 bits for offset and
* rest as block number.
*/
ItemPointerSetBlockNumber(&(slot->tts_tid), scan->rowNumber / (32 * 1024) + 1);
ItemPointerSetOffsetNumber(&(slot->tts_tid), scan->rowNumber % (32 * 1024) + 1);
scan->rowNumber++;
return true;
}
@ -1172,6 +1195,9 @@ cstore_tableam_init()
object_access_hook = CStoreTableAMObjectAccessHook;
cstore_customscan_init();
TTSOpsColumnar = TTSOpsVirtual;
TTSOpsColumnar.copy_heap_tuple = ColumnarSlotCopyHeapTuple;
}
@ -1182,6 +1208,31 @@ cstore_tableam_finish()
}
/*
 * ColumnarSlotCopyHeapTuple is the copy_heap_tuple callback installed in
 * TTSOpsColumnar. It forms a heap tuple from the slot's values and null
 * flags, then stamps the tuple with the slot's item pointer.
 */
static HeapTuple
ColumnarSlotCopyHeapTuple(TupleTableSlot *slot)
{
	HeapTuple formedTuple;

	Assert(!TTS_EMPTY(slot));

	formedTuple = heap_form_tuple(slot->tts_tupleDescriptor,
								  slot->tts_values,
								  slot->tts_isnull);

	/*
	 * ANALYZE needs an item pointer on every sampled tuple: the qsort in
	 * acquire_sample_rows() orders rows through compare_rows (see
	 * backend/commands/analyze.c). cstore_getnextslot fills slot->tts_tid,
	 * so carry it over to the tuple here.
	 */
	formedTuple->t_self = slot->tts_tid;

	return formedTuple;
}
/*
* Implements object_access_hook. One of the places this is called is just
* before dropping an object, which allows us to clean-up resources for

View File

@ -17,3 +17,9 @@ SELECT count(*) FROM pg_stats WHERE tablename='contestant_compressed';
6
(1 row)
-- ANALYZE a table with lots of data to trigger qsort in analyze.c
CREATE TABLE test_analyze(a int, b text, c char) USING columnar;
INSERT INTO test_analyze SELECT floor(i / 1000), floor(i / 10)::text, 4 FROM generate_series(1, 100000) i;
INSERT INTO test_analyze SELECT floor(i / 2), floor(i / 10)::text, 5 FROM generate_series(1000, 110000) i;
ANALYZE test_analyze;
DROP TABLE test_analyze;

View File

@ -9,3 +9,11 @@ SELECT count(*) FROM pg_stats WHERE tablename='contestant';
-- ANALYZE compressed table
ANALYZE contestant_compressed;
SELECT count(*) FROM pg_stats WHERE tablename='contestant_compressed';
-- ANALYZE a table with lots of data to trigger qsort in analyze.c
CREATE TABLE test_analyze(a int, b text, c char) USING columnar;
INSERT INTO test_analyze SELECT floor(i / 1000), floor(i / 10)::text, 4 FROM generate_series(1, 100000) i;
INSERT INTO test_analyze SELECT floor(i / 2), floor(i / 10)::text, 5 FROM generate_series(1000, 110000) i;
ANALYZE test_analyze;
DROP TABLE test_analyze;