mirror of https://github.com/citusdata/citus.git
Merge pull request #4950 from citusdata/col/index-support
Add basic index support for columnar tables. This PR brings support for the
following index/constraint types:

* btree indexes
* primary keys
* unique constraints / indexes
* exclusion constraints
* hash indexes
* partial indexes
* indexes including additional columns (INCLUDE syntax), even if we don't
  properly support index-only scans
commit
a2efe59e2f
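
A minimal usage sketch of what this merge enables; the table and index names
below are illustrative, not taken from the diff:

    CREATE TABLE events (id int, payload text) USING columnar;
    CREATE UNIQUE INDEX events_id_idx ON events (id);               -- btree / unique
    CREATE INDEX events_payload_idx ON events USING hash (payload); -- hash
    CREATE INDEX ON events (id) WHERE id > 1000;                    -- partial
    CREATE INDEX ON events (id) INCLUDE (payload);                  -- INCLUDE syntax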
@@ -27,6 +27,7 @@
 #include "columnar/columnar_customscan.h"
 #include "columnar/columnar_metadata.h"
 #include "columnar/columnar_tableam.h"
+#include "distributed/listutils.h"
 
 typedef struct ColumnarScanPath
 {

@@ -50,8 +51,13 @@ typedef struct ColumnarScanState
 } ColumnarScanState;
 
+
+typedef bool (*PathPredicate)(Path *path);
+
 
 static void ColumnarSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti,
 									   RangeTblEntry *rte);
+static void RemovePathsByPredicate(RelOptInfo *rel, PathPredicate removePathPredicate);
+static bool IsNotIndexPath(Path *path);
 static Path * CreateColumnarScanPath(PlannerInfo *root, RelOptInfo *rel,
 									 RangeTblEntry *rte);
 static Cost ColumnarScanCost(RangeTblEntry *rte);

@@ -137,18 +143,6 @@ columnar_customscan_init()
 }
 
 
-static void
-clear_paths(RelOptInfo *rel)
-{
-	rel->pathlist = NIL;
-	rel->partial_pathlist = NIL;
-	rel->cheapest_startup_path = NULL;
-	rel->cheapest_total_path = NULL;
-	rel->cheapest_unique_path = NULL;
-	rel->cheapest_parameterized_paths = NIL;
-}
-
-
 static void
 ColumnarSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti,
 						   RangeTblEntry *rte)

@@ -188,8 +182,13 @@ ColumnarSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti,
 
 		ereport(DEBUG1, (errmsg("pathlist hook for columnar table am")));
 
-		/* we propose a new path that will be the only path for scanning this relation */
-		clear_paths(rel);
+		/*
+		 * TODO: Since we don't have a proper costing model for
+		 * ColumnarCustomScan, we remove other paths to force postgres
+		 * using ColumnarCustomScan. Note that we still keep index paths
+		 * since they still might be useful.
+		 */
+		RemovePathsByPredicate(rel, IsNotIndexPath);
 		add_path(rel, customPath);
 	}
 }

@@ -197,6 +196,38 @@ ColumnarSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti,
 }
 
 
+/*
+ * RemovePathsByPredicate removes the paths that removePathPredicate
+ * evaluates to true from pathlist of given rel.
+ */
+static void
+RemovePathsByPredicate(RelOptInfo *rel, PathPredicate removePathPredicate)
+{
+	List *filteredPathList = NIL;
+
+	Path *path = NULL;
+	foreach_ptr(path, rel->pathlist)
+	{
+		if (!removePathPredicate(path))
+		{
+			filteredPathList = lappend(filteredPathList, path);
+		}
+	}
+
+	rel->pathlist = filteredPathList;
+}
+
+
+/*
+ * IsNotIndexPath returns true if given path is not an IndexPath.
+ */
+static bool
+IsNotIndexPath(Path *path)
+{
+	return !IsA(path, IndexPath);
+}
+
+
 static Path *
 CreateColumnarScanPath(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
 {
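The planning effect of the hunks above, as an illustrative sketch (table and
index names are not from the diff; the columnar.enable_custom_scan GUC also
appears in the regression tests further down this page):

    CREATE TABLE t (a int, b int) USING columnar;
    CREATE INDEX t_a_idx ON t (a);
    EXPLAIN (COSTS OFF) SELECT * FROM t WHERE a = 1;
    -- the planner can now pick either the columnar custom scan path or the
    -- surviving index path, instead of having only the custom scan
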
@@ -12,7 +12,7 @@
  * min/max values (used for Chunk Group Filtering)
  * * useful for fast VACUUM operations (e.g. reporting with VACUUM VERBOSE)
  * * useful for stats/costing
- * * TODO: maps logical row numbers to stripe IDs
+ * * maps logical row numbers to stripe IDs
  * * TODO: visibility information
  *
  *-------------------------------------------------------------------------

@@ -77,6 +77,7 @@ static uint32 * ReadChunkGroupRowCounts(uint64 storageId, uint64 stripe, uint32
 static Oid ColumnarStorageIdSequenceRelationId(void);
 static Oid ColumnarStripeRelationId(void);
 static Oid ColumnarStripePKeyIndexRelationId(void);
+static Oid ColumnarStripeFirstRowNumberIndexRelationId(void);
 static Oid ColumnarOptionsRelationId(void);
 static Oid ColumnarOptionsIndexRegclass(void);
 static Oid ColumnarChunkRelationId(void);

@@ -620,6 +621,93 @@ ReadStripeSkipList(RelFileNode relfilenode, uint64 stripe, TupleDesc tupleDescri
 }
 
 
+/*
+ * FindStripeByRowNumber returns StripeMetadata for the stripe that has the
+ * row with rowNumber by doing backward index scan on
+ * stripe_first_row_number_idx. If no such row exists, then returns NULL.
+ */
+StripeMetadata *
+FindStripeByRowNumber(Relation relation, uint64 rowNumber, Snapshot snapshot)
+{
+	StripeMetadata *foundStripeMetadata = NULL;
+
+	uint64 storageId = ColumnarStorageGetStorageId(relation, false);
+	ScanKeyData scanKey[2];
+	ScanKeyInit(&scanKey[0], Anum_columnar_stripe_storageid,
+				BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(storageId));
+	ScanKeyInit(&scanKey[1], Anum_columnar_stripe_first_row_number,
+				BTLessEqualStrategyNumber, F_INT8LE, UInt64GetDatum(rowNumber));
+
+	Relation columnarStripes = table_open(ColumnarStripeRelationId(), AccessShareLock);
+	Relation index = index_open(ColumnarStripeFirstRowNumberIndexRelationId(),
+								AccessShareLock);
+	SysScanDesc scanDescriptor = systable_beginscan_ordered(columnarStripes, index,
+															snapshot, 2,
+															scanKey);
+
+	HeapTuple heapTuple = systable_getnext_ordered(scanDescriptor, BackwardScanDirection);
+	if (HeapTupleIsValid(heapTuple))
+	{
+		TupleDesc tupleDescriptor = RelationGetDescr(columnarStripes);
+		Datum datumArray[Natts_columnar_stripe];
+		bool isNullArray[Natts_columnar_stripe];
+		heap_deform_tuple(heapTuple, tupleDescriptor, datumArray, isNullArray);
+
+		StripeMetadata *stripeMetadata = BuildStripeMetadata(datumArray);
+		if (rowNumber < stripeMetadata->firstRowNumber + stripeMetadata->rowCount)
+		{
+			foundStripeMetadata = stripeMetadata;
+		}
+	}
+
+	systable_endscan_ordered(scanDescriptor);
+	index_close(index, AccessShareLock);
+	table_close(columnarStripes, AccessShareLock);
+
+	return foundStripeMetadata;
+}
+
+
+/*
+ * FindStripeWithHighestRowNumber returns StripeMetadata for the stripe that
+ * has the row with highest rowNumber by doing backward index scan on
+ * stripe_first_row_number_idx. If given relation is empty, then returns NULL.
+ */
+StripeMetadata *
+FindStripeWithHighestRowNumber(Relation relation, Snapshot snapshot)
+{
+	StripeMetadata *stripeWithHighestRowNumber = NULL;
+
+	uint64 storageId = ColumnarStorageGetStorageId(relation, false);
+	ScanKeyData scanKey[1];
+	ScanKeyInit(&scanKey[0], Anum_columnar_stripe_storageid,
+				BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(storageId));
+
+	Relation columnarStripes = table_open(ColumnarStripeRelationId(), AccessShareLock);
+	Relation index = index_open(ColumnarStripeFirstRowNumberIndexRelationId(),
+								AccessShareLock);
+	SysScanDesc scanDescriptor = systable_beginscan_ordered(columnarStripes, index,
+															snapshot, 1, scanKey);
+
+	HeapTuple heapTuple = systable_getnext_ordered(scanDescriptor, BackwardScanDirection);
+	if (HeapTupleIsValid(heapTuple))
+	{
+		TupleDesc tupleDescriptor = RelationGetDescr(columnarStripes);
+		Datum datumArray[Natts_columnar_stripe];
+		bool isNullArray[Natts_columnar_stripe];
+		heap_deform_tuple(heapTuple, tupleDescriptor, datumArray, isNullArray);
+
+		stripeWithHighestRowNumber = BuildStripeMetadata(datumArray);
+	}
+
+	systable_endscan_ordered(scanDescriptor);
+	index_close(index, AccessShareLock);
+	table_close(columnarStripes, AccessShareLock);
+
+	return stripeWithHighestRowNumber;
+}
+
+
 /*
  * ReadChunkGroupRowCounts returns an array of row counts of chunk groups for the
  * given stripe.

@@ -828,7 +916,8 @@ ReadDataFileStripeList(uint64 storageId, Snapshot snapshot)
 	Oid columnarStripesOid = ColumnarStripeRelationId();
 
 	Relation columnarStripes = table_open(columnarStripesOid, AccessShareLock);
-	Relation index = index_open(ColumnarStripePKeyIndexRelationId(), AccessShareLock);
+	Relation index = index_open(ColumnarStripeFirstRowNumberIndexRelationId(),
+								AccessShareLock);
 	TupleDesc tupleDescriptor = RelationGetDescr(columnarStripes);
 
 	SysScanDesc scanDescriptor = systable_beginscan_ordered(columnarStripes, index,

@@ -1153,6 +1242,18 @@ ColumnarStripePKeyIndexRelationId(void)
 }
 
 
+/*
+ * ColumnarStripeFirstRowNumberIndexRelationId returns relation id of
+ * columnar.stripe_first_row_number_idx.
+ * TODO: should we cache this similar to citus?
+ */
+static Oid
+ColumnarStripeFirstRowNumberIndexRelationId(void)
+{
+	return get_relname_relid("stripe_first_row_number_idx", ColumnarNamespaceId());
+}
+
+
 /*
  * ColumnarOptionsRelationId returns relation id of columnar.options.
  */
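To make the backward-scan lookup above concrete (stripe sizes here are
illustrative): suppose a table has two stripes, one with first_row_number = 1
and rowCount = 150000, the next with first_row_number = 150001. For rowNumber
160000, the backward scan over (storageid, first_row_number <= 160000) lands
on the stripe starting at 150001, and the rowCount check confirms the row
falls inside it; for a rowNumber past the end of the last stripe, that check
fails and NULL is returned.
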
@@ -35,6 +35,7 @@
 
 #include "columnar/columnar.h"
 #include "columnar/columnar_storage.h"
+#include "columnar/columnar_tableam.h"
 #include "columnar/columnar_version_compat.h"
 
 typedef struct ChunkGroupReadState

@@ -85,6 +86,14 @@ struct ColumnarReadState
 
 /* static function declarations */
 static MemoryContext CreateStripeReadMemoryContext(void);
+static void ReadStripeRowByRowNumber(StripeReadState *stripeReadState,
+									 StripeMetadata *stripeMetadata,
+									 uint64 rowNumber, Datum *columnValues,
+									 bool *columnNulls);
+static void ReadChunkGroupRowByRowOffset(ChunkGroupReadState *chunkGroupReadState,
+										 StripeMetadata *stripeMetadata,
+										 uint64 stripeRowOffset, Datum *columnValues,
+										 bool *columnNulls);
 static bool StripeReadInProgress(ColumnarReadState *readState);
 static bool HasUnreadStripe(ColumnarReadState *readState);
 static StripeReadState * BeginStripeRead(StripeMetadata *stripeMetadata, Relation rel,

@@ -243,6 +252,104 @@ ColumnarReadNextRow(ColumnarReadState *readState, Datum *columnValues, bool *col
 }
 
 
+/*
+ * ColumnarReadRowByRowNumber reads row with rowNumber from given relation
+ * into columnValues and columnNulls, and returns true. If no such row
+ * exists, then returns false.
+ */
+bool
+ColumnarReadRowByRowNumber(Relation relation, uint64 rowNumber,
+						   List *neededColumnList, Datum *columnValues,
+						   bool *columnNulls, Snapshot snapshot)
+{
+	StripeMetadata *stripeMetadata = FindStripeByRowNumber(relation, rowNumber, snapshot);
+	if (stripeMetadata == NULL)
+	{
+		/* no such row exists */
+		return false;
+	}
+
+	TupleDesc relationTupleDesc = RelationGetDescr(relation);
+	List *whereClauseList = NIL;
+	List *whereClauseVars = NIL;
+	MemoryContext stripeReadContext = CreateStripeReadMemoryContext();
+	StripeReadState *stripeReadState = BeginStripeRead(stripeMetadata,
+													   relation,
+													   relationTupleDesc,
+													   neededColumnList,
+													   whereClauseList,
+													   whereClauseVars,
+													   stripeReadContext);
+
+	ReadStripeRowByRowNumber(stripeReadState, stripeMetadata, rowNumber,
+							 columnValues, columnNulls);
+
+	EndStripeRead(stripeReadState);
+	MemoryContextReset(stripeReadContext);
+
+	return true;
+}
+
+
+/*
+ * ReadStripeRowByRowNumber reads row with rowNumber from given
+ * stripeReadState into columnValues and columnNulls.
+ * Errors out if no such row exists in the stripe being read.
+ */
+static void
+ReadStripeRowByRowNumber(StripeReadState *stripeReadState,
+						 StripeMetadata *stripeMetadata,
+						 uint64 rowNumber, Datum *columnValues,
+						 bool *columnNulls)
+{
+	if (rowNumber < stripeMetadata->firstRowNumber)
+	{
+		/* not expected but be on the safe side */
+		ereport(ERROR, (errmsg("row offset cannot be negative")));
+	}
+
+	/* find the exact chunk group to be read */
+	uint64 stripeRowOffset = rowNumber - stripeMetadata->firstRowNumber;
+	stripeReadState->chunkGroupIndex = stripeRowOffset /
+									   stripeMetadata->chunkGroupRowCount;
+	stripeReadState->chunkGroupReadState = BeginChunkGroupRead(
+		stripeReadState->stripeBuffers,
+		stripeReadState->chunkGroupIndex,
+		stripeReadState->tupleDescriptor,
+		stripeReadState->projectedColumnList,
+		stripeReadState->stripeReadContext);
+
+	ReadChunkGroupRowByRowOffset(stripeReadState->chunkGroupReadState,
+								 stripeMetadata, stripeRowOffset,
+								 columnValues, columnNulls);
+
+	EndChunkGroupRead(stripeReadState->chunkGroupReadState);
+	stripeReadState->chunkGroupReadState = NULL;
+}
+
+
+/*
+ * ReadChunkGroupRowByRowOffset reads row with stripeRowOffset from given
+ * chunkGroupReadState into columnValues and columnNulls.
+ * Errors out if no such row exists in the chunk group being read.
+ */
+static void
+ReadChunkGroupRowByRowOffset(ChunkGroupReadState *chunkGroupReadState,
+							 StripeMetadata *stripeMetadata,
+							 uint64 stripeRowOffset, Datum *columnValues,
+							 bool *columnNulls)
+{
+	/* set the exact row number to be read from given chunk group */
+	chunkGroupReadState->currentRow = stripeRowOffset %
+									  stripeMetadata->chunkGroupRowCount;
+	if (!ReadChunkGroupNextRow(chunkGroupReadState, columnValues, columnNulls))
+	{
+		/* not expected but be on the safe side */
+		ereport(ERROR, (errmsg("could not find the row in stripe")));
+	}
+}
+
+
 /*
  * StripeReadInProgress returns true if we already started reading a stripe.
  */
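A worked example of the offset arithmetic in ReadStripeRowByRowNumber above,
assuming the default chunk group size of 10000 rows (numbers illustrative):
for rowNumber 25000 in a stripe whose firstRowNumber is 1, stripeRowOffset is
24999, so chunkGroupIndex = 24999 / 10000 = 2 and the row read within that
chunk group is 24999 % 10000 = 4999.
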
@@ -615,7 +615,7 @@ ColumnarMetapageRead(Relation rel, bool force)
 	if (nblocks == 0)
 	{
 		/*
-		 * We only expect this to happen during binary ugrades. This is because,
+		 * We only expect this to happen when upgrading citus.so. This is because,
 		 * in current version of columnar, we immediately create the metapage
 		 * for columnar tables, i.e right after creating the table.
 		 * However in older versions, we were creating metapages lazily, i.e
@@ -56,6 +56,7 @@
 #include "columnar/columnar_version_compat.h"
 #include "distributed/commands.h"
 #include "distributed/commands/utility_hook.h"
+#include "distributed/listutils.h"
 #include "distributed/metadata_cache.h"
 
 /*

@@ -111,7 +112,20 @@ static HeapTuple ColumnarSlotCopyHeapTuple(TupleTableSlot *slot);
 static void ColumnarCheckLogicalReplication(Relation rel);
 static Datum * detoast_values(TupleDesc tupleDesc, Datum *orig_values, bool *isnull);
 static ItemPointerData row_number_to_tid(uint64 rowNumber);
+static uint64 tid_to_row_number(ItemPointerData tid);
 static void ErrorIfInvalidRowNumber(uint64 rowNumber);
+static void ColumnarReportTotalVirtualBlocks(Relation relation, Snapshot snapshot,
+											 int progressArrIndex);
+static BlockNumber ColumnarGetNumberOfVirtualBlocks(Relation relation, Snapshot snapshot);
+static ItemPointerData ColumnarGetHighestItemPointer(Relation relation,
+													 Snapshot snapshot);
+static double ColumnarReadRowsIntoIndex(TableScanDesc scan,
+										Relation indexRelation,
+										IndexInfo *indexInfo,
+										bool progress,
+										IndexBuildCallback indexCallback,
+										void *indexCallbackState,
+										EState *estate, ExprState *predicate);
 
 /* Custom tuple slot ops used for columnar. Initialized in columnar_tableam_init(). */
 static TupleTableSlotOps TTSOpsColumnar;
@@ -294,6 +308,21 @@ row_number_to_tid(uint64 rowNumber)
 }
 
 
+/*
+ * tid_to_row_number maps given ItemPointerData to rowNumber.
+ */
+static uint64
+tid_to_row_number(ItemPointerData tid)
+{
+	uint64 rowNumber = ItemPointerGetBlockNumber(&tid) * VALID_ITEMPOINTER_OFFSETS +
+					   ItemPointerGetOffsetNumber(&tid) - FirstOffsetNumber;
+
+	ErrorIfInvalidRowNumber(rowNumber);
+
+	return rowNumber;
+}
+
+
 /*
  * ErrorIfInvalidRowNumber errors out if given rowNumber is invalid.
  */
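To illustrate the mapping above with concrete numbers (assuming the standard
8 kB block size, under which VALID_ITEMPOINTER_OFFSETS works out to 2048
usable offsets per "virtual" block): the TID (block 3, offset 5) maps to
rowNumber 3 * 2048 + 5 - 1 = 6148, and row_number_to_tid inverts this exactly.
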
@@ -320,45 +349,67 @@ ErrorIfInvalidRowNumber(uint64 rowNumber)
 static Size
 columnar_parallelscan_estimate(Relation rel)
 {
-	elog(ERROR, "columnar_parallelscan_estimate not implemented");
+	return sizeof(ParallelBlockTableScanDescData);
 }
 
 
 static Size
 columnar_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan)
 {
-	elog(ERROR, "columnar_parallelscan_initialize not implemented");
+	ParallelBlockTableScanDesc bpscan = (ParallelBlockTableScanDesc) pscan;
+
+	bpscan->base.phs_relid = RelationGetRelid(rel);
+	bpscan->phs_nblocks = RelationGetNumberOfBlocks(rel);
+	bpscan->base.phs_syncscan = synchronize_seqscans &&
+								!RelationUsesLocalBuffers(rel) &&
+								bpscan->phs_nblocks > NBuffers / 4;
+	SpinLockInit(&bpscan->phs_mutex);
+	bpscan->phs_startblock = InvalidBlockNumber;
+	pg_atomic_init_u64(&bpscan->phs_nallocated, 0);
+
+	return sizeof(ParallelBlockTableScanDescData);
 }
 
 
 static void
 columnar_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan)
 {
-	elog(ERROR, "columnar_parallelscan_reinitialize not implemented");
+	ParallelBlockTableScanDesc bpscan = (ParallelBlockTableScanDesc) pscan;
+	pg_atomic_write_u64(&bpscan->phs_nallocated, 0);
 }
 
 
 static IndexFetchTableData *
 columnar_index_fetch_begin(Relation rel)
 {
-	ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					errmsg("indexes not supported for columnar tables")));
+	Oid relfilenode = rel->rd_node.relNode;
+	if (PendingWritesInUpperTransactions(relfilenode, GetCurrentSubTransactionId()))
+	{
+		/* XXX: maybe we can just flush the data and continue */
+		elog(ERROR, "cannot read from index when there is unflushed data in "
+					"upper transactions");
+	}
+
+	FlushWriteStateForRelfilenode(relfilenode, GetCurrentSubTransactionId());
+
+	IndexFetchTableData *scan = palloc0(sizeof(IndexFetchTableData));
+	scan->rel = rel;
+	return scan;
 }
 
 
 static void
 columnar_index_fetch_reset(IndexFetchTableData *scan)
 {
-	ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					errmsg("indexes not supported for columnar tables")));
+	/* no-op */
 }
 
 
 static void
 columnar_index_fetch_end(IndexFetchTableData *scan)
 {
-	ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					errmsg("indexes not supported for columnar tables")));
+	columnar_index_fetch_reset(scan);
+	pfree(scan);
 }
@@ -369,8 +420,37 @@ columnar_index_fetch_tuple(struct IndexFetchTableData *scan,
 						   TupleTableSlot *slot,
 						   bool *call_again, bool *all_dead)
 {
-	ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					errmsg("indexes not supported for columnar tables")));
+	/* no HOT chains are possible in columnar, directly set it to false */
+	*call_again = false;
+
+	/*
+	 * No dead tuples are possible in columnar, set it to false if it's
+	 * passed to be non-NULL.
+	 */
+	if (all_dead)
+	{
+		*all_dead = false;
+	}
+
+	ExecClearTuple(slot);
+
+	/* we need all columns */
+	int natts = scan->rel->rd_att->natts;
+	Bitmapset *attr_needed = bms_add_range(NULL, 0, natts - 1);
+	TupleDesc relationTupleDesc = RelationGetDescr(scan->rel);
+	List *relationColumnList = NeededColumnsList(relationTupleDesc, attr_needed);
+	uint64 rowNumber = tid_to_row_number(*tid);
+	if (!ColumnarReadRowByRowNumber(scan->rel, rowNumber, relationColumnList,
+									slot->tts_values, slot->tts_isnull, snapshot))
+	{
+		return false;
+	}
+
+	slot->tts_tableOid = RelationGetRelid(scan->rel);
+	slot->tts_tid = *tid;
+	ExecStoreVirtualTuple(slot);
+
+	return true;
 }
@@ -627,7 +707,8 @@ columnar_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
 	if (OldIndex != NULL || use_sort)
 	{
 		ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-						errmsg("indexes not supported for columnar tables")));
+						errmsg("clustering columnar tables using indexes is "
+							   "not supported")));
 	}
 
 	/*
@@ -1003,7 +1084,7 @@ columnar_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
 
 
 static double
-columnar_index_build_range_scan(Relation heapRelation,
+columnar_index_build_range_scan(Relation columnarRelation,
 								Relation indexRelation,
 								IndexInfo *indexInfo,
 								bool allow_sync,
@@ -1015,8 +1096,278 @@ columnar_index_build_range_scan(Relation heapRelation,
 								void *callback_state,
 								TableScanDesc scan)
 {
-	ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					errmsg("indexes not supported for columnar tables")));
+	if (start_blockno != 0 || numblocks != InvalidBlockNumber)
+	{
+		/*
+		 * Columnar utility hook already errors out for BRIN indexes on columnar
+		 * tables, but be on the safe side.
+		 */
+		ereport(ERROR, (errmsg("BRIN indexes on columnar tables are not supported")));
+	}
+
+	if (indexInfo->ii_Concurrent)
+	{
+		/* we already don't allow CONCURRENTLY syntax but be on the safe side */
+		ereport(ERROR, (errmsg("concurrent index builds are not supported "
+							   "for columnar tables")));
+	}
+
+	if (scan)
+	{
+		/*
+		 * Scan is initialized iff postgres decided to build the index using
+		 * parallel workers. In this case, we simply return for parallel
+		 * workers since we don't support parallel scan on columnar tables.
+		 */
+		if (IsBackgroundWorker)
+		{
+			ereport(DEBUG4, (errmsg("ignoring parallel worker when building "
+									"index since parallel scan on columnar "
+									"tables is not supported")));
+			return 0;
+		}
+
+		ereport(NOTICE, (errmsg("falling back to serial index build since "
+								"parallel scan on columnar tables is not "
+								"supported")));
+	}
+
+	/*
+	 * In a normal index build, we use SnapshotAny to retrieve all tuples. In
+	 * a concurrent build or during bootstrap, we take a regular MVCC snapshot
+	 * and index whatever's live according to that.
+	 */
+	TransactionId OldestXmin = InvalidTransactionId;
+
+	/*
+	 * We already don't allow concurrent index builds so ii_Concurrent
+	 * will always be false, but let's keep the code close to heapAM.
+	 */
+	if (!IsBootstrapProcessingMode() && !indexInfo->ii_Concurrent)
+	{
+		/* ignore lazy VACUUM's */
+		OldestXmin = GetOldestXmin(columnarRelation, PROCARRAY_FLAGS_VACUUM);
+	}
+
+	Snapshot snapshot = { 0 };
+	bool snapshotRegisteredByUs = false;
+	if (!scan)
+	{
+		/*
+		 * For serial index build, we begin our own scan. We may also need to
+		 * register a snapshot whose lifetime is under our direct control.
+		 */
+		if (!TransactionIdIsValid(OldestXmin))
+		{
+			snapshot = RegisterSnapshot(GetTransactionSnapshot());
+			snapshotRegisteredByUs = true;
+		}
+		else
+		{
+			snapshot = SnapshotAny;
+		}
+
+		int nkeys = 0;
+		ScanKeyData *scanKey = NULL;
+		bool allowAccessStrategy = true;
+		scan = table_beginscan_strat(columnarRelation, snapshot, nkeys, scanKey,
+									 allowAccessStrategy, allow_sync);
+	}
+	else
+	{
+		/*
+		 * For parallel index build, we don't register/unregister own snapshot
+		 * since snapshot is taken from parallel scan. Note that even if we
+		 * don't support parallel index builds, we still continue building the
+		 * index via the main backend and we should still rely on the snapshot
+		 * provided by parallel scan.
+		 */
+		snapshot = scan->rs_snapshot;
+	}
+
+	if (progress)
+	{
+		ColumnarReportTotalVirtualBlocks(columnarRelation, snapshot,
+										 PROGRESS_SCAN_BLOCKS_TOTAL);
+	}
+
+	/*
+	 * Set up execution state for predicate, if any.
+	 * Note that this is only useful for partial indexes.
+	 */
+	EState *estate = CreateExecutorState();
+	ExprContext *econtext = GetPerTupleExprContext(estate);
+	econtext->ecxt_scantuple = table_slot_create(columnarRelation, NULL);
+	ExprState *predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
+
+	double reltuples = ColumnarReadRowsIntoIndex(scan, indexRelation, indexInfo,
+												 progress, callback, callback_state,
+												 estate, predicate);
+	table_endscan(scan);
+
+	if (progress)
+	{
+		/* report the last "virtual" block as "done" */
+		ColumnarReportTotalVirtualBlocks(columnarRelation, snapshot,
+										 PROGRESS_SCAN_BLOCKS_DONE);
+	}
+
+	if (snapshotRegisteredByUs)
+	{
+		UnregisterSnapshot(snapshot);
+	}
+
+	ExecDropSingleTupleTableSlot(econtext->ecxt_scantuple);
+	FreeExecutorState(estate);
+	indexInfo->ii_ExpressionsState = NIL;
+	indexInfo->ii_PredicateState = NULL;
+
+	return reltuples;
+}
+
+
+/*
+ * ColumnarReportTotalVirtualBlocks reports progress for index build based on
+ * number of "virtual" blocks that given relation has.
+ * "progressArrIndex" argument determines which entry in st_progress_param
+ * array should be updated. In this case, we only expect PROGRESS_SCAN_BLOCKS_TOTAL
+ * or PROGRESS_SCAN_BLOCKS_DONE to specify whether we want to report calculated
+ * number of blocks as "done" or as "total" number of "virtual" blocks to scan.
+ */
+static void
+ColumnarReportTotalVirtualBlocks(Relation relation, Snapshot snapshot,
+								 int progressArrIndex)
+{
+	/*
+	 * Indeed, columnar tables might have gaps between row numbers, e.g
+	 * due to aborted transactions etc. Also, ItemPointer BlockNumber's
+	 * for columnar tables don't actually correspond to actual disk blocks
+	 * as in heapAM. For this reason, we call them as "virtual" blocks. At
+	 * the moment, we believe it is better to report our progress based on
+	 * this "virtual" block concept instead of doing nothing.
+	 */
+	Assert(progressArrIndex == PROGRESS_SCAN_BLOCKS_TOTAL ||
+		   progressArrIndex == PROGRESS_SCAN_BLOCKS_DONE);
+	BlockNumber nvirtualBlocks =
+		ColumnarGetNumberOfVirtualBlocks(relation, snapshot);
+	pgstat_progress_update_param(progressArrIndex, nvirtualBlocks);
+}
+
+
+/*
+ * ColumnarGetNumberOfVirtualBlocks returns total number of "virtual" blocks
+ * that given columnar table has based on ItemPointer BlockNumber's.
+ */
+static BlockNumber
+ColumnarGetNumberOfVirtualBlocks(Relation relation, Snapshot snapshot)
+{
+	ItemPointerData highestItemPointer =
+		ColumnarGetHighestItemPointer(relation, snapshot);
+	if (!ItemPointerIsValid(&highestItemPointer))
+	{
+		/* table is empty according to our snapshot */
+		return 0;
+	}
+
+	/*
+	 * Since BlockNumber is 0-based, increment it by 1 to find the total
+	 * number of "virtual" blocks.
+	 */
+	return ItemPointerGetBlockNumber(&highestItemPointer) + 1;
+}
+
+
+/*
+ * ColumnarGetHighestItemPointer returns ItemPointerData for the tuple with
+ * highest tid for given relation.
+ * If given relation is empty, then returns invalid item pointer.
+ */
+static ItemPointerData
+ColumnarGetHighestItemPointer(Relation relation, Snapshot snapshot)
+{
+	StripeMetadata *stripeWithHighestRowNumber =
+		FindStripeWithHighestRowNumber(relation, snapshot);
+	if (stripeWithHighestRowNumber == NULL)
+	{
+		/* table is empty according to our snapshot */
+		ItemPointerData invalidItemPtr;
+		ItemPointerSetInvalid(&invalidItemPtr);
+		return invalidItemPtr;
+	}
+
+	uint64 highestRowNumber = stripeWithHighestRowNumber->firstRowNumber +
+							  stripeWithHighestRowNumber->rowCount - 1;
+	return row_number_to_tid(highestRowNumber);
+}
+
+
+/*
+ * ColumnarReadRowsIntoIndex builds indexRelation tuples by reading the
+ * actual relation based on given "scan" and returns number of tuples
+ * scanned to build the indexRelation.
+ */
+static double
+ColumnarReadRowsIntoIndex(TableScanDesc scan, Relation indexRelation,
+						  IndexInfo *indexInfo, bool progress,
+						  IndexBuildCallback indexCallback,
+						  void *indexCallbackState, EState *estate,
+						  ExprState *predicate)
+{
+	double reltuples = 0;
+
+	BlockNumber lastReportedBlockNumber = InvalidBlockNumber;
+
+	ExprContext *econtext = GetPerTupleExprContext(estate);
+	TupleTableSlot *slot = econtext->ecxt_scantuple;
+	while (columnar_getnextslot(scan, ForwardScanDirection, slot))
+	{
+		CHECK_FOR_INTERRUPTS();
+
+		BlockNumber currentBlockNumber = ItemPointerGetBlockNumber(&slot->tts_tid);
+		if (progress && lastReportedBlockNumber != currentBlockNumber)
+		{
+			/*
+			 * columnar_getnextslot guarantees that returned tuple will
+			 * always have a greater ItemPointer than the ones we fetched
+			 * before, so we directly use BlockNumber to report our progress.
+			 */
+			Assert(lastReportedBlockNumber == InvalidBlockNumber ||
+				   currentBlockNumber >= lastReportedBlockNumber);
+			pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
+										 currentBlockNumber);
+			lastReportedBlockNumber = currentBlockNumber;
+		}
+
+		MemoryContextReset(econtext->ecxt_per_tuple_memory);
+
+		if (predicate != NULL && !ExecQual(predicate, econtext))
+		{
+			/* for partial indexes, discard tuples that don't satisfy the predicate */
+			continue;
+		}
+
+		Datum indexValues[INDEX_MAX_KEYS];
+		bool indexNulls[INDEX_MAX_KEYS];
+		FormIndexDatum(indexInfo, slot, estate, indexValues, indexNulls);
+
+		ItemPointerData itemPointerData = slot->tts_tid;
+
+		/* currently, columnar tables can't have dead tuples */
+		bool tupleIsAlive = true;
+#if PG_VERSION_NUM >= PG_VERSION_13
+		indexCallback(indexRelation, &itemPointerData, indexValues, indexNulls,
+					  tupleIsAlive, indexCallbackState);
+#else
+		HeapTuple scanTuple = ExecCopySlotHeapTuple(slot);
+		scanTuple->t_self = itemPointerData;
+		indexCallback(indexRelation, scanTuple, indexValues, indexNulls,
+					  tupleIsAlive, indexCallbackState);
+#endif
+
+		reltuples++;
+	}
+
+	return reltuples;
 }
 
 
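The serial-build fallback above is not exercised by the regression tests on
this page; a hypothetical session that would hit it (table name and GUC value
are illustrative, the NOTICE text is verbatim from the code):

    SET max_parallel_maintenance_workers TO 2;
    CREATE INDEX ON big_columnar_table (a);
    NOTICE:  falling back to serial index build since parallel scan on columnar tables is not supported
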
@@ -1027,8 +1378,15 @@ columnar_index_validate_scan(Relation heapRelation,
 							 Snapshot snapshot,
 							 ValidateIndexState *state)
 {
+	/*
+	 * This is only called for concurrent index builds,
+	 * see table_index_validate_scan.
+	 * Note that we already error out for concurrent index
+	 * builds in utility hook but be on the safe side.
+	 */
 	ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					errmsg("indexes not supported for columnar tables")));
+					errmsg("concurrent index builds are not supported for "
+						   "columnar tables")));
 }
@@ -1348,24 +1706,34 @@ ColumnarProcessUtility(PlannedStmt *pstmt,
 	{
 		IndexStmt *indexStmt = (IndexStmt *) parsetree;
 
-		/*
-		 * We should reject CREATE INDEX CONCURRENTLY before DefineIndex() is
-		 * called. Erroring in callbacks called from DefineIndex() will create
-		 * the index and mark it as INVALID, which will cause segfault during
-		 * inserts.
-		 */
-		if (indexStmt->concurrent)
+		Relation rel = relation_openrv(indexStmt->relation,
+									   GetCreateIndexRelationLockMode(indexStmt));
+		if (rel->rd_tableam == GetColumnarTableAmRoutine())
 		{
-			Relation rel = relation_openrv(indexStmt->relation,
-										   ShareUpdateExclusiveLock);
-			if (rel->rd_tableam == GetColumnarTableAmRoutine())
+			/*
+			 * We should reject CREATE INDEX CONCURRENTLY before DefineIndex() is
+			 * called. Erroring in callbacks called from DefineIndex() will create
+			 * the index and mark it as INVALID, which will cause segfault during
+			 * inserts.
+			 */
+			if (indexStmt->concurrent)
 			{
 				ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-								errmsg("indexes not supported for columnar tables")));
+								errmsg("concurrent index commands are not "
+									   "supported for columnar tables")));
 			}
 
-			RelationClose(rel);
+			/* for now, we don't support index access methods other than btree & hash */
+			if (strncmp(indexStmt->accessMethod, "btree", NAMEDATALEN) != 0 &&
+				strncmp(indexStmt->accessMethod, "hash", NAMEDATALEN) != 0)
+			{
+				ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+								errmsg("only btree and hash indexes are supported on "
+									   "columnar tables ")));
+			}
 		}
+
+		RelationClose(rel);
 	}
 
 	PrevProcessUtilityHook(pstmt, queryString, context,
@@ -1407,6 +1775,17 @@ static const TableAmRoutine columnar_am_methods = {
 	.scan_rescan = columnar_rescan,
 	.scan_getnextslot = columnar_getnextslot,
 
+	/*
+	 * Postgres calls following three callbacks during index builds, if it
+	 * decides to use parallel workers when building the index. On the other
+	 * hand, we don't support parallel scans on columnar tables but we also
+	 * want to fallback to serial index build. For this reason, we both skip
+	 * parallel workers in columnar_index_build_range_scan and also provide
+	 * basic implementations for those callbacks based on their corresponding
+	 * implementations in heapAM.
+	 * Note that for regular query plans, we already ignore parallel paths via
+	 * ColumnarSetRelPathlistHook.
+	 */
 	.parallelscan_estimate = columnar_parallelscan_estimate,
 	.parallelscan_initialize = columnar_parallelscan_initialize,
 	.parallelscan_reinitialize = columnar_parallelscan_reinitialize,
@@ -62,7 +62,6 @@ static List * GenerateIndexParameters(IndexStmt *createIndexStatement);
 static DDLJob * GenerateCreateIndexDDLJob(IndexStmt *createIndexStatement,
 										  const char *createIndexCommand);
 static Oid CreateIndexStmtGetRelationId(IndexStmt *createIndexStatement);
-static LOCKMODE GetCreateIndexRelationLockMode(IndexStmt *createIndexStatement);
 static List * CreateIndexTaskList(IndexStmt *indexStmt);
 static List * CreateReindexTaskList(Oid relationId, ReindexStmt *reindexStmt);
 static void RangeVarCallbackForDropIndex(const RangeVar *rel, Oid relOid, Oid oldRelOid,

@@ -502,7 +501,7 @@ CreateIndexStmtGetRelationId(IndexStmt *createIndexStatement)
  * GetCreateIndexRelationLockMode returns required lock mode to open the
  * relation that given CREATE INDEX command operates on.
  */
-static LOCKMODE
+LOCKMODE
 GetCreateIndexRelationLockMode(IndexStmt *createIndexStatement)
 {
 	if (createIndexStatement->concurrent)
@@ -216,6 +216,9 @@ extern ColumnarReadState * ColumnarBeginRead(Relation relation,
 extern bool ColumnarReadNextRow(ColumnarReadState *state, Datum *columnValues,
 								bool *columnNulls, uint64 *rowNumber);
 extern void ColumnarRescan(ColumnarReadState *readState);
+extern bool ColumnarReadRowByRowNumber(Relation relation, uint64 rowNumber,
+									   List *neededColumnList, Datum *columnValues,
+									   bool *columnNulls, Snapshot snapshot);
 extern void ColumnarEndRead(ColumnarReadState *state);
 extern int64 ColumnarReadChunkGroupsFiltered(ColumnarReadState *state);

@@ -251,6 +254,10 @@ extern void SaveChunkGroups(RelFileNode relfilenode, uint64 stripe,
 extern StripeSkipList * ReadStripeSkipList(RelFileNode relfilenode, uint64 stripe,
 										   TupleDesc tupleDescriptor,
 										   uint32 chunkCount);
+extern StripeMetadata * FindStripeByRowNumber(Relation relation, uint64 rowNumber,
+											  Snapshot snapshot);
+extern StripeMetadata * FindStripeWithHighestRowNumber(Relation relation,
+													   Snapshot snapshot);
 extern Datum columnar_relation_storageid(PG_FUNCTION_ARGS);
@@ -266,6 +266,7 @@ extern char * ChooseIndexName(const char *tabname, Oid namespaceId,
 							  bool primary, bool isconstraint);
 extern char * ChooseIndexNameAddition(List *colnames);
 extern List * ChooseIndexColumnNames(List *indexElems);
+extern LOCKMODE GetCreateIndexRelationLockMode(IndexStmt *createIndexStatement);
 extern List * PreprocessReindexStmt(Node *ReindexStatement,
 									const char *ReindexCommand,
 									ProcessUtilityContext processUtilityContext);
@@ -255,7 +255,6 @@ insert into atacc1 values(1);
 alter table atacc1
 add column b float8 not null default random(),
 add primary key(a);
-ERROR: indexes not supported for columnar tables
 -- Add a generate column with an expression value
 create table test_gen_ex (x int) using columnar;
 INSERT INTO test_gen_ex VALUES (1), (2), (3);
@@ -390,30 +389,30 @@ SELECT * FROM products ORDER BY 1;
 3 | pen | 2
 (3 rows)
 
--- Add a UNIQUE constraint (should fail)
-CREATE TABLE products_fail (
+-- Add a UNIQUE constraint
+CREATE TABLE products_unique (
 product_no integer UNIQUE,
 name text,
 price numeric
 ) USING columnar;
-ERROR: indexes not supported for columnar tables
 ALTER TABLE products ADD COLUMN store_id text UNIQUE;
-ERROR: indexes not supported for columnar tables
--- Add a PRIMARY KEY constraint (should fail)
-CREATE TABLE products_fail (
+-- Add a PRIMARY KEY constraint
+CREATE TABLE products_primary (
 product_no integer PRIMARY KEY,
 name text,
 price numeric
 ) USING columnar;
-ERROR: indexes not supported for columnar tables
-ALTER TABLE products ADD COLUMN store_id text PRIMARY KEY;
-ERROR: indexes not supported for columnar tables
+BEGIN;
+ALTER TABLE products DROP COLUMN store_id;
+ALTER TABLE products ADD COLUMN store_id text PRIMARY KEY;
+ERROR: column "store_id" contains null values
+ROLLBACK;
 -- Add an EXCLUSION constraint (should fail)
 CREATE TABLE circles (
 c circle,
 EXCLUDE USING gist (c WITH &&)
 ) USING columnar;
-ERROR: indexes not supported for columnar tables
+ERROR: only btree and hash indexes are supported on columnar tables
 -- Row level security
 CREATE TABLE public.row_level_security_col (id int, pgUser CHARACTER VARYING) USING columnar;
 CREATE USER user1;
@@ -11,9 +11,7 @@ SELECT alter_columnar_table_set('contestant', compression => 'none');
 
 (1 row)
 
--- should fail
 CREATE INDEX contestant_idx on contestant(handle);
-ERROR: indexes not supported for columnar tables
 -- Create zstd compressed table
 CREATE TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT,
 percentile FLOAT, country CHAR(3), achievements TEXT[])
@@ -10,7 +10,7 @@ SET search_path tO columnar_indexes, public;
 --
 create table t(a int, b int) using columnar;
 create index CONCURRENTLY t_idx on t(a, b);
-ERROR: indexes not supported for columnar tables
+ERROR: concurrent index commands are not supported for columnar tables
 \d t
 Table "columnar_indexes.t"
 Column | Type | Collation | Nullable | Default
@@ -32,16 +32,15 @@ SELECT * FROM t;
 1 | 2
 (1 row)
 
--- create index without the concurrent option. We should
--- error out during index creation.
 create index t_idx on t(a, b);
-ERROR: indexes not supported for columnar tables
 \d t
 Table "columnar_indexes.t"
 Column | Type | Collation | Nullable | Default
 ---------------------------------------------------------------------
 a | integer | | |
 b | integer | | |
+Indexes:
+    "t_idx" btree (a, b)
 
 explain insert into t values (1, 2);
 QUERY PLAN
@ -58,5 +57,347 @@ SELECT * FROM t;
|
||||||
3 | 4
|
3 | 4
|
||||||
(2 rows)
|
(2 rows)
|
||||||
|
|
||||||
|
-- make sure that we test index scan
|
||||||
|
set columnar.enable_custom_scan to 'off';
|
||||||
|
set enable_seqscan to off;
|
||||||
|
CREATE table columnar_table (a INT, b int) USING columnar;
|
||||||
|
INSERT INTO columnar_table (a, b) SELECT i,i*2 FROM generate_series(0, 16000) i;
|
||||||
|
-- unique --
|
||||||
|
BEGIN;
|
||||||
|
INSERT INTO columnar_table VALUES (100000000);
|
||||||
|
SAVEPOINT s1;
|
||||||
|
-- errors out due to unflushed data in upper transaction
|
||||||
|
CREATE UNIQUE INDEX ON columnar_table (a);
|
||||||
|
ERROR: cannot read from table when there is unflushed data in upper transactions
|
||||||
|
ROLLBACK;
|
||||||
|
CREATE UNIQUE INDEX ON columnar_table (a);
|
||||||
|
BEGIN;
|
||||||
|
INSERT INTO columnar_table VALUES (16050);
|
||||||
|
SAVEPOINT s1;
|
||||||
|
-- index scan errors out due to unflushed data in upper transaction
|
||||||
|
SELECT a FROM columnar_table WHERE a = 16050;
|
||||||
|
ERROR: cannot read from index when there is unflushed data in upper transactions
|
||||||
|
ROLLBACK;
|
||||||
|
EXPLAIN (COSTS OFF) SELECT * FROM columnar_table WHERE a=6456;
|
||||||
|
QUERY PLAN
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
Index Scan using columnar_table_a_idx on columnar_table
|
||||||
|
Index Cond: (a = 6456)
|
||||||
|
(2 rows)
|
||||||
|
|
||||||
|
EXPLAIN (COSTS OFF) SELECT a FROM columnar_table WHERE a=6456;
|
||||||
|
QUERY PLAN
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
Index Only Scan using columnar_table_a_idx on columnar_table
|
||||||
|
Index Cond: (a = 6456)
|
||||||
|
(2 rows)
|
||||||
|
|
||||||
|
SELECT (SELECT a FROM columnar_table WHERE a=6456 limit 1)=6456;
|
||||||
|
?column?
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT (SELECT b FROM columnar_table WHERE a=6456 limit 1)=6456*2;
|
||||||
|
?column?
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- even if a=16050 doesn't exist, we try to insert it twice so this should error out
|
||||||
|
INSERT INTO columnar_table VALUES (16050), (16050);
|
||||||
|
ERROR: duplicate key value violates unique constraint "columnar_table_a_idx"
|
||||||
|
DETAIL: Key (a)=(16050) already exists.
|
||||||
|
-- should work
|
||||||
|
INSERT INTO columnar_table VALUES (16050);
|
||||||
|
-- check edge cases around stripe boundaries, error out
|
||||||
|
INSERT INTO columnar_table VALUES (16050);
|
||||||
|
ERROR: duplicate key value violates unique constraint "columnar_table_a_idx"
|
||||||
|
DETAIL: Key (a)=(16050) already exists.
|
||||||
|
INSERT INTO columnar_table VALUES (15999);
|
||||||
|
ERROR: duplicate key value violates unique constraint "columnar_table_a_idx"
|
||||||
|
DETAIL: Key (a)=(15999) already exists.
|
||||||
|
DROP INDEX columnar_table_a_idx;
|
||||||
|
CREATE TABLE partial_unique_idx_test (a INT, b INT) USING columnar;
|
||||||
|
CREATE UNIQUE INDEX ON partial_unique_idx_test (a)
|
||||||
|
WHERE b > 500;
|
||||||
|
-- should work since b =< 500 and our partial index doesn't check this interval
|
||||||
|
INSERT INTO partial_unique_idx_test VALUES (1, 2), (1, 2);
|
||||||
|
-- should work since our partial index wouldn't cover the tuples that we inserted above
|
||||||
|
INSERT INTO partial_unique_idx_test VALUES (1, 800);
|
||||||
|
INSERT INTO partial_unique_idx_test VALUES (4, 600);
|
||||||
|
-- should error out due to (4, 600)
|
||||||
|
INSERT INTO partial_unique_idx_test VALUES (4, 700);
|
||||||
|
ERROR: duplicate key value violates unique constraint "partial_unique_idx_test_a_idx"
|
||||||
|
DETAIL: Key (a)=(4) already exists.
-- btree --
CREATE INDEX ON columnar_table (a);
SELECT (SELECT SUM(b) FROM columnar_table WHERE a>700 and a<965)=439560;
 ?column?
---------------------------------------------------------------------
 t
(1 row)

CREATE INDEX ON columnar_table (b)
WHERE (b > 30000 AND b < 33000);
-- partial index should be way smaller than the non-partial index
SELECT pg_total_relation_size('columnar_table_b_idx') * 5 <
pg_total_relation_size('columnar_table_a_idx');
 ?column?
---------------------------------------------------------------------
 t
(1 row)

-- can't use index scan due to partial index boundaries
EXPLAIN (COSTS OFF) SELECT b FROM columnar_table WHERE b = 30000;
 QUERY PLAN
---------------------------------------------------------------------
 Seq Scan on columnar_table
   Filter: (b = 30000)
(2 rows)

-- can use index scan
EXPLAIN (COSTS OFF) SELECT b FROM columnar_table WHERE b = 30001;
 QUERY PLAN
---------------------------------------------------------------------
 Index Only Scan using columnar_table_b_idx on columnar_table
   Index Cond: (b = 30001)
(2 rows)
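
-- Sketch (not part of the test): the planner considers the partial index only
-- when the query predicate provably implies the index predicate, so a range
-- strictly contained in (b > 30000 AND b < 33000) should also qualify:
--
--   EXPLAIN (COSTS OFF) SELECT b FROM columnar_table WHERE b > 31000 AND b < 32000;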

-- some more rows
INSERT INTO columnar_table (a, b) SELECT i,i*2 FROM generate_series(16000, 17000) i;
DROP INDEX columnar_table_a_idx;
TRUNCATE columnar_table;
-- pkey --
INSERT INTO columnar_table (a, b) SELECT i,i*2 FROM generate_series(16000, 16499) i;
ALTER TABLE columnar_table ADD PRIMARY KEY (a);
INSERT INTO columnar_table (a, b) SELECT i,i*2 FROM generate_series(16500, 17000) i;
BEGIN;
INSERT INTO columnar_table (a) SELECT 1;
ROLLBACK;
-- should work
INSERT INTO columnar_table (a) SELECT 1;
-- error out
INSERT INTO columnar_table VALUES (16100), (16101);
ERROR: duplicate key value violates unique constraint "columnar_table_pkey"
DETAIL: Key (a)=(16100) already exists.
INSERT INTO columnar_table VALUES (16999);
ERROR: duplicate key value violates unique constraint "columnar_table_pkey"
DETAIL: Key (a)=(16999) already exists.
BEGIN;
REINDEX INDEX columnar_table_pkey;
-- should error even after reindex
INSERT INTO columnar_table VALUES (16999);
ERROR: duplicate key value violates unique constraint "columnar_table_pkey"
DETAIL: Key (a)=(16999) already exists.
ROLLBACK;
VACUUM FULL columnar_table;
-- show that we don't support clustering columnar tables using indexes
CLUSTER columnar_table USING columnar_table_pkey;
ERROR: clustering columnar tables using indexes is not supported
ALTER TABLE columnar_table CLUSTER ON columnar_table_pkey;
CLUSTER columnar_table;
ERROR: clustering columnar tables using indexes is not supported
-- should error even after vacuum
INSERT INTO columnar_table VALUES (16999);
ERROR: duplicate key value violates unique constraint "columnar_table_pkey"
DETAIL: Key (a)=(16999) already exists.
TRUNCATE columnar_table;
INSERT INTO columnar_table (a, b) SELECT i,i*2 FROM generate_series(1, 160000) i;
SELECT (SELECT b FROM columnar_table WHERE a = 150000)=300000;
 ?column?
---------------------------------------------------------------------
 t
(1 row)

TRUNCATE columnar_table;
ALTER TABLE columnar_table DROP CONSTRAINT columnar_table_pkey;
-- hash --
INSERT INTO columnar_table (a, b) SELECT i*2,i FROM generate_series(1, 8000) i;
CREATE INDEX hash_idx ON columnar_table USING HASH (b);
BEGIN;
CREATE INDEX hash_idx_fill_factor ON columnar_table USING HASH (b) WITH (fillfactor=10);
-- same hash index with lower fillfactor should be way bigger
SELECT pg_total_relation_size ('hash_idx_fill_factor') >
pg_total_relation_size ('hash_idx') * 5;
 ?column?
---------------------------------------------------------------------
 t
(1 row)

ROLLBACK;
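
-- Sketch (not part of the test): inside the transaction above, before the
-- ROLLBACK, the two index sizes could also be inspected directly:
--
--   SELECT relname, pg_size_pretty(pg_total_relation_size(oid))
--   FROM pg_class
--   WHERE relname IN ('hash_idx', 'hash_idx_fill_factor');
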
BEGIN;
INSERT INTO columnar_table (a, b) SELECT i*3,i FROM generate_series(1, 8000) i;
ROLLBACK;
INSERT INTO columnar_table (a, b) SELECT i*4,i FROM generate_series(1, 8000) i;
SELECT SUM(a)=42000 FROM columnar_table WHERE b = 7000;
 ?column?
---------------------------------------------------------------------
 t
(1 row)

BEGIN;
REINDEX TABLE columnar_table;
SELECT SUM(a)=42000 FROM columnar_table WHERE b = 7000;
 ?column?
---------------------------------------------------------------------
 t
(1 row)

ROLLBACK;
VACUUM FULL columnar_table;
SELECT SUM(a)=42000 FROM columnar_table WHERE b = 7000;
 ?column?
---------------------------------------------------------------------
 t
(1 row)

-- exclusion constraints --
CREATE TABLE exclusion_test (c1 INT,c2 INT, c3 INT, c4 BOX,
EXCLUDE USING btree (c1 WITH =) INCLUDE(c3,c4) WHERE (c1 < 10)) USING columnar;
-- error out since "c1" is "1" for all rows to be inserted
INSERT INTO exclusion_test SELECT 1, 2, 3*x, BOX('4,4,4,4') FROM generate_series(1,3) AS x;
ERROR: conflicting key value violates exclusion constraint "exclusion_test_c1_c3_c4_excl"
DETAIL: Key (c1)=(1) conflicts with existing key (c1)=(1).
BEGIN;
INSERT INTO exclusion_test SELECT x, 2, 3*x, BOX('4,4,4,4') FROM generate_series(1,3) AS x;
ROLLBACK;
-- should work
INSERT INTO exclusion_test SELECT x, 2, 3*x, BOX('4,4,4,4') FROM generate_series(1,3) AS x;
INSERT INTO exclusion_test SELECT x, 2, 3*x, BOX('4,4,4,4') FROM generate_series(10,15) AS x;
BEGIN;
-- should work thanks to "where" clause in exclusion constraint
INSERT INTO exclusion_test SELECT x, 2, 3*x, BOX('4,4,4,4') FROM generate_series(10,15) AS x;
ROLLBACK;
REINDEX TABLE exclusion_test;
-- should still work after reindex
INSERT INTO exclusion_test SELECT x, 2, 3*x, BOX('4,4,4,4') FROM generate_series(10,15) AS x;
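
-- Sketch (not part of the test): an exclusion constraint that needs an
-- unsupported access method, e.g. gist over the BOX column, should be rejected
-- the same way standalone gist indexes are rejected further below:
--
--   ALTER TABLE exclusion_test ADD EXCLUDE USING gist (c4 WITH &&);
--   ERROR: only btree and hash indexes are supported on columnar tables
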
-- make sure that we respect INCLUDE syntax --
CREATE TABLE include_test (a INT, b BIGINT, c BIGINT, d BIGINT) USING columnar;
INSERT INTO include_test SELECT i, i, i, i FROM generate_series (1, 1000) i;
CREATE UNIQUE INDEX unique_a ON include_test (a);
-- cannot use index only scan
EXPLAIN (COSTS OFF) SELECT b FROM include_test WHERE a = 500;
 QUERY PLAN
---------------------------------------------------------------------
 Index Scan using unique_a on include_test
   Index Cond: (a = 500)
(2 rows)

CREATE UNIQUE INDEX unique_a_include_b_c_d ON include_test (a) INCLUDE(b, c, d);
-- same unique index that includes other columns should be way bigger
SELECT pg_total_relation_size ('unique_a') * 1.5 <
pg_total_relation_size ('unique_a_include_b_c_d');
 ?column?
---------------------------------------------------------------------
 t
(1 row)

DROP INDEX unique_a;
-- should use index only scan since unique_a_include_b_c_d includes column "b" too
EXPLAIN (COSTS OFF) SELECT b FROM include_test WHERE a = 500;
 QUERY PLAN
---------------------------------------------------------------------
 Index Only Scan using unique_a_include_b_c_d on include_test
   Index Cond: (a = 500)
(2 rows)

BEGIN;
SET enable_indexonlyscan = OFF;
-- show that we respect enable_indexonlyscan GUC
EXPLAIN (COSTS OFF) SELECT b FROM include_test WHERE a = 500;
 QUERY PLAN
---------------------------------------------------------------------
 Index Scan using unique_a_include_b_c_d on include_test
   Index Cond: (a = 500)
(2 rows)

ROLLBACK;
-- make sure that we read the correct value for "b" when doing index only scan
SELECT b=980 FROM include_test WHERE a = 980;
 ?column?
---------------------------------------------------------------------
 t
(1 row)
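
-- Sketch (not part of the test): c and d are also stored in
-- unique_a_include_b_c_d, so a query touching only included columns should
-- likewise qualify for an index-only scan:
--
--   EXPLAIN (COSTS OFF) SELECT c, d FROM include_test WHERE a = 42;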

-- some tests with distributed & partitioned tables --
CREATE TABLE dist_part_table(
  dist_col INT,
  part_col TIMESTAMPTZ,
  col1 TEXT
) PARTITION BY RANGE (part_col);
-- create an index before creating a columnar partition
CREATE INDEX dist_part_table_btree ON dist_part_table (col1);
-- columnar partition
CREATE TABLE p0 PARTITION OF dist_part_table
FOR VALUES FROM ('2020-01-01') TO ('2020-02-01')
USING columnar;
SELECT create_distributed_table('dist_part_table', 'dist_col');
 create_distributed_table
---------------------------------------------------------------------

(1 row)

-- columnar partition
CREATE TABLE p1 PARTITION OF dist_part_table
FOR VALUES FROM ('2020-02-01') TO ('2020-03-01')
USING columnar;
-- row partition
CREATE TABLE p2 PARTITION OF dist_part_table
FOR VALUES FROM ('2020-03-01') TO ('2020-04-01');
INSERT INTO dist_part_table VALUES (1, '2020-03-15', 'str1', POINT(1, 1));
ERROR: INSERT has more expressions than target columns
-- insert into columnar partitions
INSERT INTO dist_part_table VALUES (1, '2020-01-15', 'str2', POINT(2, 2));
ERROR: INSERT has more expressions than target columns
INSERT INTO dist_part_table VALUES (1, '2020-02-15', 'str3', POINT(3, 3));
ERROR: INSERT has more expressions than target columns
-- create another index after creating a columnar partition
CREATE UNIQUE INDEX dist_part_table_unique ON dist_part_table (dist_col, part_col);
-- verify that indexes are created on columnar partitions
SELECT COUNT(*)=2 FROM pg_indexes WHERE tablename = 'p0';
 ?column?
---------------------------------------------------------------------
 t
(1 row)

SELECT COUNT(*)=2 FROM pg_indexes WHERE tablename = 'p1';
 ?column?
---------------------------------------------------------------------
 t
(1 row)
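
-- Sketch (not part of the test): the implicitly created partition-level
-- indexes can also be listed by name:
--
--   SELECT indexname FROM pg_indexes WHERE tablename IN ('p0', 'p1') ORDER BY 1;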

-- unsupported index types --
-- gin --
CREATE TABLE testjsonb (j JSONB) USING columnar;
INSERT INTO testjsonb SELECT CAST('{"f1" : ' ||'"'|| i*4 ||'", ' || '"f2" : '||'"'|| i*10 ||'"}' AS JSON) FROM generate_series(1,10) i;
CREATE INDEX jidx ON testjsonb USING GIN (j);
ERROR: only btree and hash indexes are supported on columnar tables
INSERT INTO testjsonb SELECT CAST('{"f1" : ' ||'"'|| i*4 ||'", ' || '"f2" : '||'"'|| i*10 ||'"}' AS JSON) FROM generate_series(15,20) i;
-- gist --
CREATE TABLE gist_point_tbl(id INT4, p POINT) USING columnar;
INSERT INTO gist_point_tbl (id, p) SELECT g, point(g*10, g*10) FROM generate_series(1, 10) g;
CREATE INDEX gist_pointidx ON gist_point_tbl USING gist(p);
ERROR: only btree and hash indexes are supported on columnar tables
INSERT INTO gist_point_tbl (id, p) SELECT g, point(g*10, g*10) FROM generate_series(10, 20) g;
-- sp gist --
CREATE TABLE box_temp (f1 box) USING columnar;
INSERT INTO box_temp SELECT box(point(i, i), point(i * 2, i * 2)) FROM generate_series(1, 10) AS i;
CREATE INDEX box_spgist ON box_temp USING spgist (f1);
ERROR: only btree and hash indexes are supported on columnar tables
INSERT INTO box_temp SELECT box(point(i, i), point(i * 2, i * 2)) FROM generate_series(1, 10) AS i;
-- brin --
CREATE TABLE brin_summarize (value int) USING columnar;
CREATE INDEX brin_summarize_idx ON brin_summarize USING brin (value) WITH (pages_per_range=2);
ERROR: only btree and hash indexes are supported on columnar tables
-- Show that we safely fall back to serial index build.
CREATE TABLE parallel_scan_test(a int) USING columnar WITH ( parallel_workers = 2 );
INSERT INTO parallel_scan_test SELECT i FROM generate_series(1,10) i;
CREATE INDEX ON parallel_scan_test (a);
NOTICE: falling back to serial index build since parallel scan on columnar tables is not supported
VACUUM FULL parallel_scan_test;
NOTICE: falling back to serial index build since parallel scan on columnar tables is not supported
REINDEX TABLE parallel_scan_test;
NOTICE: falling back to serial index build since parallel scan on columnar tables is not supported
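
-- Sketch (not part of the test): the access method behind any surviving index
-- can be confirmed through the catalogs:
--
--   SELECT c.relname, am.amname
--   FROM pg_class c JOIN pg_am am ON am.oid = c.relam
--   WHERE c.relname = 'hash_idx';
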
SET client_min_messages TO WARNING;
DROP SCHEMA columnar_indexes CASCADE;

@@ -215,21 +215,25 @@ ALTER TABLE products DROP CONSTRAINT dummy_constraint;
 INSERT INTO products VALUES (3, 'pen', 2);
 SELECT * FROM products ORDER BY 1;
 
--- Add a UNIQUE constraint (should fail)
+-- Add a UNIQUE constraint
-CREATE TABLE products_fail (
+CREATE TABLE products_unique (
     product_no integer UNIQUE,
     name text,
     price numeric
 ) USING columnar;
 ALTER TABLE products ADD COLUMN store_id text UNIQUE;
 
--- Add a PRIMARY KEY constraint (should fail)
+-- Add a PRIMARY KEY constraint
-CREATE TABLE products_fail (
+CREATE TABLE products_primary (
     product_no integer PRIMARY KEY,
     name text,
     price numeric
 ) USING columnar;
-ALTER TABLE products ADD COLUMN store_id text PRIMARY KEY;
+BEGIN;
+ALTER TABLE products DROP COLUMN store_id;
+ALTER TABLE products ADD COLUMN store_id text PRIMARY KEY;
+ROLLBACK;
 
 -- Add an EXCLUSION constraint (should fail)
 CREATE TABLE circles (

@@ -9,7 +9,6 @@ CREATE TABLE contestant (handle TEXT, birthdate DATE, rating INT,
 	USING columnar;
 SELECT alter_columnar_table_set('contestant', compression => 'none');
 
--- should fail
 CREATE INDEX contestant_idx on contestant(handle);
 
 -- Create zstd compressed table

@@ -17,13 +17,283 @@ explain insert into t values (1, 2);
 insert into t values (1, 2);
 SELECT * FROM t;
 
--- create index without the concurrent option. We should
--- error out during index creation.
 create index t_idx on t(a, b);
 \d t
 explain insert into t values (1, 2);
 insert into t values (3, 4);
 SELECT * FROM t;
 
-- make sure that we test index scan
set columnar.enable_custom_scan to 'off';
set enable_seqscan to off;

CREATE table columnar_table (a INT, b int) USING columnar;
INSERT INTO columnar_table (a, b) SELECT i,i*2 FROM generate_series(0, 16000) i;

-- unique --
BEGIN;
INSERT INTO columnar_table VALUES (100000000);
SAVEPOINT s1;
-- errors out due to unflushed data in upper transaction
CREATE UNIQUE INDEX ON columnar_table (a);
ROLLBACK;

CREATE UNIQUE INDEX ON columnar_table (a);

BEGIN;
INSERT INTO columnar_table VALUES (16050);
SAVEPOINT s1;
-- index scan errors out due to unflushed data in upper transaction
SELECT a FROM columnar_table WHERE a = 16050;
ROLLBACK;

EXPLAIN (COSTS OFF) SELECT * FROM columnar_table WHERE a=6456;
EXPLAIN (COSTS OFF) SELECT a FROM columnar_table WHERE a=6456;
SELECT (SELECT a FROM columnar_table WHERE a=6456 limit 1)=6456;
SELECT (SELECT b FROM columnar_table WHERE a=6456 limit 1)=6456*2;

-- even if a=16050 doesn't exist, we try to insert it twice so this should error out
INSERT INTO columnar_table VALUES (16050), (16050);

-- should work
INSERT INTO columnar_table VALUES (16050);

-- check edge cases around stripe boundaries, error out
INSERT INTO columnar_table VALUES (16050);
INSERT INTO columnar_table VALUES (15999);

DROP INDEX columnar_table_a_idx;

CREATE TABLE partial_unique_idx_test (a INT, b INT) USING columnar;
CREATE UNIQUE INDEX ON partial_unique_idx_test (a)
WHERE b > 500;

-- should work since b <= 500 and our partial index doesn't check this interval
INSERT INTO partial_unique_idx_test VALUES (1, 2), (1, 2);

-- should work since our partial index wouldn't cover the tuples that we inserted above
INSERT INTO partial_unique_idx_test VALUES (1, 800);

INSERT INTO partial_unique_idx_test VALUES (4, 600);

-- should error out due to (4, 600)
INSERT INTO partial_unique_idx_test VALUES (4, 700);

-- btree --
CREATE INDEX ON columnar_table (a);
SELECT (SELECT SUM(b) FROM columnar_table WHERE a>700 and a<965)=439560;

CREATE INDEX ON columnar_table (b)
WHERE (b > 30000 AND b < 33000);

-- partial index should be way smaller than the non-partial index
SELECT pg_total_relation_size('columnar_table_b_idx') * 5 <
pg_total_relation_size('columnar_table_a_idx');

-- can't use index scan due to partial index boundaries
EXPLAIN (COSTS OFF) SELECT b FROM columnar_table WHERE b = 30000;
-- can use index scan
EXPLAIN (COSTS OFF) SELECT b FROM columnar_table WHERE b = 30001;

-- some more rows
INSERT INTO columnar_table (a, b) SELECT i,i*2 FROM generate_series(16000, 17000) i;

DROP INDEX columnar_table_a_idx;
TRUNCATE columnar_table;

-- pkey --
INSERT INTO columnar_table (a, b) SELECT i,i*2 FROM generate_series(16000, 16499) i;
ALTER TABLE columnar_table ADD PRIMARY KEY (a);
INSERT INTO columnar_table (a, b) SELECT i,i*2 FROM generate_series(16500, 17000) i;

BEGIN;
INSERT INTO columnar_table (a) SELECT 1;
ROLLBACK;

-- should work
INSERT INTO columnar_table (a) SELECT 1;

-- error out
INSERT INTO columnar_table VALUES (16100), (16101);
INSERT INTO columnar_table VALUES (16999);

BEGIN;
REINDEX INDEX columnar_table_pkey;
-- should error even after reindex
INSERT INTO columnar_table VALUES (16999);
ROLLBACK;

VACUUM FULL columnar_table;

-- show that we don't support clustering columnar tables using indexes
CLUSTER columnar_table USING columnar_table_pkey;

ALTER TABLE columnar_table CLUSTER ON columnar_table_pkey;
CLUSTER columnar_table;

-- should error even after vacuum
INSERT INTO columnar_table VALUES (16999);

TRUNCATE columnar_table;
INSERT INTO columnar_table (a, b) SELECT i,i*2 FROM generate_series(1, 160000) i;
SELECT (SELECT b FROM columnar_table WHERE a = 150000)=300000;

TRUNCATE columnar_table;
ALTER TABLE columnar_table DROP CONSTRAINT columnar_table_pkey;

-- hash --
INSERT INTO columnar_table (a, b) SELECT i*2,i FROM generate_series(1, 8000) i;
CREATE INDEX hash_idx ON columnar_table USING HASH (b);

BEGIN;
CREATE INDEX hash_idx_fill_factor ON columnar_table USING HASH (b) WITH (fillfactor=10);
-- same hash index with lower fillfactor should be way bigger
SELECT pg_total_relation_size ('hash_idx_fill_factor') >
pg_total_relation_size ('hash_idx') * 5;
ROLLBACK;

BEGIN;
INSERT INTO columnar_table (a, b) SELECT i*3,i FROM generate_series(1, 8000) i;
ROLLBACK;

INSERT INTO columnar_table (a, b) SELECT i*4,i FROM generate_series(1, 8000) i;

SELECT SUM(a)=42000 FROM columnar_table WHERE b = 7000;

BEGIN;
REINDEX TABLE columnar_table;
SELECT SUM(a)=42000 FROM columnar_table WHERE b = 7000;
ROLLBACK;

VACUUM FULL columnar_table;
SELECT SUM(a)=42000 FROM columnar_table WHERE b = 7000;

-- exclusion constraints --
CREATE TABLE exclusion_test (c1 INT,c2 INT, c3 INT, c4 BOX,
EXCLUDE USING btree (c1 WITH =) INCLUDE(c3,c4) WHERE (c1 < 10)) USING columnar;

-- error out since "c1" is "1" for all rows to be inserted
INSERT INTO exclusion_test SELECT 1, 2, 3*x, BOX('4,4,4,4') FROM generate_series(1,3) AS x;

BEGIN;
INSERT INTO exclusion_test SELECT x, 2, 3*x, BOX('4,4,4,4') FROM generate_series(1,3) AS x;
ROLLBACK;

-- should work
INSERT INTO exclusion_test SELECT x, 2, 3*x, BOX('4,4,4,4') FROM generate_series(1,3) AS x;

INSERT INTO exclusion_test SELECT x, 2, 3*x, BOX('4,4,4,4') FROM generate_series(10,15) AS x;

BEGIN;
-- should work thanks to "where" clause in exclusion constraint
INSERT INTO exclusion_test SELECT x, 2, 3*x, BOX('4,4,4,4') FROM generate_series(10,15) AS x;
ROLLBACK;

REINDEX TABLE exclusion_test;
-- should still work after reindex
INSERT INTO exclusion_test SELECT x, 2, 3*x, BOX('4,4,4,4') FROM generate_series(10,15) AS x;

-- make sure that we respect INCLUDE syntax --

CREATE TABLE include_test (a INT, b BIGINT, c BIGINT, d BIGINT) USING columnar;

INSERT INTO include_test SELECT i, i, i, i FROM generate_series (1, 1000) i;

CREATE UNIQUE INDEX unique_a ON include_test (a);

-- cannot use index only scan
EXPLAIN (COSTS OFF) SELECT b FROM include_test WHERE a = 500;

CREATE UNIQUE INDEX unique_a_include_b_c_d ON include_test (a) INCLUDE(b, c, d);

-- same unique index that includes other columns should be way bigger
SELECT pg_total_relation_size ('unique_a') * 1.5 <
pg_total_relation_size ('unique_a_include_b_c_d');

DROP INDEX unique_a;

-- should use index only scan since unique_a_include_b_c_d includes column "b" too
EXPLAIN (COSTS OFF) SELECT b FROM include_test WHERE a = 500;

BEGIN;
SET enable_indexonlyscan = OFF;
-- show that we respect enable_indexonlyscan GUC
EXPLAIN (COSTS OFF) SELECT b FROM include_test WHERE a = 500;
ROLLBACK;

-- make sure that we read the correct value for "b" when doing index only scan
SELECT b=980 FROM include_test WHERE a = 980;

-- some tests with distributed & partitioned tables --

CREATE TABLE dist_part_table(
  dist_col INT,
  part_col TIMESTAMPTZ,
  col1 TEXT
) PARTITION BY RANGE (part_col);

-- create an index before creating a columnar partition
CREATE INDEX dist_part_table_btree ON dist_part_table (col1);

-- columnar partition
CREATE TABLE p0 PARTITION OF dist_part_table
FOR VALUES FROM ('2020-01-01') TO ('2020-02-01')
USING columnar;

SELECT create_distributed_table('dist_part_table', 'dist_col');

-- columnar partition
CREATE TABLE p1 PARTITION OF dist_part_table
FOR VALUES FROM ('2020-02-01') TO ('2020-03-01')
USING columnar;

-- row partition
CREATE TABLE p2 PARTITION OF dist_part_table
FOR VALUES FROM ('2020-03-01') TO ('2020-04-01');

INSERT INTO dist_part_table VALUES (1, '2020-03-15', 'str1', POINT(1, 1));

-- insert into columnar partitions
INSERT INTO dist_part_table VALUES (1, '2020-01-15', 'str2', POINT(2, 2));
INSERT INTO dist_part_table VALUES (1, '2020-02-15', 'str3', POINT(3, 3));

-- create another index after creating a columnar partition
CREATE UNIQUE INDEX dist_part_table_unique ON dist_part_table (dist_col, part_col);

-- verify that indexes are created on columnar partitions
SELECT COUNT(*)=2 FROM pg_indexes WHERE tablename = 'p0';
SELECT COUNT(*)=2 FROM pg_indexes WHERE tablename = 'p1';

-- unsupported index types --

-- gin --
CREATE TABLE testjsonb (j JSONB) USING columnar;
INSERT INTO testjsonb SELECT CAST('{"f1" : ' ||'"'|| i*4 ||'", ' || '"f2" : '||'"'|| i*10 ||'"}' AS JSON) FROM generate_series(1,10) i;
CREATE INDEX jidx ON testjsonb USING GIN (j);
INSERT INTO testjsonb SELECT CAST('{"f1" : ' ||'"'|| i*4 ||'", ' || '"f2" : '||'"'|| i*10 ||'"}' AS JSON) FROM generate_series(15,20) i;

-- gist --
CREATE TABLE gist_point_tbl(id INT4, p POINT) USING columnar;
INSERT INTO gist_point_tbl (id, p) SELECT g, point(g*10, g*10) FROM generate_series(1, 10) g;
CREATE INDEX gist_pointidx ON gist_point_tbl USING gist(p);
INSERT INTO gist_point_tbl (id, p) SELECT g, point(g*10, g*10) FROM generate_series(10, 20) g;

-- sp gist --
CREATE TABLE box_temp (f1 box) USING columnar;
INSERT INTO box_temp SELECT box(point(i, i), point(i * 2, i * 2)) FROM generate_series(1, 10) AS i;
CREATE INDEX box_spgist ON box_temp USING spgist (f1);
INSERT INTO box_temp SELECT box(point(i, i), point(i * 2, i * 2)) FROM generate_series(1, 10) AS i;

-- brin --
CREATE TABLE brin_summarize (value int) USING columnar;
CREATE INDEX brin_summarize_idx ON brin_summarize USING brin (value) WITH (pages_per_range=2);

-- Show that we safely fall back to serial index build.
CREATE TABLE parallel_scan_test(a int) USING columnar WITH ( parallel_workers = 2 );
INSERT INTO parallel_scan_test SELECT i FROM generate_series(1,10) i;
CREATE INDEX ON parallel_scan_test (a);
VACUUM FULL parallel_scan_test;
REINDEX TABLE parallel_scan_test;

SET client_min_messages TO WARNING;
DROP SCHEMA columnar_indexes CASCADE;