From a94e8c9cda06db17889a72867c8fa06342dd9a54 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 30 Nov 2020 18:01:43 -0800 Subject: [PATCH] Associate column store metadata with storage id (#4347) --- src/backend/columnar/cstore_customscan.c | 8 +- src/backend/columnar/cstore_metadata_tables.c | 417 ++++++++++-------- src/backend/columnar/cstore_reader.c | 22 +- src/backend/columnar/cstore_tableam.c | 42 +- src/backend/columnar/cstore_writer.c | 8 +- .../columnar/sql/columnar--9.5-1--10.0-1.sql | 22 +- .../downgrades/columnar--10.0-1--9.5-1.sql | 2 +- src/include/columnar/cstore.h | 31 +- src/test/regress/bin/normalize.sed | 3 + src/test/regress/expected/.gitignore | 1 - .../am_create.out} | 8 +- src/test/regress/expected/am_drop.out | 9 +- src/test/regress/expected/am_matview.out | 23 +- src/test/regress/expected/am_recursive.out | 2 +- src/test/regress/expected/am_rollback.out | 13 +- src/test/regress/expected/am_truncate.out | 6 +- src/test/regress/expected/am_vacuum.out | 50 ++- .../regress/expected/am_vacuum_vs_insert.out | 5 +- src/test/regress/expected/multi_extension.out | 2 +- .../regress/expected/multi_extension_0.out | 2 +- .../expected/upgrade_list_citus_objects.out | 2 +- .../expected/upgrade_list_citus_objects_0.out | 2 +- src/test/regress/sql/.gitignore | 1 - .../am_create.source => sql/am_create.sql} | 5 + src/test/regress/sql/am_drop.sql | 9 +- src/test/regress/sql/am_matview.sql | 13 +- src/test/regress/sql/am_recursive.sql | 2 +- src/test/regress/sql/am_rollback.sql | 14 +- src/test/regress/sql/am_truncate.sql | 6 +- src/test/regress/sql/am_vacuum.sql | 29 +- 30 files changed, 414 insertions(+), 345 deletions(-) rename src/test/regress/{output/am_create.source => expected/am_create.out} (67%) rename src/test/regress/{input/am_create.source => sql/am_create.sql} (73%) diff --git a/src/backend/columnar/cstore_customscan.c b/src/backend/columnar/cstore_customscan.c index 062347d52..0dfed65ea 100644 --- 
a/src/backend/columnar/cstore_customscan.c +++ b/src/backend/columnar/cstore_customscan.c @@ -215,15 +215,15 @@ static Cost CStoreScanCost(RangeTblEntry *rte) { Relation rel = RelationIdGetRelation(rte->relid); - DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node.relNode, false); + List *stripeList = StripesForRelfilenode(rel->rd_node); + RelationClose(rel); + uint32 maxColumnCount = 0; uint64 totalStripeSize = 0; ListCell *stripeMetadataCell = NULL; - - RelationClose(rel); rel = NULL; - foreach(stripeMetadataCell, metadata->stripeMetadataList) + foreach(stripeMetadataCell, stripeList) { StripeMetadata *stripeMetadata = (StripeMetadata *) lfirst(stripeMetadataCell); totalStripeSize += stripeMetadata->dataLength; diff --git a/src/backend/columnar/cstore_metadata_tables.c b/src/backend/columnar/cstore_metadata_tables.c index cc69f1887..a2bf8b85c 100644 --- a/src/backend/columnar/cstore_metadata_tables.c +++ b/src/backend/columnar/cstore_metadata_tables.c @@ -12,6 +12,7 @@ #include "safe_lib.h" +#include "citus_version.h" #include "columnar/cstore.h" #include "columnar/cstore_version_compat.h" @@ -27,6 +28,7 @@ #include "catalog/namespace.h" #include "commands/defrem.h" #include "commands/trigger.h" +#include "distributed/metadata_cache.h" #include "executor/executor.h" #include "executor/spi.h" #include "miscadmin.h" @@ -41,6 +43,30 @@ #include "utils/memutils.h" #include "utils/lsyscache.h" #include "utils/rel.h" +#include "utils/relfilenodemap.h" + + +/* + * Content of the first page in main fork, which stores metadata at file + * level. + */ +typedef struct ColumnarMetapage +{ + /* + * Store version of file format used, so we can detect files from + * previous versions if we change file format. + */ + int versionMajor; + int versionMinor; + + /* + * Each of the metadata table rows are identified by a storageId. + * We store it also in the main fork so we can link metadata rows + * with data files. 
+ */ + uint64 storageId; +} ColumnarMetapage; + typedef struct { @@ -48,21 +74,18 @@ typedef struct EState *estate; } ModifyState; -static void InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe); -static void GetHighestUsedAddressAndId(Oid relfilenode, +static void InsertStripeMetadataRow(uint64 storageId, StripeMetadata *stripe); +static void GetHighestUsedAddressAndId(uint64 storageId, uint64 *highestUsedAddress, uint64 *highestUsedId); -static List * ReadDataFileStripeList(Oid relfilenode, Snapshot snapshot); +static List * ReadDataFileStripeList(uint64 storageId, Snapshot snapshot); static Oid CStoreStripesRelationId(void); static Oid CStoreStripesIndexRelationId(void); static Oid ColumnarOptionsRelationId(void); static Oid ColumnarOptionsIndexRegclass(void); -static Oid CStoreDataFilesRelationId(void); -static Oid CStoreDataFilesIndexRelationId(void); static Oid CStoreSkipNodesRelationId(void); static Oid CStoreSkipNodesIndexRelationId(void); static Oid CStoreNamespaceId(void); -static bool ReadCStoreDataFiles(Oid relfilenode, DataFileMetadata *metadata); static ModifyState * StartModifyRelation(Relation rel); static void InsertTupleAndEnforceConstraints(ModifyState *state, Datum *values, bool *nulls); @@ -71,9 +94,13 @@ static void FinishModifyRelation(ModifyState *state); static EState * create_estate_for_relation(Relation rel); static bytea * DatumToBytea(Datum value, Form_pg_attribute attrForm); static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm); - +static ColumnarMetapage * InitMetapage(Relation relation); +static ColumnarMetapage * ReadMetapage(RelFileNode relfilenode, bool missingOk); +static uint64 GetNextStorageId(void); static bool WriteColumnarOptions(Oid regclass, ColumnarOptions *options, bool overwrite); +PG_FUNCTION_INFO_V1(columnar_relation_storageid); + /* constants for cstore.options */ #define Natts_cstore_options 4 #define Anum_cstore_options_regclass 1 @@ -97,33 +124,10 @@ typedef struct 
FormData_cstore_options } FormData_cstore_options; typedef FormData_cstore_options *Form_cstore_options; -/* constants for cstore_table */ -#define Natts_cstore_data_files 3 -#define Anum_cstore_data_files_relfilenode 1 -#define Anum_cstore_data_files_version_major 2 -#define Anum_cstore_data_files_version_minor 3 - -/* ---------------- - * cstore.cstore_data_files definition. - * ---------------- - */ -typedef struct FormData_cstore_data_files -{ - Oid relfilenode; - int32 block_row_count; - int32 stripe_row_count; - NameData compression; - int64 version_major; - int64 version_minor; - -#ifdef CATALOG_VARLEN /* variable-length fields start here */ -#endif -} FormData_cstore_data_files; -typedef FormData_cstore_data_files *Form_cstore_data_files; /* constants for cstore_stripe */ #define Natts_cstore_stripes 8 -#define Anum_cstore_stripes_relfilenode 1 +#define Anum_cstore_stripes_storageid 1 #define Anum_cstore_stripes_stripe 2 #define Anum_cstore_stripes_file_offset 3 #define Anum_cstore_stripes_data_length 4 @@ -134,7 +138,7 @@ typedef FormData_cstore_data_files *Form_cstore_data_files; /* constants for cstore_skipnodes */ #define Natts_cstore_skipnodes 12 -#define Anum_cstore_skipnodes_relfilenode 1 +#define Anum_cstore_skipnodes_storageid 1 #define Anum_cstore_skipnodes_stripe 2 #define Anum_cstore_skipnodes_attr 3 #define Anum_cstore_skipnodes_block 4 @@ -357,47 +361,19 @@ ReadColumnarOptions(Oid regclass, ColumnarOptions *options) } -/* - * InitCStoreDataFileMetadata adds a record for the given relfilenode - * in cstore_data_files. 
- */ -void -InitCStoreDataFileMetadata(Oid relfilenode) -{ - bool nulls[Natts_cstore_data_files] = { 0 }; - Datum values[Natts_cstore_data_files] = { - ObjectIdGetDatum(relfilenode), - Int32GetDatum(CSTORE_VERSION_MAJOR), - Int32GetDatum(CSTORE_VERSION_MINOR) - }; - - DeleteDataFileMetadataRowIfExists(relfilenode); - - Oid cstoreDataFilesOid = CStoreDataFilesRelationId(); - Relation cstoreDataFiles = heap_open(cstoreDataFilesOid, RowExclusiveLock); - - ModifyState *modifyState = StartModifyRelation(cstoreDataFiles); - InsertTupleAndEnforceConstraints(modifyState, values, nulls); - FinishModifyRelation(modifyState); - - CommandCounterIncrement(); - - heap_close(cstoreDataFiles, NoLock); -} - - /* * SaveStripeSkipList saves StripeSkipList for a given stripe as rows * of cstore_skipnodes. */ void -SaveStripeSkipList(Oid relfilenode, uint64 stripe, StripeSkipList *stripeSkipList, +SaveStripeSkipList(RelFileNode relfilenode, uint64 stripe, StripeSkipList *stripeSkipList, TupleDesc tupleDescriptor) { uint32 columnIndex = 0; uint32 blockIndex = 0; uint32 columnCount = stripeSkipList->columnCount; + ColumnarMetapage *metapage = ReadMetapage(relfilenode, false); Oid cstoreSkipNodesOid = CStoreSkipNodesRelationId(); Relation cstoreSkipNodes = heap_open(cstoreSkipNodesOid, RowExclusiveLock); ModifyState *modifyState = StartModifyRelation(cstoreSkipNodes); @@ -410,7 +386,7 @@ SaveStripeSkipList(Oid relfilenode, uint64 stripe, StripeSkipList *stripeSkipLis &stripeSkipList->blockSkipNodeArray[columnIndex][blockIndex]; Datum values[Natts_cstore_skipnodes] = { - ObjectIdGetDatum(relfilenode), + UInt64GetDatum(metapage->storageId), Int64GetDatum(stripe), Int32GetDatum(columnIndex + 1), Int32GetDatum(blockIndex), @@ -456,7 +432,7 @@ SaveStripeSkipList(Oid relfilenode, uint64 stripe, StripeSkipList *stripeSkipLis * ReadStripeSkipList fetches StripeSkipList for a given stripe. 
*/ StripeSkipList * -ReadStripeSkipList(Oid relfilenode, uint64 stripe, TupleDesc tupleDescriptor, +ReadStripeSkipList(RelFileNode relfilenode, uint64 stripe, TupleDesc tupleDescriptor, uint32 blockCount) { int32 columnIndex = 0; @@ -464,12 +440,14 @@ ReadStripeSkipList(Oid relfilenode, uint64 stripe, TupleDesc tupleDescriptor, uint32 columnCount = tupleDescriptor->natts; ScanKeyData scanKey[2]; + ColumnarMetapage *metapage = ReadMetapage(relfilenode, false); + Oid cstoreSkipNodesOid = CStoreSkipNodesRelationId(); Relation cstoreSkipNodes = heap_open(cstoreSkipNodesOid, AccessShareLock); Relation index = index_open(CStoreSkipNodesIndexRelationId(), AccessShareLock); - ScanKeyInit(&scanKey[0], Anum_cstore_skipnodes_relfilenode, - BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode)); + ScanKeyInit(&scanKey[0], Anum_cstore_skipnodes_storageid, + BTEqualStrategyNumber, F_INT8EQ, UInt64GetDatum(metapage->storageId)); ScanKeyInit(&scanKey[1], Anum_cstore_skipnodes_stripe, BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(stripe)); @@ -559,11 +537,11 @@ ReadStripeSkipList(Oid relfilenode, uint64 stripe, TupleDesc tupleDescriptor, * InsertStripeMetadataRow adds a row to cstore_stripes. */ static void -InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe) +InsertStripeMetadataRow(uint64 storageId, StripeMetadata *stripe) { bool nulls[Natts_cstore_stripes] = { 0 }; Datum values[Natts_cstore_stripes] = { - ObjectIdGetDatum(relfilenode), + UInt64GetDatum(storageId), Int64GetDatum(stripe->id), Int64GetDatum(stripe->fileOffset), Int64GetDatum(stripe->dataLength), @@ -589,31 +567,21 @@ InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe) /* - * ReadDataFileMetadata constructs DataFileMetadata for a given relfilenode by reading - * from cstore_data_files and cstore_stripes. + * StripesForRelfilenode returns a list of StripeMetadata for stripes + * of the given relfilenode.
*/ -DataFileMetadata * -ReadDataFileMetadata(Oid relfilenode, bool missingOk) +List * +StripesForRelfilenode(RelFileNode relfilenode) { - DataFileMetadata *datafileMetadata = palloc0(sizeof(DataFileMetadata)); - bool found = ReadCStoreDataFiles(relfilenode, datafileMetadata); - if (!found) + ColumnarMetapage *metapage = ReadMetapage(relfilenode, true); + if (metapage == NULL) { - if (!missingOk) - { - ereport(ERROR, (errmsg("Relfilenode %d doesn't belong to a cstore table.", - relfilenode))); - } - else - { - return NULL; - } + /* empty relation */ + return NIL; } - datafileMetadata->stripeMetadataList = - ReadDataFileStripeList(relfilenode, GetTransactionSnapshot()); - return datafileMetadata; + return ReadDataFileStripeList(metapage->storageId, GetTransactionSnapshot()); } @@ -622,12 +590,13 @@ ReadDataFileMetadata(Oid relfilenode, bool missingOk) * relfilenode across all active and inactive transactions. */ uint64 -GetHighestUsedAddress(Oid relfilenode) +GetHighestUsedAddress(RelFileNode relfilenode) { uint64 highestUsedAddress = 0; uint64 highestUsedId = 0; + ColumnarMetapage *metapage = ReadMetapage(relfilenode, false); - GetHighestUsedAddressAndId(relfilenode, &highestUsedAddress, &highestUsedId); + GetHighestUsedAddressAndId(metapage->storageId, &highestUsedAddress, &highestUsedId); return highestUsedAddress; } @@ -638,7 +607,7 @@ GetHighestUsedAddress(Oid relfilenode) * the given relfilenode across all active and inactive transactions. 
*/ static void -GetHighestUsedAddressAndId(Oid relfilenode, +GetHighestUsedAddressAndId(uint64 storageId, uint64 *highestUsedAddress, uint64 *highestUsedId) { @@ -647,10 +616,12 @@ GetHighestUsedAddressAndId(Oid relfilenode, SnapshotData SnapshotDirty; InitDirtySnapshot(SnapshotDirty); - List *stripeMetadataList = ReadDataFileStripeList(relfilenode, &SnapshotDirty); + List *stripeMetadataList = ReadDataFileStripeList(storageId, &SnapshotDirty); *highestUsedId = 0; - *highestUsedAddress = 0; + + /* file starts with metapage */ + *highestUsedAddress = CSTORE_BYTES_PER_PAGE; foreach(stripeMetadataCell, stripeMetadataList) { @@ -684,8 +655,20 @@ ReserveStripe(Relation rel, uint64 sizeBytes, */ LockRelation(rel, ShareUpdateExclusiveLock); - Oid relfilenode = rel->rd_node.relNode; - GetHighestUsedAddressAndId(relfilenode, &currLogicalHigh, &highestId); + RelFileNode relfilenode = rel->rd_node; + + + /* + * If this is the first stripe for this relation, initialize the + * metapage, otherwise use the previously initialized metapage. + */ + ColumnarMetapage *metapage = ReadMetapage(relfilenode, true); + if (metapage == NULL) + { + metapage = InitMetapage(rel); + } + + GetHighestUsedAddressAndId(metapage->storageId, &currLogicalHigh, &highestId); SmgrAddr currSmgrHigh = logical_to_smgr(currLogicalHigh); SmgrAddr resSmgrStart = next_block_start(currSmgrHigh); @@ -714,7 +697,7 @@ ReserveStripe(Relation rel, uint64 sizeBytes, stripe.rowCount = rowCount; stripe.id = highestId + 1; - InsertStripeMetadataRow(relfilenode, &stripe); + InsertStripeMetadataRow(metapage->storageId, &stripe); UnlockRelation(rel, ShareUpdateExclusiveLock); @@ -723,18 +706,18 @@ ReserveStripe(Relation rel, uint64 sizeBytes, /* - * ReadDataFileStripeList reads the stripe list for a given relfilenode + * ReadDataFileStripeList reads the stripe list for a given storageId * in the given snapshot. 
*/ static List * -ReadDataFileStripeList(Oid relfilenode, Snapshot snapshot) +ReadDataFileStripeList(uint64 storageId, Snapshot snapshot) { List *stripeMetadataList = NIL; ScanKeyData scanKey[1]; HeapTuple heapTuple; - ScanKeyInit(&scanKey[0], Anum_cstore_stripes_relfilenode, - BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode)); + ScanKeyInit(&scanKey[0], Anum_cstore_stripes_storageid, + BTEqualStrategyNumber, F_INT8EQ, UInt64GetDatum(storageId)); Oid cstoreStripesOid = CStoreStripesRelationId(); Relation cstoreStripes = heap_open(cstoreStripesOid, AccessShareLock); @@ -779,60 +762,10 @@ ReadDataFileStripeList(Oid relfilenode, Snapshot snapshot) /* - * ReadCStoreDataFiles reads corresponding record from cstore_data_files. Returns - * false if table was not found in cstore_data_files. - */ -static bool -ReadCStoreDataFiles(Oid relfilenode, DataFileMetadata *metadata) -{ - bool found = false; - ScanKeyData scanKey[1]; - - ScanKeyInit(&scanKey[0], Anum_cstore_data_files_relfilenode, - BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode)); - - Oid cstoreDataFilesOid = CStoreDataFilesRelationId(); - Relation cstoreDataFiles = try_relation_open(cstoreDataFilesOid, AccessShareLock); - if (cstoreDataFiles == NULL) - { - /* - * Extension has been dropped. This can be called while - * dropping extension or database via ObjectAccess().
- */ - return false; - } - - Relation index = try_relation_open(CStoreDataFilesIndexRelationId(), AccessShareLock); - if (index == NULL) - { - heap_close(cstoreDataFiles, NoLock); - - /* extension has been dropped */ - return false; - } - - SysScanDesc scanDescriptor = systable_beginscan_ordered(cstoreDataFiles, index, NULL, - 1, scanKey); - - HeapTuple heapTuple = systable_getnext(scanDescriptor); - if (HeapTupleIsValid(heapTuple)) - { - found = true; - } - - systable_endscan_ordered(scanDescriptor); - index_close(index, NoLock); - heap_close(cstoreDataFiles, NoLock); - - return found; -} - - -/* - * DeleteDataFileMetadataRowIfExists removes the row with given relfilenode from cstore_stripes. + * DeleteMetadataRows removes the rows with given relfilenode from cstore_stripes. */ void -DeleteDataFileMetadataRowIfExists(Oid relfilenode) +DeleteMetadataRows(RelFileNode relfilenode) { ScanKeyData scanKey[1]; @@ -845,33 +778,46 @@ DeleteDataFileMetadataRowIfExists(Oid relfilenode) return; } - ScanKeyInit(&scanKey[0], Anum_cstore_data_files_relfilenode, - BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode)); + ColumnarMetapage *metapage = ReadMetapage(relfilenode, true); + if (metapage == NULL) + { + /* + * No data has been written to this storage yet, so there is no + * associated metadata yet. 
+ */ + return; + } - Oid cstoreDataFilesOid = CStoreDataFilesRelationId(); - Relation cstoreDataFiles = try_relation_open(cstoreDataFilesOid, AccessShareLock); - if (cstoreDataFiles == NULL) + ScanKeyInit(&scanKey[0], Anum_cstore_stripes_storageid, + BTEqualStrategyNumber, F_INT8EQ, UInt64GetDatum(metapage->storageId)); + + Oid cstoreStripesOid = CStoreStripesRelationId(); + Relation cstoreStripes = try_relation_open(cstoreStripesOid, AccessShareLock); + if (cstoreStripes == NULL) { /* extension has been dropped */ return; } - Relation index = index_open(CStoreDataFilesIndexRelationId(), AccessShareLock); + Relation index = index_open(CStoreStripesIndexRelationId(), AccessShareLock); - SysScanDesc scanDescriptor = systable_beginscan_ordered(cstoreDataFiles, index, NULL, + SysScanDesc scanDescriptor = systable_beginscan_ordered(cstoreStripes, index, NULL, 1, scanKey); + ModifyState *modifyState = StartModifyRelation(cstoreStripes); + HeapTuple heapTuple = systable_getnext(scanDescriptor); - if (HeapTupleIsValid(heapTuple)) + while (HeapTupleIsValid(heapTuple)) { - ModifyState *modifyState = StartModifyRelation(cstoreDataFiles); DeleteTupleAndEnforceConstraints(modifyState, heapTuple); - FinishModifyRelation(modifyState); + heapTuple = systable_getnext(scanDescriptor); } + FinishModifyRelation(modifyState); + systable_endscan_ordered(scanDescriptor); index_close(index, NoLock); - heap_close(cstoreDataFiles, NoLock); + heap_close(cstoreStripes, NoLock); } @@ -1096,28 +1042,6 @@ ColumnarOptionsIndexRegclass(void) } -/* - * CStoreDataFilesRelationId returns relation id of cstore_data_files. - * TODO: should we cache this similar to citus? - */ -static Oid -CStoreDataFilesRelationId(void) -{ - return get_relname_relid("cstore_data_files", CStoreNamespaceId()); -} - - -/* - * CStoreDataFilesIndexRelationId returns relation id of cstore_data_files_pkey. - * TODO: should we cache this similar to citus? 
- */ -static Oid -CStoreDataFilesIndexRelationId(void) -{ - return get_relname_relid("cstore_data_files_pkey", CStoreNamespaceId()); -} - - /* * CStoreSkipNodesRelationId returns relation id of cstore_skipnodes. * TODO: should we cache this similar to citus? @@ -1149,3 +1073,128 @@ CStoreNamespaceId(void) { return get_namespace_oid("cstore", false); } + + +/* + * ReadMetapage reads metapage for the given relfilenode. It returns + * NULL if the relation has no metapage yet, or errors if !missingOk. + */ +static ColumnarMetapage * +ReadMetapage(RelFileNode relfilenode, bool missingOk) +{ + StringInfo metapageBuffer = NULL; + Oid relationId = RelidByRelfilenode(relfilenode.spcNode, + relfilenode.relNode); + if (OidIsValid(relationId)) + { + Relation relation = relation_open(relationId, NoLock); + + RelationOpenSmgr(relation); + int nblocks = smgrnblocks(relation->rd_smgr, MAIN_FORKNUM); + RelationCloseSmgr(relation); + + if (nblocks != 0) + { + metapageBuffer = ReadFromSmgr(relation, 0, sizeof(ColumnarMetapage)); + } + + relation_close(relation, NoLock); + } + + if (metapageBuffer == NULL) + { + if (!missingOk) + { + elog(ERROR, "columnar metapage was not found"); + } + + return NULL; + } + + ColumnarMetapage *metapage = palloc0(sizeof(ColumnarMetapage)); + memcpy_s((void *) metapage, sizeof(ColumnarMetapage), + metapageBuffer->data, sizeof(ColumnarMetapage)); + + return metapage; +} + + +/* + * InitMetapage initializes metapage for the given relation.
+ */ +static ColumnarMetapage * +InitMetapage(Relation relation) +{ + ColumnarMetapage *metapage = palloc0(sizeof(ColumnarMetapage)); + metapage->storageId = GetNextStorageId(); + metapage->versionMajor = CSTORE_VERSION_MAJOR; + metapage->versionMinor = CSTORE_VERSION_MINOR; + + /* create the first block */ + Buffer newBuffer = ReadBuffer(relation, P_NEW); + ReleaseBuffer(newBuffer); + + Assert(sizeof(ColumnarMetapage) <= BLCKSZ - SizeOfPageHeaderData); + WriteToSmgr(relation, 0, (char *) metapage, sizeof(ColumnarMetapage)); + + return metapage; +} + + +/* + * GetNextStorageId returns the next value from the storage id sequence. + */ +static uint64 +GetNextStorageId(void) +{ + Oid savedUserId = InvalidOid; + int savedSecurityContext = 0; + Oid sequenceId = get_relname_relid("storageid_seq", CStoreNamespaceId()); + Datum sequenceIdDatum = ObjectIdGetDatum(sequenceId); + + /* + * Not all users have update access to the sequence, so switch + * security context. + */ + GetUserIdAndSecContext(&savedUserId, &savedSecurityContext); + SetUserIdAndSecContext(CitusExtensionOwner(), SECURITY_LOCAL_USERID_CHANGE); + + /* + * Generate new and unique storage id from sequence. + */ + Datum storageIdDatum = DirectFunctionCall1(nextval_oid, sequenceIdDatum); + + SetUserIdAndSecContext(savedUserId, savedSecurityContext); + + uint64 storageId = DatumGetInt64(storageIdDatum); + + return storageId; +} + + +/* + * columnar_relation_storageid returns storage id associated with the + * given relation id, or -1 if there is no associated storage id yet. 
+ */ +Datum +columnar_relation_storageid(PG_FUNCTION_ARGS) +{ + uint64 storageId = -1; + +#if HAS_TABLEAM + Oid relationId = PG_GETARG_OID(0); + Relation relation = relation_open(relationId, AccessShareLock); + if (IsCStoreTableAmTable(relationId)) + { + ColumnarMetapage *metadata = ReadMetapage(relation->rd_node, true); + if (metadata != NULL) + { + storageId = metadata->storageId; + } + } + + relation_close(relation, AccessShareLock); +#endif + + PG_RETURN_INT64(storageId); +} diff --git a/src/backend/columnar/cstore_reader.c b/src/backend/columnar/cstore_reader.c index 4faf2562c..0fcf8f26d 100644 --- a/src/backend/columnar/cstore_reader.c +++ b/src/backend/columnar/cstore_reader.c @@ -76,7 +76,6 @@ static BlockData * DeserializeBlockData(StripeBuffers *stripeBuffers, uint64 blo List *projectedColumnList); static Datum ColumnDefaultValue(TupleConstr *tupleConstraints, Form_pg_attribute attributeForm); -static StringInfo ReadFromSmgr(Relation rel, uint64 offset, uint32 size); /* * CStoreBeginRead initializes a cstore read operation. 
This function returns a @@ -86,9 +85,7 @@ TableReadState * CStoreBeginRead(Relation relation, TupleDesc tupleDescriptor, List *projectedColumnList, List *whereClauseList) { - Oid relNode = relation->rd_node.relNode; - - DataFileMetadata *datafileMetadata = ReadDataFileMetadata(relNode, false); + List *stripeList = StripesForRelfilenode(relation->rd_node); /* * We allocate all stripe specific data in the stripeReadContext, and reset @@ -101,7 +98,7 @@ CStoreBeginRead(Relation relation, TupleDesc tupleDescriptor, TableReadState *readState = palloc0(sizeof(TableReadState)); readState->relation = relation; - readState->datafileMetadata = datafileMetadata; + readState->stripeList = stripeList; readState->projectedColumnList = projectedColumnList; readState->whereClauseList = whereClauseList; readState->stripeBuffers = NULL; @@ -135,7 +132,7 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu */ while (readState->stripeBuffers == NULL) { - List *stripeMetadataList = readState->datafileMetadata->stripeMetadataList; + List *stripeMetadataList = readState->stripeList; uint32 stripeCount = list_length(stripeMetadataList); /* if we have read all stripes, return false */ @@ -238,8 +235,7 @@ void CStoreEndRead(TableReadState *readState) { MemoryContextDelete(readState->stripeReadContext); - list_free_deep(readState->datafileMetadata->stripeMetadataList); - pfree(readState->datafileMetadata); + list_free_deep(readState->stripeList); pfree(readState); } @@ -316,11 +312,9 @@ CStoreTableRowCount(Relation relation) { ListCell *stripeMetadataCell = NULL; uint64 totalRowCount = 0; + List *stripeList = StripesForRelfilenode(relation->rd_node); - DataFileMetadata *datafileMetadata = ReadDataFileMetadata(relation->rd_node.relNode, - false); - - foreach(stripeMetadataCell, datafileMetadata->stripeMetadataList) + foreach(stripeMetadataCell, stripeList) { StripeMetadata *stripeMetadata = (StripeMetadata *) lfirst(stripeMetadataCell); totalRowCount += 
stripeMetadata->rowCount; @@ -345,7 +339,7 @@ LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata, bool *projectedColumnMask = ProjectedColumnMask(columnCount, projectedColumnList); - StripeSkipList *stripeSkipList = ReadStripeSkipList(relation->rd_node.relNode, + StripeSkipList *stripeSkipList = ReadStripeSkipList(relation->rd_node, stripeMetadata->id, tupleDescriptor, stripeMetadata->blockCount); @@ -1009,7 +1003,7 @@ ColumnDefaultValue(TupleConstr *tupleConstraints, Form_pg_attribute attributeFor } -static StringInfo +StringInfo ReadFromSmgr(Relation rel, uint64 offset, uint32 size) { StringInfo resultBuffer = makeStringInfo(); diff --git a/src/backend/columnar/cstore_tableam.c b/src/backend/columnar/cstore_tableam.c index 0cc518499..2109627dd 100644 --- a/src/backend/columnar/cstore_tableam.c +++ b/src/backend/columnar/cstore_tableam.c @@ -106,7 +106,6 @@ static void CStoreTableAMProcessUtility(PlannedStmt *plannedStatement, char *completionTag); #endif -static bool IsCStoreTableAmTable(Oid relationId); static bool ConditionalLockRelationWithTimeout(Relation rel, LOCKMODE lockMode, int timeout, int retryInterval); static void LogRelationStats(Relation rel, int elevel); @@ -542,26 +541,30 @@ cstore_relation_set_new_filenode(Relation rel, MarkRelfilenodeDropped(oldRelfilenode, GetCurrentSubTransactionId()); /* delete old relfilenode metadata */ - DeleteDataFileMetadataRowIfExists(rel->rd_node.relNode); + DeleteMetadataRows(rel->rd_node); Assert(persistence == RELPERSISTENCE_PERMANENT); *freezeXid = RecentXmin; *minmulti = GetOldestMultiXactId(); SMgrRelation srel = RelationCreateStorage(*newrnode, persistence); - InitCStoreDataFileMetadata(newrnode->relNode); InitColumnarOptions(rel->rd_id); smgrclose(srel); + + /* we will lazily initialize metadata in first stripe reservation */ } static void cstore_relation_nontransactional_truncate(Relation rel) { - Oid relfilenode = rel->rd_node.relNode; + RelFileNode relfilenode = rel->rd_node; - 
NonTransactionDropWriteState(relfilenode); + NonTransactionDropWriteState(relfilenode.relNode); + + /* Delete old relfilenode metadata */ + DeleteMetadataRows(relfilenode); /* * No need to set new relfilenode, since the table was created in this @@ -572,9 +575,7 @@ cstore_relation_nontransactional_truncate(Relation rel) */ RelationTruncate(rel, 0); - /* Delete old relfilenode metadata and recreate it */ - DeleteDataFileMetadataRowIfExists(rel->rd_node.relNode); - InitCStoreDataFileMetadata(rel->rd_node.relNode); + /* we will lazily initialize new metadata in first stripe reservation */ } @@ -673,11 +674,14 @@ cstore_vacuum_rel(Relation rel, VacuumParams *params, } +/* + * LogRelationStats logs statistics as the output of the VACUUM VERBOSE. + */ static void LogRelationStats(Relation rel, int elevel) { ListCell *stripeMetadataCell = NULL; - Oid relfilenode = rel->rd_node.relNode; + RelFileNode relfilenode = rel->rd_node; StringInfo infoBuf = makeStringInfo(); int compressionStats[COMPRESSION_COUNT] = { 0 }; @@ -687,10 +691,10 @@ LogRelationStats(Relation rel, int elevel) TupleDesc tupdesc = RelationGetDescr(rel); uint64 droppedBlocksWithData = 0; - DataFileMetadata *datafileMetadata = ReadDataFileMetadata(relfilenode, false); - int stripeCount = list_length(datafileMetadata->stripeMetadataList); + List *stripeList = StripesForRelfilenode(relfilenode); + int stripeCount = list_length(stripeList); - foreach(stripeMetadataCell, datafileMetadata->stripeMetadataList) + foreach(stripeMetadataCell, stripeList) { StripeMetadata *stripe = lfirst(stripeMetadataCell); StripeSkipList *skiplist = ReadStripeSkipList(relfilenode, stripe->id, @@ -726,6 +730,10 @@ LogRelationStats(Relation rel, int elevel) uint64 relPages = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); RelationCloseSmgr(rel); + Datum storageId = DirectFunctionCall1(columnar_relation_storageid, + ObjectIdGetDatum(RelationGetRelid(rel))); + + appendStringInfo(infoBuf, "storage id: %ld\n", DatumGetInt64(storageId)); 
appendStringInfo(infoBuf, "total file size: %ld, total data size: %ld\n", relPages * BLCKSZ, totalStripeLength); appendStringInfo(infoBuf, @@ -803,7 +811,7 @@ TruncateCStore(Relation rel, int elevel) * we're truncating. */ SmgrAddr highestPhysicalAddress = - logical_to_smgr(GetHighestUsedAddress(rel->rd_node.relNode)); + logical_to_smgr(GetHighestUsedAddress(rel->rd_node)); BlockNumber new_rel_pages = highestPhysicalAddress.blockno + 1; if (new_rel_pages == old_rel_pages) @@ -1171,11 +1179,11 @@ CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId, Oid objectId * tableam tables storage is managed by postgres. */ Relation rel = table_open(objectId, AccessExclusiveLock); - Oid relfilenode = rel->rd_node.relNode; - DeleteDataFileMetadataRowIfExists(relfilenode); + RelFileNode relfilenode = rel->rd_node; + DeleteMetadataRows(relfilenode); DeleteColumnarTableOptions(rel->rd_id, true); - MarkRelfilenodeDropped(relfilenode, GetCurrentSubTransactionId()); + MarkRelfilenodeDropped(relfilenode.relNode, GetCurrentSubTransactionId()); /* keep the lock since we did physical changes to the relation */ table_close(rel, NoLock); @@ -1187,7 +1195,7 @@ CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId, Oid objectId * IsCStoreTableAmTable returns true if relation has cstore_tableam * access method. This can be called before extension creation. 
*/ -static bool +bool IsCStoreTableAmTable(Oid relationId) { if (!OidIsValid(relationId)) diff --git a/src/backend/columnar/cstore_writer.c b/src/backend/columnar/cstore_writer.c index ca0958ecf..2724d7b8d 100644 --- a/src/backend/columnar/cstore_writer.c +++ b/src/backend/columnar/cstore_writer.c @@ -65,12 +65,10 @@ CStoreBeginWrite(RelFileNode relfilenode, uint64 stripeMaxRowCount, uint32 blockRowCount, TupleDesc tupleDescriptor) { - uint32 columnIndex = 0; - /* get comparison function pointers for each of the columns */ uint32 columnCount = tupleDescriptor->natts; FmgrInfo **comparisonFunctionArray = palloc0(columnCount * sizeof(FmgrInfo *)); - for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + for (uint32 columnIndex = 0; columnIndex < columnCount; columnIndex++) { FmgrInfo *comparisonFunction = NULL; FormData_pg_attribute *attributeForm = TupleDescAttr(tupleDescriptor, @@ -323,7 +321,7 @@ CreateEmptyStripeSkipList(uint32 stripeMaxRowCount, uint32 blockRowCount, } -static void +void WriteToSmgr(Relation rel, uint64 logicalOffset, char *data, uint32 dataLength) { uint64 remaining = dataLength; @@ -521,7 +519,7 @@ FlushStripe(TableWriteState *writeState) } /* create skip list and footer buffers */ - SaveStripeSkipList(relation->rd_node.relNode, + SaveStripeSkipList(writeState->relfilenode, stripeMetadata.id, stripeSkipList, tupleDescriptor); diff --git a/src/backend/columnar/sql/columnar--9.5-1--10.0-1.sql b/src/backend/columnar/sql/columnar--9.5-1--10.0-1.sql index 4a6c1d172..d1f982f61 100644 --- a/src/backend/columnar/sql/columnar--9.5-1--10.0-1.sql +++ b/src/backend/columnar/sql/columnar--9.5-1--10.0-1.sql @@ -3,6 +3,8 @@ CREATE SCHEMA cstore; SET search_path TO cstore; +CREATE SEQUENCE storageid_seq MINVALUE 10000000000 NO CYCLE; + CREATE TABLE options ( regclass regclass NOT NULL PRIMARY KEY, block_row_count int NOT NULL, @@ -12,17 +14,8 @@ CREATE TABLE options ( COMMENT ON TABLE options IS 'columnar table specific options, maintained by 
alter_columnar_table_set'; -CREATE TABLE cstore_data_files ( - relfilenode oid NOT NULL, - version_major bigint NOT NULL, - version_minor bigint NOT NULL, - PRIMARY KEY (relfilenode) -) WITH (user_catalog_table = true); - -COMMENT ON TABLE cstore_data_files IS 'CStore data file wide metadata'; - CREATE TABLE cstore_stripes ( - relfilenode oid NOT NULL, + storageid bigint NOT NULL, stripe bigint NOT NULL, file_offset bigint NOT NULL, data_length bigint NOT NULL, @@ -30,14 +23,13 @@ CREATE TABLE cstore_stripes ( block_count int NOT NULL, block_row_count int NOT NULL, row_count bigint NOT NULL, - PRIMARY KEY (relfilenode, stripe), - FOREIGN KEY (relfilenode) REFERENCES cstore_data_files(relfilenode) ON DELETE CASCADE INITIALLY DEFERRED + PRIMARY KEY (storageid, stripe) ) WITH (user_catalog_table = true); COMMENT ON TABLE cstore_stripes IS 'CStore per stripe metadata'; CREATE TABLE cstore_skipnodes ( - relfilenode oid NOT NULL, + storageid bigint NOT NULL, stripe bigint NOT NULL, attr int NOT NULL, block int NOT NULL, @@ -49,8 +41,8 @@ CREATE TABLE cstore_skipnodes ( exists_stream_offset bigint NOT NULL, exists_stream_length bigint NOT NULL, value_compression_type int NOT NULL, - PRIMARY KEY (relfilenode, stripe, attr, block), - FOREIGN KEY (relfilenode, stripe) REFERENCES cstore_stripes(relfilenode, stripe) ON DELETE CASCADE INITIALLY DEFERRED + PRIMARY KEY (storageid, stripe, attr, block), + FOREIGN KEY (storageid, stripe) REFERENCES cstore_stripes(storageid, stripe) ON DELETE CASCADE INITIALLY DEFERRED ) WITH (user_catalog_table = true); COMMENT ON TABLE cstore_skipnodes IS 'CStore per block metadata'; diff --git a/src/backend/columnar/sql/downgrades/columnar--10.0-1--9.5-1.sql b/src/backend/columnar/sql/downgrades/columnar--10.0-1--9.5-1.sql index 7372d3333..be30b062d 100644 --- a/src/backend/columnar/sql/downgrades/columnar--10.0-1--9.5-1.sql +++ b/src/backend/columnar/sql/downgrades/columnar--10.0-1--9.5-1.sql @@ -29,8 +29,8 @@ END$proc$; DROP TABLE 
cstore_skipnodes; DROP TABLE cstore_stripes; -DROP TABLE cstore_data_files; DROP TABLE options; +DROP SEQUENCE storageid_seq; DROP FUNCTION citus_internal.cstore_ensure_objects_exist(); diff --git a/src/include/columnar/cstore.h b/src/include/columnar/cstore.h index 8a20f0bdd..2e5c743cb 100644 --- a/src/include/columnar/cstore.h +++ b/src/include/columnar/cstore.h @@ -49,6 +49,7 @@ #define CSTORE_TUPLE_COST_MULTIPLIER 10 #define CSTORE_POSTSCRIPT_SIZE_LENGTH 1 #define CSTORE_POSTSCRIPT_SIZE_MAX 256 +#define CSTORE_BYTES_PER_PAGE (BLCKSZ - SizeOfPageHeaderData) /* Enumaration for cstore file's compression method */ typedef enum @@ -195,7 +196,7 @@ typedef struct StripeBuffers /* TableReadState represents state of a cstore file read operation. */ typedef struct TableReadState { - DataFileMetadata *datafileMetadata; + List *stripeList; StripeMetadata *currentStripeMetadata; TupleDesc tupleDescriptor; Relation relation; @@ -289,21 +290,25 @@ extern bool InitColumnarOptions(Oid regclass); extern void SetColumnarOptions(Oid regclass, ColumnarOptions *options); extern bool DeleteColumnarTableOptions(Oid regclass, bool missingOk); extern bool ReadColumnarOptions(Oid regclass, ColumnarOptions *options); -extern void DeleteDataFileMetadataRowIfExists(Oid relfilenode); -extern void InitCStoreDataFileMetadata(Oid relfilenode); -extern void UpdateCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int - stripeRowCount, CompressionType compression); -extern DataFileMetadata * ReadDataFileMetadata(Oid relfilenode, bool missingOk); -extern uint64 GetHighestUsedAddress(Oid relfilenode); +extern void WriteToSmgr(Relation relation, uint64 logicalOffset, + char *data, uint32 dataLength); +extern StringInfo ReadFromSmgr(Relation rel, uint64 offset, uint32 size); +extern bool IsCStoreTableAmTable(Oid relationId); + +/* cstore_metadata_tables.c */ +extern void DeleteMetadataRows(RelFileNode relfilenode); +extern List * StripesForRelfilenode(RelFileNode relfilenode); +extern uint64 
GetHighestUsedAddress(RelFileNode relfilenode); extern StripeMetadata ReserveStripe(Relation rel, uint64 size, uint64 rowCount, uint64 columnCount, uint64 blockCount, uint64 blockRowCount); -extern void SaveStripeSkipList(Oid relfilenode, uint64 stripe, +extern void SaveStripeSkipList(RelFileNode relfilenode, uint64 stripe, StripeSkipList *stripeSkipList, TupleDesc tupleDescriptor); -extern StripeSkipList * ReadStripeSkipList(Oid relfilenode, uint64 stripe, +extern StripeSkipList * ReadStripeSkipList(RelFileNode relfilenode, uint64 stripe, TupleDesc tupleDescriptor, uint32 blockCount); +extern Datum columnar_relation_storageid(PG_FUNCTION_ARGS); /* write_state_management.c */ @@ -335,11 +340,10 @@ typedef struct SmgrAddr static inline SmgrAddr logical_to_smgr(uint64 logicalOffset) { - uint64 bytes_per_page = BLCKSZ - SizeOfPageHeaderData; SmgrAddr addr; - addr.blockno = logicalOffset / bytes_per_page; - addr.offset = SizeOfPageHeaderData + (logicalOffset % bytes_per_page); + addr.blockno = logicalOffset / CSTORE_BYTES_PER_PAGE; + addr.offset = SizeOfPageHeaderData + (logicalOffset % CSTORE_BYTES_PER_PAGE); return addr; } @@ -351,8 +355,7 @@ logical_to_smgr(uint64 logicalOffset) static inline uint64 smgr_to_logical(SmgrAddr addr) { - uint64 bytes_per_page = BLCKSZ - SizeOfPageHeaderData; - return bytes_per_page * addr.blockno + addr.offset - SizeOfPageHeaderData; + return CSTORE_BYTES_PER_PAGE * addr.blockno + addr.offset - SizeOfPageHeaderData; } diff --git a/src/test/regress/bin/normalize.sed b/src/test/regress/bin/normalize.sed index 00deb3174..dcc7e20b0 100644 --- a/src/test/regress/bin/normalize.sed +++ b/src/test/regress/bin/normalize.sed @@ -190,3 +190,6 @@ s/relation with OID [0-9]+ does not exist/relation with OID XXXX does not exist/ # ignore timing statistics for VACUUM VERBOSE /CPU: user: .*s, system: .*s, elapsed: .*s/d + +# normalize storage id of columnar tables +s/^storage id: [0-9]+$/storage id: xxxxx/g diff --git 
a/src/test/regress/expected/.gitignore b/src/test/regress/expected/.gitignore index de611255e..dfafe792b 100644 --- a/src/test/regress/expected/.gitignore +++ b/src/test/regress/expected/.gitignore @@ -23,7 +23,6 @@ /hyperscale_tutorial.out /am_block_filtering.out /am_copyto.out -/am_create.out /am_data_types.out /am_load.out /fdw_block_filtering.out diff --git a/src/test/regress/output/am_create.source b/src/test/regress/expected/am_create.out similarity index 67% rename from src/test/regress/output/am_create.source rename to src/test/regress/expected/am_create.out index 23f4b874b..3e29d9c35 100644 --- a/src/test/regress/output/am_create.source +++ b/src/test/regress/expected/am_create.out @@ -13,8 +13,12 @@ CREATE TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, -- Test that querying an empty table works ANALYZE contestant; SELECT count(*) FROM contestant; - count -------- + count +--------------------------------------------------------------------- 0 (1 row) +-- Utility functions to be used throughout tests +CREATE FUNCTION columnar_relation_storageid(relid oid) RETURNS bigint + LANGUAGE C STABLE STRICT + AS 'citus', $$columnar_relation_storageid$$; diff --git a/src/test/regress/expected/am_drop.out b/src/test/regress/expected/am_drop.out index 83b7b4360..11bfd53b5 100644 --- a/src/test/regress/expected/am_drop.out +++ b/src/test/regress/expected/am_drop.out @@ -12,12 +12,12 @@ -- 'postgres' directory is excluded from comparison to have the same result. 
-- store postgres database oid SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset -SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset +SELECT count(distinct storageid) AS cstore_stripes_before_drop FROM cstore.cstore_stripes \gset -- DROP cstore_fdw tables DROP TABLE contestant; DROP TABLE contestant_compressed; -- make sure DROP deletes metadata -SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; +SELECT :cstore_stripes_before_drop - count(distinct storageid) FROM cstore.cstore_stripes; ?column? --------------------------------------------------------------------- 2 @@ -26,10 +26,11 @@ SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; -- Create a cstore_fdw table under a schema and drop it. CREATE SCHEMA test_schema; CREATE TABLE test_schema.test_table(data int) USING columnar; -SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset +INSERT INTO test_schema.test_table VALUES (1); +SELECT count(*) AS cstore_stripes_before_drop FROM cstore.cstore_stripes \gset DROP SCHEMA test_schema CASCADE; NOTICE: drop cascades to table test_schema.test_table -SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; +SELECT :cstore_stripes_before_drop - count(distinct storageid) FROM cstore.cstore_stripes; ?column? 
--------------------------------------------------------------------- 1 diff --git a/src/test/regress/expected/am_matview.out b/src/test/regress/expected/am_matview.out index e9f26526e..44e17c9bc 100644 --- a/src/test/regress/expected/am_matview.out +++ b/src/test/regress/expected/am_matview.out @@ -65,20 +65,15 @@ SELECT * FROM t_view a ORDER BY a; (6 rows) -- verify that we have created metadata entries for the materialized view -SELECT relfilenode FROM pg_class WHERE relname='t_view' \gset -SELECT count(*) FROM cstore.cstore_data_files WHERE relfilenode=:relfilenode; +SELECT columnar_relation_storageid(oid) AS storageid +FROM pg_class WHERE relname='t_view' \gset +SELECT count(*) FROM cstore.cstore_stripes WHERE storageid=:storageid; count --------------------------------------------------------------------- 1 (1 row) -SELECT count(*) FROM cstore.cstore_stripes WHERE relfilenode=:relfilenode; - count ---------------------------------------------------------------------- - 1 -(1 row) - -SELECT count(*) FROM cstore.cstore_skipnodes WHERE relfilenode=:relfilenode; +SELECT count(*) FROM cstore.cstore_skipnodes WHERE storageid=:storageid; count --------------------------------------------------------------------- 3 @@ -87,19 +82,13 @@ SELECT count(*) FROM cstore.cstore_skipnodes WHERE relfilenode=:relfilenode; DROP TABLE t CASCADE; NOTICE: drop cascades to materialized view t_view -- dropping must remove metadata -SELECT count(*) FROM cstore.cstore_data_files WHERE relfilenode=:relfilenode; +SELECT count(*) FROM cstore.cstore_stripes WHERE storageid=:storageid; count --------------------------------------------------------------------- 0 (1 row) -SELECT count(*) FROM cstore.cstore_stripes WHERE relfilenode=:relfilenode; - count ---------------------------------------------------------------------- - 0 -(1 row) - -SELECT count(*) FROM cstore.cstore_skipnodes WHERE relfilenode=:relfilenode; +SELECT count(*) FROM cstore.cstore_skipnodes WHERE storageid=:storageid; count 
--------------------------------------------------------------------- 0 diff --git a/src/test/regress/expected/am_recursive.out b/src/test/regress/expected/am_recursive.out index c4c05d057..5dcda3557 100644 --- a/src/test/regress/expected/am_recursive.out +++ b/src/test/regress/expected/am_recursive.out @@ -12,7 +12,7 @@ INSERT INTO t2 SELECT i, f(i) FROM generate_series(1, 5) i; -- there are no subtransactions, so above statement should batch -- INSERTs inside the UDF and create on stripe per table. SELECT relname, count(*) FROM cstore.cstore_stripes a, pg_class b -WHERE a.relfilenode=b.relfilenode AND relname IN ('t1', 't2') +WHERE columnar_relation_storageid(b.oid)=a.storageid AND relname IN ('t1', 't2') GROUP BY relname ORDER BY relname; relname | count diff --git a/src/test/regress/expected/am_rollback.out b/src/test/regress/expected/am_rollback.out index a5c9a23f6..36c5e32ed 100644 --- a/src/test/regress/expected/am_rollback.out +++ b/src/test/regress/expected/am_rollback.out @@ -2,6 +2,9 @@ -- Testing we handle rollbacks properly -- CREATE TABLE t(a int, b int) USING columnar; +CREATE VIEW t_stripes AS +SELECT * FROM cstore.cstore_stripes a, pg_class b +WHERE a.storageid = columnar_relation_storageid(b.oid) AND b.relname = 't'; BEGIN; INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; ROLLBACK; @@ -12,8 +15,7 @@ SELECT count(*) FROM t; (1 row) -- check stripe metadata also have been rolled-back -SELECT count(*) FROM cstore.cstore_stripes a, pg_class b -WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; +SELECT count(*) FROM t_stripes; count --------------------------------------------------------------------- 0 @@ -26,8 +28,7 @@ SELECT count(*) FROM t; 10 (1 row) -SELECT count(*) FROM cstore.cstore_stripes a, pg_class b -WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; +SELECT count(*) FROM t_stripes; count --------------------------------------------------------------------- 1 @@ -73,11 +74,11 @@ SELECT count(*) FROM t; 20 (1 row) 
-SELECT count(*) FROM cstore.cstore_stripes a, pg_class b -WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; +SELECT count(*) FROM t_stripes; count --------------------------------------------------------------------- 2 (1 row) DROP TABLE t; +DROP VIEW t_stripes; diff --git a/src/test/regress/expected/am_truncate.out b/src/test/regress/expected/am_truncate.out index 84ec99c01..01a51e59f 100644 --- a/src/test/regress/expected/am_truncate.out +++ b/src/test/regress/expected/am_truncate.out @@ -15,7 +15,7 @@ CREATE TABLE cstore_truncate_test_second (a int, b int) USING columnar; -- COMPRESSED CREATE TABLE cstore_truncate_test_compressed (a int, b int) USING columnar; CREATE TABLE cstore_truncate_test_regular (a int, b int); -SELECT count(*) AS cstore_data_files_before_truncate FROM cstore.cstore_data_files \gset +SELECT count(distinct storageid) AS cstore_data_files_before_truncate FROM cstore.cstore_stripes \gset INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; set cstore.compression = 'pglz'; INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; @@ -147,7 +147,7 @@ SELECT * from cstore_truncate_test; (0 rows) -- make sure TRUNATE deletes metadata for old relfilenode -SELECT :cstore_data_files_before_truncate - count(*) FROM cstore.cstore_data_files; +SELECT :cstore_data_files_before_truncate - count(distinct storageid) FROM cstore.cstore_stripes; ?column? --------------------------------------------------------------------- 0 @@ -161,7 +161,7 @@ TRUNCATE cstore_same_transaction_truncate; INSERT INTO cstore_same_transaction_truncate SELECT * FROM generate_series(20, 23); COMMIT; -- should output "1" for the newly created relation -SELECT count(*) - :cstore_data_files_before_truncate FROM cstore.cstore_data_files; +SELECT count(distinct storageid) - :cstore_data_files_before_truncate FROM cstore.cstore_stripes; ?column? 
--------------------------------------------------------------------- 1 diff --git a/src/test/regress/expected/am_vacuum.out b/src/test/regress/expected/am_vacuum.out index 9be5ee4f1..c141994d7 100644 --- a/src/test/regress/expected/am_vacuum.out +++ b/src/test/regress/expected/am_vacuum.out @@ -1,6 +1,9 @@ -SELECT count(*) AS columnar_table_count FROM cstore.cstore_data_files \gset +SELECT count(distinct storageid) AS columnar_table_count FROM cstore.cstore_stripes \gset CREATE TABLE t(a int, b int) USING columnar; -SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; +CREATE VIEW t_stripes AS +SELECT * FROM cstore.cstore_stripes a, pg_class b +WHERE a.storageid = columnar_relation_storageid(b.oid) AND b.relname='t'; +SELECT count(*) FROM t_stripes; count --------------------------------------------------------------------- 0 @@ -15,7 +18,7 @@ SELECT sum(a), sum(b) FROM t; 465 | 9455 (1 row) -SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; +SELECT count(*) FROM t_stripes; count --------------------------------------------------------------------- 3 @@ -29,7 +32,7 @@ SELECT sum(a), sum(b) FROM t; 465 | 9455 (1 row) -SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; +SELECT count(*) FROM t_stripes; count --------------------------------------------------------------------- 1 @@ -49,7 +52,7 @@ SELECT sum(a), sum(b) FROM t; 3126715 | 6261955 (1 row) -SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; +SELECT count(*) FROM t_stripes; count --------------------------------------------------------------------- 4 @@ -62,7 +65,7 @@ SELECT sum(a), sum(b) FROM t; 3126715 | 6261955 (1 row) -SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; +SELECT count(*) FROM t_stripes; count 
--------------------------------------------------------------------- 3 @@ -70,7 +73,9 @@ SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.r -- VACUUM FULL doesn't reclaim dropped columns, but converts them to NULLs ALTER TABLE t DROP COLUMN a; -SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' ORDER BY 1, 2, 3; +SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL +FROM cstore.cstore_skipnodes a, pg_class b +WHERE a.storageid = columnar_relation_storageid(b.oid) AND b.relname='t' ORDER BY 1, 2, 3; stripe | attr | block | ?column? | ?column? --------------------------------------------------------------------- 1 | 1 | 0 | f | f @@ -82,7 +87,9 @@ SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cs (6 rows) VACUUM FULL t; -SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' ORDER BY 1, 2, 3; +SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL +FROM cstore.cstore_skipnodes a, pg_class b +WHERE a.storageid = columnar_relation_storageid(b.oid) AND b.relname='t' ORDER BY 1, 2, 3; stripe | attr | block | ?column? | ?column? --------------------------------------------------------------------- 1 | 1 | 0 | t | t @@ -94,7 +101,7 @@ SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cs (6 rows) -- Make sure we cleaned-up the transient table metadata after VACUUM FULL commands -SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; +SELECT count(distinct storageid) - :columnar_table_count FROM cstore.cstore_stripes; ?column? 
--------------------------------------------------------------------- 1 @@ -112,14 +119,14 @@ SELECT count(*) FROM t; SELECT pg_size_pretty(pg_relation_size('t')); pg_size_pretty --------------------------------------------------------------------- - 32 kB + 40 kB (1 row) INSERT INTO t SELECT i FROM generate_series(1, 10000) i; SELECT pg_size_pretty(pg_relation_size('t')); pg_size_pretty --------------------------------------------------------------------- - 112 kB + 120 kB (1 row) SELECT count(*) FROM t; @@ -134,22 +141,23 @@ ROLLBACK TO SAVEPOINT s1; SELECT pg_size_pretty(pg_relation_size('t')); pg_size_pretty --------------------------------------------------------------------- - 112 kB + 120 kB (1 row) COMMIT; -- vacuum should truncate the relation to the usable space VACUUM VERBOSE t; INFO: statistics for "t": -total file size: 114688, total data size: 10754 +storage id: xxxxx +total file size: 122880, total data size: 10754 total row count: 2530, stripe count: 3, average rows per stripe: 843 block count: 3, containing data for dropped columns: 0, none compressed: 3, pglz compressed: 0 -INFO: "t": truncated 14 to 4 pages +INFO: "t": truncated 15 to 5 pages SELECT pg_size_pretty(pg_relation_size('t')); pg_size_pretty --------------------------------------------------------------------- - 32 kB + 40 kB (1 row) SELECT count(*) FROM t; @@ -187,7 +195,8 @@ INSERT INTO t SELECT i / 5 FROM generate_series(1, 1500) i; COMMIT; VACUUM VERBOSE t; INFO: statistics for "t": -total file size: 49152, total data size: 18808 +storage id: xxxxx +total file size: 57344, total data size: 18808 total row count: 5530, stripe count: 5, average rows per stripe: 1106 block count: 7, containing data for dropped columns: 0, none compressed: 5, pglz compressed: 2 @@ -203,7 +212,8 @@ INSERT INTO t SELECT 1, i / 5 FROM generate_series(1, 1500) i; ALTER TABLE t DROP COLUMN c; VACUUM VERBOSE t; INFO: statistics for "t": -total file size: 65536, total data size: 31372 +storage id: xxxxx 
+total file size: 73728, total data size: 31372 total row count: 7030, stripe count: 6, average rows per stripe: 1171 block count: 11, containing data for dropped columns: 2, none compressed: 9, pglz compressed: 2 @@ -219,13 +229,15 @@ SELECT alter_columnar_table_set('t', compression => 'pglz'); VACUUM FULL t; VACUUM VERBOSE t; INFO: statistics for "t": -total file size: 49152, total data size: 15728 +storage id: xxxxx +total file size: 57344, total data size: 15728 total row count: 7030, stripe count: 4, average rows per stripe: 1757 block count: 8, containing data for dropped columns: 0, none compressed: 2, pglz compressed: 6 DROP TABLE t; +DROP VIEW t_stripes; -- Make sure we cleaned the metadata for t too -SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; +SELECT count(distinct storageid) - :columnar_table_count FROM cstore.cstore_stripes; ?column? --------------------------------------------------------------------- 0 diff --git a/src/test/regress/expected/am_vacuum_vs_insert.out b/src/test/regress/expected/am_vacuum_vs_insert.out index 9d0e09f88..dc926adb8 100644 --- a/src/test/regress/expected/am_vacuum_vs_insert.out +++ b/src/test/regress/expected/am_vacuum_vs_insert.out @@ -11,7 +11,8 @@ step s1-insert: INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; s2: INFO: statistics for "test_vacuum_vs_insert": -total file size: 16384, total data size: 26 +storage id: xxxxx +total file size: 24576, total data size: 26 total row count: 3, stripe count: 1, average rows per stripe: 3 block count: 2, containing data for dropped columns: 0, none compressed: 2, pglz compressed: 0 @@ -51,7 +52,7 @@ step s1-commit: COMMIT; s2: INFO: vacuuming "public.test_vacuum_vs_insert" -s2: INFO: "test_vacuum_vs_insert": found 0 removable, 6 nonremovable row versions in 3 pages +s2: INFO: "test_vacuum_vs_insert": found 0 removable, 6 nonremovable row versions in 4 pages DETAIL: 0 dead row versions cannot be removed yet. 
step s2-vacuum-full: <... completed> step s2-select: diff --git a/src/test/regress/expected/multi_extension.out b/src/test/regress/expected/multi_extension.out index 7c72761d0..77bf499fd 100644 --- a/src/test/regress/expected/multi_extension.out +++ b/src/test/regress/expected/multi_extension.out @@ -483,7 +483,7 @@ SELECT * FROM print_extension_changes(); | function citus_internal.cstore_ensure_objects_exist() | function cstore.columnar_handler(internal) | schema cstore - | table cstore.cstore_data_files + | sequence cstore.storageid_seq | table cstore.cstore_skipnodes | table cstore.cstore_stripes | table cstore.options diff --git a/src/test/regress/expected/multi_extension_0.out b/src/test/regress/expected/multi_extension_0.out index c4ff31f5b..73b009794 100644 --- a/src/test/regress/expected/multi_extension_0.out +++ b/src/test/regress/expected/multi_extension_0.out @@ -479,7 +479,7 @@ SELECT * FROM print_extension_changes(); --------------------------------------------------------------------- | function citus_internal.cstore_ensure_objects_exist() | schema cstore - | table cstore.cstore_data_files + | sequence cstore.storageid_seq | table cstore.cstore_skipnodes | table cstore.cstore_stripes | table cstore.options diff --git a/src/test/regress/expected/upgrade_list_citus_objects.out b/src/test/regress/expected/upgrade_list_citus_objects.out index f217a5d7a..c8b65b731 100644 --- a/src/test/regress/expected/upgrade_list_citus_objects.out +++ b/src/test/regress/expected/upgrade_list_citus_objects.out @@ -185,13 +185,13 @@ ORDER BY 1; schema citus schema citus_internal schema cstore + sequence cstore.storageid_seq sequence pg_dist_colocationid_seq sequence pg_dist_groupid_seq sequence pg_dist_node_nodeid_seq sequence pg_dist_placement_placementid_seq sequence pg_dist_shardid_seq table citus.pg_dist_object - table cstore.cstore_data_files table cstore.cstore_skipnodes table cstore.cstore_stripes table cstore.options diff --git 
a/src/test/regress/expected/upgrade_list_citus_objects_0.out b/src/test/regress/expected/upgrade_list_citus_objects_0.out index c09b134c6..37c14cebc 100644 --- a/src/test/regress/expected/upgrade_list_citus_objects_0.out +++ b/src/test/regress/expected/upgrade_list_citus_objects_0.out @@ -181,13 +181,13 @@ ORDER BY 1; schema citus schema citus_internal schema cstore + sequence cstore.storageid_seq sequence pg_dist_colocationid_seq sequence pg_dist_groupid_seq sequence pg_dist_node_nodeid_seq sequence pg_dist_placement_placementid_seq sequence pg_dist_shardid_seq table citus.pg_dist_object - table cstore.cstore_data_files table cstore.cstore_skipnodes table cstore.cstore_stripes table cstore.options diff --git a/src/test/regress/sql/.gitignore b/src/test/regress/sql/.gitignore index 145bbaeed..a82326a50 100644 --- a/src/test/regress/sql/.gitignore +++ b/src/test/regress/sql/.gitignore @@ -22,7 +22,6 @@ /hyperscale_tutorial.sql /am_block_filtering.sql /am_copyto.sql -/am_create.sql /am_data_types.sql /am_load.sql /fdw_block_filtering.sql diff --git a/src/test/regress/input/am_create.source b/src/test/regress/sql/am_create.sql similarity index 73% rename from src/test/regress/input/am_create.source rename to src/test/regress/sql/am_create.sql index 038da3398..bd16b6f0e 100644 --- a/src/test/regress/input/am_create.source +++ b/src/test/regress/sql/am_create.sql @@ -18,3 +18,8 @@ CREATE TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, -- Test that querying an empty table works ANALYZE contestant; SELECT count(*) FROM contestant; + +-- Utility functions to be used throughout tests +CREATE FUNCTION columnar_relation_storageid(relid oid) RETURNS bigint + LANGUAGE C STABLE STRICT + AS 'citus', $$columnar_relation_storageid$$; diff --git a/src/test/regress/sql/am_drop.sql b/src/test/regress/sql/am_drop.sql index 123f8025c..186f8e3d3 100644 --- a/src/test/regress/sql/am_drop.sql +++ b/src/test/regress/sql/am_drop.sql @@ -15,22 +15,23 @@ -- store postgres 
database oid SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset -SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset +SELECT count(distinct storageid) AS cstore_stripes_before_drop FROM cstore.cstore_stripes \gset -- DROP cstore_fdw tables DROP TABLE contestant; DROP TABLE contestant_compressed; -- make sure DROP deletes metadata -SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; +SELECT :cstore_stripes_before_drop - count(distinct storageid) FROM cstore.cstore_stripes; -- Create a cstore_fdw table under a schema and drop it. CREATE SCHEMA test_schema; CREATE TABLE test_schema.test_table(data int) USING columnar; +INSERT INTO test_schema.test_table VALUES (1); -SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset +SELECT count(*) AS cstore_stripes_before_drop FROM cstore.cstore_stripes \gset DROP SCHEMA test_schema CASCADE; -SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; +SELECT :cstore_stripes_before_drop - count(distinct storageid) FROM cstore.cstore_stripes; SELECT current_database() datname \gset diff --git a/src/test/regress/sql/am_matview.sql b/src/test/regress/sql/am_matview.sql index 6b82cc236..d0d671b08 100644 --- a/src/test/regress/sql/am_matview.sql +++ b/src/test/regress/sql/am_matview.sql @@ -33,15 +33,14 @@ WHERE regclass = 't_view'::regclass; SELECT * FROM t_view a ORDER BY a; -- verify that we have created metadata entries for the materialized view -SELECT relfilenode FROM pg_class WHERE relname='t_view' \gset +SELECT columnar_relation_storageid(oid) AS storageid +FROM pg_class WHERE relname='t_view' \gset -SELECT count(*) FROM cstore.cstore_data_files WHERE relfilenode=:relfilenode; -SELECT count(*) FROM cstore.cstore_stripes WHERE relfilenode=:relfilenode; -SELECT count(*) FROM cstore.cstore_skipnodes WHERE relfilenode=:relfilenode; +SELECT count(*) FROM cstore.cstore_stripes WHERE 
storageid=:storageid; +SELECT count(*) FROM cstore.cstore_skipnodes WHERE storageid=:storageid; DROP TABLE t CASCADE; -- dropping must remove metadata -SELECT count(*) FROM cstore.cstore_data_files WHERE relfilenode=:relfilenode; -SELECT count(*) FROM cstore.cstore_stripes WHERE relfilenode=:relfilenode; -SELECT count(*) FROM cstore.cstore_skipnodes WHERE relfilenode=:relfilenode; +SELECT count(*) FROM cstore.cstore_stripes WHERE storageid=:storageid; +SELECT count(*) FROM cstore.cstore_skipnodes WHERE storageid=:storageid; diff --git a/src/test/regress/sql/am_recursive.sql b/src/test/regress/sql/am_recursive.sql index 6c7a53365..4669911f8 100644 --- a/src/test/regress/sql/am_recursive.sql +++ b/src/test/regress/sql/am_recursive.sql @@ -16,7 +16,7 @@ INSERT INTO t2 SELECT i, f(i) FROM generate_series(1, 5) i; -- there are no subtransactions, so above statement should batch -- INSERTs inside the UDF and create on stripe per table. SELECT relname, count(*) FROM cstore.cstore_stripes a, pg_class b -WHERE a.relfilenode=b.relfilenode AND relname IN ('t1', 't2') +WHERE columnar_relation_storageid(b.oid)=a.storageid AND relname IN ('t1', 't2') GROUP BY relname ORDER BY relname; diff --git a/src/test/regress/sql/am_rollback.sql b/src/test/regress/sql/am_rollback.sql index 91371c64b..a7495c8d8 100644 --- a/src/test/regress/sql/am_rollback.sql +++ b/src/test/regress/sql/am_rollback.sql @@ -4,20 +4,22 @@ CREATE TABLE t(a int, b int) USING columnar; +CREATE VIEW t_stripes AS +SELECT * FROM cstore.cstore_stripes a, pg_class b +WHERE a.storageid = columnar_relation_storageid(b.oid) AND b.relname = 't'; + BEGIN; INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; ROLLBACK; SELECT count(*) FROM t; -- check stripe metadata also have been rolled-back -SELECT count(*) FROM cstore.cstore_stripes a, pg_class b -WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; +SELECT count(*) FROM t_stripes; INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; SELECT count(*) FROM 
t; -SELECT count(*) FROM cstore.cstore_stripes a, pg_class b -WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; +SELECT count(*) FROM t_stripes; -- savepoint rollback BEGIN; @@ -36,7 +38,7 @@ COMMIT; SELECT count(*) FROM t; -SELECT count(*) FROM cstore.cstore_stripes a, pg_class b -WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; +SELECT count(*) FROM t_stripes; DROP TABLE t; +DROP VIEW t_stripes; diff --git a/src/test/regress/sql/am_truncate.sql b/src/test/regress/sql/am_truncate.sql index 3d331d400..084e3e7d8 100644 --- a/src/test/regress/sql/am_truncate.sql +++ b/src/test/regress/sql/am_truncate.sql @@ -13,7 +13,7 @@ CREATE TABLE cstore_truncate_test_second (a int, b int) USING columnar; CREATE TABLE cstore_truncate_test_compressed (a int, b int) USING columnar; CREATE TABLE cstore_truncate_test_regular (a int, b int); -SELECT count(*) AS cstore_data_files_before_truncate FROM cstore.cstore_data_files \gset +SELECT count(distinct storageid) AS cstore_data_files_before_truncate FROM cstore.cstore_stripes \gset INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; @@ -63,7 +63,7 @@ TRUNCATE TABLE cstore_truncate_test; SELECT * from cstore_truncate_test; -- make sure TRUNATE deletes metadata for old relfilenode -SELECT :cstore_data_files_before_truncate - count(*) FROM cstore.cstore_data_files; +SELECT :cstore_data_files_before_truncate - count(distinct storageid) FROM cstore.cstore_stripes; -- test if truncation in the same transaction that created the table works properly BEGIN; @@ -74,7 +74,7 @@ INSERT INTO cstore_same_transaction_truncate SELECT * FROM generate_series(20, 2 COMMIT; -- should output "1" for the newly created relation -SELECT count(*) - :cstore_data_files_before_truncate FROM cstore.cstore_data_files; +SELECT count(distinct storageid) - :cstore_data_files_before_truncate FROM cstore.cstore_stripes; SELECT * FROM cstore_same_transaction_truncate; DROP TABLE cstore_same_transaction_truncate; diff --git 
a/src/test/regress/sql/am_vacuum.sql b/src/test/regress/sql/am_vacuum.sql index f34e21f88..de21cd30b 100644 --- a/src/test/regress/sql/am_vacuum.sql +++ b/src/test/regress/sql/am_vacuum.sql @@ -1,45 +1,53 @@ -SELECT count(*) AS columnar_table_count FROM cstore.cstore_data_files \gset +SELECT count(distinct storageid) AS columnar_table_count FROM cstore.cstore_stripes \gset CREATE TABLE t(a int, b int) USING columnar; -SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; +CREATE VIEW t_stripes AS +SELECT * FROM cstore.cstore_stripes a, pg_class b +WHERE a.storageid = columnar_relation_storageid(b.oid) AND b.relname='t'; + +SELECT count(*) FROM t_stripes; INSERT INTO t SELECT i, i * i FROM generate_series(1, 10) i; INSERT INTO t SELECT i, i * i FROM generate_series(11, 20) i; INSERT INTO t SELECT i, i * i FROM generate_series(21, 30) i; SELECT sum(a), sum(b) FROM t; -SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; +SELECT count(*) FROM t_stripes; -- vacuum full should merge stripes together VACUUM FULL t; SELECT sum(a), sum(b) FROM t; -SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; +SELECT count(*) FROM t_stripes; -- test the case when all data cannot fit into a single stripe SELECT alter_columnar_table_set('t', stripe_row_count => 1000); INSERT INTO t SELECT i, 2 * i FROM generate_series(1,2500) i; SELECT sum(a), sum(b) FROM t; -SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; +SELECT count(*) FROM t_stripes; VACUUM FULL t; SELECT sum(a), sum(b) FROM t; -SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; +SELECT count(*) FROM t_stripes; -- VACUUM FULL doesn't reclaim dropped columns, but converts them to NULLs ALTER TABLE t DROP COLUMN a; -SELECT stripe, attr, block, 
minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' ORDER BY 1, 2, 3; +SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL +FROM cstore.cstore_skipnodes a, pg_class b +WHERE a.storageid = columnar_relation_storageid(b.oid) AND b.relname='t' ORDER BY 1, 2, 3; VACUUM FULL t; -SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' ORDER BY 1, 2, 3; +SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL +FROM cstore.cstore_skipnodes a, pg_class b +WHERE a.storageid = columnar_relation_storageid(b.oid) AND b.relname='t' ORDER BY 1, 2, 3; -- Make sure we cleaned-up the transient table metadata after VACUUM FULL commands -SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; +SELECT count(distinct storageid) - :columnar_table_count FROM cstore.cstore_stripes; -- do this in a transaction so concurrent autovacuum doesn't interfere with results BEGIN; @@ -99,6 +107,7 @@ VACUUM FULL t; VACUUM VERBOSE t; DROP TABLE t; +DROP VIEW t_stripes; -- Make sure we cleaned the metadata for t too -SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; +SELECT count(distinct storageid) - :columnar_table_count FROM cstore.cstore_stripes;