From b9e18406fa9acdb1486370a3c81fff543c3311e5 Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Wed, 18 Jan 2023 15:31:14 +0300 Subject: [PATCH] Fall-back to seq-scan when accessing columnar metadata if the index doesn't exist Fixes #6570. In the past, having columnar tables in the cluster was causing pg upgrades to fail when attempting to access columnar metadata. This is because pg_dump doesn't see the objects that we use for columnar-am related bookkeeping as dependencies of the tables using columnar-am. To fix that, in #5456 we inserted some "normal dependency" edges (from those objects to columnar-am) into pg_depend. This helped us ensure the existence of a class of metadata objects --such as columnar.storageid_seq-- and helped fix #5437. However, the normal-dependency edges that we added for indexes on columnar metadata tables --such as columnar.stripe_pkey-- didn't help at all because they were indeed causing dependency loops (#5510) and pg_dump was not able to take those dependency edges into account. For this reason, instead of inserting such dependency edges from indexes to columnar-am, we allow columnar metadata accessors to fall back to a sequential scan during pg upgrades.
(cherry picked from commit 1c51ddae494c0230bc240d49cd7c487150b365d5) --- src/backend/columnar/columnar_metadata.c | 259 ++++++++++++------ src/test/regress/expected/columnar_create.out | 83 ++++++ .../regress/expected/columnar_indexes.out | 26 ++ .../expected/upgrade_columnar_before.out | 3 + src/test/regress/sql/columnar_create.sql | 52 ++++ src/test/regress/sql/columnar_indexes.sql | 10 + .../regress/sql/upgrade_columnar_before.sql | 4 + 7 files changed, 354 insertions(+), 83 deletions(-) diff --git a/src/backend/columnar/columnar_metadata.c b/src/backend/columnar/columnar_metadata.c index 36b974b08..5f780dc79 100644 --- a/src/backend/columnar/columnar_metadata.c +++ b/src/backend/columnar/columnar_metadata.c @@ -59,6 +59,10 @@ #include "utils/rel.h" #include "utils/relfilenodemap.h" +#define SLOW_METADATA_ACCESS_WARNING \ + "Metadata index %s is not available, this might mean slower read/writes " \ + "on columnar tables. This is expected during Postgres upgrades and not " \ + "expected otherwise." 
typedef struct { @@ -551,15 +555,23 @@ ReadStripeSkipList(RelFileNode relfilenode, uint64 stripe, TupleDesc tupleDescri Oid columnarChunkOid = ColumnarChunkRelationId(); Relation columnarChunk = table_open(columnarChunkOid, AccessShareLock); - Relation index = index_open(ColumnarChunkIndexRelationId(), AccessShareLock); ScanKeyInit(&scanKey[0], Anum_columnar_chunk_storageid, BTEqualStrategyNumber, F_OIDEQ, UInt64GetDatum(storageId)); ScanKeyInit(&scanKey[1], Anum_columnar_chunk_stripe, BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(stripe)); - SysScanDesc scanDescriptor = systable_beginscan_ordered(columnarChunk, index, - snapshot, 2, scanKey); + Oid indexId = ColumnarChunkIndexRelationId(); + bool indexOk = OidIsValid(indexId); + SysScanDesc scanDescriptor = systable_beginscan(columnarChunk, indexId, + indexOk, snapshot, 2, scanKey); + + static bool loggedSlowMetadataAccessWarning = false; + if (!indexOk && !loggedSlowMetadataAccessWarning) + { + ereport(WARNING, (errmsg(SLOW_METADATA_ACCESS_WARNING, "chunk_pkey"))); + loggedSlowMetadataAccessWarning = true; + } StripeSkipList *chunkList = palloc0(sizeof(StripeSkipList)); chunkList->chunkCount = chunkCount; @@ -571,8 +583,7 @@ ReadStripeSkipList(RelFileNode relfilenode, uint64 stripe, TupleDesc tupleDescri palloc0(chunkCount * sizeof(ColumnChunkSkipNode)); } - while (HeapTupleIsValid(heapTuple = systable_getnext_ordered(scanDescriptor, - ForwardScanDirection))) + while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor))) { Datum datumArray[Natts_columnar_chunk]; bool isNullArray[Natts_columnar_chunk]; @@ -637,8 +648,7 @@ ReadStripeSkipList(RelFileNode relfilenode, uint64 stripe, TupleDesc tupleDescri } } - systable_endscan_ordered(scanDescriptor); - index_close(index, AccessShareLock); + systable_endscan(scanDescriptor); table_close(columnarChunk, AccessShareLock); chunkList->chunkGroupRowCounts = @@ -649,9 +659,9 @@ ReadStripeSkipList(RelFileNode relfilenode, uint64 stripe, TupleDesc tupleDescri /* - * 
FindStripeByRowNumber returns StripeMetadata for the stripe whose - * firstRowNumber is greater than given rowNumber. If no such stripe - * exists, then returns NULL. + * FindNextStripeByRowNumber returns StripeMetadata for the stripe that has the + * smallest firstRowNumber among the stripes whose firstRowNumber is greater + * than given rowNumber. If no such stripe exists, then returns NULL. */ StripeMetadata * FindNextStripeByRowNumber(Relation relation, uint64 rowNumber, Snapshot snapshot) @@ -741,8 +751,7 @@ StripeGetHighestRowNumber(StripeMetadata *stripeMetadata) /* * StripeMetadataLookupRowNumber returns StripeMetadata for the stripe whose * firstRowNumber is less than or equal to (FIND_LESS_OR_EQUAL), or is - * greater than (FIND_GREATER) given rowNumber by doing backward index - * scan on stripe_first_row_number_idx. + * greater than (FIND_GREATER) given rowNumber. * If no such stripe exists, then returns NULL. */ static StripeMetadata * @@ -773,31 +782,71 @@ StripeMetadataLookupRowNumber(Relation relation, uint64 rowNumber, Snapshot snap ScanKeyInit(&scanKey[1], Anum_columnar_stripe_first_row_number, strategyNumber, procedure, UInt64GetDatum(rowNumber)); - Relation columnarStripes = table_open(ColumnarStripeRelationId(), AccessShareLock); - Relation index = index_open(ColumnarStripeFirstRowNumberIndexRelationId(), - AccessShareLock); - SysScanDesc scanDescriptor = systable_beginscan_ordered(columnarStripes, index, - snapshot, 2, - scanKey); - ScanDirection scanDirection = NoMovementScanDirection; - if (lookupMode == FIND_LESS_OR_EQUAL) + Oid indexId = ColumnarStripeFirstRowNumberIndexRelationId(); + bool indexOk = OidIsValid(indexId); + SysScanDesc scanDescriptor = systable_beginscan(columnarStripes, indexId, indexOk, + snapshot, 2, scanKey); + + static bool loggedSlowMetadataAccessWarning = false; + if (!indexOk && !loggedSlowMetadataAccessWarning) { - scanDirection = BackwardScanDirection; - } - else if (lookupMode == FIND_GREATER) - { - scanDirection = 
ForwardScanDirection; - } - HeapTuple heapTuple = systable_getnext_ordered(scanDescriptor, scanDirection); - if (HeapTupleIsValid(heapTuple)) - { - foundStripeMetadata = BuildStripeMetadata(columnarStripes, heapTuple); + ereport(WARNING, (errmsg(SLOW_METADATA_ACCESS_WARNING, + "stripe_first_row_number_idx"))); + loggedSlowMetadataAccessWarning = true; } - systable_endscan_ordered(scanDescriptor); - index_close(index, AccessShareLock); + if (indexOk) + { + ScanDirection scanDirection = NoMovementScanDirection; + if (lookupMode == FIND_LESS_OR_EQUAL) + { + scanDirection = BackwardScanDirection; + } + else if (lookupMode == FIND_GREATER) + { + scanDirection = ForwardScanDirection; + } + HeapTuple heapTuple = systable_getnext_ordered(scanDescriptor, scanDirection); + if (HeapTupleIsValid(heapTuple)) + { + foundStripeMetadata = BuildStripeMetadata(columnarStripes, heapTuple); + } + } + else + { + HeapTuple heapTuple = NULL; + while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor))) + { + StripeMetadata *stripe = BuildStripeMetadata(columnarStripes, heapTuple); + if (!foundStripeMetadata) + { + /* first match */ + foundStripeMetadata = stripe; + } + else if (lookupMode == FIND_LESS_OR_EQUAL && + stripe->firstRowNumber > foundStripeMetadata->firstRowNumber) + { + /* + * Among the stripes with firstRowNumber less-than-or-equal-to given, + * we're looking for the one with the greatest firstRowNumber. + */ + foundStripeMetadata = stripe; + } + else if (lookupMode == FIND_GREATER && + stripe->firstRowNumber < foundStripeMetadata->firstRowNumber) + { + /* + * Among the stripes with firstRowNumber greater-than given, + * we're looking for the one with the smallest firstRowNumber. 
+ */ + foundStripeMetadata = stripe; + } + } + } + + systable_endscan(scanDescriptor); table_close(columnarStripes, AccessShareLock); return foundStripeMetadata; @@ -871,8 +920,8 @@ CheckStripeMetadataConsistency(StripeMetadata *stripeMetadata) /* * FindStripeWithHighestRowNumber returns StripeMetadata for the stripe that - * has the row with highest rowNumber by doing backward index scan on - * stripe_first_row_number_idx. If given relation is empty, then returns NULL. + * has the row with highest rowNumber. If given relation is empty, then returns + * NULL. */ StripeMetadata * FindStripeWithHighestRowNumber(Relation relation, Snapshot snapshot) @@ -885,19 +934,46 @@ FindStripeWithHighestRowNumber(Relation relation, Snapshot snapshot) BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(storageId)); Relation columnarStripes = table_open(ColumnarStripeRelationId(), AccessShareLock); - Relation index = index_open(ColumnarStripeFirstRowNumberIndexRelationId(), - AccessShareLock); - SysScanDesc scanDescriptor = systable_beginscan_ordered(columnarStripes, index, - snapshot, 1, scanKey); - HeapTuple heapTuple = systable_getnext_ordered(scanDescriptor, BackwardScanDirection); - if (HeapTupleIsValid(heapTuple)) + Oid indexId = ColumnarStripeFirstRowNumberIndexRelationId(); + bool indexOk = OidIsValid(indexId); + SysScanDesc scanDescriptor = systable_beginscan(columnarStripes, indexId, indexOk, + snapshot, 1, scanKey); + + static bool loggedSlowMetadataAccessWarning = false; + if (!indexOk && !loggedSlowMetadataAccessWarning) { - stripeWithHighestRowNumber = BuildStripeMetadata(columnarStripes, heapTuple); + ereport(WARNING, (errmsg(SLOW_METADATA_ACCESS_WARNING, + "stripe_first_row_number_idx"))); + loggedSlowMetadataAccessWarning = true; } - systable_endscan_ordered(scanDescriptor); - index_close(index, AccessShareLock); + if (indexOk) + { + /* do one-time fetch using the index */ + HeapTuple heapTuple = systable_getnext_ordered(scanDescriptor, + BackwardScanDirection); + if 
(HeapTupleIsValid(heapTuple)) + { + stripeWithHighestRowNumber = BuildStripeMetadata(columnarStripes, heapTuple); + } + } + else + { + HeapTuple heapTuple = NULL; + while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor))) + { + StripeMetadata *stripe = BuildStripeMetadata(columnarStripes, heapTuple); + if (!stripeWithHighestRowNumber || + stripe->firstRowNumber > stripeWithHighestRowNumber->firstRowNumber) + { + /* first or a greater match */ + stripeWithHighestRowNumber = stripe; + } + } + } + + systable_endscan(scanDescriptor); table_close(columnarStripes, AccessShareLock); return stripeWithHighestRowNumber; @@ -914,7 +990,6 @@ ReadChunkGroupRowCounts(uint64 storageId, uint64 stripe, uint32 chunkGroupCount, { Oid columnarChunkGroupOid = ColumnarChunkGroupRelationId(); Relation columnarChunkGroup = table_open(columnarChunkGroupOid, AccessShareLock); - Relation index = index_open(ColumnarChunkGroupIndexRelationId(), AccessShareLock); ScanKeyData scanKey[2]; ScanKeyInit(&scanKey[0], Anum_columnar_chunkgroup_storageid, @@ -922,15 +997,22 @@ ReadChunkGroupRowCounts(uint64 storageId, uint64 stripe, uint32 chunkGroupCount, ScanKeyInit(&scanKey[1], Anum_columnar_chunkgroup_stripe, BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(stripe)); + Oid indexId = ColumnarChunkGroupIndexRelationId(); + bool indexOk = OidIsValid(indexId); SysScanDesc scanDescriptor = - systable_beginscan_ordered(columnarChunkGroup, index, snapshot, 2, scanKey); + systable_beginscan(columnarChunkGroup, indexId, indexOk, snapshot, 2, scanKey); + + static bool loggedSlowMetadataAccessWarning = false; + if (!indexOk && !loggedSlowMetadataAccessWarning) + { + ereport(WARNING, (errmsg(SLOW_METADATA_ACCESS_WARNING, "chunk_group_pkey"))); + loggedSlowMetadataAccessWarning = true; + } - uint32 chunkGroupIndex = 0; HeapTuple heapTuple = NULL; uint32 *chunkGroupRowCounts = palloc0(chunkGroupCount * sizeof(uint32)); - while (HeapTupleIsValid(heapTuple = systable_getnext_ordered(scanDescriptor, - 
ForwardScanDirection))) + while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor))) { Datum datumArray[Natts_columnar_chunkgroup]; bool isNullArray[Natts_columnar_chunkgroup]; @@ -941,24 +1023,16 @@ ReadChunkGroupRowCounts(uint64 storageId, uint64 stripe, uint32 chunkGroupCount, uint32 tupleChunkGroupIndex = DatumGetUInt32(datumArray[Anum_columnar_chunkgroup_chunk - 1]); - if (chunkGroupIndex >= chunkGroupCount || - tupleChunkGroupIndex != chunkGroupIndex) + if (tupleChunkGroupIndex >= chunkGroupCount) { elog(ERROR, "unexpected chunk group"); } - chunkGroupRowCounts[chunkGroupIndex] = + chunkGroupRowCounts[tupleChunkGroupIndex] = (uint32) DatumGetUInt64(datumArray[Anum_columnar_chunkgroup_row_count - 1]); - chunkGroupIndex++; } - if (chunkGroupIndex != chunkGroupCount) - { - elog(ERROR, "unexpected chunk group count"); - } - - systable_endscan_ordered(scanDescriptor); - index_close(index, AccessShareLock); + systable_endscan(scanDescriptor); table_close(columnarChunkGroup, AccessShareLock); return chunkGroupRowCounts; @@ -1155,14 +1229,20 @@ UpdateStripeMetadataRow(uint64 storageId, uint64 stripeId, bool *update, Oid columnarStripesOid = ColumnarStripeRelationId(); Relation columnarStripes = table_open(columnarStripesOid, AccessShareLock); - Relation columnarStripePkeyIndex = index_open(ColumnarStripePKeyIndexRelationId(), - AccessShareLock); - SysScanDesc scanDescriptor = systable_beginscan_ordered(columnarStripes, - columnarStripePkeyIndex, - &dirtySnapshot, 2, scanKey); + Oid indexId = ColumnarStripePKeyIndexRelationId(); + bool indexOk = OidIsValid(indexId); + SysScanDesc scanDescriptor = systable_beginscan(columnarStripes, indexId, indexOk, + &dirtySnapshot, 2, scanKey); - HeapTuple oldTuple = systable_getnext_ordered(scanDescriptor, ForwardScanDirection); + static bool loggedSlowMetadataAccessWarning = false; + if (!indexOk && !loggedSlowMetadataAccessWarning) + { + ereport(WARNING, (errmsg(SLOW_METADATA_ACCESS_WARNING, "stripe_pkey"))); + 
loggedSlowMetadataAccessWarning = true; + } + + HeapTuple oldTuple = systable_getnext(scanDescriptor); if (!HeapTupleIsValid(oldTuple)) { ereport(ERROR, (errmsg("attempted to modify an unexpected stripe, " @@ -1197,8 +1277,7 @@ UpdateStripeMetadataRow(uint64 storageId, uint64 stripeId, bool *update, CommandCounterIncrement(); - systable_endscan_ordered(scanDescriptor); - index_close(columnarStripePkeyIndex, AccessShareLock); + systable_endscan(scanDescriptor); table_close(columnarStripes, AccessShareLock); /* return StripeMetadata object built from modified tuple */ @@ -1209,6 +1288,10 @@ UpdateStripeMetadataRow(uint64 storageId, uint64 stripeId, bool *update, /* * ReadDataFileStripeList reads the stripe list for a given storageId * in the given snapshot. + * + * Doesn't sort the stripes by their ids before returning if + * stripe_first_row_number_idx is not available --normally can only happen + * during pg upgrades. */ static List * ReadDataFileStripeList(uint64 storageId, Snapshot snapshot) @@ -1223,22 +1306,27 @@ ReadDataFileStripeList(uint64 storageId, Snapshot snapshot) Oid columnarStripesOid = ColumnarStripeRelationId(); Relation columnarStripes = table_open(columnarStripesOid, AccessShareLock); - Relation index = index_open(ColumnarStripeFirstRowNumberIndexRelationId(), - AccessShareLock); - SysScanDesc scanDescriptor = systable_beginscan_ordered(columnarStripes, index, - snapshot, 1, - scanKey); + Oid indexId = ColumnarStripeFirstRowNumberIndexRelationId(); + bool indexOk = OidIsValid(indexId); + SysScanDesc scanDescriptor = systable_beginscan(columnarStripes, indexId, + indexOk, snapshot, 1, scanKey); - while (HeapTupleIsValid(heapTuple = systable_getnext_ordered(scanDescriptor, - ForwardScanDirection))) + static bool loggedSlowMetadataAccessWarning = false; + if (!indexOk && !loggedSlowMetadataAccessWarning) + { + ereport(WARNING, (errmsg(SLOW_METADATA_ACCESS_WARNING, + "stripe_first_row_number_idx"))); + loggedSlowMetadataAccessWarning = true; + } + + 
while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor))) { StripeMetadata *stripeMetadata = BuildStripeMetadata(columnarStripes, heapTuple); stripeMetadataList = lappend(stripeMetadataList, stripeMetadata); } - systable_endscan_ordered(scanDescriptor); - index_close(index, AccessShareLock); + systable_endscan(scanDescriptor); table_close(columnarStripes, AccessShareLock); return stripeMetadataList; @@ -1349,25 +1437,30 @@ DeleteStorageFromColumnarMetadataTable(Oid metadataTableId, return; } - Relation index = index_open(storageIdIndexId, AccessShareLock); + bool indexOk = OidIsValid(storageIdIndexId); + SysScanDesc scanDescriptor = systable_beginscan(metadataTable, storageIdIndexId, + indexOk, NULL, 1, scanKey); - SysScanDesc scanDescriptor = systable_beginscan_ordered(metadataTable, index, NULL, - 1, scanKey); + static bool loggedSlowMetadataAccessWarning = false; + if (!indexOk && !loggedSlowMetadataAccessWarning) + { + ereport(WARNING, (errmsg(SLOW_METADATA_ACCESS_WARNING, + "on a columnar metadata table"))); + loggedSlowMetadataAccessWarning = true; + } ModifyState *modifyState = StartModifyRelation(metadataTable); HeapTuple heapTuple; - while (HeapTupleIsValid(heapTuple = systable_getnext_ordered(scanDescriptor, - ForwardScanDirection))) + while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor))) { DeleteTupleAndEnforceConstraints(modifyState, heapTuple); } - systable_endscan_ordered(scanDescriptor); + systable_endscan(scanDescriptor); FinishModifyRelation(modifyState); - index_close(index, AccessShareLock); table_close(metadataTable, AccessShareLock); } diff --git a/src/test/regress/expected/columnar_create.out b/src/test/regress/expected/columnar_create.out index 9cb025336..b1bd16b06 100644 --- a/src/test/regress/expected/columnar_create.out +++ b/src/test/regress/expected/columnar_create.out @@ -60,6 +60,89 @@ SELECT columnar_test_helpers.columnar_metadata_has_storage_id(:columnar_table_1_ t (1 row) +BEGIN; + INSERT INTO 
columnar_table_1 VALUES (2); +ROLLBACK; +INSERT INTO columnar_table_1 VALUES (3),(4); +INSERT INTO columnar_table_1 VALUES (5),(6); +INSERT INTO columnar_table_1 VALUES (7),(8); +-- Test whether columnar metadata accessors are still fine even +-- when the metadata indexes are not available to them. +BEGIN; + ALTER INDEX columnar.stripe_first_row_number_idx RENAME TO new_index_name; + ALTER INDEX columnar.chunk_pkey RENAME TO new_index_name_1; + ALTER INDEX columnar.stripe_pkey RENAME TO new_index_name_2; + ALTER INDEX columnar.chunk_group_pkey RENAME TO new_index_name_3; + CREATE INDEX columnar_table_1_idx ON columnar_table_1(a); +WARNING: Metadata index stripe_first_row_number_idx is not available, this might mean slower read/writes on columnar tables. This is expected during Postgres upgrades and not expected otherwise. +WARNING: Metadata index stripe_first_row_number_idx is not available, this might mean slower read/writes on columnar tables. This is expected during Postgres upgrades and not expected otherwise. +WARNING: Metadata index chunk_pkey is not available, this might mean slower read/writes on columnar tables. This is expected during Postgres upgrades and not expected otherwise. +WARNING: Metadata index chunk_group_pkey is not available, this might mean slower read/writes on columnar tables. This is expected during Postgres upgrades and not expected otherwise. + -- make sure that we test index scan + SET LOCAL columnar.enable_custom_scan TO 'off'; + SET LOCAL enable_seqscan TO off; + SET LOCAL seq_page_cost TO 10000000; + SELECT * FROM columnar_table_1 WHERE a = 6; +WARNING: Metadata index stripe_first_row_number_idx is not available, this might mean slower read/writes on columnar tables. This is expected during Postgres upgrades and not expected otherwise. 
+ a +--------------------------------------------------------------------- + 6 +(1 row) + + SELECT * FROM columnar_table_1 WHERE a = 5; + a +--------------------------------------------------------------------- + 5 +(1 row) + + SELECT * FROM columnar_table_1 WHERE a = 7; + a +--------------------------------------------------------------------- + 7 +(1 row) + + SELECT * FROM columnar_table_1 WHERE a = 3; + a +--------------------------------------------------------------------- + 3 +(1 row) + + DROP INDEX columnar_table_1_idx; + -- Re-shuffle some metadata records to test whether we can + -- rely on sequential metadata scan when the metadata records + -- are not ordered by their "first_row_number"s. + WITH cte AS ( + DELETE FROM columnar.stripe + WHERE storage_id = :columnar_table_1_storage_id + RETURNING * + ) + INSERT INTO columnar.stripe SELECT * FROM cte ORDER BY first_row_number DESC; + SELECT SUM(a) FROM columnar_table_1; + sum +--------------------------------------------------------------------- + 34 +(1 row) + + SELECT * FROM columnar_table_1 WHERE a = 6; + a +--------------------------------------------------------------------- + 6 +(1 row) + + -- Run a SELECT query after the INSERT command to force flushing the + -- data within the xact block. + INSERT INTO columnar_table_1 VALUES (20); + SELECT COUNT(*) FROM columnar_table_1; +WARNING: Metadata index stripe_pkey is not available, this might mean slower read/writes on columnar tables. This is expected during Postgres upgrades and not expected otherwise. + count +--------------------------------------------------------------------- + 8 +(1 row) + + DROP TABLE columnar_table_1 CASCADE; +NOTICE: drop cascades to materialized view columnar_table_1_mv +WARNING: Metadata index on a columnar metadata table is not available, this might mean slower read/writes on columnar tables. This is expected during Postgres upgrades and not expected otherwise. 
+ROLLBACK; -- test dropping columnar table DROP TABLE columnar_table_1 CASCADE; NOTICE: drop cascades to materialized view columnar_table_1_mv diff --git a/src/test/regress/expected/columnar_indexes.out b/src/test/regress/expected/columnar_indexes.out index fa9d8d3ac..181042a1b 100644 --- a/src/test/regress/expected/columnar_indexes.out +++ b/src/test/regress/expected/columnar_indexes.out @@ -257,6 +257,32 @@ SELECT SUM(a)=48000 FROM columnar_table WHERE a = 16000 OR a = 32000; t (1 row) +BEGIN; + ALTER INDEX columnar.stripe_first_row_number_idx RENAME TO new_index_name; + ALTER INDEX columnar.chunk_pkey RENAME TO new_index_name_1; + -- same queries but this time some metadata indexes are not available + SELECT SUM(a)=312487500 FROM columnar_table WHERE a < 25000; +WARNING: Metadata index stripe_first_row_number_idx is not available, this might mean slower read/writes on columnar tables. This is expected during Postgres upgrades and not expected otherwise. +WARNING: Metadata index stripe_first_row_number_idx is not available, this might mean slower read/writes on columnar tables. This is expected during Postgres upgrades and not expected otherwise. +WARNING: Metadata index chunk_pkey is not available, this might mean slower read/writes on columnar tables. This is expected during Postgres upgrades and not expected otherwise. + ?column? +--------------------------------------------------------------------- + t +(1 row) + + SELECT SUM(a)=167000 FROM columnar_table WHERE a = 16000 OR a = 151000; + ?column? +--------------------------------------------------------------------- + t +(1 row) + + SELECT SUM(a)=48000 FROM columnar_table WHERE a = 16000 OR a = 32000; + ?column? 
+--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; TRUNCATE columnar_table; ALTER TABLE columnar_table DROP CONSTRAINT columnar_table_pkey; -- hash -- diff --git a/src/test/regress/expected/upgrade_columnar_before.out b/src/test/regress/expected/upgrade_columnar_before.out index 8e8494d31..dcf341c64 100644 --- a/src/test/regress/expected/upgrade_columnar_before.out +++ b/src/test/regress/expected/upgrade_columnar_before.out @@ -378,3 +378,6 @@ select count(DISTINCT value) from text_data; 11 (1 row) +-- test using a columnar partition +CREATE TABLE foo (d DATE NOT NULL) PARTITION BY RANGE (d); +CREATE TABLE foo3 PARTITION OF foo FOR VALUES FROM ('2009-02-01') TO ('2009-03-01') USING COLUMNAR; diff --git a/src/test/regress/sql/columnar_create.sql b/src/test/regress/sql/columnar_create.sql index a5861bb45..242db5e0f 100644 --- a/src/test/regress/sql/columnar_create.sql +++ b/src/test/regress/sql/columnar_create.sql @@ -48,6 +48,58 @@ ROLLBACK; -- since we rollback'ed above xact, should return true SELECT columnar_test_helpers.columnar_metadata_has_storage_id(:columnar_table_1_storage_id); +BEGIN; + INSERT INTO columnar_table_1 VALUES (2); +ROLLBACK; + +INSERT INTO columnar_table_1 VALUES (3),(4); +INSERT INTO columnar_table_1 VALUES (5),(6); +INSERT INTO columnar_table_1 VALUES (7),(8); + +-- Test whether columnar metadata accessors are still fine even +-- when the metadata indexes are not available to them. 
+BEGIN; + ALTER INDEX columnar.stripe_first_row_number_idx RENAME TO new_index_name; + ALTER INDEX columnar.chunk_pkey RENAME TO new_index_name_1; + ALTER INDEX columnar.stripe_pkey RENAME TO new_index_name_2; + ALTER INDEX columnar.chunk_group_pkey RENAME TO new_index_name_3; + + CREATE INDEX columnar_table_1_idx ON columnar_table_1(a); + + -- make sure that we test index scan + SET LOCAL columnar.enable_custom_scan TO 'off'; + SET LOCAL enable_seqscan TO off; + SET LOCAL seq_page_cost TO 10000000; + + SELECT * FROM columnar_table_1 WHERE a = 6; + SELECT * FROM columnar_table_1 WHERE a = 5; + SELECT * FROM columnar_table_1 WHERE a = 7; + SELECT * FROM columnar_table_1 WHERE a = 3; + + DROP INDEX columnar_table_1_idx; + + -- Re-shuffle some metadata records to test whether we can + -- rely on sequential metadata scan when the metadata records + -- are not ordered by their "first_row_number"s. + WITH cte AS ( + DELETE FROM columnar.stripe + WHERE storage_id = :columnar_table_1_storage_id + RETURNING * + ) + INSERT INTO columnar.stripe SELECT * FROM cte ORDER BY first_row_number DESC; + + SELECT SUM(a) FROM columnar_table_1; + + SELECT * FROM columnar_table_1 WHERE a = 6; + + -- Run a SELECT query after the INSERT command to force flushing the + -- data within the xact block. 
+ INSERT INTO columnar_table_1 VALUES (20); + SELECT COUNT(*) FROM columnar_table_1; + + DROP TABLE columnar_table_1 CASCADE; +ROLLBACK; + -- test dropping columnar table DROP TABLE columnar_table_1 CASCADE; SELECT columnar_test_helpers.columnar_metadata_has_storage_id(:columnar_table_1_storage_id); diff --git a/src/test/regress/sql/columnar_indexes.sql b/src/test/regress/sql/columnar_indexes.sql index 8fec947b2..80f78e06f 100644 --- a/src/test/regress/sql/columnar_indexes.sql +++ b/src/test/regress/sql/columnar_indexes.sql @@ -167,6 +167,16 @@ SELECT SUM(a)=312487500 FROM columnar_table WHERE a < 25000; SELECT SUM(a)=167000 FROM columnar_table WHERE a = 16000 OR a = 151000; SELECT SUM(a)=48000 FROM columnar_table WHERE a = 16000 OR a = 32000; +BEGIN; + ALTER INDEX columnar.stripe_first_row_number_idx RENAME TO new_index_name; + ALTER INDEX columnar.chunk_pkey RENAME TO new_index_name_1; + + -- same queries but this time some metadata indexes are not available + SELECT SUM(a)=312487500 FROM columnar_table WHERE a < 25000; + SELECT SUM(a)=167000 FROM columnar_table WHERE a = 16000 OR a = 151000; + SELECT SUM(a)=48000 FROM columnar_table WHERE a = 16000 OR a = 32000; +ROLLBACK; + TRUNCATE columnar_table; ALTER TABLE columnar_table DROP CONSTRAINT columnar_table_pkey; diff --git a/src/test/regress/sql/upgrade_columnar_before.sql b/src/test/regress/sql/upgrade_columnar_before.sql index 1f83a4d5a..34d5a103e 100644 --- a/src/test/regress/sql/upgrade_columnar_before.sql +++ b/src/test/regress/sql/upgrade_columnar_before.sql @@ -284,3 +284,7 @@ $$ LANGUAGE plpgsql; CREATE TABLE text_data (id SERIAL, value TEXT) USING COLUMNAR; INSERT INTO text_data (value) SELECT generate_random_string(1024 * 10) FROM generate_series(0,10); select count(DISTINCT value) from text_data; + +-- test using a columnar partition +CREATE TABLE foo (d DATE NOT NULL) PARTITION BY RANGE (d); +CREATE TABLE foo3 PARTITION OF foo FOR VALUES FROM ('2009-02-01') TO ('2009-03-01') USING COLUMNAR;