From f433737e15fdd5cb284ffd38a75e94896138d69f Mon Sep 17 00:00:00 2001 From: Hanefi Onaldi Date: Thu, 6 Oct 2022 16:29:38 +0300 Subject: [PATCH] Look for pg_dist_object in pg_catalog schema Citus 11 moved `pg_dist_object` to `pg_catalog`. When doing a downgrade, for a brief period of time, Citus 10.2 expects to have this table in `citus` schema, but fails. This can potentially cause failures in downgrade. We now search for the relation in the `citus` schema and if that fails, we search in the `pg_catalog` schema. --- .../distributed/metadata/metadata_cache.c | 96 +++++++++++++++++-- 1 file changed, 90 insertions(+), 6 deletions(-) diff --git a/src/backend/distributed/metadata/metadata_cache.c b/src/backend/distributed/metadata/metadata_cache.c index fa01aa42a..db75abd17 100644 --- a/src/backend/distributed/metadata/metadata_cache.c +++ b/src/backend/distributed/metadata/metadata_cache.c @@ -242,8 +242,13 @@ static void GetPartitionTypeInputInfo(char *partitionKeyString, char partitionMe Oid *intervalTypeId, int32 *intervalTypeMod); static void CachedNamespaceLookup(const char *nspname, Oid *cachedOid); static void CachedRelationLookup(const char *relationName, Oid *cachedOid); +static void CachedRelationLookupExtended(const char *relationName, Oid *cachedOid, + bool missing_ok); static void CachedRelationNamespaceLookup(const char *relationName, Oid relnamespace, Oid *cachedOid); +static void CachedRelationNamespaceLookupExtended(const char *relationName, + Oid renamespace, Oid *cachedOid, + bool missing_ok); static ShardPlacement * ResolveGroupShardPlacement( GroupShardPlacement *groupShardPlacement, CitusTableCacheEntry *tableEntry, int shardIndex); @@ -2193,8 +2198,37 @@ CitusCatalogNamespaceId(void) Oid DistObjectRelationId(void) { - CachedRelationNamespaceLookup("pg_dist_object", CitusCatalogNamespaceId(), - &MetadataCache.distObjectRelationId); + /* + * In older versions pg_dist_object was living in the `citus` namespace, With Citus 11 + * this has been moved to pg_dist_catalog. + * + * During downgrades it could therefore be that we simply need to look in the new + * catalog. Since we expect to find it most of the time in the citus schema from + * now on we will start there. + * + * even after the table has been moved, the oid's stay the same, so we don't have to + * invalidate the cache after a move + * + * Note: during testing we also up/downgrade the extension, and sometimes interact + * with the database when the schema and the binary are not in sync. Hance we always + * allow the catalog to be missing on our first lookup. The error message might + * therefore become misleading as it will complain about citus.pg_dist_object not + * being found when called too early. + */ + CachedRelationNamespaceLookupExtended("pg_dist_object", + CitusCatalogNamespaceId(), + &MetadataCache.distObjectRelationId, + true); + if (!OidIsValid(MetadataCache.distObjectRelationId)) + { + /* + * We can only ever reach here while we are creating/altering our extension before + * the table is moved back to citus. + */ + CachedRelationLookupExtended("pg_dist_object", + &MetadataCache.distObjectRelationId, + false); + } return MetadataCache.distObjectRelationId; } @@ -2204,9 +2238,38 @@ DistObjectRelationId(void) Oid DistObjectPrimaryKeyIndexId(void) { - CachedRelationNamespaceLookup("pg_dist_object_pkey", - CitusCatalogNamespaceId(), - &MetadataCache.distObjectPrimaryKeyIndexId); + /* + * In older versions pg_dist_object was living in the `citus` namespace, With Citus 11 + * this has been moved to pg_dist_catalog. + * + * During upgrades it could therefore be that we simply need to look in the old + * catalog. Since we expect to find it most of the time in the pg_catalog schema from + * now on we will start there. + * + * even after the table has been moved, the oid's stay the same, so we don't have to + * invalidate the cache after a move + * + * Note: during testing we also up/downgrade the extension, and sometimes interact + * with the database when the schema and the binary are not in sync. Hance we always + * allow the catalog to be missing on our first lookup. The error message might + * therefore become misleading as it will complain about citus.pg_dist_object not + * being found when called too early. + */ + CachedRelationLookupExtended("pg_dist_object_pkey", + &MetadataCache.distObjectPrimaryKeyIndexId, + true); + + if (!OidIsValid(MetadataCache.distObjectPrimaryKeyIndexId)) + { + /* + * We can only ever reach here while we are creating/altering our extension before + * the table is moved to pg_catalog. + */ + CachedRelationNamespaceLookupExtended("pg_dist_object_pkey", + CitusCatalogNamespaceId(), + &MetadataCache.distObjectPrimaryKeyIndexId, + false); + } return MetadataCache.distObjectPrimaryKeyIndexId; } @@ -4395,9 +4458,30 @@ CachedRelationLookup(const char *relationName, Oid *cachedOid) } +/* + * CachedRelationLookupExtended performs a cached lookup for the relation + * relationName, with the result cached in *cachedOid. Will _not_ throw an error when + * missing_ok is set to true. + */ +static void +CachedRelationLookupExtended(const char *relationName, Oid *cachedOid, bool missing_ok) +{ + CachedRelationNamespaceLookupExtended(relationName, PG_CATALOG_NAMESPACE, cachedOid, + missing_ok); +} + + static void CachedRelationNamespaceLookup(const char *relationName, Oid relnamespace, Oid *cachedOid) +{ + CachedRelationNamespaceLookupExtended(relationName, relnamespace, cachedOid, false); +} + + +static void +CachedRelationNamespaceLookupExtended(const char *relationName, Oid relnamespace, + Oid *cachedOid, bool missing_ok) { /* force callbacks to be registered, so we always get notified upon changes */ InitializeCaches(); @@ -4406,7 +4490,7 @@ CachedRelationNamespaceLookup(const char *relationName, Oid relnamespace, { *cachedOid = get_relname_relid(relationName, relnamespace); - if (*cachedOid == InvalidOid) + if (*cachedOid == InvalidOid && !missing_ok) { ereport(ERROR, (errmsg( "cache lookup failed for %s, called too early?",