From 41d99249d9a09a00be8e492030f42a39feb28875 Mon Sep 17 00:00:00 2001 From: Karina <55838532+Green-Chan@users.noreply.github.com> Date: Wed, 10 Apr 2024 19:08:54 +0300 Subject: [PATCH 01/10] Use expecteddir option when running vanilla tests (#7573) In PostgreSQL 16 a new option expecteddir was introduced to pg_regress. Together with fix in [196eeb6b](https://github.com/postgres/postgres/commit/196eeb6b) it causes check-vanilla failure if expecteddir is not specified. Co-authored-by: Karina Litskevich --- src/test/regress/pg_regress_multi.pl | 37 ++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/src/test/regress/pg_regress_multi.pl b/src/test/regress/pg_regress_multi.pl index c9a85d523..01e57c469 100755 --- a/src/test/regress/pg_regress_multi.pl +++ b/src/test/regress/pg_regress_multi.pl @@ -1126,16 +1126,33 @@ sub RunVanillaTests system("mkdir", ("-p", "$pgregressOutputdir/sql")) == 0 or die "Could not create vanilla sql dir."; - $exitcode = system("$plainRegress", - ("--dlpath", $dlpath), - ("--inputdir", $pgregressInputdir), - ("--outputdir", $pgregressOutputdir), - ("--schedule", catfile("$pgregressInputdir", "parallel_schedule")), - ("--use-existing"), - ("--host","$host"), - ("--port","$masterPort"), - ("--user","$user"), - ("--dbname", "$dbName")); + if ($majorversion >= "16") + { + $exitcode = system("$plainRegress", + ("--dlpath", $dlpath), + ("--inputdir", $pgregressInputdir), + ("--outputdir", $pgregressOutputdir), + ("--expecteddir", $pgregressOutputdir), + ("--schedule", catfile("$pgregressInputdir", "parallel_schedule")), + ("--use-existing"), + ("--host","$host"), + ("--port","$masterPort"), + ("--user","$user"), + ("--dbname", "$dbName")); + } + else + { + $exitcode = system("$plainRegress", + ("--dlpath", $dlpath), + ("--inputdir", $pgregressInputdir), + ("--outputdir", $pgregressOutputdir), + ("--schedule", catfile("$pgregressInputdir", "parallel_schedule")), + ("--use-existing"), + ("--host","$host"), + ("--port","$masterPort"), + ("--user","$user"), + ("--dbname", "$dbName")); + } } if ($useMitmproxy) { From 3586aab17a7ff2fef8f336b6528376552a5d4c2c Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Mon, 15 Apr 2024 12:51:11 +0300 Subject: [PATCH 02/10] Allow providing "host" parameter via citus.node_conninfo (#7541) And when that is the case, directly use it as "host" parameter for the connections between nodes and use the "hostname" provided in pg_dist_node / pg_dist_poolinfo as "hostaddr" to avoid host name lookup. This is to avoid allowing dns resolution (and / or setting up DNS names for each host in the cluster). This already works currently when using IPs in the hostname. The only use of setting host is that you can then use sslmode=verify-full and it will validate that the hostname matches the certificate provided by the node you're connecting too. It would be more flexible to make this a per-node setting, but that requires SQL changes. And we'd like to backport this change, and backporting such a sql change would be quite hard while backporting this change would be very easy. And in many setups, a different hostname for TLS validation is actually not needed. The reason for that is query-from-any node: With query-from-any-node all nodes usually have a certificate that is valid for the same "cluster hostname", either using a wildcard cert or a Subject Alternative Name (SAN). Because if you load balance across nodes you don't know which node you're connecting to, but you still want TLS validation to do it's job. So with this change you can use this same "cluster hostname" for TLS validation within the cluster. Obviously this means you don't validate that you're connecting to a particular node, just that you're connecting to one of the nodes in the cluster, but that should be fine from a security perspective (in most cases). Note to self: This change requires updating https://docs.citusdata.com/en/latest/develop/api_guc.html#citus-node-conninfo-text. DESCRIPTION: Allows overwriting host name for all inter-node connections by supporting "host" parameter in citus.node_conninfo --- .../connection/connection_configuration.c | 17 +++++- src/backend/distributed/shared_library_init.c | 1 + .../regress/expected/node_conninfo_reload.out | 56 +++++++++++++++++++ src/test/regress/sql/node_conninfo_reload.sql | 26 +++++++++ 4 files changed, 99 insertions(+), 1 deletion(-) diff --git a/src/backend/distributed/connection/connection_configuration.c b/src/backend/distributed/connection/connection_configuration.c index ac82d4e09..3913173e2 100644 --- a/src/backend/distributed/connection/connection_configuration.c +++ b/src/backend/distributed/connection/connection_configuration.c @@ -271,9 +271,24 @@ GetConnParams(ConnectionHashKey *key, char ***keywords, char ***values, * We allocate everything in the provided context so as to facilitate using * pfree on all runtime parameters when connections using these entries are * invalidated during config reloads. + * + * Also, when "host" is already provided in global parameters, we use hostname + * from the key as "hostaddr" instead of "host" to avoid host name lookup. In + * that case, the value for "host" becomes useful only if the authentication + * method requires it. */ + bool gotHostParamFromGlobalParams = false; + for (Size paramIndex = 0; paramIndex < ConnParams.size; paramIndex++) + { + if (strcmp(ConnParams.keywords[paramIndex], "host") == 0) + { + gotHostParamFromGlobalParams = true; + break; + } + } + const char *runtimeKeywords[] = { - "host", + gotHostParamFromGlobalParams ? "hostaddr" : "host", "port", "dbname", "user", diff --git a/src/backend/distributed/shared_library_init.c b/src/backend/distributed/shared_library_init.c index 45e212e8b..bd65fa60c 100644 --- a/src/backend/distributed/shared_library_init.c +++ b/src/backend/distributed/shared_library_init.c @@ -2929,6 +2929,7 @@ NodeConninfoGucCheckHook(char **newval, void **extra, GucSource source) #if defined(ENABLE_GSS) && defined(ENABLE_SSPI) "gsslib", #endif + "host", "keepalives", "keepalives_count", "keepalives_idle", diff --git a/src/test/regress/expected/node_conninfo_reload.out b/src/test/regress/expected/node_conninfo_reload.out index 785e3e1b1..3b33c54b2 100644 --- a/src/test/regress/expected/node_conninfo_reload.out +++ b/src/test/regress/expected/node_conninfo_reload.out @@ -520,5 +520,61 @@ show citus.node_conninfo; -- Should work again ALTER TABLE test ADD COLUMN e INT; +-- show that we allow providing "host" param via citus.node_conninfo +ALTER SYSTEM SET citus.node_conninfo = 'sslmode=require host=nosuchhost'; +SELECT pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +SELECT pg_sleep(0.1); + pg_sleep +--------------------------------------------------------------------- + +(1 row) + +-- fails due to invalid host +SELECT COUNT(*)>=0 FROM test; +WARNING: connection to the remote node postgres@localhost:xxxxx failed with the following error: could not parse network address "localhost": Name or service not known +ERROR: connection to the remote node postgres@localhost:xxxxx failed with the following error: could not parse network address "localhost": Name or service not known +SELECT array_agg(nodeid) as updated_nodeids from pg_dist_node WHERE nodename = 'localhost' \gset +UPDATE pg_dist_node SET nodename = '127.0.0.1' WHERE nodeid = ANY(:'updated_nodeids'::int[]); +ALTER SYSTEM SET citus.node_conninfo = 'sslmode=require host=localhost'; +SELECT pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +SELECT pg_sleep(0.1); + pg_sleep +--------------------------------------------------------------------- + +(1 row) + +-- works when hostaddr is specified in pg_dist_node after providing host in citus.node_conninfo +SELECT COUNT(*)>=0 FROM test; + ?column? +--------------------------------------------------------------------- + t +(1 row) + +-- restore original nodenames into pg_dist_node +UPDATE pg_dist_node SET nodename = 'localhost' WHERE nodeid = ANY(:'updated_nodeids'::int[]); +-- reset it +ALTER SYSTEM RESET citus.node_conninfo; +select pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +select pg_sleep(0.1); -- wait for config reload to apply + pg_sleep +--------------------------------------------------------------------- + +(1 row) + DROP SCHEMA node_conninfo_reload CASCADE; NOTICE: drop cascades to table test diff --git a/src/test/regress/sql/node_conninfo_reload.sql b/src/test/regress/sql/node_conninfo_reload.sql index 42ba8c9b1..2faaaeeb1 100644 --- a/src/test/regress/sql/node_conninfo_reload.sql +++ b/src/test/regress/sql/node_conninfo_reload.sql @@ -205,4 +205,30 @@ show citus.node_conninfo; -- Should work again ALTER TABLE test ADD COLUMN e INT; +-- show that we allow providing "host" param via citus.node_conninfo +ALTER SYSTEM SET citus.node_conninfo = 'sslmode=require host=nosuchhost'; +SELECT pg_reload_conf(); +SELECT pg_sleep(0.1); + +-- fails due to invalid host +SELECT COUNT(*)>=0 FROM test; + +SELECT array_agg(nodeid) as updated_nodeids from pg_dist_node WHERE nodename = 'localhost' \gset +UPDATE pg_dist_node SET nodename = '127.0.0.1' WHERE nodeid = ANY(:'updated_nodeids'::int[]); + +ALTER SYSTEM SET citus.node_conninfo = 'sslmode=require host=localhost'; +SELECT pg_reload_conf(); +SELECT pg_sleep(0.1); + +-- works when hostaddr is specified in pg_dist_node after providing host in citus.node_conninfo +SELECT COUNT(*)>=0 FROM test; + +-- restore original nodenames into pg_dist_node +UPDATE pg_dist_node SET nodename = 'localhost' WHERE nodeid = ANY(:'updated_nodeids'::int[]); + +-- reset it +ALTER SYSTEM RESET citus.node_conninfo; +select pg_reload_conf(); +select pg_sleep(0.1); -- wait for config reload to apply + DROP SCHEMA node_conninfo_reload CASCADE; From 381f31756e6de0e0522aeaa489fe671df0ddf731 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Mon, 15 Apr 2024 12:28:11 +0200 Subject: [PATCH 03/10] Speed up EnsureSequenceTypeSupported (#7575) DESCRIPTION: Fix performance issue when creating distributed tables and many already exist EnsureSequenceTypeSupported was doing an O(number of distributed tables) operation. This can become very slow with lots of Citus tables, which now happens much more frequently in practice due to schema based sharding. Partially addresses #7022 --- .../commands/create_distributed_table.c | 73 +++++++--------- .../distributed/metadata/metadata_sync.c | 84 +++++++++++++++++++ src/include/distributed/metadata_sync.h | 4 + 3 files changed, 119 insertions(+), 42 deletions(-) diff --git a/src/backend/distributed/commands/create_distributed_table.c b/src/backend/distributed/commands/create_distributed_table.c index 5ec6d6dd7..8c59aa199 100644 --- a/src/backend/distributed/commands/create_distributed_table.c +++ b/src/backend/distributed/commands/create_distributed_table.c @@ -22,6 +22,7 @@ #include "catalog/dependency.h" #include "catalog/index.h" #include "catalog/pg_am.h" +#include "catalog/pg_attrdef.h" #include "catalog/pg_attribute.h" #include "catalog/pg_enum.h" #include "catalog/pg_extension.h" @@ -50,6 +51,7 @@ #include "tcop/pquery.h" #include "tcop/tcopprot.h" #include "utils/builtins.h" +#include "utils/fmgroids.h" #include "utils/inval.h" #include "utils/lsyscache.h" #include "utils/memutils.h" @@ -1696,52 +1698,39 @@ PropagatePrerequisiteObjectsForDistributedTable(Oid relationId) void EnsureSequenceTypeSupported(Oid seqOid, Oid attributeTypeId, Oid ownerRelationId) { - List *citusTableIdList = CitusTableTypeIdList(ANY_CITUS_TABLE_TYPE); - citusTableIdList = list_append_unique_oid(citusTableIdList, ownerRelationId); + Oid attrDefOid; + List *attrDefOids = GetAttrDefsFromSequence(seqOid); - Oid citusTableId = InvalidOid; - foreach_oid(citusTableId, citusTableIdList) + foreach_oid(attrDefOid, attrDefOids) { - List *seqInfoList = NIL; - GetDependentSequencesWithRelation(citusTableId, &seqInfoList, 0, DEPENDENCY_AUTO); + ObjectAddress columnAddress = GetAttrDefaultColumnAddress(attrDefOid); - SequenceInfo *seqInfo = NULL; - foreach_ptr(seqInfo, seqInfoList) + /* + * If another distributed table is using the same sequence + * in one of its column defaults, make sure the types of the + * columns match. + * + * We skip non-distributed tables, but we need to check the current + * table as it might reference the same sequence multiple times. + */ + if (columnAddress.objectId != ownerRelationId && + !IsCitusTable(columnAddress.objectId)) { - AttrNumber currentAttnum = seqInfo->attributeNumber; - Oid currentSeqOid = seqInfo->sequenceOid; - - if (!seqInfo->isNextValDefault) - { - /* - * If a sequence is not on the nextval, we don't need any check. - * This is a dependent sequence via ALTER SEQUENCE .. OWNED BY col - */ - continue; - } - - /* - * If another distributed table is using the same sequence - * in one of its column defaults, make sure the types of the - * columns match - */ - if (currentSeqOid == seqOid) - { - Oid currentAttributeTypId = GetAttributeTypeOid(citusTableId, - currentAttnum); - if (attributeTypeId != currentAttributeTypId) - { - char *sequenceName = generate_qualified_relation_name( - seqOid); - char *citusTableName = - generate_qualified_relation_name(citusTableId); - ereport(ERROR, (errmsg( - "The sequence %s is already used for a different" - " type in column %d of the table %s", - sequenceName, currentAttnum, - citusTableName))); - } - } + continue; + } + Oid currentAttributeTypId = GetAttributeTypeOid(columnAddress.objectId, + columnAddress.objectSubId); + if (attributeTypeId != currentAttributeTypId) + { + char *sequenceName = generate_qualified_relation_name( + seqOid); + char *citusTableName = + generate_qualified_relation_name(columnAddress.objectId); + ereport(ERROR, (errmsg( + "The sequence %s is already used for a different" + " type in column %d of the table %s", + sequenceName, columnAddress.objectSubId, + citusTableName))); } } } diff --git a/src/backend/distributed/metadata/metadata_sync.c b/src/backend/distributed/metadata/metadata_sync.c index 31d586e90..9e44a01f9 100644 --- a/src/backend/distributed/metadata/metadata_sync.c +++ b/src/backend/distributed/metadata/metadata_sync.c @@ -1682,6 +1682,90 @@ GetSequencesFromAttrDef(Oid attrdefOid) } +#if PG_VERSION_NUM < PG_VERSION_15 + +/* + * Given a pg_attrdef OID, return the relation OID and column number of + * the owning column (represented as an ObjectAddress for convenience). + * + * Returns InvalidObjectAddress if there is no such pg_attrdef entry. + */ +ObjectAddress +GetAttrDefaultColumnAddress(Oid attrdefoid) +{ + ObjectAddress result = InvalidObjectAddress; + ScanKeyData skey[1]; + HeapTuple tup; + + Relation attrdef = table_open(AttrDefaultRelationId, AccessShareLock); + ScanKeyInit(&skey[0], + Anum_pg_attrdef_oid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(attrdefoid)); + SysScanDesc scan = systable_beginscan(attrdef, AttrDefaultOidIndexId, true, + NULL, 1, skey); + + if (HeapTupleIsValid(tup = systable_getnext(scan))) + { + Form_pg_attrdef atdform = (Form_pg_attrdef) GETSTRUCT(tup); + + result.classId = RelationRelationId; + result.objectId = atdform->adrelid; + result.objectSubId = atdform->adnum; + } + + systable_endscan(scan); + table_close(attrdef, AccessShareLock); + + return result; +} + + +#endif + + +/* + * GetAttrDefsFromSequence returns a list of attrdef OIDs that have + * a dependency on the given sequence + */ +List * +GetAttrDefsFromSequence(Oid seqOid) +{ + List *attrDefsResult = NIL; + ScanKeyData key[2]; + HeapTuple tup; + + Relation depRel = table_open(DependRelationId, AccessShareLock); + + ScanKeyInit(&key[0], + Anum_pg_depend_refclassid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationRelationId)); + ScanKeyInit(&key[1], + Anum_pg_depend_refobjid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(seqOid)); + SysScanDesc scan = systable_beginscan(depRel, DependReferenceIndexId, true, + NULL, lengthof(key), key); + while (HeapTupleIsValid(tup = systable_getnext(scan))) + { + Form_pg_depend deprec = (Form_pg_depend) GETSTRUCT(tup); + + if (deprec->classid == AttrDefaultRelationId && + deprec->deptype == DEPENDENCY_NORMAL) + { + attrDefsResult = lappend_oid(attrDefsResult, deprec->objid); + } + } + + systable_endscan(scan); + + table_close(depRel, AccessShareLock); + + return attrDefsResult; +} + + /* * GetDependentFunctionsWithRelation returns the dependent functions for the * given relation id. diff --git a/src/include/distributed/metadata_sync.h b/src/include/distributed/metadata_sync.h index cb111e16e..d0b760758 100644 --- a/src/include/distributed/metadata_sync.h +++ b/src/include/distributed/metadata_sync.h @@ -130,6 +130,10 @@ extern List * IdentitySequenceDependencyCommandList(Oid targetRelationId); extern List * DDLCommandsForSequence(Oid sequenceOid, char *ownerName); extern List * GetSequencesFromAttrDef(Oid attrdefOid); +#if PG_VERSION_NUM < PG_VERSION_15 +ObjectAddress GetAttrDefaultColumnAddress(Oid attrdefoid); +#endif +extern List * GetAttrDefsFromSequence(Oid seqOid); extern void GetDependentSequencesWithRelation(Oid relationId, List **seqInfoList, AttrNumber attnum, char depType); extern List * GetDependentFunctionsWithRelation(Oid relationId); From cdf51da45842a41304a6a2ce9d878f21c8ec0782 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Mon, 15 Apr 2024 14:01:55 +0200 Subject: [PATCH 04/10] Speed up SequenceUsedInDistributedTable (#7579) DESCRIPTION: Fix performance issue when creating distributed tables if many already exist This builds on the work to speed up EnsureSequenceTypeSupported, and now does something similar for SequenceUsedInDistributedTable. SequenceUsedInDistributedTable had a similar O(number of citus tables) operation. This fixes that and speeds up creation of distributed tables significantly when many distributed tables already exist. Fixes #7022 --- src/backend/distributed/commands/sequence.c | 21 ++---- .../distributed/metadata/metadata_sync.c | 68 +++++++++++++++++++ src/include/distributed/metadata_sync.h | 1 + 3 files changed, 76 insertions(+), 14 deletions(-) diff --git a/src/backend/distributed/commands/sequence.c b/src/backend/distributed/commands/sequence.c index 4d838a882..cfb55faf7 100644 --- a/src/backend/distributed/commands/sequence.c +++ b/src/backend/distributed/commands/sequence.c @@ -14,6 +14,7 @@ #include "access/xact.h" #include "catalog/dependency.h" #include "catalog/namespace.h" +#include "catalog/pg_attrdef.h" #include "commands/defrem.h" #include "commands/extension.h" #include "nodes/makefuncs.h" @@ -507,22 +508,14 @@ PreprocessAlterSequenceStmt(Node *node, const char *queryString, static Oid SequenceUsedInDistributedTable(const ObjectAddress *sequenceAddress, char depType) { - List *citusTableIdList = CitusTableTypeIdList(ANY_CITUS_TABLE_TYPE); - Oid citusTableId = InvalidOid; - foreach_oid(citusTableId, citusTableIdList) + Oid relationId; + List *relations = GetDependentRelationsWithSequence(sequenceAddress->objectId, + depType); + foreach_oid(relationId, relations) { - List *seqInfoList = NIL; - GetDependentSequencesWithRelation(citusTableId, &seqInfoList, 0, depType); - SequenceInfo *seqInfo = NULL; - foreach_ptr(seqInfo, seqInfoList) + if (IsCitusTable(relationId)) { - /* - * This sequence is used in a distributed table - */ - if (seqInfo->sequenceOid == sequenceAddress->objectId) - { - return citusTableId; - } + return relationId; } } diff --git a/src/backend/distributed/metadata/metadata_sync.c b/src/backend/distributed/metadata/metadata_sync.c index 9e44a01f9..ef7c56dc7 100644 --- a/src/backend/distributed/metadata/metadata_sync.c +++ b/src/backend/distributed/metadata/metadata_sync.c @@ -1637,6 +1637,74 @@ GetDependentSequencesWithRelation(Oid relationId, List **seqInfoList, } +/* + * GetDependentDependentRelationsWithSequence returns a list of oids of + * relations that have have a dependency on the given sequence. + * There are three types of dependencies: + * 1. direct auto (owned sequences), created using SERIAL or BIGSERIAL + * 2. indirect auto (through an AttrDef), created using DEFAULT nextval('..') + * 3. internal, created using GENERATED ALWAYS AS IDENTITY + * + * Depending on the passed deptype, we return the relations that have the + * given type(s): + * - DEPENDENCY_AUTO returns both 1 and 2 + * - DEPENDENCY_INTERNAL returns 3 + * + * The returned list can contain duplicates, as the same relation can have + * multiple dependencies on the sequence. + */ +List * +GetDependentRelationsWithSequence(Oid sequenceOid, char depType) +{ + List *relations = NIL; + ScanKeyData key[2]; + HeapTuple tup; + + Relation depRel = table_open(DependRelationId, AccessShareLock); + + ScanKeyInit(&key[0], + Anum_pg_depend_classid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationRelationId)); + ScanKeyInit(&key[1], + Anum_pg_depend_objid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(sequenceOid)); + SysScanDesc scan = systable_beginscan(depRel, DependDependerIndexId, true, + NULL, lengthof(key), key); + while (HeapTupleIsValid(tup = systable_getnext(scan))) + { + Form_pg_depend deprec = (Form_pg_depend) GETSTRUCT(tup); + + if ( + deprec->refclassid == RelationRelationId && + deprec->refobjsubid != 0 && + deprec->deptype == depType) + { + relations = lappend_oid(relations, deprec->refobjid); + } + } + + systable_endscan(scan); + + table_close(depRel, AccessShareLock); + + if (depType == DEPENDENCY_AUTO) + { + Oid attrDefOid; + List *attrDefOids = GetAttrDefsFromSequence(sequenceOid); + + foreach_oid(attrDefOid, attrDefOids) + { + ObjectAddress columnAddress = GetAttrDefaultColumnAddress(attrDefOid); + relations = lappend_oid(relations, columnAddress.objectId); + } + } + + return relations; +} + + /* * GetSequencesFromAttrDef returns a list of sequence OIDs that have * dependency with the given attrdefOid in pg_depend diff --git a/src/include/distributed/metadata_sync.h b/src/include/distributed/metadata_sync.h index d0b760758..617eed705 100644 --- a/src/include/distributed/metadata_sync.h +++ b/src/include/distributed/metadata_sync.h @@ -136,6 +136,7 @@ ObjectAddress GetAttrDefaultColumnAddress(Oid attrdefoid); extern List * GetAttrDefsFromSequence(Oid seqOid); extern void GetDependentSequencesWithRelation(Oid relationId, List **seqInfoList, AttrNumber attnum, char depType); +extern List * GetDependentRelationsWithSequence(Oid seqId, char depType); extern List * GetDependentFunctionsWithRelation(Oid relationId); extern Oid GetAttributeTypeOid(Oid relationId, AttrNumber attnum); extern void SetLocalEnableMetadataSync(bool state); From 16604a6601d8445c334d44b32ec6cf825c61839f Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Mon, 15 Apr 2024 14:42:56 +0200 Subject: [PATCH 05/10] Use an index to get FDWs that depend on extensions (#7574) DESCRIPTION: Fix performance issue when distributing a table that depends on an extension When the database contains many objects this function would show up in profiles because it was doing a sequence scan on pg_depend. And with many objects pg_depend can get very large. This starts using an index scan to only look for rows containing FDWs, of which there are expected to be very few (often even zero). --- src/backend/distributed/commands/extension.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/src/backend/distributed/commands/extension.c b/src/backend/distributed/commands/extension.c index 2ead0c58a..8d4c6431b 100644 --- a/src/backend/distributed/commands/extension.c +++ b/src/backend/distributed/commands/extension.c @@ -1093,33 +1093,26 @@ List * GetDependentFDWsToExtension(Oid extensionId) { List *extensionFDWs = NIL; - ScanKeyData key[3]; - int scanKeyCount = 3; + ScanKeyData key[1]; HeapTuple tup; Relation pgDepend = table_open(DependRelationId, AccessShareLock); ScanKeyInit(&key[0], - Anum_pg_depend_refclassid, - BTEqualStrategyNumber, F_OIDEQ, - ObjectIdGetDatum(ExtensionRelationId)); - ScanKeyInit(&key[1], - Anum_pg_depend_refobjid, - BTEqualStrategyNumber, F_OIDEQ, - ObjectIdGetDatum(extensionId)); - ScanKeyInit(&key[2], Anum_pg_depend_classid, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(ForeignDataWrapperRelationId)); - SysScanDesc scan = systable_beginscan(pgDepend, InvalidOid, false, - NULL, scanKeyCount, key); + SysScanDesc scan = systable_beginscan(pgDepend, DependDependerIndexId, true, + NULL, lengthof(key), key); while (HeapTupleIsValid(tup = systable_getnext(scan))) { Form_pg_depend pgDependEntry = (Form_pg_depend) GETSTRUCT(tup); - if (pgDependEntry->deptype == DEPENDENCY_EXTENSION) + if (pgDependEntry->deptype == DEPENDENCY_EXTENSION && + pgDependEntry->refclassid == ExtensionRelationId && + pgDependEntry->refobjid == extensionId) { extensionFDWs = lappend_oid(extensionFDWs, pgDependEntry->objid); } From 110b4192b2277144fb6716dc5323874f52255e20 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Mon, 15 Apr 2024 16:26:33 +0200 Subject: [PATCH 06/10] Fix PG upgrades when invalid rebalance strategies exist (#7580) DESCRIPTION: Fix PG upgrades when invalid rebalance strategies exist Without this change an upgrade of a cluster with an invalid rebalance strategy would fail with an error like this: ``` cache lookup failed for shard_cost_function with oid 6077337 CONTEXT: SQL statement "SELECT citus_validate_rebalance_strategy_functions( NEW.shard_cost_function, NEW.node_capacity_function, NEW.shard_allowed_on_node_function)" PL/pgSQL function citus_internal.pg_dist_rebalance_strategy_trigger_func() line 5 at PERFORM SQL statement "INSERT INTO pg_catalog.pg_dist_rebalance_strategy SELECT name, default_strategy, shard_cost_function::regprocedure::regproc, node_capacity_function::regprocedure::regproc, shard_allowed_on_node_function::regprocedure::regproc, default_threshold, minimum_threshold, improvement_threshold FROM public.pg_dist_rebalance_strategy" PL/pgSQL function citus_finish_pg_upgrade() line 115 at SQL statement ``` This fixes that by disabling the trigger and simply re-inserting the invalid rebalance strategy without checking. We could also silently remove it, but this seems nicer. --- .../distributed/sql/citus--12.1-1--12.2-1.sql | 1 + .../sql/downgrades/citus--12.2-1--12.1-1.sql | 1 + .../udfs/citus_finish_pg_upgrade/12.2-1.sql | 227 ++++++++++++++++++ .../udfs/citus_finish_pg_upgrade/latest.sql | 7 + .../upgrade_rebalance_strategy_after.out | 11 +- .../upgrade_rebalance_strategy_before.out | 20 ++ .../sql/upgrade_rebalance_strategy_before.sql | 16 ++ 7 files changed, 278 insertions(+), 5 deletions(-) create mode 100644 src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/12.2-1.sql diff --git a/src/backend/distributed/sql/citus--12.1-1--12.2-1.sql b/src/backend/distributed/sql/citus--12.1-1--12.2-1.sql index 2d5f88676..1bec0f429 100644 --- a/src/backend/distributed/sql/citus--12.1-1--12.2-1.sql +++ b/src/backend/distributed/sql/citus--12.1-1--12.2-1.sql @@ -54,3 +54,4 @@ DROP VIEW IF EXISTS pg_catalog.citus_lock_waits; #include "udfs/citus_internal_update_placement_metadata/12.2-1.sql" #include "udfs/citus_internal_update_relation_colocation/12.2-1.sql" #include "udfs/repl_origin_helper/12.2-1.sql" +#include "udfs/citus_finish_pg_upgrade/12.2-1.sql" diff --git a/src/backend/distributed/sql/downgrades/citus--12.2-1--12.1-1.sql b/src/backend/distributed/sql/downgrades/citus--12.2-1--12.1-1.sql index 581c65ea8..099bf8d87 100644 --- a/src/backend/distributed/sql/downgrades/citus--12.2-1--12.1-1.sql +++ b/src/backend/distributed/sql/downgrades/citus--12.2-1--12.1-1.sql @@ -54,3 +54,4 @@ DROP FUNCTION citus_internal.update_relation_colocation(oid, int); DROP FUNCTION citus_internal.start_replication_origin_tracking(); DROP FUNCTION citus_internal.stop_replication_origin_tracking(); DROP FUNCTION citus_internal.is_replication_origin_tracking_active(); +#include "../udfs/citus_finish_pg_upgrade/12.1-1.sql" diff --git a/src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/12.2-1.sql b/src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/12.2-1.sql new file mode 100644 index 000000000..4d3a17bd4 --- /dev/null +++ b/src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/12.2-1.sql @@ -0,0 +1,227 @@ +CREATE OR REPLACE FUNCTION pg_catalog.citus_finish_pg_upgrade() + RETURNS void + LANGUAGE plpgsql + SET search_path = pg_catalog + AS $cppu$ +DECLARE + table_name regclass; + command text; + trigger_name text; +BEGIN + + + IF substring(current_Setting('server_version'), '\d+')::int >= 14 THEN + EXECUTE $cmd$ + -- disable propagation to prevent EnsureCoordinator errors + -- the aggregate created here does not depend on Citus extension (yet) + -- since we add the dependency with the next command + SET citus.enable_ddl_propagation TO OFF; + CREATE AGGREGATE array_cat_agg(anycompatiblearray) (SFUNC = array_cat, STYPE = anycompatiblearray); + COMMENT ON AGGREGATE array_cat_agg(anycompatiblearray) + IS 'concatenate input arrays into a single array'; + RESET citus.enable_ddl_propagation; + $cmd$; + ELSE + EXECUTE $cmd$ + SET citus.enable_ddl_propagation TO OFF; + CREATE AGGREGATE array_cat_agg(anyarray) (SFUNC = array_cat, STYPE = anyarray); + COMMENT ON AGGREGATE array_cat_agg(anyarray) + IS 'concatenate input arrays into a single array'; + RESET citus.enable_ddl_propagation; + $cmd$; + END IF; + + -- + -- Citus creates the array_cat_agg but because of a compatibility + -- issue between pg13-pg14, we drop and create it during upgrade. + -- And as Citus creates it, there needs to be a dependency to the + -- Citus extension, so we create that dependency here. + -- We are not using: + -- ALTER EXENSION citus DROP/CREATE AGGREGATE array_cat_agg + -- because we don't have an easy way to check if the aggregate + -- exists with anyarray type or anycompatiblearray type. + + INSERT INTO pg_depend + SELECT + 'pg_proc'::regclass::oid as classid, + (SELECT oid FROM pg_proc WHERE proname = 'array_cat_agg') as objid, + 0 as objsubid, + 'pg_extension'::regclass::oid as refclassid, + (select oid from pg_extension where extname = 'citus') as refobjid, + 0 as refobjsubid , + 'e' as deptype; + + -- PG16 has its own any_value, so only create it pre PG16. + -- We can remove this part when we drop support for PG16 + IF substring(current_Setting('server_version'), '\d+')::int < 16 THEN + EXECUTE $cmd$ + -- disable propagation to prevent EnsureCoordinator errors + -- the aggregate created here does not depend on Citus extension (yet) + -- since we add the dependency with the next command + SET citus.enable_ddl_propagation TO OFF; + CREATE OR REPLACE FUNCTION pg_catalog.any_value_agg ( anyelement, anyelement ) + RETURNS anyelement AS $$ + SELECT CASE WHEN $1 IS NULL THEN $2 ELSE $1 END; + $$ LANGUAGE SQL STABLE; + + CREATE AGGREGATE pg_catalog.any_value ( + sfunc = pg_catalog.any_value_agg, + combinefunc = pg_catalog.any_value_agg, + basetype = anyelement, + stype = anyelement + ); + COMMENT ON AGGREGATE pg_catalog.any_value(anyelement) IS + 'Returns the value of any row in the group. It is mostly useful when you know there will be only 1 element.'; + RESET citus.enable_ddl_propagation; + -- + -- Citus creates the any_value aggregate but because of a compatibility + -- issue between pg15-pg16 -- any_value is created in PG16, we drop + -- and create it during upgrade IF upgraded version is less than 16. + -- And as Citus creates it, there needs to be a dependency to the + -- Citus extension, so we create that dependency here. + + INSERT INTO pg_depend + SELECT + 'pg_proc'::regclass::oid as classid, + (SELECT oid FROM pg_proc WHERE proname = 'any_value_agg') as objid, + 0 as objsubid, + 'pg_extension'::regclass::oid as refclassid, + (select oid from pg_extension where extname = 'citus') as refobjid, + 0 as refobjsubid , + 'e' as deptype; + + INSERT INTO pg_depend + SELECT + 'pg_proc'::regclass::oid as classid, + (SELECT oid FROM pg_proc WHERE proname = 'any_value') as objid, + 0 as objsubid, + 'pg_extension'::regclass::oid as refclassid, + (select oid from pg_extension where extname = 'citus') as refobjid, + 0 as refobjsubid , + 'e' as deptype; + $cmd$; + END IF; + + -- + -- restore citus catalog tables + -- + INSERT INTO pg_catalog.pg_dist_partition SELECT * FROM public.pg_dist_partition; + + -- if we are upgrading from PG14/PG15 to PG16+, + -- we need to regenerate the partkeys because they will include varnullingrels as well. + UPDATE pg_catalog.pg_dist_partition + SET partkey = column_name_to_column(pg_dist_partkeys_pre_16_upgrade.logicalrelid, col_name) + FROM public.pg_dist_partkeys_pre_16_upgrade + WHERE pg_dist_partkeys_pre_16_upgrade.logicalrelid = pg_dist_partition.logicalrelid; + DROP TABLE public.pg_dist_partkeys_pre_16_upgrade; + + INSERT INTO pg_catalog.pg_dist_shard SELECT * FROM public.pg_dist_shard; + INSERT INTO pg_catalog.pg_dist_placement SELECT * FROM public.pg_dist_placement; + INSERT INTO pg_catalog.pg_dist_node_metadata SELECT * FROM public.pg_dist_node_metadata; + INSERT INTO pg_catalog.pg_dist_node SELECT * FROM public.pg_dist_node; + INSERT INTO pg_catalog.pg_dist_local_group SELECT * FROM public.pg_dist_local_group; + INSERT INTO pg_catalog.pg_dist_transaction SELECT * FROM public.pg_dist_transaction; + INSERT INTO pg_catalog.pg_dist_colocation SELECT * FROM public.pg_dist_colocation; + INSERT INTO pg_catalog.pg_dist_cleanup SELECT * FROM public.pg_dist_cleanup; + INSERT INTO pg_catalog.pg_dist_schema SELECT schemaname::regnamespace, colocationid FROM public.pg_dist_schema; + -- enterprise catalog tables + INSERT INTO pg_catalog.pg_dist_authinfo SELECT * FROM public.pg_dist_authinfo; + INSERT INTO pg_catalog.pg_dist_poolinfo SELECT * FROM public.pg_dist_poolinfo; + + -- Temporarily disable trigger to check for validity of functions while + -- inserting. The current contents of the table might be invalid if one of + -- the functions was removed by the user without also removing the + -- rebalance strategy. Obviously that's not great, but it should be no + -- reason to fail the upgrade. + ALTER TABLE pg_catalog.pg_dist_rebalance_strategy DISABLE TRIGGER pg_dist_rebalance_strategy_validation_trigger; + INSERT INTO pg_catalog.pg_dist_rebalance_strategy SELECT + name, + default_strategy, + shard_cost_function::regprocedure::regproc, + node_capacity_function::regprocedure::regproc, + shard_allowed_on_node_function::regprocedure::regproc, + default_threshold, + minimum_threshold, + improvement_threshold + FROM public.pg_dist_rebalance_strategy; + ALTER TABLE pg_catalog.pg_dist_rebalance_strategy ENABLE TRIGGER pg_dist_rebalance_strategy_validation_trigger; + + -- + -- drop backup tables + -- + DROP TABLE public.pg_dist_authinfo; + DROP TABLE public.pg_dist_colocation; + DROP TABLE public.pg_dist_local_group; + DROP TABLE public.pg_dist_node; + DROP TABLE public.pg_dist_node_metadata; + DROP TABLE public.pg_dist_partition; + DROP TABLE public.pg_dist_placement; + DROP TABLE public.pg_dist_poolinfo; + DROP TABLE public.pg_dist_shard; + DROP TABLE public.pg_dist_transaction; + DROP TABLE public.pg_dist_rebalance_strategy; + DROP TABLE public.pg_dist_cleanup; + DROP TABLE public.pg_dist_schema; + -- + -- reset sequences + -- + PERFORM setval('pg_catalog.pg_dist_shardid_seq', (SELECT MAX(shardid)+1 AS max_shard_id FROM pg_dist_shard), false); + PERFORM setval('pg_catalog.pg_dist_placement_placementid_seq', (SELECT MAX(placementid)+1 AS max_placement_id FROM pg_dist_placement), false); + PERFORM setval('pg_catalog.pg_dist_groupid_seq', (SELECT MAX(groupid)+1 AS max_group_id FROM pg_dist_node), false); + PERFORM setval('pg_catalog.pg_dist_node_nodeid_seq', (SELECT MAX(nodeid)+1 AS max_node_id FROM pg_dist_node), false); + PERFORM setval('pg_catalog.pg_dist_colocationid_seq', (SELECT MAX(colocationid)+1 AS max_colocation_id FROM pg_dist_colocation), false); + PERFORM setval('pg_catalog.pg_dist_operationid_seq', (SELECT MAX(operation_id)+1 AS max_operation_id FROM pg_dist_cleanup), false); + PERFORM setval('pg_catalog.pg_dist_cleanup_recordid_seq', (SELECT MAX(record_id)+1 AS max_record_id FROM pg_dist_cleanup), false); + PERFORM setval('pg_catalog.pg_dist_clock_logical_seq', (SELECT last_value FROM public.pg_dist_clock_logical_seq), false); + DROP TABLE public.pg_dist_clock_logical_seq; + + + + -- + -- register triggers + -- + FOR table_name IN SELECT logicalrelid FROM pg_catalog.pg_dist_partition JOIN pg_class ON (logicalrelid = oid) WHERE relkind <> 'f' + LOOP + trigger_name := 'truncate_trigger_' || table_name::oid; + command := 'create trigger ' || trigger_name || ' after truncate on ' || table_name || ' execute procedure pg_catalog.citus_truncate_trigger()'; + EXECUTE command; + command := 'update pg_trigger set tgisinternal = true where tgname = ' || quote_literal(trigger_name); + EXECUTE command; + END LOOP; + + -- + -- set dependencies + -- + INSERT INTO pg_depend + SELECT + 'pg_class'::regclass::oid as classid, + p.logicalrelid::regclass::oid as objid, + 0 as objsubid, + 'pg_extension'::regclass::oid as refclassid, + (select oid from pg_extension where extname = 'citus') as refobjid, + 0 as refobjsubid , + 'n' as deptype + FROM pg_catalog.pg_dist_partition p; + + -- set dependencies for columnar table access method + PERFORM columnar_internal.columnar_ensure_am_depends_catalog(); + + -- restore pg_dist_object from the stable identifiers + TRUNCATE pg_catalog.pg_dist_object; + INSERT INTO pg_catalog.pg_dist_object (classid, objid, objsubid, distribution_argument_index, colocationid) + SELECT + address.classid, + address.objid, + address.objsubid, + naming.distribution_argument_index, + naming.colocationid + FROM + public.pg_dist_object naming, + pg_catalog.pg_get_object_address(naming.type, naming.object_names, naming.object_args) address; + + DROP TABLE public.pg_dist_object; +END; +$cppu$; + +COMMENT ON FUNCTION pg_catalog.citus_finish_pg_upgrade() + IS 'perform tasks to restore citus settings from a location that has been prepared before pg_upgrade'; diff --git a/src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/latest.sql b/src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/latest.sql index 766e86a2e..4d3a17bd4 100644 --- a/src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/latest.sql +++ b/src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/latest.sql @@ -128,6 +128,12 @@ BEGIN INSERT INTO pg_catalog.pg_dist_authinfo SELECT * FROM public.pg_dist_authinfo; INSERT INTO pg_catalog.pg_dist_poolinfo SELECT * FROM public.pg_dist_poolinfo; + -- Temporarily disable trigger to check for validity of functions while + -- inserting. The current contents of the table might be invalid if one of + -- the functions was removed by the user without also removing the + -- rebalance strategy. Obviously that's not great, but it should be no + -- reason to fail the upgrade. + ALTER TABLE pg_catalog.pg_dist_rebalance_strategy DISABLE TRIGGER pg_dist_rebalance_strategy_validation_trigger; INSERT INTO pg_catalog.pg_dist_rebalance_strategy SELECT name, default_strategy, @@ -138,6 +144,7 @@ BEGIN minimum_threshold, improvement_threshold FROM public.pg_dist_rebalance_strategy; + ALTER TABLE pg_catalog.pg_dist_rebalance_strategy ENABLE TRIGGER pg_dist_rebalance_strategy_validation_trigger; -- -- drop backup tables diff --git a/src/test/regress/expected/upgrade_rebalance_strategy_after.out b/src/test/regress/expected/upgrade_rebalance_strategy_after.out index 4036af539..c7ea5cc4e 100644 --- a/src/test/regress/expected/upgrade_rebalance_strategy_after.out +++ b/src/test/regress/expected/upgrade_rebalance_strategy_after.out @@ -1,8 +1,9 @@ SELECT * FROM pg_catalog.pg_dist_rebalance_strategy ORDER BY name; - name | default_strategy | shard_cost_function | node_capacity_function | shard_allowed_on_node_function | default_threshold | minimum_threshold | improvement_threshold + name | default_strategy | shard_cost_function | node_capacity_function | shard_allowed_on_node_function | default_threshold | minimum_threshold | improvement_threshold --------------------------------------------------------------------- - by_disk_size | f | citus_shard_cost_by_disk_size | citus_node_capacity_1 | citus_shard_allowed_on_node_true | 0.1 | 0.01 | 0.5 - by_shard_count | f | citus_shard_cost_1 | citus_node_capacity_1 | citus_shard_allowed_on_node_true | 0 | 0 | 0 - custom_strategy | t | upgrade_rebalance_strategy.shard_cost_2 | upgrade_rebalance_strategy.capacity_high_worker_1 | upgrade_rebalance_strategy.only_worker_2 | 0.5 | 0.2 | 0.3 -(3 rows) + by_disk_size | f | citus_shard_cost_by_disk_size | citus_node_capacity_1 | citus_shard_allowed_on_node_true | 0.1 | 0.01 | 0.5 + by_shard_count | f | citus_shard_cost_1 | citus_node_capacity_1 | citus_shard_allowed_on_node_true | 0 | 0 | 0 + custom_strategy | t | upgrade_rebalance_strategy.shard_cost_2 | upgrade_rebalance_strategy.capacity_high_worker_1 | upgrade_rebalance_strategy.only_worker_2 | 0.5 | 0.2 | 0.3 + invalid_strategy | f | 1234567 | upgrade_rebalance_strategy.capacity_high_worker_1 | upgrade_rebalance_strategy.only_worker_2 | 0.5 | 0.2 | 0.3 +(4 rows) diff --git a/src/test/regress/expected/upgrade_rebalance_strategy_before.out b/src/test/regress/expected/upgrade_rebalance_strategy_before.out index cf1d122b3..85b458389 100644 --- a/src/test/regress/expected/upgrade_rebalance_strategy_before.out +++ b/src/test/regress/expected/upgrade_rebalance_strategy_before.out @@ -35,3 +35,23 @@ SELECT citus_set_default_rebalance_strategy('custom_strategy'); (1 row) +-- Disable the trigger temporarily to allow the invalid strategy to be added. +-- Normally an invalid strategy can end up in the table by deleting one of the +-- functions it depends on. But we do directly in this test because we want to +-- have a consistent OID, so we get consistent test output. +ALTER TABLE pg_catalog.pg_dist_rebalance_strategy DISABLE TRIGGER pg_dist_rebalance_strategy_validation_trigger; +SELECT citus_add_rebalance_strategy( + 'invalid_strategy', + 1234567, + 'capacity_high_worker_1', + 'only_worker_2', + 0.5, + 0.2, + 0.3 + ); + citus_add_rebalance_strategy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE pg_catalog.pg_dist_rebalance_strategy ENABLE TRIGGER pg_dist_rebalance_strategy_validation_trigger; diff --git a/src/test/regress/sql/upgrade_rebalance_strategy_before.sql b/src/test/regress/sql/upgrade_rebalance_strategy_before.sql index 458fb9cf6..be2012e9c 100644 --- a/src/test/regress/sql/upgrade_rebalance_strategy_before.sql +++ b/src/test/regress/sql/upgrade_rebalance_strategy_before.sql @@ -29,3 +29,19 @@ SELECT citus_add_rebalance_strategy( 0.3 ); SELECT citus_set_default_rebalance_strategy('custom_strategy'); + +-- Disable the trigger temporarily to allow the invalid strategy to be added. +-- Normally an invalid strategy can end up in the table by deleting one of the +-- functions it depends on. But we do directly in this test because we want to +-- have a consistent OID, so we get consistent test output. +ALTER TABLE pg_catalog.pg_dist_rebalance_strategy DISABLE TRIGGER pg_dist_rebalance_strategy_validation_trigger; +SELECT citus_add_rebalance_strategy( + 'invalid_strategy', + 1234567, + 'capacity_high_worker_1', + 'only_worker_2', + 0.5, + 0.2, + 0.3 + ); +ALTER TABLE pg_catalog.pg_dist_rebalance_strategy ENABLE TRIGGER pg_dist_rebalance_strategy_validation_trigger; From a263ac6f5f6cdc64d2ed49fa911e3507c73facfe Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Tue, 16 Apr 2024 10:16:40 +0200 Subject: [PATCH 07/10] Speed up GetForeignKeyOids (#7578) DESCRIPTION: Fix performance issue in GetForeignKeyOids on systems with many constraints GetForeignKeyOids was showing up in CPU profiles when distributing schemas on systems with 100k+ constraints. The reason was that this function was doing a sequence scan of pg_constraint to get the foreign keys that referenced the requested table. This fixes that by finding the constraints referencing the table through pg_depend instead of pg_constraint. We're doing this indirection, because pg_constraint doesn't have an index that we can use, but pg_depend does. --- .../distributed/commands/foreign_constraint.c | 187 ++++++++++++------ 1 file changed, 122 insertions(+), 65 deletions(-) diff --git a/src/backend/distributed/commands/foreign_constraint.c b/src/backend/distributed/commands/foreign_constraint.c index c1f2b83b6..2f60c3fb1 100644 --- a/src/backend/distributed/commands/foreign_constraint.c +++ b/src/backend/distributed/commands/foreign_constraint.c @@ -20,6 +20,7 @@ #include "access/xact.h" #include "catalog/namespace.h" #include "catalog/pg_constraint.h" +#include "catalog/pg_depend.h" #include "catalog/pg_type.h" #include "utils/builtins.h" #include "utils/fmgroids.h" @@ -36,6 +37,7 @@ #include "distributed/commands.h" #include "distributed/commands/sequence.h" #include "distributed/coordinator_protocol.h" +#include "distributed/hash_helpers.h" #include "distributed/listutils.h" #include "distributed/multi_join_order.h" #include "distributed/namespace_utils.h" @@ -1198,6 +1200,114 @@ TableHasExternalForeignKeys(Oid relationId) } +/* + * ForeignConstraintMatchesFlags is a function with logic that's very specific + * to GetForeignKeyOids. There's no reason to use it in any other context. + */ +static bool +ForeignConstraintMatchesFlags(Form_pg_constraint constraintForm, + int flags) +{ + if (constraintForm->contype != CONSTRAINT_FOREIGN) + { + return false; + } + + bool inheritedConstraint = OidIsValid(constraintForm->conparentid); + if (inheritedConstraint) + { + /* + * We only consider the constraints that are explicitly created on + * the table as we already process the constraints from parent tables + * implicitly when a command is issued + */ + return false; + } + + bool excludeSelfReference = (flags & EXCLUDE_SELF_REFERENCES); + bool isSelfReference = (constraintForm->conrelid == constraintForm->confrelid); + if (excludeSelfReference && isSelfReference) + { + return false; + } + + Oid otherTableId = InvalidOid; + if (flags & INCLUDE_REFERENCING_CONSTRAINTS) + { + otherTableId = constraintForm->confrelid; + } + else + { + otherTableId = constraintForm->conrelid; + } + + return IsTableTypeIncluded(otherTableId, flags); +} + + +/* + * GetForeignKeyOidsForReferencedTable returns a list of foreign key OIDs that + * reference the relationId and match the given flags. + * + * This is separated from GetForeignKeyOids because we need to scan pg_depend + * instead of pg_constraint directly. The reason for this is that there is no + * index on the confrelid of pg_constraint, so searching by that column + * requires a seqscan. + */ +static List * +GetForeignKeyOidsForReferencedTable(Oid relationId, int flags) +{ + HTAB *foreignKeyOidsSet = CreateSimpleHashSetWithName( + Oid, "ReferencingForeignKeyOidsSet"); + List *foreignKeyOidsList = NIL; + ScanKeyData key[2]; + HeapTuple dependTup; + Relation depRel = table_open(DependRelationId, AccessShareLock); + + ScanKeyInit(&key[0], + Anum_pg_depend_refclassid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationRelationId)); + ScanKeyInit(&key[1], + Anum_pg_depend_refobjid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(relationId)); + SysScanDesc scan = systable_beginscan(depRel, DependReferenceIndexId, true, + NULL, lengthof(key), key); + while (HeapTupleIsValid(dependTup = systable_getnext(scan))) + { + Form_pg_depend deprec = (Form_pg_depend) GETSTRUCT(dependTup); + + if (deprec->classid != ConstraintRelationId || + deprec->deptype != DEPENDENCY_NORMAL || + hash_search(foreignKeyOidsSet, &deprec->objid, HASH_FIND, NULL)) + { + continue; + } + + + HeapTuple constraintTup = SearchSysCache1(CONSTROID, ObjectIdGetDatum( + deprec->objid)); + if (!HeapTupleIsValid(constraintTup)) /* can happen during DROP TABLE */ + { + continue; + } + + Form_pg_constraint constraint = (Form_pg_constraint) GETSTRUCT(constraintTup); + if (constraint->confrelid == relationId && + ForeignConstraintMatchesFlags(constraint, flags)) + { + foreignKeyOidsList = lappend_oid(foreignKeyOidsList, constraint->oid); + hash_search(foreignKeyOidsSet, &constraint->oid, HASH_ENTER, NULL); + } + ReleaseSysCache(constraintTup); + } + systable_endscan(scan); + table_close(depRel, AccessShareLock); + return foreignKeyOidsList; +} + + /* * GetForeignKeyOids takes in a relationId, and returns a list of OIDs for * foreign constraints that the relation with relationId is involved according @@ -1207,9 +1317,8 @@ TableHasExternalForeignKeys(Oid relationId) List * GetForeignKeyOids(Oid relationId, int flags) { - AttrNumber pgConstraintTargetAttrNumber = InvalidAttrNumber; - - bool extractReferencing = (flags & INCLUDE_REFERENCING_CONSTRAINTS); + bool extractReferencing PG_USED_FOR_ASSERTS_ONLY = (flags & + INCLUDE_REFERENCING_CONSTRAINTS); bool extractReferenced = (flags & INCLUDE_REFERENCED_CONSTRAINTS); /* @@ -1220,22 +1329,10 @@ GetForeignKeyOids(Oid relationId, int flags) Assert(!(extractReferencing && extractReferenced)); Assert(extractReferencing || extractReferenced); - bool useIndex = false; - Oid indexOid = InvalidOid; - - if (extractReferencing) + if (extractReferenced) { - pgConstraintTargetAttrNumber = Anum_pg_constraint_conrelid; - - useIndex = true; - indexOid = ConstraintRelidTypidNameIndexId; + return GetForeignKeyOidsForReferencedTable(relationId, flags); } - else if (extractReferenced) - { - pgConstraintTargetAttrNumber = Anum_pg_constraint_confrelid; - } - - bool excludeSelfReference = (flags & EXCLUDE_SELF_REFERENCES); List *foreignKeyOids = NIL; @@ -1243,62 +1340,22 @@ GetForeignKeyOids(Oid relationId, int flags) int scanKeyCount = 1; Relation pgConstraint = table_open(ConstraintRelationId, AccessShareLock); - ScanKeyInit(&scanKey[0], pgConstraintTargetAttrNumber, + ScanKeyInit(&scanKey[0], Anum_pg_constraint_conrelid, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(relationId)); - SysScanDesc scanDescriptor = systable_beginscan(pgConstraint, indexOid, useIndex, + + SysScanDesc scanDescriptor = systable_beginscan(pgConstraint, + ConstraintRelidTypidNameIndexId, true, NULL, scanKeyCount, scanKey); - HeapTuple heapTuple = systable_getnext(scanDescriptor); - while (HeapTupleIsValid(heapTuple)) + HeapTuple heapTuple; + while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor))) { Form_pg_constraint constraintForm = (Form_pg_constraint) GETSTRUCT(heapTuple); - if (constraintForm->contype != CONSTRAINT_FOREIGN) + if (ForeignConstraintMatchesFlags(constraintForm, flags)) { - heapTuple = systable_getnext(scanDescriptor); - continue; + foreignKeyOids = lappend_oid(foreignKeyOids, constraintForm->oid); } - - bool inheritedConstraint = OidIsValid(constraintForm->conparentid); - if (inheritedConstraint) - { - /* - * We only consider the constraints that are explicitly created on - * the table as we already process the constraints from parent tables - * implicitly when a command is issued - */ - heapTuple = systable_getnext(scanDescriptor); - continue; - } - - Oid constraintId = constraintForm->oid; - - bool isSelfReference = (constraintForm->conrelid == constraintForm->confrelid); - if (excludeSelfReference && isSelfReference) - { - heapTuple = systable_getnext(scanDescriptor); - continue; - } - - Oid otherTableId = InvalidOid; - if (extractReferencing) - { - otherTableId = constraintForm->confrelid; - } - else if (extractReferenced) - { - otherTableId = constraintForm->conrelid; - } - - if (!IsTableTypeIncluded(otherTableId, flags)) - { - heapTuple = systable_getnext(scanDescriptor); - continue; - } - - foreignKeyOids = lappend_oid(foreignKeyOids, constraintId); - - heapTuple = systable_getnext(scanDescriptor); } systable_endscan(scanDescriptor); From 41e2af8ff5d86c9b327b96cd57f08d26777b7d13 Mon Sep 17 00:00:00 2001 From: Karina <55838532+Green-Chan@users.noreply.github.com> Date: Tue, 16 Apr 2024 11:44:47 +0300 Subject: [PATCH 08/10] Use expecteddir option in _run_pg_regress() (#7582) Fix check-arbitrary-configs tests failure with current REL_16_STABLE. This is the same problem as described in #7573. I missed pg_regress call in _run_pg_regress() in that PR. Co-authored-by: Karina Litskevich --- src/test/regress/citus_tests/common.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/test/regress/citus_tests/common.py b/src/test/regress/citus_tests/common.py index 2135a0eba..6c09e0b38 100644 --- a/src/test/regress/citus_tests/common.py +++ b/src/test/regress/citus_tests/common.py @@ -294,6 +294,9 @@ def _run_pg_regress( output_dir, "--use-existing", ] + if PG_MAJOR_VERSION >= 16: + command.append("--expecteddir") + command.append(output_dir) if extra_tests != "": command.append(extra_tests) From ada3ba25072cc5be055b3bbdedfa2fe936443b0d Mon Sep 17 00:00:00 2001 From: Xing Guo Date: Tue, 16 Apr 2024 21:29:14 +0800 Subject: [PATCH 09/10] Add missing volatile qualifier. (#7570) Variables being modified in the PG_TRY block and read in the PG_CATCH block should be qualified with volatile. The variable waitEventSet is modified in the PG_TRY block (line 1085) and read in the PG_CATCH block (line 1095). The variable relation is modified in the PG_TRY block (line 500) and read in the PG_CATCH block (line 515). Besides, the variable objectAddress doesn't need the volatile qualifier. Ref: C99 7.13.2.1[^1], > All accessible objects have values, and all other components of the abstract machine have state, as of the time the longjmp function was called, except that the values of objects of automatic storage duration that are local to the function containing the invocation of the corresponding setjmp macro that do not have volatile-qualified type and have been changed between the setjmp invocation and longjmp call are indeterminate. [^1]: https://www.open-std.org/jtc1/sc22/wg14/www/docs/n1256.pdf DESCRIPTION: Correctly mark some variables as volatile --------- Co-authored-by: Hong Yi --- .../distributed/connection/remote_commands.c | 2 +- .../distributed/utils/citus_depended_object.c | 15 ++++++++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/backend/distributed/connection/remote_commands.c b/src/backend/distributed/connection/remote_commands.c index 4b46e96d2..cbd74ff51 100644 --- a/src/backend/distributed/connection/remote_commands.c +++ b/src/backend/distributed/connection/remote_commands.c @@ -883,7 +883,7 @@ WaitForAllConnections(List *connectionList, bool raiseInterrupts) palloc(totalConnectionCount * sizeof(MultiConnection *)); WaitEvent *events = palloc(totalConnectionCount * sizeof(WaitEvent)); bool *connectionReady = palloc(totalConnectionCount * sizeof(bool)); - WaitEventSet *waitEventSet = NULL; + WaitEventSet *volatile waitEventSet = NULL; /* convert connection list to an array such that we can move items around */ MultiConnection *connectionItem = NULL; diff --git a/src/backend/distributed/utils/citus_depended_object.c b/src/backend/distributed/utils/citus_depended_object.c index a160fcd56..7588f8594 100644 --- a/src/backend/distributed/utils/citus_depended_object.c +++ b/src/backend/distributed/utils/citus_depended_object.c @@ -465,8 +465,8 @@ static bool AnyObjectViolatesOwnership(DropStmt *dropStmt) { bool hasOwnershipViolation = false; - volatile ObjectAddress objectAddress = { 0 }; - Relation relation = NULL; + ObjectAddress objectAddress = { 0 }; + volatile Relation relation = NULL; ObjectType objectType = dropStmt->removeType; bool missingOk = dropStmt->missing_ok; @@ -480,8 +480,17 @@ AnyObjectViolatesOwnership(DropStmt *dropStmt) Node *object = NULL; foreach_ptr(object, dropStmt->objects) { + Relation rel = NULL; objectAddress = get_object_address(objectType, object, - &relation, AccessShareLock, missingOk); + &rel, AccessShareLock, missingOk); + + /* + * The object relation is qualified with volatile and its value is obtained from + * get_object_address(). Unless we can qualify the corresponding parameter of + * get_object_address() with volatile (this is a function defined in PostgreSQL), + * we cannot get rid of this assignment. + */ + relation = rel; if (OidIsValid(objectAddress.objectId)) { From a0151aa31d7584187a50dfbc19b28bccc76cb616 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Tue, 16 Apr 2024 17:26:12 +0200 Subject: [PATCH 10/10] Greatly speed up "\d tablename" on servers with many tables (#7577) DESCRIPTION: Fix performance issue when using "\d tablename" on a server with many tables We introduce a filter to every query on pg_class to automatically remove shards. This is useful to make sure \d and PgAdmin are not cluttered with shards. However, the way we were introducing this filter was using `securityQuals` which can have negative impact on query performance. On clusters with 100k+ tables this could cause a simple "\d tablename" command to take multiple seconds, because a skipped optimization by Postgres causes a full table scan. This changes the code to introduce this filter in the regular `quals` list instead of in `securityQuals`. Which causes Postgres to use the intended optimization again. For reference, this was initially reported as a Postgres issue by me: https://www.postgresql.org/message-id/flat/4189982.1712785863%40sss.pgh.pa.us#b87421293b362d581ea8677e3bfea920 --- .../worker/worker_shard_visibility.c | 65 ++++++++++++++++--- .../expected/multi_mx_hide_shard_names.out | 46 +++++++++++++ .../regress/sql/multi_mx_hide_shard_names.sql | 18 +++++ 3 files changed, 120 insertions(+), 9 deletions(-) diff --git a/src/backend/distributed/worker/worker_shard_visibility.c b/src/backend/distributed/worker/worker_shard_visibility.c index 49131ef6d..ccd1a897c 100644 --- a/src/backend/distributed/worker/worker_shard_visibility.c +++ b/src/backend/distributed/worker/worker_shard_visibility.c @@ -54,6 +54,7 @@ static bool ShouldHideShardsInternal(void); static bool IsPgBgWorker(void); static bool FilterShardsFromPgclass(Node *node, void *context); static Node * CreateRelationIsAKnownShardFilter(int pgClassVarno); +static bool HasRangeTableRef(Node *node, int *varno); PG_FUNCTION_INFO_V1(citus_table_is_visible); PG_FUNCTION_INFO_V1(relation_is_a_known_shard); @@ -421,8 +422,8 @@ IsPgBgWorker(void) /* - * FilterShardsFromPgclass adds a NOT relation_is_a_known_shard(oid) filter - * to the security quals of pg_class RTEs. + * FilterShardsFromPgclass adds a "relation_is_a_known_shard(oid) IS NOT TRUE" + * filter to the quals of queries that query pg_class. */ static bool FilterShardsFromPgclass(Node *node, void *context) @@ -456,12 +457,35 @@ FilterShardsFromPgclass(Node *node, void *context) continue; } + /* + * Skip if pg_class is not actually queried. This is possible on + * INSERT statements that insert into pg_class. + */ + if (!expression_tree_walker((Node *) query->jointree->fromlist, + HasRangeTableRef, &varno)) + { + /* the query references pg_class */ + continue; + } + /* make sure the expression is in the right memory context */ MemoryContext originalContext = MemoryContextSwitchTo(queryContext); - /* add NOT relation_is_a_known_shard(oid) to the security quals of the RTE */ - rangeTableEntry->securityQuals = - list_make1(CreateRelationIsAKnownShardFilter(varno)); + + /* add relation_is_a_known_shard(oid) IS NOT TRUE to the quals of the query */ + Node *newQual = CreateRelationIsAKnownShardFilter(varno); + Node *oldQuals = query->jointree->quals; + if (oldQuals) + { + query->jointree->quals = (Node *) makeBoolExpr( + AND_EXPR, + list_make2(oldQuals, newQual), + -1); + } + else + { + query->jointree->quals = newQual; + } MemoryContextSwitchTo(originalContext); } @@ -473,9 +497,32 @@ FilterShardsFromPgclass(Node *node, void *context) } +/* + * HasRangeTableRef passed to expression_tree_walker to check if a node is a + * RangeTblRef of the given varno is present in a fromlist. + */ +static bool +HasRangeTableRef(Node *node, int *varno) +{ + if (IsA(node, RangeTblRef)) + { + RangeTblRef *rangeTblRef = (RangeTblRef *) node; + return rangeTblRef->rtindex == *varno; + } + + return expression_tree_walker(node, HasRangeTableRef, varno); +} + + /* * CreateRelationIsAKnownShardFilter constructs an expression of the form: - * NOT pg_catalog.relation_is_a_known_shard(oid) + * pg_catalog.relation_is_a_known_shard(oid) IS NOT TRUE + * + * The difference between "NOT pg_catalog.relation_is_a_known_shard(oid)" and + * "pg_catalog.relation_is_a_known_shard(oid) IS NOT TRUE" is that the former + * will return FALSE if the function returns NULL, while the second will return + * TRUE. This difference is important in the case of outer joins, because this + * filter might be applied on an oid that is then NULL. */ static Node * CreateRelationIsAKnownShardFilter(int pgClassVarno) @@ -496,9 +543,9 @@ CreateRelationIsAKnownShardFilter(int pgClassVarno) funcExpr->location = -1; funcExpr->args = list_make1(oidVar); - BoolExpr *notExpr = makeNode(BoolExpr); - notExpr->boolop = NOT_EXPR; - notExpr->args = list_make1(funcExpr); + BooleanTest *notExpr = makeNode(BooleanTest); + notExpr->booltesttype = IS_NOT_TRUE; + notExpr->arg = (Expr *) funcExpr; notExpr->location = -1; return (Node *) notExpr; diff --git a/src/test/regress/expected/multi_mx_hide_shard_names.out b/src/test/regress/expected/multi_mx_hide_shard_names.out index 116269a4e..762c6a30b 100644 --- a/src/test/regress/expected/multi_mx_hide_shard_names.out +++ b/src/test/regress/expected/multi_mx_hide_shard_names.out @@ -83,6 +83,52 @@ SELECT relname FROM pg_catalog.pg_class WHERE relnamespace = 'mx_hide_shard_name test_table (1 row) +-- Even when using subquery and having no existing quals on pg_clcass +SELECT relname FROM (SELECT relname, relnamespace FROM pg_catalog.pg_class) AS q WHERE relnamespace = 'mx_hide_shard_names'::regnamespace ORDER BY relname; + relname +--------------------------------------------------------------------- + test_table +(1 row) + +-- Check that inserts into pg_class don't add the filter +EXPLAIN (COSTS OFF) INSERT INTO pg_class VALUES (1); + QUERY PLAN +--------------------------------------------------------------------- + Insert on pg_class + -> Result +(2 rows) + +-- Unless it's an INSERT SELECT that queries from pg_class; +EXPLAIN (COSTS OFF) INSERT INTO pg_class SELECT * FROM pg_class; + QUERY PLAN +--------------------------------------------------------------------- + Insert on pg_class + -> Seq Scan on pg_class pg_class_1 + Filter: (relation_is_a_known_shard(oid) IS NOT TRUE) +(3 rows) + +-- Check that query that psql "\d test_table" does gets optimized to an index +-- scan +EXPLAIN (COSTS OFF) SELECT c.oid, + n.nspname, + c.relname +FROM pg_catalog.pg_class c + LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace +WHERE c.relname OPERATOR(pg_catalog.~) '^(test_table)$' COLLATE pg_catalog.default + AND pg_catalog.pg_table_is_visible(c.oid) +ORDER BY 2, 3; + QUERY PLAN +--------------------------------------------------------------------- + Sort + Sort Key: n.nspname, c.relname + -> Nested Loop Left Join + Join Filter: (n.oid = c.relnamespace) + -> Index Scan using pg_class_relname_nsp_index on pg_class c + Index Cond: (relname = 'test_table'::text) + Filter: ((relname ~ '^(test_table)$'::text) AND (relation_is_a_known_shard(oid) IS NOT TRUE) AND pg_table_is_visible(oid)) + -> Seq Scan on pg_namespace n +(8 rows) + commit prepared 'take-aggressive-lock'; -- now create an index \c - - - :master_port diff --git a/src/test/regress/sql/multi_mx_hide_shard_names.sql b/src/test/regress/sql/multi_mx_hide_shard_names.sql index e5213a41b..addc7f90e 100644 --- a/src/test/regress/sql/multi_mx_hide_shard_names.sql +++ b/src/test/regress/sql/multi_mx_hide_shard_names.sql @@ -50,6 +50,24 @@ prepare transaction 'take-aggressive-lock'; -- shards are hidden when using psql as application_name SELECT relname FROM pg_catalog.pg_class WHERE relnamespace = 'mx_hide_shard_names'::regnamespace ORDER BY relname; +-- Even when using subquery and having no existing quals on pg_clcass +SELECT relname FROM (SELECT relname, relnamespace FROM pg_catalog.pg_class) AS q WHERE relnamespace = 'mx_hide_shard_names'::regnamespace ORDER BY relname; + +-- Check that inserts into pg_class don't add the filter +EXPLAIN (COSTS OFF) INSERT INTO pg_class VALUES (1); +-- Unless it's an INSERT SELECT that queries from pg_class; +EXPLAIN (COSTS OFF) INSERT INTO pg_class SELECT * FROM pg_class; + +-- Check that query that psql "\d test_table" does gets optimized to an index +-- scan +EXPLAIN (COSTS OFF) SELECT c.oid, + n.nspname, + c.relname +FROM pg_catalog.pg_class c + LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace +WHERE c.relname OPERATOR(pg_catalog.~) '^(test_table)$' COLLATE pg_catalog.default + AND pg_catalog.pg_table_is_visible(c.oid) +ORDER BY 2, 3; commit prepared 'take-aggressive-lock';