From 5ed9197041d14299a6e3ee70a59ee62b1594dfd9 Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Thu, 24 Dec 2020 16:42:40 +0300 Subject: [PATCH] Implement infra to get foreign key connected relations (#4439) On top of our foreign key graph, implement the infrastructure to get list of relations that are connected to input relation via a foreign key graph. We need this to support cascading create_citus_local_table & undistribute_table operations. Also add regression tests to see what our foreign key graph is able to capture currently. --- .../test/foreign_key_relationship_query.c | 42 ++++++ .../utils/foreign_key_relationship.c | 108 ++++++++++++++ .../distributed/foreign_key_relationship.h | 1 + .../multi_foreign_key_relation_graph.out | 141 ++++++++++++++++-- .../sql/multi_foreign_key_relation_graph.sql | 90 +++++++++++ 5 files changed, 369 insertions(+), 13 deletions(-) diff --git a/src/backend/distributed/test/foreign_key_relationship_query.c b/src/backend/distributed/test/foreign_key_relationship_query.c index b1f1f7c8d..ea152acc2 100644 --- a/src/backend/distributed/test/foreign_key_relationship_query.c +++ b/src/backend/distributed/test/foreign_key_relationship_query.c @@ -14,13 +14,20 @@ #include "fmgr.h" #include "funcapi.h" +#include "distributed/foreign_key_relationship.h" #include "distributed/listutils.h" #include "distributed/metadata_cache.h" +#include "distributed/tuplestore.h" #include "distributed/version_compat.h" + +#define GET_FKEY_CONNECTED_RELATIONS_COLUMNS 1 + + /* these functions are only exported in the regression tests */ PG_FUNCTION_INFO_V1(get_referencing_relation_id_list); PG_FUNCTION_INFO_V1(get_referenced_relation_id_list); +PG_FUNCTION_INFO_V1(get_foreign_key_connected_relations); /* * get_referencing_relation_id_list returns the list of table oids that is referencing @@ -138,3 +145,38 @@ get_referenced_relation_id_list(PG_FUNCTION_ARGS) SRF_RETURN_DONE(functionContext); } } + + +/* + * get_foreign_key_connected_relations takes a 
relation, and returns relations + * that are connected to input relation via a foreign key graph. + */ +Datum +get_foreign_key_connected_relations(PG_FUNCTION_ARGS) +{ + CheckCitusVersion(ERROR); + + Oid relationId = PG_GETARG_OID(0); + + TupleDesc tupleDescriptor = NULL; + Tuplestorestate *tupleStore = SetupTuplestore(fcinfo, &tupleDescriptor); + + Oid connectedRelationId; + List *fkeyConnectedRelationIdList = GetForeignKeyConnectedRelationIdList(relationId); + foreach_oid(connectedRelationId, fkeyConnectedRelationIdList) + { + Datum values[GET_FKEY_CONNECTED_RELATIONS_COLUMNS]; + bool nulls[GET_FKEY_CONNECTED_RELATIONS_COLUMNS]; + + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + + values[0] = ObjectIdGetDatum(connectedRelationId); + + tuplestore_putvalues(tupleStore, tupleDescriptor, values, nulls); + } + + tuplestore_donestoring(tupleStore); + + PG_RETURN_VOID(); +} diff --git a/src/backend/distributed/utils/foreign_key_relationship.c b/src/backend/distributed/utils/foreign_key_relationship.c index 7641af58e..41174f1c7 100644 --- a/src/backend/distributed/utils/foreign_key_relationship.c +++ b/src/backend/distributed/utils/foreign_key_relationship.c @@ -35,6 +35,9 @@ #include "common/hashfn.h" #endif #include "utils/memutils.h" +#if PG_VERSION_NUM < PG_VERSION_12 +#include "utils/rel.h" +#endif /* @@ -77,6 +80,9 @@ typedef struct ForeignConstraintRelationshipEdge static ForeignConstraintRelationshipGraph *fConstraintRelationshipGraph = NULL; +static List * GetRelationshipNodesForFKeyConnectedRelations( + ForeignConstraintRelationshipNode *relationshipNode); +static List * GetAllNeighboursList(ForeignConstraintRelationshipNode *relationshipNode); static ForeignConstraintRelationshipNode * GetRelationshipNodeForRelationId(Oid relationId, bool *isFound); @@ -98,6 +104,108 @@ static void VisitOid(HTAB *oidVisitedMap, Oid oid); static List * GetForeignConstraintRelationshipHelper(Oid relationId, bool isReferencing); +/* + * 
GetForeignKeyConnectedRelationIdList returns a list of relation id's for + * relations that are connected to relation with relationId via a foreign + * key graph. + */ +List * +GetForeignKeyConnectedRelationIdList(Oid relationId) +{ + /* use ShareRowExclusiveLock to prevent concurent foreign key creation */ + LOCKMODE lockMode = ShareRowExclusiveLock; + Relation relation = try_relation_open(relationId, lockMode); + if (!RelationIsValid(relation)) + { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("relation with OID %d does not exist", + relationId))); + } + + relation_close(relation, NoLock); + + bool foundInFKeyGraph = false; + ForeignConstraintRelationshipNode *relationshipNode = + GetRelationshipNodeForRelationId(relationId, &foundInFKeyGraph); + if (!foundInFKeyGraph) + { + /* + * Relation could not be found in foreign key graph, then it has no + * foreign key relationships. + */ + return NIL; + } + + List *fKeyConnectedRelationshipNodeList = + GetRelationshipNodesForFKeyConnectedRelations(relationshipNode); + List *fKeyConnectedRelationIdList = + GetRelationIdsFromRelationshipNodeList(fKeyConnectedRelationshipNodeList); + return fKeyConnectedRelationIdList; +} + + +/* + * GetRelationshipNodesForFKeyConnectedRelations performs breadth-first search + * starting from input ForeignConstraintRelationshipNode and returns a list + * of ForeignConstraintRelationshipNode objects for relations that are connected + * to given relation node via a foreign key relationhip graph. 
+ */ +static List * +GetRelationshipNodesForFKeyConnectedRelations( + ForeignConstraintRelationshipNode *relationshipNode) +{ + HTAB *oidVisitedMap = CreateOidVisitedHashSet(); + + VisitOid(oidVisitedMap, relationshipNode->relationId); + List *relationshipNodeList = list_make1(relationshipNode); + + ForeignConstraintRelationshipNode *currentNode = NULL; + foreach_ptr_append(currentNode, relationshipNodeList) + { + List *allNeighboursList = GetAllNeighboursList(currentNode); + ForeignConstraintRelationshipNode *neighbourNode = NULL; + foreach_ptr(neighbourNode, allNeighboursList) + { + Oid neighbourRelationId = neighbourNode->relationId; + if (OidVisited(oidVisitedMap, neighbourRelationId)) + { + continue; + } + + VisitOid(oidVisitedMap, neighbourRelationId); + relationshipNodeList = lappend(relationshipNodeList, neighbourNode); + } + } + + return relationshipNodeList; +} + + +/* + * GetAllNeighboursList returns a list of ForeignConstraintRelationshipNode + * objects by concatenating both (referencing & referenced) adjacency lists + * of given relationship node. + */ +static List * +GetAllNeighboursList(ForeignConstraintRelationshipNode *relationshipNode) +{ + bool isReferencing = false; + List *referencedNeighboursList = GetNeighbourList(relationshipNode, isReferencing); + + isReferencing = true; + List *referencingNeighboursList = GetNeighbourList(relationshipNode, isReferencing); + + /* + * GetNeighbourList returns list from graph as is, so first copy it as + * list_concat_unique_ptr modifies its first argument in place. + */ + List *allNeighboursList = list_copy(referencedNeighboursList); + allNeighboursList = list_concat_unique_ptr(allNeighboursList, + referencingNeighboursList); + return allNeighboursList; +} + + /* * ReferencedRelationIdList is a wrapper function around GetForeignConstraintRelationshipHelper * to get list of relation IDs which are referenced by the given relation id. 
diff --git a/src/include/distributed/foreign_key_relationship.h b/src/include/distributed/foreign_key_relationship.h index 2f9289962..1ad98133f 100644 --- a/src/include/distributed/foreign_key_relationship.h +++ b/src/include/distributed/foreign_key_relationship.h @@ -15,6 +15,7 @@ #include "utils/hsearch.h" #include "nodes/primnodes.h" +extern List * GetForeignKeyConnectedRelationIdList(Oid relationId); extern List * ReferencedRelationIdList(Oid relationId); extern List * ReferencingRelationIdList(Oid relationId); extern void SetForeignConstraintRelationshipGraphInvalid(void); diff --git a/src/test/regress/expected/multi_foreign_key_relation_graph.out b/src/test/regress/expected/multi_foreign_key_relation_graph.out index 8345f0cfe..0aa7dafb9 100644 --- a/src/test/regress/expected/multi_foreign_key_relation_graph.out +++ b/src/test/regress/expected/multi_foreign_key_relation_graph.out @@ -602,18 +602,133 @@ drop cascades to table test_8 (0 rows) ROLLBACK; +CREATE OR REPLACE FUNCTION get_foreign_key_connected_relations(IN table_name regclass) +RETURNS SETOF RECORD +LANGUAGE C STRICT +AS 'citus', $$get_foreign_key_connected_relations$$; +COMMENT ON FUNCTION get_foreign_key_connected_relations(IN table_name regclass) +IS 'returns relations connected to input relation via a foreign key graph'; +CREATE TABLE distributed_table_1(col int unique); +CREATE TABLE distributed_table_2(col int unique); +CREATE TABLE distributed_table_3(col int unique); +CREATE TABLE distributed_table_4(col int unique); +SELECT create_distributed_table('distributed_table_1', 'col'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('distributed_table_2', 'col'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('distributed_table_3', 'col'); + create_distributed_table 
+--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE reference_table_1(col int unique); +CREATE TABLE reference_table_2(col int unique); +SELECT create_reference_table('reference_table_1'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_reference_table('reference_table_2'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +-- Now build below foreign key graph: +-- +-- -------------------------------------------- +-- ^ | +-- | v +-- distributed_table_1 <- distributed_table_2 -> reference_table_1 <- reference_table_2 +-- | ^ +-- | | +-- ----------> distributed_table_3 +ALTER TABLE distributed_table_2 ADD CONSTRAINT fkey_1 FOREIGN KEY (col) REFERENCES distributed_table_1(col); +ALTER TABLE distributed_table_2 ADD CONSTRAINT fkey_2 FOREIGN KEY (col) REFERENCES reference_table_1(col); +ALTER TABLE reference_table_2 ADD CONSTRAINT fkey_3 FOREIGN KEY (col) REFERENCES reference_table_1(col); +ALTER TABLE distributed_table_3 ADD CONSTRAINT fkey_4 FOREIGN KEY (col) REFERENCES distributed_table_2(col); +ALTER TABLE distributed_table_2 ADD CONSTRAINT fkey_5 FOREIGN KEY (col) REFERENCES reference_table_2(col); +ALTER TABLE distributed_table_1 ADD CONSTRAINT fkey_6 FOREIGN KEY (col) REFERENCES distributed_table_3(col); +-- below queries should print all 5 tables mentioned in above graph +SELECT oid::regclass::text AS tablename +FROM get_foreign_key_connected_relations('reference_table_1') AS f(oid oid) +ORDER BY tablename; + tablename +--------------------------------------------------------------------- + distributed_table_1 + distributed_table_2 + distributed_table_3 + reference_table_1 + reference_table_2 +(5 rows) + +SELECT oid::regclass::text AS tablename +FROM get_foreign_key_connected_relations('distributed_table_1') AS f(oid oid) +ORDER BY tablename; + tablename 
+--------------------------------------------------------------------- + distributed_table_1 + distributed_table_2 + distributed_table_3 + reference_table_1 + reference_table_2 +(5 rows) + +-- show that this does not print anything as distributed_table_4 +-- is not involved in any foreign key relationship +SELECT oid::regclass::text AS tablename +FROM get_foreign_key_connected_relations('distributed_table_4') AS f(oid oid) +ORDER BY tablename; + tablename +--------------------------------------------------------------------- +(0 rows) + +ALTER TABLE distributed_table_4 ADD CONSTRAINT fkey_1 FOREIGN KEY (col) REFERENCES distributed_table_4(col); +-- even if distributed_table_4 has a self referencing foreign key, +-- we don't print anything as we only consider foreign key relationships +-- with other tables +SELECT oid::regclass::text AS tablename +FROM get_foreign_key_connected_relations('distributed_table_4') AS f(oid oid) +ORDER BY tablename; + tablename +--------------------------------------------------------------------- +(0 rows) + +CREATE TABLE local_table_1 (col int unique); +CREATE TABLE local_table_2 (col int unique); +-- show that we do not trigger updating foreign key graph when +-- defining/dropping foreign keys between postgres tables +ALTER TABLE local_table_1 ADD CONSTRAINT fkey_1 FOREIGN KEY (col) REFERENCES local_table_2(col); +SELECT oid::regclass::text AS tablename +FROM get_foreign_key_connected_relations('local_table_2') AS f(oid oid) +ORDER BY tablename; + tablename +--------------------------------------------------------------------- +(0 rows) + +ALTER TABLE local_table_1 DROP CONSTRAINT fkey_1; +SELECT oid::regclass::text AS tablename +FROM get_foreign_key_connected_relations('local_table_1') AS f(oid oid) +ORDER BY tablename; + tablename +--------------------------------------------------------------------- +(0 rows) + +-- show that we error out for non-existent tables +SELECT oid::regclass::text AS tablename +FROM 
get_foreign_key_connected_relations('non_existent_table') AS f(oid oid) +ORDER BY tablename; +ERROR: relation "non_existent_table" does not exist +set client_min_messages to error; SET search_path TO public; DROP SCHEMA fkey_graph CASCADE; -NOTICE: drop cascades to 12 other objects -DETAIL: drop cascades to function fkey_graph.get_referencing_relation_id_list(oid) -drop cascades to function fkey_graph.get_referenced_relation_id_list(oid) -drop cascades to table fkey_graph.dtt1 -drop cascades to table fkey_graph.dtt2 -drop cascades to table fkey_graph.dtt3 -drop cascades to table fkey_graph.dtt4 -drop cascades to view fkey_graph.referential_integrity_summary -drop cascades to table fkey_graph.test_1 -drop cascades to table fkey_graph.test_2 -drop cascades to table fkey_graph.test_3 -drop cascades to table fkey_graph.test_4 -drop cascades to table fkey_graph.test_5 diff --git a/src/test/regress/sql/multi_foreign_key_relation_graph.sql b/src/test/regress/sql/multi_foreign_key_relation_graph.sql index 7a7e23dc1..72ade3f60 100644 --- a/src/test/regress/sql/multi_foreign_key_relation_graph.sql +++ b/src/test/regress/sql/multi_foreign_key_relation_graph.sql @@ -220,5 +220,95 @@ BEGIN; ROLLBACK; +CREATE OR REPLACE FUNCTION get_foreign_key_connected_relations(IN table_name regclass) +RETURNS SETOF RECORD +LANGUAGE C STRICT +AS 'citus', $$get_foreign_key_connected_relations$$; +COMMENT ON FUNCTION get_foreign_key_connected_relations(IN table_name regclass) +IS 'returns relations connected to input relation via a foreign key graph'; + +CREATE TABLE distributed_table_1(col int unique); +CREATE TABLE distributed_table_2(col int unique); +CREATE TABLE distributed_table_3(col int unique); +CREATE TABLE distributed_table_4(col int unique); + +SELECT create_distributed_table('distributed_table_1', 'col'); +SELECT create_distributed_table('distributed_table_2', 'col'); +SELECT create_distributed_table('distributed_table_3', 'col'); + +CREATE TABLE reference_table_1(col int unique); 
+CREATE TABLE reference_table_2(col int unique); + +SELECT create_reference_table('reference_table_1'); +SELECT create_reference_table('reference_table_2'); + + +-- Now build below foreign key graph: +-- +-- -------------------------------------------- +-- ^ | +-- | v +-- distributed_table_1 <- distributed_table_2 -> reference_table_1 <- reference_table_2 +-- | ^ +-- | | +-- ----------> distributed_table_3 + +ALTER TABLE distributed_table_2 ADD CONSTRAINT fkey_1 FOREIGN KEY (col) REFERENCES distributed_table_1(col); +ALTER TABLE distributed_table_2 ADD CONSTRAINT fkey_2 FOREIGN KEY (col) REFERENCES reference_table_1(col); +ALTER TABLE reference_table_2 ADD CONSTRAINT fkey_3 FOREIGN KEY (col) REFERENCES reference_table_1(col); +ALTER TABLE distributed_table_3 ADD CONSTRAINT fkey_4 FOREIGN KEY (col) REFERENCES distributed_table_2(col); +ALTER TABLE distributed_table_2 ADD CONSTRAINT fkey_5 FOREIGN KEY (col) REFERENCES reference_table_2(col); +ALTER TABLE distributed_table_1 ADD CONSTRAINT fkey_6 FOREIGN KEY (col) REFERENCES distributed_table_3(col); + +-- below queries should print all 5 tables mentioned in above graph + +SELECT oid::regclass::text AS tablename +FROM get_foreign_key_connected_relations('reference_table_1') AS f(oid oid) +ORDER BY tablename; + +SELECT oid::regclass::text AS tablename +FROM get_foreign_key_connected_relations('distributed_table_1') AS f(oid oid) +ORDER BY tablename; + +-- show that this does not print anything as distributed_table_4 +-- is not involved in any foreign key relationship +SELECT oid::regclass::text AS tablename +FROM get_foreign_key_connected_relations('distributed_table_4') AS f(oid oid) +ORDER BY tablename; + +ALTER TABLE distributed_table_4 ADD CONSTRAINT fkey_1 FOREIGN KEY (col) REFERENCES distributed_table_4(col); + +-- even if distributed_table_4 has a self referencing foreign key, +-- we don't print anything as we only consider foreign key relationships +-- with other tables +SELECT oid::regclass::text AS tablename 
+FROM get_foreign_key_connected_relations('distributed_table_4') AS f(oid oid) +ORDER BY tablename; + +CREATE TABLE local_table_1 (col int unique); +CREATE TABLE local_table_2 (col int unique); + +-- show that we do not trigger updating foreign key graph when +-- defining/dropping foreign keys between postgres tables + +ALTER TABLE local_table_1 ADD CONSTRAINT fkey_1 FOREIGN KEY (col) REFERENCES local_table_2(col); + +SELECT oid::regclass::text AS tablename +FROM get_foreign_key_connected_relations('local_table_2') AS f(oid oid) +ORDER BY tablename; + +ALTER TABLE local_table_1 DROP CONSTRAINT fkey_1; + +SELECT oid::regclass::text AS tablename +FROM get_foreign_key_connected_relations('local_table_1') AS f(oid oid) +ORDER BY tablename; + +-- show that we error out for non-existent tables +SELECT oid::regclass::text AS tablename +FROM get_foreign_key_connected_relations('non_existent_table') AS f(oid oid) +ORDER BY tablename; + +set client_min_messages to error; + SET search_path TO public; DROP SCHEMA fkey_graph CASCADE;