From d535121f8de97dec3feabac84aa62131f26a52f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanefi=20=C3=96nald=C4=B1?= Date: Fri, 17 Apr 2020 13:03:25 +0300 Subject: [PATCH] Introduce truncate_local_data_after_distributing_table() --- .../distributed/master/master_truncate.c | 80 ++++++++ .../distributed/sql/citus--9.2-4--9.3-2.sql | 1 + .../9.3-2.sql | 7 + .../latest.sql | 7 + src/test/regress/expected/multi_truncate.out | 180 +++++++++++++++++- src/test/regress/sql/multi_truncate.sql | 112 ++++++++++- 6 files changed, 382 insertions(+), 5 deletions(-) create mode 100644 src/backend/distributed/sql/udfs/truncate_local_data_after_distributing_table/9.3-2.sql create mode 100644 src/backend/distributed/sql/udfs/truncate_local_data_after_distributing_table/latest.sql diff --git a/src/backend/distributed/master/master_truncate.c b/src/backend/distributed/master/master_truncate.c index 4c31a1b97..76ad4c091 100644 --- a/src/backend/distributed/master/master_truncate.c +++ b/src/backend/distributed/master/master_truncate.c @@ -10,13 +10,16 @@ */ #include "postgres.h" +#include "miscadmin.h" #include #include "commands/tablecmds.h" #include "commands/trigger.h" +#include "distributed/citus_ruleutils.h" #include "distributed/commands/utility_hook.h" #include "distributed/deparse_shard_query.h" +#include "distributed/foreign_key_relationship.h" #include "distributed/listutils.h" #include "distributed/master_metadata_utility.h" #include "distributed/master_protocol.h" @@ -26,6 +29,7 @@ #include "distributed/resource_lock.h" #include "utils/builtins.h" #include "utils/lsyscache.h" +#include "utils/regproc.h" #include "utils/rel.h" static List * TruncateTaskList(Oid relationId); @@ -33,6 +37,9 @@ static List * TruncateTaskList(Oid relationId); /* exports for SQL callable functions */ PG_FUNCTION_INFO_V1(citus_truncate_trigger); +PG_FUNCTION_INFO_V1(truncate_local_data_after_distributing_table); + +void EnsureLocalTableCanBeTruncated(Oid relationId); /* @@ -139,3 +146,76 @@ TruncateTaskList(Oid relationId) return taskList; } + + +/* + * truncate_local_data_after_distributing_table truncates the local records of a distributed table. + * + * The main advantage of this function is to truncate all local records after creating a + * distributed table, and prevent constraints from failing due to outdated local records. + */ +Datum +truncate_local_data_after_distributing_table(PG_FUNCTION_ARGS) +{ + Oid relationId = PG_GETARG_OID(0); + + CheckCitusVersion(ERROR); + EnsureCoordinator(); + EnsureLocalTableCanBeTruncated(relationId); + + TruncateStmt *truncateStmt = makeNode(TruncateStmt); + + char *relationName = generate_qualified_relation_name(relationId); + List *names = stringToQualifiedNameList(relationName); + truncateStmt->relations = list_make1(makeRangeVarFromNameList(names)); + truncateStmt->restart_seqs = false; + truncateStmt->behavior = DROP_CASCADE; + + set_config_option("citus.enable_ddl_propagation", "false", + (superuser() ? PGC_SUSET : PGC_USERSET), PGC_S_SESSION, + GUC_ACTION_LOCAL, true, 0, false); + ExecuteTruncate(truncateStmt); + set_config_option("citus.enable_ddl_propagation", "true", + (superuser() ? PGC_SUSET : PGC_USERSET), PGC_S_SESSION, + GUC_ACTION_LOCAL, true, 0, false); + + PG_RETURN_VOID(); +} + + +/* + * EnsureLocalTableCanBeTruncated performs the necessary checks to make sure it + * is safe to truncate the local table of a distributed table + */ +void +EnsureLocalTableCanBeTruncated(Oid relationId) +{ + /* error out if the relation is not a distributed table */ + if (!IsCitusTable(relationId)) + { + ereport(ERROR, (errmsg("supplied parameter is not a distributed relation"), + errdetail("This UDF only truncates local records of distributed " + "tables."))); + } + + /* make sure there are no foreign key references from a local table */ + SetForeignConstraintRelationshipGraphInvalid(); + List *referencingRelationList = ReferencingRelationIdList(relationId); + + Oid referencingRelation = InvalidOid; + foreach_oid(referencingRelation, referencingRelationList) + { + /* we do not truncate a table if there is a local table referencing it */ + if (!IsCitusTable(referencingRelation)) + { + char *referencedRelationName = get_rel_name(relationId); + char *referencingRelationName = get_rel_name(referencingRelation); + + ereport(ERROR, (errmsg("cannot truncate a table referenced in a " + "foreign key constraint by a local table"), + errdetail("Table \"%s\" references \"%s\"", + referencingRelationName, + referencedRelationName))); + } + } +} diff --git a/src/backend/distributed/sql/citus--9.2-4--9.3-2.sql b/src/backend/distributed/sql/citus--9.2-4--9.3-2.sql index a60b6846f..8716ceb6c 100644 --- a/src/backend/distributed/sql/citus--9.2-4--9.3-2.sql +++ b/src/backend/distributed/sql/citus--9.2-4--9.3-2.sql @@ -7,6 +7,7 @@ #include "udfs/replicate_reference_tables/9.3-2.sql" #include "udfs/citus_remote_connection_stats/9.3-2.sql" #include "udfs/worker_create_or_alter_role/9.3-2.sql" +#include "udfs/truncate_local_data_after_distributing_table/9.3-2.sql" -- add citus extension owner as a distributed object, if not already in there INSERT INTO citus.pg_dist_object SELECT diff --git a/src/backend/distributed/sql/udfs/truncate_local_data_after_distributing_table/9.3-2.sql b/src/backend/distributed/sql/udfs/truncate_local_data_after_distributing_table/9.3-2.sql new file mode 100644 index 000000000..b06e5f5a6 --- /dev/null +++ b/src/backend/distributed/sql/udfs/truncate_local_data_after_distributing_table/9.3-2.sql @@ -0,0 +1,7 @@ +CREATE OR REPLACE FUNCTION truncate_local_data_after_distributing_table(function_name regclass) + RETURNS void + LANGUAGE C CALLED ON NULL INPUT + AS 'MODULE_PATHNAME', $$truncate_local_data_after_distributing_table$$; + +COMMENT ON FUNCTION truncate_local_data_after_distributing_table(function_name regclass) + IS 'truncates local records of a distributed table'; diff --git a/src/backend/distributed/sql/udfs/truncate_local_data_after_distributing_table/latest.sql b/src/backend/distributed/sql/udfs/truncate_local_data_after_distributing_table/latest.sql new file mode 100644 index 000000000..b06e5f5a6 --- /dev/null +++ b/src/backend/distributed/sql/udfs/truncate_local_data_after_distributing_table/latest.sql @@ -0,0 +1,7 @@ +CREATE OR REPLACE FUNCTION truncate_local_data_after_distributing_table(function_name regclass) + RETURNS void + LANGUAGE C CALLED ON NULL INPUT + AS 'MODULE_PATHNAME', $$truncate_local_data_after_distributing_table$$; + +COMMENT ON FUNCTION truncate_local_data_after_distributing_table(function_name regclass) + IS 'truncates local records of a distributed table'; diff --git a/src/test/regress/expected/multi_truncate.out b/src/test/regress/expected/multi_truncate.out index 74778c2ff..0b03a2d8b 100644 --- a/src/test/regress/expected/multi_truncate.out +++ b/src/test/regress/expected/multi_truncate.out @@ -4,6 +4,16 @@ SET citus.next_shard_id TO 1210000; CREATE SCHEMA multi_truncate; SET search_path TO multi_truncate; +-- helper view that prints out local table names and sizes in the schema +CREATE VIEW table_sizes AS +SELECT + c.relname as name, + pg_catalog.pg_size_pretty(pg_catalog.pg_table_size(c.oid)) as size +FROM pg_catalog.pg_class c + LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace +WHERE c.relkind = 'r' + AND n.nspname = 'multi_truncate' +ORDER BY 1; -- -- truncate for append distribution -- expect all shards to be dropped @@ -314,7 +324,7 @@ SELECT * FROM test_local_truncate; (1 row) -- Undistribute table -SELECT master_drop_all_shards('test_local_truncate', 'pubic', 'test_local_truncate'); +SELECT master_drop_all_shards('test_local_truncate', 'public', 'test_local_truncate'); master_drop_all_shards --------------------------------------------------------------------- 4 @@ -350,7 +360,7 @@ SELECT * FROM test_local_truncate; (1 row) -- Undistribute table -SELECT master_drop_all_shards('test_local_truncate', 'pubic', 'test_local_truncate'); +SELECT master_drop_all_shards('test_local_truncate', 'public', 'test_local_truncate'); master_drop_all_shards --------------------------------------------------------------------- 4 @@ -364,5 +374,169 @@ SELECT * FROM test_local_truncate; 1 | 2 (1 row) +DROP TABLE test_local_truncate; +-- Test truncate_local_data_after_distributing_table UDF +CREATE TABLE referenced_table(id int UNIQUE, test_column int); +INSERT INTO referenced_table SELECT x,x FROM generate_series(1,10000) x; +CREATE TABLE referencing_table(id int, ref_id int REFERENCES referenced_table(id)); +INSERT INTO referencing_table SELECT * FROM referenced_table; +-- The following will fail as the table is not distributed +SELECT truncate_local_data_after_distributing_table('referenced_table'); +ERROR: supplied parameter is not a distributed relation +DETAIL: This UDF only truncates local records of distributed tables. +-- Test foreign keys from local tables to distributed tables +-- We can not truncate local tables until all the local foreign keys are removed. +SELECT create_distributed_table('referenced_table', 'id'); +NOTICE: Copying data from local table... + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- The following will fail, as local referencing_table has fk references +SELECT truncate_local_data_after_distributing_table('referenced_table'); +ERROR: cannot truncate a table referenced in a foreign key constraint by a local table +DETAIL: Table "referencing_table" references "referenced_table" +-- Test foreign keys between distributed tables +SELECT create_distributed_table('referencing_table', 'ref_id'); +NOTICE: Copying data from local table... + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +-- The following will no longer fail as the referencing table is now distributed +SELECT truncate_local_data_after_distributing_table('referenced_table'); +NOTICE: truncate cascades to table "referencing_table" + truncate_local_data_after_distributing_table +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM table_sizes; + name | size +--------------------------------------------------------------------- + referenced_table | 0 bytes + referencing_table | 0 bytes +(2 rows) + +ROLLBACK; +-- observe that none of the tables are truncated +SELECT * FROM table_sizes; + name | size +--------------------------------------------------------------------- + referenced_table | 384 kB + referencing_table | 384 kB +(2 rows) + +-- test that if we truncate the referencing table, only said table is affected +BEGIN; +SELECT truncate_local_data_after_distributing_table('referencing_table'); + truncate_local_data_after_distributing_table +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM table_sizes; + name | size +--------------------------------------------------------------------- + referenced_table | 384 kB + referencing_table | 0 bytes +(2 rows) + +ROLLBACK; +-- however if we truncate referenced table, both of the tables get truncated +-- because we supply the CASCADE option +-- test that if we truncate the referencing table, only said table is affected +BEGIN; +SELECT truncate_local_data_after_distributing_table('referenced_table'); +NOTICE: truncate cascades to table "referencing_table" + truncate_local_data_after_distributing_table +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM table_sizes; + name | size +--------------------------------------------------------------------- + referenced_table | 0 bytes + referencing_table | 0 bytes +(2 rows) + +ROLLBACK; +DROP TABLE referencing_table; +DROP TABLE referenced_table; +-- test truncating reference tables +CREATE TABLE ref(id int UNIQUE, data int); +INSERT INTO ref SELECT x,x FROM generate_series(1,10000) x; +SELECT create_reference_table('ref'); +NOTICE: Copying data from local table... + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE dist(id int, ref_id int REFERENCES ref(id)); +INSERT INTO dist SELECT x,x FROM generate_series(1,10000) x; +-- test that we do not cascade truncates to local referencing tables +SELECT truncate_local_data_after_distributing_table('ref'); +ERROR: cannot truncate a table referenced in a foreign key constraint by a local table +DETAIL: Table "dist" references "ref" +-- distribute the table and start testing allowed truncation queries +SELECT create_distributed_table('dist','id'); +ERROR: cannot distribute "dist" in sequential mode because it is not empty +HINT: If you have manually set citus.multi_shard_modify_mode to 'sequential', try with 'parallel' option. If that is not the case, try distributing local tables when they are empty. +-- the following should truncate ref and dist +BEGIN; +SELECT truncate_local_data_after_distributing_table('ref'); +ERROR: cannot truncate a table referenced in a foreign key constraint by a local table +DETAIL: Table "dist" references "ref" +SELECT * FROM table_sizes; +ERROR: current transaction is aborted, commands ignored until end of transaction block +ROLLBACK; +-- the following should truncate dist table only +BEGIN; +SELECT truncate_local_data_after_distributing_table('dist'); +ERROR: supplied parameter is not a distributed relation +DETAIL: This UDF only truncates local records of distributed tables. +SELECT * FROM table_sizes; +ERROR: current transaction is aborted, commands ignored until end of transaction block +ROLLBACK; +DROP TABLE ref, dist; +-- tests for issue 1770 +CREATE TABLE t1(a int, b int); +INSERT INTO t1 VALUES(1,1); +SELECT create_distributed_table('t1', 'a'); +NOTICE: Copying data from local table... + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE t1 ADD CONSTRAINT t1_a_check CHECK(a > 2) NOT VALID; +-- will error out with "ERROR: CHECK CONSTRAINT "t1_a_check" is violated by some row" +ALTER TABLE t1 VALIDATE CONSTRAINT t1_a_check; +ERROR: check constraint "t1_a_check" is violated by some row +-- remove violating row +DELETE FROM t1 where a = 1; +-- verify no rows in t1 +SELECT * FROM t1; + a | b +--------------------------------------------------------------------- +(0 rows) + +-- this will still error out +ALTER TABLE t1 VALIDATE CONSTRAINT t1_a_check; +ERROR: check constraint "t1_a_check" is violated by some row +-- The check will pass when the local copies are truncated +SELECT truncate_local_data_after_distributing_table('t1'); + truncate_local_data_after_distributing_table +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE t1 VALIDATE CONSTRAINT t1_a_check; +DROP VIEW table_sizes; +DROP TABLE t1; DROP SCHEMA multi_truncate CASCADE; -NOTICE: drop cascades to table test_local_truncate diff --git a/src/test/regress/sql/multi_truncate.sql b/src/test/regress/sql/multi_truncate.sql index 99d0a44d1..105fe31e1 100644 --- a/src/test/regress/sql/multi_truncate.sql +++ b/src/test/regress/sql/multi_truncate.sql @@ -7,6 +7,17 @@ SET citus.next_shard_id TO 1210000; CREATE SCHEMA multi_truncate; SET search_path TO multi_truncate; +-- helper view that prints out local table names and sizes in the schema +CREATE VIEW table_sizes AS +SELECT + c.relname as name, + pg_catalog.pg_size_pretty(pg_catalog.pg_table_size(c.oid)) as size +FROM pg_catalog.pg_class c + LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace +WHERE c.relkind = 'r' + AND n.nspname = 'multi_truncate' +ORDER BY 1; + -- -- truncate for append distribution -- expect all shards to be dropped @@ -194,7 +205,7 @@ COMMIT; SELECT * FROM test_local_truncate; -- Undistribute table -SELECT master_drop_all_shards('test_local_truncate', 'pubic', 'test_local_truncate'); +SELECT master_drop_all_shards('test_local_truncate', 'public', 'test_local_truncate'); DELETE FROM pg_dist_partition WHERE logicalrelid = 'test_local_truncate'::regclass; -- Ensure local data is truncated @@ -216,10 +227,107 @@ ROLLBACK; SELECT * FROM test_local_truncate; -- Undistribute table -SELECT master_drop_all_shards('test_local_truncate', 'pubic', 'test_local_truncate'); +SELECT master_drop_all_shards('test_local_truncate', 'public', 'test_local_truncate'); DELETE FROM pg_dist_partition WHERE logicalrelid = 'test_local_truncate'::regclass; -- Ensure local data is not truncated SELECT * FROM test_local_truncate; +DROP TABLE test_local_truncate; + +-- Test truncate_local_data_after_distributing_table UDF +CREATE TABLE referenced_table(id int UNIQUE, test_column int); +INSERT INTO referenced_table SELECT x,x FROM generate_series(1,10000) x; + +CREATE TABLE referencing_table(id int, ref_id int REFERENCES referenced_table(id)); +INSERT INTO referencing_table SELECT * FROM referenced_table; + +-- The following will fail as the table is not distributed +SELECT truncate_local_data_after_distributing_table('referenced_table'); + +-- Test foreign keys from local tables to distributed tables +-- We can not truncate local tables until all the local foreign keys are removed. +SELECT create_distributed_table('referenced_table', 'id'); + +-- The following will fail, as local referencing_table has fk references +SELECT truncate_local_data_after_distributing_table('referenced_table'); + +-- Test foreign keys between distributed tables +SELECT create_distributed_table('referencing_table', 'ref_id'); + +BEGIN; +-- The following will no longer fail as the referencing table is now distributed +SELECT truncate_local_data_after_distributing_table('referenced_table'); +SELECT * FROM table_sizes; +ROLLBACK; + +-- observe that none of the tables are truncated +SELECT * FROM table_sizes; + +-- test that if we truncate the referencing table, only said table is affected +BEGIN; +SELECT truncate_local_data_after_distributing_table('referencing_table'); +SELECT * FROM table_sizes; +ROLLBACK; + +-- however if we truncate referenced table, both of the tables get truncated +-- because we supply the CASCADE option +-- test that if we truncate the referencing table, only said table is affected +BEGIN; +SELECT truncate_local_data_after_distributing_table('referenced_table'); +SELECT * FROM table_sizes; +ROLLBACK; + +DROP TABLE referencing_table; +DROP TABLE referenced_table; + +-- test truncating reference tables +CREATE TABLE ref(id int UNIQUE, data int); +INSERT INTO ref SELECT x,x FROM generate_series(1,10000) x; +SELECT create_reference_table('ref'); + +CREATE TABLE dist(id int, ref_id int REFERENCES ref(id)); +INSERT INTO dist SELECT x,x FROM generate_series(1,10000) x; + +-- test that we do not cascade truncates to local referencing tables +SELECT truncate_local_data_after_distributing_table('ref'); + +-- distribute the table and start testing allowed truncation queries +SELECT create_distributed_table('dist','id'); + +-- the following should truncate ref and dist +BEGIN; +SELECT truncate_local_data_after_distributing_table('ref'); +SELECT * FROM table_sizes; +ROLLBACK; + +-- the following should truncate dist table only +BEGIN; +SELECT truncate_local_data_after_distributing_table('dist'); +SELECT * FROM table_sizes; +ROLLBACK; + +DROP TABLE ref, dist; + +-- tests for issue 1770 +CREATE TABLE t1(a int, b int); +INSERT INTO t1 VALUES(1,1); +SELECT create_distributed_table('t1', 'a'); +ALTER TABLE t1 ADD CONSTRAINT t1_a_check CHECK(a > 2) NOT VALID; + +-- will error out with "ERROR: CHECK CONSTRAINT "t1_a_check" is violated by some row" +ALTER TABLE t1 VALIDATE CONSTRAINT t1_a_check; +-- remove violating row +DELETE FROM t1 where a = 1; +-- verify no rows in t1 +SELECT * FROM t1; +-- this will still error out +ALTER TABLE t1 VALIDATE CONSTRAINT t1_a_check; + +-- The check will pass when the local copies are truncated +SELECT truncate_local_data_after_distributing_table('t1'); +ALTER TABLE t1 VALIDATE CONSTRAINT t1_a_check; + +DROP VIEW table_sizes; +DROP TABLE t1; DROP SCHEMA multi_truncate CASCADE;