From 1b3257816e812d77b766e0dc06a6b87bbac160fd Mon Sep 17 00:00:00 2001 From: Onder Kalaci Date: Sun, 19 Aug 2018 17:25:04 +0300 Subject: [PATCH] Make sure that table is dropped before shards are dropped This commit fixes a bug where a concurrent DROP TABLE deadlocks with SELECT (or DML) when the SELECT is executed from the workers. The problem was that Citus used to remove the metadata before droping the table on the workers. That creates a time window where the SELECT starts running on some of the nodes and DROP table on some of the other nodes. --- citus.control | 2 +- src/backend/distributed/Makefile | 4 +- .../distributed/citus--8.0-2--8.0-3.sql | 64 +++++++++++ src/backend/distributed/citus.control | 2 +- .../commands/drop_distributed_table.c | 100 ++++++++++++++++-- src/test/regress/expected/multi_extension.out | 1 + .../multi_unsupported_worker_operations.out | 9 +- src/test/regress/sql/multi_extension.sql | 1 + .../multi_unsupported_worker_operations.sql | 3 +- 9 files changed, 168 insertions(+), 18 deletions(-) create mode 100644 src/backend/distributed/citus--8.0-2--8.0-3.sql diff --git a/citus.control b/citus.control index 9f27cf916..cdd186d36 100644 --- a/citus.control +++ b/citus.control @@ -1,6 +1,6 @@ # Citus extension comment = 'Citus distributed database' -default_version = '8.0-2' +default_version = '8.0-3' module_pathname = '$libdir/citus' relocatable = false schema = pg_catalog diff --git a/src/backend/distributed/Makefile b/src/backend/distributed/Makefile index 1b48c8a6c..e7afea3e4 100644 --- a/src/backend/distributed/Makefile +++ b/src/backend/distributed/Makefile @@ -17,7 +17,7 @@ EXTVERSIONS = 5.0 5.0-1 5.0-2 \ 7.3-1 7.3-2 7.3-3 \ 7.4-1 7.4-2 7.4-3 \ 7.5-1 7.5-2 7.5-3 7.5-4 7.5-5 7.5-6 7.5-7 \ - 8.0-1 8.0-2 + 8.0-1 8.0-2 8.0-3 # All citus--*.sql files in the source directory DATA = $(patsubst $(citus_abs_srcdir)/%.sql,%.sql,$(wildcard $(citus_abs_srcdir)/$(EXTENSION)--*--*.sql)) @@ -219,6 +219,8 @@ $(EXTENSION)--8.0-1.sql: $(EXTENSION)--7.5-7.sql $(EXTENSION)--7.5-7--8.0-1.sql cat $^ > $@ $(EXTENSION)--8.0-2.sql: $(EXTENSION)--8.0-1.sql $(EXTENSION)--8.0-1--8.0-2.sql cat $^ > $@ +$(EXTENSION)--8.0-3.sql: $(EXTENSION)--8.0-2.sql $(EXTENSION)--8.0-2--8.0-3.sql + cat $^ > $@ NO_PGXS = 1 diff --git a/src/backend/distributed/citus--8.0-2--8.0-3.sql b/src/backend/distributed/citus--8.0-2--8.0-3.sql new file mode 100644 index 000000000..3a02c05ee --- /dev/null +++ b/src/backend/distributed/citus--8.0-2--8.0-3.sql @@ -0,0 +1,64 @@ +/* citus--8.0-2--8.0-3 */ +SET search_path = 'pg_catalog'; + +CREATE FUNCTION master_remove_partition_metadata(logicalrelid regclass, + schema_name text, + table_name text) + RETURNS void + LANGUAGE C STRICT + AS 'MODULE_PATHNAME', $$master_remove_partition_metadata$$; +COMMENT ON FUNCTION master_remove_partition_metadata(logicalrelid regclass, + schema_name text, + table_name text) + IS 'deletes the partition metadata of a distributed table'; + +CREATE OR REPLACE FUNCTION master_remove_distributed_table_metadata_from_workers(logicalrelid regclass, + schema_name text, + table_name text) + RETURNS void + LANGUAGE C STRICT + AS 'MODULE_PATHNAME', $$master_remove_distributed_table_metadata_from_workers$$; +COMMENT ON FUNCTION master_remove_distributed_table_metadata_from_workers(logicalrelid regclass, + schema_name text, + table_name text) + IS 'drops the table and removes all the metadata belonging the distributed table in the worker nodes with metadata.'; + + +CREATE OR REPLACE FUNCTION pg_catalog.citus_drop_trigger() + RETURNS event_trigger + LANGUAGE plpgsql + SET search_path = pg_catalog + AS $cdbdt$ +DECLARE + v_obj record; + sequence_names text[] := '{}'; + table_colocation_id integer; + propagate_drop boolean := false; +BEGIN + -- collect set of dropped sequences to drop on workers later + SELECT array_agg(object_identity) INTO sequence_names + FROM pg_event_trigger_dropped_objects() + WHERE object_type = 'sequence'; + + FOR v_obj IN SELECT * FROM pg_event_trigger_dropped_objects() + WHERE object_type IN ('table', 'foreign table') + LOOP + -- first drop the table and metadata on the workers + -- then drop all the shards on the workers + -- finally remove the pg_dist_partition entry on the coordinator + PERFORM master_remove_distributed_table_metadata_from_workers(v_obj.objid, v_obj.schema_name, v_obj.object_name); + PERFORM master_drop_all_shards(v_obj.objid, v_obj.schema_name, v_obj.object_name); + PERFORM master_remove_partition_metadata(v_obj.objid, v_obj.schema_name, v_obj.object_name); + END LOOP; + + IF cardinality(sequence_names) = 0 THEN + RETURN; + END IF; + + PERFORM master_drop_sequences(sequence_names); +END; +$cdbdt$; +COMMENT ON FUNCTION pg_catalog.citus_drop_trigger() + IS 'perform checks and actions at the end of DROP actions'; + +RESET search_path; diff --git a/src/backend/distributed/citus.control b/src/backend/distributed/citus.control index 9f27cf916..cdd186d36 100644 --- a/src/backend/distributed/citus.control +++ b/src/backend/distributed/citus.control @@ -1,6 +1,6 @@ # Citus extension comment = 'Citus distributed database' -default_version = '8.0-2' +default_version = '8.0-3' module_pathname = '$libdir/citus' relocatable = false schema = pg_catalog diff --git a/src/backend/distributed/commands/drop_distributed_table.c b/src/backend/distributed/commands/drop_distributed_table.c index 6237e206f..acf669b9f 100644 --- a/src/backend/distributed/commands/drop_distributed_table.c +++ b/src/backend/distributed/commands/drop_distributed_table.c @@ -20,21 +20,46 @@ #include "utils/lsyscache.h" +/* local function forward declarations */ +static void MasterRemoveDistributedTableMetadataFromWorkers(Oid relationId, + char *schemaName, + char *tableName); + + /* exports for SQL callable functions */ PG_FUNCTION_INFO_V1(master_drop_distributed_table_metadata); +PG_FUNCTION_INFO_V1(master_remove_partition_metadata); +PG_FUNCTION_INFO_V1(master_remove_distributed_table_metadata_from_workers); /* - * master_drop_distributed_table_metadata removes the entry of the specified distributed - * table from pg_dist_partition and drops the table from the workers if needed. + * master_drop_distributed_table_metadata UDF is a stub UDF to install Citus flawlessly. + * Otherwise we need to delete them from our sql files, which is confusing and not a + * common operation in the code-base. + * + * This function is basically replaced with + * master_remove_distributed_table_metadata_from_workers() followed by + * master_remove_partition_metadata(). */ Datum master_drop_distributed_table_metadata(PG_FUNCTION_ARGS) +{ + ereport(INFO, (errmsg("this function is deprecated and no longer is used"))); + + PG_RETURN_VOID(); +} + + +/* + * master_remove_partition_metadata removes the entry of the specified distributed + * table from pg_dist_partition. + */ +Datum +master_remove_partition_metadata(PG_FUNCTION_ARGS) { Oid relationId = PG_GETARG_OID(0); text *schemaNameText = PG_GETARG_TEXT_P(1); text *tableNameText = PG_GETARG_TEXT_P(2); - bool shouldSyncMetadata = false; char *schemaName = text_to_cstring(schemaNameText); char *tableName = text_to_cstring(tableNameText); @@ -58,15 +83,68 @@ master_drop_distributed_table_metadata(PG_FUNCTION_ARGS) DeletePartitionRow(relationId); - shouldSyncMetadata = ShouldSyncTableMetadata(relationId); - if (shouldSyncMetadata) - { - char *deleteDistributionCommand = NULL; + PG_RETURN_VOID(); +} - /* drop the distributed table metadata on the workers */ - deleteDistributionCommand = DistributionDeleteCommand(schemaName, tableName); - SendCommandToWorkers(WORKERS_WITH_METADATA, deleteDistributionCommand); - } + +/* + * master_remove_distributed_table_metadata_from_workers removes the entry of the + * specified distributed table from pg_dist_partition and drops the table from + * the workers if needed. + */ +Datum +master_remove_distributed_table_metadata_from_workers(PG_FUNCTION_ARGS) +{ + Oid relationId = PG_GETARG_OID(0); + text *schemaNameText = PG_GETARG_TEXT_P(1); + text *tableNameText = PG_GETARG_TEXT_P(2); + + char *schemaName = text_to_cstring(schemaNameText); + char *tableName = text_to_cstring(tableNameText); + + CheckCitusVersion(ERROR); + + MasterRemoveDistributedTableMetadataFromWorkers(relationId, schemaName, tableName); PG_RETURN_VOID(); } + + +/* + * MasterRemoveDistributedTableMetadataFromWorkers drops the table and removes + * all the metadata beloning the distributed table in the worker nodes + * with metadata. The function doesn't drop the tables that are + * the shards on the workers. + * + * The function is a no-op for non-distributed tables and clusters that don't + * have any workers with metadata. Also, the function errors out if called + * from a worker node. + */ +static void +MasterRemoveDistributedTableMetadataFromWorkers(Oid relationId, char *schemaName, + char *tableName) +{ + char *deleteDistributionCommand = NULL; + + /* + * The SQL_DROP trigger calls this function even for tables that are + * not distributed. In that case, silently ignore. This is not very + * user-friendly, but this function is really only meant to be called + * from the trigger. + */ + if (!IsDistributedTable(relationId) || !EnableDDLPropagation) + { + return; + } + + EnsureCoordinator(); + + if (!ShouldSyncTableMetadata(relationId)) + { + return; + } + + /* drop the distributed table metadata on the workers */ + deleteDistributionCommand = DistributionDeleteCommand(schemaName, tableName); + SendCommandToWorkers(WORKERS_WITH_METADATA, deleteDistributionCommand); +} diff --git a/src/test/regress/expected/multi_extension.out b/src/test/regress/expected/multi_extension.out index 70dfa768f..126a50d61 100644 --- a/src/test/regress/expected/multi_extension.out +++ b/src/test/regress/expected/multi_extension.out @@ -145,6 +145,7 @@ ALTER EXTENSION citus UPDATE TO '7.5-6'; ALTER EXTENSION citus UPDATE TO '7.5-7'; ALTER EXTENSION citus UPDATE TO '8.0-1'; ALTER EXTENSION citus UPDATE TO '8.0-2'; +ALTER EXTENSION citus UPDATE TO '8.0-3'; -- show running version SHOW citus.version; citus.version diff --git a/src/test/regress/expected/multi_unsupported_worker_operations.out b/src/test/regress/expected/multi_unsupported_worker_operations.out index 50bc7713b..2bce34f0b 100644 --- a/src/test/regress/expected/multi_unsupported_worker_operations.out +++ b/src/test/regress/expected/multi_unsupported_worker_operations.out @@ -293,8 +293,8 @@ DELETE FROM pg_dist_node; DROP TABLE mx_table; ERROR: operation is not allowed on this node HINT: Connect to the coordinator and run it again. -CONTEXT: SQL statement "SELECT master_drop_all_shards(v_obj.objid, v_obj.schema_name, v_obj.object_name)" -PL/pgSQL function citus_drop_trigger() line 17 at PERFORM +CONTEXT: SQL statement "SELECT master_remove_distributed_table_metadata_from_workers(v_obj.objid, v_obj.schema_name, v_obj.object_name)" +PL/pgSQL function citus_drop_trigger() line 19 at PERFORM SELECT count(*) FROM mx_table; count ------- @@ -302,7 +302,10 @@ SELECT count(*) FROM mx_table; (1 row) -- master_drop_distributed_table_metadata -SELECT master_drop_distributed_table_metadata('mx_table'::regclass, 'public', 'mx_table'); +SELECT master_remove_distributed_table_metadata_from_workers('mx_table'::regclass, 'public', 'mx_table'); +ERROR: operation is not allowed on this node +HINT: Connect to the coordinator and run it again. +SELECT master_remove_partition_metadata('mx_table'::regclass, 'public', 'mx_table'); ERROR: operation is not allowed on this node HINT: Connect to the coordinator and run it again. SELECT count(*) FROM mx_table; diff --git a/src/test/regress/sql/multi_extension.sql b/src/test/regress/sql/multi_extension.sql index c59789450..84253991c 100644 --- a/src/test/regress/sql/multi_extension.sql +++ b/src/test/regress/sql/multi_extension.sql @@ -145,6 +145,7 @@ ALTER EXTENSION citus UPDATE TO '7.5-6'; ALTER EXTENSION citus UPDATE TO '7.5-7'; ALTER EXTENSION citus UPDATE TO '8.0-1'; ALTER EXTENSION citus UPDATE TO '8.0-2'; +ALTER EXTENSION citus UPDATE TO '8.0-3'; -- show running version SHOW citus.version; diff --git a/src/test/regress/sql/multi_unsupported_worker_operations.sql b/src/test/regress/sql/multi_unsupported_worker_operations.sql index a58b489d3..b5a19c6fd 100644 --- a/src/test/regress/sql/multi_unsupported_worker_operations.sql +++ b/src/test/regress/sql/multi_unsupported_worker_operations.sql @@ -165,7 +165,8 @@ DROP TABLE mx_table; SELECT count(*) FROM mx_table; -- master_drop_distributed_table_metadata -SELECT master_drop_distributed_table_metadata('mx_table'::regclass, 'public', 'mx_table'); +SELECT master_remove_distributed_table_metadata_from_workers('mx_table'::regclass, 'public', 'mx_table'); +SELECT master_remove_partition_metadata('mx_table'::regclass, 'public', 'mx_table'); SELECT count(*) FROM mx_table; -- master_copy_shard_placement