Merge pull request #2343 from citusdata/fix_drop_table_deadlock

Make sure that table (and metadata) is dropped before shards are dropped on Citus MX
pull/2361/head
Önder Kalacı 2018-09-04 09:44:57 +03:00 committed by GitHub
commit c64f669755
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 168 additions and 18 deletions

View File

@ -1,6 +1,6 @@
# Citus extension # Citus extension
comment = 'Citus distributed database' comment = 'Citus distributed database'
default_version = '8.0-2' default_version = '8.0-3'
module_pathname = '$libdir/citus' module_pathname = '$libdir/citus'
relocatable = false relocatable = false
schema = pg_catalog schema = pg_catalog

View File

@ -17,7 +17,7 @@ EXTVERSIONS = 5.0 5.0-1 5.0-2 \
7.3-1 7.3-2 7.3-3 \ 7.3-1 7.3-2 7.3-3 \
7.4-1 7.4-2 7.4-3 \ 7.4-1 7.4-2 7.4-3 \
7.5-1 7.5-2 7.5-3 7.5-4 7.5-5 7.5-6 7.5-7 \ 7.5-1 7.5-2 7.5-3 7.5-4 7.5-5 7.5-6 7.5-7 \
8.0-1 8.0-2 8.0-1 8.0-2 8.0-3
# All citus--*.sql files in the source directory # All citus--*.sql files in the source directory
DATA = $(patsubst $(citus_abs_srcdir)/%.sql,%.sql,$(wildcard $(citus_abs_srcdir)/$(EXTENSION)--*--*.sql)) DATA = $(patsubst $(citus_abs_srcdir)/%.sql,%.sql,$(wildcard $(citus_abs_srcdir)/$(EXTENSION)--*--*.sql))
@ -219,6 +219,8 @@ $(EXTENSION)--8.0-1.sql: $(EXTENSION)--7.5-7.sql $(EXTENSION)--7.5-7--8.0-1.sql
cat $^ > $@ cat $^ > $@
$(EXTENSION)--8.0-2.sql: $(EXTENSION)--8.0-1.sql $(EXTENSION)--8.0-1--8.0-2.sql $(EXTENSION)--8.0-2.sql: $(EXTENSION)--8.0-1.sql $(EXTENSION)--8.0-1--8.0-2.sql
cat $^ > $@ cat $^ > $@
$(EXTENSION)--8.0-3.sql: $(EXTENSION)--8.0-2.sql $(EXTENSION)--8.0-2--8.0-3.sql
cat $^ > $@
NO_PGXS = 1 NO_PGXS = 1

View File

@ -0,0 +1,64 @@
/* citus--8.0-2--8.0-3 */
-- Make pg_catalog the only search_path entry while this script runs, so the
-- unqualified function names below are resolved against pg_catalog.
-- The setting is undone by the RESET search_path at the end of the script.
SET search_path = 'pg_catalog';
-- SQL binding for the C function master_remove_partition_metadata, which
-- removes the pg_dist_partition entry of the given distributed table.
-- STRICT: the C code is not invoked when any argument is NULL.
CREATE FUNCTION master_remove_partition_metadata(
    logicalrelid regclass,
    schema_name text,
    table_name text)
    RETURNS void
    AS 'MODULE_PATHNAME', 'master_remove_partition_metadata'
    LANGUAGE C STRICT;
COMMENT ON FUNCTION master_remove_partition_metadata(regclass, text, text)
    IS 'deletes the partition metadata of a distributed table';
-- SQL binding for the C function
-- master_remove_distributed_table_metadata_from_workers, which sends the
-- command that drops the table and its metadata to the workers with metadata.
-- STRICT: the C code is not invoked when any argument is NULL.
CREATE OR REPLACE FUNCTION master_remove_distributed_table_metadata_from_workers(
    logicalrelid regclass,
    schema_name text,
    table_name text)
    RETURNS void
    AS 'MODULE_PATHNAME', 'master_remove_distributed_table_metadata_from_workers'
    LANGUAGE C STRICT;
COMMENT ON FUNCTION master_remove_distributed_table_metadata_from_workers(regclass, text, text)
    IS 'drops the table and removes all the metadata belonging the distributed table in the worker nodes with metadata.';
-- citus_drop_trigger is the event trigger function run for DROP statements.
--
-- For every dropped table or foreign table it calls, in this order:
--   1. master_remove_distributed_table_metadata_from_workers
--   2. master_drop_all_shards
--   3. master_remove_partition_metadata
-- The calls are made for all dropped tables, distributed or not; the called
-- functions are expected to ignore tables that are not distributed.
--
-- Afterwards, if the statement also dropped sequences, their identities are
-- handed to master_drop_sequences in a single call.
CREATE OR REPLACE FUNCTION pg_catalog.citus_drop_trigger()
    RETURNS event_trigger
    LANGUAGE plpgsql
    SET search_path = pg_catalog
    AS $cdbdt$
DECLARE
    v_obj record;
    sequence_names text[] := '{}';
BEGIN
    -- collect set of dropped sequences to drop on workers later;
    -- array_agg() yields NULL (not '{}') when nothing matched, so normalize it
    SELECT COALESCE(array_agg(object_identity), '{}'::text[]) INTO sequence_names
    FROM pg_event_trigger_dropped_objects()
    WHERE object_type = 'sequence';
    FOR v_obj IN SELECT objid, schema_name, object_name
                 FROM pg_event_trigger_dropped_objects()
                 WHERE object_type IN ('table', 'foreign table')
    LOOP
        -- first drop the table and metadata on the workers
        -- then drop all the shards on the workers
        -- finally remove the pg_dist_partition entry on the coordinator
        PERFORM master_remove_distributed_table_metadata_from_workers(v_obj.objid, v_obj.schema_name, v_obj.object_name);
        PERFORM master_drop_all_shards(v_obj.objid, v_obj.schema_name, v_obj.object_name);
        PERFORM master_remove_partition_metadata(v_obj.objid, v_obj.schema_name, v_obj.object_name);
    END LOOP;
    -- sequences only need propagating when the statement actually dropped some
    IF cardinality(sequence_names) > 0 THEN
        PERFORM master_drop_sequences(sequence_names);
    END IF;
END;
$cdbdt$;
COMMENT ON FUNCTION pg_catalog.citus_drop_trigger()
    IS 'perform checks and actions at the end of DROP actions';
-- Restore the default search_path now that the pg_catalog-only setting made
-- at the top of this script is no longer needed.
RESET search_path;

View File

@ -1,6 +1,6 @@
# Citus extension # Citus extension
comment = 'Citus distributed database' comment = 'Citus distributed database'
default_version = '8.0-2' default_version = '8.0-3'
module_pathname = '$libdir/citus' module_pathname = '$libdir/citus'
relocatable = false relocatable = false
schema = pg_catalog schema = pg_catalog

View File

@ -20,21 +20,46 @@
#include "utils/lsyscache.h" #include "utils/lsyscache.h"
/* local function forward declarations */
static void MasterRemoveDistributedTableMetadataFromWorkers(Oid relationId,
char *schemaName,
char *tableName);
/* exports for SQL callable functions */ /* exports for SQL callable functions */
PG_FUNCTION_INFO_V1(master_drop_distributed_table_metadata); PG_FUNCTION_INFO_V1(master_drop_distributed_table_metadata);
PG_FUNCTION_INFO_V1(master_remove_partition_metadata);
PG_FUNCTION_INFO_V1(master_remove_distributed_table_metadata_from_workers);
/* /*
* master_drop_distributed_table_metadata removes the entry of the specified distributed * master_drop_distributed_table_metadata UDF is a stub UDF to install Citus flawlessly.
* table from pg_dist_partition and drops the table from the workers if needed. * Otherwise we need to delete them from our sql files, which is confusing and not a
* common operation in the code-base.
*
* This function is basically replaced with
* master_remove_distributed_table_metadata_from_workers() followed by
* master_remove_partition_metadata().
*/ */
Datum Datum
master_drop_distributed_table_metadata(PG_FUNCTION_ARGS) master_drop_distributed_table_metadata(PG_FUNCTION_ARGS)
{
ereport(INFO, (errmsg("this function is deprecated and no longer is used")));
PG_RETURN_VOID();
}
/*
* master_remove_partition_metadata removes the entry of the specified distributed
* table from pg_dist_partition.
*/
Datum
master_remove_partition_metadata(PG_FUNCTION_ARGS)
{ {
Oid relationId = PG_GETARG_OID(0); Oid relationId = PG_GETARG_OID(0);
text *schemaNameText = PG_GETARG_TEXT_P(1); text *schemaNameText = PG_GETARG_TEXT_P(1);
text *tableNameText = PG_GETARG_TEXT_P(2); text *tableNameText = PG_GETARG_TEXT_P(2);
bool shouldSyncMetadata = false;
char *schemaName = text_to_cstring(schemaNameText); char *schemaName = text_to_cstring(schemaNameText);
char *tableName = text_to_cstring(tableNameText); char *tableName = text_to_cstring(tableNameText);
@ -58,15 +83,68 @@ master_drop_distributed_table_metadata(PG_FUNCTION_ARGS)
DeletePartitionRow(relationId); DeletePartitionRow(relationId);
shouldSyncMetadata = ShouldSyncTableMetadata(relationId); PG_RETURN_VOID();
if (shouldSyncMetadata) }
{
char *deleteDistributionCommand = NULL;
/* drop the distributed table metadata on the workers */
deleteDistributionCommand = DistributionDeleteCommand(schemaName, tableName); /*
SendCommandToWorkers(WORKERS_WITH_METADATA, deleteDistributionCommand); * master_remove_distributed_table_metadata_from_workers removes the entry of the
} * specified distributed table from pg_dist_partition and drops the table from
* the workers if needed.
*/
Datum
master_remove_distributed_table_metadata_from_workers(PG_FUNCTION_ARGS)
{
Oid relationId = PG_GETARG_OID(0);
text *schemaNameText = PG_GETARG_TEXT_P(1);
text *tableNameText = PG_GETARG_TEXT_P(2);
char *schemaName = text_to_cstring(schemaNameText);
char *tableName = text_to_cstring(tableNameText);
CheckCitusVersion(ERROR);
MasterRemoveDistributedTableMetadataFromWorkers(relationId, schemaName, tableName);
PG_RETURN_VOID(); PG_RETURN_VOID();
} }
/*
 * MasterRemoveDistributedTableMetadataFromWorkers drops the table and removes
 * all the metadata belonging to the distributed table on the worker nodes
 * that have metadata. The shards of the table on the workers are not dropped
 * by this function.
 *
 * Nothing is sent when the relation is not a distributed table, when DDL
 * propagation is disabled, or when ShouldSyncTableMetadata() is false for
 * the relation. For distributed tables with DDL propagation enabled,
 * EnsureCoordinator() runs before anything else, so the function errors out
 * if it is called from a worker node.
 */
static void
MasterRemoveDistributedTableMetadataFromWorkers(Oid relationId, char *schemaName,
												char *tableName)
{
	char *dropMetadataCommand = NULL;

	/*
	 * This function is reached from the SQL_DROP trigger for every dropped
	 * table, including ones that are not distributed. Those are ignored
	 * silently: not very user-friendly, but the trigger is the only intended
	 * caller.
	 */
	if (!IsDistributedTable(relationId))
	{
		return;
	}

	/* likewise a no-op while DDL propagation is turned off */
	if (!EnableDDLPropagation)
	{
		return;
	}

	EnsureCoordinator();

	if (ShouldSyncTableMetadata(relationId))
	{
		/* drop the distributed table metadata on the workers */
		dropMetadataCommand = DistributionDeleteCommand(schemaName, tableName);
		SendCommandToWorkers(WORKERS_WITH_METADATA, dropMetadataCommand);
	}
}

View File

@ -145,6 +145,7 @@ ALTER EXTENSION citus UPDATE TO '7.5-6';
ALTER EXTENSION citus UPDATE TO '7.5-7'; ALTER EXTENSION citus UPDATE TO '7.5-7';
ALTER EXTENSION citus UPDATE TO '8.0-1'; ALTER EXTENSION citus UPDATE TO '8.0-1';
ALTER EXTENSION citus UPDATE TO '8.0-2'; ALTER EXTENSION citus UPDATE TO '8.0-2';
ALTER EXTENSION citus UPDATE TO '8.0-3';
-- show running version -- show running version
SHOW citus.version; SHOW citus.version;
citus.version citus.version

View File

@ -293,8 +293,8 @@ DELETE FROM pg_dist_node;
DROP TABLE mx_table; DROP TABLE mx_table;
ERROR: operation is not allowed on this node ERROR: operation is not allowed on this node
HINT: Connect to the coordinator and run it again. HINT: Connect to the coordinator and run it again.
CONTEXT: SQL statement "SELECT master_drop_all_shards(v_obj.objid, v_obj.schema_name, v_obj.object_name)" CONTEXT: SQL statement "SELECT master_remove_distributed_table_metadata_from_workers(v_obj.objid, v_obj.schema_name, v_obj.object_name)"
PL/pgSQL function citus_drop_trigger() line 17 at PERFORM PL/pgSQL function citus_drop_trigger() line 19 at PERFORM
SELECT count(*) FROM mx_table; SELECT count(*) FROM mx_table;
count count
------- -------
@ -302,7 +302,10 @@ SELECT count(*) FROM mx_table;
(1 row) (1 row)
-- master_drop_distributed_table_metadata -- master_drop_distributed_table_metadata
SELECT master_drop_distributed_table_metadata('mx_table'::regclass, 'public', 'mx_table'); SELECT master_remove_distributed_table_metadata_from_workers('mx_table'::regclass, 'public', 'mx_table');
ERROR: operation is not allowed on this node
HINT: Connect to the coordinator and run it again.
SELECT master_remove_partition_metadata('mx_table'::regclass, 'public', 'mx_table');
ERROR: operation is not allowed on this node ERROR: operation is not allowed on this node
HINT: Connect to the coordinator and run it again. HINT: Connect to the coordinator and run it again.
SELECT count(*) FROM mx_table; SELECT count(*) FROM mx_table;

View File

@ -145,6 +145,7 @@ ALTER EXTENSION citus UPDATE TO '7.5-6';
ALTER EXTENSION citus UPDATE TO '7.5-7'; ALTER EXTENSION citus UPDATE TO '7.5-7';
ALTER EXTENSION citus UPDATE TO '8.0-1'; ALTER EXTENSION citus UPDATE TO '8.0-1';
ALTER EXTENSION citus UPDATE TO '8.0-2'; ALTER EXTENSION citus UPDATE TO '8.0-2';
ALTER EXTENSION citus UPDATE TO '8.0-3';
-- show running version -- show running version
SHOW citus.version; SHOW citus.version;

View File

@ -165,7 +165,8 @@ DROP TABLE mx_table;
SELECT count(*) FROM mx_table; SELECT count(*) FROM mx_table;
-- master_drop_distributed_table_metadata -- master_drop_distributed_table_metadata
SELECT master_drop_distributed_table_metadata('mx_table'::regclass, 'public', 'mx_table'); SELECT master_remove_distributed_table_metadata_from_workers('mx_table'::regclass, 'public', 'mx_table');
SELECT master_remove_partition_metadata('mx_table'::regclass, 'public', 'mx_table');
SELECT count(*) FROM mx_table; SELECT count(*) FROM mx_table;
-- master_copy_shard_placement -- master_copy_shard_placement