Merge pull request #2629 from citusdata/disable_constraint_checking_on_coordinator

Don't execute ALTER TABLE constraint checks in coordinator.
pull/2620/head^2
Hadi Moshayedi 2019-03-14 16:14:30 -07:00 committed by GitHub
commit cd00e92cbc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 335 additions and 23 deletions

View File

@ -1093,7 +1093,7 @@ ErrorIfUnsupportedAlterTableStmt(AlterTableStmt *alterTableStatement)
* for two things for practial purpose for not doing the same checks
* twice:
* (a) For any command, decide and return whether we should
* run the command in sequntial mode
* run the command in sequential mode
* (b) For commands in a transaction block, set the transaction local
* multi-shard modify mode to sequential when necessary
*
@ -1185,26 +1185,14 @@ SetupExecutionModeForAlterTable(Oid relationId, AlterTableCmd *command)
{
executeSequentially = true;
}
/*
* Postgres performs additional selects when creating constraints
* on partitioned tables. We need to set execution mode to
* sequential for the select query so that ddl connections
* we open does not fail due to previous select.
*/
if (executeSequentially && PartitionedTable(relationId))
{
SetLocalMultiShardModifyModeToSequential();
}
}
}
else if (alterTableType == AT_DetachPartition)
else if (alterTableType == AT_DetachPartition || alterTableType == AT_AttachPartition)
{
/* check if there are foreign constraints to reference tables */
if (HasForeignKeyToReferenceTable(relationId))
{
executeSequentially = true;
SetLocalMultiShardModifyModeToSequential();
}
}

View File

@ -57,6 +57,7 @@
bool EnableDDLPropagation = true; /* ddl propagation is enabled */
static bool shouldInvalidateForeignKeyGraph = false;
static int activeAlterTables = 0;
/* Local functions forward declarations for helper functions */
@ -419,9 +420,33 @@ multi_ProcessUtility(PlannedStmt *pstmt,
}
pstmt->utilityStmt = parsetree;
PG_TRY();
{
if (IsA(parsetree, AlterTableStmt))
{
activeAlterTables++;
}
standard_ProcessUtility(pstmt, queryString, context,
params, queryEnv, dest, completionTag);
if (IsA(parsetree, AlterTableStmt))
{
activeAlterTables--;
}
}
PG_CATCH();
{
if (IsA(parsetree, AlterTableStmt))
{
activeAlterTables--;
}
PG_RE_THROW();
}
PG_END_TRY();
/*
* We only process CREATE TABLE ... PARTITION OF commands in the function below
* to handle the case when user creates a table as a partition of distributed table.
@ -749,3 +774,14 @@ DDLTaskList(Oid relationId, const char *commandString)
return taskList;
}
/*
* AlterTableInProgress returns true if we're processing an ALTER TABLE command
* right now.
*/
bool
AlterTableInProgress(void)
{
return activeAlterTables > 0;
}

View File

@ -17,6 +17,7 @@
#include "catalog/namespace.h"
#include "distributed/citus_custom_scan.h"
#include "distributed/commands/multi_copy.h"
#include "distributed/commands/utility_hook.h"
#include "distributed/insert_select_executor.h"
#include "distributed/insert_select_planner.h"
#include "distributed/multi_executor.h"
@ -51,7 +52,7 @@ bool WritableStandbyCoordinator = false;
static bool IsCitusPlan(Plan *plan);
static bool IsCitusCustomScan(Plan *plan);
static Relation StubRelation(TupleDesc tupleDescriptor);
static bool AlterTableConstraintCheck(QueryDesc *queryDesc);
/*
* CitusExecutorStart is the ExecutorStart_hook that gets called when
@ -96,6 +97,47 @@ CitusExecutorStart(QueryDesc *queryDesc, int eflags)
}
/*
* CitusExecutorRun is the ExecutorRun_hook that gets called when postgres
* executes a query.
*/
void
CitusExecutorRun(QueryDesc *queryDesc,
ScanDirection direction, uint64 count, bool execute_once)
{
/*
* Disable execution of ALTER TABLE constraint validation queries. These
* constraints will be validated in worker nodes, so running these queries
* from the coordinator would be redundant.
*
* For example, ALTER TABLE ... ATTACH PARTITION checks that the new
* partition doesn't violate constraints of the parent table, which
* might involve running some SELECT queries.
*
* Ideally we'd completely skip these checks in the coordinator, but we don't
* have any means to tell postgres to skip the checks. So the best we can do is
* to not execute the queries and return an empty result set, as if this table has
* no rows, so no constraints will be violated.
*/
if (AlterTableConstraintCheck(queryDesc))
{
EState *estate = queryDesc->estate;
DestReceiver *dest = queryDesc->dest;
estate->es_processed = 0;
estate->es_lastoid = InvalidOid;
/* start and shutdown tuple receiver to simulate empty result */
dest->rStartup(queryDesc->dest, CMD_SELECT, queryDesc->tupDesc);
dest->rShutdown(dest);
}
else
{
standard_ExecutorRun(queryDesc, direction, count, execute_once);
}
}
/*
* IsCitusPlan returns whether a Plan contains a CustomScan generated by Citus
* by recursively walking through the plan tree.
@ -418,3 +460,41 @@ SetLocalMultiShardModifyModeToSequential()
(superuser() ? PGC_SUSET : PGC_USERSET), PGC_S_SESSION,
GUC_ACTION_LOCAL, true, 0, false);
}
/*
* AlterTableConstraintCheck returns if the given query is an ALTER TABLE
* constraint check query.
*
* Postgres uses SPI to execute these queries. To see examples of how these
* constraint check queries look like, see RI_Initial_Check() and RI_Fkey_check().
*/
static bool
AlterTableConstraintCheck(QueryDesc *queryDesc)
{
if (!AlterTableInProgress())
{
return false;
}
/*
* These queries are one or more SELECT queries, where postgres checks
* their results either for NULL values or existence of a row at all.
*/
if (queryDesc->plannedstmt->commandType != CMD_SELECT)
{
return false;
}
/*
* While an ALTER TABLE is in progress, we might do SELECTs on some
* catalog tables too. For example, when dropping a column, citus_drop_trigger()
* runs some SELECTs on catalog tables. These are not constraint check queries.
*/
if (!IsCitusPlan(queryDesc->plannedstmt->planTree))
{
return false;
}
return true;
}

View File

@ -164,7 +164,9 @@ _PG_init(void)
* (thus as the innermost/last running hook) to be able to do our
* duties. For simplicity insist that all hooks are previously unused.
*/
if (planner_hook != NULL || ProcessUtility_hook != NULL || ExecutorStart_hook != NULL)
if (planner_hook != NULL || ProcessUtility_hook != NULL || ExecutorStart_hook !=
NULL ||
ExecutorRun_hook != NULL)
{
ereport(ERROR, (errmsg("Citus has to be loaded first"),
errhint("Place citus at the beginning of "
@ -209,6 +211,7 @@ _PG_init(void)
set_rel_pathlist_hook = multi_relation_restriction_hook;
set_join_pathlist_hook = multi_join_restriction_hook;
ExecutorStart_hook = CitusExecutorStart;
ExecutorRun_hook = CitusExecutorRun;
/* register hook for error messages */
emit_log_hook = multi_log_hook;

View File

@ -45,5 +45,6 @@ extern void CitusProcessUtility(Node *node, const char *queryString,
extern void MarkInvalidateForeignKeyGraph(void);
extern void InvalidateForeignKeyGraphForDDL(void);
extern List * DDLTaskList(Oid relationId, const char *commandString);
extern bool AlterTableInProgress(void);
#endif /* MULTI_UTILITY_H */

View File

@ -32,6 +32,8 @@ extern bool WritableStandbyCoordinator;
extern void CitusExecutorStart(QueryDesc *queryDesc, int eflags);
extern void CitusExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count,
bool execute_once);
extern TupleTableSlot * ReturnTupleFromTuplestore(CitusScanState *scanState);
extern void LoadTuplesIntoTupleStore(CitusScanState *citusScanState, Job *workerJob);
extern void ReadFileIntoTupleStore(char *fileName, char *copyFormat, TupleDesc

View File

@ -511,14 +511,14 @@ ERROR: cannot execute DDL on reference relation "referece_table" because there
HINT: Try re-running the transaction with "SET LOCAL citus.multi_shard_modify_mode TO 'sequential';"
ROLLBACK;
-- case 5.4: Parallel UPDATE on distributed table follow by a related DDL on reference table
-- FIXME: Can we do better?
BEGIN;
UPDATE on_update_fkey_table SET value_1 = 16 WHERE value_1 = 15;
ALTER TABLE referece_table ALTER COLUMN id SET DATA TYPE smallint;
DEBUG: rewriting table "referece_table"
DEBUG: building index "referece_table_pkey" on table "referece_table"
DEBUG: validating foreign key constraint "fkey"
ERROR: cannot perform DDL on placement 2380001, which has been read over multiple connections
ERROR: cannot execute DDL on reference relation "referece_table" because there was a parallel DML access to distributed relation "on_update_fkey_table" in the same transaction
HINT: Try re-running the transaction with "SET LOCAL citus.multi_shard_modify_mode TO 'sequential';"
ROLLBACK;
-- case 6:1: Unrelated parallel DDL on distributed table followed by SELECT on ref. table
BEGIN;

View File

@ -511,14 +511,14 @@ ERROR: cannot execute DDL on reference relation "referece_table" because there
HINT: Try re-running the transaction with "SET LOCAL citus.multi_shard_modify_mode TO 'sequential';"
ROLLBACK;
-- case 5.4: Parallel UPDATE on distributed table follow by a related DDL on reference table
-- FIXME: Can we do better?
BEGIN;
UPDATE on_update_fkey_table SET value_1 = 16 WHERE value_1 = 15;
ALTER TABLE referece_table ALTER COLUMN id SET DATA TYPE smallint;
DEBUG: rewriting table "referece_table"
DEBUG: building index "referece_table_pkey" on table "referece_table" serially
DEBUG: validating foreign key constraint "fkey"
ERROR: cannot perform DDL on placement 2380001, which has been read over multiple connections
ERROR: cannot execute DDL on reference relation "referece_table" because there was a parallel DML access to distributed relation "on_update_fkey_table" in the same transaction
HINT: Try re-running the transaction with "SET LOCAL citus.multi_shard_modify_mode TO 'sequential';"
ROLLBACK;
-- case 6:1: Unrelated parallel DDL on distributed table followed by SELECT on ref. table
BEGIN;

View File

@ -1882,6 +1882,81 @@ ORDER BY
"schema-test_2009" | 4
(2 rows)
-- test we don't deadlock when attaching and detaching partitions from partitioned
-- tables with foreign keys
CREATE TABLE reference_table(id int PRIMARY KEY);
SELECT create_reference_table('reference_table');
create_reference_table
------------------------
(1 row)
CREATE TABLE reference_table_2(id int PRIMARY KEY);
SELECT create_reference_table('reference_table_2');
create_reference_table
------------------------
(1 row)
CREATE TABLE partitioning_test(id int, time date) PARTITION BY RANGE (time);
CREATE TABLE partitioning_test_2008 PARTITION OF partitioning_test FOR VALUES FROM ('2008-01-01') TO ('2009-01-01');
CREATE TABLE partitioning_test_2009 (LIKE partitioning_test);
CREATE TABLE partitioning_test_2010 (LIKE partitioning_test);
CREATE TABLE partitioning_test_2011 (LIKE partitioning_test);
-- distributing partitioning_test will also distribute partitioning_test_2008
SELECT create_distributed_table('partitioning_test', 'id');
create_distributed_table
--------------------------
(1 row)
SELECT create_distributed_table('partitioning_test_2009', 'id');
create_distributed_table
--------------------------
(1 row)
SELECT create_distributed_table('partitioning_test_2010', 'id');
create_distributed_table
--------------------------
(1 row)
SELECT create_distributed_table('partitioning_test_2011', 'id');
create_distributed_table
--------------------------
(1 row)
ALTER TABLE partitioning_test ADD CONSTRAINT partitioning_reference_fkey
FOREIGN KEY (id) REFERENCES reference_table(id) ON DELETE CASCADE;
ALTER TABLE partitioning_test_2009 ADD CONSTRAINT partitioning_reference_fkey_2009
FOREIGN KEY (id) REFERENCES reference_table(id) ON DELETE CASCADE;
INSERT INTO partitioning_test_2010 VALUES (1, '2010-02-01');
-- This should fail because of foreign key constraint violation
ALTER TABLE partitioning_test ATTACH PARTITION partitioning_test_2010
FOR VALUES FROM ('2010-01-01') TO ('2011-01-01');
ERROR: insert or update on table "partitioning_test_2010_1660191" violates foreign key constraint "partitioning_reference_fkey_1660179"
DETAIL: Key (id)=(1) is not present in table "reference_table_1660177".
CONTEXT: while executing command on localhost:57637
-- Truncate, so attaching again won't fail
TRUNCATE partitioning_test_2010;
-- Attach a table which already has the same constraint
ALTER TABLE partitioning_test ATTACH PARTITION partitioning_test_2009
FOR VALUES FROM ('2009-01-01') TO ('2010-01-01');
-- Attach a table which doesn't have the constraint
ALTER TABLE partitioning_test ATTACH PARTITION partitioning_test_2010
FOR VALUES FROM ('2010-01-01') TO ('2011-01-01');
-- Attach a table which has a different constraint
ALTER TABLE partitioning_test ATTACH PARTITION partitioning_test_2011
FOR VALUES FROM ('2011-01-01') TO ('2012-01-01');
ALTER TABLE partitioning_test DETACH PARTITION partitioning_test_2008;
ALTER TABLE partitioning_test DETACH PARTITION partitioning_test_2009;
ALTER TABLE partitioning_test DETACH PARTITION partitioning_test_2010;
ALTER TABLE partitioning_test DETACH PARTITION partitioning_test_2011;
DROP TABLE partitioning_test, partitioning_test_2008, partitioning_test_2009,
partitioning_test_2010, partitioning_test_2011,
reference_table, reference_table_2;
DROP SCHEMA partitioning_schema CASCADE;
NOTICE: drop cascades to table "schema-test"
RESET SEARCH_PATH;

View File

@ -1806,6 +1806,82 @@ ORDER BY
"schema-test_2009" | 4
(2 rows)
-- test we don't deadlock when attaching and detaching partitions from partitioned
-- tables with foreign keys
CREATE TABLE reference_table(id int PRIMARY KEY);
SELECT create_reference_table('reference_table');
create_reference_table
------------------------
(1 row)
CREATE TABLE reference_table_2(id int PRIMARY KEY);
SELECT create_reference_table('reference_table_2');
create_reference_table
------------------------
(1 row)
CREATE TABLE partitioning_test(id int, time date) PARTITION BY RANGE (time);
CREATE TABLE partitioning_test_2008 PARTITION OF partitioning_test FOR VALUES FROM ('2008-01-01') TO ('2009-01-01');
CREATE TABLE partitioning_test_2009 (LIKE partitioning_test);
CREATE TABLE partitioning_test_2010 (LIKE partitioning_test);
CREATE TABLE partitioning_test_2011 (LIKE partitioning_test);
-- distributing partitioning_test will also distribute partitioning_test_2008
SELECT create_distributed_table('partitioning_test', 'id');
create_distributed_table
--------------------------
(1 row)
SELECT create_distributed_table('partitioning_test_2009', 'id');
create_distributed_table
--------------------------
(1 row)
SELECT create_distributed_table('partitioning_test_2010', 'id');
create_distributed_table
--------------------------
(1 row)
SELECT create_distributed_table('partitioning_test_2011', 'id');
create_distributed_table
--------------------------
(1 row)
ALTER TABLE partitioning_test ADD CONSTRAINT partitioning_reference_fkey
FOREIGN KEY (id) REFERENCES reference_table(id) ON DELETE CASCADE;
ERROR: foreign key constraints are not supported on partitioned tables
LINE 1: ALTER TABLE partitioning_test ADD CONSTRAINT partitioning_re...
^
ALTER TABLE partitioning_test_2009 ADD CONSTRAINT partitioning_reference_fkey_2009
FOREIGN KEY (id) REFERENCES reference_table(id) ON DELETE CASCADE;
INSERT INTO partitioning_test_2010 VALUES (1, '2010-02-01');
-- This should fail because of foreign key constraint violation
ALTER TABLE partitioning_test ATTACH PARTITION partitioning_test_2010
FOR VALUES FROM ('2010-01-01') TO ('2011-01-01');
-- Truncate, so attaching again won't fail
TRUNCATE partitioning_test_2010;
-- Attach a table which already has the same constraint
ALTER TABLE partitioning_test ATTACH PARTITION partitioning_test_2009
FOR VALUES FROM ('2009-01-01') TO ('2010-01-01');
-- Attach a table which doesn't have the constraint
ALTER TABLE partitioning_test ATTACH PARTITION partitioning_test_2010
FOR VALUES FROM ('2010-01-01') TO ('2011-01-01');
ERROR: "partitioning_test_2010" is already a partition
-- Attach a table which has a different constraint
ALTER TABLE partitioning_test ATTACH PARTITION partitioning_test_2011
FOR VALUES FROM ('2011-01-01') TO ('2012-01-01');
ALTER TABLE partitioning_test DETACH PARTITION partitioning_test_2008;
ALTER TABLE partitioning_test DETACH PARTITION partitioning_test_2009;
ALTER TABLE partitioning_test DETACH PARTITION partitioning_test_2010;
ALTER TABLE partitioning_test DETACH PARTITION partitioning_test_2011;
DROP TABLE partitioning_test, partitioning_test_2008, partitioning_test_2009,
partitioning_test_2010, partitioning_test_2011,
reference_table, reference_table_2;
DROP SCHEMA partitioning_schema CASCADE;
NOTICE: drop cascades to table "schema-test"
RESET SEARCH_PATH;

View File

@ -303,7 +303,6 @@ BEGIN;
ROLLBACK;
-- case 5.4: Parallel UPDATE on distributed table follow by a related DDL on reference table
-- FIXME: Can we do better?
BEGIN;
UPDATE on_update_fkey_table SET value_1 = 16 WHERE value_1 = 15;
ALTER TABLE referece_table ALTER COLUMN id SET DATA TYPE smallint;

View File

@ -1127,6 +1127,58 @@ GROUP BY
ORDER BY
1,2;
-- test we don't deadlock when attaching and detaching partitions from partitioned
-- tables with foreign keys
CREATE TABLE reference_table(id int PRIMARY KEY);
SELECT create_reference_table('reference_table');
CREATE TABLE reference_table_2(id int PRIMARY KEY);
SELECT create_reference_table('reference_table_2');
CREATE TABLE partitioning_test(id int, time date) PARTITION BY RANGE (time);
CREATE TABLE partitioning_test_2008 PARTITION OF partitioning_test FOR VALUES FROM ('2008-01-01') TO ('2009-01-01');
CREATE TABLE partitioning_test_2009 (LIKE partitioning_test);
CREATE TABLE partitioning_test_2010 (LIKE partitioning_test);
CREATE TABLE partitioning_test_2011 (LIKE partitioning_test);
-- distributing partitioning_test will also distribute partitioning_test_2008
SELECT create_distributed_table('partitioning_test', 'id');
SELECT create_distributed_table('partitioning_test_2009', 'id');
SELECT create_distributed_table('partitioning_test_2010', 'id');
SELECT create_distributed_table('partitioning_test_2011', 'id');
ALTER TABLE partitioning_test ADD CONSTRAINT partitioning_reference_fkey
FOREIGN KEY (id) REFERENCES reference_table(id) ON DELETE CASCADE;
ALTER TABLE partitioning_test_2009 ADD CONSTRAINT partitioning_reference_fkey_2009
FOREIGN KEY (id) REFERENCES reference_table(id) ON DELETE CASCADE;
INSERT INTO partitioning_test_2010 VALUES (1, '2010-02-01');
-- This should fail because of foreign key constraint violation
ALTER TABLE partitioning_test ATTACH PARTITION partitioning_test_2010
FOR VALUES FROM ('2010-01-01') TO ('2011-01-01');
-- Truncate, so attaching again won't fail
TRUNCATE partitioning_test_2010;
-- Attach a table which already has the same constraint
ALTER TABLE partitioning_test ATTACH PARTITION partitioning_test_2009
FOR VALUES FROM ('2009-01-01') TO ('2010-01-01');
-- Attach a table which doesn't have the constraint
ALTER TABLE partitioning_test ATTACH PARTITION partitioning_test_2010
FOR VALUES FROM ('2010-01-01') TO ('2011-01-01');
-- Attach a table which has a different constraint
ALTER TABLE partitioning_test ATTACH PARTITION partitioning_test_2011
FOR VALUES FROM ('2011-01-01') TO ('2012-01-01');
ALTER TABLE partitioning_test DETACH PARTITION partitioning_test_2008;
ALTER TABLE partitioning_test DETACH PARTITION partitioning_test_2009;
ALTER TABLE partitioning_test DETACH PARTITION partitioning_test_2010;
ALTER TABLE partitioning_test DETACH PARTITION partitioning_test_2011;
DROP TABLE partitioning_test, partitioning_test_2008, partitioning_test_2009,
partitioning_test_2010, partitioning_test_2011,
reference_table, reference_table_2;
DROP SCHEMA partitioning_schema CASCADE;
RESET SEARCH_PATH;
DROP TABLE IF EXISTS