Fixes a bug that causes crash when using auto_explain extension with ALTER TABLE...ADD FOREIGN KEY... queries. (#6470)

Fixes a bug that causes crash when using auto_explain extension with
ALTER TABLE...ADD FOREIGN KEY... queries.
Those queries trigger a SELECT query on the citus tables as part of the
foreign key constraint validation check. At the explain hook, workers
try to explain this SELECT query as a distributed query causing memory
corruption in the connection data structures. Hence, we will not explain
ALTER TABLE...ADD FOREIGN KEY... and the triggered queries on the
workers.

Fixes #6424.
naisila/rebalancer_pt
Emel Şimşek 2022-11-15 17:53:39 +03:00 committed by GitHub
parent 0ee973368b
commit 8e5ba45b74
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 86 additions and 0 deletions

View File

@ -49,6 +49,7 @@
#include "distributed/worker_protocol.h"
#include "distributed/version_compat.h"
#include "distributed/jsonbutils.h"
#include "distributed/commands/utility_hook.h"
#include "executor/tstoreReceiver.h"
#include "fmgr.h"
#include "lib/stringinfo.h"
@ -198,6 +199,20 @@ CitusExplainScan(CustomScanState *node, List *ancestors, struct ExplainState *es
return;
}
/*
* ALTER TABLE statements are not explained by postgres. However ALTER TABLE statements
* may trigger SELECT statements causing explain hook to run. This situation causes a crash in a worker.
* Therefore we will detect if we are explaining a triggered query when we are processing
* an ALTER TABLE statement and stop explain in this situation.
*/
if (AlterTableInProgress())
{
ExplainPropertyText("Citus Explain Scan",
"Explain for triggered constraint validation queries during ALTER TABLE commands are not supported by Citus",
es);
return;
}
ExplainOpenGroup("Distributed Query", "Distributed Query", true, es);
/*

View File

@ -298,3 +298,6 @@ s/^(NOTICE: )(clock).*LC:[0-9]+,.*C:[0-9]+,.*$/\1\2 xxxxxx/g
/^(DEBUG: )(adjusted to remote clock: <logical)\([0-9]+\) counter\([0-9]+\)>$/d
/^DEBUG: persisting transaction.*counter.*$/d
/^DEBUG: both logical clock values are equal\([0-9]+\), pick remote.*$/d
# The following 2 lines are to normalize duration and cost in the EXPLAIN output
s/LOG: duration: [0-9].[0-9]+ ms/LOG: duration: xxxx ms/g
s/"Total Cost": [0-9].[0-9]+/"Total Cost": xxxx/g

View File

@ -690,3 +690,48 @@ DROP SCHEMA sc3 CASCADE;
NOTICE: drop cascades to 2 other objects
DETAIL: drop cascades to table sc3.alter_add_prim_key
drop cascades to table sc3.alter_add_unique
-- Test ALTER TABLE ... ADD CONSTRAINT ... does not cause a crash when auto_explain module is loaded
CREATE TABLE target_table(col_1 int primary key, col_2 int);
SELECT create_distributed_table('target_table','col_1');
create_distributed_table
---------------------------------------------------------------------
(1 row)
INSERT INTO target_table VALUES(1,2),(2,3),(3,4),(4,5),(5,6);
CREATE TABLE test_ref_table (key int PRIMARY KEY);
SELECT create_reference_table('test_ref_table');
create_reference_table
---------------------------------------------------------------------
(1 row)
INSERT INTO test_ref_table VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
LOAD 'auto_explain';
SET auto_explain.log_min_duration = 0;
SET auto_explain.log_level = LOG;
SET client_min_messages to LOG;
SET auto_explain.log_timing TO off;
SET auto_explain.log_format = JSON;
BEGIN;
-- simulate being a worker session/backend
SET LOCAL application_name to 'citus_internal gpid=10000000001';
SET citus.enable_ddl_propagation TO OFF;
-- alter table triggers SELECT, and auto_explain catches that
ALTER TABLE target_table ADD CONSTRAINT fkey_167 FOREIGN KEY (col_1) REFERENCES test_ref_table(key) ON DELETE CASCADE;
LOG: duration: xxxx ms plan:
{
"Query Text": "SELECT fk.\"col_1\" FROM ONLY \"public\".\"target_table\" fk LEFT OUTER JOIN ONLY \"public\".\"test_ref_table\" pk ON ( pk.\"key\" OPERATOR(pg_catalog.=) fk.\"col_1\") WHERE pk.\"key\" IS NULL AND (fk.\"col_1\" IS NOT NULL)",
"Plan": {
"Node Type": "Custom Scan",
"Custom Plan Provider": "Citus Adaptive",
"Parallel Aware": false,
"Startup Cost": 0.00,
"Total Cost": xxxx,
"Plan Rows": 100000,
"Plan Width": 4,
"Citus Explain Scan": "Explain for triggered constraint validation queries during ALTER TABLE commands are not supported by Citus"
}
}
CONTEXT: SQL statement "SELECT fk."col_1" FROM ONLY "public"."target_table" fk LEFT OUTER JOIN ONLY "public"."test_ref_table" pk ON ( pk."key" OPERATOR(pg_catalog.=) fk."col_1") WHERE pk."key" IS NULL AND (fk."col_1" IS NOT NULL)"
END;

View File

@ -575,3 +575,26 @@ SET search_path TO 'public';
DROP SCHEMA sc1 CASCADE;
DROP SCHEMA sc2 CASCADE;
DROP SCHEMA sc3 CASCADE;
-- Test ALTER TABLE ... ADD CONSTRAINT ... does not cause a crash when auto_explain module is loaded
CREATE TABLE target_table(col_1 int primary key, col_2 int);
SELECT create_distributed_table('target_table','col_1');
INSERT INTO target_table VALUES(1,2),(2,3),(3,4),(4,5),(5,6);
CREATE TABLE test_ref_table (key int PRIMARY KEY);
SELECT create_reference_table('test_ref_table');
INSERT INTO test_ref_table VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
LOAD 'auto_explain';
SET auto_explain.log_min_duration = 0;
SET auto_explain.log_level = LOG;
SET client_min_messages to LOG;
SET auto_explain.log_timing TO off;
SET auto_explain.log_format = JSON;
BEGIN;
-- simulate being a worker session/backend
SET LOCAL application_name to 'citus_internal gpid=10000000001';
SET citus.enable_ddl_propagation TO OFF;
-- alter table triggers SELECT, and auto_explain catches that
ALTER TABLE target_table ADD CONSTRAINT fkey_167 FOREIGN KEY (col_1) REFERENCES test_ref_table(key) ON DELETE CASCADE;
END;