From ab7c3b7804c1ec96be6cc88625060294ec2b2a58 Mon Sep 17 00:00:00 2001 From: Mehmet YILMAZ Date: Tue, 21 Jan 2025 17:48:06 +0300 Subject: [PATCH] PG17 Compatibility - Fix crash when pg_class is used in MERGE (#7853) This pull request addresses Issue #7846, where specific MERGE queries on non-distributed and distributed tables can result in crashes in certain scenarios. The issue stems from the usage of `pg_class` catalog table, and the `FilterShardsFromPgclass` function in Citus. This function goes through the query's jointree to hide the shards. However, in PG17, MERGE's join quals are in a separate structure called `mergeJoinCondition`. Therefore FilterShardsFromPgclass was not filtering correctly in a `MERGE` command that involves `pg_class`. To fix the issue, we handle `mergeJoinCondition` separately in PG17. Relevant PG commit: https://github.com/postgres/postgres/commit/0294df2f1f842dfb0eed79007b21016f486a3c6c **Non-Distributed Tables:** A MERGE query involving a non-distributed table using `pg_catalog.pg_class` as the source may execute successfully but needs testing to ensure stability. **Distributed Tables:** Performing a MERGE on a distributed table using `pg_catalog.pg_class` as the source raises an error: `ERROR: MERGE INTO a distributed table from Postgres table is not yet supported` However, in some cases, this can lead to a server crash if the unsupported operation is not properly handled. This is the test output from the same test conducted prior to the code changes being implemented. ``` -- Issue #7846: Test crash scenarios with MERGE on non-distributed and distributed tables -- Step 1: Connect to a worker node to verify shard visibility \c postgresql://postgres@localhost::worker_1_port/regression?application_name=psql SET search_path TO pg17; -- Step 2: Create and test a non-distributed table CREATE TABLE non_dist_table_12345 (id INTEGER); -- Test MERGE on the non-distributed table MERGE INTO non_dist_table_12345 AS target_0 USING pg_catalog.pg_class AS ref_0 ON target_0.id = ref_0.relpages WHEN NOT MATCHED THEN DO NOTHING; SSL SYSCALL error: EOF detected connection to server was lost ``` --- citus-tools | 1 + .../worker/worker_shard_visibility.c | 33 ++++++++++++++---- src/test/regress/expected/pg17.out | 33 ++++++++++++++++++ src/test/regress/sql/pg17.sql | 34 +++++++++++++++++++ 4 files changed, 94 insertions(+), 7 deletions(-) create mode 160000 citus-tools diff --git a/citus-tools b/citus-tools new file mode 160000 index 000000000..3376bd684 --- /dev/null +++ b/citus-tools @@ -0,0 +1 @@ +Subproject commit 3376bd6845f0614908ed304f5033bd644c82d3bf diff --git a/src/backend/distributed/worker/worker_shard_visibility.c b/src/backend/distributed/worker/worker_shard_visibility.c index f783d514d..e80d66f58 100644 --- a/src/backend/distributed/worker/worker_shard_visibility.c +++ b/src/backend/distributed/worker/worker_shard_visibility.c @@ -441,7 +441,7 @@ FilterShardsFromPgclass(Node *node, void *context) /* * We process the whole rtable rather than visiting individual RangeTblEntry's * in the walker, since we need to know the varno to generate the right - * fiter. + * filter. */ int varno = 0; RangeTblEntry *rangeTableEntry = NULL; @@ -471,20 +471,39 @@ FilterShardsFromPgclass(Node *node, void *context) /* make sure the expression is in the right memory context */ MemoryContext originalContext = MemoryContextSwitchTo(queryContext); - /* add relation_is_a_known_shard(oid) IS NOT TRUE to the quals of the query */ Node *newQual = CreateRelationIsAKnownShardFilter(varno); - Node *oldQuals = query->jointree->quals; - if (oldQuals) + +#if PG_VERSION_NUM >= PG_VERSION_17 + + /* + * In PG17, MERGE queries introduce a new struct `mergeJoinCondition`. + * We need to handle this condition safely. + */ + if (query->mergeJoinCondition != NULL) { - query->jointree->quals = (Node *) makeBoolExpr( + /* Add the filter to mergeJoinCondition */ + query->mergeJoinCondition = (Node *) makeBoolExpr( AND_EXPR, - list_make2(oldQuals, newQual), + list_make2(query->mergeJoinCondition, newQual), -1); } else +#endif { - query->jointree->quals = newQual; + /* Handle older versions or queries without mergeJoinCondition */ + Node *oldQuals = query->jointree->quals; + if (oldQuals) + { + query->jointree->quals = (Node *) makeBoolExpr( + AND_EXPR, + list_make2(oldQuals, newQual), + -1); + } + else + { + query->jointree->quals = newQual; + } } MemoryContextSwitchTo(originalContext); diff --git a/src/test/regress/expected/pg17.out b/src/test/regress/expected/pg17.out index 83507bb15..5e4460be1 100644 --- a/src/test/regress/expected/pg17.out +++ b/src/test/regress/expected/pg17.out @@ -2690,6 +2690,39 @@ SELECT * FROM sensor_readings ORDER BY 1; (4 rows) -- End of MERGE ... WHEN NOT MATCHED BY SOURCE tests +-- Issue #7846: Test crash scenarios with MERGE on non-distributed and distributed tables +-- Step 1: Connect to a worker node to verify shard visibility +\c postgresql://postgres@localhost::worker_1_port/regression?application_name=psql +SET search_path TO pg17; +-- Step 2: Create and test a non-distributed table +CREATE TABLE non_dist_table_12345 (id INTEGER); +-- Test MERGE on the non-distributed table +MERGE INTO non_dist_table_12345 AS target_0 +USING pg_catalog.pg_class AS ref_0 +ON target_0.id = ref_0.relpages +WHEN NOT MATCHED THEN DO NOTHING; +-- Step 3: Switch back to the coordinator for distributed table operations +\c postgresql://postgres@localhost::master_port/regression?application_name=psql +SET search_path TO pg17; +-- Step 4: Create and test a distributed table +CREATE TABLE dist_table_67890 (id INTEGER); +SELECT create_distributed_table('dist_table_67890', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- Test MERGE on the distributed table +MERGE INTO dist_table_67890 AS target_0 +USING pg_catalog.pg_class AS ref_0 +ON target_0.id = ref_0.relpages +WHEN NOT MATCHED THEN DO NOTHING; +ERROR: MERGE INTO an distributed table from Postgres table is not yet supported +-- Step 5: Cleanup +DROP TABLE non_dist_table_12345; +ERROR: table "non_dist_table_12345" does not exist +DROP TABLE dist_table_67890 CASCADE; +-- End of Issue #7846 \set VERBOSITY terse SET client_min_messages TO WARNING; DROP SCHEMA pg17 CASCADE; diff --git a/src/test/regress/sql/pg17.sql b/src/test/regress/sql/pg17.sql index e4843db44..ef7371551 100644 --- a/src/test/regress/sql/pg17.sql +++ b/src/test/regress/sql/pg17.sql @@ -1451,6 +1451,40 @@ SELECT * FROM sensor_readings ORDER BY 1; -- End of MERGE ... WHEN NOT MATCHED BY SOURCE tests +-- Issue #7846: Test crash scenarios with MERGE on non-distributed and distributed tables +-- Step 1: Connect to a worker node to verify shard visibility +\c postgresql://postgres@localhost::worker_1_port/regression?application_name=psql +SET search_path TO pg17; + +-- Step 2: Create and test a non-distributed table +CREATE TABLE non_dist_table_12345 (id INTEGER); + +-- Test MERGE on the non-distributed table +MERGE INTO non_dist_table_12345 AS target_0 +USING pg_catalog.pg_class AS ref_0 +ON target_0.id = ref_0.relpages +WHEN NOT MATCHED THEN DO NOTHING; + +-- Step 3: Switch back to the coordinator for distributed table operations +\c postgresql://postgres@localhost::master_port/regression?application_name=psql +SET search_path TO pg17; + +-- Step 4: Create and test a distributed table +CREATE TABLE dist_table_67890 (id INTEGER); +SELECT create_distributed_table('dist_table_67890', 'id'); + +-- Test MERGE on the distributed table +MERGE INTO dist_table_67890 AS target_0 +USING pg_catalog.pg_class AS ref_0 +ON target_0.id = ref_0.relpages +WHEN NOT MATCHED THEN DO NOTHING; + +-- Step 5: Cleanup +DROP TABLE non_dist_table_12345; +DROP TABLE dist_table_67890 CASCADE; + +-- End of Issue #7846 + \set VERBOSITY terse SET client_min_messages TO WARNING; DROP SCHEMA pg17 CASCADE;