From c52f36019fceeb8894f4c7a0e52d7225a0f78d09 Mon Sep 17 00:00:00 2001 From: Colm McHugh Date: Fri, 25 Oct 2024 11:24:01 +0000 Subject: [PATCH 01/20] [Bug Fix] [SEGFAULT] Querying distributed tables with window partition may cause segfault #7705 In function MasterAggregateMutator(), when the original Node is a Var node use makeVar() instead of copyObject() when constructing the Var node for the target list of the combine query. The varnullingrels field of the original Var node is ignored because it is not relevant for the combine query; copying this cause the problem in issue 7705, where a coordinator query had a Var with a reference to a non-existent join relation. --- .../planner/multi_logical_optimizer.c | 7 +- src/test/regress/expected/issue_7705.out | 248 ++++++++++++++++++ src/test/regress/multi_schedule | 2 +- src/test/regress/sql/issue_7705.sql | 72 +++++ 4 files changed, 325 insertions(+), 4 deletions(-) create mode 100644 src/test/regress/expected/issue_7705.out create mode 100644 src/test/regress/sql/issue_7705.sql diff --git a/src/backend/distributed/planner/multi_logical_optimizer.c b/src/backend/distributed/planner/multi_logical_optimizer.c index 371ba54e6..28680deb0 100644 --- a/src/backend/distributed/planner/multi_logical_optimizer.c +++ b/src/backend/distributed/planner/multi_logical_optimizer.c @@ -1557,9 +1557,10 @@ MasterAggregateMutator(Node *originalNode, MasterAggregateWalkerContext *walkerC } else if (IsA(originalNode, Var)) { - Var *newColumn = copyObject((Var *) originalNode); - newColumn->varno = masterTableId; - newColumn->varattno = walkerContext->columnId; + Var *origColumn = (Var *) originalNode; + Var *newColumn = makeVar(masterTableId, walkerContext->columnId, + origColumn->vartype, origColumn->vartypmod, + origColumn->varcollid, origColumn->varlevelsup); walkerContext->columnId++; newNode = (Node *) newColumn; diff --git a/src/test/regress/expected/issue_7705.out b/src/test/regress/expected/issue_7705.out new file mode 100644 index 000000000..20b078226 --- /dev/null +++ b/src/test/regress/expected/issue_7705.out @@ -0,0 +1,248 @@ +--- Test for verifying that column references (var nodes) in targets that cannot be pushed down +--- do not cause issues for the postgres planner, in particular postgres versions 16+, where the +--- varnullingrels field of a VAR node may contain relids of join relations that can make the var +--- NULL; in a rewritten distributed query without a join such relids do not have a meaning. +--- Issue #7705: [SEGFAULT] Querying distributed tables with window partition causes segmentation fault +--- https://github.com/citusdata/citus/issues/7705 +CREATE SCHEMA issue_7705; +SET search_path to 'issue_7705'; +SET citus.next_shard_id TO 30070000; +SET citus.shard_replication_factor TO 1; +SET citus.enable_local_execution TO ON; +CREATE TABLE t1 (id INT PRIMARY KEY); +INSERT INTO t1 VALUES (1), (2); +CREATE TABLE t2 (id INT, account_id INT, a2 INT, PRIMARY KEY(id, account_id)); +INSERT INTO t2 VALUES (3, 1, 10), (4, 2, 20), (5, 1, NULL); +SELECT create_distributed_table('t1', 'id'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$issue_7705.t1$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('t2', 'account_id'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$issue_7705.t2$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- Test the issue seen in #7705; a target expression with +-- a window function that cannot be pushed down because the +-- partion by is not on the distribution column also includes +-- a column from the inner side of a left outer join, which +-- produces a non-empty varnullingrels set in PG 16 (and higher) +SELECT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id; + id | max +--------------------------------------------------------------------- + 1 | 10 + 2 | 20 + 1 | +(3 rows) + +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id; + QUERY PLAN +--------------------------------------------------------------------- + WindowAgg + Output: remote_scan.id, max(remote_scan.max) OVER (?), remote_scan.worker_column_3 + -> Sort + Output: remote_scan.worker_column_3, remote_scan.id, remote_scan.max + Sort Key: remote_scan.worker_column_3 + -> Custom Scan (Citus Adaptive) + Output: remote_scan.worker_column_3, remote_scan.id, remote_scan.max + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Query: SELECT worker_column_1 AS id, worker_column_2 AS max, worker_column_3 FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2, t2.id AS worker_column_3 FROM (issue_7705.t1_30070000 t1 LEFT JOIN issue_7705.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery + Node: host=localhost port=xxxxx dbname=regression + -> Hash Right Join + Output: t1.id, t2.a2, t2.id + Inner Unique: true + Hash Cond: (t2.account_id = t1.id) + -> Seq Scan on issue_7705.t2_30070004 t2 + Output: t2.id, t2.account_id, t2.a2 + -> Hash + Output: t1.id + -> Seq Scan on issue_7705.t1_30070000 t1 + Output: t1.id +(22 rows) + +SELECT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id) +FROM t2 RIGHT OUTER JOIN t1 ON t1.id = t2.account_id; + id | max +--------------------------------------------------------------------- + 1 | 10 + 2 | 20 + 1 | +(3 rows) + +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id) +FROM t2 RIGHT OUTER JOIN t1 ON t1.id = t2.account_id; + QUERY PLAN +--------------------------------------------------------------------- + WindowAgg + Output: remote_scan.id, max(remote_scan.max) OVER (?), remote_scan.worker_column_3 + -> Sort + Output: remote_scan.worker_column_3, remote_scan.id, remote_scan.max + Sort Key: remote_scan.worker_column_3 + -> Custom Scan (Citus Adaptive) + Output: remote_scan.worker_column_3, remote_scan.id, remote_scan.max + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Query: SELECT worker_column_1 AS id, worker_column_2 AS max, worker_column_3 FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2, t2.id AS worker_column_3 FROM (issue_7705.t2_30070004 t2 RIGHT JOIN issue_7705.t1_30070000 t1 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery + Node: host=localhost port=xxxxx dbname=regression + -> Hash Right Join + Output: t1.id, t2.a2, t2.id + Inner Unique: true + Hash Cond: (t2.account_id = t1.id) + -> Seq Scan on issue_7705.t2_30070004 t2 + Output: t2.id, t2.account_id, t2.a2 + -> Hash + Output: t1.id + -> Seq Scan on issue_7705.t1_30070000 t1 + Output: t1.id +(22 rows) + +SELECT DISTINCT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id; + id | max +--------------------------------------------------------------------- + 1 | + 1 | 10 + 2 | 20 +(3 rows) + +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT DISTINCT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id; + QUERY PLAN +--------------------------------------------------------------------- + HashAggregate + Output: remote_scan.id, (max(remote_scan.max) OVER (?)), remote_scan.worker_column_3 + Group Key: remote_scan.id, max(remote_scan.max) OVER (?) + -> WindowAgg + Output: remote_scan.id, max(remote_scan.max) OVER (?), remote_scan.worker_column_3 + -> Sort + Output: remote_scan.worker_column_3, remote_scan.id, remote_scan.max + Sort Key: remote_scan.worker_column_3 + -> Custom Scan (Citus Adaptive) + Output: remote_scan.worker_column_3, remote_scan.id, remote_scan.max + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Query: SELECT worker_column_1 AS id, worker_column_2 AS max, worker_column_3 FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2, t2.id AS worker_column_3 FROM (issue_7705.t1_30070000 t1 LEFT JOIN issue_7705.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery + Node: host=localhost port=xxxxx dbname=regression + -> Hash Right Join + Output: t1.id, t2.a2, t2.id + Inner Unique: true + Hash Cond: (t2.account_id = t1.id) + -> Seq Scan on issue_7705.t2_30070004 t2 + Output: t2.id, t2.account_id, t2.a2 + -> Hash + Output: t1.id + -> Seq Scan on issue_7705.t1_30070000 t1 + Output: t1.id +(25 rows) + +CREATE SEQUENCE test_seq START 101; +CREATE OR REPLACE FUNCTION TEST_F(int) returns INT language sql stable as $$ select $1 + 42; $$ ; +-- Issue #7705 also occurs if a target expression includes a column +-- of a distributed table that is on the inner side of a left outer +-- join and a call to nextval(), because nextval() cannot be pushed +-- down, and must be run on the coordinator +SELECT t1.id, TEST_F(t2.a2 + nextval('test_seq') :: int) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id +ORDER BY t1.id; + id | test_f +--------------------------------------------------------------------- + 1 | 153 + 1 | + 2 | 165 +(3 rows) + +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT t1.id, TEST_F(t2.a2 + nextval('test_seq') :: int) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id +ORDER BY t1.id; + QUERY PLAN +--------------------------------------------------------------------- + Result + Output: remote_scan.id, ((remote_scan.test_f + (nextval('test_seq'::regclass))::integer) + 42) + -> Sort + Output: remote_scan.id, remote_scan.test_f + Sort Key: remote_scan.id + -> Custom Scan (Citus Adaptive) + Output: remote_scan.id, remote_scan.test_f + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Query: SELECT worker_column_1 AS id, worker_column_2 AS test_f FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2 FROM (issue_7705.t1_30070000 t1 LEFT JOIN issue_7705.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery + Node: host=localhost port=xxxxx dbname=regression + -> Hash Right Join + Output: t1.id, t2.a2 + Inner Unique: true + Hash Cond: (t2.account_id = t1.id) + -> Seq Scan on issue_7705.t2_30070004 t2 + Output: t2.id, t2.account_id, t2.a2 + -> Hash + Output: t1.id + -> Seq Scan on issue_7705.t1_30070000 t1 + Output: t1.id +(22 rows) + +SELECT t1.id, CASE nextval('test_seq') % 2 = 0 WHEN true THEN t2.a2 ELSE 1 END +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id +ORDER BY t1.id; + id | case +--------------------------------------------------------------------- + 1 | 10 + 1 | 1 + 2 | 20 +(3 rows) + +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT t1.id, CASE nextval('test_seq') %2 = 0 WHEN true THEN t2.a2 ELSE 1 END +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id +ORDER BY t1.id; + QUERY PLAN +--------------------------------------------------------------------- + Result + Output: remote_scan.id, CASE ((nextval('test_seq'::regclass) % '2'::bigint) = 0) WHEN CASE_TEST_EXPR THEN remote_scan."case" ELSE 1 END + -> Sort + Output: remote_scan.id, remote_scan."case" + Sort Key: remote_scan.id + -> Custom Scan (Citus Adaptive) + Output: remote_scan.id, remote_scan."case" + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Query: SELECT worker_column_1 AS id, worker_column_2 AS "case" FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2 FROM (issue_7705.t1_30070000 t1 LEFT JOIN issue_7705.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery + Node: host=localhost port=xxxxx dbname=regression + -> Hash Right Join + Output: t1.id, t2.a2 + Inner Unique: true + Hash Cond: (t2.account_id = t1.id) + -> Seq Scan on issue_7705.t2_30070004 t2 + Output: t2.id, t2.account_id, t2.a2 + -> Hash + Output: t1.id + -> Seq Scan on issue_7705.t1_30070000 t1 + Output: t1.id +(22 rows) + +--- cleanup +\set VERBOSITY TERSE +DROP SCHEMA issue_7705 CASCADE; +NOTICE: drop cascades to 4 other objects +RESET all; diff --git a/src/test/regress/multi_schedule b/src/test/regress/multi_schedule index 220ce1964..bbb4047a9 100644 --- a/src/test/regress/multi_schedule +++ b/src/test/regress/multi_schedule @@ -103,7 +103,7 @@ test: multi_dropped_column_aliases foreign_key_restriction_enforcement test: binary_protocol test: alter_table_set_access_method test: alter_distributed_table -test: issue_5248 issue_5099 issue_5763 issue_6543 issue_6758 issue_7477 +test: issue_5248 issue_5099 issue_5763 issue_6543 issue_6758 issue_7477 issue_7705 test: object_propagation_debug test: undistribute_table test: run_command_on_all_nodes diff --git a/src/test/regress/sql/issue_7705.sql b/src/test/regress/sql/issue_7705.sql new file mode 100644 index 000000000..950933017 --- /dev/null +++ b/src/test/regress/sql/issue_7705.sql @@ -0,0 +1,72 @@ +--- Test for verifying that column references (var nodes) in targets that cannot be pushed down +--- do not cause issues for the postgres planner, in particular postgres versions 16+, where the +--- varnullingrels field of a VAR node may contain relids of join relations that can make the var +--- NULL; in a rewritten distributed query without a join such relids do not have a meaning. +--- Issue #7705: [SEGFAULT] Querying distributed tables with window partition causes segmentation fault +--- https://github.com/citusdata/citus/issues/7705 + +CREATE SCHEMA issue_7705; +SET search_path to 'issue_7705'; +SET citus.next_shard_id TO 30070000; +SET citus.shard_replication_factor TO 1; +SET citus.enable_local_execution TO ON; + +CREATE TABLE t1 (id INT PRIMARY KEY); +INSERT INTO t1 VALUES (1), (2); + +CREATE TABLE t2 (id INT, account_id INT, a2 INT, PRIMARY KEY(id, account_id)); +INSERT INTO t2 VALUES (3, 1, 10), (4, 2, 20), (5, 1, NULL); + +SELECT create_distributed_table('t1', 'id'); +SELECT create_distributed_table('t2', 'account_id'); + +-- Test the issue seen in #7705; a target expression with +-- a window function that cannot be pushed down because the +-- partion by is not on the distribution column also includes +-- a column from the inner side of a left outer join, which +-- produces a non-empty varnullingrels set in PG 16 (and higher) +SELECT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id; +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id; + +SELECT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id) +FROM t2 RIGHT OUTER JOIN t1 ON t1.id = t2.account_id; +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id) +FROM t2 RIGHT OUTER JOIN t1 ON t1.id = t2.account_id; + +SELECT DISTINCT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id; +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT DISTINCT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id; + +CREATE SEQUENCE test_seq START 101; +CREATE OR REPLACE FUNCTION TEST_F(int) returns INT language sql stable as $$ select $1 + 42; $$ ; + +-- Issue #7705 also occurs if a target expression includes a column +-- of a distributed table that is on the inner side of a left outer +-- join and a call to nextval(), because nextval() cannot be pushed +-- down, and must be run on the coordinator +SELECT t1.id, TEST_F(t2.a2 + nextval('test_seq') :: int) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id +ORDER BY t1.id; +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT t1.id, TEST_F(t2.a2 + nextval('test_seq') :: int) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id +ORDER BY t1.id; + +SELECT t1.id, CASE nextval('test_seq') % 2 = 0 WHEN true THEN t2.a2 ELSE 1 END +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id +ORDER BY t1.id; +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT t1.id, CASE nextval('test_seq') %2 = 0 WHEN true THEN t2.a2 ELSE 1 END +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id +ORDER BY t1.id; + +--- cleanup +\set VERBOSITY TERSE +DROP SCHEMA issue_7705 CASCADE; +RESET all; From fe6d198ab2cb87073fe552ab8c45b011c97230c2 Mon Sep 17 00:00:00 2001 From: Pavel Seleznev Date: Tue, 3 Dec 2024 17:10:36 +0300 Subject: [PATCH 02/20] Remove warnings on some builds (#7680) Co-authored-by: Pavel Seleznev --- src/backend/columnar/columnar_tableam.c | 2 ++ src/backend/distributed/metadata/metadata_cache.c | 2 ++ src/backend/distributed/planner/insert_select_planner.c | 2 ++ 3 files changed, 6 insertions(+) diff --git a/src/backend/columnar/columnar_tableam.c b/src/backend/columnar/columnar_tableam.c index ca3a5f4c4..fd3d171c6 100644 --- a/src/backend/columnar/columnar_tableam.c +++ b/src/backend/columnar/columnar_tableam.c @@ -3021,6 +3021,8 @@ AvailableExtensionVersionColumnar(void) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("citus extension is not found"))); + + return NULL; /* keep compiler happy */ } diff --git a/src/backend/distributed/metadata/metadata_cache.c b/src/backend/distributed/metadata/metadata_cache.c index 402dedb8a..4f1b942a0 100644 --- a/src/backend/distributed/metadata/metadata_cache.c +++ b/src/backend/distributed/metadata/metadata_cache.c @@ -2522,6 +2522,8 @@ AvailableExtensionVersion(void) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("citus extension is not found"))); + + return NULL; /* keep compiler happy */ } diff --git a/src/backend/distributed/planner/insert_select_planner.c b/src/backend/distributed/planner/insert_select_planner.c index 60d6ce466..155880253 100644 --- a/src/backend/distributed/planner/insert_select_planner.c +++ b/src/backend/distributed/planner/insert_select_planner.c @@ -1810,6 +1810,8 @@ CastExpr(Expr *expr, Oid sourceType, Oid targetType, Oid targetCollation, ereport(ERROR, (errmsg("could not find a conversion path from type %d to %d", sourceType, targetType))); } + + return NULL; /* keep compiler happy */ } From 0355b12c7f13c138f3f66d3971e065e69edf37e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emel=20=C5=9Eim=C5=9Fek?= Date: Wed, 4 Dec 2024 11:11:33 +0300 Subject: [PATCH 03/20] Add changelog entries for 12.1.6 (#7770) Add changelog entries for 12.1.6 --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 78d1d2a7c..94c85bcdf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +### citus v12.1.6 (Nov 14, 2024) ### + +* Propagates `SECURITY LABEL .. ON ROLE` statements (#7304) + +* Fixes crash caused by running queries with window partition (#7718) + ### citus v12.1.5 (July 17, 2024) ### * Adds support for MERGE commands with single shard distributed target tables From 665d72a2f57bf2c94241ac9892306ac7c70d098a Mon Sep 17 00:00:00 2001 From: Naisila Puka <37271756+naisila@users.noreply.github.com> Date: Mon, 23 Dec 2024 15:15:15 +0300 Subject: [PATCH 04/20] Bump postgres versions in CI and dev: 14.14, 15.9, 16.5 (#7779) Upgrade postgres versions to: - 14.14 - 15.9 - 16.5 Depends on https://github.com/citusdata/the-process/pull/163 We had some errors with the latest minors, so this is a 2-level bump for now. --- .devcontainer/Dockerfile | 8 ++++---- .github/workflows/build_and_test.yml | 14 +++++++------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 33bba98d5..7dc75abd4 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -68,7 +68,7 @@ USER citus # build postgres versions separately for effective parrallelism and caching of already built versions when changing only certain versions FROM base AS pg14 -RUN MAKEFLAGS="-j $(nproc)" pgenv build 14.12 +RUN MAKEFLAGS="-j $(nproc)" pgenv build 14.14 RUN rm .pgenv/src/*.tar* RUN make -C .pgenv/src/postgresql-*/ clean RUN make -C .pgenv/src/postgresql-*/src/include install @@ -80,7 +80,7 @@ RUN cp -r .pgenv/src .pgenv/pgsql-* .pgenv/config .pgenv-staging/ RUN rm .pgenv-staging/config/default.conf FROM base AS pg15 -RUN MAKEFLAGS="-j $(nproc)" pgenv build 15.7 +RUN MAKEFLAGS="-j $(nproc)" pgenv build 15.9 RUN rm .pgenv/src/*.tar* RUN make -C .pgenv/src/postgresql-*/ clean RUN make -C .pgenv/src/postgresql-*/src/include install @@ -92,7 +92,7 @@ RUN cp -r .pgenv/src .pgenv/pgsql-* .pgenv/config .pgenv-staging/ RUN rm .pgenv-staging/config/default.conf FROM base AS pg16 -RUN MAKEFLAGS="-j $(nproc)" pgenv build 16.3 +RUN MAKEFLAGS="-j $(nproc)" pgenv build 16.5 RUN rm .pgenv/src/*.tar* RUN make -C .pgenv/src/postgresql-*/ clean RUN make -C .pgenv/src/postgresql-*/src/include install @@ -211,7 +211,7 @@ COPY --chown=citus:citus .psqlrc . RUN sudo chown --from=root:root citus:citus -R ~ # sets default pg version -RUN pgenv switch 16.3 +RUN pgenv switch 16.5 # make connecting to the coordinator easy ENV PGPORT=9700 diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 70bc0bcb9..ffeea3094 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -31,14 +31,14 @@ jobs: pgupgrade_image_name: "ghcr.io/citusdata/pgupgradetester" style_checker_image_name: "ghcr.io/citusdata/stylechecker" style_checker_tools_version: "0.8.18" - sql_snapshot_pg_version: "16.3" - image_suffix: "-v13fd57c" - pg14_version: '{ "major": "14", "full": "14.12" }' - pg15_version: '{ "major": "15", "full": "15.7" }' - pg16_version: '{ "major": "16", "full": "16.3" }' - upgrade_pg_versions: "14.12-15.7-16.3" + sql_snapshot_pg_version: "16.5" + image_suffix: "-v1d9d7d7" + pg14_version: '{ "major": "14", "full": "14.14" }' + pg15_version: '{ "major": "15", "full": "15.9" }' + pg16_version: '{ "major": "16", "full": "16.5" }' + upgrade_pg_versions: "14.14-15.9-16.5" steps: - # Since GHA jobs needs at least one step we use a noop step here. + # Since GHA jobs need at least one step we use a noop step here. - name: Set up parameters run: echo 'noop' check-sql-snapshots: From 73411915a47d514cb17c0aaff0ce25ba35ae06d2 Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Mon, 23 Dec 2024 17:01:53 +0300 Subject: [PATCH 05/20] Avoid re-assigning the global pid for client backends and bg workers when the application_name changes (#7791) DESCRIPTION: Fixes a crash that happens because of unsafe catalog access when re-assigning the global pid after application_name changes. When application_name changes, we don't actually need to try re-assigning the global pid for external client backends because application_name doesn't affect the global pid for such backends. Plus, trying to re-assign the global pid for external client backends would unnecessarily cause performing a catalog access when the cached local node id is invalidated. However, accessing to the catalog tables is dangerous in certain situations like when we're not in a transaction block. And for the other types of backends, i.e., the Citus internal backends, we need to re-assign the global pid when the application_name changes because for such backends we simply extract the global pid inherited from the originating backend from the application_name -that's specified by originating backend when openning that connection- and this doesn't require catalog access. --- src/backend/distributed/shared_library_init.c | 31 +++++++++++----- .../test/run_from_same_connection.c | 4 +++ .../distributed/transaction/backend_data.c | 17 +++++++++ src/include/distributed/backend_data.h | 1 + .../regress/expected/remove_coordinator.out | 31 ++++++++++++++++ src/test/regress/sql/remove_coordinator.sql | 36 +++++++++++++++++++ 6 files changed, 111 insertions(+), 9 deletions(-) diff --git a/src/backend/distributed/shared_library_init.c b/src/backend/distributed/shared_library_init.c index bd65fa60c..6d26b802f 100644 --- a/src/backend/distributed/shared_library_init.c +++ b/src/backend/distributed/shared_library_init.c @@ -2890,14 +2890,27 @@ ApplicationNameAssignHook(const char *newval, void *extra) DetermineCitusBackendType(newval); /* - * AssignGlobalPID might read from catalog tables to get the the local - * nodeid. But ApplicationNameAssignHook might be called before catalog - * access is available to the backend (such as in early stages of - * authentication). We use StartupCitusBackend to initialize the global pid - * after catalogs are available. After that happens this hook becomes - * responsible to update the global pid on later application_name changes. - * So we set the FinishedStartupCitusBackend flag in StartupCitusBackend to - * indicate when this responsibility handoff has happened. + * We use StartupCitusBackend to initialize the global pid after catalogs + * are available. After that happens this hook becomes responsible to update + * the global pid on later application_name changes. So we set the + * FinishedStartupCitusBackend flag in StartupCitusBackend to indicate when + * this responsibility handoff has happened. + * + * Also note that when application_name changes, we don't actually need to + * try re-assigning the global pid for external client backends and + * background workers because application_name doesn't affect the global + * pid for such backends - note that !IsExternalClientBackend() check covers + * both types of backends. Plus, + * trying to re-assign the global pid for such backends would unnecessarily + * cause performing a catalog access when the cached local node id is + * invalidated. However, accessing to the catalog tables is dangerous in + * certain situations like when we're not in a transaction block. And for + * the other types of backends, i.e., the Citus internal backends, we need + * to re-assign the global pid when the application_name changes because for + * such backends we simply extract the global pid inherited from the + * originating backend from the application_name -that's specified by + * originating backend when openning that connection- and this doesn't require + * catalog access. * * Another solution to the catalog table acccess problem would be to update * global pid lazily, like we do for HideShards. But that's not possible @@ -2907,7 +2920,7 @@ ApplicationNameAssignHook(const char *newval, void *extra) * as reasonably possible, which is also why we extract global pids in the * AuthHook already (extracting doesn't require catalog access). */ - if (FinishedStartupCitusBackend) + if (FinishedStartupCitusBackend && !IsExternalClientBackend()) { AssignGlobalPID(newval); } diff --git a/src/backend/distributed/test/run_from_same_connection.c b/src/backend/distributed/test/run_from_same_connection.c index 52b2e0b18..d22ee4428 100644 --- a/src/backend/distributed/test/run_from_same_connection.c +++ b/src/backend/distributed/test/run_from_same_connection.c @@ -190,6 +190,9 @@ run_commands_on_session_level_connection_to_node(PG_FUNCTION_ARGS) /* * override_backend_data_gpid is a wrapper around SetBackendDataGpid(). + * Also sets distributedCommandOriginator to true since the only caller of + * this method calls this function actually wants this backend to + * be treated as a distributed command originator with the given global pid. */ Datum override_backend_data_gpid(PG_FUNCTION_ARGS) @@ -199,6 +202,7 @@ override_backend_data_gpid(PG_FUNCTION_ARGS) uint64 gpid = PG_GETARG_INT64(0); SetBackendDataGlobalPID(gpid); + SetBackendDataDistributedCommandOriginator(true); PG_RETURN_VOID(); } diff --git a/src/backend/distributed/transaction/backend_data.c b/src/backend/distributed/transaction/backend_data.c index 67acadd29..85fb0f6cf 100644 --- a/src/backend/distributed/transaction/backend_data.c +++ b/src/backend/distributed/transaction/backend_data.c @@ -964,6 +964,23 @@ SetBackendDataGlobalPID(uint64 gpid) } +/* + * SetBackendDataDistributedCommandOriginator sets the distributedCommandOriginator + * field on MyBackendData. + */ +void +SetBackendDataDistributedCommandOriginator(bool distributedCommandOriginator) +{ + if (!MyBackendData) + { + return; + } + SpinLockAcquire(&MyBackendData->mutex); + MyBackendData->distributedCommandOriginator = distributedCommandOriginator; + SpinLockRelease(&MyBackendData->mutex); +} + + /* * GetGlobalPID returns the global process id of the current backend. */ diff --git a/src/include/distributed/backend_data.h b/src/include/distributed/backend_data.h index 8014fe5a6..5b3fcf2ac 100644 --- a/src/include/distributed/backend_data.h +++ b/src/include/distributed/backend_data.h @@ -61,6 +61,7 @@ extern void AssignGlobalPID(const char *applicationName); extern uint64 GetGlobalPID(void); extern void SetBackendDataDatabaseId(void); extern void SetBackendDataGlobalPID(uint64 gpid); +extern void SetBackendDataDistributedCommandOriginator(bool distributedCommandOriginator); extern uint64 ExtractGlobalPID(const char *applicationName); extern int ExtractNodeIdFromGlobalPID(uint64 globalPID, bool missingOk); extern int ExtractProcessIdFromGlobalPID(uint64 globalPID); diff --git a/src/test/regress/expected/remove_coordinator.out b/src/test/regress/expected/remove_coordinator.out index 0226a7cd0..e2fd5df02 100644 --- a/src/test/regress/expected/remove_coordinator.out +++ b/src/test/regress/expected/remove_coordinator.out @@ -5,6 +5,37 @@ SELECT master_remove_node('localhost', :master_port); (1 row) +-- to silence -potentially flaky- "could not establish connection after" warnings in below test +SET client_min_messages TO ERROR; +-- to fail fast when the hostname is not resolvable, as it will be the case below +SET citus.node_connection_timeout to '1s'; +BEGIN; + SET application_name TO 'new_app_name'; + -- that should fail because of bad hostname & port + SELECT citus_add_node('200.200.200.200', 1, 200); +ERROR: connection to the remote node postgres@200.200.200.200:1 failed + -- Since above command failed, now Postgres will need to revert the + -- application_name change made in this transaction and this will + -- happen within abort-transaction callback, so we won't be in a + -- transaction block while Postgres does that. + -- + -- And when the application_name changes, Citus tries to re-assign + -- the global pid but it does so only for Citus internal backends, + -- and doing so for Citus internal backends doesn't require being + -- in a transaction block and is safe. + -- + -- However, for the client external backends (like us here), Citus + -- doesn't re-assign the global pid because it's not needed and it's + -- not safe to do so outside of a transaction block. This is because, + -- it would require performing a catalog access to retrive the local + -- node id when the cached local node is invalidated like what just + -- happened here because of the failed citus_add_node() call made + -- above. + -- + -- So by failing here (rather than crashing), we ensure this behavior. +ROLLBACK; +RESET client_min_messages; +RESET citus.node_connection_timeout; -- restore coordinator for the rest of the tests SELECT citus_set_coordinator_host('localhost', :master_port); citus_set_coordinator_host diff --git a/src/test/regress/sql/remove_coordinator.sql b/src/test/regress/sql/remove_coordinator.sql index b0df327d1..35a8a5718 100644 --- a/src/test/regress/sql/remove_coordinator.sql +++ b/src/test/regress/sql/remove_coordinator.sql @@ -1,5 +1,41 @@ -- removing coordinator from pg_dist_node should update pg_dist_colocation SELECT master_remove_node('localhost', :master_port); +-- to silence -potentially flaky- "could not establish connection after" warnings in below test +SET client_min_messages TO ERROR; + +-- to fail fast when the hostname is not resolvable, as it will be the case below +SET citus.node_connection_timeout to '1s'; + +BEGIN; + SET application_name TO 'new_app_name'; + + -- that should fail because of bad hostname & port + SELECT citus_add_node('200.200.200.200', 1, 200); + + -- Since above command failed, now Postgres will need to revert the + -- application_name change made in this transaction and this will + -- happen within abort-transaction callback, so we won't be in a + -- transaction block while Postgres does that. + -- + -- And when the application_name changes, Citus tries to re-assign + -- the global pid but it does so only for Citus internal backends, + -- and doing so for Citus internal backends doesn't require being + -- in a transaction block and is safe. + -- + -- However, for the client external backends (like us here), Citus + -- doesn't re-assign the global pid because it's not needed and it's + -- not safe to do so outside of a transaction block. This is because, + -- it would require performing a catalog access to retrive the local + -- node id when the cached local node is invalidated like what just + -- happened here because of the failed citus_add_node() call made + -- above. + -- + -- So by failing here (rather than crashing), we ensure this behavior. +ROLLBACK; + +RESET client_min_messages; +RESET citus.node_connection_timeout; + -- restore coordinator for the rest of the tests SELECT citus_set_coordinator_host('localhost', :master_port); From ab7c13beb5ec7415dc881c9b72ac0881b9daf4e5 Mon Sep 17 00:00:00 2001 From: Teja Mupparti Date: Tue, 24 Dec 2024 14:42:15 -0800 Subject: [PATCH 06/20] For scenarios, such as, Bug 3697586: Server crashes when assigning distributed transaction: Raise an ERROR instead of a crash --- src/backend/distributed/transaction/backend_data.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/backend/distributed/transaction/backend_data.c b/src/backend/distributed/transaction/backend_data.c index 85fb0f6cf..9b6e7d122 100644 --- a/src/backend/distributed/transaction/backend_data.c +++ b/src/backend/distributed/transaction/backend_data.c @@ -855,6 +855,16 @@ GetCurrentDistributedTransactionId(void) void AssignDistributedTransactionId(void) { + /* + * MyBackendData should always be available. However, we observed some + * crashes where certain hooks were not executed. + * Bug 3697586: Server crashes when assigning distributed transaction + */ + if (!MyBackendData) + { + ereport(ERROR, (errmsg("backend is not ready for distributed transactions"))); + } + pg_atomic_uint64 *transactionNumberSequence = &backendManagementShmemData->nextTransactionNumber; From 0a6adf4ccc908e373b7e7230ccc5b313ba63d9a4 Mon Sep 17 00:00:00 2001 From: Naisila Puka <37271756+naisila@users.noreply.github.com> Date: Thu, 2 Jan 2025 01:00:40 +0300 Subject: [PATCH 07/20] EXPLAIN generic_plan NOT supported in Citus (#7825) We thought we provided support for this in https://github.com/citusdata/citus/commit/b8c493f2c44efc1a19895fcadf5291b8285add7c However the use of parameters in SQL is not supported in Citus. Since generic plan queries use parameters, we can't support for now. Relevant PG16 commit https://github.com/postgres/postgres/commit/3c05284 Fixes #7813 with proper error message --- .../distributed/planner/multi_explain.c | 14 +++++----- src/test/regress/expected/pg16.out | 26 +++---------------- src/test/regress/sql/pg16.sql | 4 +-- 3 files changed, 13 insertions(+), 31 deletions(-) diff --git a/src/backend/distributed/planner/multi_explain.c b/src/backend/distributed/planner/multi_explain.c index 4584e7740..db30f4b60 100644 --- a/src/backend/distributed/planner/multi_explain.c +++ b/src/backend/distributed/planner/multi_explain.c @@ -190,6 +190,14 @@ PG_FUNCTION_INFO_V1(worker_save_query_explain_analyze); void CitusExplainScan(CustomScanState *node, List *ancestors, struct ExplainState *es) { +#if PG_VERSION_NUM >= PG_VERSION_16 + if (es->generic) + { + ereport(ERROR, (errmsg( + "EXPLAIN GENERIC_PLAN is currently not supported for Citus tables"))); + } +#endif + CitusScanState *scanState = (CitusScanState *) node; DistributedPlan *distributedPlan = scanState->distributedPlan; EState *executorState = ScanStateGetExecutorState(scanState); @@ -992,18 +1000,12 @@ BuildRemoteExplainQuery(char *queryString, ExplainState *es) appendStringInfo(explainQuery, "EXPLAIN (ANALYZE %s, VERBOSE %s, " "COSTS %s, BUFFERS %s, WAL %s, " -#if PG_VERSION_NUM >= PG_VERSION_16 - "GENERIC_PLAN %s, " -#endif "TIMING %s, SUMMARY %s, FORMAT %s) %s", es->analyze ? "TRUE" : "FALSE", es->verbose ? "TRUE" : "FALSE", es->costs ? "TRUE" : "FALSE", es->buffers ? "TRUE" : "FALSE", es->wal ? "TRUE" : "FALSE", -#if PG_VERSION_NUM >= PG_VERSION_16 - es->generic ? "TRUE" : "FALSE", -#endif es->timing ? "TRUE" : "FALSE", es->summary ? "TRUE" : "FALSE", formatStr, diff --git a/src/test/regress/expected/pg16.out b/src/test/regress/expected/pg16.out index a035fcfc4..546c0a832 100644 --- a/src/test/regress/expected/pg16.out +++ b/src/test/regress/expected/pg16.out @@ -81,29 +81,9 @@ SELECT create_distributed_table('tenk1', 'unique1'); (1 row) SET citus.log_remote_commands TO on; -EXPLAIN (GENERIC_PLAN) SELECT unique1 FROM tenk1 WHERE thousand = 1000; -NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); -DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx -NOTICE: issuing SAVEPOINT citus_explain_savepoint -DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx -NOTICE: issuing EXPLAIN (ANALYZE FALSE, VERBOSE FALSE, COSTS TRUE, BUFFERS FALSE, WAL FALSE, GENERIC_PLAN TRUE, TIMING FALSE, SUMMARY FALSE, FORMAT TEXT) SELECT unique1 FROM pg16.tenk1_950001 tenk1 WHERE (thousand OPERATOR(pg_catalog.=) 1000) -DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx -NOTICE: issuing ROLLBACK TO SAVEPOINT citus_explain_savepoint -DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx -NOTICE: issuing COMMIT -DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx - QUERY PLAN ---------------------------------------------------------------------- - Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0) - Task Count: 1 - Tasks Shown: All - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> Seq Scan on tenk1_950001 tenk1 (cost=0.00..35.50 rows=10 width=4) - Filter: (thousand = 1000) -(7 rows) - -EXPLAIN (GENERIC_PLAN, ANALYZE) SELECT unique1 FROM tenk1 WHERE thousand = 1000; +EXPLAIN (GENERIC_PLAN) SELECT unique1 FROM tenk1 WHERE thousand = $1; +ERROR: EXPLAIN GENERIC_PLAN is currently not supported for Citus tables +EXPLAIN (GENERIC_PLAN, ANALYZE) SELECT unique1 FROM tenk1 WHERE thousand = $1; ERROR: EXPLAIN options ANALYZE and GENERIC_PLAN cannot be used together SET citus.log_remote_commands TO off; -- Proper error when creating statistics without a name on a Citus table diff --git a/src/test/regress/sql/pg16.sql b/src/test/regress/sql/pg16.sql index 99024edcb..0312fcdff 100644 --- a/src/test/regress/sql/pg16.sql +++ b/src/test/regress/sql/pg16.sql @@ -58,8 +58,8 @@ CREATE TABLE tenk1 ( SELECT create_distributed_table('tenk1', 'unique1'); SET citus.log_remote_commands TO on; -EXPLAIN (GENERIC_PLAN) SELECT unique1 FROM tenk1 WHERE thousand = 1000; -EXPLAIN (GENERIC_PLAN, ANALYZE) SELECT unique1 FROM tenk1 WHERE thousand = 1000; +EXPLAIN (GENERIC_PLAN) SELECT unique1 FROM tenk1 WHERE thousand = $1; +EXPLAIN (GENERIC_PLAN, ANALYZE) SELECT unique1 FROM tenk1 WHERE thousand = $1; SET citus.log_remote_commands TO off; -- Proper error when creating statistics without a name on a Citus table From 70f84e4aeeb326b690c57380eafca7e904ebcc1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Seda=20G=C3=BCndo=C4=9Fdu?= <69769369+sedagundogdu@users.noreply.github.com> Date: Thu, 2 Jan 2025 12:22:22 +0300 Subject: [PATCH 08/20] Remove Debian Buster support from packaging pipelines (#7828) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove Debian Buster support from packaging-test-pipelines Co-authored-by: Gürkan İndibay --- .github/workflows/packaging-test-pipelines.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/packaging-test-pipelines.yml b/.github/workflows/packaging-test-pipelines.yml index 26b5cfc95..7f89b9f83 100644 --- a/.github/workflows/packaging-test-pipelines.yml +++ b/.github/workflows/packaging-test-pipelines.yml @@ -116,7 +116,6 @@ jobs: # for each deb based image and we use POSTGRES_VERSION to set # PG_CONFIG variable in each of those runs. packaging_docker_image: - - debian-buster-all - debian-bookworm-all - debian-bullseye-all - ubuntu-focal-all From 5ef2cd67edef2d05f69e3d0f8c9795b5d538e3fa Mon Sep 17 00:00:00 2001 From: Naisila Puka <37271756+naisila@users.noreply.github.com> Date: Mon, 13 Jan 2025 16:24:51 +0300 Subject: [PATCH 09/20] Bump pg versions 14.15, 15.10, 16.6 (#7829) Bump PG versions to the latest minors 14.15, 15.10, 16.6 There is a libpq symlink issue when the images are built remotely https://github.com/citusdata/citus/actions/runs/12583502447/job/35071296238 Hence, we use the commit sha of a local build of the images, pushed. This is temporary, until we find the underlying cause of the symlink issue. --------- Co-authored-by: Onur Tirtir --- .devcontainer/Dockerfile | 8 ++++---- .github/workflows/build_and_test.yml | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 7dc75abd4..9c0b011f0 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -68,7 +68,7 @@ USER citus # build postgres versions separately for effective parrallelism and caching of already built versions when changing only certain versions FROM base AS pg14 -RUN MAKEFLAGS="-j $(nproc)" pgenv build 14.14 +RUN MAKEFLAGS="-j $(nproc)" pgenv build 14.15 RUN rm .pgenv/src/*.tar* RUN make -C .pgenv/src/postgresql-*/ clean RUN make -C .pgenv/src/postgresql-*/src/include install @@ -80,7 +80,7 @@ RUN cp -r .pgenv/src .pgenv/pgsql-* .pgenv/config .pgenv-staging/ RUN rm .pgenv-staging/config/default.conf FROM base AS pg15 -RUN MAKEFLAGS="-j $(nproc)" pgenv build 15.9 +RUN MAKEFLAGS="-j $(nproc)" pgenv build 15.10 RUN rm .pgenv/src/*.tar* RUN make -C .pgenv/src/postgresql-*/ clean RUN make -C .pgenv/src/postgresql-*/src/include install @@ -92,7 +92,7 @@ RUN cp -r .pgenv/src .pgenv/pgsql-* .pgenv/config .pgenv-staging/ RUN rm .pgenv-staging/config/default.conf FROM base AS pg16 -RUN MAKEFLAGS="-j $(nproc)" pgenv build 16.5 +RUN MAKEFLAGS="-j $(nproc)" pgenv build 16.6 RUN rm .pgenv/src/*.tar* RUN make -C .pgenv/src/postgresql-*/ clean RUN make -C .pgenv/src/postgresql-*/src/include install @@ -211,7 +211,7 @@ COPY --chown=citus:citus .psqlrc . RUN sudo chown --from=root:root citus:citus -R ~ # sets default pg version -RUN pgenv switch 16.5 +RUN pgenv switch 16.6 # make connecting to the coordinator easy ENV PGPORT=9700 diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index ffeea3094..d149ff650 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -31,12 +31,12 @@ jobs: pgupgrade_image_name: "ghcr.io/citusdata/pgupgradetester" style_checker_image_name: "ghcr.io/citusdata/stylechecker" style_checker_tools_version: "0.8.18" - sql_snapshot_pg_version: "16.5" - image_suffix: "-v1d9d7d7" - pg14_version: '{ "major": "14", "full": "14.14" }' - pg15_version: '{ "major": "15", "full": "15.9" }' - pg16_version: '{ "major": "16", "full": "16.5" }' - upgrade_pg_versions: "14.14-15.9-16.5" + sql_snapshot_pg_version: "16.6" + image_suffix: "-v5779674" + pg14_version: '{ "major": "14", "full": "14.15" }' + pg15_version: '{ "major": "15", "full": "15.10" }' + pg16_version: '{ "major": "16", "full": "16.6" }' + upgrade_pg_versions: "14.15-15.10-16.6" steps: # Since GHA jobs need at least one step we use a noop step here. - name: Set up parameters From f7bead22d478ac3f407b1fb0f23739a289743bcc Mon Sep 17 00:00:00 2001 From: Naisila Puka <37271756+naisila@users.noreply.github.com> Date: Mon, 13 Jan 2025 16:49:50 +0300 Subject: [PATCH 10/20] Remove accidentally added citus-tools empty submodule (#7842) Accidentally added here https://github.com/citusdata/citus/commit/477571569178ca8f48321bc396f1db07b6f2244f --- citus-tools | 1 - 1 file changed, 1 deletion(-) delete mode 160000 citus-tools diff --git a/citus-tools b/citus-tools deleted file mode 160000 index 3376bd684..000000000 --- a/citus-tools +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 3376bd6845f0614908ed304f5033bd644c82d3bf From 7b6a828c7468db749afcd9914017ae79f7e70a4e Mon Sep 17 00:00:00 2001 From: Naisila Puka <37271756+naisila@users.noreply.github.com> Date: Wed, 22 Jan 2025 12:22:31 +0300 Subject: [PATCH 11/20] Changelog entries for 13.0.0 (#7850) --- CHANGELOG.md | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 94c85bcdf..0ebb6bec8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,41 @@ +### citus v13.0.0 (January 17, 2025) ### + +* Adds support for PostgreSQL 17 (#7699, #7661) + +* Adds `JSON_TABLE()` support in distributed queries (#7816) + +* Propagates `MERGE ... WHEN NOT MATCHED BY SOURCE` (#7807) + +* Propagates `MEMORY` and `SERIALIZE` options of `EXPLAIN` (#7802) + +* Adds support for identity columns in distributed partitioned tables (#7785) + +* Allows specifying an access method for distributed partitioned tables (#7818) + +* Allows exclusion constraints on distributed partitioned tables (#7733) + +* Allows configuring sslnegotiation using `citus.node_conn_info` (#7821) + +* Avoids wal receiver timeouts during large shard splits (#7229) + +* Fixes a bug causing incorrect writing of data to target `MERGE` repartition + command (#7659) + +* Fixes a crash that happens because of unsafe catalog access when re-assigning + the global pid after `application_name` changes (#7791) + +* Fixes incorrect `VALID UNTIL` setting assumption made for roles when syncing + them to new nodes (#7534) + +* Fixes segfault when calling distributed procedure with a parameterized + distribution argument (#7242) + +* Fixes server crash when trying to execute `activate_node_snapshot()` on a + single-node cluster (#7552) + +* Improves `citus_move_shard_placement()` to fail early if there is a new node + without reference tables yet (#7467) + ### citus v12.1.6 (Nov 14, 2024) ### * Propagates `SECURITY LABEL .. ON ROLE` statements (#7304) From af5fced935440089fe63cd3fe0f4758b4d24f422 Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Fri, 31 Jan 2025 14:23:01 +0300 Subject: [PATCH 12/20] Upgrade upload-artifacts action to 4.6.0 (cherry picked from commit 398a2ea1978c7a0b00bd69600efa238b86d011ca) --- .github/actions/save_logs_and_results/action.yml | 2 +- .github/actions/upload_coverage/action.yml | 2 +- .github/workflows/build_and_test.yml | 2 +- .github/workflows/flaky_test_debugging.yml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/actions/save_logs_and_results/action.yml b/.github/actions/save_logs_and_results/action.yml index 0f238835d..b344c68f2 100644 --- a/.github/actions/save_logs_and_results/action.yml +++ b/.github/actions/save_logs_and_results/action.yml @@ -6,7 +6,7 @@ inputs: runs: using: composite steps: - - uses: actions/upload-artifact@v3.1.1 + - uses: actions/upload-artifact@v4.6.0 name: Upload logs with: name: ${{ inputs.folder }} diff --git a/.github/actions/upload_coverage/action.yml b/.github/actions/upload_coverage/action.yml index 0b5f581a6..abffa784a 100644 --- a/.github/actions/upload_coverage/action.yml +++ b/.github/actions/upload_coverage/action.yml @@ -21,7 +21,7 @@ runs: mkdir -p /tmp/codeclimate cc-test-reporter format-coverage -t lcov -o /tmp/codeclimate/${{ inputs.flags }}.json lcov.info shell: bash - - uses: actions/upload-artifact@v3.1.1 + - uses: actions/upload-artifact@v4.6.0 with: path: "/tmp/codeclimate/*.json" name: codeclimate diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index d149ff650..4e3b99958 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -125,7 +125,7 @@ jobs: - name: Build run: "./ci/build-citus.sh" shell: bash - - uses: actions/upload-artifact@v3.1.1 + - uses: actions/upload-artifact@v4.6.0 with: name: build-${{ env.PG_MAJOR }} path: |- diff --git a/.github/workflows/flaky_test_debugging.yml b/.github/workflows/flaky_test_debugging.yml index 7135f99fa..812fbe241 100644 --- a/.github/workflows/flaky_test_debugging.yml +++ b/.github/workflows/flaky_test_debugging.yml @@ -34,7 +34,7 @@ jobs: echo "PG_MAJOR=${PG_MAJOR}" >> $GITHUB_ENV ./ci/build-citus.sh shell: bash - - uses: actions/upload-artifact@v3.1.1 + - uses: actions/upload-artifact@v4.6.0 with: name: build-${{ env.PG_MAJOR }} path: |- From a28f75cc778545aa5b8d589c46037ae192908f48 Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Fri, 31 Jan 2025 14:39:55 +0300 Subject: [PATCH 13/20] Upgrade download-artifacts action to 4.1.8 (cherry picked from commit 5317cc7310d81eeab88fa5fca568817f80f5c1df) --- .github/actions/setup_extension/action.yml | 2 +- .github/workflows/build_and_test.yml | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/actions/setup_extension/action.yml b/.github/actions/setup_extension/action.yml index 96b408e7e..33129f17d 100644 --- a/.github/actions/setup_extension/action.yml +++ b/.github/actions/setup_extension/action.yml @@ -17,7 +17,7 @@ runs: echo "PG_MAJOR=${{ inputs.pg_major }}" >> $GITHUB_ENV fi shell: bash - - uses: actions/download-artifact@v3.0.1 + - uses: actions/download-artifact@v4.1.8 with: name: build-${{ env.PG_MAJOR }} - name: Install Extension diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 4e3b99958..20f737744 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -399,7 +399,7 @@ jobs: - test-citus-upgrade - test-pg-upgrade steps: - - uses: actions/download-artifact@v3.0.1 + - uses: actions/download-artifact@v4.1.8 with: name: "codeclimate" path: "codeclimate" @@ -516,6 +516,7 @@ jobs: matrix: ${{ fromJson(needs.prepare_parallelization_matrix_32.outputs.json) }} steps: - uses: actions/checkout@v4 + - uses: actions/download-artifact@v4.1.8 - uses: "./.github/actions/setup_extension" - name: Run minimal tests run: |- From b6e3f395835b0acdd82f5396c84d42bb645326dd Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Fri, 31 Jan 2025 14:59:23 +0300 Subject: [PATCH 14/20] Fix flaky citus upgrade test (cherry picked from commit 4cad81d643df66e11c44e7b3e10edfc0748cd086) --- .../upgrade_pg_dist_cleanup_after_0.out | 9 +++++++++ .../upgrade_pg_dist_cleanup_before_0.out | 17 +++++++++++++++++ .../sql/upgrade_pg_dist_cleanup_after.sql | 5 +++++ .../sql/upgrade_pg_dist_cleanup_before.sql | 10 ++++++++++ 4 files changed, 41 insertions(+) diff --git a/src/test/regress/expected/upgrade_pg_dist_cleanup_after_0.out b/src/test/regress/expected/upgrade_pg_dist_cleanup_after_0.out index d71fad887..168c64cca 100644 --- a/src/test/regress/expected/upgrade_pg_dist_cleanup_after_0.out +++ b/src/test/regress/expected/upgrade_pg_dist_cleanup_after_0.out @@ -28,3 +28,12 @@ SELECT * FROM pg_dist_cleanup; CALL citus_cleanup_orphaned_resources(); NOTICE: cleaned up 1 orphaned resources DROP TABLE table_with_orphaned_shards; +-- Re-enable automatic shard cleanup by maintenance daemon as +-- we have disabled it in upgrade_pg_dist_cleanup_before.sql +ALTER SYSTEM RESET citus.defer_shard_delete_interval; +SELECT pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + diff --git a/src/test/regress/expected/upgrade_pg_dist_cleanup_before_0.out b/src/test/regress/expected/upgrade_pg_dist_cleanup_before_0.out index a0cf9ceb1..dd6c8868e 100644 --- a/src/test/regress/expected/upgrade_pg_dist_cleanup_before_0.out +++ b/src/test/regress/expected/upgrade_pg_dist_cleanup_before_0.out @@ -30,6 +30,23 @@ SELECT COUNT(*) FROM pg_dist_placement WHERE shardstate = 1 AND shardid IN (SELE (1 row) -- create an orphaned placement based on an existing one +-- +-- But before doing that, first disable automatic shard cleanup +-- by maintenance daemon so that we can reliably test the cleanup +-- in upgrade_pg_dist_cleanup_after.sql. +ALTER SYSTEM SET citus.defer_shard_delete_interval TO -1; +SELECT pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +SELECT pg_sleep(0.1); + pg_sleep +--------------------------------------------------------------------- + +(1 row) + INSERT INTO pg_dist_placement(placementid, shardid, shardstate, shardlength, groupid) SELECT nextval('pg_dist_placement_placementid_seq'::regclass), shardid, 4, shardlength, 3-groupid FROM pg_dist_placement diff --git a/src/test/regress/sql/upgrade_pg_dist_cleanup_after.sql b/src/test/regress/sql/upgrade_pg_dist_cleanup_after.sql index e84c35b60..333ac60ca 100644 --- a/src/test/regress/sql/upgrade_pg_dist_cleanup_after.sql +++ b/src/test/regress/sql/upgrade_pg_dist_cleanup_after.sql @@ -13,3 +13,8 @@ SELECT COUNT(*) FROM pg_dist_placement WHERE shardid IN (SELECT shardid FROM pg_ SELECT * FROM pg_dist_cleanup; CALL citus_cleanup_orphaned_resources(); DROP TABLE table_with_orphaned_shards; + +-- Re-enable automatic shard cleanup by maintenance daemon as +-- we have disabled it in upgrade_pg_dist_cleanup_before.sql +ALTER SYSTEM RESET citus.defer_shard_delete_interval; +SELECT pg_reload_conf(); diff --git a/src/test/regress/sql/upgrade_pg_dist_cleanup_before.sql b/src/test/regress/sql/upgrade_pg_dist_cleanup_before.sql index 62ec8a1fb..ec0eef353 100644 --- a/src/test/regress/sql/upgrade_pg_dist_cleanup_before.sql +++ b/src/test/regress/sql/upgrade_pg_dist_cleanup_before.sql @@ -16,6 +16,16 @@ SELECT create_distributed_table('table_with_orphaned_shards', 'a'); -- show all 32 placements are active SELECT COUNT(*) FROM pg_dist_placement WHERE shardstate = 1 AND shardid IN (SELECT shardid FROM pg_dist_shard WHERE logicalrelid='table_with_orphaned_shards'::regclass); -- create an orphaned placement based on an existing one +-- +-- But before doing that, first disable automatic shard cleanup +-- by maintenance daemon so that we can reliably test the cleanup +-- in upgrade_pg_dist_cleanup_after.sql. + +ALTER SYSTEM SET citus.defer_shard_delete_interval TO -1; +SELECT pg_reload_conf(); + +SELECT pg_sleep(0.1); + INSERT INTO pg_dist_placement(placementid, shardid, shardstate, shardlength, groupid) SELECT nextval('pg_dist_placement_placementid_seq'::regclass), shardid, 4, shardlength, 3-groupid FROM pg_dist_placement From 8783cae57f7dbe0422efea3d9b0af75af823c196 Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Fri, 31 Jan 2025 15:46:42 +0300 Subject: [PATCH 15/20] Avoid publishing artifacts with conflicting names .. as documented in actions/upload-artifact#480. (cherry picked from commit 0d4c676b078c443b499797a1b4fcfaba3b3508b1) --- .github/actions/upload_coverage/action.yml | 2 +- .github/workflows/build_and_test.yml | 15 +++++++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/.github/actions/upload_coverage/action.yml b/.github/actions/upload_coverage/action.yml index abffa784a..ba80ba63a 100644 --- a/.github/actions/upload_coverage/action.yml +++ b/.github/actions/upload_coverage/action.yml @@ -24,4 +24,4 @@ runs: - uses: actions/upload-artifact@v4.6.0 with: path: "/tmp/codeclimate/*.json" - name: codeclimate + name: codeclimate-${{ inputs.flags }} diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 20f737744..db5ad0b31 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -284,10 +284,12 @@ jobs: check-arbitrary-configs parallel=4 CONFIGS=$TESTS - uses: "./.github/actions/save_logs_and_results" if: always() + with: + folder: ${{ env.PG_MAJOR }}_arbitrary_configs_${{ matrix.parallel }} - uses: "./.github/actions/upload_coverage" if: always() with: - flags: ${{ env.pg_major }}_upgrade + flags: ${{ env.PG_MAJOR }}_arbitrary_configs_${{ matrix.parallel }} codecov_token: ${{ secrets.CODECOV_TOKEN }} test-pg-upgrade: name: PG${{ matrix.old_pg_major }}-PG${{ matrix.new_pg_major }} - check-pg-upgrade @@ -335,6 +337,8 @@ jobs: if: failure() - uses: "./.github/actions/save_logs_and_results" if: always() + with: + folder: ${{ env.old_pg_major }}_${{ env.new_pg_major }}_upgrade - uses: "./.github/actions/upload_coverage" if: always() with: @@ -380,10 +384,12 @@ jobs: done; - uses: "./.github/actions/save_logs_and_results" if: always() + with: + folder: ${{ env.PG_MAJOR }}_citus_upgrade - uses: "./.github/actions/upload_coverage" if: always() with: - flags: ${{ env.pg_major }}_upgrade + flags: ${{ env.PG_MAJOR }}_citus_upgrade codecov_token: ${{ secrets.CODECOV_TOKEN }} upload-coverage: if: always() @@ -401,8 +407,9 @@ jobs: steps: - uses: actions/download-artifact@v4.1.8 with: - name: "codeclimate" - path: "codeclimate" + pattern: codeclimate* + path: codeclimate + merge-multiple: true - name: Upload coverage results to Code Climate run: |- cc-test-reporter sum-coverage codeclimate/*.json -o total.json From 7073f061537ebc5d63b7c063c9cb70d0986bb5cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCrkan=20=C4=B0ndibay?= Date: Fri, 31 May 2024 20:52:17 +0300 Subject: [PATCH 16/20] Updates github checkout actions to v4 (#7611) (cherry picked from commit 3fe22406e62fb40da12a0d91f3ecc0cba81cdb24) --- .github/workflows/build_and_test.yml | 2 +- .github/workflows/packaging-test-pipelines.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index db5ad0b31..32c761766 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -48,7 +48,7 @@ jobs: image: ${{ needs.params.outputs.build_image_name }}:${{ needs.params.outputs.sql_snapshot_pg_version }}${{ needs.params.outputs.image_suffix }} options: --user root steps: - - uses: actions/checkout@v3.5.0 + - uses: actions/checkout@v4 - name: Check Snapshots run: | git config --global --add safe.directory ${GITHUB_WORKSPACE} diff --git a/.github/workflows/packaging-test-pipelines.yml b/.github/workflows/packaging-test-pipelines.yml index 7f89b9f83..db0fd08ef 100644 --- a/.github/workflows/packaging-test-pipelines.yml +++ b/.github/workflows/packaging-test-pipelines.yml @@ -129,7 +129,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set pg_config path and python parameters for deb based distros run: | From 9a0cc282b7e26c29c69882cd9cb4dd709ef8ad0f Mon Sep 17 00:00:00 2001 From: Naisila Puka <37271756+naisila@users.noreply.github.com> Date: Tue, 4 Feb 2025 15:55:35 +0300 Subject: [PATCH 17/20] Changelog entries for v13.0.1 (#7873) (cherry picked from commit d28a5eae6c78935313824d319480632783d48d10) --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ebb6bec8..ee3f2d0a2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +### citus v13.0.1 (February 4th, 2025) ### + +* Drops support for PostgreSQL 14 (#7753) + ### citus v13.0.0 (January 17, 2025) ### * Adds support for PostgreSQL 17 (#7699, #7661) From 2d8be018530016228795ecaa1a9bc72c9e8328d4 Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Tue, 4 Feb 2025 11:45:24 +0300 Subject: [PATCH 18/20] Disable 2PC recovery while executing ALTER EXTENSION cmd during Citus upgrade tests (cherry picked from commit b6b73e2f4c66211c23e167e094256264992c8bd4) --- .../citus_tests/upgrade/citus_upgrade_test.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/test/regress/citus_tests/upgrade/citus_upgrade_test.py b/src/test/regress/citus_tests/upgrade/citus_upgrade_test.py index 1ab448031..c25a34482 100755 --- a/src/test/regress/citus_tests/upgrade/citus_upgrade_test.py +++ b/src/test/regress/citus_tests/upgrade/citus_upgrade_test.py @@ -62,10 +62,16 @@ def run_citus_upgrade_tests(config, before_upgrade_schedule, after_upgrade_sched install_citus(config.post_tar_path) + # disable 2pc recovery for all nodes to work around https://github.com/citusdata/citus/issues/7875 + disable_2pc_recovery_for_all_nodes(config.bindir, config) + restart_databases(config.bindir, config.datadir, config.mixed_mode, config) run_alter_citus(config.bindir, config.mixed_mode, config) verify_upgrade(config, config.mixed_mode, config.node_name_to_ports.values()) + # re-enable 2pc recovery for all nodes + enable_2pc_recovery_for_all_nodes(config.bindir, config) + run_test_on_coordinator(config, after_upgrade_schedule) remove_citus(config.post_tar_path) @@ -146,6 +152,18 @@ def restart_database(pg_path, abs_data_path, node_name, node_ports, logfile_pref subprocess.run(command, check=True) +def disable_2pc_recovery_for_all_nodes(pg_path, config): + for port in config.node_name_to_ports.values(): + utils.psql(pg_path, port, "ALTER SYSTEM SET citus.recover_2pc_interval TO -1;") + utils.psql(pg_path, port, "SELECT pg_reload_conf();") + + +def enable_2pc_recovery_for_all_nodes(pg_path, config): + for port in config.node_name_to_ports.values(): + utils.psql(pg_path, port, "ALTER SYSTEM RESET citus.recover_2pc_interval;") + utils.psql(pg_path, port, "SELECT pg_reload_conf();") + + def run_alter_citus(pg_path, mixed_mode, config): for port in config.node_name_to_ports.values(): if mixed_mode and port in ( From 565c309a1ec4cd93af7d426a666a6b9ae757bb7d Mon Sep 17 00:00:00 2001 From: mulander Date: Wed, 5 Feb 2025 16:56:19 +0100 Subject: [PATCH 19/20] Update README.md Replace packages for 13.0.1. Drop mention of Centos, we are no longer building packages for it. Change release blog title, URL change pending. --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 2cf17098f..03551cd27 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -| **
The Citus database is 100% open source.

Learn what's new in the [Citus 12.1 release blog](https://www.citusdata.com/blog/2023/09/22/adding-postgres-16-support-to-citus-12-1/) and the [Citus Updates page](https://www.citusdata.com/updates/).

**| +| **
The Citus database is 100% open source.

Learn what's new in the [Citus 13.0 release blog](https://www.citusdata.com/blog/2023/09/22/adding-postgres-16-support-to-citus-12-1/) and the [Citus Updates page](https://www.citusdata.com/updates/).

**| |---|
@@ -95,14 +95,14 @@ Install packages on Ubuntu / Debian: ```bash curl https://install.citusdata.com/community/deb.sh > add-citus-repo.sh sudo bash add-citus-repo.sh -sudo apt-get -y install postgresql-16-citus-12.1 +sudo apt-get -y install postgresql-17-citus-13.0 ``` -Install packages on CentOS / Red Hat: +Install packages on Red Hat: ```bash curl https://install.citusdata.com/community/rpm.sh > add-citus-repo.sh sudo bash add-citus-repo.sh -sudo yum install -y citus121_16 +sudo yum install -y citus130_17 ``` To add Citus to your local PostgreSQL database, add the following to `postgresql.conf`: From f7c57351a76bce843ede26a10edf0835def72fef Mon Sep 17 00:00:00 2001 From: mulander Date: Thu, 6 Feb 2025 16:41:08 +0100 Subject: [PATCH 20/20] Update 13 blog URL --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 03551cd27..44fd65f50 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -| **
The Citus database is 100% open source.

Learn what's new in the [Citus 13.0 release blog](https://www.citusdata.com/blog/2023/09/22/adding-postgres-16-support-to-citus-12-1/) and the [Citus Updates page](https://www.citusdata.com/updates/).

**| +| **
The Citus database is 100% open source.

Learn what's new in the [Citus 13.0 release blog](https://www.citusdata.com/blog/2025/02/06/distribute-postgresql-17-with-citus-13/) and the [Citus Updates page](https://www.citusdata.com/updates/).

**| |---|