Merge branch 'main' into main

pull/7734/head
michailtoksovo 2025-01-09 16:50:00 +00:00 committed by GitHub
commit 794dc5023e
12 changed files with 145 additions and 52 deletions


@@ -68,7 +68,7 @@ USER citus
 # build postgres versions separately for effective parallelism and caching of already built versions when changing only certain versions
 FROM base AS pg14
-RUN MAKEFLAGS="-j $(nproc)" pgenv build 14.12
+RUN MAKEFLAGS="-j $(nproc)" pgenv build 14.14
 RUN rm .pgenv/src/*.tar*
 RUN make -C .pgenv/src/postgresql-*/ clean
 RUN make -C .pgenv/src/postgresql-*/src/include install
@@ -80,7 +80,7 @@ RUN cp -r .pgenv/src .pgenv/pgsql-* .pgenv/config .pgenv-staging/
 RUN rm .pgenv-staging/config/default.conf
 FROM base AS pg15
-RUN MAKEFLAGS="-j $(nproc)" pgenv build 15.7
+RUN MAKEFLAGS="-j $(nproc)" pgenv build 15.9
 RUN rm .pgenv/src/*.tar*
 RUN make -C .pgenv/src/postgresql-*/ clean
 RUN make -C .pgenv/src/postgresql-*/src/include install
@@ -92,7 +92,7 @@ RUN cp -r .pgenv/src .pgenv/pgsql-* .pgenv/config .pgenv-staging/
 RUN rm .pgenv-staging/config/default.conf
 FROM base AS pg16
-RUN MAKEFLAGS="-j $(nproc)" pgenv build 16.3
+RUN MAKEFLAGS="-j $(nproc)" pgenv build 16.5
 RUN rm .pgenv/src/*.tar*
 RUN make -C .pgenv/src/postgresql-*/ clean
 RUN make -C .pgenv/src/postgresql-*/src/include install
@@ -211,7 +211,7 @@ COPY --chown=citus:citus .psqlrc .
 RUN sudo chown --from=root:root citus:citus -R ~
 # sets default pg version
-RUN pgenv switch 16.3
+RUN pgenv switch 16.5
 # make connecting to the coordinator easy
 ENV PGPORT=9700


@@ -31,14 +31,14 @@ jobs:
       pgupgrade_image_name: "ghcr.io/citusdata/pgupgradetester"
       style_checker_image_name: "ghcr.io/citusdata/stylechecker"
       style_checker_tools_version: "0.8.18"
-      sql_snapshot_pg_version: "16.3"
-      image_suffix: "-v13fd57c"
-      pg14_version: '{ "major": "14", "full": "14.12" }'
-      pg15_version: '{ "major": "15", "full": "15.7" }'
-      pg16_version: '{ "major": "16", "full": "16.3" }'
-      upgrade_pg_versions: "14.12-15.7-16.3"
+      sql_snapshot_pg_version: "16.5"
+      image_suffix: "-v1d9d7d7"
+      pg14_version: '{ "major": "14", "full": "14.14" }'
+      pg15_version: '{ "major": "15", "full": "15.9" }'
+      pg16_version: '{ "major": "16", "full": "16.5" }'
+      upgrade_pg_versions: "14.14-15.9-16.5"
     steps:
-      # Since GHA jobs needs at least one step we use a noop step here.
+      # Since GHA jobs need at least one step we use a noop step here.
       - name: Set up parameters
         run: echo 'noop'
   check-sql-snapshots:


@@ -116,7 +116,6 @@ jobs:
         # for each deb based image and we use POSTGRES_VERSION to set
         # PG_CONFIG variable in each of those runs.
         packaging_docker_image:
-          - debian-buster-all
           - debian-bookworm-all
           - debian-bullseye-all
           - ubuntu-focal-all


@@ -190,6 +190,14 @@ PG_FUNCTION_INFO_V1(worker_save_query_explain_analyze);
 void
 CitusExplainScan(CustomScanState *node, List *ancestors, struct ExplainState *es)
 {
+#if PG_VERSION_NUM >= PG_VERSION_16
+	if (es->generic)
+	{
+		ereport(ERROR, (errmsg(
+					"EXPLAIN GENERIC_PLAN is currently not supported for Citus tables")));
+	}
+#endif
+
 	CitusScanState *scanState = (CitusScanState *) node;
 	DistributedPlan *distributedPlan = scanState->distributedPlan;
 	EState *executorState = ScanStateGetExecutorState(scanState);
@@ -992,18 +1000,12 @@ BuildRemoteExplainQuery(char *queryString, ExplainState *es)
 	appendStringInfo(explainQuery,
 					 "EXPLAIN (ANALYZE %s, VERBOSE %s, "
 					 "COSTS %s, BUFFERS %s, WAL %s, "
-#if PG_VERSION_NUM >= PG_VERSION_16
-					 "GENERIC_PLAN %s, "
-#endif
 					 "TIMING %s, SUMMARY %s, FORMAT %s) %s",
 					 es->analyze ? "TRUE" : "FALSE",
 					 es->verbose ? "TRUE" : "FALSE",
 					 es->costs ? "TRUE" : "FALSE",
 					 es->buffers ? "TRUE" : "FALSE",
 					 es->wal ? "TRUE" : "FALSE",
-#if PG_VERSION_NUM >= PG_VERSION_16
-					 es->generic ? "TRUE" : "FALSE",
-#endif
					 es->timing ? "TRUE" : "FALSE",
 					 es->summary ? "TRUE" : "FALSE",
 					 formatStr,
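
With both #if blocks gone, the remote EXPLAIN prefix that BuildRemoteExplainQuery assembles has the same shape on every supported Postgres version. Here is a self-contained illustration, not the Citus implementation: it uses plain snprintf instead of Postgres' StringInfo, and the default option values and the "SELECT 1" payload are assumptions.

#include <stdbool.h>
#include <stdio.h>

int
main(void)
{
	/* defaults roughly matching a bare EXPLAIN: only COSTS is on */
	bool analyze = false, verbose = false, costs = true,
		 buffers = false, wal = false, timing = false, summary = false;

	char query[256];
	snprintf(query, sizeof(query),
			 "EXPLAIN (ANALYZE %s, VERBOSE %s, COSTS %s, BUFFERS %s, "
			 "WAL %s, TIMING %s, SUMMARY %s, FORMAT TEXT) %s",
			 analyze ? "TRUE" : "FALSE", verbose ? "TRUE" : "FALSE",
			 costs ? "TRUE" : "FALSE", buffers ? "TRUE" : "FALSE",
			 wal ? "TRUE" : "FALSE", timing ? "TRUE" : "FALSE",
			 summary ? "TRUE" : "FALSE", "SELECT 1");

	/* prints the rendered options list; note there is no GENERIC_PLAN
	 * option on any version anymore */
	puts(query);
	return 0;
}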


@@ -2890,14 +2890,27 @@ ApplicationNameAssignHook(const char *newval, void *extra)
 	DetermineCitusBackendType(newval);

 	/*
-	 * AssignGlobalPID might read from catalog tables to get the local
-	 * nodeid. But ApplicationNameAssignHook might be called before catalog
-	 * access is available to the backend (such as in early stages of
-	 * authentication). We use StartupCitusBackend to initialize the global pid
-	 * after catalogs are available. After that happens this hook becomes
-	 * responsible to update the global pid on later application_name changes.
-	 * So we set the FinishedStartupCitusBackend flag in StartupCitusBackend to
-	 * indicate when this responsibility handoff has happened.
+	 * We use StartupCitusBackend to initialize the global pid after catalogs
+	 * are available. After that happens this hook becomes responsible to update
+	 * the global pid on later application_name changes. So we set the
+	 * FinishedStartupCitusBackend flag in StartupCitusBackend to indicate when
+	 * this responsibility handoff has happened.
+	 *
+	 * Also note that when application_name changes, we don't actually need to
+	 * try re-assigning the global pid for external client backends and
+	 * background workers because application_name doesn't affect the global
+	 * pid for such backends - note that the !IsExternalClientBackend() check
+	 * covers both types of backends. Plus, trying to re-assign the global pid
+	 * for such backends would unnecessarily cause a catalog access when the
+	 * cached local node id is invalidated. However, accessing the catalog
+	 * tables is dangerous in certain situations, like when we're not in a
+	 * transaction block. And for the other types of backends, i.e., the Citus
+	 * internal backends, we need to re-assign the global pid when the
+	 * application_name changes because for such backends we simply extract the
+	 * global pid inherited from the originating backend from the
+	 * application_name - specified by the originating backend when opening
+	 * that connection - and this doesn't require catalog access.
 	 *
 	 * Another solution to the catalog table access problem would be to update
 	 * global pid lazily, like we do for HideShards. But that's not possible
@@ -2907,7 +2920,7 @@ ApplicationNameAssignHook(const char *newval, void *extra)
 	 * as reasonably possible, which is also why we extract global pids in the
 	 * AuthHook already (extracting doesn't require catalog access).
 	 */
-	if (FinishedStartupCitusBackend)
+	if (FinishedStartupCitusBackend && !IsExternalClientBackend())
 	{
 		AssignGlobalPID(newval);
 	}
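
The comment above leans on the fact that a Citus internal backend carries its originating backend's global pid inside application_name, so re-assignment is just string parsing. A minimal sketch of that parsing idea, assuming a "citus_internal gpid=<n>" naming convention; the real logic is ExtractGlobalPID in backend_data.c and may differ in details:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define CITUS_INTERNAL_PREFIX "citus_internal gpid="

/* returns the inherited gpid, or 0 for names that don't look like a
 * Citus internal connection (i.e., external client backends) */
static uint64_t
SketchExtractGlobalPID(const char *applicationName)
{
	size_t prefixLen = strlen(CITUS_INTERNAL_PREFIX);

	if (applicationName == NULL ||
		strncmp(applicationName, CITUS_INTERNAL_PREFIX, prefixLen) != 0)
	{
		return 0;
	}

	/* the digits after the prefix are the originating backend's gpid */
	return strtoull(applicationName + prefixLen, NULL, 10);
}

int
main(void)
{
	printf("%llu\n", (unsigned long long)
		   SketchExtractGlobalPID("citus_internal gpid=10000000001"));
	printf("%llu\n", (unsigned long long) SketchExtractGlobalPID("psql"));
	return 0;
}

No catalog access is needed anywhere in that path, which is why re-assigning on application_name changes stays safe for internal backends even outside a transaction block.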


@@ -190,6 +190,9 @@ run_commands_on_session_level_connection_to_node(PG_FUNCTION_ARGS)
 /*
  * override_backend_data_gpid is a wrapper around SetBackendDataGpid().
+ * Also sets distributedCommandOriginator to true, since the only caller of
+ * this function actually wants this backend to be treated as a distributed
+ * command originator with the given global pid.
  */
 Datum
 override_backend_data_gpid(PG_FUNCTION_ARGS)
@@ -199,6 +202,7 @@ override_backend_data_gpid(PG_FUNCTION_ARGS)
 	uint64 gpid = PG_GETARG_INT64(0);
 	SetBackendDataGlobalPID(gpid);
+	SetBackendDataDistributedCommandOriginator(true);
 	PG_RETURN_VOID();
 }


@@ -855,6 +855,16 @@ GetCurrentDistributedTransactionId(void)
 void
 AssignDistributedTransactionId(void)
 {
+	/*
+	 * MyBackendData should always be available. However, we observed some
+	 * crashes where certain hooks were not executed.
+	 * Bug 3697586: Server crashes when assigning distributed transaction
+	 */
+	if (!MyBackendData)
+	{
+		ereport(ERROR, (errmsg("backend is not ready for distributed transactions")));
+	}
+
 	pg_atomic_uint64 *transactionNumberSequence =
 		&backendManagementShmemData->nextTransactionNumber;
@@ -964,6 +974,23 @@ SetBackendDataGlobalPID(uint64 gpid)
 }

+
+/*
+ * SetBackendDataDistributedCommandOriginator sets the distributedCommandOriginator
+ * field on MyBackendData.
+ */
+void
+SetBackendDataDistributedCommandOriginator(bool distributedCommandOriginator)
+{
+	if (!MyBackendData)
+	{
+		return;
+	}
+
+	SpinLockAcquire(&MyBackendData->mutex);
+	MyBackendData->distributedCommandOriginator = distributedCommandOriginator;
+	SpinLockRelease(&MyBackendData->mutex);
+}
+
 /*
  * GetGlobalPID returns the global process id of the current backend.
  */
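
For context on the transactionNumberSequence touched above: each backend draws its distributed transaction number from a shared 64-bit counter, presumably via pg_atomic_fetch_add_u64, so the new NULL guard only protects the per-backend MyBackendData access, not the counter itself. A standalone sketch of the counter idea using C11 atomics; the names are hypothetical, and the server uses Postgres' pg_atomic_* API on shared memory rather than <stdatomic.h>:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint64_t nextTransactionNumber = 1;

static uint64_t
SketchAssignTransactionNumber(void)
{
	/* fetch-add returns the pre-increment value, so concurrent callers
	 * each observe a distinct, monotonically increasing number */
	return atomic_fetch_add(&nextTransactionNumber, 1);
}

int
main(void)
{
	printf("%llu\n", (unsigned long long) SketchAssignTransactionNumber());
	printf("%llu\n", (unsigned long long) SketchAssignTransactionNumber());
	return 0;
}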


@@ -61,6 +61,7 @@ extern void AssignGlobalPID(const char *applicationName);
 extern uint64 GetGlobalPID(void);
 extern void SetBackendDataDatabaseId(void);
 extern void SetBackendDataGlobalPID(uint64 gpid);
+extern void SetBackendDataDistributedCommandOriginator(bool distributedCommandOriginator);
 extern uint64 ExtractGlobalPID(const char *applicationName);
 extern int ExtractNodeIdFromGlobalPID(uint64 globalPID, bool missingOk);
 extern int ExtractProcessIdFromGlobalPID(uint64 globalPID);


@@ -81,29 +81,9 @@ SELECT create_distributed_table('tenk1', 'unique1');
 (1 row)

 SET citus.log_remote_commands TO on;
-EXPLAIN (GENERIC_PLAN) SELECT unique1 FROM tenk1 WHERE thousand = 1000;
-NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
-DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
-NOTICE: issuing SAVEPOINT citus_explain_savepoint
-DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
-NOTICE: issuing EXPLAIN (ANALYZE FALSE, VERBOSE FALSE, COSTS TRUE, BUFFERS FALSE, WAL FALSE, GENERIC_PLAN TRUE, TIMING FALSE, SUMMARY FALSE, FORMAT TEXT) SELECT unique1 FROM pg16.tenk1_950001 tenk1 WHERE (thousand OPERATOR(pg_catalog.=) 1000)
-DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
-NOTICE: issuing ROLLBACK TO SAVEPOINT citus_explain_savepoint
-DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
-NOTICE: issuing COMMIT
-DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
-                              QUERY PLAN
----------------------------------------------------------------------
- Custom Scan (Citus Adaptive)  (cost=0.00..0.00 rows=0 width=0)
-   Task Count: 1
-   Tasks Shown: All
-   ->  Task
-         Node: host=localhost port=xxxxx dbname=regression
-         ->  Seq Scan on tenk1_950001 tenk1  (cost=0.00..35.50 rows=10 width=4)
-               Filter: (thousand = 1000)
-(7 rows)
-
-EXPLAIN (GENERIC_PLAN, ANALYZE) SELECT unique1 FROM tenk1 WHERE thousand = 1000;
+EXPLAIN (GENERIC_PLAN) SELECT unique1 FROM tenk1 WHERE thousand = $1;
+ERROR: EXPLAIN GENERIC_PLAN is currently not supported for Citus tables
+EXPLAIN (GENERIC_PLAN, ANALYZE) SELECT unique1 FROM tenk1 WHERE thousand = $1;
 ERROR: EXPLAIN options ANALYZE and GENERIC_PLAN cannot be used together
 SET citus.log_remote_commands TO off;
 -- Proper error when creating statistics without a name on a Citus table


@@ -5,6 +5,37 @@ SELECT master_remove_node('localhost', :master_port);
 (1 row)

+-- to silence -potentially flaky- "could not establish connection after" warnings in below test
+SET client_min_messages TO ERROR;
+-- to fail fast when the hostname is not resolvable, as it will be the case below
+SET citus.node_connection_timeout to '1s';
+BEGIN;
+SET application_name TO 'new_app_name';
+-- this should fail because of the bad hostname & port
+SELECT citus_add_node('200.200.200.200', 1, 200);
+ERROR: connection to the remote node postgres@200.200.200.200:1 failed
+-- Since the above command failed, Postgres will now need to revert the
+-- application_name change made in this transaction, and this will
+-- happen within the abort-transaction callback, so we won't be in a
+-- transaction block while Postgres does that.
+--
+-- And when the application_name changes, Citus tries to re-assign
+-- the global pid, but it does so only for Citus internal backends,
+-- and doing so for Citus internal backends doesn't require being
+-- in a transaction block and is safe.
+--
+-- However, for external client backends (like us here), Citus
+-- doesn't re-assign the global pid because it's not needed and it's
+-- not safe to do so outside of a transaction block. This is because
+-- it would require performing a catalog access to retrieve the local
+-- node id when the cached local node id is invalidated, like what just
+-- happened here because of the failed citus_add_node() call made
+-- above.
+--
+-- So by failing here (rather than crashing), we ensure this behavior.
+ROLLBACK;
+RESET client_min_messages;
+RESET citus.node_connection_timeout;
 -- restore coordinator for the rest of the tests
 SELECT citus_set_coordinator_host('localhost', :master_port);
 citus_set_coordinator_host


@@ -58,8 +58,8 @@ CREATE TABLE tenk1 (
 SELECT create_distributed_table('tenk1', 'unique1');

 SET citus.log_remote_commands TO on;
-EXPLAIN (GENERIC_PLAN) SELECT unique1 FROM tenk1 WHERE thousand = 1000;
-EXPLAIN (GENERIC_PLAN, ANALYZE) SELECT unique1 FROM tenk1 WHERE thousand = 1000;
+EXPLAIN (GENERIC_PLAN) SELECT unique1 FROM tenk1 WHERE thousand = $1;
+EXPLAIN (GENERIC_PLAN, ANALYZE) SELECT unique1 FROM tenk1 WHERE thousand = $1;
 SET citus.log_remote_commands TO off;
 -- Proper error when creating statistics without a name on a Citus table


@@ -1,5 +1,41 @@
 -- removing coordinator from pg_dist_node should update pg_dist_colocation
 SELECT master_remove_node('localhost', :master_port);
+-- to silence -potentially flaky- "could not establish connection after" warnings in below test
+SET client_min_messages TO ERROR;
+-- to fail fast when the hostname is not resolvable, as it will be the case below
+SET citus.node_connection_timeout to '1s';
+BEGIN;
+SET application_name TO 'new_app_name';
+-- this should fail because of the bad hostname & port
+SELECT citus_add_node('200.200.200.200', 1, 200);
+-- Since the above command failed, Postgres will now need to revert the
+-- application_name change made in this transaction, and this will
+-- happen within the abort-transaction callback, so we won't be in a
+-- transaction block while Postgres does that.
+--
+-- And when the application_name changes, Citus tries to re-assign
+-- the global pid, but it does so only for Citus internal backends,
+-- and doing so for Citus internal backends doesn't require being
+-- in a transaction block and is safe.
+--
+-- However, for external client backends (like us here), Citus
+-- doesn't re-assign the global pid because it's not needed and it's
+-- not safe to do so outside of a transaction block. This is because
+-- it would require performing a catalog access to retrieve the local
+-- node id when the cached local node id is invalidated, like what just
+-- happened here because of the failed citus_add_node() call made
+-- above.
+--
+-- So by failing here (rather than crashing), we ensure this behavior.
+ROLLBACK;
+RESET client_min_messages;
+RESET citus.node_connection_timeout;
 -- restore coordinator for the rest of the tests
 SELECT citus_set_coordinator_host('localhost', :master_port);