diff --git a/src/backend/distributed/commands/statistics.c b/src/backend/distributed/commands/statistics.c index a65d6c0fe..8959843d4 100644 --- a/src/backend/distributed/commands/statistics.c +++ b/src/backend/distributed/commands/statistics.c @@ -26,6 +26,7 @@ #include "catalog/pg_namespace.h" #include "catalog/pg_statistic_ext.h" #include "catalog/pg_type.h" +#include "commands/defrem.h" #include "distributed/commands/utility_hook.h" #include "distributed/commands.h" #include "distributed/deparse_shard_query.h" @@ -42,6 +43,7 @@ #include "utils/fmgroids.h" #include "utils/fmgrprotos.h" #include "utils/lsyscache.h" +#include "utils/regproc.h" #include "utils/relcache.h" #include "utils/ruleutils.h" #include "utils/syscache.h" @@ -56,6 +58,11 @@ static char * GenerateAlterIndexColumnSetStatsCommand(char *indexNameWithSchema, static Oid GetRelIdByStatsOid(Oid statsOid); static char * CreateAlterCommandIfOwnerNotDefault(Oid statsOid); static char * CreateAlterCommandIfTargetNotDefault(Oid statsOid); +#if PG_VERSION_NUM >= PG_VERSION_16 +static char * ChooseExtendedStatisticName(const char *name1, const char *name2, + const char *label, Oid namespaceid); +static char * ChooseExtendedStatisticNameAddition(List *exprs); +#endif /* * PreprocessCreateStatisticsStmt is called during the planning phase for @@ -77,6 +84,22 @@ PreprocessCreateStatisticsStmt(Node *node, const char *queryString, EnsureCoordinator(); +#if PG_VERSION_NUM >= PG_VERSION_16 + if (!(stmt->defnames)) + { + Relation rel = relation_openrv((RangeVar *) relation, ShareUpdateExclusiveLock); + + Oid namespaceId = RelationGetNamespace(rel); + char *namestr = ChooseExtendedStatisticName(RelationGetRelationName(rel), + ChooseExtendedStatisticNameAddition( + stmt->exprs), + "stat", + namespaceId); + stmt->defnames = stringToQualifiedNameList(namestr, NULL); + relation_close(rel, ShareUpdateExclusiveLock); + } +#endif + QualifyTreeNode((Node *) stmt); Oid statsOid = get_statistics_object_oid(stmt->defnames, true); @@ -780,3 +803,125 @@ CreateAlterCommandIfTargetNotDefault(Oid statsOid) return DeparseAlterStatisticsStmt((Node *) alterStatsStmt); } + + +#if PG_VERSION_NUM >= PG_VERSION_16 + +/* + * ChooseExtendedStatisticName - copied from PG source + * + * Select a nonconflicting name for a new statistics object. + * + * name1, name2, and label are used the same way as for makeObjectName(), + * except that the label can't be NULL; digits will be appended to the label + * if needed to create a name that is unique within the specified namespace. + * + * Returns a palloc'd string. + * + * Note: it is theoretically possible to get a collision anyway, if someone + * else chooses the same name concurrently. This is fairly unlikely to be + * a problem in practice, especially if one is holding a share update + * exclusive lock on the relation identified by name1. However, if choosing + * multiple names within a single command, you'd better create the new object + * and do CommandCounterIncrement before choosing the next one! + */ +static char * +ChooseExtendedStatisticName(const char *name1, const char *name2, + const char *label, Oid namespaceid) +{ + int pass = 0; + char *stxname = NULL; + char modlabel[NAMEDATALEN]; + + /* try the unmodified label first */ + strlcpy(modlabel, label, sizeof(modlabel)); + + for (;;) + { + Oid existingstats; + + stxname = makeObjectName(name1, name2, modlabel); + + existingstats = GetSysCacheOid2(STATEXTNAMENSP, Anum_pg_statistic_ext_oid, + PointerGetDatum(stxname), + ObjectIdGetDatum(namespaceid)); + if (!OidIsValid(existingstats)) + { + break; + } + + /* found a conflict, so try a new name component */ + pfree(stxname); + snprintf(modlabel, sizeof(modlabel), "%s%d", label, ++pass); + } + + return stxname; +} + + +/* + * ChooseExtendedStatisticNameAddition - copied from PG source + * + * Generate "name2" for a new statistics object given the list of column + * names for it. This will be passed to ChooseExtendedStatisticName along + * with the parent table name and a suitable label. + * + * We know that less than NAMEDATALEN characters will actually be used, + * so we can truncate the result once we've generated that many. + * + * XXX see also ChooseForeignKeyConstraintNameAddition and + * ChooseIndexNameAddition. + */ +static char * +ChooseExtendedStatisticNameAddition(List *exprs) +{ + char buf[NAMEDATALEN * 2]; + int buflen = 0; + ListCell *lc; + + buf[0] = '\0'; + foreach(lc, exprs) + { + StatsElem *selem = (StatsElem *) lfirst(lc); + const char *name; + + /* It should be one of these, but just skip if it happens not to be */ + if (!IsA(selem, StatsElem)) + { + continue; + } + + name = selem->name; + + if (buflen > 0) + { + buf[buflen++] = '_'; /* insert _ between names */ + } + + /* + * We use fixed 'expr' for expressions, which have empty column names. + * For indexes this is handled in ChooseIndexColumnNames, but we have + * no such function for stats and it does not seem worth adding. If a + * better name is needed, the user can specify it explicitly. + */ + if (!name) + { + name = "expr"; + } + + /* + * At this point we have buflen <= NAMEDATALEN. name should be less + * than NAMEDATALEN already, but use strlcpy for paranoia. + */ + strlcpy(buf + buflen, name, NAMEDATALEN); + buflen += strlen(buf + buflen); + if (buflen >= NAMEDATALEN) + { + break; + } + } + return pstrdup(buf); +} + + +#endif diff --git a/src/test/regress/expected/pg16.out b/src/test/regress/expected/pg16.out index 37580d8a7..a4e502db2 100644 --- a/src/test/regress/expected/pg16.out +++ b/src/test/regress/expected/pg16.out @@ -65,6 +65,69 @@ SET citus.log_remote_commands TO OFF; -- only verifying it works and not printing log -- remote commands because it can be flaky VACUUM (ONLY_DATABASE_STATS); +-- New feature supported on Citus - test statistics without a name +-- Relevant PG commit: +-- https://github.com/postgres/postgres/commit/624aa2a13bd02dd584bb0995c883b5b93b2152df +CREATE TABLE test_stats ( + a int, + b int +); +SELECT create_distributed_table('test_stats', 'a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE STATISTICS (dependencies) ON a, b FROM test_stats; +CREATE STATISTICS (ndistinct, dependencies) on a, b from test_stats; +CREATE STATISTICS (ndistinct, dependencies, mcv) on a, b from test_stats; +-- test for distributing an already existing statistics +CREATE TABLE test_stats2 ( + a int, + b int +); +CREATE STATISTICS test_stats_a_b_stat (dependencies) ON a, b FROM test_stats2; +ERROR: statistics object "test_stats_a_b_stat" already exists +SELECT create_distributed_table('test_stats2', 'a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- test when stats is on a different schema +CREATE SCHEMA sc1; +CREATE TABLE tbl (a int, b int); +SELECT create_distributed_table ('tbl', 'a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SET search_path TO sc1; +CREATE STATISTICS ON a, b FROM pg16.tbl; +\c - - - :worker_1_port +-- check all statistics have been correctly created at shards +SELECT stxnamespace, stxname +FROM pg_statistic_ext +WHERE stxnamespace IN ( + SELECT oid + FROM pg_namespace + WHERE nspname IN ('pg16', 'sc1') +) ORDER BY stxname ASC; + stxnamespace | stxname +--------------------------------------------------------------------- + 19232 | tbl_a_b_stat + 19232 | tbl_a_b_stat_950003 + 19204 | test_stats_a_b_stat + 19204 | test_stats_a_b_stat1 + 19204 | test_stats_a_b_stat1_950001 + 19204 | test_stats_a_b_stat2 + 19204 | test_stats_a_b_stat2_950001 + 19204 | test_stats_a_b_stat_950001 +(8 rows) + +\c - - - :master_port +SET search_path TO pg16; \set VERBOSITY terse SET client_min_messages TO ERROR; DROP SCHEMA pg16 CASCADE; diff --git a/src/test/regress/sql/pg16.sql b/src/test/regress/sql/pg16.sql index 29638ac1c..adf1f3106 100644 --- a/src/test/regress/sql/pg16.sql +++ b/src/test/regress/sql/pg16.sql @@ -45,6 +45,52 @@ SET citus.log_remote_commands TO OFF; -- remote commands because it can be flaky VACUUM (ONLY_DATABASE_STATS); +-- New feature supported on Citus - test statistics without a name +-- Relevant PG commit: +-- https://github.com/postgres/postgres/commit/624aa2a13bd02dd584bb0995c883b5b93b2152df + +CREATE TABLE test_stats ( + a int, + b int +); + +SELECT create_distributed_table('test_stats', 'a'); + +CREATE STATISTICS (dependencies) ON a, b FROM test_stats; +CREATE STATISTICS (ndistinct, dependencies) on a, b from test_stats; +CREATE STATISTICS (ndistinct, dependencies, mcv) on a, b from test_stats; + +-- test for distributing an already existing statistics +CREATE TABLE test_stats2 ( + a int, + b int +); + +CREATE STATISTICS test_stats_a_b_stat (dependencies) ON a, b FROM test_stats2; + +SELECT create_distributed_table('test_stats2', 'a'); + +-- test when stats is on a different schema +CREATE SCHEMA sc1; +CREATE TABLE tbl (a int, b int); +SELECT create_distributed_table ('tbl', 'a'); + +SET search_path TO sc1; +CREATE STATISTICS ON a, b FROM pg16.tbl; + +\c - - - :worker_1_port +-- check all statistics have been correctly created at shards +SELECT stxnamespace, stxname +FROM pg_statistic_ext +WHERE stxnamespace IN ( + SELECT oid + FROM pg_namespace + WHERE nspname IN ('pg16', 'sc1') +) ORDER BY stxname ASC; + +\c - - - :master_port +SET search_path TO pg16; + \set VERBOSITY terse SET client_min_messages TO ERROR; DROP SCHEMA pg16 CASCADE;