mirror of https://github.com/citusdata/citus.git
Propagate statistics with optional name
parent
682dca1f12
commit
e619127290
|
@ -26,6 +26,7 @@
|
|||
#include "catalog/pg_namespace.h"
|
||||
#include "catalog/pg_statistic_ext.h"
|
||||
#include "catalog/pg_type.h"
|
||||
#include "commands/defrem.h"
|
||||
#include "distributed/commands/utility_hook.h"
|
||||
#include "distributed/commands.h"
|
||||
#include "distributed/deparse_shard_query.h"
|
||||
|
@ -42,6 +43,7 @@
|
|||
#include "utils/fmgroids.h"
|
||||
#include "utils/fmgrprotos.h"
|
||||
#include "utils/lsyscache.h"
|
||||
#include "utils/regproc.h"
|
||||
#include "utils/relcache.h"
|
||||
#include "utils/ruleutils.h"
|
||||
#include "utils/syscache.h"
|
||||
|
@ -56,6 +58,11 @@ static char * GenerateAlterIndexColumnSetStatsCommand(char *indexNameWithSchema,
|
|||
static Oid GetRelIdByStatsOid(Oid statsOid);
|
||||
static char * CreateAlterCommandIfOwnerNotDefault(Oid statsOid);
|
||||
static char * CreateAlterCommandIfTargetNotDefault(Oid statsOid);
|
||||
#if PG_VERSION_NUM >= PG_VERSION_16
|
||||
static char * ChooseExtendedStatisticName(const char *name1, const char *name2,
|
||||
const char *label, Oid namespaceid);
|
||||
static char * ChooseExtendedStatisticNameAddition(List *exprs);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* PreprocessCreateStatisticsStmt is called during the planning phase for
|
||||
|
@ -77,6 +84,22 @@ PreprocessCreateStatisticsStmt(Node *node, const char *queryString,
|
|||
|
||||
EnsureCoordinator();
|
||||
|
||||
#if PG_VERSION_NUM >= PG_VERSION_16
|
||||
if (!(stmt->defnames))
|
||||
{
|
||||
Relation rel = relation_openrv((RangeVar *) relation, ShareUpdateExclusiveLock);
|
||||
|
||||
Oid namespaceId = RelationGetNamespace(rel);
|
||||
char *namestr = ChooseExtendedStatisticName(RelationGetRelationName(rel),
|
||||
ChooseExtendedStatisticNameAddition(
|
||||
stmt->exprs),
|
||||
"stat",
|
||||
namespaceId);
|
||||
stmt->defnames = stringToQualifiedNameList(namestr, NULL);
|
||||
relation_close(rel, ShareUpdateExclusiveLock);
|
||||
}
|
||||
#endif
|
||||
|
||||
QualifyTreeNode((Node *) stmt);
|
||||
|
||||
Oid statsOid = get_statistics_object_oid(stmt->defnames, true);
|
||||
|
@ -780,3 +803,125 @@ CreateAlterCommandIfTargetNotDefault(Oid statsOid)
|
|||
|
||||
return DeparseAlterStatisticsStmt((Node *) alterStatsStmt);
|
||||
}
|
||||
|
||||
|
||||
#if PG_VERSION_NUM >= PG_VERSION_16
|
||||
|
||||
/*
|
||||
* ChooseExtendedStatisticName - copied from PG source
|
||||
*
|
||||
* Select a nonconflicting name for a new statistics object.
|
||||
*
|
||||
* name1, name2, and label are used the same way as for makeObjectName(),
|
||||
* except that the label can't be NULL; digits will be appended to the label
|
||||
* if needed to create a name that is unique within the specified namespace.
|
||||
*
|
||||
* Returns a palloc'd string.
|
||||
*
|
||||
* Note: it is theoretically possible to get a collision anyway, if someone
|
||||
* else chooses the same name concurrently. This is fairly unlikely to be
|
||||
* a problem in practice, especially if one is holding a share update
|
||||
* exclusive lock on the relation identified by name1. However, if choosing
|
||||
* multiple names within a single command, you'd better create the new object
|
||||
* and do CommandCounterIncrement before choosing the next one!
|
||||
*/
|
||||
static char *
|
||||
ChooseExtendedStatisticName(const char *name1, const char *name2,
|
||||
const char *label, Oid namespaceid)
|
||||
{
|
||||
int pass = 0;
|
||||
char *stxname = NULL;
|
||||
char modlabel[NAMEDATALEN];
|
||||
|
||||
/* try the unmodified label first */
|
||||
strlcpy(modlabel, label, sizeof(modlabel));
|
||||
|
||||
for (;;)
|
||||
{
|
||||
Oid existingstats;
|
||||
|
||||
stxname = makeObjectName(name1, name2, modlabel);
|
||||
|
||||
existingstats = GetSysCacheOid2(STATEXTNAMENSP, Anum_pg_statistic_ext_oid,
|
||||
PointerGetDatum(stxname),
|
||||
ObjectIdGetDatum(namespaceid));
|
||||
if (!OidIsValid(existingstats))
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
/* found a conflict, so try a new name component */
|
||||
pfree(stxname);
|
||||
snprintf(modlabel, sizeof(modlabel), "%s%d", label, ++pass);
|
||||
}
|
||||
|
||||
return stxname;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ChooseExtendedStatisticNameAddition - copied from PG source
|
||||
*
|
||||
* Generate "name2" for a new statistics object given the list of column
|
||||
* names for it. This will be passed to ChooseExtendedStatisticName along
|
||||
* with the parent table name and a suitable label.
|
||||
*
|
||||
* We know that less than NAMEDATALEN characters will actually be used,
|
||||
* so we can truncate the result once we've generated that many.
|
||||
*
|
||||
* XXX see also ChooseForeignKeyConstraintNameAddition and
|
||||
* ChooseIndexNameAddition.
|
||||
*/
|
||||
static char *
|
||||
ChooseExtendedStatisticNameAddition(List *exprs)
|
||||
{
|
||||
char buf[NAMEDATALEN * 2];
|
||||
int buflen = 0;
|
||||
ListCell *lc;
|
||||
|
||||
buf[0] = '\0';
|
||||
foreach(lc, exprs)
|
||||
{
|
||||
StatsElem *selem = (StatsElem *) lfirst(lc);
|
||||
const char *name;
|
||||
|
||||
/* It should be one of these, but just skip if it happens not to be */
|
||||
if (!IsA(selem, StatsElem))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
name = selem->name;
|
||||
|
||||
if (buflen > 0)
|
||||
{
|
||||
buf[buflen++] = '_'; /* insert _ between names */
|
||||
}
|
||||
|
||||
/*
|
||||
* We use fixed 'expr' for expressions, which have empty column names.
|
||||
* For indexes this is handled in ChooseIndexColumnNames, but we have
|
||||
* no such function for stats and it does not seem worth adding. If a
|
||||
* better name is needed, the user can specify it explicitly.
|
||||
*/
|
||||
if (!name)
|
||||
{
|
||||
name = "expr";
|
||||
}
|
||||
|
||||
/*
|
||||
* At this point we have buflen <= NAMEDATALEN. name should be less
|
||||
* than NAMEDATALEN already, but use strlcpy for paranoia.
|
||||
*/
|
||||
strlcpy(buf + buflen, name, NAMEDATALEN);
|
||||
buflen += strlen(buf + buflen);
|
||||
if (buflen >= NAMEDATALEN)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
return pstrdup(buf);
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
|
|
@ -65,6 +65,69 @@ SET citus.log_remote_commands TO OFF;
|
|||
-- only verifying it works and not printing log
|
||||
-- remote commands because it can be flaky
|
||||
VACUUM (ONLY_DATABASE_STATS);
|
||||
-- New feature supported on Citus - test statistics without a name
|
||||
-- Relevant PG commit:
|
||||
-- https://github.com/postgres/postgres/commit/624aa2a13bd02dd584bb0995c883b5b93b2152df
|
||||
CREATE TABLE test_stats (
|
||||
a int,
|
||||
b int
|
||||
);
|
||||
SELECT create_distributed_table('test_stats', 'a');
|
||||
create_distributed_table
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
CREATE STATISTICS (dependencies) ON a, b FROM test_stats;
|
||||
CREATE STATISTICS (ndistinct, dependencies) on a, b from test_stats;
|
||||
CREATE STATISTICS (ndistinct, dependencies, mcv) on a, b from test_stats;
|
||||
-- test for distributing an already existing statistics
|
||||
CREATE TABLE test_stats2 (
|
||||
a int,
|
||||
b int
|
||||
);
|
||||
CREATE STATISTICS test_stats_a_b_stat (dependencies) ON a, b FROM test_stats2;
|
||||
ERROR: statistics object "test_stats_a_b_stat" already exists
|
||||
SELECT create_distributed_table('test_stats2', 'a');
|
||||
create_distributed_table
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
-- test when stats is on a different schema
|
||||
CREATE SCHEMA sc1;
|
||||
CREATE TABLE tbl (a int, b int);
|
||||
SELECT create_distributed_table ('tbl', 'a');
|
||||
create_distributed_table
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SET search_path TO sc1;
|
||||
CREATE STATISTICS ON a, b FROM pg16.tbl;
|
||||
\c - - - :worker_1_port
|
||||
-- check all statistics have been correctly created at shards
|
||||
SELECT stxnamespace, stxname
|
||||
FROM pg_statistic_ext
|
||||
WHERE stxnamespace IN (
|
||||
SELECT oid
|
||||
FROM pg_namespace
|
||||
WHERE nspname IN ('pg16', 'sc1')
|
||||
) ORDER BY stxname ASC;
|
||||
stxnamespace | stxname
|
||||
---------------------------------------------------------------------
|
||||
19232 | tbl_a_b_stat
|
||||
19232 | tbl_a_b_stat_950003
|
||||
19204 | test_stats_a_b_stat
|
||||
19204 | test_stats_a_b_stat1
|
||||
19204 | test_stats_a_b_stat1_950001
|
||||
19204 | test_stats_a_b_stat2
|
||||
19204 | test_stats_a_b_stat2_950001
|
||||
19204 | test_stats_a_b_stat_950001
|
||||
(8 rows)
|
||||
|
||||
\c - - - :master_port
|
||||
SET search_path TO pg16;
|
||||
\set VERBOSITY terse
|
||||
SET client_min_messages TO ERROR;
|
||||
DROP SCHEMA pg16 CASCADE;
|
||||
|
|
|
@ -45,6 +45,52 @@ SET citus.log_remote_commands TO OFF;
|
|||
-- remote commands because it can be flaky
|
||||
VACUUM (ONLY_DATABASE_STATS);
|
||||
|
||||
-- New feature supported on Citus - test statistics without a name
|
||||
-- Relevant PG commit:
|
||||
-- https://github.com/postgres/postgres/commit/624aa2a13bd02dd584bb0995c883b5b93b2152df
|
||||
|
||||
CREATE TABLE test_stats (
|
||||
a int,
|
||||
b int
|
||||
);
|
||||
|
||||
SELECT create_distributed_table('test_stats', 'a');
|
||||
|
||||
CREATE STATISTICS (dependencies) ON a, b FROM test_stats;
|
||||
CREATE STATISTICS (ndistinct, dependencies) on a, b from test_stats;
|
||||
CREATE STATISTICS (ndistinct, dependencies, mcv) on a, b from test_stats;
|
||||
|
||||
-- test for distributing an already existing statistics
|
||||
CREATE TABLE test_stats2 (
|
||||
a int,
|
||||
b int
|
||||
);
|
||||
|
||||
CREATE STATISTICS test_stats_a_b_stat (dependencies) ON a, b FROM test_stats2;
|
||||
|
||||
SELECT create_distributed_table('test_stats2', 'a');
|
||||
|
||||
-- test when stats is on a different schema
|
||||
CREATE SCHEMA sc1;
|
||||
CREATE TABLE tbl (a int, b int);
|
||||
SELECT create_distributed_table ('tbl', 'a');
|
||||
|
||||
SET search_path TO sc1;
|
||||
CREATE STATISTICS ON a, b FROM pg16.tbl;
|
||||
|
||||
\c - - - :worker_1_port
|
||||
-- check all statistics have been correctly created at shards
|
||||
SELECT stxnamespace, stxname
|
||||
FROM pg_statistic_ext
|
||||
WHERE stxnamespace IN (
|
||||
SELECT oid
|
||||
FROM pg_namespace
|
||||
WHERE nspname IN ('pg16', 'sc1')
|
||||
) ORDER BY stxname ASC;
|
||||
|
||||
\c - - - :master_port
|
||||
SET search_path TO pg16;
|
||||
|
||||
\set VERBOSITY terse
|
||||
SET client_min_messages TO ERROR;
|
||||
DROP SCHEMA pg16 CASCADE;
|
||||
|
|
Loading…
Reference in New Issue