mirror of https://github.com/citusdata/citus.git
Propagate statistics with optional name
parent
682dca1f12
commit
e619127290
|
@ -26,6 +26,7 @@
|
||||||
#include "catalog/pg_namespace.h"
|
#include "catalog/pg_namespace.h"
|
||||||
#include "catalog/pg_statistic_ext.h"
|
#include "catalog/pg_statistic_ext.h"
|
||||||
#include "catalog/pg_type.h"
|
#include "catalog/pg_type.h"
|
||||||
|
#include "commands/defrem.h"
|
||||||
#include "distributed/commands/utility_hook.h"
|
#include "distributed/commands/utility_hook.h"
|
||||||
#include "distributed/commands.h"
|
#include "distributed/commands.h"
|
||||||
#include "distributed/deparse_shard_query.h"
|
#include "distributed/deparse_shard_query.h"
|
||||||
|
@ -42,6 +43,7 @@
|
||||||
#include "utils/fmgroids.h"
|
#include "utils/fmgroids.h"
|
||||||
#include "utils/fmgrprotos.h"
|
#include "utils/fmgrprotos.h"
|
||||||
#include "utils/lsyscache.h"
|
#include "utils/lsyscache.h"
|
||||||
|
#include "utils/regproc.h"
|
||||||
#include "utils/relcache.h"
|
#include "utils/relcache.h"
|
||||||
#include "utils/ruleutils.h"
|
#include "utils/ruleutils.h"
|
||||||
#include "utils/syscache.h"
|
#include "utils/syscache.h"
|
||||||
|
@ -56,6 +58,11 @@ static char * GenerateAlterIndexColumnSetStatsCommand(char *indexNameWithSchema,
|
||||||
static Oid GetRelIdByStatsOid(Oid statsOid);
|
static Oid GetRelIdByStatsOid(Oid statsOid);
|
||||||
static char * CreateAlterCommandIfOwnerNotDefault(Oid statsOid);
|
static char * CreateAlterCommandIfOwnerNotDefault(Oid statsOid);
|
||||||
static char * CreateAlterCommandIfTargetNotDefault(Oid statsOid);
|
static char * CreateAlterCommandIfTargetNotDefault(Oid statsOid);
|
||||||
|
#if PG_VERSION_NUM >= PG_VERSION_16
|
||||||
|
static char * ChooseExtendedStatisticName(const char *name1, const char *name2,
|
||||||
|
const char *label, Oid namespaceid);
|
||||||
|
static char * ChooseExtendedStatisticNameAddition(List *exprs);
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* PreprocessCreateStatisticsStmt is called during the planning phase for
|
* PreprocessCreateStatisticsStmt is called during the planning phase for
|
||||||
|
@ -77,6 +84,22 @@ PreprocessCreateStatisticsStmt(Node *node, const char *queryString,
|
||||||
|
|
||||||
EnsureCoordinator();
|
EnsureCoordinator();
|
||||||
|
|
||||||
|
#if PG_VERSION_NUM >= PG_VERSION_16
|
||||||
|
if (!(stmt->defnames))
|
||||||
|
{
|
||||||
|
Relation rel = relation_openrv((RangeVar *) relation, ShareUpdateExclusiveLock);
|
||||||
|
|
||||||
|
Oid namespaceId = RelationGetNamespace(rel);
|
||||||
|
char *namestr = ChooseExtendedStatisticName(RelationGetRelationName(rel),
|
||||||
|
ChooseExtendedStatisticNameAddition(
|
||||||
|
stmt->exprs),
|
||||||
|
"stat",
|
||||||
|
namespaceId);
|
||||||
|
stmt->defnames = stringToQualifiedNameList(namestr, NULL);
|
||||||
|
relation_close(rel, ShareUpdateExclusiveLock);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
QualifyTreeNode((Node *) stmt);
|
QualifyTreeNode((Node *) stmt);
|
||||||
|
|
||||||
Oid statsOid = get_statistics_object_oid(stmt->defnames, true);
|
Oid statsOid = get_statistics_object_oid(stmt->defnames, true);
|
||||||
|
@ -780,3 +803,125 @@ CreateAlterCommandIfTargetNotDefault(Oid statsOid)
|
||||||
|
|
||||||
return DeparseAlterStatisticsStmt((Node *) alterStatsStmt);
|
return DeparseAlterStatisticsStmt((Node *) alterStatsStmt);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#if PG_VERSION_NUM >= PG_VERSION_16
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ChooseExtendedStatisticName - copied from PG source
|
||||||
|
*
|
||||||
|
* Select a nonconflicting name for a new statistics object.
|
||||||
|
*
|
||||||
|
* name1, name2, and label are used the same way as for makeObjectName(),
|
||||||
|
* except that the label can't be NULL; digits will be appended to the label
|
||||||
|
* if needed to create a name that is unique within the specified namespace.
|
||||||
|
*
|
||||||
|
* Returns a palloc'd string.
|
||||||
|
*
|
||||||
|
* Note: it is theoretically possible to get a collision anyway, if someone
|
||||||
|
* else chooses the same name concurrently. This is fairly unlikely to be
|
||||||
|
* a problem in practice, especially if one is holding a share update
|
||||||
|
* exclusive lock on the relation identified by name1. However, if choosing
|
||||||
|
* multiple names within a single command, you'd better create the new object
|
||||||
|
* and do CommandCounterIncrement before choosing the next one!
|
||||||
|
*/
|
||||||
|
static char *
|
||||||
|
ChooseExtendedStatisticName(const char *name1, const char *name2,
|
||||||
|
const char *label, Oid namespaceid)
|
||||||
|
{
|
||||||
|
int pass = 0;
|
||||||
|
char *stxname = NULL;
|
||||||
|
char modlabel[NAMEDATALEN];
|
||||||
|
|
||||||
|
/* try the unmodified label first */
|
||||||
|
strlcpy(modlabel, label, sizeof(modlabel));
|
||||||
|
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
Oid existingstats;
|
||||||
|
|
||||||
|
stxname = makeObjectName(name1, name2, modlabel);
|
||||||
|
|
||||||
|
existingstats = GetSysCacheOid2(STATEXTNAMENSP, Anum_pg_statistic_ext_oid,
|
||||||
|
PointerGetDatum(stxname),
|
||||||
|
ObjectIdGetDatum(namespaceid));
|
||||||
|
if (!OidIsValid(existingstats))
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* found a conflict, so try a new name component */
|
||||||
|
pfree(stxname);
|
||||||
|
snprintf(modlabel, sizeof(modlabel), "%s%d", label, ++pass);
|
||||||
|
}
|
||||||
|
|
||||||
|
return stxname;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ChooseExtendedStatisticNameAddition - copied from PG source
|
||||||
|
*
|
||||||
|
* Generate "name2" for a new statistics object given the list of column
|
||||||
|
* names for it. This will be passed to ChooseExtendedStatisticName along
|
||||||
|
* with the parent table name and a suitable label.
|
||||||
|
*
|
||||||
|
* We know that less than NAMEDATALEN characters will actually be used,
|
||||||
|
* so we can truncate the result once we've generated that many.
|
||||||
|
*
|
||||||
|
* XXX see also ChooseForeignKeyConstraintNameAddition and
|
||||||
|
* ChooseIndexNameAddition.
|
||||||
|
*/
|
||||||
|
static char *
|
||||||
|
ChooseExtendedStatisticNameAddition(List *exprs)
|
||||||
|
{
|
||||||
|
char buf[NAMEDATALEN * 2];
|
||||||
|
int buflen = 0;
|
||||||
|
ListCell *lc;
|
||||||
|
|
||||||
|
buf[0] = '\0';
|
||||||
|
foreach(lc, exprs)
|
||||||
|
{
|
||||||
|
StatsElem *selem = (StatsElem *) lfirst(lc);
|
||||||
|
const char *name;
|
||||||
|
|
||||||
|
/* It should be one of these, but just skip if it happens not to be */
|
||||||
|
if (!IsA(selem, StatsElem))
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
name = selem->name;
|
||||||
|
|
||||||
|
if (buflen > 0)
|
||||||
|
{
|
||||||
|
buf[buflen++] = '_'; /* insert _ between names */
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We use fixed 'expr' for expressions, which have empty column names.
|
||||||
|
* For indexes this is handled in ChooseIndexColumnNames, but we have
|
||||||
|
* no such function for stats and it does not seem worth adding. If a
|
||||||
|
* better name is needed, the user can specify it explicitly.
|
||||||
|
*/
|
||||||
|
if (!name)
|
||||||
|
{
|
||||||
|
name = "expr";
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* At this point we have buflen <= NAMEDATALEN. name should be less
|
||||||
|
* than NAMEDATALEN already, but use strlcpy for paranoia.
|
||||||
|
*/
|
||||||
|
strlcpy(buf + buflen, name, NAMEDATALEN);
|
||||||
|
buflen += strlen(buf + buflen);
|
||||||
|
if (buflen >= NAMEDATALEN)
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return pstrdup(buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
|
@ -65,6 +65,69 @@ SET citus.log_remote_commands TO OFF;
|
||||||
-- only verifying it works and not printing log
|
-- only verifying it works and not printing log
|
||||||
-- remote commands because it can be flaky
|
-- remote commands because it can be flaky
|
||||||
VACUUM (ONLY_DATABASE_STATS);
|
VACUUM (ONLY_DATABASE_STATS);
|
||||||
|
-- New feature supported on Citus - test statistics without a name
|
||||||
|
-- Relevant PG commit:
|
||||||
|
-- https://github.com/postgres/postgres/commit/624aa2a13bd02dd584bb0995c883b5b93b2152df
|
||||||
|
CREATE TABLE test_stats (
|
||||||
|
a int,
|
||||||
|
b int
|
||||||
|
);
|
||||||
|
SELECT create_distributed_table('test_stats', 'a');
|
||||||
|
create_distributed_table
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
CREATE STATISTICS (dependencies) ON a, b FROM test_stats;
|
||||||
|
CREATE STATISTICS (ndistinct, dependencies) on a, b from test_stats;
|
||||||
|
CREATE STATISTICS (ndistinct, dependencies, mcv) on a, b from test_stats;
|
||||||
|
-- test for distributing an already existing statistics
|
||||||
|
CREATE TABLE test_stats2 (
|
||||||
|
a int,
|
||||||
|
b int
|
||||||
|
);
|
||||||
|
CREATE STATISTICS test_stats_a_b_stat (dependencies) ON a, b FROM test_stats2;
|
||||||
|
ERROR: statistics object "test_stats_a_b_stat" already exists
|
||||||
|
SELECT create_distributed_table('test_stats2', 'a');
|
||||||
|
create_distributed_table
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- test when stats is on a different schema
|
||||||
|
CREATE SCHEMA sc1;
|
||||||
|
CREATE TABLE tbl (a int, b int);
|
||||||
|
SELECT create_distributed_table ('tbl', 'a');
|
||||||
|
create_distributed_table
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SET search_path TO sc1;
|
||||||
|
CREATE STATISTICS ON a, b FROM pg16.tbl;
|
||||||
|
\c - - - :worker_1_port
|
||||||
|
-- check all statistics have been correctly created at shards
|
||||||
|
SELECT stxnamespace, stxname
|
||||||
|
FROM pg_statistic_ext
|
||||||
|
WHERE stxnamespace IN (
|
||||||
|
SELECT oid
|
||||||
|
FROM pg_namespace
|
||||||
|
WHERE nspname IN ('pg16', 'sc1')
|
||||||
|
) ORDER BY stxname ASC;
|
||||||
|
stxnamespace | stxname
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
19232 | tbl_a_b_stat
|
||||||
|
19232 | tbl_a_b_stat_950003
|
||||||
|
19204 | test_stats_a_b_stat
|
||||||
|
19204 | test_stats_a_b_stat1
|
||||||
|
19204 | test_stats_a_b_stat1_950001
|
||||||
|
19204 | test_stats_a_b_stat2
|
||||||
|
19204 | test_stats_a_b_stat2_950001
|
||||||
|
19204 | test_stats_a_b_stat_950001
|
||||||
|
(8 rows)
|
||||||
|
|
||||||
|
\c - - - :master_port
|
||||||
|
SET search_path TO pg16;
|
||||||
\set VERBOSITY terse
|
\set VERBOSITY terse
|
||||||
SET client_min_messages TO ERROR;
|
SET client_min_messages TO ERROR;
|
||||||
DROP SCHEMA pg16 CASCADE;
|
DROP SCHEMA pg16 CASCADE;
|
||||||
|
|
|
@ -45,6 +45,52 @@ SET citus.log_remote_commands TO OFF;
|
||||||
-- remote commands because it can be flaky
|
-- remote commands because it can be flaky
|
||||||
VACUUM (ONLY_DATABASE_STATS);
|
VACUUM (ONLY_DATABASE_STATS);
|
||||||
|
|
||||||
|
-- New feature supported on Citus - test statistics without a name
|
||||||
|
-- Relevant PG commit:
|
||||||
|
-- https://github.com/postgres/postgres/commit/624aa2a13bd02dd584bb0995c883b5b93b2152df
|
||||||
|
|
||||||
|
CREATE TABLE test_stats (
|
||||||
|
a int,
|
||||||
|
b int
|
||||||
|
);
|
||||||
|
|
||||||
|
SELECT create_distributed_table('test_stats', 'a');
|
||||||
|
|
||||||
|
CREATE STATISTICS (dependencies) ON a, b FROM test_stats;
|
||||||
|
CREATE STATISTICS (ndistinct, dependencies) on a, b from test_stats;
|
||||||
|
CREATE STATISTICS (ndistinct, dependencies, mcv) on a, b from test_stats;
|
||||||
|
|
||||||
|
-- test for distributing an already existing statistics
|
||||||
|
CREATE TABLE test_stats2 (
|
||||||
|
a int,
|
||||||
|
b int
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE STATISTICS test_stats_a_b_stat (dependencies) ON a, b FROM test_stats2;
|
||||||
|
|
||||||
|
SELECT create_distributed_table('test_stats2', 'a');
|
||||||
|
|
||||||
|
-- test when stats is on a different schema
|
||||||
|
CREATE SCHEMA sc1;
|
||||||
|
CREATE TABLE tbl (a int, b int);
|
||||||
|
SELECT create_distributed_table ('tbl', 'a');
|
||||||
|
|
||||||
|
SET search_path TO sc1;
|
||||||
|
CREATE STATISTICS ON a, b FROM pg16.tbl;
|
||||||
|
|
||||||
|
\c - - - :worker_1_port
|
||||||
|
-- check all statistics have been correctly created at shards
|
||||||
|
SELECT stxnamespace, stxname
|
||||||
|
FROM pg_statistic_ext
|
||||||
|
WHERE stxnamespace IN (
|
||||||
|
SELECT oid
|
||||||
|
FROM pg_namespace
|
||||||
|
WHERE nspname IN ('pg16', 'sc1')
|
||||||
|
) ORDER BY stxname ASC;
|
||||||
|
|
||||||
|
\c - - - :master_port
|
||||||
|
SET search_path TO pg16;
|
||||||
|
|
||||||
\set VERBOSITY terse
|
\set VERBOSITY terse
|
||||||
SET client_min_messages TO ERROR;
|
SET client_min_messages TO ERROR;
|
||||||
DROP SCHEMA pg16 CASCADE;
|
DROP SCHEMA pg16 CASCADE;
|
||||||
|
|
Loading…
Reference in New Issue