mirror of https://github.com/citusdata/citus.git
Greatly speed up "\d tablename" on servers with many tables (#7577)
DESCRIPTION: Fix performance issue when using "\d tablename" on a server with many tables We introduce a filter to every query on pg_class to automatically remove shards. This is useful to make sure \d and PgAdmin are not cluttered with shards. However, the way we were introducing this filter was using `securityQuals` which can have negative impact on query performance. On clusters with 100k+ tables this could cause a simple "\d tablename" command to take multiple seconds, because a skipped optimization by Postgres causes a full table scan. This changes the code to introduce this filter in the regular `quals` list instead of in `securityQuals`. Which causes Postgres to use the intended optimization again. For reference, this was initially reported as a Postgres issue by me: https://www.postgresql.org/message-id/flat/4189982.1712785863%40sss.pgh.pa.us#b87421293b362d581ea8677e3bfea920pull/7563/head^2
parent
ada3ba2507
commit
a0151aa31d
|
@ -54,6 +54,7 @@ static bool ShouldHideShardsInternal(void);
|
|||
static bool IsPgBgWorker(void);
|
||||
static bool FilterShardsFromPgclass(Node *node, void *context);
|
||||
static Node * CreateRelationIsAKnownShardFilter(int pgClassVarno);
|
||||
static bool HasRangeTableRef(Node *node, int *varno);
|
||||
|
||||
PG_FUNCTION_INFO_V1(citus_table_is_visible);
|
||||
PG_FUNCTION_INFO_V1(relation_is_a_known_shard);
|
||||
|
@ -421,8 +422,8 @@ IsPgBgWorker(void)
|
|||
|
||||
|
||||
/*
|
||||
* FilterShardsFromPgclass adds a NOT relation_is_a_known_shard(oid) filter
|
||||
* to the security quals of pg_class RTEs.
|
||||
* FilterShardsFromPgclass adds a "relation_is_a_known_shard(oid) IS NOT TRUE"
|
||||
* filter to the quals of queries that query pg_class.
|
||||
*/
|
||||
static bool
|
||||
FilterShardsFromPgclass(Node *node, void *context)
|
||||
|
@ -456,12 +457,35 @@ FilterShardsFromPgclass(Node *node, void *context)
|
|||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* Skip if pg_class is not actually queried. This is possible on
|
||||
* INSERT statements that insert into pg_class.
|
||||
*/
|
||||
if (!expression_tree_walker((Node *) query->jointree->fromlist,
|
||||
HasRangeTableRef, &varno))
|
||||
{
|
||||
/* the query references pg_class */
|
||||
continue;
|
||||
}
|
||||
|
||||
/* make sure the expression is in the right memory context */
|
||||
MemoryContext originalContext = MemoryContextSwitchTo(queryContext);
|
||||
|
||||
/* add NOT relation_is_a_known_shard(oid) to the security quals of the RTE */
|
||||
rangeTableEntry->securityQuals =
|
||||
list_make1(CreateRelationIsAKnownShardFilter(varno));
|
||||
|
||||
/* add relation_is_a_known_shard(oid) IS NOT TRUE to the quals of the query */
|
||||
Node *newQual = CreateRelationIsAKnownShardFilter(varno);
|
||||
Node *oldQuals = query->jointree->quals;
|
||||
if (oldQuals)
|
||||
{
|
||||
query->jointree->quals = (Node *) makeBoolExpr(
|
||||
AND_EXPR,
|
||||
list_make2(oldQuals, newQual),
|
||||
-1);
|
||||
}
|
||||
else
|
||||
{
|
||||
query->jointree->quals = newQual;
|
||||
}
|
||||
|
||||
MemoryContextSwitchTo(originalContext);
|
||||
}
|
||||
|
@ -473,9 +497,32 @@ FilterShardsFromPgclass(Node *node, void *context)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* HasRangeTableRef passed to expression_tree_walker to check if a node is a
|
||||
* RangeTblRef of the given varno is present in a fromlist.
|
||||
*/
|
||||
static bool
|
||||
HasRangeTableRef(Node *node, int *varno)
|
||||
{
|
||||
if (IsA(node, RangeTblRef))
|
||||
{
|
||||
RangeTblRef *rangeTblRef = (RangeTblRef *) node;
|
||||
return rangeTblRef->rtindex == *varno;
|
||||
}
|
||||
|
||||
return expression_tree_walker(node, HasRangeTableRef, varno);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* CreateRelationIsAKnownShardFilter constructs an expression of the form:
|
||||
* NOT pg_catalog.relation_is_a_known_shard(oid)
|
||||
* pg_catalog.relation_is_a_known_shard(oid) IS NOT TRUE
|
||||
*
|
||||
* The difference between "NOT pg_catalog.relation_is_a_known_shard(oid)" and
|
||||
* "pg_catalog.relation_is_a_known_shard(oid) IS NOT TRUE" is that the former
|
||||
* will return FALSE if the function returns NULL, while the second will return
|
||||
* TRUE. This difference is important in the case of outer joins, because this
|
||||
* filter might be applied on an oid that is then NULL.
|
||||
*/
|
||||
static Node *
|
||||
CreateRelationIsAKnownShardFilter(int pgClassVarno)
|
||||
|
@ -496,9 +543,9 @@ CreateRelationIsAKnownShardFilter(int pgClassVarno)
|
|||
funcExpr->location = -1;
|
||||
funcExpr->args = list_make1(oidVar);
|
||||
|
||||
BoolExpr *notExpr = makeNode(BoolExpr);
|
||||
notExpr->boolop = NOT_EXPR;
|
||||
notExpr->args = list_make1(funcExpr);
|
||||
BooleanTest *notExpr = makeNode(BooleanTest);
|
||||
notExpr->booltesttype = IS_NOT_TRUE;
|
||||
notExpr->arg = (Expr *) funcExpr;
|
||||
notExpr->location = -1;
|
||||
|
||||
return (Node *) notExpr;
|
||||
|
|
|
@ -83,6 +83,52 @@ SELECT relname FROM pg_catalog.pg_class WHERE relnamespace = 'mx_hide_shard_name
|
|||
test_table
|
||||
(1 row)
|
||||
|
||||
-- Even when using subquery and having no existing quals on pg_clcass
|
||||
SELECT relname FROM (SELECT relname, relnamespace FROM pg_catalog.pg_class) AS q WHERE relnamespace = 'mx_hide_shard_names'::regnamespace ORDER BY relname;
|
||||
relname
|
||||
---------------------------------------------------------------------
|
||||
test_table
|
||||
(1 row)
|
||||
|
||||
-- Check that inserts into pg_class don't add the filter
|
||||
EXPLAIN (COSTS OFF) INSERT INTO pg_class VALUES (1);
|
||||
QUERY PLAN
|
||||
---------------------------------------------------------------------
|
||||
Insert on pg_class
|
||||
-> Result
|
||||
(2 rows)
|
||||
|
||||
-- Unless it's an INSERT SELECT that queries from pg_class;
|
||||
EXPLAIN (COSTS OFF) INSERT INTO pg_class SELECT * FROM pg_class;
|
||||
QUERY PLAN
|
||||
---------------------------------------------------------------------
|
||||
Insert on pg_class
|
||||
-> Seq Scan on pg_class pg_class_1
|
||||
Filter: (relation_is_a_known_shard(oid) IS NOT TRUE)
|
||||
(3 rows)
|
||||
|
||||
-- Check that query that psql "\d test_table" does gets optimized to an index
|
||||
-- scan
|
||||
EXPLAIN (COSTS OFF) SELECT c.oid,
|
||||
n.nspname,
|
||||
c.relname
|
||||
FROM pg_catalog.pg_class c
|
||||
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
|
||||
WHERE c.relname OPERATOR(pg_catalog.~) '^(test_table)$' COLLATE pg_catalog.default
|
||||
AND pg_catalog.pg_table_is_visible(c.oid)
|
||||
ORDER BY 2, 3;
|
||||
QUERY PLAN
|
||||
---------------------------------------------------------------------
|
||||
Sort
|
||||
Sort Key: n.nspname, c.relname
|
||||
-> Nested Loop Left Join
|
||||
Join Filter: (n.oid = c.relnamespace)
|
||||
-> Index Scan using pg_class_relname_nsp_index on pg_class c
|
||||
Index Cond: (relname = 'test_table'::text)
|
||||
Filter: ((relname ~ '^(test_table)$'::text) AND (relation_is_a_known_shard(oid) IS NOT TRUE) AND pg_table_is_visible(oid))
|
||||
-> Seq Scan on pg_namespace n
|
||||
(8 rows)
|
||||
|
||||
commit prepared 'take-aggressive-lock';
|
||||
-- now create an index
|
||||
\c - - - :master_port
|
||||
|
|
|
@ -50,6 +50,24 @@ prepare transaction 'take-aggressive-lock';
|
|||
|
||||
-- shards are hidden when using psql as application_name
|
||||
SELECT relname FROM pg_catalog.pg_class WHERE relnamespace = 'mx_hide_shard_names'::regnamespace ORDER BY relname;
|
||||
-- Even when using subquery and having no existing quals on pg_clcass
|
||||
SELECT relname FROM (SELECT relname, relnamespace FROM pg_catalog.pg_class) AS q WHERE relnamespace = 'mx_hide_shard_names'::regnamespace ORDER BY relname;
|
||||
|
||||
-- Check that inserts into pg_class don't add the filter
|
||||
EXPLAIN (COSTS OFF) INSERT INTO pg_class VALUES (1);
|
||||
-- Unless it's an INSERT SELECT that queries from pg_class;
|
||||
EXPLAIN (COSTS OFF) INSERT INTO pg_class SELECT * FROM pg_class;
|
||||
|
||||
-- Check that query that psql "\d test_table" does gets optimized to an index
|
||||
-- scan
|
||||
EXPLAIN (COSTS OFF) SELECT c.oid,
|
||||
n.nspname,
|
||||
c.relname
|
||||
FROM pg_catalog.pg_class c
|
||||
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
|
||||
WHERE c.relname OPERATOR(pg_catalog.~) '^(test_table)$' COLLATE pg_catalog.default
|
||||
AND pg_catalog.pg_table_is_visible(c.oid)
|
||||
ORDER BY 2, 3;
|
||||
|
||||
commit prepared 'take-aggressive-lock';
|
||||
|
||||
|
|
Loading…
Reference in New Issue