Greatly speed up "\d tablename" on servers with many tables (#7577)

DESCRIPTION: Fix performance issue when using "\d tablename" on a server
with many tables

We introduce a filter to every query on pg_class to automatically remove
shards. This is useful to make sure \d and PgAdmin are not cluttered
with shards. However, the way we were introducing this filter was using
`securityQuals` which can have negative impact on query performance.

On clusters with 100k+ tables this could cause a simple "\d tablename"
command to take multiple seconds, because a skipped optimization by
Postgres causes a full table scan. This changes the code to introduce
this filter in the regular `quals` list instead of in `securityQuals`.
Which causes Postgres to use the intended optimization again.

For reference, this was initially reported as a Postgres issue by me:

https://www.postgresql.org/message-id/flat/4189982.1712785863%40sss.pgh.pa.us#b87421293b362d581ea8677e3bfea920
pull/7563/head^2
Jelte Fennema-Nio 2024-04-16 17:26:12 +02:00 committed by GitHub
parent ada3ba2507
commit a0151aa31d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 120 additions and 9 deletions

View File

@ -54,6 +54,7 @@ static bool ShouldHideShardsInternal(void);
static bool IsPgBgWorker(void); static bool IsPgBgWorker(void);
static bool FilterShardsFromPgclass(Node *node, void *context); static bool FilterShardsFromPgclass(Node *node, void *context);
static Node * CreateRelationIsAKnownShardFilter(int pgClassVarno); static Node * CreateRelationIsAKnownShardFilter(int pgClassVarno);
static bool HasRangeTableRef(Node *node, int *varno);
PG_FUNCTION_INFO_V1(citus_table_is_visible); PG_FUNCTION_INFO_V1(citus_table_is_visible);
PG_FUNCTION_INFO_V1(relation_is_a_known_shard); PG_FUNCTION_INFO_V1(relation_is_a_known_shard);
@ -421,8 +422,8 @@ IsPgBgWorker(void)
/* /*
* FilterShardsFromPgclass adds a NOT relation_is_a_known_shard(oid) filter * FilterShardsFromPgclass adds a "relation_is_a_known_shard(oid) IS NOT TRUE"
* to the security quals of pg_class RTEs. * filter to the quals of queries that query pg_class.
*/ */
static bool static bool
FilterShardsFromPgclass(Node *node, void *context) FilterShardsFromPgclass(Node *node, void *context)
@ -456,12 +457,35 @@ FilterShardsFromPgclass(Node *node, void *context)
continue; continue;
} }
/*
* Skip if pg_class is not actually queried. This is possible on
* INSERT statements that insert into pg_class.
*/
if (!expression_tree_walker((Node *) query->jointree->fromlist,
HasRangeTableRef, &varno))
{
/* the query references pg_class */
continue;
}
/* make sure the expression is in the right memory context */ /* make sure the expression is in the right memory context */
MemoryContext originalContext = MemoryContextSwitchTo(queryContext); MemoryContext originalContext = MemoryContextSwitchTo(queryContext);
/* add NOT relation_is_a_known_shard(oid) to the security quals of the RTE */
rangeTableEntry->securityQuals = /* add relation_is_a_known_shard(oid) IS NOT TRUE to the quals of the query */
list_make1(CreateRelationIsAKnownShardFilter(varno)); Node *newQual = CreateRelationIsAKnownShardFilter(varno);
Node *oldQuals = query->jointree->quals;
if (oldQuals)
{
query->jointree->quals = (Node *) makeBoolExpr(
AND_EXPR,
list_make2(oldQuals, newQual),
-1);
}
else
{
query->jointree->quals = newQual;
}
MemoryContextSwitchTo(originalContext); MemoryContextSwitchTo(originalContext);
} }
@ -473,9 +497,32 @@ FilterShardsFromPgclass(Node *node, void *context)
} }
/*
* HasRangeTableRef passed to expression_tree_walker to check if a node is a
* RangeTblRef of the given varno is present in a fromlist.
*/
static bool
HasRangeTableRef(Node *node, int *varno)
{
if (IsA(node, RangeTblRef))
{
RangeTblRef *rangeTblRef = (RangeTblRef *) node;
return rangeTblRef->rtindex == *varno;
}
return expression_tree_walker(node, HasRangeTableRef, varno);
}
/* /*
* CreateRelationIsAKnownShardFilter constructs an expression of the form: * CreateRelationIsAKnownShardFilter constructs an expression of the form:
* NOT pg_catalog.relation_is_a_known_shard(oid) * pg_catalog.relation_is_a_known_shard(oid) IS NOT TRUE
*
* The difference between "NOT pg_catalog.relation_is_a_known_shard(oid)" and
* "pg_catalog.relation_is_a_known_shard(oid) IS NOT TRUE" is that the former
* will return FALSE if the function returns NULL, while the second will return
* TRUE. This difference is important in the case of outer joins, because this
* filter might be applied on an oid that is then NULL.
*/ */
static Node * static Node *
CreateRelationIsAKnownShardFilter(int pgClassVarno) CreateRelationIsAKnownShardFilter(int pgClassVarno)
@ -496,9 +543,9 @@ CreateRelationIsAKnownShardFilter(int pgClassVarno)
funcExpr->location = -1; funcExpr->location = -1;
funcExpr->args = list_make1(oidVar); funcExpr->args = list_make1(oidVar);
BoolExpr *notExpr = makeNode(BoolExpr); BooleanTest *notExpr = makeNode(BooleanTest);
notExpr->boolop = NOT_EXPR; notExpr->booltesttype = IS_NOT_TRUE;
notExpr->args = list_make1(funcExpr); notExpr->arg = (Expr *) funcExpr;
notExpr->location = -1; notExpr->location = -1;
return (Node *) notExpr; return (Node *) notExpr;

View File

@ -83,6 +83,52 @@ SELECT relname FROM pg_catalog.pg_class WHERE relnamespace = 'mx_hide_shard_name
test_table test_table
(1 row) (1 row)
-- Even when using subquery and having no existing quals on pg_clcass
SELECT relname FROM (SELECT relname, relnamespace FROM pg_catalog.pg_class) AS q WHERE relnamespace = 'mx_hide_shard_names'::regnamespace ORDER BY relname;
relname
---------------------------------------------------------------------
test_table
(1 row)
-- Check that inserts into pg_class don't add the filter
EXPLAIN (COSTS OFF) INSERT INTO pg_class VALUES (1);
QUERY PLAN
---------------------------------------------------------------------
Insert on pg_class
-> Result
(2 rows)
-- Unless it's an INSERT SELECT that queries from pg_class;
EXPLAIN (COSTS OFF) INSERT INTO pg_class SELECT * FROM pg_class;
QUERY PLAN
---------------------------------------------------------------------
Insert on pg_class
-> Seq Scan on pg_class pg_class_1
Filter: (relation_is_a_known_shard(oid) IS NOT TRUE)
(3 rows)
-- Check that query that psql "\d test_table" does gets optimized to an index
-- scan
EXPLAIN (COSTS OFF) SELECT c.oid,
n.nspname,
c.relname
FROM pg_catalog.pg_class c
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
WHERE c.relname OPERATOR(pg_catalog.~) '^(test_table)$' COLLATE pg_catalog.default
AND pg_catalog.pg_table_is_visible(c.oid)
ORDER BY 2, 3;
QUERY PLAN
---------------------------------------------------------------------
Sort
Sort Key: n.nspname, c.relname
-> Nested Loop Left Join
Join Filter: (n.oid = c.relnamespace)
-> Index Scan using pg_class_relname_nsp_index on pg_class c
Index Cond: (relname = 'test_table'::text)
Filter: ((relname ~ '^(test_table)$'::text) AND (relation_is_a_known_shard(oid) IS NOT TRUE) AND pg_table_is_visible(oid))
-> Seq Scan on pg_namespace n
(8 rows)
commit prepared 'take-aggressive-lock'; commit prepared 'take-aggressive-lock';
-- now create an index -- now create an index
\c - - - :master_port \c - - - :master_port

View File

@ -50,6 +50,24 @@ prepare transaction 'take-aggressive-lock';
-- shards are hidden when using psql as application_name -- shards are hidden when using psql as application_name
SELECT relname FROM pg_catalog.pg_class WHERE relnamespace = 'mx_hide_shard_names'::regnamespace ORDER BY relname; SELECT relname FROM pg_catalog.pg_class WHERE relnamespace = 'mx_hide_shard_names'::regnamespace ORDER BY relname;
-- Even when using subquery and having no existing quals on pg_clcass
SELECT relname FROM (SELECT relname, relnamespace FROM pg_catalog.pg_class) AS q WHERE relnamespace = 'mx_hide_shard_names'::regnamespace ORDER BY relname;
-- Check that inserts into pg_class don't add the filter
EXPLAIN (COSTS OFF) INSERT INTO pg_class VALUES (1);
-- Unless it's an INSERT SELECT that queries from pg_class;
EXPLAIN (COSTS OFF) INSERT INTO pg_class SELECT * FROM pg_class;
-- Check that query that psql "\d test_table" does gets optimized to an index
-- scan
EXPLAIN (COSTS OFF) SELECT c.oid,
n.nspname,
c.relname
FROM pg_catalog.pg_class c
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
WHERE c.relname OPERATOR(pg_catalog.~) '^(test_table)$' COLLATE pg_catalog.default
AND pg_catalog.pg_table_is_visible(c.oid)
ORDER BY 2, 3;
commit prepared 'take-aggressive-lock'; commit prepared 'take-aggressive-lock';