From 6056cb2c2931ae33b42f009872385af518cf2f8b Mon Sep 17 00:00:00 2001 From: Naisila Puka <37271756+naisila@users.noreply.github.com> Date: Tue, 8 Aug 2023 15:51:21 +0300 Subject: [PATCH] PG16 compatibility - get_relation_info hook to avoid crash from adjusted partitioning (#7099) PG16 compatibility - Part 5 Check out part 1 https://github.com/citusdata/citus/commit/42d956888d5be65153ccf24cb039027ecd7c0192 part 2 https://github.com/citusdata/citus/commit/0d503dd5ac5547ca71cd0147e53236d8d8a22fce part 3 https://github.com/citusdata/citus/commit/907d72e60d4043924f52200b24d281fe7b79f75f part 4 https://github.com/citusdata/citus/commit/7c6b4ce1035491ff5a31a9d15bb8b28f3c0dd5b3 This commit is in the series of PG16 compatibility commits. Find the explanation below: If we allow to adjust partitioning, we get a crash when accessing amcostestimate of partitioned indexes, because amcostestimate is NULL for them. The following PG commit is the culprit: https://github.com/postgres/postgres/commit/3c569049b7b502bb4952483d19ce622ff0af5fd6 3c569049b7b502bb4952483d19ce622ff0af5fd6 Previously, partitioned indexes would just be ignored. Now, they are added in the list. However get_relation_info expects the tables which have partitioned indexes to have the inh flag set properly. AdjustPartitioningForDistributedPlanning plays with that flag, hence we don't get the desired behaviour. The hook is simply removing all partitioned indexes from the list. More PG16 compatibility commits are coming soon ... --- .../distributed/planner/distributed_planner.c | 71 +++++++++++++++++++ src/backend/distributed/shared_library_init.c | 2 + src/include/distributed/distributed_planner.h | 2 + 3 files changed, 75 insertions(+) diff --git a/src/backend/distributed/planner/distributed_planner.c b/src/backend/distributed/planner/distributed_planner.c index 3b6a8f9f7..dfce411ad 100644 --- a/src/backend/distributed/planner/distributed_planner.c +++ b/src/backend/distributed/planner/distributed_planner.c @@ -108,6 +108,7 @@ static int AssignRTEIdentities(List *rangeTableList, int rteIdCounter); static void AssignRTEIdentity(RangeTblEntry *rangeTableEntry, int rteIdentifier); static void AdjustPartitioningForDistributedPlanning(List *rangeTableList, bool setPartitionedTablesInherited); +static bool RTEWentThroughAdjustPartitioning(RangeTblEntry *rangeTableEntry); static PlannedStmt * FinalizeNonRouterPlan(PlannedStmt *localPlan, DistributedPlan *distributedPlan, CustomScan *customScan); @@ -494,6 +495,20 @@ AdjustPartitioningForDistributedPlanning(List *rangeTableList, } +/* + * RTEWentThroughAdjustPartitioning returns true if the given rangetableentry + * has been modified through AdjustPartitioningForDistributedPlanning + * function, false otherwise. + */ +static bool +RTEWentThroughAdjustPartitioning(RangeTblEntry *rangeTableEntry) +{ + return (rangeTableEntry->rtekind == RTE_RELATION && + PartitionedTable(rangeTableEntry->relid) && + rangeTableEntry->inh == false); +} + + /* * AssignRTEIdentity assigns the given rteIdentifier to the given range table * entry. @@ -1976,6 +1991,62 @@ multi_relation_restriction_hook(PlannerInfo *root, RelOptInfo *relOptInfo, } +/* + * multi_get_relation_info_hook modifies the relation's indexlist + * if necessary, to avoid a crash in PG16 caused by our + * Citus function AdjustPartitioningForDistributedPlanning(). + * + * AdjustPartitioningForDistributedPlanning() is a hack that we use + * to prevent Postgres' standard_planner() to expand all the partitions + * for the distributed planning when a distributed partitioned table + * is queried. It is required for both correctness and performance + * reasons. Although we can eliminate the use of the function for + * the correctness (e.g., make sure that rest of the planner can handle + * partitions), it's performance implication is hard to avoid. Certain + * planning logic of Citus (such as router or query pushdown) relies + * heavily on the relationRestrictionList. If + * AdjustPartitioningForDistributedPlanning() is removed, all the + * partitions show up in the relationRestrictionList, causing high + * planning times for such queries. + */ +void +multi_get_relation_info_hook(PlannerInfo *root, Oid relationObjectId, bool inhparent, + RelOptInfo *rel) +{ + if (!CitusHasBeenLoaded()) + { + return; + } + + Index varno = rel->relid; + RangeTblEntry *rangeTableEntry = planner_rt_fetch(varno, root); + + if (RTEWentThroughAdjustPartitioning(rangeTableEntry)) + { + ListCell *lc = NULL; + foreach(lc, rel->indexlist) + { + IndexOptInfo *indexOptInfo = (IndexOptInfo *) lfirst(lc); + if (get_rel_relkind(indexOptInfo->indexoid) == RELKIND_PARTITIONED_INDEX) + { + /* + * Normally, we should not need this. However, the combination of + * Postgres commit 3c569049b7b502bb4952483d19ce622ff0af5fd6 and + * Citus function AdjustPartitioningForDistributedPlanning() + * forces us to do this. The commit expects partitioned indexes + * to belong to relations with "inh" flag set properly. Whereas, the + * function overrides "inh" flag. To avoid a crash, + * we go over the list of indexinfos and remove all partitioned indexes. + * Partitioned indexes were ignored pre PG16 anyway, we are essentially + * not breaking any logic. + */ + rel->indexlist = foreach_delete_current(rel->indexlist, lc); + } + } + } +} + + /* * TranslatedVars deep copies the translated vars for the given relation index * if there is any append rel list. diff --git a/src/backend/distributed/shared_library_init.c b/src/backend/distributed/shared_library_init.c index a2b1811b8..e84aa282a 100644 --- a/src/backend/distributed/shared_library_init.c +++ b/src/backend/distributed/shared_library_init.c @@ -104,6 +104,7 @@ #include "replication/walsender.h" #include "storage/ipc.h" #include "optimizer/planner.h" +#include "optimizer/plancat.h" #include "optimizer/paths.h" #include "tcop/tcopprot.h" #include "utils/guc.h" @@ -452,6 +453,7 @@ _PG_init(void) /* register for planner hook */ set_rel_pathlist_hook = multi_relation_restriction_hook; + get_relation_info_hook = multi_get_relation_info_hook; set_join_pathlist_hook = multi_join_restriction_hook; ExecutorStart_hook = CitusExecutorStart; ExecutorRun_hook = CitusExecutorRun; diff --git a/src/include/distributed/distributed_planner.h b/src/include/distributed/distributed_planner.h index aac936a98..d46fbf2e6 100644 --- a/src/include/distributed/distributed_planner.h +++ b/src/include/distributed/distributed_planner.h @@ -234,6 +234,8 @@ extern List * TranslatedVarsForRteIdentity(int rteIdentity); extern struct DistributedPlan * GetDistributedPlan(CustomScan *node); extern void multi_relation_restriction_hook(PlannerInfo *root, RelOptInfo *relOptInfo, Index restrictionIndex, RangeTblEntry *rte); +extern void multi_get_relation_info_hook(PlannerInfo *root, Oid relationObjectId, bool + inhparent, RelOptInfo *rel); extern void multi_join_restriction_hook(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel,