Prevent failures on partitioned distributed tables with statistics objects on PG 15

Comment from the code is clear on this:
/*
 * The statistics objects of the distributed table are not relevant
 * for the distributed planning, so we can override it.
 *
 * Normally, we should not need this. However, the combination of
 * Postgres commit 269b532aef55a579ae02a3e8e8df14101570dfd9 and
 * Citus function AdjustPartitioningForDistributedPlanning()
 * forces us to do this. The commit expects statistics objects
 * of partitions to have "inh" flag set properly. Whereas, the
 * function overrides "inh" flag. To avoid Postgres to throw error,
 * we override statlist such that Postgres does not try to process
 * any statistics objects during the standard_planner() on the
 * coordinator. In the end, we do not need the standard_planner()
 * on the coordinator to generate an optimized plan. We call
 * into standard_planner() for other purposes, such as generating the
 * relationRestrictionContext here.
 *
 * AdjustPartitioningForDistributedPlanning() is a hack that we use
 * to prevent Postgres' standard_planner() to expand all the partitions
 * for the distributed planning when a distributed partitioned table
 * is queried. It is required for both correctness and performance
 * reasons. Although we can eliminate the use of the function for
 * the correctness (e.g., make sure that rest of the planner can handle
 * partitions), it's performance implication is hard to avoid. Certain
 * planning logic of Citus (such as router or query pushdown) relies
 * heavily on the relationRestrictionList. If
 * AdjustPartitioningForDistributedPlanning() is removed, all the
 * partitions show up in the, causing high planning times for
 * such queries.
 */
pull/6349/head^2
Onder Kalaci 2022-09-14 15:59:49 +03:00 committed by Hanefi Onaldi
parent 739b91afa6
commit 766f340ce0
7 changed files with 124 additions and 14 deletions

View File

@ -1897,14 +1897,14 @@ multi_relation_restriction_hook(PlannerInfo *root, RelOptInfo *relOptInfo,
MemoryContext restrictionsMemoryContext = plannerRestrictionContext->memoryContext;
MemoryContext oldMemoryContext = MemoryContextSwitchTo(restrictionsMemoryContext);
bool distributedTable = IsCitusTable(rte->relid);
bool isCitusTable = IsCitusTable(rte->relid);
RelationRestriction *relationRestriction = palloc0(sizeof(RelationRestriction));
relationRestriction->index = restrictionIndex;
relationRestriction->relationId = rte->relid;
relationRestriction->rte = rte;
relationRestriction->relOptInfo = relOptInfo;
relationRestriction->distributedRelation = distributedTable;
relationRestriction->citusTable = isCitusTable;
relationRestriction->plannerInfo = root;
/* see comments on GetVarFromAssignedParam() */
@ -1919,10 +1919,42 @@ multi_relation_restriction_hook(PlannerInfo *root, RelOptInfo *relOptInfo,
* We're also keeping track of whether all participant
* tables are reference tables.
*/
if (distributedTable)
if (isCitusTable)
{
cacheEntry = GetCitusTableCacheEntry(rte->relid);
/*
* The statistics objects of the distributed table are not relevant
* for the distributed planning, so we can override it.
*
* Normally, we should not need this. However, the combination of
* Postgres commit 269b532aef55a579ae02a3e8e8df14101570dfd9 and
* Citus function AdjustPartitioningForDistributedPlanning()
* forces us to do this. The commit expects statistics objects
* of partitions to have "inh" flag set properly. Whereas, the
* function overrides "inh" flag. To avoid Postgres to throw error,
* we override statlist such that Postgres does not try to process
* any statistics objects during the standard_planner() on the
* coordinator. In the end, we do not need the standard_planner()
* on the coordinator to generate an optimized plan. We call
* into standard_planner() for other purposes, such as generating the
* relationRestrictionContext here.
*
* AdjustPartitioningForDistributedPlanning() is a hack that we use
* to prevent Postgres' standard_planner() to expand all the partitions
* for the distributed planning when a distributed partitioned table
* is queried. It is required for both correctness and performance
* reasons. Although we can eliminate the use of the function for
* the correctness (e.g., make sure that rest of the planner can handle
* partitions), it's performance implication is hard to avoid. Certain
* planning logic of Citus (such as router or query pushdown) relies
* heavily on the relationRestrictionList. If
* AdjustPartitioningForDistributedPlanning() is removed, all the
* partitions show up in the, causing high planning times for
* such queries.
*/
relOptInfo->statlist = NIL;
relationRestrictionContext->allReferenceTables &=
IsCitusTableTypeCacheEntry(cacheEntry, REFERENCE_TABLE);
}

View File

@ -3692,7 +3692,7 @@ CopyRelationRestrictionContext(RelationRestrictionContext *oldContext)
newRestriction->index = oldRestriction->index;
newRestriction->relationId = oldRestriction->relationId;
newRestriction->distributedRelation = oldRestriction->distributedRelation;
newRestriction->citusTable = oldRestriction->citusTable;
newRestriction->rte = copyObject(oldRestriction->rte);
/* can't be copied, we copy (flatly) a RelOptInfo, and then decouple baserestrictinfo */

View File

@ -224,7 +224,7 @@ ContextContainsLocalRelation(RelationRestrictionContext *restrictionContext)
{
RelationRestriction *relationRestriction = lfirst(relationRestrictionCell);
if (!relationRestriction->distributedRelation)
if (!relationRestriction->citusTable)
{
return true;
}

View File

@ -56,7 +56,7 @@ typedef struct RelationRestriction
{
Index index;
Oid relationId;
bool distributedRelation;
bool citusTable;
RangeTblEntry *rte;
RelOptInfo *relOptInfo;
PlannerInfo *plannerInfo;

View File

@ -521,9 +521,9 @@ SELECT tablename, indexname FROM pg_indexes WHERE schemaname = 'fix_idx_names' O
tablename | indexname
---------------------------------------------------------------------
date_partitioned_citus_local_table | date_partitioned_citus_local_table_measureid_idx
date_partitioned_citus_local_table_361369 | date_partitioned_citus_local_table_measureid_idx_361369
date_partitioned_citus_local_table_361377 | date_partitioned_citus_local_table_measureid_idx_361377
partition_local_table | partition_local_table_measureid_idx
partition_local_table_361370 | partition_local_table_measureid_idx_361370
partition_local_table_361378 | partition_local_table_measureid_idx_361378
(4 rows)
-- creating a single object should only need to trigger fixing the single object
@ -753,7 +753,7 @@ DETAIL: drop cascades to table not_partitioned
drop cascades to table not_distributed
drop cascades to table fk_table
drop cascades to table p
drop cascades to table date_partitioned_citus_local_table_361369
drop cascades to table date_partitioned_citus_local_table_361377
drop cascades to table date_partitioned_citus_local_table
drop cascades to table parent_table
SELECT citus_remove_node('localhost', :master_port);

View File

@ -4324,12 +4324,66 @@ WHERE schemaname = 'partitioning_schema' AND tablename ilike '%part_table_with_%
(6 rows)
\c - - - :master_port
SET search_path TO partitioning_schema;
-- create parent table
CREATE TABLE stxdinp(i int, a int, b int) PARTITION BY RANGE (i);
-- create partition
CREATE TABLE stxdinp1 PARTITION OF stxdinp FOR VALUES FROM (1) TO (100);
-- populate table
INSERT INTO stxdinp SELECT 1, a/100, a/100 FROM generate_series(1, 999) a;
-- create extended statistics
CREATE STATISTICS stxdinp ON a, b FROM stxdinp;
-- distribute parent table
SELECT create_distributed_table('stxdinp', 'i');
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$partitioning_schema.stxdinp1$$)
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- run select query, works fine
SELECT a, b FROM stxdinp GROUP BY 1, 2;
a | b
---------------------------------------------------------------------
1 | 1
3 | 3
7 | 7
2 | 2
8 | 8
0 | 0
5 | 5
6 | 6
9 | 9
4 | 4
(10 rows)
-- partitions are processed recursively for PG15+
VACUUM ANALYZE stxdinp;
SELECT a, b FROM stxdinp GROUP BY 1, 2;
a | b
---------------------------------------------------------------------
1 | 1
3 | 3
7 | 7
2 | 2
8 | 8
0 | 0
5 | 5
6 | 6
9 | 9
4 | 4
(10 rows)
DROP SCHEMA partitioning_schema CASCADE;
NOTICE: drop cascades to 4 other objects
DETAIL: drop cascades to table partitioning_schema."schema-test"
drop cascades to table partitioning_schema.another_distributed_table
drop cascades to table partitioning_schema.distributed_parent_table
drop cascades to table partitioning_schema.part_table_with_very_long_name
NOTICE: drop cascades to 5 other objects
DETAIL: drop cascades to table "schema-test"
drop cascades to table another_distributed_table
drop cascades to table distributed_parent_table
drop cascades to table part_table_with_very_long_name
drop cascades to table stxdinp
RESET search_path;
DROP TABLE IF EXISTS
partitioning_hash_test,

View File

@ -2002,6 +2002,30 @@ SELECT tablename, indexname FROM pg_indexes
WHERE schemaname = 'partitioning_schema' AND tablename ilike '%part_table_with_%' ORDER BY 1, 2;
\c - - - :master_port
SET search_path TO partitioning_schema;
-- create parent table
CREATE TABLE stxdinp(i int, a int, b int) PARTITION BY RANGE (i);
-- create partition
CREATE TABLE stxdinp1 PARTITION OF stxdinp FOR VALUES FROM (1) TO (100);
-- populate table
INSERT INTO stxdinp SELECT 1, a/100, a/100 FROM generate_series(1, 999) a;
-- create extended statistics
CREATE STATISTICS stxdinp ON a, b FROM stxdinp;
-- distribute parent table
SELECT create_distributed_table('stxdinp', 'i');
-- run select query, works fine
SELECT a, b FROM stxdinp GROUP BY 1, 2;
-- partitions are processed recursively for PG15+
VACUUM ANALYZE stxdinp;
SELECT a, b FROM stxdinp GROUP BY 1, 2;
DROP SCHEMA partitioning_schema CASCADE;
RESET search_path;
DROP TABLE IF EXISTS