Fix bug in redundant WHERE clause detection. (#8162)

Need to also check Postgres plan's rangetables for relations used in Initplans.

DESCRIPTION: Fix a bug in redundant WHERE clause detection; we need to
additionally check the Postgres plan's range tables for the presence of
citus tables, to account for relations that are referenced from scalar
subqueries.

There is a fundamental flaw in 4139370, the assumption that, after
Postgres planning has completed, all tables used in a query can be
obtained by walking the query tree. This is not the case for scalar
subqueries, which will be referenced by `PARAM` nodes. The fix adds an
additional check of the Postgres plan range tables; if there is at least
one citus table in there we do not need to change the needs distributed
planning flag.

Fixes #8159
release-13.2-colm
Colm 2025-08-27 13:32:02 +01:00 committed by Colm McHugh
parent 0b9acbeb3d
commit 91cae1fb29
4 changed files with 105 additions and 11 deletions

View File

@ -141,10 +141,9 @@ static RouterPlanType GetRouterPlanType(Query *query,
bool hasUnresolvedParams);
static void ConcatenateRTablesAndPerminfos(PlannedStmt *mainPlan,
PlannedStmt *concatPlan);
static bool CheckPostPlanDistribution(bool isDistributedQuery,
Query *origQuery,
List *rangeTableList,
Query *plannedQuery);
static bool CheckPostPlanDistribution(DistributedPlanningContext *planContext,
bool isDistributedQuery,
List *rangeTableList);
/* Distributed planner hook */
PlannedStmt *
@ -265,10 +264,9 @@ distributed_planner(Query *parse,
planContext.plan = standard_planner(planContext.query, NULL,
planContext.cursorOptions,
planContext.boundParams);
needsDistributedPlanning = CheckPostPlanDistribution(needsDistributedPlanning,
planContext.originalQuery,
rangeTableList,
planContext.query);
needsDistributedPlanning = CheckPostPlanDistribution(&planContext,
needsDistributedPlanning,
rangeTableList);
if (needsDistributedPlanning)
{
@ -2740,12 +2738,13 @@ WarnIfListHasForeignDistributedTable(List *rangeTableList)
static bool
CheckPostPlanDistribution(bool isDistributedQuery,
Query *origQuery, List *rangeTableList,
Query *plannedQuery)
CheckPostPlanDistribution(DistributedPlanningContext *planContext, bool
isDistributedQuery, List *rangeTableList)
{
if (isDistributedQuery)
{
Query *origQuery = planContext->originalQuery;
Query *plannedQuery = planContext->query;
Node *origQuals = origQuery->jointree->quals;
Node *plannedQuals = plannedQuery->jointree->quals;
@ -2764,6 +2763,23 @@ CheckPostPlanDistribution(bool isDistributedQuery,
*/
if (origQuals != NULL && plannedQuals == NULL)
{
bool planHasDistTable = ListContainsDistributedTableRTE(
planContext->plan->rtable, NULL);
/*
* If the Postgres plan has a distributed table, we know for sure that
* the query requires distributed planning.
*/
if (planHasDistTable)
{
return true;
}
/*
* Otherwise, if the query has less range table entries after Postgres,
* planning, we should re-evaluate the distribution of the query. Postgres
* may have optimized away all citus tables, per issues 7782, 7783.
*/
List *rtesPostPlan = ExtractRangeTableEntryList(plannedQuery);
if (list_length(rtesPostPlan) < list_length(rangeTableList))
{

View File

@ -262,6 +262,9 @@ DEPS = {
"multi_subquery_in_where_reference_clause": TestDeps(
"minimal_schedule", ["multi_behavioral_analytics_create_table"]
),
"subquery_in_where": TestDeps(
"minimal_schedule", ["multi_behavioral_analytics_create_table"]
),
}

View File

@ -1253,10 +1253,53 @@ SELECT vkey, pkey FROM t3;
---------------------------------------------------------------------
(0 rows)
-- Redundant WHERE clause with distributed parititioned table
CREATE TABLE a (a int);
INSERT INTO a VALUES (1);
-- populated distributed partitioned table
create table partitioned_table (a INT UNIQUE) PARTITION BY RANGE(a);
CREATE TABLE par_1 PARTITION OF partitioned_table FOR VALUES FROM (1) TO (41);
CREATE TABLE par_2 PARTITION OF partitioned_table FOR VALUES FROM (41) TO (81);
CREATE TABLE par_3 PARTITION OF partitioned_table FOR VALUES FROM (81) TO (121);
CREATE TABLE par_4 PARTITION OF partitioned_table FOR VALUES FROM (121) TO (161);
SELECT create_distributed_table('partitioned_table', 'a');
create_distributed_table
---------------------------------------------------------------------
(1 row)
insert into partitioned_table(a) select i from generate_series(1,160) i;
-- test citus table in init plan
-- with redundant WHERE clause
SELECT CASE WHEN EXISTS (
SELECT * FROM partitioned_table
) THEN 1 ELSE 0 END AS table_non_empty
FROM a
WHERE true;
table_non_empty
---------------------------------------------------------------------
1
(1 row)
-- test citus table in init plan
-- with redundant WHERE clause involving
-- a citus table
SELECT CASE WHEN EXISTS (
SELECT * FROM partitioned_table
) THEN 1 ELSE 0 END AS table_non_empty
FROM a
WHERE true OR NOT EXISTS (SELECT 1 FROM t1);
table_non_empty
---------------------------------------------------------------------
1
(1 row)
DROP TABLE local_table;
DROP TABLE t0;
DROP TABLE t1;
DROP TABLE t3;
DROP TABLE t7;
DROP TABLE a;
DROP TABLE partitioned_table CASCADE;
DROP SCHEMA subquery_in_where CASCADE;
SET search_path TO public;

View File

@ -929,10 +929,42 @@ where TRUE or (((t3.vkey) >= (select
-- Distributed table t3 is now empty
SELECT vkey, pkey FROM t3;
-- Redundant WHERE clause with distributed parititioned table
CREATE TABLE a (a int);
INSERT INTO a VALUES (1);
-- populated distributed partitioned table
create table partitioned_table (a INT UNIQUE) PARTITION BY RANGE(a);
CREATE TABLE par_1 PARTITION OF partitioned_table FOR VALUES FROM (1) TO (41);
CREATE TABLE par_2 PARTITION OF partitioned_table FOR VALUES FROM (41) TO (81);
CREATE TABLE par_3 PARTITION OF partitioned_table FOR VALUES FROM (81) TO (121);
CREATE TABLE par_4 PARTITION OF partitioned_table FOR VALUES FROM (121) TO (161);
SELECT create_distributed_table('partitioned_table', 'a');
insert into partitioned_table(a) select i from generate_series(1,160) i;
-- test citus table in init plan
-- with redundant WHERE clause
SELECT CASE WHEN EXISTS (
SELECT * FROM partitioned_table
) THEN 1 ELSE 0 END AS table_non_empty
FROM a
WHERE true;
-- test citus table in init plan
-- with redundant WHERE clause involving
-- a citus table
SELECT CASE WHEN EXISTS (
SELECT * FROM partitioned_table
) THEN 1 ELSE 0 END AS table_non_empty
FROM a
WHERE true OR NOT EXISTS (SELECT 1 FROM t1);
DROP TABLE local_table;
DROP TABLE t0;
DROP TABLE t1;
DROP TABLE t3;
DROP TABLE t7;
DROP TABLE a;
DROP TABLE partitioned_table CASCADE;
DROP SCHEMA subquery_in_where CASCADE;
SET search_path TO public;