#7782 - catch when Postgres planning removes all Citus tables (#7907)

DESCRIPTION: fix a planning error caused by a redundant WHERE clause

Fix a Citus planning glitch that occurs in a DML query when the WHERE
clause of the query is of the form:
    ` WHERE true OR <expression with 1 or more citus tables> `
and this is the only place in the query referencing a citus table.
Postgres' standard planner transforms the WHERE clause to:
    ` WHERE true `
So the query now has no citus tables, confusing the Citus planner as
described in issues #7782 and #7783. The fix is to check, after Postgres
standard planner, if the Query has been transformed as shown, and re-run
the check of whether or not the query needs distributed planning.
pull/7922/head
Colm 2025-02-27 10:54:39 +00:00 committed by naisila
parent 87ec3def55
commit 4139370a1d
3 changed files with 241 additions and 1 deletions

View File

@ -152,7 +152,10 @@ static RouterPlanType GetRouterPlanType(Query *query,
bool hasUnresolvedParams);
static void ConcatenateRTablesAndPerminfos(PlannedStmt *mainPlan,
PlannedStmt *concatPlan);
static bool CheckPostPlanDistribution(bool isDistributedQuery,
Query *origQuery,
List *rangeTableList,
Query *plannedQuery);
/* Distributed planner hook */
PlannedStmt *
@ -273,6 +276,11 @@ distributed_planner(Query *parse,
planContext.plan = standard_planner(planContext.query, NULL,
planContext.cursorOptions,
planContext.boundParams);
needsDistributedPlanning = CheckPostPlanDistribution(needsDistributedPlanning,
planContext.originalQuery,
rangeTableList,
planContext.query);
if (needsDistributedPlanning)
{
result = PlanDistributedStmt(&planContext, rteIdCounter);
@ -2730,3 +2738,41 @@ WarnIfListHasForeignDistributedTable(List *rangeTableList)
}
}
}
static bool
CheckPostPlanDistribution(bool isDistributedQuery,
Query *origQuery, List *rangeTableList,
Query *plannedQuery)
{
if (isDistributedQuery)
{
Node *origQuals = origQuery->jointree->quals;
Node *plannedQuals = plannedQuery->jointree->quals;
#if PG_VERSION_NUM >= PG_VERSION_17
if (IsMergeQuery(origQuery))
{
origQuals = origQuery->mergeJoinCondition;
plannedQuals = plannedQuery->mergeJoinCondition;
}
#endif
/*
* The WHERE quals have been eliminated by the Postgres planner, possibly by
* an OR clause that was simplified to TRUE. In such cases, we need to check
* if the planned query still requires distributed planning.
*/
if (origQuals != NULL && plannedQuals == NULL)
{
List *rtesPostPlan = ExtractRangeTableEntryList(plannedQuery);
if (list_length(rtesPostPlan) < list_length(rangeTableList))
{
isDistributedQuery = ListContainsDistributedTableRTE(
rtesPostPlan, NULL);
}
}
}
return isDistributedQuery;
}

View File

@ -1146,7 +1146,117 @@ WHERE (SELECT COUNT(DISTINCT e1.value_2)
(1 row)
-- Test redundant WHERE clause (fix #7782, #7783)
CREATE TABLE t0 (vkey int4, pkey int4, c0 timestamp);
CREATE TABLE t1 (vkey int4, pkey int4, c4 timestamp, c5 text, c6 text);
CREATE TABLE t3 (vkey int4, pkey int4, c9 timestamp);
CREATE TABLE t7 (vkey int4, pkey int4);
-- DEBUG messages not needed for these tests
SET client_min_messages TO DEFAULT;
INSERT INTO t0 (vkey, pkey, c0) values
(3, 13000, make_timestamp(2032, 9, 4, 13, 38, 0));
INSERT INTO t7 (vkey, pkey) values
(3, 59525);
SELECT create_reference_table('t1');
create_reference_table
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('t3', 'c9');
create_distributed_table
---------------------------------------------------------------------
(1 row)
UPDATE t0 set vkey = 117
where (((t0.pkey) in (select t7.vkey from t7 where false
union all
select t3.pkey from t3 where false
)))
or TRUE;
-- Local table t0 is updated
SELECT vkey, pkey, c0 FROM t0;
vkey | pkey | c0
---------------------------------------------------------------------
117 | 13000 | Sat Sep 04 13:38:00 2032
(1 row)
-- MERGE command with redundant join can be planned locally
EXPLAIN (costs off, timing off)
MERGE INTO t0 USING t7 ON
(((t0.pkey) in (select t7.vkey from t7 where false
union all
select t1.pkey from t1 where false
)))
or TRUE
WHEN MATCHED THEN
UPDATE SET vkey = 113;
QUERY PLAN
---------------------------------------------------------------------
Merge on t0
-> Nested Loop
-> Seq Scan on t7
-> Materialize
-> Seq Scan on t0
(5 rows)
-- UPDATE via MERGE with redundant join clause:
MERGE INTO t0 USING t7 ON
(((t0.pkey) in (select t7.vkey from t7 where false
union all
select t1.pkey from t1 where false
)))
or TRUE
WHEN MATCHED THEN
UPDATE SET vkey = 113;
-- Local table t0 is updated
SELECT vkey, pkey, c0 FROM t0;
vkey | pkey | c0
---------------------------------------------------------------------
113 | 13000 | Sat Sep 04 13:38:00 2032
(1 row)
DELETE FROM t0
where TRUE or (((t0.vkey) >= (select
pg_catalog.regexp_count(ref_0.c5, ref_0.c6)
from t1 as ref_0 where true)));
-- Local table t0 is now empty (0 rows)
SELECT vkey, pkey, c0 FROM t0;
vkey | pkey | c0
---------------------------------------------------------------------
(0 rows)
INSERT INTO t3 (vkey, pkey, c9) values
(3, 13000, make_timestamp(2032, 9, 4, 13, 38, 0));
-- Distributed table update with redundant WHERE
UPDATE t3 set vkey = 117
where (((t3.pkey) in (select t1.vkey from t1 where false
union all
select t0.pkey from t0 join t7 on t0.pkey=t7.vkey where false
)))
or TRUE;
SELECT vkey, pkey FROM t3;
vkey | pkey
---------------------------------------------------------------------
117 | 13000
(1 row)
-- Distributed table delete with redundant WHERE
DELETE FROM t3
where TRUE or (((t3.vkey) >= (select
pg_catalog.regexp_count(ref_0.c5, ref_0.c6)
from t1 as ref_0 where true)) and (select max(vkey) from t0) > 0);
-- Distributed table t3 is now empty
SELECT vkey, pkey FROM t3;
vkey | pkey
---------------------------------------------------------------------
(0 rows)
DROP TABLE local_table;
DROP TABLE t0;
DROP TABLE t1;
DROP TABLE t3;
DROP TABLE t7;
DROP SCHEMA subquery_in_where CASCADE;
SET search_path TO public;

View File

@ -847,8 +847,92 @@ WHERE (SELECT COUNT(DISTINCT e1.value_2)
WHERE e1.user_id = u1.user_id
) > 115 AND false;
-- Test redundant WHERE clause (fix #7782, #7783)
CREATE TABLE t0 (vkey int4, pkey int4, c0 timestamp);
CREATE TABLE t1 (vkey int4, pkey int4, c4 timestamp, c5 text, c6 text);
CREATE TABLE t3 (vkey int4, pkey int4, c9 timestamp);
CREATE TABLE t7 (vkey int4, pkey int4);
-- DEBUG messages not needed for these tests
SET client_min_messages TO DEFAULT;
INSERT INTO t0 (vkey, pkey, c0) values
(3, 13000, make_timestamp(2032, 9, 4, 13, 38, 0));
INSERT INTO t7 (vkey, pkey) values
(3, 59525);
SELECT create_reference_table('t1');
SELECT create_distributed_table('t3', 'c9');
UPDATE t0 set vkey = 117
where (((t0.pkey) in (select t7.vkey from t7 where false
union all
select t3.pkey from t3 where false
)))
or TRUE;
-- Local table t0 is updated
SELECT vkey, pkey, c0 FROM t0;
-- MERGE command with redundant join can be planned locally
EXPLAIN (costs off, timing off)
MERGE INTO t0 USING t7 ON
(((t0.pkey) in (select t7.vkey from t7 where false
union all
select t1.pkey from t1 where false
)))
or TRUE
WHEN MATCHED THEN
UPDATE SET vkey = 113;
-- UPDATE via MERGE with redundant join clause:
MERGE INTO t0 USING t7 ON
(((t0.pkey) in (select t7.vkey from t7 where false
union all
select t1.pkey from t1 where false
)))
or TRUE
WHEN MATCHED THEN
UPDATE SET vkey = 113;
-- Local table t0 is updated
SELECT vkey, pkey, c0 FROM t0;
DELETE FROM t0
where TRUE or (((t0.vkey) >= (select
pg_catalog.regexp_count(ref_0.c5, ref_0.c6)
from t1 as ref_0 where true)));
-- Local table t0 is now empty (0 rows)
SELECT vkey, pkey, c0 FROM t0;
INSERT INTO t3 (vkey, pkey, c9) values
(3, 13000, make_timestamp(2032, 9, 4, 13, 38, 0));
-- Distributed table update with redundant WHERE
UPDATE t3 set vkey = 117
where (((t3.pkey) in (select t1.vkey from t1 where false
union all
select t0.pkey from t0 join t7 on t0.pkey=t7.vkey where false
)))
or TRUE;
SELECT vkey, pkey FROM t3;
-- Distributed table delete with redundant WHERE
DELETE FROM t3
where TRUE or (((t3.vkey) >= (select
pg_catalog.regexp_count(ref_0.c5, ref_0.c6)
from t1 as ref_0 where true)) and (select max(vkey) from t0) > 0);
-- Distributed table t3 is now empty
SELECT vkey, pkey FROM t3;
DROP TABLE local_table;
DROP TABLE t0;
DROP TABLE t1;
DROP TABLE t3;
DROP TABLE t7;
DROP SCHEMA subquery_in_where CASCADE;
SET search_path TO public;