mirror of https://github.com/citusdata/citus.git
Fix bug in redundant WHERE clause detection. (#8162)
Need to also check Postgres plan's rangetables for relations used in Initplans.
DESCRIPTION: Fix a bug in redundant WHERE clause detection; we need to
additionally check the Postgres plan's range tables for the presence of
citus tables, to account for relations that are referenced from scalar
subqueries.
There is a fundamental flaw in 4139370, the assumption that, after
Postgres planning has completed, all tables used in a query can be
obtained by walking the query tree. This is not the case for scalar
subqueries, which will be referenced by `PARAM` nodes. The fix adds an
additional check of the Postgres plan range tables; if there is at least
one citus table in there we do not need to change the needs distributed
planning flag.
Fixes #8159
pull/8205/head
parent
46fd74933c
commit
6ac5ad500e
|
|
@ -151,10 +151,9 @@ static RouterPlanType GetRouterPlanType(Query *query,
|
|||
bool hasUnresolvedParams);
|
||||
static void ConcatenateRTablesAndPerminfos(PlannedStmt *mainPlan,
|
||||
PlannedStmt *concatPlan);
|
||||
static bool CheckPostPlanDistribution(bool isDistributedQuery,
|
||||
Query *origQuery,
|
||||
List *rangeTableList,
|
||||
Query *plannedQuery);
|
||||
static bool CheckPostPlanDistribution(DistributedPlanningContext *planContext,
|
||||
bool isDistributedQuery,
|
||||
List *rangeTableList);
|
||||
|
||||
/* Distributed planner hook */
|
||||
PlannedStmt *
|
||||
|
|
@ -275,10 +274,9 @@ distributed_planner(Query *parse,
|
|||
planContext.plan = standard_planner(planContext.query, NULL,
|
||||
planContext.cursorOptions,
|
||||
planContext.boundParams);
|
||||
needsDistributedPlanning = CheckPostPlanDistribution(needsDistributedPlanning,
|
||||
planContext.originalQuery,
|
||||
rangeTableList,
|
||||
planContext.query);
|
||||
needsDistributedPlanning = CheckPostPlanDistribution(&planContext,
|
||||
needsDistributedPlanning,
|
||||
rangeTableList);
|
||||
|
||||
if (needsDistributedPlanning)
|
||||
{
|
||||
|
|
@ -2739,12 +2737,13 @@ WarnIfListHasForeignDistributedTable(List *rangeTableList)
|
|||
|
||||
|
||||
static bool
|
||||
CheckPostPlanDistribution(bool isDistributedQuery,
|
||||
Query *origQuery, List *rangeTableList,
|
||||
Query *plannedQuery)
|
||||
CheckPostPlanDistribution(DistributedPlanningContext *planContext, bool
|
||||
isDistributedQuery, List *rangeTableList)
|
||||
{
|
||||
if (isDistributedQuery)
|
||||
{
|
||||
Query *origQuery = planContext->originalQuery;
|
||||
Query *plannedQuery = planContext->query;
|
||||
Node *origQuals = origQuery->jointree->quals;
|
||||
Node *plannedQuals = plannedQuery->jointree->quals;
|
||||
|
||||
|
|
@ -2763,6 +2762,23 @@ CheckPostPlanDistribution(bool isDistributedQuery,
|
|||
*/
|
||||
if (origQuals != NULL && plannedQuals == NULL)
|
||||
{
|
||||
bool planHasDistTable = ListContainsDistributedTableRTE(
|
||||
planContext->plan->rtable, NULL);
|
||||
|
||||
/*
|
||||
* If the Postgres plan has a distributed table, we know for sure that
|
||||
* the query requires distributed planning.
|
||||
*/
|
||||
if (planHasDistTable)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Otherwise, if the query has less range table entries after Postgres,
|
||||
* planning, we should re-evaluate the distribution of the query. Postgres
|
||||
* may have optimized away all citus tables, per issues 7782, 7783.
|
||||
*/
|
||||
List *rtesPostPlan = ExtractRangeTableEntryList(plannedQuery);
|
||||
if (list_length(rtesPostPlan) < list_length(rangeTableList))
|
||||
{
|
||||
|
|
|
|||
|
|
@ -186,6 +186,32 @@ DEPS = {
|
|||
),
|
||||
"limit_intermediate_size": TestDeps("base_schedule"),
|
||||
"pg17": TestDeps("minimal_schedule", ["multi_behavioral_analytics_create_table"]),
|
||||
"multi_subquery_misc": TestDeps(
|
||||
"minimal_schedule", ["multi_behavioral_analytics_create_table"]
|
||||
),
|
||||
"multi_subquery_union": TestDeps(
|
||||
"minimal_schedule", ["multi_behavioral_analytics_create_table"]
|
||||
),
|
||||
"multi_subquery_in_where_clause": TestDeps(
|
||||
"minimal_schedule", ["multi_behavioral_analytics_create_table"]
|
||||
),
|
||||
"multi_limit_clause_approximate": TestDeps(
|
||||
"minimal_schedule",
|
||||
["multi_create_table", "multi_create_users", "multi_load_data"],
|
||||
),
|
||||
"multi_single_relation_subquery": TestDeps(
|
||||
"minimal_schedule",
|
||||
["multi_create_table", "multi_create_users", "multi_load_data"],
|
||||
),
|
||||
"multi_subquery_complex_reference_clause": TestDeps(
|
||||
"minimal_schedule", ["multi_behavioral_analytics_create_table"]
|
||||
),
|
||||
"multi_subquery_in_where_reference_clause": TestDeps(
|
||||
"minimal_schedule", ["multi_behavioral_analytics_create_table"]
|
||||
),
|
||||
"subquery_in_where": TestDeps(
|
||||
"minimal_schedule", ["multi_behavioral_analytics_create_table"]
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1253,10 +1253,53 @@ SELECT vkey, pkey FROM t3;
|
|||
---------------------------------------------------------------------
|
||||
(0 rows)
|
||||
|
||||
-- Redundant WHERE clause with distributed parititioned table
|
||||
CREATE TABLE a (a int);
|
||||
INSERT INTO a VALUES (1);
|
||||
-- populated distributed partitioned table
|
||||
create table partitioned_table (a INT UNIQUE) PARTITION BY RANGE(a);
|
||||
CREATE TABLE par_1 PARTITION OF partitioned_table FOR VALUES FROM (1) TO (41);
|
||||
CREATE TABLE par_2 PARTITION OF partitioned_table FOR VALUES FROM (41) TO (81);
|
||||
CREATE TABLE par_3 PARTITION OF partitioned_table FOR VALUES FROM (81) TO (121);
|
||||
CREATE TABLE par_4 PARTITION OF partitioned_table FOR VALUES FROM (121) TO (161);
|
||||
SELECT create_distributed_table('partitioned_table', 'a');
|
||||
create_distributed_table
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
insert into partitioned_table(a) select i from generate_series(1,160) i;
|
||||
-- test citus table in init plan
|
||||
-- with redundant WHERE clause
|
||||
SELECT CASE WHEN EXISTS (
|
||||
SELECT * FROM partitioned_table
|
||||
) THEN 1 ELSE 0 END AS table_non_empty
|
||||
FROM a
|
||||
WHERE true;
|
||||
table_non_empty
|
||||
---------------------------------------------------------------------
|
||||
1
|
||||
(1 row)
|
||||
|
||||
-- test citus table in init plan
|
||||
-- with redundant WHERE clause involving
|
||||
-- a citus table
|
||||
SELECT CASE WHEN EXISTS (
|
||||
SELECT * FROM partitioned_table
|
||||
) THEN 1 ELSE 0 END AS table_non_empty
|
||||
FROM a
|
||||
WHERE true OR NOT EXISTS (SELECT 1 FROM t1);
|
||||
table_non_empty
|
||||
---------------------------------------------------------------------
|
||||
1
|
||||
(1 row)
|
||||
|
||||
DROP TABLE local_table;
|
||||
DROP TABLE t0;
|
||||
DROP TABLE t1;
|
||||
DROP TABLE t3;
|
||||
DROP TABLE t7;
|
||||
DROP TABLE a;
|
||||
DROP TABLE partitioned_table CASCADE;
|
||||
DROP SCHEMA subquery_in_where CASCADE;
|
||||
SET search_path TO public;
|
||||
|
|
|
|||
|
|
@ -929,10 +929,42 @@ where TRUE or (((t3.vkey) >= (select
|
|||
-- Distributed table t3 is now empty
|
||||
SELECT vkey, pkey FROM t3;
|
||||
|
||||
-- Redundant WHERE clause with distributed parititioned table
|
||||
CREATE TABLE a (a int);
|
||||
INSERT INTO a VALUES (1);
|
||||
|
||||
-- populated distributed partitioned table
|
||||
create table partitioned_table (a INT UNIQUE) PARTITION BY RANGE(a);
|
||||
CREATE TABLE par_1 PARTITION OF partitioned_table FOR VALUES FROM (1) TO (41);
|
||||
CREATE TABLE par_2 PARTITION OF partitioned_table FOR VALUES FROM (41) TO (81);
|
||||
CREATE TABLE par_3 PARTITION OF partitioned_table FOR VALUES FROM (81) TO (121);
|
||||
CREATE TABLE par_4 PARTITION OF partitioned_table FOR VALUES FROM (121) TO (161);
|
||||
SELECT create_distributed_table('partitioned_table', 'a');
|
||||
insert into partitioned_table(a) select i from generate_series(1,160) i;
|
||||
|
||||
-- test citus table in init plan
|
||||
-- with redundant WHERE clause
|
||||
SELECT CASE WHEN EXISTS (
|
||||
SELECT * FROM partitioned_table
|
||||
) THEN 1 ELSE 0 END AS table_non_empty
|
||||
FROM a
|
||||
WHERE true;
|
||||
|
||||
-- test citus table in init plan
|
||||
-- with redundant WHERE clause involving
|
||||
-- a citus table
|
||||
SELECT CASE WHEN EXISTS (
|
||||
SELECT * FROM partitioned_table
|
||||
) THEN 1 ELSE 0 END AS table_non_empty
|
||||
FROM a
|
||||
WHERE true OR NOT EXISTS (SELECT 1 FROM t1);
|
||||
|
||||
DROP TABLE local_table;
|
||||
DROP TABLE t0;
|
||||
DROP TABLE t1;
|
||||
DROP TABLE t3;
|
||||
DROP TABLE t7;
|
||||
DROP TABLE a;
|
||||
DROP TABLE partitioned_table CASCADE;
|
||||
DROP SCHEMA subquery_in_where CASCADE;
|
||||
SET search_path TO public;
|
||||
|
|
|
|||
Loading…
Reference in New Issue