diff --git a/src/backend/distributed/planner/distributed_planner.c b/src/backend/distributed/planner/distributed_planner.c index 6730ab982..a9ab5f4a2 100644 --- a/src/backend/distributed/planner/distributed_planner.c +++ b/src/backend/distributed/planner/distributed_planner.c @@ -151,10 +151,9 @@ static RouterPlanType GetRouterPlanType(Query *query, bool hasUnresolvedParams); static void ConcatenateRTablesAndPerminfos(PlannedStmt *mainPlan, PlannedStmt *concatPlan); -static bool CheckPostPlanDistribution(bool isDistributedQuery, - Query *origQuery, - List *rangeTableList, - Query *plannedQuery); +static bool CheckPostPlanDistribution(DistributedPlanningContext *planContext, + bool isDistributedQuery, + List *rangeTableList); /* Distributed planner hook */ PlannedStmt * @@ -275,10 +274,9 @@ distributed_planner(Query *parse, planContext.plan = standard_planner(planContext.query, NULL, planContext.cursorOptions, planContext.boundParams); - needsDistributedPlanning = CheckPostPlanDistribution(needsDistributedPlanning, - planContext.originalQuery, - rangeTableList, - planContext.query); + needsDistributedPlanning = CheckPostPlanDistribution(&planContext, + needsDistributedPlanning, + rangeTableList); if (needsDistributedPlanning) { @@ -2739,12 +2737,13 @@ WarnIfListHasForeignDistributedTable(List *rangeTableList) static bool -CheckPostPlanDistribution(bool isDistributedQuery, - Query *origQuery, List *rangeTableList, - Query *plannedQuery) +CheckPostPlanDistribution(DistributedPlanningContext *planContext, bool + isDistributedQuery, List *rangeTableList) { if (isDistributedQuery) { + Query *origQuery = planContext->originalQuery; + Query *plannedQuery = planContext->query; Node *origQuals = origQuery->jointree->quals; Node *plannedQuals = plannedQuery->jointree->quals; @@ -2763,6 +2762,23 @@ CheckPostPlanDistribution(bool isDistributedQuery, */ if (origQuals != NULL && plannedQuals == NULL) { + bool planHasDistTable = ListContainsDistributedTableRTE( + planContext->plan->rtable, NULL); + + /* + * If the Postgres plan has a distributed table, we know for sure that + * the query requires distributed planning. + */ + if (planHasDistTable) + { + return true; + } + + /* + * Otherwise, if the query has less range table entries after Postgres, + * planning, we should re-evaluate the distribution of the query. Postgres + * may have optimized away all citus tables, per issues 7782, 7783. + */ List *rtesPostPlan = ExtractRangeTableEntryList(plannedQuery); if (list_length(rtesPostPlan) < list_length(rangeTableList)) { diff --git a/src/test/regress/citus_tests/run_test.py b/src/test/regress/citus_tests/run_test.py index 6a4583675..8cea96d76 100755 --- a/src/test/regress/citus_tests/run_test.py +++ b/src/test/regress/citus_tests/run_test.py @@ -186,6 +186,32 @@ DEPS = { ), "limit_intermediate_size": TestDeps("base_schedule"), "pg17": TestDeps("minimal_schedule", ["multi_behavioral_analytics_create_table"]), + "multi_subquery_misc": TestDeps( + "minimal_schedule", ["multi_behavioral_analytics_create_table"] + ), + "multi_subquery_union": TestDeps( + "minimal_schedule", ["multi_behavioral_analytics_create_table"] + ), + "multi_subquery_in_where_clause": TestDeps( + "minimal_schedule", ["multi_behavioral_analytics_create_table"] + ), + "multi_limit_clause_approximate": TestDeps( + "minimal_schedule", + ["multi_create_table", "multi_create_users", "multi_load_data"], + ), + "multi_single_relation_subquery": TestDeps( + "minimal_schedule", + ["multi_create_table", "multi_create_users", "multi_load_data"], + ), + "multi_subquery_complex_reference_clause": TestDeps( + "minimal_schedule", ["multi_behavioral_analytics_create_table"] + ), + "multi_subquery_in_where_reference_clause": TestDeps( + "minimal_schedule", ["multi_behavioral_analytics_create_table"] + ), + "subquery_in_where": TestDeps( + "minimal_schedule", ["multi_behavioral_analytics_create_table"] + ), } diff --git a/src/test/regress/expected/subquery_in_where.out b/src/test/regress/expected/subquery_in_where.out index 901954265..361415800 100644 --- a/src/test/regress/expected/subquery_in_where.out +++ b/src/test/regress/expected/subquery_in_where.out @@ -1253,10 +1253,53 @@ SELECT vkey, pkey FROM t3; --------------------------------------------------------------------- (0 rows) +-- Redundant WHERE clause with distributed parititioned table +CREATE TABLE a (a int); +INSERT INTO a VALUES (1); +-- populated distributed partitioned table +create table partitioned_table (a INT UNIQUE) PARTITION BY RANGE(a); +CREATE TABLE par_1 PARTITION OF partitioned_table FOR VALUES FROM (1) TO (41); +CREATE TABLE par_2 PARTITION OF partitioned_table FOR VALUES FROM (41) TO (81); +CREATE TABLE par_3 PARTITION OF partitioned_table FOR VALUES FROM (81) TO (121); +CREATE TABLE par_4 PARTITION OF partitioned_table FOR VALUES FROM (121) TO (161); +SELECT create_distributed_table('partitioned_table', 'a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +insert into partitioned_table(a) select i from generate_series(1,160) i; +-- test citus table in init plan +-- with redundant WHERE clause +SELECT CASE WHEN EXISTS ( + SELECT * FROM partitioned_table + ) THEN 1 ELSE 0 END AS table_non_empty +FROM a +WHERE true; + table_non_empty +--------------------------------------------------------------------- + 1 +(1 row) + +-- test citus table in init plan +-- with redundant WHERE clause involving +-- a citus table +SELECT CASE WHEN EXISTS ( + SELECT * FROM partitioned_table + ) THEN 1 ELSE 0 END AS table_non_empty +FROM a +WHERE true OR NOT EXISTS (SELECT 1 FROM t1); + table_non_empty +--------------------------------------------------------------------- + 1 +(1 row) + DROP TABLE local_table; DROP TABLE t0; DROP TABLE t1; DROP TABLE t3; DROP TABLE t7; +DROP TABLE a; +DROP TABLE partitioned_table CASCADE; DROP SCHEMA subquery_in_where CASCADE; SET search_path TO public; diff --git a/src/test/regress/sql/subquery_in_where.sql b/src/test/regress/sql/subquery_in_where.sql index ebdb60890..3a07cdc75 100644 --- a/src/test/regress/sql/subquery_in_where.sql +++ b/src/test/regress/sql/subquery_in_where.sql @@ -929,10 +929,42 @@ where TRUE or (((t3.vkey) >= (select -- Distributed table t3 is now empty SELECT vkey, pkey FROM t3; +-- Redundant WHERE clause with distributed parititioned table +CREATE TABLE a (a int); +INSERT INTO a VALUES (1); + +-- populated distributed partitioned table +create table partitioned_table (a INT UNIQUE) PARTITION BY RANGE(a); +CREATE TABLE par_1 PARTITION OF partitioned_table FOR VALUES FROM (1) TO (41); +CREATE TABLE par_2 PARTITION OF partitioned_table FOR VALUES FROM (41) TO (81); +CREATE TABLE par_3 PARTITION OF partitioned_table FOR VALUES FROM (81) TO (121); +CREATE TABLE par_4 PARTITION OF partitioned_table FOR VALUES FROM (121) TO (161); +SELECT create_distributed_table('partitioned_table', 'a'); +insert into partitioned_table(a) select i from generate_series(1,160) i; + +-- test citus table in init plan +-- with redundant WHERE clause +SELECT CASE WHEN EXISTS ( + SELECT * FROM partitioned_table + ) THEN 1 ELSE 0 END AS table_non_empty +FROM a +WHERE true; + +-- test citus table in init plan +-- with redundant WHERE clause involving +-- a citus table +SELECT CASE WHEN EXISTS ( + SELECT * FROM partitioned_table + ) THEN 1 ELSE 0 END AS table_non_empty +FROM a +WHERE true OR NOT EXISTS (SELECT 1 FROM t1); + DROP TABLE local_table; DROP TABLE t0; DROP TABLE t1; DROP TABLE t3; DROP TABLE t7; +DROP TABLE a; +DROP TABLE partitioned_table CASCADE; DROP SCHEMA subquery_in_where CASCADE; SET search_path TO public;