From 0f1ce7a91366a0b5dcef15d13f40f54aa87f79cf Mon Sep 17 00:00:00 2001 From: SaitTalhaNisanci Date: Fri, 12 Feb 2021 12:33:55 +0300 Subject: [PATCH] Not skip relation in conversion if it doesn't have RelationRestriction (#4685) We would exclude tables without relationRestriction from conversion candidates in local-distributed table joins. This could leave a leftover local table which should have been converted to a subquery. Ideally I would expect that in each call to CreateDistributedPlan we would pass a new plan id, but that seems like a bigger change. --- .../planner/local_distributed_join_planner.c | 14 +++--- .../expected/local_dist_join_mixed.out | 13 +++--- .../regress/expected/local_table_join.out | 43 ++++++++++++++++++- src/test/regress/sql/local_table_join.sql | 27 ++++++++++++ 4 files changed, 84 insertions(+), 13 deletions(-) diff --git a/src/backend/distributed/planner/local_distributed_join_planner.c b/src/backend/distributed/planner/local_distributed_join_planner.c index 0920a70a6..c951b6056 100644 --- a/src/backend/distributed/planner/local_distributed_join_planner.c +++ b/src/backend/distributed/planner/local_distributed_join_planner.c @@ -384,8 +384,15 @@ static bool HasConstantFilterOnUniqueColumn(RangeTblEntry *rangeTableEntry, RelationRestriction *relationRestriction) { - if (rangeTableEntry == NULL) + if (rangeTableEntry == NULL || relationRestriction == NULL) { + /* + * Postgres might not pass relationRestriction info with hooks if + * the table doesn't contribute to the result, and in that case + * relationRestriction will be NULL. Ideally it doesn't make sense + * to recursively plan such tables but for the time being we don't + * add any special logic for these tables as it might introduce bugs. + */ return false; } List *baseRestrictionList = relationRestriction->relOptInfo->baserestrictinfo; @@ -538,11 +545,6 @@ CreateConversionCandidates(PlannerRestrictionContext *plannerRestrictionContext, RelationRestriction *relationRestriction = RelationRestrictionForRelation(rangeTableEntry, plannerRestrictionContext); - if (relationRestriction == NULL) - { - continue; - } - RangeTableEntryDetails *rangeTableEntryDetails = palloc0(sizeof(RangeTableEntryDetails)); diff --git a/src/test/regress/expected/local_dist_join_mixed.out b/src/test/regress/expected/local_dist_join_mixed.out index 003818350..f8a14972b 100644 --- a/src/test/regress/expected/local_dist_join_mixed.out +++ b/src/test/regress/expected/local_dist_join_mixed.out @@ -1182,9 +1182,9 @@ DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS c -- w count(*) it works fine as PG ignores the inner tables SELECT count(*) FROM distributed LEFT JOIN local USING (id); -DEBUG: Wrapping relation "distributed" to a subquery -DEBUG: generating subplan XXX_1 for subquery SELECT id FROM local_dist_join_mixed.distributed WHERE true -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT NULL::integer AS "dummy-1", distributed_1.id, NULL::text AS name, NULL::timestamp with time zone AS created_at FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint)) distributed_1) distributed LEFT JOIN local_dist_join_mixed.local USING (id)) +DEBUG: Wrapping relation "local" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT NULL::integer AS "dummy-1" FROM local_dist_join_mixed.local WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (local_dist_join_mixed.distributed LEFT JOIN (SELECT NULL::integer AS "dummy-1", NULL::bigint AS id, NULL::integer AS "dummy-3", NULL::text AS title, NULL::integer AS "dummy-5" FROM (SELECT intermediate_result."dummy-1" FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result("dummy-1" integer)) local_1) local USING (id)) count --------------------------------------------------------------------- 101 @@ -1200,9 +1200,10 @@ DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS c (1 row) SELECT id, name FROM distributed LEFT JOIN local USING (id) ORDER BY 1 LIMIT 1; -DEBUG: Wrapping relation "distributed" to a subquery -DEBUG: generating subplan XXX_1 for subquery SELECT id, name FROM local_dist_join_mixed.distributed WHERE true -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT distributed.id, distributed.name FROM ((SELECT NULL::integer AS "dummy-1", distributed_1.id, distributed_1.name, NULL::timestamp with time zone AS created_at FROM (SELECT intermediate_result.id, intermediate_result.name FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint, name text)) distributed_1) distributed LEFT JOIN local_dist_join_mixed.local USING (id)) ORDER BY distributed.id LIMIT 1 +DEBUG: Wrapping relation "local" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT NULL::integer AS "dummy-1" FROM local_dist_join_mixed.local WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT distributed.id, distributed.name FROM (local_dist_join_mixed.distributed LEFT JOIN (SELECT NULL::integer AS "dummy-1", NULL::bigint AS id, NULL::integer AS "dummy-3", NULL::text AS title, NULL::integer AS "dummy-5" FROM (SELECT intermediate_result."dummy-1" FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result("dummy-1" integer)) local_1) local USING (id)) ORDER BY distributed.id LIMIT 1 +DEBUG: push down of limit count: 1 id | name --------------------------------------------------------------------- 0 | 0 diff --git a/src/test/regress/expected/local_table_join.out b/src/test/regress/expected/local_table_join.out index 65d0ddba2..75195e96f 100644 --- a/src/test/regress/expected/local_table_join.out +++ b/src/test/regress/expected/local_table_join.out @@ -1466,7 +1466,48 @@ DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS c 100 (1 row) +-- issue 4682 +create table tbl1 (a int, b int, c int, d int); +INSERT INTO tbl1 SELECT i,i,i,i FROM generate_series(1,10) i; +create table custom_pg_operator(oprname text); +INSERT INTO custom_pg_operator values('a'); +-- try with local tables to make sure the results are same when tbl1 is distributed +select COUNT(*) from + custom_pg_operator + inner join tbl1 on (select 1 from custom_pg_type) >= d + left join pg_dist_rebalance_strategy on 'by_shard_count' = name +where a + b + c > 0; + count +--------------------------------------------------------------------- + 1 +(1 row) + +select create_distributed_table('tbl1', 'a'); +NOTICE: Copying data from local table... +DEBUG: Copied 10 rows +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$local_table_join.tbl1$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- there is a different output in pg11 and in this query the debug messages are not +-- as important as the others so we use notice +set client_min_messages to NOTICE; +select COUNT(*) from + custom_pg_operator + inner join tbl1 on (select 1 from custom_pg_type) >= d + left join pg_dist_rebalance_strategy on 'by_shard_count' = name +where a + b + c > 0; + count +--------------------------------------------------------------------- + 1 +(1 row) + +SET client_min_messages to DEBUG1; RESET client_min_messages; \set VERBOSITY terse DROP SCHEMA local_table_join CASCADE; -NOTICE: drop cascades to 18 other objects +NOTICE: drop cascades to 20 other objects diff --git a/src/test/regress/sql/local_table_join.sql b/src/test/regress/sql/local_table_join.sql index 0c8c3317c..6b56e5801 100644 --- a/src/test/regress/sql/local_table_join.sql +++ b/src/test/regress/sql/local_table_join.sql @@ -413,6 +413,33 @@ WHERE distributed_table_pkey.key IN (SELECT key FROM distributed_table_pkey WHER SELECT COUNT(*) FROM distributed_table_pkey JOIN postgres_table using(key) WHERE distributed_table_pkey.key IN (SELECT key FROM distributed_table_pkey WHERE key = 5) AND distributed_table_pkey.key = 5; +-- issue 4682 +create table tbl1 (a int, b int, c int, d int); +INSERT INTO tbl1 SELECT i,i,i,i FROM generate_series(1,10) i; + +create table custom_pg_operator(oprname text); +INSERT INTO custom_pg_operator values('a'); + +-- try with local tables to make sure the results are same when tbl1 is distributed +select COUNT(*) from + custom_pg_operator + inner join tbl1 on (select 1 from custom_pg_type) >= d + left join pg_dist_rebalance_strategy on 'by_shard_count' = name +where a + b + c > 0; + +select create_distributed_table('tbl1', 'a'); + +-- there is a different output in pg11 and in this query the debug messages are not +-- as important as the others so we use notice +set client_min_messages to NOTICE; +select COUNT(*) from + custom_pg_operator + inner join tbl1 on (select 1 from custom_pg_type) >= d + left join pg_dist_rebalance_strategy on 'by_shard_count' = name +where a + b + c > 0; +SET client_min_messages to DEBUG1; + + RESET client_min_messages; \set VERBOSITY terse DROP SCHEMA local_table_join CASCADE;