From 847f02d41feb1b5fae065e6875e3ecad3ef26b72 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Wed, 11 Dec 2019 14:31:34 -0800 Subject: [PATCH] Don't plan joins between ref tables and views locally --- .../distributed/planner/distributed_planner.c | 9 ++++ .../regress/expected/multi_test_helpers.out | 24 +++++++--- ...licate_reference_tables_to_coordinator.out | 45 +++++++++++++++++++ src/test/regress/sql/multi_test_helpers.sql | 25 ++++++++--- ...licate_reference_tables_to_coordinator.sql | 31 +++++++++++++ 5 files changed, 124 insertions(+), 10 deletions(-) diff --git a/src/backend/distributed/planner/distributed_planner.c b/src/backend/distributed/planner/distributed_planner.c index d4e4b6b06..1f2bd4928 100644 --- a/src/backend/distributed/planner/distributed_planner.c +++ b/src/backend/distributed/planner/distributed_planner.c @@ -1882,6 +1882,15 @@ IsLocalReferenceTableJoin(Query *parse, List *rangeTableList) continue; } + /* + * We only allow local join for the relation kinds for which we can + * determine deterministically that accesses to them are local or distributed. + * For this reason, we don't allow non-materialized views. 
+ */ + if (rangeTableEntry->relkind == RELKIND_VIEW) + { + return false; + } if (!IsDistributedTable(rangeTableEntry->relid)) { diff --git a/src/test/regress/expected/multi_test_helpers.out b/src/test/regress/expected/multi_test_helpers.out index ce648077f..cbb78de3c 100644 --- a/src/test/regress/expected/multi_test_helpers.out +++ b/src/test/regress/expected/multi_test_helpers.out @@ -1,6 +1,6 @@ --- File to create functions and helpers needed for subsequent tests +-- File to CREATE FUNCTIONs and helpers needed for subsequent tests -- create a helper function to create objects on each node -CREATE FUNCTION run_command_on_master_and_workers(p_sql text) +CREATE OR REPLACE FUNCTION run_command_on_master_and_workers(p_sql text) RETURNS void LANGUAGE plpgsql AS $$ BEGIN EXECUTE p_sql; @@ -111,7 +111,7 @@ $desc_views$ (1 row) -- Create a function to make sure that queries returning the same result -CREATE FUNCTION raise_failed_execution(query text) RETURNS void AS $$ +CREATE OR REPLACE FUNCTION raise_failed_execution(query text) RETURNS void AS $$ BEGIN EXECUTE query; EXCEPTION WHEN OTHERS THEN @@ -133,8 +133,22 @@ BEGIN END LOOP; RETURN; END; $$ language plpgsql; +-- Is a distributed plan? +CREATE OR REPLACE FUNCTION plan_is_distributed(explain_commmand text) +RETURNS BOOLEAN AS $$ +DECLARE + query_plan TEXT; +BEGIN + FOR query_plan IN execute explain_commmand LOOP + IF query_plan LIKE '%Task Count:%' + THEN + RETURN TRUE; + END IF; + END LOOP; + RETURN FALSE; +END; $$ language plpgsql; -- helper function to quickly run SQL on the whole cluster -CREATE FUNCTION run_command_on_coordinator_and_workers(p_sql text) +CREATE OR REPLACE FUNCTION run_command_on_coordinator_and_workers(p_sql text) RETURNS void LANGUAGE plpgsql AS $$ BEGIN EXECUTE p_sql; @@ -142,7 +156,7 @@ BEGIN END;$$; -- 1. Marks the given procedure as colocated with the given table. -- 2. Marks the argument index with which we route the procedure. 
-CREATE FUNCTION colocate_proc_with_table(procname text, tablerelid regclass, argument_index int) +CREATE OR REPLACE FUNCTION colocate_proc_with_table(procname text, tablerelid regclass, argument_index int) RETURNS void LANGUAGE plpgsql AS $$ BEGIN update citus.pg_dist_object diff --git a/src/test/regress/expected/replicate_reference_tables_to_coordinator.out b/src/test/regress/expected/replicate_reference_tables_to_coordinator.out index 150d54d71..a856196d4 100644 --- a/src/test/regress/expected/replicate_reference_tables_to_coordinator.out +++ b/src/test/regress/expected/replicate_reference_tables_to_coordinator.out @@ -166,6 +166,51 @@ HINT: Consider using an equality filter on the distributed table's partition co SELECT local_table.a, numbers.a FROM local_table NATURAL JOIN numbers FOR UPDATE; ERROR: could not run distributed query with FOR UPDATE/SHARE commands HINT: Consider using an equality filter on the distributed table's partition column. +-- +-- Joins between reference tables and views shouldn't be planned locally. 
+-- +CREATE VIEW numbers_v AS SELECT * FROM numbers WHERE a=1; +SELECT public.coordinator_plan($Q$ +EXPLAIN (COSTS FALSE) + SELECT * FROM squares JOIN numbers_v ON squares.a = numbers_v.a; +$Q$); + coordinator_plan +------------------------------ + Custom Scan (Citus Adaptive) + Task Count: 1 +(2 rows) + +CREATE VIEW local_table_v AS SELECT * FROM local_table WHERE a BETWEEN 1 AND 10; +SELECT public.coordinator_plan($Q$ +EXPLAIN (COSTS FALSE) + SELECT * FROM squares JOIN local_table_v ON squares.a = local_table_v.a; +$Q$); + coordinator_plan +------------------------------------------------ + Custom Scan (Citus Adaptive) + -> Distributed Subplan 24_1 + -> Seq Scan on local_table + Filter: ((a >= 1) AND (a <= 10)) + Task Count: 1 +(5 rows) + +DROP VIEW numbers_v, local_table_v; +-- +-- Joins between reference tables and materialized views are allowed to +-- be planned locally +-- +CREATE MATERIALIZED VIEW numbers_v AS SELECT * FROM numbers WHERE a BETWEEN 1 AND 10; +LOG: executing the command locally: SELECT a FROM replicate_ref_to_coordinator.numbers_8000001 numbers WHERE ((a OPERATOR(pg_catalog.>=) 1) AND (a OPERATOR(pg_catalog.<=) 10)) +REFRESH MATERIALIZED VIEW numbers_v; +SELECT public.plan_is_distributed($Q$ +EXPLAIN (COSTS FALSE) + SELECT * FROM squares JOIN numbers_v ON squares.a = numbers_v.a; +$Q$); + plan_is_distributed +--------------------- + f +(1 row) + -- verify that we can drop columns from reference tables replicated to the coordinator -- see https://github.com/citusdata/citus/issues/3279 ALTER TABLE squares DROP COLUMN b; diff --git a/src/test/regress/sql/multi_test_helpers.sql b/src/test/regress/sql/multi_test_helpers.sql index 28e464f7a..df3e3359a 100644 --- a/src/test/regress/sql/multi_test_helpers.sql +++ b/src/test/regress/sql/multi_test_helpers.sql @@ -1,7 +1,7 @@ --- File to create functions and helpers needed for subsequent tests +-- File to CREATE FUNCTIONs and helpers needed for subsequent tests -- create a helper function to create 
objects on each node -CREATE FUNCTION run_command_on_master_and_workers(p_sql text) +CREATE OR REPLACE FUNCTION run_command_on_master_and_workers(p_sql text) RETURNS void LANGUAGE plpgsql AS $$ BEGIN EXECUTE p_sql; @@ -109,7 +109,7 @@ $desc_views$ ); -- Create a function to make sure that queries returning the same result -CREATE FUNCTION raise_failed_execution(query text) RETURNS void AS $$ +CREATE OR REPLACE FUNCTION raise_failed_execution(query text) RETURNS void AS $$ BEGIN EXECUTE query; EXCEPTION WHEN OTHERS THEN @@ -133,8 +133,23 @@ BEGIN RETURN; END; $$ language plpgsql; +-- Is a distributed plan? +CREATE OR REPLACE FUNCTION plan_is_distributed(explain_commmand text) +RETURNS BOOLEAN AS $$ +DECLARE + query_plan TEXT; +BEGIN + FOR query_plan IN execute explain_commmand LOOP + IF query_plan LIKE '%Task Count:%' + THEN + RETURN TRUE; + END IF; + END LOOP; + RETURN FALSE; +END; $$ language plpgsql; + -- helper function to quickly run SQL on the whole cluster -CREATE FUNCTION run_command_on_coordinator_and_workers(p_sql text) +CREATE OR REPLACE FUNCTION run_command_on_coordinator_and_workers(p_sql text) RETURNS void LANGUAGE plpgsql AS $$ BEGIN EXECUTE p_sql; @@ -143,7 +158,7 @@ END;$$; -- 1. Marks the given procedure as colocated with the given table. -- 2. Marks the argument index with which we route the procedure. 
-CREATE FUNCTION colocate_proc_with_table(procname text, tablerelid regclass, argument_index int) +CREATE OR REPLACE FUNCTION colocate_proc_with_table(procname text, tablerelid regclass, argument_index int) RETURNS void LANGUAGE plpgsql AS $$ BEGIN update citus.pg_dist_object diff --git a/src/test/regress/sql/replicate_reference_tables_to_coordinator.sql b/src/test/regress/sql/replicate_reference_tables_to_coordinator.sql index 37ec3675d..4fcc40e1e 100644 --- a/src/test/regress/sql/replicate_reference_tables_to_coordinator.sql +++ b/src/test/regress/sql/replicate_reference_tables_to_coordinator.sql @@ -83,10 +83,41 @@ WITH t AS (SELECT *, random() x FROM dist) SELECT * FROM numbers, local_table SELECT local_table.a, numbers.a FROM local_table NATURAL JOIN numbers FOR SHARE; SELECT local_table.a, numbers.a FROM local_table NATURAL JOIN numbers FOR UPDATE; + +-- +-- Joins between reference tables and views shouldn't be planned locally. +-- + +CREATE VIEW numbers_v AS SELECT * FROM numbers WHERE a=1; +SELECT public.coordinator_plan($Q$ +EXPLAIN (COSTS FALSE) + SELECT * FROM squares JOIN numbers_v ON squares.a = numbers_v.a; +$Q$); + +CREATE VIEW local_table_v AS SELECT * FROM local_table WHERE a BETWEEN 1 AND 10; +SELECT public.coordinator_plan($Q$ +EXPLAIN (COSTS FALSE) + SELECT * FROM squares JOIN local_table_v ON squares.a = local_table_v.a; +$Q$); + +DROP VIEW numbers_v, local_table_v; + +-- +-- Joins between reference tables and materialized views are allowed to +-- be planned locally +-- +CREATE MATERIALIZED VIEW numbers_v AS SELECT * FROM numbers WHERE a BETWEEN 1 AND 10; +REFRESH MATERIALIZED VIEW numbers_v; +SELECT public.plan_is_distributed($Q$ +EXPLAIN (COSTS FALSE) + SELECT * FROM squares JOIN numbers_v ON squares.a = numbers_v.a; +$Q$); + -- verify that we can drop columns from reference tables replicated to the coordinator -- see https://github.com/citusdata/citus/issues/3279 ALTER TABLE squares DROP COLUMN b; + -- clean-up SET client_min_messages TO 
ERROR; DROP SCHEMA replicate_ref_to_coordinator CASCADE;