Fix: distributed function with table reference in declare (#3384)

DESCRIPTION: Fixes a problem when adding a new node caused by tables referenced in a function's body

Fixes #3378 

It was reported that `master_add_node` would fail if a distributed function references a table name in the `DECLARE` section of its body. By default Postgres validates the body of a function on creation. This is not a problem in the normal case, as tables are already replicated to the workers when we distribute functions.

However, when a new node is added we first create dependencies on the workers before we try to create any tables, and the original tables only get created out of band once the metadata is synced to the new node. This causes the function body validator to raise an error because the table is not yet on the worker.

To mitigate this issue we set `check_function_bodies` to `off` right before creating the function, and reset it afterwards.
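To illustrate the effect of that setting, here is a minimal sketch on plain PostgreSQL (not Citus-specific; `add_three` and `missing_table` are made-up names for this example): with body validation on, a plpgsql function whose `DECLARE` section uses `%TYPE` on a table that does not exist yet fails at `CREATE FUNCTION` time, while with validation off the same statement succeeds and the body is only checked when the function is first executed.

```sql
-- Default behaviour: the plpgsql validator compiles the body, so the %TYPE
-- reference in DECLARE must resolve; this CREATE FUNCTION errors out because
-- missing_table does not exist.
SET check_function_bodies TO on;
CREATE FUNCTION add_three(a int) RETURNS int LANGUAGE plpgsql AS $$
DECLARE
    b missing_table.x%TYPE := 3;
BEGIN
    RETURN a + b;
END;
$$;

-- With validation disabled the same statement succeeds; the reference is only
-- resolved on first execution, by which time the table can exist.
SET check_function_bodies TO off;
CREATE FUNCTION add_three(a int) RETURNS int LANGUAGE plpgsql AS $$
DECLARE
    b missing_table.x%TYPE := 3;
BEGIN
    RETURN a + b;
END;
$$;
```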

The added test shows that this resolves the issue (the issue can be reproduced by running the test on the commit without the fix).
pull/3373/head
Nils Dijk 2020-01-16 14:21:54 +01:00 committed by GitHub
parent e76281500c
commit b6e09eb691
6 changed files with 157 additions and 5 deletions


@@ -55,6 +55,8 @@
 #include "utils/syscache.h"
 #include "utils/regproc.h"
+#define DISABLE_LOCAL_CHECK_FUNCTION_BODIES "SET LOCAL check_function_bodies TO off;"
+#define RESET_CHECK_FUNCTION_BODIES "RESET check_function_bodies;"
 #define argumentStartsWith(arg, prefix) \
 (strncmp(arg, prefix, strlen(prefix)) == 0)
@@ -224,7 +226,11 @@ CreateFunctionDDLCommandsIdempotent(const ObjectAddress *functionAddress)
 char *ddlCommand = GetFunctionDDLCommand(functionAddress->objectId, true);
 char *alterFunctionOwnerSQL = GetFunctionAlterOwnerCommand(functionAddress->objectId);
-return list_make2(ddlCommand, alterFunctionOwnerSQL);
+return list_make4(
+	DISABLE_LOCAL_CHECK_FUNCTION_BODIES,
+	ddlCommand,
+	alterFunctionOwnerSQL,
+	RESET_CHECK_FUNCTION_BODIES);
 }
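With this change the propagated function DDL is bracketed by the two new commands. As a rough sketch of what a worker would then receive (assuming the list is replayed in order inside a single transaction, using the `zoop()` function from the new test as a stand-in body, and `postgres` as a placeholder owner):

```sql
BEGIN;
SET LOCAL check_function_bodies TO off;   -- DISABLE_LOCAL_CHECK_FUNCTION_BODIES
-- output of GetFunctionDDLCommand(); the body is not validated even though
-- function_table_reference.zoop_table may not exist on this node yet
CREATE OR REPLACE FUNCTION function_table_reference.zoop(a integer)
    RETURNS integer
    LANGUAGE plpgsql
    SET search_path TO function_table_reference
AS $$
DECLARE
    b zoop_table.x%TYPE := 3;
BEGIN
    RETURN a + b;
END;
$$;
-- output of GetFunctionAlterOwnerCommand(); owner name is a placeholder
ALTER FUNCTION function_table_reference.zoop(integer) OWNER TO postgres;
RESET check_function_bodies;              -- RESET_CHECK_FUNCTION_BODIES
COMMIT;
```

Because `SET LOCAL` only lasts until the end of the transaction it runs in, the normal value of `check_function_bodies` is restored for any later commands on the same connection.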


@@ -0,0 +1,89 @@
-- when using distributed functions in MX for function call delegation you could define
-- functions that have references to distributed tables in their signature or declare
-- blocks.
-- This has caused issues with adding nodes to the cluster, as functions will be
-- distributed to the new node before shards or metadata gets synced to that node,
-- causing referenced tables to not be available during dependency distribution time.
SET citus.next_shard_id TO 20060000;
CREATE SCHEMA function_table_reference;
SET search_path TO function_table_reference;
SET citus.shard_count TO 4;
SET citus.shard_replication_factor TO 1;
SET citus.replication_model TO streaming;
SELECT start_metadata_sync_to_node('localhost', :worker_1_port);
start_metadata_sync_to_node
---------------------------------------------------------------------
(1 row)
SELECT start_metadata_sync_to_node('localhost', :worker_2_port);
start_metadata_sync_to_node
---------------------------------------------------------------------
(1 row)
-- SET citus.log_remote_commands TO on;
-- SET client_min_messages TO log;
-- remove worker 2, so we can add it after we have created some functions that caused
-- problems
SELECT master_remove_node('localhost', :worker_2_port);
master_remove_node
---------------------------------------------------------------------
(1 row)
-- reproduction case as described in #3378
CREATE TABLE zoop_table (x int, y int);
SELECT create_distributed_table('zoop_table','x');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- Create a function that refers to the distributed table
CREATE OR REPLACE FUNCTION zoop(a int)
RETURNS int
LANGUAGE plpgsql
-- setting the search path makes the table name resolve on the worker during initial
-- distribution
SET search_path FROM CURRENT
AS $$
DECLARE
b zoop_table.x%TYPE := 3;
BEGIN
return a + b;
END;
$$;
SELECT create_distributed_function('zoop(int)', '$1');
create_distributed_function
---------------------------------------------------------------------
(1 row)
-- now add the worker back; this triggers function distribution, which should not fail.
SELECT 1 FROM master_add_node('localhost', :worker_2_port);
?column?
---------------------------------------------------------------------
1
(1 row)
SELECT public.wait_until_metadata_sync();
wait_until_metadata_sync
---------------------------------------------------------------------
(1 row)
-- clean up after testing
DROP SCHEMA function_table_reference CASCADE;
NOTICE: drop cascades to 2 other objects
DETAIL: drop cascades to table zoop_table
drop cascades to function zoop(integer)
-- make sure the worker is added at the end regardless of anything failing, so as not to make
-- subsequent tests fail as well. All artifacts created during this test should have been
-- dropped by the drop cascade above.
SELECT 1 FROM master_add_node('localhost', :worker_2_port);
?column?
---------------------------------------------------------------------
1
(1 row)


@@ -454,10 +454,10 @@ INSERT INTO app_analytics_events_mx (app_id, name) VALUES (103, 'Mynt') RETURNING *;
 (1 row)
 -- clean up
-SELECT setval('app_analytics_events_mx_id_seq'::regclass, :last_value);
- setval
+SELECT 1 FROM setval('app_analytics_events_mx_id_seq'::regclass, :last_value);
+ ?column?
 ---------------------------------------------------------------------
-4503599627370497
+1
 (1 row)
 ALTER SEQUENCE app_analytics_events_mx_id_seq


@@ -17,6 +17,7 @@ test: multi_extension
 test: multi_test_helpers
 test: multi_mx_node_metadata
 test: multi_cluster_management
+test: multi_mx_function_table_reference
 test: multi_test_catalog_views


@@ -0,0 +1,56 @@
-- when using distributed functions in MX for function call delegation you could define
-- functions that have references to distributed tables in their signature or declare
-- blocks.
-- This has caused issues with adding nodes to the cluster, as functions will be
-- distributed to the new node before shards or metadata gets synced to that node,
-- causing referenced tables to not be available during dependency distribution time.
SET citus.next_shard_id TO 20060000;
CREATE SCHEMA function_table_reference;
SET search_path TO function_table_reference;
SET citus.shard_count TO 4;
SET citus.shard_replication_factor TO 1;
SET citus.replication_model TO streaming;
SELECT start_metadata_sync_to_node('localhost', :worker_1_port);
SELECT start_metadata_sync_to_node('localhost', :worker_2_port);
-- SET citus.log_remote_commands TO on;
-- SET client_min_messages TO log;
-- remove worker 2, so we can add it after we have created some functions that caused
-- problems
SELECT master_remove_node('localhost', :worker_2_port);
-- reproduction case as described in #3378
CREATE TABLE zoop_table (x int, y int);
SELECT create_distributed_table('zoop_table','x');
-- Create a function that refers to the distributed table
CREATE OR REPLACE FUNCTION zoop(a int)
RETURNS int
LANGUAGE plpgsql
-- setting the search path makes the table name resolve on the worker during initial
-- distribution
SET search_path FROM CURRENT
AS $$
DECLARE
b zoop_table.x%TYPE := 3;
BEGIN
return a + b;
END;
$$;
SELECT create_distributed_function('zoop(int)', '$1');
-- now add the worker back; this triggers function distribution, which should not fail.
SELECT 1 FROM master_add_node('localhost', :worker_2_port);
SELECT public.wait_until_metadata_sync();
-- clean up after testing
DROP SCHEMA function_table_reference CASCADE;
-- make sure the worker is added at the end regardless of anything failing, so as not to make
-- subsequent tests fail as well. All artifacts created during this test should have been
-- dropped by the drop cascade above.
SELECT 1 FROM master_add_node('localhost', :worker_2_port);


@@ -301,6 +301,6 @@ INSERT INTO app_analytics_events_mx (app_id, name) VALUES (102, 'Wayz') RETURNING *;
 INSERT INTO app_analytics_events_mx (app_id, name) VALUES (103, 'Mynt') RETURNING *;
 -- clean up
-SELECT setval('app_analytics_events_mx_id_seq'::regclass, :last_value);
+SELECT 1 FROM setval('app_analytics_events_mx_id_seq'::regclass, :last_value);
 ALTER SEQUENCE app_analytics_events_mx_id_seq
 MINVALUE :min_value MAXVALUE :max_value;