From b6e09eb69144707ee06263dde9fa1d7923c9ade2 Mon Sep 17 00:00:00 2001
From: Nils Dijk
Date: Thu, 16 Jan 2020 14:21:54 +0100
Subject: [PATCH] Fix: distributed function with table reference in declare
 (#3384)

DESCRIPTION: Fixes a problem when adding a new node due to tables referenced in a function's body

Fixes #3378

It was reported that `master_add_node` would fail if a distributed function
references a table name in the DECLARE section of its body. By default,
postgres validates the body of a function on creation. This is not a problem
in the normal case, as tables are replicated to the workers when we
distribute functions. However, when a new node is added, we first create
dependencies on the worker before we try to create any tables, and the
original tables get created out of band when the metadata gets synced to the
new node. This causes the function body validator to raise an error because
the referenced table is not yet on the worker.

To mitigate this issue we set `check_function_bodies` to `off` right before
creating the function (illustrative sketches follow the diff below). The
added test shows that this resolves the issue; it can be reproduced by
running the test on a commit without the fix.
---
 src/backend/distributed/commands/function.c   |  8 +-
 .../multi_mx_function_table_reference.out     | 89 +++++++++++++++++++
 .../expected/multi_mx_modifications.out       |  6 +-
 src/test/regress/multi_mx_schedule            |  1 +
 .../sql/multi_mx_function_table_reference.sql | 56 ++++++++++++
 .../regress/sql/multi_mx_modifications.sql    |  2 +-
 6 files changed, 157 insertions(+), 5 deletions(-)
 create mode 100644 src/test/regress/expected/multi_mx_function_table_reference.out
 create mode 100644 src/test/regress/sql/multi_mx_function_table_reference.sql

diff --git a/src/backend/distributed/commands/function.c b/src/backend/distributed/commands/function.c
index 845a15fb6..0c363df34 100644
--- a/src/backend/distributed/commands/function.c
+++ b/src/backend/distributed/commands/function.c
@@ -55,6 +55,8 @@
 #include "utils/syscache.h"
 #include "utils/regproc.h"
 
+#define DISABLE_LOCAL_CHECK_FUNCTION_BODIES "SET LOCAL check_function_bodies TO off;"
+#define RESET_CHECK_FUNCTION_BODIES "RESET check_function_bodies;"
 #define argumentStartsWith(arg, prefix) \
 	(strncmp(arg, prefix, strlen(prefix)) == 0)
 
@@ -224,7 +226,11 @@ CreateFunctionDDLCommandsIdempotent(const ObjectAddress *functionAddress)
 	char *ddlCommand = GetFunctionDDLCommand(functionAddress->objectId, true);
 	char *alterFunctionOwnerSQL = GetFunctionAlterOwnerCommand(functionAddress->objectId);
 
-	return list_make2(ddlCommand, alterFunctionOwnerSQL);
+	return list_make4(
+		DISABLE_LOCAL_CHECK_FUNCTION_BODIES,
+		ddlCommand,
+		alterFunctionOwnerSQL,
+		RESET_CHECK_FUNCTION_BODIES);
 }
 
 
diff --git a/src/test/regress/expected/multi_mx_function_table_reference.out b/src/test/regress/expected/multi_mx_function_table_reference.out
new file mode 100644
index 000000000..a433615cb
--- /dev/null
+++ b/src/test/regress/expected/multi_mx_function_table_reference.out
@@ -0,0 +1,89 @@
+-- when using distributed functions in MX for function call delegation, you can define
+-- functions that reference distributed tables in their signature or DECLARE
+-- blocks.
+-- This has caused issues when adding nodes to the cluster, as functions are
+-- distributed to the new node before shards or metadata get synced to it,
+-- causing referenced tables to be unavailable at dependency distribution time.
+SET citus.next_shard_id TO 20060000;
+CREATE SCHEMA function_table_reference;
+SET search_path TO function_table_reference;
+SET citus.shard_count TO 4;
+SET citus.shard_replication_factor TO 1;
+SET citus.replication_model TO streaming;
+SELECT start_metadata_sync_to_node('localhost', :worker_1_port);
+ start_metadata_sync_to_node
+---------------------------------------------------------------------
+
+(1 row)
+
+SELECT start_metadata_sync_to_node('localhost', :worker_2_port);
+ start_metadata_sync_to_node
+---------------------------------------------------------------------
+
+(1 row)
+
+-- SET citus.log_remote_commands TO on;
+-- SET client_min_messages TO log;
+-- remove worker 2, so we can add it after we have created some functions that caused
+-- problems
+SELECT master_remove_node('localhost', :worker_2_port);
+ master_remove_node
+---------------------------------------------------------------------
+
+(1 row)
+
+-- reproduction case as described in #3378
+CREATE TABLE zoop_table (x int, y int);
+SELECT create_distributed_table('zoop_table','x');
+ create_distributed_table
+---------------------------------------------------------------------
+
+(1 row)
+
+-- Create a function that refers to the distributed table
+CREATE OR REPLACE FUNCTION zoop(a int)
+    RETURNS int
+    LANGUAGE plpgsql
+    -- setting the search path makes the table name resolve on the worker during initial
+    -- distribution
+    SET search_path FROM CURRENT
+AS $$
+DECLARE
+    b zoop_table.x%TYPE := 3;
+BEGIN
+    return a + b;
+END;
+$$;
+SELECT create_distributed_function('zoop(int)', '$1');
+ create_distributed_function
+---------------------------------------------------------------------
+
+(1 row)
+
+-- now add the worker back; this triggers function distribution, which should not fail.
+SELECT 1 FROM master_add_node('localhost', :worker_2_port);
+ ?column?
+---------------------------------------------------------------------
+        1
+(1 row)
+
+SELECT public.wait_until_metadata_sync();
+ wait_until_metadata_sync
+---------------------------------------------------------------------
+
+(1 row)
+
+-- clean up after testing
+DROP SCHEMA function_table_reference CASCADE;
+NOTICE:  drop cascades to 2 other objects
+DETAIL:  drop cascades to table zoop_table
+drop cascades to function zoop(integer)
+-- make sure the worker is added back at the end, regardless of any failure, so that
+-- subsequent tests do not fail as well. All artifacts created during this test should
+-- have been dropped by the drop cascade above.
+SELECT 1 FROM master_add_node('localhost', :worker_2_port);
+ ?column?
+---------------------------------------------------------------------
+        1
+(1 row)
+
diff --git a/src/test/regress/expected/multi_mx_modifications.out b/src/test/regress/expected/multi_mx_modifications.out
index 86132f12f..ab98f0d64 100644
--- a/src/test/regress/expected/multi_mx_modifications.out
+++ b/src/test/regress/expected/multi_mx_modifications.out
@@ -454,10 +454,10 @@ INSERT INTO app_analytics_events_mx (app_id, name) VALUES (103, 'Mynt') RETURNIN
 (1 row)
 
 -- clean up
-SELECT setval('app_analytics_events_mx_id_seq'::regclass, :last_value);
- setval
+SELECT 1 FROM setval('app_analytics_events_mx_id_seq'::regclass, :last_value);
+ ?column?
 ---------------------------------------------------------------------
- 4503599627370497
+        1
 (1 row)
 
 ALTER SEQUENCE app_analytics_events_mx_id_seq
diff --git a/src/test/regress/multi_mx_schedule b/src/test/regress/multi_mx_schedule
index 38d000eb4..74406ec49 100644
--- a/src/test/regress/multi_mx_schedule
+++ b/src/test/regress/multi_mx_schedule
@@ -17,6 +17,7 @@ test: multi_extension
 test: multi_test_helpers
 test: multi_mx_node_metadata
 test: multi_cluster_management
+test: multi_mx_function_table_reference
 test: multi_test_catalog_views
 
 # the following test has to be run sequentially
diff --git a/src/test/regress/sql/multi_mx_function_table_reference.sql b/src/test/regress/sql/multi_mx_function_table_reference.sql
new file mode 100644
index 000000000..ac249d879
--- /dev/null
+++ b/src/test/regress/sql/multi_mx_function_table_reference.sql
@@ -0,0 +1,56 @@
+-- when using distributed functions in MX for function call delegation, you can define
+-- functions that reference distributed tables in their signature or DECLARE
+-- blocks.
+-- This has caused issues when adding nodes to the cluster, as functions are
+-- distributed to the new node before shards or metadata get synced to it,
+-- causing referenced tables to be unavailable at dependency distribution time.
+SET citus.next_shard_id TO 20060000;
+CREATE SCHEMA function_table_reference;
+SET search_path TO function_table_reference;
+SET citus.shard_count TO 4;
+SET citus.shard_replication_factor TO 1;
+SET citus.replication_model TO streaming;
+
+SELECT start_metadata_sync_to_node('localhost', :worker_1_port);
+SELECT start_metadata_sync_to_node('localhost', :worker_2_port);
+
+-- SET citus.log_remote_commands TO on;
+-- SET client_min_messages TO log;
+
+-- remove worker 2, so we can add it after we have created some functions that caused
+-- problems
+SELECT master_remove_node('localhost', :worker_2_port);
+
+-- reproduction case as described in #3378
+CREATE TABLE zoop_table (x int, y int);
+SELECT create_distributed_table('zoop_table','x');
+
+-- Create a function that refers to the distributed table
+CREATE OR REPLACE FUNCTION zoop(a int)
+    RETURNS int
+    LANGUAGE plpgsql
+
+    -- setting the search path makes the table name resolve on the worker during initial
+    -- distribution
+    SET search_path FROM CURRENT
+AS $$
+DECLARE
+    b zoop_table.x%TYPE := 3;
+BEGIN
+    return a + b;
+END;
+$$;
+SELECT create_distributed_function('zoop(int)', '$1');
+
+-- now add the worker back; this triggers function distribution, which should not fail.
+SELECT 1 FROM master_add_node('localhost', :worker_2_port);
+SELECT public.wait_until_metadata_sync();
+
+
+-- clean up after testing
+DROP SCHEMA function_table_reference CASCADE;
+
+-- make sure the worker is added back at the end, regardless of any failure, so that
+-- subsequent tests do not fail as well. All artifacts created during this test should
+-- have been dropped by the drop cascade above.
+SELECT 1 FROM master_add_node('localhost', :worker_2_port);
diff --git a/src/test/regress/sql/multi_mx_modifications.sql b/src/test/regress/sql/multi_mx_modifications.sql
index b69b12318..dae4cbf34 100644
--- a/src/test/regress/sql/multi_mx_modifications.sql
+++ b/src/test/regress/sql/multi_mx_modifications.sql
@@ -301,6 +301,6 @@ INSERT INTO app_analytics_events_mx (app_id, name) VALUES (102, 'Wayz') RETURNIN
 INSERT INTO app_analytics_events_mx (app_id, name) VALUES (103, 'Mynt') RETURNING *;
 
 -- clean up
-SELECT setval('app_analytics_events_mx_id_seq'::regclass, :last_value);
+SELECT 1 FROM setval('app_analytics_events_mx_id_seq'::regclass, :last_value);
 ALTER SEQUENCE app_analytics_events_mx_id_seq
 	MINVALUE :min_value MAXVALUE :max_value;
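
A minimal standalone sketch of the postgres behavior the fix relies on: with
`check_function_bodies` set to `off`, CREATE FUNCTION stores a plpgsql body
without validating it, so a reference to a table that does not exist yet is
accepted. The table and function names below are hypothetical and not part of
this patch.

BEGIN;
-- SET LOCAL only takes effect inside a transaction block, which is also why
-- the fix ships an explicit RESET alongside it
SET LOCAL check_function_bodies TO off;
-- with validation disabled, the DECLARE section is not resolved at creation
-- time, so the missing table is not an error; with the default setting (on),
-- this CREATE FUNCTION would fail
CREATE FUNCTION sketch_fn(a int)
    RETURNS int
    LANGUAGE plpgsql
AS $$
DECLARE
    b table_that_does_not_exist_yet.x%TYPE := 3;
BEGIN
    RETURN a + b;
END;
$$;
RESET check_function_bodies;
COMMIT;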
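
With the list_make4 change above, the idempotent DDL command list replayed on
a worker would take roughly the following shape for the zoop example. This is
a sketch, not the literal output: the exact CREATE and ALTER statements come
from GetFunctionDDLCommand and GetFunctionAlterOwnerCommand, and the owner
role shown is hypothetical.

SET LOCAL check_function_bodies TO off;
CREATE OR REPLACE FUNCTION function_table_reference.zoop(a int)
    RETURNS int
    LANGUAGE plpgsql
    SET search_path TO function_table_reference
AS $$
DECLARE
    b zoop_table.x%TYPE := 3;
BEGIN
    return a + b;
END;
$$;
-- hypothetical owner role for illustration
ALTER FUNCTION function_table_reference.zoop(int) OWNER TO function_owner;
RESET check_function_bodies;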