Fix: distributed function with table reference in declare (#3384)

DESCRIPTION: Fixes a problem when adding a new node caused by tables referenced in a function's body

Fixes #3378 

It was reported that `master_add_node` would fail if a distributed function references a table name in the `DECLARE` section of its body. By default Postgres validates the body of a function on creation. This is not a problem in the normal case, as tables are already replicated to the workers when we distribute functions.

However, when a new node is added we first create dependencies on the workers before we try to create any tables, and the original tables only get created out of band once the metadata is synced to the new node. This causes the function body validator to raise an error because the table is not yet on the worker.

To mitigate this issue we set `check_function_bodies` to `off` right before creating the function, and reset it afterwards.
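To illustrate the effect of that setting, here is a minimal sketch on plain PostgreSQL (not Citus-specific; `add_three` and `missing_table` are made-up names for this example): with body validation on, a plpgsql function whose `DECLARE` section uses `%TYPE` on a table that does not exist yet fails at `CREATE FUNCTION` time, while with validation off the same statement succeeds and the body is only checked when the function is first executed.

```sql
-- Default behaviour: the plpgsql validator compiles the body, so the %TYPE
-- reference in DECLARE must resolve; this CREATE FUNCTION errors out because
-- missing_table does not exist.
SET check_function_bodies TO on;
CREATE FUNCTION add_three(a int) RETURNS int LANGUAGE plpgsql AS $$
DECLARE
    b missing_table.x%TYPE := 3;
BEGIN
    RETURN a + b;
END;
$$;

-- With validation disabled the same statement succeeds; the reference is only
-- resolved on first execution, by which time the table can exist.
SET check_function_bodies TO off;
CREATE FUNCTION add_three(a int) RETURNS int LANGUAGE plpgsql AS $$
DECLARE
    b missing_table.x%TYPE := 3;
BEGIN
    RETURN a + b;
END;
$$;
```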

The added test shows that this resolves the issue (the issue can be reproduced by running the test on the commit without the fix).
pull/3373/head
Nils Dijk 2020-01-16 14:21:54 +01:00 committed by GitHub
parent e76281500c
commit b6e09eb691
6 changed files with 157 additions and 5 deletions


@@ -55,6 +55,8 @@
 #include "utils/syscache.h"
 #include "utils/regproc.h"
+#define DISABLE_LOCAL_CHECK_FUNCTION_BODIES "SET LOCAL check_function_bodies TO off;"
+#define RESET_CHECK_FUNCTION_BODIES "RESET check_function_bodies;"
 #define argumentStartsWith(arg, prefix) \
 (strncmp(arg, prefix, strlen(prefix)) == 0)
@@ -224,7 +226,11 @@ CreateFunctionDDLCommandsIdempotent(const ObjectAddress *functionAddress)
 char *ddlCommand = GetFunctionDDLCommand(functionAddress->objectId, true);
 char *alterFunctionOwnerSQL = GetFunctionAlterOwnerCommand(functionAddress->objectId);
-return list_make2(ddlCommand, alterFunctionOwnerSQL);
+return list_make4(
+	DISABLE_LOCAL_CHECK_FUNCTION_BODIES,
+	ddlCommand,
+	alterFunctionOwnerSQL,
+	RESET_CHECK_FUNCTION_BODIES);
 }
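With this change the propagated function DDL is bracketed by the two new commands. As a rough sketch of what a worker would then receive (assuming the list is replayed in order inside a single transaction, using the `zoop()` function from the new test as a stand-in body, and `postgres` as a placeholder owner):

```sql
BEGIN;
SET LOCAL check_function_bodies TO off;   -- DISABLE_LOCAL_CHECK_FUNCTION_BODIES
-- output of GetFunctionDDLCommand(); the body is not validated even though
-- function_table_reference.zoop_table may not exist on this node yet
CREATE OR REPLACE FUNCTION function_table_reference.zoop(a integer)
    RETURNS integer
    LANGUAGE plpgsql
    SET search_path TO function_table_reference
AS $$
DECLARE
    b zoop_table.x%TYPE := 3;
BEGIN
    RETURN a + b;
END;
$$;
-- output of GetFunctionAlterOwnerCommand(); owner name is a placeholder
ALTER FUNCTION function_table_reference.zoop(integer) OWNER TO postgres;
RESET check_function_bodies;              -- RESET_CHECK_FUNCTION_BODIES
COMMIT;
```

Because `SET LOCAL` only lasts until the end of the transaction it runs in, the normal value of `check_function_bodies` is restored for any later commands on the same connection.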


@@ -0,0 +1,89 @@
-- when using distributed functions in MX for function call delegation you could define
-- functions that have references to distributed tables in their signature or declare
-- blocks.
-- This has caused issues with adding nodes to the cluster, as functions will be
-- distributed to the new node before shards or metadata gets synced to that node,
-- causing referenced tables to not be available during dependency distribution time.
SET citus.next_shard_id TO 20060000;
CREATE SCHEMA function_table_reference;
SET search_path TO function_table_reference;
SET citus.shard_count TO 4;
SET citus.shard_replication_factor TO 1;
SET citus.replication_model TO streaming;
SELECT start_metadata_sync_to_node('localhost', :worker_1_port);
start_metadata_sync_to_node
---------------------------------------------------------------------
(1 row)
SELECT start_metadata_sync_to_node('localhost', :worker_2_port);
start_metadata_sync_to_node
---------------------------------------------------------------------
(1 row)
-- SET citus.log_remote_commands TO on;
-- SET client_min_messages TO log;
-- remove worker 2, so we can add it after we have created some functions that caused
-- problems
SELECT master_remove_node('localhost', :worker_2_port);
master_remove_node
---------------------------------------------------------------------
(1 row)
-- reproduction case as described in #3378
CREATE TABLE zoop_table (x int, y int);
SELECT create_distributed_table('zoop_table','x');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- Create a function that refers to the distributed table
CREATE OR REPLACE FUNCTION zoop(a int)
RETURNS int
LANGUAGE plpgsql
-- setting the search path makes the table name resolve on the worker during initial
-- distribution
SET search_path FROM CURRENT
AS $$
DECLARE
b zoop_table.x%TYPE := 3;
BEGIN
return a + b;
END;
$$;
SELECT create_distributed_function('zoop(int)', '$1');
create_distributed_function
---------------------------------------------------------------------
(1 row)
-- now add the worker back; this triggers function distribution, which should not fail.
SELECT 1 FROM master_add_node('localhost', :worker_2_port);
?column?
---------------------------------------------------------------------
1
(1 row)
SELECT public.wait_until_metadata_sync();
wait_until_metadata_sync
---------------------------------------------------------------------
(1 row)
-- clean up after testing
DROP SCHEMA function_table_reference CASCADE;
NOTICE: drop cascades to 2 other objects
DETAIL: drop cascades to table zoop_table
drop cascades to function zoop(integer)
-- make sure the worker is added at the end regardless of anything failing, so as not to make
-- subsequent tests fail as well. All artifacts created during this test should have been
-- dropped by the drop cascade above.
SELECT 1 FROM master_add_node('localhost', :worker_2_port);
?column?
---------------------------------------------------------------------
1
(1 row)


@@ -454,10 +454,10 @@ INSERT INTO app_analytics_events_mx (app_id, name) VALUES (103, 'Mynt') RETURNING *;
 (1 row)
 -- clean up
-SELECT setval('app_analytics_events_mx_id_seq'::regclass, :last_value);
- setval
+SELECT 1 FROM setval('app_analytics_events_mx_id_seq'::regclass, :last_value);
+ ?column?
 ---------------------------------------------------------------------
-4503599627370497
+1
 (1 row)
 ALTER SEQUENCE app_analytics_events_mx_id_seq


@@ -17,6 +17,7 @@ test: multi_extension
 test: multi_test_helpers
 test: multi_mx_node_metadata
 test: multi_cluster_management
+test: multi_mx_function_table_reference
 test: multi_test_catalog_views


@@ -0,0 +1,56 @@
-- when using distributed functions in MX for function call delegation you could define
-- functions that have references to distributed tables in their signature or declare
-- blocks.
-- This has caused issues with adding nodes to the cluster, as functions will be
-- distributed to the new node before shards or metadata gets synced to that node,
-- causing referenced tables to not be available during dependency distribution time.
SET citus.next_shard_id TO 20060000;
CREATE SCHEMA function_table_reference;
SET search_path TO function_table_reference;
SET citus.shard_count TO 4;
SET citus.shard_replication_factor TO 1;
SET citus.replication_model TO streaming;
SELECT start_metadata_sync_to_node('localhost', :worker_1_port);
SELECT start_metadata_sync_to_node('localhost', :worker_2_port);
-- SET citus.log_remote_commands TO on;
-- SET client_min_messages TO log;
-- remove worker 2, so we can add it after we have created some functions that caused
-- problems
SELECT master_remove_node('localhost', :worker_2_port);
-- reproduction case as described in #3378
CREATE TABLE zoop_table (x int, y int);
SELECT create_distributed_table('zoop_table','x');
-- Create a function that refers to the distributed table
CREATE OR REPLACE FUNCTION zoop(a int)
RETURNS int
LANGUAGE plpgsql
-- setting the search path makes the table name resolve on the worker during initial
-- distribution
SET search_path FROM CURRENT
AS $$
DECLARE
b zoop_table.x%TYPE := 3;
BEGIN
return a + b;
END;
$$;
SELECT create_distributed_function('zoop(int)', '$1');
-- now add the worker back; this triggers function distribution, which should not fail.
SELECT 1 FROM master_add_node('localhost', :worker_2_port);
SELECT public.wait_until_metadata_sync();
-- clean up after testing
DROP SCHEMA function_table_reference CASCADE;
-- make sure the worker is added at the end regardless of anything failing, so as not to make
-- subsequent tests fail as well. All artifacts created during this test should have been
-- dropped by the drop cascade above.
SELECT 1 FROM master_add_node('localhost', :worker_2_port);


@@ -301,6 +301,6 @@ INSERT INTO app_analytics_events_mx (app_id, name) VALUES (102, 'Wayz') RETURNING *;
 INSERT INTO app_analytics_events_mx (app_id, name) VALUES (103, 'Mynt') RETURNING *;
 -- clean up
-SELECT setval('app_analytics_events_mx_id_seq'::regclass, :last_value);
+SELECT 1 FROM setval('app_analytics_events_mx_id_seq'::regclass, :last_value);
 ALTER SEQUENCE app_analytics_events_mx_id_seq
 MINVALUE :min_value MAXVALUE :max_value;