mirror of https://github.com/citusdata/citus.git
distributed_functions: test aggregation strategies
parent
364b33a22d
commit
04b5dc39bc
|
@ -91,12 +91,19 @@ begin
|
||||||
return state * 2;
|
return state * 2;
|
||||||
end;
|
end;
|
||||||
$$;
|
$$;
|
||||||
|
CREATE FUNCTION agg_combinefunc(state1 int, state2 int)
|
||||||
|
RETURNS int IMMUTABLE LANGUAGE plpgsql AS $$
|
||||||
|
begin
|
||||||
|
return state1 + state2;
|
||||||
|
end;
|
||||||
|
$$;
|
||||||
CREATE AGGREGATE sum2(int) (
|
CREATE AGGREGATE sum2(int) (
|
||||||
sfunc = agg_sfunc,
|
sfunc = agg_sfunc,
|
||||||
stype = int,
|
stype = int,
|
||||||
sspace = 8,
|
sspace = 8,
|
||||||
finalfunc = agg_finalfunc,
|
finalfunc = agg_finalfunc,
|
||||||
finalfunc_extra,
|
finalfunc_extra,
|
||||||
|
combinefunc = agg_combinefunc,
|
||||||
initcond = '5',
|
initcond = '5',
|
||||||
msfunc = agg_sfunc,
|
msfunc = agg_sfunc,
|
||||||
mstype = int,
|
mstype = int,
|
||||||
|
@ -111,13 +118,22 @@ CREATE AGGREGATE sum2(int) (
|
||||||
-- allow alter statements to be propagated and keep the functions in sync across machines
|
-- allow alter statements to be propagated and keep the functions in sync across machines
|
||||||
SET citus.enable_ddl_propagation TO on;
|
SET citus.enable_ddl_propagation TO on;
|
||||||
-- functions are distributed by int arguments, when run in isolation it is not guaranteed a table actually exists.
|
-- functions are distributed by int arguments, when run in isolation it is not guaranteed a table actually exists.
|
||||||
CREATE TABLE colocation_table(id int);
|
CREATE TABLE colocation_table(id int, val int);
|
||||||
SELECT create_distributed_table('colocation_table','id');
|
SELECT create_distributed_table('colocation_table','id');
|
||||||
create_distributed_table
|
create_distributed_table
|
||||||
--------------------------
|
--------------------------
|
||||||
|
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
INSERT INTO colocation_table VALUES (1, 0), (2, 1), (3, 4), (6, 2), (3, 2), (2, 0), (8, 4);
|
||||||
|
CREATE TABLE cojoin_table(id int, val int);
|
||||||
|
SELECT create_distributed_table('cojoin_table','id');
|
||||||
|
create_distributed_table
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
INSERT INTO cojoin_table VALUES (1, 2), (2, 3), (5, 6), (6, 7), (3, 4), (2, 3), (8, 9);
|
||||||
-- make sure that none of the active and primary nodes hasmetadata
|
-- make sure that none of the active and primary nodes hasmetadata
|
||||||
-- at the start of the test
|
-- at the start of the test
|
||||||
select bool_or(hasmetadata) from pg_dist_node WHERE isactive AND noderole = 'primary';
|
select bool_or(hasmetadata) from pg_dist_node WHERE isactive AND noderole = 'primary';
|
||||||
|
@ -369,6 +385,50 @@ SELECT * FROM run_command_on_workers('SELECT function_tests2.add(2,3);') ORDER B
|
||||||
|
|
||||||
ALTER FUNCTION function_tests2.add(int,int) SET SCHEMA function_tests;
|
ALTER FUNCTION function_tests2.add(int,int) SET SCHEMA function_tests;
|
||||||
ALTER AGGREGATE sum2(int) SET SCHEMA function_tests2;
|
ALTER AGGREGATE sum2(int) SET SCHEMA function_tests2;
|
||||||
|
-- Test distributed execution
|
||||||
|
-- This fails before being marked for distributed execution
|
||||||
|
SELECT ct.val, function_tests2.sum2(ct.id + jt.val)
|
||||||
|
FROM colocation_table ct
|
||||||
|
JOIN cojoin_table jt ON ct.id = jt.id
|
||||||
|
GROUP BY ct.val ORDER BY ct.val;
|
||||||
|
ERROR: unsupported aggregate function sum2
|
||||||
|
SELECT mark_aggregate_for_distributed_execution('function_tests2.sum2(int)', 'combine');
|
||||||
|
mark_aggregate_for_distributed_execution
|
||||||
|
------------------------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT ct.val, function_tests2.sum2(ct.id + jt.val)
|
||||||
|
FROM colocation_table ct
|
||||||
|
JOIN cojoin_table jt ON ct.id = jt.id
|
||||||
|
GROUP BY ct.val ORDER BY ct.val;
|
||||||
|
val | sum2
|
||||||
|
-----+------
|
||||||
|
0 | 56
|
||||||
|
1 | 40
|
||||||
|
2 | 70
|
||||||
|
4 | 78
|
||||||
|
(4 rows)
|
||||||
|
|
||||||
|
-- Commute strategy isn't rejected, but gives incorrect results
|
||||||
|
SELECT mark_aggregate_for_distributed_execution('function_tests2.sum2(int)', 'commute');
|
||||||
|
mark_aggregate_for_distributed_execution
|
||||||
|
------------------------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT ct.val, function_tests2.sum2(ct.id + jt.val)
|
||||||
|
FROM colocation_table ct
|
||||||
|
JOIN cojoin_table jt ON ct.id = jt.id
|
||||||
|
GROUP BY ct.val ORDER BY ct.val;
|
||||||
|
val | sum2
|
||||||
|
-----+------
|
||||||
|
0 | 102
|
||||||
|
1 | 70
|
||||||
|
2 | 130
|
||||||
|
4 | 146
|
||||||
|
(4 rows)
|
||||||
|
|
||||||
-- when a function is distributed and we create or replace the function we need to propagate the statement to the worker to keep it in sync with the coordinator
|
-- when a function is distributed and we create or replace the function we need to propagate the statement to the worker to keep it in sync with the coordinator
|
||||||
CREATE OR REPLACE FUNCTION add(integer, integer) RETURNS integer
|
CREATE OR REPLACE FUNCTION add(integer, integer) RETURNS integer
|
||||||
AS 'select $1 * $2;' -- I know, this is not an add, but the output will tell us if the update succeeded
|
AS 'select $1 * $2;' -- I know, this is not an add, but the output will tell us if the update succeeded
|
||||||
|
@ -388,6 +448,10 @@ SELECT * FROM run_command_on_workers('SELECT function_tests.add(2,3);') ORDER BY
|
||||||
localhost | 57638 | t | 6
|
localhost | 57638 | t | 6
|
||||||
(2 rows)
|
(2 rows)
|
||||||
|
|
||||||
|
-- First test that we reject NULL
|
||||||
|
SELECT create_distributed_function(NULL);
|
||||||
|
ERROR: the first parameter for create_distributed_function() should be a single a valid function or procedure name followed by a list of parameters in parantheses
|
||||||
|
HINT: skip the parameters with OUT argtype as they are not part of the signature in PostgreSQL
|
||||||
-- distributed functions should not be allowed to depend on an extension, also functions
|
-- distributed functions should not be allowed to depend on an extension, also functions
|
||||||
-- that depend on an extension should not be allowed to be distributed.
|
-- that depend on an extension should not be allowed to be distributed.
|
||||||
ALTER FUNCTION add(int,int) DEPENDS ON EXTENSION citus;
|
ALTER FUNCTION add(int,int) DEPENDS ON EXTENSION citus;
|
||||||
|
@ -441,7 +505,7 @@ SELECT create_distributed_function('add_with_param_names(int, int)', '$3');
|
||||||
ERROR: cannot distribute the function "add_with_param_names" since the distribution argument is not valid
|
ERROR: cannot distribute the function "add_with_param_names" since the distribution argument is not valid
|
||||||
HINT: Either provide a valid function argument name or a valid "$paramIndex" to create_distributed_function()
|
HINT: Either provide a valid function argument name or a valid "$paramIndex" to create_distributed_function()
|
||||||
SELECT create_distributed_function('add_with_param_names(int, int)', '$1a');
|
SELECT create_distributed_function('add_with_param_names(int, int)', '$1a');
|
||||||
ERROR: invalid input syntax for integer: "1a"
|
ERROR: invalid input syntax for type integer: "1a"
|
||||||
-- non existing column name
|
-- non existing column name
|
||||||
SELECT create_distributed_function('add_with_param_names(int, int)', 'aaa');
|
SELECT create_distributed_function('add_with_param_names(int, int)', 'aaa');
|
||||||
ERROR: cannot distribute the function "add_with_param_names" since the distribution argument is not valid
|
ERROR: cannot distribute the function "add_with_param_names" since the distribution argument is not valid
|
||||||
|
|
|
@ -97,12 +97,20 @@ begin
|
||||||
end;
|
end;
|
||||||
$$;
|
$$;
|
||||||
|
|
||||||
|
CREATE FUNCTION agg_combinefunc(state1 int, state2 int)
|
||||||
|
RETURNS int IMMUTABLE LANGUAGE plpgsql AS $$
|
||||||
|
begin
|
||||||
|
return state1 + state2;
|
||||||
|
end;
|
||||||
|
$$;
|
||||||
|
|
||||||
CREATE AGGREGATE sum2(int) (
|
CREATE AGGREGATE sum2(int) (
|
||||||
sfunc = agg_sfunc,
|
sfunc = agg_sfunc,
|
||||||
stype = int,
|
stype = int,
|
||||||
sspace = 8,
|
sspace = 8,
|
||||||
finalfunc = agg_finalfunc,
|
finalfunc = agg_finalfunc,
|
||||||
finalfunc_extra,
|
finalfunc_extra,
|
||||||
|
combinefunc = agg_combinefunc,
|
||||||
initcond = '5',
|
initcond = '5',
|
||||||
msfunc = agg_sfunc,
|
msfunc = agg_sfunc,
|
||||||
mstype = int,
|
mstype = int,
|
||||||
|
@ -119,8 +127,13 @@ CREATE AGGREGATE sum2(int) (
|
||||||
SET citus.enable_ddl_propagation TO on;
|
SET citus.enable_ddl_propagation TO on;
|
||||||
|
|
||||||
-- functions are distributed by int arguments, when run in isolation it is not guaranteed a table actually exists.
|
-- functions are distributed by int arguments, when run in isolation it is not guaranteed a table actually exists.
|
||||||
CREATE TABLE colocation_table(id int);
|
CREATE TABLE colocation_table(id int, val int);
|
||||||
SELECT create_distributed_table('colocation_table','id');
|
SELECT create_distributed_table('colocation_table','id');
|
||||||
|
INSERT INTO colocation_table VALUES (1, 0), (2, 1), (3, 4), (6, 2), (3, 2), (2, 0), (8, 4);
|
||||||
|
|
||||||
|
CREATE TABLE cojoin_table(id int, val int);
|
||||||
|
SELECT create_distributed_table('cojoin_table','id');
|
||||||
|
INSERT INTO cojoin_table VALUES (1, 2), (2, 3), (5, 6), (6, 7), (3, 4), (2, 3), (8, 9);
|
||||||
|
|
||||||
-- make sure that none of the active and primary nodes hasmetadata
|
-- make sure that none of the active and primary nodes hasmetadata
|
||||||
-- at the start of the test
|
-- at the start of the test
|
||||||
|
@ -217,6 +230,26 @@ ALTER FUNCTION function_tests2.add(int,int) SET SCHEMA function_tests;
|
||||||
|
|
||||||
ALTER AGGREGATE sum2(int) SET SCHEMA function_tests2;
|
ALTER AGGREGATE sum2(int) SET SCHEMA function_tests2;
|
||||||
|
|
||||||
|
-- Test distributed execution
|
||||||
|
-- This fails before being marked for distributed execution
|
||||||
|
SELECT ct.val, function_tests2.sum2(ct.id + jt.val)
|
||||||
|
FROM colocation_table ct
|
||||||
|
JOIN cojoin_table jt ON ct.id = jt.id
|
||||||
|
GROUP BY ct.val ORDER BY ct.val;
|
||||||
|
|
||||||
|
SELECT mark_aggregate_for_distributed_execution('function_tests2.sum2(int)', 'combine');
|
||||||
|
SELECT ct.val, function_tests2.sum2(ct.id + jt.val)
|
||||||
|
FROM colocation_table ct
|
||||||
|
JOIN cojoin_table jt ON ct.id = jt.id
|
||||||
|
GROUP BY ct.val ORDER BY ct.val;
|
||||||
|
|
||||||
|
-- Commute strategy isn't rejected, but gives incorrect results
|
||||||
|
SELECT mark_aggregate_for_distributed_execution('function_tests2.sum2(int)', 'commute');
|
||||||
|
SELECT ct.val, function_tests2.sum2(ct.id + jt.val)
|
||||||
|
FROM colocation_table ct
|
||||||
|
JOIN cojoin_table jt ON ct.id = jt.id
|
||||||
|
GROUP BY ct.val ORDER BY ct.val;
|
||||||
|
|
||||||
-- when a function is distributed and we create or replace the function we need to propagate the statement to the worker to keep it in sync with the coordinator
|
-- when a function is distributed and we create or replace the function we need to propagate the statement to the worker to keep it in sync with the coordinator
|
||||||
CREATE OR REPLACE FUNCTION add(integer, integer) RETURNS integer
|
CREATE OR REPLACE FUNCTION add(integer, integer) RETURNS integer
|
||||||
AS 'select $1 * $2;' -- I know, this is not an add, but the output will tell us if the update succeeded
|
AS 'select $1 * $2;' -- I know, this is not an add, but the output will tell us if the update succeeded
|
||||||
|
@ -226,6 +259,9 @@ AS 'select $1 * $2;' -- I know, this is not an add, but the output will tell us
|
||||||
SELECT public.verify_function_is_same_on_workers('function_tests.add(int,int)');
|
SELECT public.verify_function_is_same_on_workers('function_tests.add(int,int)');
|
||||||
SELECT * FROM run_command_on_workers('SELECT function_tests.add(2,3);') ORDER BY 1,2;
|
SELECT * FROM run_command_on_workers('SELECT function_tests.add(2,3);') ORDER BY 1,2;
|
||||||
|
|
||||||
|
-- First test that we reject NULL
|
||||||
|
SELECT create_distributed_function(NULL);
|
||||||
|
|
||||||
-- distributed functions should not be allowed to depend on an extension, also functions
|
-- distributed functions should not be allowed to depend on an extension, also functions
|
||||||
-- that depend on an extension should not be allowed to be distributed.
|
-- that depend on an extension should not be allowed to be distributed.
|
||||||
ALTER FUNCTION add(int,int) DEPENDS ON EXTENSION citus;
|
ALTER FUNCTION add(int,int) DEPENDS ON EXTENSION citus;
|
||||||
|
|
Loading…
Reference in New Issue