citus/src/test/regress/expected/aggregate_support.out

--
-- AGGREGATE SUPPORT
--
-- Tests support for user defined aggregates
create schema aggregate_support;
set search_path to aggregate_support;
set citus.coordinator_aggregation_strategy to 'disabled';
-- We test with & without STRICT as our code is responsible for managing these NULL checks
-- sum2_sfunc_strict(state, x): returns state + x.
-- Declared STRICT, so PostgreSQL does not call it when either argument is NULL.
-- Used as both sfunc and combinefunc of the sum2_strict aggregate.
create function sum2_sfunc_strict(state int, x int)
returns int immutable strict language plpgsql as $$
begin return state + x;
end;
$$;
-- sum2_finalfunc_strict(state): returns state * 2.
-- Declared STRICT, so it is skipped (result NULL) when the state is NULL.
-- Used as finalfunc of the sum2_strict aggregate.
create function sum2_finalfunc_strict(state int)
returns int immutable strict language plpgsql as $$
begin return state * 2;
end;
$$;
-- sum2_sfunc(state, x): returns state + x.
-- Not STRICT: it is called with NULL arguments too, and state + NULL yields NULL.
-- Used as both sfunc and combinefunc of the sum2 aggregate.
create function sum2_sfunc(state int, x int)
returns int immutable language plpgsql as $$
begin return state + x;
end;
$$;
-- sum2_finalfunc(state): returns state * 2.
-- Not STRICT: a NULL state is passed in and NULL * 2 yields NULL.
-- Used as finalfunc of the sum2 aggregate.
create function sum2_finalfunc(state int)
returns int immutable language plpgsql as $$
begin return state * 2;
end;
$$;
-- sum2(int): twice the sum of the inputs, built from the non-strict helpers.
-- The state starts at 0 (initcond); sum2_sfunc doubles as the combine function.
create aggregate sum2 (int) (
    sfunc = sum2_sfunc,
    stype = int,
    finalfunc = sum2_finalfunc,
    combinefunc = sum2_sfunc,
    initcond = '0'
);
-- sum2_strict(int): same shape as sum2 but built from the STRICT helpers.
-- No initcond is given, so the transition state starts out NULL.
create aggregate sum2_strict (int) (
    sfunc = sum2_sfunc_strict,
    stype = int,
    finalfunc = sum2_finalfunc_strict,
    combinefunc = sum2_sfunc_strict
);
select create_distributed_function('sum2(int)');
 create_distributed_function
---------------------------------------------------------------------

(1 row)

select create_distributed_function('sum2_strict(int)');
 create_distributed_function
---------------------------------------------------------------------

(1 row)

create table aggdata (id int, key int, val int, valf float8);
select create_distributed_table('aggdata', 'id');
 create_distributed_table
---------------------------------------------------------------------

(1 row)

insert into aggdata (id, key, val, valf) values (1, 1, 2, 11.2), (2, 1, NULL, 2.1), (3, 2, 2, 3.22), (4, 2, 3, 4.23), (5, 2, 5, 5.25), (6, 3, 4, 63.4), (7, 5, NULL, 75), (8, 6, NULL, NULL), (9, 6, NULL, 96), (10, 7, 8, 1078), (11, 9, 0, 1.19);
select key, sum2(val), sum2_strict(val), stddev(valf) from aggdata group by key order by key;
 key | sum2 | sum2_strict |      stddev
---------------------------------------------------------------------
   1 |      |           4 | 6.43467170879758
   2 |   20 |          20 | 1.01500410508201
   3 |    8 |           8 |
   5 |      |             |
   6 |      |             |
   7 |   16 |          16 |
   9 |    0 |           0 |
(7 rows)

-- FILTER supported
select key, sum2(val) filter (where valf < 5), sum2_strict(val) filter (where valf < 5) from aggdata group by key order by key;
 key | sum2 | sum2_strict
---------------------------------------------------------------------
   1 |      |
   2 |   10 |          10
   3 |    0 |
   5 |    0 |
   6 |    0 |
   7 |    0 |
   9 |    0 |           0
(7 rows)

-- DISTINCT unsupported, unless grouped by partition key
select key, sum2(distinct val), sum2_strict(distinct val) from aggdata group by key order by key;
ERROR:  cannot compute aggregate (distinct)
DETAIL:  table partitioning is unsuitable for aggregate (distinct)
select id, sum2(distinct val), sum2_strict(distinct val) from aggdata group by id order by id;
 id | sum2 | sum2_strict
---------------------------------------------------------------------
  1 |    4 |           4
  2 |      |
  3 |    4 |           4
  4 |    6 |           6
  5 |   10 |          10
  6 |    8 |           8
  7 |      |
  8 |      |
  9 |      |
 10 |   16 |          16
 11 |    0 |           0
(11 rows)

-- ORDER BY unsupported
select key, sum2(val order by valf), sum2_strict(val order by valf) from aggdata group by key order by key;
ERROR:  unsupported aggregate function sum2
-- Test handling a lack of intermediate results
select sum2(val), sum2_strict(val) from aggdata where valf = 0;
 sum2 | sum2_strict
---------------------------------------------------------------------
    0 |
(1 row)

-- Test HAVING
select key, stddev(valf) from aggdata group by key having stddev(valf) > 2 order by key;
 key |      stddev
---------------------------------------------------------------------
   1 | 6.43467170879758
(1 row)

select key, stddev(valf) from aggdata group by key having stddev(val::float8) > 1 order by key;
 key |      stddev
---------------------------------------------------------------------
   2 | 1.01500410508201
(1 row)

-- Test https://github.com/citusdata/citus/issues/3446
set citus.coordinator_aggregation_strategy to 'row-gather';
select id, stddev(val) from aggdata group by id order by 1;
 id | stddev
---------------------------------------------------------------------
  1 |
  2 |
  3 |
  4 |
  5 |
  6 |
  7 |
  8 |
  9 |
 10 |
 11 |
(11 rows)

set citus.coordinator_aggregation_strategy to 'disabled';
-- test polymorphic aggregates from https://github.com/citusdata/citus/issues/2397
-- we do not currently support pseudotypes for transition types, so this errors for now
-- first_agg($1, $2): returns $1 unless it is NULL, in which case it returns $2.
-- Polymorphic (anyelement) SQL-language function, declared STABLE.
CREATE OR REPLACE FUNCTION first_agg(anyelement, anyelement)
RETURNS anyelement AS $$
	SELECT CASE WHEN $1 IS NULL THEN $2 ELSE $1 END;
$$ LANGUAGE SQL STABLE;
-- first(anyelement): polymorphic aggregate using first_agg as both transition
-- and combine function; the transition type is the pseudotype anyelement.
CREATE AGGREGATE first (
	sfunc    = first_agg,
	basetype = anyelement,
	stype    = anyelement,
	combinefunc = first_agg
);
-- last_agg($1, $2): ignores $1 and always returns $2.
-- Polymorphic (anyelement) SQL-language function, declared STABLE.
CREATE OR REPLACE FUNCTION last_agg(anyelement, anyelement)
RETURNS anyelement AS $$
	SELECT $2;
$$ LANGUAGE SQL STABLE;
-- last(anyelement): polymorphic aggregate using last_agg as both transition
-- and combine function; the transition type is the pseudotype anyelement.
CREATE AGGREGATE last (
	sfunc    = last_agg,
	basetype = anyelement,
	stype    = anyelement,
	combinefunc = last_agg
);
SELECT create_distributed_function('first(anyelement)');
 create_distributed_function
---------------------------------------------------------------------

(1 row)

SELECT create_distributed_function('last(anyelement)');
 create_distributed_function
---------------------------------------------------------------------

(1 row)

SELECT key, first(val ORDER BY id), last(val ORDER BY id)
FROM aggdata GROUP BY key ORDER BY key;
ERROR:  unsupported aggregate function first
-- However, GROUP BY on distribution column gets pushed down
SELECT id, first(val ORDER BY key), last(val ORDER BY key)
FROM aggdata GROUP BY id ORDER BY id;
 id | first | last
---------------------------------------------------------------------
  1 |     2 |    2
  2 |       |
  3 |     2 |    2
  4 |     3 |    3
  5 |     5 |    5
  6 |     4 |    4
  7 |       |
  8 |       |
  9 |       |
 10 |     8 |    8
 11 |     0 |    0
(11 rows)

-- Test that expressions don't slip past. This fails
SELECT id%5, first(val ORDER BY key), last(val ORDER BY key)
FROM aggdata GROUP BY id%5 ORDER BY id%5;
ERROR:  unsupported aggregate function first
-- test aggregate with stype which is not a by-value datum
-- also test our handling of the aggregate not existing on workers
-- sumstring_sfunc(state, x): casts both text arguments to float8, adds them,
-- and returns the sum cast back to text.
create function sumstring_sfunc(state text, x text)
returns text immutable language plpgsql as $$
begin return (state::float8 + x::float8)::text;
end;
$$;
-- sumstring(text): numeric sum carried in a text state, starting from '0'.
-- sumstring_sfunc serves as both transition and combine function.
create aggregate sumstring(text) (
	sfunc = sumstring_sfunc,
	stype = text,
	combinefunc = sumstring_sfunc,
	initcond = '0'
);
select sumstring(valf::text) from aggdata where valf is not null;
ERROR:  function "aggregate_support.sumstring(text)" does not exist
CONTEXT:  while executing command on localhost:xxxxx
select create_distributed_function('sumstring(text)');
 create_distributed_function
---------------------------------------------------------------------

(1 row)

select sumstring(valf::text) from aggdata where valf is not null;
 sumstring
---------------------------------------------------------------------
 1339.59
(1 row)

-- test aggregate with stype that has an expanded read-write form
-- array_sort(int[]): unnests the input array and rebuilds it with the
-- elements in ascending order (ORDER BY 1 on the unnested values).
CREATE FUNCTION array_sort (int[])
RETURNS int[] LANGUAGE SQL AS $$
    SELECT ARRAY(SELECT unnest($1) ORDER BY 1)
$$;
-- array_collect_sort(int): appends each input to an int[] state that starts
-- as '{}', merges partial states with array_cat, and sorts the collected
-- array with array_sort in the final step.
create aggregate array_collect_sort(el int) (
	sfunc = array_append,
	stype = int[],
	combinefunc = array_cat,
	finalfunc = array_sort,
	initcond = '{}'
);
select create_distributed_function('array_collect_sort(int)');
 create_distributed_function
---------------------------------------------------------------------

(1 row)

select array_collect_sort(val) from aggdata;
         array_collect_sort
---------------------------------------------------------------------
 {0,2,2,3,4,5,8,NULL,NULL,NULL,NULL}
(1 row)

-- Test multiuser scenario
create user notsuper;
NOTICE:  not propagating CREATE ROLE/USER commands to worker nodes
HINT:  Connect to worker nodes directly to manually create all necessary users and roles.
select run_command_on_workers($$create user notsuper$$);
      run_command_on_workers
---------------------------------------------------------------------
 (localhost,57637,t,"CREATE ROLE")
 (localhost,57638,t,"CREATE ROLE")
(2 rows)

grant all on schema aggregate_support to notsuper;
grant all on all tables in schema aggregate_support to notsuper;
select run_command_on_workers($$
grant all on schema aggregate_support to notsuper;
grant all on all tables in schema aggregate_support to notsuper;
$$);
  run_command_on_workers
---------------------------------------------------------------------
 (localhost,57637,t,GRANT)
 (localhost,57638,t,GRANT)
(2 rows)

set role notsuper;
select array_collect_sort(val) from aggdata;
         array_collect_sort
---------------------------------------------------------------------
 {0,2,2,3,4,5,8,NULL,NULL,NULL,NULL}
(1 row)

reset role;
-- Test aggregation on coordinator
set citus.coordinator_aggregation_strategy to 'row-gather';
select key, first(val order by id), last(val order by id)
from aggdata group by key order by key;
 key | first | last
---------------------------------------------------------------------
   1 |     2 |
   2 |     2 |    5
   3 |     4 |    4
   5 |       |
   6 |       |
   7 |     8 |    8
   9 |     0 |    0
(7 rows)

select key, sum2(distinct val), sum2_strict(distinct val) from aggdata group by key order by key;
 key | sum2 | sum2_strict
---------------------------------------------------------------------
   1 |      |           4
   2 |   20 |          20
   3 |    8 |           8
   5 |      |
   6 |      |
   7 |   16 |          16
   9 |    0 |           0
(7 rows)

select key, sum2(val order by valf), sum2_strict(val order by valf) from aggdata group by key order by key;
 key | sum2 | sum2_strict
---------------------------------------------------------------------
   1 |      |           4
   2 |   20 |          20
   3 |    8 |           8
   5 |      |
   6 |      |
   7 |   16 |          16
   9 |    0 |           0
(7 rows)

select string_agg(distinct floor(val/2)::text, '|' order by floor(val/2)::text) from aggdata;
 string_agg
---------------------------------------------------------------------
 0|1|2|4
(1 row)

select string_agg(distinct floor(val/2)::text, '|' order by floor(val/2)::text) filter (where val < 5) from aggdata;
 string_agg
---------------------------------------------------------------------
 0|1|2
(1 row)

select mode() within group (order by floor(val/2)) from aggdata;
 mode
---------------------------------------------------------------------
    1
(1 row)

select percentile_cont(0.5) within group(order by valf) from aggdata;
 percentile_cont
---------------------------------------------------------------------
           8.225
(1 row)

-- explicit order by: without it the row order of a grouped query is unspecified
select key, percentile_cont(key/10.0) within group(order by val) from aggdata group by key order by key;
 key | percentile_cont
---------------------------------------------------------------------
   1 |               2
   2 |             2.4
   3 |               4
   5 |
   6 |
   7 |               8
   9 |               0
(7 rows)

select floor(val/2), corr(valf, valf + val) from aggdata group by floor(val/2) order by 1;
 floor |       corr
---------------------------------------------------------------------
     0 |
     1 | 0.991808518376741
     2 |                 1
     4 |
       |
(5 rows)

select floor(val/2), corr(valf, valf + val) from aggdata group by floor(val/2) having corr(valf + val, val) < 1 order by 1;
 floor |       corr
---------------------------------------------------------------------
     1 | 0.991808518376741
     2 |                 1
(2 rows)

select array_agg(val order by valf) from aggdata;
              array_agg
---------------------------------------------------------------------
 {0,NULL,2,3,5,2,4,NULL,NULL,8,NULL}
(1 row)

-- Test TransformSubqueryNode
SET citus.task_executor_type to "task-tracker";
select * FROM (
    SELECT key, mode() within group (order by floor(agg1.val/2)) m from aggdata agg1
    group by key
) subq ORDER BY 2, 1 LIMIT 5;
 key | m
---------------------------------------------------------------------
   9 | 0
   1 | 1
   2 | 1
   3 | 2
   7 | 4
(5 rows)

-- Sort the outer query so the expected rows do not depend on the order in
-- which tasks happen to return their groups
select * FROM (
    SELECT key k, avg(distinct floor(agg1.val/2)) m from aggdata agg1
    group by key
) subq ORDER BY 1, 2;
 k |  m
---------------------------------------------------------------------
 1 |   1
 2 | 1.5
 3 |   2
 5 |
 6 |
 7 |   4
 9 |   0
(7 rows)

-- Test TransformSubqueryNode with group by not in FROM (failed in the past)
select count(*) FROM (
    SELECT avg(distinct floor(agg1.val/2)) m from aggdata agg1
    group by key
) subq;
 count
---------------------------------------------------------------------
     7
(1 row)

RESET citus.task_executor_type;
select key, count(distinct aggdata)
from aggdata group by key order by 1, 2;
 key | count
---------------------------------------------------------------------
   1 |     2
   2 |     3
   3 |     1
   5 |     1
   6 |     2
   7 |     1
   9 |     1
(7 rows)

-- GROUPING parses to GroupingFunc, distinct from Aggref
-- These three queries represent edge cases an implementation would have to consider
-- For now we error out on all three
select grouping(id)
from aggdata group by id order by 1 limit 3;
ERROR:  could not run distributed query with GROUPING
HINT:  Consider using an equality filter on the distributed table's partition column.
select key, grouping(val)
from aggdata group by key, val order by 1, 2;
ERROR:  could not run distributed query with GROUPING
HINT:  Consider using an equality filter on the distributed table's partition column.
select key, grouping(val), sum(distinct valf)
from aggdata group by key, val order by 1, 2;
ERROR:  could not run distributed query with GROUPING
HINT:  Consider using an equality filter on the distributed table's partition column.
-- Test https://github.com/citusdata/citus/issues/3328
create table nulltable(id int);
insert into nulltable values (0);
-- These cases are not type correct
select pg_catalog.worker_partial_agg('string_agg(text,text)'::regprocedure, id) from nulltable;
ERROR:  worker_partial_agg_sfunc cannot type check aggregates taking anything other than 1 argument
select pg_catalog.worker_partial_agg('sum(int8)'::regprocedure, id) from nulltable;
ERROR:  worker_partial_agg_sfunc could not confirm type correctness
select pg_catalog.coord_combine_agg('sum(float8)'::regprocedure, id::text::cstring, null::text) from nulltable;
ERROR:  coord_combine_agg_ffunc could not confirm type correctness
select pg_catalog.coord_combine_agg('avg(float8)'::regprocedure, ARRAY[id,id,id]::text::cstring, null::text) from nulltable;
ERROR:  coord_combine_agg_ffunc could not confirm type correctness
-- These cases are type correct
select pg_catalog.worker_partial_agg('sum(int)'::regprocedure, id) from nulltable;
 worker_partial_agg
---------------------------------------------------------------------
 0
(1 row)

select pg_catalog.coord_combine_agg('sum(float8)'::regprocedure, id::text::cstring, null::float8) from nulltable;
 coord_combine_agg
---------------------------------------------------------------------
                 0
(1 row)

select pg_catalog.coord_combine_agg('avg(float8)'::regprocedure, ARRAY[id,id,id]::text::cstring, null::float8) from nulltable;
 coord_combine_agg
---------------------------------------------------------------------

(1 row)

-- Test that we don't crash with empty resultset
-- See https://github.com/citusdata/citus/issues/3953
CREATE TABLE t1 (a int PRIMARY KEY, b int);
CREATE TABLE t2 (a int PRIMARY KEY, b int);
SELECT create_distributed_table('t1','a');
 create_distributed_table
---------------------------------------------------------------------

(1 row)

SELECT 'foo' as foo, count(distinct b) FROM t1;
 foo | count
---------------------------------------------------------------------
 foo |     0
(1 row)

SELECT 'foo' as foo, count(distinct b) FROM t2;
 foo | count
---------------------------------------------------------------------
 foo |     0
(1 row)

SELECT 'foo' as foo, string_agg(distinct a::character varying, ',') FROM t1;
 foo | string_agg
---------------------------------------------------------------------
 foo |
(1 row)

SELECT 'foo' as foo, string_agg(distinct a::character varying, ',') FROM t2;
 foo | string_agg
---------------------------------------------------------------------
 foo |
(1 row)

-- const_function(int): STABLE PL/pgSQL function that ignores its argument,
-- raises the notice 'stable_fn called' on every call, and returns 1.
-- Keep the body layout unchanged: the expected CONTEXT lines of the queries
-- that call it report the RAISE at line 3 of the function.
CREATE OR REPLACE FUNCTION const_function(int)
RETURNS int STABLE
LANGUAGE plpgsql
AS $function$
BEGIN
RAISE NOTICE 'stable_fn called';
RETURN 1;
END;
$function$;
-- square_func_stable(int): returns the argument multiplied by itself.
-- Declared STABLE.
CREATE OR REPLACE FUNCTION square_func_stable(int)
RETURNS int STABLE
LANGUAGE plpgsql
AS $function$
BEGIN
RETURN $1 * $1;
END;
$function$;
-- square_func(int): returns the argument multiplied by itself.
-- No volatility is declared, so PostgreSQL's default (VOLATILE) applies.
CREATE OR REPLACE FUNCTION square_func(int)
RETURNS int
LANGUAGE plpgsql
AS $function$
BEGIN
RETURN $1 * $1;
END;
$function$;
SELECT const_function(1), string_agg(a::character, ',') FROM t1;
NOTICE:  stable_fn called
CONTEXT:  PL/pgSQL function const_function(integer) line 3 at RAISE
 const_function | string_agg
---------------------------------------------------------------------
              1 |
(1 row)

SELECT const_function(1), count(b) FROM t1;
NOTICE:  stable_fn called
CONTEXT:  PL/pgSQL function const_function(integer) line 3 at RAISE
 const_function | count
---------------------------------------------------------------------
              1 |     0
(1 row)

SELECT const_function(1), count(b), 10 FROM t1;
NOTICE:  stable_fn called
CONTEXT:  PL/pgSQL function const_function(integer) line 3 at RAISE
 const_function | count | ?column?
---------------------------------------------------------------------
              1 |     0 |       10
(1 row)

SELECT const_function(1), count(b), const_function(10) FROM t1;
NOTICE:  stable_fn called
CONTEXT:  PL/pgSQL function const_function(integer) line 3 at RAISE
NOTICE:  stable_fn called
CONTEXT:  PL/pgSQL function const_function(integer) line 3 at RAISE
 const_function | count | const_function
---------------------------------------------------------------------
              1 |     0 |              1
(1 row)

SELECT square_func(5), string_agg(a::character, ','),const_function(1) FROM t1;
NOTICE:  stable_fn called
CONTEXT:  PL/pgSQL function const_function(integer) line 3 at RAISE
 square_func | string_agg | const_function
---------------------------------------------------------------------
          25 |            |              1
(1 row)

SELECT square_func_stable(5), string_agg(a::character, ','),const_function(1) FROM t1;
NOTICE:  stable_fn called
CONTEXT:  PL/pgSQL function const_function(integer) line 3 at RAISE
 square_func_stable | string_agg | const_function
---------------------------------------------------------------------
                 25 |            |              1
(1 row)

-- this will error since the expression will be
-- pushed down (group by) and the function doesn't exist on workers
SELECT square_func(5), a FROM t1 GROUP BY a;
ERROR:  function aggregate_support.square_func(integer) does not exist
HINT:  No function matches the given name and argument types. You might need to add explicit type casts.
CONTEXT:  while executing command on localhost:xxxxx
-- this will error since it has group by even though there is an aggregation
-- the expression will be pushed down.
SELECT square_func(5), a, count(a) FROM t1 GROUP BY a;
ERROR:  function aggregate_support.square_func(integer) does not exist
HINT:  No function matches the given name and argument types. You might need to add explicit type casts.
CONTEXT:  while executing command on localhost:xxxxx
set client_min_messages to error;
drop schema aggregate_support cascade;