Add the custom_aggregate_support regression test, which exercises HLL
aggregates (hll_add_agg, hll_union_agg, hll_cardinality) on distributed tables.

Files touched by this patch:
  * expected/custom_aggregate_support.out
      Expected output when the generated command is CREATE EXTENSION HLL and it
      reports that the extension already exists on the coordinator and on both
      workers.
  * expected/custom_aggregate_support_0.out
      Expected output when the generated command is the
      'SELECT false AS hll_present' fallback; every statement that needs the
      hll type or its functions is expected to fail.
  * multi_schedule
      Appends custom_aggregate_support to the group that already runs
      multi_reference_table, multi_select_for_update and
      relation_access_tracking.
  * sql/custom_aggregate_support.sql
      The test script itself.

Notes:
  * The "one week" union query filters on the half-open range
    [2018-05-24, 2018-05-31), i.e. exactly seven days. Each of those days holds
    user_id % 19 values only, so the expected cardinality is 19.
  * NOTE(review): the first expected file records
    ERROR: extension "hll" already exists, so it presumes hll was created before
    this test runs - confirm against the schedule ordering.

diff --git a/src/test/regress/expected/custom_aggregate_support.out b/src/test/regress/expected/custom_aggregate_support.out
new file mode 100644
index 000000000..2b85f94be
--- /dev/null
+++ b/src/test/regress/expected/custom_aggregate_support.out
@@ -0,0 +1,110 @@
+--
+-- CUSTOM_AGGREGATE_SUPPORT
+--
+-- Create HLL extension if present, print false result otherwise
+SELECT CASE WHEN COUNT(*) > 0 THEN
+  'CREATE EXTENSION HLL'
+ELSE 'SELECT false AS hll_present' END
+AS create_cmd FROM pg_available_extensions()
+WHERE name = 'hll'
+\gset
+:create_cmd;
+ERROR:  extension "hll" already exists
+\c - - - :worker_1_port
+:create_cmd;
+ERROR:  extension "hll" already exists
+\c - - - :worker_2_port
+:create_cmd;
+ERROR:  extension "hll" already exists
+\c - - - :master_port
+SET citus.shard_count TO 4;
+CREATE TABLE raw_table (day date, user_id int);
+CREATE TABLE daily_uniques(day date, unique_users hll);
+SELECT create_distributed_table('raw_table', 'user_id');
+ create_distributed_table 
+--------------------------
+ 
+(1 row)
+
+SELECT create_distributed_table('daily_uniques', 'day');
+ create_distributed_table 
+--------------------------
+ 
+(1 row)
+
+INSERT INTO raw_table
+  SELECT day, user_id % 19
+  FROM generate_series('2018-05-24'::timestamp, '2018-06-24'::timestamp, '1 day'::interval) as f(day),
+       generate_series(1,100) as g(user_id);
+INSERT INTO raw_table
+  SELECT day, user_id % 13
+  FROM generate_series('2018-06-10'::timestamp, '2018-07-10'::timestamp, '1 day'::interval) as f(day),
+       generate_series(1,100) as g(user_id);
+-- Run hll on raw data
+SELECT hll_cardinality(hll_union_agg(agg))
+FROM (
+  SELECT hll_add_agg(hll_hash_integer(user_id)) AS agg
+  FROM raw_table)a;
+ hll_cardinality 
+-----------------
+              19
+(1 row)
+
+-- Aggregate the data into daily_uniques
+INSERT INTO daily_uniques
+  SELECT day, hll_add_agg(hll_hash_integer(user_id))
+  FROM raw_table
+  GROUP BY 1;
+-- Basic hll_cardinality check on aggregated data
+SELECT day, hll_cardinality(unique_users)
+FROM daily_uniques
+WHERE day >= '2018-06-20' and day <= '2018-06-30'
+ORDER BY 2 DESC,1
+LIMIT 10;
+    day     | hll_cardinality 
+------------+-----------------
+ 06-20-2018 |              19
+ 06-21-2018 |              19
+ 06-22-2018 |              19
+ 06-23-2018 |              19
+ 06-24-2018 |              19
+ 06-25-2018 |              13
+ 06-26-2018 |              13
+ 06-27-2018 |              13
+ 06-28-2018 |              13
+ 06-29-2018 |              13
+(10 rows)
+
+-- Union aggregated data for one week
+SELECT hll_cardinality(hll_union_agg(unique_users))
+FROM daily_uniques
+WHERE day >= '2018-05-24'::date AND day < '2018-05-31'::date;
+ hll_cardinality 
+-----------------
+              19
+(1 row)
+
+SELECT EXTRACT(MONTH FROM day) AS month, hll_cardinality(hll_union_agg(unique_users))
+FROM daily_uniques
+WHERE day >= '2018-06-23' AND day <= '2018-07-01'
+GROUP BY 1
+ORDER BY 1;
+ month | hll_cardinality 
+-------+-----------------
+     6 |              19
+     7 |              13
+(2 rows)
+
+-- These are going to be supported after window function support
+SELECT day, hll_cardinality(hll_union_agg(unique_users) OVER seven_days)
+FROM daily_uniques
+WINDOW seven_days AS (ORDER BY day ASC ROWS 6 PRECEDING);
+ERROR:  could not run distributed query because the window function that is used cannot be pushed down
+HINT:  Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
+SELECT day, (hll_cardinality(hll_union_agg(unique_users) OVER two_days)) - hll_cardinality(unique_users) AS lost_uniques
+FROM daily_uniques
+WINDOW two_days AS (ORDER BY day ASC ROWS 1 PRECEDING);
+ERROR:  could not run distributed query because the window function that is used cannot be pushed down
+HINT:  Window functions are supported in two ways. Either add an equality filter on the distributed tables' partition column or use the window functions with a PARTITION BY clause containing the distribution column
+DROP TABLE raw_table;
+DROP TABLE daily_uniques;
diff --git a/src/test/regress/expected/custom_aggregate_support_0.out b/src/test/regress/expected/custom_aggregate_support_0.out
new file mode 100644
index 000000000..323087988
--- /dev/null
+++ b/src/test/regress/expected/custom_aggregate_support_0.out
@@ -0,0 +1,112 @@
+--
+-- CUSTOM_AGGREGATE_SUPPORT
+--
+-- Create HLL extension if present, print false result otherwise
+SELECT CASE WHEN COUNT(*) > 0 THEN
+  'CREATE EXTENSION HLL'
+ELSE 'SELECT false AS hll_present' END
+AS create_cmd FROM pg_available_extensions()
+WHERE name = 'hll'
+\gset
+:create_cmd;
+ hll_present 
+-------------
+ f
+(1 row)
+
+\c - - - :worker_1_port
+:create_cmd;
+ hll_present 
+-------------
+ f
+(1 row)
+
+\c - - - :worker_2_port
+:create_cmd;
+ hll_present 
+-------------
+ f
+(1 row)
+
+\c - - - :master_port
+SET citus.shard_count TO 4;
+CREATE TABLE raw_table (day date, user_id int);
+CREATE TABLE daily_uniques(day date, unique_users hll);
+ERROR:  type "hll" does not exist
+LINE 1: CREATE TABLE daily_uniques(day date, unique_users hll);
+                                                          ^
+SELECT create_distributed_table('raw_table', 'user_id');
+ create_distributed_table 
+--------------------------
+ 
+(1 row)
+
+SELECT create_distributed_table('daily_uniques', 'day');
+ERROR:  relation "daily_uniques" does not exist
+LINE 1: SELECT create_distributed_table('daily_uniques', 'day');
+                                        ^
+INSERT INTO raw_table
+  SELECT day, user_id % 19
+  FROM generate_series('2018-05-24'::timestamp, '2018-06-24'::timestamp, '1 day'::interval) as f(day),
+       generate_series(1,100) as g(user_id);
+INSERT INTO raw_table
+  SELECT day, user_id % 13
+  FROM generate_series('2018-06-10'::timestamp, '2018-07-10'::timestamp, '1 day'::interval) as f(day),
+       generate_series(1,100) as g(user_id);
+-- Run hll on raw data
+SELECT hll_cardinality(hll_union_agg(agg))
+FROM (
+  SELECT hll_add_agg(hll_hash_integer(user_id)) AS agg
+  FROM raw_table)a;
+ERROR:  function hll_hash_integer(integer) does not exist
+LINE 3:   SELECT hll_add_agg(hll_hash_integer(user_id)) AS agg
+                             ^
+HINT:  No function matches the given name and argument types. You might need to add explicit type casts.
+-- Aggregate the data into daily_uniques
+INSERT INTO daily_uniques
+  SELECT day, hll_add_agg(hll_hash_integer(user_id))
+  FROM raw_table
+  GROUP BY 1;
+ERROR:  relation "daily_uniques" does not exist
+LINE 1: INSERT INTO daily_uniques
+                    ^
+-- Basic hll_cardinality check on aggregated data
+SELECT day, hll_cardinality(unique_users)
+FROM daily_uniques
+WHERE day >= '2018-06-20' and day <= '2018-06-30'
+ORDER BY 2 DESC,1
+LIMIT 10;
+ERROR:  relation "daily_uniques" does not exist
+LINE 2: FROM daily_uniques
+             ^
+-- Union aggregated data for one week
+SELECT hll_cardinality(hll_union_agg(unique_users))
+FROM daily_uniques
+WHERE day >= '2018-05-24'::date AND day < '2018-05-31'::date;
+ERROR:  relation "daily_uniques" does not exist
+LINE 2: FROM daily_uniques
+             ^
+SELECT EXTRACT(MONTH FROM day) AS month, hll_cardinality(hll_union_agg(unique_users))
+FROM daily_uniques
+WHERE day >= '2018-06-23' AND day <= '2018-07-01'
+GROUP BY 1
+ORDER BY 1;
+ERROR:  relation "daily_uniques" does not exist
+LINE 2: FROM daily_uniques
+             ^
+-- These are going to be supported after window function support
+SELECT day, hll_cardinality(hll_union_agg(unique_users) OVER seven_days)
+FROM daily_uniques
+WINDOW seven_days AS (ORDER BY day ASC ROWS 6 PRECEDING);
+ERROR:  relation "daily_uniques" does not exist
+LINE 2: FROM daily_uniques
+             ^
+SELECT day, (hll_cardinality(hll_union_agg(unique_users) OVER two_days)) - hll_cardinality(unique_users) AS lost_uniques
+FROM daily_uniques
+WINDOW two_days AS (ORDER BY day ASC ROWS 1 PRECEDING);
+ERROR:  relation "daily_uniques" does not exist
+LINE 2: FROM daily_uniques
+             ^
+DROP TABLE raw_table;
+DROP TABLE daily_uniques;
+ERROR:  table "daily_uniques" does not exist
diff --git a/src/test/regress/multi_schedule b/src/test/regress/multi_schedule
index 9b902a91d..31949fe9f 100644
--- a/src/test/regress/multi_schedule
+++ b/src/test/regress/multi_schedule
@@ -58,7 +58,7 @@ test: multi_subquery_complex_reference_clause multi_subquery_window_functions mu
 test: multi_subquery_in_where_reference_clause
 test: multi_subquery_union multi_subquery_in_where_clause multi_subquery_misc
 test: multi_agg_distinct multi_agg_approximate_distinct multi_limit_clause_approximate multi_outer_join_reference multi_single_relation_subquery multi_prepare_plsql
-test: multi_reference_table multi_select_for_update relation_access_tracking
+test: multi_reference_table multi_select_for_update relation_access_tracking custom_aggregate_support
 test: multi_average_expression multi_working_columns multi_having_pushdown
 test: multi_array_agg multi_limit_clause multi_orderby_limit_pushdown
 test: multi_jsonb_agg multi_jsonb_object_agg multi_json_agg multi_json_object_agg bool_agg
diff --git a/src/test/regress/sql/custom_aggregate_support.sql b/src/test/regress/sql/custom_aggregate_support.sql
new file mode 100644
index 000000000..3eaf0bfc0
--- /dev/null
+++ b/src/test/regress/sql/custom_aggregate_support.sql
@@ -0,0 +1,80 @@
+--
+-- CUSTOM_AGGREGATE_SUPPORT
+--
+-- Create HLL extension if present, print false result otherwise
+SELECT CASE WHEN COUNT(*) > 0 THEN
+  'CREATE EXTENSION HLL'
+ELSE 'SELECT false AS hll_present' END
+AS create_cmd FROM pg_available_extensions()
+WHERE name = 'hll'
+\gset
+
+:create_cmd;
+
+\c - - - :worker_1_port
+:create_cmd;
+
+\c - - - :worker_2_port
+:create_cmd;
+
+\c - - - :master_port
+
+SET citus.shard_count TO 4;
+
+CREATE TABLE raw_table (day date, user_id int);
+CREATE TABLE daily_uniques(day date, unique_users hll);
+
+SELECT create_distributed_table('raw_table', 'user_id');
+SELECT create_distributed_table('daily_uniques', 'day');
+
+INSERT INTO raw_table
+  SELECT day, user_id % 19
+  FROM generate_series('2018-05-24'::timestamp, '2018-06-24'::timestamp, '1 day'::interval) as f(day),
+       generate_series(1,100) as g(user_id);
+INSERT INTO raw_table
+  SELECT day, user_id % 13
+  FROM generate_series('2018-06-10'::timestamp, '2018-07-10'::timestamp, '1 day'::interval) as f(day),
+       generate_series(1,100) as g(user_id);
+
+-- Run hll on raw data
+SELECT hll_cardinality(hll_union_agg(agg))
+FROM (
+  SELECT hll_add_agg(hll_hash_integer(user_id)) AS agg
+  FROM raw_table)a;
+
+-- Aggregate the data into daily_uniques
+INSERT INTO daily_uniques
+  SELECT day, hll_add_agg(hll_hash_integer(user_id))
+  FROM raw_table
+  GROUP BY 1;
+
+-- Basic hll_cardinality check on aggregated data
+SELECT day, hll_cardinality(unique_users)
+FROM daily_uniques
+WHERE day >= '2018-06-20' and day <= '2018-06-30'
+ORDER BY 2 DESC,1
+LIMIT 10;
+
+-- Union aggregated data for one week
+SELECT hll_cardinality(hll_union_agg(unique_users))
+FROM daily_uniques
+WHERE day >= '2018-05-24'::date AND day < '2018-05-31'::date;
+
+
+SELECT EXTRACT(MONTH FROM day) AS month, hll_cardinality(hll_union_agg(unique_users))
+FROM daily_uniques
+WHERE day >= '2018-06-23' AND day <= '2018-07-01'
+GROUP BY 1
+ORDER BY 1;
+
+-- These are going to be supported after window function support
+SELECT day, hll_cardinality(hll_union_agg(unique_users) OVER seven_days)
+FROM daily_uniques
+WINDOW seven_days AS (ORDER BY day ASC ROWS 6 PRECEDING);
+
+SELECT day, (hll_cardinality(hll_union_agg(unique_users) OVER two_days)) - hll_cardinality(unique_users) AS lost_uniques
+FROM daily_uniques
+WINDOW two_days AS (ORDER BY day ASC ROWS 1 PRECEDING);
+
+DROP TABLE raw_table;
+DROP TABLE daily_uniques;