Merge pull request #3344 from citusdata/fix-extension-already-exists-test

Fix tests when hll/topn are installed
pull/3307/head
Philip Dubé 2019-12-24 21:45:36 +00:00 committed by GitHub
commit 11368451f4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 62 additions and 159 deletions

View File

@ -10,13 +10,6 @@ WHERE name = 'hll'
\gset
:create_cmd;
ERROR: extension "hll" already exists
\c - - - :worker_1_port
:create_cmd;
ERROR: extension "hll" already exists
\c - - - :worker_2_port
:create_cmd;
ERROR: extension "hll" already exists
\c - - - :master_port
SET citus.shard_count TO 4;
CREATE TABLE raw_table (day date, user_id int);
CREATE TABLE daily_uniques(day date, unique_users hll);
@ -32,18 +25,18 @@ SELECT create_distributed_table('daily_uniques', 'day');
(1 row)
INSERT INTO raw_table
SELECT day, user_id % 19
INSERT INTO raw_table
SELECT day, user_id % 19
FROM generate_series('2018-05-24'::timestamp, '2018-06-24'::timestamp, '1 day'::interval) as f(day),
generate_series(1,100) as g(user_id);
INSERT INTO raw_table
SELECT day, user_id % 13
FROM generate_series('2018-06-10'::timestamp, '2018-07-10'::timestamp, '1 day'::interval) as f(day),
INSERT INTO raw_table
SELECT day, user_id % 13
FROM generate_series('2018-06-10'::timestamp, '2018-07-10'::timestamp, '1 day'::interval) as f(day),
generate_series(1,100) as g(user_id);
-- Run hll on raw data
SELECT hll_cardinality(hll_union_agg(agg))
SELECT hll_cardinality(hll_union_agg(agg))
FROM (
SELECT hll_add_agg(hll_hash_integer(user_id)) AS agg
SELECT hll_add_agg(hll_hash_integer(user_id)) AS agg
FROM raw_table)a;
hll_cardinality
-----------------
@ -51,15 +44,15 @@ FROM (
(1 row)
-- Aggregate the data into daily_uniques
INSERT INTO daily_uniques
SELECT day, hll_add_agg(hll_hash_integer(user_id))
INSERT INTO daily_uniques
SELECT day, hll_add_agg(hll_hash_integer(user_id))
FROM raw_table
GROUP BY 1;
-- Basic hll_cardinality check on aggregated data
SELECT day, hll_cardinality(unique_users)
FROM daily_uniques
WHERE day >= '2018-06-20' and day <= '2018-06-30'
ORDER BY 2 DESC,1
SELECT day, hll_cardinality(unique_users)
FROM daily_uniques
WHERE day >= '2018-06-20' and day <= '2018-06-30'
ORDER BY 2 DESC,1
LIMIT 10;
day | hll_cardinality
------------+-----------------
@ -76,8 +69,8 @@ LIMIT 10;
(10 rows)
-- Union aggregated data for one week
SELECT hll_cardinality(hll_union_agg(unique_users))
FROM daily_uniques
SELECT hll_cardinality(hll_union_agg(unique_users))
FROM daily_uniques
WHERE day >= '2018-05-24'::date AND day <= '2018-05-31'::date;
hll_cardinality
-----------------
@ -87,7 +80,7 @@ WHERE day >= '2018-05-24'::date AND day <= '2018-05-31'::date;
SELECT EXTRACT(MONTH FROM day) AS month, hll_cardinality(hll_union_agg(unique_users))
FROM daily_uniques
WHERE day >= '2018-06-23' AND day <= '2018-07-01'
GROUP BY 1
GROUP BY 1
ORDER BY 1;
month | hll_cardinality
-------+-----------------
@ -126,22 +119,22 @@ GROUP BY(1);
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360289 daily_uniques
-> Seq Scan on daily_uniques_360615 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360290 daily_uniques
-> Seq Scan on daily_uniques_360616 daily_uniques
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360291 daily_uniques
-> Seq Scan on daily_uniques_360617 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360292 daily_uniques
-> Seq Scan on daily_uniques_360618 daily_uniques
(25 rows)
SET hll.force_groupagg to ON;
@ -164,22 +157,22 @@ GROUP BY(1);
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360289 daily_uniques
-> Seq Scan on daily_uniques_360615 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360290 daily_uniques
-> Seq Scan on daily_uniques_360616 daily_uniques
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360291 daily_uniques
-> Seq Scan on daily_uniques_360617 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360292 daily_uniques
-> Seq Scan on daily_uniques_360618 daily_uniques
(27 rows)
-- Test disabling hash_agg with operator on coordinator query
@ -201,22 +194,22 @@ GROUP BY(1);
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360289 daily_uniques
-> Seq Scan on daily_uniques_360615 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360290 daily_uniques
-> Seq Scan on daily_uniques_360616 daily_uniques
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360291 daily_uniques
-> Seq Scan on daily_uniques_360617 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360292 daily_uniques
-> Seq Scan on daily_uniques_360618 daily_uniques
(25 rows)
SET hll.force_groupagg to ON;
@ -239,22 +232,22 @@ GROUP BY(1);
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360289 daily_uniques
-> Seq Scan on daily_uniques_360615 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360290 daily_uniques
-> Seq Scan on daily_uniques_360616 daily_uniques
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360291 daily_uniques
-> Seq Scan on daily_uniques_360617 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360292 daily_uniques
-> Seq Scan on daily_uniques_360618 daily_uniques
(27 rows)
-- Test disabling hash_agg with expression on coordinator query
@ -276,22 +269,22 @@ GROUP BY(1);
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360289 daily_uniques
-> Seq Scan on daily_uniques_360615 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360290 daily_uniques
-> Seq Scan on daily_uniques_360616 daily_uniques
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360291 daily_uniques
-> Seq Scan on daily_uniques_360617 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360292 daily_uniques
-> Seq Scan on daily_uniques_360618 daily_uniques
(25 rows)
SET hll.force_groupagg to ON;
@ -314,22 +307,22 @@ GROUP BY(1);
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360289 daily_uniques
-> Seq Scan on daily_uniques_360615 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360290 daily_uniques
-> Seq Scan on daily_uniques_360616 daily_uniques
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360291 daily_uniques
-> Seq Scan on daily_uniques_360617 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360292 daily_uniques
-> Seq Scan on daily_uniques_360618 daily_uniques
(27 rows)
-- Test disabling hash_agg with having
@ -351,22 +344,22 @@ GROUP BY(1);
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360289 daily_uniques
-> Seq Scan on daily_uniques_360615 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360290 daily_uniques
-> Seq Scan on daily_uniques_360616 daily_uniques
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360291 daily_uniques
-> Seq Scan on daily_uniques_360617 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> HashAggregate
Group Key: day
-> Seq Scan on daily_uniques_360292 daily_uniques
-> Seq Scan on daily_uniques_360618 daily_uniques
(25 rows)
SET hll.force_groupagg to ON;
@ -394,7 +387,7 @@ HAVING hll_cardinality(hll_union_agg(unique_users)) > 1;
Filter: (hll_cardinality(hll_union_agg(unique_users)) > '1'::double precision)
-> Sort
Sort Key: day
-> Seq Scan on daily_uniques_360289 daily_uniques
-> Seq Scan on daily_uniques_360615 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> GroupAggregate
@ -402,7 +395,7 @@ HAVING hll_cardinality(hll_union_agg(unique_users)) > 1;
Filter: (hll_cardinality(hll_union_agg(unique_users)) > '1'::double precision)
-> Sort
Sort Key: day
-> Seq Scan on daily_uniques_360290 daily_uniques
-> Seq Scan on daily_uniques_360616 daily_uniques
-> Task
Node: host=localhost port=57637 dbname=regression
-> GroupAggregate
@ -410,7 +403,7 @@ HAVING hll_cardinality(hll_union_agg(unique_users)) > 1;
Filter: (hll_cardinality(hll_union_agg(unique_users)) > '1'::double precision)
-> Sort
Sort Key: day
-> Seq Scan on daily_uniques_360291 daily_uniques
-> Seq Scan on daily_uniques_360617 daily_uniques
-> Task
Node: host=localhost port=57638 dbname=regression
-> GroupAggregate
@ -418,7 +411,7 @@ HAVING hll_cardinality(hll_union_agg(unique_users)) > 1;
Filter: (hll_cardinality(hll_union_agg(unique_users)) > '1'::double precision)
-> Sort
Sort Key: day
-> Seq Scan on daily_uniques_360292 daily_uniques
-> Seq Scan on daily_uniques_360618 daily_uniques
(40 rows)
DROP TABLE raw_table;
@ -432,11 +425,6 @@ AS create_topn FROM pg_available_extensions()
WHERE name = 'topn'
\gset
:create_topn;
\c - - - :worker_1_port
:create_topn;
\c - - - :worker_2_port
:create_topn;
\c - - - :master_port
CREATE TABLE customer_reviews (day date, user_id int, review int);
CREATE TABLE popular_reviewer(day date, reviewers jsonb);
SELECT create_distributed_table('customer_reviews', 'user_id');
@ -451,18 +439,18 @@ SELECT create_distributed_table('popular_reviewer', 'day');
(1 row)
INSERT INTO customer_reviews
INSERT INTO customer_reviews
SELECT day, user_id % 7, review % 5
FROM generate_series('2018-05-24'::timestamp, '2018-06-24'::timestamp, '1 day'::interval) as f(day),
generate_series(1,30) as g(user_id), generate_series(0,30) AS r(review);
INSERT INTO customer_reviews
INSERT INTO customer_reviews
SELECT day, user_id % 13, review % 3
FROM generate_series('2018-06-10'::timestamp, '2018-07-10'::timestamp, '1 day'::interval) as f(day),
FROM generate_series('2018-06-10'::timestamp, '2018-07-10'::timestamp, '1 day'::interval) as f(day),
generate_series(1,30) as g(user_id), generate_series(0,30) AS r(review);
-- Run topn on raw data
SELECT (topn(agg, 10)).*
FROM (
SELECT topn_add_agg(user_id::text) AS agg
SELECT topn_add_agg(user_id::text) AS agg
FROM customer_reviews
)a
ORDER BY 2 DESC, 1;
@ -481,14 +469,14 @@ ORDER BY 2 DESC, 1;
(10 rows)
-- Aggregate the data into popular_reviewer
INSERT INTO popular_reviewer
INSERT INTO popular_reviewer
SELECT day, topn_add_agg(user_id::text)
FROM customer_reviews
GROUP BY 1;
-- Basic topn check on aggregated data
SELECT day, (topn(reviewers, 10)).*
FROM popular_reviewer
WHERE day >= '2018-06-20' and day <= '2018-06-30'
SELECT day, (topn(reviewers, 10)).*
FROM popular_reviewer
WHERE day >= '2018-06-20' and day <= '2018-06-30'
ORDER BY 3 DESC, 1, 2
LIMIT 10;
day | item | frequency
@ -506,7 +494,7 @@ LIMIT 10;
(10 rows)
-- Union aggregated data for one week
SELECT (topn(agg, 10)).*
SELECT (topn(agg, 10)).*
FROM (
SELECT topn_union_agg(reviewers) AS agg
FROM popular_reviewer
@ -524,7 +512,7 @@ ORDER BY 2 DESC, 1;
6 | 992
(7 rows)
SELECT month, (topn(agg, 5)).*
SELECT month, (topn(agg, 5)).*
FROM (
SELECT EXTRACT(MONTH FROM day) AS month, topn_union_agg(reviewers) AS agg
FROM popular_reviewer
@ -549,12 +537,12 @@ ORDER BY 1, 3 DESC, 2;
-- TODO the following queries will be supported after we fix #2265
-- They work for PG9.6 but not for PG10
SELECT (topn(topn_union_agg(reviewers), 10)).*
SELECT (topn(topn_union_agg(reviewers), 10)).*
FROM popular_reviewer
WHERE day >= '2018-05-24'::date AND day <= '2018-05-31'::date
ORDER BY 2 DESC, 1;
ERROR: set-valued function called in context that cannot accept a set
LINE 1: SELECT (topn(topn_union_agg(reviewers), 10)).*
LINE 1: SELECT (topn(topn_union_agg(reviewers), 10)).*
^
SELECT (topn(topn_add_agg(user_id::text), 10)).*
FROM customer_reviews

View File

@ -14,21 +14,6 @@ WHERE name = 'hll'
f
(1 row)
\c - - - :worker_1_port
:create_cmd;
hll_present
-------------
f
(1 row)
\c - - - :worker_2_port
:create_cmd;
hll_present
-------------
f
(1 row)
\c - - - :master_port
SET citus.shard_count TO 4;
CREATE TABLE raw_table (day date, user_id int);
CREATE TABLE daily_uniques(day date, unique_users hll);
@ -210,21 +195,6 @@ WHERE name = 'topn'
f
(1 row)
\c - - - :worker_1_port
:create_topn;
topn_present
--------------
f
(1 row)
\c - - - :worker_2_port
:create_topn;
topn_present
--------------
f
(1 row)
\c - - - :master_port
CREATE TABLE customer_reviews (day date, user_id int, review int);
CREATE TABLE popular_reviewer(day date, reviewers jsonb);
SELECT create_distributed_table('customer_reviews', 'user_id');

View File

@ -10,13 +10,6 @@ WHERE name = 'hll'
\gset
:create_cmd;
ERROR: extension "hll" already exists
\c - - - :worker_1_port
:create_cmd;
ERROR: extension "hll" already exists
\c - - - :worker_2_port
:create_cmd;
ERROR: extension "hll" already exists
\c - - - :master_port
SET citus.shard_count TO 4;
CREATE TABLE raw_table (day date, user_id int);
CREATE TABLE daily_uniques(day date, unique_users hll);
@ -432,11 +425,6 @@ AS create_topn FROM pg_available_extensions()
WHERE name = 'topn'
\gset
:create_topn;
\c - - - :worker_1_port
:create_topn;
\c - - - :worker_2_port
:create_topn;
\c - - - :master_port
CREATE TABLE customer_reviews (day date, user_id int, review int);
CREATE TABLE popular_reviewer(day date, reviewers jsonb);
SELECT create_distributed_table('customer_reviews', 'user_id');

View File

@ -9,11 +9,6 @@ AS create_cmd FROM pg_available_extensions()
WHERE name = 'hll'
\gset
:create_cmd;
\c - - - :worker_1_port
:create_cmd;
\c - - - :worker_2_port
:create_cmd;
\c - - - :master_port
-- Try to execute count(distinct) when approximate distincts aren't enabled
SELECT count(distinct l_orderkey) FROM lineitem;
count
@ -153,14 +148,14 @@ SET search_path TO public;
-- If we have an order by on count(distinct) that we intend to push down to
-- worker nodes, we need to error out. Otherwise, we are fine.
SET citus.limit_clause_row_fetch_count = 1000;
SELECT l_returnflag, count(DISTINCT l_shipdate) as count_distinct, count(*) as total
SELECT l_returnflag, count(DISTINCT l_shipdate) as count_distinct, count(*) as total
FROM lineitem
GROUP BY l_returnflag
ORDER BY count_distinct
LIMIT 10;
ERROR: cannot approximate count(distinct) and order by it
HINT: You might need to disable approximations for either count(distinct) or limit through configuration.
SELECT l_returnflag, count(DISTINCT l_shipdate) as count_distinct, count(*) as total
SELECT l_returnflag, count(DISTINCT l_shipdate) as count_distinct, count(*) as total
FROM lineitem
GROUP BY l_returnflag
ORDER BY total

View File

@ -14,21 +14,6 @@ WHERE name = 'hll'
f
(1 row)
\c - - - :worker_1_port
:create_cmd;
hll_present
-------------
f
(1 row)
\c - - - :worker_2_port
:create_cmd;
hll_present
-------------
f
(1 row)
\c - - - :master_port
-- Try to execute count(distinct) when approximate distincts aren't enabled
SELECT count(distinct l_orderkey) FROM lineitem;
count

View File

@ -11,14 +11,6 @@ WHERE name = 'hll'
:create_cmd;
\c - - - :worker_1_port
:create_cmd;
\c - - - :worker_2_port
:create_cmd;
\c - - - :master_port
SET citus.shard_count TO 4;
CREATE TABLE raw_table (day date, user_id int);
@ -160,13 +152,6 @@ WHERE name = 'topn'
:create_topn;
\c - - - :worker_1_port
:create_topn;
\c - - - :worker_2_port
:create_topn;
\c - - - :master_port
CREATE TABLE customer_reviews (day date, user_id int, review int);
CREATE TABLE popular_reviewer(day date, reviewers jsonb);

View File

@ -13,14 +13,6 @@ WHERE name = 'hll'
:create_cmd;
\c - - - :worker_1_port
:create_cmd;
\c - - - :worker_2_port
:create_cmd;
\c - - - :master_port
-- Try to execute count(distinct) when approximate distincts aren't enabled
SELECT count(distinct l_orderkey) FROM lineitem;