diff --git a/.travis.yml b/.travis.yml index f8f6ef1a8..7826bcb65 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,10 +13,12 @@ before_install: - git clone -b v0.4.1 --depth 1 https://github.com/citusdata/tools.git - sudo make -C tools install - setup_apt + - curl https://install.citusdata.com/community/deb.sh | sudo bash - nuke_pg install: - install_uncrustify - install_pg + - sudo apt-get install -y "postgresql-${PGVERSION}-hll=2.10.1.citus-1" before_script: citus_indent --quiet --check -script: CFLAGS=-Werror pg_travis_multi_test check-multi check-worker +script: CFLAGS=-Werror pg_travis_multi_test check after_success: sync_to_enterprise diff --git a/src/test/regress/Makefile b/src/test/regress/Makefile index de8759931..4d72f20ac 100644 --- a/src/test/regress/Makefile +++ b/src/test/regress/Makefile @@ -49,10 +49,6 @@ check-multi: all tempinstall-main $(pg_regress_multi_check) --load-extension=citus \ -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/multi_schedule $(EXTRA_TESTS) -check-multi-hll: all tempinstall-main - $(pg_regress_multi_check) --load-extension=citus --load-extension=hll -- \ - $(MULTI_REGRESS_OPTS) $(EXTRA_TESTS) multi_create_table multi_master_protocol multi_stage_data multi_agg_approximate_distinct - check-multi-task-tracker-extra: all tempinstall-main $(pg_regress_multi_check) --load-extension=citus \ --server-option=citus.task_executor_type=task-tracker \ diff --git a/src/test/regress/expected/multi_agg_approximate_distinct.out b/src/test/regress/expected/multi_agg_approximate_distinct.out index 4249d8414..69d11aa64 100644 --- a/src/test/regress/expected/multi_agg_approximate_distinct.out +++ b/src/test/regress/expected/multi_agg_approximate_distinct.out @@ -3,6 +3,19 @@ -- ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 340000; ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 340000; +-- Create HLL extension if present, print false result otherwise +SELECT CASE WHEN COUNT(*) > 0 THEN + 'CREATE EXTENSION HLL' +ELSE 'SELECT false AS hll_present' END +AS create_cmd FROM pg_available_extensions() +WHERE name = 'hll' +\gset +:create_cmd; +\c - - - :worker_1_port +:create_cmd; +\c - - - :worker_2_port +:create_cmd; +\c - - - :master_port -- Try to execute count(distinct) when approximate distincts aren't enabled SELECT count(distinct l_orderkey) FROM lineitem; ERROR: cannot compute aggregate (distinct) diff --git a/src/test/regress/expected/multi_agg_approximate_distinct_0.out b/src/test/regress/expected/multi_agg_approximate_distinct_0.out new file mode 100644 index 000000000..d21998819 --- /dev/null +++ b/src/test/regress/expected/multi_agg_approximate_distinct_0.out @@ -0,0 +1,143 @@ +-- +-- MULTI_AGG_APPROXIMATE_DISTINCT +-- +ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 340000; +ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 340000; +-- Create HLL extension if present, print false result otherwise +SELECT CASE WHEN COUNT(*) > 0 THEN + 'CREATE EXTENSION HLL' +ELSE 'SELECT false AS hll_present' END +AS create_cmd FROM pg_available_extensions() +WHERE name = 'hll' +\gset +:create_cmd; + hll_present +------------- + f +(1 row) + +\c - - - :worker_1_port +:create_cmd; + hll_present +------------- + f +(1 row) + +\c - - - :worker_2_port +:create_cmd; + hll_present +------------- + f +(1 row) + +\c - - - :master_port +-- Try to execute count(distinct) when approximate distincts aren't enabled +SELECT count(distinct l_orderkey) FROM lineitem; +ERROR: cannot compute aggregate (distinct) +DETAIL: table partitioning is unsuitable for aggregate (distinct) +HINT: You can load the hll extension from contrib packages and enable distinct approximations. +-- Check approximate count(distinct) at different precisions / error rates +SET citus.count_distinct_error_rate = 0.1; +SELECT count(distinct l_orderkey) FROM lineitem; +ERROR: cannot compute count (distinct) approximation +HINT: You need to have the hll extension loaded. +SET citus.count_distinct_error_rate = 0.01; +SELECT count(distinct l_orderkey) FROM lineitem; +ERROR: cannot compute count (distinct) approximation +HINT: You need to have the hll extension loaded. +-- Check approximate count(distinct) for different data types +SELECT count(distinct l_partkey) FROM lineitem; +ERROR: cannot compute count (distinct) approximation +HINT: You need to have the hll extension loaded. +SELECT count(distinct l_extendedprice) FROM lineitem; +ERROR: cannot compute count (distinct) approximation +HINT: You need to have the hll extension loaded. +SELECT count(distinct l_shipdate) FROM lineitem; +ERROR: cannot compute count (distinct) approximation +HINT: You need to have the hll extension loaded. +SELECT count(distinct l_comment) FROM lineitem; +ERROR: cannot compute count (distinct) approximation +HINT: You need to have the hll extension loaded. +-- Check that we can execute approximate count(distinct) on complex expressions +SELECT count(distinct (l_orderkey * 2 + 1)) FROM lineitem; +ERROR: cannot compute count (distinct) approximation +HINT: You need to have the hll extension loaded. +SELECT count(distinct extract(month from l_shipdate)) AS my_month FROM lineitem; +ERROR: cannot compute count (distinct) approximation +HINT: You need to have the hll extension loaded. +SELECT count(distinct l_partkey) / count(distinct l_orderkey) FROM lineitem; +ERROR: cannot compute count (distinct) approximation +HINT: You need to have the hll extension loaded. +-- Check that we can execute approximate count(distinct) on select queries that +-- contain different filter, join, sort and limit clauses +SELECT count(distinct l_orderkey) FROM lineitem + WHERE octet_length(l_comment) + octet_length('randomtext'::text) > 40; +ERROR: cannot compute count (distinct) approximation +HINT: You need to have the hll extension loaded. +SELECT count(DISTINCT l_orderkey) FROM lineitem, orders + WHERE l_orderkey = o_orderkey AND l_quantity < 5; +ERROR: cannot compute count (distinct) approximation +HINT: You need to have the hll extension loaded. +SELECT count(DISTINCT l_orderkey) as distinct_order_count, l_quantity FROM lineitem + WHERE l_quantity < 32.0 + GROUP BY l_quantity + ORDER BY distinct_order_count ASC, l_quantity ASC + LIMIT 10; +ERROR: cannot compute count (distinct) approximation +HINT: You need to have the hll extension loaded. +-- Check that approximate count(distinct) works at a table in a schema other than public +-- create necessary objects +CREATE SCHEMA test_count_distinct_schema; +CREATE TABLE test_count_distinct_schema.nation_hash( + n_nationkey integer not null, + n_name char(25) not null, + n_regionkey integer not null, + n_comment varchar(152) +); +SELECT master_create_distributed_table('test_count_distinct_schema.nation_hash', 'n_nationkey', 'hash'); + master_create_distributed_table +--------------------------------- + +(1 row) + +SELECT master_create_worker_shards('test_count_distinct_schema.nation_hash', 4, 2); + master_create_worker_shards +----------------------------- + +(1 row) + +\copy test_count_distinct_schema.nation_hash FROM STDIN with delimiter '|'; +SET search_path TO public; +SET citus.count_distinct_error_rate TO 0.01; +SELECT COUNT (DISTINCT n_regionkey) FROM test_count_distinct_schema.nation_hash; +ERROR: cannot compute count (distinct) approximation +HINT: You need to have the hll extension loaded. +-- test with search_path is set +SET search_path TO test_count_distinct_schema; +SELECT COUNT (DISTINCT n_regionkey) FROM nation_hash; +ERROR: cannot compute count (distinct) approximation +HINT: You need to have the hll extension loaded. +SET search_path TO public; +-- If we have an order by on count(distinct) that we intend to push down to +-- worker nodes, we need to error out. Otherwise, we are fine. +SET citus.limit_clause_row_fetch_count = 1000; +SELECT l_returnflag, count(DISTINCT l_shipdate) as count_distinct, count(*) as total + FROM lineitem + GROUP BY l_returnflag + ORDER BY count_distinct + LIMIT 10; +ERROR: cannot compute count (distinct) approximation +HINT: You need to have the hll extension loaded. +SELECT l_returnflag, count(DISTINCT l_shipdate) as count_distinct, count(*) as total + FROM lineitem + GROUP BY l_returnflag + ORDER BY total + LIMIT 10; +ERROR: cannot compute count (distinct) approximation +HINT: You need to have the hll extension loaded. +-- Check that we can revert config and disable count(distinct) approximations +SET citus.count_distinct_error_rate = 0.0; +SELECT count(distinct l_orderkey) FROM lineitem; +ERROR: cannot compute aggregate (distinct) +DETAIL: table partitioning is unsuitable for aggregate (distinct) +HINT: You can load the hll extension from contrib packages and enable distinct approximations. diff --git a/src/test/regress/expected/multi_hash_pruning.out b/src/test/regress/expected/multi_hash_pruning.out index de473e728..c37dfe6e5 100644 --- a/src/test/regress/expected/multi_hash_pruning.out +++ b/src/test/regress/expected/multi_hash_pruning.out @@ -4,6 +4,13 @@ -- Tests for shard and join pruning logic on hash partitioned tables. ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 630000; ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 630000; +-- Print the executor type for clarity in test output +SHOW citus.task_executor_type; + citus.task_executor_type +-------------------------- + real-time +(1 row) + -- Create a table partitioned on integer column and update partition type to -- hash. Then load data into this table and update shard min max values with -- hashed ones. Hash value of 1, 2, 3 and 4 are consecutively -1905060026, diff --git a/src/test/regress/expected/multi_hash_pruning_0.out b/src/test/regress/expected/multi_hash_pruning_0.out new file mode 100644 index 000000000..a0bbae3c4 --- /dev/null +++ b/src/test/regress/expected/multi_hash_pruning_0.out @@ -0,0 +1,336 @@ +-- +-- MULTI_HASH_PRUNING +-- +-- Tests for shard and join pruning logic on hash partitioned tables. +ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 630000; +ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 630000; +-- Print the executor type for clarity in test output +SHOW citus.task_executor_type; + citus.task_executor_type +-------------------------- + task-tracker +(1 row) + +-- Create a table partitioned on integer column and update partition type to +-- hash. Then load data into this table and update shard min max values with +-- hashed ones. Hash value of 1, 2, 3 and 4 are consecutively -1905060026, +-- 1134484726, -28094569 and -1011077333. +CREATE TABLE orders_hash_partitioned ( + o_orderkey integer, + o_custkey integer, + o_orderstatus char(1), + o_totalprice decimal(15,2), + o_orderdate date, + o_orderpriority char(15), + o_clerk char(15), + o_shippriority integer, + o_comment varchar(79) ); +SELECT master_create_distributed_table('orders_hash_partitioned', 'o_orderkey', 'hash'); + master_create_distributed_table +--------------------------------- + +(1 row) + +SELECT master_create_worker_shards('orders_hash_partitioned', 4, 1); + master_create_worker_shards +----------------------------- + +(1 row) + +SET client_min_messages TO DEBUG2; +-- Check that we can prune shards for simple cases, boolean expressions and +-- immutable functions. +-- Since router plans are not triggered for task-tracker executor type, +-- we need to run the tests that triggers router planning seperately for +-- both executors. Otherwise, check-full fails on the task-tracker. +-- Later, we need to switch back to the actual task executor +-- to contuinue with correct executor type for check-full. +SELECT quote_literal(current_setting('citus.task_executor_type')) AS actual_task_executor +\gset +SET citus.task_executor_type TO 'real-time'; +SELECT count(*) FROM orders_hash_partitioned; + count +------- + 0 +(1 row) + +SELECT count(*) FROM orders_hash_partitioned WHERE o_orderkey = 1; +DEBUG: predicate pruning for shardId 630001 +DEBUG: predicate pruning for shardId 630002 +DEBUG: predicate pruning for shardId 630003 +DEBUG: Creating router plan +DEBUG: Plan is router executable + count +------- + 0 +(1 row) + +SELECT count(*) FROM orders_hash_partitioned WHERE o_orderkey = 2; +DEBUG: predicate pruning for shardId 630000 +DEBUG: predicate pruning for shardId 630001 +DEBUG: predicate pruning for shardId 630002 +DEBUG: Creating router plan +DEBUG: Plan is router executable + count +------- + 0 +(1 row) + +SELECT count(*) FROM orders_hash_partitioned WHERE o_orderkey = 3; +DEBUG: predicate pruning for shardId 630000 +DEBUG: predicate pruning for shardId 630002 +DEBUG: predicate pruning for shardId 630003 +DEBUG: Creating router plan +DEBUG: Plan is router executable + count +------- + 0 +(1 row) + +SELECT count(*) FROM orders_hash_partitioned WHERE o_orderkey = 4; +DEBUG: predicate pruning for shardId 630000 +DEBUG: predicate pruning for shardId 630002 +DEBUG: predicate pruning for shardId 630003 +DEBUG: Creating router plan +DEBUG: Plan is router executable + count +------- + 0 +(1 row) + +SELECT count(*) FROM orders_hash_partitioned + WHERE o_orderkey = 1 AND o_clerk = 'aaa'; +DEBUG: predicate pruning for shardId 630001 +DEBUG: predicate pruning for shardId 630002 +DEBUG: predicate pruning for shardId 630003 +DEBUG: Creating router plan +DEBUG: Plan is router executable + count +------- + 0 +(1 row) + +SELECT count(*) FROM orders_hash_partitioned WHERE o_orderkey = abs(-1); +DEBUG: predicate pruning for shardId 630001 +DEBUG: predicate pruning for shardId 630002 +DEBUG: predicate pruning for shardId 630003 +DEBUG: Creating router plan +DEBUG: Plan is router executable + count +------- + 0 +(1 row) + +SET citus.task_executor_type TO 'task-tracker'; +SELECT count(*) FROM orders_hash_partitioned; + count +------- + 0 +(1 row) + +SELECT count(*) FROM orders_hash_partitioned WHERE o_orderkey = 1; +DEBUG: predicate pruning for shardId 630001 +DEBUG: predicate pruning for shardId 630002 +DEBUG: predicate pruning for shardId 630003 + count +------- + 0 +(1 row) + +SELECT count(*) FROM orders_hash_partitioned WHERE o_orderkey = 2; +DEBUG: predicate pruning for shardId 630000 +DEBUG: predicate pruning for shardId 630001 +DEBUG: predicate pruning for shardId 630002 + count +------- + 0 +(1 row) + +SELECT count(*) FROM orders_hash_partitioned WHERE o_orderkey = 3; +DEBUG: predicate pruning for shardId 630000 +DEBUG: predicate pruning for shardId 630002 +DEBUG: predicate pruning for shardId 630003 + count +------- + 0 +(1 row) + +SELECT count(*) FROM orders_hash_partitioned WHERE o_orderkey = 4; +DEBUG: predicate pruning for shardId 630000 +DEBUG: predicate pruning for shardId 630002 +DEBUG: predicate pruning for shardId 630003 + count +------- + 0 +(1 row) + +SELECT count(*) FROM orders_hash_partitioned + WHERE o_orderkey = 1 AND o_clerk = 'aaa'; +DEBUG: predicate pruning for shardId 630001 +DEBUG: predicate pruning for shardId 630002 +DEBUG: predicate pruning for shardId 630003 + count +------- + 0 +(1 row) + +SELECT count(*) FROM orders_hash_partitioned WHERE o_orderkey = abs(-1); +DEBUG: predicate pruning for shardId 630001 +DEBUG: predicate pruning for shardId 630002 +DEBUG: predicate pruning for shardId 630003 + count +------- + 0 +(1 row) + +SET citus.task_executor_type TO :actual_task_executor; +SELECT count(*) FROM orders_hash_partitioned WHERE o_orderkey is NULL; + count +------- + 0 +(1 row) + +SELECT count(*) FROM orders_hash_partitioned WHERE o_orderkey is not NULL; + count +------- + 0 +(1 row) + +SELECT count(*) FROM orders_hash_partitioned WHERE o_orderkey > 2; + count +------- + 0 +(1 row) + +SELECT count(*) FROM orders_hash_partitioned + WHERE o_orderkey = 1 OR o_orderkey = 2; +DEBUG: predicate pruning for shardId 630001 +DEBUG: predicate pruning for shardId 630002 + count +------- + 0 +(1 row) + +SELECT count(*) FROM orders_hash_partitioned + WHERE o_orderkey = 1 OR o_clerk = 'aaa'; + count +------- + 0 +(1 row) + +SELECT count(*) FROM orders_hash_partitioned + WHERE o_orderkey = 1 OR (o_orderkey = 3 AND o_clerk = 'aaa'); +DEBUG: predicate pruning for shardId 630002 +DEBUG: predicate pruning for shardId 630003 + count +------- + 0 +(1 row) + +SELECT count(*) FROM orders_hash_partitioned + WHERE o_orderkey = 1 OR o_orderkey is NULL; + count +------- + 0 +(1 row) + +SELECT count(*) FROM + (SELECT o_orderkey FROM orders_hash_partitioned WHERE o_orderkey = 1) AS orderkeys; +DEBUG: predicate pruning for shardId 630001 +DEBUG: predicate pruning for shardId 630002 +DEBUG: predicate pruning for shardId 630003 + count +------- + 0 +(1 row) + +-- Check that we don't support pruning for ANY (array expression) and give +-- a notice message when used with the partition column +SELECT count(*) FROM orders_hash_partitioned + WHERE o_orderkey = ANY ('{1,2,3}'); +NOTICE: cannot use shard pruning with ANY/ALL (array expression) +HINT: Consider rewriting the expression with OR/AND clauses. + count +------- + 0 +(1 row) + +-- Check that we don't show the message if the operator is not +-- equality operator +SELECT count(*) FROM orders_hash_partitioned + WHERE o_orderkey < ALL ('{1,2,3}'); + count +------- + 0 +(1 row) + +-- Check that we don't give a spurious hint message when non-partition +-- columns are used with ANY/IN/ALL +SELECT count(*) FROM orders_hash_partitioned + WHERE o_orderkey = 1 OR o_totalprice IN (2, 5); + count +------- + 0 +(1 row) + +-- Check that we cannot prune for mutable functions. +SELECT count(*) FROM orders_hash_partitioned WHERE o_orderkey = random(); + count +------- + 0 +(1 row) + +SELECT count(*) FROM orders_hash_partitioned + WHERE o_orderkey = random() OR o_orderkey = 1; + count +------- + 0 +(1 row) + +SELECT count(*) FROM orders_hash_partitioned + WHERE o_orderkey = random() AND o_orderkey = 1; +DEBUG: predicate pruning for shardId 630001 +DEBUG: predicate pruning for shardId 630002 +DEBUG: predicate pruning for shardId 630003 + count +------- + 0 +(1 row) + +-- Check that we can do join pruning. +SELECT count(*) + FROM orders_hash_partitioned orders1, orders_hash_partitioned orders2 + WHERE orders1.o_orderkey = orders2.o_orderkey; +DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1] +DEBUG: join prunable for intervals [-2147483648,-1073741825] and [0,1073741823] +DEBUG: join prunable for intervals [-2147483648,-1073741825] and [1073741824,2147483647] +DEBUG: join prunable for intervals [-1073741824,-1] and [-2147483648,-1073741825] +DEBUG: join prunable for intervals [-1073741824,-1] and [0,1073741823] +DEBUG: join prunable for intervals [-1073741824,-1] and [1073741824,2147483647] +DEBUG: join prunable for intervals [0,1073741823] and [-2147483648,-1073741825] +DEBUG: join prunable for intervals [0,1073741823] and [-1073741824,-1] +DEBUG: join prunable for intervals [0,1073741823] and [1073741824,2147483647] +DEBUG: join prunable for intervals [1073741824,2147483647] and [-2147483648,-1073741825] +DEBUG: join prunable for intervals [1073741824,2147483647] and [-1073741824,-1] +DEBUG: join prunable for intervals [1073741824,2147483647] and [0,1073741823] + count +------- + 0 +(1 row) + +SELECT count(*) + FROM orders_hash_partitioned orders1, orders_hash_partitioned orders2 + WHERE orders1.o_orderkey = orders2.o_orderkey + AND orders1.o_orderkey = 1 + AND orders2.o_orderkey is NULL; +DEBUG: predicate pruning for shardId 630001 +DEBUG: predicate pruning for shardId 630002 +DEBUG: predicate pruning for shardId 630003 +DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1] +DEBUG: join prunable for intervals [-2147483648,-1073741825] and [0,1073741823] +DEBUG: join prunable for intervals [-2147483648,-1073741825] and [1073741824,2147483647] + count +------- + 0 +(1 row) + diff --git a/src/test/regress/multi_schedule b/src/test/regress/multi_schedule index 799783713..6840ea72b 100644 --- a/src/test/regress/multi_schedule +++ b/src/test/regress/multi_schedule @@ -36,7 +36,7 @@ test: multi_basic_queries multi_complex_expressions multi_verify_no_subquery test: multi_explain test: multi_subquery test: multi_single_relation_subquery -test: multi_agg_distinct multi_limit_clause multi_limit_clause_approximate +test: multi_agg_distinct multi_agg_approximate_distinct multi_limit_clause multi_limit_clause_approximate test: multi_average_expression multi_working_columns test: multi_array_agg test: multi_agg_type_conversion multi_count_type_conversion diff --git a/src/test/regress/sql/multi_agg_approximate_distinct.sql b/src/test/regress/sql/multi_agg_approximate_distinct.sql index 1a8a7d168..8e54bc650 100644 --- a/src/test/regress/sql/multi_agg_approximate_distinct.sql +++ b/src/test/regress/sql/multi_agg_approximate_distinct.sql @@ -6,6 +6,23 @@ ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 340000; ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 340000; +-- Create HLL extension if present, print false result otherwise +SELECT CASE WHEN COUNT(*) > 0 THEN + 'CREATE EXTENSION HLL' +ELSE 'SELECT false AS hll_present' END +AS create_cmd FROM pg_available_extensions() +WHERE name = 'hll' +\gset + +:create_cmd; + +\c - - - :worker_1_port +:create_cmd; + +\c - - - :worker_2_port +:create_cmd; + +\c - - - :master_port -- Try to execute count(distinct) when approximate distincts aren't enabled diff --git a/src/test/regress/sql/multi_hash_pruning.sql b/src/test/regress/sql/multi_hash_pruning.sql index 0088264a4..9f26c879c 100644 --- a/src/test/regress/sql/multi_hash_pruning.sql +++ b/src/test/regress/sql/multi_hash_pruning.sql @@ -8,6 +8,8 @@ ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 630000; ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 630000; +-- Print the executor type for clarity in test output +SHOW citus.task_executor_type; -- Create a table partitioned on integer column and update partition type to -- hash. Then load data into this table and update shard min max values with