From 64506143e40be75f2fcbec0cfd49fffe19526825 Mon Sep 17 00:00:00 2001 From: Jelte Fennema Date: Thu, 25 Jun 2020 15:19:15 +0200 Subject: [PATCH] Replace flaky repartition analyze test with a non-flaky one (#3950) The flaky test was introduced in #3941. This removes that flaky test and adds a new one that fails in the same manner when the fix from #3941 is removed. An example of a random failure can be found here: https://app.circleci.com/pipelines/github/citusdata/citus/9558/workflows/de76e7a5-6558-46c9-97e7-8b1dae1f173b/jobs/135876/steps --- .../expected/chbenchmark_all_queries.out | 64 ------------------- src/test/regress/expected/multi_explain.out | 27 ++++++++ .../regress/sql/chbenchmark_all_queries.sql | 25 -------- src/test/regress/sql/multi_explain.sql | 4 ++ 4 files changed, 31 insertions(+), 89 deletions(-) diff --git a/src/test/regress/expected/chbenchmark_all_queries.out b/src/test/regress/expected/chbenchmark_all_queries.out index b0a1a775f..32e60b324 100644 --- a/src/test/regress/expected/chbenchmark_all_queries.out +++ b/src/test/regress/expected/chbenchmark_all_queries.out @@ -864,70 +864,6 @@ LOG: join order: [ "stock" ][ reference join "item" ][ dual partition join "ord abc | def (1 row) -\set default_analyze_flags '(ANALYZE on, COSTS off, TIMING off, SUMMARY off)' -EXPLAIN :default_analyze_flags SELECT - su_name, - su_address -FROM - supplier, - nation -WHERE su_suppkey in - (SELECT - mod(s_i_id * s_w_id, 10000) - FROM - stock, - order_line - WHERE s_i_id IN - (SELECT i_id - FROM item - WHERE i_data LIKE 'co%') - AND ol_i_id = s_i_id - AND ol_delivery_d > '2008-05-23 12:00:00' -- was 2010, but our order is in 2008 - GROUP BY s_i_id, s_w_id, s_quantity - HAVING 2*s_quantity > sum(ol_quantity)) - AND su_nationkey = n_nationkey - AND n_name = 'Germany' -ORDER BY su_name; -LOG: join order: [ "stock" ][ reference join "item" ][ dual partition join "order_line" ] - QUERY PLAN ---------------------------------------------------------------------- - Custom 
Scan (Citus Adaptive) (actual rows=1 loops=1) - -> Distributed Subplan XXX_1 - Intermediate Data Size: 30 bytes - Result destination: Send to 2 nodes - -> Custom Scan (Citus Adaptive) (actual rows=3 loops=1) - Task Count: 4 - Tuple data received from nodes: 12 bytes - Tasks Shown: None, not supported for re-partition queries - -> MapMergeJob - Map Task Count: 4 - Merge Task Count: 4 - -> MapMergeJob - Map Task Count: 4 - Merge Task Count: 4 - Task Count: 1 - Tuple data received from nodes: 28 bytes - Tasks Shown: All - -> Task - Tuple data received from node: 28 bytes - Node: host=localhost port=xxxxx dbname=regression - -> Sort (actual rows=1 loops=1) - Sort Key: supplier.su_name - Sort Method: quicksort Memory: 25kB - -> Nested Loop Semi Join (actual rows=1 loops=1) - Join Filter: (supplier.su_suppkey = intermediate_result.mod) - Rows Removed by Join Filter: 1 - -> Hash Join (actual rows=1 loops=1) - Hash Cond: (supplier.su_nationkey = nation.n_nationkey) - -> Seq Scan on supplier_1650035 supplier (actual rows=11 loops=1) - -> Hash (actual rows=1 loops=1) - Buckets: 1024 Batches: 1 Memory Usage: 9kB - -> Seq Scan on nation_1650034 nation (actual rows=1 loops=1) - Filter: (n_name = 'Germany'::bpchar) - Rows Removed by Filter: 3 - -> Function Scan on read_intermediate_result intermediate_result (actual rows=2 loops=1) -(35 rows) - -- Query 21 -- DATA SET DOES NOT COVER THIS QUERY SELECT diff --git a/src/test/regress/expected/multi_explain.out b/src/test/regress/expected/multi_explain.out index 0c80062e8..ad1a18401 100644 --- a/src/test/regress/expected/multi_explain.out +++ b/src/test/regress/expected/multi_explain.out @@ -311,6 +311,33 @@ Aggregate (actual rows=1 loops=1) -> MapMergeJob Map Task Count: 3 Merge Task Count: 4 +-- Confirm repartiton join in distributed subplan works +EXPLAIN (COSTS off, ANALYZE on, TIMING off, SUMMARY off) +WITH repartion AS (SELECT count(*) FROM t1, t2 WHERE t1.a=t2.b) +SELECT count(*) from repartion; +Custom Scan (Citus Adaptive) 
(actual rows=1 loops=1) + -> Distributed Subplan XXX_1 + Intermediate Data Size: 14 bytes + Result destination: Write locally + -> Aggregate (actual rows=1 loops=1) + -> Custom Scan (Citus Adaptive) (actual rows=4 loops=1) + Task Count: 4 + Tuple data received from nodes: 4 bytes + Tasks Shown: None, not supported for re-partition queries + -> MapMergeJob + Map Task Count: 3 + Merge Task Count: 4 + -> MapMergeJob + Map Task Count: 3 + Merge Task Count: 4 + Task Count: 1 + Tuple data received from nodes: 1 bytes + Tasks Shown: All + -> Task + Tuple data received from node: 1 bytes + Node: host=localhost port=xxxxx dbname=regression + -> Aggregate (actual rows=1 loops=1) + -> Function Scan on read_intermediate_result intermediate_result (actual rows=1 loops=1) END; DROP TABLE t1, t2; -- Test query text output, with ANALYZE ON diff --git a/src/test/regress/sql/chbenchmark_all_queries.sql b/src/test/regress/sql/chbenchmark_all_queries.sql index 82ce7eb68..0f8f337a7 100644 --- a/src/test/regress/sql/chbenchmark_all_queries.sql +++ b/src/test/regress/sql/chbenchmark_all_queries.sql @@ -673,31 +673,6 @@ WHERE su_suppkey in AND n_name = 'Germany' ORDER BY su_name; -\set default_analyze_flags '(ANALYZE on, COSTS off, TIMING off, SUMMARY off)' -EXPLAIN :default_analyze_flags SELECT - su_name, - su_address -FROM - supplier, - nation -WHERE su_suppkey in - (SELECT - mod(s_i_id * s_w_id, 10000) - FROM - stock, - order_line - WHERE s_i_id IN - (SELECT i_id - FROM item - WHERE i_data LIKE 'co%') - AND ol_i_id = s_i_id - AND ol_delivery_d > '2008-05-23 12:00:00' -- was 2010, but our order is in 2008 - GROUP BY s_i_id, s_w_id, s_quantity - HAVING 2*s_quantity > sum(ol_quantity)) - AND su_nationkey = n_nationkey - AND n_name = 'Germany' -ORDER BY su_name; - -- Query 21 -- DATA SET DOES NOT COVER THIS QUERY diff --git a/src/test/regress/sql/multi_explain.sql b/src/test/regress/sql/multi_explain.sql index 3f315e524..de516bd99 100644 --- a/src/test/regress/sql/multi_explain.sql +++ 
b/src/test/regress/sql/multi_explain.sql @@ -93,6 +93,10 @@ SELECT create_distributed_table('t1', 'a'), create_distributed_table('t2', 'a'); BEGIN; SET LOCAL citus.enable_repartition_joins TO true; EXPLAIN (COSTS off, ANALYZE on, TIMING off, SUMMARY off) SELECT count(*) FROM t1, t2 WHERE t1.a=t2.b; +-- Confirm repartiton join in distributed subplan works +EXPLAIN (COSTS off, ANALYZE on, TIMING off, SUMMARY off) +WITH repartion AS (SELECT count(*) FROM t1, t2 WHERE t1.a=t2.b) +SELECT count(*) from repartion; END; DROP TABLE t1, t2;