From 64506143e40be75f2fcbec0cfd49fffe19526825 Mon Sep 17 00:00:00 2001
From: Jelte Fennema <github-tech@jeltef.nl>
Date: Thu, 25 Jun 2020 15:19:15 +0200
Subject: [PATCH] Replace flaky repartition analyze test with a non-flaky one
 (#3950)

The flaky test was introduced in #3941. This removes that flaky test and
adds a new one that fails in the same manner when the fix from #3941 is
reverted.

An example of a random failure can be found here:
https://app.circleci.com/pipelines/github/citusdata/citus/9558/workflows/de76e7a5-6558-46c9-97e7-8b1dae1f173b/jobs/135876/steps
---
 .../expected/chbenchmark_all_queries.out      | 64 -------------------
 src/test/regress/expected/multi_explain.out   | 27 ++++++++
 .../regress/sql/chbenchmark_all_queries.sql   | 25 --------
 src/test/regress/sql/multi_explain.sql        |  4 ++
 4 files changed, 31 insertions(+), 89 deletions(-)

diff --git a/src/test/regress/expected/chbenchmark_all_queries.out b/src/test/regress/expected/chbenchmark_all_queries.out
index b0a1a775f..32e60b324 100644
--- a/src/test/regress/expected/chbenchmark_all_queries.out
+++ b/src/test/regress/expected/chbenchmark_all_queries.out
@@ -864,70 +864,6 @@ LOG:  join order: [ "stock" ][ reference join "item" ][ dual partition join "ord
  abc                       | def
 (1 row)
 
-\set default_analyze_flags '(ANALYZE on, COSTS off, TIMING off, SUMMARY off)'
-EXPLAIN :default_analyze_flags SELECT
-    su_name,
-    su_address
-FROM
-    supplier,
-    nation
-WHERE su_suppkey in
-      (SELECT
-           mod(s_i_id * s_w_id, 10000)
-       FROM
-           stock,
-           order_line
-       WHERE s_i_id IN
-             (SELECT i_id
-              FROM item
-              WHERE i_data LIKE 'co%')
-       AND ol_i_id = s_i_id
-       AND ol_delivery_d > '2008-05-23 12:00:00' -- was 2010, but our order is in 2008
-       GROUP BY s_i_id, s_w_id, s_quantity
-       HAVING   2*s_quantity > sum(ol_quantity))
-  AND su_nationkey = n_nationkey
-  AND n_name = 'Germany'
-ORDER BY su_name;
-LOG:  join order: [ "stock" ][ reference join "item" ][ dual partition join "order_line" ]
-                                                  QUERY PLAN
----------------------------------------------------------------------
- Custom Scan (Citus Adaptive) (actual rows=1 loops=1)
-   ->  Distributed Subplan XXX_1
-         Intermediate Data Size: 30 bytes
-         Result destination: Send to 2 nodes
-         ->  Custom Scan (Citus Adaptive) (actual rows=3 loops=1)
-               Task Count: 4
-               Tuple data received from nodes: 12 bytes
-               Tasks Shown: None, not supported for re-partition queries
-               ->  MapMergeJob
-                     Map Task Count: 4
-                     Merge Task Count: 4
-               ->  MapMergeJob
-                     Map Task Count: 4
-                     Merge Task Count: 4
-   Task Count: 1
-   Tuple data received from nodes: 28 bytes
-   Tasks Shown: All
-   ->  Task
-         Tuple data received from node: 28 bytes
-         Node: host=localhost port=xxxxx dbname=regression
-         ->  Sort (actual rows=1 loops=1)
-               Sort Key: supplier.su_name
-               Sort Method: quicksort  Memory: 25kB
-               ->  Nested Loop Semi Join (actual rows=1 loops=1)
-                     Join Filter: (supplier.su_suppkey = intermediate_result.mod)
-                     Rows Removed by Join Filter: 1
-                     ->  Hash Join (actual rows=1 loops=1)
-                           Hash Cond: (supplier.su_nationkey = nation.n_nationkey)
-                           ->  Seq Scan on supplier_1650035 supplier (actual rows=11 loops=1)
-                           ->  Hash (actual rows=1 loops=1)
-                                 Buckets: 1024  Batches: 1  Memory Usage: 9kB
-                                 ->  Seq Scan on nation_1650034 nation (actual rows=1 loops=1)
-                                       Filter: (n_name = 'Germany'::bpchar)
-                                       Rows Removed by Filter: 3
-                     ->  Function Scan on read_intermediate_result intermediate_result (actual rows=2 loops=1)
-(35 rows)
-
 -- Query 21
 -- DATA SET DOES NOT COVER THIS QUERY
 SELECT
diff --git a/src/test/regress/expected/multi_explain.out b/src/test/regress/expected/multi_explain.out
index 0c80062e8..ad1a18401 100644
--- a/src/test/regress/expected/multi_explain.out
+++ b/src/test/regress/expected/multi_explain.out
@@ -311,6 +311,33 @@ Aggregate (actual rows=1 loops=1)
         ->  MapMergeJob
               Map Task Count: 3
               Merge Task Count: 4
+-- Confirm repartiton join in distributed subplan works
+EXPLAIN (COSTS off, ANALYZE on, TIMING off, SUMMARY off)
+WITH repartion AS (SELECT count(*) FROM t1, t2 WHERE t1.a=t2.b)
+SELECT count(*) from repartion;
+Custom Scan (Citus Adaptive) (actual rows=1 loops=1)
+  ->  Distributed Subplan XXX_1
+        Intermediate Data Size: 14 bytes
+        Result destination: Write locally
+        ->  Aggregate (actual rows=1 loops=1)
+              ->  Custom Scan (Citus Adaptive) (actual rows=4 loops=1)
+                    Task Count: 4
+                    Tuple data received from nodes: 4 bytes
+                    Tasks Shown: None, not supported for re-partition queries
+                    ->  MapMergeJob
+                          Map Task Count: 3
+                          Merge Task Count: 4
+                    ->  MapMergeJob
+                          Map Task Count: 3
+                          Merge Task Count: 4
+  Task Count: 1
+  Tuple data received from nodes: 1 bytes
+  Tasks Shown: All
+  ->  Task
+        Tuple data received from node: 1 bytes
+        Node: host=localhost port=xxxxx dbname=regression
+        ->  Aggregate (actual rows=1 loops=1)
+              ->  Function Scan on read_intermediate_result intermediate_result (actual rows=1 loops=1)
 END;
 DROP TABLE t1, t2;
 -- Test query text output, with ANALYZE ON
diff --git a/src/test/regress/sql/chbenchmark_all_queries.sql b/src/test/regress/sql/chbenchmark_all_queries.sql
index 82ce7eb68..0f8f337a7 100644
--- a/src/test/regress/sql/chbenchmark_all_queries.sql
+++ b/src/test/regress/sql/chbenchmark_all_queries.sql
@@ -673,31 +673,6 @@ WHERE su_suppkey in
   AND n_name = 'Germany'
 ORDER BY su_name;
 
-\set default_analyze_flags '(ANALYZE on, COSTS off, TIMING off, SUMMARY off)'
-EXPLAIN :default_analyze_flags SELECT
-    su_name,
-    su_address
-FROM
-    supplier,
-    nation
-WHERE su_suppkey in
-      (SELECT
-           mod(s_i_id * s_w_id, 10000)
-       FROM
-           stock,
-           order_line
-       WHERE s_i_id IN
-             (SELECT i_id
-              FROM item
-              WHERE i_data LIKE 'co%')
-       AND ol_i_id = s_i_id
-       AND ol_delivery_d > '2008-05-23 12:00:00' -- was 2010, but our order is in 2008
-       GROUP BY s_i_id, s_w_id, s_quantity
-       HAVING   2*s_quantity > sum(ol_quantity))
-  AND su_nationkey = n_nationkey
-  AND n_name = 'Germany'
-ORDER BY su_name;
-
 
 -- Query 21
 -- DATA SET DOES NOT COVER THIS QUERY
diff --git a/src/test/regress/sql/multi_explain.sql b/src/test/regress/sql/multi_explain.sql
index 3f315e524..de516bd99 100644
--- a/src/test/regress/sql/multi_explain.sql
+++ b/src/test/regress/sql/multi_explain.sql
@@ -93,6 +93,10 @@ SELECT create_distributed_table('t1', 'a'), create_distributed_table('t2', 'a');
 BEGIN;
 SET LOCAL citus.enable_repartition_joins TO true;
 EXPLAIN (COSTS off, ANALYZE on, TIMING off, SUMMARY off) SELECT count(*) FROM t1, t2 WHERE t1.a=t2.b;
+-- Confirm repartiton join in distributed subplan works
+EXPLAIN (COSTS off, ANALYZE on, TIMING off, SUMMARY off)
+WITH repartion AS (SELECT count(*) FROM t1, t2 WHERE t1.a=t2.b)
+SELECT count(*) from repartion;
 END;
 DROP TABLE t1, t2;