Show amount of data received in EXPLAIN ANALYZE (#3901)

Sadly this does not actually work yet for binary protocol data, because when doing EXPLAIN ANALYZE we send two commands at the same time. This means we cannot use `SendRemoteCommandParams`, and thus cannot use the binary protocol. This can still be useful though when using the text protocol, to find out that a lot of data is being sent.
2020-06-15 16:01:05 +02:00 · 2020-06-15 16:01:05 +02:00 · 927de6d187
parent 077c784fe9
commit 927de6d187
10 changed files with 376 additions and 26 deletions
--- a/src/backend/distributed/executor/adaptive_executor.c
+++ b/src/backend/distributed/executor/adaptive_executor.c
@ -3731,6 +3731,21 @@ ReceiveResults(WorkerSession *session, bool storeRows)
 					{
 						executionStats->totalIntermediateResultSize += valueLength;
 					}
+
+					/*
+					 * We use this to count the amount of data that has been
+					 * received for EXPLAIN ANALYZE.
+					 * Only EXPLAIN ANALYZE TupleDestination has originalTask
+					 * defined. So that's why we check for it, otherwise we
+					 * don't have to keep track of this data.
+					 * The worker plan itself is also sent as a result in the
+					 * same task. We filter this out by only counting the data
+					 * from the first query.
+					 */
+					if (tupleDest->originalTask && queryIndex == 0)
+					{
+						tupleDest->originalTask->totalReceivedData += valueLength;
+					}
 				}
 			}

--- a/src/backend/distributed/planner/multi_explain.c
+++ b/src/backend/distributed/planner/multi_explain.c
@ -116,13 +116,14 @@ typedef struct ExplainAnalyzeDestination

 /* Explain functions for distributed queries */
 static void ExplainSubPlans(DistributedPlan *distributedPlan, ExplainState *es);
-static void ExplainJob(Job *job, ExplainState *es);
+static void ExplainJob(CitusScanState *scanState, Job *job, ExplainState *es);
 static void ExplainMapMergeJob(MapMergeJob *mapMergeJob, ExplainState *es);
-static void ExplainTaskList(List *taskList, ExplainState *es);
+static void ExplainTaskList(CitusScanState *scanState, List *taskList, ExplainState *es);
 static RemoteExplainPlan * RemoteExplain(Task *task, ExplainState *es);
 static RemoteExplainPlan * GetSavedRemoteExplain(Task *task, ExplainState *es);
 static RemoteExplainPlan * FetchRemoteExplainFromWorkers(Task *task, ExplainState *es);
-static void ExplainTask(Task *task, int placementIndex, List *explainOutputList,
+static void ExplainTask(CitusScanState *scanState, Task *task, int placementIndex,
+						List *explainOutputList,
 						ExplainState *es);
 static void ExplainTaskPlacement(ShardPlacement *taskPlacement, List *explainOutputList,
 								 ExplainState *es);
@ -154,6 +155,9 @@ static void ExplainOneQuery(Query *query, int cursorOptions,
 							const char *queryString, ParamListInfo params,
 							QueryEnvironment *queryEnv);
 static double elapsed_time(instr_time *starttime);
+static void ExplainPropertyBytes(const char *qlabel, int64 bytes, ExplainState *es);
+static uint64 TaskReceivedData(Task *task);
+static bool ShowReceivedData(CitusScanState *scanState, ExplainState *es);


 /* exports for SQL callable functions */
@ -187,7 +191,7 @@ CitusExplainScan(CustomScanState *node, List *ancestors, struct ExplainState *es
 		ExplainSubPlans(distributedPlan, es);
 	}

-	ExplainJob(distributedPlan->workerJob, es);
+	ExplainJob(scanState, distributedPlan->workerJob, es);

 	ExplainCloseGroup("Distributed Query", "Distributed Query", true, es);
 }
@ -287,8 +291,8 @@ ExplainSubPlans(DistributedPlan *distributedPlan, ExplainState *es)
 									 2, es);
 			}

-			ExplainPropertyInteger("Intermediate Data Size", "bytes",
-								   subPlan->bytesSentPerWorker, es);
+			ExplainPropertyBytes("Intermediate Data Size",
+								 subPlan->bytesSentPerWorker, es);

 			StringInfo destination = makeStringInfo();
 			if (subPlan->remoteWorkerCount && subPlan->writeLocalFile)
@ -323,13 +327,37 @@ ExplainSubPlans(DistributedPlan *distributedPlan, ExplainState *es)
 }


+/*
+ * ExplainPropertyBytes formats bytes in a human readable way by using
+ * pg_size_pretty.
+ */
+static void
+ExplainPropertyBytes(const char *qlabel, int64 bytes, ExplainState *es)
+{
+	Datum textDatum = DirectFunctionCall1(pg_size_pretty, Int64GetDatum(bytes));
+	ExplainPropertyText(qlabel, text_to_cstring(DatumGetTextP(textDatum)), es);
+}
+
+
+/*
+ * ShowReceivedData returns true if explain should show received data. This is
+ * only the case when using EXPLAIN ANALYZE on queries that return rows.
+ */
+static bool
+ShowReceivedData(CitusScanState *scanState, ExplainState *es)
+{
+	TupleDesc tupDesc = ScanStateGetTupleDescriptor(scanState);
+	return es->analyze && tupDesc != NULL && tupDesc->natts > 0;
+}
+
+
 /*
 * ExplainJob shows the EXPLAIN output for a Job in the physical plan of
 * a distributed query by showing the remote EXPLAIN for the first task,
 * or all tasks if citus.explain_all_tasks is on.
 */
 static void
-ExplainJob(Job *job, ExplainState *es)
+ExplainJob(CitusScanState *scanState, Job *job, ExplainState *es)
 {
 	List *dependentJobList = job->dependentJobList;
 	int dependentJobCount = list_length(dependentJobList);
@ -340,6 +368,18 @@ ExplainJob(Job *job, ExplainState *es)
 	ExplainOpenGroup("Job", "Job", true, es);

 	ExplainPropertyInteger("Task Count", NULL, taskCount, es);
+	if (ShowReceivedData(scanState, es))
+	{
+		Task *task = NULL;
+		uint64 totalReceivedDataForAllTasks = 0;
+		foreach_ptr(task, taskList)
+		{
+			totalReceivedDataForAllTasks += TaskReceivedData(task);
+		}
+		ExplainPropertyBytes("Data received from workers",
+							 totalReceivedDataForAllTasks,
+							 es);
+	}

 	if (dependentJobCount > 0)
 	{
@ -366,7 +406,7 @@ ExplainJob(Job *job, ExplainState *es)
 	{
 		ExplainOpenGroup("Tasks", "Tasks", false, es);

-		ExplainTaskList(taskList, es);
+		ExplainTaskList(scanState, taskList, es);

 		ExplainCloseGroup("Tasks", "Tasks", false, es);
 	}
@ -392,6 +432,23 @@ ExplainJob(Job *job, ExplainState *es)
 }


+/*
+ * TaskReceivedData returns the amount of data that was received by the
+ * coordinator for the task. If it's a RETURNING DML task the value stored in
+ * totalReceivedData is not correct yet because it only counts the bytes for
+ * one placement.
+ */
+static uint64
+TaskReceivedData(Task *task)
+{
+	if (task->taskType == MODIFY_TASK)
+	{
+		return task->totalReceivedData * list_length(task->taskPlacementList);
+	}
+	return task->totalReceivedData;
+}
+
+
 /*
 * ExplainMapMergeJob shows a very basic EXPLAIN plan for a MapMergeJob. It does
 * not yet show the EXPLAIN plan for the individual tasks, because this requires
@ -449,7 +506,7 @@ ExplainMapMergeJob(MapMergeJob *mapMergeJob, ExplainState *es)
 * or all tasks if citus.explain_all_tasks is on.
 */
 static void
-ExplainTaskList(List *taskList, ExplainState *es)
+ExplainTaskList(CitusScanState *scanState, List *taskList, ExplainState *es)
 {
 	ListCell *taskCell = NULL;
 	ListCell *remoteExplainCell = NULL;
@ -477,7 +534,7 @@ ExplainTaskList(List *taskList, ExplainState *es)
 		RemoteExplainPlan *remoteExplain =
 			(RemoteExplainPlan *) lfirst(remoteExplainCell);

-		ExplainTask(task, remoteExplain->placementIndex,
+		ExplainTask(scanState, task, remoteExplain->placementIndex,
 					remoteExplain->explainOutputList, es);
 	}
 }
@ -623,7 +680,9 @@ FetchRemoteExplainFromWorkers(Task *task, ExplainState *es)
 * then the EXPLAIN output could not be fetched from any placement.
 */
 static void
-ExplainTask(Task *task, int placementIndex, List *explainOutputList, ExplainState *es)
+ExplainTask(CitusScanState *scanState, Task *task, int placementIndex,
+			List *explainOutputList,
+			ExplainState *es)
 {
 	ExplainOpenGroup("Task", NULL, true, es);

@ -640,6 +699,13 @@ ExplainTask(Task *task, int placementIndex, List *explainOutputList, ExplainStat
 		ExplainPropertyText("Query", queryText, es);
 	}

+	if (ShowReceivedData(scanState, es))
+	{
+		ExplainPropertyBytes("Data received from worker",
+							 TaskReceivedData(task),
+							 es);
+	}
+
 	if (explainOutputList != NIL)
 	{
 		List *taskPlacementList = task->taskPlacementList;
@ -1092,6 +1158,7 @@ CreateExplainAnlyzeDestination(Task *task, TupleDestination *taskDest)

 	tupleDestination->pub.putTuple = ExplainAnalyzeDestPutTuple;
 	tupleDestination->pub.tupleDescForQuery = ExplainAnalyzeDestTupleDescForQuery;
+	tupleDestination->pub.originalTask = task;

 	return (TupleDestination *) tupleDestination;
 }
--- a/src/backend/distributed/utils/citus_copyfuncs.c
+++ b/src/backend/distributed/utils/citus_copyfuncs.c
@ -329,6 +329,7 @@ CopyNodeTask(COPYFUNC_ARGS)
 	COPY_SCALAR_FIELD(parametersInQueryStringResolved);
 	COPY_SCALAR_FIELD(tupleDest);
 	COPY_SCALAR_FIELD(queryCount);
+	COPY_SCALAR_FIELD(totalReceivedData);
 	COPY_SCALAR_FIELD(fetchedExplainAnalyzePlacementIndex);
 	COPY_STRING_FIELD(fetchedExplainAnalyzePlan);
 }
--- a/src/include/distributed/multi_physical_planner.h
+++ b/src/include/distributed/multi_physical_planner.h
@ -341,7 +341,11 @@ typedef struct Task
 	/*
 	 * EXPLAIN ANALYZE output fetched from worker. This is saved to be used later
 	 * by RemoteExplain().
+	 *
+	 * totalReceivedData only counts the data for a single placement. So for
+	 * RETURNING DML this is not really correct.
 	 */
+	uint64 totalReceivedData;
 	char *fetchedExplainAnalyzePlan;
 	int fetchedExplainAnalyzePlacementIndex;
 } Task;
--- a/src/include/distributed/tuple_destination.h
+++ b/src/include/distributed/tuple_destination.h
@ -36,6 +36,7 @@ struct TupleDestination

 	/* tupleDescForQuery returns tuple descriptor for a query number. Can return NULL. */
 	TupleDesc (*tupleDescForQuery)(TupleDestination *self, int queryNumber);
+	Task *originalTask;
 };

 extern TupleDestination * CreateTupleStoreTupleDest(Tuplestorestate *tupleStore, TupleDesc
--- a/src/test/regress/expected/local_shard_execution.out
+++ b/src/test/regress/expected/local_shard_execution.out
@ -296,17 +296,19 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)   SELECT * FROM distribute
 ---------------------------------------------------------------------
 Custom Scan (Citus Adaptive) (actual rows=1 loops=1)
   Task Count: 1
+   Data received from workers: 5 bytes
   Tasks Shown: All
   ->  Task
+         Data received from worker: 5 bytes
         Node: host=localhost port=xxxxx dbname=regression
         ->  Index Scan using distributed_table_pkey_1470001 on distributed_table_1470001 distributed_table (actual rows=1 loops=1)
               Index Cond: (key = 1)
               Filter: (age = 20)
-(8 rows)
+(10 rows)

 EXPLAIN (ANALYZE ON, COSTS OFF, SUMMARY OFF, TIMING OFF)
 WITH r AS ( SELECT random() z,* FROM distributed_table)
-SELECT * FROM r WHERE z < 2;
+SELECT 1 FROM r WHERE z < 2;
                                               QUERY PLAN
 ---------------------------------------------------------------------
 Custom Scan (Citus Adaptive) (actual rows=1 loops=1)
@ -315,17 +317,21 @@ SELECT * FROM r WHERE z < 2;
         Result destination: Write locally
         ->  Custom Scan (Citus Adaptive) (actual rows=1 loops=1)
               Task Count: 4
+               Data received from workers: 22 bytes
               Tasks Shown: One of 4
               ->  Task
+                     Data received from worker: 22 bytes
                     Node: host=localhost port=xxxxx dbname=regression
                     ->  Seq Scan on distributed_table_1470001 distributed_table (actual rows=1 loops=1)
   Task Count: 1
+   Data received from workers: 1 bytes
   Tasks Shown: All
   ->  Task
+         Data received from worker: 1 bytes
         Node: host=localhost port=xxxxx dbname=regression
         ->  Function Scan on read_intermediate_result intermediate_result (actual rows=1 loops=1)
               Filter: (z < '2'::double precision)
-(16 rows)
+(20 rows)

 EXPLAIN (COSTS OFF) DELETE FROM distributed_table WHERE key = 1 AND age = 20;
                                                    QUERY PLAN
--- a/src/test/regress/expected/multi_explain.out
+++ b/src/test/regress/expected/multi_explain.out
@ -283,8 +283,10 @@ Sort (actual rows=50 loops=1)
        Group Key: remote_scan.l_quantity
        ->  Custom Scan (Citus Adaptive) (actual rows=100 loops=1)
              Task Count: 2
+              Data received from workers: 780 bytes
              Tasks Shown: One of 2
              ->  Task
+                    Data received from worker: 390 bytes
                    Node: host=localhost port=xxxxx dbname=regression
                    ->  HashAggregate (actual rows=50 loops=1)
                          Group Key: l_quantity
@ -301,6 +303,7 @@ EXPLAIN (COSTS off, ANALYZE on, TIMING off, SUMMARY off) SELECT count(*) FROM t1
 Aggregate (actual rows=1 loops=1)
  ->  Custom Scan (Citus Adaptive) (actual rows=4 loops=1)
        Task Count: 4
+        Data received from workers: 4 bytes
        Tasks Shown: None, not supported for re-partition queries
        ->  MapMergeJob
              Map Task Count: 3
@ -324,9 +327,11 @@ Sort (actual rows=50 loops=1)
        ->  Custom Scan (Citus Adaptive) (actual rows=100 loops=1)
              Output: remote_scan.l_quantity, remote_scan.count_quantity
              Task Count: 2
+              Data received from workers: 780 bytes
              Tasks Shown: One of 2
              ->  Task
                    Query: SELECT l_quantity, count(*) AS count_quantity FROM lineitem_290000 lineitem WHERE true GROUP BY l_quantity
+                    Data received from worker: 390 bytes
                    Node: host=localhost port=xxxxx dbname=regression
                    ->  HashAggregate (actual rows=50 loops=1)
                          Output: l_quantity, count(*)
@ -966,13 +971,16 @@ Sort (actual rows=50 loops=1)
        Group Key: remote_scan.l_quantity
        ->  Custom Scan (Citus Adaptive) (actual rows=100 loops=1)
              Task Count: 2
+              Data received from workers: 780 bytes
              Tasks Shown: All
              ->  Task
+                    Data received from worker: 390 bytes
                    Node: host=localhost port=xxxxx dbname=regression
                    ->  HashAggregate (actual rows=50 loops=1)
                          Group Key: l_quantity
                          ->  Seq Scan on lineitem_290000 lineitem (actual rows=6000 loops=1)
              ->  Task
+                    Data received from worker: 390 bytes
                    Node: host=localhost port=xxxxx dbname=regression
                    ->  HashAggregate (actual rows=50 loops=1)
                          Group Key: l_quantity
@ -1239,8 +1247,10 @@ Custom Scan (Citus Adaptive)  (cost=0.00..0.00 rows=100000 width=18)
 EXPLAIN (ANALYZE ON, COSTS OFF, TIMING OFF, SUMMARY OFF) EXECUTE router_executor_query_param(5);
 Custom Scan (Citus Adaptive) (actual rows=3 loops=1)
  Task Count: 1
+  Data received from workers: 0 bytes
  Tasks Shown: All
  ->  Task
+        Data received from worker: 0 bytes
        Node: host=localhost port=xxxxx dbname=regression
        ->  Index Scan using lineitem_pkey_290000 on lineitem_290000 lineitem (actual rows=3 loops=1)
              Index Cond: (l_orderkey = 5)
@ -1796,8 +1806,10 @@ SELECT create_distributed_table('explain_analyze_test', 'a');
 EXPLAIN :default_analyze_flags SELECT * FROM explain_analyze_test WHERE a = 1;
 Custom Scan (Citus Adaptive) (actual rows=1 loops=1)
  Task Count: 1
+  Data received from workers: 8 bytes
  Tasks Shown: All
  ->  Task
+        Data received from worker: 8 bytes
        Node: host=localhost port=xxxxx dbname=regression
        ->  Seq Scan on explain_analyze_test_570009 explain_analyze_test (actual rows=1 loops=1)
              Filter: (a = 1)
@ -1806,11 +1818,37 @@ EXPLAIN :default_analyze_flags SELECT count(*) FROM explain_analyze_test;
 Aggregate (actual rows=1 loops=1)
  ->  Custom Scan (Citus Adaptive) (actual rows=4 loops=1)
        Task Count: 4
+        Data received from workers: 4 bytes
        Tasks Shown: One of 4
        ->  Task
+              Data received from worker: 1 bytes
              Node: host=localhost port=xxxxx dbname=regression
              ->  Aggregate (actual rows=1 loops=1)
                    ->  Seq Scan on explain_analyze_test_570009 explain_analyze_test (actual rows=1 loops=1)
+-- empty router SELECT
+EXPLAIN :default_analyze_flags SELECT * FROM explain_analyze_test WHERE a = 10000;
+Custom Scan (Citus Adaptive) (actual rows=0 loops=1)
+  Task Count: 1
+  Data received from workers: 0 bytes
+  Tasks Shown: All
+  ->  Task
+        Data received from worker: 0 bytes
+        Node: host=localhost port=xxxxx dbname=regression
+        ->  Seq Scan on explain_analyze_test_570012 explain_analyze_test (actual rows=0 loops=1)
+              Filter: (a = 10000)
+              Rows Removed by Filter: 1
+-- empty multi-shard SELECT
+EXPLAIN :default_analyze_flags SELECT * FROM explain_analyze_test WHERE b = 'does not exist';
+Custom Scan (Citus Adaptive) (actual rows=0 loops=1)
+  Task Count: 4
+  Data received from workers: 0 bytes
+  Tasks Shown: One of 4
+  ->  Task
+        Data received from worker: 0 bytes
+        Node: host=localhost port=xxxxx dbname=regression
+        ->  Seq Scan on explain_analyze_test_570009 explain_analyze_test (actual rows=0 loops=1)
+              Filter: (b = 'does not exist'::text)
+              Rows Removed by Filter: 1
 -- router DML
 BEGIN;
 EXPLAIN :default_analyze_flags DELETE FROM explain_analyze_test WHERE a = 1;
@ -1857,6 +1895,32 @@ Custom Scan (Citus Adaptive) (actual rows=0 loops=1)
              ->  Seq Scan on explain_analyze_test_570009 explain_analyze_test (actual rows=1 loops=1)
 SELECT * FROM explain_analyze_test ORDER BY a;
 ROLLBACK;
+-- router DML with RETURNING with empty result
+EXPLAIN :default_analyze_flags UPDATE explain_analyze_test SET b = 'something' WHERE a = 10000 RETURNING *;
+Custom Scan (Citus Adaptive) (actual rows=0 loops=1)
+  Task Count: 1
+  Data received from workers: 0 bytes
+  Tasks Shown: All
+  ->  Task
+        Data received from worker: 0 bytes
+        Node: host=localhost port=xxxxx dbname=regression
+        ->  Update on explain_analyze_test_570012 explain_analyze_test (actual rows=0 loops=1)
+              ->  Seq Scan on explain_analyze_test_570012 explain_analyze_test (actual rows=0 loops=1)
+                    Filter: (a = 10000)
+                    Rows Removed by Filter: 1
+-- multi-shard DML with RETURNING with empty result
+EXPLAIN :default_analyze_flags UPDATE explain_analyze_test SET b = 'something' WHERE b = 'does not exist' RETURNING *;
+Custom Scan (Citus Adaptive) (actual rows=0 loops=1)
+  Task Count: 4
+  Data received from workers: 0 bytes
+  Tasks Shown: One of 4
+  ->  Task
+        Data received from worker: 0 bytes
+        Node: host=localhost port=xxxxx dbname=regression
+        ->  Update on explain_analyze_test_570009 explain_analyze_test (actual rows=0 loops=1)
+              ->  Seq Scan on explain_analyze_test_570009 explain_analyze_test (actual rows=0 loops=1)
+                    Filter: (b = 'does not exist'::text)
+                    Rows Removed by Filter: 1
 -- single-row insert
 BEGIN;
 EXPLAIN :default_analyze_flags INSERT INTO explain_analyze_test VALUES (5, 'value 5');
@ -1964,6 +2028,50 @@ EXPLAIN (COSTS off, ANALYZE on, TIMING off, SUMMARY off, FORMAT JSON) INSERT INT
  }
 ]
 ROLLBACK;
+EXPLAIN (COSTS off, ANALYZE on, TIMING off, SUMMARY off, FORMAT JSON) SELECT * FROM explain_pk;
+[
+  {
+    "Plan": {
+      "Node Type": "Custom Scan",
+      "Custom Plan Provider": "Citus Adaptive",
+      "Parallel Aware": false,
+      "Actual Rows": 0,
+      "Actual Loops": 1,
+      "Distributed Query": {
+        "Job": {
+          "Task Count": 4,
+          "Data received from workers": "0 bytes",
+          "Tasks Shown": "One of 4",
+          "Tasks": [
+            {
+              "Data received from worker": "0 bytes",
+              "Node": "host=localhost port=xxxxx dbname=regression",
+              "Remote Plan": [
+                [
+                  {
+                    "Plan": {
+                      "Node Type": "Seq Scan",
+                      "Parallel Aware": false,
+                      "Relation Name": "explain_pk_570013",
+                      "Alias": "explain_pk",
+                      "Actual Rows": 0,
+                      "Actual Loops": 1
+                    },
+                    "Triggers": [
+                    ]
+                  }
+                ]
+
+              ]
+            }
+          ]
+        }
+      }
+    },
+    "Triggers": [
+    ]
+  }
+]
 BEGIN;
 EXPLAIN (COSTS off, ANALYZE on, TIMING off, SUMMARY off, FORMAT XML) INSERT INTO explain_pk VALUES (1, 2), (2, 3);
 <explain xmlns="http://www.postgresql.org/2009/explain">
@ -2017,6 +2125,49 @@ EXPLAIN (COSTS off, ANALYZE on, TIMING off, SUMMARY off, FORMAT XML) INSERT INTO
  </Query>
 </explain>
 ROLLBACK;
+EXPLAIN (COSTS off, ANALYZE on, TIMING off, SUMMARY off, FORMAT XML) SELECT * FROM explain_pk;
+<explain xmlns="http://www.postgresql.org/2009/explain">
+  <Query>
+    <Plan>
+      <Node-Type>Custom Scan</Node-Type>
+      <Custom-Plan-Provider>Citus Adaptive</Custom-Plan-Provider>
+      <Parallel-Aware>false</Parallel-Aware>
+      <Actual-Rows>0</Actual-Rows>
+      <Actual-Loops>1</Actual-Loops>
+      <Distributed-Query>
+        <Job>
+          <Task-Count>4</Task-Count>
+          <Data-received-from-workers>0 bytes</Data-received-from-workers>
+          <Tasks-Shown>One of 4</Tasks-Shown>
+          <Tasks>
+            <Task>
+              <Data-received-from-worker>0 bytes</Data-received-from-worker>
+              <Node>host=localhost port=xxxxx dbname=regression</Node>
+              <Remote-Plan>
+                <explain xmlns="http://www.postgresql.org/2009/explain">
+                  <Query>
+                    <Plan>
+                      <Node-Type>Seq Scan</Node-Type>
+                      <Parallel-Aware>false</Parallel-Aware>
+                      <Relation-Name>explain_pk_570013</Relation-Name>
+                      <Alias>explain_pk</Alias>
+                      <Actual-Rows>0</Actual-Rows>
+                      <Actual-Loops>1</Actual-Loops>
+                    </Plan>
+                    <Triggers>
+                    </Triggers>
+                  </Query>
+                </explain>
+              </Remote-Plan>
+            </Task>
+          </Tasks>
+        </Job>
+      </Distributed-Query>
+    </Plan>
+    <Triggers>
+    </Triggers>
+  </Query>
+</explain>
 DROP TABLE explain_pk;
 -- test EXPLAIN ANALYZE with CTEs and subqueries
 CREATE TABLE dist_table(a int, b int);
@ -2029,7 +2180,7 @@ INSERT INTO dist_table SELECT i, i*i FROM generate_series(1, 10) i;
 INSERT INTO ref_table SELECT i FROM generate_series(1, 10) i;
 EXPLAIN :default_analyze_flags
 WITH r AS (
-	SELECT random() r, a FROM dist_table
+	SELECT GREATEST(random(), 2) r, a FROM dist_table
 )
 SELECT count(distinct a) from r NATURAL JOIN ref_table;
 Custom Scan (Citus Adaptive) (actual rows=1 loops=1)
@ -2038,13 +2189,17 @@ Custom Scan (Citus Adaptive) (actual rows=1 loops=1)
        Result destination: Send to 2 nodes
        ->  Custom Scan (Citus Adaptive) (actual rows=10 loops=1)
              Task Count: 4
+              Data received from workers: 21 bytes
              Tasks Shown: One of 4
              ->  Task
+                    Data received from worker: 9 bytes
                    Node: host=localhost port=xxxxx dbname=regression
                    ->  Seq Scan on dist_table_570017 dist_table (actual rows=4 loops=1)
  Task Count: 1
+  Data received from workers: 2 bytes
  Tasks Shown: All
  ->  Task
+        Data received from worker: 2 bytes
        Node: host=localhost port=xxxxx dbname=regression
        ->  Aggregate (actual rows=1 loops=1)
              ->  Hash Join (actual rows=10 loops=1)
@ -2054,12 +2209,14 @@ Custom Scan (Citus Adaptive) (actual rows=1 loops=1)
                          Buckets: 1024  Batches: 1  Memory Usage: 9kB
                          ->  Function Scan on read_intermediate_result intermediate_result (actual rows=10 loops=1)
 EXPLAIN :default_analyze_flags
-SELECT count(distinct a) FROM (SELECT random() r, a FROM dist_table) t NATURAL JOIN ref_table;
+SELECT count(distinct a) FROM (SELECT GREATEST(random(), 2) r, a FROM dist_table) t NATURAL JOIN ref_table;
 Aggregate (actual rows=1 loops=1)
  ->  Custom Scan (Citus Adaptive) (actual rows=10 loops=1)
        Task Count: 4
+        Data received from workers: 11 bytes
        Tasks Shown: One of 4
        ->  Task
+              Data received from worker: 5 bytes
              Node: host=localhost port=xxxxx dbname=regression
              ->  Group (actual rows=4 loops=1)
                    Group Key: t.a
@ -2076,16 +2233,18 @@ Aggregate (actual rows=1 loops=1)
                                ->  Seq Scan on ref_table_570021 ref_table (actual rows=10 loops=1)
 EXPLAIN :default_analyze_flags
 SELECT count(distinct a) FROM dist_table
-WHERE EXISTS(SELECT random() FROM dist_table NATURAL JOIN ref_table);
+WHERE EXISTS(SELECT random() < 2 FROM dist_table NATURAL JOIN ref_table);
 Aggregate (actual rows=1 loops=1)
  ->  Custom Scan (Citus Adaptive) (actual rows=10 loops=1)
        ->  Distributed Subplan XXX_1
-              Intermediate Data Size: 140 bytes
+              Intermediate Data Size: 70 bytes
              Result destination: Send to 2 nodes
              ->  Custom Scan (Citus Adaptive) (actual rows=10 loops=1)
                    Task Count: 4
+                    Data received from workers: 10 bytes
                    Tasks Shown: One of 4
                    ->  Task
+                          Data received from worker: 4 bytes
                          Node: host=localhost port=xxxxx dbname=regression
                          ->  Merge Join (actual rows=4 loops=1)
                                Merge Cond: (dist_table.a = ref_table.a)
@ -2098,8 +2257,10 @@ Aggregate (actual rows=1 loops=1)
                                      Sort Method: quicksort  Memory: 25kB
                                      ->  Seq Scan on ref_table_570021 ref_table (actual rows=10 loops=1)
        Task Count: 4
+        Data received from workers: 11 bytes
        Tasks Shown: One of 4
        ->  Task
+              Data received from worker: 5 bytes
              Node: host=localhost port=xxxxx dbname=regression
              ->  HashAggregate (actual rows=4 loops=1)
                    Group Key: dist_table.a
@ -2114,7 +2275,7 @@ WITH r AS (
 	INSERT INTO dist_table SELECT a, a * a FROM dist_table
 	RETURNING a
 ), s AS (
-	SELECT random(), a * a a2 FROM r
+	SELECT random() < 2, a * a a2 FROM r
 )
 SELECT count(distinct a2) FROM s;
 Custom Scan (Citus Adaptive) (actual rows=1 loops=1)
@ -2123,26 +2284,81 @@ Custom Scan (Citus Adaptive) (actual rows=1 loops=1)
        Result destination: Write locally
        ->  Custom Scan (Citus Adaptive) (actual rows=20 loops=1)
              Task Count: 4
+              Data received from workers: 44 bytes
              Tasks Shown: One of 4
              ->  Task
+                    Data received from worker: 20 bytes
                    Node: host=localhost port=xxxxx dbname=regression
                    ->  Insert on dist_table_570017 citus_table_alias (actual rows=8 loops=1)
                          ->  Seq Scan on dist_table_570017 dist_table (actual rows=8 loops=1)
                                Filter: ((worker_hash(a) >= '-2147483648'::integer) AND (worker_hash(a) <= '-1073741825'::integer))
  ->  Distributed Subplan XXX_2
-        Intermediate Data Size: 220 bytes
+        Intermediate Data Size: 150 bytes
        Result destination: Write locally
        ->  Custom Scan (Citus Adaptive) (actual rows=10 loops=1)
              Task Count: 1
+              Data received from workers: 28 bytes
              Tasks Shown: All
              ->  Task
+                    Data received from worker: 28 bytes
                    Node: host=localhost port=xxxxx dbname=regression
                    ->  Function Scan on read_intermediate_result intermediate_result (actual rows=10 loops=1)
  Task Count: 1
+  Data received from workers: 2 bytes
  Tasks Shown: All
  ->  Task
+        Data received from worker: 2 bytes
        Node: host=localhost port=xxxxx dbname=regression
        ->  Aggregate (actual rows=1 loops=1)
              ->  Function Scan on read_intermediate_result intermediate_result (actual rows=10 loops=1)
 ROLLBACK;
 DROP TABLE ref_table, dist_table;
+-- test EXPLAIN ANALYZE with different replication factors
+SET citus.shard_count = 2;
+SET citus.shard_replication_factor = 1;
+CREATE TABLE dist_table_rep1(a int);
+SELECT create_distributed_table('dist_table_rep1', 'a');
+
+SET citus.shard_replication_factor = 2;
+CREATE TABLE dist_table_rep2(a int);
+SELECT create_distributed_table('dist_table_rep2', 'a');
+
+EXPLAIN :default_analyze_flags INSERT INTO dist_table_rep1 VALUES(1), (2), (3), (4) RETURNING *;
+Custom Scan (Citus Adaptive) (actual rows=4 loops=1)
+  Task Count: 2
+  Data received from workers: 4 bytes
+  Tasks Shown: One of 2
+  ->  Task
+        Data received from worker: 3 bytes
+        Node: host=localhost port=xxxxx dbname=regression
+        ->  Insert on dist_table_rep1_570022 citus_table_alias (actual rows=3 loops=1)
+              ->  Values Scan on "*VALUES*" (actual rows=3 loops=1)
+EXPLAIN :default_analyze_flags SELECT * from dist_table_rep1;
+Custom Scan (Citus Adaptive) (actual rows=4 loops=1)
+  Task Count: 2
+  Data received from workers: 4 bytes
+  Tasks Shown: One of 2
+  ->  Task
+        Data received from worker: 3 bytes
+        Node: host=localhost port=xxxxx dbname=regression
+        ->  Seq Scan on dist_table_rep1_570022 dist_table_rep1 (actual rows=3 loops=1)
+EXPLAIN :default_analyze_flags INSERT INTO dist_table_rep2 VALUES(1), (2), (3), (4) RETURNING *;
+Custom Scan (Citus Adaptive) (actual rows=4 loops=1)
+  Task Count: 2
+  Data received from workers: 8 bytes
+  Tasks Shown: One of 2
+  ->  Task
+        Data received from worker: 6 bytes
+        Node: host=localhost port=xxxxx dbname=regression
+        ->  Insert on dist_table_rep2_570024 citus_table_alias (actual rows=3 loops=1)
+              ->  Values Scan on "*VALUES*" (actual rows=3 loops=1)
+EXPLAIN :default_analyze_flags SELECT * from dist_table_rep2;
+Custom Scan (Citus Adaptive) (actual rows=4 loops=1)
+  Task Count: 2
+  Data received from workers: 4 bytes
+  Tasks Shown: One of 2
+  ->  Task
+        Data received from worker: 3 bytes
+        Node: host=localhost port=xxxxx dbname=regression
+        ->  Seq Scan on dist_table_rep2_570024 dist_table_rep2 (actual rows=3 loops=1)
+DROP TABLE dist_table_rep1, dist_table_rep2;
--- a/src/test/regress/sql/binary_protocol.sql
+++ b/src/test/regress/sql/binary_protocol.sql
@ -1,4 +1,4 @@
-SET citus.shard_count = 4;
+SET citus.shard_count = 2;
 SET citus.next_shard_id TO 4754000;
 CREATE SCHEMA binary_protocol;
 SET search_path TO binary_protocol;
@ -19,6 +19,12 @@ SELECT id, id, id, id, id,
       id, id, id, id, id
    FROM t ORDER BY id;

+-- EXPLAIN ANALYZE is currently forced to use text protocol. Once that is
+-- changed the numbers reported should change.
+EXPLAIN (ANALYZE TRUE, TIMING FALSE, COSTS FALSE, SUMMARY FALSE) SELECT id FROM t ORDER BY 1;
+SET citus.explain_all_tasks TO ON;
+EXPLAIN (ANALYZE TRUE, TIMING FALSE, COSTS FALSE, SUMMARY FALSE) SELECT id FROM t ORDER BY 1;
+
 INSERT INTO t SELECT count(*) from t;

 INSERT INTO t (SELECT id+1 from t);
--- a/src/test/regress/sql/local_shard_execution.sql
+++ b/src/test/regress/sql/local_shard_execution.sql
@ -205,7 +205,7 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)   SELECT * FROM distribute

 EXPLAIN (ANALYZE ON, COSTS OFF, SUMMARY OFF, TIMING OFF)
 WITH r AS ( SELECT random() z,* FROM distributed_table)
-SELECT * FROM r WHERE z < 2;
+SELECT 1 FROM r WHERE z < 2;

 EXPLAIN (COSTS OFF) DELETE FROM distributed_table WHERE key = 1 AND age = 20;

--- a/src/test/regress/sql/multi_explain.sql
+++ b/src/test/regress/sql/multi_explain.sql
@ -769,6 +769,12 @@ EXPLAIN :default_analyze_flags SELECT * FROM explain_analyze_test WHERE a = 1;
 -- multi-shard SELECT
 EXPLAIN :default_analyze_flags SELECT count(*) FROM explain_analyze_test;

+-- empty router SELECT
+EXPLAIN :default_analyze_flags SELECT * FROM explain_analyze_test WHERE a = 10000;
+
+-- empty multi-shard SELECT
+EXPLAIN :default_analyze_flags SELECT * FROM explain_analyze_test WHERE b = 'does not exist';
+
 -- router DML
 BEGIN;
 EXPLAIN :default_analyze_flags DELETE FROM explain_analyze_test WHERE a = 1;
@ -783,6 +789,12 @@ EXPLAIN :default_analyze_flags DELETE FROM explain_analyze_test;
 SELECT * FROM explain_analyze_test ORDER BY a;
 ROLLBACK;

+-- router DML with RETURNING with empty result
+EXPLAIN :default_analyze_flags UPDATE explain_analyze_test SET b = 'something' WHERE a = 10000 RETURNING *;
+-- multi-shard DML with RETURNING with empty result
+EXPLAIN :default_analyze_flags UPDATE explain_analyze_test SET b = 'something' WHERE b = 'does not exist' RETURNING *;
+
+
 -- single-row insert
 BEGIN;
 EXPLAIN :default_analyze_flags INSERT INTO explain_analyze_test VALUES (5, 'value 5');
@ -814,10 +826,14 @@ BEGIN;
 EXPLAIN (COSTS off, ANALYZE on, TIMING off, SUMMARY off, FORMAT JSON) INSERT INTO explain_pk VALUES (1, 2), (2, 3);
 ROLLBACK;

+EXPLAIN (COSTS off, ANALYZE on, TIMING off, SUMMARY off, FORMAT JSON) SELECT * FROM explain_pk;
+
 BEGIN;
 EXPLAIN (COSTS off, ANALYZE on, TIMING off, SUMMARY off, FORMAT XML) INSERT INTO explain_pk VALUES (1, 2), (2, 3);
 ROLLBACK;

+EXPLAIN (COSTS off, ANALYZE on, TIMING off, SUMMARY off, FORMAT XML) SELECT * FROM explain_pk;
+
 DROP TABLE explain_pk;

 -- test EXPLAIN ANALYZE with CTEs and subqueries
@ -831,16 +847,16 @@ INSERT INTO ref_table SELECT i FROM generate_series(1, 10) i;

 EXPLAIN :default_analyze_flags
 WITH r AS (
-	SELECT random() r, a FROM dist_table
+	SELECT GREATEST(random(), 2) r, a FROM dist_table
 )
 SELECT count(distinct a) from r NATURAL JOIN ref_table;

 EXPLAIN :default_analyze_flags
-SELECT count(distinct a) FROM (SELECT random() r, a FROM dist_table) t NATURAL JOIN ref_table;
+SELECT count(distinct a) FROM (SELECT GREATEST(random(), 2) r, a FROM dist_table) t NATURAL JOIN ref_table;

 EXPLAIN :default_analyze_flags
 SELECT count(distinct a) FROM dist_table
-WHERE EXISTS(SELECT random() FROM dist_table NATURAL JOIN ref_table);
+WHERE EXISTS(SELECT random() < 2 FROM dist_table NATURAL JOIN ref_table);

 BEGIN;
 EXPLAIN :default_analyze_flags
@ -848,9 +864,27 @@ WITH r AS (
 	INSERT INTO dist_table SELECT a, a * a FROM dist_table
 	RETURNING a
 ), s AS (
-	SELECT random(), a * a a2 FROM r
+	SELECT random() < 2, a * a a2 FROM r
 )
 SELECT count(distinct a2) FROM s;
 ROLLBACK;

 DROP TABLE ref_table, dist_table;
+
+-- test EXPLAIN ANALYZE with different replication factors
+SET citus.shard_count = 2;
+SET citus.shard_replication_factor = 1;
+CREATE TABLE dist_table_rep1(a int);
+SELECT create_distributed_table('dist_table_rep1', 'a');
+
+SET citus.shard_replication_factor = 2;
+CREATE TABLE dist_table_rep2(a int);
+SELECT create_distributed_table('dist_table_rep2', 'a');
+
+EXPLAIN :default_analyze_flags INSERT INTO dist_table_rep1 VALUES(1), (2), (3), (4) RETURNING *;
+EXPLAIN :default_analyze_flags SELECT * from dist_table_rep1;
+
+EXPLAIN :default_analyze_flags INSERT INTO dist_table_rep2 VALUES(1), (2), (3), (4) RETURNING *;
+EXPLAIN :default_analyze_flags SELECT * from dist_table_rep2;
+
+DROP TABLE dist_table_rep1, dist_table_rep2;