Show amount of data received in EXPLAIN ANALYZE (#3901)

Sadly this does not actually work yet for binary protocol data, because
when doing EXPLAIN ANALYZE we send two commands at the same time. This
means we cannot use `SendRemoteCommandParams`, and thus cannot use the
binary protocol. This can still be useful though when using the text
protocol, to find out that a lot of data is being sent.
pull/3914/head
Jelte Fennema 2020-06-15 16:01:05 +02:00 committed by GitHub
parent 077c784fe9
commit 927de6d187
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 376 additions and 26 deletions

View File

@ -3731,6 +3731,21 @@ ReceiveResults(WorkerSession *session, bool storeRows)
{
executionStats->totalIntermediateResultSize += valueLength;
}
/*
* We use this to count the amount of data that has been
* received for EXPLAIN ANALYZE.
* Only EXPLAIN ANALYZE TupleDestination has originalTask
* defined. So that's why we check for it, otherwise we
* don't have to keep track of this data.
* The worker plan itself is also sent as a result in the
* same task. We filter this out by only counting the data
* from the first query.
*/
if (tupleDest->originalTask && queryIndex == 0)
{
tupleDest->originalTask->totalReceivedData += valueLength;
}
}
}

View File

@ -116,13 +116,14 @@ typedef struct ExplainAnalyzeDestination
/* Explain functions for distributed queries */
static void ExplainSubPlans(DistributedPlan *distributedPlan, ExplainState *es);
static void ExplainJob(Job *job, ExplainState *es);
static void ExplainJob(CitusScanState *scanState, Job *job, ExplainState *es);
static void ExplainMapMergeJob(MapMergeJob *mapMergeJob, ExplainState *es);
static void ExplainTaskList(List *taskList, ExplainState *es);
static void ExplainTaskList(CitusScanState *scanState, List *taskList, ExplainState *es);
static RemoteExplainPlan * RemoteExplain(Task *task, ExplainState *es);
static RemoteExplainPlan * GetSavedRemoteExplain(Task *task, ExplainState *es);
static RemoteExplainPlan * FetchRemoteExplainFromWorkers(Task *task, ExplainState *es);
static void ExplainTask(Task *task, int placementIndex, List *explainOutputList,
static void ExplainTask(CitusScanState *scanState, Task *task, int placementIndex,
List *explainOutputList,
ExplainState *es);
static void ExplainTaskPlacement(ShardPlacement *taskPlacement, List *explainOutputList,
ExplainState *es);
@ -154,6 +155,9 @@ static void ExplainOneQuery(Query *query, int cursorOptions,
const char *queryString, ParamListInfo params,
QueryEnvironment *queryEnv);
static double elapsed_time(instr_time *starttime);
static void ExplainPropertyBytes(const char *qlabel, int64 bytes, ExplainState *es);
static uint64 TaskReceivedData(Task *task);
static bool ShowReceivedData(CitusScanState *scanState, ExplainState *es);
/* exports for SQL callable functions */
@ -187,7 +191,7 @@ CitusExplainScan(CustomScanState *node, List *ancestors, struct ExplainState *es
ExplainSubPlans(distributedPlan, es);
}
ExplainJob(distributedPlan->workerJob, es);
ExplainJob(scanState, distributedPlan->workerJob, es);
ExplainCloseGroup("Distributed Query", "Distributed Query", true, es);
}
@ -287,8 +291,8 @@ ExplainSubPlans(DistributedPlan *distributedPlan, ExplainState *es)
2, es);
}
ExplainPropertyInteger("Intermediate Data Size", "bytes",
subPlan->bytesSentPerWorker, es);
ExplainPropertyBytes("Intermediate Data Size",
subPlan->bytesSentPerWorker, es);
StringInfo destination = makeStringInfo();
if (subPlan->remoteWorkerCount && subPlan->writeLocalFile)
@ -323,13 +327,37 @@ ExplainSubPlans(DistributedPlan *distributedPlan, ExplainState *es)
}
/*
* ExplainPropertyBytes formats bytes in a human readable way by using
* pg_size_pretty.
*/
static void
ExplainPropertyBytes(const char *qlabel, int64 bytes, ExplainState *es)
{
Datum textDatum = DirectFunctionCall1(pg_size_pretty, Int64GetDatum(bytes));
ExplainPropertyText(qlabel, text_to_cstring(DatumGetTextP(textDatum)), es);
}
/*
* ShowReceivedData returns true if explain should show received data. This is
* only the case when using EXPLAIN ANALYZE on queries that return rows.
*/
static bool
ShowReceivedData(CitusScanState *scanState, ExplainState *es)
{
TupleDesc tupDesc = ScanStateGetTupleDescriptor(scanState);
return es->analyze && tupDesc != NULL && tupDesc->natts > 0;
}
/*
* ExplainJob shows the EXPLAIN output for a Job in the physical plan of
* a distributed query by showing the remote EXPLAIN for the first task,
* or all tasks if citus.explain_all_tasks is on.
*/
static void
ExplainJob(Job *job, ExplainState *es)
ExplainJob(CitusScanState *scanState, Job *job, ExplainState *es)
{
List *dependentJobList = job->dependentJobList;
int dependentJobCount = list_length(dependentJobList);
@ -340,6 +368,18 @@ ExplainJob(Job *job, ExplainState *es)
ExplainOpenGroup("Job", "Job", true, es);
ExplainPropertyInteger("Task Count", NULL, taskCount, es);
if (ShowReceivedData(scanState, es))
{
Task *task = NULL;
uint64 totalReceivedDataForAllTasks = 0;
foreach_ptr(task, taskList)
{
totalReceivedDataForAllTasks += TaskReceivedData(task);
}
ExplainPropertyBytes("Data received from workers",
totalReceivedDataForAllTasks,
es);
}
if (dependentJobCount > 0)
{
@ -366,7 +406,7 @@ ExplainJob(Job *job, ExplainState *es)
{
ExplainOpenGroup("Tasks", "Tasks", false, es);
ExplainTaskList(taskList, es);
ExplainTaskList(scanState, taskList, es);
ExplainCloseGroup("Tasks", "Tasks", false, es);
}
@ -392,6 +432,23 @@ ExplainJob(Job *job, ExplainState *es)
}
/*
* TaskReceivedData returns the amount of data that was received by the
* coordinator for the task. If it's a RETURNING DML task the value stored in
* totalReceivedData is not correct yet because it only counts the bytes for
* one placement.
*/
static uint64
TaskReceivedData(Task *task)
{
if (task->taskType == MODIFY_TASK)
{
return task->totalReceivedData * list_length(task->taskPlacementList);
}
return task->totalReceivedData;
}
/*
* ExplainMapMergeJob shows a very basic EXPLAIN plan for a MapMergeJob. It does
* not yet show the EXPLAIN plan for the individual tasks, because this requires
@ -449,7 +506,7 @@ ExplainMapMergeJob(MapMergeJob *mapMergeJob, ExplainState *es)
* or all tasks if citus.explain_all_tasks is on.
*/
static void
ExplainTaskList(List *taskList, ExplainState *es)
ExplainTaskList(CitusScanState *scanState, List *taskList, ExplainState *es)
{
ListCell *taskCell = NULL;
ListCell *remoteExplainCell = NULL;
@ -477,7 +534,7 @@ ExplainTaskList(List *taskList, ExplainState *es)
RemoteExplainPlan *remoteExplain =
(RemoteExplainPlan *) lfirst(remoteExplainCell);
ExplainTask(task, remoteExplain->placementIndex,
ExplainTask(scanState, task, remoteExplain->placementIndex,
remoteExplain->explainOutputList, es);
}
}
@ -623,7 +680,9 @@ FetchRemoteExplainFromWorkers(Task *task, ExplainState *es)
* then the EXPLAIN output could not be fetched from any placement.
*/
static void
ExplainTask(Task *task, int placementIndex, List *explainOutputList, ExplainState *es)
ExplainTask(CitusScanState *scanState, Task *task, int placementIndex,
List *explainOutputList,
ExplainState *es)
{
ExplainOpenGroup("Task", NULL, true, es);
@ -640,6 +699,13 @@ ExplainTask(Task *task, int placementIndex, List *explainOutputList, ExplainStat
ExplainPropertyText("Query", queryText, es);
}
if (ShowReceivedData(scanState, es))
{
ExplainPropertyBytes("Data received from worker",
TaskReceivedData(task),
es);
}
if (explainOutputList != NIL)
{
List *taskPlacementList = task->taskPlacementList;
@ -1092,6 +1158,7 @@ CreateExplainAnlyzeDestination(Task *task, TupleDestination *taskDest)
tupleDestination->pub.putTuple = ExplainAnalyzeDestPutTuple;
tupleDestination->pub.tupleDescForQuery = ExplainAnalyzeDestTupleDescForQuery;
tupleDestination->pub.originalTask = task;
return (TupleDestination *) tupleDestination;
}

View File

@ -329,6 +329,7 @@ CopyNodeTask(COPYFUNC_ARGS)
COPY_SCALAR_FIELD(parametersInQueryStringResolved);
COPY_SCALAR_FIELD(tupleDest);
COPY_SCALAR_FIELD(queryCount);
COPY_SCALAR_FIELD(totalReceivedData);
COPY_SCALAR_FIELD(fetchedExplainAnalyzePlacementIndex);
COPY_STRING_FIELD(fetchedExplainAnalyzePlan);
}

View File

@ -341,7 +341,11 @@ typedef struct Task
/*
* EXPLAIN ANALYZE output fetched from worker. This is saved to be used later
* by RemoteExplain().
*
* totalReceivedData only counts the data for a single placement. So for
* RETURNING DML this is not really correct.
*/
uint64 totalReceivedData;
char *fetchedExplainAnalyzePlan;
int fetchedExplainAnalyzePlacementIndex;
} Task;

View File

@ -36,6 +36,7 @@ struct TupleDestination
/* tupleDescForQuery returns tuple descriptor for a query number. Can return NULL. */
TupleDesc (*tupleDescForQuery)(TupleDestination *self, int queryNumber);
Task *originalTask;
};
extern TupleDestination * CreateTupleStoreTupleDest(Tuplestorestate *tupleStore, TupleDesc

View File

@ -296,17 +296,19 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM distribute
---------------------------------------------------------------------
Custom Scan (Citus Adaptive) (actual rows=1 loops=1)
Task Count: 1
Data received from workers: 5 bytes
Tasks Shown: All
-> Task
Data received from worker: 5 bytes
Node: host=localhost port=xxxxx dbname=regression
-> Index Scan using distributed_table_pkey_1470001 on distributed_table_1470001 distributed_table (actual rows=1 loops=1)
Index Cond: (key = 1)
Filter: (age = 20)
(8 rows)
(10 rows)
EXPLAIN (ANALYZE ON, COSTS OFF, SUMMARY OFF, TIMING OFF)
WITH r AS ( SELECT random() z,* FROM distributed_table)
SELECT * FROM r WHERE z < 2;
SELECT 1 FROM r WHERE z < 2;
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (Citus Adaptive) (actual rows=1 loops=1)
@ -315,17 +317,21 @@ SELECT * FROM r WHERE z < 2;
Result destination: Write locally
-> Custom Scan (Citus Adaptive) (actual rows=1 loops=1)
Task Count: 4
Data received from workers: 22 bytes
Tasks Shown: One of 4
-> Task
Data received from worker: 22 bytes
Node: host=localhost port=xxxxx dbname=regression
-> Seq Scan on distributed_table_1470001 distributed_table (actual rows=1 loops=1)
Task Count: 1
Data received from workers: 1 bytes
Tasks Shown: All
-> Task
Data received from worker: 1 bytes
Node: host=localhost port=xxxxx dbname=regression
-> Function Scan on read_intermediate_result intermediate_result (actual rows=1 loops=1)
Filter: (z < '2'::double precision)
(16 rows)
(20 rows)
EXPLAIN (COSTS OFF) DELETE FROM distributed_table WHERE key = 1 AND age = 20;
QUERY PLAN

View File

@ -283,8 +283,10 @@ Sort (actual rows=50 loops=1)
Group Key: remote_scan.l_quantity
-> Custom Scan (Citus Adaptive) (actual rows=100 loops=1)
Task Count: 2
Data received from workers: 780 bytes
Tasks Shown: One of 2
-> Task
Data received from worker: 390 bytes
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate (actual rows=50 loops=1)
Group Key: l_quantity
@ -301,6 +303,7 @@ EXPLAIN (COSTS off, ANALYZE on, TIMING off, SUMMARY off) SELECT count(*) FROM t1
Aggregate (actual rows=1 loops=1)
-> Custom Scan (Citus Adaptive) (actual rows=4 loops=1)
Task Count: 4
Data received from workers: 4 bytes
Tasks Shown: None, not supported for re-partition queries
-> MapMergeJob
Map Task Count: 3
@ -324,9 +327,11 @@ Sort (actual rows=50 loops=1)
-> Custom Scan (Citus Adaptive) (actual rows=100 loops=1)
Output: remote_scan.l_quantity, remote_scan.count_quantity
Task Count: 2
Data received from workers: 780 bytes
Tasks Shown: One of 2
-> Task
Query: SELECT l_quantity, count(*) AS count_quantity FROM lineitem_290000 lineitem WHERE true GROUP BY l_quantity
Data received from worker: 390 bytes
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate (actual rows=50 loops=1)
Output: l_quantity, count(*)
@ -966,13 +971,16 @@ Sort (actual rows=50 loops=1)
Group Key: remote_scan.l_quantity
-> Custom Scan (Citus Adaptive) (actual rows=100 loops=1)
Task Count: 2
Data received from workers: 780 bytes
Tasks Shown: All
-> Task
Data received from worker: 390 bytes
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate (actual rows=50 loops=1)
Group Key: l_quantity
-> Seq Scan on lineitem_290000 lineitem (actual rows=6000 loops=1)
-> Task
Data received from worker: 390 bytes
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate (actual rows=50 loops=1)
Group Key: l_quantity
@ -1239,8 +1247,10 @@ Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=100000 width=18)
EXPLAIN (ANALYZE ON, COSTS OFF, TIMING OFF, SUMMARY OFF) EXECUTE router_executor_query_param(5);
Custom Scan (Citus Adaptive) (actual rows=3 loops=1)
Task Count: 1
Data received from workers: 0 bytes
Tasks Shown: All
-> Task
Data received from worker: 0 bytes
Node: host=localhost port=xxxxx dbname=regression
-> Index Scan using lineitem_pkey_290000 on lineitem_290000 lineitem (actual rows=3 loops=1)
Index Cond: (l_orderkey = 5)
@ -1796,8 +1806,10 @@ SELECT create_distributed_table('explain_analyze_test', 'a');
EXPLAIN :default_analyze_flags SELECT * FROM explain_analyze_test WHERE a = 1;
Custom Scan (Citus Adaptive) (actual rows=1 loops=1)
Task Count: 1
Data received from workers: 8 bytes
Tasks Shown: All
-> Task
Data received from worker: 8 bytes
Node: host=localhost port=xxxxx dbname=regression
-> Seq Scan on explain_analyze_test_570009 explain_analyze_test (actual rows=1 loops=1)
Filter: (a = 1)
@ -1806,11 +1818,37 @@ EXPLAIN :default_analyze_flags SELECT count(*) FROM explain_analyze_test;
Aggregate (actual rows=1 loops=1)
-> Custom Scan (Citus Adaptive) (actual rows=4 loops=1)
Task Count: 4
Data received from workers: 4 bytes
Tasks Shown: One of 4
-> Task
Data received from worker: 1 bytes
Node: host=localhost port=xxxxx dbname=regression
-> Aggregate (actual rows=1 loops=1)
-> Seq Scan on explain_analyze_test_570009 explain_analyze_test (actual rows=1 loops=1)
-- empty router SELECT
EXPLAIN :default_analyze_flags SELECT * FROM explain_analyze_test WHERE a = 10000;
Custom Scan (Citus Adaptive) (actual rows=0 loops=1)
Task Count: 1
Data received from workers: 0 bytes
Tasks Shown: All
-> Task
Data received from worker: 0 bytes
Node: host=localhost port=xxxxx dbname=regression
-> Seq Scan on explain_analyze_test_570012 explain_analyze_test (actual rows=0 loops=1)
Filter: (a = 10000)
Rows Removed by Filter: 1
-- empty multi-shard SELECT
EXPLAIN :default_analyze_flags SELECT * FROM explain_analyze_test WHERE b = 'does not exist';
Custom Scan (Citus Adaptive) (actual rows=0 loops=1)
Task Count: 4
Data received from workers: 0 bytes
Tasks Shown: One of 4
-> Task
Data received from worker: 0 bytes
Node: host=localhost port=xxxxx dbname=regression
-> Seq Scan on explain_analyze_test_570009 explain_analyze_test (actual rows=0 loops=1)
Filter: (b = 'does not exist'::text)
Rows Removed by Filter: 1
-- router DML
BEGIN;
EXPLAIN :default_analyze_flags DELETE FROM explain_analyze_test WHERE a = 1;
@ -1857,6 +1895,32 @@ Custom Scan (Citus Adaptive) (actual rows=0 loops=1)
-> Seq Scan on explain_analyze_test_570009 explain_analyze_test (actual rows=1 loops=1)
SELECT * FROM explain_analyze_test ORDER BY a;
ROLLBACK;
-- router DML with RETURNING with empty result
EXPLAIN :default_analyze_flags UPDATE explain_analyze_test SET b = 'something' WHERE a = 10000 RETURNING *;
Custom Scan (Citus Adaptive) (actual rows=0 loops=1)
Task Count: 1
Data received from workers: 0 bytes
Tasks Shown: All
-> Task
Data received from worker: 0 bytes
Node: host=localhost port=xxxxx dbname=regression
-> Update on explain_analyze_test_570012 explain_analyze_test (actual rows=0 loops=1)
-> Seq Scan on explain_analyze_test_570012 explain_analyze_test (actual rows=0 loops=1)
Filter: (a = 10000)
Rows Removed by Filter: 1
-- multi-shard DML with RETURNING with empty result
EXPLAIN :default_analyze_flags UPDATE explain_analyze_test SET b = 'something' WHERE b = 'does not exist' RETURNING *;
Custom Scan (Citus Adaptive) (actual rows=0 loops=1)
Task Count: 4
Data received from workers: 0 bytes
Tasks Shown: One of 4
-> Task
Data received from worker: 0 bytes
Node: host=localhost port=xxxxx dbname=regression
-> Update on explain_analyze_test_570009 explain_analyze_test (actual rows=0 loops=1)
-> Seq Scan on explain_analyze_test_570009 explain_analyze_test (actual rows=0 loops=1)
Filter: (b = 'does not exist'::text)
Rows Removed by Filter: 1
-- single-row insert
BEGIN;
EXPLAIN :default_analyze_flags INSERT INTO explain_analyze_test VALUES (5, 'value 5');
@ -1964,6 +2028,50 @@ EXPLAIN (COSTS off, ANALYZE on, TIMING off, SUMMARY off, FORMAT JSON) INSERT INT
}
]
ROLLBACK;
EXPLAIN (COSTS off, ANALYZE on, TIMING off, SUMMARY off, FORMAT JSON) SELECT * FROM explain_pk;
[
{
"Plan": {
"Node Type": "Custom Scan",
"Custom Plan Provider": "Citus Adaptive",
"Parallel Aware": false,
"Actual Rows": 0,
"Actual Loops": 1,
"Distributed Query": {
"Job": {
"Task Count": 4,
"Data received from workers": "0 bytes",
"Tasks Shown": "One of 4",
"Tasks": [
{
"Data received from worker": "0 bytes",
"Node": "host=localhost port=xxxxx dbname=regression",
"Remote Plan": [
[
{
"Plan": {
"Node Type": "Seq Scan",
"Parallel Aware": false,
"Relation Name": "explain_pk_570013",
"Alias": "explain_pk",
"Actual Rows": 0,
"Actual Loops": 1
},
"Triggers": [
]
}
]
]
}
]
}
}
},
"Triggers": [
]
}
]
BEGIN;
EXPLAIN (COSTS off, ANALYZE on, TIMING off, SUMMARY off, FORMAT XML) INSERT INTO explain_pk VALUES (1, 2), (2, 3);
<explain xmlns="http://www.postgresql.org/2009/explain">
@ -2017,6 +2125,49 @@ EXPLAIN (COSTS off, ANALYZE on, TIMING off, SUMMARY off, FORMAT XML) INSERT INTO
</Query>
</explain>
ROLLBACK;
EXPLAIN (COSTS off, ANALYZE on, TIMING off, SUMMARY off, FORMAT XML) SELECT * FROM explain_pk;
<explain xmlns="http://www.postgresql.org/2009/explain">
<Query>
<Plan>
<Node-Type>Custom Scan</Node-Type>
<Custom-Plan-Provider>Citus Adaptive</Custom-Plan-Provider>
<Parallel-Aware>false</Parallel-Aware>
<Actual-Rows>0</Actual-Rows>
<Actual-Loops>1</Actual-Loops>
<Distributed-Query>
<Job>
<Task-Count>4</Task-Count>
<Data-received-from-workers>0 bytes</Data-received-from-workers>
<Tasks-Shown>One of 4</Tasks-Shown>
<Tasks>
<Task>
<Data-received-from-worker>0 bytes</Data-received-from-worker>
<Node>host=localhost port=xxxxx dbname=regression</Node>
<Remote-Plan>
<explain xmlns="http://www.postgresql.org/2009/explain">
<Query>
<Plan>
<Node-Type>Seq Scan</Node-Type>
<Parallel-Aware>false</Parallel-Aware>
<Relation-Name>explain_pk_570013</Relation-Name>
<Alias>explain_pk</Alias>
<Actual-Rows>0</Actual-Rows>
<Actual-Loops>1</Actual-Loops>
</Plan>
<Triggers>
</Triggers>
</Query>
</explain>
</Remote-Plan>
</Task>
</Tasks>
</Job>
</Distributed-Query>
</Plan>
<Triggers>
</Triggers>
</Query>
</explain>
DROP TABLE explain_pk;
-- test EXPLAIN ANALYZE with CTEs and subqueries
CREATE TABLE dist_table(a int, b int);
@ -2029,7 +2180,7 @@ INSERT INTO dist_table SELECT i, i*i FROM generate_series(1, 10) i;
INSERT INTO ref_table SELECT i FROM generate_series(1, 10) i;
EXPLAIN :default_analyze_flags
WITH r AS (
SELECT random() r, a FROM dist_table
SELECT GREATEST(random(), 2) r, a FROM dist_table
)
SELECT count(distinct a) from r NATURAL JOIN ref_table;
Custom Scan (Citus Adaptive) (actual rows=1 loops=1)
@ -2038,13 +2189,17 @@ Custom Scan (Citus Adaptive) (actual rows=1 loops=1)
Result destination: Send to 2 nodes
-> Custom Scan (Citus Adaptive) (actual rows=10 loops=1)
Task Count: 4
Data received from workers: 21 bytes
Tasks Shown: One of 4
-> Task
Data received from worker: 9 bytes
Node: host=localhost port=xxxxx dbname=regression
-> Seq Scan on dist_table_570017 dist_table (actual rows=4 loops=1)
Task Count: 1
Data received from workers: 2 bytes
Tasks Shown: All
-> Task
Data received from worker: 2 bytes
Node: host=localhost port=xxxxx dbname=regression
-> Aggregate (actual rows=1 loops=1)
-> Hash Join (actual rows=10 loops=1)
@ -2054,12 +2209,14 @@ Custom Scan (Citus Adaptive) (actual rows=1 loops=1)
Buckets: 1024 Batches: 1 Memory Usage: 9kB
-> Function Scan on read_intermediate_result intermediate_result (actual rows=10 loops=1)
EXPLAIN :default_analyze_flags
SELECT count(distinct a) FROM (SELECT random() r, a FROM dist_table) t NATURAL JOIN ref_table;
SELECT count(distinct a) FROM (SELECT GREATEST(random(), 2) r, a FROM dist_table) t NATURAL JOIN ref_table;
Aggregate (actual rows=1 loops=1)
-> Custom Scan (Citus Adaptive) (actual rows=10 loops=1)
Task Count: 4
Data received from workers: 11 bytes
Tasks Shown: One of 4
-> Task
Data received from worker: 5 bytes
Node: host=localhost port=xxxxx dbname=regression
-> Group (actual rows=4 loops=1)
Group Key: t.a
@ -2076,16 +2233,18 @@ Aggregate (actual rows=1 loops=1)
-> Seq Scan on ref_table_570021 ref_table (actual rows=10 loops=1)
EXPLAIN :default_analyze_flags
SELECT count(distinct a) FROM dist_table
WHERE EXISTS(SELECT random() FROM dist_table NATURAL JOIN ref_table);
WHERE EXISTS(SELECT random() < 2 FROM dist_table NATURAL JOIN ref_table);
Aggregate (actual rows=1 loops=1)
-> Custom Scan (Citus Adaptive) (actual rows=10 loops=1)
-> Distributed Subplan XXX_1
Intermediate Data Size: 140 bytes
Intermediate Data Size: 70 bytes
Result destination: Send to 2 nodes
-> Custom Scan (Citus Adaptive) (actual rows=10 loops=1)
Task Count: 4
Data received from workers: 10 bytes
Tasks Shown: One of 4
-> Task
Data received from worker: 4 bytes
Node: host=localhost port=xxxxx dbname=regression
-> Merge Join (actual rows=4 loops=1)
Merge Cond: (dist_table.a = ref_table.a)
@ -2098,8 +2257,10 @@ Aggregate (actual rows=1 loops=1)
Sort Method: quicksort Memory: 25kB
-> Seq Scan on ref_table_570021 ref_table (actual rows=10 loops=1)
Task Count: 4
Data received from workers: 11 bytes
Tasks Shown: One of 4
-> Task
Data received from worker: 5 bytes
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate (actual rows=4 loops=1)
Group Key: dist_table.a
@ -2114,7 +2275,7 @@ WITH r AS (
INSERT INTO dist_table SELECT a, a * a FROM dist_table
RETURNING a
), s AS (
SELECT random(), a * a a2 FROM r
SELECT random() < 2, a * a a2 FROM r
)
SELECT count(distinct a2) FROM s;
Custom Scan (Citus Adaptive) (actual rows=1 loops=1)
@ -2123,26 +2284,81 @@ Custom Scan (Citus Adaptive) (actual rows=1 loops=1)
Result destination: Write locally
-> Custom Scan (Citus Adaptive) (actual rows=20 loops=1)
Task Count: 4
Data received from workers: 44 bytes
Tasks Shown: One of 4
-> Task
Data received from worker: 20 bytes
Node: host=localhost port=xxxxx dbname=regression
-> Insert on dist_table_570017 citus_table_alias (actual rows=8 loops=1)
-> Seq Scan on dist_table_570017 dist_table (actual rows=8 loops=1)
Filter: ((worker_hash(a) >= '-2147483648'::integer) AND (worker_hash(a) <= '-1073741825'::integer))
-> Distributed Subplan XXX_2
Intermediate Data Size: 220 bytes
Intermediate Data Size: 150 bytes
Result destination: Write locally
-> Custom Scan (Citus Adaptive) (actual rows=10 loops=1)
Task Count: 1
Data received from workers: 28 bytes
Tasks Shown: All
-> Task
Data received from worker: 28 bytes
Node: host=localhost port=xxxxx dbname=regression
-> Function Scan on read_intermediate_result intermediate_result (actual rows=10 loops=1)
Task Count: 1
Data received from workers: 2 bytes
Tasks Shown: All
-> Task
Data received from worker: 2 bytes
Node: host=localhost port=xxxxx dbname=regression
-> Aggregate (actual rows=1 loops=1)
-> Function Scan on read_intermediate_result intermediate_result (actual rows=10 loops=1)
ROLLBACK;
DROP TABLE ref_table, dist_table;
-- test EXPLAIN ANALYZE with different replication factors
SET citus.shard_count = 2;
SET citus.shard_replication_factor = 1;
CREATE TABLE dist_table_rep1(a int);
SELECT create_distributed_table('dist_table_rep1', 'a');
SET citus.shard_replication_factor = 2;
CREATE TABLE dist_table_rep2(a int);
SELECT create_distributed_table('dist_table_rep2', 'a');
EXPLAIN :default_analyze_flags INSERT INTO dist_table_rep1 VALUES(1), (2), (3), (4) RETURNING *;
Custom Scan (Citus Adaptive) (actual rows=4 loops=1)
Task Count: 2
Data received from workers: 4 bytes
Tasks Shown: One of 2
-> Task
Data received from worker: 3 bytes
Node: host=localhost port=xxxxx dbname=regression
-> Insert on dist_table_rep1_570022 citus_table_alias (actual rows=3 loops=1)
-> Values Scan on "*VALUES*" (actual rows=3 loops=1)
EXPLAIN :default_analyze_flags SELECT * from dist_table_rep1;
Custom Scan (Citus Adaptive) (actual rows=4 loops=1)
Task Count: 2
Data received from workers: 4 bytes
Tasks Shown: One of 2
-> Task
Data received from worker: 3 bytes
Node: host=localhost port=xxxxx dbname=regression
-> Seq Scan on dist_table_rep1_570022 dist_table_rep1 (actual rows=3 loops=1)
EXPLAIN :default_analyze_flags INSERT INTO dist_table_rep2 VALUES(1), (2), (3), (4) RETURNING *;
Custom Scan (Citus Adaptive) (actual rows=4 loops=1)
Task Count: 2
Data received from workers: 8 bytes
Tasks Shown: One of 2
-> Task
Data received from worker: 6 bytes
Node: host=localhost port=xxxxx dbname=regression
-> Insert on dist_table_rep2_570024 citus_table_alias (actual rows=3 loops=1)
-> Values Scan on "*VALUES*" (actual rows=3 loops=1)
EXPLAIN :default_analyze_flags SELECT * from dist_table_rep2;
Custom Scan (Citus Adaptive) (actual rows=4 loops=1)
Task Count: 2
Data received from workers: 4 bytes
Tasks Shown: One of 2
-> Task
Data received from worker: 3 bytes
Node: host=localhost port=xxxxx dbname=regression
-> Seq Scan on dist_table_rep2_570024 dist_table_rep2 (actual rows=3 loops=1)
DROP TABLE dist_table_rep1, dist_table_rep2;

View File

@ -1,4 +1,4 @@
SET citus.shard_count = 4;
SET citus.shard_count = 2;
SET citus.next_shard_id TO 4754000;
CREATE SCHEMA binary_protocol;
SET search_path TO binary_protocol;
@ -19,6 +19,12 @@ SELECT id, id, id, id, id,
id, id, id, id, id
FROM t ORDER BY id;
-- EXPLAIN ANALYZE is currently forced to use text protocol. Once that is
-- changed the numbers reported should change.
EXPLAIN (ANALYZE TRUE, TIMING FALSE, COSTS FALSE, SUMMARY FALSE) SELECT id FROM t ORDER BY 1;
SET citus.explain_all_tasks TO ON;
EXPLAIN (ANALYZE TRUE, TIMING FALSE, COSTS FALSE, SUMMARY FALSE) SELECT id FROM t ORDER BY 1;
INSERT INTO t SELECT count(*) from t;
INSERT INTO t (SELECT id+1 from t);

View File

@ -205,7 +205,7 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM distribute
EXPLAIN (ANALYZE ON, COSTS OFF, SUMMARY OFF, TIMING OFF)
WITH r AS ( SELECT random() z,* FROM distributed_table)
SELECT * FROM r WHERE z < 2;
SELECT 1 FROM r WHERE z < 2;
EXPLAIN (COSTS OFF) DELETE FROM distributed_table WHERE key = 1 AND age = 20;

View File

@ -769,6 +769,12 @@ EXPLAIN :default_analyze_flags SELECT * FROM explain_analyze_test WHERE a = 1;
-- multi-shard SELECT
EXPLAIN :default_analyze_flags SELECT count(*) FROM explain_analyze_test;
-- empty router SELECT
EXPLAIN :default_analyze_flags SELECT * FROM explain_analyze_test WHERE a = 10000;
-- empty multi-shard SELECT
EXPLAIN :default_analyze_flags SELECT * FROM explain_analyze_test WHERE b = 'does not exist';
-- router DML
BEGIN;
EXPLAIN :default_analyze_flags DELETE FROM explain_analyze_test WHERE a = 1;
@ -783,6 +789,12 @@ EXPLAIN :default_analyze_flags DELETE FROM explain_analyze_test;
SELECT * FROM explain_analyze_test ORDER BY a;
ROLLBACK;
-- router DML with RETURNING with empty result
EXPLAIN :default_analyze_flags UPDATE explain_analyze_test SET b = 'something' WHERE a = 10000 RETURNING *;
-- multi-shard DML with RETURNING with empty result
EXPLAIN :default_analyze_flags UPDATE explain_analyze_test SET b = 'something' WHERE b = 'does not exist' RETURNING *;
-- single-row insert
BEGIN;
EXPLAIN :default_analyze_flags INSERT INTO explain_analyze_test VALUES (5, 'value 5');
@ -814,10 +826,14 @@ BEGIN;
EXPLAIN (COSTS off, ANALYZE on, TIMING off, SUMMARY off, FORMAT JSON) INSERT INTO explain_pk VALUES (1, 2), (2, 3);
ROLLBACK;
EXPLAIN (COSTS off, ANALYZE on, TIMING off, SUMMARY off, FORMAT JSON) SELECT * FROM explain_pk;
BEGIN;
EXPLAIN (COSTS off, ANALYZE on, TIMING off, SUMMARY off, FORMAT XML) INSERT INTO explain_pk VALUES (1, 2), (2, 3);
ROLLBACK;
EXPLAIN (COSTS off, ANALYZE on, TIMING off, SUMMARY off, FORMAT XML) SELECT * FROM explain_pk;
DROP TABLE explain_pk;
-- test EXPLAIN ANALYZE with CTEs and subqueries
@ -831,16 +847,16 @@ INSERT INTO ref_table SELECT i FROM generate_series(1, 10) i;
EXPLAIN :default_analyze_flags
WITH r AS (
SELECT random() r, a FROM dist_table
SELECT GREATEST(random(), 2) r, a FROM dist_table
)
SELECT count(distinct a) from r NATURAL JOIN ref_table;
EXPLAIN :default_analyze_flags
SELECT count(distinct a) FROM (SELECT random() r, a FROM dist_table) t NATURAL JOIN ref_table;
SELECT count(distinct a) FROM (SELECT GREATEST(random(), 2) r, a FROM dist_table) t NATURAL JOIN ref_table;
EXPLAIN :default_analyze_flags
SELECT count(distinct a) FROM dist_table
WHERE EXISTS(SELECT random() FROM dist_table NATURAL JOIN ref_table);
WHERE EXISTS(SELECT random() < 2 FROM dist_table NATURAL JOIN ref_table);
BEGIN;
EXPLAIN :default_analyze_flags
@ -848,9 +864,27 @@ WITH r AS (
INSERT INTO dist_table SELECT a, a * a FROM dist_table
RETURNING a
), s AS (
SELECT random(), a * a a2 FROM r
SELECT random() < 2, a * a a2 FROM r
)
SELECT count(distinct a2) FROM s;
ROLLBACK;
DROP TABLE ref_table, dist_table;
-- test EXPLAIN ANALYZE with different replication factors
SET citus.shard_count = 2;
SET citus.shard_replication_factor = 1;
CREATE TABLE dist_table_rep1(a int);
SELECT create_distributed_table('dist_table_rep1', 'a');
SET citus.shard_replication_factor = 2;
CREATE TABLE dist_table_rep2(a int);
SELECT create_distributed_table('dist_table_rep2', 'a');
EXPLAIN :default_analyze_flags INSERT INTO dist_table_rep1 VALUES(1), (2), (3), (4) RETURNING *;
EXPLAIN :default_analyze_flags SELECT * from dist_table_rep1;
EXPLAIN :default_analyze_flags INSERT INTO dist_table_rep2 VALUES(1), (2), (3), (4) RETURNING *;
EXPLAIN :default_analyze_flags SELECT * from dist_table_rep2;
DROP TABLE dist_table_rep1, dist_table_rep2;