Merge pull request #4191 from citusdata/sort-explain-analyze-output-by-time

Sort explain analyze output by task time
pull/4173/head
Ahmet Gedemenli 2020-09-24 14:38:06 +03:00 committed by GitHub
commit e892e253b1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 155 additions and 9 deletions

View File

@ -68,6 +68,7 @@
/* Config variables that enable printing distributed query plans */
bool ExplainDistributedQueries = true;
bool ExplainAllTasks = false;
int ExplainAnalyzeSortMethod = EXPLAIN_ANALYZE_SORT_BY_TIME;
/*
* If enabled, EXPLAIN ANALYZE output & other statistics of last worker task
@ -529,8 +530,34 @@ ExplainMapMergeJob(MapMergeJob *mapMergeJob, ExplainState *es)
/*
* ExplainTaskList shows the remote EXPLAIN for the first task in taskList,
* or all tasks if citus.explain_all_tasks is on.
* CompareTasksByFetchedExplainAnalyzeDuration is a helper function to compare two tasks by their execution duration.
*/
static int
CompareTasksByFetchedExplainAnalyzeDuration(const void *leftElement, const
void *rightElement)
{
const Task *leftTask = *((const Task **) leftElement);
const Task *rightTask = *((const Task **) rightElement);
double leftTaskExecutionDuration = leftTask->fetchedExplainAnalyzeExecutionDuration;
double rightTaskExecutionDuration = rightTask->fetchedExplainAnalyzeExecutionDuration;
double diff = leftTaskExecutionDuration - rightTaskExecutionDuration;
if (diff > 0)
{
return -1;
}
else if (diff < 0)
{
return 1;
}
return 0;
}
/*
* ExplainTaskList shows the remote EXPLAIN and execution time for the first task
* in taskList, or all tasks if citus.explain_all_tasks is on.
*/
static void
ExplainTaskList(CitusScanState *scanState, List *taskList, ExplainState *es,
@ -540,8 +567,17 @@ ExplainTaskList(CitusScanState *scanState, List *taskList, ExplainState *es,
ListCell *remoteExplainCell = NULL;
List *remoteExplainList = NIL;
/* make sure that the output is consistent */
taskList = SortList(taskList, CompareTasksByTaskId);
/* if tasks are executed, we sort them by time; unless we are on a test env */
if (es->analyze && ExplainAnalyzeSortMethod == EXPLAIN_ANALYZE_SORT_BY_TIME)
{
/* sort by execution duration only in case of ANALYZE */
taskList = SortList(taskList, CompareTasksByFetchedExplainAnalyzeDuration);
}
else
{
/* make sure that the output is consistent */
taskList = SortList(taskList, CompareTasksByTaskId);
}
foreach(taskCell, taskList)
{
@ -1225,13 +1261,15 @@ CreateExplainAnlyzeDestination(Task *task, TupleDestination *taskDest)
tupleDestination->originalTaskDestination = taskDest;
#if PG_VERSION_NUM >= PG_VERSION_12
TupleDesc lastSavedExplainAnalyzeTupDesc = CreateTemplateTupleDesc(1);
TupleDesc lastSavedExplainAnalyzeTupDesc = CreateTemplateTupleDesc(2);
#else
TupleDesc lastSavedExplainAnalyzeTupDesc = CreateTemplateTupleDesc(1, false);
TupleDesc lastSavedExplainAnalyzeTupDesc = CreateTemplateTupleDesc(2, false);
#endif
TupleDescInitEntry(lastSavedExplainAnalyzeTupDesc, 1, "explain analyze", TEXTOID, 0,
0);
TupleDescInitEntry(lastSavedExplainAnalyzeTupDesc, 2, "duration", FLOAT8OID, 0, 0);
tupleDestination->lastSavedExplainAnalyzeTupDesc = lastSavedExplainAnalyzeTupDesc;
tupleDestination->pub.putTuple = ExplainAnalyzeDestPutTuple;
@ -1271,7 +1309,16 @@ ExplainAnalyzeDestPutTuple(TupleDestination *self, Task *task,
return;
}
Datum executionDuration = heap_getattr(heapTuple, 2, tupDesc, &isNull);
if (isNull)
{
ereport(WARNING, (errmsg("received null execution time from worker")));
return;
}
char *fetchedExplainAnalyzePlan = TextDatumGetCString(explainAnalyze);
double fetchedExplainAnalyzeExecutionDuration = DatumGetFloat8(executionDuration);
/*
* Allocate fetchedExplainAnalyzePlan in the same context as the Task, since we are
@ -1281,7 +1328,7 @@ ExplainAnalyzeDestPutTuple(TupleDestination *self, Task *task,
* calls to CheckNodeCopyAndSerialization() which asserts copy functions of the task
* work as expected, which will try to copy this value in a future execution.
*
* Why we don't we just allocate this field in executor context and reset it before
* Why don't we just allocate this field in executor context and reset it before
* the next execution? Because when an error is raised we can skip pretty much most
* of the meaningful places that we can insert the reset.
*
@ -1295,6 +1342,8 @@ ExplainAnalyzeDestPutTuple(TupleDestination *self, Task *task,
MemoryContextStrdup(taskContext, fetchedExplainAnalyzePlan);
tupleDestination->originalTask->fetchedExplainAnalyzePlacementIndex =
placementIndex;
tupleDestination->originalTask->fetchedExplainAnalyzeExecutionDuration =
fetchedExplainAnalyzeExecutionDuration;
}
else
{
@ -1385,7 +1434,7 @@ ExplainAnalyzeTaskList(List *originalTaskList,
const char *queryString = TaskQueryString(explainAnalyzeTask);
char *wrappedQuery = WrapQueryForExplainAnalyze(queryString, tupleDesc);
char *fetchQuery =
"SELECT explain_analyze_output FROM worker_last_saved_explain_analyze()";
"SELECT explain_analyze_output, execution_duration FROM worker_last_saved_explain_analyze()";
SetTaskQueryStringList(explainAnalyzeTask, list_make2(wrappedQuery, fetchQuery));
TupleDestination *originalTaskDest = originalTask->tupleDest ?

View File

@ -5570,7 +5570,9 @@ SetPlacementNodeMetadata(ShardPlacement *placement, WorkerNode *workerNode)
}
/* Helper function to compare two tasks by their taskId. */
/*
* CompareTasksByTaskId is a helper function to compare two tasks by their taskId.
*/
int
CompareTasksByTaskId(const void *leftElement, const void *rightElement)
{

View File

@ -191,6 +191,12 @@ static const struct config_enum_entry multi_shard_modify_connection_options[] =
{ NULL, 0, false }
};
static const struct config_enum_entry explain_analyze_sort_method_options[] = {
{ "execution-time", EXPLAIN_ANALYZE_SORT_BY_TIME, false },
{ "taskId", EXPLAIN_ANALYZE_SORT_BY_TASK_ID, false },
{ NULL, 0, false }
};
/* *INDENT-ON* */
@ -1487,6 +1493,21 @@ RegisterCitusConfigVariables(void)
GUC_NO_SHOW_ALL,
NULL, NULL, NULL);
DefineCustomEnumVariable(
"citus.explain_analyze_sort_method",
gettext_noop("Sets the sorting method for EXPLAIN ANALYZE queries."),
gettext_noop("This parameter is intended for testing. It is developed "
"to get consistent regression test outputs. When it is set "
"to 'time', EXPLAIN ANALYZE output is sorted by execution "
"duration on workers. When it is set to 'taskId', it is "
"sorted by task id. By default, it is set to 'time'; but "
"in regression tests, it's set to 'taskId' for consistency."),
&ExplainAnalyzeSortMethod,
EXPLAIN_ANALYZE_SORT_BY_TIME, explain_analyze_sort_method_options,
PGC_USERSET,
GUC_NO_SHOW_ALL,
NULL, NULL, NULL);
/* warn about config items in the citus namespace that are not registered above */
EmitWarningsOnPlaceholders("citus");
}

View File

@ -326,6 +326,7 @@ CopyNodeTask(COPYFUNC_ARGS)
COPY_SCALAR_FIELD(totalReceivedTupleData);
COPY_SCALAR_FIELD(fetchedExplainAnalyzePlacementIndex);
COPY_STRING_FIELD(fetchedExplainAnalyzePlan);
COPY_SCALAR_FIELD(fetchedExplainAnalyzeExecutionDuration);
}

View File

@ -527,6 +527,11 @@ OutTask(OUTFUNC_ARGS)
WRITE_NODE_FIELD(rowValuesLists);
WRITE_BOOL_FIELD(partiallyLocalOrRemote);
WRITE_BOOL_FIELD(parametersInQueryStringResolved);
WRITE_INT_FIELD(queryCount);
WRITE_UINT64_FIELD(totalReceivedTupleData);
WRITE_INT_FIELD(fetchedExplainAnalyzePlacementIndex);
WRITE_STRING_FIELD(fetchedExplainAnalyzePlan);
WRITE_FLOAT_FIELD(fetchedExplainAnalyzeExecutionDuration, "%.2f");
}

View File

@ -13,9 +13,16 @@
#include "executor/executor.h"
#include "tuple_destination.h"
typedef enum
{
EXPLAIN_ANALYZE_SORT_BY_TIME = 0,
EXPLAIN_ANALYZE_SORT_BY_TASK_ID = 1
} ExplainAnalyzeSortMethods;
/* Config variables managed via guc.c to explain distributed query plans */
extern bool ExplainDistributedQueries;
extern bool ExplainAllTasks;
extern int ExplainAnalyzeSortMethod;
extern void FreeSavedExplainPlan(void);
extern void CitusExplainOneQuery(Query *query, int cursorOptions, IntoClause *into,

View File

@ -321,6 +321,12 @@ typedef struct Task
*/
char *fetchedExplainAnalyzePlan;
int fetchedExplainAnalyzePlacementIndex;
/*
* Execution Duration fetched from worker. This is saved to be used later by
* ExplainTaskList().
*/
double fetchedExplainAnalyzeExecutionDuration;
} Task;
@ -564,6 +570,8 @@ extern List * RoundRobinAssignTaskList(List *taskList);
extern List * RoundRobinReorder(List *placementList);
extern void SetPlacementNodeMetadata(ShardPlacement *placement, WorkerNode *workerNode);
extern int CompareTasksByTaskId(const void *leftElement, const void *rightElement);
extern int CompareTasksByExecutionDuration(const void *leftElement, const
void *rightElement);
/* function declaration for creating Task */
extern List * QueryPushdownSqlTaskList(Query *query, uint64 jobId,

View File

@ -2924,3 +2924,37 @@ Custom Scan (Citus Adaptive) (actual rows=0 loops=1)
Node: host=localhost port=xxxxx dbname=regression
-> Seq Scan on users_table_2_570028 users_table_2 (actual rows=0 loops=1)
Filter: ((value_1)::text = '00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000X'::text)
-- sorted explain analyze output
CREATE TABLE explain_analyze_execution_time (a int);
INSERT INTO explain_analyze_execution_time VALUES (2);
SELECT create_distributed_table('explain_analyze_execution_time', 'a');
-- show that we can sort the output wrt execution time
-- we do the following hack to make the test outputs
-- be consistent. First, ingest a single row then add
-- pg_sleep() call on the query. Postgres will only
-- sleep for the shard that has the single row, so that
-- will definitely be slower
set citus.explain_analyze_sort_method to "taskId";
EXPLAIN (COSTS FALSE, ANALYZE TRUE, TIMING FALSE, SUMMARY FALSE) select a, CASE WHEN pg_sleep(0.4) IS NULL THEN 'x' END from explain_analyze_execution_time;
Custom Scan (Citus Adaptive) (actual rows=1 loops=1)
Task Count: 2
Tuple data received from nodes: 1 bytes
Tasks Shown: One of 2
-> Task
Tuple data received from node: 0 bytes
Node: host=localhost port=xxxxx dbname=regression
-> Seq Scan on explain_analyze_execution_time_570029 explain_analyze_execution_time (actual rows=0 loops=1)
set citus.explain_analyze_sort_method to "execution-time";
EXPLAIN (COSTS FALSE, ANALYZE TRUE, TIMING FALSE, SUMMARY FALSE) select a, CASE WHEN pg_sleep(0.4) IS NULL THEN 'x' END from explain_analyze_execution_time;
Custom Scan (Citus Adaptive) (actual rows=1 loops=1)
Task Count: 2
Tuple data received from nodes: 1 bytes
Tasks Shown: One of 2
-> Task
Tuple data received from node: 1 bytes
Node: host=localhost port=xxxxx dbname=regression
-> Seq Scan on explain_analyze_execution_time_570030 explain_analyze_execution_time (actual rows=1 loops=1)
-- reset back
reset citus.explain_analyze_sort_method;
DROP TABLE explain_analyze_execution_time;

View File

@ -416,6 +416,7 @@ push(@pgOptions, '-c', "citus.task_tracker_delay=10ms");
push(@pgOptions, '-c', "citus.remote_task_check_interval=1ms");
push(@pgOptions, '-c', "citus.shard_replication_factor=2");
push(@pgOptions, '-c', "citus.node_connection_timeout=${connectionTimeout}");
push(@pgOptions, '-c', "citus.explain_analyze_sort_method=taskId");
# we disable slow start by default to encourage parallelism within tests
push(@pgOptions, '-c', "citus.executor_slow_start_interval=0ms");

View File

@ -1034,3 +1034,21 @@ EXPLAIN :default_analyze_flags execute p4(20,20);
-- simple test to confirm we can fetch long (>4KB) plans
EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT * FROM users_table_2 WHERE value_1::text = '00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000X';
-- sorted explain analyze output
CREATE TABLE explain_analyze_execution_time (a int);
INSERT INTO explain_analyze_execution_time VALUES (2);
SELECT create_distributed_table('explain_analyze_execution_time', 'a');
-- show that we can sort the output wrt execution time
-- we do the following hack to make the test outputs
-- be consistent. First, ingest a single row then add
-- pg_sleep() call on the query. Postgres will only
-- sleep for the shard that has the single row, so that
-- will definitely be slower
set citus.explain_analyze_sort_method to "taskId";
EXPLAIN (COSTS FALSE, ANALYZE TRUE, TIMING FALSE, SUMMARY FALSE) select a, CASE WHEN pg_sleep(0.4) IS NULL THEN 'x' END from explain_analyze_execution_time;
set citus.explain_analyze_sort_method to "execution-time";
EXPLAIN (COSTS FALSE, ANALYZE TRUE, TIMING FALSE, SUMMARY FALSE) select a, CASE WHEN pg_sleep(0.4) IS NULL THEN 'x' END from explain_analyze_execution_time;
-- reset back
reset citus.explain_analyze_sort_method;
DROP TABLE explain_analyze_execution_time;