Make maxTaskStringSize configurable (#1501)

maxTaskStringSize determines the maximum size of a worker query string.
It was originally hard-coded to a specific value, which has caused
issues for some users. Since it determines the initial shared memory
allocation, we did not want to set it to an arbitrarily higher number.
Instead, we made it configurable.

This commit introduces a new GUC variable, citus.max_task_string_size.

Changes to this variable require a restart to take effect.
pull/1522/head
Murat Tuncer 2017-07-27 11:39:12 -07:00 committed by GitHub
parent 6698ca8d9e
commit 26f020dc6e
4 changed files with 44 additions and 17 deletions

View File

@ -692,6 +692,19 @@ RegisterCitusConfigVariables(void)
GUC_SUPERUSER_ONLY | GUC_NO_SHOW_ALL, GUC_SUPERUSER_ONLY | GUC_NO_SHOW_ALL,
NULL, NULL, NULL); NULL, NULL, NULL);
DefineCustomIntVariable(
"citus.max_task_string_size",
gettext_noop("Sets the maximum size (in bytes) of a worker task call string."),
gettext_noop("Active worker tasks' are tracked in a shared hash table "
"on the master node. This configuration value limits the "
"maximum size of an individual worker task, and "
"affects the size of pre-allocated shared memory."),
&MaxTaskStringSize,
12288, 8192, 65536,
PGC_POSTMASTER,
0,
NULL, NULL, NULL);
/* warn about config items in the citus namespace that are not registered above */ /* warn about config items in the citus namespace that are not registered above */
EmitWarningsOnPlaceholders("citus"); EmitWarningsOnPlaceholders("citus");
} }

View File

@ -53,6 +53,7 @@
int TaskTrackerDelay = 200; /* process sleep interval in millisecs */ int TaskTrackerDelay = 200; /* process sleep interval in millisecs */
int MaxRunningTasksPerNode = 16; /* max number of running tasks */ int MaxRunningTasksPerNode = 16; /* max number of running tasks */
int MaxTrackedTasksPerNode = 1024; /* max number of tracked tasks */ int MaxTrackedTasksPerNode = 1024; /* max number of tracked tasks */
int MaxTaskStringSize = 12288; /* max size of a worker task call string in bytes */
WorkerTasksSharedStateData *WorkerTasksSharedState; /* shared memory state */ WorkerTasksSharedStateData *WorkerTasksSharedState; /* shared memory state */
static shmem_startup_hook_type prev_shmem_startup_hook = NULL; static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
@ -390,7 +391,7 @@ TrackerCleanupJobSchemas(void)
cleanupTask->assignedAt = HIGH_PRIORITY_TASK_TIME; cleanupTask->assignedAt = HIGH_PRIORITY_TASK_TIME;
cleanupTask->taskStatus = TASK_ASSIGNED; cleanupTask->taskStatus = TASK_ASSIGNED;
strlcpy(cleanupTask->taskCallString, JOB_SCHEMA_CLEANUP, TASK_CALL_STRING_SIZE); strlcpy(cleanupTask->taskCallString, JOB_SCHEMA_CLEANUP, MaxTaskStringSize);
strlcpy(cleanupTask->databaseName, databaseName, NAMEDATALEN); strlcpy(cleanupTask->databaseName, databaseName, NAMEDATALEN);
/* zero out all other fields */ /* zero out all other fields */
@ -532,7 +533,7 @@ TaskTrackerShmemSize(void)
size = add_size(size, sizeof(WorkerTasksSharedStateData)); size = add_size(size, sizeof(WorkerTasksSharedStateData));
hashSize = hash_estimate_size(MaxTrackedTasksPerNode, sizeof(WorkerTask)); hashSize = hash_estimate_size(MaxTrackedTasksPerNode, WORKER_TASK_SIZE);
size = add_size(size, hashSize); size = add_size(size, hashSize);
return size; return size;
@ -559,7 +560,7 @@ TaskTrackerShmemInit(void)
*/ */
memset(&info, 0, sizeof(info)); memset(&info, 0, sizeof(info));
info.keysize = sizeof(uint64) + sizeof(uint32); info.keysize = sizeof(uint64) + sizeof(uint32);
info.entrysize = sizeof(WorkerTask); info.entrysize = WORKER_TASK_SIZE;
info.hash = tag_hash; info.hash = tag_hash;
hashFlags = (HASH_ELEM | HASH_FUNCTION); hashFlags = (HASH_ELEM | HASH_FUNCTION);
@ -662,9 +663,10 @@ SchedulableTaskList(HTAB *WorkerTasksHash)
for (queueIndex = 0; queueIndex < tasksToScheduleCount; queueIndex++) for (queueIndex = 0; queueIndex < tasksToScheduleCount; queueIndex++)
{ {
WorkerTask *schedulableTask = (WorkerTask *) palloc0(sizeof(WorkerTask)); WorkerTask *schedulableTask = (WorkerTask *) palloc0(WORKER_TASK_SIZE);
schedulableTask->jobId = schedulableTaskQueue[queueIndex].jobId; WorkerTask *queuedTask = WORKER_TASK_AT(schedulableTaskQueue, queueIndex);
schedulableTask->taskId = schedulableTaskQueue[queueIndex].taskId; schedulableTask->jobId = queuedTask->jobId;
schedulableTask->taskId = queuedTask->taskId;
schedulableTaskList = lappend(schedulableTaskList, schedulableTask); schedulableTaskList = lappend(schedulableTaskList, schedulableTask);
} }
@ -698,7 +700,7 @@ SchedulableTaskPriorityQueue(HTAB *WorkerTasksHash)
} }
/* allocate an array of tasks for our priority queue */ /* allocate an array of tasks for our priority queue */
priorityQueue = (WorkerTask *) palloc0(sizeof(WorkerTask) * queueSize); priorityQueue = (WorkerTask *) palloc0(WORKER_TASK_SIZE * queueSize);
/* copy tasks in the shared hash to the priority queue */ /* copy tasks in the shared hash to the priority queue */
hash_seq_init(&status, WorkerTasksHash); hash_seq_init(&status, WorkerTasksHash);
@ -709,9 +711,11 @@ SchedulableTaskPriorityQueue(HTAB *WorkerTasksHash)
if (SchedulableTask(currentTask)) if (SchedulableTask(currentTask))
{ {
/* tasks in the priority queue only need the first three fields */ /* tasks in the priority queue only need the first three fields */
priorityQueue[queueIndex].jobId = currentTask->jobId; WorkerTask *queueTask = WORKER_TASK_AT(priorityQueue, queueIndex);
priorityQueue[queueIndex].taskId = currentTask->taskId;
priorityQueue[queueIndex].assignedAt = currentTask->assignedAt; queueTask->jobId = currentTask->jobId;
queueTask->taskId = currentTask->taskId;
queueTask->assignedAt = currentTask->assignedAt;
queueIndex++; queueIndex++;
} }
@ -720,7 +724,7 @@ SchedulableTaskPriorityQueue(HTAB *WorkerTasksHash)
} }
/* now order elements in the queue according to our sorting criterion */ /* now order elements in the queue according to our sorting criterion */
qsort(priorityQueue, queueSize, sizeof(WorkerTask), CompareTasksByTime); qsort(priorityQueue, queueSize, WORKER_TASK_SIZE, CompareTasksByTime);
return priorityQueue; return priorityQueue;
} }

View File

@ -80,10 +80,12 @@ task_tracker_assign_task(PG_FUNCTION_ARGS)
} }
/* check that we have enough space in our shared hash for this string */ /* check that we have enough space in our shared hash for this string */
if (taskCallStringLength >= TASK_CALL_STRING_SIZE) if (taskCallStringLength >= MaxTaskStringSize)
{ {
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("task call string exceeds maximum assignable length"))); errmsg("task string length (%d) exceeds maximum assignable "
"size (%d)", taskCallStringLength, MaxTaskStringSize),
errhint("Consider increasing citus.max_task_string_size.")));
} }
/* /*
@ -333,7 +335,7 @@ CreateTask(uint64 jobId, uint32 taskId, char *taskCallString)
/* enter the worker task into shared hash and initialize the task */ /* enter the worker task into shared hash and initialize the task */
workerTask = WorkerTasksHashEnter(jobId, taskId); workerTask = WorkerTasksHashEnter(jobId, taskId);
workerTask->assignedAt = assignmentTime; workerTask->assignedAt = assignmentTime;
strlcpy(workerTask->taskCallString, taskCallString, TASK_CALL_STRING_SIZE); strlcpy(workerTask->taskCallString, taskCallString, MaxTaskStringSize);
workerTask->taskStatus = TASK_ASSIGNED; workerTask->taskStatus = TASK_ASSIGNED;
workerTask->connectionId = INVALID_CONNECTION_ID; workerTask->connectionId = INVALID_CONNECTION_ID;
@ -370,13 +372,13 @@ UpdateTask(WorkerTask *workerTask, char *taskCallString)
} }
else if (taskStatus == TASK_PERMANENTLY_FAILED) else if (taskStatus == TASK_PERMANENTLY_FAILED)
{ {
strlcpy(workerTask->taskCallString, taskCallString, TASK_CALL_STRING_SIZE); strlcpy(workerTask->taskCallString, taskCallString, MaxTaskStringSize);
workerTask->failureCount = 0; workerTask->failureCount = 0;
workerTask->taskStatus = TASK_ASSIGNED; workerTask->taskStatus = TASK_ASSIGNED;
} }
else else
{ {
strlcpy(workerTask->taskCallString, taskCallString, TASK_CALL_STRING_SIZE); strlcpy(workerTask->taskCallString, taskCallString, MaxTaskStringSize);
workerTask->failureCount = 0; workerTask->failureCount = 0;
} }
} }

View File

@ -72,6 +72,9 @@ typedef enum
* master node, (b) state initialized by the protocol process at task assignment * master node, (b) state initialized by the protocol process at task assignment
* time, and (c) state internal to the task tracker process that changes as the * time, and (c) state internal to the task tracker process that changes as the
* task make progress. * task make progress.
*
* Since taskCallString is dynamically sized use WORKER_TASK_SIZE instead of
* sizeof(WorkerTask). Use WORKER_TASK_AT to reference an item in WorkerTask array.
*/ */
typedef struct WorkerTask typedef struct WorkerTask
{ {
@ -79,14 +82,18 @@ typedef struct WorkerTask
uint32 taskId; /* task id; part of hash table key */ uint32 taskId; /* task id; part of hash table key */
uint32 assignedAt; /* task assignment time in epoch seconds */ uint32 assignedAt; /* task assignment time in epoch seconds */
char taskCallString[TASK_CALL_STRING_SIZE]; /* query or function call string */
TaskStatus taskStatus; /* task's current execution status */ TaskStatus taskStatus; /* task's current execution status */
char databaseName[NAMEDATALEN]; /* name to use for local backend connection */ char databaseName[NAMEDATALEN]; /* name to use for local backend connection */
char userName[NAMEDATALEN]; /* user to use for local backend connection */ char userName[NAMEDATALEN]; /* user to use for local backend connection */
int32 connectionId; /* connection id to local backend */ int32 connectionId; /* connection id to local backend */
uint32 failureCount; /* number of task failures */ uint32 failureCount; /* number of task failures */
char taskCallString[FLEXIBLE_ARRAY_MEMBER]; /* query or function call string */
} WorkerTask; } WorkerTask;
#define WORKER_TASK_SIZE (offsetof(WorkerTask, taskCallString) + MaxTaskStringSize)
#define WORKER_TASK_AT(workerTasks, index) \
((WorkerTask *) (((char *) (workerTasks)) + (index) * WORKER_TASK_SIZE))
/* /*
* WorkerTasksControlData contains task tracker state shared between * WorkerTasksControlData contains task tracker state shared between
@ -112,6 +119,7 @@ typedef struct WorkerTasksSharedStateData
extern int TaskTrackerDelay; extern int TaskTrackerDelay;
extern int MaxTrackedTasksPerNode; extern int MaxTrackedTasksPerNode;
extern int MaxRunningTasksPerNode; extern int MaxRunningTasksPerNode;
extern int MaxTaskStringSize;
/* State shared by the task tracker and task tracker protocol functions */ /* State shared by the task tracker and task tracker protocol functions */
extern WorkerTasksSharedStateData *WorkerTasksSharedState; extern WorkerTasksSharedStateData *WorkerTasksSharedState;