diff --git a/src/backend/distributed/commands/create_distributed_table.c b/src/backend/distributed/commands/create_distributed_table.c index b4a4c802b..691981485 100644 --- a/src/backend/distributed/commands/create_distributed_table.c +++ b/src/backend/distributed/commands/create_distributed_table.c @@ -197,11 +197,11 @@ master_create_distributed_table(PG_FUNCTION_ARGS) if (distributionMethod == DISTRIBUTE_BY_APPEND) { ereport(WARNING, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("table \"%s\" has a unique constraint", - distributedRelationName), - errdetail("Unique constraints and primary keys on " - "append-partitioned tables cannot be enforced."), - errhint("Consider using hash partitioning."))); + errmsg("table \"%s\" has a unique constraint", + distributedRelationName), + errdetail("Unique constraints and primary keys on " + "append-partitioned tables cannot be enforced."), + errhint("Consider using hash partitioning."))); } attributeCount = indexInfo->ii_NumIndexAttrs; diff --git a/src/backend/distributed/commands/transmit.c b/src/backend/distributed/commands/transmit.c index 0ab90d0ef..6e0d78136 100644 --- a/src/backend/distributed/commands/transmit.c +++ b/src/backend/distributed/commands/transmit.c @@ -136,7 +136,7 @@ static File FileOpenForTransmit(const char *filename, int fileFlags, int fileMode) { File fileDesc = -1; - int fileStated = -1; + int fileStated = -1; struct stat fileStat; fileStated = stat(filename, &fileStat); @@ -145,7 +145,7 @@ FileOpenForTransmit(const char *filename, int fileFlags, int fileMode) if (S_ISDIR(fileStat.st_mode)) { ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("\"%s\" is a directory", filename))); + errmsg("\"%s\" is a directory", filename))); } } @@ -270,18 +270,28 @@ ReceiveCopyData(StringInfo copyData) switch (messageType) { - case 'd': /* CopyData */ + case 'd': /* CopyData */ + { copyDone = false; break; - case 'c': /* CopyDone */ + } + + case 'c': /* CopyDone */ + { copyDone = true; break; - case 
'f': /* CopyFail */ + } + + case 'f': /* CopyFail */ + { ereport(ERROR, (errcode(ERRCODE_QUERY_CANCELED), errmsg("COPY data failed: %s", pq_getmsgstring(copyData)))); break; - case 'H': /* Flush */ - case 'S': /* Sync */ + } + + case 'H': /* Flush */ + case 'S': /* Sync */ + { /* * Ignore Flush/Sync for the convenience of client libraries (such * as libpq) that may send those without noticing that the command @@ -289,11 +299,15 @@ ReceiveCopyData(StringInfo copyData) */ copyDone = false; break; + } + default: + { ereport(ERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("unexpected message type 0x%02X during COPY data", messageType))); break; + } } return copyDone; diff --git a/src/backend/distributed/executor/multi_client_executor.c b/src/backend/distributed/executor/multi_client_executor.c index e6b4ceef7..57ab6c1b9 100644 --- a/src/backend/distributed/executor/multi_client_executor.c +++ b/src/backend/distributed/executor/multi_client_executor.c @@ -301,7 +301,7 @@ MultiClientCancel(int32 connectionId) if (cancelSent == 0) { ereport(WARNING, (errmsg("could not issue cancel request"), - errdetail("Client error: %s", errorBuffer))); + errdetail("Client error: %s", errorBuffer))); canceled = false; } @@ -348,7 +348,7 @@ MultiClientResultStatus(int32 connectionId) } else { - ereport(WARNING, (errmsg("could not consume data from worker node"))); + ereport(WARNING, (errmsg("could not consume data from worker node"))); resultStatus = CLIENT_RESULT_UNAVAILABLE; } @@ -589,7 +589,7 @@ MultiClientCopyData(int32 connectionId, int32 fileDescriptor) while (receiveLength > 0) { /* received copy data; append these data to file */ - int appended = -1; + int appended = -1; errno = 0; appended = write(fileDescriptor, receiveBuffer, receiveLength); @@ -706,7 +706,7 @@ ClientConnectionReady(PGconn *connection, PostgresPollingStatusType pollingStatu fd_set readFileDescriptorSet; fd_set writeFileDescriptorSet; fd_set exceptionFileDescriptorSet; - struct timeval immediateTimeout = 
{0, 0}; + struct timeval immediateTimeout = { 0, 0 }; int connectionFileDescriptor = PQsocket(connection); FD_ZERO(&readFileDescriptorSet); diff --git a/src/backend/distributed/executor/multi_executor.c b/src/backend/distributed/executor/multi_executor.c index 25fd12640..145abb4d5 100644 --- a/src/backend/distributed/executor/multi_executor.c +++ b/src/backend/distributed/executor/multi_executor.c @@ -157,7 +157,6 @@ multi_ExecutorStart(QueryDesc *queryDesc, int eflags) queryDesc->plannedstmt = masterSelectPlan; eflags |= EXEC_FLAG_CITUS_MASTER_SELECT; } - } /* if the execution is not done for router executor, drop into standard executor */ @@ -253,7 +252,7 @@ multi_ExecutorEnd(QueryDesc *queryDesc) RangeTblEntry *rangeTableEntry = linitial(planStatement->rtable); Oid masterTableRelid = rangeTableEntry->relid; - ObjectAddress masterTableObject = {InvalidOid, InvalidOid, 0}; + ObjectAddress masterTableObject = { InvalidOid, InvalidOid, 0 }; masterTableObject.classId = RelationRelationId; masterTableObject.objectId = masterTableRelid; diff --git a/src/backend/distributed/executor/multi_real_time_executor.c b/src/backend/distributed/executor/multi_real_time_executor.c index 77436612c..e3050f64b 100644 --- a/src/backend/distributed/executor/multi_real_time_executor.c +++ b/src/backend/distributed/executor/multi_real_time_executor.c @@ -89,7 +89,7 @@ MultiRealTimeExecute(Job *job) } /* loop around until all tasks complete, one task fails, or user cancels */ - while ( !(allTasksCompleted || taskFailed || QueryCancelPending) ) + while (!(allTasksCompleted || taskFailed || QueryCancelPending)) { uint32 taskCount = list_length(taskList); uint32 completedTaskCount = 0; @@ -230,333 +230,338 @@ ManageTaskExecution(Task *task, TaskExecution *taskExecution) switch (currentStatus) { - case EXEC_TASK_CONNECT_START: - { - int32 connectionId = INVALID_CONNECTION_ID; - char *nodeDatabase = NULL; - - /* we use the same database name on the master and worker nodes */ - nodeDatabase = 
get_database_name(MyDatabaseId); - - connectionId = MultiClientConnectStart(nodeName, nodePort, nodeDatabase); - connectionIdArray[currentIndex] = connectionId; - - /* if valid, poll the connection until the connection is initiated */ - if (connectionId != INVALID_CONNECTION_ID) + case EXEC_TASK_CONNECT_START: { - taskStatusArray[currentIndex] = EXEC_TASK_CONNECT_POLL; - taskExecution->connectPollCount = 0; - connectAction = CONNECT_ACTION_OPENED; - } - else - { - AdjustStateForFailure(taskExecution); - } + int32 connectionId = INVALID_CONNECTION_ID; + char *nodeDatabase = NULL; - break; - } + /* we use the same database name on the master and worker nodes */ + nodeDatabase = get_database_name(MyDatabaseId); - case EXEC_TASK_CONNECT_POLL: - { - int32 connectionId = connectionIdArray[currentIndex]; - ConnectStatus pollStatus = MultiClientConnectPoll(connectionId); + connectionId = MultiClientConnectStart(nodeName, nodePort, nodeDatabase); + connectionIdArray[currentIndex] = connectionId; - /* - * If the connection is established, we reset the data fetch counter and - * change our status to data fetching. 
- */ - if (pollStatus == CLIENT_CONNECTION_READY) - { - taskExecution->dataFetchTaskIndex = -1; - taskStatusArray[currentIndex] = EXEC_FETCH_TASK_LOOP; - } - else if (pollStatus == CLIENT_CONNECTION_BUSY) - { - taskStatusArray[currentIndex] = EXEC_TASK_CONNECT_POLL; - } - else if (pollStatus == CLIENT_CONNECTION_BAD) - { - taskStatusArray[currentIndex] = EXEC_TASK_FAILED; - } - - /* now check if we have been trying to connect for too long */ - taskExecution->connectPollCount++; - if (pollStatus == CLIENT_CONNECTION_BUSY) - { - uint32 maxCount = REMOTE_NODE_CONNECT_TIMEOUT / RemoteTaskCheckInterval; - uint32 currentCount = taskExecution->connectPollCount; - if (currentCount >= maxCount) + /* if valid, poll the connection until the connection is initiated */ + if (connectionId != INVALID_CONNECTION_ID) { - ereport(WARNING, (errmsg("could not establish asynchronous connection " - "after %u ms", REMOTE_NODE_CONNECT_TIMEOUT))); + taskStatusArray[currentIndex] = EXEC_TASK_CONNECT_POLL; + taskExecution->connectPollCount = 0; + connectAction = CONNECT_ACTION_OPENED; + } + else + { + AdjustStateForFailure(taskExecution); + } + break; + } + + case EXEC_TASK_CONNECT_POLL: + { + int32 connectionId = connectionIdArray[currentIndex]; + ConnectStatus pollStatus = MultiClientConnectPoll(connectionId); + + /* + * If the connection is established, we reset the data fetch counter and + * change our status to data fetching. + */ + if (pollStatus == CLIENT_CONNECTION_READY) + { + taskExecution->dataFetchTaskIndex = -1; + taskStatusArray[currentIndex] = EXEC_FETCH_TASK_LOOP; + } + else if (pollStatus == CLIENT_CONNECTION_BUSY) + { + taskStatusArray[currentIndex] = EXEC_TASK_CONNECT_POLL; + } + else if (pollStatus == CLIENT_CONNECTION_BAD) + { taskStatusArray[currentIndex] = EXEC_TASK_FAILED; } - } - break; - } - - case EXEC_TASK_FAILED: - { - /* - * On task failure, we close the connection. 
We also reset our execution - * status assuming that we might fail on all other worker nodes and come - * back to this failed node. In that case, we will retry the same fetch - * and compute task(s) on this node again. - */ - int32 connectionId = connectionIdArray[currentIndex]; - MultiClientDisconnect(connectionId); - connectionIdArray[currentIndex] = INVALID_CONNECTION_ID; - connectAction = CONNECT_ACTION_CLOSED; - - taskStatusArray[currentIndex] = EXEC_TASK_CONNECT_START; - - /* try next worker node */ - AdjustStateForFailure(taskExecution); - - break; - } - - case EXEC_FETCH_TASK_LOOP: - { - List *dataFetchTaskList = task->dependedTaskList; - int32 dataFetchTaskCount = list_length(dataFetchTaskList); - - /* move to the next data fetch task */ - taskExecution->dataFetchTaskIndex++; - - if (taskExecution->dataFetchTaskIndex < dataFetchTaskCount) - { - taskStatusArray[currentIndex] = EXEC_FETCH_TASK_START; - } - else - { - taskStatusArray[currentIndex] = EXEC_COMPUTE_TASK_START; - } - - break; - } - - case EXEC_FETCH_TASK_START: - { - List *dataFetchTaskList = task->dependedTaskList; - int32 dataFetchTaskIndex = taskExecution->dataFetchTaskIndex; - Task *dataFetchTask = (Task *) list_nth(dataFetchTaskList, dataFetchTaskIndex); - - char *dataFetchQuery = dataFetchTask->queryString; - int32 connectionId = connectionIdArray[currentIndex]; - - bool querySent = MultiClientSendQuery(connectionId, dataFetchQuery); - if (querySent) - { - taskStatusArray[currentIndex] = EXEC_FETCH_TASK_RUNNING; - } - else - { - taskStatusArray[currentIndex] = EXEC_TASK_FAILED; - } - - break; - } - - case EXEC_FETCH_TASK_RUNNING: - { - int32 connectionId = connectionIdArray[currentIndex]; - ResultStatus resultStatus = MultiClientResultStatus(connectionId); - QueryStatus queryStatus = CLIENT_INVALID_QUERY; - - /* check if query results are in progress or unavailable */ - if (resultStatus == CLIENT_RESULT_BUSY) - { - taskStatusArray[currentIndex] = EXEC_FETCH_TASK_RUNNING; - break; - } - else 
if (resultStatus == CLIENT_RESULT_UNAVAILABLE) - { - taskStatusArray[currentIndex] = EXEC_TASK_FAILED; - break; - } - - Assert(resultStatus == CLIENT_RESULT_READY); - - /* - * If the query executed successfully, loop onto the next data fetch - * task. Else if the query failed, try data fetching on another node. - */ - queryStatus = MultiClientQueryStatus(connectionId); - if (queryStatus == CLIENT_QUERY_DONE) - { - taskStatusArray[currentIndex] = EXEC_FETCH_TASK_LOOP; - } - else if (queryStatus == CLIENT_QUERY_FAILED) - { - taskStatusArray[currentIndex] = EXEC_TASK_FAILED; - } - else - { - ereport(FATAL, (errmsg("invalid query status: %d", queryStatus))); - } - - break; - } - - case EXEC_COMPUTE_TASK_START: - { - int32 connectionId = connectionIdArray[currentIndex]; - bool querySent = false; - - /* construct new query to copy query results to stdout */ - char *queryString = task->queryString; - StringInfo computeTaskQuery = makeStringInfo(); - if (BinaryMasterCopyFormat) - { - appendStringInfo(computeTaskQuery, COPY_QUERY_TO_STDOUT_BINARY, queryString); - } - else - { - appendStringInfo(computeTaskQuery, COPY_QUERY_TO_STDOUT_TEXT, queryString); - } - - querySent = MultiClientSendQuery(connectionId, computeTaskQuery->data); - if (querySent) - { - taskStatusArray[currentIndex] = EXEC_COMPUTE_TASK_RUNNING; - } - else - { - taskStatusArray[currentIndex] = EXEC_TASK_FAILED; - } - - break; - } - - case EXEC_COMPUTE_TASK_RUNNING: - { - int32 connectionId = connectionIdArray[currentIndex]; - ResultStatus resultStatus = MultiClientResultStatus(connectionId); - QueryStatus queryStatus = CLIENT_INVALID_QUERY; - - /* check if query results are in progress or unavailable */ - if (resultStatus == CLIENT_RESULT_BUSY) - { - taskStatusArray[currentIndex] = EXEC_COMPUTE_TASK_RUNNING; - break; - } - else if (resultStatus == CLIENT_RESULT_UNAVAILABLE) - { - taskStatusArray[currentIndex] = EXEC_TASK_FAILED; - break; - } - - Assert(resultStatus == CLIENT_RESULT_READY); - - /* check if 
our request to copy query results has been acknowledged */ - queryStatus = MultiClientQueryStatus(connectionId); - if (queryStatus == CLIENT_QUERY_COPY) - { - StringInfo jobDirectoryName = JobDirectoryName(task->jobId); - StringInfo taskFilename = TaskFilename(jobDirectoryName, task->taskId); - - char *filename = taskFilename->data; - int fileFlags = (O_APPEND | O_CREAT | O_RDWR | O_TRUNC | PG_BINARY); - int fileMode = (S_IRUSR | S_IWUSR); - - int32 fileDescriptor = BasicOpenFile(filename, fileFlags, fileMode); - if (fileDescriptor >= 0) + /* now check if we have been trying to connect for too long */ + taskExecution->connectPollCount++; + if (pollStatus == CLIENT_CONNECTION_BUSY) + { + uint32 maxCount = REMOTE_NODE_CONNECT_TIMEOUT / RemoteTaskCheckInterval; + uint32 currentCount = taskExecution->connectPollCount; + if (currentCount >= maxCount) + { + ereport(WARNING, (errmsg("could not establish asynchronous " + "connection after %u ms", + REMOTE_NODE_CONNECT_TIMEOUT))); + + taskStatusArray[currentIndex] = EXEC_TASK_FAILED; + } + } + + break; + } + + case EXEC_TASK_FAILED: + { + /* + * On task failure, we close the connection. We also reset our execution + * status assuming that we might fail on all other worker nodes and come + * back to this failed node. In that case, we will retry the same fetch + * and compute task(s) on this node again. 
+ */ + int32 connectionId = connectionIdArray[currentIndex]; + MultiClientDisconnect(connectionId); + connectionIdArray[currentIndex] = INVALID_CONNECTION_ID; + connectAction = CONNECT_ACTION_CLOSED; + + taskStatusArray[currentIndex] = EXEC_TASK_CONNECT_START; + + /* try next worker node */ + AdjustStateForFailure(taskExecution); + + break; + } + + case EXEC_FETCH_TASK_LOOP: + { + List *dataFetchTaskList = task->dependedTaskList; + int32 dataFetchTaskCount = list_length(dataFetchTaskList); + + /* move to the next data fetch task */ + taskExecution->dataFetchTaskIndex++; + + if (taskExecution->dataFetchTaskIndex < dataFetchTaskCount) + { + taskStatusArray[currentIndex] = EXEC_FETCH_TASK_START; + } + else + { + taskStatusArray[currentIndex] = EXEC_COMPUTE_TASK_START; + } + + break; + } + + case EXEC_FETCH_TASK_START: + { + List *dataFetchTaskList = task->dependedTaskList; + int32 dataFetchTaskIndex = taskExecution->dataFetchTaskIndex; + Task *dataFetchTask = (Task *) list_nth(dataFetchTaskList, + dataFetchTaskIndex); + + char *dataFetchQuery = dataFetchTask->queryString; + int32 connectionId = connectionIdArray[currentIndex]; + + bool querySent = MultiClientSendQuery(connectionId, dataFetchQuery); + if (querySent) + { + taskStatusArray[currentIndex] = EXEC_FETCH_TASK_RUNNING; + } + else + { + taskStatusArray[currentIndex] = EXEC_TASK_FAILED; + } + + break; + } + + case EXEC_FETCH_TASK_RUNNING: + { + int32 connectionId = connectionIdArray[currentIndex]; + ResultStatus resultStatus = MultiClientResultStatus(connectionId); + QueryStatus queryStatus = CLIENT_INVALID_QUERY; + + /* check if query results are in progress or unavailable */ + if (resultStatus == CLIENT_RESULT_BUSY) + { + taskStatusArray[currentIndex] = EXEC_FETCH_TASK_RUNNING; + break; + } + else if (resultStatus == CLIENT_RESULT_UNAVAILABLE) + { + taskStatusArray[currentIndex] = EXEC_TASK_FAILED; + break; + } + + Assert(resultStatus == CLIENT_RESULT_READY); + + /* + * If the query executed successfully, loop 
onto the next data fetch + * task. Else if the query failed, try data fetching on another node. + */ + queryStatus = MultiClientQueryStatus(connectionId); + if (queryStatus == CLIENT_QUERY_DONE) + { + taskStatusArray[currentIndex] = EXEC_FETCH_TASK_LOOP; + } + else if (queryStatus == CLIENT_QUERY_FAILED) + { + taskStatusArray[currentIndex] = EXEC_TASK_FAILED; + } + else + { + ereport(FATAL, (errmsg("invalid query status: %d", queryStatus))); + } + + break; + } + + case EXEC_COMPUTE_TASK_START: + { + int32 connectionId = connectionIdArray[currentIndex]; + bool querySent = false; + + /* construct new query to copy query results to stdout */ + char *queryString = task->queryString; + StringInfo computeTaskQuery = makeStringInfo(); + if (BinaryMasterCopyFormat) + { + appendStringInfo(computeTaskQuery, COPY_QUERY_TO_STDOUT_BINARY, + queryString); + } + else + { + appendStringInfo(computeTaskQuery, COPY_QUERY_TO_STDOUT_TEXT, + queryString); + } + + querySent = MultiClientSendQuery(connectionId, computeTaskQuery->data); + if (querySent) + { + taskStatusArray[currentIndex] = EXEC_COMPUTE_TASK_RUNNING; + } + else + { + taskStatusArray[currentIndex] = EXEC_TASK_FAILED; + } + + break; + } + + case EXEC_COMPUTE_TASK_RUNNING: + { + int32 connectionId = connectionIdArray[currentIndex]; + ResultStatus resultStatus = MultiClientResultStatus(connectionId); + QueryStatus queryStatus = CLIENT_INVALID_QUERY; + + /* check if query results are in progress or unavailable */ + if (resultStatus == CLIENT_RESULT_BUSY) + { + taskStatusArray[currentIndex] = EXEC_COMPUTE_TASK_RUNNING; + break; + } + else if (resultStatus == CLIENT_RESULT_UNAVAILABLE) + { + taskStatusArray[currentIndex] = EXEC_TASK_FAILED; + break; + } + + Assert(resultStatus == CLIENT_RESULT_READY); + + /* check if our request to copy query results has been acknowledged */ + queryStatus = MultiClientQueryStatus(connectionId); + if (queryStatus == CLIENT_QUERY_COPY) + { + StringInfo jobDirectoryName = 
JobDirectoryName(task->jobId); + StringInfo taskFilename = TaskFilename(jobDirectoryName, task->taskId); + + char *filename = taskFilename->data; + int fileFlags = (O_APPEND | O_CREAT | O_RDWR | O_TRUNC | PG_BINARY); + int fileMode = (S_IRUSR | S_IWUSR); + + int32 fileDescriptor = BasicOpenFile(filename, fileFlags, fileMode); + if (fileDescriptor >= 0) + { + /* + * All files inside the job directory get automatically cleaned + * up on transaction commit or abort. + */ + fileDescriptorArray[currentIndex] = fileDescriptor; + taskStatusArray[currentIndex] = EXEC_COMPUTE_TASK_COPYING; + } + else + { + ereport(WARNING, (errcode_for_file_access(), + errmsg("could not open file \"%s\": %m", + filename))); + + taskStatusArray[currentIndex] = EXEC_TASK_FAILED; + } + } + else if (queryStatus == CLIENT_QUERY_FAILED) + { + taskStatusArray[currentIndex] = EXEC_TASK_FAILED; + } + else + { + ereport(FATAL, (errmsg("invalid query status: %d", queryStatus))); + } + + break; + } + + case EXEC_COMPUTE_TASK_COPYING: + { + int32 connectionId = connectionIdArray[currentIndex]; + int32 fileDesc = fileDescriptorArray[currentIndex]; + int closed = -1; + + /* copy data from worker node, and write to local file */ + CopyStatus copyStatus = MultiClientCopyData(connectionId, fileDesc); + + /* if worker node will continue to send more data, keep reading */ + if (copyStatus == CLIENT_COPY_MORE) { - /* - * All files inside the job directory get automatically cleaned - * up on transaction commit or abort. 
- */ - fileDescriptorArray[currentIndex] = fileDescriptor; taskStatusArray[currentIndex] = EXEC_COMPUTE_TASK_COPYING; } - else + else if (copyStatus == CLIENT_COPY_DONE) { - ereport(WARNING, (errcode_for_file_access(), - errmsg("could not open file \"%s\": %m", filename))); + closed = close(fileDesc); + fileDescriptorArray[currentIndex] = -1; + if (closed >= 0) + { + taskStatusArray[currentIndex] = EXEC_TASK_DONE; + + /* we are done executing; we no longer need the connection */ + MultiClientDisconnect(connectionId); + connectionIdArray[currentIndex] = INVALID_CONNECTION_ID; + connectAction = CONNECT_ACTION_CLOSED; + } + else + { + ereport(WARNING, (errcode_for_file_access(), + errmsg("could not close copied file: %m"))); + + taskStatusArray[currentIndex] = EXEC_TASK_FAILED; + } + } + else if (copyStatus == CLIENT_COPY_FAILED) + { taskStatusArray[currentIndex] = EXEC_TASK_FAILED; + + closed = close(fileDesc); + fileDescriptorArray[currentIndex] = -1; + + if (closed < 0) + { + ereport(WARNING, (errcode_for_file_access(), + errmsg("could not close copy file: %m"))); + } } + + break; } - else if (queryStatus == CLIENT_QUERY_FAILED) + + case EXEC_TASK_DONE: { - taskStatusArray[currentIndex] = EXEC_TASK_FAILED; + /* we are done with this task's execution */ + break; } - else + + default: { - ereport(FATAL, (errmsg("invalid query status: %d", queryStatus))); + /* we fatal here to avoid leaking client-side resources */ + ereport(FATAL, (errmsg("invalid execution status: %d", currentStatus))); + break; } - - break; - } - - case EXEC_COMPUTE_TASK_COPYING: - { - int32 connectionId = connectionIdArray[currentIndex]; - int32 fileDesc = fileDescriptorArray[currentIndex]; - int closed = -1; - - /* copy data from worker node, and write to local file */ - CopyStatus copyStatus = MultiClientCopyData(connectionId, fileDesc); - - /* if worker node will continue to send more data, keep reading */ - if (copyStatus == CLIENT_COPY_MORE) - { - taskStatusArray[currentIndex] = 
EXEC_COMPUTE_TASK_COPYING; - } - else if (copyStatus == CLIENT_COPY_DONE) - { - closed = close(fileDesc); - fileDescriptorArray[currentIndex] = -1; - - if (closed >= 0) - { - taskStatusArray[currentIndex] = EXEC_TASK_DONE; - - /* we are done executing; we no longer need the connection */ - MultiClientDisconnect(connectionId); - connectionIdArray[currentIndex] = INVALID_CONNECTION_ID; - connectAction = CONNECT_ACTION_CLOSED; - } - else - { - ereport(WARNING, (errcode_for_file_access(), - errmsg("could not close copied file: %m"))); - - taskStatusArray[currentIndex] = EXEC_TASK_FAILED; - } - } - else if (copyStatus == CLIENT_COPY_FAILED) - { - taskStatusArray[currentIndex] = EXEC_TASK_FAILED; - - closed = close(fileDesc); - fileDescriptorArray[currentIndex] = -1; - - if (closed < 0) - { - ereport(WARNING, (errcode_for_file_access(), - errmsg("could not close copy file: %m"))); - } - } - - break; - } - - case EXEC_TASK_DONE: - { - /* we are done with this task's execution */ - break; - } - - default: - { - /* we fatal here to avoid leaking client-side resources */ - ereport(FATAL, (errmsg("invalid execution status: %d", currentStatus))); - break; - } } return connectAction; diff --git a/src/backend/distributed/executor/multi_router_executor.c b/src/backend/distributed/executor/multi_router_executor.c index 5c2f04165..ae6eea97d 100644 --- a/src/backend/distributed/executor/multi_router_executor.c +++ b/src/backend/distributed/executor/multi_router_executor.c @@ -80,6 +80,7 @@ RouterExecutorStart(QueryDesc *queryDesc, int eflags, Task *task) queryDesc->estate = executorState; #if (PG_VERSION_NUM < 90500) + /* make sure that upsertQuery is false for versions that UPSERT is not available */ Assert(task->upsertQuery == false); #endif @@ -177,14 +178,14 @@ RouterExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long count, Tas if (!ScanDirectionIsForward(direction)) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("scan directions other than 
forward scans " - "are unsupported"))); + errmsg("scan directions other than forward scans " + "are unsupported"))); } if (count != 0) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("fetching rows from a query using a cursor " - "is unsupported"))); + errmsg("fetching rows from a query using a cursor " + "is unsupported"))); } oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); @@ -210,7 +211,7 @@ RouterExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long count, Tas else { ereport(ERROR, (errmsg("unrecognized operation code: %d", - (int) operation))); + (int) operation))); } if (queryDesc->totaltime != NULL) @@ -219,9 +220,9 @@ RouterExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long count, Tas } MemoryContextSwitchTo(oldcontext); - } + /* * ExecuteDistributedModify is the main entry point for modifying distributed * tables. A distributed modification is successful if any placement of the @@ -532,9 +533,10 @@ StoreQueryResult(PGconn *connection, TupleDesc tupleDescriptor, return true; } + /* -* RouterExecutorFinish cleans up after a distributed execution. -*/ + * RouterExecutorFinish cleans up after a distributed execution. 
+ */ void RouterExecutorFinish(QueryDesc *queryDesc) { diff --git a/src/backend/distributed/executor/multi_server_executor.c b/src/backend/distributed/executor/multi_server_executor.c index 1f143778d..1abc6f007 100644 --- a/src/backend/distributed/executor/multi_server_executor.c +++ b/src/backend/distributed/executor/multi_server_executor.c @@ -303,13 +303,13 @@ AdjustStateForFailure(TaskExecution *taskExecution) if (taskExecution->currentNodeIndex < maxNodeIndex) { - taskExecution->currentNodeIndex++; /* try next worker node */ + taskExecution->currentNodeIndex++; /* try next worker node */ } else { taskExecution->currentNodeIndex = 0; /* go back to the first worker node */ } - taskExecution->dataFetchTaskIndex = -1; /* reset data fetch counter */ - taskExecution->failureCount++; /* record failure */ + taskExecution->dataFetchTaskIndex = -1; /* reset data fetch counter */ + taskExecution->failureCount++; /* record failure */ } diff --git a/src/backend/distributed/executor/multi_task_tracker_executor.c b/src/backend/distributed/executor/multi_task_tracker_executor.c index f67c82271..14c26aef9 100644 --- a/src/backend/distributed/executor/multi_task_tracker_executor.c +++ b/src/backend/distributed/executor/multi_task_tracker_executor.c @@ -38,10 +38,9 @@ int MaxAssignTaskBatchSize = 64; /* maximum number of tasks to assign per round /* TaskMapKey is used as a key in task hash */ typedef struct TaskMapKey { - TaskType taskType; - uint64 jobId; - uint32 taskId; - + TaskType taskType; + uint64 jobId; + uint32 taskId; } TaskMapKey; @@ -51,9 +50,8 @@ typedef struct TaskMapKey */ typedef struct TaskMapEntry { - TaskMapKey key; - Task *task; - + TaskMapKey key; + Task *task; } TaskMapEntry; @@ -83,7 +81,8 @@ static TaskTracker * TrackerHashLookup(HTAB *trackerHash, const char *nodeName, static TaskExecStatus ManageTaskExecution(TaskTracker *taskTracker, TaskTracker *sourceTaskTracker, Task *task, TaskExecution *taskExecution); -static TransmitExecStatus 
ManageTransmitExecution(TaskTracker *transmitTracker, Task *task, +static TransmitExecStatus ManageTransmitExecution(TaskTracker *transmitTracker, + Task *task, TaskExecution *taskExecution); static bool TaskExecutionsCompleted(List *taskList); static StringInfo MapFetchTaskQueryString(Task *mapFetchTask, Task *mapTask); @@ -194,8 +193,8 @@ MultiTaskTrackerExecute(Job *job) TrackerHashConnect(transmitTrackerHash); /* loop around until all tasks complete, one task fails, or user cancels */ - while ( !(allTasksCompleted || taskFailed || taskTransmitFailed || - clusterFailed || QueryCancelPending) ) + while (!(allTasksCompleted || taskFailed || taskTransmitFailed || + clusterFailed || QueryCancelPending)) { TaskTracker *taskTracker = NULL; TaskTracker *transmitTracker = NULL; @@ -493,8 +492,8 @@ TaskAndExecutionList(List *jobTaskList) */ if (!dependendTaskInHash) { - dependendTaskInHash = TaskHashEnter(taskHash, dependendTask); - taskQueue = lappend(taskQueue, dependendTaskInHash); + dependendTaskInHash = TaskHashEnter(taskHash, dependendTask); + taskQueue = lappend(taskQueue, dependendTaskInHash); } /* update dependedTaskList element to the one which is in the hash */ @@ -557,7 +556,7 @@ TaskHashEnter(HTAB *taskHash, Task *task) if (handleFound) { ereport(ERROR, (errmsg("multiple entries for task: \"%d:%ld:%d\"", - task->taskType, task->jobId, task->taskId))); + task->taskType, task->jobId, task->taskId))); } /* save the pointer to the original task in the hash */ @@ -820,82 +819,84 @@ TrackerConnectPoll(TaskTracker *taskTracker) { switch (taskTracker->trackerStatus) { - case TRACKER_CONNECT_START: - { - char *nodeName = taskTracker->workerName; - uint32 nodePort = taskTracker->workerPort; - char *nodeDatabase = get_database_name(MyDatabaseId); - - int32 connectionId = MultiClientConnectStart(nodeName, nodePort, nodeDatabase); - if (connectionId != INVALID_CONNECTION_ID) + case TRACKER_CONNECT_START: { - taskTracker->connectionId = connectionId; - 
taskTracker->trackerStatus = TRACKER_CONNECT_POLL; - } - else - { - taskTracker->trackerStatus = TRACKER_CONNECTION_FAILED; - } + char *nodeName = taskTracker->workerName; + uint32 nodePort = taskTracker->workerPort; + char *nodeDatabase = get_database_name(MyDatabaseId); - break; - } - - case TRACKER_CONNECT_POLL: - { - int32 connectionId = taskTracker->connectionId; - - ConnectStatus pollStatus = MultiClientConnectPoll(connectionId); - if (pollStatus == CLIENT_CONNECTION_READY) - { - taskTracker->trackerStatus = TRACKER_CONNECTED; - } - else if (pollStatus == CLIENT_CONNECTION_BUSY) - { - taskTracker->trackerStatus = TRACKER_CONNECT_POLL; - } - else if (pollStatus == CLIENT_CONNECTION_BAD) - { - taskTracker->trackerStatus = TRACKER_CONNECTION_FAILED; - - MultiClientDisconnect(connectionId); - taskTracker->connectionId = INVALID_CONNECTION_ID; - } - - /* now check if we have been trying to connect for too long */ - taskTracker->connectPollCount++; - if (pollStatus == CLIENT_CONNECTION_BUSY) - { - uint32 maxCount = REMOTE_NODE_CONNECT_TIMEOUT / RemoteTaskCheckInterval; - uint32 currentCount = taskTracker->connectPollCount; - if (currentCount >= maxCount) + int32 connectionId = MultiClientConnectStart(nodeName, nodePort, + nodeDatabase); + if (connectionId != INVALID_CONNECTION_ID) { - ereport(WARNING, (errmsg("could not establish asynchronous connection " - "after %u ms", REMOTE_NODE_CONNECT_TIMEOUT))); + taskTracker->connectionId = connectionId; + taskTracker->trackerStatus = TRACKER_CONNECT_POLL; + } + else + { + taskTracker->trackerStatus = TRACKER_CONNECTION_FAILED; + } + break; + } + + case TRACKER_CONNECT_POLL: + { + int32 connectionId = taskTracker->connectionId; + + ConnectStatus pollStatus = MultiClientConnectPoll(connectionId); + if (pollStatus == CLIENT_CONNECTION_READY) + { + taskTracker->trackerStatus = TRACKER_CONNECTED; + } + else if (pollStatus == CLIENT_CONNECTION_BUSY) + { + taskTracker->trackerStatus = TRACKER_CONNECT_POLL; + } + else if 
(pollStatus == CLIENT_CONNECTION_BAD) + { taskTracker->trackerStatus = TRACKER_CONNECTION_FAILED; MultiClientDisconnect(connectionId); taskTracker->connectionId = INVALID_CONNECTION_ID; } + + /* now check if we have been trying to connect for too long */ + taskTracker->connectPollCount++; + if (pollStatus == CLIENT_CONNECTION_BUSY) + { + uint32 maxCount = REMOTE_NODE_CONNECT_TIMEOUT / RemoteTaskCheckInterval; + uint32 currentCount = taskTracker->connectPollCount; + if (currentCount >= maxCount) + { + ereport(WARNING, (errmsg("could not establish asynchronous " + "connection after %u ms", + REMOTE_NODE_CONNECT_TIMEOUT))); + + taskTracker->trackerStatus = TRACKER_CONNECTION_FAILED; + + MultiClientDisconnect(connectionId); + taskTracker->connectionId = INVALID_CONNECTION_ID; + } + } + + break; } - break; - } + case TRACKER_CONNECTED: + case TRACKER_CONNECTION_FAILED: + { + /* if connected or failed to connect in previous pass, reset poll count */ + taskTracker->connectPollCount = 0; + break; + } - case TRACKER_CONNECTED: - case TRACKER_CONNECTION_FAILED: - { - /* if connected or failed to connect in previous pass, reset poll count */ - taskTracker->connectPollCount = 0; - break; - } - - default: - { - int trackerStatus = (int) taskTracker->trackerStatus; - ereport(FATAL, (errmsg("invalid task tracker status: %d", trackerStatus))); - break; - } + default: + { + int trackerStatus = (int) taskTracker->trackerStatus; + ereport(FATAL, (errmsg("invalid task tracker status: %d", trackerStatus))); + break; + } } return taskTracker->trackerStatus; @@ -1008,213 +1009,214 @@ ManageTaskExecution(TaskTracker *taskTracker, TaskTracker *sourceTaskTracker, switch (currentExecutionStatus) { - case EXEC_TASK_UNASSIGNED: - { - bool taskExecutionsCompleted = true; - TaskType taskType = TASK_TYPE_INVALID_FIRST; - - bool trackerHealthy = TrackerHealthy(taskTracker); - if (!trackerHealthy) + case EXEC_TASK_UNASSIGNED: { - nextExecutionStatus = EXEC_TASK_TRACKER_FAILED; - break; - } + bool 
taskExecutionsCompleted = true; + TaskType taskType = TASK_TYPE_INVALID_FIRST; - /* - * We first retrieve this task's downstream dependencies, and then check - * if these dependencies' executions have completed. - */ - taskExecutionsCompleted = TaskExecutionsCompleted(task->dependedTaskList); - if (!taskExecutionsCompleted) - { - nextExecutionStatus = EXEC_TASK_UNASSIGNED; - break; - } - - /* if map fetch task, create query string from completed map task */ - taskType = task->taskType; - if (taskType == MAP_OUTPUT_FETCH_TASK) - { - StringInfo mapFetchTaskQueryString = NULL; - Task *mapTask = (Task *) linitial(task->dependedTaskList); - TaskExecution *mapTaskExecution = mapTask->taskExecution; - - mapFetchTaskQueryString = MapFetchTaskQueryString(task, mapTask); - task->queryString = mapFetchTaskQueryString->data; - taskExecution->querySourceNodeIndex = mapTaskExecution->currentNodeIndex; - } - - /* - * We finally queue this task for execution. Note that we queue sql and - * other tasks slightly differently. - */ - if (taskType == SQL_TASK) - { - TrackerQueueSqlTask(taskTracker, task); - } - else - { - TrackerQueueTask(taskTracker, task); - } - - nextExecutionStatus = EXEC_TASK_QUEUED; - break; - } - - case EXEC_TASK_QUEUED: - { - TaskStatus remoteTaskStatus = TASK_STATUS_INVALID_FIRST; - - bool trackerHealthy = TrackerHealthy(taskTracker); - if (!trackerHealthy) - { - nextExecutionStatus = EXEC_TASK_TRACKER_FAILED; - break; - } - - remoteTaskStatus = TrackerTaskStatus(taskTracker, task); - if (remoteTaskStatus == TASK_SUCCEEDED) - { - nextExecutionStatus = EXEC_TASK_DONE; - } - else if (remoteTaskStatus == TASK_CLIENT_SIDE_ASSIGN_FAILED || - remoteTaskStatus == TASK_CLIENT_SIDE_STATUS_FAILED) - { - nextExecutionStatus = EXEC_TASK_TRACKER_RETRY; - } - else if (remoteTaskStatus == TASK_PERMANENTLY_FAILED) - { - /* - * If a map output fetch task failed, we assume the problem lies with - * the map task (and the source task tracker it runs on). 
Otherwise, - * we assume the task tracker crashed, and fail over to the next task - * tracker. - */ - if (task->taskType == MAP_OUTPUT_FETCH_TASK) + bool trackerHealthy = TrackerHealthy(taskTracker); + if (!trackerHealthy) { - nextExecutionStatus = EXEC_SOURCE_TASK_TRACKER_RETRY; + nextExecutionStatus = EXEC_TASK_TRACKER_FAILED; + break; + } + + /* + * We first retrieve this task's downstream dependencies, and then check + * if these dependencies' executions have completed. + */ + taskExecutionsCompleted = TaskExecutionsCompleted(task->dependedTaskList); + if (!taskExecutionsCompleted) + { + nextExecutionStatus = EXEC_TASK_UNASSIGNED; + break; + } + + /* if map fetch task, create query string from completed map task */ + taskType = task->taskType; + if (taskType == MAP_OUTPUT_FETCH_TASK) + { + StringInfo mapFetchTaskQueryString = NULL; + Task *mapTask = (Task *) linitial(task->dependedTaskList); + TaskExecution *mapTaskExecution = mapTask->taskExecution; + + mapFetchTaskQueryString = MapFetchTaskQueryString(task, mapTask); + task->queryString = mapFetchTaskQueryString->data; + taskExecution->querySourceNodeIndex = mapTaskExecution->currentNodeIndex; + } + + /* + * We finally queue this task for execution. Note that we queue sql and + * other tasks slightly differently. 
+ */ + if (taskType == SQL_TASK) + { + TrackerQueueSqlTask(taskTracker, task); + } + else + { + TrackerQueueTask(taskTracker, task); + } + + nextExecutionStatus = EXEC_TASK_QUEUED; + break; + } + + case EXEC_TASK_QUEUED: + { + TaskStatus remoteTaskStatus = TASK_STATUS_INVALID_FIRST; + + bool trackerHealthy = TrackerHealthy(taskTracker); + if (!trackerHealthy) + { + nextExecutionStatus = EXEC_TASK_TRACKER_FAILED; + break; + } + + remoteTaskStatus = TrackerTaskStatus(taskTracker, task); + if (remoteTaskStatus == TASK_SUCCEEDED) + { + nextExecutionStatus = EXEC_TASK_DONE; + } + else if (remoteTaskStatus == TASK_CLIENT_SIDE_ASSIGN_FAILED || + remoteTaskStatus == TASK_CLIENT_SIDE_STATUS_FAILED) + { + nextExecutionStatus = EXEC_TASK_TRACKER_RETRY; + } + else if (remoteTaskStatus == TASK_PERMANENTLY_FAILED) + { + /* + * If a map output fetch task failed, we assume the problem lies with + * the map task (and the source task tracker it runs on). Otherwise, + * we assume the task tracker crashed, and fail over to the next task + * tracker. + */ + if (task->taskType == MAP_OUTPUT_FETCH_TASK) + { + nextExecutionStatus = EXEC_SOURCE_TASK_TRACKER_RETRY; + } + else + { + nextExecutionStatus = EXEC_TASK_TRACKER_FAILED; + } + } + else + { + /* assume task is still in progress */ + nextExecutionStatus = EXEC_TASK_QUEUED; + } + + break; + } + + case EXEC_TASK_TRACKER_RETRY: + { + bool trackerHealthy = false; + bool trackerConnectionUp = false; + + /* + * This case statement usually handles connection related issues. Some + * edge cases however, like a user sending a SIGTERM to the worker node, + * keep the connection open but disallow task assignments. We therefore + * need to track those as intermittent tracker failures here. 
+ */ + trackerConnectionUp = TrackerConnectionUp(taskTracker); + if (trackerConnectionUp) + { + taskTracker->trackerFailureCount++; + } + + trackerHealthy = TrackerHealthy(taskTracker); + if (trackerHealthy) + { + TaskStatus remoteTaskStatus = TrackerTaskStatus(taskTracker, task); + if (remoteTaskStatus == TASK_CLIENT_SIDE_ASSIGN_FAILED) + { + nextExecutionStatus = EXEC_TASK_UNASSIGNED; + } + else if (remoteTaskStatus == TASK_CLIENT_SIDE_STATUS_FAILED) + { + nextExecutionStatus = EXEC_TASK_QUEUED; + } } else { nextExecutionStatus = EXEC_TASK_TRACKER_FAILED; } - } - else - { - /* assume task is still in progress */ - nextExecutionStatus = EXEC_TASK_QUEUED; + + break; } - break; - } - - case EXEC_TASK_TRACKER_RETRY: - { - bool trackerHealthy = false; - bool trackerConnectionUp = false; - - /* - * This case statement usually handles connection related issues. Some - * edge cases however, like a user sending a SIGTERM to the worker node, - * keep the connection open but disallow task assignments. We therefore - * need to track those as intermittent tracker failures here. - */ - trackerConnectionUp = TrackerConnectionUp(taskTracker); - if (trackerConnectionUp) + case EXEC_SOURCE_TASK_TRACKER_RETRY: { - taskTracker->trackerFailureCount++; - } + Task *mapTask = (Task *) linitial(task->dependedTaskList); + TaskExecution *mapTaskExecution = mapTask->taskExecution; + uint32 sourceNodeIndex = mapTaskExecution->currentNodeIndex; - trackerHealthy = TrackerHealthy(taskTracker); - if (trackerHealthy) - { - TaskStatus remoteTaskStatus = TrackerTaskStatus(taskTracker, task); - if (remoteTaskStatus == TASK_CLIENT_SIDE_ASSIGN_FAILED) + bool sourceTrackerHealthy = false; + Assert(sourceTaskTracker != NULL); + Assert(task->taskType == MAP_OUTPUT_FETCH_TASK); + + /* + * As this map fetch task was running, another map fetch that depends on + * another map task might have failed. We would have then reassigned the + * map task and potentially other map tasks in its constraint group. 
So + * this map fetch's source node might have changed underneath us. If it + * did, we don't want to record a failure for the new source tracker. + */ + if (taskExecution->querySourceNodeIndex == sourceNodeIndex) { + bool sourceTrackerConnectionUp = TrackerConnectionUp(sourceTaskTracker); + if (sourceTrackerConnectionUp) + { + sourceTaskTracker->trackerFailureCount++; + } + } + + sourceTrackerHealthy = TrackerHealthy(sourceTaskTracker); + if (sourceTrackerHealthy) + { + /* + * We change our status to unassigned. In that status, we queue an + * "update map fetch task" on the task tracker, and retry fetching + * the map task's output from the same source node. + */ nextExecutionStatus = EXEC_TASK_UNASSIGNED; } - else if (remoteTaskStatus == TASK_CLIENT_SIDE_STATUS_FAILED) + else { - nextExecutionStatus = EXEC_TASK_QUEUED; + nextExecutionStatus = EXEC_SOURCE_TASK_TRACKER_FAILED; } - } - else - { - nextExecutionStatus = EXEC_TASK_TRACKER_FAILED; + + break; } - break; - } - - case EXEC_SOURCE_TASK_TRACKER_RETRY: - { - Task *mapTask = (Task *) linitial(task->dependedTaskList); - TaskExecution *mapTaskExecution = mapTask->taskExecution; - uint32 sourceNodeIndex = mapTaskExecution->currentNodeIndex; - - bool sourceTrackerHealthy = false; - Assert(sourceTaskTracker != NULL); - Assert(task->taskType == MAP_OUTPUT_FETCH_TASK); - - /* - * As this map fetch task was running, another map fetch that depends on - * another map task might have failed. We would have then reassigned the - * map task and potentially other map tasks in its constraint group. So - * this map fetch's source node might have changed underneath us. If it - * did, we don't want to record a failure for the new source tracker. 
- */ - if (taskExecution->querySourceNodeIndex == sourceNodeIndex) - { - bool sourceTrackerConnectionUp = TrackerConnectionUp(sourceTaskTracker); - if (sourceTrackerConnectionUp) - { - sourceTaskTracker->trackerFailureCount++; - } - } - - sourceTrackerHealthy = TrackerHealthy(sourceTaskTracker); - if (sourceTrackerHealthy) + case EXEC_TASK_TRACKER_FAILED: + case EXEC_SOURCE_TASK_TRACKER_FAILED: { /* - * We change our status to unassigned. In that status, we queue an - * "update map fetch task" on the task tracker, and retry fetching - * the map task's output from the same source node. + * These two cases exist to signal to the caller that we failed. In both + * cases, the caller is responsible for reassigning task(s) and running + * the appropriate recovery logic. */ nextExecutionStatus = EXEC_TASK_UNASSIGNED; + break; } - else + + case EXEC_TASK_DONE: { - nextExecutionStatus = EXEC_SOURCE_TASK_TRACKER_FAILED; + /* we are done with this task's execution */ + nextExecutionStatus = EXEC_TASK_DONE; + break; } - break; - } - - case EXEC_TASK_TRACKER_FAILED: - case EXEC_SOURCE_TASK_TRACKER_FAILED: - { - /* - * These two cases exist to signal to the caller that we failed. In both - * cases, the caller is responsible for reassigning task(s) and running - * the appropriate recovery logic. 
- */ - nextExecutionStatus = EXEC_TASK_UNASSIGNED; - break; - } - - case EXEC_TASK_DONE: - { - /* we are done with this task's execution */ - nextExecutionStatus = EXEC_TASK_DONE; - break; - } - - default: - { - /* we fatal here to avoid leaking client-side resources */ - ereport(FATAL, (errmsg("invalid execution status: %d", currentExecutionStatus))); - break; - } + default: + { + /* we fatal here to avoid leaking client-side resources */ + ereport(FATAL, (errmsg("invalid execution status: %d", + currentExecutionStatus))); + break; + } } /* update task execution's status for most recent task tracker */ @@ -1247,225 +1249,227 @@ ManageTransmitExecution(TaskTracker *transmitTracker, switch (currentTransmitStatus) { - case EXEC_TRANSMIT_UNASSIGNED: - { - TaskExecStatus *taskStatusArray = taskExecution->taskStatusArray; - TaskExecStatus currentExecutionStatus = taskStatusArray[currentNodeIndex]; - bool trackerHealthy = false; - - /* if top level task's in progress, nothing to do */ - if (currentExecutionStatus != EXEC_TASK_DONE) + case EXEC_TRANSMIT_UNASSIGNED: { - nextTransmitStatus = EXEC_TRANSMIT_UNASSIGNED; - break; - } + TaskExecStatus *taskStatusArray = taskExecution->taskStatusArray; + TaskExecStatus currentExecutionStatus = taskStatusArray[currentNodeIndex]; + bool trackerHealthy = false; - trackerHealthy = TrackerHealthy(transmitTracker); - if (!trackerHealthy) - { - nextTransmitStatus = EXEC_TRANSMIT_TRACKER_FAILED; - break; - } + /* if top level task's in progress, nothing to do */ + if (currentExecutionStatus != EXEC_TASK_DONE) + { + nextTransmitStatus = EXEC_TRANSMIT_UNASSIGNED; + break; + } - TrackerQueueFileTransmit(transmitTracker, task); - nextTransmitStatus = EXEC_TRANSMIT_QUEUED; - break; - } + trackerHealthy = TrackerHealthy(transmitTracker); + if (!trackerHealthy) + { + nextTransmitStatus = EXEC_TRANSMIT_TRACKER_FAILED; + break; + } - case EXEC_TRANSMIT_QUEUED: - { - QueryStatus queryStatus = CLIENT_INVALID_QUERY; - int32 connectionId = 
INVALID_CONNECTION_ID; - TaskStatus taskStatus = TASK_STATUS_INVALID_FIRST; - - bool trackerHealthy = TrackerHealthy(transmitTracker); - if (!trackerHealthy) - { - nextTransmitStatus = EXEC_TRANSMIT_TRACKER_FAILED; - break; - } - - taskStatus = TrackerTaskStatus(transmitTracker, task); - if (taskStatus == TASK_FILE_TRANSMIT_QUEUED) - { - /* remain in queued status until tracker assigns this task */ + TrackerQueueFileTransmit(transmitTracker, task); nextTransmitStatus = EXEC_TRANSMIT_QUEUED; break; } - else if (taskStatus == TASK_CLIENT_SIDE_TRANSMIT_FAILED) + + case EXEC_TRANSMIT_QUEUED: { - nextTransmitStatus = EXEC_TRANSMIT_TRACKER_RETRY; - break; - } + QueryStatus queryStatus = CLIENT_INVALID_QUERY; + int32 connectionId = INVALID_CONNECTION_ID; + TaskStatus taskStatus = TASK_STATUS_INVALID_FIRST; - /* the open connection belongs to this task */ - connectionId = TransmitTrackerConnectionId(transmitTracker, task); - Assert(connectionId != INVALID_CONNECTION_ID); - Assert(taskStatus == TASK_ASSIGNED); - - /* start copy protocol */ - queryStatus = MultiClientQueryStatus(connectionId); - if (queryStatus == CLIENT_QUERY_COPY) - { - StringInfo jobDirectoryName = JobDirectoryName(task->jobId); - StringInfo taskFilename = TaskFilename(jobDirectoryName, task->taskId); - - char *filename = taskFilename->data; - int fileFlags = (O_APPEND | O_CREAT | O_RDWR | O_TRUNC | PG_BINARY); - int fileMode = (S_IRUSR | S_IWUSR); - - int32 fileDescriptor = BasicOpenFile(filename, fileFlags, fileMode); - if (fileDescriptor >= 0) + bool trackerHealthy = TrackerHealthy(transmitTracker); + if (!trackerHealthy) { - /* - * All files inside the job directory get automatically cleaned - * up on transaction commit or abort. 
- */ - fileDescriptorArray[currentNodeIndex] = fileDescriptor; - nextTransmitStatus = EXEC_TRANSMIT_COPYING; + nextTransmitStatus = EXEC_TRANSMIT_TRACKER_FAILED; + break; + } + + taskStatus = TrackerTaskStatus(transmitTracker, task); + if (taskStatus == TASK_FILE_TRANSMIT_QUEUED) + { + /* remain in queued status until tracker assigns this task */ + nextTransmitStatus = EXEC_TRANSMIT_QUEUED; + break; + } + else if (taskStatus == TASK_CLIENT_SIDE_TRANSMIT_FAILED) + { + nextTransmitStatus = EXEC_TRANSMIT_TRACKER_RETRY; + break; + } + + /* the open connection belongs to this task */ + connectionId = TransmitTrackerConnectionId(transmitTracker, task); + Assert(connectionId != INVALID_CONNECTION_ID); + Assert(taskStatus == TASK_ASSIGNED); + + /* start copy protocol */ + queryStatus = MultiClientQueryStatus(connectionId); + if (queryStatus == CLIENT_QUERY_COPY) + { + StringInfo jobDirectoryName = JobDirectoryName(task->jobId); + StringInfo taskFilename = TaskFilename(jobDirectoryName, task->taskId); + + char *filename = taskFilename->data; + int fileFlags = (O_APPEND | O_CREAT | O_RDWR | O_TRUNC | PG_BINARY); + int fileMode = (S_IRUSR | S_IWUSR); + + int32 fileDescriptor = BasicOpenFile(filename, fileFlags, fileMode); + if (fileDescriptor >= 0) + { + /* + * All files inside the job directory get automatically cleaned + * up on transaction commit or abort. 
+ */ + fileDescriptorArray[currentNodeIndex] = fileDescriptor; + nextTransmitStatus = EXEC_TRANSMIT_COPYING; + } + else + { + ereport(WARNING, (errcode_for_file_access(), + errmsg("could not open file \"%s\": %m", + filename))); + + nextTransmitStatus = EXEC_TRANSMIT_TRACKER_RETRY; + } } else { - ereport(WARNING, (errcode_for_file_access(), - errmsg("could not open file \"%s\": %m", filename))); - nextTransmitStatus = EXEC_TRANSMIT_TRACKER_RETRY; } - } - else - { - nextTransmitStatus = EXEC_TRANSMIT_TRACKER_RETRY; + + /* + * We use task tracker logic to manage file transmits as well, but that + * abstraction starts to leak after we drop into the copy protocol. To + * make our task tracker logic work, we need to "void" the tracker's + * connection if the transmit task failed in here. + */ + if (nextTransmitStatus == EXEC_TRANSMIT_TRACKER_RETRY) + { + transmitTracker->connectionBusy = false; + transmitTracker->connectionBusyOnTask = NULL; + } + + break; } - /* - * We use task tracker logic to manage file transmits as well, but that - * abstraction starts to leak after we drop into the copy protocol. To - * make our task tracker logic work, we need to "void" the tracker's - * connection if the transmit task failed in here. 
- */ - if (nextTransmitStatus == EXEC_TRANSMIT_TRACKER_RETRY) + case EXEC_TRANSMIT_COPYING: { + int32 fileDescriptor = fileDescriptorArray[currentNodeIndex]; + CopyStatus copyStatus = CLIENT_INVALID_COPY; + int closed = -1; + + /* the open connection belongs to this task */ + int32 connectionId = TransmitTrackerConnectionId(transmitTracker, task); + Assert(connectionId != INVALID_CONNECTION_ID); + + copyStatus = MultiClientCopyData(connectionId, fileDescriptor); + if (copyStatus == CLIENT_COPY_MORE) + { + /* worker node continues to send more data, keep reading */ + nextTransmitStatus = EXEC_TRANSMIT_COPYING; + break; + } + + /* we are done copying data */ + if (copyStatus == CLIENT_COPY_DONE) + { + closed = close(fileDescriptor); + fileDescriptorArray[currentNodeIndex] = -1; + + if (closed >= 0) + { + nextTransmitStatus = EXEC_TRANSMIT_DONE; + } + else + { + ereport(WARNING, (errcode_for_file_access(), + errmsg("could not close copied file: %m"))); + + nextTransmitStatus = EXEC_TRANSMIT_TRACKER_RETRY; + } + } + else if (copyStatus == CLIENT_COPY_FAILED) + { + nextTransmitStatus = EXEC_TRANSMIT_TRACKER_RETRY; + + closed = close(fileDescriptor); + fileDescriptorArray[currentNodeIndex] = -1; + + if (closed < 0) + { + ereport(WARNING, (errcode_for_file_access(), + errmsg("could not close copy file: %m"))); + } + } + + /* + * We use task tracker logic to manage file transmits as well, but that + * abstraction leaks after we drop into the copy protocol. To make it + * work, we reset transmit tracker's connection for next file transmit. 
+ */ transmitTracker->connectionBusy = false; transmitTracker->connectionBusyOnTask = NULL; - } - break; - } - - case EXEC_TRANSMIT_COPYING: - { - int32 fileDescriptor = fileDescriptorArray[currentNodeIndex]; - CopyStatus copyStatus = CLIENT_INVALID_COPY; - int closed = -1; - - /* the open connection belongs to this task */ - int32 connectionId = TransmitTrackerConnectionId(transmitTracker, task); - Assert(connectionId != INVALID_CONNECTION_ID); - - copyStatus = MultiClientCopyData(connectionId, fileDescriptor); - if (copyStatus == CLIENT_COPY_MORE) - { - /* worker node continues to send more data, keep reading */ - nextTransmitStatus = EXEC_TRANSMIT_COPYING; break; } - /* we are done copying data */ - if (copyStatus == CLIENT_COPY_DONE) + case EXEC_TRANSMIT_TRACKER_RETRY: { - closed = close(fileDescriptor); - fileDescriptorArray[currentNodeIndex] = -1; + bool trackerHealthy = false; + bool trackerConnectionUp = false; - if (closed >= 0) + /* + * The task tracker proxy handles connection errors. On the off chance + * that our connection is still up and the transmit tracker misbehaved, + * we capture this as an intermittent tracker failure. 
+ */ + trackerConnectionUp = TrackerConnectionUp(transmitTracker); + if (trackerConnectionUp) { - nextTransmitStatus = EXEC_TRANSMIT_DONE; + transmitTracker->trackerFailureCount++; + } + + trackerHealthy = TrackerHealthy(transmitTracker); + if (trackerHealthy) + { + nextTransmitStatus = EXEC_TRANSMIT_UNASSIGNED; } else { - ereport(WARNING, (errcode_for_file_access(), - errmsg("could not close copied file: %m"))); - - nextTransmitStatus = EXEC_TRANSMIT_TRACKER_RETRY; + nextTransmitStatus = EXEC_TRANSMIT_TRACKER_FAILED; } - } - else if (copyStatus == CLIENT_COPY_FAILED) - { - nextTransmitStatus = EXEC_TRANSMIT_TRACKER_RETRY; - closed = close(fileDescriptor); - fileDescriptorArray[currentNodeIndex] = -1; - - if (closed < 0) - { - ereport(WARNING, (errcode_for_file_access(), - errmsg("could not close copy file: %m"))); - } + break; } - /* - * We use task tracker logic to manage file transmits as well, but that - * abstraction leaks after we drop into the copy protocol. To make it - * work, we reset transmit tracker's connection for next file transmit. - */ - transmitTracker->connectionBusy = false; - transmitTracker->connectionBusyOnTask = NULL; - - break; - } - - case EXEC_TRANSMIT_TRACKER_RETRY: - { - bool trackerHealthy = false; - bool trackerConnectionUp = false; - - /* - * The task tracker proxy handles connection errors. On the off chance - * that our connection is still up and the transmit tracker misbehaved, - * we capture this as an intermittent tracker failure. - */ - trackerConnectionUp = TrackerConnectionUp(transmitTracker); - if (trackerConnectionUp) - { - transmitTracker->trackerFailureCount++; - } - - trackerHealthy = TrackerHealthy(transmitTracker); - if (trackerHealthy) + case EXEC_TRANSMIT_TRACKER_FAILED: { + /* + * This case exists to signal to the caller that we failed. The caller + * is now responsible for reassigning the transmit task (and downstream + * SQL task dependencies) and running the appropriate recovery logic. 
+ */ nextTransmitStatus = EXEC_TRANSMIT_UNASSIGNED; + break; } - else + + case EXEC_TRANSMIT_DONE: { - nextTransmitStatus = EXEC_TRANSMIT_TRACKER_FAILED; + /* we are done with fetching task results to the master node */ + nextTransmitStatus = EXEC_TRANSMIT_DONE; + break; } - break; - } - - case EXEC_TRANSMIT_TRACKER_FAILED: - { - /* - * This case exists to signal to the caller that we failed. The caller - * is now responsible for reassigning the transmit task (and downstream - * SQL task dependencies) and running the appropriate recovery logic. - */ - nextTransmitStatus = EXEC_TRANSMIT_UNASSIGNED; - break; - } - - case EXEC_TRANSMIT_DONE: - { - /* we are done with fetching task results to the master node */ - nextTransmitStatus = EXEC_TRANSMIT_DONE; - break; - } - - default: - { - /* we fatal here to avoid leaking client-side resources */ - ereport(FATAL, (errmsg("invalid transmit status: %d", currentTransmitStatus))); - break; - } + default: + { + /* we fatal here to avoid leaking client-side resources */ + ereport(FATAL, (errmsg("invalid transmit status: %d", + currentTransmitStatus))); + break; + } } /* update file transmit status for most recent transmit tracker */ @@ -2317,7 +2321,7 @@ AssignQueuedTasks(TaskTracker *taskTracker) { StringInfo taskAssignmentQuery = taskState->taskAssignmentQuery; - if(taskAssignmentCount > 0) + if (taskAssignmentCount > 0) { appendStringInfo(multiAssignQuery, ";"); } @@ -2336,7 +2340,7 @@ AssignQueuedTasks(TaskTracker *taskTracker) taskState = (TrackerTaskState *) hash_seq_search(&status); } - if(taskAssignmentCount > 0) + if (taskAssignmentCount > 0) { void *queryResult = NULL; int rowCount = 0; @@ -2833,7 +2837,8 @@ TrackerHashCleanupJob(HTAB *taskTrackerHash, Task *jobCleanupTask) if (queryStatus == CLIENT_QUERY_DONE) { ereport(DEBUG4, (errmsg("completed cleanup query for job " UINT64_FORMAT - " on node \"%s:%u\"", jobId, nodeName, nodePort))); + " on node \"%s:%u\"", jobId, nodeName, + nodePort))); /* clear connection for 
future cleanup queries */ taskTracker->connectionBusy = false; diff --git a/src/backend/distributed/executor/multi_utility.c b/src/backend/distributed/executor/multi_utility.c index ccd05c021..be4148f75 100644 --- a/src/backend/distributed/executor/multi_utility.c +++ b/src/backend/distributed/executor/multi_utility.c @@ -39,9 +39,9 @@ */ struct DropRelationCallbackState { - char relkind; - Oid heapOid; - bool concurrent; + char relkind; + Oid heapOid; + bool concurrent; }; @@ -190,10 +190,10 @@ multi_ProcessUtility(Node *parsetree, } else if (IsA(parsetree, CreateRoleStmt) && CitusDBHasBeenLoaded()) { - ereport(NOTICE, (errmsg("CitusDB does not support CREATE ROLE/USER " - "for distributed databases"), - errdetail("Multiple roles are currently supported " - "only for local tables"))); + ereport(NOTICE, (errmsg("CitusDB does not support CREATE ROLE/USER " + "for distributed databases"), + errdetail("Multiple roles are currently supported " + "only for local tables"))); } /* now drop into standard process utility */ @@ -757,7 +757,7 @@ IsAlterTableRenameStmt(RenameStmt *renameStmt) isAlterTableRenameStmt = true; } -#if (PG_VERSION_NUM >=90500) +#if (PG_VERSION_NUM >= 90500) else if (renameStmt->renameType == OBJECT_TABCONSTRAINT) { isAlterTableRenameStmt = true; @@ -905,8 +905,9 @@ ExecuteCommandOnWorkerShards(Oid relationId, const char *commandString, } else { - ereport(DEBUG2, (errmsg("applied command on shard " UINT64_FORMAT " on " - "node %s:%d", shardId, workerName, workerPort))); + ereport(DEBUG2, (errmsg("applied command on shard " UINT64_FORMAT + " on node %s:%d", shardId, workerName, + workerPort))); } isFirstPlacement = false; diff --git a/src/backend/distributed/master/master_create_shards.c b/src/backend/distributed/master/master_create_shards.c index 0617d8d59..9adc0e21c 100644 --- a/src/backend/distributed/master/master_create_shards.c +++ b/src/backend/distributed/master/master_create_shards.c @@ -185,7 +185,7 @@ 
master_create_worker_shards(PG_FUNCTION_ARGS) LockShardDistributionMetadata(shardId, ExclusiveLock); CreateShardPlacements(shardId, ddlCommandList, workerNodeList, - roundRobinNodeIndex, replicationFactor); + roundRobinNodeIndex, replicationFactor); InsertShardRow(distributedTableId, shardId, shardStorageType, minHashTokenText, maxHashTokenText); diff --git a/src/backend/distributed/master/master_delete_protocol.c b/src/backend/distributed/master/master_delete_protocol.c index 35835c7d0..efeeb78bc 100644 --- a/src/backend/distributed/master/master_delete_protocol.c +++ b/src/backend/distributed/master/master_delete_protocol.c @@ -115,9 +115,9 @@ master_apply_delete_command(PG_FUNCTION_ARGS) if ((partitionMethod == DISTRIBUTE_BY_HASH) && (deleteCriteria != NULL)) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot delete from distributed table"), - errdetail("Delete statements on hash-partitioned tables " - "with where clause is not supported"))); + errmsg("cannot delete from distributed table"), + errdetail("Delete statements on hash-partitioned tables " + "with where clause is not supported"))); } CheckDeleteCriteria(deleteCriteria); @@ -138,15 +138,15 @@ master_apply_delete_command(PG_FUNCTION_ARGS) else { deletableShardIntervalList = ShardsMatchingDeleteCriteria(relationId, - shardIntervalList, - deleteCriteria); + shardIntervalList, + deleteCriteria); } foreach(shardIntervalCell, deletableShardIntervalList) { List *shardPlacementList = NIL; List *droppedPlacementList = NIL; - List *lingeringPlacementList= NIL; + List *lingeringPlacementList = NIL; ListCell *shardPlacementCell = NULL; ListCell *droppedPlacementCell = NULL; ListCell *lingeringPlacementCell = NULL; @@ -167,7 +167,8 @@ master_apply_delete_command(PG_FUNCTION_ARGS) shardPlacementList = ShardPlacementList(shardId); foreach(shardPlacementCell, shardPlacementList) { - ShardPlacement *shardPlacement = (ShardPlacement *) lfirst(shardPlacementCell); + ShardPlacement *shardPlacement 
= + (ShardPlacement *) lfirst(shardPlacementCell); char *workerName = shardPlacement->nodeName; uint32 workerPort = shardPlacement->nodePort; bool dropSuccessful = false; @@ -176,14 +177,17 @@ master_apply_delete_command(PG_FUNCTION_ARGS) char tableType = get_rel_relkind(relationId); if (tableType == RELKIND_RELATION) { - appendStringInfo(workerDropQuery, DROP_REGULAR_TABLE_COMMAND, quotedShardName); + appendStringInfo(workerDropQuery, DROP_REGULAR_TABLE_COMMAND, + quotedShardName); } else if (tableType == RELKIND_FOREIGN_TABLE) { - appendStringInfo(workerDropQuery, DROP_FOREIGN_TABLE_COMMAND, quotedShardName); + appendStringInfo(workerDropQuery, DROP_FOREIGN_TABLE_COMMAND, + quotedShardName); } - dropSuccessful = ExecuteRemoteCommand(workerName, workerPort, workerDropQuery); + dropSuccessful = ExecuteRemoteCommand(workerName, workerPort, + workerDropQuery); if (dropSuccessful) { droppedPlacementList = lappend(droppedPlacementList, shardPlacement); @@ -227,12 +231,13 @@ master_apply_delete_command(PG_FUNCTION_ARGS) if (QueryCancelPending) { - ereport(WARNING, (errmsg("cancel requests are ignored during shard deletion"))); + ereport(WARNING, (errmsg("cancel requests are ignored during shard " + "deletion"))); QueryCancelPending = false; } RESUME_INTERRUPTS(); - } + } deleteCriteriaShardCount = list_length(deletableShardIntervalList); PG_RETURN_INT32(deleteCriteriaShardCount); @@ -257,7 +262,7 @@ CheckTableCount(Query *deleteQuery) static void CheckDeleteCriteria(Node *deleteCriteria) { - bool simpleOpExpression = true; + bool simpleOpExpression = true; if (deleteCriteria == NULL) { @@ -286,7 +291,7 @@ CheckDeleteCriteria(Node *deleteCriteria) } else { - simpleOpExpression = false; + simpleOpExpression = false; } if (!simpleOpExpression) @@ -298,15 +303,15 @@ CheckDeleteCriteria(Node *deleteCriteria) } - /* - * CheckPartitionColumn checks that the given where clause is based only on the - * partition key of the given relation id. 
- */ +/* + * CheckPartitionColumn checks that the given where clause is based only on the + * partition key of the given relation id. + */ static void CheckPartitionColumn(Oid relationId, Node *whereClause) { Var *partitionColumn = PartitionKey(relationId); - ListCell *columnCell = NULL; + ListCell *columnCell = NULL; List *columnList = pull_var_clause_default(whereClause); foreach(columnCell, columnList) @@ -332,7 +337,7 @@ CheckPartitionColumn(Oid relationId, Node *whereClause) */ static List * ShardsMatchingDeleteCriteria(Oid relationId, List *shardIntervalList, - Node *deleteCriteria) + Node *deleteCriteria) { List *dropShardIntervalList = NIL; List *deleteCriteriaList = NIL; diff --git a/src/backend/distributed/master/master_metadata_utility.c b/src/backend/distributed/master/master_metadata_utility.c index d8ac90997..34db8da9a 100644 --- a/src/backend/distributed/master/master_metadata_utility.c +++ b/src/backend/distributed/master/master_metadata_utility.c @@ -219,7 +219,7 @@ ShardLength(uint64 shardId) if (shardPlacementList == NIL) { ereport(ERROR, (errmsg("could not find length of shard " UINT64_FORMAT, shardId), - errdetail("Could not find any shard placements for the shard."))); + errdetail("Could not find any shard placements for the shard."))); } else { diff --git a/src/backend/distributed/master/master_node_protocol.c b/src/backend/distributed/master/master_node_protocol.c index fc9413666..064756d77 100644 --- a/src/backend/distributed/master/master_node_protocol.c +++ b/src/backend/distributed/master/master_node_protocol.c @@ -49,7 +49,7 @@ /* Shard related configuration */ int ShardReplicationFactor = 2; /* desired replication factor for shards */ -int ShardMaxSize = 1048576; /* maximum size in KB one shard can grow to */ +int ShardMaxSize = 1048576; /* maximum size in KB one shard can grow to */ int ShardPlacementPolicy = SHARD_PLACEMENT_ROUND_ROBIN; @@ -210,7 +210,7 @@ master_get_table_ddl_events(PG_FUNCTION_ARGS) tableDDLEventCell = 
list_head(tableDDLEventList); functionContext->user_fctx = tableDDLEventCell; - + MemoryContextSwitchTo(oldContext); } @@ -226,8 +226,8 @@ master_get_table_ddl_events(PG_FUNCTION_ARGS) if (tableDDLEventCell != NULL) { char *ddlStatement = (char *) lfirst(tableDDLEventCell); - text *ddlStatementText = cstring_to_text(ddlStatement); - + text *ddlStatementText = cstring_to_text(ddlStatement); + functionContext->user_fctx = lnext(tableDDLEventCell); SRF_RETURN_NEXT(functionContext, PointerGetDatum(ddlStatementText)); @@ -252,7 +252,7 @@ Datum master_get_new_shardid(PG_FUNCTION_ARGS) { text *sequenceName = cstring_to_text(SHARDID_SEQUENCE_NAME); - Oid sequenceId = ResolveRelationId(sequenceName); + Oid sequenceId = ResolveRelationId(sequenceName); Datum sequenceIdDatum = ObjectIdGetDatum(sequenceId); /* generate new and unique shardId from sequence */ @@ -281,7 +281,7 @@ master_get_local_first_candidate_nodes(PG_FUNCTION_ARGS) if (SRF_IS_FIRSTCALL()) { - MemoryContext oldContext = NULL; + MemoryContext oldContext = NULL; TupleDesc tupleDescriptor = NULL; uint32 liveNodeCount = 0; bool hasOid = false; @@ -396,7 +396,7 @@ master_get_round_robin_candidate_nodes(PG_FUNCTION_ARGS) if (SRF_IS_FIRSTCALL()) { - MemoryContext oldContext = NULL; + MemoryContext oldContext = NULL; TupleDesc tupleDescriptor = NULL; List *workerNodeList = NIL; TypeFuncClass resultTypeClass = 0; @@ -477,7 +477,7 @@ master_get_active_worker_nodes(PG_FUNCTION_ARGS) if (SRF_IS_FIRSTCALL()) { - MemoryContext oldContext = NULL; + MemoryContext oldContext = NULL; List *workerNodeList = NIL; uint32 workerNodeCount = 0; TupleDesc tupleDescriptor = NULL; @@ -567,7 +567,7 @@ GetTableDDLEvents(Oid relationId) Relation pgIndex = NULL; SysScanDesc scanDescriptor = NULL; - ScanKeyData scanKey[1]; + ScanKeyData scanKey[1]; int scanKeyCount = 1; HeapTuple heapTuple = NULL; @@ -599,13 +599,13 @@ GetTableDDLEvents(Oid relationId) /* fetch table schema and column option definitions */ tableSchemaDef = 
pg_get_tableschemadef_string(relationId); tableColumnOptionsDef = pg_get_tablecolumnoptionsdef_string(relationId); - + tableDDLEventList = lappend(tableDDLEventList, tableSchemaDef); if (tableColumnOptionsDef != NULL) { tableDDLEventList = lappend(tableDDLEventList, tableColumnOptionsDef); } - + /* open system catalog and scan all indexes that belong to this table */ pgIndex = heap_open(IndexRelationId, AccessShareLock); @@ -660,7 +660,7 @@ GetTableDDLEvents(Oid relationId) { statementDef = pg_get_indexdef_string(indexId); } - + /* append found constraint or index definition to the list */ tableDDLEventList = lappend(tableDDLEventList, statementDef); @@ -695,8 +695,8 @@ hostname_client_addr(void) Port *port = MyProcPort; char *remoteHost = NULL; int remoteHostLen = NI_MAXHOST; - int flags = NI_NAMEREQD; /* require fully qualified hostname */ - int nameFound = 0; + int flags = NI_NAMEREQD; /* require fully qualified hostname */ + int nameFound = 0; if (port == NULL) { @@ -709,10 +709,15 @@ hostname_client_addr(void) #ifdef HAVE_IPV6 case AF_INET6: #endif - break; + { + break; + } + default: + { ereport(ERROR, (errmsg("invalid address family in connection"))); break; + } } remoteHost = palloc0(remoteHostLen); diff --git a/src/backend/distributed/master/master_stage_protocol.c b/src/backend/distributed/master/master_stage_protocol.c index 1c13237cc..e4c4f7fca 100644 --- a/src/backend/distributed/master/master_stage_protocol.c +++ b/src/backend/distributed/master/master_stage_protocol.c @@ -93,7 +93,7 @@ master_create_empty_shard(PG_FUNCTION_ARGS) if (partitionMethod == DISTRIBUTE_BY_HASH) { ereport(ERROR, (errmsg("relation \"%s\" is a hash partitioned table", - relationName), + relationName), errdetail("We currently don't support creating shards " "on hash-partitioned tables"))); } @@ -128,7 +128,7 @@ master_create_empty_shard(PG_FUNCTION_ARGS) } CreateShardPlacements(shardId, ddlEventList, candidateNodeList, 0, - ShardReplicationFactor); + ShardReplicationFactor); 
InsertShardRow(relationId, shardId, SHARD_STORAGE_TABLE, nullMinValue, nullMaxValue); @@ -361,7 +361,7 @@ CheckDistributedTable(Oid relationId) */ void CreateShardPlacements(int64 shardId, List *ddlEventList, List *workerNodeList, - int workerStartIndex, int replicationFactor) + int workerStartIndex, int replicationFactor) { int attemptCount = replicationFactor; int workerNodeCount = list_length(workerNodeList); @@ -393,7 +393,7 @@ CreateShardPlacements(int64 shardId, List *ddlEventList, List *workerNodeList, else { ereport(WARNING, (errmsg("could not create shard on \"%s:%u\"", - nodeName, nodePort))); + nodeName, nodePort))); } if (placementsCreated >= replicationFactor) @@ -406,7 +406,7 @@ CreateShardPlacements(int64 shardId, List *ddlEventList, List *workerNodeList, if (placementsCreated < replicationFactor) { ereport(ERROR, (errmsg("could only create %u of %u of required shard replicas", - placementsCreated, replicationFactor))); + placementsCreated, replicationFactor))); } } diff --git a/src/backend/distributed/planner/modify_planner.c b/src/backend/distributed/planner/modify_planner.c index faeb6b6b0..4484e53bb 100644 --- a/src/backend/distributed/planner/modify_planner.c +++ b/src/backend/distributed/planner/modify_planner.c @@ -393,6 +393,7 @@ DistributedModifyTask(Query *query) query->onConflict = RebuildOnConflict(relationId, query->onConflict); } #else + /* always set to false for PG_VERSION_NUM < 90500 */ upsertQuery = false; #endif @@ -414,6 +415,7 @@ DistributedModifyTask(Query *query) #if (PG_VERSION_NUM >= 90500) + /* * RebuildOnConflict rebuilds OnConflictExpr for correct deparsing. 
The function * makes WHERE clause elements explicit and filters dropped columns @@ -433,7 +435,7 @@ RebuildOnConflict(Oid relationId, OnConflictExpr *originalOnConflict) /* Convert onConflictWhere qualifiers to an explicitly and'd clause */ updatedOnConflict->onConflictWhere = - (Node *) make_ands_explicit((List *) onConflictWhere); + (Node *) make_ands_explicit((List *) onConflictWhere); /* * Here we handle dropped columns on the distributed table. onConflictSet @@ -448,7 +450,7 @@ RebuildOnConflict(Oid relationId, OnConflictExpr *originalOnConflict) foreach(targetEntryCell, onConflictSet) { TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell); - FormData_pg_attribute *tableAttribute = tableAttributes[targetEntry->resno -1]; + FormData_pg_attribute *tableAttribute = tableAttributes[targetEntry->resno - 1]; /* skip dropped columns */ if (tableAttribute->attisdropped) @@ -468,6 +470,8 @@ RebuildOnConflict(Oid relationId, OnConflictExpr *originalOnConflict) return updatedOnConflict; } + + #endif diff --git a/src/backend/distributed/planner/multi_explain.c b/src/backend/distributed/planner/multi_explain.c index 35e0ca79b..c7fbdefed 100644 --- a/src/backend/distributed/planner/multi_explain.c +++ b/src/backend/distributed/planner/multi_explain.c @@ -45,7 +45,7 @@ MultiExplainOneQuery(Query *query, IntoClause *into, ExplainState *es, if (localQuery) { PlannedStmt *plan = NULL; - instr_time planstart; + instr_time planstart; instr_time planduration; INSTR_TIME_SET_CURRENT(planstart); diff --git a/src/backend/distributed/planner/multi_join_order.c b/src/backend/distributed/planner/multi_join_order.c index e8cc68f07..04a91d311 100644 --- a/src/backend/distributed/planner/multi_join_order.c +++ b/src/backend/distributed/planner/multi_join_order.c @@ -33,18 +33,18 @@ /* Config variables managed via guc.c */ -int LargeTableShardCount = 4; /* shard counts for a large table */ +int LargeTableShardCount = 4; /* shard counts for a large table */ bool 
LogMultiJoinOrder = false; /* print join order as a debugging aid */ /* Function pointer type definition for join rule evaluation functions */ -typedef JoinOrderNode * (*RuleEvalFunction) (JoinOrderNode *currentJoinNode, - TableEntry *candidateTable, - List *candidateShardList, - List *applicableJoinClauses, - JoinType joinType); +typedef JoinOrderNode *(*RuleEvalFunction) (JoinOrderNode *currentJoinNode, + TableEntry *candidateTable, + List *candidateShardList, + List *applicableJoinClauses, + JoinType joinType); -static char * RuleNameArray[JOIN_RULE_LAST] = {0}; /* ordered join rule names */ -static RuleEvalFunction RuleEvalFunctionArray[JOIN_RULE_LAST] = {0}; /* join rules */ +static char *RuleNameArray[JOIN_RULE_LAST] = { 0 }; /* ordered join rule names */ +static RuleEvalFunction RuleEvalFunctionArray[JOIN_RULE_LAST] = { 0 }; /* join rules */ /* Local functions forward declarations */ @@ -54,7 +54,8 @@ static bool JoinExprListWalker(Node *node, List **joinList); static bool ExtractLeftMostRangeTableIndex(Node *node, int *rangeTableIndex); static List * MergeShardIntervals(List *leftShardIntervalList, List *rightShardIntervalList, JoinType joinType); -static bool ShardIntervalsMatch(List *leftShardIntervalList, List *rightShardIntervalList); +static bool ShardIntervalsMatch(List *leftShardIntervalList, + List *rightShardIntervalList); static List * LoadSortedShardIntervalList(Oid relationId); static List * JoinOrderForTable(TableEntry *firstTable, List *tableEntryList, List *joinClauseList); @@ -68,31 +69,41 @@ static List * TableEntryListDifference(List *lhsTableList, List *rhsTableList); static TableEntry * FindTableEntry(List *tableEntryList, uint32 tableId); /* Local functions forward declarations for join evaluations */ -static JoinOrderNode * EvaluateJoinRules(List *joinedTableList, JoinOrderNode *currentJoinNode, - TableEntry *candidateTable, List *candidateShardList, +static JoinOrderNode * EvaluateJoinRules(List *joinedTableList, + JoinOrderNode 
*currentJoinNode, + TableEntry *candidateTable, + List *candidateShardList, List *joinClauseList, JoinType joinType); static List * RangeTableIdList(List *tableList); static RuleEvalFunction JoinRuleEvalFunction(JoinRuleType ruleType); static char * JoinRuleName(JoinRuleType ruleType); static JoinOrderNode * BroadcastJoin(JoinOrderNode *joinNode, TableEntry *candidateTable, - List *candidateShardList, List *applicableJoinClauses, + List *candidateShardList, + List *applicableJoinClauses, JoinType joinType); static JoinOrderNode * LocalJoin(JoinOrderNode *joinNode, TableEntry *candidateTable, List *candidateShardList, List *applicableJoinClauses, JoinType joinType); static bool JoinOnColumns(Var *currentPartitioncolumn, Var *candidatePartitionColumn, List *joinClauseList); -static JoinOrderNode * SinglePartitionJoin(JoinOrderNode *joinNode, TableEntry *candidateTable, - List *candidateShardList, List *applicableJoinClauses, +static JoinOrderNode * SinglePartitionJoin(JoinOrderNode *joinNode, + TableEntry *candidateTable, + List *candidateShardList, + List *applicableJoinClauses, JoinType joinType); -static JoinOrderNode * DualPartitionJoin(JoinOrderNode *joinNode, TableEntry *candidateTable, - List *candidateShardList, List *applicableJoinClauses, +static JoinOrderNode * DualPartitionJoin(JoinOrderNode *joinNode, + TableEntry *candidateTable, + List *candidateShardList, + List *applicableJoinClauses, JoinType joinType); -static JoinOrderNode * CartesianProduct(JoinOrderNode *joinNode, TableEntry *candidateTable, - List *candidateShardList, List *applicableJoinClauses, +static JoinOrderNode * CartesianProduct(JoinOrderNode *joinNode, + TableEntry *candidateTable, + List *candidateShardList, + List *applicableJoinClauses, JoinType joinType); -static JoinOrderNode * MakeJoinOrderNode(TableEntry *tableEntry, JoinRuleType joinRuleType, - Var *partitionColumn, char partitionMethod); +static JoinOrderNode * MakeJoinOrderNode(TableEntry *tableEntry, JoinRuleType + 
joinRuleType, Var *partitionColumn, + char partitionMethod); /* @@ -106,7 +117,7 @@ List * FixedJoinOrderList(FromExpr *fromExpr, List *tableEntryList) { List *joinList = NIL; - ListCell * joinCell = NULL; + ListCell *joinCell = NULL; List *joinWhereClauseList = NIL; List *joinOrderList = NIL; List *joinedTableList = NIL; @@ -199,7 +210,6 @@ FixedJoinOrderList(FromExpr *fromExpr, List *tableEntryList) "query"), errdetail("Shards of relations in outer join queries " "must have 1-to-1 shard partitioning"))); - } } else @@ -439,7 +449,7 @@ MergeShardIntervals(List *leftShardIntervalList, List *rightShardIntervalList, bool nextMaxSmaller = comparisonResult > 0; if ((shardUnion && nextMaxLarger) || - (!shardUnion && nextMaxSmaller) ) + (!shardUnion && nextMaxSmaller)) { newShardInterval->maxValue = datumCopy(nextMax, typeByValue, typeLen); } @@ -586,7 +596,8 @@ ShardIntervalsMatch(List *leftShardIntervalList, List *rightShardIntervalList) nextRightIntervalCell = lnext(rightShardIntervalCell); if (nextRightIntervalCell != NULL) { - ShardInterval *nextRightInterval = (ShardInterval *) lfirst(nextRightIntervalCell); + ShardInterval *nextRightInterval = + (ShardInterval *) lfirst(nextRightIntervalCell); shardIntervalsIntersect = ShardIntervalsOverlap(leftInterval, nextRightInterval); if (shardIntervalsIntersect) @@ -730,7 +741,7 @@ JoinOrderForTable(TableEntry *firstTable, List *tableEntryList, List *joinClause * BestJoinOrder takes in a list of candidate join orders, and determines the * best join order among these candidates. The function uses two heuristics for * this. First, the function chooses join orders that have the fewest number of - * join operators that cause large data transfers. Second, the function chooses + * join operators that cause large data transfers. Second, the function chooses * join orders where large data transfers occur later in the execution. 
*/ static List * @@ -1009,7 +1020,7 @@ EvaluateJoinRules(List *joinedTableList, JoinOrderNode *currentJoinNode, uint32 candidateTableId = 0; List *joinedTableIdList = NIL; List *applicableJoinClauses = NIL; - uint32 lowestValidIndex = JOIN_RULE_INVALID_FIRST + 1; + uint32 lowestValidIndex = JOIN_RULE_INVALID_FIRST + 1; uint32 highestValidIndex = JOIN_RULE_LAST - 1; uint32 ruleIndex = 0; @@ -1028,11 +1039,11 @@ EvaluateJoinRules(List *joinedTableList, JoinOrderNode *currentJoinNode, JoinRuleType ruleType = (JoinRuleType) ruleIndex; RuleEvalFunction ruleEvalFunction = JoinRuleEvalFunction(ruleType); - nextJoinNode = (*ruleEvalFunction) (currentJoinNode, - candidateTable, - candidateShardList, - applicableJoinClauses, - joinType); + nextJoinNode = (*ruleEvalFunction)(currentJoinNode, + candidateTable, + candidateShardList, + applicableJoinClauses, + joinType); /* break after finding the first join rule that applies */ if (nextJoinNode != NULL) diff --git a/src/backend/distributed/planner/multi_logical_optimizer.c b/src/backend/distributed/planner/multi_logical_optimizer.c index 3aebfcb37..7acf85d7e 100644 --- a/src/backend/distributed/planner/multi_logical_optimizer.c +++ b/src/backend/distributed/planner/multi_logical_optimizer.c @@ -91,7 +91,8 @@ static void ParentSetNewChild(MultiNode *parentNode, MultiNode *oldChildNode, /* Local functions forward declarations for aggregate expressions */ static void ApplyExtendedOpNodes(MultiExtendedOp *originalNode, - MultiExtendedOp *masterNode, MultiExtendedOp *workerNode); + MultiExtendedOp *masterNode, + MultiExtendedOp *workerNode); static void TransformSubqueryNode(MultiTable *subqueryNode); static MultiExtendedOp * MasterExtendedOpNode(MultiExtendedOp *originalOpNode); static Node * MasterAggregateMutator(Node *originalNode, AttrNumber *columnId); @@ -117,7 +118,8 @@ static void ErrorIfUnsupportedArrayAggregate(Aggref *arrayAggregateExpression); static void ErrorIfUnsupportedAggregateDistinct(Aggref *aggregateExpression, 
MultiNode *logicalPlanNode); static Var * AggregateDistinctColumn(Aggref *aggregateExpression); -static bool TablePartitioningSupportsDistinct(List *tableNodeList, MultiExtendedOp *opNode, +static bool TablePartitioningSupportsDistinct(List *tableNodeList, + MultiExtendedOp *opNode, Var *distinctColumn); static bool GroupedByColumn(List *groupClauseList, List *targetList, Var *column); @@ -488,7 +490,7 @@ AddressProjectSpecialConditions(MultiProject *projectNode) /* * We check if we need to include any child columns in the project node to - * address the following special conditions. + * address the following special conditions. * * SNC1: project node must include child node's projected columns, or * SNC2: project node must include child node's partition column, or @@ -637,7 +639,7 @@ Commutative(MultiUnaryNode *parentNode, MultiUnaryNode *childNode) { PushDownStatus pushDownStatus = PUSH_DOWN_NOT_VALID; CitusNodeTag parentNodeTag = CitusNodeTag(parentNode); - CitusNodeTag childNodeTag = CitusNodeTag(childNode); + CitusNodeTag childNodeTag = CitusNodeTag(childNode); /* we cannot be commutative with non-query operators */ if (childNodeTag == T_MultiTreeRoot || childNodeTag == T_MultiTable) @@ -692,7 +694,7 @@ Distributive(MultiUnaryNode *parentNode, MultiBinaryNode *childNode) { PushDownStatus pushDownStatus = PUSH_DOWN_NOT_VALID; CitusNodeTag parentNodeTag = CitusNodeTag(parentNode); - CitusNodeTag childNodeTag = CitusNodeTag(childNode); + CitusNodeTag childNodeTag = CitusNodeTag(childNode); /* special condition checks for partition operator are not implemented */ Assert(parentNodeTag != T_MultiPartition); @@ -751,7 +753,7 @@ Factorizable(MultiBinaryNode *parentNode, MultiUnaryNode *childNode) { PullUpStatus pullUpStatus = PULL_UP_NOT_VALID; CitusNodeTag parentNodeTag = CitusNodeTag(parentNode); - CitusNodeTag childNodeTag = CitusNodeTag(childNode); + CitusNodeTag childNodeTag = CitusNodeTag(childNode); /* * The following nodes are factorizable with their parents, 
but we don't @@ -1220,7 +1222,7 @@ MasterExtendedOpNode(MultiExtendedOp *originalOpNode) bool hasAggregates = contain_agg_clause((Node *) originalExpression); if (hasAggregates) { - Node *newNode = MasterAggregateMutator((Node*) originalExpression, + Node *newNode = MasterAggregateMutator((Node *) originalExpression, &columnId); newExpression = (Expr *) newNode; } @@ -1826,7 +1828,7 @@ WorkerAggregateExpressionList(Aggref *originalAggregate) static AggregateType GetAggregateType(Oid aggFunctionId) { - char *aggregateProcName = NULL; + char *aggregateProcName = NULL; uint32 aggregateCount = 0; uint32 aggregateIndex = 0; bool found = false; @@ -1980,22 +1982,30 @@ CountDistinctHashFunctionName(Oid argumentType) switch (argumentType) { case INT4OID: + { hashFunctionName = pstrdup(HLL_HASH_INTEGER_FUNC_NAME); break; + } case INT8OID: + { hashFunctionName = pstrdup(HLL_HASH_BIGINT_FUNC_NAME); break; + } case TEXTOID: case BPCHAROID: case VARCHAROID: + { hashFunctionName = pstrdup(HLL_HASH_TEXT_FUNC_NAME); break; + } default: + { hashFunctionName = pstrdup(HLL_HASH_ANY_FUNC_NAME); break; + } } return hashFunctionName; @@ -2479,7 +2489,7 @@ ErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerQueryHasLimit) if (subqueryTree->setOperations) { SetOperationStmt *setOperationStatement = - (SetOperationStmt *) subqueryTree->setOperations; + (SetOperationStmt *) subqueryTree->setOperations; if (setOperationStatement->op == SETOP_UNION) { @@ -2563,7 +2573,7 @@ ErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerQueryHasLimit) List *joinTreeTableIndexList = NIL; uint32 joiningTableCount = 0; - ExtractRangeTableIndexWalker((Node*) subqueryTree->jointree, + ExtractRangeTableIndexWalker((Node *) subqueryTree->jointree, &joinTreeTableIndexList); joiningTableCount = list_length(joinTreeTableIndexList); @@ -2587,7 +2597,7 @@ ErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerQueryHasLimit) List *distinctTargetEntryList = 
GroupTargetEntryList(distinctClauseList, targetEntryList); bool distinctOnPartitionColumn = - TargetListOnPartitionColumn(subqueryTree, distinctTargetEntryList); + TargetListOnPartitionColumn(subqueryTree, distinctTargetEntryList); if (!distinctOnPartitionColumn) { preconditionsSatisfied = false; @@ -2609,7 +2619,7 @@ ErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerQueryHasLimit) foreach(rangeTableEntryCell, subqueryEntryList) { RangeTblEntry *rangeTableEntry = - (RangeTblEntry *) lfirst(rangeTableEntryCell); + (RangeTblEntry *) lfirst(rangeTableEntryCell); Query *innerSubquery = rangeTableEntry->subquery; ErrorIfCannotPushdownSubquery(innerSubquery, outerQueryHasLimit); @@ -2639,7 +2649,7 @@ ErrorIfUnsupportedTableCombination(Query *queryTree) * Extract all range table indexes from the join tree. Note that sub-queries * that get pulled up by PostgreSQL don't appear in this join tree. */ - ExtractRangeTableIndexWalker((Node*) queryTree->jointree, &joinTreeTableIndexList); + ExtractRangeTableIndexWalker((Node *) queryTree->jointree, &joinTreeTableIndexList); foreach(joinTreeTableIndexCell, joinTreeTableIndexList) { /* @@ -2768,7 +2778,7 @@ ErrorIfUnsupportedUnionQuery(Query *unionQuery) leftQueryOnPartitionColumn = TargetListOnPartitionColumn(leftQuery, leftQuery->targetList); rightQueryOnPartitionColumn = TargetListOnPartitionColumn(rightQuery, - rightQuery->targetList); + rightQuery->targetList); if (!(leftQueryOnPartitionColumn && rightQueryOnPartitionColumn)) { @@ -2807,7 +2817,7 @@ GroupTargetEntryList(List *groupClauseList, List *targetEntryList) { SortGroupClause *groupClause = (SortGroupClause *) lfirst(groupClauseCell); TargetEntry *groupTargetEntry = - get_sortgroupclause_tle(groupClause, targetEntryList); + get_sortgroupclause_tle(groupClause, targetEntryList); groupTargetEntryList = lappend(groupTargetEntryList, groupTargetEntry); } @@ -2890,7 +2900,7 @@ IsPartitionColumnRecursive(Expr *columnExpression, Query *query) else if 
(IsA(columnExpression, FieldSelect)) { FieldSelect *compositeField = (FieldSelect *) columnExpression; - Expr *fieldExpression = compositeField->arg; + Expr *fieldExpression = compositeField->arg; if (IsA(fieldExpression, Var)) { @@ -2909,7 +2919,7 @@ IsPartitionColumnRecursive(Expr *columnExpression, Query *query) return false; } - rangeTableEntryIndex = candidateColumn->varno - 1; + rangeTableEntryIndex = candidateColumn->varno - 1; rangeTableEntry = list_nth(rangetableList, rangeTableEntryIndex); if (rangeTableEntry->rtekind == RTE_RELATION) @@ -2980,7 +2990,7 @@ CompositeFieldRecursive(Expr *expression, Query *query) return NULL; } - rangeTableEntryIndex = candidateColumn->varno - 1; + rangeTableEntryIndex = candidateColumn->varno - 1; rangeTableEntry = list_nth(rangetableList, rangeTableEntryIndex); if (rangeTableEntry->rtekind == RTE_SUBQUERY) @@ -3019,7 +3029,7 @@ FullCompositeFieldList(List *compositeFieldList) uint32 fieldIndex = 0; ListCell *fieldSelectCell = NULL; - foreach (fieldSelectCell, compositeFieldList) + foreach(fieldSelectCell, compositeFieldList) { FieldSelect *fieldSelect = (FieldSelect *) lfirst(fieldSelectCell); uint32 compositeFieldIndex = 0; @@ -3226,9 +3236,10 @@ SupportedLateralQuery(Query *parentQuery, Query *lateralQuery) if (outerColumnIsPartitionColumn && localColumnIsPartitionColumn) { FieldSelect *outerCompositeField = - CompositeFieldRecursive(outerQueryExpression, parentQuery); + CompositeFieldRecursive(outerQueryExpression, parentQuery); FieldSelect *localCompositeField = - CompositeFieldRecursive(localQueryExpression, lateralQuery); + CompositeFieldRecursive(localQueryExpression, lateralQuery); + /* * If partition colums are composite fields, add them to list to * check later if all composite fields are used. 
@@ -3251,12 +3262,12 @@ SupportedLateralQuery(Query *parentQuery, Query *lateralQuery) } /* check composite fields */ - if(!supportedLateralQuery) + if (!supportedLateralQuery) { bool outerFullCompositeFieldList = - FullCompositeFieldList(outerCompositeFieldList); + FullCompositeFieldList(outerCompositeFieldList); bool localFullCompositeFieldList = - FullCompositeFieldList(localCompositeFieldList); + FullCompositeFieldList(localCompositeFieldList); if (outerFullCompositeFieldList && localFullCompositeFieldList) { @@ -3301,15 +3312,15 @@ JoinOnPartitionColumn(Query *query) if (isLeftColumnPartitionColumn && isRightColumnPartitionColumn) { FieldSelect *leftCompositeField = - CompositeFieldRecursive(leftArgument, query); + CompositeFieldRecursive(leftArgument, query); FieldSelect *rightCompositeField = - CompositeFieldRecursive(rightArgument, query); + CompositeFieldRecursive(rightArgument, query); /* * If partition colums are composite fields, add them to list to * check later if all composite fields are used. 
*/ - if(leftCompositeField && rightCompositeField) + if (leftCompositeField && rightCompositeField) { leftCompositeFieldList = lappend(leftCompositeFieldList, leftCompositeField); @@ -3318,7 +3329,7 @@ JoinOnPartitionColumn(Query *query) } /* if both sides are not composite fields, they are normal columns */ - if(!(leftCompositeField && rightCompositeField)) + if (!(leftCompositeField && rightCompositeField)) { joinOnPartitionColumn = true; break; @@ -3327,12 +3338,12 @@ JoinOnPartitionColumn(Query *query) } /* check composite fields */ - if(!joinOnPartitionColumn) + if (!joinOnPartitionColumn) { bool leftFullCompositeFieldList = - FullCompositeFieldList(leftCompositeFieldList); + FullCompositeFieldList(leftCompositeFieldList); bool rightFullCompositeFieldList = - FullCompositeFieldList(rightCompositeFieldList); + FullCompositeFieldList(rightCompositeFieldList); if (leftFullCompositeFieldList && rightFullCompositeFieldList) { @@ -3409,7 +3420,7 @@ ErrorIfUnsupportedShardDistribution(Query *query) /* check if this table has 1-1 shard partitioning with first table */ coPartitionedTables = CoPartitionedTables(firstShardIntervalList, - currentShardIntervalList); + currentShardIntervalList); if (!coPartitionedTables) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), @@ -3437,7 +3448,7 @@ RelationIdList(Query *query) foreach(tableEntryCell, tableEntryList) { - TableEntry *tableEntry = (TableEntry *) lfirst(tableEntryCell); + TableEntry *tableEntry = (TableEntry *) lfirst(tableEntryCell); Oid relationId = tableEntry->relationId; relationIdList = list_append_unique_oid(relationIdList, relationId); @@ -3617,7 +3628,7 @@ ExtractQueryWalker(Node *node, List **queryList) Query *query = (Query *) node; (*queryList) = lappend(*queryList, query); - walkerResult = query_tree_walker(query, ExtractQueryWalker, queryList, + walkerResult = query_tree_walker(query, ExtractQueryWalker, queryList, QTW_EXAMINE_RTES); } @@ -3641,7 +3652,7 @@ LeafQuery(Query *queryTree) * Extract 
all range table indexes from the join tree. Note that sub-queries * that get pulled up by PostgreSQL don't appear in this join tree. */ - ExtractRangeTableIndexWalker((Node*) queryTree->jointree, &joinTreeTableIndexList); + ExtractRangeTableIndexWalker((Node *) queryTree->jointree, &joinTreeTableIndexList); foreach(joinTreeTableIndexCell, joinTreeTableIndexList) { /* @@ -3725,7 +3736,7 @@ PartitionColumnOpExpressionList(Query *query) } else if (IsA(leftArgument, Const) && IsA(leftArgument, Var)) { - candidatePartitionColumn = (Var *) rightArgument; + candidatePartitionColumn = (Var *) rightArgument; } else { diff --git a/src/backend/distributed/planner/multi_logical_planner.c b/src/backend/distributed/planner/multi_logical_planner.c index 8f866c074..a9edcfc39 100644 --- a/src/backend/distributed/planner/multi_logical_planner.c +++ b/src/backend/distributed/planner/multi_logical_planner.c @@ -1,7 +1,7 @@ /*------------------------------------------------------------------------- * * multi_logical_planner.c - * + * * Routines for constructing a logical plan tree from the given Query tree * structure. This new logical plan is based on multi-relational algebra rules. 
* @@ -39,11 +39,11 @@ bool SubqueryPushdown = false; /* is subquery pushdown enabled */ /* Function pointer type definition for apply join rule functions */ -typedef MultiNode * (*RuleApplyFunction) (MultiNode *leftNode, MultiNode *rightNode, - Var *partitionColumn, JoinType joinType, - List *joinClauses); +typedef MultiNode *(*RuleApplyFunction) (MultiNode *leftNode, MultiNode *rightNode, + Var *partitionColumn, JoinType joinType, + List *joinClauses); -static RuleApplyFunction RuleApplyFunctionArray[JOIN_RULE_LAST] = {0}; /* join rules */ +static RuleApplyFunction RuleApplyFunctionArray[JOIN_RULE_LAST] = { 0 }; /* join rules */ /* Local functions forward declarations */ static MultiNode * MultiPlanTree(Query *queryTree); @@ -157,7 +157,7 @@ SubqueryEntryList(Query *queryTree) * only walk over range table entries at this level and do not recurse into * subqueries. */ - ExtractRangeTableIndexWalker((Node*) queryTree->jointree, &joinTreeTableIndexList); + ExtractRangeTableIndexWalker((Node *) queryTree->jointree, &joinTreeTableIndexList); foreach(joinTreeTableIndexCell, joinTreeTableIndexList) { /* @@ -285,6 +285,7 @@ MultiPlanTree(Query *queryTree) else { bool hasOuterJoin = false; + /* * We calculate the join order using the list of tables in the query and * the join clauses between them. Note that this function owns the table @@ -465,6 +466,7 @@ ErrorIfQueryNotSupported(Query *queryTree) #if (PG_VERSION_NUM >= 90500) + /* HasTablesample returns tree if the query contains tablesample */ static bool HasTablesample(Query *queryTree) @@ -485,6 +487,8 @@ HasTablesample(Query *queryTree) return hasTablesample; } + + #endif @@ -529,7 +533,8 @@ HasUnsupportedJoinWalker(Node *node, void *context) * ErrorIfSubqueryNotSupported checks that we can perform distributed planning for * the given subquery. 
*/ -static void ErrorIfSubqueryNotSupported(Query *subqueryTree) +static void +ErrorIfSubqueryNotSupported(Query *subqueryTree) { char *errorDetail = NULL; bool preconditionsSatisfied = true; @@ -587,7 +592,6 @@ HasOuterJoin(Query *queryTree) static bool HasOuterJoinWalker(Node *node, void *context) { - bool hasOuterJoin = false; if (node == NULL) { @@ -657,7 +661,7 @@ HasComplexRangeTableType(Query *queryTree) * Extract all range table indexes from the join tree. Note that sub-queries * that get pulled up by PostgreSQL don't appear in this join tree. */ - ExtractRangeTableIndexWalker((Node*) queryTree->jointree, &joinTreeTableIndexList); + ExtractRangeTableIndexWalker((Node *) queryTree->jointree, &joinTreeTableIndexList); foreach(joinTreeTableIndexCell, joinTreeTableIndexList) { /* @@ -675,7 +679,7 @@ HasComplexRangeTableType(Query *queryTree) * subquery. */ if (rangeTableEntry->rtekind != RTE_RELATION && - rangeTableEntry->rtekind != RTE_SUBQUERY) + rangeTableEntry->rtekind != RTE_SUBQUERY) { hasComplexRangeTableType = true; } @@ -966,7 +970,7 @@ TableEntryList(List *rangeTableList) foreach(rangeTableCell, rangeTableList) { - RangeTblEntry *rangeTableEntry = (RangeTblEntry *) lfirst(rangeTableCell); + RangeTblEntry *rangeTableEntry = (RangeTblEntry *) lfirst(rangeTableCell); if (rangeTableEntry->rtekind == RTE_RELATION) { @@ -1178,8 +1182,8 @@ IsSelectClause(Node *clause) /* we currently consider the following nodes as select clauses */ NodeTag nodeTag = nodeTag(clause); - if ( !(nodeTag == T_OpExpr || nodeTag == T_ScalarArrayOpExpr || - nodeTag == T_NullTest || nodeTag == T_BooleanTest) ) + if (!(nodeTag == T_OpExpr || nodeTag == T_ScalarArrayOpExpr || + nodeTag == T_NullTest || nodeTag == T_BooleanTest)) { return false; } @@ -1317,9 +1321,9 @@ UnaryOperator(MultiNode *node) { bool unaryOperator = false; - if (CitusIsA(node, MultiTreeRoot) || CitusIsA(node, MultiTable) || - CitusIsA(node, MultiCollect) || CitusIsA(node, MultiSelect) || - CitusIsA(node, 
MultiProject) || CitusIsA(node, MultiPartition) || + if (CitusIsA(node, MultiTreeRoot) || CitusIsA(node, MultiTable) || + CitusIsA(node, MultiCollect) || CitusIsA(node, MultiSelect) || + CitusIsA(node, MultiProject) || CitusIsA(node, MultiPartition) || CitusIsA(node, MultiExtendedOp)) { unaryOperator = true; @@ -1403,7 +1407,7 @@ FindNodesOfType(MultiNode *node, int type) } else if (BinaryOperator(node)) { - MultiNode *leftChildNode = ((MultiBinaryNode *) node)->leftChildNode; + MultiNode *leftChildNode = ((MultiBinaryNode *) node)->leftChildNode; MultiNode *rightChildNode = ((MultiBinaryNode *) node)->rightChildNode; List *leftChildNodeList = FindNodesOfType(leftChildNode, type); @@ -1533,9 +1537,9 @@ ExtractRangeTableEntryWalker(Node *node, List **rangeTableList) List * pull_var_clause_default(Node *node) { - List *columnList = pull_var_clause(node, PVC_RECURSE_AGGREGATES, - PVC_REJECT_PLACEHOLDERS); - return columnList; + List *columnList = pull_var_clause(node, PVC_RECURSE_AGGREGATES, + PVC_REJECT_PLACEHOLDERS); + return columnList; } @@ -1552,7 +1556,7 @@ ApplyJoinRule(MultiNode *leftNode, MultiNode *rightNode, JoinRuleType ruleType, MultiNode *multiNode = NULL; List *applicableJoinClauses = NIL; - List *leftTableIdList = OutputTableIdList(leftNode); + List *leftTableIdList = OutputTableIdList(leftNode); List *rightTableIdList = OutputTableIdList(rightNode); int rightTableIdCount = 0; uint32 rightTableId = 0; @@ -1567,8 +1571,8 @@ ApplyJoinRule(MultiNode *leftNode, MultiNode *rightNode, JoinRuleType ruleType, /* call the join rule application function to create the new join node */ ruleApplyFunction = JoinRuleApplyFunction(ruleType); - multiNode = (*ruleApplyFunction) (leftNode, rightNode, partitionColumn, - joinType, applicableJoinClauses); + multiNode = (*ruleApplyFunction)(leftNode, rightNode, partitionColumn, + joinType, applicableJoinClauses); if (joinType != JOIN_INNER && CitusIsA(multiNode, MultiJoin)) { @@ -1918,7 +1922,7 @@ ErrorIfSubqueryJoin(Query 
*queryTree) * Extract all range table indexes from the join tree. Note that sub-queries * that get pulled up by PostgreSQL don't appear in this join tree. */ - ExtractRangeTableIndexWalker((Node*) queryTree->jointree, &joinTreeTableIndexList); + ExtractRangeTableIndexWalker((Node *) queryTree->jointree, &joinTreeTableIndexList); joiningRangeTableCount = list_length(joinTreeTableIndexList); if (joiningRangeTableCount > 1) diff --git a/src/backend/distributed/planner/multi_master_planner.c b/src/backend/distributed/planner/multi_master_planner.c index 88fda53a3..b328e2e7c 100644 --- a/src/backend/distributed/planner/multi_master_planner.c +++ b/src/backend/distributed/planner/multi_master_planner.c @@ -122,7 +122,7 @@ BuildAggregatePlan(Query *masterQuery, Plan *subPlan) AggStrategy aggregateStrategy = AGG_PLAIN; AggClauseCosts aggregateCosts; AttrNumber *groupColumnIdArray = NULL; - List *aggregateTargetList = NIL; + List *aggregateTargetList = NIL; List *groupColumnList = NIL; List *columnList = NIL; ListCell *columnCell = NULL; @@ -168,13 +168,13 @@ BuildAggregatePlan(Query *masterQuery, Plan *subPlan) /* finally create the plan */ #if (PG_VERSION_NUM >= 90500) - aggregatePlan = make_agg(NULL, aggregateTargetList, NIL, aggregateStrategy, - &aggregateCosts, groupColumnCount, groupColumnIdArray, - groupColumnOpArray, NIL, rowEstimate, subPlan); + aggregatePlan = make_agg(NULL, aggregateTargetList, NIL, aggregateStrategy, + &aggregateCosts, groupColumnCount, groupColumnIdArray, + groupColumnOpArray, NIL, rowEstimate, subPlan); #else - aggregatePlan = make_agg(NULL, aggregateTargetList, NIL, aggregateStrategy, - &aggregateCosts, groupColumnCount, groupColumnIdArray, - groupColumnOpArray, rowEstimate, subPlan); + aggregatePlan = make_agg(NULL, aggregateTargetList, NIL, aggregateStrategy, + &aggregateCosts, groupColumnCount, groupColumnIdArray, + groupColumnOpArray, rowEstimate, subPlan); #endif return aggregatePlan; @@ -211,7 +211,7 @@ BuildSelectStatement(Query 
*masterQuery, char *masterTableName, rangeTableEntry = copyObject(queryRangeTableEntry); rangeTableEntry->rtekind = RTE_RELATION; rangeTableEntry->eref = makeAlias(masterTableName, NIL); - rangeTableEntry->relid = 0; /* to be filled in exec_Start */ + rangeTableEntry->relid = 0; /* to be filled in exec_Start */ rangeTableEntry->inh = false; rangeTableEntry->inFromCl = true; @@ -220,7 +220,7 @@ BuildSelectStatement(Query *masterQuery, char *masterTableName, /* (2) build and initialize sequential scan node */ sequentialScan = makeNode(SeqScan); - sequentialScan->scanrelid = 1; /* always one */ + sequentialScan->scanrelid = 1; /* always one */ /* (3) add an aggregation plan if needed */ if (masterQuery->hasAggs || masterQuery->groupClause) diff --git a/src/backend/distributed/planner/multi_physical_planner.c b/src/backend/distributed/planner/multi_physical_planner.c index 5e497970e..b34c36b4c 100644 --- a/src/backend/distributed/planner/multi_physical_planner.c +++ b/src/backend/distributed/planner/multi_physical_planner.c @@ -138,7 +138,7 @@ static OpExpr * MakeOpExpressionWithZeroConst(void); static List * BuildRestrictInfoList(List *qualList); static List * FragmentCombinationList(List *rangeTableFragmentsList, Query *jobQuery, List *dependedJobList); -static JoinSequenceNode * JoinSequenceArray(List * rangeTableFragmentsList, +static JoinSequenceNode * JoinSequenceArray(List *rangeTableFragmentsList, Query *jobQuery, List *dependedJobList); static bool PartitionedOnColumn(Var *column, List *rangeTableList, List *dependedJobList); static void CheckJoinBetweenColumns(OpExpr *joinClause); @@ -155,7 +155,8 @@ static StringInfo DatumArrayString(Datum *datumArray, uint32 datumCount, Oid dat static Task * CreateBasicTask(uint64 jobId, uint32 taskId, TaskType taskType, char *queryString); static void UpdateRangeTableAlias(List *rangeTableList, List *fragmentList); -static Alias * FragmentAlias(RangeTblEntry *rangeTableEntry, RangeTableFragment *fragment); +static Alias * 
FragmentAlias(RangeTblEntry *rangeTableEntry, + RangeTableFragment *fragment); static uint64 AnchorShardId(List *fragmentList, uint32 anchorRangeTableId); static List * PruneSqlTaskDependencies(List *sqlTaskList); static List * AssignTaskList(List *sqlTaskList); @@ -167,7 +168,7 @@ static Task * GreedyAssignTask(WorkerNode *workerNode, List *taskList, static List * RoundRobinAssignTaskList(List *taskList); static List * RoundRobinReorder(Task *task, List *placementList); static List * ReorderAndAssignTaskList(List *taskList, - List * (*reorderFunction) (Task *, List *)); + List * (*reorderFunction)(Task *, List *)); static int CompareTasksByShardId(const void *leftElement, const void *rightElement); static List * ActiveShardPlacementLists(List *taskList); static List * ActivePlacementList(List *placementList); @@ -309,6 +310,7 @@ BuildJobTree(MultiTreeRoot *multiTree) partitionKey, partitionType, baseRelationId, JOIN_MAP_MERGE_JOB); + /* reset depended job list */ loopDependedJobList = NIL; loopDependedJobList = list_make1(mapMergeJob); @@ -538,7 +540,7 @@ BuildJobQuery(MultiNode *multiNode, List *dependedJobList) * If we are building this query on a repartitioned subquery job then we * don't need to update column attributes. 
*/ - if(dependedJobList != NIL) + if (dependedJobList != NIL) { Job *job = (Job *) linitial(dependedJobList); if (CitusIsA(job, MapMergeJob)) @@ -628,10 +630,10 @@ BuildJobQuery(MultiNode *multiNode, List *dependedJobList) jobQuery->rtable = rangeTableList; jobQuery->targetList = targetList; jobQuery->jointree = joinTree; - jobQuery->sortClause = sortClauseList; + jobQuery->sortClause = sortClauseList; jobQuery->groupClause = groupClauseList; jobQuery->limitOffset = limitOffset; - jobQuery->limitCount = limitCount; + jobQuery->limitCount = limitCount; jobQuery->hasAggs = contain_agg_clause((Node *) targetList); return jobQuery; @@ -718,10 +720,10 @@ BuildReduceQuery(MultiExtendedOp *extendedOpNode, List *dependedJobList) reduceQuery->rtable = derivedRangeTableList; reduceQuery->targetList = targetList; reduceQuery->jointree = joinTree; - reduceQuery->sortClause = extendedOpNode->sortClauseList; + reduceQuery->sortClause = extendedOpNode->sortClauseList; reduceQuery->groupClause = extendedOpNode->groupClauseList; reduceQuery->limitOffset = extendedOpNode->limitOffset; - reduceQuery->limitCount = extendedOpNode->limitCount; + reduceQuery->limitCount = extendedOpNode->limitCount; reduceQuery->hasAggs = contain_agg_clause((Node *) targetList); return reduceQuery; @@ -754,7 +756,7 @@ BaseRangeTableList(MultiNode *multiNode) */ MultiTable *multiTable = (MultiTable *) multiNode; if (multiTable->relationId != SUBQUERY_RELATION_ID && - multiTable->relationId != HEAP_ANALYTICS_SUBQUERY_RELATION_ID) + multiTable->relationId != HEAP_ANALYTICS_SUBQUERY_RELATION_ID) { RangeTblEntry *rangeTableEntry = makeNode(RangeTblEntry); rangeTableEntry->inFromCl = true; @@ -870,7 +872,7 @@ TargetEntryList(List *expressionList) Expr *expression = (Expr *) lfirst(expressionCell); TargetEntry *targetEntry = makeTargetEntry(expression, - list_length(targetEntryList)+1, + list_length(targetEntryList) + 1, NULL, false); targetEntryList = lappend(targetEntryList, targetEntry); } @@ -1044,7 +1046,7 
@@ QueryJoinTree(MultiNode *multiNode, List *dependedJobList, List **rangeTableList /* fix the column attributes in ON (...) clauses */ columnList = pull_var_clause_default((Node *) joinNode->joinClauseList); - foreach (columnCell, columnList) + foreach(columnCell, columnList) { Var *column = (Var *) lfirst(columnCell); UpdateColumnAttributes(column, *rangeTableList, dependedJobList); @@ -1093,7 +1095,8 @@ QueryJoinTree(MultiNode *multiNode, List *dependedJobList, List **rangeTableList uint32 columnCount = (uint32) list_length(dependedTargetList); List *columnNameList = DerivedColumnNameList(columnCount, dependedJob->jobId); - RangeTblEntry *rangeTableEntry = DerivedRangeTableEntry(multiNode, columnNameList, + RangeTblEntry *rangeTableEntry = DerivedRangeTableEntry(multiNode, + columnNameList, tableIdList); RangeTblRef *rangeTableRef = makeNode(RangeTblRef); @@ -1246,7 +1249,7 @@ ExtractColumns(RangeTblEntry *rangeTableEntry, int rangeTableId, List *dependedJ else if (rangeTableKind == CITUS_RTE_RELATION) { /* - * For distributed tables, we construct a regular table RTE to call + * For distributed tables, we construct a regular table RTE to call * expandRTE, which will extract columns from the distributed table * schema. 
*/ @@ -1405,10 +1408,10 @@ BuildSubqueryJobQuery(MultiNode *multiNode) jobQuery->rtable = rangeTableList; jobQuery->targetList = targetList; jobQuery->jointree = joinTree; - jobQuery->sortClause = sortClauseList; + jobQuery->sortClause = sortClauseList; jobQuery->groupClause = groupClauseList; jobQuery->limitOffset = limitOffset; - jobQuery->limitCount = limitCount; + jobQuery->limitCount = limitCount; jobQuery->hasAggs = contain_agg_clause((Node *) targetList); return jobQuery; @@ -1646,7 +1649,7 @@ static uint64 UniqueJobId(void) { text *sequenceName = cstring_to_text(JOBID_SEQUENCE_NAME); - Oid sequenceId = ResolveRelationId(sequenceName); + Oid sequenceId = ResolveRelationId(sequenceName); Datum sequenceIdDatum = ObjectIdGetDatum(sequenceId); /* generate new and unique jobId from sequence */ @@ -1747,7 +1750,7 @@ HashPartitionCount(void) uint32 nodeCount = WorkerGetLiveNodeCount(); double maxReduceTasksPerNode = MaxRunningTasksPerNode / 2.0; - uint32 partitionCount = (uint32) rint(nodeCount * maxReduceTasksPerNode); + uint32 partitionCount = (uint32) rint(nodeCount * maxReduceTasksPerNode); return partitionCount; } @@ -1864,8 +1867,9 @@ SplitPointObject(ShardInterval **shardIntervalArray, uint32 shardIntervalCount) return splitPointObject; } + /* ------------------------------------------------------------ - * Functions that relate to building and assigning tasks follow + * Functions that relate to building and assigning tasks follow * ------------------------------------------------------------ */ @@ -1986,7 +1990,7 @@ SubquerySqlTaskList(Job *job) ListCell *rangeTableCell = NULL; ListCell *queryCell = NULL; Node *whereClauseTree = NULL; - uint32 taskIdIndex = 1; /* 0 is reserved for invalid taskId */ + uint32 taskIdIndex = 1; /* 0 is reserved for invalid taskId */ uint32 anchorRangeTableId = 0; uint32 rangeTableIndex = 0; const uint32 fragmentSize = sizeof(RangeTableFragment); @@ -2036,10 +2040,10 @@ SubquerySqlTaskList(Job *job) if (opExpressionList != NIL) 
{ Var *partitionColumn = PartitionColumn(relationId, tableId); - List *whereClauseList = ReplaceColumnsInOpExpressionList(opExpressionList, + List *whereClauseList = ReplaceColumnsInOpExpressionList(opExpressionList, partitionColumn); finalShardIntervalList = PruneShardList(relationId, tableId, whereClauseList, - shardIntervalList); + shardIntervalList); } else { @@ -2146,7 +2150,7 @@ static List * SqlTaskList(Job *job) { List *sqlTaskList = NIL; - uint32 taskIdIndex = 1; /* 0 is reserved for invalid taskId */ + uint32 taskIdIndex = 1; /* 0 is reserved for invalid taskId */ uint64 jobId = job->jobId; bool anchorRangeTableBasedAssignment = false; uint32 anchorRangeTableId = 0; @@ -2472,8 +2476,8 @@ RangeTableFragmentsList(List *rangeTableList, List *whereClauseList, List *shardIntervalList = LoadShardIntervalList(relationId); List *prunedShardIntervalList = PruneShardList(relationId, tableId, - whereClauseList, - shardIntervalList); + whereClauseList, + shardIntervalList); /* * If we prune all shards for one table, query results will be empty. 
@@ -2548,7 +2552,7 @@ RangeTableFragmentsList(List *rangeTableList, List *whereClauseList, */ List * PruneShardList(Oid relationId, Index tableId, List *whereClauseList, - List *shardIntervalList) + List *shardIntervalList) { List *remainingShardList = NIL; ListCell *shardIntervalCell = NULL; @@ -2653,7 +2657,7 @@ MakeOpExpression(Var *variable, int16 strategyNumber) Oid accessMethodId = BTREE_AM_OID; Oid operatorId = InvalidOid; Oid operatorClassInputType = InvalidOid; - Const *constantValue = NULL; + Const *constantValue = NULL; OpExpr *expression = NULL; char typeType = 0; @@ -2679,7 +2683,7 @@ MakeOpExpression(Var *variable, int16 strategyNumber) /* Now make the expression with the given variable and a null constant */ expression = (OpExpr *) make_opclause(operatorId, InvalidOid, /* no result type yet */ - false, /* no return set */ + false, /* no return set */ (Expr *) variable, (Expr *) constantValue, InvalidOid, collationId); @@ -2900,7 +2904,7 @@ HashableClauseMutator(Node *originalNode, Var *partitionColumn) * If this node is not hashable, continue walking down the expression tree * to find and hash clauses which are eligible. 
*/ - if(newNode == NULL) + if (newNode == NULL) { newNode = expression_tree_mutator(originalNode, HashableClauseMutator, (void *) partitionColumn); @@ -3045,7 +3049,7 @@ MakeInt4Constant(Datum constantValue) bool constantIsNull = false; bool constantByValue = true; - Const *int4Constant = makeConst(constantType, constantTypeMode, constantCollationId, + Const *int4Constant = makeConst(constantType, constantTypeMode, constantCollationId, constantLength, constantValue, constantIsNull, constantByValue); return int4Constant; @@ -3102,7 +3106,7 @@ UpdateConstraint(Node *baseConstraint, ShardInterval *shardInterval) Node *greaterThanExpr = (Node *) lsecond(andExpr->args); Node *minNode = get_rightop((Expr *) greaterThanExpr); /* right op */ - Node *maxNode = get_rightop((Expr *) lessThanExpr); /* right op */ + Node *maxNode = get_rightop((Expr *) lessThanExpr); /* right op */ Const *minConstant = NULL; Const *maxConstant = NULL; @@ -3273,7 +3277,7 @@ JoinSequenceArray(List *rangeTableFragmentsList, Query *jobQuery, List *depended joinSequenceArray[joinedTableCount].joiningRangeTableId = NON_PRUNABLE_JOIN; joinedTableCount++; - foreach (joinExprCell, joinExprList) + foreach(joinExprCell, joinExprList) { JoinExpr *joinExpr = (JoinExpr *) lfirst(joinExprCell); JoinType joinType = joinExpr->jointype; @@ -3347,7 +3351,7 @@ JoinSequenceArray(List *rangeTableFragmentsList, Query *jobQuery, List *depended if (IS_OUTER_JOIN(joinType)) { int innerRangeTableId = 0; - List * tableFragments = NIL; + List *tableFragments = NIL; int fragmentCount = 0; if (joinType == JOIN_RIGHT) @@ -3500,7 +3504,7 @@ FindRangeTableFragmentsList(List *rangeTableFragmentsList, int tableId) if (tableFragments != NIL) { RangeTableFragment *tableFragment = - (RangeTableFragment*) linitial(tableFragments); + (RangeTableFragment *) linitial(tableFragments); if (tableFragment->rangeTableId == tableId) { foundTableFragments = tableFragments; @@ -3706,7 +3710,7 @@ UniqueFragmentList(List *fragmentList) 
foreach(uniqueFragmentCell, uniqueFragmentList) { RangeTableFragment *uniqueFragment = - (RangeTableFragment *) lfirst(uniqueFragmentCell); + (RangeTableFragment *) lfirst(uniqueFragmentCell); uint64 *uniqueShardId = uniqueFragment->fragmentReference; if (*shardId == *uniqueShardId) @@ -4046,6 +4050,7 @@ FragmentAlias(RangeTblEntry *rangeTableEntry, RangeTableFragment *fragment) return alias; } + /* * AnchorShardId walks over each fragment in the given fragment list, finds the * fragment that corresponds to the given anchor range tableId, and returns this @@ -4360,7 +4365,7 @@ MergeTaskList(MapMergeJob *mapMergeJob, List *mapTaskList, uint32 taskIdIndex) StringInfo intermediateTableQueryString = IntermediateTableQueryString(jobId, taskIdIndex, reduceQuery); - StringInfo mergeAndRunQueryString= makeStringInfo(); + StringInfo mergeAndRunQueryString = makeStringInfo(); appendStringInfo(mergeAndRunQueryString, MERGE_FILES_AND_RUN_QUERY_COMMAND, jobId, taskIdIndex, mergeTableQueryString->data, intermediateTableQueryString->data); @@ -4686,7 +4691,7 @@ TaskListAppendUnique(List *list, Task *task) List * TaskListConcatUnique(List *list1, List *list2) { - ListCell *taskCell = NULL; + ListCell *taskCell = NULL; foreach(taskCell, list2) { @@ -4960,7 +4965,7 @@ List * FirstReplicaAssignTaskList(List *taskList) { /* No additional reordering need take place for this algorithm */ - List * (*reorderFunction)(Task *, List *) = NULL; + List *(*reorderFunction)(Task *, List *) = NULL; taskList = ReorderAndAssignTaskList(taskList, reorderFunction); @@ -4984,6 +4989,7 @@ RoundRobinAssignTaskList(List *taskList) return taskList; } + /* * RoundRobinReorder implements the core of the round-robin assignment policy. 
* It takes a task and placement list and rotates a copy of the placement list @@ -5116,7 +5122,8 @@ ActiveShardPlacementLists(List *taskList) List *activeShardPlacementList = ActivePlacementList(shardPlacementList); /* sort shard placements by their insertion time */ - activeShardPlacementList = SortList(activeShardPlacementList, CompareShardPlacements); + activeShardPlacementList = SortList(activeShardPlacementList, + CompareShardPlacements); shardPlacementLists = lappend(shardPlacementLists, activeShardPlacementList); } @@ -5257,7 +5264,8 @@ AssignDualHashTaskList(List *taskList) uint32 replicaIndex = 0; for (replicaIndex = 0; replicaIndex < ShardReplicationFactor; replicaIndex++) { - uint32 assignmentOffset = beginningNodeIndex + assignedTaskIndex + replicaIndex; + uint32 assignmentOffset = beginningNodeIndex + assignedTaskIndex + + replicaIndex; uint32 assignmentIndex = assignmentOffset % workerNodeCount; WorkerNode *workerNode = list_nth(workerNodeList, assignmentIndex); diff --git a/src/backend/distributed/relay/relay_event_utility.c b/src/backend/distributed/relay/relay_event_utility.c index 54f7a09a5..ce02ef9a0 100644 --- a/src/backend/distributed/relay/relay_event_utility.c +++ b/src/backend/distributed/relay/relay_event_utility.c @@ -35,7 +35,7 @@ /* Local functions forward declarations */ static bool TypeAddIndexConstraint(const AlterTableCmd *command); -static bool TypeDropIndexConstraint(const AlterTableCmd *command, +static bool TypeDropIndexConstraint(const AlterTableCmd *command, const RangeVar *relation, uint64 shardId); static void AppendShardIdToConstraintName(AlterTableCmd *command, uint64 shardId); @@ -67,7 +67,7 @@ RelayEventExtendNames(Node *parseTree, uint64 shardId) AppendShardIdToName(sequenceName, shardId); break; } - + case T_AlterTableStmt: { /* @@ -79,7 +79,7 @@ RelayEventExtendNames(Node *parseTree, uint64 shardId) AlterTableStmt *alterTableStmt = (AlterTableStmt *) parseTree; char **relationName = 
&(alterTableStmt->relation->relname); - RangeVar *relation = alterTableStmt->relation; /* for constraints */ + RangeVar *relation = alterTableStmt->relation; /* for constraints */ List *commandList = alterTableStmt->cmds; ListCell *commandCell = NULL; @@ -179,15 +179,15 @@ RelayEventExtendNames(Node *parseTree, uint64 shardId) objectType == OBJECT_INDEX || objectType == OBJECT_FOREIGN_TABLE || objectType == OBJECT_FOREIGN_SERVER) { - List *relationNameList = NULL; - int relationNameListLength = 0; + List *relationNameList = NULL; + int relationNameListLength = 0; Value *relationNameValue = NULL; - char **relationName = NULL; + char **relationName = NULL; uint32 dropCount = list_length(dropStmt->objects); if (dropCount > 1) { - ereport(ERROR, + ereport(ERROR, (errmsg("cannot extend name for multiple drop objects"))); } @@ -205,19 +205,30 @@ RelayEventExtendNames(Node *parseTree, uint64 shardId) switch (relationNameListLength) { case 1: + { relationNameValue = linitial(relationNameList); break; + } + case 2: + { relationNameValue = lsecond(relationNameList); break; + } + case 3: + { relationNameValue = lthird(relationNameList); break; + } + default: + { ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("improper relation name: \"%s\"", NameListToString(relationNameList)))); break; + } } relationName = &(relationNameValue->val.str); @@ -304,7 +315,7 @@ RelayEventExtendNames(Node *parseTree, uint64 shardId) { RenameStmt *renameStmt = (RenameStmt *) parseTree; ObjectType objectType = renameStmt->renameType; - + if (objectType == OBJECT_TABLE || objectType == OBJECT_SEQUENCE || objectType == OBJECT_INDEX) { @@ -335,7 +346,7 @@ RelayEventExtendNames(Node *parseTree, uint64 shardId) * We currently do not support truncate statements. This is * primarily because truncates allow implicit modifications to * sequences through table column dependencies. As we have not - * determined our dependency model for sequences, we error here. 
+ * determined our dependency model for sequences, we error here. */ ereport(ERROR, (errmsg("cannot extend name for truncate statement"))); break; @@ -384,18 +395,18 @@ TypeAddIndexConstraint(const AlterTableCmd *command) * associated with an index. */ static bool -TypeDropIndexConstraint(const AlterTableCmd *command, +TypeDropIndexConstraint(const AlterTableCmd *command, const RangeVar *relation, uint64 shardId) { Relation pgConstraint = NULL; SysScanDesc scanDescriptor = NULL; - ScanKeyData scanKey[1]; + ScanKeyData scanKey[1]; int scanKeyCount = 1; HeapTuple heapTuple = NULL; char *searchedConstraintName = NULL; - bool indexConstraint = false; - Oid relationId = InvalidOid; + bool indexConstraint = false; + Oid relationId = InvalidOid; bool failOK = true; if (command->subtype != AT_DropConstraint) @@ -423,8 +434,8 @@ TypeDropIndexConstraint(const AlterTableCmd *command, ScanKeyInit(&scanKey[0], Anum_pg_constraint_conrelid, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(relationId)); - - scanDescriptor = systable_beginscan(pgConstraint, + + scanDescriptor = systable_beginscan(pgConstraint, ConstraintRelidIndexId, true, /* indexOK */ NULL, scanKeyCount, scanKey); @@ -433,7 +444,7 @@ TypeDropIndexConstraint(const AlterTableCmd *command, { Form_pg_constraint constraintForm = (Form_pg_constraint) GETSTRUCT(heapTuple); char *constraintName = NameStr(constraintForm->conname); - + if (strncmp(constraintName, searchedConstraintName, NAMEDATALEN) == 0) { /* we found the constraint, now check if it is for an index */ @@ -442,7 +453,7 @@ TypeDropIndexConstraint(const AlterTableCmd *command, { indexConstraint = true; } - + break; } @@ -451,7 +462,7 @@ TypeDropIndexConstraint(const AlterTableCmd *command, systable_endscan(scanDescriptor); heap_close(pgConstraint, AccessShareLock); - + pfree(searchedConstraintName); return indexConstraint; @@ -489,10 +500,10 @@ AppendShardIdToConstraintName(AlterTableCmd *command, uint64 shardId) void AppendShardIdToName(char **name, uint64 
shardId) { - char extendedName[NAMEDATALEN]; + char extendedName[NAMEDATALEN]; uint32 extendedNameLength = 0; - snprintf(extendedName, NAMEDATALEN, "%s%c" UINT64_FORMAT, + snprintf(extendedName, NAMEDATALEN, "%s%c" UINT64_FORMAT, (*name), SHARD_NAME_SEPARATOR, shardId); /* diff --git a/src/backend/distributed/shared_library_init.c b/src/backend/distributed/shared_library_init.c index 344665eb0..8402ab0cf 100644 --- a/src/backend/distributed/shared_library_init.c +++ b/src/backend/distributed/shared_library_init.c @@ -48,23 +48,23 @@ static void NormalizeWorkerListPath(void); /* GUC enum definitions */ static const struct config_enum_entry task_assignment_policy_options[] = { - {"greedy", TASK_ASSIGNMENT_GREEDY, false}, - {"first-replica", TASK_ASSIGNMENT_FIRST_REPLICA, false}, - {"round-robin", TASK_ASSIGNMENT_ROUND_ROBIN, false}, - {NULL, 0, false} + { "greedy", TASK_ASSIGNMENT_GREEDY, false }, + { "first-replica", TASK_ASSIGNMENT_FIRST_REPLICA, false }, + { "round-robin", TASK_ASSIGNMENT_ROUND_ROBIN, false }, + { NULL, 0, false } }; static const struct config_enum_entry task_executor_type_options[] = { - {"real-time", MULTI_EXECUTOR_REAL_TIME, false}, - {"task-tracker", MULTI_EXECUTOR_TASK_TRACKER, false}, - {"router", MULTI_EXECUTOR_ROUTER, false}, - {NULL, 0, false} + { "real-time", MULTI_EXECUTOR_REAL_TIME, false }, + { "task-tracker", MULTI_EXECUTOR_TASK_TRACKER, false }, + { "router", MULTI_EXECUTOR_ROUTER, false }, + { NULL, 0, false } }; static const struct config_enum_entry shard_placement_policy_options[] = { - {"local-node-first", SHARD_PLACEMENT_LOCAL_NODE_FIRST, false}, - {"round-robin", SHARD_PLACEMENT_ROUND_ROBIN, false}, - {NULL, 0, false} + { "local-node-first", SHARD_PLACEMENT_LOCAL_NODE_FIRST, false }, + { "round-robin", SHARD_PLACEMENT_ROUND_ROBIN, false }, + { NULL, 0, false } }; @@ -206,9 +206,10 @@ RegisterCitusConfigVariables(void) DefineCustomBoolVariable( "citusdb.expire_cached_shards", - gettext_noop("Enables shard cache expiration if a 
shard's size on disk has changed. "), - gettext_noop("When appending to an existing shard, old data may still be cached on " - "other workers. This configuration entry activates automatic " + gettext_noop("Enables shard cache expiration if a shard's size on disk has " + "changed."), + gettext_noop("When appending to an existing shard, old data may still be cached " + "on other workers. This configuration entry activates automatic " "expiration, but should not be used with manual updates to shards."), &ExpireCachedShards, false, @@ -440,11 +441,11 @@ RegisterCitusConfigVariables(void) "citusdb.task_assignment_policy", gettext_noop("Sets the policy to use when assigning tasks to worker nodes."), gettext_noop("The master node assigns tasks to worker nodes based on shard " - "locations. This configuration value specifies the policy to " - "use when making these assignments. The greedy policy aims to " - "evenly distribute tasks across worker nodes, first-replica just " - "assigns tasks in the order shard placements were created, " - "and the round-robin policy assigns tasks to worker nodes in " + "locations. This configuration value specifies the policy to " + "use when making these assignments. 
The greedy policy aims to " + "evenly distribute tasks across worker nodes, first-replica just " + "assigns tasks in the order shard placements were created, " + "and the round-robin policy assigns tasks to worker nodes in " "a round-robin fashion."), &TaskAssignmentPolicy, TASK_ASSIGNMENT_GREEDY, @@ -488,6 +489,7 @@ RegisterCitusConfigVariables(void) /* warn about config items in the citusdb namespace that are not registered above */ EmitWarningsOnPlaceholders("citusdb"); + /* Also warn about citus namespace, as that's a very likely misspelling */ EmitWarningsOnPlaceholders("citus"); } @@ -515,8 +517,10 @@ NormalizeWorkerListPath(void) { absoluteFileName = malloc(strlen(DataDir) + strlen(WORKER_LIST_FILENAME) + 2); if (absoluteFileName == NULL) + { ereport(FATAL, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory"))); + } sprintf(absoluteFileName, "%s/%s", DataDir, WORKER_LIST_FILENAME); } @@ -530,6 +534,7 @@ NormalizeWorkerListPath(void) "environment variable.\n", progname, ConfigFileName))); } - SetConfigOption("citusdb.worker_list_file", absoluteFileName, PGC_POSTMASTER, PGC_S_OVERRIDE); + SetConfigOption("citusdb.worker_list_file", absoluteFileName, PGC_POSTMASTER, + PGC_S_OVERRIDE); free(absoluteFileName); } diff --git a/src/backend/distributed/test/fake_fdw.c b/src/backend/distributed/test/fake_fdw.c index 883c77755..9409dde3b 100644 --- a/src/backend/distributed/test/fake_fdw.c +++ b/src/backend/distributed/test/fake_fdw.c @@ -116,9 +116,9 @@ FakeGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel, Oid foreigntableid, ForeignPath *best_path, List *tlist, List *scan_clauses) #else static ForeignScan * -FakeGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel, Oid foreigntableid, - ForeignPath *best_path, List *tlist, List *scan_clauses, - Plan *outer_plan) +FakeGetForeignPlan(PlannerInfo * root, RelOptInfo * baserel, Oid foreigntableid, + ForeignPath * best_path, List * tlist, List * scan_clauses, + Plan * outer_plan) #endif { Index scan_relid = 
baserel->relid; @@ -129,7 +129,7 @@ FakeGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel, Oid foreigntableid, return make_foreignscan(tlist, scan_clauses, scan_relid, NIL, NIL); #else return make_foreignscan(tlist, scan_clauses, scan_relid, NIL, NIL, NIL, NIL, - outer_plan); + outer_plan); #endif } diff --git a/src/backend/distributed/utils/citus_nodefuncs.c b/src/backend/distributed/utils/citus_nodefuncs.c index b3858cbdf..e5abb1d60 100644 --- a/src/backend/distributed/utils/citus_nodefuncs.c +++ b/src/backend/distributed/utils/citus_nodefuncs.c @@ -265,7 +265,7 @@ GetRangeTblKind(RangeTblEntry *rte) { CitusRTEKind rteKind = CITUS_RTE_RELATION /* invalid */; - switch(rte->rtekind) + switch (rte->rtekind) { /* directly rtekind if it's not possibly an extended RTE */ case RTE_RELATION: @@ -273,9 +273,13 @@ GetRangeTblKind(RangeTblEntry *rte) case RTE_JOIN: case RTE_VALUES: case RTE_CTE: + { rteKind = (CitusRTEKind) rte->rtekind; break; + } + case RTE_FUNCTION: + { /* * Extract extra data - correct even if a plain RTE_FUNCTION, not * an extended one, ExtractRangeTblExtraData handles that case @@ -283,6 +287,7 @@ GetRangeTblKind(RangeTblEntry *rte) */ ExtractRangeTblExtraData(rte, &rteKind, NULL, NULL, NULL); break; + } } return rteKind; diff --git a/src/backend/distributed/utils/citus_ruleutils.c b/src/backend/distributed/utils/citus_ruleutils.c index ad1ac7332..5f2c4a5ef 100644 --- a/src/backend/distributed/utils/citus_ruleutils.c +++ b/src/backend/distributed/utils/citus_ruleutils.c @@ -186,7 +186,7 @@ AppendOptionListToString(StringInfo stringBuffer, List *optionList) foreach(optionCell, optionList) { - DefElem *option = (DefElem*) lfirst(optionCell); + DefElem *option = (DefElem *) lfirst(optionCell); char *optionName = option->defname; char *optionValue = defGetString(option); @@ -219,7 +219,7 @@ pg_get_tableschemadef_string(Oid tableRelationId) char relationKind = 0; TupleDesc tupleDescriptor = NULL; TupleConstr *tupleConstraints = NULL; - int 
attributeIndex = 0; + int attributeIndex = 0; bool firstAttributePrinted = false; AttrNumber defaultValueIndex = 0; AttrNumber constraintIndex = 0; @@ -447,21 +447,35 @@ pg_get_tablecolumnoptionsdef_string(Oid tableRelationId) switch (attributeForm->attstorage) { case 'p': + { storageName = "PLAIN"; break; + } + case 'e': + { storageName = "EXTERNAL"; break; + } + case 'm': + { storageName = "MAIN"; break; + } + case 'x': + { storageName = "EXTENDED"; break; + } + default: + { ereport(ERROR, (errmsg("unrecognized storage type: %c", attributeForm->attstorage))); break; + } } appendStringInfo(&statement, "ALTER COLUMN %s ", diff --git a/src/backend/distributed/utils/metadata_cache.c b/src/backend/distributed/utils/metadata_cache.c index 868b11313..61127371f 100644 --- a/src/backend/distributed/utils/metadata_cache.c +++ b/src/backend/distributed/utils/metadata_cache.c @@ -51,10 +51,10 @@ static void InvalidateDistRelationCacheCallback(Datum argument, Oid relationId); static HeapTuple LookupDistPartitionTuple(Oid relationId); static List * LookupDistShardTuples(Oid relationId); static void GetPartitionTypeInputInfo(char *partitionKeyString, char partitionMethod, - Oid *intervalTypeId, int32 *intervalTypeMod); + Oid *intervalTypeId, int32 *intervalTypeMod); static ShardInterval * TupleToShardInterval(HeapTuple heapTuple, - TupleDesc tupleDescriptor, Oid intervalTypeId, - int32 intervalTypeMod); + TupleDesc tupleDescriptor, Oid intervalTypeId, + int32 intervalTypeMod); static void CachedRelationLookup(const char *relationName, Oid *cachedOid); @@ -87,6 +87,7 @@ IsDistributedTable(Oid relationId) return cacheEntry->isDistributedTable; } + /* * LoadShardInterval reads shard metadata for given shardId from pg_dist_shard, * and converts min/max values in these metadata to their properly typed datum @@ -98,7 +99,7 @@ LoadShardInterval(uint64 shardId) { ShardInterval *shardInterval; SysScanDesc scanDescriptor = NULL; - ScanKeyData scanKey[1]; + ScanKeyData scanKey[1]; int 
scanKeyCount = 1; HeapTuple heapTuple = NULL; Form_pg_dist_shard shardForm = NULL; @@ -127,11 +128,11 @@ LoadShardInterval(uint64 shardId) partitionEntry = DistributedTableCacheEntry(shardForm->logicalrelid); GetPartitionTypeInputInfo(partitionEntry->partitionKeyString, - partitionEntry->partitionMethod, &intervalTypeId, - &intervalTypeMod); + partitionEntry->partitionMethod, &intervalTypeId, + &intervalTypeMod); shardInterval = TupleToShardInterval(heapTuple, tupleDescriptor, intervalTypeId, - intervalTypeMod); + intervalTypeMod); systable_endscan(scanDescriptor); heap_close(pgDistShard, AccessShareLock); @@ -139,6 +140,7 @@ LoadShardInterval(uint64 shardId) return shardInterval; } + /* * DistributedTableCacheEntry looks up a pg_dist_partition entry for a * relation. @@ -239,19 +241,19 @@ LookupDistTableCacheEntry(Oid relationId) int32 intervalTypeMod = -1; GetPartitionTypeInputInfo(partitionKeyString, partitionMethod, &intervalTypeId, - &intervalTypeMod); + &intervalTypeMod); shardIntervalArray = MemoryContextAllocZero(CacheMemoryContext, - shardIntervalArrayLength * - sizeof(ShardInterval)); + shardIntervalArrayLength * + sizeof(ShardInterval)); foreach(distShardTupleCell, distShardTupleList) { HeapTuple shardTuple = lfirst(distShardTupleCell); ShardInterval *shardInterval = TupleToShardInterval(shardTuple, - distShardTupleDesc, - intervalTypeId, - intervalTypeMod); + distShardTupleDesc, + intervalTypeId, + intervalTypeMod); MemoryContext oldContext = MemoryContextSwitchTo(CacheMemoryContext); CopyShardInterval(shardInterval, &shardIntervalArray[arrayIndex]); @@ -773,7 +775,7 @@ LookupDistShardTuples(Oid relationId) scanKey[0].sk_argument = ObjectIdGetDatum(relationId); scanDescriptor = systable_beginscan(pgDistShard, DistShardLogicalRelidIndexId(), true, - NULL, 1, scanKey); + NULL, 1, scanKey); currentShardTuple = systable_getnext(scanDescriptor); while (HeapTupleIsValid(currentShardTuple)) @@ -797,7 +799,7 @@ LookupDistShardTuples(Oid relationId) */ static 
void GetPartitionTypeInputInfo(char *partitionKeyString, char partitionMethod, - Oid *intervalTypeId, int32 *intervalTypeMod) + Oid *intervalTypeId, int32 *intervalTypeMod) { *intervalTypeId = InvalidOid; *intervalTypeMod = -1; @@ -826,7 +828,7 @@ GetPartitionTypeInputInfo(char *partitionKeyString, char partitionMethod, { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("unsupported table partition type: %c", - partitionMethod))); + partitionMethod))); } } } @@ -838,7 +840,7 @@ GetPartitionTypeInputInfo(char *partitionKeyString, char partitionMethod, */ static ShardInterval * TupleToShardInterval(HeapTuple heapTuple, TupleDesc tupleDescriptor, Oid intervalTypeId, - int32 intervalTypeMod) + int32 intervalTypeMod) { ShardInterval *shardInterval = NULL; bool isNull = false; @@ -847,16 +849,16 @@ TupleToShardInterval(HeapTuple heapTuple, TupleDesc tupleDescriptor, Oid interva Oid inputFunctionId = InvalidOid; Oid typeIoParam = InvalidOid; Datum relationIdDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_logicalrelid, - tupleDescriptor, &isNull); + tupleDescriptor, &isNull); Datum shardIdDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_shardid, - tupleDescriptor, &isNull); + tupleDescriptor, &isNull); Datum storageTypeDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_shardstorage, - tupleDescriptor, &isNull); + tupleDescriptor, &isNull); Datum minValueTextDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_shardminvalue, - tupleDescriptor, &minValueNull); + tupleDescriptor, &minValueNull); Datum maxValueTextDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_shardmaxvalue, - tupleDescriptor, &maxValueNull); + tupleDescriptor, &maxValueNull); Oid relationId = DatumGetObjectId(relationIdDatum); int64 shardId = DatumGetInt64(shardIdDatum); @@ -877,7 +879,7 @@ TupleToShardInterval(HeapTuple heapTuple, TupleDesc tupleDescriptor, Oid interva /* TODO: move this up the call stack to avoid per-tuple invocation? 
*/ get_type_io_data(intervalTypeId, IOFunc_input, &intervalTypeLen, &intervalByVal, - &intervalAlign, &intervalDelim, &typeIoParam, &inputFunctionId); + &intervalAlign, &intervalDelim, &typeIoParam, &inputFunctionId); /* finally convert min/max values to their actual types */ minValue = OidInputFunctionCall(inputFunctionId, minValueString, diff --git a/src/backend/distributed/utils/multi_resowner.c b/src/backend/distributed/utils/multi_resowner.c index 80aecf7e5..21d78d1aa 100644 --- a/src/backend/distributed/utils/multi_resowner.c +++ b/src/backend/distributed/utils/multi_resowner.c @@ -22,7 +22,8 @@ #include "distributed/multi_resowner.h" -typedef struct JobDirectoryEntry { +typedef struct JobDirectoryEntry +{ ResourceOwner owner; uint64 jobId; } JobDirectoryEntry; @@ -44,8 +45,8 @@ MultiResourceOwnerReleaseCallback(ResourceReleasePhase phase, bool isTopLevel, void *arg) { - int lastJobIndex = NumRegisteredJobDirectories - 1; - int jobIndex = 0; + int lastJobIndex = NumRegisteredJobDirectories - 1; + int jobIndex = 0; if (phase == RESOURCE_RELEASE_AFTER_LOCKS) { @@ -79,7 +80,7 @@ MultiResourceOwnerReleaseCallback(ResourceReleasePhase phase, void ResourceOwnerEnlargeJobDirectories(ResourceOwner owner) { - int newMax = 0; + int newMax = 0; /* ensure callback is registered */ if (!RegisteredResownerCallback) @@ -91,15 +92,17 @@ ResourceOwnerEnlargeJobDirectories(ResourceOwner owner) if (RegisteredJobDirectories == NULL) { newMax = 16; - RegisteredJobDirectories = (JobDirectoryEntry *) - MemoryContextAlloc(TopMemoryContext, newMax * sizeof(JobDirectoryEntry)); + RegisteredJobDirectories = + (JobDirectoryEntry *) MemoryContextAlloc(TopMemoryContext, + newMax * sizeof(JobDirectoryEntry)); NumAllocatedJobDirectories = newMax; } else if (NumRegisteredJobDirectories + 1 > NumAllocatedJobDirectories) { newMax = NumAllocatedJobDirectories * 2; - RegisteredJobDirectories = (JobDirectoryEntry *) - repalloc(RegisteredJobDirectories, newMax * sizeof(JobDirectoryEntry)); + 
RegisteredJobDirectories = + (JobDirectoryEntry *) repalloc(RegisteredJobDirectories, + newMax * sizeof(JobDirectoryEntry)); NumAllocatedJobDirectories = newMax; } } @@ -123,8 +126,8 @@ ResourceOwnerRememberJobDirectory(ResourceOwner owner, uint64 jobId) void ResourceOwnerForgetJobDirectory(ResourceOwner owner, uint64 jobId) { - int lastJobIndex = NumRegisteredJobDirectories - 1; - int jobIndex = 0; + int lastJobIndex = NumRegisteredJobDirectories - 1; + int jobIndex = 0; for (jobIndex = lastJobIndex; jobIndex >= 0; jobIndex--) { @@ -135,7 +138,8 @@ ResourceOwnerForgetJobDirectory(ResourceOwner owner, uint64 jobId) /* move all later entries one up */ while (jobIndex < lastJobIndex) { - RegisteredJobDirectories[jobIndex] = RegisteredJobDirectories[jobIndex + 1]; + RegisteredJobDirectories[jobIndex] = + RegisteredJobDirectories[jobIndex + 1]; jobIndex++; } NumRegisteredJobDirectories = lastJobIndex; diff --git a/src/backend/distributed/utils/resource_lock.c b/src/backend/distributed/utils/resource_lock.c index a2552d46b..3f1b739af 100644 --- a/src/backend/distributed/utils/resource_lock.c +++ b/src/backend/distributed/utils/resource_lock.c @@ -30,7 +30,7 @@ void LockShardDistributionMetadata(int64 shardId, LOCKMODE lockMode) { - LOCKTAG tag; + LOCKTAG tag; const bool sessionLock = false; const bool dontWait = false; @@ -64,7 +64,7 @@ LockRelationDistributionMetadata(Oid relationId, LOCKMODE lockMode) void LockShardResource(uint64 shardId, LOCKMODE lockmode) { - LOCKTAG tag; + LOCKTAG tag; const bool sessionLock = false; const bool dontWait = false; @@ -78,7 +78,7 @@ LockShardResource(uint64 shardId, LOCKMODE lockmode) void UnlockShardResource(uint64 shardId, LOCKMODE lockmode) { - LOCKTAG tag; + LOCKTAG tag; const bool sessionLock = false; SET_LOCKTAG_SHARD_RESOURCE(tag, MyDatabaseId, shardId); @@ -95,7 +95,7 @@ UnlockShardResource(uint64 shardId, LOCKMODE lockmode) void LockJobResource(uint64 jobId, LOCKMODE lockmode) { - LOCKTAG tag; + LOCKTAG tag; const bool 
sessionLock = false; const bool dontWait = false; @@ -109,7 +109,7 @@ LockJobResource(uint64 jobId, LOCKMODE lockmode) void UnlockJobResource(uint64 jobId, LOCKMODE lockmode) { - LOCKTAG tag; + LOCKTAG tag; const bool sessionLock = false; SET_LOCKTAG_JOB_RESOURCE(tag, MyDatabaseId, jobId); diff --git a/src/backend/distributed/worker/task_tracker.c b/src/backend/distributed/worker/task_tracker.c index b8a6bde6a..e3e7320db 100644 --- a/src/backend/distributed/worker/task_tracker.c +++ b/src/backend/distributed/worker/task_tracker.c @@ -50,7 +50,7 @@ #include "utils/memutils.h" -int TaskTrackerDelay = 200; /* process sleep interval in millisecs */ +int TaskTrackerDelay = 200; /* process sleep interval in millisecs */ int MaxRunningTasksPerNode = 16; /* max number of running tasks */ int MaxTrackedTasksPerNode = 1024; /* max number of tracked tasks */ WorkerTasksSharedStateData *WorkerTasksSharedState; /* shared memory state */ @@ -76,10 +76,10 @@ static void TrackerCleanupJobSchemas(void); static void TrackerCleanupConnections(HTAB *WorkerTasksHash); static void TrackerRegisterShutDown(HTAB *WorkerTasksHash); static void TrackerDelayLoop(void); -static List *SchedulableTaskList(HTAB *WorkerTasksHash); +static List * SchedulableTaskList(HTAB *WorkerTasksHash); static WorkerTask * SchedulableTaskPriorityQueue(HTAB *WorkerTasksHash); static uint32 CountTasksMatchingCriteria(HTAB *WorkerTasksHash, - bool (*CriteriaFunction) (WorkerTask *)); + bool (*CriteriaFunction)(WorkerTask *)); static bool RunningTask(WorkerTask *workerTask); static bool SchedulableTask(WorkerTask *workerTask); static int CompareTasksByTime(const void *first, const void *second); @@ -393,7 +393,7 @@ TrackerCleanupJobSchemas(void) /* * We create cleanup tasks since we can't remove schemas within the task * tracker process. We also assign high priorities to these tasks so - * that they get scheduled before everyone else. + * that they get scheduled before everyone else. 
*/ cleanupTask = WorkerTasksHashEnter(jobId, taskIndex); cleanupTask->assignedAt = HIGH_PRIORITY_TASK_TIME; @@ -440,7 +440,7 @@ TrackerCleanupConnections(HTAB *WorkerTasksHash) currentTask->connectionId = INVALID_CONNECTION_ID; } - currentTask = (WorkerTask *) hash_seq_search(&status); + currentTask = (WorkerTask *) hash_seq_search(&status); } } @@ -494,8 +494,9 @@ TrackerDelayLoop(void) } } + /* ------------------------------------------------------------ - * Signal handling and shared hash initialization functions follow + * Signal handling and shared hash initialization functions follow * ------------------------------------------------------------ */ @@ -503,7 +504,7 @@ TrackerDelayLoop(void) static void TrackerSigHupHandler(SIGNAL_ARGS) { - int save_errno = errno; + int save_errno = errno; got_SIGHUP = true; if (MyProc != NULL) @@ -519,7 +520,7 @@ TrackerSigHupHandler(SIGNAL_ARGS) static void TrackerShutdownHandler(SIGNAL_ARGS) { - int save_errno = errno; + int save_errno = errno; got_SIGTERM = true; if (MyProc != NULL) @@ -579,10 +580,10 @@ TaskTrackerShmemInit(void) LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); /* allocate struct containing task tracker related shared state */ - WorkerTasksSharedState = (WorkerTasksSharedStateData *) - ShmemInitStruct("Worker Task Control", - sizeof(WorkerTasksSharedStateData), - &alreadyInitialized); + WorkerTasksSharedState = + (WorkerTasksSharedStateData *) ShmemInitStruct("Worker Task Control", + sizeof(WorkerTasksSharedStateData), + &alreadyInitialized); if (!alreadyInitialized) { @@ -607,6 +608,7 @@ TaskTrackerShmemInit(void) } } + /* ------------------------------------------------------------ * Task scheduling and management functions follow * ------------------------------------------------------------ @@ -638,7 +640,7 @@ SchedulableTaskList(HTAB *WorkerTasksHash) schedulableTaskCount = CountTasksMatchingCriteria(WorkerTasksHash, &SchedulableTask); if (schedulableTaskCount == 0) { - return NIL; /* we do not have 
any new tasks to schedule */ + return NIL; /* we do not have any new tasks to schedule */ } tasksToScheduleCount = MaxRunningTasksPerNode - runningTaskCount; @@ -653,7 +655,7 @@ SchedulableTaskList(HTAB *WorkerTasksHash) for (queueIndex = 0; queueIndex < tasksToScheduleCount; queueIndex++) { WorkerTask *schedulableTask = (WorkerTask *) palloc0(sizeof(WorkerTask)); - schedulableTask->jobId = schedulableTaskQueue[queueIndex].jobId; + schedulableTask->jobId = schedulableTaskQueue[queueIndex].jobId; schedulableTask->taskId = schedulableTaskQueue[queueIndex].taskId; schedulableTaskList = lappend(schedulableTaskList, schedulableTask); @@ -681,13 +683,13 @@ SchedulableTaskPriorityQueue(HTAB *WorkerTasksHash) uint32 queueIndex = 0; /* our priority queue size equals to the number of schedulable tasks */ - queueSize = CountTasksMatchingCriteria(WorkerTasksHash, &SchedulableTask); + queueSize = CountTasksMatchingCriteria(WorkerTasksHash, &SchedulableTask); if (queueSize == 0) { return NULL; } - /* allocate an array of tasks for our priority queue */ + /* allocate an array of tasks for our priority queue */ priorityQueue = (WorkerTask *) palloc0(sizeof(WorkerTask) * queueSize); /* copy tasks in the shared hash to the priority queue */ @@ -719,7 +721,7 @@ SchedulableTaskPriorityQueue(HTAB *WorkerTasksHash) /* Counts the number of tasks that match the given criteria function. 
*/ static uint32 CountTasksMatchingCriteria(HTAB *WorkerTasksHash, - bool (*CriteriaFunction) (WorkerTask *)) + bool (*CriteriaFunction)(WorkerTask *)) { HASH_SEQ_STATUS status; WorkerTask *currentTask = NULL; @@ -730,13 +732,13 @@ CountTasksMatchingCriteria(HTAB *WorkerTasksHash, currentTask = (WorkerTask *) hash_seq_search(&status); while (currentTask != NULL) { - bool matchesCriteria = (*CriteriaFunction) (currentTask); + bool matchesCriteria = (*CriteriaFunction)(currentTask); if (matchesCriteria) { taskCount++; } - currentTask = (WorkerTask *) hash_seq_search(&status); + currentTask = (WorkerTask *) hash_seq_search(&status); } return taskCount; @@ -775,7 +777,7 @@ SchedulableTask(WorkerTask *workerTask) static int CompareTasksByTime(const void *first, const void *second) { - WorkerTask *firstTask = (WorkerTask *) first; + WorkerTask *firstTask = (WorkerTask *) first; WorkerTask *secondTask = (WorkerTask *) second; /* tasks that are assigned earlier have higher priority */ @@ -893,7 +895,7 @@ ManageWorkerTask(WorkerTask *workerTask, HTAB *WorkerTasksHash) { case TASK_ASSIGNED: { - break; /* nothing to do until the task gets scheduled */ + break; /* nothing to do until the task gets scheduled */ } case TASK_SCHEDULED: diff --git a/src/backend/distributed/worker/task_tracker_protocol.c b/src/backend/distributed/worker/task_tracker_protocol.c index 84115f441..1b52be99a 100644 --- a/src/backend/distributed/worker/task_tracker_protocol.c +++ b/src/backend/distributed/worker/task_tracker_protocol.c @@ -57,7 +57,7 @@ task_tracker_assign_task(PG_FUNCTION_ARGS) { uint64 jobId = PG_GETARG_INT64(0); uint32 taskId = PG_GETARG_UINT32(1); - text *taskCallStringText = PG_GETARG_TEXT_P(2); + text *taskCallStringText = PG_GETARG_TEXT_P(2); StringInfo jobSchemaName = JobSchemaName(jobId); bool schemaExists = false; @@ -185,7 +185,7 @@ task_tracker_cleanup_job(PG_FUNCTION_ARGS) CleanupTask(currentTask); } - currentTask = (WorkerTask *) hash_seq_search(&status); + currentTask = 
(WorkerTask *) hash_seq_search(&status); } LWLockRelease(WorkerTasksSharedState->taskHashLock); @@ -308,7 +308,7 @@ CreateTask(uint64 jobId, uint32 taskId, char *taskCallString) } -/* +/* * UpdateTask updates the call string text for an already existing task. Note * that this function expects the caller to hold an exclusive lock over the * shared hash. @@ -331,7 +331,7 @@ UpdateTask(WorkerTask *workerTask, char *taskCallString) if (taskStatus == TASK_SUCCEEDED || taskStatus == TASK_CANCEL_REQUESTED || taskStatus == TASK_CANCELED) { - ; /* nothing to do */ + /* nothing to do */ } else if (taskStatus == TASK_PERMANENTLY_FAILED) { diff --git a/src/backend/distributed/worker/worker_data_fetch_protocol.c b/src/backend/distributed/worker/worker_data_fetch_protocol.c index 0e5b68a1d..d0c309c18 100644 --- a/src/backend/distributed/worker/worker_data_fetch_protocol.c +++ b/src/backend/distributed/worker/worker_data_fetch_protocol.c @@ -53,11 +53,14 @@ static void ReceiveResourceCleanup(int32 connectionId, const char *filename, static void DeleteFile(const char *filename); static void FetchTableCommon(text *tableName, uint64 remoteTableSize, ArrayType *nodeNameObject, ArrayType *nodePortObject, - bool (*FetchTableFunction) (const char *, uint32, StringInfo)); + bool (*FetchTableFunction)(const char *, uint32, + StringInfo)); static uint64 LocalTableSize(Oid relationId); static uint64 ExtractShardId(StringInfo tableName); -static bool FetchRegularTable(const char *nodeName, uint32 nodePort, StringInfo tableName); -static bool FetchForeignTable(const char *nodeName, uint32 nodePort, StringInfo tableName); +static bool FetchRegularTable(const char *nodeName, uint32 nodePort, + StringInfo tableName); +static bool FetchForeignTable(const char *nodeName, uint32 nodePort, + StringInfo tableName); static List * TableDDLCommandList(const char *nodeName, uint32 nodePort, StringInfo tableName); static StringInfo ForeignFilePath(const char *nodeName, uint32 nodePort, @@ -85,7 +88,7 @@ 
worker_fetch_partition_file(PG_FUNCTION_ARGS) uint64 jobId = PG_GETARG_INT64(0); uint32 partitionTaskId = PG_GETARG_UINT32(1); uint32 partitionFileId = PG_GETARG_UINT32(2); - uint32 upstreamTaskId = PG_GETARG_UINT32(3); + uint32 upstreamTaskId = PG_GETARG_UINT32(3); text *nodeNameText = PG_GETARG_TEXT_P(4); uint32 nodePort = PG_GETARG_UINT32(5); char *nodeName = NULL; @@ -226,7 +229,7 @@ ReceiveRegularFile(const char *nodeName, uint32 nodePort, char filename[MAXPGPATH]; int closed = -1; const int fileFlags = (O_APPEND | O_CREAT | O_RDWR | O_TRUNC | PG_BINARY); - const int fileMode = (S_IRUSR | S_IWUSR); + const int fileMode = (S_IRUSR | S_IWUSR); QueryStatus queryStatus = CLIENT_INVALID_QUERY; int32 connectionId = INVALID_CONNECTION_ID; @@ -309,7 +312,7 @@ ReceiveRegularFile(const char *nodeName, uint32 nodePort, } else if (copyStatus == CLIENT_COPY_MORE) { - ; /* remote node will continue to send more data */ + /* remote node will continue to send more data */ } else { @@ -468,7 +471,7 @@ worker_fetch_foreign_file(PG_FUNCTION_ARGS) static void FetchTableCommon(text *tableNameText, uint64 remoteTableSize, ArrayType *nodeNameObject, ArrayType *nodePortObject, - bool (*FetchTableFunction) (const char *, uint32, StringInfo)) + bool (*FetchTableFunction)(const char *, uint32, StringInfo)) { StringInfo tableName = NULL; char *tableNameCString = NULL; @@ -531,7 +534,7 @@ FetchTableCommon(text *tableNameText, uint64 remoteTableSize, if (remoteTableSize > localTableSize) { /* table is not up to date, drop the table */ - ObjectAddress tableObject = {InvalidOid, InvalidOid, 0}; + ObjectAddress tableObject = { InvalidOid, InvalidOid, 0 }; tableObject.classId = RelationRelationId; tableObject.objectId = relationId; @@ -554,7 +557,7 @@ FetchTableCommon(text *tableNameText, uint64 remoteTableSize, char *nodeName = TextDatumGetCString(nodeNameDatum); uint32 nodePort = DatumGetUInt32(nodePortDatum); - tableFetched = (*FetchTableFunction) (nodeName, nodePort, tableName); + 
tableFetched = (*FetchTableFunction)(nodeName, nodePort, tableName); nodeIndex++; } @@ -1010,7 +1013,7 @@ worker_append_table_to_shard(PG_FUNCTION_ARGS) * the transaction for this function commits, this lock will automatically * be released. This ensures appends to a shard happen in a serial manner. */ - shardId = ExtractShardId(shardNameString); + shardId = ExtractShardId(shardNameString); LockShardResource(shardId, AccessExclusiveLock); localFilePath = makeStringInfo(); @@ -1049,7 +1052,7 @@ worker_append_table_to_shard(PG_FUNCTION_ARGS) static bool check_log_statement(List *statementList) { - ListCell *statementCell; + ListCell *statementCell; if (log_statement == LOGSTMT_NONE) { diff --git a/src/backend/distributed/worker/worker_file_access_protocol.c b/src/backend/distributed/worker/worker_file_access_protocol.c index 04deb2881..6c9eb18bd 100644 --- a/src/backend/distributed/worker/worker_file_access_protocol.c +++ b/src/backend/distributed/worker/worker_file_access_protocol.c @@ -40,22 +40,22 @@ worker_foreign_file_path(PG_FUNCTION_ARGS) ForeignTable *foreignTable = GetForeignTable(relationId); ListCell *optionCell = NULL; - foreach(optionCell, foreignTable->options) - { - DefElem *option = (DefElem *) lfirst(optionCell); + foreach(optionCell, foreignTable->options) + { + DefElem *option = (DefElem *) lfirst(optionCell); char *optionName = option->defname; - int compareResult = strncmp(optionName, FOREIGN_FILENAME_OPTION, MAXPGPATH); - if (compareResult == 0) - { - char *optionValue = defGetString(option); - foreignFilePath = cstring_to_text(optionValue); - break; - } - } + int compareResult = strncmp(optionName, FOREIGN_FILENAME_OPTION, MAXPGPATH); + if (compareResult == 0) + { + char *optionValue = defGetString(option); + foreignFilePath = cstring_to_text(optionValue); + break; + } + } /* check that we found the filename option */ - if (foreignFilePath == NULL) + if (foreignFilePath == NULL) { char *relationName = get_rel_name(relationId); ereport(ERROR, 
(errmsg("could not find filename for foreign table: \"%s\"", diff --git a/src/backend/distributed/worker/worker_merge_protocol.c b/src/backend/distributed/worker/worker_merge_protocol.c index ee829e342..92afb1fb1 100644 --- a/src/backend/distributed/worker/worker_merge_protocol.c +++ b/src/backend/distributed/worker/worker_merge_protocol.c @@ -133,7 +133,7 @@ worker_merge_files_and_run_query(PG_FUNCTION_ARGS) const char *createMergeTableQuery = text_to_cstring(createMergeTableQueryText); const char *createIntermediateTableQuery = - text_to_cstring(createIntermediateTableQueryText); + text_to_cstring(createIntermediateTableQueryText); StringInfo taskDirectoryName = TaskDirectoryName(jobId, taskId); StringInfo jobSchemaName = JobSchemaName(jobId); @@ -170,14 +170,14 @@ worker_merge_files_and_run_query(PG_FUNCTION_ARGS) if (setSearchPathResult < 0) { ereport(ERROR, (errmsg("execution was not successful \"%s\"", - setSearchPathString->data))); + setSearchPathString->data))); } createMergeTableResult = SPI_exec(createMergeTableQuery, 0); if (createMergeTableResult < 0) { ereport(ERROR, (errmsg("execution was not successful \"%s\"", - createMergeTableQuery))); + createMergeTableQuery))); } appendStringInfo(mergeTableName, "%s%s", intermediateTableName->data, @@ -188,7 +188,7 @@ worker_merge_files_and_run_query(PG_FUNCTION_ARGS) if (createIntermediateTableResult < 0) { ereport(ERROR, (errmsg("execution was not successful \"%s\"", - createIntermediateTableQuery))); + createIntermediateTableQuery))); } finished = SPI_finish(); @@ -256,8 +256,8 @@ JobSchemaName(uint64 jobId) */ #ifdef HAVE_INTTYPES_H StringInfo jobSchemaName = makeStringInfo(); - appendStringInfo(jobSchemaName, "%s%0*"PRIu64, - JOB_SCHEMA_PREFIX, MIN_JOB_DIRNAME_WIDTH, jobId); + appendStringInfo(jobSchemaName, "%s%0*" PRIu64, JOB_SCHEMA_PREFIX, + MIN_JOB_DIRNAME_WIDTH, jobId); #else StringInfo jobSchemaName = makeStringInfo(); appendStringInfo(jobSchemaName, "%s%0*llu", diff --git 
a/src/backend/distributed/worker/worker_partition_protocol.c b/src/backend/distributed/worker/worker_partition_protocol.c index c6578295b..9ab21e588 100644 --- a/src/backend/distributed/worker/worker_partition_protocol.c +++ b/src/backend/distributed/worker/worker_partition_protocol.c @@ -59,7 +59,7 @@ static void FileOutputStreamWrite(FileOutputStream file, StringInfo dataToWrite) static void FileOutputStreamFlush(FileOutputStream file); static void FilterAndPartitionTable(const char *filterQuery, const char *columnName, Oid columnType, - uint32 (*PartitionIdFunction) (Datum, const void *), + uint32 (*PartitionIdFunction)(Datum, const void *), const void *partitionIdContext, FileOutputStream *partitionFileArray, uint32 fileCount); @@ -105,7 +105,7 @@ worker_range_partition_table(PG_FUNCTION_ARGS) uint32 taskId = PG_GETARG_UINT32(1); text *filterQueryText = PG_GETARG_TEXT_P(2); text *partitionColumnText = PG_GETARG_TEXT_P(3); - Oid partitionColumnType = PG_GETARG_OID(4); + Oid partitionColumnType = PG_GETARG_OID(4); ArrayType *splitPointObject = PG_GETARG_ARRAYTYPE_P(5); const char *filterQuery = text_to_cstring(filterQueryText); @@ -125,7 +125,7 @@ worker_range_partition_table(PG_FUNCTION_ARGS) if (splitPointType != partitionColumnType) { ereport(ERROR, (errmsg("partition column type %u and split point type %u " - "do not match", partitionColumnType, splitPointType))); + "do not match", partitionColumnType, splitPointType))); } /* use column's type information to get the comparison function */ @@ -181,7 +181,7 @@ worker_hash_partition_table(PG_FUNCTION_ARGS) uint32 taskId = PG_GETARG_UINT32(1); text *filterQueryText = PG_GETARG_TEXT_P(2); text *partitionColumnText = PG_GETARG_TEXT_P(3); - Oid partitionColumnType = PG_GETARG_OID(4); + Oid partitionColumnType = PG_GETARG_OID(4); uint32 partitionCount = PG_GETARG_UINT32(5); const char *filterQuery = text_to_cstring(filterQueryText); @@ -463,7 +463,7 @@ JobDirectoryName(uint64 jobId) */ #ifdef HAVE_INTTYPES_H 
StringInfo jobDirectoryName = makeStringInfo(); - appendStringInfo(jobDirectoryName, "base/%s/%s%0*"PRIu64, + appendStringInfo(jobDirectoryName, "base/%s/%s%0*" PRIu64, PG_JOB_CACHE_DIR, JOB_DIRECTORY_PREFIX, MIN_JOB_DIRNAME_WIDTH, jobId); #else @@ -726,7 +726,7 @@ FileOutputStreamFlush(FileOutputStream file) static void FilterAndPartitionTable(const char *filterQuery, const char *partitionColumnName, Oid partitionColumnType, - uint32 (*PartitionIdFunction) (Datum, const void *), + uint32 (*PartitionIdFunction)(Datum, const void *), const void *partitionIdContext, FileOutputStream *partitionFileArray, uint32 fileCount) @@ -794,7 +794,7 @@ FilterAndPartitionTable(const char *filterQuery, FileOutputStream partitionFile = { 0, 0, 0 }; StringInfo rowText = NULL; Datum partitionKey = 0; - bool partitionKeyNull = false; + bool partitionKeyNull = false; uint32 partitionId = 0; partitionKey = SPI_getbinval(row, rowDescriptor, @@ -808,7 +808,7 @@ FilterAndPartitionTable(const char *filterQuery, */ if (!partitionKeyNull) { - partitionId = (*PartitionIdFunction) (partitionKey, partitionIdContext); + partitionId = (*PartitionIdFunction)(partitionKey, partitionIdContext); } else { @@ -926,7 +926,7 @@ InitRowOutputState(void) /* initialize defaults for printing null values */ char *nullPrint = pstrdup("\\N"); - int nullPrintLen = strlen(nullPrint); + int nullPrintLen = strlen(nullPrint); char *nullPrintClient = pg_server_to_any(nullPrint, nullPrintLen, fileEncoding); /* set default text output characters */ @@ -946,7 +946,7 @@ InitRowOutputState(void) } /* set up transcoding information and default text output characters */ - if ( (fileEncoding != databaseEncoding) || (databaseEncodingMaxLength > 1) ) + if ((fileEncoding != databaseEncoding) || (databaseEncodingMaxLength > 1)) { rowOutputState->need_transcoding = true; } @@ -1057,7 +1057,7 @@ OutputRow(HeapTuple row, TupleDesc rowDescriptor, CopySendString(rowOutputState, rowOutputState->null_print_client); } - lastColumn = 
((columnIndex+1) == columnCount); + lastColumn = ((columnIndex + 1) == columnCount); if (!lastColumn) { CopySendChar(rowOutputState, rowOutputState->delim[0]); @@ -1094,9 +1094,9 @@ OutputBinaryHeaders(FileOutputStream *partitionFileArray, uint32 fileCount) { /* Generate header for a binary copy */ const int32 zero = 0; - FileOutputStream partitionFile = {0, 0, 0}; + FileOutputStream partitionFile = { 0, 0, 0 }; PartialCopyStateData headerOutputStateData; - PartialCopyState headerOutputState = (PartialCopyState) &headerOutputStateData; + PartialCopyState headerOutputState = (PartialCopyState) &headerOutputStateData; memset(headerOutputState, 0, sizeof(PartialCopyStateData)); headerOutputState->fe_msgbuf = makeStringInfo(); @@ -1128,9 +1128,9 @@ OutputBinaryFooters(FileOutputStream *partitionFileArray, uint32 fileCount) { /* Generate footer for a binary copy */ int16 negative = -1; - FileOutputStream partitionFile = {0, 0, 0}; + FileOutputStream partitionFile = { 0, 0, 0 }; PartialCopyStateData footerOutputStateData; - PartialCopyState footerOutputState = (PartialCopyState) &footerOutputStateData; + PartialCopyState footerOutputState = (PartialCopyState) &footerOutputStateData; memset(footerOutputState, 0, sizeof(PartialCopyStateData)); footerOutputState->fe_msgbuf = makeStringInfo(); @@ -1359,7 +1359,7 @@ RangePartitionId(Datum partitionValue, const void *context) currentLength = currentLength - halfLength - 1; } } - + return firstIndex; } diff --git a/src/include/distributed/citus_ruleutils.h b/src/include/distributed/citus_ruleutils.h index 0f2402748..92e1777e7 100644 --- a/src/include/distributed/citus_ruleutils.h +++ b/src/include/distributed/citus_ruleutils.h @@ -2,7 +2,7 @@ * * citus_ruleutils.h * CitusDB ruleutils wrapper functions and exported PostgreSQL ruleutils - * functions. + * functions. * * Copyright (c) 2012-2015, Citus Data, Inc. 
*------------------------------------------------------------------------- @@ -16,16 +16,17 @@ /* Function declarations for version independent CitusDB ruleutils wrapper functions */ -extern char *pg_get_extensiondef_string(Oid tableRelationId); -extern char *pg_get_serverdef_string(Oid tableRelationId); -extern char *pg_get_tableschemadef_string(Oid tableRelationId); -extern char *pg_get_tablecolumnoptionsdef_string(Oid tableRelationId); -extern char *pg_get_indexclusterdef_string(Oid indexRelationId); +extern char * pg_get_extensiondef_string(Oid tableRelationId); +extern char * pg_get_serverdef_string(Oid tableRelationId); +extern char * pg_get_tableschemadef_string(Oid tableRelationId); +extern char * pg_get_tablecolumnoptionsdef_string(Oid tableRelationId); +extern char * pg_get_indexclusterdef_string(Oid indexRelationId); /* Function declarations for version dependent PostgreSQL ruleutils functions */ -extern void pg_get_query_def(Query *query, StringInfo buffer); -extern void deparse_shard_query(Query *query, Oid distrelid, int64 shardid, StringInfo buffer); -extern char *generate_relation_name(Oid relid, List *namespaces); +extern void pg_get_query_def(Query *query, StringInfo buffer); +extern void deparse_shard_query(Query *query, Oid distrelid, int64 shardid, StringInfo + buffer); +extern char * generate_relation_name(Oid relid, List *namespaces); #endif /* CITUS_RULEUTILS_H */ diff --git a/src/include/distributed/master_metadata_utility.h b/src/include/distributed/master_metadata_utility.h index b3cfb6548..4fb377571 100644 --- a/src/include/distributed/master_metadata_utility.h +++ b/src/include/distributed/master_metadata_utility.h @@ -30,15 +30,14 @@ typedef struct ShardInterval CitusNodeTag type; Oid relationId; char storageType; - Oid valueTypeId; /* min/max value datum's typeId */ - int valueTypeLen; /* min/max value datum's typelen */ - bool valueByVal; /* min/max value datum's byval */ + Oid valueTypeId; /* min/max value datum's typeId */ + int 
valueTypeLen; /* min/max value datum's typelen */ + bool valueByVal; /* min/max value datum's byval */ bool minValueExists; bool maxValueExists; - Datum minValue; /* a shard's typed min value datum */ - Datum maxValue; /* a shard's typed max value datum */ + Datum minValue; /* a shard's typed min value datum */ + Datum maxValue; /* a shard's typed max value datum */ uint64 shardId; - } ShardInterval; @@ -46,13 +45,12 @@ typedef struct ShardInterval typedef struct ShardPlacement { CitusNodeTag type; - Oid tupleOid; /* unique oid that implies this row's insertion order */ + Oid tupleOid; /* unique oid that implies this row's insertion order */ uint64 shardId; uint64 shardLength; RelayFileState shardState; char *nodeName; uint32 nodePort; - } ShardPlacement; diff --git a/src/include/distributed/master_protocol.h b/src/include/distributed/master_protocol.h index f39ce865b..94344de0c 100644 --- a/src/include/distributed/master_protocol.h +++ b/src/include/distributed/master_protocol.h @@ -49,10 +49,10 @@ #define SHARDID_SEQUENCE_NAME "pg_dist_shardid_seq" /* Remote call definitions to help with data staging and deletion */ -#define WORKER_APPLY_SHARD_DDL_COMMAND "SELECT worker_apply_shard_ddl_command \ - ("UINT64_FORMAT", %s)" -#define WORKER_APPEND_TABLE_TO_SHARD "SELECT worker_append_table_to_shard \ - (%s, %s, %s, %u)" +#define WORKER_APPLY_SHARD_DDL_COMMAND \ + "SELECT worker_apply_shard_ddl_command (" UINT64_FORMAT ", %s)" +#define WORKER_APPEND_TABLE_TO_SHARD \ + "SELECT worker_append_table_to_shard (%s, %s, %s, %u)" #define SHARD_MIN_VALUE_QUERY "SELECT min(%s) FROM %s" #define SHARD_MAX_VALUE_QUERY "SELECT max(%s) FROM %s" #define SHARD_TABLE_SIZE_QUERY "SELECT pg_table_size('%s')" @@ -67,7 +67,6 @@ typedef enum SHARD_PLACEMENT_INVALID_FIRST = 0, SHARD_PLACEMENT_LOCAL_NODE_FIRST = 1, SHARD_PLACEMENT_ROUND_ROBIN = 2 - } ShardPlacementPolicyType; @@ -83,8 +82,8 @@ extern Oid ResolveRelationId(text *relationName); extern List * GetTableDDLEvents(Oid relationId); 
extern void CheckDistributedTable(Oid relationId); extern void CreateShardPlacements(int64 shardId, List *ddlEventList, - List *workerNodeList, int workerStartIndex, - int replicationFactor); + List *workerNodeList, int workerStartIndex, + int replicationFactor); /* Function declarations for generating metadata for shard creation */ extern Datum master_get_table_metadata(PG_FUNCTION_ARGS); diff --git a/src/include/distributed/modify_planner.h b/src/include/distributed/modify_planner.h index 3e52cfa5a..bd6df4755 100644 --- a/src/include/distributed/modify_planner.h +++ b/src/include/distributed/modify_planner.h @@ -24,6 +24,7 @@ #define INVALID_TASK_ID 0 #if (PG_VERSION_NUM >= 90500) + /* reserved alias name for UPSERTs */ #define UPSERT_ALIAS "citus_table_alias" #endif diff --git a/src/include/distributed/multi_client_executor.h b/src/include/distributed/multi_client_executor.h index 470b53673..0f1203031 100644 --- a/src/include/distributed/multi_client_executor.h +++ b/src/include/distributed/multi_client_executor.h @@ -15,21 +15,20 @@ #define MULTI_CLIENT_EXECUTOR_H -#define INVALID_CONNECTION_ID -1 /* identifies an invalid connection */ -#define CLIENT_CONNECT_TIMEOUT 5 /* connection timeout in seconds */ +#define INVALID_CONNECTION_ID -1 /* identifies an invalid connection */ +#define CLIENT_CONNECT_TIMEOUT 5 /* connection timeout in seconds */ #define MAX_CONNECTION_COUNT 2048 /* simultaneous client connection count */ -#define STRING_BUFFER_SIZE 1024 /* buffer size for character arrays */ +#define STRING_BUFFER_SIZE 1024 /* buffer size for character arrays */ #define CONN_INFO_TEMPLATE "host=%s port=%u dbname=%s connect_timeout=%u" /* Enumeration to track one client connection's status */ typedef enum { - CLIENT_INVALID_CONNECT = 0, - CLIENT_CONNECTION_BAD = 1, - CLIENT_CONNECTION_BUSY = 2, + CLIENT_INVALID_CONNECT = 0, + CLIENT_CONNECTION_BAD = 1, + CLIENT_CONNECTION_BUSY = 2, CLIENT_CONNECTION_READY = 3 - } ConnectStatus; @@ -38,9 +37,8 @@ typedef enum { 
CLIENT_INVALID_RESULT_STATUS = 0, CLIENT_RESULT_UNAVAILABLE = 1, - CLIENT_RESULT_BUSY = 2, + CLIENT_RESULT_BUSY = 2, CLIENT_RESULT_READY = 3 - } ResultStatus; @@ -48,10 +46,9 @@ typedef enum typedef enum { CLIENT_INVALID_QUERY = 0, - CLIENT_QUERY_FAILED = 1, + CLIENT_QUERY_FAILED = 1, CLIENT_QUERY_DONE = 2, CLIENT_QUERY_COPY = 3 - } QueryStatus; @@ -59,21 +56,19 @@ typedef enum typedef enum { CLIENT_INVALID_COPY = 0, - CLIENT_COPY_MORE = 1, - CLIENT_COPY_FAILED = 2, - CLIENT_COPY_DONE = 3 - + CLIENT_COPY_MORE = 1, + CLIENT_COPY_FAILED = 2, + CLIENT_COPY_DONE = 3 } CopyStatus; /* Enumeration to track the status of a query in a batch on the client */ typedef enum { - CLIENT_INVALID_BATCH_QUERY = 0, - CLIENT_BATCH_QUERY_FAILED = 1, + CLIENT_INVALID_BATCH_QUERY = 0, + CLIENT_BATCH_QUERY_FAILED = 1, CLIENT_BATCH_QUERY_CONTINUE = 2, - CLIENT_BATCH_QUERY_DONE = 3 - + CLIENT_BATCH_QUERY_DONE = 3 } BatchQueryStatus; diff --git a/src/include/distributed/multi_executor.h b/src/include/distributed/multi_executor.h index bcf22fe2e..b777858d8 100644 --- a/src/include/distributed/multi_executor.h +++ b/src/include/distributed/multi_executor.h @@ -14,12 +14,12 @@ #include "nodes/parsenodes.h" /* signal currently executed statement is a master select statement or router execution */ -#define EXEC_FLAG_CITUS_MASTER_SELECT 0x100 -#define EXEC_FLAG_CITUS_ROUTER_EXECUTOR 0x200 +#define EXEC_FLAG_CITUS_MASTER_SELECT 0x100 +#define EXEC_FLAG_CITUS_ROUTER_EXECUTOR 0x200 extern void multi_ExecutorStart(QueryDesc *queryDesc, int eflags); extern void multi_ExecutorRun(QueryDesc *queryDesc, - ScanDirection direction, long count); + ScanDirection direction, long count); extern void multi_ExecutorFinish(QueryDesc *queryDesc); extern void multi_ExecutorEnd(QueryDesc *queryDesc); diff --git a/src/include/distributed/multi_join_order.h b/src/include/distributed/multi_join_order.h index cccde6917..56ede010b 100644 --- a/src/include/distributed/multi_join_order.h +++ 
b/src/include/distributed/multi_join_order.h @@ -29,7 +29,7 @@ typedef enum JoinRuleType { JOIN_RULE_INVALID_FIRST = 0, BROADCAST_JOIN = 1, - LOCAL_PARTITION_JOIN = 2, + LOCAL_PARTITION_JOIN = 2, SINGLE_PARTITION_JOIN = 3, DUAL_PARTITION_JOIN = 4, CARTESIAN_PRODUCT = 5, @@ -40,7 +40,6 @@ typedef enum JoinRuleType * RuleNameArray. */ JOIN_RULE_LAST - } JoinRuleType; @@ -53,7 +52,6 @@ typedef struct TableEntry { Oid relationId; uint32 rangeTableId; - } TableEntry; @@ -65,14 +63,13 @@ typedef struct TableEntry */ typedef struct JoinOrderNode { - TableEntry *tableEntry; /* this node's relation and range table id */ - JoinRuleType joinRuleType; /* not relevant for the first table */ - JoinType joinType; /* not relevant for the first table */ - Var *partitionColumn; /* not relevant for the first table */ + TableEntry *tableEntry; /* this node's relation and range table id */ + JoinRuleType joinRuleType; /* not relevant for the first table */ + JoinType joinType; /* not relevant for the first table */ + Var *partitionColumn; /* not relevant for the first table */ char partitionMethod; - List *joinClauseList; /* not relevant for the first table */ + List *joinClauseList; /* not relevant for the first table */ List *shardIntervalList; - } JoinOrderNode; diff --git a/src/include/distributed/multi_logical_optimizer.h b/src/include/distributed/multi_logical_optimizer.h index f8c145751..4efc1bfed 100644 --- a/src/include/distributed/multi_logical_optimizer.h +++ b/src/include/distributed/multi_logical_optimizer.h @@ -44,7 +44,7 @@ * * Please note that the order of values in this enumeration is tied to the order * of elements in the following AggregateNames array. This order needs to be - * preserved. + * preserved. 
*/ typedef enum { @@ -55,7 +55,6 @@ typedef enum AGGREGATE_SUM = 4, AGGREGATE_COUNT = 5, AGGREGATE_ARRAY_AGG = 6 - } AggregateType; @@ -69,7 +68,6 @@ typedef enum PUSH_DOWN_VALID = 1, PUSH_DOWN_NOT_VALID = 2, PUSH_DOWN_SPECIAL_CONDITIONS = 3 - } PushDownStatus; @@ -82,7 +80,6 @@ typedef enum PULL_UP_INVALID_FIRST = 0, PULL_UP_VALID = 1, PULL_UP_NOT_VALID = 2 - } PullUpStatus; @@ -97,8 +94,10 @@ typedef enum * Please note that the order of elements in this array is tied to the order of * values in the preceding AggregateType enum. This order needs to be preserved. */ -static const char * const AggregateNames[] = { "invalid", "avg", "min", "max", - "sum", "count", "array_agg" }; +static const char *const AggregateNames[] = { + "invalid", "avg", "min", "max", "sum", + "count", "array_agg" +}; /* Config variable managed via guc.c */ diff --git a/src/include/distributed/multi_logical_planner.h b/src/include/distributed/multi_logical_planner.h index 15b0cba07..40ee39c70 100644 --- a/src/include/distributed/multi_logical_planner.h +++ b/src/include/distributed/multi_logical_planner.h @@ -40,8 +40,8 @@ typedef struct MultiNode CitusNodeTag type; struct MultiNode *parentNode; - /* child node(s) are defined in unary and binary nodes */ + /* child node(s) are defined in unary and binary nodes */ } MultiNode; @@ -51,7 +51,6 @@ typedef struct MultiUnaryNode MultiNode node; struct MultiNode *childNode; - } MultiUnaryNode; @@ -62,7 +61,6 @@ typedef struct MultiBinaryNode struct MultiNode *leftChildNode; struct MultiNode *rightChildNode; - } MultiBinaryNode; @@ -73,7 +71,6 @@ typedef struct MultiBinaryNode typedef struct MultiTreeRoot { MultiUnaryNode unaryNode; - } MultiTreeRoot; @@ -91,7 +88,6 @@ typedef struct MultiTable Alias *alias; Alias *referenceNames; Query *subquery; /* this field is only valid for non-relation subquery types */ - } MultiTable; @@ -100,7 +96,6 @@ typedef struct MultiProject { MultiUnaryNode unaryNode; List *columnList; - } MultiProject; @@ -112,7 +107,6 
@@ typedef struct MultiProject typedef struct MultiCollect { MultiUnaryNode unaryNode; - } MultiCollect; @@ -125,7 +119,6 @@ typedef struct MultiSelect { MultiUnaryNode unaryNode; List *selectClauseList; - } MultiSelect; @@ -140,7 +133,6 @@ typedef struct MultiJoin List *joinClauseList; JoinRuleType joinRuleType; JoinType joinType; - } MultiJoin; @@ -150,7 +142,6 @@ typedef struct MultiPartition MultiUnaryNode unaryNode; Var *partitionColumn; uint32 splitPointTableId; - } MultiPartition; @@ -158,7 +149,6 @@ typedef struct MultiPartition typedef struct MultiCartesianProduct { MultiBinaryNode binaryNode; - } MultiCartesianProduct; @@ -183,7 +173,6 @@ typedef struct MultiExtendedOp List *sortClauseList; Node *limitCount; Node *limitOffset; - } MultiExtendedOp; diff --git a/src/include/distributed/multi_physical_planner.h b/src/include/distributed/multi_physical_planner.h index 09d28e952..3ad053b22 100644 --- a/src/include/distributed/multi_physical_planner.h +++ b/src/include/distributed/multi_physical_planner.h @@ -2,7 +2,7 @@ * * multi_physical_planner.h * Type and function declarations used in creating the distributed execution - * plan. + * plan. * * Copyright (c) 2012, Citus Data, Inc. * @@ -40,17 +40,18 @@ (" UINT64_FORMAT ", %d, %s, '%s', %d, %d)" #define MERGE_FILES_INTO_TABLE_COMMAND "SELECT worker_merge_files_into_table \ (" UINT64_FORMAT ", %d, '%s', '%s')" -#define MERGE_FILES_AND_RUN_QUERY_COMMAND "SELECT worker_merge_files_and_run_query(" UINT64_FORMAT ", %d, '%s', '%s')" +#define MERGE_FILES_AND_RUN_QUERY_COMMAND \ + "SELECT worker_merge_files_and_run_query(" UINT64_FORMAT ", %d, '%s', '%s')" typedef enum CitusRTEKind { - CITUS_RTE_RELATION = RTE_RELATION, /* ordinary relation reference */ - CITUS_RTE_SUBQUERY = RTE_SUBQUERY, /* subquery in FROM */ - CITUS_RTE_JOIN = RTE_JOIN, /* join */ - CITUS_RTE_FUNCTION = RTE_FUNCTION, /* function in FROM */ - CITUS_RTE_VALUES = RTE_VALUES, /* VALUES (), (), ... 
*/ - CITUS_RTE_CTE = RTE_CTE, /* common table expr (WITH list element) */ + CITUS_RTE_RELATION = RTE_RELATION, /* ordinary relation reference */ + CITUS_RTE_SUBQUERY = RTE_SUBQUERY, /* subquery in FROM */ + CITUS_RTE_JOIN = RTE_JOIN, /* join */ + CITUS_RTE_FUNCTION = RTE_FUNCTION, /* function in FROM */ + CITUS_RTE_VALUES = RTE_VALUES, /* VALUES (), (), ... */ + CITUS_RTE_CTE = RTE_CTE, /* common table expr (WITH list element) */ CITUS_RTE_SHARD, CITUS_RTE_REMOTE_QUERY } CitusRTEKind; @@ -61,8 +62,7 @@ typedef enum { PARTITION_INVALID_FIRST = 0, RANGE_PARTITION_TYPE = 1, - HASH_PARTITION_TYPE = 2 - + HASH_PARTITION_TYPE = 2 } PartitionType; @@ -77,7 +77,6 @@ typedef enum MAP_OUTPUT_FETCH_TASK = 5, MERGE_FETCH_TASK = 6, MODIFY_TASK = 7 - } TaskType; @@ -88,7 +87,6 @@ typedef enum TASK_ASSIGNMENT_GREEDY = 1, TASK_ASSIGNMENT_ROUND_ROBIN = 2, TASK_ASSIGNMENT_FIRST_REPLICA = 3 - } TaskAssignmentPolicyType; @@ -99,7 +97,6 @@ typedef enum JOIN_MAP_MERGE_JOB = 1, SUBQUERY_MAP_MERGE_JOB = 2, TOP_LEVEL_WORKER_JOB = 3 - } BoundaryNodeJobType; @@ -133,7 +130,6 @@ typedef struct MapMergeJob ShardInterval **sortedShardIntervalArray; /* only applies to range partitioning */ List *mapTaskList; List *mergeTaskList; - } MapMergeJob; @@ -153,18 +149,17 @@ typedef struct Task uint64 jobId; uint32 taskId; char *queryString; - uint64 anchorShardId; /* only applies to compute tasks */ - List *taskPlacementList; /* only applies to compute tasks */ - List *dependedTaskList; /* only applies to compute tasks */ + uint64 anchorShardId; /* only applies to compute tasks */ + List *taskPlacementList; /* only applies to compute tasks */ + List *dependedTaskList; /* only applies to compute tasks */ uint32 partitionId; - uint32 upstreamTaskId; /* only applies to data fetch tasks */ + uint32 upstreamTaskId; /* only applies to data fetch tasks */ ShardInterval *shardInterval; /* only applies to merge tasks */ bool assignmentConstrained; /* only applies to merge tasks */ - uint64 shardId; /* only 
applies to shard fetch tasks */ + uint64 shardId; /* only applies to shard fetch tasks */ TaskExecution *taskExecution; /* used by task tracker executor */ - bool upsertQuery; /* only applies to modify tasks */ - + bool upsertQuery; /* only applies to modify tasks */ } Task; @@ -177,7 +172,6 @@ typedef struct RangeTableFragment CitusRTEKind fragmentType; void *fragmentReference; uint32 rangeTableId; - } RangeTableFragment; @@ -190,7 +184,6 @@ typedef struct JoinSequenceNode { uint32 rangeTableId; int32 joiningRangeTableId; - } JoinSequenceNode; @@ -203,7 +196,6 @@ typedef struct MultiPlan Job *workerJob; Query *masterQuery; char *masterTableName; - } MultiPlan; diff --git a/src/include/distributed/multi_planner.h b/src/include/distributed/multi_planner.h index a14b2b65e..c3e2511e8 100644 --- a/src/include/distributed/multi_planner.h +++ b/src/include/distributed/multi_planner.h @@ -13,8 +13,8 @@ #include "nodes/plannodes.h" #include "nodes/relation.h" -extern PlannedStmt *multi_planner(Query *parse, int cursorOptions, - ParamListInfo boundParams); +extern PlannedStmt * multi_planner(Query *parse, int cursorOptions, + ParamListInfo boundParams); extern bool HasCitusToplevelNode(PlannedStmt *planStatement); struct MultiPlan; diff --git a/src/include/distributed/multi_server_executor.h b/src/include/distributed/multi_server_executor.h index 83105cc54..e6e17f566 100644 --- a/src/include/distributed/multi_server_executor.h +++ b/src/include/distributed/multi_server_executor.h @@ -20,9 +20,9 @@ #define MAX_TASK_EXECUTION_FAILURES 3 /* allowed failure count for one task */ -#define MAX_TRACKER_FAILURE_COUNT 3 /* allowed failure count for one tracker */ +#define MAX_TRACKER_FAILURE_COUNT 3 /* allowed failure count for one tracker */ #define REMOTE_NODE_CONNECT_TIMEOUT 4000 /* async connect timeout in ms */ -#define RESERVED_FD_COUNT 64 /* file descriptors unavailable to executor */ +#define RESERVED_FD_COUNT 64 /* file descriptors unavailable to executor */ /* copy out 
query results */ #define COPY_QUERY_TO_STDOUT_TEXT "COPY (%s) TO STDOUT" @@ -32,9 +32,9 @@ /* Task tracker executor related defines */ #define TASK_ASSIGNMENT_QUERY "SELECT task_tracker_assign_task \ - ("UINT64_FORMAT", %u, %s)" -#define TASK_STATUS_QUERY "SELECT task_tracker_task_status("UINT64_FORMAT", %u)" -#define JOB_CLEANUP_QUERY "SELECT task_tracker_cleanup_job("UINT64_FORMAT")" + ("UINT64_FORMAT ", %u, %s)" +#define TASK_STATUS_QUERY "SELECT task_tracker_task_status("UINT64_FORMAT ", %u)" +#define JOB_CLEANUP_QUERY "SELECT task_tracker_cleanup_job("UINT64_FORMAT ")" #define JOB_CLEANUP_TASK_ID INT_MAX @@ -43,9 +43,9 @@ typedef enum { EXEC_TASK_INVALID_FIRST = 0, EXEC_TASK_CONNECT_START = 1, - EXEC_TASK_CONNECT_POLL = 2, + EXEC_TASK_CONNECT_POLL = 2, EXEC_TASK_FAILED = 3, - EXEC_FETCH_TASK_LOOP = 4, + EXEC_FETCH_TASK_LOOP = 4, EXEC_FETCH_TASK_START = 5, EXEC_FETCH_TASK_RUNNING = 6, EXEC_COMPUTE_TASK_START = 7, @@ -60,7 +60,6 @@ typedef enum EXEC_TASK_TRACKER_FAILED = 14, EXEC_SOURCE_TASK_TRACKER_RETRY = 15, EXEC_SOURCE_TASK_TRACKER_FAILED = 16 - } TaskExecStatus; @@ -74,7 +73,6 @@ typedef enum EXEC_TRANSMIT_TRACKER_RETRY = 4, EXEC_TRANSMIT_TRACKER_FAILED = 5, EXEC_TRANSMIT_DONE = 6 - } TransmitExecStatus; @@ -86,7 +84,6 @@ typedef enum TRACKER_CONNECT_POLL = 2, TRACKER_CONNECTED = 3, TRACKER_CONNECTION_FAILED = 4 - } TrackerStatus; @@ -97,7 +94,6 @@ typedef enum MULTI_EXECUTOR_REAL_TIME = 1, MULTI_EXECUTOR_TASK_TRACKER = 2, MULTI_EXECUTOR_ROUTER = 3 - } MultiExecutorType; @@ -107,7 +103,6 @@ typedef enum CONNECT_ACTION_NONE = 0, CONNECT_ACTION_OPENED = 1, CONNECT_ACTION_CLOSED = 2 - } ConnectAction; @@ -132,7 +127,6 @@ struct TaskExecution uint32 querySourceNodeIndex; /* only applies to map fetch tasks */ int32 dataFetchTaskIndex; uint32 failureCount; - }; @@ -147,7 +141,6 @@ typedef struct TrackerTaskState uint32 taskId; TaskStatus status; StringInfo taskAssignmentQuery; - } TrackerTaskState; @@ -158,7 +151,7 @@ typedef struct TrackerTaskState */ typedef 
struct TaskTracker { - uint32 workerPort; /* node's port; part of hash table key */ + uint32 workerPort; /* node's port; part of hash table key */ char workerName[WORKER_LENGTH]; /* node's name; part of hash table key */ TrackerStatus trackerStatus; int32 connectionId; @@ -171,7 +164,6 @@ typedef struct TaskTracker int32 currentTaskIndex; bool connectionBusy; TrackerTaskState *connectionBusyOnTask; - } TaskTracker; @@ -184,7 +176,6 @@ typedef struct WorkerNodeState uint32 workerPort; char workerName[WORKER_LENGTH]; uint32 openConnectionCount; - } WorkerNodeState; diff --git a/src/include/distributed/pg_dist_partition.h b/src/include/distributed/pg_dist_partition.h index d3db82638..d277bc8ce 100644 --- a/src/include/distributed/pg_dist_partition.h +++ b/src/include/distributed/pg_dist_partition.h @@ -21,9 +21,9 @@ */ typedef struct FormData_pg_dist_partition { - Oid logicalrelid; /* logical relation id; references pg_class oid */ - char partmethod; /* partition method; see codes below */ - text partkey; /* partition key expression */ + Oid logicalrelid; /* logical relation id; references pg_class oid */ + char partmethod; /* partition method; see codes below */ + text partkey; /* partition key expression */ } FormData_pg_dist_partition; /* ---------------- @@ -37,16 +37,16 @@ typedef FormData_pg_dist_partition *Form_pg_dist_partition; * compiler constants for pg_dist_partitions * ---------------- */ -#define Natts_pg_dist_partition 3 -#define Anum_pg_dist_partition_logicalrelid 1 -#define Anum_pg_dist_partition_partmethod 2 -#define Anum_pg_dist_partition_partkey 3 +#define Natts_pg_dist_partition 3 +#define Anum_pg_dist_partition_logicalrelid 1 +#define Anum_pg_dist_partition_partmethod 2 +#define Anum_pg_dist_partition_partkey 3 /* valid values for partmethod include append, hash, and range */ -#define DISTRIBUTE_BY_APPEND 'a' -#define DISTRIBUTE_BY_HASH 'h' -#define DISTRIBUTE_BY_RANGE 'r' -#define REDISTRIBUTE_BY_HASH 'x' +#define DISTRIBUTE_BY_APPEND 'a' 
+#define DISTRIBUTE_BY_HASH 'h' +#define DISTRIBUTE_BY_RANGE 'r' +#define REDISTRIBUTE_BY_HASH 'x' #endif /* PG_DIST_PARTITION_H */ diff --git a/src/include/distributed/pg_dist_shard.h b/src/include/distributed/pg_dist_shard.h index dfe1c86c7..b093bb59d 100644 --- a/src/include/distributed/pg_dist_shard.h +++ b/src/include/distributed/pg_dist_shard.h @@ -22,13 +22,13 @@ */ typedef struct FormData_pg_dist_shard { - Oid logicalrelid; /* logical relation id; references pg_class oid */ - int64 shardid; /* global shardId representing remote partition */ - char shardstorage; /* shard storage type; see codes below */ -#ifdef CATALOG_VARLEN /* variable-length fields start here */ - text shardalias; /* user specified table name for shard, if any */ - text shardminvalue; /* partition key's minimum value in shard */ - text shardmaxvalue; /* partition key's maximum value in shard */ + Oid logicalrelid; /* logical relation id; references pg_class oid */ + int64 shardid; /* global shardId representing remote partition */ + char shardstorage; /* shard storage type; see codes below */ +#ifdef CATALOG_VARLEN /* variable-length fields start here */ + text shardalias; /* user specified table name for shard, if any */ + text shardminvalue; /* partition key's minimum value in shard */ + text shardmaxvalue; /* partition key's maximum value in shard */ #endif } FormData_pg_dist_shard; @@ -43,22 +43,22 @@ typedef FormData_pg_dist_shard *Form_pg_dist_shard; * compiler constants for pg_dist_shards * ---------------- */ -#define Natts_pg_dist_shard 6 -#define Anum_pg_dist_shard_logicalrelid 1 -#define Anum_pg_dist_shard_shardid 2 -#define Anum_pg_dist_shard_shardstorage 3 -#define Anum_pg_dist_shard_shardalias 4 -#define Anum_pg_dist_shard_shardminvalue 5 -#define Anum_pg_dist_shard_shardmaxvalue 6 +#define Natts_pg_dist_shard 6 +#define Anum_pg_dist_shard_logicalrelid 1 +#define Anum_pg_dist_shard_shardid 2 +#define Anum_pg_dist_shard_shardstorage 3 +#define Anum_pg_dist_shard_shardalias 4 
+#define Anum_pg_dist_shard_shardminvalue 5 +#define Anum_pg_dist_shard_shardmaxvalue 6 /* * Valid values for shard storage types include relay file, foreign table, * (standard) table and columnar table. Relay file types are currently unused. */ -#define SHARD_STORAGE_RELAY 'r' -#define SHARD_STORAGE_FOREIGN 'f' -#define SHARD_STORAGE_TABLE 't' -#define SHARD_STORAGE_COLUMNAR 'c' +#define SHARD_STORAGE_RELAY 'r' +#define SHARD_STORAGE_FOREIGN 'f' +#define SHARD_STORAGE_TABLE 't' +#define SHARD_STORAGE_COLUMNAR 'c' #endif /* PG_DIST_SHARD_H */ diff --git a/src/include/distributed/pg_dist_shard_placement.h b/src/include/distributed/pg_dist_shard_placement.h index 505daffa2..955e4efa6 100644 --- a/src/include/distributed/pg_dist_shard_placement.h +++ b/src/include/distributed/pg_dist_shard_placement.h @@ -23,12 +23,12 @@ */ typedef struct FormData_pg_dist_shard_placement { - int64 shardid; /* global shardId on remote node */ - int32 shardstate; /* shard state on remote node; see RelayFileState */ - int64 shardlength; /* shard length on remote node; stored as bigint */ -#ifdef CATALOG_VARLEN /* variable-length fields start here */ - text nodename; /* remote node's host name */ - int32 nodeport; /* remote node's port number */ + int64 shardid; /* global shardId on remote node */ + int32 shardstate; /* shard state on remote node; see RelayFileState */ + int64 shardlength; /* shard length on remote node; stored as bigint */ +#ifdef CATALOG_VARLEN /* variable-length fields start here */ + text nodename; /* remote node's host name */ + int32 nodeport; /* remote node's port number */ #endif } FormData_pg_dist_shard_placement; @@ -43,12 +43,12 @@ typedef FormData_pg_dist_shard_placement *Form_pg_dist_shard_placement; * compiler constants for pg_dist_shard_placement * ---------------- */ -#define Natts_pg_dist_shard_placement 5 -#define Anum_pg_dist_shard_placement_shardid 1 -#define Anum_pg_dist_shard_placement_shardstate 2 -#define Anum_pg_dist_shard_placement_shardlength 3 
-#define Anum_pg_dist_shard_placement_nodename 4 -#define Anum_pg_dist_shard_placement_nodeport 5 +#define Natts_pg_dist_shard_placement 5 +#define Anum_pg_dist_shard_placement_shardid 1 +#define Anum_pg_dist_shard_placement_shardstate 2 +#define Anum_pg_dist_shard_placement_shardlength 3 +#define Anum_pg_dist_shard_placement_nodename 4 +#define Anum_pg_dist_shard_placement_nodeport 5 #endif /* PG_DIST_SHARD_PLACEMENT_H */ diff --git a/src/include/distributed/relay_utility.h b/src/include/distributed/relay_utility.h index 592f61632..bd4657a01 100644 --- a/src/include/distributed/relay_utility.h +++ b/src/include/distributed/relay_utility.h @@ -3,7 +3,7 @@ * relay_utility.h * * Header and type declarations that extend relation, index and constraint names - * with the appropriate shard identifiers. + * with the appropriate shard identifiers. * * Copyright (c) 2012, Citus Data, Inc. * @@ -35,7 +35,6 @@ typedef enum FILE_CACHED = 2, FILE_INACTIVE = 3, FILE_TO_DELETE = 4 - } RelayFileState; diff --git a/src/include/distributed/resource_lock.h b/src/include/distributed/resource_lock.h index 6c1c8ffcf..1406da9c5 100644 --- a/src/include/distributed/resource_lock.h +++ b/src/include/distributed/resource_lock.h @@ -29,6 +29,7 @@ typedef enum AdvisoryLocktagClass /* values defined in postgres' lockfuncs.c */ ADV_LOCKTAG_CLASS_INT64 = 1, ADV_LOCKTAG_CLASS_INT32 = 2, + /* CitusDB lock types */ ADV_LOCKTAG_CLASS_CITUS_SHARD_METADATA = 4, ADV_LOCKTAG_CLASS_CITUS_SHARD = 5, diff --git a/src/include/distributed/task_tracker.h b/src/include/distributed/task_tracker.h index ad41b8589..2fc657d49 100644 --- a/src/include/distributed/task_tracker.h +++ b/src/include/distributed/task_tracker.h @@ -19,10 +19,10 @@ #include "utils/hsearch.h" -#define HIGH_PRIORITY_TASK_TIME 1 /* assignment time for high priority tasks */ -#define RESERVED_JOB_ID 1 /* reserved for cleanup and shutdown tasks */ +#define HIGH_PRIORITY_TASK_TIME 1 /* assignment time for high priority tasks */ +#define 
RESERVED_JOB_ID 1 /* reserved for cleanup and shutdown tasks */ #define SHUTDOWN_MARKER_TASK_ID UINT_MAX /* used to identify task tracker shutdown */ -#define MAX_TASK_FAILURE_COUNT 2 /* allowed failure count for one task */ +#define MAX_TASK_FAILURE_COUNT 2 /* allowed failure count for one task */ #define LOCAL_HOST_NAME "localhost" /* connect to local backends using this name */ #define TASK_CALL_STRING_SIZE 12288 /* max length of task call string */ #define TEMPLATE0_NAME "template0" /* skip job schema cleanup for template0 */ @@ -37,13 +37,13 @@ typedef enum { TASK_STATUS_INVALID_FIRST = 0, - TASK_ASSIGNED = 1, /* master node and task tracker */ + TASK_ASSIGNED = 1, /* master node and task tracker */ TASK_SCHEDULED = 2, TASK_RUNNING = 3, - TASK_FAILED = 4, + TASK_FAILED = 4, TASK_PERMANENTLY_FAILED = 5, TASK_SUCCEEDED = 6, - TASK_CANCEL_REQUESTED = 7, /* master node only */ + TASK_CANCEL_REQUESTED = 7, /* master node only */ TASK_CANCELED = 8, TASK_TO_REMOVE = 9, @@ -63,7 +63,6 @@ typedef enum * TASK_STATUS_LAST, should never have their numbers changed. 
*/ TASK_STATUS_LAST - } TaskStatus; @@ -76,16 +75,15 @@ typedef enum */ typedef struct WorkerTask { - uint64 jobId; /* job id (upper 32-bits reserved); part of hash table key */ - uint32 taskId; /* task id; part of hash table key */ + uint64 jobId; /* job id (upper 32-bits reserved); part of hash table key */ + uint32 taskId; /* task id; part of hash table key */ uint32 assignedAt; /* task assignment time in epoch seconds */ char taskCallString[TASK_CALL_STRING_SIZE]; /* query or function call string */ - TaskStatus taskStatus; /* task's current execution status */ - char databaseName[NAMEDATALEN]; /* name to use for local backend connection */ - int32 connectionId; /* connection id to local backend */ - uint32 failureCount; /* number of task failures */ - + TaskStatus taskStatus; /* task's current execution status */ + char databaseName[NAMEDATALEN]; /* name to use for local backend connection */ + int32 connectionId; /* connection id to local backend */ + uint32 failureCount; /* number of task failures */ } WorkerTask; @@ -97,6 +95,7 @@ typedef struct WorkerTasksSharedStateData { /* Hash table shared by the task tracker and task tracker protocol functions */ HTAB *taskHash; + /* Lock protecting workerNodesHash */ LWLock *taskHashLock; } WorkerTasksSharedStateData; diff --git a/src/include/distributed/worker_manager.h b/src/include/distributed/worker_manager.h index 57a38194a..f23a659d0 100644 --- a/src/include/distributed/worker_manager.h +++ b/src/include/distributed/worker_manager.h @@ -43,12 +43,11 @@ */ typedef struct WorkerNode { - uint32 workerPort; /* node's port; part of hash table key */ + uint32 workerPort; /* node's port; part of hash table key */ char workerName[WORKER_LENGTH]; /* node's name; part of hash table key */ char workerRack[WORKER_LENGTH]; /* node's network location */ - bool inWorkerFile; /* is node in current membership file? */ - + bool inWorkerFile; /* is node in current membership file? 
*/ } WorkerNode; diff --git a/src/include/distributed/worker_protocol.h b/src/include/distributed/worker_protocol.h index e797b7396..ac2985f6a 100644 --- a/src/include/distributed/worker_protocol.h +++ b/src/include/distributed/worker_protocol.h @@ -64,8 +64,7 @@ typedef struct RangePartitionContext { FmgrInfo *comparisonFunction; Datum *splitPointArray; - int32 splitPointCount; - + int32 splitPointCount; } RangePartitionContext; @@ -77,7 +76,6 @@ typedef struct HashPartitionContext { FmgrInfo *hashFunction; uint32 partitionCount; - } HashPartitionContext; @@ -88,16 +86,16 @@ typedef struct HashPartitionContext */ typedef struct PartialCopyStateData { - StringInfo fe_msgbuf; /* used for all dests during COPY TO, only for - * dest == COPY_NEW_FE in COPY FROM */ - int file_encoding; /* file or remote side's character encoding */ - bool need_transcoding; /* file encoding diff from server? */ - bool binary; /* binary format? */ - char *null_print; /* NULL marker string (server encoding!) */ - char *null_print_client; /* same converted to file encoding */ - char *delim; /* column delimiter (must be 1 byte) */ + StringInfo fe_msgbuf; /* used for all dests during COPY TO, only for + * dest == COPY_NEW_FE in COPY FROM */ + int file_encoding; /* file or remote side's character encoding */ + bool need_transcoding; /* file encoding diff from server? */ + bool binary; /* binary format? */ + char *null_print; /* NULL marker string (server encoding!) */ + char *null_print_client; /* same converted to file encoding */ + char *delim; /* column delimiter (must be 1 byte) */ - MemoryContext rowcontext; /* per-row evaluation context */ + MemoryContext rowcontext; /* per-row evaluation context */ } PartialCopyStateData; typedef struct PartialCopyStateData *PartialCopyState; @@ -114,7 +112,6 @@ typedef struct FileOutputStream File fileDescriptor; StringInfo fileBuffer; StringInfo filePath; - } FileOutputStream;