From fdb37682b21b26aaa54dde1635c44654429af0f1 Mon Sep 17 00:00:00 2001 From: Jason Petersen Date: Fri, 5 Feb 2016 13:18:54 -0700 Subject: [PATCH 01/12] First formatting attempt Skipped csql, ruleutils, readfuncs, and functions obviously copied from PostgreSQL. Seeing how this looks, then continuing. --- .../commands/create_distributed_table.c | 10 +- src/backend/distributed/commands/transmit.c | 28 +- .../executor/multi_client_executor.c | 8 +- .../distributed/executor/multi_executor.c | 3 +- .../executor/multi_real_time_executor.c | 619 ++++++------ .../executor/multi_router_executor.c | 18 +- .../executor/multi_server_executor.c | 6 +- .../executor/multi_task_tracker_executor.c | 883 +++++++++--------- .../distributed/executor/multi_utility.c | 21 +- .../distributed/master/master_create_shards.c | 2 +- .../master/master_delete_protocol.c | 45 +- .../master/master_metadata_utility.c | 2 +- .../distributed/master/master_node_protocol.c | 35 +- .../master/master_stage_protocol.c | 10 +- .../distributed/planner/modify_planner.c | 8 +- .../distributed/planner/multi_explain.c | 2 +- .../distributed/planner/multi_join_order.c | 73 +- .../planner/multi_logical_optimizer.c | 83 +- .../planner/multi_logical_planner.c | 52 +- .../planner/multi_master_planner.c | 18 +- .../planner/multi_physical_planner.c | 86 +- .../distributed/relay/relay_event_utility.c | 51 +- src/backend/distributed/shared_library_init.c | 45 +- src/backend/distributed/test/fake_fdw.c | 8 +- .../distributed/utils/citus_nodefuncs.c | 7 +- .../distributed/utils/citus_ruleutils.c | 18 +- .../distributed/utils/metadata_cache.c | 48 +- .../distributed/utils/multi_resowner.c | 26 +- src/backend/distributed/utils/resource_lock.c | 10 +- src/backend/distributed/worker/task_tracker.c | 44 +- .../worker/task_tracker_protocol.c | 8 +- .../worker/worker_data_fetch_protocol.c | 25 +- .../worker/worker_file_access_protocol.c | 24 +- .../worker/worker_merge_protocol.c | 12 +- .../worker/worker_partition_protocol.c | 
32 +- src/include/distributed/citus_ruleutils.h | 19 +- .../distributed/master_metadata_utility.h | 14 +- src/include/distributed/master_protocol.h | 13 +- src/include/distributed/modify_planner.h | 1 + .../distributed/multi_client_executor.h | 33 +- src/include/distributed/multi_executor.h | 6 +- src/include/distributed/multi_join_order.h | 15 +- .../distributed/multi_logical_optimizer.h | 11 +- .../distributed/multi_logical_planner.h | 13 +- .../distributed/multi_physical_planner.h | 40 +- src/include/distributed/multi_planner.h | 4 +- .../distributed/multi_server_executor.h | 25 +- src/include/distributed/pg_dist_partition.h | 22 +- src/include/distributed/pg_dist_shard.h | 36 +- .../distributed/pg_dist_shard_placement.h | 24 +- src/include/distributed/relay_utility.h | 3 +- src/include/distributed/resource_lock.h | 1 + src/include/distributed/task_tracker.h | 27 +- src/include/distributed/worker_manager.h | 5 +- src/include/distributed/worker_protocol.h | 23 +- 55 files changed, 1391 insertions(+), 1314 deletions(-) diff --git a/src/backend/distributed/commands/create_distributed_table.c b/src/backend/distributed/commands/create_distributed_table.c index b4a4c802b..691981485 100644 --- a/src/backend/distributed/commands/create_distributed_table.c +++ b/src/backend/distributed/commands/create_distributed_table.c @@ -197,11 +197,11 @@ master_create_distributed_table(PG_FUNCTION_ARGS) if (distributionMethod == DISTRIBUTE_BY_APPEND) { ereport(WARNING, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("table \"%s\" has a unique constraint", - distributedRelationName), - errdetail("Unique constraints and primary keys on " - "append-partitioned tables cannot be enforced."), - errhint("Consider using hash partitioning."))); + errmsg("table \"%s\" has a unique constraint", + distributedRelationName), + errdetail("Unique constraints and primary keys on " + "append-partitioned tables cannot be enforced."), + errhint("Consider using hash partitioning."))); } attributeCount 
= indexInfo->ii_NumIndexAttrs; diff --git a/src/backend/distributed/commands/transmit.c b/src/backend/distributed/commands/transmit.c index 0ab90d0ef..6e0d78136 100644 --- a/src/backend/distributed/commands/transmit.c +++ b/src/backend/distributed/commands/transmit.c @@ -136,7 +136,7 @@ static File FileOpenForTransmit(const char *filename, int fileFlags, int fileMode) { File fileDesc = -1; - int fileStated = -1; + int fileStated = -1; struct stat fileStat; fileStated = stat(filename, &fileStat); @@ -145,7 +145,7 @@ FileOpenForTransmit(const char *filename, int fileFlags, int fileMode) if (S_ISDIR(fileStat.st_mode)) { ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("\"%s\" is a directory", filename))); + errmsg("\"%s\" is a directory", filename))); } } @@ -270,18 +270,28 @@ ReceiveCopyData(StringInfo copyData) switch (messageType) { - case 'd': /* CopyData */ + case 'd': /* CopyData */ + { copyDone = false; break; - case 'c': /* CopyDone */ + } + + case 'c': /* CopyDone */ + { copyDone = true; break; - case 'f': /* CopyFail */ + } + + case 'f': /* CopyFail */ + { ereport(ERROR, (errcode(ERRCODE_QUERY_CANCELED), errmsg("COPY data failed: %s", pq_getmsgstring(copyData)))); break; - case 'H': /* Flush */ - case 'S': /* Sync */ + } + + case 'H': /* Flush */ + case 'S': /* Sync */ + { /* * Ignore Flush/Sync for the convenience of client libraries (such * as libpq) that may send those without noticing that the command @@ -289,11 +299,15 @@ ReceiveCopyData(StringInfo copyData) */ copyDone = false; break; + } + default: + { ereport(ERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("unexpected message type 0x%02X during COPY data", messageType))); break; + } } return copyDone; diff --git a/src/backend/distributed/executor/multi_client_executor.c b/src/backend/distributed/executor/multi_client_executor.c index e6b4ceef7..57ab6c1b9 100644 --- a/src/backend/distributed/executor/multi_client_executor.c +++ b/src/backend/distributed/executor/multi_client_executor.c 
@@ -301,7 +301,7 @@ MultiClientCancel(int32 connectionId) if (cancelSent == 0) { ereport(WARNING, (errmsg("could not issue cancel request"), - errdetail("Client error: %s", errorBuffer))); + errdetail("Client error: %s", errorBuffer))); canceled = false; } @@ -348,7 +348,7 @@ MultiClientResultStatus(int32 connectionId) } else { - ereport(WARNING, (errmsg("could not consume data from worker node"))); + ereport(WARNING, (errmsg("could not consume data from worker node"))); resultStatus = CLIENT_RESULT_UNAVAILABLE; } @@ -589,7 +589,7 @@ MultiClientCopyData(int32 connectionId, int32 fileDescriptor) while (receiveLength > 0) { /* received copy data; append these data to file */ - int appended = -1; + int appended = -1; errno = 0; appended = write(fileDescriptor, receiveBuffer, receiveLength); @@ -706,7 +706,7 @@ ClientConnectionReady(PGconn *connection, PostgresPollingStatusType pollingStatu fd_set readFileDescriptorSet; fd_set writeFileDescriptorSet; fd_set exceptionFileDescriptorSet; - struct timeval immediateTimeout = {0, 0}; + struct timeval immediateTimeout = { 0, 0 }; int connectionFileDescriptor = PQsocket(connection); FD_ZERO(&readFileDescriptorSet); diff --git a/src/backend/distributed/executor/multi_executor.c b/src/backend/distributed/executor/multi_executor.c index 25fd12640..145abb4d5 100644 --- a/src/backend/distributed/executor/multi_executor.c +++ b/src/backend/distributed/executor/multi_executor.c @@ -157,7 +157,6 @@ multi_ExecutorStart(QueryDesc *queryDesc, int eflags) queryDesc->plannedstmt = masterSelectPlan; eflags |= EXEC_FLAG_CITUS_MASTER_SELECT; } - } /* if the execution is not done for router executor, drop into standard executor */ @@ -253,7 +252,7 @@ multi_ExecutorEnd(QueryDesc *queryDesc) RangeTblEntry *rangeTableEntry = linitial(planStatement->rtable); Oid masterTableRelid = rangeTableEntry->relid; - ObjectAddress masterTableObject = {InvalidOid, InvalidOid, 0}; + ObjectAddress masterTableObject = { InvalidOid, InvalidOid, 0 }; 
masterTableObject.classId = RelationRelationId; masterTableObject.objectId = masterTableRelid; diff --git a/src/backend/distributed/executor/multi_real_time_executor.c b/src/backend/distributed/executor/multi_real_time_executor.c index 77436612c..e3050f64b 100644 --- a/src/backend/distributed/executor/multi_real_time_executor.c +++ b/src/backend/distributed/executor/multi_real_time_executor.c @@ -89,7 +89,7 @@ MultiRealTimeExecute(Job *job) } /* loop around until all tasks complete, one task fails, or user cancels */ - while ( !(allTasksCompleted || taskFailed || QueryCancelPending) ) + while (!(allTasksCompleted || taskFailed || QueryCancelPending)) { uint32 taskCount = list_length(taskList); uint32 completedTaskCount = 0; @@ -230,333 +230,338 @@ ManageTaskExecution(Task *task, TaskExecution *taskExecution) switch (currentStatus) { - case EXEC_TASK_CONNECT_START: - { - int32 connectionId = INVALID_CONNECTION_ID; - char *nodeDatabase = NULL; - - /* we use the same database name on the master and worker nodes */ - nodeDatabase = get_database_name(MyDatabaseId); - - connectionId = MultiClientConnectStart(nodeName, nodePort, nodeDatabase); - connectionIdArray[currentIndex] = connectionId; - - /* if valid, poll the connection until the connection is initiated */ - if (connectionId != INVALID_CONNECTION_ID) + case EXEC_TASK_CONNECT_START: { - taskStatusArray[currentIndex] = EXEC_TASK_CONNECT_POLL; - taskExecution->connectPollCount = 0; - connectAction = CONNECT_ACTION_OPENED; - } - else - { - AdjustStateForFailure(taskExecution); - } + int32 connectionId = INVALID_CONNECTION_ID; + char *nodeDatabase = NULL; - break; - } + /* we use the same database name on the master and worker nodes */ + nodeDatabase = get_database_name(MyDatabaseId); - case EXEC_TASK_CONNECT_POLL: - { - int32 connectionId = connectionIdArray[currentIndex]; - ConnectStatus pollStatus = MultiClientConnectPoll(connectionId); + connectionId = MultiClientConnectStart(nodeName, nodePort, nodeDatabase); + 
connectionIdArray[currentIndex] = connectionId; - /* - * If the connection is established, we reset the data fetch counter and - * change our status to data fetching. - */ - if (pollStatus == CLIENT_CONNECTION_READY) - { - taskExecution->dataFetchTaskIndex = -1; - taskStatusArray[currentIndex] = EXEC_FETCH_TASK_LOOP; - } - else if (pollStatus == CLIENT_CONNECTION_BUSY) - { - taskStatusArray[currentIndex] = EXEC_TASK_CONNECT_POLL; - } - else if (pollStatus == CLIENT_CONNECTION_BAD) - { - taskStatusArray[currentIndex] = EXEC_TASK_FAILED; - } - - /* now check if we have been trying to connect for too long */ - taskExecution->connectPollCount++; - if (pollStatus == CLIENT_CONNECTION_BUSY) - { - uint32 maxCount = REMOTE_NODE_CONNECT_TIMEOUT / RemoteTaskCheckInterval; - uint32 currentCount = taskExecution->connectPollCount; - if (currentCount >= maxCount) + /* if valid, poll the connection until the connection is initiated */ + if (connectionId != INVALID_CONNECTION_ID) { - ereport(WARNING, (errmsg("could not establish asynchronous connection " - "after %u ms", REMOTE_NODE_CONNECT_TIMEOUT))); + taskStatusArray[currentIndex] = EXEC_TASK_CONNECT_POLL; + taskExecution->connectPollCount = 0; + connectAction = CONNECT_ACTION_OPENED; + } + else + { + AdjustStateForFailure(taskExecution); + } + break; + } + + case EXEC_TASK_CONNECT_POLL: + { + int32 connectionId = connectionIdArray[currentIndex]; + ConnectStatus pollStatus = MultiClientConnectPoll(connectionId); + + /* + * If the connection is established, we reset the data fetch counter and + * change our status to data fetching. 
+ */ + if (pollStatus == CLIENT_CONNECTION_READY) + { + taskExecution->dataFetchTaskIndex = -1; + taskStatusArray[currentIndex] = EXEC_FETCH_TASK_LOOP; + } + else if (pollStatus == CLIENT_CONNECTION_BUSY) + { + taskStatusArray[currentIndex] = EXEC_TASK_CONNECT_POLL; + } + else if (pollStatus == CLIENT_CONNECTION_BAD) + { taskStatusArray[currentIndex] = EXEC_TASK_FAILED; } - } - break; - } - - case EXEC_TASK_FAILED: - { - /* - * On task failure, we close the connection. We also reset our execution - * status assuming that we might fail on all other worker nodes and come - * back to this failed node. In that case, we will retry the same fetch - * and compute task(s) on this node again. - */ - int32 connectionId = connectionIdArray[currentIndex]; - MultiClientDisconnect(connectionId); - connectionIdArray[currentIndex] = INVALID_CONNECTION_ID; - connectAction = CONNECT_ACTION_CLOSED; - - taskStatusArray[currentIndex] = EXEC_TASK_CONNECT_START; - - /* try next worker node */ - AdjustStateForFailure(taskExecution); - - break; - } - - case EXEC_FETCH_TASK_LOOP: - { - List *dataFetchTaskList = task->dependedTaskList; - int32 dataFetchTaskCount = list_length(dataFetchTaskList); - - /* move to the next data fetch task */ - taskExecution->dataFetchTaskIndex++; - - if (taskExecution->dataFetchTaskIndex < dataFetchTaskCount) - { - taskStatusArray[currentIndex] = EXEC_FETCH_TASK_START; - } - else - { - taskStatusArray[currentIndex] = EXEC_COMPUTE_TASK_START; - } - - break; - } - - case EXEC_FETCH_TASK_START: - { - List *dataFetchTaskList = task->dependedTaskList; - int32 dataFetchTaskIndex = taskExecution->dataFetchTaskIndex; - Task *dataFetchTask = (Task *) list_nth(dataFetchTaskList, dataFetchTaskIndex); - - char *dataFetchQuery = dataFetchTask->queryString; - int32 connectionId = connectionIdArray[currentIndex]; - - bool querySent = MultiClientSendQuery(connectionId, dataFetchQuery); - if (querySent) - { - taskStatusArray[currentIndex] = EXEC_FETCH_TASK_RUNNING; - } - else - 
{ - taskStatusArray[currentIndex] = EXEC_TASK_FAILED; - } - - break; - } - - case EXEC_FETCH_TASK_RUNNING: - { - int32 connectionId = connectionIdArray[currentIndex]; - ResultStatus resultStatus = MultiClientResultStatus(connectionId); - QueryStatus queryStatus = CLIENT_INVALID_QUERY; - - /* check if query results are in progress or unavailable */ - if (resultStatus == CLIENT_RESULT_BUSY) - { - taskStatusArray[currentIndex] = EXEC_FETCH_TASK_RUNNING; - break; - } - else if (resultStatus == CLIENT_RESULT_UNAVAILABLE) - { - taskStatusArray[currentIndex] = EXEC_TASK_FAILED; - break; - } - - Assert(resultStatus == CLIENT_RESULT_READY); - - /* - * If the query executed successfully, loop onto the next data fetch - * task. Else if the query failed, try data fetching on another node. - */ - queryStatus = MultiClientQueryStatus(connectionId); - if (queryStatus == CLIENT_QUERY_DONE) - { - taskStatusArray[currentIndex] = EXEC_FETCH_TASK_LOOP; - } - else if (queryStatus == CLIENT_QUERY_FAILED) - { - taskStatusArray[currentIndex] = EXEC_TASK_FAILED; - } - else - { - ereport(FATAL, (errmsg("invalid query status: %d", queryStatus))); - } - - break; - } - - case EXEC_COMPUTE_TASK_START: - { - int32 connectionId = connectionIdArray[currentIndex]; - bool querySent = false; - - /* construct new query to copy query results to stdout */ - char *queryString = task->queryString; - StringInfo computeTaskQuery = makeStringInfo(); - if (BinaryMasterCopyFormat) - { - appendStringInfo(computeTaskQuery, COPY_QUERY_TO_STDOUT_BINARY, queryString); - } - else - { - appendStringInfo(computeTaskQuery, COPY_QUERY_TO_STDOUT_TEXT, queryString); - } - - querySent = MultiClientSendQuery(connectionId, computeTaskQuery->data); - if (querySent) - { - taskStatusArray[currentIndex] = EXEC_COMPUTE_TASK_RUNNING; - } - else - { - taskStatusArray[currentIndex] = EXEC_TASK_FAILED; - } - - break; - } - - case EXEC_COMPUTE_TASK_RUNNING: - { - int32 connectionId = connectionIdArray[currentIndex]; - ResultStatus 
resultStatus = MultiClientResultStatus(connectionId); - QueryStatus queryStatus = CLIENT_INVALID_QUERY; - - /* check if query results are in progress or unavailable */ - if (resultStatus == CLIENT_RESULT_BUSY) - { - taskStatusArray[currentIndex] = EXEC_COMPUTE_TASK_RUNNING; - break; - } - else if (resultStatus == CLIENT_RESULT_UNAVAILABLE) - { - taskStatusArray[currentIndex] = EXEC_TASK_FAILED; - break; - } - - Assert(resultStatus == CLIENT_RESULT_READY); - - /* check if our request to copy query results has been acknowledged */ - queryStatus = MultiClientQueryStatus(connectionId); - if (queryStatus == CLIENT_QUERY_COPY) - { - StringInfo jobDirectoryName = JobDirectoryName(task->jobId); - StringInfo taskFilename = TaskFilename(jobDirectoryName, task->taskId); - - char *filename = taskFilename->data; - int fileFlags = (O_APPEND | O_CREAT | O_RDWR | O_TRUNC | PG_BINARY); - int fileMode = (S_IRUSR | S_IWUSR); - - int32 fileDescriptor = BasicOpenFile(filename, fileFlags, fileMode); - if (fileDescriptor >= 0) + /* now check if we have been trying to connect for too long */ + taskExecution->connectPollCount++; + if (pollStatus == CLIENT_CONNECTION_BUSY) + { + uint32 maxCount = REMOTE_NODE_CONNECT_TIMEOUT / RemoteTaskCheckInterval; + uint32 currentCount = taskExecution->connectPollCount; + if (currentCount >= maxCount) + { + ereport(WARNING, (errmsg("could not establish asynchronous " + "connection after %u ms", + REMOTE_NODE_CONNECT_TIMEOUT))); + + taskStatusArray[currentIndex] = EXEC_TASK_FAILED; + } + } + + break; + } + + case EXEC_TASK_FAILED: + { + /* + * On task failure, we close the connection. We also reset our execution + * status assuming that we might fail on all other worker nodes and come + * back to this failed node. In that case, we will retry the same fetch + * and compute task(s) on this node again. 
+ */ + int32 connectionId = connectionIdArray[currentIndex]; + MultiClientDisconnect(connectionId); + connectionIdArray[currentIndex] = INVALID_CONNECTION_ID; + connectAction = CONNECT_ACTION_CLOSED; + + taskStatusArray[currentIndex] = EXEC_TASK_CONNECT_START; + + /* try next worker node */ + AdjustStateForFailure(taskExecution); + + break; + } + + case EXEC_FETCH_TASK_LOOP: + { + List *dataFetchTaskList = task->dependedTaskList; + int32 dataFetchTaskCount = list_length(dataFetchTaskList); + + /* move to the next data fetch task */ + taskExecution->dataFetchTaskIndex++; + + if (taskExecution->dataFetchTaskIndex < dataFetchTaskCount) + { + taskStatusArray[currentIndex] = EXEC_FETCH_TASK_START; + } + else + { + taskStatusArray[currentIndex] = EXEC_COMPUTE_TASK_START; + } + + break; + } + + case EXEC_FETCH_TASK_START: + { + List *dataFetchTaskList = task->dependedTaskList; + int32 dataFetchTaskIndex = taskExecution->dataFetchTaskIndex; + Task *dataFetchTask = (Task *) list_nth(dataFetchTaskList, + dataFetchTaskIndex); + + char *dataFetchQuery = dataFetchTask->queryString; + int32 connectionId = connectionIdArray[currentIndex]; + + bool querySent = MultiClientSendQuery(connectionId, dataFetchQuery); + if (querySent) + { + taskStatusArray[currentIndex] = EXEC_FETCH_TASK_RUNNING; + } + else + { + taskStatusArray[currentIndex] = EXEC_TASK_FAILED; + } + + break; + } + + case EXEC_FETCH_TASK_RUNNING: + { + int32 connectionId = connectionIdArray[currentIndex]; + ResultStatus resultStatus = MultiClientResultStatus(connectionId); + QueryStatus queryStatus = CLIENT_INVALID_QUERY; + + /* check if query results are in progress or unavailable */ + if (resultStatus == CLIENT_RESULT_BUSY) + { + taskStatusArray[currentIndex] = EXEC_FETCH_TASK_RUNNING; + break; + } + else if (resultStatus == CLIENT_RESULT_UNAVAILABLE) + { + taskStatusArray[currentIndex] = EXEC_TASK_FAILED; + break; + } + + Assert(resultStatus == CLIENT_RESULT_READY); + + /* + * If the query executed successfully, loop 
onto the next data fetch + * task. Else if the query failed, try data fetching on another node. + */ + queryStatus = MultiClientQueryStatus(connectionId); + if (queryStatus == CLIENT_QUERY_DONE) + { + taskStatusArray[currentIndex] = EXEC_FETCH_TASK_LOOP; + } + else if (queryStatus == CLIENT_QUERY_FAILED) + { + taskStatusArray[currentIndex] = EXEC_TASK_FAILED; + } + else + { + ereport(FATAL, (errmsg("invalid query status: %d", queryStatus))); + } + + break; + } + + case EXEC_COMPUTE_TASK_START: + { + int32 connectionId = connectionIdArray[currentIndex]; + bool querySent = false; + + /* construct new query to copy query results to stdout */ + char *queryString = task->queryString; + StringInfo computeTaskQuery = makeStringInfo(); + if (BinaryMasterCopyFormat) + { + appendStringInfo(computeTaskQuery, COPY_QUERY_TO_STDOUT_BINARY, + queryString); + } + else + { + appendStringInfo(computeTaskQuery, COPY_QUERY_TO_STDOUT_TEXT, + queryString); + } + + querySent = MultiClientSendQuery(connectionId, computeTaskQuery->data); + if (querySent) + { + taskStatusArray[currentIndex] = EXEC_COMPUTE_TASK_RUNNING; + } + else + { + taskStatusArray[currentIndex] = EXEC_TASK_FAILED; + } + + break; + } + + case EXEC_COMPUTE_TASK_RUNNING: + { + int32 connectionId = connectionIdArray[currentIndex]; + ResultStatus resultStatus = MultiClientResultStatus(connectionId); + QueryStatus queryStatus = CLIENT_INVALID_QUERY; + + /* check if query results are in progress or unavailable */ + if (resultStatus == CLIENT_RESULT_BUSY) + { + taskStatusArray[currentIndex] = EXEC_COMPUTE_TASK_RUNNING; + break; + } + else if (resultStatus == CLIENT_RESULT_UNAVAILABLE) + { + taskStatusArray[currentIndex] = EXEC_TASK_FAILED; + break; + } + + Assert(resultStatus == CLIENT_RESULT_READY); + + /* check if our request to copy query results has been acknowledged */ + queryStatus = MultiClientQueryStatus(connectionId); + if (queryStatus == CLIENT_QUERY_COPY) + { + StringInfo jobDirectoryName = 
JobDirectoryName(task->jobId); + StringInfo taskFilename = TaskFilename(jobDirectoryName, task->taskId); + + char *filename = taskFilename->data; + int fileFlags = (O_APPEND | O_CREAT | O_RDWR | O_TRUNC | PG_BINARY); + int fileMode = (S_IRUSR | S_IWUSR); + + int32 fileDescriptor = BasicOpenFile(filename, fileFlags, fileMode); + if (fileDescriptor >= 0) + { + /* + * All files inside the job directory get automatically cleaned + * up on transaction commit or abort. + */ + fileDescriptorArray[currentIndex] = fileDescriptor; + taskStatusArray[currentIndex] = EXEC_COMPUTE_TASK_COPYING; + } + else + { + ereport(WARNING, (errcode_for_file_access(), + errmsg("could not open file \"%s\": %m", + filename))); + + taskStatusArray[currentIndex] = EXEC_TASK_FAILED; + } + } + else if (queryStatus == CLIENT_QUERY_FAILED) + { + taskStatusArray[currentIndex] = EXEC_TASK_FAILED; + } + else + { + ereport(FATAL, (errmsg("invalid query status: %d", queryStatus))); + } + + break; + } + + case EXEC_COMPUTE_TASK_COPYING: + { + int32 connectionId = connectionIdArray[currentIndex]; + int32 fileDesc = fileDescriptorArray[currentIndex]; + int closed = -1; + + /* copy data from worker node, and write to local file */ + CopyStatus copyStatus = MultiClientCopyData(connectionId, fileDesc); + + /* if worker node will continue to send more data, keep reading */ + if (copyStatus == CLIENT_COPY_MORE) { - /* - * All files inside the job directory get automatically cleaned - * up on transaction commit or abort. 
- */ - fileDescriptorArray[currentIndex] = fileDescriptor; taskStatusArray[currentIndex] = EXEC_COMPUTE_TASK_COPYING; } - else + else if (copyStatus == CLIENT_COPY_DONE) { - ereport(WARNING, (errcode_for_file_access(), - errmsg("could not open file \"%s\": %m", filename))); + closed = close(fileDesc); + fileDescriptorArray[currentIndex] = -1; + if (closed >= 0) + { + taskStatusArray[currentIndex] = EXEC_TASK_DONE; + + /* we are done executing; we no longer need the connection */ + MultiClientDisconnect(connectionId); + connectionIdArray[currentIndex] = INVALID_CONNECTION_ID; + connectAction = CONNECT_ACTION_CLOSED; + } + else + { + ereport(WARNING, (errcode_for_file_access(), + errmsg("could not close copied file: %m"))); + + taskStatusArray[currentIndex] = EXEC_TASK_FAILED; + } + } + else if (copyStatus == CLIENT_COPY_FAILED) + { taskStatusArray[currentIndex] = EXEC_TASK_FAILED; + + closed = close(fileDesc); + fileDescriptorArray[currentIndex] = -1; + + if (closed < 0) + { + ereport(WARNING, (errcode_for_file_access(), + errmsg("could not close copy file: %m"))); + } } + + break; } - else if (queryStatus == CLIENT_QUERY_FAILED) + + case EXEC_TASK_DONE: { - taskStatusArray[currentIndex] = EXEC_TASK_FAILED; + /* we are done with this task's execution */ + break; } - else + + default: { - ereport(FATAL, (errmsg("invalid query status: %d", queryStatus))); + /* we fatal here to avoid leaking client-side resources */ + ereport(FATAL, (errmsg("invalid execution status: %d", currentStatus))); + break; } - - break; - } - - case EXEC_COMPUTE_TASK_COPYING: - { - int32 connectionId = connectionIdArray[currentIndex]; - int32 fileDesc = fileDescriptorArray[currentIndex]; - int closed = -1; - - /* copy data from worker node, and write to local file */ - CopyStatus copyStatus = MultiClientCopyData(connectionId, fileDesc); - - /* if worker node will continue to send more data, keep reading */ - if (copyStatus == CLIENT_COPY_MORE) - { - taskStatusArray[currentIndex] = 
EXEC_COMPUTE_TASK_COPYING; - } - else if (copyStatus == CLIENT_COPY_DONE) - { - closed = close(fileDesc); - fileDescriptorArray[currentIndex] = -1; - - if (closed >= 0) - { - taskStatusArray[currentIndex] = EXEC_TASK_DONE; - - /* we are done executing; we no longer need the connection */ - MultiClientDisconnect(connectionId); - connectionIdArray[currentIndex] = INVALID_CONNECTION_ID; - connectAction = CONNECT_ACTION_CLOSED; - } - else - { - ereport(WARNING, (errcode_for_file_access(), - errmsg("could not close copied file: %m"))); - - taskStatusArray[currentIndex] = EXEC_TASK_FAILED; - } - } - else if (copyStatus == CLIENT_COPY_FAILED) - { - taskStatusArray[currentIndex] = EXEC_TASK_FAILED; - - closed = close(fileDesc); - fileDescriptorArray[currentIndex] = -1; - - if (closed < 0) - { - ereport(WARNING, (errcode_for_file_access(), - errmsg("could not close copy file: %m"))); - } - } - - break; - } - - case EXEC_TASK_DONE: - { - /* we are done with this task's execution */ - break; - } - - default: - { - /* we fatal here to avoid leaking client-side resources */ - ereport(FATAL, (errmsg("invalid execution status: %d", currentStatus))); - break; - } } return connectAction; diff --git a/src/backend/distributed/executor/multi_router_executor.c b/src/backend/distributed/executor/multi_router_executor.c index 5c2f04165..ae6eea97d 100644 --- a/src/backend/distributed/executor/multi_router_executor.c +++ b/src/backend/distributed/executor/multi_router_executor.c @@ -80,6 +80,7 @@ RouterExecutorStart(QueryDesc *queryDesc, int eflags, Task *task) queryDesc->estate = executorState; #if (PG_VERSION_NUM < 90500) + /* make sure that upsertQuery is false for versions that UPSERT is not available */ Assert(task->upsertQuery == false); #endif @@ -177,14 +178,14 @@ RouterExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long count, Tas if (!ScanDirectionIsForward(direction)) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("scan directions other than 
forward scans " - "are unsupported"))); + errmsg("scan directions other than forward scans " + "are unsupported"))); } if (count != 0) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("fetching rows from a query using a cursor " - "is unsupported"))); + errmsg("fetching rows from a query using a cursor " + "is unsupported"))); } oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); @@ -210,7 +211,7 @@ RouterExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long count, Tas else { ereport(ERROR, (errmsg("unrecognized operation code: %d", - (int) operation))); + (int) operation))); } if (queryDesc->totaltime != NULL) @@ -219,9 +220,9 @@ RouterExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long count, Tas } MemoryContextSwitchTo(oldcontext); - } + /* * ExecuteDistributedModify is the main entry point for modifying distributed * tables. A distributed modification is successful if any placement of the @@ -532,9 +533,10 @@ StoreQueryResult(PGconn *connection, TupleDesc tupleDescriptor, return true; } + /* -* RouterExecutorFinish cleans up after a distributed execution. -*/ + * RouterExecutorFinish cleans up after a distributed execution. 
+ */ void RouterExecutorFinish(QueryDesc *queryDesc) { diff --git a/src/backend/distributed/executor/multi_server_executor.c b/src/backend/distributed/executor/multi_server_executor.c index 1f143778d..1abc6f007 100644 --- a/src/backend/distributed/executor/multi_server_executor.c +++ b/src/backend/distributed/executor/multi_server_executor.c @@ -303,13 +303,13 @@ AdjustStateForFailure(TaskExecution *taskExecution) if (taskExecution->currentNodeIndex < maxNodeIndex) { - taskExecution->currentNodeIndex++; /* try next worker node */ + taskExecution->currentNodeIndex++; /* try next worker node */ } else { taskExecution->currentNodeIndex = 0; /* go back to the first worker node */ } - taskExecution->dataFetchTaskIndex = -1; /* reset data fetch counter */ - taskExecution->failureCount++; /* record failure */ + taskExecution->dataFetchTaskIndex = -1; /* reset data fetch counter */ + taskExecution->failureCount++; /* record failure */ } diff --git a/src/backend/distributed/executor/multi_task_tracker_executor.c b/src/backend/distributed/executor/multi_task_tracker_executor.c index f67c82271..14c26aef9 100644 --- a/src/backend/distributed/executor/multi_task_tracker_executor.c +++ b/src/backend/distributed/executor/multi_task_tracker_executor.c @@ -38,10 +38,9 @@ int MaxAssignTaskBatchSize = 64; /* maximum number of tasks to assign per round /* TaskMapKey is used as a key in task hash */ typedef struct TaskMapKey { - TaskType taskType; - uint64 jobId; - uint32 taskId; - + TaskType taskType; + uint64 jobId; + uint32 taskId; } TaskMapKey; @@ -51,9 +50,8 @@ typedef struct TaskMapKey */ typedef struct TaskMapEntry { - TaskMapKey key; - Task *task; - + TaskMapKey key; + Task *task; } TaskMapEntry; @@ -83,7 +81,8 @@ static TaskTracker * TrackerHashLookup(HTAB *trackerHash, const char *nodeName, static TaskExecStatus ManageTaskExecution(TaskTracker *taskTracker, TaskTracker *sourceTaskTracker, Task *task, TaskExecution *taskExecution); -static TransmitExecStatus 
ManageTransmitExecution(TaskTracker *transmitTracker, Task *task, +static TransmitExecStatus ManageTransmitExecution(TaskTracker *transmitTracker, + Task *task, TaskExecution *taskExecution); static bool TaskExecutionsCompleted(List *taskList); static StringInfo MapFetchTaskQueryString(Task *mapFetchTask, Task *mapTask); @@ -194,8 +193,8 @@ MultiTaskTrackerExecute(Job *job) TrackerHashConnect(transmitTrackerHash); /* loop around until all tasks complete, one task fails, or user cancels */ - while ( !(allTasksCompleted || taskFailed || taskTransmitFailed || - clusterFailed || QueryCancelPending) ) + while (!(allTasksCompleted || taskFailed || taskTransmitFailed || + clusterFailed || QueryCancelPending)) { TaskTracker *taskTracker = NULL; TaskTracker *transmitTracker = NULL; @@ -493,8 +492,8 @@ TaskAndExecutionList(List *jobTaskList) */ if (!dependendTaskInHash) { - dependendTaskInHash = TaskHashEnter(taskHash, dependendTask); - taskQueue = lappend(taskQueue, dependendTaskInHash); + dependendTaskInHash = TaskHashEnter(taskHash, dependendTask); + taskQueue = lappend(taskQueue, dependendTaskInHash); } /* update dependedTaskList element to the one which is in the hash */ @@ -557,7 +556,7 @@ TaskHashEnter(HTAB *taskHash, Task *task) if (handleFound) { ereport(ERROR, (errmsg("multiple entries for task: \"%d:%ld:%d\"", - task->taskType, task->jobId, task->taskId))); + task->taskType, task->jobId, task->taskId))); } /* save the pointer to the original task in the hash */ @@ -820,82 +819,84 @@ TrackerConnectPoll(TaskTracker *taskTracker) { switch (taskTracker->trackerStatus) { - case TRACKER_CONNECT_START: - { - char *nodeName = taskTracker->workerName; - uint32 nodePort = taskTracker->workerPort; - char *nodeDatabase = get_database_name(MyDatabaseId); - - int32 connectionId = MultiClientConnectStart(nodeName, nodePort, nodeDatabase); - if (connectionId != INVALID_CONNECTION_ID) + case TRACKER_CONNECT_START: { - taskTracker->connectionId = connectionId; - 
taskTracker->trackerStatus = TRACKER_CONNECT_POLL; - } - else - { - taskTracker->trackerStatus = TRACKER_CONNECTION_FAILED; - } + char *nodeName = taskTracker->workerName; + uint32 nodePort = taskTracker->workerPort; + char *nodeDatabase = get_database_name(MyDatabaseId); - break; - } - - case TRACKER_CONNECT_POLL: - { - int32 connectionId = taskTracker->connectionId; - - ConnectStatus pollStatus = MultiClientConnectPoll(connectionId); - if (pollStatus == CLIENT_CONNECTION_READY) - { - taskTracker->trackerStatus = TRACKER_CONNECTED; - } - else if (pollStatus == CLIENT_CONNECTION_BUSY) - { - taskTracker->trackerStatus = TRACKER_CONNECT_POLL; - } - else if (pollStatus == CLIENT_CONNECTION_BAD) - { - taskTracker->trackerStatus = TRACKER_CONNECTION_FAILED; - - MultiClientDisconnect(connectionId); - taskTracker->connectionId = INVALID_CONNECTION_ID; - } - - /* now check if we have been trying to connect for too long */ - taskTracker->connectPollCount++; - if (pollStatus == CLIENT_CONNECTION_BUSY) - { - uint32 maxCount = REMOTE_NODE_CONNECT_TIMEOUT / RemoteTaskCheckInterval; - uint32 currentCount = taskTracker->connectPollCount; - if (currentCount >= maxCount) + int32 connectionId = MultiClientConnectStart(nodeName, nodePort, + nodeDatabase); + if (connectionId != INVALID_CONNECTION_ID) { - ereport(WARNING, (errmsg("could not establish asynchronous connection " - "after %u ms", REMOTE_NODE_CONNECT_TIMEOUT))); + taskTracker->connectionId = connectionId; + taskTracker->trackerStatus = TRACKER_CONNECT_POLL; + } + else + { + taskTracker->trackerStatus = TRACKER_CONNECTION_FAILED; + } + break; + } + + case TRACKER_CONNECT_POLL: + { + int32 connectionId = taskTracker->connectionId; + + ConnectStatus pollStatus = MultiClientConnectPoll(connectionId); + if (pollStatus == CLIENT_CONNECTION_READY) + { + taskTracker->trackerStatus = TRACKER_CONNECTED; + } + else if (pollStatus == CLIENT_CONNECTION_BUSY) + { + taskTracker->trackerStatus = TRACKER_CONNECT_POLL; + } + else if 
(pollStatus == CLIENT_CONNECTION_BAD) + { taskTracker->trackerStatus = TRACKER_CONNECTION_FAILED; MultiClientDisconnect(connectionId); taskTracker->connectionId = INVALID_CONNECTION_ID; } + + /* now check if we have been trying to connect for too long */ + taskTracker->connectPollCount++; + if (pollStatus == CLIENT_CONNECTION_BUSY) + { + uint32 maxCount = REMOTE_NODE_CONNECT_TIMEOUT / RemoteTaskCheckInterval; + uint32 currentCount = taskTracker->connectPollCount; + if (currentCount >= maxCount) + { + ereport(WARNING, (errmsg("could not establish asynchronous " + "connection after %u ms", + REMOTE_NODE_CONNECT_TIMEOUT))); + + taskTracker->trackerStatus = TRACKER_CONNECTION_FAILED; + + MultiClientDisconnect(connectionId); + taskTracker->connectionId = INVALID_CONNECTION_ID; + } + } + + break; } - break; - } + case TRACKER_CONNECTED: + case TRACKER_CONNECTION_FAILED: + { + /* if connected or failed to connect in previous pass, reset poll count */ + taskTracker->connectPollCount = 0; + break; + } - case TRACKER_CONNECTED: - case TRACKER_CONNECTION_FAILED: - { - /* if connected or failed to connect in previous pass, reset poll count */ - taskTracker->connectPollCount = 0; - break; - } - - default: - { - int trackerStatus = (int) taskTracker->trackerStatus; - ereport(FATAL, (errmsg("invalid task tracker status: %d", trackerStatus))); - break; - } + default: + { + int trackerStatus = (int) taskTracker->trackerStatus; + ereport(FATAL, (errmsg("invalid task tracker status: %d", trackerStatus))); + break; + } } return taskTracker->trackerStatus; @@ -1008,213 +1009,214 @@ ManageTaskExecution(TaskTracker *taskTracker, TaskTracker *sourceTaskTracker, switch (currentExecutionStatus) { - case EXEC_TASK_UNASSIGNED: - { - bool taskExecutionsCompleted = true; - TaskType taskType = TASK_TYPE_INVALID_FIRST; - - bool trackerHealthy = TrackerHealthy(taskTracker); - if (!trackerHealthy) + case EXEC_TASK_UNASSIGNED: { - nextExecutionStatus = EXEC_TASK_TRACKER_FAILED; - break; - } + bool 
taskExecutionsCompleted = true; + TaskType taskType = TASK_TYPE_INVALID_FIRST; - /* - * We first retrieve this task's downstream dependencies, and then check - * if these dependencies' executions have completed. - */ - taskExecutionsCompleted = TaskExecutionsCompleted(task->dependedTaskList); - if (!taskExecutionsCompleted) - { - nextExecutionStatus = EXEC_TASK_UNASSIGNED; - break; - } - - /* if map fetch task, create query string from completed map task */ - taskType = task->taskType; - if (taskType == MAP_OUTPUT_FETCH_TASK) - { - StringInfo mapFetchTaskQueryString = NULL; - Task *mapTask = (Task *) linitial(task->dependedTaskList); - TaskExecution *mapTaskExecution = mapTask->taskExecution; - - mapFetchTaskQueryString = MapFetchTaskQueryString(task, mapTask); - task->queryString = mapFetchTaskQueryString->data; - taskExecution->querySourceNodeIndex = mapTaskExecution->currentNodeIndex; - } - - /* - * We finally queue this task for execution. Note that we queue sql and - * other tasks slightly differently. - */ - if (taskType == SQL_TASK) - { - TrackerQueueSqlTask(taskTracker, task); - } - else - { - TrackerQueueTask(taskTracker, task); - } - - nextExecutionStatus = EXEC_TASK_QUEUED; - break; - } - - case EXEC_TASK_QUEUED: - { - TaskStatus remoteTaskStatus = TASK_STATUS_INVALID_FIRST; - - bool trackerHealthy = TrackerHealthy(taskTracker); - if (!trackerHealthy) - { - nextExecutionStatus = EXEC_TASK_TRACKER_FAILED; - break; - } - - remoteTaskStatus = TrackerTaskStatus(taskTracker, task); - if (remoteTaskStatus == TASK_SUCCEEDED) - { - nextExecutionStatus = EXEC_TASK_DONE; - } - else if (remoteTaskStatus == TASK_CLIENT_SIDE_ASSIGN_FAILED || - remoteTaskStatus == TASK_CLIENT_SIDE_STATUS_FAILED) - { - nextExecutionStatus = EXEC_TASK_TRACKER_RETRY; - } - else if (remoteTaskStatus == TASK_PERMANENTLY_FAILED) - { - /* - * If a map output fetch task failed, we assume the problem lies with - * the map task (and the source task tracker it runs on). 
Otherwise, - * we assume the task tracker crashed, and fail over to the next task - * tracker. - */ - if (task->taskType == MAP_OUTPUT_FETCH_TASK) + bool trackerHealthy = TrackerHealthy(taskTracker); + if (!trackerHealthy) { - nextExecutionStatus = EXEC_SOURCE_TASK_TRACKER_RETRY; + nextExecutionStatus = EXEC_TASK_TRACKER_FAILED; + break; + } + + /* + * We first retrieve this task's downstream dependencies, and then check + * if these dependencies' executions have completed. + */ + taskExecutionsCompleted = TaskExecutionsCompleted(task->dependedTaskList); + if (!taskExecutionsCompleted) + { + nextExecutionStatus = EXEC_TASK_UNASSIGNED; + break; + } + + /* if map fetch task, create query string from completed map task */ + taskType = task->taskType; + if (taskType == MAP_OUTPUT_FETCH_TASK) + { + StringInfo mapFetchTaskQueryString = NULL; + Task *mapTask = (Task *) linitial(task->dependedTaskList); + TaskExecution *mapTaskExecution = mapTask->taskExecution; + + mapFetchTaskQueryString = MapFetchTaskQueryString(task, mapTask); + task->queryString = mapFetchTaskQueryString->data; + taskExecution->querySourceNodeIndex = mapTaskExecution->currentNodeIndex; + } + + /* + * We finally queue this task for execution. Note that we queue sql and + * other tasks slightly differently. 
+ */ + if (taskType == SQL_TASK) + { + TrackerQueueSqlTask(taskTracker, task); + } + else + { + TrackerQueueTask(taskTracker, task); + } + + nextExecutionStatus = EXEC_TASK_QUEUED; + break; + } + + case EXEC_TASK_QUEUED: + { + TaskStatus remoteTaskStatus = TASK_STATUS_INVALID_FIRST; + + bool trackerHealthy = TrackerHealthy(taskTracker); + if (!trackerHealthy) + { + nextExecutionStatus = EXEC_TASK_TRACKER_FAILED; + break; + } + + remoteTaskStatus = TrackerTaskStatus(taskTracker, task); + if (remoteTaskStatus == TASK_SUCCEEDED) + { + nextExecutionStatus = EXEC_TASK_DONE; + } + else if (remoteTaskStatus == TASK_CLIENT_SIDE_ASSIGN_FAILED || + remoteTaskStatus == TASK_CLIENT_SIDE_STATUS_FAILED) + { + nextExecutionStatus = EXEC_TASK_TRACKER_RETRY; + } + else if (remoteTaskStatus == TASK_PERMANENTLY_FAILED) + { + /* + * If a map output fetch task failed, we assume the problem lies with + * the map task (and the source task tracker it runs on). Otherwise, + * we assume the task tracker crashed, and fail over to the next task + * tracker. + */ + if (task->taskType == MAP_OUTPUT_FETCH_TASK) + { + nextExecutionStatus = EXEC_SOURCE_TASK_TRACKER_RETRY; + } + else + { + nextExecutionStatus = EXEC_TASK_TRACKER_FAILED; + } + } + else + { + /* assume task is still in progress */ + nextExecutionStatus = EXEC_TASK_QUEUED; + } + + break; + } + + case EXEC_TASK_TRACKER_RETRY: + { + bool trackerHealthy = false; + bool trackerConnectionUp = false; + + /* + * This case statement usually handles connection related issues. Some + * edge cases however, like a user sending a SIGTERM to the worker node, + * keep the connection open but disallow task assignments. We therefore + * need to track those as intermittent tracker failures here. 
+ */ + trackerConnectionUp = TrackerConnectionUp(taskTracker); + if (trackerConnectionUp) + { + taskTracker->trackerFailureCount++; + } + + trackerHealthy = TrackerHealthy(taskTracker); + if (trackerHealthy) + { + TaskStatus remoteTaskStatus = TrackerTaskStatus(taskTracker, task); + if (remoteTaskStatus == TASK_CLIENT_SIDE_ASSIGN_FAILED) + { + nextExecutionStatus = EXEC_TASK_UNASSIGNED; + } + else if (remoteTaskStatus == TASK_CLIENT_SIDE_STATUS_FAILED) + { + nextExecutionStatus = EXEC_TASK_QUEUED; + } } else { nextExecutionStatus = EXEC_TASK_TRACKER_FAILED; } - } - else - { - /* assume task is still in progress */ - nextExecutionStatus = EXEC_TASK_QUEUED; + + break; } - break; - } - - case EXEC_TASK_TRACKER_RETRY: - { - bool trackerHealthy = false; - bool trackerConnectionUp = false; - - /* - * This case statement usually handles connection related issues. Some - * edge cases however, like a user sending a SIGTERM to the worker node, - * keep the connection open but disallow task assignments. We therefore - * need to track those as intermittent tracker failures here. - */ - trackerConnectionUp = TrackerConnectionUp(taskTracker); - if (trackerConnectionUp) + case EXEC_SOURCE_TASK_TRACKER_RETRY: { - taskTracker->trackerFailureCount++; - } + Task *mapTask = (Task *) linitial(task->dependedTaskList); + TaskExecution *mapTaskExecution = mapTask->taskExecution; + uint32 sourceNodeIndex = mapTaskExecution->currentNodeIndex; - trackerHealthy = TrackerHealthy(taskTracker); - if (trackerHealthy) - { - TaskStatus remoteTaskStatus = TrackerTaskStatus(taskTracker, task); - if (remoteTaskStatus == TASK_CLIENT_SIDE_ASSIGN_FAILED) + bool sourceTrackerHealthy = false; + Assert(sourceTaskTracker != NULL); + Assert(task->taskType == MAP_OUTPUT_FETCH_TASK); + + /* + * As this map fetch task was running, another map fetch that depends on + * another map task might have failed. We would have then reassigned the + * map task and potentially other map tasks in its constraint group. 
So + * this map fetch's source node might have changed underneath us. If it + * did, we don't want to record a failure for the new source tracker. + */ + if (taskExecution->querySourceNodeIndex == sourceNodeIndex) { + bool sourceTrackerConnectionUp = TrackerConnectionUp(sourceTaskTracker); + if (sourceTrackerConnectionUp) + { + sourceTaskTracker->trackerFailureCount++; + } + } + + sourceTrackerHealthy = TrackerHealthy(sourceTaskTracker); + if (sourceTrackerHealthy) + { + /* + * We change our status to unassigned. In that status, we queue an + * "update map fetch task" on the task tracker, and retry fetching + * the map task's output from the same source node. + */ nextExecutionStatus = EXEC_TASK_UNASSIGNED; } - else if (remoteTaskStatus == TASK_CLIENT_SIDE_STATUS_FAILED) + else { - nextExecutionStatus = EXEC_TASK_QUEUED; + nextExecutionStatus = EXEC_SOURCE_TASK_TRACKER_FAILED; } - } - else - { - nextExecutionStatus = EXEC_TASK_TRACKER_FAILED; + + break; } - break; - } - - case EXEC_SOURCE_TASK_TRACKER_RETRY: - { - Task *mapTask = (Task *) linitial(task->dependedTaskList); - TaskExecution *mapTaskExecution = mapTask->taskExecution; - uint32 sourceNodeIndex = mapTaskExecution->currentNodeIndex; - - bool sourceTrackerHealthy = false; - Assert(sourceTaskTracker != NULL); - Assert(task->taskType == MAP_OUTPUT_FETCH_TASK); - - /* - * As this map fetch task was running, another map fetch that depends on - * another map task might have failed. We would have then reassigned the - * map task and potentially other map tasks in its constraint group. So - * this map fetch's source node might have changed underneath us. If it - * did, we don't want to record a failure for the new source tracker. 
- */ - if (taskExecution->querySourceNodeIndex == sourceNodeIndex) - { - bool sourceTrackerConnectionUp = TrackerConnectionUp(sourceTaskTracker); - if (sourceTrackerConnectionUp) - { - sourceTaskTracker->trackerFailureCount++; - } - } - - sourceTrackerHealthy = TrackerHealthy(sourceTaskTracker); - if (sourceTrackerHealthy) + case EXEC_TASK_TRACKER_FAILED: + case EXEC_SOURCE_TASK_TRACKER_FAILED: { /* - * We change our status to unassigned. In that status, we queue an - * "update map fetch task" on the task tracker, and retry fetching - * the map task's output from the same source node. + * These two cases exist to signal to the caller that we failed. In both + * cases, the caller is responsible for reassigning task(s) and running + * the appropriate recovery logic. */ nextExecutionStatus = EXEC_TASK_UNASSIGNED; + break; } - else + + case EXEC_TASK_DONE: { - nextExecutionStatus = EXEC_SOURCE_TASK_TRACKER_FAILED; + /* we are done with this task's execution */ + nextExecutionStatus = EXEC_TASK_DONE; + break; } - break; - } - - case EXEC_TASK_TRACKER_FAILED: - case EXEC_SOURCE_TASK_TRACKER_FAILED: - { - /* - * These two cases exist to signal to the caller that we failed. In both - * cases, the caller is responsible for reassigning task(s) and running - * the appropriate recovery logic. 
- */ - nextExecutionStatus = EXEC_TASK_UNASSIGNED; - break; - } - - case EXEC_TASK_DONE: - { - /* we are done with this task's execution */ - nextExecutionStatus = EXEC_TASK_DONE; - break; - } - - default: - { - /* we fatal here to avoid leaking client-side resources */ - ereport(FATAL, (errmsg("invalid execution status: %d", currentExecutionStatus))); - break; - } + default: + { + /* we fatal here to avoid leaking client-side resources */ + ereport(FATAL, (errmsg("invalid execution status: %d", + currentExecutionStatus))); + break; + } } /* update task execution's status for most recent task tracker */ @@ -1247,225 +1249,227 @@ ManageTransmitExecution(TaskTracker *transmitTracker, switch (currentTransmitStatus) { - case EXEC_TRANSMIT_UNASSIGNED: - { - TaskExecStatus *taskStatusArray = taskExecution->taskStatusArray; - TaskExecStatus currentExecutionStatus = taskStatusArray[currentNodeIndex]; - bool trackerHealthy = false; - - /* if top level task's in progress, nothing to do */ - if (currentExecutionStatus != EXEC_TASK_DONE) + case EXEC_TRANSMIT_UNASSIGNED: { - nextTransmitStatus = EXEC_TRANSMIT_UNASSIGNED; - break; - } + TaskExecStatus *taskStatusArray = taskExecution->taskStatusArray; + TaskExecStatus currentExecutionStatus = taskStatusArray[currentNodeIndex]; + bool trackerHealthy = false; - trackerHealthy = TrackerHealthy(transmitTracker); - if (!trackerHealthy) - { - nextTransmitStatus = EXEC_TRANSMIT_TRACKER_FAILED; - break; - } + /* if top level task's in progress, nothing to do */ + if (currentExecutionStatus != EXEC_TASK_DONE) + { + nextTransmitStatus = EXEC_TRANSMIT_UNASSIGNED; + break; + } - TrackerQueueFileTransmit(transmitTracker, task); - nextTransmitStatus = EXEC_TRANSMIT_QUEUED; - break; - } + trackerHealthy = TrackerHealthy(transmitTracker); + if (!trackerHealthy) + { + nextTransmitStatus = EXEC_TRANSMIT_TRACKER_FAILED; + break; + } - case EXEC_TRANSMIT_QUEUED: - { - QueryStatus queryStatus = CLIENT_INVALID_QUERY; - int32 connectionId = 
INVALID_CONNECTION_ID; - TaskStatus taskStatus = TASK_STATUS_INVALID_FIRST; - - bool trackerHealthy = TrackerHealthy(transmitTracker); - if (!trackerHealthy) - { - nextTransmitStatus = EXEC_TRANSMIT_TRACKER_FAILED; - break; - } - - taskStatus = TrackerTaskStatus(transmitTracker, task); - if (taskStatus == TASK_FILE_TRANSMIT_QUEUED) - { - /* remain in queued status until tracker assigns this task */ + TrackerQueueFileTransmit(transmitTracker, task); nextTransmitStatus = EXEC_TRANSMIT_QUEUED; break; } - else if (taskStatus == TASK_CLIENT_SIDE_TRANSMIT_FAILED) + + case EXEC_TRANSMIT_QUEUED: { - nextTransmitStatus = EXEC_TRANSMIT_TRACKER_RETRY; - break; - } + QueryStatus queryStatus = CLIENT_INVALID_QUERY; + int32 connectionId = INVALID_CONNECTION_ID; + TaskStatus taskStatus = TASK_STATUS_INVALID_FIRST; - /* the open connection belongs to this task */ - connectionId = TransmitTrackerConnectionId(transmitTracker, task); - Assert(connectionId != INVALID_CONNECTION_ID); - Assert(taskStatus == TASK_ASSIGNED); - - /* start copy protocol */ - queryStatus = MultiClientQueryStatus(connectionId); - if (queryStatus == CLIENT_QUERY_COPY) - { - StringInfo jobDirectoryName = JobDirectoryName(task->jobId); - StringInfo taskFilename = TaskFilename(jobDirectoryName, task->taskId); - - char *filename = taskFilename->data; - int fileFlags = (O_APPEND | O_CREAT | O_RDWR | O_TRUNC | PG_BINARY); - int fileMode = (S_IRUSR | S_IWUSR); - - int32 fileDescriptor = BasicOpenFile(filename, fileFlags, fileMode); - if (fileDescriptor >= 0) + bool trackerHealthy = TrackerHealthy(transmitTracker); + if (!trackerHealthy) { - /* - * All files inside the job directory get automatically cleaned - * up on transaction commit or abort. 
- */ - fileDescriptorArray[currentNodeIndex] = fileDescriptor; - nextTransmitStatus = EXEC_TRANSMIT_COPYING; + nextTransmitStatus = EXEC_TRANSMIT_TRACKER_FAILED; + break; + } + + taskStatus = TrackerTaskStatus(transmitTracker, task); + if (taskStatus == TASK_FILE_TRANSMIT_QUEUED) + { + /* remain in queued status until tracker assigns this task */ + nextTransmitStatus = EXEC_TRANSMIT_QUEUED; + break; + } + else if (taskStatus == TASK_CLIENT_SIDE_TRANSMIT_FAILED) + { + nextTransmitStatus = EXEC_TRANSMIT_TRACKER_RETRY; + break; + } + + /* the open connection belongs to this task */ + connectionId = TransmitTrackerConnectionId(transmitTracker, task); + Assert(connectionId != INVALID_CONNECTION_ID); + Assert(taskStatus == TASK_ASSIGNED); + + /* start copy protocol */ + queryStatus = MultiClientQueryStatus(connectionId); + if (queryStatus == CLIENT_QUERY_COPY) + { + StringInfo jobDirectoryName = JobDirectoryName(task->jobId); + StringInfo taskFilename = TaskFilename(jobDirectoryName, task->taskId); + + char *filename = taskFilename->data; + int fileFlags = (O_APPEND | O_CREAT | O_RDWR | O_TRUNC | PG_BINARY); + int fileMode = (S_IRUSR | S_IWUSR); + + int32 fileDescriptor = BasicOpenFile(filename, fileFlags, fileMode); + if (fileDescriptor >= 0) + { + /* + * All files inside the job directory get automatically cleaned + * up on transaction commit or abort. 
+ */ + fileDescriptorArray[currentNodeIndex] = fileDescriptor; + nextTransmitStatus = EXEC_TRANSMIT_COPYING; + } + else + { + ereport(WARNING, (errcode_for_file_access(), + errmsg("could not open file \"%s\": %m", + filename))); + + nextTransmitStatus = EXEC_TRANSMIT_TRACKER_RETRY; + } } else { - ereport(WARNING, (errcode_for_file_access(), - errmsg("could not open file \"%s\": %m", filename))); - nextTransmitStatus = EXEC_TRANSMIT_TRACKER_RETRY; } - } - else - { - nextTransmitStatus = EXEC_TRANSMIT_TRACKER_RETRY; + + /* + * We use task tracker logic to manage file transmits as well, but that + * abstraction starts to leak after we drop into the copy protocol. To + * make our task tracker logic work, we need to "void" the tracker's + * connection if the transmit task failed in here. + */ + if (nextTransmitStatus == EXEC_TRANSMIT_TRACKER_RETRY) + { + transmitTracker->connectionBusy = false; + transmitTracker->connectionBusyOnTask = NULL; + } + + break; } - /* - * We use task tracker logic to manage file transmits as well, but that - * abstraction starts to leak after we drop into the copy protocol. To - * make our task tracker logic work, we need to "void" the tracker's - * connection if the transmit task failed in here. 
- */ - if (nextTransmitStatus == EXEC_TRANSMIT_TRACKER_RETRY) + case EXEC_TRANSMIT_COPYING: { + int32 fileDescriptor = fileDescriptorArray[currentNodeIndex]; + CopyStatus copyStatus = CLIENT_INVALID_COPY; + int closed = -1; + + /* the open connection belongs to this task */ + int32 connectionId = TransmitTrackerConnectionId(transmitTracker, task); + Assert(connectionId != INVALID_CONNECTION_ID); + + copyStatus = MultiClientCopyData(connectionId, fileDescriptor); + if (copyStatus == CLIENT_COPY_MORE) + { + /* worker node continues to send more data, keep reading */ + nextTransmitStatus = EXEC_TRANSMIT_COPYING; + break; + } + + /* we are done copying data */ + if (copyStatus == CLIENT_COPY_DONE) + { + closed = close(fileDescriptor); + fileDescriptorArray[currentNodeIndex] = -1; + + if (closed >= 0) + { + nextTransmitStatus = EXEC_TRANSMIT_DONE; + } + else + { + ereport(WARNING, (errcode_for_file_access(), + errmsg("could not close copied file: %m"))); + + nextTransmitStatus = EXEC_TRANSMIT_TRACKER_RETRY; + } + } + else if (copyStatus == CLIENT_COPY_FAILED) + { + nextTransmitStatus = EXEC_TRANSMIT_TRACKER_RETRY; + + closed = close(fileDescriptor); + fileDescriptorArray[currentNodeIndex] = -1; + + if (closed < 0) + { + ereport(WARNING, (errcode_for_file_access(), + errmsg("could not close copy file: %m"))); + } + } + + /* + * We use task tracker logic to manage file transmits as well, but that + * abstraction leaks after we drop into the copy protocol. To make it + * work, we reset transmit tracker's connection for next file transmit. 
+ */ transmitTracker->connectionBusy = false; transmitTracker->connectionBusyOnTask = NULL; - } - break; - } - - case EXEC_TRANSMIT_COPYING: - { - int32 fileDescriptor = fileDescriptorArray[currentNodeIndex]; - CopyStatus copyStatus = CLIENT_INVALID_COPY; - int closed = -1; - - /* the open connection belongs to this task */ - int32 connectionId = TransmitTrackerConnectionId(transmitTracker, task); - Assert(connectionId != INVALID_CONNECTION_ID); - - copyStatus = MultiClientCopyData(connectionId, fileDescriptor); - if (copyStatus == CLIENT_COPY_MORE) - { - /* worker node continues to send more data, keep reading */ - nextTransmitStatus = EXEC_TRANSMIT_COPYING; break; } - /* we are done copying data */ - if (copyStatus == CLIENT_COPY_DONE) + case EXEC_TRANSMIT_TRACKER_RETRY: { - closed = close(fileDescriptor); - fileDescriptorArray[currentNodeIndex] = -1; + bool trackerHealthy = false; + bool trackerConnectionUp = false; - if (closed >= 0) + /* + * The task tracker proxy handles connection errors. On the off chance + * that our connection is still up and the transmit tracker misbehaved, + * we capture this as an intermittent tracker failure. 
+ */ + trackerConnectionUp = TrackerConnectionUp(transmitTracker); + if (trackerConnectionUp) { - nextTransmitStatus = EXEC_TRANSMIT_DONE; + transmitTracker->trackerFailureCount++; + } + + trackerHealthy = TrackerHealthy(transmitTracker); + if (trackerHealthy) + { + nextTransmitStatus = EXEC_TRANSMIT_UNASSIGNED; } else { - ereport(WARNING, (errcode_for_file_access(), - errmsg("could not close copied file: %m"))); - - nextTransmitStatus = EXEC_TRANSMIT_TRACKER_RETRY; + nextTransmitStatus = EXEC_TRANSMIT_TRACKER_FAILED; } - } - else if (copyStatus == CLIENT_COPY_FAILED) - { - nextTransmitStatus = EXEC_TRANSMIT_TRACKER_RETRY; - closed = close(fileDescriptor); - fileDescriptorArray[currentNodeIndex] = -1; - - if (closed < 0) - { - ereport(WARNING, (errcode_for_file_access(), - errmsg("could not close copy file: %m"))); - } + break; } - /* - * We use task tracker logic to manage file transmits as well, but that - * abstraction leaks after we drop into the copy protocol. To make it - * work, we reset transmit tracker's connection for next file transmit. - */ - transmitTracker->connectionBusy = false; - transmitTracker->connectionBusyOnTask = NULL; - - break; - } - - case EXEC_TRANSMIT_TRACKER_RETRY: - { - bool trackerHealthy = false; - bool trackerConnectionUp = false; - - /* - * The task tracker proxy handles connection errors. On the off chance - * that our connection is still up and the transmit tracker misbehaved, - * we capture this as an intermittent tracker failure. - */ - trackerConnectionUp = TrackerConnectionUp(transmitTracker); - if (trackerConnectionUp) - { - transmitTracker->trackerFailureCount++; - } - - trackerHealthy = TrackerHealthy(transmitTracker); - if (trackerHealthy) + case EXEC_TRANSMIT_TRACKER_FAILED: { + /* + * This case exists to signal to the caller that we failed. The caller + * is now responsible for reassigning the transmit task (and downstream + * SQL task dependencies) and running the appropriate recovery logic. 
+ */ nextTransmitStatus = EXEC_TRANSMIT_UNASSIGNED; + break; } - else + + case EXEC_TRANSMIT_DONE: { - nextTransmitStatus = EXEC_TRANSMIT_TRACKER_FAILED; + /* we are done with fetching task results to the master node */ + nextTransmitStatus = EXEC_TRANSMIT_DONE; + break; } - break; - } - - case EXEC_TRANSMIT_TRACKER_FAILED: - { - /* - * This case exists to signal to the caller that we failed. The caller - * is now responsible for reassigning the transmit task (and downstream - * SQL task dependencies) and running the appropriate recovery logic. - */ - nextTransmitStatus = EXEC_TRANSMIT_UNASSIGNED; - break; - } - - case EXEC_TRANSMIT_DONE: - { - /* we are done with fetching task results to the master node */ - nextTransmitStatus = EXEC_TRANSMIT_DONE; - break; - } - - default: - { - /* we fatal here to avoid leaking client-side resources */ - ereport(FATAL, (errmsg("invalid transmit status: %d", currentTransmitStatus))); - break; - } + default: + { + /* we fatal here to avoid leaking client-side resources */ + ereport(FATAL, (errmsg("invalid transmit status: %d", + currentTransmitStatus))); + break; + } } /* update file transmit status for most recent transmit tracker */ @@ -2317,7 +2321,7 @@ AssignQueuedTasks(TaskTracker *taskTracker) { StringInfo taskAssignmentQuery = taskState->taskAssignmentQuery; - if(taskAssignmentCount > 0) + if (taskAssignmentCount > 0) { appendStringInfo(multiAssignQuery, ";"); } @@ -2336,7 +2340,7 @@ AssignQueuedTasks(TaskTracker *taskTracker) taskState = (TrackerTaskState *) hash_seq_search(&status); } - if(taskAssignmentCount > 0) + if (taskAssignmentCount > 0) { void *queryResult = NULL; int rowCount = 0; @@ -2833,7 +2837,8 @@ TrackerHashCleanupJob(HTAB *taskTrackerHash, Task *jobCleanupTask) if (queryStatus == CLIENT_QUERY_DONE) { ereport(DEBUG4, (errmsg("completed cleanup query for job " UINT64_FORMAT - " on node \"%s:%u\"", jobId, nodeName, nodePort))); + " on node \"%s:%u\"", jobId, nodeName, + nodePort))); /* clear connection for 
future cleanup queries */ taskTracker->connectionBusy = false; diff --git a/src/backend/distributed/executor/multi_utility.c b/src/backend/distributed/executor/multi_utility.c index ccd05c021..be4148f75 100644 --- a/src/backend/distributed/executor/multi_utility.c +++ b/src/backend/distributed/executor/multi_utility.c @@ -39,9 +39,9 @@ */ struct DropRelationCallbackState { - char relkind; - Oid heapOid; - bool concurrent; + char relkind; + Oid heapOid; + bool concurrent; }; @@ -190,10 +190,10 @@ multi_ProcessUtility(Node *parsetree, } else if (IsA(parsetree, CreateRoleStmt) && CitusDBHasBeenLoaded()) { - ereport(NOTICE, (errmsg("CitusDB does not support CREATE ROLE/USER " - "for distributed databases"), - errdetail("Multiple roles are currently supported " - "only for local tables"))); + ereport(NOTICE, (errmsg("CitusDB does not support CREATE ROLE/USER " + "for distributed databases"), + errdetail("Multiple roles are currently supported " + "only for local tables"))); } /* now drop into standard process utility */ @@ -757,7 +757,7 @@ IsAlterTableRenameStmt(RenameStmt *renameStmt) isAlterTableRenameStmt = true; } -#if (PG_VERSION_NUM >=90500) +#if (PG_VERSION_NUM >= 90500) else if (renameStmt->renameType == OBJECT_TABCONSTRAINT) { isAlterTableRenameStmt = true; @@ -905,8 +905,9 @@ ExecuteCommandOnWorkerShards(Oid relationId, const char *commandString, } else { - ereport(DEBUG2, (errmsg("applied command on shard " UINT64_FORMAT " on " - "node %s:%d", shardId, workerName, workerPort))); + ereport(DEBUG2, (errmsg("applied command on shard " UINT64_FORMAT + " on node %s:%d", shardId, workerName, + workerPort))); } isFirstPlacement = false; diff --git a/src/backend/distributed/master/master_create_shards.c b/src/backend/distributed/master/master_create_shards.c index 0617d8d59..9adc0e21c 100644 --- a/src/backend/distributed/master/master_create_shards.c +++ b/src/backend/distributed/master/master_create_shards.c @@ -185,7 +185,7 @@ 
master_create_worker_shards(PG_FUNCTION_ARGS) LockShardDistributionMetadata(shardId, ExclusiveLock); CreateShardPlacements(shardId, ddlCommandList, workerNodeList, - roundRobinNodeIndex, replicationFactor); + roundRobinNodeIndex, replicationFactor); InsertShardRow(distributedTableId, shardId, shardStorageType, minHashTokenText, maxHashTokenText); diff --git a/src/backend/distributed/master/master_delete_protocol.c b/src/backend/distributed/master/master_delete_protocol.c index 35835c7d0..efeeb78bc 100644 --- a/src/backend/distributed/master/master_delete_protocol.c +++ b/src/backend/distributed/master/master_delete_protocol.c @@ -115,9 +115,9 @@ master_apply_delete_command(PG_FUNCTION_ARGS) if ((partitionMethod == DISTRIBUTE_BY_HASH) && (deleteCriteria != NULL)) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot delete from distributed table"), - errdetail("Delete statements on hash-partitioned tables " - "with where clause is not supported"))); + errmsg("cannot delete from distributed table"), + errdetail("Delete statements on hash-partitioned tables " + "with where clause is not supported"))); } CheckDeleteCriteria(deleteCriteria); @@ -138,15 +138,15 @@ master_apply_delete_command(PG_FUNCTION_ARGS) else { deletableShardIntervalList = ShardsMatchingDeleteCriteria(relationId, - shardIntervalList, - deleteCriteria); + shardIntervalList, + deleteCriteria); } foreach(shardIntervalCell, deletableShardIntervalList) { List *shardPlacementList = NIL; List *droppedPlacementList = NIL; - List *lingeringPlacementList= NIL; + List *lingeringPlacementList = NIL; ListCell *shardPlacementCell = NULL; ListCell *droppedPlacementCell = NULL; ListCell *lingeringPlacementCell = NULL; @@ -167,7 +167,8 @@ master_apply_delete_command(PG_FUNCTION_ARGS) shardPlacementList = ShardPlacementList(shardId); foreach(shardPlacementCell, shardPlacementList) { - ShardPlacement *shardPlacement = (ShardPlacement *) lfirst(shardPlacementCell); + ShardPlacement *shardPlacement 
= + (ShardPlacement *) lfirst(shardPlacementCell); char *workerName = shardPlacement->nodeName; uint32 workerPort = shardPlacement->nodePort; bool dropSuccessful = false; @@ -176,14 +177,17 @@ master_apply_delete_command(PG_FUNCTION_ARGS) char tableType = get_rel_relkind(relationId); if (tableType == RELKIND_RELATION) { - appendStringInfo(workerDropQuery, DROP_REGULAR_TABLE_COMMAND, quotedShardName); + appendStringInfo(workerDropQuery, DROP_REGULAR_TABLE_COMMAND, + quotedShardName); } else if (tableType == RELKIND_FOREIGN_TABLE) { - appendStringInfo(workerDropQuery, DROP_FOREIGN_TABLE_COMMAND, quotedShardName); + appendStringInfo(workerDropQuery, DROP_FOREIGN_TABLE_COMMAND, + quotedShardName); } - dropSuccessful = ExecuteRemoteCommand(workerName, workerPort, workerDropQuery); + dropSuccessful = ExecuteRemoteCommand(workerName, workerPort, + workerDropQuery); if (dropSuccessful) { droppedPlacementList = lappend(droppedPlacementList, shardPlacement); @@ -227,12 +231,13 @@ master_apply_delete_command(PG_FUNCTION_ARGS) if (QueryCancelPending) { - ereport(WARNING, (errmsg("cancel requests are ignored during shard deletion"))); + ereport(WARNING, (errmsg("cancel requests are ignored during shard " + "deletion"))); QueryCancelPending = false; } RESUME_INTERRUPTS(); - } + } deleteCriteriaShardCount = list_length(deletableShardIntervalList); PG_RETURN_INT32(deleteCriteriaShardCount); @@ -257,7 +262,7 @@ CheckTableCount(Query *deleteQuery) static void CheckDeleteCriteria(Node *deleteCriteria) { - bool simpleOpExpression = true; + bool simpleOpExpression = true; if (deleteCriteria == NULL) { @@ -286,7 +291,7 @@ CheckDeleteCriteria(Node *deleteCriteria) } else { - simpleOpExpression = false; + simpleOpExpression = false; } if (!simpleOpExpression) @@ -298,15 +303,15 @@ CheckDeleteCriteria(Node *deleteCriteria) } - /* - * CheckPartitionColumn checks that the given where clause is based only on the - * partition key of the given relation id. 
- */ +/* + * CheckPartitionColumn checks that the given where clause is based only on the + * partition key of the given relation id. + */ static void CheckPartitionColumn(Oid relationId, Node *whereClause) { Var *partitionColumn = PartitionKey(relationId); - ListCell *columnCell = NULL; + ListCell *columnCell = NULL; List *columnList = pull_var_clause_default(whereClause); foreach(columnCell, columnList) @@ -332,7 +337,7 @@ CheckPartitionColumn(Oid relationId, Node *whereClause) */ static List * ShardsMatchingDeleteCriteria(Oid relationId, List *shardIntervalList, - Node *deleteCriteria) + Node *deleteCriteria) { List *dropShardIntervalList = NIL; List *deleteCriteriaList = NIL; diff --git a/src/backend/distributed/master/master_metadata_utility.c b/src/backend/distributed/master/master_metadata_utility.c index d8ac90997..34db8da9a 100644 --- a/src/backend/distributed/master/master_metadata_utility.c +++ b/src/backend/distributed/master/master_metadata_utility.c @@ -219,7 +219,7 @@ ShardLength(uint64 shardId) if (shardPlacementList == NIL) { ereport(ERROR, (errmsg("could not find length of shard " UINT64_FORMAT, shardId), - errdetail("Could not find any shard placements for the shard."))); + errdetail("Could not find any shard placements for the shard."))); } else { diff --git a/src/backend/distributed/master/master_node_protocol.c b/src/backend/distributed/master/master_node_protocol.c index fc9413666..064756d77 100644 --- a/src/backend/distributed/master/master_node_protocol.c +++ b/src/backend/distributed/master/master_node_protocol.c @@ -49,7 +49,7 @@ /* Shard related configuration */ int ShardReplicationFactor = 2; /* desired replication factor for shards */ -int ShardMaxSize = 1048576; /* maximum size in KB one shard can grow to */ +int ShardMaxSize = 1048576; /* maximum size in KB one shard can grow to */ int ShardPlacementPolicy = SHARD_PLACEMENT_ROUND_ROBIN; @@ -210,7 +210,7 @@ master_get_table_ddl_events(PG_FUNCTION_ARGS) tableDDLEventCell = 
list_head(tableDDLEventList); functionContext->user_fctx = tableDDLEventCell; - + MemoryContextSwitchTo(oldContext); } @@ -226,8 +226,8 @@ master_get_table_ddl_events(PG_FUNCTION_ARGS) if (tableDDLEventCell != NULL) { char *ddlStatement = (char *) lfirst(tableDDLEventCell); - text *ddlStatementText = cstring_to_text(ddlStatement); - + text *ddlStatementText = cstring_to_text(ddlStatement); + functionContext->user_fctx = lnext(tableDDLEventCell); SRF_RETURN_NEXT(functionContext, PointerGetDatum(ddlStatementText)); @@ -252,7 +252,7 @@ Datum master_get_new_shardid(PG_FUNCTION_ARGS) { text *sequenceName = cstring_to_text(SHARDID_SEQUENCE_NAME); - Oid sequenceId = ResolveRelationId(sequenceName); + Oid sequenceId = ResolveRelationId(sequenceName); Datum sequenceIdDatum = ObjectIdGetDatum(sequenceId); /* generate new and unique shardId from sequence */ @@ -281,7 +281,7 @@ master_get_local_first_candidate_nodes(PG_FUNCTION_ARGS) if (SRF_IS_FIRSTCALL()) { - MemoryContext oldContext = NULL; + MemoryContext oldContext = NULL; TupleDesc tupleDescriptor = NULL; uint32 liveNodeCount = 0; bool hasOid = false; @@ -396,7 +396,7 @@ master_get_round_robin_candidate_nodes(PG_FUNCTION_ARGS) if (SRF_IS_FIRSTCALL()) { - MemoryContext oldContext = NULL; + MemoryContext oldContext = NULL; TupleDesc tupleDescriptor = NULL; List *workerNodeList = NIL; TypeFuncClass resultTypeClass = 0; @@ -477,7 +477,7 @@ master_get_active_worker_nodes(PG_FUNCTION_ARGS) if (SRF_IS_FIRSTCALL()) { - MemoryContext oldContext = NULL; + MemoryContext oldContext = NULL; List *workerNodeList = NIL; uint32 workerNodeCount = 0; TupleDesc tupleDescriptor = NULL; @@ -567,7 +567,7 @@ GetTableDDLEvents(Oid relationId) Relation pgIndex = NULL; SysScanDesc scanDescriptor = NULL; - ScanKeyData scanKey[1]; + ScanKeyData scanKey[1]; int scanKeyCount = 1; HeapTuple heapTuple = NULL; @@ -599,13 +599,13 @@ GetTableDDLEvents(Oid relationId) /* fetch table schema and column option definitions */ tableSchemaDef = 
pg_get_tableschemadef_string(relationId); tableColumnOptionsDef = pg_get_tablecolumnoptionsdef_string(relationId); - + tableDDLEventList = lappend(tableDDLEventList, tableSchemaDef); if (tableColumnOptionsDef != NULL) { tableDDLEventList = lappend(tableDDLEventList, tableColumnOptionsDef); } - + /* open system catalog and scan all indexes that belong to this table */ pgIndex = heap_open(IndexRelationId, AccessShareLock); @@ -660,7 +660,7 @@ GetTableDDLEvents(Oid relationId) { statementDef = pg_get_indexdef_string(indexId); } - + /* append found constraint or index definition to the list */ tableDDLEventList = lappend(tableDDLEventList, statementDef); @@ -695,8 +695,8 @@ hostname_client_addr(void) Port *port = MyProcPort; char *remoteHost = NULL; int remoteHostLen = NI_MAXHOST; - int flags = NI_NAMEREQD; /* require fully qualified hostname */ - int nameFound = 0; + int flags = NI_NAMEREQD; /* require fully qualified hostname */ + int nameFound = 0; if (port == NULL) { @@ -709,10 +709,15 @@ hostname_client_addr(void) #ifdef HAVE_IPV6 case AF_INET6: #endif - break; + { + break; + } + default: + { ereport(ERROR, (errmsg("invalid address family in connection"))); break; + } } remoteHost = palloc0(remoteHostLen); diff --git a/src/backend/distributed/master/master_stage_protocol.c b/src/backend/distributed/master/master_stage_protocol.c index 1c13237cc..e4c4f7fca 100644 --- a/src/backend/distributed/master/master_stage_protocol.c +++ b/src/backend/distributed/master/master_stage_protocol.c @@ -93,7 +93,7 @@ master_create_empty_shard(PG_FUNCTION_ARGS) if (partitionMethod == DISTRIBUTE_BY_HASH) { ereport(ERROR, (errmsg("relation \"%s\" is a hash partitioned table", - relationName), + relationName), errdetail("We currently don't support creating shards " "on hash-partitioned tables"))); } @@ -128,7 +128,7 @@ master_create_empty_shard(PG_FUNCTION_ARGS) } CreateShardPlacements(shardId, ddlEventList, candidateNodeList, 0, - ShardReplicationFactor); + ShardReplicationFactor); 
InsertShardRow(relationId, shardId, SHARD_STORAGE_TABLE, nullMinValue, nullMaxValue); @@ -361,7 +361,7 @@ CheckDistributedTable(Oid relationId) */ void CreateShardPlacements(int64 shardId, List *ddlEventList, List *workerNodeList, - int workerStartIndex, int replicationFactor) + int workerStartIndex, int replicationFactor) { int attemptCount = replicationFactor; int workerNodeCount = list_length(workerNodeList); @@ -393,7 +393,7 @@ CreateShardPlacements(int64 shardId, List *ddlEventList, List *workerNodeList, else { ereport(WARNING, (errmsg("could not create shard on \"%s:%u\"", - nodeName, nodePort))); + nodeName, nodePort))); } if (placementsCreated >= replicationFactor) @@ -406,7 +406,7 @@ CreateShardPlacements(int64 shardId, List *ddlEventList, List *workerNodeList, if (placementsCreated < replicationFactor) { ereport(ERROR, (errmsg("could only create %u of %u of required shard replicas", - placementsCreated, replicationFactor))); + placementsCreated, replicationFactor))); } } diff --git a/src/backend/distributed/planner/modify_planner.c b/src/backend/distributed/planner/modify_planner.c index faeb6b6b0..4484e53bb 100644 --- a/src/backend/distributed/planner/modify_planner.c +++ b/src/backend/distributed/planner/modify_planner.c @@ -393,6 +393,7 @@ DistributedModifyTask(Query *query) query->onConflict = RebuildOnConflict(relationId, query->onConflict); } #else + /* always set to false for PG_VERSION_NUM < 90500 */ upsertQuery = false; #endif @@ -414,6 +415,7 @@ DistributedModifyTask(Query *query) #if (PG_VERSION_NUM >= 90500) + /* * RebuildOnConflict rebuilds OnConflictExpr for correct deparsing. 
The function * makes WHERE clause elements explicit and filters dropped columns @@ -433,7 +435,7 @@ RebuildOnConflict(Oid relationId, OnConflictExpr *originalOnConflict) /* Convert onConflictWhere qualifiers to an explicitly and'd clause */ updatedOnConflict->onConflictWhere = - (Node *) make_ands_explicit((List *) onConflictWhere); + (Node *) make_ands_explicit((List *) onConflictWhere); /* * Here we handle dropped columns on the distributed table. onConflictSet @@ -448,7 +450,7 @@ RebuildOnConflict(Oid relationId, OnConflictExpr *originalOnConflict) foreach(targetEntryCell, onConflictSet) { TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell); - FormData_pg_attribute *tableAttribute = tableAttributes[targetEntry->resno -1]; + FormData_pg_attribute *tableAttribute = tableAttributes[targetEntry->resno - 1]; /* skip dropped columns */ if (tableAttribute->attisdropped) @@ -468,6 +470,8 @@ RebuildOnConflict(Oid relationId, OnConflictExpr *originalOnConflict) return updatedOnConflict; } + + #endif diff --git a/src/backend/distributed/planner/multi_explain.c b/src/backend/distributed/planner/multi_explain.c index 35e0ca79b..c7fbdefed 100644 --- a/src/backend/distributed/planner/multi_explain.c +++ b/src/backend/distributed/planner/multi_explain.c @@ -45,7 +45,7 @@ MultiExplainOneQuery(Query *query, IntoClause *into, ExplainState *es, if (localQuery) { PlannedStmt *plan = NULL; - instr_time planstart; + instr_time planstart; instr_time planduration; INSTR_TIME_SET_CURRENT(planstart); diff --git a/src/backend/distributed/planner/multi_join_order.c b/src/backend/distributed/planner/multi_join_order.c index e8cc68f07..04a91d311 100644 --- a/src/backend/distributed/planner/multi_join_order.c +++ b/src/backend/distributed/planner/multi_join_order.c @@ -33,18 +33,18 @@ /* Config variables managed via guc.c */ -int LargeTableShardCount = 4; /* shard counts for a large table */ +int LargeTableShardCount = 4; /* shard counts for a large table */ bool 
LogMultiJoinOrder = false; /* print join order as a debugging aid */ /* Function pointer type definition for join rule evaluation functions */ -typedef JoinOrderNode * (*RuleEvalFunction) (JoinOrderNode *currentJoinNode, - TableEntry *candidateTable, - List *candidateShardList, - List *applicableJoinClauses, - JoinType joinType); +typedef JoinOrderNode *(*RuleEvalFunction) (JoinOrderNode *currentJoinNode, + TableEntry *candidateTable, + List *candidateShardList, + List *applicableJoinClauses, + JoinType joinType); -static char * RuleNameArray[JOIN_RULE_LAST] = {0}; /* ordered join rule names */ -static RuleEvalFunction RuleEvalFunctionArray[JOIN_RULE_LAST] = {0}; /* join rules */ +static char *RuleNameArray[JOIN_RULE_LAST] = { 0 }; /* ordered join rule names */ +static RuleEvalFunction RuleEvalFunctionArray[JOIN_RULE_LAST] = { 0 }; /* join rules */ /* Local functions forward declarations */ @@ -54,7 +54,8 @@ static bool JoinExprListWalker(Node *node, List **joinList); static bool ExtractLeftMostRangeTableIndex(Node *node, int *rangeTableIndex); static List * MergeShardIntervals(List *leftShardIntervalList, List *rightShardIntervalList, JoinType joinType); -static bool ShardIntervalsMatch(List *leftShardIntervalList, List *rightShardIntervalList); +static bool ShardIntervalsMatch(List *leftShardIntervalList, + List *rightShardIntervalList); static List * LoadSortedShardIntervalList(Oid relationId); static List * JoinOrderForTable(TableEntry *firstTable, List *tableEntryList, List *joinClauseList); @@ -68,31 +69,41 @@ static List * TableEntryListDifference(List *lhsTableList, List *rhsTableList); static TableEntry * FindTableEntry(List *tableEntryList, uint32 tableId); /* Local functions forward declarations for join evaluations */ -static JoinOrderNode * EvaluateJoinRules(List *joinedTableList, JoinOrderNode *currentJoinNode, - TableEntry *candidateTable, List *candidateShardList, +static JoinOrderNode * EvaluateJoinRules(List *joinedTableList, + JoinOrderNode 
*currentJoinNode, + TableEntry *candidateTable, + List *candidateShardList, List *joinClauseList, JoinType joinType); static List * RangeTableIdList(List *tableList); static RuleEvalFunction JoinRuleEvalFunction(JoinRuleType ruleType); static char * JoinRuleName(JoinRuleType ruleType); static JoinOrderNode * BroadcastJoin(JoinOrderNode *joinNode, TableEntry *candidateTable, - List *candidateShardList, List *applicableJoinClauses, + List *candidateShardList, + List *applicableJoinClauses, JoinType joinType); static JoinOrderNode * LocalJoin(JoinOrderNode *joinNode, TableEntry *candidateTable, List *candidateShardList, List *applicableJoinClauses, JoinType joinType); static bool JoinOnColumns(Var *currentPartitioncolumn, Var *candidatePartitionColumn, List *joinClauseList); -static JoinOrderNode * SinglePartitionJoin(JoinOrderNode *joinNode, TableEntry *candidateTable, - List *candidateShardList, List *applicableJoinClauses, +static JoinOrderNode * SinglePartitionJoin(JoinOrderNode *joinNode, + TableEntry *candidateTable, + List *candidateShardList, + List *applicableJoinClauses, JoinType joinType); -static JoinOrderNode * DualPartitionJoin(JoinOrderNode *joinNode, TableEntry *candidateTable, - List *candidateShardList, List *applicableJoinClauses, +static JoinOrderNode * DualPartitionJoin(JoinOrderNode *joinNode, + TableEntry *candidateTable, + List *candidateShardList, + List *applicableJoinClauses, JoinType joinType); -static JoinOrderNode * CartesianProduct(JoinOrderNode *joinNode, TableEntry *candidateTable, - List *candidateShardList, List *applicableJoinClauses, +static JoinOrderNode * CartesianProduct(JoinOrderNode *joinNode, + TableEntry *candidateTable, + List *candidateShardList, + List *applicableJoinClauses, JoinType joinType); -static JoinOrderNode * MakeJoinOrderNode(TableEntry *tableEntry, JoinRuleType joinRuleType, - Var *partitionColumn, char partitionMethod); +static JoinOrderNode * MakeJoinOrderNode(TableEntry *tableEntry, JoinRuleType + 
joinRuleType, Var *partitionColumn, + char partitionMethod); /* @@ -106,7 +117,7 @@ List * FixedJoinOrderList(FromExpr *fromExpr, List *tableEntryList) { List *joinList = NIL; - ListCell * joinCell = NULL; + ListCell *joinCell = NULL; List *joinWhereClauseList = NIL; List *joinOrderList = NIL; List *joinedTableList = NIL; @@ -199,7 +210,6 @@ FixedJoinOrderList(FromExpr *fromExpr, List *tableEntryList) "query"), errdetail("Shards of relations in outer join queries " "must have 1-to-1 shard partitioning"))); - } } else @@ -439,7 +449,7 @@ MergeShardIntervals(List *leftShardIntervalList, List *rightShardIntervalList, bool nextMaxSmaller = comparisonResult > 0; if ((shardUnion && nextMaxLarger) || - (!shardUnion && nextMaxSmaller) ) + (!shardUnion && nextMaxSmaller)) { newShardInterval->maxValue = datumCopy(nextMax, typeByValue, typeLen); } @@ -586,7 +596,8 @@ ShardIntervalsMatch(List *leftShardIntervalList, List *rightShardIntervalList) nextRightIntervalCell = lnext(rightShardIntervalCell); if (nextRightIntervalCell != NULL) { - ShardInterval *nextRightInterval = (ShardInterval *) lfirst(nextRightIntervalCell); + ShardInterval *nextRightInterval = + (ShardInterval *) lfirst(nextRightIntervalCell); shardIntervalsIntersect = ShardIntervalsOverlap(leftInterval, nextRightInterval); if (shardIntervalsIntersect) @@ -730,7 +741,7 @@ JoinOrderForTable(TableEntry *firstTable, List *tableEntryList, List *joinClause * BestJoinOrder takes in a list of candidate join orders, and determines the * best join order among these candidates. The function uses two heuristics for * this. First, the function chooses join orders that have the fewest number of - * join operators that cause large data transfers. Second, the function chooses + * join operators that cause large data transfers. Second, the function chooses * join orders where large data transfers occur later in the execution. 
*/ static List * @@ -1009,7 +1020,7 @@ EvaluateJoinRules(List *joinedTableList, JoinOrderNode *currentJoinNode, uint32 candidateTableId = 0; List *joinedTableIdList = NIL; List *applicableJoinClauses = NIL; - uint32 lowestValidIndex = JOIN_RULE_INVALID_FIRST + 1; + uint32 lowestValidIndex = JOIN_RULE_INVALID_FIRST + 1; uint32 highestValidIndex = JOIN_RULE_LAST - 1; uint32 ruleIndex = 0; @@ -1028,11 +1039,11 @@ EvaluateJoinRules(List *joinedTableList, JoinOrderNode *currentJoinNode, JoinRuleType ruleType = (JoinRuleType) ruleIndex; RuleEvalFunction ruleEvalFunction = JoinRuleEvalFunction(ruleType); - nextJoinNode = (*ruleEvalFunction) (currentJoinNode, - candidateTable, - candidateShardList, - applicableJoinClauses, - joinType); + nextJoinNode = (*ruleEvalFunction)(currentJoinNode, + candidateTable, + candidateShardList, + applicableJoinClauses, + joinType); /* break after finding the first join rule that applies */ if (nextJoinNode != NULL) diff --git a/src/backend/distributed/planner/multi_logical_optimizer.c b/src/backend/distributed/planner/multi_logical_optimizer.c index 3aebfcb37..7acf85d7e 100644 --- a/src/backend/distributed/planner/multi_logical_optimizer.c +++ b/src/backend/distributed/planner/multi_logical_optimizer.c @@ -91,7 +91,8 @@ static void ParentSetNewChild(MultiNode *parentNode, MultiNode *oldChildNode, /* Local functions forward declarations for aggregate expressions */ static void ApplyExtendedOpNodes(MultiExtendedOp *originalNode, - MultiExtendedOp *masterNode, MultiExtendedOp *workerNode); + MultiExtendedOp *masterNode, + MultiExtendedOp *workerNode); static void TransformSubqueryNode(MultiTable *subqueryNode); static MultiExtendedOp * MasterExtendedOpNode(MultiExtendedOp *originalOpNode); static Node * MasterAggregateMutator(Node *originalNode, AttrNumber *columnId); @@ -117,7 +118,8 @@ static void ErrorIfUnsupportedArrayAggregate(Aggref *arrayAggregateExpression); static void ErrorIfUnsupportedAggregateDistinct(Aggref *aggregateExpression, 
MultiNode *logicalPlanNode); static Var * AggregateDistinctColumn(Aggref *aggregateExpression); -static bool TablePartitioningSupportsDistinct(List *tableNodeList, MultiExtendedOp *opNode, +static bool TablePartitioningSupportsDistinct(List *tableNodeList, + MultiExtendedOp *opNode, Var *distinctColumn); static bool GroupedByColumn(List *groupClauseList, List *targetList, Var *column); @@ -488,7 +490,7 @@ AddressProjectSpecialConditions(MultiProject *projectNode) /* * We check if we need to include any child columns in the project node to - * address the following special conditions. + * address the following special conditions. * * SNC1: project node must include child node's projected columns, or * SNC2: project node must include child node's partition column, or @@ -637,7 +639,7 @@ Commutative(MultiUnaryNode *parentNode, MultiUnaryNode *childNode) { PushDownStatus pushDownStatus = PUSH_DOWN_NOT_VALID; CitusNodeTag parentNodeTag = CitusNodeTag(parentNode); - CitusNodeTag childNodeTag = CitusNodeTag(childNode); + CitusNodeTag childNodeTag = CitusNodeTag(childNode); /* we cannot be commutative with non-query operators */ if (childNodeTag == T_MultiTreeRoot || childNodeTag == T_MultiTable) @@ -692,7 +694,7 @@ Distributive(MultiUnaryNode *parentNode, MultiBinaryNode *childNode) { PushDownStatus pushDownStatus = PUSH_DOWN_NOT_VALID; CitusNodeTag parentNodeTag = CitusNodeTag(parentNode); - CitusNodeTag childNodeTag = CitusNodeTag(childNode); + CitusNodeTag childNodeTag = CitusNodeTag(childNode); /* special condition checks for partition operator are not implemented */ Assert(parentNodeTag != T_MultiPartition); @@ -751,7 +753,7 @@ Factorizable(MultiBinaryNode *parentNode, MultiUnaryNode *childNode) { PullUpStatus pullUpStatus = PULL_UP_NOT_VALID; CitusNodeTag parentNodeTag = CitusNodeTag(parentNode); - CitusNodeTag childNodeTag = CitusNodeTag(childNode); + CitusNodeTag childNodeTag = CitusNodeTag(childNode); /* * The following nodes are factorizable with their parents, 
but we don't @@ -1220,7 +1222,7 @@ MasterExtendedOpNode(MultiExtendedOp *originalOpNode) bool hasAggregates = contain_agg_clause((Node *) originalExpression); if (hasAggregates) { - Node *newNode = MasterAggregateMutator((Node*) originalExpression, + Node *newNode = MasterAggregateMutator((Node *) originalExpression, &columnId); newExpression = (Expr *) newNode; } @@ -1826,7 +1828,7 @@ WorkerAggregateExpressionList(Aggref *originalAggregate) static AggregateType GetAggregateType(Oid aggFunctionId) { - char *aggregateProcName = NULL; + char *aggregateProcName = NULL; uint32 aggregateCount = 0; uint32 aggregateIndex = 0; bool found = false; @@ -1980,22 +1982,30 @@ CountDistinctHashFunctionName(Oid argumentType) switch (argumentType) { case INT4OID: + { hashFunctionName = pstrdup(HLL_HASH_INTEGER_FUNC_NAME); break; + } case INT8OID: + { hashFunctionName = pstrdup(HLL_HASH_BIGINT_FUNC_NAME); break; + } case TEXTOID: case BPCHAROID: case VARCHAROID: + { hashFunctionName = pstrdup(HLL_HASH_TEXT_FUNC_NAME); break; + } default: + { hashFunctionName = pstrdup(HLL_HASH_ANY_FUNC_NAME); break; + } } return hashFunctionName; @@ -2479,7 +2489,7 @@ ErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerQueryHasLimit) if (subqueryTree->setOperations) { SetOperationStmt *setOperationStatement = - (SetOperationStmt *) subqueryTree->setOperations; + (SetOperationStmt *) subqueryTree->setOperations; if (setOperationStatement->op == SETOP_UNION) { @@ -2563,7 +2573,7 @@ ErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerQueryHasLimit) List *joinTreeTableIndexList = NIL; uint32 joiningTableCount = 0; - ExtractRangeTableIndexWalker((Node*) subqueryTree->jointree, + ExtractRangeTableIndexWalker((Node *) subqueryTree->jointree, &joinTreeTableIndexList); joiningTableCount = list_length(joinTreeTableIndexList); @@ -2587,7 +2597,7 @@ ErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerQueryHasLimit) List *distinctTargetEntryList = 
GroupTargetEntryList(distinctClauseList, targetEntryList); bool distinctOnPartitionColumn = - TargetListOnPartitionColumn(subqueryTree, distinctTargetEntryList); + TargetListOnPartitionColumn(subqueryTree, distinctTargetEntryList); if (!distinctOnPartitionColumn) { preconditionsSatisfied = false; @@ -2609,7 +2619,7 @@ ErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerQueryHasLimit) foreach(rangeTableEntryCell, subqueryEntryList) { RangeTblEntry *rangeTableEntry = - (RangeTblEntry *) lfirst(rangeTableEntryCell); + (RangeTblEntry *) lfirst(rangeTableEntryCell); Query *innerSubquery = rangeTableEntry->subquery; ErrorIfCannotPushdownSubquery(innerSubquery, outerQueryHasLimit); @@ -2639,7 +2649,7 @@ ErrorIfUnsupportedTableCombination(Query *queryTree) * Extract all range table indexes from the join tree. Note that sub-queries * that get pulled up by PostgreSQL don't appear in this join tree. */ - ExtractRangeTableIndexWalker((Node*) queryTree->jointree, &joinTreeTableIndexList); + ExtractRangeTableIndexWalker((Node *) queryTree->jointree, &joinTreeTableIndexList); foreach(joinTreeTableIndexCell, joinTreeTableIndexList) { /* @@ -2768,7 +2778,7 @@ ErrorIfUnsupportedUnionQuery(Query *unionQuery) leftQueryOnPartitionColumn = TargetListOnPartitionColumn(leftQuery, leftQuery->targetList); rightQueryOnPartitionColumn = TargetListOnPartitionColumn(rightQuery, - rightQuery->targetList); + rightQuery->targetList); if (!(leftQueryOnPartitionColumn && rightQueryOnPartitionColumn)) { @@ -2807,7 +2817,7 @@ GroupTargetEntryList(List *groupClauseList, List *targetEntryList) { SortGroupClause *groupClause = (SortGroupClause *) lfirst(groupClauseCell); TargetEntry *groupTargetEntry = - get_sortgroupclause_tle(groupClause, targetEntryList); + get_sortgroupclause_tle(groupClause, targetEntryList); groupTargetEntryList = lappend(groupTargetEntryList, groupTargetEntry); } @@ -2890,7 +2900,7 @@ IsPartitionColumnRecursive(Expr *columnExpression, Query *query) else if 
(IsA(columnExpression, FieldSelect)) { FieldSelect *compositeField = (FieldSelect *) columnExpression; - Expr *fieldExpression = compositeField->arg; + Expr *fieldExpression = compositeField->arg; if (IsA(fieldExpression, Var)) { @@ -2909,7 +2919,7 @@ IsPartitionColumnRecursive(Expr *columnExpression, Query *query) return false; } - rangeTableEntryIndex = candidateColumn->varno - 1; + rangeTableEntryIndex = candidateColumn->varno - 1; rangeTableEntry = list_nth(rangetableList, rangeTableEntryIndex); if (rangeTableEntry->rtekind == RTE_RELATION) @@ -2980,7 +2990,7 @@ CompositeFieldRecursive(Expr *expression, Query *query) return NULL; } - rangeTableEntryIndex = candidateColumn->varno - 1; + rangeTableEntryIndex = candidateColumn->varno - 1; rangeTableEntry = list_nth(rangetableList, rangeTableEntryIndex); if (rangeTableEntry->rtekind == RTE_SUBQUERY) @@ -3019,7 +3029,7 @@ FullCompositeFieldList(List *compositeFieldList) uint32 fieldIndex = 0; ListCell *fieldSelectCell = NULL; - foreach (fieldSelectCell, compositeFieldList) + foreach(fieldSelectCell, compositeFieldList) { FieldSelect *fieldSelect = (FieldSelect *) lfirst(fieldSelectCell); uint32 compositeFieldIndex = 0; @@ -3226,9 +3236,10 @@ SupportedLateralQuery(Query *parentQuery, Query *lateralQuery) if (outerColumnIsPartitionColumn && localColumnIsPartitionColumn) { FieldSelect *outerCompositeField = - CompositeFieldRecursive(outerQueryExpression, parentQuery); + CompositeFieldRecursive(outerQueryExpression, parentQuery); FieldSelect *localCompositeField = - CompositeFieldRecursive(localQueryExpression, lateralQuery); + CompositeFieldRecursive(localQueryExpression, lateralQuery); + /* * If partition colums are composite fields, add them to list to * check later if all composite fields are used. 
@@ -3251,12 +3262,12 @@ SupportedLateralQuery(Query *parentQuery, Query *lateralQuery) } /* check composite fields */ - if(!supportedLateralQuery) + if (!supportedLateralQuery) { bool outerFullCompositeFieldList = - FullCompositeFieldList(outerCompositeFieldList); + FullCompositeFieldList(outerCompositeFieldList); bool localFullCompositeFieldList = - FullCompositeFieldList(localCompositeFieldList); + FullCompositeFieldList(localCompositeFieldList); if (outerFullCompositeFieldList && localFullCompositeFieldList) { @@ -3301,15 +3312,15 @@ JoinOnPartitionColumn(Query *query) if (isLeftColumnPartitionColumn && isRightColumnPartitionColumn) { FieldSelect *leftCompositeField = - CompositeFieldRecursive(leftArgument, query); + CompositeFieldRecursive(leftArgument, query); FieldSelect *rightCompositeField = - CompositeFieldRecursive(rightArgument, query); + CompositeFieldRecursive(rightArgument, query); /* * If partition colums are composite fields, add them to list to * check later if all composite fields are used. 
*/ - if(leftCompositeField && rightCompositeField) + if (leftCompositeField && rightCompositeField) { leftCompositeFieldList = lappend(leftCompositeFieldList, leftCompositeField); @@ -3318,7 +3329,7 @@ JoinOnPartitionColumn(Query *query) } /* if both sides are not composite fields, they are normal columns */ - if(!(leftCompositeField && rightCompositeField)) + if (!(leftCompositeField && rightCompositeField)) { joinOnPartitionColumn = true; break; @@ -3327,12 +3338,12 @@ JoinOnPartitionColumn(Query *query) } /* check composite fields */ - if(!joinOnPartitionColumn) + if (!joinOnPartitionColumn) { bool leftFullCompositeFieldList = - FullCompositeFieldList(leftCompositeFieldList); + FullCompositeFieldList(leftCompositeFieldList); bool rightFullCompositeFieldList = - FullCompositeFieldList(rightCompositeFieldList); + FullCompositeFieldList(rightCompositeFieldList); if (leftFullCompositeFieldList && rightFullCompositeFieldList) { @@ -3409,7 +3420,7 @@ ErrorIfUnsupportedShardDistribution(Query *query) /* check if this table has 1-1 shard partitioning with first table */ coPartitionedTables = CoPartitionedTables(firstShardIntervalList, - currentShardIntervalList); + currentShardIntervalList); if (!coPartitionedTables) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), @@ -3437,7 +3448,7 @@ RelationIdList(Query *query) foreach(tableEntryCell, tableEntryList) { - TableEntry *tableEntry = (TableEntry *) lfirst(tableEntryCell); + TableEntry *tableEntry = (TableEntry *) lfirst(tableEntryCell); Oid relationId = tableEntry->relationId; relationIdList = list_append_unique_oid(relationIdList, relationId); @@ -3617,7 +3628,7 @@ ExtractQueryWalker(Node *node, List **queryList) Query *query = (Query *) node; (*queryList) = lappend(*queryList, query); - walkerResult = query_tree_walker(query, ExtractQueryWalker, queryList, + walkerResult = query_tree_walker(query, ExtractQueryWalker, queryList, QTW_EXAMINE_RTES); } @@ -3641,7 +3652,7 @@ LeafQuery(Query *queryTree) * Extract 
all range table indexes from the join tree. Note that sub-queries * that get pulled up by PostgreSQL don't appear in this join tree. */ - ExtractRangeTableIndexWalker((Node*) queryTree->jointree, &joinTreeTableIndexList); + ExtractRangeTableIndexWalker((Node *) queryTree->jointree, &joinTreeTableIndexList); foreach(joinTreeTableIndexCell, joinTreeTableIndexList) { /* @@ -3725,7 +3736,7 @@ PartitionColumnOpExpressionList(Query *query) } else if (IsA(leftArgument, Const) && IsA(leftArgument, Var)) { - candidatePartitionColumn = (Var *) rightArgument; + candidatePartitionColumn = (Var *) rightArgument; } else { diff --git a/src/backend/distributed/planner/multi_logical_planner.c b/src/backend/distributed/planner/multi_logical_planner.c index 8f866c074..a9edcfc39 100644 --- a/src/backend/distributed/planner/multi_logical_planner.c +++ b/src/backend/distributed/planner/multi_logical_planner.c @@ -1,7 +1,7 @@ /*------------------------------------------------------------------------- * * multi_logical_planner.c - * + * * Routines for constructing a logical plan tree from the given Query tree * structure. This new logical plan is based on multi-relational algebra rules. 
* @@ -39,11 +39,11 @@ bool SubqueryPushdown = false; /* is subquery pushdown enabled */ /* Function pointer type definition for apply join rule functions */ -typedef MultiNode * (*RuleApplyFunction) (MultiNode *leftNode, MultiNode *rightNode, - Var *partitionColumn, JoinType joinType, - List *joinClauses); +typedef MultiNode *(*RuleApplyFunction) (MultiNode *leftNode, MultiNode *rightNode, + Var *partitionColumn, JoinType joinType, + List *joinClauses); -static RuleApplyFunction RuleApplyFunctionArray[JOIN_RULE_LAST] = {0}; /* join rules */ +static RuleApplyFunction RuleApplyFunctionArray[JOIN_RULE_LAST] = { 0 }; /* join rules */ /* Local functions forward declarations */ static MultiNode * MultiPlanTree(Query *queryTree); @@ -157,7 +157,7 @@ SubqueryEntryList(Query *queryTree) * only walk over range table entries at this level and do not recurse into * subqueries. */ - ExtractRangeTableIndexWalker((Node*) queryTree->jointree, &joinTreeTableIndexList); + ExtractRangeTableIndexWalker((Node *) queryTree->jointree, &joinTreeTableIndexList); foreach(joinTreeTableIndexCell, joinTreeTableIndexList) { /* @@ -285,6 +285,7 @@ MultiPlanTree(Query *queryTree) else { bool hasOuterJoin = false; + /* * We calculate the join order using the list of tables in the query and * the join clauses between them. Note that this function owns the table @@ -465,6 +466,7 @@ ErrorIfQueryNotSupported(Query *queryTree) #if (PG_VERSION_NUM >= 90500) + /* HasTablesample returns tree if the query contains tablesample */ static bool HasTablesample(Query *queryTree) @@ -485,6 +487,8 @@ HasTablesample(Query *queryTree) return hasTablesample; } + + #endif @@ -529,7 +533,8 @@ HasUnsupportedJoinWalker(Node *node, void *context) * ErrorIfSubqueryNotSupported checks that we can perform distributed planning for * the given subquery. 
*/ -static void ErrorIfSubqueryNotSupported(Query *subqueryTree) +static void +ErrorIfSubqueryNotSupported(Query *subqueryTree) { char *errorDetail = NULL; bool preconditionsSatisfied = true; @@ -587,7 +592,6 @@ HasOuterJoin(Query *queryTree) static bool HasOuterJoinWalker(Node *node, void *context) { - bool hasOuterJoin = false; if (node == NULL) { @@ -657,7 +661,7 @@ HasComplexRangeTableType(Query *queryTree) * Extract all range table indexes from the join tree. Note that sub-queries * that get pulled up by PostgreSQL don't appear in this join tree. */ - ExtractRangeTableIndexWalker((Node*) queryTree->jointree, &joinTreeTableIndexList); + ExtractRangeTableIndexWalker((Node *) queryTree->jointree, &joinTreeTableIndexList); foreach(joinTreeTableIndexCell, joinTreeTableIndexList) { /* @@ -675,7 +679,7 @@ HasComplexRangeTableType(Query *queryTree) * subquery. */ if (rangeTableEntry->rtekind != RTE_RELATION && - rangeTableEntry->rtekind != RTE_SUBQUERY) + rangeTableEntry->rtekind != RTE_SUBQUERY) { hasComplexRangeTableType = true; } @@ -966,7 +970,7 @@ TableEntryList(List *rangeTableList) foreach(rangeTableCell, rangeTableList) { - RangeTblEntry *rangeTableEntry = (RangeTblEntry *) lfirst(rangeTableCell); + RangeTblEntry *rangeTableEntry = (RangeTblEntry *) lfirst(rangeTableCell); if (rangeTableEntry->rtekind == RTE_RELATION) { @@ -1178,8 +1182,8 @@ IsSelectClause(Node *clause) /* we currently consider the following nodes as select clauses */ NodeTag nodeTag = nodeTag(clause); - if ( !(nodeTag == T_OpExpr || nodeTag == T_ScalarArrayOpExpr || - nodeTag == T_NullTest || nodeTag == T_BooleanTest) ) + if (!(nodeTag == T_OpExpr || nodeTag == T_ScalarArrayOpExpr || + nodeTag == T_NullTest || nodeTag == T_BooleanTest)) { return false; } @@ -1317,9 +1321,9 @@ UnaryOperator(MultiNode *node) { bool unaryOperator = false; - if (CitusIsA(node, MultiTreeRoot) || CitusIsA(node, MultiTable) || - CitusIsA(node, MultiCollect) || CitusIsA(node, MultiSelect) || - CitusIsA(node, 
MultiProject) || CitusIsA(node, MultiPartition) || + if (CitusIsA(node, MultiTreeRoot) || CitusIsA(node, MultiTable) || + CitusIsA(node, MultiCollect) || CitusIsA(node, MultiSelect) || + CitusIsA(node, MultiProject) || CitusIsA(node, MultiPartition) || CitusIsA(node, MultiExtendedOp)) { unaryOperator = true; @@ -1403,7 +1407,7 @@ FindNodesOfType(MultiNode *node, int type) } else if (BinaryOperator(node)) { - MultiNode *leftChildNode = ((MultiBinaryNode *) node)->leftChildNode; + MultiNode *leftChildNode = ((MultiBinaryNode *) node)->leftChildNode; MultiNode *rightChildNode = ((MultiBinaryNode *) node)->rightChildNode; List *leftChildNodeList = FindNodesOfType(leftChildNode, type); @@ -1533,9 +1537,9 @@ ExtractRangeTableEntryWalker(Node *node, List **rangeTableList) List * pull_var_clause_default(Node *node) { - List *columnList = pull_var_clause(node, PVC_RECURSE_AGGREGATES, - PVC_REJECT_PLACEHOLDERS); - return columnList; + List *columnList = pull_var_clause(node, PVC_RECURSE_AGGREGATES, + PVC_REJECT_PLACEHOLDERS); + return columnList; } @@ -1552,7 +1556,7 @@ ApplyJoinRule(MultiNode *leftNode, MultiNode *rightNode, JoinRuleType ruleType, MultiNode *multiNode = NULL; List *applicableJoinClauses = NIL; - List *leftTableIdList = OutputTableIdList(leftNode); + List *leftTableIdList = OutputTableIdList(leftNode); List *rightTableIdList = OutputTableIdList(rightNode); int rightTableIdCount = 0; uint32 rightTableId = 0; @@ -1567,8 +1571,8 @@ ApplyJoinRule(MultiNode *leftNode, MultiNode *rightNode, JoinRuleType ruleType, /* call the join rule application function to create the new join node */ ruleApplyFunction = JoinRuleApplyFunction(ruleType); - multiNode = (*ruleApplyFunction) (leftNode, rightNode, partitionColumn, - joinType, applicableJoinClauses); + multiNode = (*ruleApplyFunction)(leftNode, rightNode, partitionColumn, + joinType, applicableJoinClauses); if (joinType != JOIN_INNER && CitusIsA(multiNode, MultiJoin)) { @@ -1918,7 +1922,7 @@ ErrorIfSubqueryJoin(Query 
*queryTree) * Extract all range table indexes from the join tree. Note that sub-queries * that get pulled up by PostgreSQL don't appear in this join tree. */ - ExtractRangeTableIndexWalker((Node*) queryTree->jointree, &joinTreeTableIndexList); + ExtractRangeTableIndexWalker((Node *) queryTree->jointree, &joinTreeTableIndexList); joiningRangeTableCount = list_length(joinTreeTableIndexList); if (joiningRangeTableCount > 1) diff --git a/src/backend/distributed/planner/multi_master_planner.c b/src/backend/distributed/planner/multi_master_planner.c index 88fda53a3..b328e2e7c 100644 --- a/src/backend/distributed/planner/multi_master_planner.c +++ b/src/backend/distributed/planner/multi_master_planner.c @@ -122,7 +122,7 @@ BuildAggregatePlan(Query *masterQuery, Plan *subPlan) AggStrategy aggregateStrategy = AGG_PLAIN; AggClauseCosts aggregateCosts; AttrNumber *groupColumnIdArray = NULL; - List *aggregateTargetList = NIL; + List *aggregateTargetList = NIL; List *groupColumnList = NIL; List *columnList = NIL; ListCell *columnCell = NULL; @@ -168,13 +168,13 @@ BuildAggregatePlan(Query *masterQuery, Plan *subPlan) /* finally create the plan */ #if (PG_VERSION_NUM >= 90500) - aggregatePlan = make_agg(NULL, aggregateTargetList, NIL, aggregateStrategy, - &aggregateCosts, groupColumnCount, groupColumnIdArray, - groupColumnOpArray, NIL, rowEstimate, subPlan); + aggregatePlan = make_agg(NULL, aggregateTargetList, NIL, aggregateStrategy, + &aggregateCosts, groupColumnCount, groupColumnIdArray, + groupColumnOpArray, NIL, rowEstimate, subPlan); #else - aggregatePlan = make_agg(NULL, aggregateTargetList, NIL, aggregateStrategy, - &aggregateCosts, groupColumnCount, groupColumnIdArray, - groupColumnOpArray, rowEstimate, subPlan); + aggregatePlan = make_agg(NULL, aggregateTargetList, NIL, aggregateStrategy, + &aggregateCosts, groupColumnCount, groupColumnIdArray, + groupColumnOpArray, rowEstimate, subPlan); #endif return aggregatePlan; @@ -211,7 +211,7 @@ BuildSelectStatement(Query 
*masterQuery, char *masterTableName, rangeTableEntry = copyObject(queryRangeTableEntry); rangeTableEntry->rtekind = RTE_RELATION; rangeTableEntry->eref = makeAlias(masterTableName, NIL); - rangeTableEntry->relid = 0; /* to be filled in exec_Start */ + rangeTableEntry->relid = 0; /* to be filled in exec_Start */ rangeTableEntry->inh = false; rangeTableEntry->inFromCl = true; @@ -220,7 +220,7 @@ BuildSelectStatement(Query *masterQuery, char *masterTableName, /* (2) build and initialize sequential scan node */ sequentialScan = makeNode(SeqScan); - sequentialScan->scanrelid = 1; /* always one */ + sequentialScan->scanrelid = 1; /* always one */ /* (3) add an aggregation plan if needed */ if (masterQuery->hasAggs || masterQuery->groupClause) diff --git a/src/backend/distributed/planner/multi_physical_planner.c b/src/backend/distributed/planner/multi_physical_planner.c index 5e497970e..b34c36b4c 100644 --- a/src/backend/distributed/planner/multi_physical_planner.c +++ b/src/backend/distributed/planner/multi_physical_planner.c @@ -138,7 +138,7 @@ static OpExpr * MakeOpExpressionWithZeroConst(void); static List * BuildRestrictInfoList(List *qualList); static List * FragmentCombinationList(List *rangeTableFragmentsList, Query *jobQuery, List *dependedJobList); -static JoinSequenceNode * JoinSequenceArray(List * rangeTableFragmentsList, +static JoinSequenceNode * JoinSequenceArray(List *rangeTableFragmentsList, Query *jobQuery, List *dependedJobList); static bool PartitionedOnColumn(Var *column, List *rangeTableList, List *dependedJobList); static void CheckJoinBetweenColumns(OpExpr *joinClause); @@ -155,7 +155,8 @@ static StringInfo DatumArrayString(Datum *datumArray, uint32 datumCount, Oid dat static Task * CreateBasicTask(uint64 jobId, uint32 taskId, TaskType taskType, char *queryString); static void UpdateRangeTableAlias(List *rangeTableList, List *fragmentList); -static Alias * FragmentAlias(RangeTblEntry *rangeTableEntry, RangeTableFragment *fragment); +static Alias * 
FragmentAlias(RangeTblEntry *rangeTableEntry, + RangeTableFragment *fragment); static uint64 AnchorShardId(List *fragmentList, uint32 anchorRangeTableId); static List * PruneSqlTaskDependencies(List *sqlTaskList); static List * AssignTaskList(List *sqlTaskList); @@ -167,7 +168,7 @@ static Task * GreedyAssignTask(WorkerNode *workerNode, List *taskList, static List * RoundRobinAssignTaskList(List *taskList); static List * RoundRobinReorder(Task *task, List *placementList); static List * ReorderAndAssignTaskList(List *taskList, - List * (*reorderFunction) (Task *, List *)); + List * (*reorderFunction)(Task *, List *)); static int CompareTasksByShardId(const void *leftElement, const void *rightElement); static List * ActiveShardPlacementLists(List *taskList); static List * ActivePlacementList(List *placementList); @@ -309,6 +310,7 @@ BuildJobTree(MultiTreeRoot *multiTree) partitionKey, partitionType, baseRelationId, JOIN_MAP_MERGE_JOB); + /* reset depended job list */ loopDependedJobList = NIL; loopDependedJobList = list_make1(mapMergeJob); @@ -538,7 +540,7 @@ BuildJobQuery(MultiNode *multiNode, List *dependedJobList) * If we are building this query on a repartitioned subquery job then we * don't need to update column attributes. 
*/ - if(dependedJobList != NIL) + if (dependedJobList != NIL) { Job *job = (Job *) linitial(dependedJobList); if (CitusIsA(job, MapMergeJob)) @@ -628,10 +630,10 @@ BuildJobQuery(MultiNode *multiNode, List *dependedJobList) jobQuery->rtable = rangeTableList; jobQuery->targetList = targetList; jobQuery->jointree = joinTree; - jobQuery->sortClause = sortClauseList; + jobQuery->sortClause = sortClauseList; jobQuery->groupClause = groupClauseList; jobQuery->limitOffset = limitOffset; - jobQuery->limitCount = limitCount; + jobQuery->limitCount = limitCount; jobQuery->hasAggs = contain_agg_clause((Node *) targetList); return jobQuery; @@ -718,10 +720,10 @@ BuildReduceQuery(MultiExtendedOp *extendedOpNode, List *dependedJobList) reduceQuery->rtable = derivedRangeTableList; reduceQuery->targetList = targetList; reduceQuery->jointree = joinTree; - reduceQuery->sortClause = extendedOpNode->sortClauseList; + reduceQuery->sortClause = extendedOpNode->sortClauseList; reduceQuery->groupClause = extendedOpNode->groupClauseList; reduceQuery->limitOffset = extendedOpNode->limitOffset; - reduceQuery->limitCount = extendedOpNode->limitCount; + reduceQuery->limitCount = extendedOpNode->limitCount; reduceQuery->hasAggs = contain_agg_clause((Node *) targetList); return reduceQuery; @@ -754,7 +756,7 @@ BaseRangeTableList(MultiNode *multiNode) */ MultiTable *multiTable = (MultiTable *) multiNode; if (multiTable->relationId != SUBQUERY_RELATION_ID && - multiTable->relationId != HEAP_ANALYTICS_SUBQUERY_RELATION_ID) + multiTable->relationId != HEAP_ANALYTICS_SUBQUERY_RELATION_ID) { RangeTblEntry *rangeTableEntry = makeNode(RangeTblEntry); rangeTableEntry->inFromCl = true; @@ -870,7 +872,7 @@ TargetEntryList(List *expressionList) Expr *expression = (Expr *) lfirst(expressionCell); TargetEntry *targetEntry = makeTargetEntry(expression, - list_length(targetEntryList)+1, + list_length(targetEntryList) + 1, NULL, false); targetEntryList = lappend(targetEntryList, targetEntry); } @@ -1044,7 +1046,7 
@@ QueryJoinTree(MultiNode *multiNode, List *dependedJobList, List **rangeTableList /* fix the column attributes in ON (...) clauses */ columnList = pull_var_clause_default((Node *) joinNode->joinClauseList); - foreach (columnCell, columnList) + foreach(columnCell, columnList) { Var *column = (Var *) lfirst(columnCell); UpdateColumnAttributes(column, *rangeTableList, dependedJobList); @@ -1093,7 +1095,8 @@ QueryJoinTree(MultiNode *multiNode, List *dependedJobList, List **rangeTableList uint32 columnCount = (uint32) list_length(dependedTargetList); List *columnNameList = DerivedColumnNameList(columnCount, dependedJob->jobId); - RangeTblEntry *rangeTableEntry = DerivedRangeTableEntry(multiNode, columnNameList, + RangeTblEntry *rangeTableEntry = DerivedRangeTableEntry(multiNode, + columnNameList, tableIdList); RangeTblRef *rangeTableRef = makeNode(RangeTblRef); @@ -1246,7 +1249,7 @@ ExtractColumns(RangeTblEntry *rangeTableEntry, int rangeTableId, List *dependedJ else if (rangeTableKind == CITUS_RTE_RELATION) { /* - * For distributed tables, we construct a regular table RTE to call + * For distributed tables, we construct a regular table RTE to call * expandRTE, which will extract columns from the distributed table * schema. 
*/ @@ -1405,10 +1408,10 @@ BuildSubqueryJobQuery(MultiNode *multiNode) jobQuery->rtable = rangeTableList; jobQuery->targetList = targetList; jobQuery->jointree = joinTree; - jobQuery->sortClause = sortClauseList; + jobQuery->sortClause = sortClauseList; jobQuery->groupClause = groupClauseList; jobQuery->limitOffset = limitOffset; - jobQuery->limitCount = limitCount; + jobQuery->limitCount = limitCount; jobQuery->hasAggs = contain_agg_clause((Node *) targetList); return jobQuery; @@ -1646,7 +1649,7 @@ static uint64 UniqueJobId(void) { text *sequenceName = cstring_to_text(JOBID_SEQUENCE_NAME); - Oid sequenceId = ResolveRelationId(sequenceName); + Oid sequenceId = ResolveRelationId(sequenceName); Datum sequenceIdDatum = ObjectIdGetDatum(sequenceId); /* generate new and unique jobId from sequence */ @@ -1747,7 +1750,7 @@ HashPartitionCount(void) uint32 nodeCount = WorkerGetLiveNodeCount(); double maxReduceTasksPerNode = MaxRunningTasksPerNode / 2.0; - uint32 partitionCount = (uint32) rint(nodeCount * maxReduceTasksPerNode); + uint32 partitionCount = (uint32) rint(nodeCount * maxReduceTasksPerNode); return partitionCount; } @@ -1864,8 +1867,9 @@ SplitPointObject(ShardInterval **shardIntervalArray, uint32 shardIntervalCount) return splitPointObject; } + /* ------------------------------------------------------------ - * Functions that relate to building and assigning tasks follow + * Functions that relate to building and assigning tasks follow * ------------------------------------------------------------ */ @@ -1986,7 +1990,7 @@ SubquerySqlTaskList(Job *job) ListCell *rangeTableCell = NULL; ListCell *queryCell = NULL; Node *whereClauseTree = NULL; - uint32 taskIdIndex = 1; /* 0 is reserved for invalid taskId */ + uint32 taskIdIndex = 1; /* 0 is reserved for invalid taskId */ uint32 anchorRangeTableId = 0; uint32 rangeTableIndex = 0; const uint32 fragmentSize = sizeof(RangeTableFragment); @@ -2036,10 +2040,10 @@ SubquerySqlTaskList(Job *job) if (opExpressionList != NIL) 
{ Var *partitionColumn = PartitionColumn(relationId, tableId); - List *whereClauseList = ReplaceColumnsInOpExpressionList(opExpressionList, + List *whereClauseList = ReplaceColumnsInOpExpressionList(opExpressionList, partitionColumn); finalShardIntervalList = PruneShardList(relationId, tableId, whereClauseList, - shardIntervalList); + shardIntervalList); } else { @@ -2146,7 +2150,7 @@ static List * SqlTaskList(Job *job) { List *sqlTaskList = NIL; - uint32 taskIdIndex = 1; /* 0 is reserved for invalid taskId */ + uint32 taskIdIndex = 1; /* 0 is reserved for invalid taskId */ uint64 jobId = job->jobId; bool anchorRangeTableBasedAssignment = false; uint32 anchorRangeTableId = 0; @@ -2472,8 +2476,8 @@ RangeTableFragmentsList(List *rangeTableList, List *whereClauseList, List *shardIntervalList = LoadShardIntervalList(relationId); List *prunedShardIntervalList = PruneShardList(relationId, tableId, - whereClauseList, - shardIntervalList); + whereClauseList, + shardIntervalList); /* * If we prune all shards for one table, query results will be empty. 
@@ -2548,7 +2552,7 @@ RangeTableFragmentsList(List *rangeTableList, List *whereClauseList, */ List * PruneShardList(Oid relationId, Index tableId, List *whereClauseList, - List *shardIntervalList) + List *shardIntervalList) { List *remainingShardList = NIL; ListCell *shardIntervalCell = NULL; @@ -2653,7 +2657,7 @@ MakeOpExpression(Var *variable, int16 strategyNumber) Oid accessMethodId = BTREE_AM_OID; Oid operatorId = InvalidOid; Oid operatorClassInputType = InvalidOid; - Const *constantValue = NULL; + Const *constantValue = NULL; OpExpr *expression = NULL; char typeType = 0; @@ -2679,7 +2683,7 @@ MakeOpExpression(Var *variable, int16 strategyNumber) /* Now make the expression with the given variable and a null constant */ expression = (OpExpr *) make_opclause(operatorId, InvalidOid, /* no result type yet */ - false, /* no return set */ + false, /* no return set */ (Expr *) variable, (Expr *) constantValue, InvalidOid, collationId); @@ -2900,7 +2904,7 @@ HashableClauseMutator(Node *originalNode, Var *partitionColumn) * If this node is not hashable, continue walking down the expression tree * to find and hash clauses which are eligible. 
*/ - if(newNode == NULL) + if (newNode == NULL) { newNode = expression_tree_mutator(originalNode, HashableClauseMutator, (void *) partitionColumn); @@ -3045,7 +3049,7 @@ MakeInt4Constant(Datum constantValue) bool constantIsNull = false; bool constantByValue = true; - Const *int4Constant = makeConst(constantType, constantTypeMode, constantCollationId, + Const *int4Constant = makeConst(constantType, constantTypeMode, constantCollationId, constantLength, constantValue, constantIsNull, constantByValue); return int4Constant; @@ -3102,7 +3106,7 @@ UpdateConstraint(Node *baseConstraint, ShardInterval *shardInterval) Node *greaterThanExpr = (Node *) lsecond(andExpr->args); Node *minNode = get_rightop((Expr *) greaterThanExpr); /* right op */ - Node *maxNode = get_rightop((Expr *) lessThanExpr); /* right op */ + Node *maxNode = get_rightop((Expr *) lessThanExpr); /* right op */ Const *minConstant = NULL; Const *maxConstant = NULL; @@ -3273,7 +3277,7 @@ JoinSequenceArray(List *rangeTableFragmentsList, Query *jobQuery, List *depended joinSequenceArray[joinedTableCount].joiningRangeTableId = NON_PRUNABLE_JOIN; joinedTableCount++; - foreach (joinExprCell, joinExprList) + foreach(joinExprCell, joinExprList) { JoinExpr *joinExpr = (JoinExpr *) lfirst(joinExprCell); JoinType joinType = joinExpr->jointype; @@ -3347,7 +3351,7 @@ JoinSequenceArray(List *rangeTableFragmentsList, Query *jobQuery, List *depended if (IS_OUTER_JOIN(joinType)) { int innerRangeTableId = 0; - List * tableFragments = NIL; + List *tableFragments = NIL; int fragmentCount = 0; if (joinType == JOIN_RIGHT) @@ -3500,7 +3504,7 @@ FindRangeTableFragmentsList(List *rangeTableFragmentsList, int tableId) if (tableFragments != NIL) { RangeTableFragment *tableFragment = - (RangeTableFragment*) linitial(tableFragments); + (RangeTableFragment *) linitial(tableFragments); if (tableFragment->rangeTableId == tableId) { foundTableFragments = tableFragments; @@ -3706,7 +3710,7 @@ UniqueFragmentList(List *fragmentList) 
foreach(uniqueFragmentCell, uniqueFragmentList) { RangeTableFragment *uniqueFragment = - (RangeTableFragment *) lfirst(uniqueFragmentCell); + (RangeTableFragment *) lfirst(uniqueFragmentCell); uint64 *uniqueShardId = uniqueFragment->fragmentReference; if (*shardId == *uniqueShardId) @@ -4046,6 +4050,7 @@ FragmentAlias(RangeTblEntry *rangeTableEntry, RangeTableFragment *fragment) return alias; } + /* * AnchorShardId walks over each fragment in the given fragment list, finds the * fragment that corresponds to the given anchor range tableId, and returns this @@ -4360,7 +4365,7 @@ MergeTaskList(MapMergeJob *mapMergeJob, List *mapTaskList, uint32 taskIdIndex) StringInfo intermediateTableQueryString = IntermediateTableQueryString(jobId, taskIdIndex, reduceQuery); - StringInfo mergeAndRunQueryString= makeStringInfo(); + StringInfo mergeAndRunQueryString = makeStringInfo(); appendStringInfo(mergeAndRunQueryString, MERGE_FILES_AND_RUN_QUERY_COMMAND, jobId, taskIdIndex, mergeTableQueryString->data, intermediateTableQueryString->data); @@ -4686,7 +4691,7 @@ TaskListAppendUnique(List *list, Task *task) List * TaskListConcatUnique(List *list1, List *list2) { - ListCell *taskCell = NULL; + ListCell *taskCell = NULL; foreach(taskCell, list2) { @@ -4960,7 +4965,7 @@ List * FirstReplicaAssignTaskList(List *taskList) { /* No additional reordering need take place for this algorithm */ - List * (*reorderFunction)(Task *, List *) = NULL; + List *(*reorderFunction)(Task *, List *) = NULL; taskList = ReorderAndAssignTaskList(taskList, reorderFunction); @@ -4984,6 +4989,7 @@ RoundRobinAssignTaskList(List *taskList) return taskList; } + /* * RoundRobinReorder implements the core of the round-robin assignment policy. 
* It takes a task and placement list and rotates a copy of the placement list @@ -5116,7 +5122,8 @@ ActiveShardPlacementLists(List *taskList) List *activeShardPlacementList = ActivePlacementList(shardPlacementList); /* sort shard placements by their insertion time */ - activeShardPlacementList = SortList(activeShardPlacementList, CompareShardPlacements); + activeShardPlacementList = SortList(activeShardPlacementList, + CompareShardPlacements); shardPlacementLists = lappend(shardPlacementLists, activeShardPlacementList); } @@ -5257,7 +5264,8 @@ AssignDualHashTaskList(List *taskList) uint32 replicaIndex = 0; for (replicaIndex = 0; replicaIndex < ShardReplicationFactor; replicaIndex++) { - uint32 assignmentOffset = beginningNodeIndex + assignedTaskIndex + replicaIndex; + uint32 assignmentOffset = beginningNodeIndex + assignedTaskIndex + + replicaIndex; uint32 assignmentIndex = assignmentOffset % workerNodeCount; WorkerNode *workerNode = list_nth(workerNodeList, assignmentIndex); diff --git a/src/backend/distributed/relay/relay_event_utility.c b/src/backend/distributed/relay/relay_event_utility.c index 54f7a09a5..ce02ef9a0 100644 --- a/src/backend/distributed/relay/relay_event_utility.c +++ b/src/backend/distributed/relay/relay_event_utility.c @@ -35,7 +35,7 @@ /* Local functions forward declarations */ static bool TypeAddIndexConstraint(const AlterTableCmd *command); -static bool TypeDropIndexConstraint(const AlterTableCmd *command, +static bool TypeDropIndexConstraint(const AlterTableCmd *command, const RangeVar *relation, uint64 shardId); static void AppendShardIdToConstraintName(AlterTableCmd *command, uint64 shardId); @@ -67,7 +67,7 @@ RelayEventExtendNames(Node *parseTree, uint64 shardId) AppendShardIdToName(sequenceName, shardId); break; } - + case T_AlterTableStmt: { /* @@ -79,7 +79,7 @@ RelayEventExtendNames(Node *parseTree, uint64 shardId) AlterTableStmt *alterTableStmt = (AlterTableStmt *) parseTree; char **relationName = 
&(alterTableStmt->relation->relname); - RangeVar *relation = alterTableStmt->relation; /* for constraints */ + RangeVar *relation = alterTableStmt->relation; /* for constraints */ List *commandList = alterTableStmt->cmds; ListCell *commandCell = NULL; @@ -179,15 +179,15 @@ RelayEventExtendNames(Node *parseTree, uint64 shardId) objectType == OBJECT_INDEX || objectType == OBJECT_FOREIGN_TABLE || objectType == OBJECT_FOREIGN_SERVER) { - List *relationNameList = NULL; - int relationNameListLength = 0; + List *relationNameList = NULL; + int relationNameListLength = 0; Value *relationNameValue = NULL; - char **relationName = NULL; + char **relationName = NULL; uint32 dropCount = list_length(dropStmt->objects); if (dropCount > 1) { - ereport(ERROR, + ereport(ERROR, (errmsg("cannot extend name for multiple drop objects"))); } @@ -205,19 +205,30 @@ RelayEventExtendNames(Node *parseTree, uint64 shardId) switch (relationNameListLength) { case 1: + { relationNameValue = linitial(relationNameList); break; + } + case 2: + { relationNameValue = lsecond(relationNameList); break; + } + case 3: + { relationNameValue = lthird(relationNameList); break; + } + default: + { ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("improper relation name: \"%s\"", NameListToString(relationNameList)))); break; + } } relationName = &(relationNameValue->val.str); @@ -304,7 +315,7 @@ RelayEventExtendNames(Node *parseTree, uint64 shardId) { RenameStmt *renameStmt = (RenameStmt *) parseTree; ObjectType objectType = renameStmt->renameType; - + if (objectType == OBJECT_TABLE || objectType == OBJECT_SEQUENCE || objectType == OBJECT_INDEX) { @@ -335,7 +346,7 @@ RelayEventExtendNames(Node *parseTree, uint64 shardId) * We currently do not support truncate statements. This is * primarily because truncates allow implicit modifications to * sequences through table column dependencies. As we have not - * determined our dependency model for sequences, we error here. 
+ * determined our dependency model for sequences, we error here. */ ereport(ERROR, (errmsg("cannot extend name for truncate statement"))); break; @@ -384,18 +395,18 @@ TypeAddIndexConstraint(const AlterTableCmd *command) * associated with an index. */ static bool -TypeDropIndexConstraint(const AlterTableCmd *command, +TypeDropIndexConstraint(const AlterTableCmd *command, const RangeVar *relation, uint64 shardId) { Relation pgConstraint = NULL; SysScanDesc scanDescriptor = NULL; - ScanKeyData scanKey[1]; + ScanKeyData scanKey[1]; int scanKeyCount = 1; HeapTuple heapTuple = NULL; char *searchedConstraintName = NULL; - bool indexConstraint = false; - Oid relationId = InvalidOid; + bool indexConstraint = false; + Oid relationId = InvalidOid; bool failOK = true; if (command->subtype != AT_DropConstraint) @@ -423,8 +434,8 @@ TypeDropIndexConstraint(const AlterTableCmd *command, ScanKeyInit(&scanKey[0], Anum_pg_constraint_conrelid, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(relationId)); - - scanDescriptor = systable_beginscan(pgConstraint, + + scanDescriptor = systable_beginscan(pgConstraint, ConstraintRelidIndexId, true, /* indexOK */ NULL, scanKeyCount, scanKey); @@ -433,7 +444,7 @@ TypeDropIndexConstraint(const AlterTableCmd *command, { Form_pg_constraint constraintForm = (Form_pg_constraint) GETSTRUCT(heapTuple); char *constraintName = NameStr(constraintForm->conname); - + if (strncmp(constraintName, searchedConstraintName, NAMEDATALEN) == 0) { /* we found the constraint, now check if it is for an index */ @@ -442,7 +453,7 @@ TypeDropIndexConstraint(const AlterTableCmd *command, { indexConstraint = true; } - + break; } @@ -451,7 +462,7 @@ TypeDropIndexConstraint(const AlterTableCmd *command, systable_endscan(scanDescriptor); heap_close(pgConstraint, AccessShareLock); - + pfree(searchedConstraintName); return indexConstraint; @@ -489,10 +500,10 @@ AppendShardIdToConstraintName(AlterTableCmd *command, uint64 shardId) void AppendShardIdToName(char **name, uint64 
shardId) { - char extendedName[NAMEDATALEN]; + char extendedName[NAMEDATALEN]; uint32 extendedNameLength = 0; - snprintf(extendedName, NAMEDATALEN, "%s%c" UINT64_FORMAT, + snprintf(extendedName, NAMEDATALEN, "%s%c" UINT64_FORMAT, (*name), SHARD_NAME_SEPARATOR, shardId); /* diff --git a/src/backend/distributed/shared_library_init.c b/src/backend/distributed/shared_library_init.c index 344665eb0..8402ab0cf 100644 --- a/src/backend/distributed/shared_library_init.c +++ b/src/backend/distributed/shared_library_init.c @@ -48,23 +48,23 @@ static void NormalizeWorkerListPath(void); /* GUC enum definitions */ static const struct config_enum_entry task_assignment_policy_options[] = { - {"greedy", TASK_ASSIGNMENT_GREEDY, false}, - {"first-replica", TASK_ASSIGNMENT_FIRST_REPLICA, false}, - {"round-robin", TASK_ASSIGNMENT_ROUND_ROBIN, false}, - {NULL, 0, false} + { "greedy", TASK_ASSIGNMENT_GREEDY, false }, + { "first-replica", TASK_ASSIGNMENT_FIRST_REPLICA, false }, + { "round-robin", TASK_ASSIGNMENT_ROUND_ROBIN, false }, + { NULL, 0, false } }; static const struct config_enum_entry task_executor_type_options[] = { - {"real-time", MULTI_EXECUTOR_REAL_TIME, false}, - {"task-tracker", MULTI_EXECUTOR_TASK_TRACKER, false}, - {"router", MULTI_EXECUTOR_ROUTER, false}, - {NULL, 0, false} + { "real-time", MULTI_EXECUTOR_REAL_TIME, false }, + { "task-tracker", MULTI_EXECUTOR_TASK_TRACKER, false }, + { "router", MULTI_EXECUTOR_ROUTER, false }, + { NULL, 0, false } }; static const struct config_enum_entry shard_placement_policy_options[] = { - {"local-node-first", SHARD_PLACEMENT_LOCAL_NODE_FIRST, false}, - {"round-robin", SHARD_PLACEMENT_ROUND_ROBIN, false}, - {NULL, 0, false} + { "local-node-first", SHARD_PLACEMENT_LOCAL_NODE_FIRST, false }, + { "round-robin", SHARD_PLACEMENT_ROUND_ROBIN, false }, + { NULL, 0, false } }; @@ -206,9 +206,10 @@ RegisterCitusConfigVariables(void) DefineCustomBoolVariable( "citusdb.expire_cached_shards", - gettext_noop("Enables shard cache expiration if a 
shard's size on disk has changed. "), - gettext_noop("When appending to an existing shard, old data may still be cached on " - "other workers. This configuration entry activates automatic " + gettext_noop("Enables shard cache expiration if a shard's size on disk has " + "changed."), + gettext_noop("When appending to an existing shard, old data may still be cached " + "on other workers. This configuration entry activates automatic " "expiration, but should not be used with manual updates to shards."), &ExpireCachedShards, false, @@ -440,11 +441,11 @@ RegisterCitusConfigVariables(void) "citusdb.task_assignment_policy", gettext_noop("Sets the policy to use when assigning tasks to worker nodes."), gettext_noop("The master node assigns tasks to worker nodes based on shard " - "locations. This configuration value specifies the policy to " - "use when making these assignments. The greedy policy aims to " - "evenly distribute tasks across worker nodes, first-replica just " - "assigns tasks in the order shard placements were created, " - "and the round-robin policy assigns tasks to worker nodes in " + "locations. This configuration value specifies the policy to " + "use when making these assignments. 
The greedy policy aims to " + "evenly distribute tasks across worker nodes, first-replica just " + "assigns tasks in the order shard placements were created, " + "and the round-robin policy assigns tasks to worker nodes in " "a round-robin fashion."), &TaskAssignmentPolicy, TASK_ASSIGNMENT_GREEDY, @@ -488,6 +489,7 @@ RegisterCitusConfigVariables(void) /* warn about config items in the citusdb namespace that are not registered above */ EmitWarningsOnPlaceholders("citusdb"); + /* Also warn about citus namespace, as that's a very likely misspelling */ EmitWarningsOnPlaceholders("citus"); } @@ -515,8 +517,10 @@ NormalizeWorkerListPath(void) { absoluteFileName = malloc(strlen(DataDir) + strlen(WORKER_LIST_FILENAME) + 2); if (absoluteFileName == NULL) + { ereport(FATAL, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory"))); + } sprintf(absoluteFileName, "%s/%s", DataDir, WORKER_LIST_FILENAME); } @@ -530,6 +534,7 @@ NormalizeWorkerListPath(void) "environment variable.\n", progname, ConfigFileName))); } - SetConfigOption("citusdb.worker_list_file", absoluteFileName, PGC_POSTMASTER, PGC_S_OVERRIDE); + SetConfigOption("citusdb.worker_list_file", absoluteFileName, PGC_POSTMASTER, + PGC_S_OVERRIDE); free(absoluteFileName); } diff --git a/src/backend/distributed/test/fake_fdw.c b/src/backend/distributed/test/fake_fdw.c index 883c77755..9409dde3b 100644 --- a/src/backend/distributed/test/fake_fdw.c +++ b/src/backend/distributed/test/fake_fdw.c @@ -116,9 +116,9 @@ FakeGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel, Oid foreigntableid, ForeignPath *best_path, List *tlist, List *scan_clauses) #else static ForeignScan * -FakeGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel, Oid foreigntableid, - ForeignPath *best_path, List *tlist, List *scan_clauses, - Plan *outer_plan) +FakeGetForeignPlan(PlannerInfo * root, RelOptInfo * baserel, Oid foreigntableid, + ForeignPath * best_path, List * tlist, List * scan_clauses, + Plan * outer_plan) #endif { Index scan_relid = 
baserel->relid; @@ -129,7 +129,7 @@ FakeGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel, Oid foreigntableid, return make_foreignscan(tlist, scan_clauses, scan_relid, NIL, NIL); #else return make_foreignscan(tlist, scan_clauses, scan_relid, NIL, NIL, NIL, NIL, - outer_plan); + outer_plan); #endif } diff --git a/src/backend/distributed/utils/citus_nodefuncs.c b/src/backend/distributed/utils/citus_nodefuncs.c index b3858cbdf..e5abb1d60 100644 --- a/src/backend/distributed/utils/citus_nodefuncs.c +++ b/src/backend/distributed/utils/citus_nodefuncs.c @@ -265,7 +265,7 @@ GetRangeTblKind(RangeTblEntry *rte) { CitusRTEKind rteKind = CITUS_RTE_RELATION /* invalid */; - switch(rte->rtekind) + switch (rte->rtekind) { /* directly rtekind if it's not possibly an extended RTE */ case RTE_RELATION: @@ -273,9 +273,13 @@ GetRangeTblKind(RangeTblEntry *rte) case RTE_JOIN: case RTE_VALUES: case RTE_CTE: + { rteKind = (CitusRTEKind) rte->rtekind; break; + } + case RTE_FUNCTION: + { /* * Extract extra data - correct even if a plain RTE_FUNCTION, not * an extended one, ExtractRangeTblExtraData handles that case @@ -283,6 +287,7 @@ GetRangeTblKind(RangeTblEntry *rte) */ ExtractRangeTblExtraData(rte, &rteKind, NULL, NULL, NULL); break; + } } return rteKind; diff --git a/src/backend/distributed/utils/citus_ruleutils.c b/src/backend/distributed/utils/citus_ruleutils.c index ad1ac7332..5f2c4a5ef 100644 --- a/src/backend/distributed/utils/citus_ruleutils.c +++ b/src/backend/distributed/utils/citus_ruleutils.c @@ -186,7 +186,7 @@ AppendOptionListToString(StringInfo stringBuffer, List *optionList) foreach(optionCell, optionList) { - DefElem *option = (DefElem*) lfirst(optionCell); + DefElem *option = (DefElem *) lfirst(optionCell); char *optionName = option->defname; char *optionValue = defGetString(option); @@ -219,7 +219,7 @@ pg_get_tableschemadef_string(Oid tableRelationId) char relationKind = 0; TupleDesc tupleDescriptor = NULL; TupleConstr *tupleConstraints = NULL; - int 
attributeIndex = 0; + int attributeIndex = 0; bool firstAttributePrinted = false; AttrNumber defaultValueIndex = 0; AttrNumber constraintIndex = 0; @@ -447,21 +447,35 @@ pg_get_tablecolumnoptionsdef_string(Oid tableRelationId) switch (attributeForm->attstorage) { case 'p': + { storageName = "PLAIN"; break; + } + case 'e': + { storageName = "EXTERNAL"; break; + } + case 'm': + { storageName = "MAIN"; break; + } + case 'x': + { storageName = "EXTENDED"; break; + } + default: + { ereport(ERROR, (errmsg("unrecognized storage type: %c", attributeForm->attstorage))); break; + } } appendStringInfo(&statement, "ALTER COLUMN %s ", diff --git a/src/backend/distributed/utils/metadata_cache.c b/src/backend/distributed/utils/metadata_cache.c index 868b11313..61127371f 100644 --- a/src/backend/distributed/utils/metadata_cache.c +++ b/src/backend/distributed/utils/metadata_cache.c @@ -51,10 +51,10 @@ static void InvalidateDistRelationCacheCallback(Datum argument, Oid relationId); static HeapTuple LookupDistPartitionTuple(Oid relationId); static List * LookupDistShardTuples(Oid relationId); static void GetPartitionTypeInputInfo(char *partitionKeyString, char partitionMethod, - Oid *intervalTypeId, int32 *intervalTypeMod); + Oid *intervalTypeId, int32 *intervalTypeMod); static ShardInterval * TupleToShardInterval(HeapTuple heapTuple, - TupleDesc tupleDescriptor, Oid intervalTypeId, - int32 intervalTypeMod); + TupleDesc tupleDescriptor, Oid intervalTypeId, + int32 intervalTypeMod); static void CachedRelationLookup(const char *relationName, Oid *cachedOid); @@ -87,6 +87,7 @@ IsDistributedTable(Oid relationId) return cacheEntry->isDistributedTable; } + /* * LoadShardInterval reads shard metadata for given shardId from pg_dist_shard, * and converts min/max values in these metadata to their properly typed datum @@ -98,7 +99,7 @@ LoadShardInterval(uint64 shardId) { ShardInterval *shardInterval; SysScanDesc scanDescriptor = NULL; - ScanKeyData scanKey[1]; + ScanKeyData scanKey[1]; int 
scanKeyCount = 1; HeapTuple heapTuple = NULL; Form_pg_dist_shard shardForm = NULL; @@ -127,11 +128,11 @@ LoadShardInterval(uint64 shardId) partitionEntry = DistributedTableCacheEntry(shardForm->logicalrelid); GetPartitionTypeInputInfo(partitionEntry->partitionKeyString, - partitionEntry->partitionMethod, &intervalTypeId, - &intervalTypeMod); + partitionEntry->partitionMethod, &intervalTypeId, + &intervalTypeMod); shardInterval = TupleToShardInterval(heapTuple, tupleDescriptor, intervalTypeId, - intervalTypeMod); + intervalTypeMod); systable_endscan(scanDescriptor); heap_close(pgDistShard, AccessShareLock); @@ -139,6 +140,7 @@ LoadShardInterval(uint64 shardId) return shardInterval; } + /* * DistributedTableCacheEntry looks up a pg_dist_partition entry for a * relation. @@ -239,19 +241,19 @@ LookupDistTableCacheEntry(Oid relationId) int32 intervalTypeMod = -1; GetPartitionTypeInputInfo(partitionKeyString, partitionMethod, &intervalTypeId, - &intervalTypeMod); + &intervalTypeMod); shardIntervalArray = MemoryContextAllocZero(CacheMemoryContext, - shardIntervalArrayLength * - sizeof(ShardInterval)); + shardIntervalArrayLength * + sizeof(ShardInterval)); foreach(distShardTupleCell, distShardTupleList) { HeapTuple shardTuple = lfirst(distShardTupleCell); ShardInterval *shardInterval = TupleToShardInterval(shardTuple, - distShardTupleDesc, - intervalTypeId, - intervalTypeMod); + distShardTupleDesc, + intervalTypeId, + intervalTypeMod); MemoryContext oldContext = MemoryContextSwitchTo(CacheMemoryContext); CopyShardInterval(shardInterval, &shardIntervalArray[arrayIndex]); @@ -773,7 +775,7 @@ LookupDistShardTuples(Oid relationId) scanKey[0].sk_argument = ObjectIdGetDatum(relationId); scanDescriptor = systable_beginscan(pgDistShard, DistShardLogicalRelidIndexId(), true, - NULL, 1, scanKey); + NULL, 1, scanKey); currentShardTuple = systable_getnext(scanDescriptor); while (HeapTupleIsValid(currentShardTuple)) @@ -797,7 +799,7 @@ LookupDistShardTuples(Oid relationId) */ static 
void GetPartitionTypeInputInfo(char *partitionKeyString, char partitionMethod, - Oid *intervalTypeId, int32 *intervalTypeMod) + Oid *intervalTypeId, int32 *intervalTypeMod) { *intervalTypeId = InvalidOid; *intervalTypeMod = -1; @@ -826,7 +828,7 @@ GetPartitionTypeInputInfo(char *partitionKeyString, char partitionMethod, { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("unsupported table partition type: %c", - partitionMethod))); + partitionMethod))); } } } @@ -838,7 +840,7 @@ GetPartitionTypeInputInfo(char *partitionKeyString, char partitionMethod, */ static ShardInterval * TupleToShardInterval(HeapTuple heapTuple, TupleDesc tupleDescriptor, Oid intervalTypeId, - int32 intervalTypeMod) + int32 intervalTypeMod) { ShardInterval *shardInterval = NULL; bool isNull = false; @@ -847,16 +849,16 @@ TupleToShardInterval(HeapTuple heapTuple, TupleDesc tupleDescriptor, Oid interva Oid inputFunctionId = InvalidOid; Oid typeIoParam = InvalidOid; Datum relationIdDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_logicalrelid, - tupleDescriptor, &isNull); + tupleDescriptor, &isNull); Datum shardIdDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_shardid, - tupleDescriptor, &isNull); + tupleDescriptor, &isNull); Datum storageTypeDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_shardstorage, - tupleDescriptor, &isNull); + tupleDescriptor, &isNull); Datum minValueTextDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_shardminvalue, - tupleDescriptor, &minValueNull); + tupleDescriptor, &minValueNull); Datum maxValueTextDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_shardmaxvalue, - tupleDescriptor, &maxValueNull); + tupleDescriptor, &maxValueNull); Oid relationId = DatumGetObjectId(relationIdDatum); int64 shardId = DatumGetInt64(shardIdDatum); @@ -877,7 +879,7 @@ TupleToShardInterval(HeapTuple heapTuple, TupleDesc tupleDescriptor, Oid interva /* TODO: move this up the call stack to avoid per-tuple invocation? 
*/ get_type_io_data(intervalTypeId, IOFunc_input, &intervalTypeLen, &intervalByVal, - &intervalAlign, &intervalDelim, &typeIoParam, &inputFunctionId); + &intervalAlign, &intervalDelim, &typeIoParam, &inputFunctionId); /* finally convert min/max values to their actual types */ minValue = OidInputFunctionCall(inputFunctionId, minValueString, diff --git a/src/backend/distributed/utils/multi_resowner.c b/src/backend/distributed/utils/multi_resowner.c index 80aecf7e5..21d78d1aa 100644 --- a/src/backend/distributed/utils/multi_resowner.c +++ b/src/backend/distributed/utils/multi_resowner.c @@ -22,7 +22,8 @@ #include "distributed/multi_resowner.h" -typedef struct JobDirectoryEntry { +typedef struct JobDirectoryEntry +{ ResourceOwner owner; uint64 jobId; } JobDirectoryEntry; @@ -44,8 +45,8 @@ MultiResourceOwnerReleaseCallback(ResourceReleasePhase phase, bool isTopLevel, void *arg) { - int lastJobIndex = NumRegisteredJobDirectories - 1; - int jobIndex = 0; + int lastJobIndex = NumRegisteredJobDirectories - 1; + int jobIndex = 0; if (phase == RESOURCE_RELEASE_AFTER_LOCKS) { @@ -79,7 +80,7 @@ MultiResourceOwnerReleaseCallback(ResourceReleasePhase phase, void ResourceOwnerEnlargeJobDirectories(ResourceOwner owner) { - int newMax = 0; + int newMax = 0; /* ensure callback is registered */ if (!RegisteredResownerCallback) @@ -91,15 +92,17 @@ ResourceOwnerEnlargeJobDirectories(ResourceOwner owner) if (RegisteredJobDirectories == NULL) { newMax = 16; - RegisteredJobDirectories = (JobDirectoryEntry *) - MemoryContextAlloc(TopMemoryContext, newMax * sizeof(JobDirectoryEntry)); + RegisteredJobDirectories = + (JobDirectoryEntry *) MemoryContextAlloc(TopMemoryContext, + newMax * sizeof(JobDirectoryEntry)); NumAllocatedJobDirectories = newMax; } else if (NumRegisteredJobDirectories + 1 > NumAllocatedJobDirectories) { newMax = NumAllocatedJobDirectories * 2; - RegisteredJobDirectories = (JobDirectoryEntry *) - repalloc(RegisteredJobDirectories, newMax * sizeof(JobDirectoryEntry)); + 
RegisteredJobDirectories = + (JobDirectoryEntry *) repalloc(RegisteredJobDirectories, + newMax * sizeof(JobDirectoryEntry)); NumAllocatedJobDirectories = newMax; } } @@ -123,8 +126,8 @@ ResourceOwnerRememberJobDirectory(ResourceOwner owner, uint64 jobId) void ResourceOwnerForgetJobDirectory(ResourceOwner owner, uint64 jobId) { - int lastJobIndex = NumRegisteredJobDirectories - 1; - int jobIndex = 0; + int lastJobIndex = NumRegisteredJobDirectories - 1; + int jobIndex = 0; for (jobIndex = lastJobIndex; jobIndex >= 0; jobIndex--) { @@ -135,7 +138,8 @@ ResourceOwnerForgetJobDirectory(ResourceOwner owner, uint64 jobId) /* move all later entries one up */ while (jobIndex < lastJobIndex) { - RegisteredJobDirectories[jobIndex] = RegisteredJobDirectories[jobIndex + 1]; + RegisteredJobDirectories[jobIndex] = + RegisteredJobDirectories[jobIndex + 1]; jobIndex++; } NumRegisteredJobDirectories = lastJobIndex; diff --git a/src/backend/distributed/utils/resource_lock.c b/src/backend/distributed/utils/resource_lock.c index a2552d46b..3f1b739af 100644 --- a/src/backend/distributed/utils/resource_lock.c +++ b/src/backend/distributed/utils/resource_lock.c @@ -30,7 +30,7 @@ void LockShardDistributionMetadata(int64 shardId, LOCKMODE lockMode) { - LOCKTAG tag; + LOCKTAG tag; const bool sessionLock = false; const bool dontWait = false; @@ -64,7 +64,7 @@ LockRelationDistributionMetadata(Oid relationId, LOCKMODE lockMode) void LockShardResource(uint64 shardId, LOCKMODE lockmode) { - LOCKTAG tag; + LOCKTAG tag; const bool sessionLock = false; const bool dontWait = false; @@ -78,7 +78,7 @@ LockShardResource(uint64 shardId, LOCKMODE lockmode) void UnlockShardResource(uint64 shardId, LOCKMODE lockmode) { - LOCKTAG tag; + LOCKTAG tag; const bool sessionLock = false; SET_LOCKTAG_SHARD_RESOURCE(tag, MyDatabaseId, shardId); @@ -95,7 +95,7 @@ UnlockShardResource(uint64 shardId, LOCKMODE lockmode) void LockJobResource(uint64 jobId, LOCKMODE lockmode) { - LOCKTAG tag; + LOCKTAG tag; const bool 
sessionLock = false; const bool dontWait = false; @@ -109,7 +109,7 @@ LockJobResource(uint64 jobId, LOCKMODE lockmode) void UnlockJobResource(uint64 jobId, LOCKMODE lockmode) { - LOCKTAG tag; + LOCKTAG tag; const bool sessionLock = false; SET_LOCKTAG_JOB_RESOURCE(tag, MyDatabaseId, jobId); diff --git a/src/backend/distributed/worker/task_tracker.c b/src/backend/distributed/worker/task_tracker.c index b8a6bde6a..e3e7320db 100644 --- a/src/backend/distributed/worker/task_tracker.c +++ b/src/backend/distributed/worker/task_tracker.c @@ -50,7 +50,7 @@ #include "utils/memutils.h" -int TaskTrackerDelay = 200; /* process sleep interval in millisecs */ +int TaskTrackerDelay = 200; /* process sleep interval in millisecs */ int MaxRunningTasksPerNode = 16; /* max number of running tasks */ int MaxTrackedTasksPerNode = 1024; /* max number of tracked tasks */ WorkerTasksSharedStateData *WorkerTasksSharedState; /* shared memory state */ @@ -76,10 +76,10 @@ static void TrackerCleanupJobSchemas(void); static void TrackerCleanupConnections(HTAB *WorkerTasksHash); static void TrackerRegisterShutDown(HTAB *WorkerTasksHash); static void TrackerDelayLoop(void); -static List *SchedulableTaskList(HTAB *WorkerTasksHash); +static List * SchedulableTaskList(HTAB *WorkerTasksHash); static WorkerTask * SchedulableTaskPriorityQueue(HTAB *WorkerTasksHash); static uint32 CountTasksMatchingCriteria(HTAB *WorkerTasksHash, - bool (*CriteriaFunction) (WorkerTask *)); + bool (*CriteriaFunction)(WorkerTask *)); static bool RunningTask(WorkerTask *workerTask); static bool SchedulableTask(WorkerTask *workerTask); static int CompareTasksByTime(const void *first, const void *second); @@ -393,7 +393,7 @@ TrackerCleanupJobSchemas(void) /* * We create cleanup tasks since we can't remove schemas within the task * tracker process. We also assign high priorities to these tasks so - * that they get scheduled before everyone else. + * that they get scheduled before everyone else. 
*/ cleanupTask = WorkerTasksHashEnter(jobId, taskIndex); cleanupTask->assignedAt = HIGH_PRIORITY_TASK_TIME; @@ -440,7 +440,7 @@ TrackerCleanupConnections(HTAB *WorkerTasksHash) currentTask->connectionId = INVALID_CONNECTION_ID; } - currentTask = (WorkerTask *) hash_seq_search(&status); + currentTask = (WorkerTask *) hash_seq_search(&status); } } @@ -494,8 +494,9 @@ TrackerDelayLoop(void) } } + /* ------------------------------------------------------------ - * Signal handling and shared hash initialization functions follow + * Signal handling and shared hash initialization functions follow * ------------------------------------------------------------ */ @@ -503,7 +504,7 @@ TrackerDelayLoop(void) static void TrackerSigHupHandler(SIGNAL_ARGS) { - int save_errno = errno; + int save_errno = errno; got_SIGHUP = true; if (MyProc != NULL) @@ -519,7 +520,7 @@ TrackerSigHupHandler(SIGNAL_ARGS) static void TrackerShutdownHandler(SIGNAL_ARGS) { - int save_errno = errno; + int save_errno = errno; got_SIGTERM = true; if (MyProc != NULL) @@ -579,10 +580,10 @@ TaskTrackerShmemInit(void) LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); /* allocate struct containing task tracker related shared state */ - WorkerTasksSharedState = (WorkerTasksSharedStateData *) - ShmemInitStruct("Worker Task Control", - sizeof(WorkerTasksSharedStateData), - &alreadyInitialized); + WorkerTasksSharedState = + (WorkerTasksSharedStateData *) ShmemInitStruct("Worker Task Control", + sizeof(WorkerTasksSharedStateData), + &alreadyInitialized); if (!alreadyInitialized) { @@ -607,6 +608,7 @@ TaskTrackerShmemInit(void) } } + /* ------------------------------------------------------------ * Task scheduling and management functions follow * ------------------------------------------------------------ @@ -638,7 +640,7 @@ SchedulableTaskList(HTAB *WorkerTasksHash) schedulableTaskCount = CountTasksMatchingCriteria(WorkerTasksHash, &SchedulableTask); if (schedulableTaskCount == 0) { - return NIL; /* we do not have 
any new tasks to schedule */ + return NIL; /* we do not have any new tasks to schedule */ } tasksToScheduleCount = MaxRunningTasksPerNode - runningTaskCount; @@ -653,7 +655,7 @@ SchedulableTaskList(HTAB *WorkerTasksHash) for (queueIndex = 0; queueIndex < tasksToScheduleCount; queueIndex++) { WorkerTask *schedulableTask = (WorkerTask *) palloc0(sizeof(WorkerTask)); - schedulableTask->jobId = schedulableTaskQueue[queueIndex].jobId; + schedulableTask->jobId = schedulableTaskQueue[queueIndex].jobId; schedulableTask->taskId = schedulableTaskQueue[queueIndex].taskId; schedulableTaskList = lappend(schedulableTaskList, schedulableTask); @@ -681,13 +683,13 @@ SchedulableTaskPriorityQueue(HTAB *WorkerTasksHash) uint32 queueIndex = 0; /* our priority queue size equals to the number of schedulable tasks */ - queueSize = CountTasksMatchingCriteria(WorkerTasksHash, &SchedulableTask); + queueSize = CountTasksMatchingCriteria(WorkerTasksHash, &SchedulableTask); if (queueSize == 0) { return NULL; } - /* allocate an array of tasks for our priority queue */ + /* allocate an array of tasks for our priority queue */ priorityQueue = (WorkerTask *) palloc0(sizeof(WorkerTask) * queueSize); /* copy tasks in the shared hash to the priority queue */ @@ -719,7 +721,7 @@ SchedulableTaskPriorityQueue(HTAB *WorkerTasksHash) /* Counts the number of tasks that match the given criteria function. 
*/ static uint32 CountTasksMatchingCriteria(HTAB *WorkerTasksHash, - bool (*CriteriaFunction) (WorkerTask *)) + bool (*CriteriaFunction)(WorkerTask *)) { HASH_SEQ_STATUS status; WorkerTask *currentTask = NULL; @@ -730,13 +732,13 @@ CountTasksMatchingCriteria(HTAB *WorkerTasksHash, currentTask = (WorkerTask *) hash_seq_search(&status); while (currentTask != NULL) { - bool matchesCriteria = (*CriteriaFunction) (currentTask); + bool matchesCriteria = (*CriteriaFunction)(currentTask); if (matchesCriteria) { taskCount++; } - currentTask = (WorkerTask *) hash_seq_search(&status); + currentTask = (WorkerTask *) hash_seq_search(&status); } return taskCount; @@ -775,7 +777,7 @@ SchedulableTask(WorkerTask *workerTask) static int CompareTasksByTime(const void *first, const void *second) { - WorkerTask *firstTask = (WorkerTask *) first; + WorkerTask *firstTask = (WorkerTask *) first; WorkerTask *secondTask = (WorkerTask *) second; /* tasks that are assigned earlier have higher priority */ @@ -893,7 +895,7 @@ ManageWorkerTask(WorkerTask *workerTask, HTAB *WorkerTasksHash) { case TASK_ASSIGNED: { - break; /* nothing to do until the task gets scheduled */ + break; /* nothing to do until the task gets scheduled */ } case TASK_SCHEDULED: diff --git a/src/backend/distributed/worker/task_tracker_protocol.c b/src/backend/distributed/worker/task_tracker_protocol.c index 84115f441..1b52be99a 100644 --- a/src/backend/distributed/worker/task_tracker_protocol.c +++ b/src/backend/distributed/worker/task_tracker_protocol.c @@ -57,7 +57,7 @@ task_tracker_assign_task(PG_FUNCTION_ARGS) { uint64 jobId = PG_GETARG_INT64(0); uint32 taskId = PG_GETARG_UINT32(1); - text *taskCallStringText = PG_GETARG_TEXT_P(2); + text *taskCallStringText = PG_GETARG_TEXT_P(2); StringInfo jobSchemaName = JobSchemaName(jobId); bool schemaExists = false; @@ -185,7 +185,7 @@ task_tracker_cleanup_job(PG_FUNCTION_ARGS) CleanupTask(currentTask); } - currentTask = (WorkerTask *) hash_seq_search(&status); + currentTask = 
(WorkerTask *) hash_seq_search(&status); } LWLockRelease(WorkerTasksSharedState->taskHashLock); @@ -308,7 +308,7 @@ CreateTask(uint64 jobId, uint32 taskId, char *taskCallString) } -/* +/* * UpdateTask updates the call string text for an already existing task. Note * that this function expects the caller to hold an exclusive lock over the * shared hash. @@ -331,7 +331,7 @@ UpdateTask(WorkerTask *workerTask, char *taskCallString) if (taskStatus == TASK_SUCCEEDED || taskStatus == TASK_CANCEL_REQUESTED || taskStatus == TASK_CANCELED) { - ; /* nothing to do */ + /* nothing to do */ } else if (taskStatus == TASK_PERMANENTLY_FAILED) { diff --git a/src/backend/distributed/worker/worker_data_fetch_protocol.c b/src/backend/distributed/worker/worker_data_fetch_protocol.c index 0e5b68a1d..d0c309c18 100644 --- a/src/backend/distributed/worker/worker_data_fetch_protocol.c +++ b/src/backend/distributed/worker/worker_data_fetch_protocol.c @@ -53,11 +53,14 @@ static void ReceiveResourceCleanup(int32 connectionId, const char *filename, static void DeleteFile(const char *filename); static void FetchTableCommon(text *tableName, uint64 remoteTableSize, ArrayType *nodeNameObject, ArrayType *nodePortObject, - bool (*FetchTableFunction) (const char *, uint32, StringInfo)); + bool (*FetchTableFunction)(const char *, uint32, + StringInfo)); static uint64 LocalTableSize(Oid relationId); static uint64 ExtractShardId(StringInfo tableName); -static bool FetchRegularTable(const char *nodeName, uint32 nodePort, StringInfo tableName); -static bool FetchForeignTable(const char *nodeName, uint32 nodePort, StringInfo tableName); +static bool FetchRegularTable(const char *nodeName, uint32 nodePort, + StringInfo tableName); +static bool FetchForeignTable(const char *nodeName, uint32 nodePort, + StringInfo tableName); static List * TableDDLCommandList(const char *nodeName, uint32 nodePort, StringInfo tableName); static StringInfo ForeignFilePath(const char *nodeName, uint32 nodePort, @@ -85,7 +88,7 @@ 
worker_fetch_partition_file(PG_FUNCTION_ARGS) uint64 jobId = PG_GETARG_INT64(0); uint32 partitionTaskId = PG_GETARG_UINT32(1); uint32 partitionFileId = PG_GETARG_UINT32(2); - uint32 upstreamTaskId = PG_GETARG_UINT32(3); + uint32 upstreamTaskId = PG_GETARG_UINT32(3); text *nodeNameText = PG_GETARG_TEXT_P(4); uint32 nodePort = PG_GETARG_UINT32(5); char *nodeName = NULL; @@ -226,7 +229,7 @@ ReceiveRegularFile(const char *nodeName, uint32 nodePort, char filename[MAXPGPATH]; int closed = -1; const int fileFlags = (O_APPEND | O_CREAT | O_RDWR | O_TRUNC | PG_BINARY); - const int fileMode = (S_IRUSR | S_IWUSR); + const int fileMode = (S_IRUSR | S_IWUSR); QueryStatus queryStatus = CLIENT_INVALID_QUERY; int32 connectionId = INVALID_CONNECTION_ID; @@ -309,7 +312,7 @@ ReceiveRegularFile(const char *nodeName, uint32 nodePort, } else if (copyStatus == CLIENT_COPY_MORE) { - ; /* remote node will continue to send more data */ + /* remote node will continue to send more data */ } else { @@ -468,7 +471,7 @@ worker_fetch_foreign_file(PG_FUNCTION_ARGS) static void FetchTableCommon(text *tableNameText, uint64 remoteTableSize, ArrayType *nodeNameObject, ArrayType *nodePortObject, - bool (*FetchTableFunction) (const char *, uint32, StringInfo)) + bool (*FetchTableFunction)(const char *, uint32, StringInfo)) { StringInfo tableName = NULL; char *tableNameCString = NULL; @@ -531,7 +534,7 @@ FetchTableCommon(text *tableNameText, uint64 remoteTableSize, if (remoteTableSize > localTableSize) { /* table is not up to date, drop the table */ - ObjectAddress tableObject = {InvalidOid, InvalidOid, 0}; + ObjectAddress tableObject = { InvalidOid, InvalidOid, 0 }; tableObject.classId = RelationRelationId; tableObject.objectId = relationId; @@ -554,7 +557,7 @@ FetchTableCommon(text *tableNameText, uint64 remoteTableSize, char *nodeName = TextDatumGetCString(nodeNameDatum); uint32 nodePort = DatumGetUInt32(nodePortDatum); - tableFetched = (*FetchTableFunction) (nodeName, nodePort, tableName); + 
tableFetched = (*FetchTableFunction)(nodeName, nodePort, tableName); nodeIndex++; } @@ -1010,7 +1013,7 @@ worker_append_table_to_shard(PG_FUNCTION_ARGS) * the transaction for this function commits, this lock will automatically * be released. This ensures appends to a shard happen in a serial manner. */ - shardId = ExtractShardId(shardNameString); + shardId = ExtractShardId(shardNameString); LockShardResource(shardId, AccessExclusiveLock); localFilePath = makeStringInfo(); @@ -1049,7 +1052,7 @@ worker_append_table_to_shard(PG_FUNCTION_ARGS) static bool check_log_statement(List *statementList) { - ListCell *statementCell; + ListCell *statementCell; if (log_statement == LOGSTMT_NONE) { diff --git a/src/backend/distributed/worker/worker_file_access_protocol.c b/src/backend/distributed/worker/worker_file_access_protocol.c index 04deb2881..6c9eb18bd 100644 --- a/src/backend/distributed/worker/worker_file_access_protocol.c +++ b/src/backend/distributed/worker/worker_file_access_protocol.c @@ -40,22 +40,22 @@ worker_foreign_file_path(PG_FUNCTION_ARGS) ForeignTable *foreignTable = GetForeignTable(relationId); ListCell *optionCell = NULL; - foreach(optionCell, foreignTable->options) - { - DefElem *option = (DefElem *) lfirst(optionCell); + foreach(optionCell, foreignTable->options) + { + DefElem *option = (DefElem *) lfirst(optionCell); char *optionName = option->defname; - int compareResult = strncmp(optionName, FOREIGN_FILENAME_OPTION, MAXPGPATH); - if (compareResult == 0) - { - char *optionValue = defGetString(option); - foreignFilePath = cstring_to_text(optionValue); - break; - } - } + int compareResult = strncmp(optionName, FOREIGN_FILENAME_OPTION, MAXPGPATH); + if (compareResult == 0) + { + char *optionValue = defGetString(option); + foreignFilePath = cstring_to_text(optionValue); + break; + } + } /* check that we found the filename option */ - if (foreignFilePath == NULL) + if (foreignFilePath == NULL) { char *relationName = get_rel_name(relationId); ereport(ERROR, 
(errmsg("could not find filename for foreign table: \"%s\"", diff --git a/src/backend/distributed/worker/worker_merge_protocol.c b/src/backend/distributed/worker/worker_merge_protocol.c index ee829e342..92afb1fb1 100644 --- a/src/backend/distributed/worker/worker_merge_protocol.c +++ b/src/backend/distributed/worker/worker_merge_protocol.c @@ -133,7 +133,7 @@ worker_merge_files_and_run_query(PG_FUNCTION_ARGS) const char *createMergeTableQuery = text_to_cstring(createMergeTableQueryText); const char *createIntermediateTableQuery = - text_to_cstring(createIntermediateTableQueryText); + text_to_cstring(createIntermediateTableQueryText); StringInfo taskDirectoryName = TaskDirectoryName(jobId, taskId); StringInfo jobSchemaName = JobSchemaName(jobId); @@ -170,14 +170,14 @@ worker_merge_files_and_run_query(PG_FUNCTION_ARGS) if (setSearchPathResult < 0) { ereport(ERROR, (errmsg("execution was not successful \"%s\"", - setSearchPathString->data))); + setSearchPathString->data))); } createMergeTableResult = SPI_exec(createMergeTableQuery, 0); if (createMergeTableResult < 0) { ereport(ERROR, (errmsg("execution was not successful \"%s\"", - createMergeTableQuery))); + createMergeTableQuery))); } appendStringInfo(mergeTableName, "%s%s", intermediateTableName->data, @@ -188,7 +188,7 @@ worker_merge_files_and_run_query(PG_FUNCTION_ARGS) if (createIntermediateTableResult < 0) { ereport(ERROR, (errmsg("execution was not successful \"%s\"", - createIntermediateTableQuery))); + createIntermediateTableQuery))); } finished = SPI_finish(); @@ -256,8 +256,8 @@ JobSchemaName(uint64 jobId) */ #ifdef HAVE_INTTYPES_H StringInfo jobSchemaName = makeStringInfo(); - appendStringInfo(jobSchemaName, "%s%0*"PRIu64, - JOB_SCHEMA_PREFIX, MIN_JOB_DIRNAME_WIDTH, jobId); + appendStringInfo(jobSchemaName, "%s%0*" PRIu64, JOB_SCHEMA_PREFIX, + MIN_JOB_DIRNAME_WIDTH, jobId); #else StringInfo jobSchemaName = makeStringInfo(); appendStringInfo(jobSchemaName, "%s%0*llu", diff --git 
a/src/backend/distributed/worker/worker_partition_protocol.c b/src/backend/distributed/worker/worker_partition_protocol.c index c6578295b..9ab21e588 100644 --- a/src/backend/distributed/worker/worker_partition_protocol.c +++ b/src/backend/distributed/worker/worker_partition_protocol.c @@ -59,7 +59,7 @@ static void FileOutputStreamWrite(FileOutputStream file, StringInfo dataToWrite) static void FileOutputStreamFlush(FileOutputStream file); static void FilterAndPartitionTable(const char *filterQuery, const char *columnName, Oid columnType, - uint32 (*PartitionIdFunction) (Datum, const void *), + uint32 (*PartitionIdFunction)(Datum, const void *), const void *partitionIdContext, FileOutputStream *partitionFileArray, uint32 fileCount); @@ -105,7 +105,7 @@ worker_range_partition_table(PG_FUNCTION_ARGS) uint32 taskId = PG_GETARG_UINT32(1); text *filterQueryText = PG_GETARG_TEXT_P(2); text *partitionColumnText = PG_GETARG_TEXT_P(3); - Oid partitionColumnType = PG_GETARG_OID(4); + Oid partitionColumnType = PG_GETARG_OID(4); ArrayType *splitPointObject = PG_GETARG_ARRAYTYPE_P(5); const char *filterQuery = text_to_cstring(filterQueryText); @@ -125,7 +125,7 @@ worker_range_partition_table(PG_FUNCTION_ARGS) if (splitPointType != partitionColumnType) { ereport(ERROR, (errmsg("partition column type %u and split point type %u " - "do not match", partitionColumnType, splitPointType))); + "do not match", partitionColumnType, splitPointType))); } /* use column's type information to get the comparison function */ @@ -181,7 +181,7 @@ worker_hash_partition_table(PG_FUNCTION_ARGS) uint32 taskId = PG_GETARG_UINT32(1); text *filterQueryText = PG_GETARG_TEXT_P(2); text *partitionColumnText = PG_GETARG_TEXT_P(3); - Oid partitionColumnType = PG_GETARG_OID(4); + Oid partitionColumnType = PG_GETARG_OID(4); uint32 partitionCount = PG_GETARG_UINT32(5); const char *filterQuery = text_to_cstring(filterQueryText); @@ -463,7 +463,7 @@ JobDirectoryName(uint64 jobId) */ #ifdef HAVE_INTTYPES_H 
StringInfo jobDirectoryName = makeStringInfo(); - appendStringInfo(jobDirectoryName, "base/%s/%s%0*"PRIu64, + appendStringInfo(jobDirectoryName, "base/%s/%s%0*" PRIu64, PG_JOB_CACHE_DIR, JOB_DIRECTORY_PREFIX, MIN_JOB_DIRNAME_WIDTH, jobId); #else @@ -726,7 +726,7 @@ FileOutputStreamFlush(FileOutputStream file) static void FilterAndPartitionTable(const char *filterQuery, const char *partitionColumnName, Oid partitionColumnType, - uint32 (*PartitionIdFunction) (Datum, const void *), + uint32 (*PartitionIdFunction)(Datum, const void *), const void *partitionIdContext, FileOutputStream *partitionFileArray, uint32 fileCount) @@ -794,7 +794,7 @@ FilterAndPartitionTable(const char *filterQuery, FileOutputStream partitionFile = { 0, 0, 0 }; StringInfo rowText = NULL; Datum partitionKey = 0; - bool partitionKeyNull = false; + bool partitionKeyNull = false; uint32 partitionId = 0; partitionKey = SPI_getbinval(row, rowDescriptor, @@ -808,7 +808,7 @@ FilterAndPartitionTable(const char *filterQuery, */ if (!partitionKeyNull) { - partitionId = (*PartitionIdFunction) (partitionKey, partitionIdContext); + partitionId = (*PartitionIdFunction)(partitionKey, partitionIdContext); } else { @@ -926,7 +926,7 @@ InitRowOutputState(void) /* initialize defaults for printing null values */ char *nullPrint = pstrdup("\\N"); - int nullPrintLen = strlen(nullPrint); + int nullPrintLen = strlen(nullPrint); char *nullPrintClient = pg_server_to_any(nullPrint, nullPrintLen, fileEncoding); /* set default text output characters */ @@ -946,7 +946,7 @@ InitRowOutputState(void) } /* set up transcoding information and default text output characters */ - if ( (fileEncoding != databaseEncoding) || (databaseEncodingMaxLength > 1) ) + if ((fileEncoding != databaseEncoding) || (databaseEncodingMaxLength > 1)) { rowOutputState->need_transcoding = true; } @@ -1057,7 +1057,7 @@ OutputRow(HeapTuple row, TupleDesc rowDescriptor, CopySendString(rowOutputState, rowOutputState->null_print_client); } - lastColumn = 
((columnIndex+1) == columnCount); + lastColumn = ((columnIndex + 1) == columnCount); if (!lastColumn) { CopySendChar(rowOutputState, rowOutputState->delim[0]); @@ -1094,9 +1094,9 @@ OutputBinaryHeaders(FileOutputStream *partitionFileArray, uint32 fileCount) { /* Generate header for a binary copy */ const int32 zero = 0; - FileOutputStream partitionFile = {0, 0, 0}; + FileOutputStream partitionFile = { 0, 0, 0 }; PartialCopyStateData headerOutputStateData; - PartialCopyState headerOutputState = (PartialCopyState) &headerOutputStateData; + PartialCopyState headerOutputState = (PartialCopyState) & headerOutputStateData; memset(headerOutputState, 0, sizeof(PartialCopyStateData)); headerOutputState->fe_msgbuf = makeStringInfo(); @@ -1128,9 +1128,9 @@ OutputBinaryFooters(FileOutputStream *partitionFileArray, uint32 fileCount) { /* Generate footer for a binary copy */ int16 negative = -1; - FileOutputStream partitionFile = {0, 0, 0}; + FileOutputStream partitionFile = { 0, 0, 0 }; PartialCopyStateData footerOutputStateData; - PartialCopyState footerOutputState = (PartialCopyState) &footerOutputStateData; + PartialCopyState footerOutputState = (PartialCopyState) & footerOutputStateData; memset(footerOutputState, 0, sizeof(PartialCopyStateData)); footerOutputState->fe_msgbuf = makeStringInfo(); @@ -1359,7 +1359,7 @@ RangePartitionId(Datum partitionValue, const void *context) currentLength = currentLength - halfLength - 1; } } - + return firstIndex; } diff --git a/src/include/distributed/citus_ruleutils.h b/src/include/distributed/citus_ruleutils.h index 0f2402748..92e1777e7 100644 --- a/src/include/distributed/citus_ruleutils.h +++ b/src/include/distributed/citus_ruleutils.h @@ -2,7 +2,7 @@ * * citus_ruleutils.h * CitusDB ruleutils wrapper functions and exported PostgreSQL ruleutils - * functions. + * functions. * * Copyright (c) 2012-2015, Citus Data, Inc. 
*------------------------------------------------------------------------- @@ -16,16 +16,17 @@ /* Function declarations for version independent CitusDB ruleutils wrapper functions */ -extern char *pg_get_extensiondef_string(Oid tableRelationId); -extern char *pg_get_serverdef_string(Oid tableRelationId); -extern char *pg_get_tableschemadef_string(Oid tableRelationId); -extern char *pg_get_tablecolumnoptionsdef_string(Oid tableRelationId); -extern char *pg_get_indexclusterdef_string(Oid indexRelationId); +extern char * pg_get_extensiondef_string(Oid tableRelationId); +extern char * pg_get_serverdef_string(Oid tableRelationId); +extern char * pg_get_tableschemadef_string(Oid tableRelationId); +extern char * pg_get_tablecolumnoptionsdef_string(Oid tableRelationId); +extern char * pg_get_indexclusterdef_string(Oid indexRelationId); /* Function declarations for version dependent PostgreSQL ruleutils functions */ -extern void pg_get_query_def(Query *query, StringInfo buffer); -extern void deparse_shard_query(Query *query, Oid distrelid, int64 shardid, StringInfo buffer); -extern char *generate_relation_name(Oid relid, List *namespaces); +extern void pg_get_query_def(Query *query, StringInfo buffer); +extern void deparse_shard_query(Query *query, Oid distrelid, int64 shardid, StringInfo + buffer); +extern char * generate_relation_name(Oid relid, List *namespaces); #endif /* CITUS_RULEUTILS_H */ diff --git a/src/include/distributed/master_metadata_utility.h b/src/include/distributed/master_metadata_utility.h index b3cfb6548..4fb377571 100644 --- a/src/include/distributed/master_metadata_utility.h +++ b/src/include/distributed/master_metadata_utility.h @@ -30,15 +30,14 @@ typedef struct ShardInterval CitusNodeTag type; Oid relationId; char storageType; - Oid valueTypeId; /* min/max value datum's typeId */ - int valueTypeLen; /* min/max value datum's typelen */ - bool valueByVal; /* min/max value datum's byval */ + Oid valueTypeId; /* min/max value datum's typeId */ + int 
valueTypeLen; /* min/max value datum's typelen */ + bool valueByVal; /* min/max value datum's byval */ bool minValueExists; bool maxValueExists; - Datum minValue; /* a shard's typed min value datum */ - Datum maxValue; /* a shard's typed max value datum */ + Datum minValue; /* a shard's typed min value datum */ + Datum maxValue; /* a shard's typed max value datum */ uint64 shardId; - } ShardInterval; @@ -46,13 +45,12 @@ typedef struct ShardInterval typedef struct ShardPlacement { CitusNodeTag type; - Oid tupleOid; /* unique oid that implies this row's insertion order */ + Oid tupleOid; /* unique oid that implies this row's insertion order */ uint64 shardId; uint64 shardLength; RelayFileState shardState; char *nodeName; uint32 nodePort; - } ShardPlacement; diff --git a/src/include/distributed/master_protocol.h b/src/include/distributed/master_protocol.h index f39ce865b..94344de0c 100644 --- a/src/include/distributed/master_protocol.h +++ b/src/include/distributed/master_protocol.h @@ -49,10 +49,10 @@ #define SHARDID_SEQUENCE_NAME "pg_dist_shardid_seq" /* Remote call definitions to help with data staging and deletion */ -#define WORKER_APPLY_SHARD_DDL_COMMAND "SELECT worker_apply_shard_ddl_command \ - ("UINT64_FORMAT", %s)" -#define WORKER_APPEND_TABLE_TO_SHARD "SELECT worker_append_table_to_shard \ - (%s, %s, %s, %u)" +#define WORKER_APPLY_SHARD_DDL_COMMAND \ + "SELECT worker_apply_shard_ddl_command (" UINT64_FORMAT ", %s)" +#define WORKER_APPEND_TABLE_TO_SHARD \ + "SELECT worker_append_table_to_shard (%s, %s, %s, %u)" #define SHARD_MIN_VALUE_QUERY "SELECT min(%s) FROM %s" #define SHARD_MAX_VALUE_QUERY "SELECT max(%s) FROM %s" #define SHARD_TABLE_SIZE_QUERY "SELECT pg_table_size('%s')" @@ -67,7 +67,6 @@ typedef enum SHARD_PLACEMENT_INVALID_FIRST = 0, SHARD_PLACEMENT_LOCAL_NODE_FIRST = 1, SHARD_PLACEMENT_ROUND_ROBIN = 2 - } ShardPlacementPolicyType; @@ -83,8 +82,8 @@ extern Oid ResolveRelationId(text *relationName); extern List * GetTableDDLEvents(Oid relationId); 
extern void CheckDistributedTable(Oid relationId); extern void CreateShardPlacements(int64 shardId, List *ddlEventList, - List *workerNodeList, int workerStartIndex, - int replicationFactor); + List *workerNodeList, int workerStartIndex, + int replicationFactor); /* Function declarations for generating metadata for shard creation */ extern Datum master_get_table_metadata(PG_FUNCTION_ARGS); diff --git a/src/include/distributed/modify_planner.h b/src/include/distributed/modify_planner.h index 3e52cfa5a..bd6df4755 100644 --- a/src/include/distributed/modify_planner.h +++ b/src/include/distributed/modify_planner.h @@ -24,6 +24,7 @@ #define INVALID_TASK_ID 0 #if (PG_VERSION_NUM >= 90500) + /* reserved alias name for UPSERTs */ #define UPSERT_ALIAS "citus_table_alias" #endif diff --git a/src/include/distributed/multi_client_executor.h b/src/include/distributed/multi_client_executor.h index 470b53673..0f1203031 100644 --- a/src/include/distributed/multi_client_executor.h +++ b/src/include/distributed/multi_client_executor.h @@ -15,21 +15,20 @@ #define MULTI_CLIENT_EXECUTOR_H -#define INVALID_CONNECTION_ID -1 /* identifies an invalid connection */ -#define CLIENT_CONNECT_TIMEOUT 5 /* connection timeout in seconds */ +#define INVALID_CONNECTION_ID -1 /* identifies an invalid connection */ +#define CLIENT_CONNECT_TIMEOUT 5 /* connection timeout in seconds */ #define MAX_CONNECTION_COUNT 2048 /* simultaneous client connection count */ -#define STRING_BUFFER_SIZE 1024 /* buffer size for character arrays */ +#define STRING_BUFFER_SIZE 1024 /* buffer size for character arrays */ #define CONN_INFO_TEMPLATE "host=%s port=%u dbname=%s connect_timeout=%u" /* Enumeration to track one client connection's status */ typedef enum { - CLIENT_INVALID_CONNECT = 0, - CLIENT_CONNECTION_BAD = 1, - CLIENT_CONNECTION_BUSY = 2, + CLIENT_INVALID_CONNECT = 0, + CLIENT_CONNECTION_BAD = 1, + CLIENT_CONNECTION_BUSY = 2, CLIENT_CONNECTION_READY = 3 - } ConnectStatus; @@ -38,9 +37,8 @@ typedef enum { 
CLIENT_INVALID_RESULT_STATUS = 0, CLIENT_RESULT_UNAVAILABLE = 1, - CLIENT_RESULT_BUSY = 2, + CLIENT_RESULT_BUSY = 2, CLIENT_RESULT_READY = 3 - } ResultStatus; @@ -48,10 +46,9 @@ typedef enum typedef enum { CLIENT_INVALID_QUERY = 0, - CLIENT_QUERY_FAILED = 1, + CLIENT_QUERY_FAILED = 1, CLIENT_QUERY_DONE = 2, CLIENT_QUERY_COPY = 3 - } QueryStatus; @@ -59,21 +56,19 @@ typedef enum typedef enum { CLIENT_INVALID_COPY = 0, - CLIENT_COPY_MORE = 1, - CLIENT_COPY_FAILED = 2, - CLIENT_COPY_DONE = 3 - + CLIENT_COPY_MORE = 1, + CLIENT_COPY_FAILED = 2, + CLIENT_COPY_DONE = 3 } CopyStatus; /* Enumeration to track the status of a query in a batch on the client */ typedef enum { - CLIENT_INVALID_BATCH_QUERY = 0, - CLIENT_BATCH_QUERY_FAILED = 1, + CLIENT_INVALID_BATCH_QUERY = 0, + CLIENT_BATCH_QUERY_FAILED = 1, CLIENT_BATCH_QUERY_CONTINUE = 2, - CLIENT_BATCH_QUERY_DONE = 3 - + CLIENT_BATCH_QUERY_DONE = 3 } BatchQueryStatus; diff --git a/src/include/distributed/multi_executor.h b/src/include/distributed/multi_executor.h index bcf22fe2e..b777858d8 100644 --- a/src/include/distributed/multi_executor.h +++ b/src/include/distributed/multi_executor.h @@ -14,12 +14,12 @@ #include "nodes/parsenodes.h" /* signal currently executed statement is a master select statement or router execution */ -#define EXEC_FLAG_CITUS_MASTER_SELECT 0x100 -#define EXEC_FLAG_CITUS_ROUTER_EXECUTOR 0x200 +#define EXEC_FLAG_CITUS_MASTER_SELECT 0x100 +#define EXEC_FLAG_CITUS_ROUTER_EXECUTOR 0x200 extern void multi_ExecutorStart(QueryDesc *queryDesc, int eflags); extern void multi_ExecutorRun(QueryDesc *queryDesc, - ScanDirection direction, long count); + ScanDirection direction, long count); extern void multi_ExecutorFinish(QueryDesc *queryDesc); extern void multi_ExecutorEnd(QueryDesc *queryDesc); diff --git a/src/include/distributed/multi_join_order.h b/src/include/distributed/multi_join_order.h index cccde6917..56ede010b 100644 --- a/src/include/distributed/multi_join_order.h +++ 
b/src/include/distributed/multi_join_order.h @@ -29,7 +29,7 @@ typedef enum JoinRuleType { JOIN_RULE_INVALID_FIRST = 0, BROADCAST_JOIN = 1, - LOCAL_PARTITION_JOIN = 2, + LOCAL_PARTITION_JOIN = 2, SINGLE_PARTITION_JOIN = 3, DUAL_PARTITION_JOIN = 4, CARTESIAN_PRODUCT = 5, @@ -40,7 +40,6 @@ typedef enum JoinRuleType * RuleNameArray. */ JOIN_RULE_LAST - } JoinRuleType; @@ -53,7 +52,6 @@ typedef struct TableEntry { Oid relationId; uint32 rangeTableId; - } TableEntry; @@ -65,14 +63,13 @@ typedef struct TableEntry */ typedef struct JoinOrderNode { - TableEntry *tableEntry; /* this node's relation and range table id */ - JoinRuleType joinRuleType; /* not relevant for the first table */ - JoinType joinType; /* not relevant for the first table */ - Var *partitionColumn; /* not relevant for the first table */ + TableEntry *tableEntry; /* this node's relation and range table id */ + JoinRuleType joinRuleType; /* not relevant for the first table */ + JoinType joinType; /* not relevant for the first table */ + Var *partitionColumn; /* not relevant for the first table */ char partitionMethod; - List *joinClauseList; /* not relevant for the first table */ + List *joinClauseList; /* not relevant for the first table */ List *shardIntervalList; - } JoinOrderNode; diff --git a/src/include/distributed/multi_logical_optimizer.h b/src/include/distributed/multi_logical_optimizer.h index f8c145751..4efc1bfed 100644 --- a/src/include/distributed/multi_logical_optimizer.h +++ b/src/include/distributed/multi_logical_optimizer.h @@ -44,7 +44,7 @@ * * Please note that the order of values in this enumeration is tied to the order * of elements in the following AggregateNames array. This order needs to be - * preserved. + * preserved. 
*/ typedef enum { @@ -55,7 +55,6 @@ typedef enum AGGREGATE_SUM = 4, AGGREGATE_COUNT = 5, AGGREGATE_ARRAY_AGG = 6 - } AggregateType; @@ -69,7 +68,6 @@ typedef enum PUSH_DOWN_VALID = 1, PUSH_DOWN_NOT_VALID = 2, PUSH_DOWN_SPECIAL_CONDITIONS = 3 - } PushDownStatus; @@ -82,7 +80,6 @@ typedef enum PULL_UP_INVALID_FIRST = 0, PULL_UP_VALID = 1, PULL_UP_NOT_VALID = 2 - } PullUpStatus; @@ -97,8 +94,10 @@ typedef enum * Please note that the order of elements in this array is tied to the order of * values in the preceding AggregateType enum. This order needs to be preserved. */ -static const char * const AggregateNames[] = { "invalid", "avg", "min", "max", - "sum", "count", "array_agg" }; +static const char *const AggregateNames[] = { + "invalid", "avg", "min", "max", "sum", + "count", "array_agg" +}; /* Config variable managed via guc.c */ diff --git a/src/include/distributed/multi_logical_planner.h b/src/include/distributed/multi_logical_planner.h index 15b0cba07..40ee39c70 100644 --- a/src/include/distributed/multi_logical_planner.h +++ b/src/include/distributed/multi_logical_planner.h @@ -40,8 +40,8 @@ typedef struct MultiNode CitusNodeTag type; struct MultiNode *parentNode; - /* child node(s) are defined in unary and binary nodes */ + /* child node(s) are defined in unary and binary nodes */ } MultiNode; @@ -51,7 +51,6 @@ typedef struct MultiUnaryNode MultiNode node; struct MultiNode *childNode; - } MultiUnaryNode; @@ -62,7 +61,6 @@ typedef struct MultiBinaryNode struct MultiNode *leftChildNode; struct MultiNode *rightChildNode; - } MultiBinaryNode; @@ -73,7 +71,6 @@ typedef struct MultiBinaryNode typedef struct MultiTreeRoot { MultiUnaryNode unaryNode; - } MultiTreeRoot; @@ -91,7 +88,6 @@ typedef struct MultiTable Alias *alias; Alias *referenceNames; Query *subquery; /* this field is only valid for non-relation subquery types */ - } MultiTable; @@ -100,7 +96,6 @@ typedef struct MultiProject { MultiUnaryNode unaryNode; List *columnList; - } MultiProject; @@ -112,7 +107,6 
@@ typedef struct MultiProject typedef struct MultiCollect { MultiUnaryNode unaryNode; - } MultiCollect; @@ -125,7 +119,6 @@ typedef struct MultiSelect { MultiUnaryNode unaryNode; List *selectClauseList; - } MultiSelect; @@ -140,7 +133,6 @@ typedef struct MultiJoin List *joinClauseList; JoinRuleType joinRuleType; JoinType joinType; - } MultiJoin; @@ -150,7 +142,6 @@ typedef struct MultiPartition MultiUnaryNode unaryNode; Var *partitionColumn; uint32 splitPointTableId; - } MultiPartition; @@ -158,7 +149,6 @@ typedef struct MultiPartition typedef struct MultiCartesianProduct { MultiBinaryNode binaryNode; - } MultiCartesianProduct; @@ -183,7 +173,6 @@ typedef struct MultiExtendedOp List *sortClauseList; Node *limitCount; Node *limitOffset; - } MultiExtendedOp; diff --git a/src/include/distributed/multi_physical_planner.h b/src/include/distributed/multi_physical_planner.h index 09d28e952..3ad053b22 100644 --- a/src/include/distributed/multi_physical_planner.h +++ b/src/include/distributed/multi_physical_planner.h @@ -2,7 +2,7 @@ * * multi_physical_planner.h * Type and function declarations used in creating the distributed execution - * plan. + * plan. * * Copyright (c) 2012, Citus Data, Inc. * @@ -40,17 +40,18 @@ (" UINT64_FORMAT ", %d, %s, '%s', %d, %d)" #define MERGE_FILES_INTO_TABLE_COMMAND "SELECT worker_merge_files_into_table \ (" UINT64_FORMAT ", %d, '%s', '%s')" -#define MERGE_FILES_AND_RUN_QUERY_COMMAND "SELECT worker_merge_files_and_run_query(" UINT64_FORMAT ", %d, '%s', '%s')" +#define MERGE_FILES_AND_RUN_QUERY_COMMAND \ + "SELECT worker_merge_files_and_run_query(" UINT64_FORMAT ", %d, '%s', '%s')" typedef enum CitusRTEKind { - CITUS_RTE_RELATION = RTE_RELATION, /* ordinary relation reference */ - CITUS_RTE_SUBQUERY = RTE_SUBQUERY, /* subquery in FROM */ - CITUS_RTE_JOIN = RTE_JOIN, /* join */ - CITUS_RTE_FUNCTION = RTE_FUNCTION, /* function in FROM */ - CITUS_RTE_VALUES = RTE_VALUES, /* VALUES (), (), ... 
*/ - CITUS_RTE_CTE = RTE_CTE, /* common table expr (WITH list element) */ + CITUS_RTE_RELATION = RTE_RELATION, /* ordinary relation reference */ + CITUS_RTE_SUBQUERY = RTE_SUBQUERY, /* subquery in FROM */ + CITUS_RTE_JOIN = RTE_JOIN, /* join */ + CITUS_RTE_FUNCTION = RTE_FUNCTION, /* function in FROM */ + CITUS_RTE_VALUES = RTE_VALUES, /* VALUES (), (), ... */ + CITUS_RTE_CTE = RTE_CTE, /* common table expr (WITH list element) */ CITUS_RTE_SHARD, CITUS_RTE_REMOTE_QUERY } CitusRTEKind; @@ -61,8 +62,7 @@ typedef enum { PARTITION_INVALID_FIRST = 0, RANGE_PARTITION_TYPE = 1, - HASH_PARTITION_TYPE = 2 - + HASH_PARTITION_TYPE = 2 } PartitionType; @@ -77,7 +77,6 @@ typedef enum MAP_OUTPUT_FETCH_TASK = 5, MERGE_FETCH_TASK = 6, MODIFY_TASK = 7 - } TaskType; @@ -88,7 +87,6 @@ typedef enum TASK_ASSIGNMENT_GREEDY = 1, TASK_ASSIGNMENT_ROUND_ROBIN = 2, TASK_ASSIGNMENT_FIRST_REPLICA = 3 - } TaskAssignmentPolicyType; @@ -99,7 +97,6 @@ typedef enum JOIN_MAP_MERGE_JOB = 1, SUBQUERY_MAP_MERGE_JOB = 2, TOP_LEVEL_WORKER_JOB = 3 - } BoundaryNodeJobType; @@ -133,7 +130,6 @@ typedef struct MapMergeJob ShardInterval **sortedShardIntervalArray; /* only applies to range partitioning */ List *mapTaskList; List *mergeTaskList; - } MapMergeJob; @@ -153,18 +149,17 @@ typedef struct Task uint64 jobId; uint32 taskId; char *queryString; - uint64 anchorShardId; /* only applies to compute tasks */ - List *taskPlacementList; /* only applies to compute tasks */ - List *dependedTaskList; /* only applies to compute tasks */ + uint64 anchorShardId; /* only applies to compute tasks */ + List *taskPlacementList; /* only applies to compute tasks */ + List *dependedTaskList; /* only applies to compute tasks */ uint32 partitionId; - uint32 upstreamTaskId; /* only applies to data fetch tasks */ + uint32 upstreamTaskId; /* only applies to data fetch tasks */ ShardInterval *shardInterval; /* only applies to merge tasks */ bool assignmentConstrained; /* only applies to merge tasks */ - uint64 shardId; /* only 
applies to shard fetch tasks */ + uint64 shardId; /* only applies to shard fetch tasks */ TaskExecution *taskExecution; /* used by task tracker executor */ - bool upsertQuery; /* only applies to modify tasks */ - + bool upsertQuery; /* only applies to modify tasks */ } Task; @@ -177,7 +172,6 @@ typedef struct RangeTableFragment CitusRTEKind fragmentType; void *fragmentReference; uint32 rangeTableId; - } RangeTableFragment; @@ -190,7 +184,6 @@ typedef struct JoinSequenceNode { uint32 rangeTableId; int32 joiningRangeTableId; - } JoinSequenceNode; @@ -203,7 +196,6 @@ typedef struct MultiPlan Job *workerJob; Query *masterQuery; char *masterTableName; - } MultiPlan; diff --git a/src/include/distributed/multi_planner.h b/src/include/distributed/multi_planner.h index a14b2b65e..c3e2511e8 100644 --- a/src/include/distributed/multi_planner.h +++ b/src/include/distributed/multi_planner.h @@ -13,8 +13,8 @@ #include "nodes/plannodes.h" #include "nodes/relation.h" -extern PlannedStmt *multi_planner(Query *parse, int cursorOptions, - ParamListInfo boundParams); +extern PlannedStmt * multi_planner(Query *parse, int cursorOptions, + ParamListInfo boundParams); extern bool HasCitusToplevelNode(PlannedStmt *planStatement); struct MultiPlan; diff --git a/src/include/distributed/multi_server_executor.h b/src/include/distributed/multi_server_executor.h index 83105cc54..e6e17f566 100644 --- a/src/include/distributed/multi_server_executor.h +++ b/src/include/distributed/multi_server_executor.h @@ -20,9 +20,9 @@ #define MAX_TASK_EXECUTION_FAILURES 3 /* allowed failure count for one task */ -#define MAX_TRACKER_FAILURE_COUNT 3 /* allowed failure count for one tracker */ +#define MAX_TRACKER_FAILURE_COUNT 3 /* allowed failure count for one tracker */ #define REMOTE_NODE_CONNECT_TIMEOUT 4000 /* async connect timeout in ms */ -#define RESERVED_FD_COUNT 64 /* file descriptors unavailable to executor */ +#define RESERVED_FD_COUNT 64 /* file descriptors unavailable to executor */ /* copy out 
query results */ #define COPY_QUERY_TO_STDOUT_TEXT "COPY (%s) TO STDOUT" @@ -32,9 +32,9 @@ /* Task tracker executor related defines */ #define TASK_ASSIGNMENT_QUERY "SELECT task_tracker_assign_task \ - ("UINT64_FORMAT", %u, %s)" -#define TASK_STATUS_QUERY "SELECT task_tracker_task_status("UINT64_FORMAT", %u)" -#define JOB_CLEANUP_QUERY "SELECT task_tracker_cleanup_job("UINT64_FORMAT")" + ("UINT64_FORMAT ", %u, %s)" +#define TASK_STATUS_QUERY "SELECT task_tracker_task_status("UINT64_FORMAT ", %u)" +#define JOB_CLEANUP_QUERY "SELECT task_tracker_cleanup_job("UINT64_FORMAT ")" #define JOB_CLEANUP_TASK_ID INT_MAX @@ -43,9 +43,9 @@ typedef enum { EXEC_TASK_INVALID_FIRST = 0, EXEC_TASK_CONNECT_START = 1, - EXEC_TASK_CONNECT_POLL = 2, + EXEC_TASK_CONNECT_POLL = 2, EXEC_TASK_FAILED = 3, - EXEC_FETCH_TASK_LOOP = 4, + EXEC_FETCH_TASK_LOOP = 4, EXEC_FETCH_TASK_START = 5, EXEC_FETCH_TASK_RUNNING = 6, EXEC_COMPUTE_TASK_START = 7, @@ -60,7 +60,6 @@ typedef enum EXEC_TASK_TRACKER_FAILED = 14, EXEC_SOURCE_TASK_TRACKER_RETRY = 15, EXEC_SOURCE_TASK_TRACKER_FAILED = 16 - } TaskExecStatus; @@ -74,7 +73,6 @@ typedef enum EXEC_TRANSMIT_TRACKER_RETRY = 4, EXEC_TRANSMIT_TRACKER_FAILED = 5, EXEC_TRANSMIT_DONE = 6 - } TransmitExecStatus; @@ -86,7 +84,6 @@ typedef enum TRACKER_CONNECT_POLL = 2, TRACKER_CONNECTED = 3, TRACKER_CONNECTION_FAILED = 4 - } TrackerStatus; @@ -97,7 +94,6 @@ typedef enum MULTI_EXECUTOR_REAL_TIME = 1, MULTI_EXECUTOR_TASK_TRACKER = 2, MULTI_EXECUTOR_ROUTER = 3 - } MultiExecutorType; @@ -107,7 +103,6 @@ typedef enum CONNECT_ACTION_NONE = 0, CONNECT_ACTION_OPENED = 1, CONNECT_ACTION_CLOSED = 2 - } ConnectAction; @@ -132,7 +127,6 @@ struct TaskExecution uint32 querySourceNodeIndex; /* only applies to map fetch tasks */ int32 dataFetchTaskIndex; uint32 failureCount; - }; @@ -147,7 +141,6 @@ typedef struct TrackerTaskState uint32 taskId; TaskStatus status; StringInfo taskAssignmentQuery; - } TrackerTaskState; @@ -158,7 +151,7 @@ typedef struct TrackerTaskState */ typedef 
struct TaskTracker { - uint32 workerPort; /* node's port; part of hash table key */ + uint32 workerPort; /* node's port; part of hash table key */ char workerName[WORKER_LENGTH]; /* node's name; part of hash table key */ TrackerStatus trackerStatus; int32 connectionId; @@ -171,7 +164,6 @@ typedef struct TaskTracker int32 currentTaskIndex; bool connectionBusy; TrackerTaskState *connectionBusyOnTask; - } TaskTracker; @@ -184,7 +176,6 @@ typedef struct WorkerNodeState uint32 workerPort; char workerName[WORKER_LENGTH]; uint32 openConnectionCount; - } WorkerNodeState; diff --git a/src/include/distributed/pg_dist_partition.h b/src/include/distributed/pg_dist_partition.h index d3db82638..d277bc8ce 100644 --- a/src/include/distributed/pg_dist_partition.h +++ b/src/include/distributed/pg_dist_partition.h @@ -21,9 +21,9 @@ */ typedef struct FormData_pg_dist_partition { - Oid logicalrelid; /* logical relation id; references pg_class oid */ - char partmethod; /* partition method; see codes below */ - text partkey; /* partition key expression */ + Oid logicalrelid; /* logical relation id; references pg_class oid */ + char partmethod; /* partition method; see codes below */ + text partkey; /* partition key expression */ } FormData_pg_dist_partition; /* ---------------- @@ -37,16 +37,16 @@ typedef FormData_pg_dist_partition *Form_pg_dist_partition; * compiler constants for pg_dist_partitions * ---------------- */ -#define Natts_pg_dist_partition 3 -#define Anum_pg_dist_partition_logicalrelid 1 -#define Anum_pg_dist_partition_partmethod 2 -#define Anum_pg_dist_partition_partkey 3 +#define Natts_pg_dist_partition 3 +#define Anum_pg_dist_partition_logicalrelid 1 +#define Anum_pg_dist_partition_partmethod 2 +#define Anum_pg_dist_partition_partkey 3 /* valid values for partmethod include append, hash, and range */ -#define DISTRIBUTE_BY_APPEND 'a' -#define DISTRIBUTE_BY_HASH 'h' -#define DISTRIBUTE_BY_RANGE 'r' -#define REDISTRIBUTE_BY_HASH 'x' +#define DISTRIBUTE_BY_APPEND 'a' 
+#define DISTRIBUTE_BY_HASH 'h' +#define DISTRIBUTE_BY_RANGE 'r' +#define REDISTRIBUTE_BY_HASH 'x' #endif /* PG_DIST_PARTITION_H */ diff --git a/src/include/distributed/pg_dist_shard.h b/src/include/distributed/pg_dist_shard.h index dfe1c86c7..b093bb59d 100644 --- a/src/include/distributed/pg_dist_shard.h +++ b/src/include/distributed/pg_dist_shard.h @@ -22,13 +22,13 @@ */ typedef struct FormData_pg_dist_shard { - Oid logicalrelid; /* logical relation id; references pg_class oid */ - int64 shardid; /* global shardId representing remote partition */ - char shardstorage; /* shard storage type; see codes below */ -#ifdef CATALOG_VARLEN /* variable-length fields start here */ - text shardalias; /* user specified table name for shard, if any */ - text shardminvalue; /* partition key's minimum value in shard */ - text shardmaxvalue; /* partition key's maximum value in shard */ + Oid logicalrelid; /* logical relation id; references pg_class oid */ + int64 shardid; /* global shardId representing remote partition */ + char shardstorage; /* shard storage type; see codes below */ +#ifdef CATALOG_VARLEN /* variable-length fields start here */ + text shardalias; /* user specified table name for shard, if any */ + text shardminvalue; /* partition key's minimum value in shard */ + text shardmaxvalue; /* partition key's maximum value in shard */ #endif } FormData_pg_dist_shard; @@ -43,22 +43,22 @@ typedef FormData_pg_dist_shard *Form_pg_dist_shard; * compiler constants for pg_dist_shards * ---------------- */ -#define Natts_pg_dist_shard 6 -#define Anum_pg_dist_shard_logicalrelid 1 -#define Anum_pg_dist_shard_shardid 2 -#define Anum_pg_dist_shard_shardstorage 3 -#define Anum_pg_dist_shard_shardalias 4 -#define Anum_pg_dist_shard_shardminvalue 5 -#define Anum_pg_dist_shard_shardmaxvalue 6 +#define Natts_pg_dist_shard 6 +#define Anum_pg_dist_shard_logicalrelid 1 +#define Anum_pg_dist_shard_shardid 2 +#define Anum_pg_dist_shard_shardstorage 3 +#define Anum_pg_dist_shard_shardalias 4 
+#define Anum_pg_dist_shard_shardminvalue 5 +#define Anum_pg_dist_shard_shardmaxvalue 6 /* * Valid values for shard storage types include relay file, foreign table, * (standard) table and columnar table. Relay file types are currently unused. */ -#define SHARD_STORAGE_RELAY 'r' -#define SHARD_STORAGE_FOREIGN 'f' -#define SHARD_STORAGE_TABLE 't' -#define SHARD_STORAGE_COLUMNAR 'c' +#define SHARD_STORAGE_RELAY 'r' +#define SHARD_STORAGE_FOREIGN 'f' +#define SHARD_STORAGE_TABLE 't' +#define SHARD_STORAGE_COLUMNAR 'c' #endif /* PG_DIST_SHARD_H */ diff --git a/src/include/distributed/pg_dist_shard_placement.h b/src/include/distributed/pg_dist_shard_placement.h index 505daffa2..955e4efa6 100644 --- a/src/include/distributed/pg_dist_shard_placement.h +++ b/src/include/distributed/pg_dist_shard_placement.h @@ -23,12 +23,12 @@ */ typedef struct FormData_pg_dist_shard_placement { - int64 shardid; /* global shardId on remote node */ - int32 shardstate; /* shard state on remote node; see RelayFileState */ - int64 shardlength; /* shard length on remote node; stored as bigint */ -#ifdef CATALOG_VARLEN /* variable-length fields start here */ - text nodename; /* remote node's host name */ - int32 nodeport; /* remote node's port number */ + int64 shardid; /* global shardId on remote node */ + int32 shardstate; /* shard state on remote node; see RelayFileState */ + int64 shardlength; /* shard length on remote node; stored as bigint */ +#ifdef CATALOG_VARLEN /* variable-length fields start here */ + text nodename; /* remote node's host name */ + int32 nodeport; /* remote node's port number */ #endif } FormData_pg_dist_shard_placement; @@ -43,12 +43,12 @@ typedef FormData_pg_dist_shard_placement *Form_pg_dist_shard_placement; * compiler constants for pg_dist_shard_placement * ---------------- */ -#define Natts_pg_dist_shard_placement 5 -#define Anum_pg_dist_shard_placement_shardid 1 -#define Anum_pg_dist_shard_placement_shardstate 2 -#define Anum_pg_dist_shard_placement_shardlength 3 
-#define Anum_pg_dist_shard_placement_nodename 4 -#define Anum_pg_dist_shard_placement_nodeport 5 +#define Natts_pg_dist_shard_placement 5 +#define Anum_pg_dist_shard_placement_shardid 1 +#define Anum_pg_dist_shard_placement_shardstate 2 +#define Anum_pg_dist_shard_placement_shardlength 3 +#define Anum_pg_dist_shard_placement_nodename 4 +#define Anum_pg_dist_shard_placement_nodeport 5 #endif /* PG_DIST_SHARD_PLACEMENT_H */ diff --git a/src/include/distributed/relay_utility.h b/src/include/distributed/relay_utility.h index 592f61632..bd4657a01 100644 --- a/src/include/distributed/relay_utility.h +++ b/src/include/distributed/relay_utility.h @@ -3,7 +3,7 @@ * relay_utility.h * * Header and type declarations that extend relation, index and constraint names - * with the appropriate shard identifiers. + * with the appropriate shard identifiers. * * Copyright (c) 2012, Citus Data, Inc. * @@ -35,7 +35,6 @@ typedef enum FILE_CACHED = 2, FILE_INACTIVE = 3, FILE_TO_DELETE = 4 - } RelayFileState; diff --git a/src/include/distributed/resource_lock.h b/src/include/distributed/resource_lock.h index 6c1c8ffcf..1406da9c5 100644 --- a/src/include/distributed/resource_lock.h +++ b/src/include/distributed/resource_lock.h @@ -29,6 +29,7 @@ typedef enum AdvisoryLocktagClass /* values defined in postgres' lockfuncs.c */ ADV_LOCKTAG_CLASS_INT64 = 1, ADV_LOCKTAG_CLASS_INT32 = 2, + /* CitusDB lock types */ ADV_LOCKTAG_CLASS_CITUS_SHARD_METADATA = 4, ADV_LOCKTAG_CLASS_CITUS_SHARD = 5, diff --git a/src/include/distributed/task_tracker.h b/src/include/distributed/task_tracker.h index ad41b8589..2fc657d49 100644 --- a/src/include/distributed/task_tracker.h +++ b/src/include/distributed/task_tracker.h @@ -19,10 +19,10 @@ #include "utils/hsearch.h" -#define HIGH_PRIORITY_TASK_TIME 1 /* assignment time for high priority tasks */ -#define RESERVED_JOB_ID 1 /* reserved for cleanup and shutdown tasks */ +#define HIGH_PRIORITY_TASK_TIME 1 /* assignment time for high priority tasks */ +#define 
RESERVED_JOB_ID 1 /* reserved for cleanup and shutdown tasks */ #define SHUTDOWN_MARKER_TASK_ID UINT_MAX /* used to identify task tracker shutdown */ -#define MAX_TASK_FAILURE_COUNT 2 /* allowed failure count for one task */ +#define MAX_TASK_FAILURE_COUNT 2 /* allowed failure count for one task */ #define LOCAL_HOST_NAME "localhost" /* connect to local backends using this name */ #define TASK_CALL_STRING_SIZE 12288 /* max length of task call string */ #define TEMPLATE0_NAME "template0" /* skip job schema cleanup for template0 */ @@ -37,13 +37,13 @@ typedef enum { TASK_STATUS_INVALID_FIRST = 0, - TASK_ASSIGNED = 1, /* master node and task tracker */ + TASK_ASSIGNED = 1, /* master node and task tracker */ TASK_SCHEDULED = 2, TASK_RUNNING = 3, - TASK_FAILED = 4, + TASK_FAILED = 4, TASK_PERMANENTLY_FAILED = 5, TASK_SUCCEEDED = 6, - TASK_CANCEL_REQUESTED = 7, /* master node only */ + TASK_CANCEL_REQUESTED = 7, /* master node only */ TASK_CANCELED = 8, TASK_TO_REMOVE = 9, @@ -63,7 +63,6 @@ typedef enum * TASK_STATUS_LAST, should never have their numbers changed. 
*/ TASK_STATUS_LAST - } TaskStatus; @@ -76,16 +75,15 @@ typedef enum */ typedef struct WorkerTask { - uint64 jobId; /* job id (upper 32-bits reserved); part of hash table key */ - uint32 taskId; /* task id; part of hash table key */ + uint64 jobId; /* job id (upper 32-bits reserved); part of hash table key */ + uint32 taskId; /* task id; part of hash table key */ uint32 assignedAt; /* task assignment time in epoch seconds */ char taskCallString[TASK_CALL_STRING_SIZE]; /* query or function call string */ - TaskStatus taskStatus; /* task's current execution status */ - char databaseName[NAMEDATALEN]; /* name to use for local backend connection */ - int32 connectionId; /* connection id to local backend */ - uint32 failureCount; /* number of task failures */ - + TaskStatus taskStatus; /* task's current execution status */ + char databaseName[NAMEDATALEN]; /* name to use for local backend connection */ + int32 connectionId; /* connection id to local backend */ + uint32 failureCount; /* number of task failures */ } WorkerTask; @@ -97,6 +95,7 @@ typedef struct WorkerTasksSharedStateData { /* Hash table shared by the task tracker and task tracker protocol functions */ HTAB *taskHash; + /* Lock protecting workerNodesHash */ LWLock *taskHashLock; } WorkerTasksSharedStateData; diff --git a/src/include/distributed/worker_manager.h b/src/include/distributed/worker_manager.h index 57a38194a..f23a659d0 100644 --- a/src/include/distributed/worker_manager.h +++ b/src/include/distributed/worker_manager.h @@ -43,12 +43,11 @@ */ typedef struct WorkerNode { - uint32 workerPort; /* node's port; part of hash table key */ + uint32 workerPort; /* node's port; part of hash table key */ char workerName[WORKER_LENGTH]; /* node's name; part of hash table key */ char workerRack[WORKER_LENGTH]; /* node's network location */ - bool inWorkerFile; /* is node in current membership file? */ - + bool inWorkerFile; /* is node in current membership file? 
*/ } WorkerNode; diff --git a/src/include/distributed/worker_protocol.h b/src/include/distributed/worker_protocol.h index e797b7396..ac2985f6a 100644 --- a/src/include/distributed/worker_protocol.h +++ b/src/include/distributed/worker_protocol.h @@ -64,8 +64,7 @@ typedef struct RangePartitionContext { FmgrInfo *comparisonFunction; Datum *splitPointArray; - int32 splitPointCount; - + int32 splitPointCount; } RangePartitionContext; @@ -77,7 +76,6 @@ typedef struct HashPartitionContext { FmgrInfo *hashFunction; uint32 partitionCount; - } HashPartitionContext; @@ -88,16 +86,16 @@ typedef struct HashPartitionContext */ typedef struct PartialCopyStateData { - StringInfo fe_msgbuf; /* used for all dests during COPY TO, only for - * dest == COPY_NEW_FE in COPY FROM */ - int file_encoding; /* file or remote side's character encoding */ - bool need_transcoding; /* file encoding diff from server? */ - bool binary; /* binary format? */ - char *null_print; /* NULL marker string (server encoding!) */ - char *null_print_client; /* same converted to file encoding */ - char *delim; /* column delimiter (must be 1 byte) */ + StringInfo fe_msgbuf; /* used for all dests during COPY TO, only for + * dest == COPY_NEW_FE in COPY FROM */ + int file_encoding; /* file or remote side's character encoding */ + bool need_transcoding; /* file encoding diff from server? */ + bool binary; /* binary format? */ + char *null_print; /* NULL marker string (server encoding!) 
*/ + char *null_print_client; /* same converted to file encoding */ + char *delim; /* column delimiter (must be 1 byte) */ - MemoryContext rowcontext; /* per-row evaluation context */ + MemoryContext rowcontext; /* per-row evaluation context */ } PartialCopyStateData; typedef struct PartialCopyStateData *PartialCopyState; @@ -114,7 +112,6 @@ typedef struct FileOutputStream File fileDescriptor; StringInfo fileBuffer; StringInfo filePath; - } FileOutputStream; From f874a56e24fa0f57ab7365e8d028d4b160c73116 Mon Sep 17 00:00:00 2001 From: Jason Petersen Date: Fri, 5 Feb 2016 16:53:04 -0700 Subject: [PATCH 02/12] Omit RangeVarCallbackForDropIndex from formatting I removed two braces to have this function remain more similar to the original PostgreSQL function and added uncrustify commands to disable formatting of its contents. --- src/backend/distributed/executor/multi_utility.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/backend/distributed/executor/multi_utility.c b/src/backend/distributed/executor/multi_utility.c index be4148f75..84d74edf2 100644 --- a/src/backend/distributed/executor/multi_utility.c +++ b/src/backend/distributed/executor/multi_utility.c @@ -989,6 +989,7 @@ AllFinalizedPlacementsAccessible(Oid relationId) static void RangeVarCallbackForDropIndex(const RangeVar *rel, Oid relOid, Oid oldRelOid, void *arg) { + /* *INDENT-OFF* */ HeapTuple tuple; struct DropRelationCallbackState *state; char relkind; @@ -1023,10 +1024,8 @@ RangeVarCallbackForDropIndex(const RangeVar *rel, Oid relOid, Oid oldRelOid, voi classform = (Form_pg_class) GETSTRUCT(tuple); if (classform->relkind != relkind) - { ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not an index", rel->relname))); - } /* Allow DROP to either table owner or schema owner */ if (!pg_class_ownercheck(relOid, GetUserId()) && @@ -1055,4 +1054,5 @@ RangeVarCallbackForDropIndex(const RangeVar *rel, Oid relOid, Oid oldRelOid, voi if (OidIsValid(state->heapOid)) 
LockRelationOid(state->heapOid, heap_lockmode); } + /* *INDENT-ON* */ } From 74372f70e08b842df9566bfa715ced3a7d1dbfb4 Mon Sep 17 00:00:00 2001 From: Jason Petersen Date: Fri, 5 Feb 2016 17:19:43 -0700 Subject: [PATCH 03/12] Omit get_extension_schema from formatting It exactly matches the implementation in extension.c. --- src/backend/distributed/utils/citus_ruleutils.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/backend/distributed/utils/citus_ruleutils.c b/src/backend/distributed/utils/citus_ruleutils.c index 5f2c4a5ef..dda9a86fc 100644 --- a/src/backend/distributed/utils/citus_ruleutils.c +++ b/src/backend/distributed/utils/citus_ruleutils.c @@ -102,6 +102,7 @@ pg_get_extensiondef_string(Oid tableRelationId) static Oid get_extension_schema(Oid ext_oid) { + /* *INDENT-OFF* */ Oid result; Relation rel; SysScanDesc scandesc; @@ -131,6 +132,7 @@ get_extension_schema(Oid ext_oid) heap_close(rel, AccessShareLock); return result; + /* *INDENT-ON* */ } From bc231137328c7bb0ed47612feb9b5886007fd235 Mon Sep 17 00:00:00 2001 From: Jason Petersen Date: Sat, 6 Feb 2016 14:56:12 -0700 Subject: [PATCH 04/12] Omit backend/copy.c-inspired parts from formatting I think we need to assess whether this function is still as in-sync with upstream as we believe, but for now I'm omitting it from formatting. 
--- src/backend/distributed/worker/worker_partition_protocol.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/backend/distributed/worker/worker_partition_protocol.c b/src/backend/distributed/worker/worker_partition_protocol.c index 9ab21e588..9ad956ed6 100644 --- a/src/backend/distributed/worker/worker_partition_protocol.c +++ b/src/backend/distributed/worker/worker_partition_protocol.c @@ -1143,6 +1143,7 @@ OutputBinaryFooters(FileOutputStream *partitionFileArray, uint32 fileCount) } +/* *INDENT-OFF* */ /* Append data to the copy buffer in outputState */ static void CopySendData(PartialCopyState outputState, const void *databuf, int datasize) @@ -1282,6 +1283,7 @@ CopyAttributeOutText(PartialCopyState cstate, char *string) } +/* *INDENT-ON* */ /* Helper function to send pending copy output */ static inline void CopyFlushOutput(PartialCopyState cstate, char *start, char *pointer) From 2b5ae847d4c385a6732b05ed4bf43f8817b7901e Mon Sep 17 00:00:00 2001 From: Jason Petersen Date: Mon, 8 Feb 2016 12:44:05 -0700 Subject: [PATCH 05/12] Make copy_options.ch similar to PostgreSQL copy.c We reorganized these functions in our copy; not sure why (makes diffing harder). I'm moving it back. --- src/bin/csql/copy_options.c | 30 +++++++++++++++--------------- src/bin/csql/copy_options.h | 2 +- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/bin/csql/copy_options.c b/src/bin/csql/copy_options.c index 357ae726c..da5dd5826 100644 --- a/src/bin/csql/copy_options.c +++ b/src/bin/csql/copy_options.c @@ -16,7 +16,21 @@ #include "stringutils.h" -/* Concatenates "more" onto "var", and frees the original value of *var. 
*/ +void +free_copy_options(copy_options * ptr) +{ + if (!ptr) + return; + free(ptr->before_tofrom); + free(ptr->after_tofrom); + free(ptr->file); + free(ptr->tableName); + free(ptr->columnList); + free(ptr); +} + + +/* concatenate "more" onto "var", freeing the original value of *var */ static void xstrcat(char **var, const char *more) { @@ -212,20 +226,6 @@ error: /* Frees copy options. */ -void -free_copy_options(copy_options * ptr) -{ - if (!ptr) - return; - free(ptr->before_tofrom); - free(ptr->after_tofrom); - free(ptr->file); - free(ptr->tableName); - free(ptr->columnList); - free(ptr); -} - - /* * ParseStageOptions takes the given copy options, parses the additional options * needed for the \stage command, and sets them in the copy options structure. diff --git a/src/bin/csql/copy_options.h b/src/bin/csql/copy_options.h index 07a3aeb09..4a2e15222 100644 --- a/src/bin/csql/copy_options.h +++ b/src/bin/csql/copy_options.h @@ -46,7 +46,7 @@ typedef struct copy_options bool psql_inout; /* true = use psql stdin/stdout */ bool from; /* true = FROM, false = TO */ - char *tableName; /* table name to stage data to */ + char *tableName; /* table name to stage data to */ char *columnList; /* optional column list used in staging */ } copy_options; From 19c529f311699a37195c41398c64fdaa08402688 Mon Sep 17 00:00:00 2001 From: Jason Petersen Date: Mon, 8 Feb 2016 12:49:25 -0700 Subject: [PATCH 06/12] Omit most of copy_options from formatting Only a small portion is Citus style. --- src/bin/csql/copy_options.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/bin/csql/copy_options.c b/src/bin/csql/copy_options.c index da5dd5826..7c6542a98 100644 --- a/src/bin/csql/copy_options.c +++ b/src/bin/csql/copy_options.c @@ -16,6 +16,7 @@ #include "stringutils.h" +/* *INDENT-OFF* */ void free_copy_options(copy_options * ptr) { @@ -224,8 +225,10 @@ error: return NULL; } +/* *INDENT-ON* */ /* Frees copy options. 
*/ + /* * ParseStageOptions takes the given copy options, parses the additional options * needed for the \stage command, and sets them in the copy options structure. From 920e0c406d96e426d4d04e36703104954304eccc Mon Sep 17 00:00:00 2001 From: Jason Petersen Date: Mon, 8 Feb 2016 13:19:52 -0700 Subject: [PATCH 07/12] Format csql's stage files These are entirely Citus-produced, so need full formatting. --- src/bin/csql/stage.c | 72 ++++++++++++++++++++------------------- src/bin/csql/stage.h | 80 ++++++++++++++++++++++---------------------- 2 files changed, 77 insertions(+), 75 deletions(-) diff --git a/src/bin/csql/stage.c b/src/bin/csql/stage.c index 1863b8bcb..de8bd5e9b 100644 --- a/src/bin/csql/stage.c +++ b/src/bin/csql/stage.c @@ -26,7 +26,8 @@ static bool FileSize(char *filename, uint64 *fileSize); static PGconn * ConnectToWorkerNode(const char *nodeName, uint32 nodePort, const char *nodeDatabase); -static PGresult * ExecuteRemoteCommand(PGconn *remoteConnection, const char *remoteCommand, +static PGresult * ExecuteRemoteCommand(PGconn *remoteConnection, + const char *remoteCommand, const char **parameterValues, int parameterCount); static TableMetadata * InitTableMetadata(const char *tableName); static ShardMetadata * InitShardMetadata(int shardPlacementPolicy); @@ -41,7 +42,8 @@ static uint64 GetValueUint64(const PGresult *result, int rowNumber, int columnNu static bool MasterGetTableMetadata(const char *tableName, TableMetadata *tableMetadata); static bool MasterGetTableDDLEvents(const char *tableName, TableMetadata *tableMetadata); static bool MasterGetNewShardId(ShardMetadata *shardMetadata); -static bool MasterGetCandidateNodes(ShardMetadata *shardMetadata, int shardPlacementPolicy); +static bool MasterGetCandidateNodes(ShardMetadata *shardMetadata, + int shardPlacementPolicy); static bool MasterInsertShardRow(uint32 logicalRelid, char storageType, const ShardMetadata *shardMetadata); static bool MasterInsertPlacementRows(const ShardMetadata 
*shardMetadata); @@ -62,7 +64,8 @@ static bool ApplyShardDDLCommand(PGconn *workerNode, uint64 shardId, const char static bool TransmitTableData(PGconn *workerNode, uint64 shardId, uint64 shardMaxSize, copy_options *stageOptions, uint64 currentFileOffset, uint64 *nextFileOffset); -static bool TransmitFile(PGconn *workerNode, const char *localPath, const char *remotePath); +static bool TransmitFile(PGconn *workerNode, const char *localPath, + const char *remotePath); static bool FileStreamOK(const copy_options *stageOptions); static PQExpBuffer CreateCopyQueryString(const char *tableName, const char *columnList, const char *afterToFrom); @@ -166,7 +169,7 @@ DoStageData(const char *stageCommand) if (partitionMethod == DISTRIBUTE_BY_HASH) { psql_error("\\stage: staging data into hash partitioned tables is not " - "supported\n"); + "supported\n"); free_copy_options(stageOptions); FreeTableMetadata(tableMetadata); @@ -179,7 +182,7 @@ DoStageData(const char *stageCommand) bool tableOptionsOK = ColumnarTableOptionsOK(tableMetadata->logicalRelid); if (!tableOptionsOK) { - return false; /* error message already displayed */ + return false; /* error message already displayed */ } } @@ -225,7 +228,7 @@ DoStageData(const char *stageCommand) */ FreeCommonStageData(stageOptions, tableMetadata, shardMetadataList); - return false; /* abort immediately */ + return false; /* abort immediately */ } /* save allocated shard metadata */ @@ -245,7 +248,7 @@ DoStageData(const char *stageCommand) */ for (nodeIndex = 0; nodeIndex < shardMetadata->nodeCount; nodeIndex++) { - char *remoteNodeName = shardMetadata->nodeNameList[nodeIndex]; + char *remoteNodeName = shardMetadata->nodeNameList[nodeIndex]; uint32 remoteNodePort = shardMetadata->nodePortList[nodeIndex]; PGconn *remoteNode = NULL; @@ -341,7 +344,6 @@ DoStageData(const char *stageCommand) /* update current file offset */ currentFileOffset = nextFileOffset; - } /* while more file data left for sharding */ /* @@ -390,9 +392,9 @@ 
ConnectToWorkerNode(const char *nodeName, uint32 nodePort, const char *nodeDatab char nodePortString[MAXPGPATH]; char connInfoString[MAXPGPATH]; - /* transcribe port number and connection info to their string values */ + /* transcribe port number and connection info to their string values */ snprintf(nodePortString, MAXPGPATH, "%u", nodePort); - snprintf(connInfoString, MAXPGPATH, CONN_INFO_TEMPLATE, + snprintf(connInfoString, MAXPGPATH, CONN_INFO_TEMPLATE, nodeDatabase, CLIENT_CONNECT_TIMEOUT); workerNode = PQsetdb(nodeName, nodePortString, nodeOptions, nodeTty, connInfoString); @@ -421,16 +423,16 @@ ExecuteRemoteCommand(PGconn *remoteConnection, const char *remoteCommand, { PGresult *result = NULL; - const Oid *parameterType = NULL; /* let the backend deduce type */ + const Oid *parameterType = NULL; /* let the backend deduce type */ const int *parameterLength = NULL; /* text params do not need length */ const int *parameterFormat = NULL; /* text params have Null by default */ - const int resultFormat = 0; /* ask for results in text format */ + const int resultFormat = 0; /* ask for results in text format */ result = PQexecParams(remoteConnection, remoteCommand, parameterCount, parameterType, parameterValues, parameterLength, parameterFormat, resultFormat); - if (PQresultStatus(result) != PGRES_COMMAND_OK && + if (PQresultStatus(result) != PGRES_COMMAND_OK && PQresultStatus(result) != PGRES_TUPLES_OK) { psql_error("remote command \"%s\" failed with %s", @@ -488,7 +490,7 @@ FreeTableMetadata(TableMetadata *tableMetadata) for (eventIndex = 0; eventIndex < eventCount; eventIndex++) { char *ddlEvent = tableMetadata->ddlEventList[eventIndex]; - + free(ddlEvent); ddlEvent = NULL; } @@ -552,7 +554,7 @@ FreeShardMetadata(ShardMetadata *shardMetadata) for (nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) { char *nodeName = shardMetadata->nodeNameList[nodeIndex]; - + free(nodeName); nodeName = NULL; } @@ -655,7 +657,7 @@ ExtendTablename(const char *baseTablename, uint64 
shardId) { char *extendedTablename = (char *) pg_malloc0(NAMEDATALEN); - snprintf(extendedTablename, NAMEDATALEN, "%s%c" UINT64_FORMAT, + snprintf(extendedTablename, NAMEDATALEN, "%s%c" UINT64_FORMAT, baseTablename, SHARD_NAME_SEPARATOR, shardId); return extendedTablename; @@ -678,7 +680,7 @@ GetValueUint64(const PGresult *result, int rowNumber, int columnNumber) errno = 0; value = strtoull(valueString, &valueStringEnd, 0); - + if (errno != 0 || (*valueStringEnd) != '\0') { return INVALID_UINT64; @@ -716,7 +718,7 @@ MasterGetTableMetadata(const char *tableName, TableMetadata *tableMetadata) char *tableStorageType = NULL; char *partitionMethod = NULL; char *partitionKey = NULL; - int partitionKeyLength = 0; + int partitionKeyLength = 0; uint64 logicalRelid = 0; uint64 shardReplicaCount = 0; uint64 shardMaxSize = 0; @@ -727,7 +729,7 @@ MasterGetTableMetadata(const char *tableName, TableMetadata *tableMetadata) parameterValue, parameterCount); if (result == NULL) { - return false; /* error message already displayed */ + return false; /* error message already displayed */ } /* find column numbers associated with column names */ @@ -798,13 +800,13 @@ MasterGetTableDDLEvents(const char *tableName, TableMetadata *tableMetadata) int ddlEventIndex = 0; /* fetch DDL events needed for table creation */ - result = ExecuteRemoteCommand(masterNode, remoteCommand, + result = ExecuteRemoteCommand(masterNode, remoteCommand, parameterValue, parameterCount); if (result == NULL) { return false; } - + /* check that we have at least one DDL event */ ddlEventCount = PQntuples(result); if (ddlEventCount <= 0) @@ -825,7 +827,7 @@ MasterGetTableDDLEvents(const char *tableName, TableMetadata *tableMetadata) { char *ddlEvent = NULL; char *ddlEventValue = PQgetvalue(result, ddlEventIndex, 0); - int ddlEventLength = PQgetlength(result, ddlEventIndex, 0); + int ddlEventLength = PQgetlength(result, ddlEventIndex, 0); if (ddlEventLength <= 0) { @@ -866,7 +868,7 @@ MasterGetNewShardId(ShardMetadata 
*shardMetadata) uint64 shardId = 0; /* fetch unique shardId for shard to be created */ - result = ExecuteRemoteCommand(masterNode, remoteCommand, + result = ExecuteRemoteCommand(masterNode, remoteCommand, parameterValue, parameterCount); if (result == NULL) { @@ -877,7 +879,7 @@ MasterGetNewShardId(ShardMetadata *shardMetadata) shardId = GetValueUint64(result, 0, 0); if (shardId == INVALID_UINT64) { - psql_error("remote command \"%s\" failed with invalid shardId\n", + psql_error("remote command \"%s\" failed with invalid shardId\n", remoteCommand); PQclear(result); @@ -996,11 +998,11 @@ MasterGetCandidateNodes(ShardMetadata *shardMetadata, int shardPlacementPolicy) /* walk over fetched node name/port list, and assign them to metadata */ for (nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) { - char *nodeName = NULL; + char *nodeName = NULL; uint64 nodePort = 0; - char *nodeNameValue = PQgetvalue(result, nodeIndex, nodeNameIndex); - int nodeNameLength = PQgetlength(result, nodeIndex, nodeNameIndex); + char *nodeNameValue = PQgetvalue(result, nodeIndex, nodeNameIndex); + int nodeNameLength = PQgetlength(result, nodeIndex, nodeNameIndex); if (nodeNameLength <= 0) { @@ -1014,7 +1016,7 @@ MasterGetCandidateNodes(ShardMetadata *shardMetadata, int shardPlacementPolicy) /* deep copy node name and assign to metadata */ nodeName = (char *) pg_malloc0(nodeNameLength + 1); strncpy(nodeName, nodeNameValue, nodeNameLength + 1); - + shardMetadata->nodeNameList[nodeIndex] = nodeName; /* convert port value string to 64-bit integer, and assign to metadata */ @@ -1024,7 +1026,7 @@ MasterGetCandidateNodes(ShardMetadata *shardMetadata, int shardPlacementPolicy) psql_error("remote command \"%s\" failed to fetch valid port number\n", remoteCommand); PQclear(result); - + return false; } @@ -1107,12 +1109,12 @@ MasterInsertPlacementRows(const ShardMetadata *shardMetadata) bool staged = shardMetadata->nodeStageList[nodeIndex]; if (staged) { - char *nodeName = 
shardMetadata->nodeNameList[nodeIndex]; + char *nodeName = shardMetadata->nodeNameList[nodeIndex]; uint32 nodePort = shardMetadata->nodePortList[nodeIndex]; /* convert parameter to its string representation */ snprintf(nodePortString, NAMEDATALEN, "%u", nodePort); - + parameterValue[3] = nodeName; parameterValue[4] = nodePortString; @@ -1136,7 +1138,7 @@ MasterInsertPlacementRows(const ShardMetadata *shardMetadata) * staged to worker nodes. The function executes shard metadata insert commands * within a single transaction so that either all or none of the metadata are * finalized. On success, the function commits the transaction and returns true. - * On failure, the function rolls back the transaction and returns false. + * On failure, the function rolls back the transaction and returns false. */ static bool MasterInsertShardMetadata(uint32 logicalRelid, char storageType, @@ -1199,7 +1201,7 @@ IssueTransactionCommand(PGconn *connection, const char *command) return false; } - + PQclear(result); return true; } @@ -1729,7 +1731,7 @@ ShardColumnarTableSize(PGconn *workerNode, const char *tablename, uint64 shardId * failure, the function returns false. 
*/ static bool -ShardMinMaxValues(PGconn *workerNode, const char *tablename, +ShardMinMaxValues(PGconn *workerNode, const char *tablename, const char *partitionKey, ShardMetadata *shardMetadata) { const int MinValueIndex = 0; @@ -1744,7 +1746,7 @@ ShardMinMaxValues(PGconn *workerNode, const char *tablename, int maxValueLength = 0; extendedTablename = ExtendTablename(tablename, shardMetadata->shardId); - snprintf(remoteCommand, MAXPGPATH, SHARD_MIN_MAX_COMMAND, + snprintf(remoteCommand, MAXPGPATH, SHARD_MIN_MAX_COMMAND, partitionKey, partitionKey, extendedTablename); result = PQexec(workerNode, remoteCommand); diff --git a/src/bin/csql/stage.h b/src/bin/csql/stage.h index 01575f886..0863adff5 100644 --- a/src/bin/csql/stage.h +++ b/src/bin/csql/stage.h @@ -30,42 +30,44 @@ #define ROLLBACK_COMMAND "ROLLBACK" /* Names of remote function calls to execute on the master. */ -#define MASTER_GET_TABLE_METADATA "SELECT * FROM master_get_table_metadata($1::text)" +#define MASTER_GET_TABLE_METADATA "SELECT * FROM master_get_table_metadata($1::text)" #define MASTER_GET_TABLE_DDL_EVENTS "SELECT * FROM master_get_table_ddl_events($1::text)" -#define MASTER_GET_NEW_SHARDID "SELECT * FROM master_get_new_shardid()" -#define MASTER_GET_LOCAL_FIRST_CANDIDATE_NODES "SELECT * FROM \ - master_get_local_first_candidate_nodes()" -#define MASTER_GET_ROUND_ROBIN_CANDIDATE_NODES "SELECT * FROM \ - master_get_round_robin_candidate_nodes($1::int8)" +#define MASTER_GET_NEW_SHARDID "SELECT * FROM master_get_new_shardid()" +#define MASTER_GET_LOCAL_FIRST_CANDIDATE_NODES \ + "SELECT * FROM master_get_local_first_candidate_nodes()" +#define MASTER_GET_ROUND_ROBIN_CANDIDATE_NODES \ + "SELECT * FROM master_get_round_robin_candidate_nodes($1::int8)" -#define MASTER_INSERT_SHARD_ROW "INSERT INTO pg_dist_shard \ - (logicalrelid, shardid, shardstorage, shardminvalue, shardmaxvalue) VALUES \ - ($1::oid, $2::int8, $3::char, $4::text, $5::text)" -#define MASTER_INSERT_PLACEMENT_ROW "INSERT INTO 
pg_dist_shard_placement \ - (shardid, shardstate, shardlength, nodename, nodeport) VALUES \ - ($1::int8, $2::int4, $3::int8, $4::text, $5::int4)" +#define MASTER_INSERT_SHARD_ROW \ + "INSERT INTO pg_dist_shard " \ + "(logicalrelid, shardid, shardstorage, shardminvalue, shardmaxvalue) VALUES " \ + "($1::oid, $2::int8, $3::char, $4::text, $5::text)" +#define MASTER_INSERT_PLACEMENT_ROW \ + "INSERT INTO pg_dist_shard_placement " \ + "(shardid, shardstate, shardlength, nodename, nodeport) VALUES " \ + "($1::int8, $2::int4, $3::int8, $4::text, $5::int4)" /* Column names used to identify response fields as returned from the master. */ -#define LOGICAL_RELID_FIELD "logical_relid" -#define PART_STORAGE_TYPE_FIELD "part_storage_type" -#define PART_METHOD_FIELD "part_method" -#define PART_KEY_FIELD "part_key" -#define PART_REPLICA_COUNT_FIELD "part_replica_count" -#define PART_MAX_SIZE_FIELD "part_max_size" -#define PART_PLACEMENT_POLICY_FIELD "part_placement_policy" -#define NODE_NAME_FIELD "node_name" -#define NODE_PORT_FIELD "node_port" +#define LOGICAL_RELID_FIELD "logical_relid" +#define PART_STORAGE_TYPE_FIELD "part_storage_type" +#define PART_METHOD_FIELD "part_method" +#define PART_KEY_FIELD "part_key" +#define PART_REPLICA_COUNT_FIELD "part_replica_count" +#define PART_MAX_SIZE_FIELD "part_max_size" +#define PART_PLACEMENT_POLICY_FIELD "part_placement_policy" +#define NODE_NAME_FIELD "node_name" +#define NODE_PORT_FIELD "node_port" /* the tablename in the overloaded COPY statement is the to-be-transferred file */ #define TRANSMIT_REGULAR_COMMAND "COPY \"%s\" FROM STDIN WITH (format 'transmit')" -#define SHARD_MIN_MAX_COMMAND "SELECT min(%s), max(%s) FROM %s" +#define SHARD_MIN_MAX_COMMAND "SELECT min(%s), max(%s) FROM %s" #define SHARD_TABLE_SIZE_COMMAND "SELECT pg_table_size('%s')" #define SET_FOREIGN_TABLE_FILENAME "ALTER FOREIGN TABLE %s OPTIONS (SET filename '%s')" -#define GET_COLUMNAR_TABLE_FILENAME_OPTION "SELECT * FROM \ - (SELECT 
(pg_options_to_table(ftoptions)).* FROM pg_foreign_table \ - WHERE ftrelid = %u) AS Q WHERE option_name = 'filename';" -#define APPLY_SHARD_DDL_COMMAND "SELECT * FROM worker_apply_shard_ddl_command \ - ($1::int8, $2::text)" +#define GET_COLUMNAR_TABLE_FILENAME_OPTION \ + "SELECT * FROM (SELECT (pg_options_to_table(ftoptions)).* FROM pg_foreign_table " \ + "WHERE ftrelid = %u) AS Q WHERE option_name = 'filename';" +#define APPLY_SHARD_DDL_COMMAND \ + "SELECT * FROM worker_apply_shard_ddl_command ($1::int8, $2::text)" #define REMOTE_FILE_SIZE_COMMAND "SELECT size FROM pg_stat_file('%s')" #define SHARD_COLUMNAR_TABLE_SIZE_COMMAND "SELECT cstore_table_size('%s')" @@ -90,17 +92,16 @@ */ typedef struct TableMetadata { - uint32 logicalRelid; /* table's relationId on the master */ - char tableStorageType; /* relay file, foreign table, or table */ - char partitionMethod; /* table's partition method */ - char *partitionKey; /* partition key expression */ - uint32 shardReplicaCount; /* shard replication factor */ - uint64 shardMaxSize; /* create new shard when shard reaches max size */ + uint32 logicalRelid; /* table's relationId on the master */ + char tableStorageType; /* relay file, foreign table, or table */ + char partitionMethod; /* table's partition method */ + char *partitionKey; /* partition key expression */ + uint32 shardReplicaCount; /* shard replication factor */ + uint64 shardMaxSize; /* create new shard when shard reaches max size */ uint32 shardPlacementPolicy; /* policy to use when choosing nodes to place shards */ char **ddlEventList; /* DDL statements used for creating new shard */ uint32 ddlEventCount; /* DDL statement count; statement list size */ - } TableMetadata; @@ -112,17 +113,16 @@ typedef struct TableMetadata */ typedef struct ShardMetadata { - uint64 shardId; /* global shardId; created on the master node */ + uint64 shardId; /* global shardId; created on the master node */ - char **nodeNameList; /* candidate node name list for shard uploading */ + 
char **nodeNameList; /* candidate node name list for shard uploading */ uint32 *nodePortList; /* candidate node port list for shard uploading */ - uint32 nodeCount; /* candidate node count; node list size */ - bool *nodeStageList; /* shard uploaded to corresponding candidate node? */ + uint32 nodeCount; /* candidate node count; node list size */ + bool *nodeStageList; /* shard uploaded to corresponding candidate node? */ char *shardMinValue; /* partition key's minimum value in shard */ char *shardMaxValue; /* partition key's maximum value in shard */ - uint64 shardSize; /* shard size; updated during staging */ - + uint64 shardSize; /* shard size; updated during staging */ } ShardMetadata; From 20c68fe251e234ec6593c126703e828b95e1ab14 Mon Sep 17 00:00:00 2001 From: Jason Petersen Date: Wed, 10 Feb 2016 12:34:32 -0700 Subject: [PATCH 08/12] Switch to using git attributes to ignore files Ties into the script introduced [here][1]. [1]: https://github.com/citusdata/tools/pull/2 --- .gitattributes | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/.gitattributes b/.gitattributes index e3ee9cf5b..5eb7bda7b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -20,3 +20,23 @@ src/test/regress/output/*.source -whitespace # These files are maintained or generated elsewhere. We take them as is. configure -whitespace + +# all C files (implementation and header) use our style... +*.[ch] citus-style + +# except these exceptions... +src/backend/distributed/utils/citus_outfuncs.c -citus-style +src/backend/distributed/utils/citus_read.c -citus-style +src/backend/distributed/utils/citus_readfuncs_94.c -citus-style +src/backend/distributed/utils/citus_readfuncs_95.c -citus-style +src/backend/distributed/utils/ruleutils_94.c -citus-style +src/backend/distributed/utils/ruleutils_95.c -citus-style +src/include/distributed/citus_nodes.h -citus-style +src/include/dumputils.h -citus-style + +# all csql files use PostgreSQL style... 
+src/bin/csql/*.[ch] -citus-style + +# except these exceptions +src/bin/csql/copy_options.c citus-style +src/bin/csql/stage.[ch] citus-style From 444f30516549204c67803d48aefe72a8956e7c77 Mon Sep 17 00:00:00 2001 From: Murat Tuncer Date: Fri, 12 Feb 2016 14:41:32 +0200 Subject: [PATCH 09/12] Add support for appending to cstore table shards - Relaxed the check which prevented append operations on cstore tables since their storage type is not SHARD_STORAGE_TABLE. - Used process utility function to perform copy operation in worker_append_table_to_shard() instead of directly calling PostgreSQL's DoCopy(). - Removed the additional check in master_create_empty_shard() function. This check was redundant and erroneous since it was called after the CheckDistributedTable() call. - Modified WorkerTableSize() function to retrieve cstore table shard size correctly. --- .../master/master_stage_protocol.c | 34 ++++++++++++------- .../worker/worker_data_fetch_protocol.c | 12 ++++--- src/include/distributed/master_protocol.h | 1 + 3 files changed, 31 insertions(+), 16 deletions(-) diff --git a/src/backend/distributed/master/master_stage_protocol.c b/src/backend/distributed/master/master_stage_protocol.c index 1c13237cc..388394856 100644 --- a/src/backend/distributed/master/master_stage_protocol.c +++ b/src/backend/distributed/master/master_stage_protocol.c @@ -45,7 +45,8 @@ static bool WorkerCreateShard(char *nodeName, uint32 nodePort, static bool WorkerShardStats(char *nodeName, uint32 nodePort, Oid relationId, char *shardName, uint64 *shardLength, text **shardMinValue, text **shardMaxValue); -static uint64 WorkerTableSize(char *nodeName, uint32 nodePort, char *tableName); +static uint64 WorkerTableSize(char *nodeName, uint32 nodePort, Oid relationId, + char *tableName); static StringInfo WorkerPartitionValue(char *nodeName, uint32 nodePort, Oid relationId, char *shardName, char *selectQuery); @@ -77,16 +78,15 @@ master_create_empty_shard(PG_FUNCTION_ARGS) List *candidateNodeList = NIL; text 
*nullMinValue = NULL; text *nullMaxValue = NULL; - char tableType = 0; char partitionMethod = 0; + char storageType = SHARD_STORAGE_TABLE; Oid relationId = ResolveRelationId(relationNameText); CheckDistributedTable(relationId); - tableType = get_rel_relkind(relationId); - if (tableType != RELKIND_RELATION) + if (CStoreTable(relationId)) { - ereport(ERROR, (errmsg("relation \"%s\" is not a regular table", relationName))); + storageType = SHARD_STORAGE_COLUMNAR; } partitionMethod = PartitionMethod(relationId); @@ -130,7 +130,7 @@ master_create_empty_shard(PG_FUNCTION_ARGS) CreateShardPlacements(shardId, ddlEventList, candidateNodeList, 0, ShardReplicationFactor); - InsertShardRow(relationId, shardId, SHARD_STORAGE_TABLE, nullMinValue, nullMaxValue); + InsertShardRow(relationId, shardId, storageType, nullMinValue, nullMaxValue); PG_RETURN_INT64(shardId); } @@ -171,9 +171,10 @@ master_append_table_to_shard(PG_FUNCTION_ARGS) ShardInterval *shardInterval = LoadShardInterval(shardId); Oid relationId = shardInterval->relationId; + bool cstoreTable = CStoreTable(relationId); char storageType = shardInterval->storageType; - if (storageType != SHARD_STORAGE_TABLE) + if (storageType != SHARD_STORAGE_TABLE && !cstoreTable) { ereport(ERROR, (errmsg("cannot append to shardId " UINT64_FORMAT, shardId), errdetail("The underlying shard is not a regular table"))); @@ -457,7 +458,7 @@ WorkerShardStats(char *nodeName, uint32 nodePort, Oid relationId, char *shardNam PG_TRY(); { - uint64 tableSize = WorkerTableSize(nodeName, nodePort, shardName); + uint64 tableSize = WorkerTableSize(nodeName, nodePort, relationId, shardName); StringInfo minValue = WorkerPartitionValue(nodeName, nodePort, relationId, shardName, SHARD_MIN_VALUE_QUERY); StringInfo maxValue = WorkerPartitionValue(nodeName, nodePort, relationId, @@ -479,18 +480,27 @@ WorkerShardStats(char *nodeName, uint32 nodePort, Oid relationId, char *shardNam /* * WorkerTableSize queries the worker node to extract the disk space used by 
the - * given relation. The function assumes the relation represents a regular table. + * given relation. The function assumes the relation represents a regular table or + * a cstore_fdw table. */ static uint64 -WorkerTableSize(char *nodeName, uint32 nodePort, char *tableName) +WorkerTableSize(char *nodeName, uint32 nodePort, Oid relationId, char *tableName) { uint64 tableSize = 0; List *queryResultList = NIL; StringInfo tableSizeString = NULL; char *tableSizeStringEnd = NULL; - + bool cstoreTable = CStoreTable(relationId); StringInfo tableSizeQuery = makeStringInfo(); - appendStringInfo(tableSizeQuery, SHARD_TABLE_SIZE_QUERY, tableName); + + if (cstoreTable) + { + appendStringInfo(tableSizeQuery, SHARD_CSTORE_TABLE_SIZE_QUERY, tableName); + } + else + { + appendStringInfo(tableSizeQuery, SHARD_TABLE_SIZE_QUERY, tableName); + } queryResultList = ExecuteRemoteQuery(nodeName, nodePort, tableSizeQuery); if (queryResultList == NIL) diff --git a/src/backend/distributed/worker/worker_data_fetch_protocol.c b/src/backend/distributed/worker/worker_data_fetch_protocol.c index 0e5b68a1d..10cd02324 100644 --- a/src/backend/distributed/worker/worker_data_fetch_protocol.c +++ b/src/backend/distributed/worker/worker_data_fetch_protocol.c @@ -994,11 +994,10 @@ worker_append_table_to_shard(PG_FUNCTION_ARGS) StringInfo remoteCopyCommand = NULL; CopyStmt *localCopyCommand = NULL; RangeVar *localTable = NULL; - uint64 copiedRowCount = 0; uint64 shardId = INVALID_SHARD_ID; bool received = false; char *quotedTableName = NULL; - const char *queryString = NULL; + StringInfo queryString = NULL; const char *schemaName = NULL; /* copy remote table's data to this node */ @@ -1032,8 +1031,13 @@ worker_append_table_to_shard(PG_FUNCTION_ARGS) localTable = makeRangeVar((char *) schemaName, shardNameString->data, -1); localCopyCommand = CopyStatement(localTable, localFilePath->data); - DoCopy(localCopyCommand, queryString, &copiedRowCount); - (void) copiedRowCount; + quotedTableName = 
quote_qualified_identifier(schemaName, shardNameString->data); + + queryString = makeStringInfo(); + appendStringInfo(queryString, COPY_IN_COMMAND, quotedTableName, localFilePath->data); + + ProcessUtility((Node *) localCopyCommand, queryString->data, + PROCESS_UTILITY_TOPLEVEL, NULL, None_Receiver, NULL); /* finally delete the temporary file we created */ DeleteFile(localFilePath->data); diff --git a/src/include/distributed/master_protocol.h b/src/include/distributed/master_protocol.h index f39ce865b..daa3b2414 100644 --- a/src/include/distributed/master_protocol.h +++ b/src/include/distributed/master_protocol.h @@ -56,6 +56,7 @@ #define SHARD_MIN_VALUE_QUERY "SELECT min(%s) FROM %s" #define SHARD_MAX_VALUE_QUERY "SELECT max(%s) FROM %s" #define SHARD_TABLE_SIZE_QUERY "SELECT pg_table_size('%s')" +#define SHARD_CSTORE_TABLE_SIZE_QUERY "SELECT cstore_table_size('%s')" #define DROP_REGULAR_TABLE_COMMAND "DROP TABLE IF EXISTS %s" #define DROP_FOREIGN_TABLE_COMMAND "DROP FOREIGN TABLE IF EXISTS %s" #define CREATE_SCHEMA_COMMAND "CREATE SCHEMA IF NOT EXISTS %s" From 0d196d1bf4da89251ea06a79c24014732186c054 Mon Sep 17 00:00:00 2001 From: Jason Petersen Date: Tue, 16 Feb 2016 11:20:18 -0700 Subject: [PATCH 10/12] Ensure router executor acquires proper shard lock Though Citus' Task struct has a shardId field, it doesn't have the same semantics as the one previously used in pg_shard code. The analogous field in the Citus Task is anchorShardId. I've also added an argument check to the relevant locking function to catch future locking attempts which pass an invalid argument. 
--- src/backend/distributed/executor/multi_router_executor.c | 2 +- src/backend/distributed/utils/resource_lock.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/backend/distributed/executor/multi_router_executor.c b/src/backend/distributed/executor/multi_router_executor.c index 5c2f04165..d9900798f 100644 --- a/src/backend/distributed/executor/multi_router_executor.c +++ b/src/backend/distributed/executor/multi_router_executor.c @@ -153,7 +153,7 @@ CommutativityRuleToLockMode(CmdType commandType, bool upsertQuery) static void AcquireExecutorShardLock(Task *task, LOCKMODE lockMode) { - int64 shardId = task->shardId; + int64 shardId = task->anchorShardId; LockShardResource(shardId, lockMode); } diff --git a/src/backend/distributed/utils/resource_lock.c b/src/backend/distributed/utils/resource_lock.c index a2552d46b..fecd703d1 100644 --- a/src/backend/distributed/utils/resource_lock.c +++ b/src/backend/distributed/utils/resource_lock.c @@ -14,9 +14,10 @@ */ #include "postgres.h" - +#include "c.h" #include "miscadmin.h" +#include "distributed/relay_utility.h" #include "distributed/resource_lock.h" #include "storage/lmgr.h" @@ -68,6 +69,8 @@ LockShardResource(uint64 shardId, LOCKMODE lockmode) const bool sessionLock = false; const bool dontWait = false; + AssertArg(shardId != INVALID_SHARD_ID); + SET_LOCKTAG_SHARD_RESOURCE(tag, MyDatabaseId, shardId); (void) LockAcquire(&tag, lockmode, sessionLock, dontWait); From 622eb2999664fcd8897eac402d024267bcc80251 Mon Sep 17 00:00:00 2001 From: Jason Petersen Date: Tue, 16 Feb 2016 11:34:01 -0700 Subject: [PATCH 11/12] Add make targets for applying and checking style Need to change to the project's top srcdir, as citus_indent expects to be able to find styled files using git ls-files, and VPATH builds would otherwise not return any results. 
--- Makefile | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Makefile b/Makefile index e024ef4ff..7131e2ead 100644 --- a/Makefile +++ b/Makefile @@ -42,6 +42,13 @@ clean-csql: install: install-csql clean: clean-csql +# apply or check style +reindent: + cd ${citusdb_abs_top_srcdir} && citus_indent --quiet +check-style: + cd ${citusdb_abs_top_srcdir} && citus_indent --quiet --check +.PHONY: reindent check-style + # depend on install for now check: all install $(MAKE) -C src/test/regress check-full From 6123022ca7824931ecc0e45401f75f7e45e5f843 Mon Sep 17 00:00:00 2001 From: Metin Doslu Date: Fri, 12 Feb 2016 19:12:42 +0200 Subject: [PATCH 12/12] Add check for count distinct on single table subqueries Fixes #314 --- .../planner/multi_logical_optimizer.c | 19 +++++++++++++++++-- .../multi_single_relation_subquery.out | 12 ++++++++++++ .../sql/multi_single_relation_subquery.sql | 12 ++++++++++++ 3 files changed, 41 insertions(+), 2 deletions(-) diff --git a/src/backend/distributed/planner/multi_logical_optimizer.c b/src/backend/distributed/planner/multi_logical_optimizer.c index 7acf85d7e..787937079 100644 --- a/src/backend/distributed/planner/multi_logical_optimizer.c +++ b/src/backend/distributed/planner/multi_logical_optimizer.c @@ -259,6 +259,7 @@ MultiLogicalPlanOptimize(MultiTreeRoot *multiLogicalPlan) MultiTable *tableNode = (MultiTable *) lfirst(tableNodeCell); if (tableNode->relationId == SUBQUERY_RELATION_ID) { + ErrorIfContainsUnsupportedAggregate((MultiNode *) tableNode); TransformSubqueryNode(tableNode); } } @@ -2145,8 +2146,9 @@ ErrorIfUnsupportedAggregateDistinct(Aggref *aggregateExpression, bool distinctSupported = true; List *repartitionNodeList = NIL; Var *distinctColumn = NULL; - - AggregateType aggregateType = GetAggregateType(aggregateExpression->aggfnoid); + List *multiTableNodeList = NIL; + ListCell *multiTableNodeCell = NULL; + AggregateType aggregateType = AGGREGATE_INVALID_FIRST; /* check if logical plan includes a subquery */ List 
*subqueryMultiTableList = SubqueryMultiTableList(logicalPlanNode); @@ -2157,7 +2159,20 @@ ErrorIfUnsupportedAggregateDistinct(Aggref *aggregateExpression, errdetail("distinct in the outermost query is unsupported"))); } + multiTableNodeList = FindNodesOfType(logicalPlanNode, T_MultiTable); + foreach(multiTableNodeCell, multiTableNodeList) + { + MultiTable *multiTable = (MultiTable *) lfirst(multiTableNodeCell); + if (multiTable->relationId == SUBQUERY_RELATION_ID) + { + ereport(ERROR, (errmsg("cannot compute count (distinct)"), + errdetail("Subqueries with aggregate (distinct) are " + "not supported yet"))); + } + } + /* if we have a count(distinct), and distinct approximation is enabled */ + aggregateType = GetAggregateType(aggregateExpression->aggfnoid); if (aggregateType == AGGREGATE_COUNT && CountDistinctErrorRate != DISABLE_DISTINCT_APPROXIMATION) { diff --git a/src/test/regress/expected/multi_single_relation_subquery.out b/src/test/regress/expected/multi_single_relation_subquery.out index 1573e3a0a..3a01f8a3a 100644 --- a/src/test/regress/expected/multi_single_relation_subquery.out +++ b/src/test/regress/expected/multi_single_relation_subquery.out @@ -171,6 +171,18 @@ from l_tax) as distributed_table; ERROR: cannot perform distributed planning on this query DETAIL: Subqueries without aggregates are not supported yet +-- Check that we don't support subqueries with count(distinct). +select + different_shipment_days +from + (select + count(distinct l_shipdate) as different_shipment_days + from + lineitem + group by + l_partkey) as distributed_table; +ERROR: cannot compute count (distinct) +DETAIL: Subqueries with aggregate (distinct) are not supported yet -- Check that if subquery is pulled, we don't error and run query properly. 
SELECT max(l_suppkey) FROM ( diff --git a/src/test/regress/sql/multi_single_relation_subquery.sql b/src/test/regress/sql/multi_single_relation_subquery.sql index 19e56f60e..08853b639 100644 --- a/src/test/regress/sql/multi_single_relation_subquery.sql +++ b/src/test/regress/sql/multi_single_relation_subquery.sql @@ -125,6 +125,18 @@ from group by l_tax) as distributed_table; +-- Check that we don't support subqueries with count(distinct). + +select + different_shipment_days +from + (select + count(distinct l_shipdate) as different_shipment_days + from + lineitem + group by + l_partkey) as distributed_table; + -- Check that if subquery is pulled, we don't error and run query properly. SELECT max(l_suppkey) FROM