Merge pull request #327 from citusdata/apply_formatting_tool

Apply Citus Data coding conventions

cr: @sumedhpathak @anarazel
Jason Petersen 2016-02-16 12:24:42 -07:00
commit 2fcfa011ae
61 changed files with 1519 additions and 1406 deletions

.gitattributes

@@ -20,3 +20,23 @@ src/test/regress/output/*.source -whitespace
# These files are maintained or generated elsewhere. We take them as is.
configure -whitespace

# all C files (implementation and header) use our style...
*.[ch] citus-style

# except these exceptions...
src/backend/distributed/utils/citus_outfuncs.c -citus-style
src/backend/distributed/utils/citus_read.c -citus-style
src/backend/distributed/utils/citus_readfuncs_94.c -citus-style
src/backend/distributed/utils/citus_readfuncs_95.c -citus-style
src/backend/distributed/utils/ruleutils_94.c -citus-style
src/backend/distributed/utils/ruleutils_95.c -citus-style
src/include/distributed/citus_nodes.h -citus-style
src/include/dumputils.h -citus-style

# all csql files use PostgreSQL style...
src/bin/csql/*.[ch] -citus-style

# except these exceptions
src/bin/csql/copy_options.c citus-style
src/bin/csql/stage.[ch] citus-style
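The citus-style attribute means nothing to git on its own; presumably citus_indent asks git which files carry it (for example via git check-attr citus-style -- <file>) and skips anything marked -citus-style, so sources ported from PostgreSQL keep their upstream formatting.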


@@ -42,6 +42,13 @@ clean-csql:
install: install-csql
clean: clean-csql

# apply or check style
reindent:
	cd ${citusdb_abs_top_srcdir} && citus_indent --quiet

check-style:
	cd ${citusdb_abs_top_srcdir} && citus_indent --quiet --check

.PHONY: reindent check-style

# depend on install for now
check: all install
	$(MAKE) -C src/test/regress check-full
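With these targets, make reindent rewrites every citus-style file in place, while make check-style runs the same tool with --check, which presumably reports deviations and exits nonzero instead of rewriting, so it can serve as a CI gate.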


@@ -197,11 +197,11 @@ master_create_distributed_table(PG_FUNCTION_ARGS)
		if (distributionMethod == DISTRIBUTE_BY_APPEND)
		{
			ereport(WARNING, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
							  errmsg("table \"%s\" has a unique constraint",
									 distributedRelationName),
							  errdetail("Unique constraints and primary keys on "
										"append-partitioned tables cannot be enforced."),
							  errhint("Consider using hash partitioning.")));
		}

		attributeCount = indexInfo->ii_NumIndexAttrs;


@@ -136,7 +136,7 @@ static File
FileOpenForTransmit(const char *filename, int fileFlags, int fileMode)
{
	File fileDesc = -1;
	int fileStated = -1;
	struct stat fileStat;

	fileStated = stat(filename, &fileStat);

@@ -145,7 +145,7 @@ FileOpenForTransmit(const char *filename, int fileFlags, int fileMode)
		if (S_ISDIR(fileStat.st_mode))
		{
			ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE),
							errmsg("\"%s\" is a directory", filename)));
		}
	}

@@ -270,18 +280,28 @@ ReceiveCopyData(StringInfo copyData)
	switch (messageType)
	{
		case 'd':       /* CopyData */
		{
			copyDone = false;
			break;
		}

		case 'c':       /* CopyDone */
		{
			copyDone = true;
			break;
		}

		case 'f':       /* CopyFail */
		{
			ereport(ERROR, (errcode(ERRCODE_QUERY_CANCELED),
							errmsg("COPY data failed: %s", pq_getmsgstring(copyData))));
			break;
		}

		case 'H':       /* Flush */
		case 'S':       /* Sync */
		{
			/*
			 * Ignore Flush/Sync for the convenience of client libraries (such
			 * as libpq) that may send those without noticing that the command

@@ -289,11 +299,15 @@ ReceiveCopyData(StringInfo copyData)
			 */
			copyDone = false;
			break;
		}

		default:
		{
			ereport(ERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION),
							errmsg("unexpected message type 0x%02X during COPY data",
								   messageType)));
			break;
		}
	}

	return copyDone;
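This hunk shows the convention the commit applies to every switch statement: each case arm gets its own braced block, so an arm can declare variables without tripping C89's declaration rules or leaking names into sibling arms. A minimal standalone sketch of the style (the message type names here are illustrative, not from the Citus tree):

#include <stdio.h>

typedef enum { MSG_COPY_DATA, MSG_COPY_DONE } MessageType;

static int
HandleMessage(MessageType messageType)
{
	int copyDone = 0;

	switch (messageType)
	{
		case MSG_COPY_DATA:
		{
			/* the braces open a scope, so this declaration stays local */
			int byteCount = 42;
			printf("received %d bytes\n", byteCount);
			break;
		}

		case MSG_COPY_DONE:
		{
			copyDone = 1;
			break;
		}
	}

	return copyDone;
}

int
main(void)
{
	return HandleMessage(MSG_COPY_DONE) ? 0 : 1;
}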


@@ -301,7 +301,7 @@ MultiClientCancel(int32 connectionId)
	if (cancelSent == 0)
	{
		ereport(WARNING, (errmsg("could not issue cancel request"),
						  errdetail("Client error: %s", errorBuffer)));
		canceled = false;
	}

@@ -589,7 +589,7 @@ MultiClientCopyData(int32 connectionId, int32 fileDescriptor)
	while (receiveLength > 0)
	{
		/* received copy data; append these data to file */
		int appended = -1;

		errno = 0;
		appended = write(fileDescriptor, receiveBuffer, receiveLength);

@@ -706,7 +706,7 @@ ClientConnectionReady(PGconn *connection, PostgresPollingStatusType pollingStatu
	fd_set readFileDescriptorSet;
	fd_set writeFileDescriptorSet;
	fd_set exceptionFileDescriptorSet;
	struct timeval immediateTimeout = { 0, 0 };
	int connectionFileDescriptor = PQsocket(connection);

	FD_ZERO(&readFileDescriptorSet);


@@ -157,7 +157,6 @@ multi_ExecutorStart(QueryDesc *queryDesc, int eflags)
			queryDesc->plannedstmt = masterSelectPlan;
			eflags |= EXEC_FLAG_CITUS_MASTER_SELECT;
		}
	}

	/* if the execution is not done for router executor, drop into standard executor */

@@ -253,7 +252,7 @@ multi_ExecutorEnd(QueryDesc *queryDesc)
		RangeTblEntry *rangeTableEntry = linitial(planStatement->rtable);
		Oid masterTableRelid = rangeTableEntry->relid;
		ObjectAddress masterTableObject = { InvalidOid, InvalidOid, 0 };

		masterTableObject.classId = RelationRelationId;
		masterTableObject.objectId = masterTableRelid;


@@ -89,7 +89,7 @@ MultiRealTimeExecute(Job *job)
	}

	/* loop around until all tasks complete, one task fails, or user cancels */
	while (!(allTasksCompleted || taskFailed || QueryCancelPending))
	{
		uint32 taskCount = list_length(taskList);
		uint32 completedTaskCount = 0;

@@ -230,333 +230,338 @@ ManageTaskExecution(Task *task, TaskExecution *taskExecution)
	switch (currentStatus)
	{
		case EXEC_TASK_CONNECT_START:
		{
			int32 connectionId = INVALID_CONNECTION_ID;
			char *nodeDatabase = NULL;

			/* we use the same database name on the master and worker nodes */
			nodeDatabase = get_database_name(MyDatabaseId);

			connectionId = MultiClientConnectStart(nodeName, nodePort, nodeDatabase);
			connectionIdArray[currentIndex] = connectionId;

			/* if valid, poll the connection until the connection is initiated */
			if (connectionId != INVALID_CONNECTION_ID)
			{
				taskStatusArray[currentIndex] = EXEC_TASK_CONNECT_POLL;
				taskExecution->connectPollCount = 0;
				connectAction = CONNECT_ACTION_OPENED;
			}
			else
			{
				AdjustStateForFailure(taskExecution);
			}

			break;
		}

		case EXEC_TASK_CONNECT_POLL:
		{
			int32 connectionId = connectionIdArray[currentIndex];
			ConnectStatus pollStatus = MultiClientConnectPoll(connectionId);

			/*
			 * If the connection is established, we reset the data fetch counter and
			 * change our status to data fetching.
			 */
			if (pollStatus == CLIENT_CONNECTION_READY)
			{
				taskExecution->dataFetchTaskIndex = -1;
				taskStatusArray[currentIndex] = EXEC_FETCH_TASK_LOOP;
			}
			else if (pollStatus == CLIENT_CONNECTION_BUSY)
			{
				taskStatusArray[currentIndex] = EXEC_TASK_CONNECT_POLL;
			}
			else if (pollStatus == CLIENT_CONNECTION_BAD)
			{
				taskStatusArray[currentIndex] = EXEC_TASK_FAILED;
			}

			/* now check if we have been trying to connect for too long */
			taskExecution->connectPollCount++;
			if (pollStatus == CLIENT_CONNECTION_BUSY)
			{
				uint32 maxCount = REMOTE_NODE_CONNECT_TIMEOUT / RemoteTaskCheckInterval;
				uint32 currentCount = taskExecution->connectPollCount;
				if (currentCount >= maxCount)
				{
					ereport(WARNING, (errmsg("could not establish asynchronous "
											 "connection after %u ms",
											 REMOTE_NODE_CONNECT_TIMEOUT)));

					taskStatusArray[currentIndex] = EXEC_TASK_FAILED;
				}
			}

			break;
		}

		case EXEC_TASK_FAILED:
		{
			/*
			 * On task failure, we close the connection. We also reset our execution
			 * status assuming that we might fail on all other worker nodes and come
			 * back to this failed node. In that case, we will retry the same fetch
			 * and compute task(s) on this node again.
			 */
			int32 connectionId = connectionIdArray[currentIndex];
			MultiClientDisconnect(connectionId);
			connectionIdArray[currentIndex] = INVALID_CONNECTION_ID;
			connectAction = CONNECT_ACTION_CLOSED;

			taskStatusArray[currentIndex] = EXEC_TASK_CONNECT_START;

			/* try next worker node */
			AdjustStateForFailure(taskExecution);

			break;
		}

		case EXEC_FETCH_TASK_LOOP:
		{
			List *dataFetchTaskList = task->dependedTaskList;
			int32 dataFetchTaskCount = list_length(dataFetchTaskList);

			/* move to the next data fetch task */
			taskExecution->dataFetchTaskIndex++;

			if (taskExecution->dataFetchTaskIndex < dataFetchTaskCount)
			{
				taskStatusArray[currentIndex] = EXEC_FETCH_TASK_START;
			}
			else
			{
				taskStatusArray[currentIndex] = EXEC_COMPUTE_TASK_START;
			}

			break;
		}

		case EXEC_FETCH_TASK_START:
		{
			List *dataFetchTaskList = task->dependedTaskList;
			int32 dataFetchTaskIndex = taskExecution->dataFetchTaskIndex;
			Task *dataFetchTask = (Task *) list_nth(dataFetchTaskList,
													dataFetchTaskIndex);

			char *dataFetchQuery = dataFetchTask->queryString;
			int32 connectionId = connectionIdArray[currentIndex];

			bool querySent = MultiClientSendQuery(connectionId, dataFetchQuery);
			if (querySent)
			{
				taskStatusArray[currentIndex] = EXEC_FETCH_TASK_RUNNING;
			}
			else
			{
				taskStatusArray[currentIndex] = EXEC_TASK_FAILED;
			}

			break;
		}

		case EXEC_FETCH_TASK_RUNNING:
		{
			int32 connectionId = connectionIdArray[currentIndex];
			ResultStatus resultStatus = MultiClientResultStatus(connectionId);
			QueryStatus queryStatus = CLIENT_INVALID_QUERY;

			/* check if query results are in progress or unavailable */
			if (resultStatus == CLIENT_RESULT_BUSY)
			{
				taskStatusArray[currentIndex] = EXEC_FETCH_TASK_RUNNING;
				break;
			}
			else if (resultStatus == CLIENT_RESULT_UNAVAILABLE)
			{
				taskStatusArray[currentIndex] = EXEC_TASK_FAILED;
				break;
			}

			Assert(resultStatus == CLIENT_RESULT_READY);

			/*
			 * If the query executed successfully, loop onto the next data fetch
			 * task. Else if the query failed, try data fetching on another node.
			 */
			queryStatus = MultiClientQueryStatus(connectionId);
			if (queryStatus == CLIENT_QUERY_DONE)
			{
				taskStatusArray[currentIndex] = EXEC_FETCH_TASK_LOOP;
			}
			else if (queryStatus == CLIENT_QUERY_FAILED)
			{
				taskStatusArray[currentIndex] = EXEC_TASK_FAILED;
			}
			else
			{
				ereport(FATAL, (errmsg("invalid query status: %d", queryStatus)));
			}

			break;
		}

		case EXEC_COMPUTE_TASK_START:
		{
			int32 connectionId = connectionIdArray[currentIndex];
			bool querySent = false;

			/* construct new query to copy query results to stdout */
			char *queryString = task->queryString;
			StringInfo computeTaskQuery = makeStringInfo();
			if (BinaryMasterCopyFormat)
			{
				appendStringInfo(computeTaskQuery, COPY_QUERY_TO_STDOUT_BINARY,
								 queryString);
			}
			else
			{
				appendStringInfo(computeTaskQuery, COPY_QUERY_TO_STDOUT_TEXT,
								 queryString);
			}

			querySent = MultiClientSendQuery(connectionId, computeTaskQuery->data);
			if (querySent)
			{
				taskStatusArray[currentIndex] = EXEC_COMPUTE_TASK_RUNNING;
			}
			else
			{
				taskStatusArray[currentIndex] = EXEC_TASK_FAILED;
			}

			break;
		}

		case EXEC_COMPUTE_TASK_RUNNING:
		{
			int32 connectionId = connectionIdArray[currentIndex];
			ResultStatus resultStatus = MultiClientResultStatus(connectionId);
			QueryStatus queryStatus = CLIENT_INVALID_QUERY;

			/* check if query results are in progress or unavailable */
			if (resultStatus == CLIENT_RESULT_BUSY)
			{
				taskStatusArray[currentIndex] = EXEC_COMPUTE_TASK_RUNNING;
				break;
			}
			else if (resultStatus == CLIENT_RESULT_UNAVAILABLE)
			{
				taskStatusArray[currentIndex] = EXEC_TASK_FAILED;
				break;
			}

			Assert(resultStatus == CLIENT_RESULT_READY);

			/* check if our request to copy query results has been acknowledged */
			queryStatus = MultiClientQueryStatus(connectionId);
			if (queryStatus == CLIENT_QUERY_COPY)
			{
				StringInfo jobDirectoryName = JobDirectoryName(task->jobId);
				StringInfo taskFilename = TaskFilename(jobDirectoryName, task->taskId);

				char *filename = taskFilename->data;
				int fileFlags = (O_APPEND | O_CREAT | O_RDWR | O_TRUNC | PG_BINARY);
				int fileMode = (S_IRUSR | S_IWUSR);

				int32 fileDescriptor = BasicOpenFile(filename, fileFlags, fileMode);
				if (fileDescriptor >= 0)
				{
					/*
					 * All files inside the job directory get automatically cleaned
					 * up on transaction commit or abort.
					 */
					fileDescriptorArray[currentIndex] = fileDescriptor;
					taskStatusArray[currentIndex] = EXEC_COMPUTE_TASK_COPYING;
				}
				else
				{
					ereport(WARNING, (errcode_for_file_access(),
									  errmsg("could not open file \"%s\": %m",
											 filename)));
					taskStatusArray[currentIndex] = EXEC_TASK_FAILED;
				}
			}
			else if (queryStatus == CLIENT_QUERY_FAILED)
			{
				taskStatusArray[currentIndex] = EXEC_TASK_FAILED;
			}
			else
			{
				ereport(FATAL, (errmsg("invalid query status: %d", queryStatus)));
			}

			break;
		}

		case EXEC_COMPUTE_TASK_COPYING:
		{
			int32 connectionId = connectionIdArray[currentIndex];
			int32 fileDesc = fileDescriptorArray[currentIndex];
			int closed = -1;

			/* copy data from worker node, and write to local file */
			CopyStatus copyStatus = MultiClientCopyData(connectionId, fileDesc);

			/* if worker node will continue to send more data, keep reading */
			if (copyStatus == CLIENT_COPY_MORE)
			{
				taskStatusArray[currentIndex] = EXEC_COMPUTE_TASK_COPYING;
			}
			else if (copyStatus == CLIENT_COPY_DONE)
			{
				closed = close(fileDesc);
				fileDescriptorArray[currentIndex] = -1;

				if (closed >= 0)
				{
					taskStatusArray[currentIndex] = EXEC_TASK_DONE;

					/* we are done executing; we no longer need the connection */
					MultiClientDisconnect(connectionId);
					connectionIdArray[currentIndex] = INVALID_CONNECTION_ID;
					connectAction = CONNECT_ACTION_CLOSED;
				}
				else
				{
					ereport(WARNING, (errcode_for_file_access(),
									  errmsg("could not close copied file: %m")));
					taskStatusArray[currentIndex] = EXEC_TASK_FAILED;
				}
			}
			else if (copyStatus == CLIENT_COPY_FAILED)
			{
				taskStatusArray[currentIndex] = EXEC_TASK_FAILED;

				closed = close(fileDesc);
				fileDescriptorArray[currentIndex] = -1;

				if (closed < 0)
				{
					ereport(WARNING, (errcode_for_file_access(),
									  errmsg("could not close copy file: %m")));
				}
			}

			break;
		}

		case EXEC_TASK_DONE:
		{
			/* we are done with this task's execution */
			break;
		}

		default:
		{
			/* we fatal here to avoid leaking client-side resources */
			ereport(FATAL, (errmsg("invalid execution status: %d", currentStatus)));
			break;
		}
	}

	return connectAction;
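A side note on the connect-poll branch above: the timeout is tracked not in wall-clock time but as a count of poll rounds, maxCount = REMOTE_NODE_CONNECT_TIMEOUT / RemoteTaskCheckInterval. A toy calculation with illustrative values (the actual defaults are defined elsewhere in the tree):

#include <stdio.h>

int
main(void)
{
	/* illustrative values only, not the Citus defaults */
	unsigned int connectTimeoutMs = 5000; /* stands in for REMOTE_NODE_CONNECT_TIMEOUT */
	unsigned int checkIntervalMs = 10;    /* stands in for RemoteTaskCheckInterval */

	/* the executor declares the connection failed once this many polls pass */
	unsigned int maxCount = connectTimeoutMs / checkIntervalMs;

	printf("connection abandoned after %u poll rounds\n", maxCount);
	return 0;
}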


@@ -80,6 +80,7 @@ RouterExecutorStart(QueryDesc *queryDesc, int eflags, Task *task)
	queryDesc->estate = executorState;

#if (PG_VERSION_NUM < 90500)

	/* make sure that upsertQuery is false for versions that UPSERT is not available */
	Assert(task->upsertQuery == false);
#endif

@@ -177,14 +178,14 @@ RouterExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long count, Tas
	if (!ScanDirectionIsForward(direction))
	{
		ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						errmsg("scan directions other than forward scans "
							   "are unsupported")));
	}

	if (count != 0)
	{
		ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						errmsg("fetching rows from a query using a cursor "
							   "is unsupported")));
	}

	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

@@ -210,7 +211,7 @@ RouterExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long count, Tas
	else
	{
		ereport(ERROR, (errmsg("unrecognized operation code: %d",
							   (int) operation)));
	}

	if (queryDesc->totaltime != NULL)

@@ -219,9 +220,9 @@ RouterExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long count, Tas
	}

	MemoryContextSwitchTo(oldcontext);
}

/*
 * ExecuteDistributedModify is the main entry point for modifying distributed
 * tables. A distributed modification is successful if any placement of the

@@ -532,9 +533,10 @@ StoreQueryResult(PGconn *connection, TupleDesc tupleDescriptor,
	return true;
}

/*
 * RouterExecutorFinish cleans up after a distributed execution.
 */
void
RouterExecutorFinish(QueryDesc *queryDesc)
{


@@ -303,13 +303,13 @@ AdjustStateForFailure(TaskExecution *taskExecution)
	if (taskExecution->currentNodeIndex < maxNodeIndex)
	{
		taskExecution->currentNodeIndex++;      /* try next worker node */
	}
	else
	{
		taskExecution->currentNodeIndex = 0;    /* go back to the first worker node */
	}

	taskExecution->dataFetchTaskIndex = -1;     /* reset data fetch counter */
	taskExecution->failureCount++;              /* record failure */
}


@@ -39,9 +39,9 @@
 */
struct DropRelationCallbackState
{
	char relkind;
	Oid heapOid;
	bool concurrent;
};

@@ -190,10 +190,10 @@ multi_ProcessUtility(Node *parsetree,
	}
	else if (IsA(parsetree, CreateRoleStmt) && CitusDBHasBeenLoaded())
	{
		ereport(NOTICE, (errmsg("CitusDB does not support CREATE ROLE/USER "
								"for distributed databases"),
						 errdetail("Multiple roles are currently supported "
								   "only for local tables")));
	}

	/* now drop into standard process utility */

@@ -757,7 +757,7 @@ IsAlterTableRenameStmt(RenameStmt *renameStmt)
		isAlterTableRenameStmt = true;
	}
#if (PG_VERSION_NUM >= 90500)
	else if (renameStmt->renameType == OBJECT_TABCONSTRAINT)
	{
		isAlterTableRenameStmt = true;

@@ -905,8 +905,9 @@ ExecuteCommandOnWorkerShards(Oid relationId, const char *commandString,
		}
		else
		{
			ereport(DEBUG2, (errmsg("applied command on shard " UINT64_FORMAT
									" on node %s:%d", shardId, workerName,
									workerPort)));
		}

		isFirstPlacement = false;

@@ -988,6 +989,7 @@ AllFinalizedPlacementsAccessible(Oid relationId)
static void
RangeVarCallbackForDropIndex(const RangeVar *rel, Oid relOid, Oid oldRelOid, void *arg)
{
	/* *INDENT-OFF* */
	HeapTuple	tuple;
	struct DropRelationCallbackState *state;
	char		relkind;

@@ -1022,10 +1024,8 @@ RangeVarCallbackForDropIndex(const RangeVar *rel, Oid relOid, Oid oldRelOid, voi
	classform = (Form_pg_class) GETSTRUCT(tuple);

	if (classform->relkind != relkind)
		ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE),
				errmsg("\"%s\" is not an index", rel->relname)));

	/* Allow DROP to either table owner or schema owner */
	if (!pg_class_ownercheck(relOid, GetUserId()) &&

@@ -1054,4 +1054,5 @@ RangeVarCallbackForDropIndex(const RangeVar *rel, Oid relOid, Oid oldRelOid, voi
		if (OidIsValid(state->heapOid))
			LockRelationOid(state->heapOid, heap_lockmode);
	}
	/* *INDENT-ON* */
}
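The new *INDENT-OFF* / *INDENT-ON* comments bracket code the formatter must leave untouched; here they protect RangeVarCallbackForDropIndex, which is copied nearly verbatim from PostgreSQL and keeps upstream's layout (including unbraced single-statement ifs). A sketch of the marker usage, assuming citus_indent honors these comments the way pgindent does:

/* reformatted by citus_indent like any other code */
static int normalVariable = 0;

/* *INDENT-OFF* */
/* everything between the markers keeps its hand-picked layout */
static int	alignedLikeUpstream = 1;
static int	alsoAligned         = 2;
/* *INDENT-ON* */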


@@ -185,7 +185,7 @@ master_create_worker_shards(PG_FUNCTION_ARGS)
		LockShardDistributionMetadata(shardId, ExclusiveLock);
		CreateShardPlacements(shardId, ddlCommandList, workerNodeList,
							  roundRobinNodeIndex, replicationFactor);
		InsertShardRow(distributedTableId, shardId, shardStorageType,
					   minHashTokenText, maxHashTokenText);


@@ -115,9 +115,9 @@ master_apply_delete_command(PG_FUNCTION_ARGS)
	if ((partitionMethod == DISTRIBUTE_BY_HASH) && (deleteCriteria != NULL))
	{
		ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						errmsg("cannot delete from distributed table"),
						errdetail("Delete statements on hash-partitioned tables "
								  "with where clause is not supported")));
	}

	CheckDeleteCriteria(deleteCriteria);

@@ -138,15 +138,15 @@ master_apply_delete_command(PG_FUNCTION_ARGS)
	else
	{
		deletableShardIntervalList = ShardsMatchingDeleteCriteria(relationId,
																  shardIntervalList,
																  deleteCriteria);
	}

	foreach(shardIntervalCell, deletableShardIntervalList)
	{
		List *shardPlacementList = NIL;
		List *droppedPlacementList = NIL;
		List *lingeringPlacementList = NIL;
		ListCell *shardPlacementCell = NULL;
		ListCell *droppedPlacementCell = NULL;
		ListCell *lingeringPlacementCell = NULL;

@@ -167,7 +167,8 @@ master_apply_delete_command(PG_FUNCTION_ARGS)
		shardPlacementList = ShardPlacementList(shardId);
		foreach(shardPlacementCell, shardPlacementList)
		{
			ShardPlacement *shardPlacement =
				(ShardPlacement *) lfirst(shardPlacementCell);
			char *workerName = shardPlacement->nodeName;
			uint32 workerPort = shardPlacement->nodePort;
			bool dropSuccessful = false;

@@ -176,14 +177,17 @@ master_apply_delete_command(PG_FUNCTION_ARGS)
			char tableType = get_rel_relkind(relationId);
			if (tableType == RELKIND_RELATION)
			{
				appendStringInfo(workerDropQuery, DROP_REGULAR_TABLE_COMMAND,
								 quotedShardName);
			}
			else if (tableType == RELKIND_FOREIGN_TABLE)
			{
				appendStringInfo(workerDropQuery, DROP_FOREIGN_TABLE_COMMAND,
								 quotedShardName);
			}

			dropSuccessful = ExecuteRemoteCommand(workerName, workerPort,
												  workerDropQuery);
			if (dropSuccessful)
			{
				droppedPlacementList = lappend(droppedPlacementList, shardPlacement);

@@ -227,12 +231,13 @@ master_apply_delete_command(PG_FUNCTION_ARGS)
		if (QueryCancelPending)
		{
			ereport(WARNING, (errmsg("cancel requests are ignored during shard "
									 "deletion")));
			QueryCancelPending = false;
		}

		RESUME_INTERRUPTS();
	}

	deleteCriteriaShardCount = list_length(deletableShardIntervalList);
	PG_RETURN_INT32(deleteCriteriaShardCount);

@@ -257,7 +262,7 @@ CheckTableCount(Query *deleteQuery)
static void
CheckDeleteCriteria(Node *deleteCriteria)
{
	bool simpleOpExpression = true;

	if (deleteCriteria == NULL)
	{

@@ -298,15 +303,15 @@ CheckDeleteCriteria(Node *deleteCriteria)
}

/*
 * CheckPartitionColumn checks that the given where clause is based only on the
 * partition key of the given relation id.
 */
static void
CheckPartitionColumn(Oid relationId, Node *whereClause)
{
	Var *partitionColumn = PartitionKey(relationId);
	ListCell *columnCell = NULL;

	List *columnList = pull_var_clause_default(whereClause);
	foreach(columnCell, columnList)

@@ -332,7 +337,7 @@ CheckPartitionColumn(Oid relationId, Node *whereClause)
 */
static List *
ShardsMatchingDeleteCriteria(Oid relationId, List *shardIntervalList,
							 Node *deleteCriteria)
{
	List *dropShardIntervalList = NIL;
	List *deleteCriteriaList = NIL;


@@ -219,7 +219,7 @@ ShardLength(uint64 shardId)
	if (shardPlacementList == NIL)
	{
		ereport(ERROR, (errmsg("could not find length of shard " UINT64_FORMAT, shardId),
						errdetail("Could not find any shard placements for the shard.")));
	}
	else
	{


@@ -49,7 +49,7 @@
/* Shard related configuration */
int ShardReplicationFactor = 2; /* desired replication factor for shards */
int ShardMaxSize = 1048576;     /* maximum size in KB one shard can grow to */
int ShardPlacementPolicy = SHARD_PLACEMENT_ROUND_ROBIN;

@@ -252,7 +252,7 @@ Datum
master_get_new_shardid(PG_FUNCTION_ARGS)
{
	text *sequenceName = cstring_to_text(SHARDID_SEQUENCE_NAME);
	Oid sequenceId = ResolveRelationId(sequenceName);
	Datum sequenceIdDatum = ObjectIdGetDatum(sequenceId);

	/* generate new and unique shardId from sequence */

@@ -281,7 +281,7 @@ master_get_local_first_candidate_nodes(PG_FUNCTION_ARGS)
	if (SRF_IS_FIRSTCALL())
	{
		MemoryContext oldContext = NULL;
		TupleDesc tupleDescriptor = NULL;
		uint32 liveNodeCount = 0;
		bool hasOid = false;

@@ -396,7 +396,7 @@ master_get_round_robin_candidate_nodes(PG_FUNCTION_ARGS)
	if (SRF_IS_FIRSTCALL())
	{
		MemoryContext oldContext = NULL;
		TupleDesc tupleDescriptor = NULL;
		List *workerNodeList = NIL;
		TypeFuncClass resultTypeClass = 0;

@@ -477,7 +477,7 @@ master_get_active_worker_nodes(PG_FUNCTION_ARGS)
	if (SRF_IS_FIRSTCALL())
	{
		MemoryContext oldContext = NULL;
		List *workerNodeList = NIL;
		uint32 workerNodeCount = 0;
		TupleDesc tupleDescriptor = NULL;

@@ -567,7 +567,7 @@ GetTableDDLEvents(Oid relationId)
	Relation pgIndex = NULL;
	SysScanDesc scanDescriptor = NULL;
	ScanKeyData scanKey[1];
	int scanKeyCount = 1;
	HeapTuple heapTuple = NULL;

@@ -695,8 +695,8 @@ hostname_client_addr(void)
	Port *port = MyProcPort;
	char *remoteHost = NULL;
	int remoteHostLen = NI_MAXHOST;
	int flags = NI_NAMEREQD;    /* require fully qualified hostname */
	int nameFound = 0;

	if (port == NULL)
	{

@@ -709,10 +709,15 @@ hostname_client_addr(void)
#ifdef HAVE_IPV6
		case AF_INET6:
#endif
		{
			break;
		}

		default:
		{
			ereport(ERROR, (errmsg("invalid address family in connection")));
			break;
		}
	}

	remoteHost = palloc0(remoteHostLen);


@@ -93,7 +93,7 @@ master_create_empty_shard(PG_FUNCTION_ARGS)
	if (partitionMethod == DISTRIBUTE_BY_HASH)
	{
		ereport(ERROR, (errmsg("relation \"%s\" is a hash partitioned table",
							   relationName),
						errdetail("We currently don't support creating shards "
								  "on hash-partitioned tables")));
	}

@@ -128,7 +128,7 @@ master_create_empty_shard(PG_FUNCTION_ARGS)
	}

	CreateShardPlacements(shardId, ddlEventList, candidateNodeList, 0,
						  ShardReplicationFactor);

	InsertShardRow(relationId, shardId, SHARD_STORAGE_TABLE, nullMinValue, nullMaxValue);

@@ -361,7 +361,7 @@ CheckDistributedTable(Oid relationId)
 */
void
CreateShardPlacements(int64 shardId, List *ddlEventList, List *workerNodeList,
					  int workerStartIndex, int replicationFactor)
{
	int attemptCount = replicationFactor;
	int workerNodeCount = list_length(workerNodeList);

@@ -393,7 +393,7 @@ CreateShardPlacements(int64 shardId, List *ddlEventList, List *workerNodeList,
	else
	{
		ereport(WARNING, (errmsg("could not create shard on \"%s:%u\"",
								 nodeName, nodePort)));
	}

	if (placementsCreated >= replicationFactor)

@@ -406,7 +406,7 @@ CreateShardPlacements(int64 shardId, List *ddlEventList, List *workerNodeList,
	if (placementsCreated < replicationFactor)
	{
		ereport(ERROR, (errmsg("could only create %u of %u of required shard replicas",
							   placementsCreated, replicationFactor)));
	}
}


@@ -393,6 +393,7 @@ DistributedModifyTask(Query *query)
		query->onConflict = RebuildOnConflict(relationId, query->onConflict);
	}
#else

	/* always set to false for PG_VERSION_NUM < 90500 */
	upsertQuery = false;
#endif

@@ -414,6 +415,7 @@ DistributedModifyTask(Query *query)

#if (PG_VERSION_NUM >= 90500)

/*
 * RebuildOnConflict rebuilds OnConflictExpr for correct deparsing. The function
 * makes WHERE clause elements explicit and filters dropped columns

@@ -433,7 +435,7 @@ RebuildOnConflict(Oid relationId, OnConflictExpr *originalOnConflict)
	/* Convert onConflictWhere qualifiers to an explicitly and'd clause */
	updatedOnConflict->onConflictWhere =
		(Node *) make_ands_explicit((List *) onConflictWhere);

	/*
	 * Here we handle dropped columns on the distributed table. onConflictSet

@@ -448,7 +450,7 @@ RebuildOnConflict(Oid relationId, OnConflictExpr *originalOnConflict)
	foreach(targetEntryCell, onConflictSet)
	{
		TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell);
		FormData_pg_attribute *tableAttribute = tableAttributes[targetEntry->resno - 1];

		/* skip dropped columns */
		if (tableAttribute->attisdropped)

@@ -468,6 +470,8 @@ RebuildOnConflict(Oid relationId, OnConflictExpr *originalOnConflict)
	return updatedOnConflict;
}

#endif
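The #if (PG_VERSION_NUM >= 90500) guard around RebuildOnConflict is the tree's standard way of compiling against multiple PostgreSQL releases; ON CONFLICT only exists from 9.5 on. A minimal sketch of the pattern (using a stand-in macro, since the real value comes from the PostgreSQL headers):

#include <stdio.h>

/* stand-in: in the tree, PG_VERSION_NUM comes from the PostgreSQL headers */
#define PG_VERSION_NUM 90500

#if (PG_VERSION_NUM >= 90500)
#define UPSERT_AVAILABLE 1
#else
/* always set to false for PG_VERSION_NUM < 90500 */
#define UPSERT_AVAILABLE 0
#endif

int
main(void)
{
	printf("upsert available: %d\n", UPSERT_AVAILABLE);
	return 0;
}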


@@ -45,7 +45,7 @@ MultiExplainOneQuery(Query *query, IntoClause *into, ExplainState *es,
	if (localQuery)
	{
		PlannedStmt *plan = NULL;
		instr_time planstart;
		instr_time planduration;

		INSTR_TIME_SET_CURRENT(planstart);


@@ -33,18 +33,18 @@
/* Config variables managed via guc.c */
int LargeTableShardCount = 4;   /* shard counts for a large table */
bool LogMultiJoinOrder = false; /* print join order as a debugging aid */

/* Function pointer type definition for join rule evaluation functions */
typedef JoinOrderNode *(*RuleEvalFunction) (JoinOrderNode *currentJoinNode,
											TableEntry *candidateTable,
											List *candidateShardList,
											List *applicableJoinClauses,
											JoinType joinType);

static char *RuleNameArray[JOIN_RULE_LAST] = { 0 }; /* ordered join rule names */
static RuleEvalFunction RuleEvalFunctionArray[JOIN_RULE_LAST] = { 0 }; /* join rules */

/* Local functions forward declarations */

@@ -54,7 +54,8 @@ static bool JoinExprListWalker(Node *node, List **joinList);
static bool ExtractLeftMostRangeTableIndex(Node *node, int *rangeTableIndex);
static List * MergeShardIntervals(List *leftShardIntervalList,
								  List *rightShardIntervalList, JoinType joinType);
static bool ShardIntervalsMatch(List *leftShardIntervalList,
								List *rightShardIntervalList);
static List * LoadSortedShardIntervalList(Oid relationId);
static List * JoinOrderForTable(TableEntry *firstTable, List *tableEntryList,
								List *joinClauseList);

@@ -68,31 +69,41 @@ static List * TableEntryListDifference(List *lhsTableList, List *rhsTableList);
static TableEntry * FindTableEntry(List *tableEntryList, uint32 tableId);

/* Local functions forward declarations for join evaluations */
static JoinOrderNode * EvaluateJoinRules(List *joinedTableList,
										 JoinOrderNode *currentJoinNode,
										 TableEntry *candidateTable,
										 List *candidateShardList,
										 List *joinClauseList, JoinType joinType);
static List * RangeTableIdList(List *tableList);
static RuleEvalFunction JoinRuleEvalFunction(JoinRuleType ruleType);
static char * JoinRuleName(JoinRuleType ruleType);
static JoinOrderNode * BroadcastJoin(JoinOrderNode *joinNode, TableEntry *candidateTable,
									 List *candidateShardList,
									 List *applicableJoinClauses,
									 JoinType joinType);
static JoinOrderNode * LocalJoin(JoinOrderNode *joinNode, TableEntry *candidateTable,
								 List *candidateShardList, List *applicableJoinClauses,
								 JoinType joinType);
static bool JoinOnColumns(Var *currentPartitioncolumn, Var *candidatePartitionColumn,
						  List *joinClauseList);
static JoinOrderNode * SinglePartitionJoin(JoinOrderNode *joinNode,
										   TableEntry *candidateTable,
										   List *candidateShardList,
										   List *applicableJoinClauses,
										   JoinType joinType);
static JoinOrderNode * DualPartitionJoin(JoinOrderNode *joinNode,
										 TableEntry *candidateTable,
										 List *candidateShardList,
										 List *applicableJoinClauses,
										 JoinType joinType);
static JoinOrderNode * CartesianProduct(JoinOrderNode *joinNode,
										TableEntry *candidateTable,
										List *candidateShardList,
										List *applicableJoinClauses,
										JoinType joinType);
static JoinOrderNode * MakeJoinOrderNode(TableEntry *tableEntry, JoinRuleType
										 joinRuleType, Var *partitionColumn,
										 char partitionMethod);

/*

@@ -106,7 +117,7 @@ List *
FixedJoinOrderList(FromExpr *fromExpr, List *tableEntryList)
{
	List *joinList = NIL;
	ListCell *joinCell = NULL;
	List *joinWhereClauseList = NIL;
	List *joinOrderList = NIL;
	List *joinedTableList = NIL;

@@ -199,7 +210,6 @@ FixedJoinOrderList(FromExpr *fromExpr, List *tableEntryList)
								   "query"),
							errdetail("Shards of relations in outer join queries "
									  "must have 1-to-1 shard partitioning")));
		}
	}
	else

@@ -439,7 +449,7 @@ MergeShardIntervals(List *leftShardIntervalList, List *rightShardIntervalList,
		bool nextMaxSmaller = comparisonResult > 0;

		if ((shardUnion && nextMaxLarger) ||
			(!shardUnion && nextMaxSmaller))
		{
			newShardInterval->maxValue = datumCopy(nextMax, typeByValue, typeLen);
		}

@@ -586,7 +596,8 @@ ShardIntervalsMatch(List *leftShardIntervalList, List *rightShardIntervalList)
		nextRightIntervalCell = lnext(rightShardIntervalCell);
		if (nextRightIntervalCell != NULL)
		{
			ShardInterval *nextRightInterval =
				(ShardInterval *) lfirst(nextRightIntervalCell);
			shardIntervalsIntersect = ShardIntervalsOverlap(leftInterval,
															nextRightInterval);
			if (shardIntervalsIntersect)

@@ -1009,7 +1020,7 @@ EvaluateJoinRules(List *joinedTableList, JoinOrderNode *currentJoinNode,
	uint32 candidateTableId = 0;
	List *joinedTableIdList = NIL;
	List *applicableJoinClauses = NIL;
	uint32 lowestValidIndex = JOIN_RULE_INVALID_FIRST + 1;
	uint32 highestValidIndex = JOIN_RULE_LAST - 1;
	uint32 ruleIndex = 0;

@@ -1028,11 +1039,11 @@ EvaluateJoinRules(List *joinedTableList, JoinOrderNode *currentJoinNode,
		JoinRuleType ruleType = (JoinRuleType) ruleIndex;
		RuleEvalFunction ruleEvalFunction = JoinRuleEvalFunction(ruleType);

		nextJoinNode = (*ruleEvalFunction)(currentJoinNode,
										   candidateTable,
										   candidateShardList,
										   applicableJoinClauses,
										   joinType);

		/* break after finding the first join rule that applies */
		if (nextJoinNode != NULL)
View File

@@ -91,7 +91,8 @@ static void ParentSetNewChild(MultiNode *parentNode, MultiNode *oldChildNode,
 /* Local functions forward declarations for aggregate expressions */
 static void ApplyExtendedOpNodes(MultiExtendedOp *originalNode,
-                                 MultiExtendedOp *masterNode, MultiExtendedOp *workerNode);
+                                 MultiExtendedOp *masterNode,
+                                 MultiExtendedOp *workerNode);
 static void TransformSubqueryNode(MultiTable *subqueryNode);
 static MultiExtendedOp * MasterExtendedOpNode(MultiExtendedOp *originalOpNode);
 static Node * MasterAggregateMutator(Node *originalNode, AttrNumber *columnId);
@@ -117,7 +118,8 @@ static void ErrorIfUnsupportedArrayAggregate(Aggref *arrayAggregateExpression);
 static void ErrorIfUnsupportedAggregateDistinct(Aggref *aggregateExpression,
                                                 MultiNode *logicalPlanNode);
 static Var * AggregateDistinctColumn(Aggref *aggregateExpression);
-static bool TablePartitioningSupportsDistinct(List *tableNodeList, MultiExtendedOp *opNode,
+static bool TablePartitioningSupportsDistinct(List *tableNodeList,
+                                              MultiExtendedOp *opNode,
                                               Var *distinctColumn);
 static bool GroupedByColumn(List *groupClauseList, List *targetList, Var *column);
@@ -637,7 +639,7 @@ Commutative(MultiUnaryNode *parentNode, MultiUnaryNode *childNode)
 {
     PushDownStatus pushDownStatus = PUSH_DOWN_NOT_VALID;
     CitusNodeTag parentNodeTag = CitusNodeTag(parentNode);
     CitusNodeTag childNodeTag = CitusNodeTag(childNode);

     /* we cannot be commutative with non-query operators */
     if (childNodeTag == T_MultiTreeRoot || childNodeTag == T_MultiTable)
@@ -692,7 +694,7 @@ Distributive(MultiUnaryNode *parentNode, MultiBinaryNode *childNode)
 {
     PushDownStatus pushDownStatus = PUSH_DOWN_NOT_VALID;
     CitusNodeTag parentNodeTag = CitusNodeTag(parentNode);
     CitusNodeTag childNodeTag = CitusNodeTag(childNode);

     /* special condition checks for partition operator are not implemented */
     Assert(parentNodeTag != T_MultiPartition);
@@ -751,7 +753,7 @@ Factorizable(MultiBinaryNode *parentNode, MultiUnaryNode *childNode)
 {
     PullUpStatus pullUpStatus = PULL_UP_NOT_VALID;
     CitusNodeTag parentNodeTag = CitusNodeTag(parentNode);
     CitusNodeTag childNodeTag = CitusNodeTag(childNode);

     /*
      * The following nodes are factorizable with their parents, but we don't
@@ -1220,7 +1222,7 @@ MasterExtendedOpNode(MultiExtendedOp *originalOpNode)
         bool hasAggregates = contain_agg_clause((Node *) originalExpression);
         if (hasAggregates)
         {
-            Node *newNode = MasterAggregateMutator((Node*) originalExpression,
+            Node *newNode = MasterAggregateMutator((Node *) originalExpression,
                                                    &columnId);
             newExpression = (Expr *) newNode;
         }
@@ -1826,7 +1828,7 @@ WorkerAggregateExpressionList(Aggref *originalAggregate)
 static AggregateType
 GetAggregateType(Oid aggFunctionId)
 {
     char *aggregateProcName = NULL;
     uint32 aggregateCount = 0;
     uint32 aggregateIndex = 0;
     bool found = false;
@@ -1980,22 +1982,30 @@ CountDistinctHashFunctionName(Oid argumentType)
     switch (argumentType)
     {
         case INT4OID:
+        {
             hashFunctionName = pstrdup(HLL_HASH_INTEGER_FUNC_NAME);
             break;
+        }

         case INT8OID:
+        {
             hashFunctionName = pstrdup(HLL_HASH_BIGINT_FUNC_NAME);
             break;
+        }

         case TEXTOID:
         case BPCHAROID:
         case VARCHAROID:
+        {
             hashFunctionName = pstrdup(HLL_HASH_TEXT_FUNC_NAME);
             break;
+        }

         default:
+        {
             hashFunctionName = pstrdup(HLL_HASH_ANY_FUNC_NAME);
             break;
+        }
     }

     return hashFunctionName;
@@ -2479,7 +2489,7 @@ ErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerQueryHasLimit)
     if (subqueryTree->setOperations)
     {
         SetOperationStmt *setOperationStatement =
             (SetOperationStmt *) subqueryTree->setOperations;

         if (setOperationStatement->op == SETOP_UNION)
         {
@@ -2563,7 +2573,7 @@ ErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerQueryHasLimit)
         List *joinTreeTableIndexList = NIL;
         uint32 joiningTableCount = 0;

-        ExtractRangeTableIndexWalker((Node*) subqueryTree->jointree,
+        ExtractRangeTableIndexWalker((Node *) subqueryTree->jointree,
                                      &joinTreeTableIndexList);
         joiningTableCount = list_length(joinTreeTableIndexList);
@@ -2587,7 +2597,7 @@ ErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerQueryHasLimit)
         List *distinctTargetEntryList = GroupTargetEntryList(distinctClauseList,
                                                              targetEntryList);
         bool distinctOnPartitionColumn =
             TargetListOnPartitionColumn(subqueryTree, distinctTargetEntryList);

         if (!distinctOnPartitionColumn)
         {
             preconditionsSatisfied = false;
@@ -2609,7 +2619,7 @@ ErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerQueryHasLimit)
     foreach(rangeTableEntryCell, subqueryEntryList)
     {
         RangeTblEntry *rangeTableEntry =
             (RangeTblEntry *) lfirst(rangeTableEntryCell);

         Query *innerSubquery = rangeTableEntry->subquery;
         ErrorIfCannotPushdownSubquery(innerSubquery, outerQueryHasLimit);
@@ -2639,7 +2649,7 @@ ErrorIfUnsupportedTableCombination(Query *queryTree)
      * Extract all range table indexes from the join tree. Note that sub-queries
      * that get pulled up by PostgreSQL don't appear in this join tree.
      */
-    ExtractRangeTableIndexWalker((Node*) queryTree->jointree, &joinTreeTableIndexList);
+    ExtractRangeTableIndexWalker((Node *) queryTree->jointree, &joinTreeTableIndexList);

     foreach(joinTreeTableIndexCell, joinTreeTableIndexList)
     {
         /*
@@ -2768,7 +2778,7 @@ ErrorIfUnsupportedUnionQuery(Query *unionQuery)
     leftQueryOnPartitionColumn = TargetListOnPartitionColumn(leftQuery,
                                                              leftQuery->targetList);
     rightQueryOnPartitionColumn = TargetListOnPartitionColumn(rightQuery,
                                                               rightQuery->targetList);

     if (!(leftQueryOnPartitionColumn && rightQueryOnPartitionColumn))
     {
@@ -2807,7 +2817,7 @@ GroupTargetEntryList(List *groupClauseList, List *targetEntryList)
     {
         SortGroupClause *groupClause = (SortGroupClause *) lfirst(groupClauseCell);
         TargetEntry *groupTargetEntry =
             get_sortgroupclause_tle(groupClause, targetEntryList);

         groupTargetEntryList = lappend(groupTargetEntryList, groupTargetEntry);
     }
@@ -2890,7 +2900,7 @@ IsPartitionColumnRecursive(Expr *columnExpression, Query *query)
     else if (IsA(columnExpression, FieldSelect))
     {
         FieldSelect *compositeField = (FieldSelect *) columnExpression;
         Expr *fieldExpression = compositeField->arg;

         if (IsA(fieldExpression, Var))
         {
@@ -2909,7 +2919,7 @@ IsPartitionColumnRecursive(Expr *columnExpression, Query *query)
         return false;
     }

     rangeTableEntryIndex = candidateColumn->varno - 1;
     rangeTableEntry = list_nth(rangetableList, rangeTableEntryIndex);

     if (rangeTableEntry->rtekind == RTE_RELATION)
@@ -2980,7 +2990,7 @@ CompositeFieldRecursive(Expr *expression, Query *query)
         return NULL;
     }

     rangeTableEntryIndex = candidateColumn->varno - 1;
     rangeTableEntry = list_nth(rangetableList, rangeTableEntryIndex);

     if (rangeTableEntry->rtekind == RTE_SUBQUERY)
@@ -3019,7 +3029,7 @@ FullCompositeFieldList(List *compositeFieldList)
     uint32 fieldIndex = 0;
     ListCell *fieldSelectCell = NULL;

-    foreach (fieldSelectCell, compositeFieldList)
+    foreach(fieldSelectCell, compositeFieldList)
     {
         FieldSelect *fieldSelect = (FieldSelect *) lfirst(fieldSelectCell);
         uint32 compositeFieldIndex = 0;
@@ -3226,9 +3236,10 @@ SupportedLateralQuery(Query *parentQuery, Query *lateralQuery)
         if (outerColumnIsPartitionColumn && localColumnIsPartitionColumn)
         {
             FieldSelect *outerCompositeField =
                 CompositeFieldRecursive(outerQueryExpression, parentQuery);
             FieldSelect *localCompositeField =
                 CompositeFieldRecursive(localQueryExpression, lateralQuery);

             /*
              * If partition columns are composite fields, add them to list to
              * check later if all composite fields are used.
@@ -3251,12 +3262,12 @@ SupportedLateralQuery(Query *parentQuery, Query *lateralQuery)
     }

     /* check composite fields */
-    if(!supportedLateralQuery)
+    if (!supportedLateralQuery)
     {
         bool outerFullCompositeFieldList =
             FullCompositeFieldList(outerCompositeFieldList);
         bool localFullCompositeFieldList =
             FullCompositeFieldList(localCompositeFieldList);

         if (outerFullCompositeFieldList && localFullCompositeFieldList)
         {
@@ -3301,15 +3312,15 @@ JoinOnPartitionColumn(Query *query)
         if (isLeftColumnPartitionColumn && isRightColumnPartitionColumn)
         {
             FieldSelect *leftCompositeField =
                 CompositeFieldRecursive(leftArgument, query);
             FieldSelect *rightCompositeField =
                 CompositeFieldRecursive(rightArgument, query);

             /*
              * If partition columns are composite fields, add them to list to
              * check later if all composite fields are used.
              */
-            if(leftCompositeField && rightCompositeField)
+            if (leftCompositeField && rightCompositeField)
             {
                 leftCompositeFieldList = lappend(leftCompositeFieldList,
                                                  leftCompositeField);
@@ -3318,7 +3329,7 @@ JoinOnPartitionColumn(Query *query)
             }

             /* if both sides are not composite fields, they are normal columns */
-            if(!(leftCompositeField && rightCompositeField))
+            if (!(leftCompositeField && rightCompositeField))
             {
                 joinOnPartitionColumn = true;
                 break;
@@ -3327,12 +3338,12 @@ JoinOnPartitionColumn(Query *query)
     }

     /* check composite fields */
-    if(!joinOnPartitionColumn)
+    if (!joinOnPartitionColumn)
     {
         bool leftFullCompositeFieldList =
             FullCompositeFieldList(leftCompositeFieldList);
         bool rightFullCompositeFieldList =
             FullCompositeFieldList(rightCompositeFieldList);

         if (leftFullCompositeFieldList && rightFullCompositeFieldList)
         {
@@ -3409,7 +3420,7 @@ ErrorIfUnsupportedShardDistribution(Query *query)
         /* check if this table has 1-1 shard partitioning with first table */
         coPartitionedTables = CoPartitionedTables(firstShardIntervalList,
                                                   currentShardIntervalList);

         if (!coPartitionedTables)
         {
             ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
@@ -3437,7 +3448,7 @@ RelationIdList(Query *query)
     foreach(tableEntryCell, tableEntryList)
     {
         TableEntry *tableEntry = (TableEntry *) lfirst(tableEntryCell);
         Oid relationId = tableEntry->relationId;

         relationIdList = list_append_unique_oid(relationIdList, relationId);
@@ -3617,7 +3628,7 @@ ExtractQueryWalker(Node *node, List **queryList)
         Query *query = (Query *) node;
         (*queryList) = lappend(*queryList, query);

         walkerResult = query_tree_walker(query, ExtractQueryWalker, queryList,
                                          QTW_EXAMINE_RTES);
     }
@@ -3641,7 +3652,7 @@ LeafQuery(Query *queryTree)
      * Extract all range table indexes from the join tree. Note that sub-queries
      * that get pulled up by PostgreSQL don't appear in this join tree.
      */
-    ExtractRangeTableIndexWalker((Node*) queryTree->jointree, &joinTreeTableIndexList);
+    ExtractRangeTableIndexWalker((Node *) queryTree->jointree, &joinTreeTableIndexList);

     foreach(joinTreeTableIndexCell, joinTreeTableIndexList)
     {
         /*
@@ -3725,7 +3736,7 @@ PartitionColumnOpExpressionList(Query *query)
         }
         else if (IsA(leftArgument, Const) && IsA(rightArgument, Var))
         {
             candidatePartitionColumn = (Var *) rightArgument;
         }
         else
         {

View File

@@ -39,11 +39,11 @@ bool SubqueryPushdown = false; /* is subquery pushdown enabled */

 /* Function pointer type definition for apply join rule functions */
-typedef MultiNode * (*RuleApplyFunction) (MultiNode *leftNode, MultiNode *rightNode,
+typedef MultiNode *(*RuleApplyFunction) (MultiNode *leftNode, MultiNode *rightNode,
                                           Var *partitionColumn, JoinType joinType,
                                           List *joinClauses);

-static RuleApplyFunction RuleApplyFunctionArray[JOIN_RULE_LAST] = {0}; /* join rules */
+static RuleApplyFunction RuleApplyFunctionArray[JOIN_RULE_LAST] = { 0 }; /* join rules */

 /* Local functions forward declarations */
 static MultiNode * MultiPlanTree(Query *queryTree);
@@ -157,7 +157,7 @@ SubqueryEntryList(Query *queryTree)
      * only walk over range table entries at this level and do not recurse into
      * subqueries.
      */
-    ExtractRangeTableIndexWalker((Node*) queryTree->jointree, &joinTreeTableIndexList);
+    ExtractRangeTableIndexWalker((Node *) queryTree->jointree, &joinTreeTableIndexList);

     foreach(joinTreeTableIndexCell, joinTreeTableIndexList)
     {
         /*
@@ -285,6 +285,7 @@ MultiPlanTree(Query *queryTree)
     else
     {
         bool hasOuterJoin = false;
+
         /*
          * We calculate the join order using the list of tables in the query and
          * the join clauses between them. Note that this function owns the table
@@ -465,6 +466,7 @@ ErrorIfQueryNotSupported(Query *queryTree)
 #if (PG_VERSION_NUM >= 90500)
+
 /* HasTablesample returns true if the query contains tablesample */
 static bool
 HasTablesample(Query *queryTree)
@@ -485,6 +487,8 @@ HasTablesample(Query *queryTree)
     return hasTablesample;
 }
+
+
 #endif
@@ -529,7 +533,8 @@ HasUnsupportedJoinWalker(Node *node, void *context)
  * ErrorIfSubqueryNotSupported checks that we can perform distributed planning for
  * the given subquery.
  */
-static void ErrorIfSubqueryNotSupported(Query *subqueryTree)
+static void
+ErrorIfSubqueryNotSupported(Query *subqueryTree)
 {
     char *errorDetail = NULL;
     bool preconditionsSatisfied = true;
@@ -587,7 +592,6 @@ HasOuterJoin(Query *queryTree)
 static bool
 HasOuterJoinWalker(Node *node, void *context)
 {
     bool hasOuterJoin = false;

     if (node == NULL)
     {
@@ -657,7 +661,7 @@ HasComplexRangeTableType(Query *queryTree)
      * Extract all range table indexes from the join tree. Note that sub-queries
      * that get pulled up by PostgreSQL don't appear in this join tree.
      */
-    ExtractRangeTableIndexWalker((Node*) queryTree->jointree, &joinTreeTableIndexList);
+    ExtractRangeTableIndexWalker((Node *) queryTree->jointree, &joinTreeTableIndexList);

     foreach(joinTreeTableIndexCell, joinTreeTableIndexList)
     {
         /*
@@ -675,7 +679,7 @@ HasComplexRangeTableType(Query *queryTree)
          * subquery.
          */
         if (rangeTableEntry->rtekind != RTE_RELATION &&
             rangeTableEntry->rtekind != RTE_SUBQUERY)
         {
             hasComplexRangeTableType = true;
         }
@@ -966,7 +970,7 @@ TableEntryList(List *rangeTableList)
     foreach(rangeTableCell, rangeTableList)
     {
         RangeTblEntry *rangeTableEntry = (RangeTblEntry *) lfirst(rangeTableCell);

         if (rangeTableEntry->rtekind == RTE_RELATION)
         {
@@ -1178,8 +1182,8 @@ IsSelectClause(Node *clause)
     /* we currently consider the following nodes as select clauses */
     NodeTag nodeTag = nodeTag(clause);

-    if ( !(nodeTag == T_OpExpr || nodeTag == T_ScalarArrayOpExpr ||
-           nodeTag == T_NullTest || nodeTag == T_BooleanTest) )
+    if (!(nodeTag == T_OpExpr || nodeTag == T_ScalarArrayOpExpr ||
+          nodeTag == T_NullTest || nodeTag == T_BooleanTest))
     {
         return false;
     }
@@ -1317,9 +1321,9 @@ UnaryOperator(MultiNode *node)
 {
     bool unaryOperator = false;

     if (CitusIsA(node, MultiTreeRoot) || CitusIsA(node, MultiTable) ||
         CitusIsA(node, MultiCollect) || CitusIsA(node, MultiSelect) ||
         CitusIsA(node, MultiProject) || CitusIsA(node, MultiPartition) ||
         CitusIsA(node, MultiExtendedOp))
     {
         unaryOperator = true;
@@ -1403,7 +1407,7 @@ FindNodesOfType(MultiNode *node, int type)
     }
     else if (BinaryOperator(node))
     {
         MultiNode *leftChildNode = ((MultiBinaryNode *) node)->leftChildNode;
         MultiNode *rightChildNode = ((MultiBinaryNode *) node)->rightChildNode;

         List *leftChildNodeList = FindNodesOfType(leftChildNode, type);
@@ -1533,9 +1537,9 @@ ExtractRangeTableEntryWalker(Node *node, List **rangeTableList)
 List *
 pull_var_clause_default(Node *node)
 {
     List *columnList = pull_var_clause(node, PVC_RECURSE_AGGREGATES,
                                        PVC_REJECT_PLACEHOLDERS);

     return columnList;
 }
@@ -1552,7 +1556,7 @@ ApplyJoinRule(MultiNode *leftNode, MultiNode *rightNode, JoinRuleType ruleType,
     MultiNode *multiNode = NULL;
     List *applicableJoinClauses = NIL;
     List *leftTableIdList = OutputTableIdList(leftNode);
     List *rightTableIdList = OutputTableIdList(rightNode);
     int rightTableIdCount = 0;
     uint32 rightTableId = 0;
@@ -1567,8 +1571,8 @@ ApplyJoinRule(MultiNode *leftNode, MultiNode *rightNode, JoinRuleType ruleType,
     /* call the join rule application function to create the new join node */
     ruleApplyFunction = JoinRuleApplyFunction(ruleType);
-    multiNode = (*ruleApplyFunction) (leftNode, rightNode, partitionColumn,
-                                      joinType, applicableJoinClauses);
+    multiNode = (*ruleApplyFunction)(leftNode, rightNode, partitionColumn,
+                                     joinType, applicableJoinClauses);

     if (joinType != JOIN_INNER && CitusIsA(multiNode, MultiJoin))
     {
@@ -1918,7 +1922,7 @@ ErrorIfSubqueryJoin(Query *queryTree)
      * Extract all range table indexes from the join tree. Note that sub-queries
      * that get pulled up by PostgreSQL don't appear in this join tree.
      */
-    ExtractRangeTableIndexWalker((Node*) queryTree->jointree, &joinTreeTableIndexList);
+    ExtractRangeTableIndexWalker((Node *) queryTree->jointree, &joinTreeTableIndexList);

     joiningRangeTableCount = list_length(joinTreeTableIndexList);

     if (joiningRangeTableCount > 1)

View File

@@ -168,13 +168,13 @@ BuildAggregatePlan(Query *masterQuery, Plan *subPlan)
     /* finally create the plan */
 #if (PG_VERSION_NUM >= 90500)
     aggregatePlan = make_agg(NULL, aggregateTargetList, NIL, aggregateStrategy,
                              &aggregateCosts, groupColumnCount, groupColumnIdArray,
                              groupColumnOpArray, NIL, rowEstimate, subPlan);
 #else
     aggregatePlan = make_agg(NULL, aggregateTargetList, NIL, aggregateStrategy,
                              &aggregateCosts, groupColumnCount, groupColumnIdArray,
                              groupColumnOpArray, rowEstimate, subPlan);
 #endif

     return aggregatePlan;
@@ -211,7 +211,7 @@ BuildSelectStatement(Query *masterQuery, char *masterTableName,
     rangeTableEntry = copyObject(queryRangeTableEntry);
     rangeTableEntry->rtekind = RTE_RELATION;
     rangeTableEntry->eref = makeAlias(masterTableName, NIL);
     rangeTableEntry->relid = 0; /* to be filled in exec_Start */
     rangeTableEntry->inh = false;
     rangeTableEntry->inFromCl = true;
@@ -220,7 +220,7 @@ BuildSelectStatement(Query *masterQuery, char *masterTableName,
     /* (2) build and initialize sequential scan node */
     sequentialScan = makeNode(SeqScan);
     sequentialScan->scanrelid = 1; /* always one */

     /* (3) add an aggregation plan if needed */
     if (masterQuery->hasAggs || masterQuery->groupClause)

View File

@@ -138,7 +138,7 @@ static OpExpr * MakeOpExpressionWithZeroConst(void);
 static List * BuildRestrictInfoList(List *qualList);
 static List * FragmentCombinationList(List *rangeTableFragmentsList, Query *jobQuery,
                                       List *dependedJobList);
-static JoinSequenceNode * JoinSequenceArray(List * rangeTableFragmentsList,
+static JoinSequenceNode * JoinSequenceArray(List *rangeTableFragmentsList,
                                             Query *jobQuery, List *dependedJobList);
 static bool PartitionedOnColumn(Var *column, List *rangeTableList, List *dependedJobList);
 static void CheckJoinBetweenColumns(OpExpr *joinClause);
@@ -155,7 +155,8 @@ static StringInfo DatumArrayString(Datum *datumArray, uint32 datumCount, Oid dat
 static Task * CreateBasicTask(uint64 jobId, uint32 taskId, TaskType taskType,
                               char *queryString);
 static void UpdateRangeTableAlias(List *rangeTableList, List *fragmentList);
-static Alias * FragmentAlias(RangeTblEntry *rangeTableEntry, RangeTableFragment *fragment);
+static Alias * FragmentAlias(RangeTblEntry *rangeTableEntry,
+                             RangeTableFragment *fragment);
 static uint64 AnchorShardId(List *fragmentList, uint32 anchorRangeTableId);
 static List * PruneSqlTaskDependencies(List *sqlTaskList);
 static List * AssignTaskList(List *sqlTaskList);
@@ -167,7 +168,7 @@ static Task * GreedyAssignTask(WorkerNode *workerNode, List *taskList,
 static List * RoundRobinAssignTaskList(List *taskList);
 static List * RoundRobinReorder(Task *task, List *placementList);
 static List * ReorderAndAssignTaskList(List *taskList,
-                                       List * (*reorderFunction) (Task *, List *));
+                                       List * (*reorderFunction)(Task *, List *));
 static int CompareTasksByShardId(const void *leftElement, const void *rightElement);
 static List * ActiveShardPlacementLists(List *taskList);
 static List * ActivePlacementList(List *placementList);
@@ -309,6 +310,7 @@ BuildJobTree(MultiTreeRoot *multiTree)
                                                     partitionKey, partitionType,
                                                     baseRelationId,
                                                     JOIN_MAP_MERGE_JOB);
+
             /* reset depended job list */
             loopDependedJobList = NIL;
             loopDependedJobList = list_make1(mapMergeJob);
@@ -538,7 +540,7 @@ BuildJobQuery(MultiNode *multiNode, List *dependedJobList)
      * If we are building this query on a repartitioned subquery job then we
      * don't need to update column attributes.
      */
-    if(dependedJobList != NIL)
+    if (dependedJobList != NIL)
     {
         Job *job = (Job *) linitial(dependedJobList);
         if (CitusIsA(job, MapMergeJob))
@@ -628,10 +630,10 @@ BuildJobQuery(MultiNode *multiNode, List *dependedJobList)
     jobQuery->rtable = rangeTableList;
     jobQuery->targetList = targetList;
     jobQuery->jointree = joinTree;
     jobQuery->sortClause = sortClauseList;
     jobQuery->groupClause = groupClauseList;
     jobQuery->limitOffset = limitOffset;
     jobQuery->limitCount = limitCount;
     jobQuery->hasAggs = contain_agg_clause((Node *) targetList);

     return jobQuery;
@@ -718,10 +720,10 @@ BuildReduceQuery(MultiExtendedOp *extendedOpNode, List *dependedJobList)
     reduceQuery->rtable = derivedRangeTableList;
     reduceQuery->targetList = targetList;
     reduceQuery->jointree = joinTree;
     reduceQuery->sortClause = extendedOpNode->sortClauseList;
     reduceQuery->groupClause = extendedOpNode->groupClauseList;
     reduceQuery->limitOffset = extendedOpNode->limitOffset;
     reduceQuery->limitCount = extendedOpNode->limitCount;
     reduceQuery->hasAggs = contain_agg_clause((Node *) targetList);

     return reduceQuery;
@@ -754,7 +756,7 @@ BaseRangeTableList(MultiNode *multiNode)
          */
         MultiTable *multiTable = (MultiTable *) multiNode;
         if (multiTable->relationId != SUBQUERY_RELATION_ID &&
             multiTable->relationId != HEAP_ANALYTICS_SUBQUERY_RELATION_ID)
         {
             RangeTblEntry *rangeTableEntry = makeNode(RangeTblEntry);
             rangeTableEntry->inFromCl = true;
@@ -870,7 +872,7 @@ TargetEntryList(List *expressionList)
         Expr *expression = (Expr *) lfirst(expressionCell);
         TargetEntry *targetEntry = makeTargetEntry(expression,
-                                                   list_length(targetEntryList)+1,
+                                                   list_length(targetEntryList) + 1,
                                                    NULL, false);
         targetEntryList = lappend(targetEntryList, targetEntry);
     }
@@ -1044,7 +1046,7 @@ QueryJoinTree(MultiNode *multiNode, List *dependedJobList, List **rangeTableList
             /* fix the column attributes in ON (...) clauses */
             columnList = pull_var_clause_default((Node *) joinNode->joinClauseList);
-            foreach (columnCell, columnList)
+            foreach(columnCell, columnList)
             {
                 Var *column = (Var *) lfirst(columnCell);
                 UpdateColumnAttributes(column, *rangeTableList, dependedJobList);
@@ -1093,7 +1095,8 @@ QueryJoinTree(MultiNode *multiNode, List *dependedJobList, List **rangeTableList
             uint32 columnCount = (uint32) list_length(dependedTargetList);
             List *columnNameList = DerivedColumnNameList(columnCount, dependedJob->jobId);
-            RangeTblEntry *rangeTableEntry = DerivedRangeTableEntry(multiNode, columnNameList,
+            RangeTblEntry *rangeTableEntry = DerivedRangeTableEntry(multiNode,
+                                                                    columnNameList,
                                                                     tableIdList);
             RangeTblRef *rangeTableRef = makeNode(RangeTblRef);
@@ -1405,10 +1408,10 @@ BuildSubqueryJobQuery(MultiNode *multiNode)
     jobQuery->rtable = rangeTableList;
     jobQuery->targetList = targetList;
     jobQuery->jointree = joinTree;
     jobQuery->sortClause = sortClauseList;
     jobQuery->groupClause = groupClauseList;
     jobQuery->limitOffset = limitOffset;
     jobQuery->limitCount = limitCount;
     jobQuery->hasAggs = contain_agg_clause((Node *) targetList);

     return jobQuery;
@@ -1646,7 +1649,7 @@ static uint64
 UniqueJobId(void)
 {
     text *sequenceName = cstring_to_text(JOBID_SEQUENCE_NAME);
     Oid sequenceId = ResolveRelationId(sequenceName);
     Datum sequenceIdDatum = ObjectIdGetDatum(sequenceId);

     /* generate new and unique jobId from sequence */
@@ -1864,6 +1867,7 @@ SplitPointObject(ShardInterval **shardIntervalArray, uint32 shardIntervalCount)
     return splitPointObject;
 }

+
 /* ------------------------------------------------------------
  * Functions that relate to building and assigning tasks follow
  * ------------------------------------------------------------
@@ -1986,7 +1990,7 @@ SubquerySqlTaskList(Job *job)
     ListCell *rangeTableCell = NULL;
     ListCell *queryCell = NULL;
     Node *whereClauseTree = NULL;
     uint32 taskIdIndex = 1; /* 0 is reserved for invalid taskId */
     uint32 anchorRangeTableId = 0;
     uint32 rangeTableIndex = 0;
     const uint32 fragmentSize = sizeof(RangeTableFragment);
@@ -2036,10 +2040,10 @@ SubquerySqlTaskList(Job *job)
         if (opExpressionList != NIL)
         {
             Var *partitionColumn = PartitionColumn(relationId, tableId);
             List *whereClauseList = ReplaceColumnsInOpExpressionList(opExpressionList,
                                                                      partitionColumn);
             finalShardIntervalList = PruneShardList(relationId, tableId, whereClauseList,
                                                     shardIntervalList);
         }
         else
         {
@@ -2146,7 +2150,7 @@ static List *
 SqlTaskList(Job *job)
 {
     List *sqlTaskList = NIL;
     uint32 taskIdIndex = 1; /* 0 is reserved for invalid taskId */
     uint64 jobId = job->jobId;
     bool anchorRangeTableBasedAssignment = false;
     uint32 anchorRangeTableId = 0;
@@ -2472,8 +2476,8 @@ RangeTableFragmentsList(List *rangeTableList, List *whereClauseList,
             List *shardIntervalList = LoadShardIntervalList(relationId);
             List *prunedShardIntervalList = PruneShardList(relationId, tableId,
                                                            whereClauseList,
                                                            shardIntervalList);

             /*
              * If we prune all shards for one table, query results will be empty.
@@ -2548,7 +2552,7 @@ RangeTableFragmentsList(List *rangeTableList, List *whereClauseList,
  */
 List *
 PruneShardList(Oid relationId, Index tableId, List *whereClauseList,
                List *shardIntervalList)
 {
     List *remainingShardList = NIL;
     ListCell *shardIntervalCell = NULL;
@@ -2653,7 +2657,7 @@ MakeOpExpression(Var *variable, int16 strategyNumber)
     Oid accessMethodId = BTREE_AM_OID;
     Oid operatorId = InvalidOid;
     Oid operatorClassInputType = InvalidOid;
     Const *constantValue = NULL;
     OpExpr *expression = NULL;
     char typeType = 0;
@@ -2679,7 +2683,7 @@ MakeOpExpression(Var *variable, int16 strategyNumber)
     /* Now make the expression with the given variable and a null constant */
     expression = (OpExpr *) make_opclause(operatorId,
                                           InvalidOid, /* no result type yet */
                                           false, /* no return set */
                                           (Expr *) variable,
                                           (Expr *) constantValue,
                                           InvalidOid, collationId);
@@ -2900,7 +2904,7 @@ HashableClauseMutator(Node *originalNode, Var *partitionColumn)
      * If this node is not hashable, continue walking down the expression tree
      * to find and hash clauses which are eligible.
      */
-    if(newNode == NULL)
+    if (newNode == NULL)
     {
         newNode = expression_tree_mutator(originalNode, HashableClauseMutator,
                                           (void *) partitionColumn);
@@ -3045,7 +3049,7 @@ MakeInt4Constant(Datum constantValue)
     bool constantIsNull = false;
     bool constantByValue = true;

     Const *int4Constant = makeConst(constantType, constantTypeMode, constantCollationId,
                                     constantLength, constantValue, constantIsNull,
                                     constantByValue);
     return int4Constant;
@@ -3102,7 +3106,7 @@ UpdateConstraint(Node *baseConstraint, ShardInterval *shardInterval)
     Node *greaterThanExpr = (Node *) lsecond(andExpr->args);

     Node *minNode = get_rightop((Expr *) greaterThanExpr); /* right op */
     Node *maxNode = get_rightop((Expr *) lessThanExpr); /* right op */
     Const *minConstant = NULL;
     Const *maxConstant = NULL;
@@ -3273,7 +3277,7 @@ JoinSequenceArray(List *rangeTableFragmentsList, Query *jobQuery, List *depended
     joinSequenceArray[joinedTableCount].joiningRangeTableId = NON_PRUNABLE_JOIN;
     joinedTableCount++;

-    foreach (joinExprCell, joinExprList)
+    foreach(joinExprCell, joinExprList)
     {
         JoinExpr *joinExpr = (JoinExpr *) lfirst(joinExprCell);
         JoinType joinType = joinExpr->jointype;
@@ -3347,7 +3351,7 @@ JoinSequenceArray(List *rangeTableFragmentsList, Query *jobQuery, List *depended
         if (IS_OUTER_JOIN(joinType))
         {
             int innerRangeTableId = 0;
-            List * tableFragments = NIL;
+            List *tableFragments = NIL;
             int fragmentCount = 0;

             if (joinType == JOIN_RIGHT)
@@ -3500,7 +3504,7 @@ FindRangeTableFragmentsList(List *rangeTableFragmentsList, int tableId)
         if (tableFragments != NIL)
         {
             RangeTableFragment *tableFragment =
-                (RangeTableFragment*) linitial(tableFragments);
+                (RangeTableFragment *) linitial(tableFragments);

             if (tableFragment->rangeTableId == tableId)
             {
                 foundTableFragments = tableFragments;
@@ -3706,7 +3710,7 @@ UniqueFragmentList(List *fragmentList)
         foreach(uniqueFragmentCell, uniqueFragmentList)
         {
             RangeTableFragment *uniqueFragment =
                 (RangeTableFragment *) lfirst(uniqueFragmentCell);

             uint64 *uniqueShardId = uniqueFragment->fragmentReference;
             if (*shardId == *uniqueShardId)
@@ -4046,6 +4050,7 @@ FragmentAlias(RangeTblEntry *rangeTableEntry, RangeTableFragment *fragment)
     return alias;
 }

+
 /*
  * AnchorShardId walks over each fragment in the given fragment list, finds the
  * fragment that corresponds to the given anchor range tableId, and returns this
@@ -4360,7 +4365,7 @@ MergeTaskList(MapMergeJob *mapMergeJob, List *mapTaskList, uint32 taskIdIndex)
         StringInfo intermediateTableQueryString =
             IntermediateTableQueryString(jobId, taskIdIndex, reduceQuery);

-        StringInfo mergeAndRunQueryString= makeStringInfo();
+        StringInfo mergeAndRunQueryString = makeStringInfo();
         appendStringInfo(mergeAndRunQueryString, MERGE_FILES_AND_RUN_QUERY_COMMAND,
                          jobId, taskIdIndex, mergeTableQueryString->data,
                          intermediateTableQueryString->data);
@@ -4686,7 +4691,7 @@ TaskListAppendUnique(List *list, Task *task)
 List *
 TaskListConcatUnique(List *list1, List *list2)
 {
     ListCell *taskCell = NULL;

     foreach(taskCell, list2)
     {
@@ -4960,7 +4965,7 @@ List *
 FirstReplicaAssignTaskList(List *taskList)
 {
     /* No additional reordering need take place for this algorithm */
-    List * (*reorderFunction)(Task *, List *) = NULL;
+    List *(*reorderFunction)(Task *, List *) = NULL;

     taskList = ReorderAndAssignTaskList(taskList, reorderFunction);
@@ -4984,6 +4989,7 @@ RoundRobinAssignTaskList(List *taskList)
     return taskList;
 }

+
 /*
  * RoundRobinReorder implements the core of the round-robin assignment policy.
  * It takes a task and placement list and rotates a copy of the placement list
@@ -5116,7 +5122,8 @@ ActiveShardPlacementLists(List *taskList)
         List *activeShardPlacementList = ActivePlacementList(shardPlacementList);

         /* sort shard placements by their insertion time */
-        activeShardPlacementList = SortList(activeShardPlacementList, CompareShardPlacements);
+        activeShardPlacementList = SortList(activeShardPlacementList,
+                                            CompareShardPlacements);
         shardPlacementLists = lappend(shardPlacementLists, activeShardPlacementList);
     }
@@ -5257,7 +5264,8 @@ AssignDualHashTaskList(List *taskList)
         uint32 replicaIndex = 0;
         for (replicaIndex = 0; replicaIndex < ShardReplicationFactor; replicaIndex++)
         {
-            uint32 assignmentOffset = beginningNodeIndex + assignedTaskIndex + replicaIndex;
+            uint32 assignmentOffset = beginningNodeIndex + assignedTaskIndex +
+                                      replicaIndex;
             uint32 assignmentIndex = assignmentOffset % workerNodeCount;
             WorkerNode *workerNode = list_nth(workerNodeList, assignmentIndex);

View File

@@ -79,7 +79,7 @@ RelayEventExtendNames(Node *parseTree, uint64 shardId)
             AlterTableStmt *alterTableStmt = (AlterTableStmt *) parseTree;
             char **relationName = &(alterTableStmt->relation->relname);
             RangeVar *relation = alterTableStmt->relation; /* for constraints */
             List *commandList = alterTableStmt->cmds;
             ListCell *commandCell = NULL;
@@ -179,10 +179,10 @@ RelayEventExtendNames(Node *parseTree, uint64 shardId)
                 objectType == OBJECT_INDEX || objectType == OBJECT_FOREIGN_TABLE ||
                 objectType == OBJECT_FOREIGN_SERVER)
             {
                 List *relationNameList = NULL;
                 int relationNameListLength = 0;
                 Value *relationNameValue = NULL;
                 char **relationName = NULL;

                 uint32 dropCount = list_length(dropStmt->objects);
                 if (dropCount > 1)
@@ -205,19 +205,30 @@ RelayEventExtendNames(Node *parseTree, uint64 shardId)
                 switch (relationNameListLength)
                 {
                     case 1:
+                    {
                         relationNameValue = linitial(relationNameList);
                         break;
+                    }

                     case 2:
+                    {
                         relationNameValue = lsecond(relationNameList);
                         break;
+                    }

                     case 3:
+                    {
                         relationNameValue = lthird(relationNameList);
                         break;
+                    }

                     default:
+                    {
                         ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR),
                                         errmsg("improper relation name: \"%s\"",
                                                NameListToString(relationNameList))));
                         break;
+                    }
                 }

                 relationName = &(relationNameValue->val.str);
@@ -389,13 +400,13 @@ TypeDropIndexConstraint(const AlterTableCmd *command,
 {
     Relation pgConstraint = NULL;
     SysScanDesc scanDescriptor = NULL;
     ScanKeyData scanKey[1];
     int scanKeyCount = 1;
     HeapTuple heapTuple = NULL;

     char *searchedConstraintName = NULL;
     bool indexConstraint = false;
     Oid relationId = InvalidOid;
     bool failOK = true;

     if (command->subtype != AT_DropConstraint)
@@ -489,7 +500,7 @@ AppendShardIdToConstraintName(AlterTableCmd *command, uint64 shardId)
 void
 AppendShardIdToName(char **name, uint64 shardId)
 {
     char extendedName[NAMEDATALEN];
     uint32 extendedNameLength = 0;

     snprintf(extendedName, NAMEDATALEN, "%s%c" UINT64_FORMAT,

View File

@@ -48,23 +48,23 @@ static void NormalizeWorkerListPath(void);

 /* GUC enum definitions */
 static const struct config_enum_entry task_assignment_policy_options[] = {
-    {"greedy", TASK_ASSIGNMENT_GREEDY, false},
-    {"first-replica", TASK_ASSIGNMENT_FIRST_REPLICA, false},
-    {"round-robin", TASK_ASSIGNMENT_ROUND_ROBIN, false},
-    {NULL, 0, false}
+    { "greedy", TASK_ASSIGNMENT_GREEDY, false },
+    { "first-replica", TASK_ASSIGNMENT_FIRST_REPLICA, false },
+    { "round-robin", TASK_ASSIGNMENT_ROUND_ROBIN, false },
+    { NULL, 0, false }
 };

 static const struct config_enum_entry task_executor_type_options[] = {
-    {"real-time", MULTI_EXECUTOR_REAL_TIME, false},
-    {"task-tracker", MULTI_EXECUTOR_TASK_TRACKER, false},
-    {"router", MULTI_EXECUTOR_ROUTER, false},
-    {NULL, 0, false}
+    { "real-time", MULTI_EXECUTOR_REAL_TIME, false },
+    { "task-tracker", MULTI_EXECUTOR_TASK_TRACKER, false },
+    { "router", MULTI_EXECUTOR_ROUTER, false },
+    { NULL, 0, false }
 };

 static const struct config_enum_entry shard_placement_policy_options[] = {
-    {"local-node-first", SHARD_PLACEMENT_LOCAL_NODE_FIRST, false},
-    {"round-robin", SHARD_PLACEMENT_ROUND_ROBIN, false},
-    {NULL, 0, false}
+    { "local-node-first", SHARD_PLACEMENT_LOCAL_NODE_FIRST, false },
+    { "round-robin", SHARD_PLACEMENT_ROUND_ROBIN, false },
+    { NULL, 0, false }
 };
@@ -206,9 +206,10 @@ RegisterCitusConfigVariables(void)
     DefineCustomBoolVariable(
         "citusdb.expire_cached_shards",
-        gettext_noop("Enables shard cache expiration if a shard's size on disk has changed. "),
-        gettext_noop("When appending to an existing shard, old data may still be cached on "
-                     "other workers. This configuration entry activates automatic "
+        gettext_noop("Enables shard cache expiration if a shard's size on disk has "
+                     "changed."),
+        gettext_noop("When appending to an existing shard, old data may still be cached "
+                     "on other workers. This configuration entry activates automatic "
                      "expiration, but should not be used with manual updates to shards."),
         &ExpireCachedShards,
         false,
@@ -440,11 +441,11 @@ RegisterCitusConfigVariables(void)
         "citusdb.task_assignment_policy",
         gettext_noop("Sets the policy to use when assigning tasks to worker nodes."),
         gettext_noop("The master node assigns tasks to worker nodes based on shard "
                      "locations. This configuration value specifies the policy to "
                      "use when making these assignments. The greedy policy aims to "
                      "evenly distribute tasks across worker nodes, first-replica just "
                      "assigns tasks in the order shard placements were created, "
                      "and the round-robin policy assigns tasks to worker nodes in "
                      "a round-robin fashion."),
         &TaskAssignmentPolicy,
         TASK_ASSIGNMENT_GREEDY,
@@ -488,6 +489,7 @@ RegisterCitusConfigVariables(void)
     /* warn about config items in the citusdb namespace that are not registered above */
     EmitWarningsOnPlaceholders("citusdb");
+
     /* Also warn about citus namespace, as that's a very likely misspelling */
     EmitWarningsOnPlaceholders("citus");
 }
@@ -515,8 +517,10 @@ NormalizeWorkerListPath(void)
     {
         absoluteFileName = malloc(strlen(DataDir) + strlen(WORKER_LIST_FILENAME) + 2);
         if (absoluteFileName == NULL)
+        {
             ereport(FATAL, (errcode(ERRCODE_OUT_OF_MEMORY),
                             errmsg("out of memory")));
+        }

         sprintf(absoluteFileName, "%s/%s", DataDir, WORKER_LIST_FILENAME);
     }
@@ -530,6 +534,7 @@ NormalizeWorkerListPath(void)
                           "environment variable.\n", progname, ConfigFileName)));
     }

-    SetConfigOption("citusdb.worker_list_file", absoluteFileName, PGC_POSTMASTER, PGC_S_OVERRIDE);
+    SetConfigOption("citusdb.worker_list_file", absoluteFileName, PGC_POSTMASTER,
+                    PGC_S_OVERRIDE);
     free(absoluteFileName);
 }

View File

@@ -116,9 +116,9 @@ FakeGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel, Oid foreigntableid,
                    ForeignPath *best_path, List *tlist, List *scan_clauses)
 #else
 static ForeignScan *
-FakeGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel, Oid foreigntableid,
-                   ForeignPath *best_path, List *tlist, List *scan_clauses,
-                   Plan *outer_plan)
+FakeGetForeignPlan(PlannerInfo * root, RelOptInfo * baserel, Oid foreigntableid,
+                   ForeignPath * best_path, List * tlist, List * scan_clauses,
+                   Plan * outer_plan)
 #endif
 {
     Index scan_relid = baserel->relid;
@@ -129,7 +129,7 @@ FakeGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel, Oid foreigntableid,
     return make_foreignscan(tlist, scan_clauses, scan_relid, NIL, NIL);
 #else
     return make_foreignscan(tlist, scan_clauses, scan_relid, NIL, NIL, NIL, NIL,
                             outer_plan);
 #endif
 }

View File

@@ -265,7 +265,7 @@ GetRangeTblKind(RangeTblEntry *rte)
 {
     CitusRTEKind rteKind = CITUS_RTE_RELATION /* invalid */;

-    switch(rte->rtekind)
+    switch (rte->rtekind)
     {
         /* directly rtekind if it's not possibly an extended RTE */
         case RTE_RELATION:
@@ -273,9 +273,13 @@ GetRangeTblKind(RangeTblEntry *rte)
         case RTE_JOIN:
         case RTE_VALUES:
         case RTE_CTE:
+        {
             rteKind = (CitusRTEKind) rte->rtekind;
             break;
+        }

         case RTE_FUNCTION:
+        {
             /*
              * Extract extra data - correct even if a plain RTE_FUNCTION, not
              * an extended one, ExtractRangeTblExtraData handles that case
              */
             ExtractRangeTblExtraData(rte, &rteKind, NULL, NULL, NULL);
             break;
+        }
     }

     return rteKind;

View File

@@ -102,6 +102,7 @@ pg_get_extensiondef_string(Oid tableRelationId)
 static Oid
 get_extension_schema(Oid ext_oid)
 {
+    /* *INDENT-OFF* */
     Oid         result;
     Relation    rel;
     SysScanDesc scandesc;
@@ -131,6 +132,7 @@ get_extension_schema(Oid ext_oid)
     heap_close(rel, AccessShareLock);

     return result;
+    /* *INDENT-ON* */
 }
@@ -186,7 +188,7 @@ AppendOptionListToString(StringInfo stringBuffer, List *optionList)
     foreach(optionCell, optionList)
     {
-        DefElem *option = (DefElem*) lfirst(optionCell);
+        DefElem *option = (DefElem *) lfirst(optionCell);
         char *optionName = option->defname;
         char *optionValue = defGetString(option);
@@ -219,7 +221,7 @@ pg_get_tableschemadef_string(Oid tableRelationId)
     char relationKind = 0;
     TupleDesc tupleDescriptor = NULL;
     TupleConstr *tupleConstraints = NULL;
     int attributeIndex = 0;
     bool firstAttributePrinted = false;
     AttrNumber defaultValueIndex = 0;
     AttrNumber constraintIndex = 0;
@@ -447,21 +449,35 @@ pg_get_tablecolumnoptionsdef_string(Oid tableRelationId)
             switch (attributeForm->attstorage)
             {
                 case 'p':
+                {
                     storageName = "PLAIN";
                     break;
+                }
+
                 case 'e':
+                {
                     storageName = "EXTERNAL";
                     break;
+                }
+
                 case 'm':
+                {
                     storageName = "MAIN";
                     break;
+                }
+
                 case 'x':
+                {
                     storageName = "EXTENDED";
                     break;
+                }
+
                 default:
+                {
                     ereport(ERROR, (errmsg("unrecognized storage type: %c",
                                            attributeForm->attstorage)));
                     break;
+                }
             }

             appendStringInfo(&statement, "ALTER COLUMN %s ",


@ -51,10 +51,10 @@ static void InvalidateDistRelationCacheCallback(Datum argument, Oid relationId);
static HeapTuple LookupDistPartitionTuple(Oid relationId); static HeapTuple LookupDistPartitionTuple(Oid relationId);
static List * LookupDistShardTuples(Oid relationId); static List * LookupDistShardTuples(Oid relationId);
static void GetPartitionTypeInputInfo(char *partitionKeyString, char partitionMethod, static void GetPartitionTypeInputInfo(char *partitionKeyString, char partitionMethod,
Oid *intervalTypeId, int32 *intervalTypeMod); Oid *intervalTypeId, int32 *intervalTypeMod);
static ShardInterval * TupleToShardInterval(HeapTuple heapTuple, static ShardInterval * TupleToShardInterval(HeapTuple heapTuple,
TupleDesc tupleDescriptor, Oid intervalTypeId, TupleDesc tupleDescriptor, Oid intervalTypeId,
int32 intervalTypeMod); int32 intervalTypeMod);
static void CachedRelationLookup(const char *relationName, Oid *cachedOid); static void CachedRelationLookup(const char *relationName, Oid *cachedOid);
@ -87,6 +87,7 @@ IsDistributedTable(Oid relationId)
return cacheEntry->isDistributedTable; return cacheEntry->isDistributedTable;
} }
/* /*
* LoadShardInterval reads shard metadata for given shardId from pg_dist_shard, * LoadShardInterval reads shard metadata for given shardId from pg_dist_shard,
* and converts min/max values in these metadata to their properly typed datum * and converts min/max values in these metadata to their properly typed datum
@ -98,7 +99,7 @@ LoadShardInterval(uint64 shardId)
{ {
ShardInterval *shardInterval; ShardInterval *shardInterval;
SysScanDesc scanDescriptor = NULL; SysScanDesc scanDescriptor = NULL;
ScanKeyData scanKey[1]; ScanKeyData scanKey[1];
int scanKeyCount = 1; int scanKeyCount = 1;
HeapTuple heapTuple = NULL; HeapTuple heapTuple = NULL;
Form_pg_dist_shard shardForm = NULL; Form_pg_dist_shard shardForm = NULL;
@ -127,11 +128,11 @@ LoadShardInterval(uint64 shardId)
partitionEntry = DistributedTableCacheEntry(shardForm->logicalrelid); partitionEntry = DistributedTableCacheEntry(shardForm->logicalrelid);
GetPartitionTypeInputInfo(partitionEntry->partitionKeyString, GetPartitionTypeInputInfo(partitionEntry->partitionKeyString,
partitionEntry->partitionMethod, &intervalTypeId, partitionEntry->partitionMethod, &intervalTypeId,
&intervalTypeMod); &intervalTypeMod);
shardInterval = TupleToShardInterval(heapTuple, tupleDescriptor, intervalTypeId, shardInterval = TupleToShardInterval(heapTuple, tupleDescriptor, intervalTypeId,
intervalTypeMod); intervalTypeMod);
systable_endscan(scanDescriptor); systable_endscan(scanDescriptor);
heap_close(pgDistShard, AccessShareLock); heap_close(pgDistShard, AccessShareLock);
@ -139,6 +140,7 @@ LoadShardInterval(uint64 shardId)
return shardInterval; return shardInterval;
} }
/* /*
* DistributedTableCacheEntry looks up a pg_dist_partition entry for a * DistributedTableCacheEntry looks up a pg_dist_partition entry for a
* relation. * relation.
@ -239,19 +241,19 @@ LookupDistTableCacheEntry(Oid relationId)
int32 intervalTypeMod = -1; int32 intervalTypeMod = -1;
GetPartitionTypeInputInfo(partitionKeyString, partitionMethod, &intervalTypeId, GetPartitionTypeInputInfo(partitionKeyString, partitionMethod, &intervalTypeId,
&intervalTypeMod); &intervalTypeMod);
shardIntervalArray = MemoryContextAllocZero(CacheMemoryContext, shardIntervalArray = MemoryContextAllocZero(CacheMemoryContext,
shardIntervalArrayLength * shardIntervalArrayLength *
sizeof(ShardInterval)); sizeof(ShardInterval));
foreach(distShardTupleCell, distShardTupleList) foreach(distShardTupleCell, distShardTupleList)
{ {
HeapTuple shardTuple = lfirst(distShardTupleCell); HeapTuple shardTuple = lfirst(distShardTupleCell);
ShardInterval *shardInterval = TupleToShardInterval(shardTuple, ShardInterval *shardInterval = TupleToShardInterval(shardTuple,
distShardTupleDesc, distShardTupleDesc,
intervalTypeId, intervalTypeId,
intervalTypeMod); intervalTypeMod);
MemoryContext oldContext = MemoryContextSwitchTo(CacheMemoryContext); MemoryContext oldContext = MemoryContextSwitchTo(CacheMemoryContext);
CopyShardInterval(shardInterval, &shardIntervalArray[arrayIndex]); CopyShardInterval(shardInterval, &shardIntervalArray[arrayIndex]);
@ -773,7 +775,7 @@ LookupDistShardTuples(Oid relationId)
scanKey[0].sk_argument = ObjectIdGetDatum(relationId); scanKey[0].sk_argument = ObjectIdGetDatum(relationId);
scanDescriptor = systable_beginscan(pgDistShard, DistShardLogicalRelidIndexId(), true, scanDescriptor = systable_beginscan(pgDistShard, DistShardLogicalRelidIndexId(), true,
NULL, 1, scanKey); NULL, 1, scanKey);
currentShardTuple = systable_getnext(scanDescriptor); currentShardTuple = systable_getnext(scanDescriptor);
while (HeapTupleIsValid(currentShardTuple)) while (HeapTupleIsValid(currentShardTuple))
@ -797,7 +799,7 @@ LookupDistShardTuples(Oid relationId)
*/ */
static void static void
GetPartitionTypeInputInfo(char *partitionKeyString, char partitionMethod, GetPartitionTypeInputInfo(char *partitionKeyString, char partitionMethod,
Oid *intervalTypeId, int32 *intervalTypeMod) Oid *intervalTypeId, int32 *intervalTypeMod)
{ {
*intervalTypeId = InvalidOid; *intervalTypeId = InvalidOid;
*intervalTypeMod = -1; *intervalTypeMod = -1;
@ -826,7 +828,7 @@ GetPartitionTypeInputInfo(char *partitionKeyString, char partitionMethod,
{ {
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("unsupported table partition type: %c", errmsg("unsupported table partition type: %c",
partitionMethod))); partitionMethod)));
} }
} }
} }
@ -838,7 +840,7 @@ GetPartitionTypeInputInfo(char *partitionKeyString, char partitionMethod,
*/ */
static ShardInterval * static ShardInterval *
TupleToShardInterval(HeapTuple heapTuple, TupleDesc tupleDescriptor, Oid intervalTypeId, TupleToShardInterval(HeapTuple heapTuple, TupleDesc tupleDescriptor, Oid intervalTypeId,
int32 intervalTypeMod) int32 intervalTypeMod)
{ {
ShardInterval *shardInterval = NULL; ShardInterval *shardInterval = NULL;
bool isNull = false; bool isNull = false;
@ -847,16 +849,16 @@ TupleToShardInterval(HeapTuple heapTuple, TupleDesc tupleDescriptor, Oid interva
Oid inputFunctionId = InvalidOid; Oid inputFunctionId = InvalidOid;
Oid typeIoParam = InvalidOid; Oid typeIoParam = InvalidOid;
Datum relationIdDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_logicalrelid, Datum relationIdDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_logicalrelid,
tupleDescriptor, &isNull); tupleDescriptor, &isNull);
Datum shardIdDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_shardid, Datum shardIdDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_shardid,
tupleDescriptor, &isNull); tupleDescriptor, &isNull);
Datum storageTypeDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_shardstorage, Datum storageTypeDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_shardstorage,
tupleDescriptor, &isNull); tupleDescriptor, &isNull);
Datum minValueTextDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_shardminvalue, Datum minValueTextDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_shardminvalue,
tupleDescriptor, &minValueNull); tupleDescriptor, &minValueNull);
Datum maxValueTextDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_shardmaxvalue, Datum maxValueTextDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_shardmaxvalue,
tupleDescriptor, &maxValueNull); tupleDescriptor, &maxValueNull);
Oid relationId = DatumGetObjectId(relationIdDatum); Oid relationId = DatumGetObjectId(relationIdDatum);
int64 shardId = DatumGetInt64(shardIdDatum); int64 shardId = DatumGetInt64(shardIdDatum);
@ -877,7 +879,7 @@ TupleToShardInterval(HeapTuple heapTuple, TupleDesc tupleDescriptor, Oid interva
/* TODO: move this up the call stack to avoid per-tuple invocation? */ /* TODO: move this up the call stack to avoid per-tuple invocation? */
get_type_io_data(intervalTypeId, IOFunc_input, &intervalTypeLen, &intervalByVal, get_type_io_data(intervalTypeId, IOFunc_input, &intervalTypeLen, &intervalByVal,
&intervalAlign, &intervalDelim, &typeIoParam, &inputFunctionId); &intervalAlign, &intervalDelim, &typeIoParam, &inputFunctionId);
/* finally convert min/max values to their actual types */ /* finally convert min/max values to their actual types */
minValue = OidInputFunctionCall(inputFunctionId, minValueString, minValue = OidInputFunctionCall(inputFunctionId, minValueString,


@ -22,7 +22,8 @@
#include "distributed/multi_resowner.h" #include "distributed/multi_resowner.h"
typedef struct JobDirectoryEntry { typedef struct JobDirectoryEntry
{
ResourceOwner owner; ResourceOwner owner;
uint64 jobId; uint64 jobId;
} JobDirectoryEntry; } JobDirectoryEntry;
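
The same hunk shows the struct convention: the opening brace of a typedef'd struct moves to its own line instead of trailing the name. A short sketch with a hypothetical type:

#include <stdint.h>

/* hypothetical entry type showing the brace placement */
typedef struct ExampleDirectoryEntry
{
	void *owner;
	uint64_t jobId;
} ExampleDirectoryEntry;
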
@ -44,8 +45,8 @@ MultiResourceOwnerReleaseCallback(ResourceReleasePhase phase,
bool isTopLevel, bool isTopLevel,
void *arg) void *arg)
{ {
int lastJobIndex = NumRegisteredJobDirectories - 1; int lastJobIndex = NumRegisteredJobDirectories - 1;
int jobIndex = 0; int jobIndex = 0;
if (phase == RESOURCE_RELEASE_AFTER_LOCKS) if (phase == RESOURCE_RELEASE_AFTER_LOCKS)
{ {
@ -79,7 +80,7 @@ MultiResourceOwnerReleaseCallback(ResourceReleasePhase phase,
void void
ResourceOwnerEnlargeJobDirectories(ResourceOwner owner) ResourceOwnerEnlargeJobDirectories(ResourceOwner owner)
{ {
int newMax = 0; int newMax = 0;
/* ensure callback is registered */ /* ensure callback is registered */
if (!RegisteredResownerCallback) if (!RegisteredResownerCallback)
@ -91,15 +92,17 @@ ResourceOwnerEnlargeJobDirectories(ResourceOwner owner)
if (RegisteredJobDirectories == NULL) if (RegisteredJobDirectories == NULL)
{ {
newMax = 16; newMax = 16;
RegisteredJobDirectories = (JobDirectoryEntry *) RegisteredJobDirectories =
MemoryContextAlloc(TopMemoryContext, newMax * sizeof(JobDirectoryEntry)); (JobDirectoryEntry *) MemoryContextAlloc(TopMemoryContext,
newMax * sizeof(JobDirectoryEntry));
NumAllocatedJobDirectories = newMax; NumAllocatedJobDirectories = newMax;
} }
else if (NumRegisteredJobDirectories + 1 > NumAllocatedJobDirectories) else if (NumRegisteredJobDirectories + 1 > NumAllocatedJobDirectories)
{ {
newMax = NumAllocatedJobDirectories * 2; newMax = NumAllocatedJobDirectories * 2;
RegisteredJobDirectories = (JobDirectoryEntry *) RegisteredJobDirectories =
repalloc(RegisteredJobDirectories, newMax * sizeof(JobDirectoryEntry)); (JobDirectoryEntry *) repalloc(RegisteredJobDirectories,
newMax * sizeof(JobDirectoryEntry));
NumAllocatedJobDirectories = newMax; NumAllocatedJobDirectories = newMax;
} }
} }
@ -123,8 +126,8 @@ ResourceOwnerRememberJobDirectory(ResourceOwner owner, uint64 jobId)
void void
ResourceOwnerForgetJobDirectory(ResourceOwner owner, uint64 jobId) ResourceOwnerForgetJobDirectory(ResourceOwner owner, uint64 jobId)
{ {
int lastJobIndex = NumRegisteredJobDirectories - 1; int lastJobIndex = NumRegisteredJobDirectories - 1;
int jobIndex = 0; int jobIndex = 0;
for (jobIndex = lastJobIndex; jobIndex >= 0; jobIndex--) for (jobIndex = lastJobIndex; jobIndex >= 0; jobIndex--)
{ {
@ -135,7 +138,8 @@ ResourceOwnerForgetJobDirectory(ResourceOwner owner, uint64 jobId)
/* move all later entries one up */ /* move all later entries one up */
while (jobIndex < lastJobIndex) while (jobIndex < lastJobIndex)
{ {
RegisteredJobDirectories[jobIndex] = RegisteredJobDirectories[jobIndex + 1]; RegisteredJobDirectories[jobIndex] =
RegisteredJobDirectories[jobIndex + 1];
jobIndex++; jobIndex++;
} }
NumRegisteredJobDirectories = lastJobIndex; NumRegisteredJobDirectories = lastJobIndex;
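
The rewrapped assignments in this file follow one rule for over-long lines: break immediately after the '=', then continue the cast-and-call right-hand side on the next line with its arguments aligned under the opening parenthesis. A runnable sketch of the shape, substituting standard realloc for the PostgreSQL allocators:

#include <stdlib.h>

typedef struct ExampleEntry
{
	int value;
} ExampleEntry;

int
main(void)
{
	int newMax = 32;
	ExampleEntry *exampleDirectories = NULL;

	/* the break lands right after '=', arguments align under the call */
	exampleDirectories =
		(ExampleEntry *) realloc(exampleDirectories,
								 newMax * sizeof(ExampleEntry));

	free(exampleDirectories);
	return 0;
}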


@ -30,7 +30,7 @@
void void
LockShardDistributionMetadata(int64 shardId, LOCKMODE lockMode) LockShardDistributionMetadata(int64 shardId, LOCKMODE lockMode)
{ {
LOCKTAG tag; LOCKTAG tag;
const bool sessionLock = false; const bool sessionLock = false;
const bool dontWait = false; const bool dontWait = false;
@ -64,7 +64,7 @@ LockRelationDistributionMetadata(Oid relationId, LOCKMODE lockMode)
void void
LockShardResource(uint64 shardId, LOCKMODE lockmode) LockShardResource(uint64 shardId, LOCKMODE lockmode)
{ {
LOCKTAG tag; LOCKTAG tag;
const bool sessionLock = false; const bool sessionLock = false;
const bool dontWait = false; const bool dontWait = false;
@ -78,7 +78,7 @@ LockShardResource(uint64 shardId, LOCKMODE lockmode)
void void
UnlockShardResource(uint64 shardId, LOCKMODE lockmode) UnlockShardResource(uint64 shardId, LOCKMODE lockmode)
{ {
LOCKTAG tag; LOCKTAG tag;
const bool sessionLock = false; const bool sessionLock = false;
SET_LOCKTAG_SHARD_RESOURCE(tag, MyDatabaseId, shardId); SET_LOCKTAG_SHARD_RESOURCE(tag, MyDatabaseId, shardId);
@ -95,7 +95,7 @@ UnlockShardResource(uint64 shardId, LOCKMODE lockmode)
void void
LockJobResource(uint64 jobId, LOCKMODE lockmode) LockJobResource(uint64 jobId, LOCKMODE lockmode)
{ {
LOCKTAG tag; LOCKTAG tag;
const bool sessionLock = false; const bool sessionLock = false;
const bool dontWait = false; const bool dontWait = false;
@ -109,7 +109,7 @@ LockJobResource(uint64 jobId, LOCKMODE lockmode)
void void
UnlockJobResource(uint64 jobId, LOCKMODE lockmode) UnlockJobResource(uint64 jobId, LOCKMODE lockmode)
{ {
LOCKTAG tag; LOCKTAG tag;
const bool sessionLock = false; const bool sessionLock = false;
SET_LOCKTAG_JOB_RESOURCE(tag, MyDatabaseId, jobId); SET_LOCKTAG_JOB_RESOURCE(tag, MyDatabaseId, jobId);


@ -50,7 +50,7 @@
#include "utils/memutils.h" #include "utils/memutils.h"
int TaskTrackerDelay = 200; /* process sleep interval in millisecs */ int TaskTrackerDelay = 200; /* process sleep interval in millisecs */
int MaxRunningTasksPerNode = 16; /* max number of running tasks */ int MaxRunningTasksPerNode = 16; /* max number of running tasks */
int MaxTrackedTasksPerNode = 1024; /* max number of tracked tasks */ int MaxTrackedTasksPerNode = 1024; /* max number of tracked tasks */
WorkerTasksSharedStateData *WorkerTasksSharedState; /* shared memory state */ WorkerTasksSharedStateData *WorkerTasksSharedState; /* shared memory state */
@ -76,10 +76,10 @@ static void TrackerCleanupJobSchemas(void);
static void TrackerCleanupConnections(HTAB *WorkerTasksHash); static void TrackerCleanupConnections(HTAB *WorkerTasksHash);
static void TrackerRegisterShutDown(HTAB *WorkerTasksHash); static void TrackerRegisterShutDown(HTAB *WorkerTasksHash);
static void TrackerDelayLoop(void); static void TrackerDelayLoop(void);
static List *SchedulableTaskList(HTAB *WorkerTasksHash); static List * SchedulableTaskList(HTAB *WorkerTasksHash);
static WorkerTask * SchedulableTaskPriorityQueue(HTAB *WorkerTasksHash); static WorkerTask * SchedulableTaskPriorityQueue(HTAB *WorkerTasksHash);
static uint32 CountTasksMatchingCriteria(HTAB *WorkerTasksHash, static uint32 CountTasksMatchingCriteria(HTAB *WorkerTasksHash,
bool (*CriteriaFunction) (WorkerTask *)); bool (*CriteriaFunction)(WorkerTask *));
static bool RunningTask(WorkerTask *workerTask); static bool RunningTask(WorkerTask *workerTask);
static bool SchedulableTask(WorkerTask *workerTask); static bool SchedulableTask(WorkerTask *workerTask);
static int CompareTasksByTime(const void *first, const void *second); static int CompareTasksByTime(const void *first, const void *second);
@ -494,6 +494,7 @@ TrackerDelayLoop(void)
} }
} }
/* ------------------------------------------------------------ /* ------------------------------------------------------------
* Signal handling and shared hash initialization functions follow * Signal handling and shared hash initialization functions follow
* ------------------------------------------------------------ * ------------------------------------------------------------
@ -503,7 +504,7 @@ TrackerDelayLoop(void)
static void static void
TrackerSigHupHandler(SIGNAL_ARGS) TrackerSigHupHandler(SIGNAL_ARGS)
{ {
int save_errno = errno; int save_errno = errno;
got_SIGHUP = true; got_SIGHUP = true;
if (MyProc != NULL) if (MyProc != NULL)
@ -519,7 +520,7 @@ TrackerSigHupHandler(SIGNAL_ARGS)
static void static void
TrackerShutdownHandler(SIGNAL_ARGS) TrackerShutdownHandler(SIGNAL_ARGS)
{ {
int save_errno = errno; int save_errno = errno;
got_SIGTERM = true; got_SIGTERM = true;
if (MyProc != NULL) if (MyProc != NULL)
@ -579,10 +580,10 @@ TaskTrackerShmemInit(void)
LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
/* allocate struct containing task tracker related shared state */ /* allocate struct containing task tracker related shared state */
WorkerTasksSharedState = (WorkerTasksSharedStateData *) WorkerTasksSharedState =
ShmemInitStruct("Worker Task Control", (WorkerTasksSharedStateData *) ShmemInitStruct("Worker Task Control",
sizeof(WorkerTasksSharedStateData), sizeof(WorkerTasksSharedStateData),
&alreadyInitialized); &alreadyInitialized);
if (!alreadyInitialized) if (!alreadyInitialized)
{ {
@ -607,6 +608,7 @@ TaskTrackerShmemInit(void)
} }
} }
/* ------------------------------------------------------------ /* ------------------------------------------------------------
* Task scheduling and management functions follow * Task scheduling and management functions follow
* ------------------------------------------------------------ * ------------------------------------------------------------
@ -653,7 +655,7 @@ SchedulableTaskList(HTAB *WorkerTasksHash)
for (queueIndex = 0; queueIndex < tasksToScheduleCount; queueIndex++) for (queueIndex = 0; queueIndex < tasksToScheduleCount; queueIndex++)
{ {
WorkerTask *schedulableTask = (WorkerTask *) palloc0(sizeof(WorkerTask)); WorkerTask *schedulableTask = (WorkerTask *) palloc0(sizeof(WorkerTask));
schedulableTask->jobId = schedulableTaskQueue[queueIndex].jobId; schedulableTask->jobId = schedulableTaskQueue[queueIndex].jobId;
schedulableTask->taskId = schedulableTaskQueue[queueIndex].taskId; schedulableTask->taskId = schedulableTaskQueue[queueIndex].taskId;
schedulableTaskList = lappend(schedulableTaskList, schedulableTask); schedulableTaskList = lappend(schedulableTaskList, schedulableTask);
@ -719,7 +721,7 @@ SchedulableTaskPriorityQueue(HTAB *WorkerTasksHash)
/* Counts the number of tasks that match the given criteria function. */ /* Counts the number of tasks that match the given criteria function. */
static uint32 static uint32
CountTasksMatchingCriteria(HTAB *WorkerTasksHash, CountTasksMatchingCriteria(HTAB *WorkerTasksHash,
bool (*CriteriaFunction) (WorkerTask *)) bool (*CriteriaFunction)(WorkerTask *))
{ {
HASH_SEQ_STATUS status; HASH_SEQ_STATUS status;
WorkerTask *currentTask = NULL; WorkerTask *currentTask = NULL;
@ -730,7 +732,7 @@ CountTasksMatchingCriteria(HTAB *WorkerTasksHash,
currentTask = (WorkerTask *) hash_seq_search(&status); currentTask = (WorkerTask *) hash_seq_search(&status);
while (currentTask != NULL) while (currentTask != NULL)
{ {
bool matchesCriteria = (*CriteriaFunction) (currentTask); bool matchesCriteria = (*CriteriaFunction)(currentTask);
if (matchesCriteria) if (matchesCriteria)
{ {
taskCount++; taskCount++;
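
Both the declaration and the call above lose the space between a function-pointer dereference and its argument list: (*CriteriaFunction) (task) becomes (*CriteriaFunction)(task). A runnable sketch of the call style, with hypothetical names:

#include <stdbool.h>
#include <stdio.h>

/* hypothetical predicate mirroring the CriteriaFunction parameter */
static bool
IsPositive(int value)
{
	return value > 0;
}

static int
CountMatching(const int *values, int count, bool (*criteriaFunction)(int))
{
	int matchCount = 0;
	int valueIndex = 0;

	for (valueIndex = 0; valueIndex < count; valueIndex++)
	{
		/* citus style: no space between the dereference and its arguments */
		bool matches = (*criteriaFunction)(values[valueIndex]);

		if (matches)
		{
			matchCount++;
		}
	}

	return matchCount;
}

int
main(void)
{
	int values[] = { -1, 2, 3 };

	printf("%d\n", CountMatching(values, 3, IsPositive));
	return 0;
}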
@ -775,7 +777,7 @@ SchedulableTask(WorkerTask *workerTask)
static int static int
CompareTasksByTime(const void *first, const void *second) CompareTasksByTime(const void *first, const void *second)
{ {
WorkerTask *firstTask = (WorkerTask *) first; WorkerTask *firstTask = (WorkerTask *) first;
WorkerTask *secondTask = (WorkerTask *) second; WorkerTask *secondTask = (WorkerTask *) second;
/* tasks that are assigned earlier have higher priority */ /* tasks that are assigned earlier have higher priority */
@ -893,7 +895,7 @@ ManageWorkerTask(WorkerTask *workerTask, HTAB *WorkerTasksHash)
{ {
case TASK_ASSIGNED: case TASK_ASSIGNED:
{ {
break; /* nothing to do until the task gets scheduled */ break; /* nothing to do until the task gets scheduled */
} }
case TASK_SCHEDULED: case TASK_SCHEDULED:


@ -57,7 +57,7 @@ task_tracker_assign_task(PG_FUNCTION_ARGS)
{ {
uint64 jobId = PG_GETARG_INT64(0); uint64 jobId = PG_GETARG_INT64(0);
uint32 taskId = PG_GETARG_UINT32(1); uint32 taskId = PG_GETARG_UINT32(1);
text *taskCallStringText = PG_GETARG_TEXT_P(2); text *taskCallStringText = PG_GETARG_TEXT_P(2);
StringInfo jobSchemaName = JobSchemaName(jobId); StringInfo jobSchemaName = JobSchemaName(jobId);
bool schemaExists = false; bool schemaExists = false;
@ -331,7 +331,7 @@ UpdateTask(WorkerTask *workerTask, char *taskCallString)
if (taskStatus == TASK_SUCCEEDED || taskStatus == TASK_CANCEL_REQUESTED || if (taskStatus == TASK_SUCCEEDED || taskStatus == TASK_CANCEL_REQUESTED ||
taskStatus == TASK_CANCELED) taskStatus == TASK_CANCELED)
{ {
; /* nothing to do */ /* nothing to do */
} }
else if (taskStatus == TASK_PERMANENTLY_FAILED) else if (taskStatus == TASK_PERMANENTLY_FAILED)
{ {


@ -53,11 +53,14 @@ static void ReceiveResourceCleanup(int32 connectionId, const char *filename,
static void DeleteFile(const char *filename); static void DeleteFile(const char *filename);
static void FetchTableCommon(text *tableName, uint64 remoteTableSize, static void FetchTableCommon(text *tableName, uint64 remoteTableSize,
ArrayType *nodeNameObject, ArrayType *nodePortObject, ArrayType *nodeNameObject, ArrayType *nodePortObject,
bool (*FetchTableFunction) (const char *, uint32, StringInfo)); bool (*FetchTableFunction)(const char *, uint32,
StringInfo));
static uint64 LocalTableSize(Oid relationId); static uint64 LocalTableSize(Oid relationId);
static uint64 ExtractShardId(StringInfo tableName); static uint64 ExtractShardId(StringInfo tableName);
static bool FetchRegularTable(const char *nodeName, uint32 nodePort, StringInfo tableName); static bool FetchRegularTable(const char *nodeName, uint32 nodePort,
static bool FetchForeignTable(const char *nodeName, uint32 nodePort, StringInfo tableName); StringInfo tableName);
static bool FetchForeignTable(const char *nodeName, uint32 nodePort,
StringInfo tableName);
static List * TableDDLCommandList(const char *nodeName, uint32 nodePort, static List * TableDDLCommandList(const char *nodeName, uint32 nodePort,
StringInfo tableName); StringInfo tableName);
static StringInfo ForeignFilePath(const char *nodeName, uint32 nodePort, static StringInfo ForeignFilePath(const char *nodeName, uint32 nodePort,
@ -85,7 +88,7 @@ worker_fetch_partition_file(PG_FUNCTION_ARGS)
uint64 jobId = PG_GETARG_INT64(0); uint64 jobId = PG_GETARG_INT64(0);
uint32 partitionTaskId = PG_GETARG_UINT32(1); uint32 partitionTaskId = PG_GETARG_UINT32(1);
uint32 partitionFileId = PG_GETARG_UINT32(2); uint32 partitionFileId = PG_GETARG_UINT32(2);
uint32 upstreamTaskId = PG_GETARG_UINT32(3); uint32 upstreamTaskId = PG_GETARG_UINT32(3);
text *nodeNameText = PG_GETARG_TEXT_P(4); text *nodeNameText = PG_GETARG_TEXT_P(4);
uint32 nodePort = PG_GETARG_UINT32(5); uint32 nodePort = PG_GETARG_UINT32(5);
char *nodeName = NULL; char *nodeName = NULL;
@ -226,7 +229,7 @@ ReceiveRegularFile(const char *nodeName, uint32 nodePort,
char filename[MAXPGPATH]; char filename[MAXPGPATH];
int closed = -1; int closed = -1;
const int fileFlags = (O_APPEND | O_CREAT | O_RDWR | O_TRUNC | PG_BINARY); const int fileFlags = (O_APPEND | O_CREAT | O_RDWR | O_TRUNC | PG_BINARY);
const int fileMode = (S_IRUSR | S_IWUSR); const int fileMode = (S_IRUSR | S_IWUSR);
QueryStatus queryStatus = CLIENT_INVALID_QUERY; QueryStatus queryStatus = CLIENT_INVALID_QUERY;
int32 connectionId = INVALID_CONNECTION_ID; int32 connectionId = INVALID_CONNECTION_ID;
@ -309,7 +312,7 @@ ReceiveRegularFile(const char *nodeName, uint32 nodePort,
} }
else if (copyStatus == CLIENT_COPY_MORE) else if (copyStatus == CLIENT_COPY_MORE)
{ {
; /* remote node will continue to send more data */ /* remote node will continue to send more data */
} }
else else
{ {
@ -468,7 +471,7 @@ worker_fetch_foreign_file(PG_FUNCTION_ARGS)
static void static void
FetchTableCommon(text *tableNameText, uint64 remoteTableSize, FetchTableCommon(text *tableNameText, uint64 remoteTableSize,
ArrayType *nodeNameObject, ArrayType *nodePortObject, ArrayType *nodeNameObject, ArrayType *nodePortObject,
bool (*FetchTableFunction) (const char *, uint32, StringInfo)) bool (*FetchTableFunction)(const char *, uint32, StringInfo))
{ {
StringInfo tableName = NULL; StringInfo tableName = NULL;
char *tableNameCString = NULL; char *tableNameCString = NULL;
@ -531,7 +534,7 @@ FetchTableCommon(text *tableNameText, uint64 remoteTableSize,
if (remoteTableSize > localTableSize) if (remoteTableSize > localTableSize)
{ {
/* table is not up to date, drop the table */ /* table is not up to date, drop the table */
ObjectAddress tableObject = {InvalidOid, InvalidOid, 0}; ObjectAddress tableObject = { InvalidOid, InvalidOid, 0 };
tableObject.classId = RelationRelationId; tableObject.classId = RelationRelationId;
tableObject.objectId = relationId; tableObject.objectId = relationId;
@ -554,7 +557,7 @@ FetchTableCommon(text *tableNameText, uint64 remoteTableSize,
char *nodeName = TextDatumGetCString(nodeNameDatum); char *nodeName = TextDatumGetCString(nodeNameDatum);
uint32 nodePort = DatumGetUInt32(nodePortDatum); uint32 nodePort = DatumGetUInt32(nodePortDatum);
tableFetched = (*FetchTableFunction) (nodeName, nodePort, tableName); tableFetched = (*FetchTableFunction)(nodeName, nodePort, tableName);
nodeIndex++; nodeIndex++;
} }
@ -1010,7 +1013,7 @@ worker_append_table_to_shard(PG_FUNCTION_ARGS)
* the transaction for this function commits, this lock will automatically * the transaction for this function commits, this lock will automatically
* be released. This ensures appends to a shard happen in a serial manner. * be released. This ensures appends to a shard happen in a serial manner.
*/ */
shardId = ExtractShardId(shardNameString); shardId = ExtractShardId(shardNameString);
LockShardResource(shardId, AccessExclusiveLock); LockShardResource(shardId, AccessExclusiveLock);
localFilePath = makeStringInfo(); localFilePath = makeStringInfo();
@ -1049,7 +1052,7 @@ worker_append_table_to_shard(PG_FUNCTION_ARGS)
static bool static bool
check_log_statement(List *statementList) check_log_statement(List *statementList)
{ {
ListCell *statementCell; ListCell *statementCell;
if (log_statement == LOGSTMT_NONE) if (log_statement == LOGSTMT_NONE)
{ {


@ -40,22 +40,22 @@ worker_foreign_file_path(PG_FUNCTION_ARGS)
ForeignTable *foreignTable = GetForeignTable(relationId); ForeignTable *foreignTable = GetForeignTable(relationId);
ListCell *optionCell = NULL; ListCell *optionCell = NULL;
foreach(optionCell, foreignTable->options) foreach(optionCell, foreignTable->options)
{ {
DefElem *option = (DefElem *) lfirst(optionCell); DefElem *option = (DefElem *) lfirst(optionCell);
char *optionName = option->defname; char *optionName = option->defname;
int compareResult = strncmp(optionName, FOREIGN_FILENAME_OPTION, MAXPGPATH); int compareResult = strncmp(optionName, FOREIGN_FILENAME_OPTION, MAXPGPATH);
if (compareResult == 0) if (compareResult == 0)
{ {
char *optionValue = defGetString(option); char *optionValue = defGetString(option);
foreignFilePath = cstring_to_text(optionValue); foreignFilePath = cstring_to_text(optionValue);
break; break;
} }
} }
/* check that we found the filename option */ /* check that we found the filename option */
if (foreignFilePath == NULL) if (foreignFilePath == NULL)
{ {
char *relationName = get_rel_name(relationId); char *relationName = get_rel_name(relationId);
ereport(ERROR, (errmsg("could not find filename for foreign table: \"%s\"", ereport(ERROR, (errmsg("could not find filename for foreign table: \"%s\"",


@ -133,7 +133,7 @@ worker_merge_files_and_run_query(PG_FUNCTION_ARGS)
const char *createMergeTableQuery = text_to_cstring(createMergeTableQueryText); const char *createMergeTableQuery = text_to_cstring(createMergeTableQueryText);
const char *createIntermediateTableQuery = const char *createIntermediateTableQuery =
text_to_cstring(createIntermediateTableQueryText); text_to_cstring(createIntermediateTableQueryText);
StringInfo taskDirectoryName = TaskDirectoryName(jobId, taskId); StringInfo taskDirectoryName = TaskDirectoryName(jobId, taskId);
StringInfo jobSchemaName = JobSchemaName(jobId); StringInfo jobSchemaName = JobSchemaName(jobId);
@ -170,14 +170,14 @@ worker_merge_files_and_run_query(PG_FUNCTION_ARGS)
if (setSearchPathResult < 0) if (setSearchPathResult < 0)
{ {
ereport(ERROR, (errmsg("execution was not successful \"%s\"", ereport(ERROR, (errmsg("execution was not successful \"%s\"",
setSearchPathString->data))); setSearchPathString->data)));
} }
createMergeTableResult = SPI_exec(createMergeTableQuery, 0); createMergeTableResult = SPI_exec(createMergeTableQuery, 0);
if (createMergeTableResult < 0) if (createMergeTableResult < 0)
{ {
ereport(ERROR, (errmsg("execution was not successful \"%s\"", ereport(ERROR, (errmsg("execution was not successful \"%s\"",
createMergeTableQuery))); createMergeTableQuery)));
} }
appendStringInfo(mergeTableName, "%s%s", intermediateTableName->data, appendStringInfo(mergeTableName, "%s%s", intermediateTableName->data,
@ -188,7 +188,7 @@ worker_merge_files_and_run_query(PG_FUNCTION_ARGS)
if (createIntermediateTableResult < 0) if (createIntermediateTableResult < 0)
{ {
ereport(ERROR, (errmsg("execution was not successful \"%s\"", ereport(ERROR, (errmsg("execution was not successful \"%s\"",
createIntermediateTableQuery))); createIntermediateTableQuery)));
} }
finished = SPI_finish(); finished = SPI_finish();
@ -256,8 +256,8 @@ JobSchemaName(uint64 jobId)
*/ */
#ifdef HAVE_INTTYPES_H #ifdef HAVE_INTTYPES_H
StringInfo jobSchemaName = makeStringInfo(); StringInfo jobSchemaName = makeStringInfo();
appendStringInfo(jobSchemaName, "%s%0*"PRIu64, appendStringInfo(jobSchemaName, "%s%0*" PRIu64, JOB_SCHEMA_PREFIX,
JOB_SCHEMA_PREFIX, MIN_JOB_DIRNAME_WIDTH, jobId); MIN_JOB_DIRNAME_WIDTH, jobId);
#else #else
StringInfo jobSchemaName = makeStringInfo(); StringInfo jobSchemaName = makeStringInfo();
appendStringInfo(jobSchemaName, "%s%0*llu", appendStringInfo(jobSchemaName, "%s%0*llu",
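
Here the formatter inserts a space between a string literal and the PRIu64 format macro; the concatenated format string is unchanged, and the spaced form also stays valid under C++11, where an unspaced macro after a literal would parse as a user-defined-literal suffix. A runnable sketch with plain printf:

#include <inttypes.h>
#include <stdio.h>

int
main(void)
{
	uint64_t jobId = 42;

	/* a space now separates the literal from the PRIu64 macro */
	printf("job_%0*" PRIu64 "\n", 6, jobId);
	return 0;
}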


@ -59,7 +59,7 @@ static void FileOutputStreamWrite(FileOutputStream file, StringInfo dataToWrite)
static void FileOutputStreamFlush(FileOutputStream file); static void FileOutputStreamFlush(FileOutputStream file);
static void FilterAndPartitionTable(const char *filterQuery, static void FilterAndPartitionTable(const char *filterQuery,
const char *columnName, Oid columnType, const char *columnName, Oid columnType,
uint32 (*PartitionIdFunction) (Datum, const void *), uint32 (*PartitionIdFunction)(Datum, const void *),
const void *partitionIdContext, const void *partitionIdContext,
FileOutputStream *partitionFileArray, FileOutputStream *partitionFileArray,
uint32 fileCount); uint32 fileCount);
@ -105,7 +105,7 @@ worker_range_partition_table(PG_FUNCTION_ARGS)
uint32 taskId = PG_GETARG_UINT32(1); uint32 taskId = PG_GETARG_UINT32(1);
text *filterQueryText = PG_GETARG_TEXT_P(2); text *filterQueryText = PG_GETARG_TEXT_P(2);
text *partitionColumnText = PG_GETARG_TEXT_P(3); text *partitionColumnText = PG_GETARG_TEXT_P(3);
Oid partitionColumnType = PG_GETARG_OID(4); Oid partitionColumnType = PG_GETARG_OID(4);
ArrayType *splitPointObject = PG_GETARG_ARRAYTYPE_P(5); ArrayType *splitPointObject = PG_GETARG_ARRAYTYPE_P(5);
const char *filterQuery = text_to_cstring(filterQueryText); const char *filterQuery = text_to_cstring(filterQueryText);
@ -181,7 +181,7 @@ worker_hash_partition_table(PG_FUNCTION_ARGS)
uint32 taskId = PG_GETARG_UINT32(1); uint32 taskId = PG_GETARG_UINT32(1);
text *filterQueryText = PG_GETARG_TEXT_P(2); text *filterQueryText = PG_GETARG_TEXT_P(2);
text *partitionColumnText = PG_GETARG_TEXT_P(3); text *partitionColumnText = PG_GETARG_TEXT_P(3);
Oid partitionColumnType = PG_GETARG_OID(4); Oid partitionColumnType = PG_GETARG_OID(4);
uint32 partitionCount = PG_GETARG_UINT32(5); uint32 partitionCount = PG_GETARG_UINT32(5);
const char *filterQuery = text_to_cstring(filterQueryText); const char *filterQuery = text_to_cstring(filterQueryText);
@ -463,7 +463,7 @@ JobDirectoryName(uint64 jobId)
*/ */
#ifdef HAVE_INTTYPES_H #ifdef HAVE_INTTYPES_H
StringInfo jobDirectoryName = makeStringInfo(); StringInfo jobDirectoryName = makeStringInfo();
appendStringInfo(jobDirectoryName, "base/%s/%s%0*"PRIu64, appendStringInfo(jobDirectoryName, "base/%s/%s%0*" PRIu64,
PG_JOB_CACHE_DIR, JOB_DIRECTORY_PREFIX, PG_JOB_CACHE_DIR, JOB_DIRECTORY_PREFIX,
MIN_JOB_DIRNAME_WIDTH, jobId); MIN_JOB_DIRNAME_WIDTH, jobId);
#else #else
@ -726,7 +726,7 @@ FileOutputStreamFlush(FileOutputStream file)
static void static void
FilterAndPartitionTable(const char *filterQuery, FilterAndPartitionTable(const char *filterQuery,
const char *partitionColumnName, Oid partitionColumnType, const char *partitionColumnName, Oid partitionColumnType,
uint32 (*PartitionIdFunction) (Datum, const void *), uint32 (*PartitionIdFunction)(Datum, const void *),
const void *partitionIdContext, const void *partitionIdContext,
FileOutputStream *partitionFileArray, FileOutputStream *partitionFileArray,
uint32 fileCount) uint32 fileCount)
@ -794,7 +794,7 @@ FilterAndPartitionTable(const char *filterQuery,
FileOutputStream partitionFile = { 0, 0, 0 }; FileOutputStream partitionFile = { 0, 0, 0 };
StringInfo rowText = NULL; StringInfo rowText = NULL;
Datum partitionKey = 0; Datum partitionKey = 0;
bool partitionKeyNull = false; bool partitionKeyNull = false;
uint32 partitionId = 0; uint32 partitionId = 0;
partitionKey = SPI_getbinval(row, rowDescriptor, partitionKey = SPI_getbinval(row, rowDescriptor,
@ -808,7 +808,7 @@ FilterAndPartitionTable(const char *filterQuery,
*/ */
if (!partitionKeyNull) if (!partitionKeyNull)
{ {
partitionId = (*PartitionIdFunction) (partitionKey, partitionIdContext); partitionId = (*PartitionIdFunction)(partitionKey, partitionIdContext);
} }
else else
{ {
@ -926,7 +926,7 @@ InitRowOutputState(void)
/* initialize defaults for printing null values */ /* initialize defaults for printing null values */
char *nullPrint = pstrdup("\\N"); char *nullPrint = pstrdup("\\N");
int nullPrintLen = strlen(nullPrint); int nullPrintLen = strlen(nullPrint);
char *nullPrintClient = pg_server_to_any(nullPrint, nullPrintLen, fileEncoding); char *nullPrintClient = pg_server_to_any(nullPrint, nullPrintLen, fileEncoding);
/* set default text output characters */ /* set default text output characters */
@ -946,7 +946,7 @@ InitRowOutputState(void)
} }
/* set up transcoding information and default text output characters */ /* set up transcoding information and default text output characters */
if ( (fileEncoding != databaseEncoding) || (databaseEncodingMaxLength > 1) ) if ((fileEncoding != databaseEncoding) || (databaseEncodingMaxLength > 1))
{ {
rowOutputState->need_transcoding = true; rowOutputState->need_transcoding = true;
} }
@ -1057,7 +1057,7 @@ OutputRow(HeapTuple row, TupleDesc rowDescriptor,
CopySendString(rowOutputState, rowOutputState->null_print_client); CopySendString(rowOutputState, rowOutputState->null_print_client);
} }
lastColumn = ((columnIndex+1) == columnCount); lastColumn = ((columnIndex + 1) == columnCount);
if (!lastColumn) if (!lastColumn)
{ {
CopySendChar(rowOutputState, rowOutputState->delim[0]); CopySendChar(rowOutputState, rowOutputState->delim[0]);
@ -1094,9 +1094,9 @@ OutputBinaryHeaders(FileOutputStream *partitionFileArray, uint32 fileCount)
{ {
/* Generate header for a binary copy */ /* Generate header for a binary copy */
const int32 zero = 0; const int32 zero = 0;
FileOutputStream partitionFile = {0, 0, 0}; FileOutputStream partitionFile = { 0, 0, 0 };
PartialCopyStateData headerOutputStateData; PartialCopyStateData headerOutputStateData;
PartialCopyState headerOutputState = (PartialCopyState) &headerOutputStateData; PartialCopyState headerOutputState = (PartialCopyState) & headerOutputStateData;
memset(headerOutputState, 0, sizeof(PartialCopyStateData)); memset(headerOutputState, 0, sizeof(PartialCopyStateData));
headerOutputState->fe_msgbuf = makeStringInfo(); headerOutputState->fe_msgbuf = makeStringInfo();
@ -1128,9 +1128,9 @@ OutputBinaryFooters(FileOutputStream *partitionFileArray, uint32 fileCount)
{ {
/* Generate footer for a binary copy */ /* Generate footer for a binary copy */
int16 negative = -1; int16 negative = -1;
FileOutputStream partitionFile = {0, 0, 0}; FileOutputStream partitionFile = { 0, 0, 0 };
PartialCopyStateData footerOutputStateData; PartialCopyStateData footerOutputStateData;
PartialCopyState footerOutputState = (PartialCopyState) &footerOutputStateData; PartialCopyState footerOutputState = (PartialCopyState) & footerOutputStateData;
memset(footerOutputState, 0, sizeof(PartialCopyStateData)); memset(footerOutputState, 0, sizeof(PartialCopyStateData));
footerOutputState->fe_msgbuf = makeStringInfo(); footerOutputState->fe_msgbuf = makeStringInfo();
@ -1143,6 +1143,7 @@ OutputBinaryFooters(FileOutputStream *partitionFileArray, uint32 fileCount)
} }
/* *INDENT-OFF* */
/* Append data to the copy buffer in outputState */ /* Append data to the copy buffer in outputState */
static void static void
CopySendData(PartialCopyState outputState, const void *databuf, int datasize) CopySendData(PartialCopyState outputState, const void *databuf, int datasize)
@ -1282,6 +1283,7 @@ CopyAttributeOutText(PartialCopyState cstate, char *string)
} }
/* *INDENT-ON* */
/* Helper function to send pending copy output */ /* Helper function to send pending copy output */
static inline void static inline void
CopyFlushOutput(PartialCopyState cstate, char *start, char *pointer) CopyFlushOutput(PartialCopyState cstate, char *start, char *pointer)


@ -16,7 +16,22 @@
#include "stringutils.h" #include "stringutils.h"
/* Concatenates "more" onto "var", and frees the original value of *var. */ /* *INDENT-OFF* */
void
free_copy_options(copy_options * ptr)
{
if (!ptr)
return;
free(ptr->before_tofrom);
free(ptr->after_tofrom);
free(ptr->file);
free(ptr->tableName);
free(ptr->columnList);
free(ptr);
}
/* concatenate "more" onto "var", freeing the original value of *var */
static void static void
xstrcat(char **var, const char *more) xstrcat(char **var, const char *more)
{ {
@ -210,21 +225,9 @@ error:
return NULL; return NULL;
} }
/* *INDENT-ON* */
/* Frees copy options. */ /* Frees copy options. */
void
free_copy_options(copy_options * ptr)
{
if (!ptr)
return;
free(ptr->before_tofrom);
free(ptr->after_tofrom);
free(ptr->file);
free(ptr->tableName);
free(ptr->columnList);
free(ptr);
}
/* /*
* ParseStageOptions takes the given copy options, parses the additional options * ParseStageOptions takes the given copy options, parses the additional options


@ -46,7 +46,7 @@ typedef struct copy_options
bool psql_inout; /* true = use psql stdin/stdout */ bool psql_inout; /* true = use psql stdin/stdout */
bool from; /* true = FROM, false = TO */ bool from; /* true = FROM, false = TO */
char *tableName; /* table name to stage data to */ char *tableName; /* table name to stage data to */
char *columnList; /* optional column list used in staging */ char *columnList; /* optional column list used in staging */
} copy_options; } copy_options;


@ -26,7 +26,8 @@
static bool FileSize(char *filename, uint64 *fileSize); static bool FileSize(char *filename, uint64 *fileSize);
static PGconn * ConnectToWorkerNode(const char *nodeName, uint32 nodePort, static PGconn * ConnectToWorkerNode(const char *nodeName, uint32 nodePort,
const char *nodeDatabase); const char *nodeDatabase);
static PGresult * ExecuteRemoteCommand(PGconn *remoteConnection, const char *remoteCommand, static PGresult * ExecuteRemoteCommand(PGconn *remoteConnection,
const char *remoteCommand,
const char **parameterValues, int parameterCount); const char **parameterValues, int parameterCount);
static TableMetadata * InitTableMetadata(const char *tableName); static TableMetadata * InitTableMetadata(const char *tableName);
static ShardMetadata * InitShardMetadata(int shardPlacementPolicy); static ShardMetadata * InitShardMetadata(int shardPlacementPolicy);
@ -41,7 +42,8 @@ static uint64 GetValueUint64(const PGresult *result, int rowNumber, int columnNu
static bool MasterGetTableMetadata(const char *tableName, TableMetadata *tableMetadata); static bool MasterGetTableMetadata(const char *tableName, TableMetadata *tableMetadata);
static bool MasterGetTableDDLEvents(const char *tableName, TableMetadata *tableMetadata); static bool MasterGetTableDDLEvents(const char *tableName, TableMetadata *tableMetadata);
static bool MasterGetNewShardId(ShardMetadata *shardMetadata); static bool MasterGetNewShardId(ShardMetadata *shardMetadata);
static bool MasterGetCandidateNodes(ShardMetadata *shardMetadata, int shardPlacementPolicy); static bool MasterGetCandidateNodes(ShardMetadata *shardMetadata,
int shardPlacementPolicy);
static bool MasterInsertShardRow(uint32 logicalRelid, char storageType, static bool MasterInsertShardRow(uint32 logicalRelid, char storageType,
const ShardMetadata *shardMetadata); const ShardMetadata *shardMetadata);
static bool MasterInsertPlacementRows(const ShardMetadata *shardMetadata); static bool MasterInsertPlacementRows(const ShardMetadata *shardMetadata);
@ -62,7 +64,8 @@ static bool ApplyShardDDLCommand(PGconn *workerNode, uint64 shardId, const char
static bool TransmitTableData(PGconn *workerNode, uint64 shardId, static bool TransmitTableData(PGconn *workerNode, uint64 shardId,
uint64 shardMaxSize, copy_options *stageOptions, uint64 shardMaxSize, copy_options *stageOptions,
uint64 currentFileOffset, uint64 *nextFileOffset); uint64 currentFileOffset, uint64 *nextFileOffset);
static bool TransmitFile(PGconn *workerNode, const char *localPath, const char *remotePath); static bool TransmitFile(PGconn *workerNode, const char *localPath,
const char *remotePath);
static bool FileStreamOK(const copy_options *stageOptions); static bool FileStreamOK(const copy_options *stageOptions);
static PQExpBuffer CreateCopyQueryString(const char *tableName, const char *columnList, static PQExpBuffer CreateCopyQueryString(const char *tableName, const char *columnList,
const char *afterToFrom); const char *afterToFrom);
@ -166,7 +169,7 @@ DoStageData(const char *stageCommand)
if (partitionMethod == DISTRIBUTE_BY_HASH) if (partitionMethod == DISTRIBUTE_BY_HASH)
{ {
psql_error("\\stage: staging data into hash partitioned tables is not " psql_error("\\stage: staging data into hash partitioned tables is not "
"supported\n"); "supported\n");
free_copy_options(stageOptions); free_copy_options(stageOptions);
FreeTableMetadata(tableMetadata); FreeTableMetadata(tableMetadata);
@ -179,7 +182,7 @@ DoStageData(const char *stageCommand)
bool tableOptionsOK = ColumnarTableOptionsOK(tableMetadata->logicalRelid); bool tableOptionsOK = ColumnarTableOptionsOK(tableMetadata->logicalRelid);
if (!tableOptionsOK) if (!tableOptionsOK)
{ {
return false; /* error message already displayed */ return false; /* error message already displayed */
} }
} }
@ -225,7 +228,7 @@ DoStageData(const char *stageCommand)
*/ */
FreeCommonStageData(stageOptions, tableMetadata, shardMetadataList); FreeCommonStageData(stageOptions, tableMetadata, shardMetadataList);
return false; /* abort immediately */ return false; /* abort immediately */
} }
/* save allocated shard metadata */ /* save allocated shard metadata */
@ -245,7 +248,7 @@ DoStageData(const char *stageCommand)
*/ */
for (nodeIndex = 0; nodeIndex < shardMetadata->nodeCount; nodeIndex++) for (nodeIndex = 0; nodeIndex < shardMetadata->nodeCount; nodeIndex++)
{ {
char *remoteNodeName = shardMetadata->nodeNameList[nodeIndex]; char *remoteNodeName = shardMetadata->nodeNameList[nodeIndex];
uint32 remoteNodePort = shardMetadata->nodePortList[nodeIndex]; uint32 remoteNodePort = shardMetadata->nodePortList[nodeIndex];
PGconn *remoteNode = NULL; PGconn *remoteNode = NULL;
@ -341,7 +344,6 @@ DoStageData(const char *stageCommand)
/* update current file offset */ /* update current file offset */
currentFileOffset = nextFileOffset; currentFileOffset = nextFileOffset;
} /* while more file data left for sharding */ } /* while more file data left for sharding */
/* /*
@ -421,10 +423,10 @@ ExecuteRemoteCommand(PGconn *remoteConnection, const char *remoteCommand,
{ {
PGresult *result = NULL; PGresult *result = NULL;
const Oid *parameterType = NULL; /* let the backend deduce type */ const Oid *parameterType = NULL; /* let the backend deduce type */
const int *parameterLength = NULL; /* text params do not need length */ const int *parameterLength = NULL; /* text params do not need length */
const int *parameterFormat = NULL; /* text params have Null by default */ const int *parameterFormat = NULL; /* text params have Null by default */
const int resultFormat = 0; /* ask for results in text format */ const int resultFormat = 0; /* ask for results in text format */
result = PQexecParams(remoteConnection, remoteCommand, result = PQexecParams(remoteConnection, remoteCommand,
parameterCount, parameterType, parameterValues, parameterCount, parameterType, parameterValues,
@ -716,7 +718,7 @@ MasterGetTableMetadata(const char *tableName, TableMetadata *tableMetadata)
char *tableStorageType = NULL; char *tableStorageType = NULL;
char *partitionMethod = NULL; char *partitionMethod = NULL;
char *partitionKey = NULL; char *partitionKey = NULL;
int partitionKeyLength = 0; int partitionKeyLength = 0;
uint64 logicalRelid = 0; uint64 logicalRelid = 0;
uint64 shardReplicaCount = 0; uint64 shardReplicaCount = 0;
uint64 shardMaxSize = 0; uint64 shardMaxSize = 0;
@ -727,7 +729,7 @@ MasterGetTableMetadata(const char *tableName, TableMetadata *tableMetadata)
parameterValue, parameterCount); parameterValue, parameterCount);
if (result == NULL) if (result == NULL)
{ {
return false; /* error message already displayed */ return false; /* error message already displayed */
} }
/* find column numbers associated with column names */ /* find column numbers associated with column names */
@ -825,7 +827,7 @@ MasterGetTableDDLEvents(const char *tableName, TableMetadata *tableMetadata)
{ {
char *ddlEvent = NULL; char *ddlEvent = NULL;
char *ddlEventValue = PQgetvalue(result, ddlEventIndex, 0); char *ddlEventValue = PQgetvalue(result, ddlEventIndex, 0);
int ddlEventLength = PQgetlength(result, ddlEventIndex, 0); int ddlEventLength = PQgetlength(result, ddlEventIndex, 0);
if (ddlEventLength <= 0) if (ddlEventLength <= 0)
{ {
@ -996,11 +998,11 @@ MasterGetCandidateNodes(ShardMetadata *shardMetadata, int shardPlacementPolicy)
/* walk over fetched node name/port list, and assign them to metadata */ /* walk over fetched node name/port list, and assign them to metadata */
for (nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) for (nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++)
{ {
char *nodeName = NULL; char *nodeName = NULL;
uint64 nodePort = 0; uint64 nodePort = 0;
char *nodeNameValue = PQgetvalue(result, nodeIndex, nodeNameIndex); char *nodeNameValue = PQgetvalue(result, nodeIndex, nodeNameIndex);
int nodeNameLength = PQgetlength(result, nodeIndex, nodeNameIndex); int nodeNameLength = PQgetlength(result, nodeIndex, nodeNameIndex);
if (nodeNameLength <= 0) if (nodeNameLength <= 0)
{ {
@ -1107,7 +1109,7 @@ MasterInsertPlacementRows(const ShardMetadata *shardMetadata)
bool staged = shardMetadata->nodeStageList[nodeIndex]; bool staged = shardMetadata->nodeStageList[nodeIndex];
if (staged) if (staged)
{ {
char *nodeName = shardMetadata->nodeNameList[nodeIndex]; char *nodeName = shardMetadata->nodeNameList[nodeIndex];
uint32 nodePort = shardMetadata->nodePortList[nodeIndex]; uint32 nodePort = shardMetadata->nodePortList[nodeIndex];
/* convert parameter to its string representation */ /* convert parameter to its string representation */


@ -30,42 +30,44 @@
#define ROLLBACK_COMMAND "ROLLBACK" #define ROLLBACK_COMMAND "ROLLBACK"
/* Names of remote function calls to execute on the master. */ /* Names of remote function calls to execute on the master. */
#define MASTER_GET_TABLE_METADATA "SELECT * FROM master_get_table_metadata($1::text)" #define MASTER_GET_TABLE_METADATA "SELECT * FROM master_get_table_metadata($1::text)"
#define MASTER_GET_TABLE_DDL_EVENTS "SELECT * FROM master_get_table_ddl_events($1::text)" #define MASTER_GET_TABLE_DDL_EVENTS "SELECT * FROM master_get_table_ddl_events($1::text)"
#define MASTER_GET_NEW_SHARDID "SELECT * FROM master_get_new_shardid()" #define MASTER_GET_NEW_SHARDID "SELECT * FROM master_get_new_shardid()"
#define MASTER_GET_LOCAL_FIRST_CANDIDATE_NODES "SELECT * FROM \ #define MASTER_GET_LOCAL_FIRST_CANDIDATE_NODES \
master_get_local_first_candidate_nodes()" "SELECT * FROM master_get_local_first_candidate_nodes()"
#define MASTER_GET_ROUND_ROBIN_CANDIDATE_NODES "SELECT * FROM \ #define MASTER_GET_ROUND_ROBIN_CANDIDATE_NODES \
master_get_round_robin_candidate_nodes($1::int8)" "SELECT * FROM master_get_round_robin_candidate_nodes($1::int8)"
#define MASTER_INSERT_SHARD_ROW "INSERT INTO pg_dist_shard \ #define MASTER_INSERT_SHARD_ROW \
(logicalrelid, shardid, shardstorage, shardminvalue, shardmaxvalue) VALUES \ "INSERT INTO pg_dist_shard " \
($1::oid, $2::int8, $3::char, $4::text, $5::text)" "(logicalrelid, shardid, shardstorage, shardminvalue, shardmaxvalue) VALUES " \
#define MASTER_INSERT_PLACEMENT_ROW "INSERT INTO pg_dist_shard_placement \ "($1::oid, $2::int8, $3::char, $4::text, $5::text)"
(shardid, shardstate, shardlength, nodename, nodeport) VALUES \ #define MASTER_INSERT_PLACEMENT_ROW \
($1::int8, $2::int4, $3::int8, $4::text, $5::int4)" "INSERT INTO pg_dist_shard_placement " \
"(shardid, shardstate, shardlength, nodename, nodeport) VALUES " \
"($1::int8, $2::int4, $3::int8, $4::text, $5::int4)"
/* Column names used to identify response fields as returned from the master. */ /* Column names used to identify response fields as returned from the master. */
#define LOGICAL_RELID_FIELD "logical_relid" #define LOGICAL_RELID_FIELD "logical_relid"
#define PART_STORAGE_TYPE_FIELD "part_storage_type" #define PART_STORAGE_TYPE_FIELD "part_storage_type"
#define PART_METHOD_FIELD "part_method" #define PART_METHOD_FIELD "part_method"
#define PART_KEY_FIELD "part_key" #define PART_KEY_FIELD "part_key"
#define PART_REPLICA_COUNT_FIELD "part_replica_count" #define PART_REPLICA_COUNT_FIELD "part_replica_count"
#define PART_MAX_SIZE_FIELD "part_max_size" #define PART_MAX_SIZE_FIELD "part_max_size"
#define PART_PLACEMENT_POLICY_FIELD "part_placement_policy" #define PART_PLACEMENT_POLICY_FIELD "part_placement_policy"
#define NODE_NAME_FIELD "node_name" #define NODE_NAME_FIELD "node_name"
#define NODE_PORT_FIELD "node_port" #define NODE_PORT_FIELD "node_port"
/* the tablename in the overloaded COPY statement is the to-be-transferred file */ /* the tablename in the overloaded COPY statement is the to-be-transferred file */
#define TRANSMIT_REGULAR_COMMAND "COPY \"%s\" FROM STDIN WITH (format 'transmit')" #define TRANSMIT_REGULAR_COMMAND "COPY \"%s\" FROM STDIN WITH (format 'transmit')"
#define SHARD_MIN_MAX_COMMAND "SELECT min(%s), max(%s) FROM %s" #define SHARD_MIN_MAX_COMMAND "SELECT min(%s), max(%s) FROM %s"
#define SHARD_TABLE_SIZE_COMMAND "SELECT pg_table_size('%s')" #define SHARD_TABLE_SIZE_COMMAND "SELECT pg_table_size('%s')"
#define SET_FOREIGN_TABLE_FILENAME "ALTER FOREIGN TABLE %s OPTIONS (SET filename '%s')" #define SET_FOREIGN_TABLE_FILENAME "ALTER FOREIGN TABLE %s OPTIONS (SET filename '%s')"
#define GET_COLUMNAR_TABLE_FILENAME_OPTION "SELECT * FROM \ #define GET_COLUMNAR_TABLE_FILENAME_OPTION \
(SELECT (pg_options_to_table(ftoptions)).* FROM pg_foreign_table \ "SELECT * FROM (SELECT (pg_options_to_table(ftoptions)).* FROM pg_foreign_table " \
WHERE ftrelid = %u) AS Q WHERE option_name = 'filename';" "WHERE ftrelid = %u) AS Q WHERE option_name = 'filename';"
#define APPLY_SHARD_DDL_COMMAND "SELECT * FROM worker_apply_shard_ddl_command \ #define APPLY_SHARD_DDL_COMMAND \
($1::int8, $2::text)" "SELECT * FROM worker_apply_shard_ddl_command ($1::int8, $2::text)"
#define REMOTE_FILE_SIZE_COMMAND "SELECT size FROM pg_stat_file('%s')" #define REMOTE_FILE_SIZE_COMMAND "SELECT size FROM pg_stat_file('%s')"
#define SHARD_COLUMNAR_TABLE_SIZE_COMMAND "SELECT cstore_table_size('%s')" #define SHARD_COLUMNAR_TABLE_SIZE_COMMAND "SELECT cstore_table_size('%s')"
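
The macro rewrites above drop backslash-continued string literals, whose continuation lines either leak their indentation into the query text or must sit at column zero, in favor of one adjacent string literal per line beneath a macro name that gets its own continuation line. A sketch of the pattern with a hypothetical command:

#include <stdio.h>

/* hypothetical SQL macro in the rewritten style */
#define EXAMPLE_INSERT_ROW_COMMAND \
	"INSERT INTO example_table " \
	"(id, name) VALUES " \
	"($1::int8, $2::text)"

int
main(void)
{
	/* adjacent literals concatenate; the query holds exactly these spaces */
	puts(EXAMPLE_INSERT_ROW_COMMAND);
	return 0;
}
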
@ -90,17 +92,16 @@
*/ */
typedef struct TableMetadata typedef struct TableMetadata
{ {
uint32 logicalRelid; /* table's relationId on the master */ uint32 logicalRelid; /* table's relationId on the master */
char tableStorageType; /* relay file, foreign table, or table */ char tableStorageType; /* relay file, foreign table, or table */
char partitionMethod; /* table's partition method */ char partitionMethod; /* table's partition method */
char *partitionKey; /* partition key expression */ char *partitionKey; /* partition key expression */
uint32 shardReplicaCount; /* shard replication factor */ uint32 shardReplicaCount; /* shard replication factor */
uint64 shardMaxSize; /* create new shard when shard reaches max size */ uint64 shardMaxSize; /* create new shard when shard reaches max size */
uint32 shardPlacementPolicy; /* policy to use when choosing nodes to place shards */ uint32 shardPlacementPolicy; /* policy to use when choosing nodes to place shards */
char **ddlEventList; /* DDL statements used for creating new shard */ char **ddlEventList; /* DDL statements used for creating new shard */
uint32 ddlEventCount; /* DDL statement count; statement list size */ uint32 ddlEventCount; /* DDL statement count; statement list size */
} TableMetadata; } TableMetadata;
@ -112,17 +113,16 @@ typedef struct TableMetadata
*/ */
typedef struct ShardMetadata typedef struct ShardMetadata
{ {
uint64 shardId; /* global shardId; created on the master node */ uint64 shardId; /* global shardId; created on the master node */
char **nodeNameList; /* candidate node name list for shard uploading */ char **nodeNameList; /* candidate node name list for shard uploading */
uint32 *nodePortList; /* candidate node port list for shard uploading */ uint32 *nodePortList; /* candidate node port list for shard uploading */
uint32 nodeCount; /* candidate node count; node list size */ uint32 nodeCount; /* candidate node count; node list size */
bool *nodeStageList; /* shard uploaded to corresponding candidate node? */ bool *nodeStageList; /* shard uploaded to corresponding candidate node? */
char *shardMinValue; /* partition key's minimum value in shard */ char *shardMinValue; /* partition key's minimum value in shard */
char *shardMaxValue; /* partition key's maximum value in shard */ char *shardMaxValue; /* partition key's maximum value in shard */
uint64 shardSize; /* shard size; updated during staging */ uint64 shardSize; /* shard size; updated during staging */
} ShardMetadata; } ShardMetadata;


@@ -2,7 +2,7 @@
 *
 * citus_ruleutils.h
 * CitusDB ruleutils wrapper functions and exported PostgreSQL ruleutils
 * functions.
 *
 * Copyright (c) 2012-2015, Citus Data, Inc.
 *-------------------------------------------------------------------------
@@ -16,16 +16,17 @@
/* Function declarations for version independent CitusDB ruleutils wrapper functions */
-extern char *pg_get_extensiondef_string(Oid tableRelationId);
-extern char *pg_get_serverdef_string(Oid tableRelationId);
-extern char *pg_get_tableschemadef_string(Oid tableRelationId);
-extern char *pg_get_tablecolumnoptionsdef_string(Oid tableRelationId);
-extern char *pg_get_indexclusterdef_string(Oid indexRelationId);
+extern char * pg_get_extensiondef_string(Oid tableRelationId);
+extern char * pg_get_serverdef_string(Oid tableRelationId);
+extern char * pg_get_tableschemadef_string(Oid tableRelationId);
+extern char * pg_get_tablecolumnoptionsdef_string(Oid tableRelationId);
+extern char * pg_get_indexclusterdef_string(Oid indexRelationId);

/* Function declarations for version dependent PostgreSQL ruleutils functions */
extern void pg_get_query_def(Query *query, StringInfo buffer);
-extern void deparse_shard_query(Query *query, Oid distrelid, int64 shardid, StringInfo buffer);
-extern char *generate_relation_name(Oid relid, List *namespaces);
+extern void deparse_shard_query(Query *query, Oid distrelid, int64 shardid, StringInfo
+								buffer);
+extern char * generate_relation_name(Oid relid, List *namespaces);

#endif   /* CITUS_RULEUTILS_H */

View File

@@ -30,15 +30,14 @@ typedef struct ShardInterval
	CitusNodeTag type;
	Oid relationId;
	char storageType;
	Oid valueTypeId;     /* min/max value datum's typeId */
	int valueTypeLen;    /* min/max value datum's typelen */
	bool valueByVal;     /* min/max value datum's byval */
	bool minValueExists;
	bool maxValueExists;
	Datum minValue;      /* a shard's typed min value datum */
	Datum maxValue;      /* a shard's typed max value datum */
	uint64 shardId;
} ShardInterval;
@@ -46,13 +45,12 @@ typedef struct ShardInterval
typedef struct ShardPlacement
{
	CitusNodeTag type;
	Oid tupleOid;        /* unique oid that implies this row's insertion order */
	uint64 shardId;
	uint64 shardLength;
	RelayFileState shardState;
	char *nodeName;
	uint32 nodePort;
} ShardPlacement;

View File

@@ -49,10 +49,10 @@
#define SHARDID_SEQUENCE_NAME "pg_dist_shardid_seq"

/* Remote call definitions to help with data staging and deletion */
-#define WORKER_APPLY_SHARD_DDL_COMMAND "SELECT worker_apply_shard_ddl_command \
-("UINT64_FORMAT", %s)"
-#define WORKER_APPEND_TABLE_TO_SHARD "SELECT worker_append_table_to_shard \
-(%s, %s, %s, %u)"
+#define WORKER_APPLY_SHARD_DDL_COMMAND \
+	"SELECT worker_apply_shard_ddl_command (" UINT64_FORMAT ", %s)"
+#define WORKER_APPEND_TABLE_TO_SHARD \
+	"SELECT worker_append_table_to_shard (%s, %s, %s, %u)"
#define SHARD_MIN_VALUE_QUERY "SELECT min(%s) FROM %s"
#define SHARD_MAX_VALUE_QUERY "SELECT max(%s) FROM %s"
#define SHARD_TABLE_SIZE_QUERY "SELECT pg_table_size('%s')"
@@ -67,7 +67,6 @@ typedef enum
	SHARD_PLACEMENT_INVALID_FIRST = 0,
	SHARD_PLACEMENT_LOCAL_NODE_FIRST = 1,
	SHARD_PLACEMENT_ROUND_ROBIN = 2
} ShardPlacementPolicyType;
@@ -83,8 +82,8 @@ extern Oid ResolveRelationId(text *relationName);
extern List * GetTableDDLEvents(Oid relationId);
extern void CheckDistributedTable(Oid relationId);
extern void CreateShardPlacements(int64 shardId, List *ddlEventList,
								  List *workerNodeList, int workerStartIndex,
								  int replicationFactor);

/* Function declarations for generating metadata for shard creation */
extern Datum master_get_table_metadata(PG_FUNCTION_ARGS);
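These query templates rely on C's adjacent-string-literal concatenation: the preprocessor pastes UINT64_FORMAT (itself a string literal) into one format string, which is why the reindented form spaces the pieces apart. A minimal standalone sketch, using a stand-in for PostgreSQL's platform-dependent UINT64_FORMAT and a hypothetical shard id:

#include <inttypes.h>
#include <stdio.h>

/* stand-in for PostgreSQL's UINT64_FORMAT, whose definition is platform-dependent */
#define UINT64_FORMAT "%" PRIu64

/* adjacent string literals concatenate into a single format string at compile time */
#define WORKER_APPLY_SHARD_DDL_COMMAND \
	"SELECT worker_apply_shard_ddl_command (" UINT64_FORMAT ", %s)"

int
main(void)
{
	char command[256];
	uint64_t shardId = 102010;   /* hypothetical shard id */

	snprintf(command, sizeof(command), WORKER_APPLY_SHARD_DDL_COMMAND,
			 shardId, "'CREATE TABLE lineitem_102010 (key int)'");
	printf("%s\n", command);
	return 0;
}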

View File

@@ -24,6 +24,7 @@
#define INVALID_TASK_ID 0

#if (PG_VERSION_NUM >= 90500)

/* reserved alias name for UPSERTs */
#define UPSERT_ALIAS "citus_table_alias"
#endif

View File

@@ -15,21 +15,20 @@
#define MULTI_CLIENT_EXECUTOR_H

#define INVALID_CONNECTION_ID -1     /* identifies an invalid connection */
#define CLIENT_CONNECT_TIMEOUT 5     /* connection timeout in seconds */
#define MAX_CONNECTION_COUNT 2048    /* simultaneous client connection count */
#define STRING_BUFFER_SIZE 1024      /* buffer size for character arrays */

#define CONN_INFO_TEMPLATE "host=%s port=%u dbname=%s connect_timeout=%u"

/* Enumeration to track one client connection's status */
typedef enum
{
	CLIENT_INVALID_CONNECT = 0,
	CLIENT_CONNECTION_BAD = 1,
	CLIENT_CONNECTION_BUSY = 2,
	CLIENT_CONNECTION_READY = 3
} ConnectStatus;
@@ -38,9 +37,8 @@
{
	CLIENT_INVALID_RESULT_STATUS = 0,
	CLIENT_RESULT_UNAVAILABLE = 1,
	CLIENT_RESULT_BUSY = 2,
	CLIENT_RESULT_READY = 3
} ResultStatus;
@@ -48,10 +46,9 @@ typedef enum
typedef enum
{
	CLIENT_INVALID_QUERY = 0,
	CLIENT_QUERY_FAILED = 1,
	CLIENT_QUERY_DONE = 2,
	CLIENT_QUERY_COPY = 3
} QueryStatus;
@@ -59,21 +56,19 @@ typedef enum
typedef enum
{
	CLIENT_INVALID_COPY = 0,
	CLIENT_COPY_MORE = 1,
	CLIENT_COPY_FAILED = 2,
	CLIENT_COPY_DONE = 3
} CopyStatus;

/* Enumeration to track the status of a query in a batch on the client */
typedef enum
{
	CLIENT_INVALID_BATCH_QUERY = 0,
	CLIENT_BATCH_QUERY_FAILED = 1,
	CLIENT_BATCH_QUERY_CONTINUE = 2,
	CLIENT_BATCH_QUERY_DONE = 3
} BatchQueryStatus;
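A minimal standalone sketch of how CONN_INFO_TEMPLATE and the buffer-size constants above combine into a libpq-style connection string (the worker host, port, and database are hypothetical):

#include <stdio.h>

#define CLIENT_CONNECT_TIMEOUT 5
#define STRING_BUFFER_SIZE 1024
#define CONN_INFO_TEMPLATE "host=%s port=%u dbname=%s connect_timeout=%u"

int
main(void)
{
	char connInfoString[STRING_BUFFER_SIZE];

	/* the template fills host, port, dbname, and timeout in order */
	snprintf(connInfoString, STRING_BUFFER_SIZE, CONN_INFO_TEMPLATE,
			 "worker-1", 9700u, "postgres", (unsigned int) CLIENT_CONNECT_TIMEOUT);
	printf("%s\n", connInfoString);
	return 0;
}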

View File

@@ -14,12 +14,12 @@
#include "nodes/parsenodes.h"

/* signal currently executed statement is a master select statement or router execution */
#define EXEC_FLAG_CITUS_MASTER_SELECT 0x100
#define EXEC_FLAG_CITUS_ROUTER_EXECUTOR 0x200

extern void multi_ExecutorStart(QueryDesc *queryDesc, int eflags);
extern void multi_ExecutorRun(QueryDesc *queryDesc,
							  ScanDirection direction, long count);
extern void multi_ExecutorFinish(QueryDesc *queryDesc);
extern void multi_ExecutorEnd(QueryDesc *queryDesc);
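Executor eflags form a bitmask, so these Citus flags sit above PostgreSQL's own EXEC_FLAG_* bits and are tested with bitwise AND. A minimal standalone sketch (flag values copied from the header; the printed message is illustrative only):

#include <stdio.h>

#define EXEC_FLAG_CITUS_MASTER_SELECT 0x100
#define EXEC_FLAG_CITUS_ROUTER_EXECUTOR 0x200

int
main(void)
{
	int eflags = EXEC_FLAG_CITUS_ROUTER_EXECUTOR;

	/* bitwise AND tests membership; distinct bits combine without clashing */
	if (eflags & EXEC_FLAG_CITUS_ROUTER_EXECUTOR)
	{
		printf("statement marked for router execution\n");
	}
	return 0;
}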

View File

@@ -29,7 +29,7 @@ typedef enum JoinRuleType
{
	JOIN_RULE_INVALID_FIRST = 0,
	BROADCAST_JOIN = 1,
	LOCAL_PARTITION_JOIN = 2,
	SINGLE_PARTITION_JOIN = 3,
	DUAL_PARTITION_JOIN = 4,
	CARTESIAN_PRODUCT = 5,
@@ -40,7 +40,6 @@ typedef enum JoinRuleType
	 * RuleNameArray.
	 */
	JOIN_RULE_LAST
} JoinRuleType;
@@ -53,7 +52,6 @@ typedef struct TableEntry
{
	Oid relationId;
	uint32 rangeTableId;
} TableEntry;
@@ -65,14 +63,13 @@ typedef struct TableEntry
 */
typedef struct JoinOrderNode
{
	TableEntry *tableEntry;      /* this node's relation and range table id */
	JoinRuleType joinRuleType;   /* not relevant for the first table */
	JoinType joinType;           /* not relevant for the first table */
	Var *partitionColumn;        /* not relevant for the first table */
	char partitionMethod;
	List *joinClauseList;        /* not relevant for the first table */
	List *shardIntervalList;
} JoinOrderNode;

View File

@@ -55,7 +55,6 @@ typedef enum
	AGGREGATE_SUM = 4,
	AGGREGATE_COUNT = 5,
	AGGREGATE_ARRAY_AGG = 6
} AggregateType;
@@ -69,7 +68,6 @@ typedef enum
	PUSH_DOWN_VALID = 1,
	PUSH_DOWN_NOT_VALID = 2,
	PUSH_DOWN_SPECIAL_CONDITIONS = 3
} PushDownStatus;
@@ -82,7 +80,6 @@ typedef enum
	PULL_UP_INVALID_FIRST = 0,
	PULL_UP_VALID = 1,
	PULL_UP_NOT_VALID = 2
} PullUpStatus;
@@ -97,8 +94,10 @@ typedef enum
 * Please note that the order of elements in this array is tied to the order of
 * values in the preceding AggregateType enum. This order needs to be preserved.
 */
-static const char * const AggregateNames[] = { "invalid", "avg", "min", "max",
-                                               "sum", "count", "array_agg" };
+static const char *const AggregateNames[] = {
+	"invalid", "avg", "min", "max", "sum",
+	"count", "array_agg"
+};

/* Config variable managed via guc.c */
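The enum-to-name pairing works by plain array indexing, which is why the ordering comment matters. A minimal standalone sketch (the enum members below AGGREGATE_SUM are assumed from the name array, since only the tail of the enum appears in this hunk):

#include <stdio.h>

typedef enum
{
	AGGREGATE_INVALID_FIRST = 0,
	AGGREGATE_AVERAGE = 1,   /* assumed member name */
	AGGREGATE_MIN = 2,       /* assumed member name */
	AGGREGATE_MAX = 3,       /* assumed member name */
	AGGREGATE_SUM = 4,
	AGGREGATE_COUNT = 5,
	AGGREGATE_ARRAY_AGG = 6
} AggregateType;

/* array index i must name enum value i, hence the ordering requirement */
static const char *const AggregateNames[] = {
	"invalid", "avg", "min", "max", "sum",
	"count", "array_agg"
};

int
main(void)
{
	AggregateType aggregateType = AGGREGATE_COUNT;
	printf("%s\n", AggregateNames[aggregateType]);   /* prints "count" */
	return 0;
}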

View File

@@ -40,8 +40,8 @@ typedef struct MultiNode
	CitusNodeTag type;
	struct MultiNode *parentNode;

	/* child node(s) are defined in unary and binary nodes */
} MultiNode;
@@ -51,7 +51,6 @@ typedef struct MultiUnaryNode
	MultiNode node;
	struct MultiNode *childNode;
} MultiUnaryNode;
@@ -62,7 +61,6 @@ typedef struct MultiBinaryNode
	struct MultiNode *leftChildNode;
	struct MultiNode *rightChildNode;
} MultiBinaryNode;
@@ -73,7 +71,6 @@ typedef struct MultiBinaryNode
typedef struct MultiTreeRoot
{
	MultiUnaryNode unaryNode;
} MultiTreeRoot;
@@ -91,7 +88,6 @@ typedef struct MultiTable
	Alias *alias;
	Alias *referenceNames;
	Query *subquery;   /* this field is only valid for non-relation subquery types */
} MultiTable;
@@ -100,7 +96,6 @@ typedef struct MultiProject
{
	MultiUnaryNode unaryNode;
	List *columnList;
} MultiProject;
@@ -112,7 +107,6 @@ typedef struct MultiProject
typedef struct MultiCollect
{
	MultiUnaryNode unaryNode;
} MultiCollect;
@@ -125,7 +119,6 @@ typedef struct MultiSelect
{
	MultiUnaryNode unaryNode;
	List *selectClauseList;
} MultiSelect;
@@ -140,7 +133,6 @@ typedef struct MultiJoin
	List *joinClauseList;
	JoinRuleType joinRuleType;
	JoinType joinType;
} MultiJoin;
@@ -150,7 +142,6 @@ typedef struct MultiPartition
	MultiUnaryNode unaryNode;
	Var *partitionColumn;
	uint32 splitPointTableId;
} MultiPartition;
@@ -158,7 +149,6 @@ typedef struct MultiPartition
typedef struct MultiCartesianProduct
{
	MultiBinaryNode binaryNode;
} MultiCartesianProduct;
@@ -183,7 +173,6 @@ typedef struct MultiExtendedOp
	List *sortClauseList;
	Node *limitCount;
	Node *limitOffset;
} MultiExtendedOp;

View File

@@ -40,17 +40,18 @@
	(" UINT64_FORMAT ", %d, %s, '%s', %d, %d)"
#define MERGE_FILES_INTO_TABLE_COMMAND "SELECT worker_merge_files_into_table \
	(" UINT64_FORMAT ", %d, '%s', '%s')"
-#define MERGE_FILES_AND_RUN_QUERY_COMMAND "SELECT worker_merge_files_and_run_query(" UINT64_FORMAT ", %d, '%s', '%s')"
+#define MERGE_FILES_AND_RUN_QUERY_COMMAND \
+	"SELECT worker_merge_files_and_run_query(" UINT64_FORMAT ", %d, '%s', '%s')"

typedef enum CitusRTEKind
{
	CITUS_RTE_RELATION = RTE_RELATION,   /* ordinary relation reference */
	CITUS_RTE_SUBQUERY = RTE_SUBQUERY,   /* subquery in FROM */
	CITUS_RTE_JOIN = RTE_JOIN,           /* join */
	CITUS_RTE_FUNCTION = RTE_FUNCTION,   /* function in FROM */
	CITUS_RTE_VALUES = RTE_VALUES,       /* VALUES (<exprlist>), (<exprlist>), ... */
	CITUS_RTE_CTE = RTE_CTE,             /* common table expr (WITH list element) */
	CITUS_RTE_SHARD,
	CITUS_RTE_REMOTE_QUERY
} CitusRTEKind;
@@ -61,8 +62,7 @@ typedef enum
{
	PARTITION_INVALID_FIRST = 0,
	RANGE_PARTITION_TYPE = 1,
	HASH_PARTITION_TYPE = 2
} PartitionType;
@@ -77,7 +77,6 @@ typedef enum
	MAP_OUTPUT_FETCH_TASK = 5,
	MERGE_FETCH_TASK = 6,
	MODIFY_TASK = 7
} TaskType;
@@ -88,7 +87,6 @@ typedef enum
	TASK_ASSIGNMENT_GREEDY = 1,
	TASK_ASSIGNMENT_ROUND_ROBIN = 2,
	TASK_ASSIGNMENT_FIRST_REPLICA = 3
} TaskAssignmentPolicyType;
@@ -99,7 +97,6 @@ typedef enum
	JOIN_MAP_MERGE_JOB = 1,
	SUBQUERY_MAP_MERGE_JOB = 2,
	TOP_LEVEL_WORKER_JOB = 3
} BoundaryNodeJobType;
@@ -133,7 +130,6 @@ typedef struct MapMergeJob
	ShardInterval **sortedShardIntervalArray;   /* only applies to range partitioning */
	List *mapTaskList;
	List *mergeTaskList;
} MapMergeJob;
@@ -153,18 +149,17 @@ typedef struct Task
	uint64 jobId;
	uint32 taskId;
	char *queryString;
	uint64 anchorShardId;           /* only applies to compute tasks */
	List *taskPlacementList;        /* only applies to compute tasks */
	List *dependedTaskList;         /* only applies to compute tasks */
	uint32 partitionId;
	uint32 upstreamTaskId;          /* only applies to data fetch tasks */
	ShardInterval *shardInterval;   /* only applies to merge tasks */
	bool assignmentConstrained;     /* only applies to merge tasks */
	uint64 shardId;                 /* only applies to shard fetch tasks */
	TaskExecution *taskExecution;   /* used by task tracker executor */
	bool upsertQuery;               /* only applies to modify tasks */
} Task;
@@ -177,7 +172,6 @@ typedef struct RangeTableFragment
	CitusRTEKind fragmentType;
	void *fragmentReference;
	uint32 rangeTableId;
} RangeTableFragment;
@@ -190,7 +184,6 @@ typedef struct JoinSequenceNode
{
	uint32 rangeTableId;
	int32 joiningRangeTableId;
} JoinSequenceNode;
@@ -203,7 +196,6 @@ typedef struct MultiPlan
	Job *workerJob;
	Query *masterQuery;
	char *masterTableName;
} MultiPlan;

View File

@@ -13,8 +13,8 @@
#include "nodes/plannodes.h"
#include "nodes/relation.h"

-extern PlannedStmt *multi_planner(Query *parse, int cursorOptions,
-								  ParamListInfo boundParams);
+extern PlannedStmt * multi_planner(Query *parse, int cursorOptions,
+								   ParamListInfo boundParams);
extern bool HasCitusToplevelNode(PlannedStmt *planStatement);
struct MultiPlan;

View File

@@ -20,9 +20,9 @@
#define MAX_TASK_EXECUTION_FAILURES 3      /* allowed failure count for one task */
#define MAX_TRACKER_FAILURE_COUNT 3        /* allowed failure count for one tracker */
#define REMOTE_NODE_CONNECT_TIMEOUT 4000   /* async connect timeout in ms */
#define RESERVED_FD_COUNT 64               /* file descriptors unavailable to executor */

/* copy out query results */
#define COPY_QUERY_TO_STDOUT_TEXT "COPY (%s) TO STDOUT"
@@ -32,9 +32,9 @@
/* Task tracker executor related defines */
#define TASK_ASSIGNMENT_QUERY "SELECT task_tracker_assign_task \
-("UINT64_FORMAT", %u, %s)"
-#define TASK_STATUS_QUERY "SELECT task_tracker_task_status("UINT64_FORMAT", %u)"
-#define JOB_CLEANUP_QUERY "SELECT task_tracker_cleanup_job("UINT64_FORMAT")"
+("UINT64_FORMAT ", %u, %s)"
+#define TASK_STATUS_QUERY "SELECT task_tracker_task_status("UINT64_FORMAT ", %u)"
+#define JOB_CLEANUP_QUERY "SELECT task_tracker_cleanup_job("UINT64_FORMAT ")"
#define JOB_CLEANUP_TASK_ID INT_MAX
@@ -43,9 +43,9 @@ typedef enum
{
	EXEC_TASK_INVALID_FIRST = 0,
	EXEC_TASK_CONNECT_START = 1,
	EXEC_TASK_CONNECT_POLL = 2,
	EXEC_TASK_FAILED = 3,
	EXEC_FETCH_TASK_LOOP = 4,
	EXEC_FETCH_TASK_START = 5,
	EXEC_FETCH_TASK_RUNNING = 6,
	EXEC_COMPUTE_TASK_START = 7,
@@ -60,7 +60,6 @@ typedef enum
	EXEC_TASK_TRACKER_FAILED = 14,
	EXEC_SOURCE_TASK_TRACKER_RETRY = 15,
	EXEC_SOURCE_TASK_TRACKER_FAILED = 16
} TaskExecStatus;
@@ -74,7 +73,6 @@ typedef enum
	EXEC_TRANSMIT_TRACKER_RETRY = 4,
	EXEC_TRANSMIT_TRACKER_FAILED = 5,
	EXEC_TRANSMIT_DONE = 6
} TransmitExecStatus;
@@ -86,7 +84,6 @@ typedef enum
	TRACKER_CONNECT_POLL = 2,
	TRACKER_CONNECTED = 3,
	TRACKER_CONNECTION_FAILED = 4
} TrackerStatus;
@@ -97,7 +94,6 @@ typedef enum
	MULTI_EXECUTOR_REAL_TIME = 1,
	MULTI_EXECUTOR_TASK_TRACKER = 2,
	MULTI_EXECUTOR_ROUTER = 3
} MultiExecutorType;
@@ -107,7 +103,6 @@ typedef enum
	CONNECT_ACTION_NONE = 0,
	CONNECT_ACTION_OPENED = 1,
	CONNECT_ACTION_CLOSED = 2
} ConnectAction;
@@ -132,7 +127,6 @@ struct TaskExecution
	uint32 querySourceNodeIndex;   /* only applies to map fetch tasks */
	int32 dataFetchTaskIndex;
	uint32 failureCount;
};
@@ -147,7 +141,6 @@ typedef struct TrackerTaskState
	uint32 taskId;
	TaskStatus status;
	StringInfo taskAssignmentQuery;
} TrackerTaskState;
@@ -158,7 +151,7 @@ typedef struct TrackerTaskState
 */
typedef struct TaskTracker
{
	uint32 workerPort;                /* node's port; part of hash table key */
	char workerName[WORKER_LENGTH];   /* node's name; part of hash table key */
	TrackerStatus trackerStatus;
	int32 connectionId;
@@ -171,7 +164,6 @@ typedef struct TaskTracker
	int32 currentTaskIndex;
	bool connectionBusy;
	TrackerTaskState *connectionBusyOnTask;
} TaskTracker;
@@ -184,7 +176,6 @@ typedef struct WorkerNodeState
	uint32 workerPort;
	char workerName[WORKER_LENGTH];
	uint32 openConnectionCount;
} WorkerNodeState;

View File

@@ -21,9 +21,9 @@
 */
typedef struct FormData_pg_dist_partition
{
	Oid logicalrelid;   /* logical relation id; references pg_class oid */
	char partmethod;    /* partition method; see codes below */
	text partkey;       /* partition key expression */
} FormData_pg_dist_partition;

/* ----------------
@@ -37,16 +37,16 @@ typedef FormData_pg_dist_partition *Form_pg_dist_partition;
 * compiler constants for pg_dist_partitions
 * ----------------
 */
#define Natts_pg_dist_partition 3
#define Anum_pg_dist_partition_logicalrelid 1
#define Anum_pg_dist_partition_partmethod 2
#define Anum_pg_dist_partition_partkey 3

/* valid values for partmethod include append, hash, and range */
#define DISTRIBUTE_BY_APPEND 'a'
#define DISTRIBUTE_BY_HASH 'h'
#define DISTRIBUTE_BY_RANGE 'r'
#define REDISTRIBUTE_BY_HASH 'x'

#endif   /* PG_DIST_PARTITION_H */
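The single-character partmethod codes are naturally dispatched with a switch. A minimal standalone sketch (the helper is hypothetical; the code values are copied from above):

#include <stdio.h>

#define DISTRIBUTE_BY_APPEND 'a'
#define DISTRIBUTE_BY_HASH 'h'
#define DISTRIBUTE_BY_RANGE 'r'

/* hypothetical helper: maps a partmethod code to a readable name */
static const char *
PartitionMethodName(char partitionMethod)
{
	switch (partitionMethod)
	{
		case DISTRIBUTE_BY_APPEND:
			return "append";

		case DISTRIBUTE_BY_HASH:
			return "hash";

		case DISTRIBUTE_BY_RANGE:
			return "range";

		default:
			return "unknown";
	}
}

int
main(void)
{
	printf("%s\n", PartitionMethodName(DISTRIBUTE_BY_HASH));   /* prints "hash" */
	return 0;
}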

View File

@@ -22,13 +22,13 @@
 */
typedef struct FormData_pg_dist_shard
{
	Oid logicalrelid;     /* logical relation id; references pg_class oid */
	int64 shardid;        /* global shardId representing remote partition */
	char shardstorage;    /* shard storage type; see codes below */
#ifdef CATALOG_VARLEN     /* variable-length fields start here */
	text shardalias;      /* user specified table name for shard, if any */
	text shardminvalue;   /* partition key's minimum value in shard */
	text shardmaxvalue;   /* partition key's maximum value in shard */
#endif
} FormData_pg_dist_shard;
@@ -43,22 +43,22 @@ typedef FormData_pg_dist_shard *Form_pg_dist_shard;
 * compiler constants for pg_dist_shards
 * ----------------
 */
#define Natts_pg_dist_shard 6
#define Anum_pg_dist_shard_logicalrelid 1
#define Anum_pg_dist_shard_shardid 2
#define Anum_pg_dist_shard_shardstorage 3
#define Anum_pg_dist_shard_shardalias 4
#define Anum_pg_dist_shard_shardminvalue 5
#define Anum_pg_dist_shard_shardmaxvalue 6

/*
 * Valid values for shard storage types include relay file, foreign table,
 * (standard) table and columnar table. Relay file types are currently unused.
 */
#define SHARD_STORAGE_RELAY 'r'
#define SHARD_STORAGE_FOREIGN 'f'
#define SHARD_STORAGE_TABLE 't'
#define SHARD_STORAGE_COLUMNAR 'c'

#endif   /* PG_DIST_SHARD_H */
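The Natts/Anum constants exist so server-side code can pull columns out of a catalog tuple by position. A hedged sketch in the conventional PostgreSQL style (server code, not standalone; the helper name is hypothetical, and the Anum_* constant comes from the header above):

#include "postgres.h"
#include "access/htup_details.h"

/* hypothetical helper: reads pg_dist_shard.shardid from a catalog tuple */
static int64
TupleGetShardId(HeapTuple heapTuple, TupleDesc tupleDescriptor)
{
	bool isNull = false;
	Datum shardIdDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_shardid,
									  tupleDescriptor, &isNull);

	Assert(!isNull);
	return DatumGetInt64(shardIdDatum);
}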

View File

@@ -23,12 +23,12 @@
 */
typedef struct FormData_pg_dist_shard_placement
{
	int64 shardid;        /* global shardId on remote node */
	int32 shardstate;     /* shard state on remote node; see RelayFileState */
	int64 shardlength;    /* shard length on remote node; stored as bigint */
#ifdef CATALOG_VARLEN     /* variable-length fields start here */
	text nodename;        /* remote node's host name */
	int32 nodeport;       /* remote node's port number */
#endif
} FormData_pg_dist_shard_placement;
@@ -43,12 +43,12 @@ typedef FormData_pg_dist_shard_placement *Form_pg_dist_shard_placement;
 * compiler constants for pg_dist_shard_placement
 * ----------------
 */
#define Natts_pg_dist_shard_placement 5
#define Anum_pg_dist_shard_placement_shardid 1
#define Anum_pg_dist_shard_placement_shardstate 2
#define Anum_pg_dist_shard_placement_shardlength 3
#define Anum_pg_dist_shard_placement_nodename 4
#define Anum_pg_dist_shard_placement_nodeport 5

#endif   /* PG_DIST_SHARD_PLACEMENT_H */

View File

@@ -35,7 +35,6 @@ typedef enum
	FILE_CACHED = 2,
	FILE_INACTIVE = 3,
	FILE_TO_DELETE = 4
} RelayFileState;

View File

@@ -29,6 +29,7 @@ typedef enum AdvisoryLocktagClass
	/* values defined in postgres' lockfuncs.c */
	ADV_LOCKTAG_CLASS_INT64 = 1,
	ADV_LOCKTAG_CLASS_INT32 = 2,

	/* CitusDB lock types */
	ADV_LOCKTAG_CLASS_CITUS_SHARD_METADATA = 4,
	ADV_LOCKTAG_CLASS_CITUS_SHARD = 5,

View File

@@ -19,10 +19,10 @@
#include "utils/hsearch.h"

#define HIGH_PRIORITY_TASK_TIME 1          /* assignment time for high priority tasks */
#define RESERVED_JOB_ID 1                  /* reserved for cleanup and shutdown tasks */
#define SHUTDOWN_MARKER_TASK_ID UINT_MAX   /* used to identify task tracker shutdown */
#define MAX_TASK_FAILURE_COUNT 2           /* allowed failure count for one task */
#define LOCAL_HOST_NAME "localhost"        /* connect to local backends using this name */
#define TASK_CALL_STRING_SIZE 12288        /* max length of task call string */
#define TEMPLATE0_NAME "template0"         /* skip job schema cleanup for template0 */
@@ -37,13 +37,13 @@
typedef enum
{
	TASK_STATUS_INVALID_FIRST = 0,
	TASK_ASSIGNED = 1,           /* master node and task tracker */
	TASK_SCHEDULED = 2,
	TASK_RUNNING = 3,
	TASK_FAILED = 4,
	TASK_PERMANENTLY_FAILED = 5,
	TASK_SUCCEEDED = 6,
	TASK_CANCEL_REQUESTED = 7,   /* master node only */
	TASK_CANCELED = 8,
	TASK_TO_REMOVE = 9,
@@ -63,7 +63,6 @@ typedef enum
	 * TASK_STATUS_LAST, should never have their numbers changed.
	 */
	TASK_STATUS_LAST
} TaskStatus;
@@ -76,16 +75,15 @@ typedef enum
 */
typedef struct WorkerTask
{
	uint64 jobId;        /* job id (upper 32-bits reserved); part of hash table key */
	uint32 taskId;       /* task id; part of hash table key */
	uint32 assignedAt;   /* task assignment time in epoch seconds */
	char taskCallString[TASK_CALL_STRING_SIZE];   /* query or function call string */
	TaskStatus taskStatus;            /* task's current execution status */
	char databaseName[NAMEDATALEN];   /* name to use for local backend connection */
	int32 connectionId;               /* connection id to local backend */
	uint32 failureCount;              /* number of task failures */
} WorkerTask;
@@ -97,6 +95,7 @@ typedef struct WorkerTasksSharedStateData
{
	/* Hash table shared by the task tracker and task tracker protocol functions */
	HTAB *taskHash;

	/* Lock protecting workerNodesHash */
	LWLock *taskHashLock;
} WorkerTasksSharedStateData;
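Since taskHash lives in shared memory, readers and writers serialize on taskHashLock. A hedged sketch of the conventional PostgreSQL pattern (server code, not standalone; the WorkerTasksSharedState global and the (jobId, taskId) key prefix are assumptions):

#include "postgres.h"
#include "storage/lwlock.h"
#include "utils/hsearch.h"

/* hypothetical lookup: find one task entry while holding the shared lock;
 * assumes the hash key is the (jobId, taskId) prefix of WorkerTask */
static WorkerTask *
WorkerTaskLookup(uint64 jobId, uint32 taskId)
{
	WorkerTask searchTask;
	WorkerTask *workerTask = NULL;

	memset(&searchTask, 0, sizeof(WorkerTask));
	searchTask.jobId = jobId;
	searchTask.taskId = taskId;

	LWLockAcquire(WorkerTasksSharedState->taskHashLock, LW_SHARED);
	workerTask = (WorkerTask *) hash_search(WorkerTasksSharedState->taskHash,
											&searchTask, HASH_FIND, NULL);
	LWLockRelease(WorkerTasksSharedState->taskHashLock);

	return workerTask;
}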

View File

@@ -43,12 +43,11 @@
 */
typedef struct WorkerNode
{
	uint32 workerPort;                /* node's port; part of hash table key */
	char workerName[WORKER_LENGTH];   /* node's name; part of hash table key */
	char workerRack[WORKER_LENGTH];   /* node's network location */
	bool inWorkerFile;                /* is node in current membership file? */
} WorkerNode;

View File

@@ -64,8 +64,7 @@ typedef struct RangePartitionContext
{
	FmgrInfo *comparisonFunction;
	Datum *splitPointArray;
	int32 splitPointCount;
} RangePartitionContext;
@@ -77,7 +76,6 @@ typedef struct HashPartitionContext
	FmgrInfo *hashFunction;
	uint32 partitionCount;
} HashPartitionContext;
@@ -88,16 +86,16 @@ typedef struct HashPartitionContext
 */
typedef struct PartialCopyStateData
{
	StringInfo fe_msgbuf;      /* used for all dests during COPY TO, only for
								* dest == COPY_NEW_FE in COPY FROM */
	int file_encoding;         /* file or remote side's character encoding */
	bool need_transcoding;     /* file encoding diff from server? */
	bool binary;               /* binary format? */
	char *null_print;          /* NULL marker string (server encoding!) */
	char *null_print_client;   /* same converted to file encoding */
	char *delim;               /* column delimiter (must be 1 byte) */
	MemoryContext rowcontext;  /* per-row evaluation context */
} PartialCopyStateData;

typedef struct PartialCopyStateData *PartialCopyState;
@@ -114,7 +112,6 @@ typedef struct FileOutputStream
	File fileDescriptor;
	StringInfo fileBuffer;
	StringInfo filePath;
} FileOutputStream;