mirror of https://github.com/citusdata/citus.git

First formatting attempt

Skipped csql, ruleutils, readfuncs, and functions obviously copied from PostgreSQL. Seeing how this looks, then continuing.

pull/327/head
parent 334f800016
commit fdb37682b2
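The hunks below are mechanical style changes, not behavior changes. As a rough illustration of the conventions this pass applies — spaces inside brace initializers (`{ 0, 0 }`), braces around every switch case body, the pointer star bound to the variable name rather than the type, and long argument lists wrapped onto aligned continuation lines — here is a minimal, self-contained C sketch; the enum, function, and variable names in it are invented for illustration and do not come from the Citus tree:

    #include <stdio.h>
    #include <sys/time.h>

    /* hypothetical message type, for illustration only */
    typedef enum { MESSAGE_DATA, MESSAGE_DONE } MessageType;

    static int
    HandleMessage(MessageType messageType)
    {
        struct timeval immediateTimeout = { 0, 0 }; /* spaces inside brace initializers */
        int *copyDone = NULL;                       /* star binds to the name */
        int done = 0;

        copyDone = &done;

        switch (messageType)
        {
            case MESSAGE_DATA:
            {
                /* every case body gets its own braces */
                *copyDone = 0;
                break;
            }

            case MESSAGE_DONE:
            {
                *copyDone = 1;
                break;
            }
        }

        /* long argument lists wrap and align under the opening parenthesis */
        printf("copy done: %d (timeout %ld.%06ld)\n", *copyDone,
               (long) immediateTimeout.tv_sec,
               (long) immediateTimeout.tv_usec);

        return *copyDone;
    }

    int
    main(void)
    {
        return HandleMessage(MESSAGE_DONE) == 1 ? 0 : 1;
    }

Wrapping each case body in braces also lets a case declare its own locals, which is why the large ManageTaskExecution switch below keeps per-state variables such as connectionId inside the case braces.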
@@ -197,11 +197,11 @@ master_create_distributed_table(PG_FUNCTION_ARGS)
 	if (distributionMethod == DISTRIBUTE_BY_APPEND)
 	{
 		ereport(WARNING, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 						  errmsg("table \"%s\" has a unique constraint",
 								 distributedRelationName),
 						  errdetail("Unique constraints and primary keys on "
 									"append-partitioned tables cannot be enforced."),
 						  errhint("Consider using hash partitioning.")));
 	}

 	attributeCount = indexInfo->ii_NumIndexAttrs;

@@ -136,7 +136,7 @@ static File
 FileOpenForTransmit(const char *filename, int fileFlags, int fileMode)
 {
 	File fileDesc = -1;
 	int fileStated = -1;
 	struct stat fileStat;

 	fileStated = stat(filename, &fileStat);

@@ -145,7 +145,7 @@ FileOpenForTransmit(const char *filename, int fileFlags, int fileMode)
 	if (S_ISDIR(fileStat.st_mode))
 	{
 		ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE),
 						errmsg("\"%s\" is a directory", filename)));
 	}
 }

@@ -270,18 +270,28 @@ ReceiveCopyData(StringInfo copyData)

 	switch (messageType)
 	{
 		case 'd':       /* CopyData */
+		{
 			copyDone = false;
 			break;
-		case 'c':       /* CopyDone */
+		}
+
+		case 'c':       /* CopyDone */
+		{
 			copyDone = true;
 			break;
-		case 'f':       /* CopyFail */
+		}
+
+		case 'f':       /* CopyFail */
+		{
 			ereport(ERROR, (errcode(ERRCODE_QUERY_CANCELED),
 							errmsg("COPY data failed: %s", pq_getmsgstring(copyData))));
 			break;
-		case 'H':       /* Flush */
-		case 'S':       /* Sync */
+		}
+
+		case 'H':       /* Flush */
+		case 'S':       /* Sync */
+		{
 			/*
 			 * Ignore Flush/Sync for the convenience of client libraries (such
 			 * as libpq) that may send those without noticing that the command

@@ -289,11 +299,15 @@ ReceiveCopyData(StringInfo copyData)
 			 */
 			copyDone = false;
 			break;
+		}
+
 		default:
+		{
 			ereport(ERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION),
 							errmsg("unexpected message type 0x%02X during COPY data",
 								   messageType)));
 			break;
+		}
 	}

 	return copyDone;

@@ -301,7 +301,7 @@ MultiClientCancel(int32 connectionId)
 	if (cancelSent == 0)
 	{
 		ereport(WARNING, (errmsg("could not issue cancel request"),
 						  errdetail("Client error: %s", errorBuffer)));

 		canceled = false;
 	}

@@ -589,7 +589,7 @@ MultiClientCopyData(int32 connectionId, int32 fileDescriptor)
 	while (receiveLength > 0)
 	{
 		/* received copy data; append these data to file */
 		int appended = -1;
 		errno = 0;

 		appended = write(fileDescriptor, receiveBuffer, receiveLength);

@@ -706,7 +706,7 @@ ClientConnectionReady(PGconn *connection, PostgresPollingStatusType pollingStatu
 	fd_set readFileDescriptorSet;
 	fd_set writeFileDescriptorSet;
 	fd_set exceptionFileDescriptorSet;
-	struct timeval immediateTimeout = {0, 0};
+	struct timeval immediateTimeout = { 0, 0 };
 	int connectionFileDescriptor = PQsocket(connection);

 	FD_ZERO(&readFileDescriptorSet);

@@ -157,7 +157,6 @@ multi_ExecutorStart(QueryDesc *queryDesc, int eflags)
 			queryDesc->plannedstmt = masterSelectPlan;
 			eflags |= EXEC_FLAG_CITUS_MASTER_SELECT;
 		}
-
 	}

 	/* if the execution is not done for router executor, drop into standard executor */

@@ -253,7 +252,7 @@ multi_ExecutorEnd(QueryDesc *queryDesc)
 		RangeTblEntry *rangeTableEntry = linitial(planStatement->rtable);
 		Oid masterTableRelid = rangeTableEntry->relid;

-		ObjectAddress masterTableObject = {InvalidOid, InvalidOid, 0};
+		ObjectAddress masterTableObject = { InvalidOid, InvalidOid, 0 };

 		masterTableObject.classId = RelationRelationId;
 		masterTableObject.objectId = masterTableRelid;

@@ -89,7 +89,7 @@ MultiRealTimeExecute(Job *job)
 	}

 	/* loop around until all tasks complete, one task fails, or user cancels */
-	while ( !(allTasksCompleted || taskFailed || QueryCancelPending) )
+	while (!(allTasksCompleted || taskFailed || QueryCancelPending))
 	{
 		uint32 taskCount = list_length(taskList);
 		uint32 completedTaskCount = 0;
@@ -230,333 +230,338 @@ ManageTaskExecution(Task *task, TaskExecution *taskExecution)

 	switch (currentStatus)
 	{
 		case EXEC_TASK_CONNECT_START:
 		{
 			int32 connectionId = INVALID_CONNECTION_ID;
 			char *nodeDatabase = NULL;

 			/* we use the same database name on the master and worker nodes */
 			nodeDatabase = get_database_name(MyDatabaseId);

 			connectionId = MultiClientConnectStart(nodeName, nodePort, nodeDatabase);
 			connectionIdArray[currentIndex] = connectionId;

 			/* if valid, poll the connection until the connection is initiated */
 			if (connectionId != INVALID_CONNECTION_ID)
 			{
 				taskStatusArray[currentIndex] = EXEC_TASK_CONNECT_POLL;
 				taskExecution->connectPollCount = 0;
 				connectAction = CONNECT_ACTION_OPENED;
 			}
 			else
 			{
 				AdjustStateForFailure(taskExecution);
 			}

 			break;
 		}

 		case EXEC_TASK_CONNECT_POLL:
 		{
 			int32 connectionId = connectionIdArray[currentIndex];
 			ConnectStatus pollStatus = MultiClientConnectPoll(connectionId);

 			/*
 			 * If the connection is established, we reset the data fetch counter and
 			 * change our status to data fetching.
 			 */
 			if (pollStatus == CLIENT_CONNECTION_READY)
 			{
 				taskExecution->dataFetchTaskIndex = -1;
 				taskStatusArray[currentIndex] = EXEC_FETCH_TASK_LOOP;
 			}
 			else if (pollStatus == CLIENT_CONNECTION_BUSY)
 			{
 				taskStatusArray[currentIndex] = EXEC_TASK_CONNECT_POLL;
 			}
 			else if (pollStatus == CLIENT_CONNECTION_BAD)
 			{
 				taskStatusArray[currentIndex] = EXEC_TASK_FAILED;
 			}

 			/* now check if we have been trying to connect for too long */
 			taskExecution->connectPollCount++;
 			if (pollStatus == CLIENT_CONNECTION_BUSY)
 			{
 				uint32 maxCount = REMOTE_NODE_CONNECT_TIMEOUT / RemoteTaskCheckInterval;
 				uint32 currentCount = taskExecution->connectPollCount;
 				if (currentCount >= maxCount)
 				{
-					ereport(WARNING, (errmsg("could not establish asynchronous connection "
-											 "after %u ms", REMOTE_NODE_CONNECT_TIMEOUT)));
+					ereport(WARNING, (errmsg("could not establish asynchronous "
+											 "connection after %u ms",
+											 REMOTE_NODE_CONNECT_TIMEOUT)));

 					taskStatusArray[currentIndex] = EXEC_TASK_FAILED;
 				}
 			}

 			break;
 		}

 		case EXEC_TASK_FAILED:
 		{
 			/*
 			 * On task failure, we close the connection. We also reset our execution
 			 * status assuming that we might fail on all other worker nodes and come
 			 * back to this failed node. In that case, we will retry the same fetch
 			 * and compute task(s) on this node again.
 			 */
 			int32 connectionId = connectionIdArray[currentIndex];
 			MultiClientDisconnect(connectionId);
 			connectionIdArray[currentIndex] = INVALID_CONNECTION_ID;
 			connectAction = CONNECT_ACTION_CLOSED;

 			taskStatusArray[currentIndex] = EXEC_TASK_CONNECT_START;

 			/* try next worker node */
 			AdjustStateForFailure(taskExecution);

 			break;
 		}

 		case EXEC_FETCH_TASK_LOOP:
 		{
 			List *dataFetchTaskList = task->dependedTaskList;
 			int32 dataFetchTaskCount = list_length(dataFetchTaskList);

 			/* move to the next data fetch task */
 			taskExecution->dataFetchTaskIndex++;

 			if (taskExecution->dataFetchTaskIndex < dataFetchTaskCount)
 			{
 				taskStatusArray[currentIndex] = EXEC_FETCH_TASK_START;
 			}
 			else
 			{
 				taskStatusArray[currentIndex] = EXEC_COMPUTE_TASK_START;
 			}

 			break;
 		}

 		case EXEC_FETCH_TASK_START:
 		{
 			List *dataFetchTaskList = task->dependedTaskList;
 			int32 dataFetchTaskIndex = taskExecution->dataFetchTaskIndex;
-			Task *dataFetchTask = (Task *) list_nth(dataFetchTaskList, dataFetchTaskIndex);
+			Task *dataFetchTask = (Task *) list_nth(dataFetchTaskList,
+													dataFetchTaskIndex);

 			char *dataFetchQuery = dataFetchTask->queryString;
 			int32 connectionId = connectionIdArray[currentIndex];

 			bool querySent = MultiClientSendQuery(connectionId, dataFetchQuery);
 			if (querySent)
 			{
 				taskStatusArray[currentIndex] = EXEC_FETCH_TASK_RUNNING;
 			}
 			else
 			{
 				taskStatusArray[currentIndex] = EXEC_TASK_FAILED;
 			}

 			break;
 		}

 		case EXEC_FETCH_TASK_RUNNING:
 		{
 			int32 connectionId = connectionIdArray[currentIndex];
 			ResultStatus resultStatus = MultiClientResultStatus(connectionId);
 			QueryStatus queryStatus = CLIENT_INVALID_QUERY;

 			/* check if query results are in progress or unavailable */
 			if (resultStatus == CLIENT_RESULT_BUSY)
 			{
 				taskStatusArray[currentIndex] = EXEC_FETCH_TASK_RUNNING;
 				break;
 			}
 			else if (resultStatus == CLIENT_RESULT_UNAVAILABLE)
 			{
 				taskStatusArray[currentIndex] = EXEC_TASK_FAILED;
 				break;
 			}

 			Assert(resultStatus == CLIENT_RESULT_READY);

 			/*
 			 * If the query executed successfully, loop onto the next data fetch
 			 * task. Else if the query failed, try data fetching on another node.
 			 */
 			queryStatus = MultiClientQueryStatus(connectionId);
 			if (queryStatus == CLIENT_QUERY_DONE)
 			{
 				taskStatusArray[currentIndex] = EXEC_FETCH_TASK_LOOP;
 			}
 			else if (queryStatus == CLIENT_QUERY_FAILED)
 			{
 				taskStatusArray[currentIndex] = EXEC_TASK_FAILED;
 			}
 			else
 			{
 				ereport(FATAL, (errmsg("invalid query status: %d", queryStatus)));
 			}

 			break;
 		}

 		case EXEC_COMPUTE_TASK_START:
 		{
 			int32 connectionId = connectionIdArray[currentIndex];
 			bool querySent = false;

 			/* construct new query to copy query results to stdout */
 			char *queryString = task->queryString;
 			StringInfo computeTaskQuery = makeStringInfo();
 			if (BinaryMasterCopyFormat)
 			{
-				appendStringInfo(computeTaskQuery, COPY_QUERY_TO_STDOUT_BINARY, queryString);
+				appendStringInfo(computeTaskQuery, COPY_QUERY_TO_STDOUT_BINARY,
+								 queryString);
 			}
 			else
 			{
-				appendStringInfo(computeTaskQuery, COPY_QUERY_TO_STDOUT_TEXT, queryString);
+				appendStringInfo(computeTaskQuery, COPY_QUERY_TO_STDOUT_TEXT,
+								 queryString);
 			}

 			querySent = MultiClientSendQuery(connectionId, computeTaskQuery->data);
 			if (querySent)
 			{
 				taskStatusArray[currentIndex] = EXEC_COMPUTE_TASK_RUNNING;
 			}
 			else
 			{
 				taskStatusArray[currentIndex] = EXEC_TASK_FAILED;
 			}

 			break;
 		}

 		case EXEC_COMPUTE_TASK_RUNNING:
 		{
 			int32 connectionId = connectionIdArray[currentIndex];
 			ResultStatus resultStatus = MultiClientResultStatus(connectionId);
 			QueryStatus queryStatus = CLIENT_INVALID_QUERY;

 			/* check if query results are in progress or unavailable */
 			if (resultStatus == CLIENT_RESULT_BUSY)
 			{
 				taskStatusArray[currentIndex] = EXEC_COMPUTE_TASK_RUNNING;
 				break;
 			}
 			else if (resultStatus == CLIENT_RESULT_UNAVAILABLE)
 			{
 				taskStatusArray[currentIndex] = EXEC_TASK_FAILED;
 				break;
 			}

 			Assert(resultStatus == CLIENT_RESULT_READY);

 			/* check if our request to copy query results has been acknowledged */
 			queryStatus = MultiClientQueryStatus(connectionId);
 			if (queryStatus == CLIENT_QUERY_COPY)
 			{
 				StringInfo jobDirectoryName = JobDirectoryName(task->jobId);
 				StringInfo taskFilename = TaskFilename(jobDirectoryName, task->taskId);

 				char *filename = taskFilename->data;
 				int fileFlags = (O_APPEND | O_CREAT | O_RDWR | O_TRUNC | PG_BINARY);
 				int fileMode = (S_IRUSR | S_IWUSR);

 				int32 fileDescriptor = BasicOpenFile(filename, fileFlags, fileMode);
 				if (fileDescriptor >= 0)
 				{
 					/*
 					 * All files inside the job directory get automatically cleaned
 					 * up on transaction commit or abort.
 					 */
 					fileDescriptorArray[currentIndex] = fileDescriptor;
 					taskStatusArray[currentIndex] = EXEC_COMPUTE_TASK_COPYING;
 				}
 				else
 				{
 					ereport(WARNING, (errcode_for_file_access(),
-									  errmsg("could not open file \"%s\": %m", filename)));
+									  errmsg("could not open file \"%s\": %m",
+											 filename)));

 					taskStatusArray[currentIndex] = EXEC_TASK_FAILED;
 				}
 			}
 			else if (queryStatus == CLIENT_QUERY_FAILED)
 			{
 				taskStatusArray[currentIndex] = EXEC_TASK_FAILED;
 			}
 			else
 			{
 				ereport(FATAL, (errmsg("invalid query status: %d", queryStatus)));
 			}

 			break;
 		}

 		case EXEC_COMPUTE_TASK_COPYING:
 		{
 			int32 connectionId = connectionIdArray[currentIndex];
 			int32 fileDesc = fileDescriptorArray[currentIndex];
 			int closed = -1;

 			/* copy data from worker node, and write to local file */
 			CopyStatus copyStatus = MultiClientCopyData(connectionId, fileDesc);

 			/* if worker node will continue to send more data, keep reading */
 			if (copyStatus == CLIENT_COPY_MORE)
 			{
 				taskStatusArray[currentIndex] = EXEC_COMPUTE_TASK_COPYING;
 			}
 			else if (copyStatus == CLIENT_COPY_DONE)
 			{
 				closed = close(fileDesc);
 				fileDescriptorArray[currentIndex] = -1;

 				if (closed >= 0)
 				{
 					taskStatusArray[currentIndex] = EXEC_TASK_DONE;

 					/* we are done executing; we no longer need the connection */
 					MultiClientDisconnect(connectionId);
 					connectionIdArray[currentIndex] = INVALID_CONNECTION_ID;
 					connectAction = CONNECT_ACTION_CLOSED;
 				}
 				else
 				{
 					ereport(WARNING, (errcode_for_file_access(),
 									  errmsg("could not close copied file: %m")));

 					taskStatusArray[currentIndex] = EXEC_TASK_FAILED;
 				}
 			}
 			else if (copyStatus == CLIENT_COPY_FAILED)
 			{
 				taskStatusArray[currentIndex] = EXEC_TASK_FAILED;

 				closed = close(fileDesc);
 				fileDescriptorArray[currentIndex] = -1;

 				if (closed < 0)
 				{
 					ereport(WARNING, (errcode_for_file_access(),
 									  errmsg("could not close copy file: %m")));
 				}
 			}

 			break;
 		}

 		case EXEC_TASK_DONE:
 		{
 			/* we are done with this task's execution */
 			break;
 		}

 		default:
 		{
 			/* we fatal here to avoid leaking client-side resources */
 			ereport(FATAL, (errmsg("invalid execution status: %d", currentStatus)));
 			break;
 		}
 	}

 	return connectAction;
@@ -80,6 +80,7 @@ RouterExecutorStart(QueryDesc *queryDesc, int eflags, Task *task)
 	queryDesc->estate = executorState;

 #if (PG_VERSION_NUM < 90500)
+
 	/* make sure that upsertQuery is false for versions that UPSERT is not available */
 	Assert(task->upsertQuery == false);
 #endif

@@ -177,14 +178,14 @@ RouterExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long count, Tas
 	if (!ScanDirectionIsForward(direction))
 	{
 		ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 						errmsg("scan directions other than forward scans "
 							   "are unsupported")));
 	}
 	if (count != 0)
 	{
 		ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 						errmsg("fetching rows from a query using a cursor "
 							   "is unsupported")));
 	}

 	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

@@ -210,7 +211,7 @@ RouterExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long count, Tas
 	else
 	{
 		ereport(ERROR, (errmsg("unrecognized operation code: %d",
 							   (int) operation)));
 	}

 	if (queryDesc->totaltime != NULL)

@@ -219,9 +220,9 @@ RouterExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long count, Tas
 	}

 	MemoryContextSwitchTo(oldcontext);
 }


 /*
  * ExecuteDistributedModify is the main entry point for modifying distributed
  * tables. A distributed modification is successful if any placement of the

@@ -532,9 +533,10 @@ StoreQueryResult(PGconn *connection, TupleDesc tupleDescriptor,
 	return true;
 }

+
 /*
  * RouterExecutorFinish cleans up after a distributed execution.
  */
 void
 RouterExecutorFinish(QueryDesc *queryDesc)
 {

@@ -303,13 +303,13 @@ AdjustStateForFailure(TaskExecution *taskExecution)

 	if (taskExecution->currentNodeIndex < maxNodeIndex)
 	{
 		taskExecution->currentNodeIndex++;      /* try next worker node */
 	}
 	else
 	{
 		taskExecution->currentNodeIndex = 0;    /* go back to the first worker node */
 	}

 	taskExecution->dataFetchTaskIndex = -1;     /* reset data fetch counter */
 	taskExecution->failureCount++;              /* record failure */
 }
File diff suppressed because it is too large
@@ -39,9 +39,9 @@
  */
 struct DropRelationCallbackState
 {
 	char relkind;
 	Oid heapOid;
 	bool concurrent;
 };


@@ -190,10 +190,10 @@ multi_ProcessUtility(Node *parsetree,
 	}
 	else if (IsA(parsetree, CreateRoleStmt) && CitusDBHasBeenLoaded())
 	{
 		ereport(NOTICE, (errmsg("CitusDB does not support CREATE ROLE/USER "
 								"for distributed databases"),
 						 errdetail("Multiple roles are currently supported "
 								   "only for local tables")));
 	}

 	/* now drop into standard process utility */

@@ -757,7 +757,7 @@ IsAlterTableRenameStmt(RenameStmt *renameStmt)
 		isAlterTableRenameStmt = true;
 	}

-#if (PG_VERSION_NUM >=90500)
+#if (PG_VERSION_NUM >= 90500)
 	else if (renameStmt->renameType == OBJECT_TABCONSTRAINT)
 	{
 		isAlterTableRenameStmt = true;

@@ -905,8 +905,9 @@ ExecuteCommandOnWorkerShards(Oid relationId, const char *commandString,
 	}
 	else
 	{
-		ereport(DEBUG2, (errmsg("applied command on shard " UINT64_FORMAT " on "
-								"node %s:%d", shardId, workerName, workerPort)));
+		ereport(DEBUG2, (errmsg("applied command on shard " UINT64_FORMAT
+								" on node %s:%d", shardId, workerName,
+								workerPort)));
 	}

 	isFirstPlacement = false;

@@ -185,7 +185,7 @@ master_create_worker_shards(PG_FUNCTION_ARGS)
 		LockShardDistributionMetadata(shardId, ExclusiveLock);

 		CreateShardPlacements(shardId, ddlCommandList, workerNodeList,
 							  roundRobinNodeIndex, replicationFactor);

 		InsertShardRow(distributedTableId, shardId, shardStorageType,
 					   minHashTokenText, maxHashTokenText);

@@ -115,9 +115,9 @@ master_apply_delete_command(PG_FUNCTION_ARGS)
 	if ((partitionMethod == DISTRIBUTE_BY_HASH) && (deleteCriteria != NULL))
 	{
 		ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 						errmsg("cannot delete from distributed table"),
 						errdetail("Delete statements on hash-partitioned tables "
 								  "with where clause is not supported")));
 	}

 	CheckDeleteCriteria(deleteCriteria);

@@ -138,15 +138,15 @@ master_apply_delete_command(PG_FUNCTION_ARGS)
 	else
 	{
 		deletableShardIntervalList = ShardsMatchingDeleteCriteria(relationId,
 																  shardIntervalList,
 																  deleteCriteria);
 	}

 	foreach(shardIntervalCell, deletableShardIntervalList)
 	{
 		List *shardPlacementList = NIL;
 		List *droppedPlacementList = NIL;
-		List *lingeringPlacementList= NIL;
+		List *lingeringPlacementList = NIL;
 		ListCell *shardPlacementCell = NULL;
 		ListCell *droppedPlacementCell = NULL;
 		ListCell *lingeringPlacementCell = NULL;

@@ -167,7 +167,8 @@ master_apply_delete_command(PG_FUNCTION_ARGS)
 		shardPlacementList = ShardPlacementList(shardId);
 		foreach(shardPlacementCell, shardPlacementList)
 		{
-			ShardPlacement *shardPlacement = (ShardPlacement *) lfirst(shardPlacementCell);
+			ShardPlacement *shardPlacement =
+				(ShardPlacement *) lfirst(shardPlacementCell);
 			char *workerName = shardPlacement->nodeName;
 			uint32 workerPort = shardPlacement->nodePort;
 			bool dropSuccessful = false;

@@ -176,14 +177,17 @@ master_apply_delete_command(PG_FUNCTION_ARGS)
 			char tableType = get_rel_relkind(relationId);
 			if (tableType == RELKIND_RELATION)
 			{
-				appendStringInfo(workerDropQuery, DROP_REGULAR_TABLE_COMMAND, quotedShardName);
+				appendStringInfo(workerDropQuery, DROP_REGULAR_TABLE_COMMAND,
+								 quotedShardName);
 			}
 			else if (tableType == RELKIND_FOREIGN_TABLE)
 			{
-				appendStringInfo(workerDropQuery, DROP_FOREIGN_TABLE_COMMAND, quotedShardName);
+				appendStringInfo(workerDropQuery, DROP_FOREIGN_TABLE_COMMAND,
+								 quotedShardName);
 			}

-			dropSuccessful = ExecuteRemoteCommand(workerName, workerPort, workerDropQuery);
+			dropSuccessful = ExecuteRemoteCommand(workerName, workerPort,
+												  workerDropQuery);
 			if (dropSuccessful)
 			{
 				droppedPlacementList = lappend(droppedPlacementList, shardPlacement);

@@ -227,12 +231,13 @@ master_apply_delete_command(PG_FUNCTION_ARGS)

 		if (QueryCancelPending)
 		{
-			ereport(WARNING, (errmsg("cancel requests are ignored during shard deletion")));
+			ereport(WARNING, (errmsg("cancel requests are ignored during shard "
+									 "deletion")));
 			QueryCancelPending = false;
 		}

 		RESUME_INTERRUPTS();
 	}

 	deleteCriteriaShardCount = list_length(deletableShardIntervalList);
 	PG_RETURN_INT32(deleteCriteriaShardCount);

@@ -257,7 +262,7 @@ CheckTableCount(Query *deleteQuery)
 static void
 CheckDeleteCriteria(Node *deleteCriteria)
 {
 	bool simpleOpExpression = true;

 	if (deleteCriteria == NULL)
 	{

@@ -298,15 +303,15 @@ CheckDeleteCriteria(Node *deleteCriteria)
 }


 /*
  * CheckPartitionColumn checks that the given where clause is based only on the
  * partition key of the given relation id.
  */
 static void
 CheckPartitionColumn(Oid relationId, Node *whereClause)
 {
 	Var *partitionColumn = PartitionKey(relationId);
 	ListCell *columnCell = NULL;

 	List *columnList = pull_var_clause_default(whereClause);
 	foreach(columnCell, columnList)

@@ -332,7 +337,7 @@ CheckPartitionColumn(Oid relationId, Node *whereClause)
  */
 static List *
 ShardsMatchingDeleteCriteria(Oid relationId, List *shardIntervalList,
 							 Node *deleteCriteria)
 {
 	List *dropShardIntervalList = NIL;
 	List *deleteCriteriaList = NIL;

@@ -219,7 +219,7 @@ ShardLength(uint64 shardId)
 	if (shardPlacementList == NIL)
 	{
 		ereport(ERROR, (errmsg("could not find length of shard " UINT64_FORMAT, shardId),
 						errdetail("Could not find any shard placements for the shard.")));
 	}
 	else
 	{

@@ -49,7 +49,7 @@

 /* Shard related configuration */
 int ShardReplicationFactor = 2; /* desired replication factor for shards */
 int ShardMaxSize = 1048576;     /* maximum size in KB one shard can grow to */
 int ShardPlacementPolicy = SHARD_PLACEMENT_ROUND_ROBIN;


@@ -252,7 +252,7 @@ Datum
 master_get_new_shardid(PG_FUNCTION_ARGS)
 {
 	text *sequenceName = cstring_to_text(SHARDID_SEQUENCE_NAME);
 	Oid sequenceId = ResolveRelationId(sequenceName);
 	Datum sequenceIdDatum = ObjectIdGetDatum(sequenceId);

 	/* generate new and unique shardId from sequence */

@@ -281,7 +281,7 @@ master_get_local_first_candidate_nodes(PG_FUNCTION_ARGS)

 	if (SRF_IS_FIRSTCALL())
 	{
 		MemoryContext oldContext = NULL;
 		TupleDesc tupleDescriptor = NULL;
 		uint32 liveNodeCount = 0;
 		bool hasOid = false;

@@ -396,7 +396,7 @@ master_get_round_robin_candidate_nodes(PG_FUNCTION_ARGS)

 	if (SRF_IS_FIRSTCALL())
 	{
 		MemoryContext oldContext = NULL;
 		TupleDesc tupleDescriptor = NULL;
 		List *workerNodeList = NIL;
 		TypeFuncClass resultTypeClass = 0;

@@ -477,7 +477,7 @@ master_get_active_worker_nodes(PG_FUNCTION_ARGS)

 	if (SRF_IS_FIRSTCALL())
 	{
 		MemoryContext oldContext = NULL;
 		List *workerNodeList = NIL;
 		uint32 workerNodeCount = 0;
 		TupleDesc tupleDescriptor = NULL;

@@ -567,7 +567,7 @@ GetTableDDLEvents(Oid relationId)

 	Relation pgIndex = NULL;
 	SysScanDesc scanDescriptor = NULL;
 	ScanKeyData scanKey[1];
 	int scanKeyCount = 1;
 	HeapTuple heapTuple = NULL;

@@ -695,8 +695,8 @@ hostname_client_addr(void)
 	Port *port = MyProcPort;
 	char *remoteHost = NULL;
 	int remoteHostLen = NI_MAXHOST;
 	int flags = NI_NAMEREQD;    /* require fully qualified hostname */
 	int nameFound = 0;

 	if (port == NULL)
 	{

@@ -709,10 +709,15 @@ hostname_client_addr(void)
 #ifdef HAVE_IPV6
 		case AF_INET6:
 #endif
-			break;
+		{
+			break;
+		}
+
 		default:
+		{
 			ereport(ERROR, (errmsg("invalid address family in connection")));
 			break;
+		}
 	}

 	remoteHost = palloc0(remoteHostLen);

@@ -93,7 +93,7 @@ master_create_empty_shard(PG_FUNCTION_ARGS)
 	if (partitionMethod == DISTRIBUTE_BY_HASH)
 	{
 		ereport(ERROR, (errmsg("relation \"%s\" is a hash partitioned table",
 							   relationName),
 						errdetail("We currently don't support creating shards "
 								  "on hash-partitioned tables")));
 	}

@@ -128,7 +128,7 @@ master_create_empty_shard(PG_FUNCTION_ARGS)
 	}

 	CreateShardPlacements(shardId, ddlEventList, candidateNodeList, 0,
 						  ShardReplicationFactor);

 	InsertShardRow(relationId, shardId, SHARD_STORAGE_TABLE, nullMinValue, nullMaxValue);

@@ -361,7 +361,7 @@ CheckDistributedTable(Oid relationId)
  */
 void
 CreateShardPlacements(int64 shardId, List *ddlEventList, List *workerNodeList,
 					  int workerStartIndex, int replicationFactor)
 {
 	int attemptCount = replicationFactor;
 	int workerNodeCount = list_length(workerNodeList);

@@ -393,7 +393,7 @@ CreateShardPlacements(int64 shardId, List *ddlEventList, List *workerNodeList,
 		else
 		{
 			ereport(WARNING, (errmsg("could not create shard on \"%s:%u\"",
 									 nodeName, nodePort)));
 		}

 		if (placementsCreated >= replicationFactor)

@@ -406,7 +406,7 @@ CreateShardPlacements(int64 shardId, List *ddlEventList, List *workerNodeList,
 	if (placementsCreated < replicationFactor)
 	{
 		ereport(ERROR, (errmsg("could only create %u of %u of required shard replicas",
 							   placementsCreated, replicationFactor)));
 	}
 }

@@ -393,6 +393,7 @@ DistributedModifyTask(Query *query)
 		query->onConflict = RebuildOnConflict(relationId, query->onConflict);
 	}
 #else
+
 	/* always set to false for PG_VERSION_NUM < 90500 */
 	upsertQuery = false;
 #endif

@@ -414,6 +415,7 @@ DistributedModifyTask(Query *query)


 #if (PG_VERSION_NUM >= 90500)
+
 /*
  * RebuildOnConflict rebuilds OnConflictExpr for correct deparsing. The function
  * makes WHERE clause elements explicit and filters dropped columns

@@ -433,7 +435,7 @@ RebuildOnConflict(Oid relationId, OnConflictExpr *originalOnConflict)

 	/* Convert onConflictWhere qualifiers to an explicitly and'd clause */
 	updatedOnConflict->onConflictWhere =
 		(Node *) make_ands_explicit((List *) onConflictWhere);

 	/*
 	 * Here we handle dropped columns on the distributed table. onConflictSet

@@ -448,7 +450,7 @@ RebuildOnConflict(Oid relationId, OnConflictExpr *originalOnConflict)
 	foreach(targetEntryCell, onConflictSet)
 	{
 		TargetEntry *targetEntry = (TargetEntry *) lfirst(targetEntryCell);
-		FormData_pg_attribute *tableAttribute = tableAttributes[targetEntry->resno -1];
+		FormData_pg_attribute *tableAttribute = tableAttributes[targetEntry->resno - 1];

 		/* skip dropped columns */
 		if (tableAttribute->attisdropped)

@@ -468,6 +470,8 @@ RebuildOnConflict(Oid relationId, OnConflictExpr *originalOnConflict)

 	return updatedOnConflict;
 }
+
+
 #endif


@@ -45,7 +45,7 @@ MultiExplainOneQuery(Query *query, IntoClause *into, ExplainState *es,
 	if (localQuery)
 	{
 		PlannedStmt *plan = NULL;
 		instr_time planstart;
 		instr_time planduration;

 		INSTR_TIME_SET_CURRENT(planstart);

@@ -33,18 +33,18 @@


 /* Config variables managed via guc.c */
 int LargeTableShardCount = 4;   /* shard counts for a large table */
 bool LogMultiJoinOrder = false; /* print join order as a debugging aid */

 /* Function pointer type definition for join rule evaluation functions */
-typedef JoinOrderNode * (*RuleEvalFunction) (JoinOrderNode *currentJoinNode,
+typedef JoinOrderNode *(*RuleEvalFunction) (JoinOrderNode *currentJoinNode,
 											 TableEntry *candidateTable,
 											 List *candidateShardList,
 											 List *applicableJoinClauses,
 											 JoinType joinType);

-static char * RuleNameArray[JOIN_RULE_LAST] = {0}; /* ordered join rule names */
-static RuleEvalFunction RuleEvalFunctionArray[JOIN_RULE_LAST] = {0}; /* join rules */
+static char *RuleNameArray[JOIN_RULE_LAST] = { 0 }; /* ordered join rule names */
+static RuleEvalFunction RuleEvalFunctionArray[JOIN_RULE_LAST] = { 0 }; /* join rules */


 /* Local functions forward declarations */

@@ -54,7 +54,8 @@ static bool JoinExprListWalker(Node *node, List **joinList);
 static bool ExtractLeftMostRangeTableIndex(Node *node, int *rangeTableIndex);
 static List * MergeShardIntervals(List *leftShardIntervalList,
 								  List *rightShardIntervalList, JoinType joinType);
-static bool ShardIntervalsMatch(List *leftShardIntervalList, List *rightShardIntervalList);
+static bool ShardIntervalsMatch(List *leftShardIntervalList,
+								List *rightShardIntervalList);
 static List * LoadSortedShardIntervalList(Oid relationId);
 static List * JoinOrderForTable(TableEntry *firstTable, List *tableEntryList,
 								List *joinClauseList);
@@ -68,31 +69,41 @@ static List * TableEntryListDifference(List *lhsTableList, List *rhsTableList);
 static TableEntry * FindTableEntry(List *tableEntryList, uint32 tableId);

 /* Local functions forward declarations for join evaluations */
-static JoinOrderNode * EvaluateJoinRules(List *joinedTableList, JoinOrderNode *currentJoinNode,
-										 TableEntry *candidateTable, List *candidateShardList,
+static JoinOrderNode * EvaluateJoinRules(List *joinedTableList,
+										 JoinOrderNode *currentJoinNode,
+										 TableEntry *candidateTable,
+										 List *candidateShardList,
 										 List *joinClauseList, JoinType joinType);
 static List * RangeTableIdList(List *tableList);
 static RuleEvalFunction JoinRuleEvalFunction(JoinRuleType ruleType);
 static char * JoinRuleName(JoinRuleType ruleType);
 static JoinOrderNode * BroadcastJoin(JoinOrderNode *joinNode, TableEntry *candidateTable,
-									 List *candidateShardList, List *applicableJoinClauses,
+									 List *candidateShardList,
+									 List *applicableJoinClauses,
 									 JoinType joinType);
 static JoinOrderNode * LocalJoin(JoinOrderNode *joinNode, TableEntry *candidateTable,
 								 List *candidateShardList, List *applicableJoinClauses,
 								 JoinType joinType);
 static bool JoinOnColumns(Var *currentPartitioncolumn, Var *candidatePartitionColumn,
 						  List *joinClauseList);
-static JoinOrderNode * SinglePartitionJoin(JoinOrderNode *joinNode, TableEntry *candidateTable,
-										   List *candidateShardList, List *applicableJoinClauses,
+static JoinOrderNode * SinglePartitionJoin(JoinOrderNode *joinNode,
+										   TableEntry *candidateTable,
+										   List *candidateShardList,
+										   List *applicableJoinClauses,
 										   JoinType joinType);
-static JoinOrderNode * DualPartitionJoin(JoinOrderNode *joinNode, TableEntry *candidateTable,
-										 List *candidateShardList, List *applicableJoinClauses,
+static JoinOrderNode * DualPartitionJoin(JoinOrderNode *joinNode,
+										 TableEntry *candidateTable,
+										 List *candidateShardList,
+										 List *applicableJoinClauses,
 										 JoinType joinType);
-static JoinOrderNode * CartesianProduct(JoinOrderNode *joinNode, TableEntry *candidateTable,
-										List *candidateShardList, List *applicableJoinClauses,
+static JoinOrderNode * CartesianProduct(JoinOrderNode *joinNode,
+										TableEntry *candidateTable,
+										List *candidateShardList,
+										List *applicableJoinClauses,
 										JoinType joinType);
-static JoinOrderNode * MakeJoinOrderNode(TableEntry *tableEntry, JoinRuleType joinRuleType,
-										 Var *partitionColumn, char partitionMethod);
+static JoinOrderNode * MakeJoinOrderNode(TableEntry *tableEntry, JoinRuleType
+										 joinRuleType, Var *partitionColumn,
+										 char partitionMethod);


 /*

@@ -106,7 +117,7 @@ List *
 FixedJoinOrderList(FromExpr *fromExpr, List *tableEntryList)
 {
 	List *joinList = NIL;
-	ListCell * joinCell = NULL;
+	ListCell *joinCell = NULL;
 	List *joinWhereClauseList = NIL;
 	List *joinOrderList = NIL;
 	List *joinedTableList = NIL;

@@ -199,7 +210,6 @@ FixedJoinOrderList(FromExpr *fromExpr, List *tableEntryList)
 										   "query"),
 									errdetail("Shards of relations in outer join queries "
 											  "must have 1-to-1 shard partitioning")));
-
 		}
 	}
 	else

@@ -439,7 +449,7 @@ MergeShardIntervals(List *leftShardIntervalList, List *rightShardIntervalList,
 		bool nextMaxSmaller = comparisonResult > 0;

 		if ((shardUnion && nextMaxLarger) ||
-			(!shardUnion && nextMaxSmaller) )
+			(!shardUnion && nextMaxSmaller))
 		{
 			newShardInterval->maxValue = datumCopy(nextMax, typeByValue, typeLen);
 		}

@@ -586,7 +596,8 @@ ShardIntervalsMatch(List *leftShardIntervalList, List *rightShardIntervalList)
 	nextRightIntervalCell = lnext(rightShardIntervalCell);
 	if (nextRightIntervalCell != NULL)
 	{
 		ShardInterval *nextRightInterval = (ShardInterval *) lfirst(nextRightIntervalCell);
|
ShardInterval *nextRightInterval =
|
||||||
|
(ShardInterval *) lfirst(nextRightIntervalCell);
|
||||||
shardIntervalsIntersect = ShardIntervalsOverlap(leftInterval,
|
shardIntervalsIntersect = ShardIntervalsOverlap(leftInterval,
|
||||||
nextRightInterval);
|
nextRightInterval);
|
||||||
if (shardIntervalsIntersect)
|
if (shardIntervalsIntersect)
|
||||||
|
@ -1009,7 +1020,7 @@ EvaluateJoinRules(List *joinedTableList, JoinOrderNode *currentJoinNode,
|
||||||
uint32 candidateTableId = 0;
|
uint32 candidateTableId = 0;
|
||||||
List *joinedTableIdList = NIL;
|
List *joinedTableIdList = NIL;
|
||||||
List *applicableJoinClauses = NIL;
|
List *applicableJoinClauses = NIL;
|
||||||
uint32 lowestValidIndex = JOIN_RULE_INVALID_FIRST + 1;
|
uint32 lowestValidIndex = JOIN_RULE_INVALID_FIRST + 1;
|
||||||
uint32 highestValidIndex = JOIN_RULE_LAST - 1;
|
uint32 highestValidIndex = JOIN_RULE_LAST - 1;
|
||||||
uint32 ruleIndex = 0;
|
uint32 ruleIndex = 0;
|
||||||
|
|
||||||
|
@ -1028,11 +1039,11 @@ EvaluateJoinRules(List *joinedTableList, JoinOrderNode *currentJoinNode,
|
||||||
JoinRuleType ruleType = (JoinRuleType) ruleIndex;
|
JoinRuleType ruleType = (JoinRuleType) ruleIndex;
|
||||||
RuleEvalFunction ruleEvalFunction = JoinRuleEvalFunction(ruleType);
|
RuleEvalFunction ruleEvalFunction = JoinRuleEvalFunction(ruleType);
|
||||||
|
|
||||||
nextJoinNode = (*ruleEvalFunction) (currentJoinNode,
|
nextJoinNode = (*ruleEvalFunction)(currentJoinNode,
|
||||||
candidateTable,
|
candidateTable,
|
||||||
candidateShardList,
|
candidateShardList,
|
||||||
applicableJoinClauses,
|
applicableJoinClauses,
|
||||||
joinType);
|
joinType);
|
||||||
|
|
||||||
/* break after finding the first join rule that applies */
|
/* break after finding the first join rule that applies */
|
||||||
if (nextJoinNode != NULL)
|
if (nextJoinNode != NULL)
|
||||||
|
|
|
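
The hunks above touch a small dispatch table: join rules live in the JoinRuleType enum, RuleEvalFunctionArray maps each rule to its evaluation function, and EvaluateJoinRules walks the rules in priority order, keeping the first one that returns a join node. The standalone C sketch below shows the same pattern; the enum values, rule bodies, and table sizes are illustrative stand-ins, not the Citus implementation.

#include <stdio.h>
#include <stddef.h>

typedef enum
{
    RULE_INVALID_FIRST = 0,     /* mirrors JOIN_RULE_INVALID_FIRST */
    RULE_LOCAL_JOIN = 1,
    RULE_BROADCAST_JOIN = 2,
    RULE_CARTESIAN_PRODUCT = 3,
    RULE_LAST                   /* mirrors JOIN_RULE_LAST */
} RuleType;

/* an evaluation function returns a non-NULL "plan" when its rule applies */
typedef const char *(*RuleEvalFunction)(int leftSize, int rightSize);

static const char *
LocalJoinRule(int leftSize, int rightSize)
{
    return (leftSize == rightSize) ? "local join" : NULL;
}

static const char *
BroadcastJoinRule(int leftSize, int rightSize)
{
    (void) leftSize;
    return (rightSize <= 2) ? "broadcast join" : NULL;
}

static const char *
CartesianProductRule(int leftSize, int rightSize)
{
    (void) leftSize;
    (void) rightSize;
    return "cartesian product";  /* always applicable, so it is ordered last */
}

/* zero-initialized with { 0 }, the spelling this commit standardizes on */
static RuleEvalFunction RuleEvalFunctionArray[RULE_LAST] = { 0 };

static const char *
EvaluateRules(int leftSize, int rightSize)
{
    RuleEvalFunctionArray[RULE_LOCAL_JOIN] = LocalJoinRule;
    RuleEvalFunctionArray[RULE_BROADCAST_JOIN] = BroadcastJoinRule;
    RuleEvalFunctionArray[RULE_CARTESIAN_PRODUCT] = CartesianProductRule;

    for (int ruleIndex = RULE_INVALID_FIRST + 1; ruleIndex < RULE_LAST; ruleIndex++)
    {
        RuleEvalFunction ruleEvalFunction = RuleEvalFunctionArray[ruleIndex];
        const char *plan = (*ruleEvalFunction)(leftSize, rightSize);

        /* break after finding the first rule that applies */
        if (plan != NULL)
        {
            return plan;
        }
    }

    return NULL;
}

int
main(void)
{
    printf("%s\n", EvaluateRules(8, 2));   /* broadcast join */
    printf("%s\n", EvaluateRules(8, 8));   /* local join */
    return 0;
}

Ordering the enum from most to least preferred rule is what makes the break-on-first-match loop a greedy policy; the change from "(*ruleEvalFunction) (...)" to "(*ruleEvalFunction)(...)" in the diff is purely cosmetic.
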
@@ -91,7 +91,8 @@ static void ParentSetNewChild(MultiNode *parentNode, MultiNode *oldChildNode,
 
 /* Local functions forward declarations for aggregate expressions */
 static void ApplyExtendedOpNodes(MultiExtendedOp *originalNode,
-                                 MultiExtendedOp *masterNode, MultiExtendedOp *workerNode);
+                                 MultiExtendedOp *masterNode,
+                                 MultiExtendedOp *workerNode);
 static void TransformSubqueryNode(MultiTable *subqueryNode);
 static MultiExtendedOp * MasterExtendedOpNode(MultiExtendedOp *originalOpNode);
 static Node * MasterAggregateMutator(Node *originalNode, AttrNumber *columnId);
@@ -117,7 +118,8 @@ static void ErrorIfUnsupportedArrayAggregate(Aggref *arrayAggregateExpression);
 static void ErrorIfUnsupportedAggregateDistinct(Aggref *aggregateExpression,
                                                 MultiNode *logicalPlanNode);
 static Var * AggregateDistinctColumn(Aggref *aggregateExpression);
-static bool TablePartitioningSupportsDistinct(List *tableNodeList, MultiExtendedOp *opNode,
+static bool TablePartitioningSupportsDistinct(List *tableNodeList,
+                                              MultiExtendedOp *opNode,
                                               Var *distinctColumn);
 static bool GroupedByColumn(List *groupClauseList, List *targetList, Var *column);
 
@@ -637,7 +639,7 @@ Commutative(MultiUnaryNode *parentNode, MultiUnaryNode *childNode)
 {
     PushDownStatus pushDownStatus = PUSH_DOWN_NOT_VALID;
     CitusNodeTag parentNodeTag = CitusNodeTag(parentNode);
     CitusNodeTag childNodeTag = CitusNodeTag(childNode);
 
     /* we cannot be commutative with non-query operators */
     if (childNodeTag == T_MultiTreeRoot || childNodeTag == T_MultiTable)
@@ -692,7 +694,7 @@ Distributive(MultiUnaryNode *parentNode, MultiBinaryNode *childNode)
 {
     PushDownStatus pushDownStatus = PUSH_DOWN_NOT_VALID;
     CitusNodeTag parentNodeTag = CitusNodeTag(parentNode);
     CitusNodeTag childNodeTag = CitusNodeTag(childNode);
 
     /* special condition checks for partition operator are not implemented */
     Assert(parentNodeTag != T_MultiPartition);
@@ -751,7 +753,7 @@ Factorizable(MultiBinaryNode *parentNode, MultiUnaryNode *childNode)
 {
     PullUpStatus pullUpStatus = PULL_UP_NOT_VALID;
     CitusNodeTag parentNodeTag = CitusNodeTag(parentNode);
     CitusNodeTag childNodeTag = CitusNodeTag(childNode);
 
     /*
      * The following nodes are factorizable with their parents, but we don't
@@ -1220,7 +1222,7 @@ MasterExtendedOpNode(MultiExtendedOp *originalOpNode)
         bool hasAggregates = contain_agg_clause((Node *) originalExpression);
         if (hasAggregates)
         {
-            Node *newNode = MasterAggregateMutator((Node*) originalExpression,
+            Node *newNode = MasterAggregateMutator((Node *) originalExpression,
                                                    &columnId);
             newExpression = (Expr *) newNode;
         }
@@ -1826,7 +1828,7 @@ WorkerAggregateExpressionList(Aggref *originalAggregate)
 static AggregateType
 GetAggregateType(Oid aggFunctionId)
 {
     char *aggregateProcName = NULL;
     uint32 aggregateCount = 0;
     uint32 aggregateIndex = 0;
     bool found = false;
@@ -1980,22 +1982,30 @@ CountDistinctHashFunctionName(Oid argumentType)
     switch (argumentType)
     {
         case INT4OID:
+        {
             hashFunctionName = pstrdup(HLL_HASH_INTEGER_FUNC_NAME);
             break;
+        }
 
         case INT8OID:
+        {
             hashFunctionName = pstrdup(HLL_HASH_BIGINT_FUNC_NAME);
             break;
+        }
 
         case TEXTOID:
         case BPCHAROID:
        case VARCHAROID:
+        {
             hashFunctionName = pstrdup(HLL_HASH_TEXT_FUNC_NAME);
             break;
+        }
 
         default:
+        {
             hashFunctionName = pstrdup(HLL_HASH_ANY_FUNC_NAME);
             break;
+        }
     }
 
     return hashFunctionName;
@@ -2479,7 +2489,7 @@ ErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerQueryHasLimit)
     if (subqueryTree->setOperations)
     {
         SetOperationStmt *setOperationStatement =
             (SetOperationStmt *) subqueryTree->setOperations;
 
         if (setOperationStatement->op == SETOP_UNION)
         {
@@ -2563,7 +2573,7 @@ ErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerQueryHasLimit)
         List *joinTreeTableIndexList = NIL;
         uint32 joiningTableCount = 0;
 
-        ExtractRangeTableIndexWalker((Node*) subqueryTree->jointree,
+        ExtractRangeTableIndexWalker((Node *) subqueryTree->jointree,
                                      &joinTreeTableIndexList);
         joiningTableCount = list_length(joinTreeTableIndexList);
 
@@ -2587,7 +2597,7 @@ ErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerQueryHasLimit)
         List *distinctTargetEntryList = GroupTargetEntryList(distinctClauseList,
                                                              targetEntryList);
         bool distinctOnPartitionColumn =
             TargetListOnPartitionColumn(subqueryTree, distinctTargetEntryList);
         if (!distinctOnPartitionColumn)
         {
             preconditionsSatisfied = false;
@@ -2609,7 +2619,7 @@ ErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerQueryHasLimit)
     foreach(rangeTableEntryCell, subqueryEntryList)
     {
         RangeTblEntry *rangeTableEntry =
             (RangeTblEntry *) lfirst(rangeTableEntryCell);
 
         Query *innerSubquery = rangeTableEntry->subquery;
         ErrorIfCannotPushdownSubquery(innerSubquery, outerQueryHasLimit);
@@ -2639,7 +2649,7 @@ ErrorIfUnsupportedTableCombination(Query *queryTree)
      * Extract all range table indexes from the join tree. Note that sub-queries
      * that get pulled up by PostgreSQL don't appear in this join tree.
      */
-    ExtractRangeTableIndexWalker((Node*) queryTree->jointree, &joinTreeTableIndexList);
+    ExtractRangeTableIndexWalker((Node *) queryTree->jointree, &joinTreeTableIndexList);
     foreach(joinTreeTableIndexCell, joinTreeTableIndexList)
     {
         /*
@@ -2768,7 +2778,7 @@ ErrorIfUnsupportedUnionQuery(Query *unionQuery)
     leftQueryOnPartitionColumn = TargetListOnPartitionColumn(leftQuery,
                                                              leftQuery->targetList);
     rightQueryOnPartitionColumn = TargetListOnPartitionColumn(rightQuery,
                                                               rightQuery->targetList);
 
     if (!(leftQueryOnPartitionColumn && rightQueryOnPartitionColumn))
     {
@@ -2807,7 +2817,7 @@ GroupTargetEntryList(List *groupClauseList, List *targetEntryList)
     {
         SortGroupClause *groupClause = (SortGroupClause *) lfirst(groupClauseCell);
         TargetEntry *groupTargetEntry =
             get_sortgroupclause_tle(groupClause, targetEntryList);
         groupTargetEntryList = lappend(groupTargetEntryList, groupTargetEntry);
     }
 
@@ -2890,7 +2900,7 @@ IsPartitionColumnRecursive(Expr *columnExpression, Query *query)
     else if (IsA(columnExpression, FieldSelect))
     {
         FieldSelect *compositeField = (FieldSelect *) columnExpression;
         Expr *fieldExpression = compositeField->arg;
 
         if (IsA(fieldExpression, Var))
         {
@@ -2909,7 +2919,7 @@ IsPartitionColumnRecursive(Expr *columnExpression, Query *query)
         return false;
     }
 
     rangeTableEntryIndex = candidateColumn->varno - 1;
     rangeTableEntry = list_nth(rangetableList, rangeTableEntryIndex);
 
     if (rangeTableEntry->rtekind == RTE_RELATION)
@@ -2980,7 +2990,7 @@ CompositeFieldRecursive(Expr *expression, Query *query)
         return NULL;
     }
 
     rangeTableEntryIndex = candidateColumn->varno - 1;
     rangeTableEntry = list_nth(rangetableList, rangeTableEntryIndex);
 
     if (rangeTableEntry->rtekind == RTE_SUBQUERY)
@@ -3019,7 +3029,7 @@ FullCompositeFieldList(List *compositeFieldList)
     uint32 fieldIndex = 0;
 
     ListCell *fieldSelectCell = NULL;
-    foreach (fieldSelectCell, compositeFieldList)
+    foreach(fieldSelectCell, compositeFieldList)
     {
         FieldSelect *fieldSelect = (FieldSelect *) lfirst(fieldSelectCell);
         uint32 compositeFieldIndex = 0;
@@ -3226,9 +3236,10 @@ SupportedLateralQuery(Query *parentQuery, Query *lateralQuery)
         if (outerColumnIsPartitionColumn && localColumnIsPartitionColumn)
         {
             FieldSelect *outerCompositeField =
                 CompositeFieldRecursive(outerQueryExpression, parentQuery);
             FieldSelect *localCompositeField =
                 CompositeFieldRecursive(localQueryExpression, lateralQuery);
 
             /*
              * If partition colums are composite fields, add them to list to
              * check later if all composite fields are used.
@@ -3251,12 +3262,12 @@ SupportedLateralQuery(Query *parentQuery, Query *lateralQuery)
     }
 
     /* check composite fields */
-    if(!supportedLateralQuery)
+    if (!supportedLateralQuery)
     {
         bool outerFullCompositeFieldList =
             FullCompositeFieldList(outerCompositeFieldList);
         bool localFullCompositeFieldList =
             FullCompositeFieldList(localCompositeFieldList);
 
         if (outerFullCompositeFieldList && localFullCompositeFieldList)
         {
@@ -3301,15 +3312,15 @@ JoinOnPartitionColumn(Query *query)
         if (isLeftColumnPartitionColumn && isRightColumnPartitionColumn)
         {
             FieldSelect *leftCompositeField =
                 CompositeFieldRecursive(leftArgument, query);
             FieldSelect *rightCompositeField =
                 CompositeFieldRecursive(rightArgument, query);
 
             /*
              * If partition colums are composite fields, add them to list to
              * check later if all composite fields are used.
              */
-            if(leftCompositeField && rightCompositeField)
+            if (leftCompositeField && rightCompositeField)
             {
                 leftCompositeFieldList = lappend(leftCompositeFieldList,
                                                  leftCompositeField);
@@ -3318,7 +3329,7 @@ JoinOnPartitionColumn(Query *query)
             }
 
             /* if both sides are not composite fields, they are normal columns */
-            if(!(leftCompositeField && rightCompositeField))
+            if (!(leftCompositeField && rightCompositeField))
             {
                 joinOnPartitionColumn = true;
                 break;
@@ -3327,12 +3338,12 @@ JoinOnPartitionColumn(Query *query)
     }
 
     /* check composite fields */
-    if(!joinOnPartitionColumn)
+    if (!joinOnPartitionColumn)
     {
         bool leftFullCompositeFieldList =
             FullCompositeFieldList(leftCompositeFieldList);
         bool rightFullCompositeFieldList =
             FullCompositeFieldList(rightCompositeFieldList);
 
         if (leftFullCompositeFieldList && rightFullCompositeFieldList)
         {
@@ -3409,7 +3420,7 @@ ErrorIfUnsupportedShardDistribution(Query *query)
 
         /* check if this table has 1-1 shard partitioning with first table */
         coPartitionedTables = CoPartitionedTables(firstShardIntervalList,
                                                   currentShardIntervalList);
         if (!coPartitionedTables)
         {
             ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
@@ -3437,7 +3448,7 @@ RelationIdList(Query *query)
 
     foreach(tableEntryCell, tableEntryList)
     {
         TableEntry *tableEntry = (TableEntry *) lfirst(tableEntryCell);
         Oid relationId = tableEntry->relationId;
 
         relationIdList = list_append_unique_oid(relationIdList, relationId);
@@ -3617,7 +3628,7 @@ ExtractQueryWalker(Node *node, List **queryList)
         Query *query = (Query *) node;
 
         (*queryList) = lappend(*queryList, query);
         walkerResult = query_tree_walker(query, ExtractQueryWalker, queryList,
                                          QTW_EXAMINE_RTES);
     }
 
@@ -3641,7 +3652,7 @@ LeafQuery(Query *queryTree)
      * Extract all range table indexes from the join tree. Note that sub-queries
      * that get pulled up by PostgreSQL don't appear in this join tree.
      */
-    ExtractRangeTableIndexWalker((Node*) queryTree->jointree, &joinTreeTableIndexList);
+    ExtractRangeTableIndexWalker((Node *) queryTree->jointree, &joinTreeTableIndexList);
     foreach(joinTreeTableIndexCell, joinTreeTableIndexList)
     {
         /*
@@ -3725,7 +3736,7 @@ PartitionColumnOpExpressionList(Query *query)
     }
     else if (IsA(leftArgument, Const) && IsA(leftArgument, Var))
     {
         candidatePartitionColumn = (Var *) rightArgument;
     }
     else
     {
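
Most of the churn in CountDistinctHashFunctionName above is the formatter giving every case label its own brace block. A standalone sketch of that style follows; the type ids and returned names are stand-ins for the real type OIDs and HLL_HASH_* constants, and plain string literals stand in for pstrdup'ed copies.

#include <stdio.h>

enum { INT4_ID = 23, INT8_ID = 20, TEXT_ID = 25 };

static const char *
HashFunctionNameFor(int typeId)
{
    const char *hashFunctionName = NULL;

    switch (typeId)
    {
        case INT4_ID:
        {
            hashFunctionName = "hash_integer";
            break;
        }

        case INT8_ID:
        {
            hashFunctionName = "hash_bigint";
            break;
        }

        default:
        {
            hashFunctionName = "hash_any";
            break;
        }
    }

    return hashFunctionName;
}

int
main(void)
{
    printf("%s\n", HashFunctionNameFor(INT4_ID));  /* hash_integer */
    printf("%s\n", HashFunctionNameFor(TEXT_ID));  /* hash_any */
    return 0;
}

Braced case bodies buy a scope per label, so later edits can declare case-local variables without tripping over C's restriction on declarations directly after a case label.
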
@@ -39,11 +39,11 @@ bool SubqueryPushdown = false; /* is subquery pushdown enabled */
 
 
 /* Function pointer type definition for apply join rule functions */
-typedef MultiNode * (*RuleApplyFunction) (MultiNode *leftNode, MultiNode *rightNode,
+typedef MultiNode *(*RuleApplyFunction) (MultiNode *leftNode, MultiNode *rightNode,
                                           Var *partitionColumn, JoinType joinType,
                                           List *joinClauses);
 
-static RuleApplyFunction RuleApplyFunctionArray[JOIN_RULE_LAST] = {0}; /* join rules */
+static RuleApplyFunction RuleApplyFunctionArray[JOIN_RULE_LAST] = { 0 }; /* join rules */
 
 /* Local functions forward declarations */
 static MultiNode * MultiPlanTree(Query *queryTree);
@@ -157,7 +157,7 @@ SubqueryEntryList(Query *queryTree)
      * only walk over range table entries at this level and do not recurse into
      * subqueries.
      */
-    ExtractRangeTableIndexWalker((Node*) queryTree->jointree, &joinTreeTableIndexList);
+    ExtractRangeTableIndexWalker((Node *) queryTree->jointree, &joinTreeTableIndexList);
     foreach(joinTreeTableIndexCell, joinTreeTableIndexList)
     {
         /*
@@ -285,6 +285,7 @@ MultiPlanTree(Query *queryTree)
     else
     {
         bool hasOuterJoin = false;
+
         /*
          * We calculate the join order using the list of tables in the query and
          * the join clauses between them. Note that this function owns the table
@@ -465,6 +466,7 @@ ErrorIfQueryNotSupported(Query *queryTree)
 
 
 #if (PG_VERSION_NUM >= 90500)
+
 /* HasTablesample returns tree if the query contains tablesample */
 static bool
 HasTablesample(Query *queryTree)
@@ -485,6 +487,8 @@ HasTablesample(Query *queryTree)
 
     return hasTablesample;
 }
 
+
 #endif
 
+
@@ -529,7 +533,8 @@ HasUnsupportedJoinWalker(Node *node, void *context)
  * ErrorIfSubqueryNotSupported checks that we can perform distributed planning for
  * the given subquery.
  */
-static void ErrorIfSubqueryNotSupported(Query *subqueryTree)
+static void
+ErrorIfSubqueryNotSupported(Query *subqueryTree)
 {
     char *errorDetail = NULL;
     bool preconditionsSatisfied = true;
@@ -587,7 +592,6 @@ HasOuterJoin(Query *queryTree)
 static bool
 HasOuterJoinWalker(Node *node, void *context)
 {
-
     bool hasOuterJoin = false;
     if (node == NULL)
     {
@@ -657,7 +661,7 @@ HasComplexRangeTableType(Query *queryTree)
      * Extract all range table indexes from the join tree. Note that sub-queries
      * that get pulled up by PostgreSQL don't appear in this join tree.
      */
-    ExtractRangeTableIndexWalker((Node*) queryTree->jointree, &joinTreeTableIndexList);
+    ExtractRangeTableIndexWalker((Node *) queryTree->jointree, &joinTreeTableIndexList);
     foreach(joinTreeTableIndexCell, joinTreeTableIndexList)
     {
         /*
@@ -675,7 +679,7 @@ HasComplexRangeTableType(Query *queryTree)
          * subquery.
          */
         if (rangeTableEntry->rtekind != RTE_RELATION &&
             rangeTableEntry->rtekind != RTE_SUBQUERY)
         {
             hasComplexRangeTableType = true;
         }
@@ -966,7 +970,7 @@ TableEntryList(List *rangeTableList)
 
     foreach(rangeTableCell, rangeTableList)
     {
         RangeTblEntry *rangeTableEntry = (RangeTblEntry *) lfirst(rangeTableCell);
 
         if (rangeTableEntry->rtekind == RTE_RELATION)
         {
@@ -1178,8 +1182,8 @@ IsSelectClause(Node *clause)
 
     /* we currently consider the following nodes as select clauses */
     NodeTag nodeTag = nodeTag(clause);
-    if ( !(nodeTag == T_OpExpr || nodeTag == T_ScalarArrayOpExpr ||
-           nodeTag == T_NullTest || nodeTag == T_BooleanTest) )
+    if (!(nodeTag == T_OpExpr || nodeTag == T_ScalarArrayOpExpr ||
+          nodeTag == T_NullTest || nodeTag == T_BooleanTest))
     {
         return false;
     }
@@ -1317,9 +1321,9 @@ UnaryOperator(MultiNode *node)
 {
     bool unaryOperator = false;
 
     if (CitusIsA(node, MultiTreeRoot) || CitusIsA(node, MultiTable) ||
         CitusIsA(node, MultiCollect) || CitusIsA(node, MultiSelect) ||
         CitusIsA(node, MultiProject) || CitusIsA(node, MultiPartition) ||
         CitusIsA(node, MultiExtendedOp))
     {
         unaryOperator = true;
@@ -1403,7 +1407,7 @@ FindNodesOfType(MultiNode *node, int type)
     }
     else if (BinaryOperator(node))
     {
         MultiNode *leftChildNode = ((MultiBinaryNode *) node)->leftChildNode;
         MultiNode *rightChildNode = ((MultiBinaryNode *) node)->rightChildNode;
 
         List *leftChildNodeList = FindNodesOfType(leftChildNode, type);
@@ -1533,9 +1537,9 @@ ExtractRangeTableEntryWalker(Node *node, List **rangeTableList)
 List *
 pull_var_clause_default(Node *node)
 {
     List *columnList = pull_var_clause(node, PVC_RECURSE_AGGREGATES,
                                        PVC_REJECT_PLACEHOLDERS);
     return columnList;
 }
 
 
@@ -1552,7 +1556,7 @@ ApplyJoinRule(MultiNode *leftNode, MultiNode *rightNode, JoinRuleType ruleType,
     MultiNode *multiNode = NULL;
 
     List *applicableJoinClauses = NIL;
     List *leftTableIdList = OutputTableIdList(leftNode);
     List *rightTableIdList = OutputTableIdList(rightNode);
     int rightTableIdCount = 0;
     uint32 rightTableId = 0;
@@ -1567,8 +1571,8 @@ ApplyJoinRule(MultiNode *leftNode, MultiNode *rightNode, JoinRuleType ruleType,
 
     /* call the join rule application function to create the new join node */
     ruleApplyFunction = JoinRuleApplyFunction(ruleType);
-    multiNode = (*ruleApplyFunction) (leftNode, rightNode, partitionColumn,
+    multiNode = (*ruleApplyFunction)(leftNode, rightNode, partitionColumn,
                                      joinType, applicableJoinClauses);
 
     if (joinType != JOIN_INNER && CitusIsA(multiNode, MultiJoin))
     {
@@ -1918,7 +1922,7 @@ ErrorIfSubqueryJoin(Query *queryTree)
      * Extract all range table indexes from the join tree. Note that sub-queries
      * that get pulled up by PostgreSQL don't appear in this join tree.
      */
-    ExtractRangeTableIndexWalker((Node*) queryTree->jointree, &joinTreeTableIndexList);
+    ExtractRangeTableIndexWalker((Node *) queryTree->jointree, &joinTreeTableIndexList);
     joiningRangeTableCount = list_length(joinTreeTableIndexList);
 
     if (joiningRangeTableCount > 1)
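
The typedef change above is purely cosmetic: "MultiNode * (*RuleApplyFunction)" and "MultiNode *(*RuleApplyFunction)" both declare a pointer to a function returning MultiNode *. A minimal standalone illustration, with a placeholder MultiNode type rather than the real Citus node:

#include <stdio.h>

typedef struct MultiNode
{
    const char *label;
} MultiNode;

/* pointer to a function taking two nodes and returning a new MultiNode * */
typedef MultiNode *(*RuleApplyFunction)(MultiNode *leftNode, MultiNode *rightNode);

static MultiNode joinResult = { "join(left, right)" };

static MultiNode *
ApplyLocalJoin(MultiNode *leftNode, MultiNode *rightNode)
{
    (void) leftNode;
    (void) rightNode;
    return &joinResult;
}

int
main(void)
{
    MultiNode left = { "left" };
    MultiNode right = { "right" };
    RuleApplyFunction ruleApplyFunction = ApplyLocalJoin;

    /* (*fn)(...) and fn(...) are equivalent ways to call through the pointer */
    MultiNode *multiNode = (*ruleApplyFunction)(&left, &right);
    printf("%s\n", multiNode->label);
    return 0;
}
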
@@ -168,13 +168,13 @@ BuildAggregatePlan(Query *masterQuery, Plan *subPlan)
 
     /* finally create the plan */
 #if (PG_VERSION_NUM >= 90500)
     aggregatePlan = make_agg(NULL, aggregateTargetList, NIL, aggregateStrategy,
                              &aggregateCosts, groupColumnCount, groupColumnIdArray,
                              groupColumnOpArray, NIL, rowEstimate, subPlan);
 #else
     aggregatePlan = make_agg(NULL, aggregateTargetList, NIL, aggregateStrategy,
                              &aggregateCosts, groupColumnCount, groupColumnIdArray,
                              groupColumnOpArray, rowEstimate, subPlan);
 #endif
 
     return aggregatePlan;
@@ -211,7 +211,7 @@ BuildSelectStatement(Query *masterQuery, char *masterTableName,
     rangeTableEntry = copyObject(queryRangeTableEntry);
     rangeTableEntry->rtekind = RTE_RELATION;
     rangeTableEntry->eref = makeAlias(masterTableName, NIL);
     rangeTableEntry->relid = 0; /* to be filled in exec_Start */
     rangeTableEntry->inh = false;
     rangeTableEntry->inFromCl = true;
 
@@ -220,7 +220,7 @@ BuildSelectStatement(Query *masterQuery, char *masterTableName,
 
     /* (2) build and initialize sequential scan node */
     sequentialScan = makeNode(SeqScan);
     sequentialScan->scanrelid = 1; /* always one */
 
     /* (3) add an aggregation plan if needed */
     if (masterQuery->hasAggs || masterQuery->groupClause)
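
BuildAggregatePlan above keeps two make_agg call shapes because, as the diff itself shows, the call takes an extra argument from PostgreSQL 9.5 on, selected with PG_VERSION_NUM (9.5.0 is encoded as 90500). Below is a compilable sketch of the same gating pattern; make_agg_demo and its arguments are hypothetical stand-ins, not the real make_agg API:

#include <stdio.h>

#ifndef PG_VERSION_NUM
#define PG_VERSION_NUM 90500          /* pretend we build against 9.5 headers */
#endif

#if (PG_VERSION_NUM >= 90500)
static int
make_agg_demo(int groupCount, int extraArg, int rowEstimate)
{
    return groupCount + extraArg + rowEstimate;   /* newer, wider signature */
}
#else
static int
make_agg_demo(int groupCount, int rowEstimate)
{
    return groupCount + rowEstimate;              /* older signature */
}
#endif

int
main(void)
{
#if (PG_VERSION_NUM >= 90500)
    int planCost = make_agg_demo(2, 0, 100);
#else
    int planCost = make_agg_demo(2, 100);
#endif
    printf("stand-in plan cost: %d\n", planCost);
    return 0;
}

Gating both the definition and every call site keeps each build configuration seeing exactly one consistent signature.
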
@@ -138,7 +138,7 @@ static OpExpr * MakeOpExpressionWithZeroConst(void);
 static List * BuildRestrictInfoList(List *qualList);
 static List * FragmentCombinationList(List *rangeTableFragmentsList, Query *jobQuery,
                                       List *dependedJobList);
-static JoinSequenceNode * JoinSequenceArray(List * rangeTableFragmentsList,
+static JoinSequenceNode * JoinSequenceArray(List *rangeTableFragmentsList,
                                             Query *jobQuery, List *dependedJobList);
 static bool PartitionedOnColumn(Var *column, List *rangeTableList, List *dependedJobList);
 static void CheckJoinBetweenColumns(OpExpr *joinClause);
@@ -155,7 +155,8 @@ static StringInfo DatumArrayString(Datum *datumArray, uint32 datumCount, Oid dat
 static Task * CreateBasicTask(uint64 jobId, uint32 taskId, TaskType taskType,
                               char *queryString);
 static void UpdateRangeTableAlias(List *rangeTableList, List *fragmentList);
-static Alias * FragmentAlias(RangeTblEntry *rangeTableEntry, RangeTableFragment *fragment);
+static Alias * FragmentAlias(RangeTblEntry *rangeTableEntry,
+                             RangeTableFragment *fragment);
 static uint64 AnchorShardId(List *fragmentList, uint32 anchorRangeTableId);
 static List * PruneSqlTaskDependencies(List *sqlTaskList);
 static List * AssignTaskList(List *sqlTaskList);
@@ -167,7 +168,7 @@ static Task * GreedyAssignTask(WorkerNode *workerNode, List *taskList,
 static List * RoundRobinAssignTaskList(List *taskList);
 static List * RoundRobinReorder(Task *task, List *placementList);
 static List * ReorderAndAssignTaskList(List *taskList,
-                                       List * (*reorderFunction) (Task *, List *));
+                                       List * (*reorderFunction)(Task *, List *));
 static int CompareTasksByShardId(const void *leftElement, const void *rightElement);
 static List * ActiveShardPlacementLists(List *taskList);
 static List * ActivePlacementList(List *placementList);
@@ -309,6 +310,7 @@ BuildJobTree(MultiTreeRoot *multiTree)
                                              partitionKey, partitionType,
                                              baseRelationId,
                                              JOIN_MAP_MERGE_JOB);
+
             /* reset depended job list */
             loopDependedJobList = NIL;
             loopDependedJobList = list_make1(mapMergeJob);
@@ -538,7 +540,7 @@ BuildJobQuery(MultiNode *multiNode, List *dependedJobList)
      * If we are building this query on a repartitioned subquery job then we
      * don't need to update column attributes.
      */
-    if(dependedJobList != NIL)
+    if (dependedJobList != NIL)
     {
         Job *job = (Job *) linitial(dependedJobList);
         if (CitusIsA(job, MapMergeJob))
@@ -628,10 +630,10 @@ BuildJobQuery(MultiNode *multiNode, List *dependedJobList)
     jobQuery->rtable = rangeTableList;
     jobQuery->targetList = targetList;
     jobQuery->jointree = joinTree;
     jobQuery->sortClause = sortClauseList;
     jobQuery->groupClause = groupClauseList;
     jobQuery->limitOffset = limitOffset;
     jobQuery->limitCount = limitCount;
     jobQuery->hasAggs = contain_agg_clause((Node *) targetList);
 
     return jobQuery;
@@ -718,10 +720,10 @@ BuildReduceQuery(MultiExtendedOp *extendedOpNode, List *dependedJobList)
     reduceQuery->rtable = derivedRangeTableList;
     reduceQuery->targetList = targetList;
     reduceQuery->jointree = joinTree;
     reduceQuery->sortClause = extendedOpNode->sortClauseList;
     reduceQuery->groupClause = extendedOpNode->groupClauseList;
     reduceQuery->limitOffset = extendedOpNode->limitOffset;
     reduceQuery->limitCount = extendedOpNode->limitCount;
     reduceQuery->hasAggs = contain_agg_clause((Node *) targetList);
 
     return reduceQuery;
@@ -754,7 +756,7 @@ BaseRangeTableList(MultiNode *multiNode)
          */
         MultiTable *multiTable = (MultiTable *) multiNode;
         if (multiTable->relationId != SUBQUERY_RELATION_ID &&
             multiTable->relationId != HEAP_ANALYTICS_SUBQUERY_RELATION_ID)
         {
             RangeTblEntry *rangeTableEntry = makeNode(RangeTblEntry);
             rangeTableEntry->inFromCl = true;
@@ -870,7 +872,7 @@ TargetEntryList(List *expressionList)
         Expr *expression = (Expr *) lfirst(expressionCell);
 
         TargetEntry *targetEntry = makeTargetEntry(expression,
-                                                   list_length(targetEntryList)+1,
+                                                   list_length(targetEntryList) + 1,
                                                    NULL, false);
         targetEntryList = lappend(targetEntryList, targetEntry);
     }
@@ -1044,7 +1046,7 @@ QueryJoinTree(MultiNode *multiNode, List *dependedJobList, List **rangeTableList
 
         /* fix the column attributes in ON (...) clauses */
         columnList = pull_var_clause_default((Node *) joinNode->joinClauseList);
-        foreach (columnCell, columnList)
+        foreach(columnCell, columnList)
         {
             Var *column = (Var *) lfirst(columnCell);
             UpdateColumnAttributes(column, *rangeTableList, dependedJobList);
@@ -1093,7 +1095,8 @@ QueryJoinTree(MultiNode *multiNode, List *dependedJobList, List **rangeTableList
         uint32 columnCount = (uint32) list_length(dependedTargetList);
         List *columnNameList = DerivedColumnNameList(columnCount, dependedJob->jobId);
 
-        RangeTblEntry *rangeTableEntry = DerivedRangeTableEntry(multiNode, columnNameList,
+        RangeTblEntry *rangeTableEntry = DerivedRangeTableEntry(multiNode,
+                                                                columnNameList,
                                                                 tableIdList);
         RangeTblRef *rangeTableRef = makeNode(RangeTblRef);
 
@@ -1405,10 +1408,10 @@ BuildSubqueryJobQuery(MultiNode *multiNode)
     jobQuery->rtable = rangeTableList;
     jobQuery->targetList = targetList;
     jobQuery->jointree = joinTree;
     jobQuery->sortClause = sortClauseList;
     jobQuery->groupClause = groupClauseList;
     jobQuery->limitOffset = limitOffset;
     jobQuery->limitCount = limitCount;
     jobQuery->hasAggs = contain_agg_clause((Node *) targetList);
 
     return jobQuery;
@@ -1646,7 +1649,7 @@ static uint64
 UniqueJobId(void)
 {
     text *sequenceName = cstring_to_text(JOBID_SEQUENCE_NAME);
     Oid sequenceId = ResolveRelationId(sequenceName);
     Datum sequenceIdDatum = ObjectIdGetDatum(sequenceId);
 
     /* generate new and unique jobId from sequence */
@@ -1864,6 +1867,7 @@ SplitPointObject(ShardInterval **shardIntervalArray, uint32 shardIntervalCount)
     return splitPointObject;
 }
 
+
 /* ------------------------------------------------------------
  * Functions that relate to building and assigning tasks follow
  * ------------------------------------------------------------
@@ -1986,7 +1990,7 @@ SubquerySqlTaskList(Job *job)
     ListCell *rangeTableCell = NULL;
     ListCell *queryCell = NULL;
     Node *whereClauseTree = NULL;
     uint32 taskIdIndex = 1; /* 0 is reserved for invalid taskId */
     uint32 anchorRangeTableId = 0;
     uint32 rangeTableIndex = 0;
     const uint32 fragmentSize = sizeof(RangeTableFragment);
@@ -2036,10 +2040,10 @@ SubquerySqlTaskList(Job *job)
         if (opExpressionList != NIL)
         {
             Var *partitionColumn = PartitionColumn(relationId, tableId);
             List *whereClauseList = ReplaceColumnsInOpExpressionList(opExpressionList,
                                                                      partitionColumn);
             finalShardIntervalList = PruneShardList(relationId, tableId, whereClauseList,
                                                     shardIntervalList);
         }
         else
         {
@@ -2146,7 +2150,7 @@ static List *
 SqlTaskList(Job *job)
 {
     List *sqlTaskList = NIL;
     uint32 taskIdIndex = 1; /* 0 is reserved for invalid taskId */
     uint64 jobId = job->jobId;
     bool anchorRangeTableBasedAssignment = false;
     uint32 anchorRangeTableId = 0;
@@ -2472,8 +2476,8 @@ RangeTableFragmentsList(List *rangeTableList, List *whereClauseList,
 
             List *shardIntervalList = LoadShardIntervalList(relationId);
             List *prunedShardIntervalList = PruneShardList(relationId, tableId,
                                                            whereClauseList,
                                                            shardIntervalList);
 
             /*
              * If we prune all shards for one table, query results will be empty.
@@ -2548,7 +2552,7 @@ RangeTableFragmentsList(List *rangeTableList, List *whereClauseList,
  */
 List *
 PruneShardList(Oid relationId, Index tableId, List *whereClauseList,
               List *shardIntervalList)
 {
     List *remainingShardList = NIL;
     ListCell *shardIntervalCell = NULL;
@@ -2653,7 +2657,7 @@ MakeOpExpression(Var *variable, int16 strategyNumber)
     Oid accessMethodId = BTREE_AM_OID;
     Oid operatorId = InvalidOid;
     Oid operatorClassInputType = InvalidOid;
     Const *constantValue = NULL;
     OpExpr *expression = NULL;
     char typeType = 0;
 
@@ -2679,7 +2683,7 @@ MakeOpExpression(Var *variable, int16 strategyNumber)
     /* Now make the expression with the given variable and a null constant */
     expression = (OpExpr *) make_opclause(operatorId,
                                           InvalidOid, /* no result type yet */
                                           false, /* no return set */
                                           (Expr *) variable,
                                           (Expr *) constantValue,
                                           InvalidOid, collationId);
@@ -2900,7 +2904,7 @@ HashableClauseMutator(Node *originalNode, Var *partitionColumn)
      * If this node is not hashable, continue walking down the expression tree
      * to find and hash clauses which are eligible.
      */
-    if(newNode == NULL)
+    if (newNode == NULL)
     {
         newNode = expression_tree_mutator(originalNode, HashableClauseMutator,
                                           (void *) partitionColumn);
@@ -3045,7 +3049,7 @@ MakeInt4Constant(Datum constantValue)
     bool constantIsNull = false;
     bool constantByValue = true;
 
     Const *int4Constant = makeConst(constantType, constantTypeMode, constantCollationId,
                                     constantLength, constantValue, constantIsNull,
                                     constantByValue);
     return int4Constant;
@@ -3102,7 +3106,7 @@ UpdateConstraint(Node *baseConstraint, ShardInterval *shardInterval)
     Node *greaterThanExpr = (Node *) lsecond(andExpr->args);
 
     Node *minNode = get_rightop((Expr *) greaterThanExpr); /* right op */
     Node *maxNode = get_rightop((Expr *) lessThanExpr); /* right op */
     Const *minConstant = NULL;
     Const *maxConstant = NULL;
 
@@ -3273,7 +3277,7 @@ JoinSequenceArray(List *rangeTableFragmentsList, Query *jobQuery, List *depended
     joinSequenceArray[joinedTableCount].joiningRangeTableId = NON_PRUNABLE_JOIN;
     joinedTableCount++;
 
-    foreach (joinExprCell, joinExprList)
+    foreach(joinExprCell, joinExprList)
     {
         JoinExpr *joinExpr = (JoinExpr *) lfirst(joinExprCell);
         JoinType joinType = joinExpr->jointype;
@@ -3347,7 +3351,7 @@ JoinSequenceArray(List *rangeTableFragmentsList, Query *jobQuery, List *depended
         if (IS_OUTER_JOIN(joinType))
         {
             int innerRangeTableId = 0;
-            List * tableFragments = NIL;
+            List *tableFragments = NIL;
             int fragmentCount = 0;
 
             if (joinType == JOIN_RIGHT)
@@ -3500,7 +3504,7 @@ FindRangeTableFragmentsList(List *rangeTableFragmentsList, int tableId)
         if (tableFragments != NIL)
         {
             RangeTableFragment *tableFragment =
-                (RangeTableFragment*) linitial(tableFragments);
+                (RangeTableFragment *) linitial(tableFragments);
             if (tableFragment->rangeTableId == tableId)
             {
                 foundTableFragments = tableFragments;
@@ -3706,7 +3710,7 @@ UniqueFragmentList(List *fragmentList)
         foreach(uniqueFragmentCell, uniqueFragmentList)
         {
             RangeTableFragment *uniqueFragment =
                 (RangeTableFragment *) lfirst(uniqueFragmentCell);
             uint64 *uniqueShardId = uniqueFragment->fragmentReference;
 
             if (*shardId == *uniqueShardId)
@@ -4046,6 +4050,7 @@ FragmentAlias(RangeTblEntry *rangeTableEntry, RangeTableFragment *fragment)
     return alias;
 }
 
+
 /*
  * AnchorShardId walks over each fragment in the given fragment list, finds the
  * fragment that corresponds to the given anchor range tableId, and returns this
@@ -4360,7 +4365,7 @@ MergeTaskList(MapMergeJob *mapMergeJob, List *mapTaskList, uint32 taskIdIndex)
         StringInfo intermediateTableQueryString =
             IntermediateTableQueryString(jobId, taskIdIndex, reduceQuery);
 
-        StringInfo mergeAndRunQueryString= makeStringInfo();
+        StringInfo mergeAndRunQueryString = makeStringInfo();
         appendStringInfo(mergeAndRunQueryString, MERGE_FILES_AND_RUN_QUERY_COMMAND,
                          jobId, taskIdIndex, mergeTableQueryString->data,
                          intermediateTableQueryString->data);
@@ -4686,7 +4691,7 @@ TaskListAppendUnique(List *list, Task *task)
 List *
 TaskListConcatUnique(List *list1, List *list2)
 {
     ListCell *taskCell = NULL;
 
     foreach(taskCell, list2)
     {
@@ -4960,7 +4965,7 @@ List *
 FirstReplicaAssignTaskList(List *taskList)
 {
     /* No additional reordering need take place for this algorithm */
-    List * (*reorderFunction)(Task *, List *) = NULL;
+    List *(*reorderFunction)(Task *, List *) = NULL;
 
     taskList = ReorderAndAssignTaskList(taskList, reorderFunction);
 
@@ -4984,6 +4989,7 @@ RoundRobinAssignTaskList(List *taskList)
     return taskList;
 }
 
+
 /*
  * RoundRobinReorder implements the core of the round-robin assignment policy.
|
||||||
* It takes a task and placement list and rotates a copy of the placement list
|
* It takes a task and placement list and rotates a copy of the placement list
|
||||||
|
@ -5116,7 +5122,8 @@ ActiveShardPlacementLists(List *taskList)
|
||||||
List *activeShardPlacementList = ActivePlacementList(shardPlacementList);
|
List *activeShardPlacementList = ActivePlacementList(shardPlacementList);
|
||||||
|
|
||||||
/* sort shard placements by their insertion time */
|
/* sort shard placements by their insertion time */
|
||||||
activeShardPlacementList = SortList(activeShardPlacementList, CompareShardPlacements);
|
activeShardPlacementList = SortList(activeShardPlacementList,
|
||||||
|
CompareShardPlacements);
|
||||||
shardPlacementLists = lappend(shardPlacementLists, activeShardPlacementList);
|
shardPlacementLists = lappend(shardPlacementLists, activeShardPlacementList);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5257,7 +5264,8 @@ AssignDualHashTaskList(List *taskList)
|
||||||
uint32 replicaIndex = 0;
|
uint32 replicaIndex = 0;
|
||||||
for (replicaIndex = 0; replicaIndex < ShardReplicationFactor; replicaIndex++)
|
for (replicaIndex = 0; replicaIndex < ShardReplicationFactor; replicaIndex++)
|
||||||
{
|
{
|
||||||
uint32 assignmentOffset = beginningNodeIndex + assignedTaskIndex + replicaIndex;
|
uint32 assignmentOffset = beginningNodeIndex + assignedTaskIndex +
|
||||||
|
replicaIndex;
|
||||||
uint32 assignmentIndex = assignmentOffset % workerNodeCount;
|
uint32 assignmentIndex = assignmentOffset % workerNodeCount;
|
||||||
WorkerNode *workerNode = list_nth(workerNodeList, assignmentIndex);
|
WorkerNode *workerNode = list_nth(workerNodeList, assignmentIndex);
|
||||||
|
|
||||||
|
|
|
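The `List *(*reorderFunction)(Task *, List *)` fix in the FirstReplicaAssignTaskList hunk above is easy to misread: the declarator names a pointer to a function taking a task and a placement list and returning a list pointer, not a `List *` object. A minimal standalone sketch of the same pattern, using hypothetical stand-ins for the Task and List types rather than the PostgreSQL definitions:

/* Illustrative types; not the real PostgreSQL Task/List. */
#include <stdio.h>
#include <stddef.h>

typedef struct Task { int taskId; } Task;
typedef struct List { int length; } List;

/* one reorder policy: return the placement list unchanged */
static List *
IdentityReorder(Task *task, List *placementList)
{
	(void) task;
	return placementList;
}

int
main(void)
{
	Task task = { 42 };
	List placements = { 3 };

	/* same declarator shape the commit reformats */
	List *(*reorderFunction)(Task *, List *) = IdentityReorder;

	if (reorderFunction != NULL)
	{
		List *reordered = reorderFunction(&task, &placements);
		printf("placement count: %d\n", reordered->length);
	}
	return 0;
}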
@@ -79,7 +79,7 @@ RelayEventExtendNames(Node *parseTree, uint64 shardId)

 AlterTableStmt *alterTableStmt = (AlterTableStmt *) parseTree;
 char **relationName = &(alterTableStmt->relation->relname);
 RangeVar *relation = alterTableStmt->relation; /* for constraints */

 List *commandList = alterTableStmt->cmds;
 ListCell *commandCell = NULL;

@@ -179,10 +179,10 @@ RelayEventExtendNames(Node *parseTree, uint64 shardId)
 objectType == OBJECT_INDEX || objectType == OBJECT_FOREIGN_TABLE ||
 objectType == OBJECT_FOREIGN_SERVER)
 {
 List *relationNameList = NULL;
 int relationNameListLength = 0;
 Value *relationNameValue = NULL;
 char **relationName = NULL;

 uint32 dropCount = list_length(dropStmt->objects);
 if (dropCount > 1)

@@ -205,19 +205,30 @@ RelayEventExtendNames(Node *parseTree, uint64 shardId)
 switch (relationNameListLength)
 {
 case 1:
+{
 relationNameValue = linitial(relationNameList);
 break;
+}
+
 case 2:
+{
 relationNameValue = lsecond(relationNameList);
 break;
+}
+
 case 3:
+{
 relationNameValue = lthird(relationNameList);
 break;
+}
+
 default:
+{
 ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR),
 errmsg("improper relation name: \"%s\"",
 NameListToString(relationNameList))));
 break;
+}
 }

 relationName = &(relationNameValue->val.str);

@@ -389,13 +400,13 @@ TypeDropIndexConstraint(const AlterTableCmd *command,
 {
 Relation pgConstraint = NULL;
 SysScanDesc scanDescriptor = NULL;
 ScanKeyData scanKey[1];
 int scanKeyCount = 1;
 HeapTuple heapTuple = NULL;

 char *searchedConstraintName = NULL;
 bool indexConstraint = false;
 Oid relationId = InvalidOid;
 bool failOK = true;

 if (command->subtype != AT_DropConstraint)

@@ -489,7 +500,7 @@ AppendShardIdToConstraintName(AlterTableCmd *command, uint64 shardId)
 void
 AppendShardIdToName(char **name, uint64 shardId)
 {
 char extendedName[NAMEDATALEN];
 uint32 extendedNameLength = 0;

 snprintf(extendedName, NAMEDATALEN, "%s%c" UINT64_FORMAT,
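For context on the AppendShardIdToName hunk: the function extends a relation name with a shard ID via snprintf into a NAMEDATALEN-sized buffer, with the separator passed as a %c argument and the ID formatted with PostgreSQL's UINT64_FORMAT macro. A hedged, standalone sketch of the same idea using the standard PRIu64 in place of UINT64_FORMAT (the underscore separator and the stock NAMEDATALEN of 64 are assumptions here):

/* Sketch: append "<sep><shardId>" to a base name, NAMEDATALEN-bounded. */
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

#define NAMEDATALEN 64 /* stock PostgreSQL value; assumed */

static void
AppendShardIdToNameSketch(char *name, size_t nameSize, uint64_t shardId)
{
	char extendedName[NAMEDATALEN];

	/* mirrors the "%s%c" UINT64_FORMAT format string in the hunk */
	snprintf(extendedName, NAMEDATALEN, "%s%c%" PRIu64, name, '_', shardId);
	snprintf(name, nameSize, "%s", extendedName);
}

int
main(void)
{
	char relationName[NAMEDATALEN] = "lineitem";

	AppendShardIdToNameSketch(relationName, sizeof(relationName), 102008);
	printf("%s\n", relationName); /* lineitem_102008 */
	return 0;
}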
@@ -48,23 +48,23 @@ static void NormalizeWorkerListPath(void);

 /* GUC enum definitions */
 static const struct config_enum_entry task_assignment_policy_options[] = {
-{"greedy", TASK_ASSIGNMENT_GREEDY, false},
-{"first-replica", TASK_ASSIGNMENT_FIRST_REPLICA, false},
-{"round-robin", TASK_ASSIGNMENT_ROUND_ROBIN, false},
-{NULL, 0, false}
+{ "greedy", TASK_ASSIGNMENT_GREEDY, false },
+{ "first-replica", TASK_ASSIGNMENT_FIRST_REPLICA, false },
+{ "round-robin", TASK_ASSIGNMENT_ROUND_ROBIN, false },
+{ NULL, 0, false }
 };

 static const struct config_enum_entry task_executor_type_options[] = {
-{"real-time", MULTI_EXECUTOR_REAL_TIME, false},
-{"task-tracker", MULTI_EXECUTOR_TASK_TRACKER, false},
-{"router", MULTI_EXECUTOR_ROUTER, false},
-{NULL, 0, false}
+{ "real-time", MULTI_EXECUTOR_REAL_TIME, false },
+{ "task-tracker", MULTI_EXECUTOR_TASK_TRACKER, false },
+{ "router", MULTI_EXECUTOR_ROUTER, false },
+{ NULL, 0, false }
 };

 static const struct config_enum_entry shard_placement_policy_options[] = {
-{"local-node-first", SHARD_PLACEMENT_LOCAL_NODE_FIRST, false},
-{"round-robin", SHARD_PLACEMENT_ROUND_ROBIN, false},
-{NULL, 0, false}
+{ "local-node-first", SHARD_PLACEMENT_LOCAL_NODE_FIRST, false },
+{ "round-robin", SHARD_PLACEMENT_ROUND_ROBIN, false },
+{ NULL, 0, false }
 };


@@ -206,9 +206,10 @@ RegisterCitusConfigVariables(void)

 DefineCustomBoolVariable(
 "citusdb.expire_cached_shards",
-gettext_noop("Enables shard cache expiration if a shard's size on disk has changed. "),
-gettext_noop("When appending to an existing shard, old data may still be cached on "
-"other workers. This configuration entry activates automatic "
+gettext_noop("Enables shard cache expiration if a shard's size on disk has "
+"changed."),
+gettext_noop("When appending to an existing shard, old data may still be cached "
+"on other workers. This configuration entry activates automatic "
 "expiration, but should not be used with manual updates to shards."),
 &ExpireCachedShards,
 false,

@@ -440,11 +441,11 @@ RegisterCitusConfigVariables(void)
 "citusdb.task_assignment_policy",
 gettext_noop("Sets the policy to use when assigning tasks to worker nodes."),
 gettext_noop("The master node assigns tasks to worker nodes based on shard "
 "locations. This configuration value specifies the policy to "
 "use when making these assignments. The greedy policy aims to "
 "evenly distribute tasks across worker nodes, first-replica just "
 "assigns tasks in the order shard placements were created, "
 "and the round-robin policy assigns tasks to worker nodes in "
 "a round-robin fashion."),
 &TaskAssignmentPolicy,
 TASK_ASSIGNMENT_GREEDY,

@@ -488,6 +489,7 @@ RegisterCitusConfigVariables(void)

 /* warn about config items in the citusdb namespace that are not registered above */
 EmitWarningsOnPlaceholders("citusdb");
+
 /* Also warn about citus namespace, as that's a very likely misspelling */
 EmitWarningsOnPlaceholders("citus");
 }

@@ -515,8 +517,10 @@ NormalizeWorkerListPath(void)
 {
 absoluteFileName = malloc(strlen(DataDir) + strlen(WORKER_LIST_FILENAME) + 2);
 if (absoluteFileName == NULL)
+{
 ereport(FATAL, (errcode(ERRCODE_OUT_OF_MEMORY),
 errmsg("out of memory")));
+}

 sprintf(absoluteFileName, "%s/%s", DataDir, WORKER_LIST_FILENAME);
 }

@@ -530,6 +534,7 @@ NormalizeWorkerListPath(void)
 "environment variable.\n", progname, ConfigFileName)));
 }

-SetConfigOption("citusdb.worker_list_file", absoluteFileName, PGC_POSTMASTER, PGC_S_OVERRIDE);
+SetConfigOption("citusdb.worker_list_file", absoluteFileName, PGC_POSTMASTER,
+PGC_S_OVERRIDE);
 free(absoluteFileName);
 }
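The config_enum_entry tables reformatted above are the option lists that the GUC machinery matches against when a user runs SET. A hedged sketch of how such a table is wired to a setting with PostgreSQL's DefineCustomEnumVariable; the API is PostgreSQL's, but the variable, enum, and GUC names here are illustrative, not the ones this commit registers:

/* Backend-only sketch; compiles as part of an extension. */
#include "postgres.h"
#include "utils/guc.h"

typedef enum { EXAMPLE_POLICY_GREEDY, EXAMPLE_POLICY_ROUND_ROBIN } ExamplePolicy;

static int ExampleAssignmentPolicy = EXAMPLE_POLICY_GREEDY;

static const struct config_enum_entry example_policy_options[] = {
	{ "greedy", EXAMPLE_POLICY_GREEDY, false },
	{ "round-robin", EXAMPLE_POLICY_ROUND_ROBIN, false },
	{ NULL, 0, false } /* terminator entry, as in the tables above */
};

static void
RegisterExamplePolicyGuc(void)
{
	DefineCustomEnumVariable(
		"example.assignment_policy",           /* hypothetical GUC name */
		gettext_noop("Sets an example task assignment policy."),
		NULL,
		&ExampleAssignmentPolicy,
		EXAMPLE_POLICY_GREEDY,
		example_policy_options,
		PGC_USERSET,
		0,
		NULL, NULL, NULL);
}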
@@ -116,9 +116,9 @@ FakeGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel, Oid foreigntableid,
 ForeignPath *best_path, List *tlist, List *scan_clauses)
 #else
 static ForeignScan *
-FakeGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel, Oid foreigntableid,
-ForeignPath *best_path, List *tlist, List *scan_clauses,
-Plan *outer_plan)
+FakeGetForeignPlan(PlannerInfo * root, RelOptInfo * baserel, Oid foreigntableid,
+ForeignPath * best_path, List * tlist, List * scan_clauses,
+Plan * outer_plan)
 #endif
 {
 Index scan_relid = baserel->relid;

@@ -129,7 +129,7 @@ FakeGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel, Oid foreigntableid,
 return make_foreignscan(tlist, scan_clauses, scan_relid, NIL, NIL);
 #else
 return make_foreignscan(tlist, scan_clauses, scan_relid, NIL, NIL, NIL, NIL,
 outer_plan);
 #endif
 }
@@ -265,7 +265,7 @@ GetRangeTblKind(RangeTblEntry *rte)
 {
 CitusRTEKind rteKind = CITUS_RTE_RELATION /* invalid */;

-switch(rte->rtekind)
+switch (rte->rtekind)
 {
 /* directly rtekind if it's not possibly an extended RTE */
 case RTE_RELATION:

@@ -273,9 +273,13 @@ GetRangeTblKind(RangeTblEntry *rte)
 case RTE_JOIN:
 case RTE_VALUES:
 case RTE_CTE:
+{
 rteKind = (CitusRTEKind) rte->rtekind;
 break;
+}
+
 case RTE_FUNCTION:
+{
 /*
  * Extract extra data - correct even if a plain RTE_FUNCTION, not
  * an extended one, ExtractRangeTblExtraData handles that case

@@ -283,6 +287,7 @@ GetRangeTblKind(RangeTblEntry *rte)
  */
 ExtractRangeTblExtraData(rte, &rteKind, NULL, NULL, NULL);
 break;
+}
 }

 return rteKind;
@@ -186,7 +186,7 @@ AppendOptionListToString(StringInfo stringBuffer, List *optionList)

 foreach(optionCell, optionList)
 {
-DefElem *option = (DefElem*) lfirst(optionCell);
+DefElem *option = (DefElem *) lfirst(optionCell);
 char *optionName = option->defname;
 char *optionValue = defGetString(option);

@@ -219,7 +219,7 @@ pg_get_tableschemadef_string(Oid tableRelationId)
 char relationKind = 0;
 TupleDesc tupleDescriptor = NULL;
 TupleConstr *tupleConstraints = NULL;
 int attributeIndex = 0;
 bool firstAttributePrinted = false;
 AttrNumber defaultValueIndex = 0;
 AttrNumber constraintIndex = 0;

@@ -447,21 +447,35 @@ pg_get_tablecolumnoptionsdef_string(Oid tableRelationId)
 switch (attributeForm->attstorage)
 {
 case 'p':
+{
 storageName = "PLAIN";
 break;
+}
+
 case 'e':
+{
 storageName = "EXTERNAL";
 break;
+}
+
 case 'm':
+{
 storageName = "MAIN";
 break;
+}
+
 case 'x':
+{
 storageName = "EXTENDED";
 break;
+}
+
 default:
+{
 ereport(ERROR, (errmsg("unrecognized storage type: %c",
 attributeForm->attstorage)));
 break;
+}
 }

 appendStringInfo(&statement, "ALTER COLUMN %s ",
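The storage-type hunk above is the clearest statement of the brace convention this commit enforces: every case body gets its own block, even one-liners. A compact, self-contained illustration of the same convention; the attstorage letters match PostgreSQL's pg_attribute encoding, while the wrapper function is illustrative:

/* Brace-per-case style applied to the attstorage mapping. */
#include <stdio.h>

static const char *
StorageNameSketch(char attstorage)
{
	const char *storageName = NULL;

	switch (attstorage)
	{
		case 'p':
		{
			storageName = "PLAIN";
			break;
		}

		case 'e':
		{
			storageName = "EXTERNAL";
			break;
		}

		case 'm':
		{
			storageName = "MAIN";
			break;
		}

		case 'x':
		{
			storageName = "EXTENDED";
			break;
		}

		default:
		{
			storageName = "UNRECOGNIZED";
			break;
		}
	}

	return storageName;
}

int
main(void)
{
	printf("%s\n", StorageNameSketch('x')); /* EXTENDED */
	return 0;
}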
@@ -51,10 +51,10 @@ static void InvalidateDistRelationCacheCallback(Datum argument, Oid relationId);
 static HeapTuple LookupDistPartitionTuple(Oid relationId);
 static List * LookupDistShardTuples(Oid relationId);
 static void GetPartitionTypeInputInfo(char *partitionKeyString, char partitionMethod,
 Oid *intervalTypeId, int32 *intervalTypeMod);
 static ShardInterval * TupleToShardInterval(HeapTuple heapTuple,
 TupleDesc tupleDescriptor, Oid intervalTypeId,
 int32 intervalTypeMod);
 static void CachedRelationLookup(const char *relationName, Oid *cachedOid);


@@ -87,6 +87,7 @@ IsDistributedTable(Oid relationId)
 return cacheEntry->isDistributedTable;
 }

+
 /*
  * LoadShardInterval reads shard metadata for given shardId from pg_dist_shard,
  * and converts min/max values in these metadata to their properly typed datum

@@ -98,7 +99,7 @@ LoadShardInterval(uint64 shardId)
 {
 ShardInterval *shardInterval;
 SysScanDesc scanDescriptor = NULL;
 ScanKeyData scanKey[1];
 int scanKeyCount = 1;
 HeapTuple heapTuple = NULL;
 Form_pg_dist_shard shardForm = NULL;

@@ -127,11 +128,11 @@ LoadShardInterval(uint64 shardId)
 partitionEntry = DistributedTableCacheEntry(shardForm->logicalrelid);

 GetPartitionTypeInputInfo(partitionEntry->partitionKeyString,
 partitionEntry->partitionMethod, &intervalTypeId,
 &intervalTypeMod);

 shardInterval = TupleToShardInterval(heapTuple, tupleDescriptor, intervalTypeId,
 intervalTypeMod);

 systable_endscan(scanDescriptor);
 heap_close(pgDistShard, AccessShareLock);

@@ -139,6 +140,7 @@ LoadShardInterval(uint64 shardId)
 return shardInterval;
 }

+
 /*
  * DistributedTableCacheEntry looks up a pg_dist_partition entry for a
  * relation.

@@ -239,19 +241,19 @@ LookupDistTableCacheEntry(Oid relationId)
 int32 intervalTypeMod = -1;

 GetPartitionTypeInputInfo(partitionKeyString, partitionMethod, &intervalTypeId,
 &intervalTypeMod);

 shardIntervalArray = MemoryContextAllocZero(CacheMemoryContext,
 shardIntervalArrayLength *
 sizeof(ShardInterval));

 foreach(distShardTupleCell, distShardTupleList)
 {
 HeapTuple shardTuple = lfirst(distShardTupleCell);
 ShardInterval *shardInterval = TupleToShardInterval(shardTuple,
 distShardTupleDesc,
 intervalTypeId,
 intervalTypeMod);
 MemoryContext oldContext = MemoryContextSwitchTo(CacheMemoryContext);

 CopyShardInterval(shardInterval, &shardIntervalArray[arrayIndex]);

@@ -773,7 +775,7 @@ LookupDistShardTuples(Oid relationId)
 scanKey[0].sk_argument = ObjectIdGetDatum(relationId);

 scanDescriptor = systable_beginscan(pgDistShard, DistShardLogicalRelidIndexId(), true,
 NULL, 1, scanKey);

 currentShardTuple = systable_getnext(scanDescriptor);
 while (HeapTupleIsValid(currentShardTuple))

@@ -797,7 +799,7 @@ LookupDistShardTuples(Oid relationId)
 */
 static void
 GetPartitionTypeInputInfo(char *partitionKeyString, char partitionMethod,
 Oid *intervalTypeId, int32 *intervalTypeMod)
 {
 *intervalTypeId = InvalidOid;
 *intervalTypeMod = -1;

@@ -826,7 +828,7 @@ GetPartitionTypeInputInfo(char *partitionKeyString, char partitionMethod,
 {
 ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 errmsg("unsupported table partition type: %c",
 partitionMethod)));
 }
 }
 }

@@ -838,7 +840,7 @@ GetPartitionTypeInputInfo(char *partitionKeyString, char partitionMethod,
 */
 static ShardInterval *
 TupleToShardInterval(HeapTuple heapTuple, TupleDesc tupleDescriptor, Oid intervalTypeId,
 int32 intervalTypeMod)
 {
 ShardInterval *shardInterval = NULL;
 bool isNull = false;

@@ -847,16 +849,16 @@ TupleToShardInterval(HeapTuple heapTuple, TupleDesc tupleDescriptor, Oid interva
 Oid inputFunctionId = InvalidOid;
 Oid typeIoParam = InvalidOid;
 Datum relationIdDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_logicalrelid,
 tupleDescriptor, &isNull);
 Datum shardIdDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_shardid,
 tupleDescriptor, &isNull);
 Datum storageTypeDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_shardstorage,
 tupleDescriptor, &isNull);

 Datum minValueTextDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_shardminvalue,
 tupleDescriptor, &minValueNull);
 Datum maxValueTextDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_shardmaxvalue,
 tupleDescriptor, &maxValueNull);

 Oid relationId = DatumGetObjectId(relationIdDatum);
 int64 shardId = DatumGetInt64(shardIdDatum);

@@ -877,7 +879,7 @@ TupleToShardInterval(HeapTuple heapTuple, TupleDesc tupleDescriptor, Oid interva

 /* TODO: move this up the call stack to avoid per-tuple invocation? */
 get_type_io_data(intervalTypeId, IOFunc_input, &intervalTypeLen, &intervalByVal,
 &intervalAlign, &intervalDelim, &typeIoParam, &inputFunctionId);

 /* finally convert min/max values to their actual types */
 minValue = OidInputFunctionCall(inputFunctionId, minValueString,
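The TupleToShardInterval hunks above revolve around one PostgreSQL idiom: resolve a type's input function with get_type_io_data, then convert the stored text min/max values into typed Datums with OidInputFunctionCall. A hedged, backend-only sketch of that conversion step; the wrapper function and variable names are illustrative rather than the commit's code:

/* Sketch of the text-to-Datum conversion used per shard interval. */
#include "postgres.h"
#include "fmgr.h"
#include "utils/lsyscache.h"

static Datum
TextToTypedDatumSketch(char *valueString, Oid typeId, int32 typeMod)
{
	int16 typeLen = 0;
	bool typeByVal = false;
	char typeAlign = 0;
	char typeDelim = 0;
	Oid typeIoParam = InvalidOid;
	Oid inputFunctionId = InvalidOid;

	/* resolve the type's input function and I/O parameter once */
	get_type_io_data(typeId, IOFunc_input, &typeLen, &typeByVal,
					 &typeAlign, &typeDelim, &typeIoParam, &inputFunctionId);

	/* parse the string with the type's own input function */
	return OidInputFunctionCall(inputFunctionId, valueString,
								typeIoParam, typeMod);
}

The TODO in the hunk points at the cost of doing the lookup per tuple; hoisting get_type_io_data out of the loop, as the comment suggests, would leave only the cheap OidInputFunctionCall in the per-tuple path.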
@@ -22,7 +22,8 @@
 #include "distributed/multi_resowner.h"


-typedef struct JobDirectoryEntry {
+typedef struct JobDirectoryEntry
+{
 ResourceOwner owner;
 uint64 jobId;
 } JobDirectoryEntry;

@@ -44,8 +45,8 @@ MultiResourceOwnerReleaseCallback(ResourceReleasePhase phase,
 bool isTopLevel,
 void *arg)
 {
 int lastJobIndex = NumRegisteredJobDirectories - 1;
 int jobIndex = 0;

 if (phase == RESOURCE_RELEASE_AFTER_LOCKS)
 {

@@ -79,7 +80,7 @@ MultiResourceOwnerReleaseCallback(ResourceReleasePhase phase,
 void
 ResourceOwnerEnlargeJobDirectories(ResourceOwner owner)
 {
 int newMax = 0;

 /* ensure callback is registered */
 if (!RegisteredResownerCallback)

@@ -91,15 +92,17 @@ ResourceOwnerEnlargeJobDirectories(ResourceOwner owner)
 if (RegisteredJobDirectories == NULL)
 {
 newMax = 16;
-RegisteredJobDirectories = (JobDirectoryEntry *)
-MemoryContextAlloc(TopMemoryContext, newMax * sizeof(JobDirectoryEntry));
+RegisteredJobDirectories =
+(JobDirectoryEntry *) MemoryContextAlloc(TopMemoryContext,
+newMax * sizeof(JobDirectoryEntry));
 NumAllocatedJobDirectories = newMax;
 }
 else if (NumRegisteredJobDirectories + 1 > NumAllocatedJobDirectories)
 {
 newMax = NumAllocatedJobDirectories * 2;
-RegisteredJobDirectories = (JobDirectoryEntry *)
-repalloc(RegisteredJobDirectories, newMax * sizeof(JobDirectoryEntry));
+RegisteredJobDirectories =
+(JobDirectoryEntry *) repalloc(RegisteredJobDirectories,
+newMax * sizeof(JobDirectoryEntry));
 NumAllocatedJobDirectories = newMax;
 }
 }

@@ -123,8 +126,8 @@ ResourceOwnerRememberJobDirectory(ResourceOwner owner, uint64 jobId)
 void
 ResourceOwnerForgetJobDirectory(ResourceOwner owner, uint64 jobId)
 {
 int lastJobIndex = NumRegisteredJobDirectories - 1;
 int jobIndex = 0;

 for (jobIndex = lastJobIndex; jobIndex >= 0; jobIndex--)
 {

@@ -135,7 +138,8 @@ ResourceOwnerForgetJobDirectory(ResourceOwner owner, uint64 jobId)
 /* move all later entries one up */
 while (jobIndex < lastJobIndex)
 {
-RegisteredJobDirectories[jobIndex] = RegisteredJobDirectories[jobIndex + 1];
+RegisteredJobDirectories[jobIndex] =
+RegisteredJobDirectories[jobIndex + 1];
 jobIndex++;
 }
 NumRegisteredJobDirectories = lastJobIndex;
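The ResourceOwnerEnlargeJobDirectories hunks rewrap a standard doubling-array pattern: allocate a small initial array, then double its capacity when full, giving amortized constant-time appends. A self-contained sketch with malloc/realloc standing in for MemoryContextAlloc/repalloc; the entry type and initial size of 16 mirror the diff, everything else is illustrative:

/* Doubling-array growth in the style of the resowner code. */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

typedef struct JobDirectoryEntry
{
	void *owner; /* ResourceOwner in the real code */
	uint64_t jobId;
} JobDirectoryEntry;

static JobDirectoryEntry *registeredJobDirectories = NULL;
static int numAllocated = 0;
static int numRegistered = 0;

static void
EnsureJobDirectoryCapacity(void)
{
	if (registeredJobDirectories == NULL)
	{
		int newMax = 16; /* same initial size as the diff */
		registeredJobDirectories =
			malloc(newMax * sizeof(JobDirectoryEntry));
		numAllocated = newMax;
	}
	else if (numRegistered + 1 > numAllocated)
	{
		int newMax = numAllocated * 2; /* double when full */
		registeredJobDirectories =
			realloc(registeredJobDirectories,
					newMax * sizeof(JobDirectoryEntry));
		numAllocated = newMax;
	}
	/* allocation failure handling omitted for brevity */
}

int
main(void)
{
	int i;
	for (i = 0; i < 100; i++)
	{
		EnsureJobDirectoryCapacity();
		registeredJobDirectories[numRegistered].jobId = (uint64_t) i;
		numRegistered++;
	}
	printf("registered %d, allocated %d\n", numRegistered, numAllocated);
	free(registeredJobDirectories);
	return 0;
}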
@@ -30,7 +30,7 @@
 void
 LockShardDistributionMetadata(int64 shardId, LOCKMODE lockMode)
 {
 LOCKTAG tag;
 const bool sessionLock = false;
 const bool dontWait = false;

@@ -64,7 +64,7 @@ LockRelationDistributionMetadata(Oid relationId, LOCKMODE lockMode)
 void
 LockShardResource(uint64 shardId, LOCKMODE lockmode)
 {
 LOCKTAG tag;
 const bool sessionLock = false;
 const bool dontWait = false;

@@ -78,7 +78,7 @@ LockShardResource(uint64 shardId, LOCKMODE lockmode)
 void
 UnlockShardResource(uint64 shardId, LOCKMODE lockmode)
 {
 LOCKTAG tag;
 const bool sessionLock = false;

 SET_LOCKTAG_SHARD_RESOURCE(tag, MyDatabaseId, shardId);

@@ -95,7 +95,7 @@ UnlockShardResource(uint64 shardId, LOCKMODE lockmode)
 void
 LockJobResource(uint64 jobId, LOCKMODE lockmode)
 {
 LOCKTAG tag;
 const bool sessionLock = false;
 const bool dontWait = false;

@@ -109,7 +109,7 @@ LockJobResource(uint64 jobId, LOCKMODE lockmode)
 void
 UnlockJobResource(uint64 jobId, LOCKMODE lockmode)
 {
 LOCKTAG tag;
 const bool sessionLock = false;

 SET_LOCKTAG_JOB_RESOURCE(tag, MyDatabaseId, jobId);
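All five lock wrappers in this file share one shape: fill a LOCKTAG, then call LockAcquire or LockRelease with the session-lock and dont-wait flags. A hedged, backend-only sketch of that shape using PostgreSQL's generic advisory lock tag, since SET_LOCKTAG_SHARD_RESOURCE and SET_LOCKTAG_JOB_RESOURCE are Citus-defined macros not shown in this diff:

/* Lock-wrapper sketch; the tag layout is an assumption. */
#include "postgres.h"
#include "miscadmin.h"
#include "storage/lock.h"

static void
LockShardResourceSketch(uint64 shardId, LOCKMODE lockmode)
{
	LOCKTAG tag;
	const bool sessionLock = false; /* lock belongs to the transaction */
	const bool dontWait = false;    /* block until the lock is granted */

	SET_LOCKTAG_ADVISORY(tag, MyDatabaseId,
						 (uint32) (shardId >> 32), (uint32) shardId, 0);

	(void) LockAcquire(&tag, lockmode, sessionLock, dontWait);
}

static void
UnlockShardResourceSketch(uint64 shardId, LOCKMODE lockmode)
{
	LOCKTAG tag;
	const bool sessionLock = false;

	SET_LOCKTAG_ADVISORY(tag, MyDatabaseId,
						 (uint32) (shardId >> 32), (uint32) shardId, 0);

	LockRelease(&tag, lockmode, sessionLock);
}

Because the tag must match bit-for-bit between acquire and release, the real code centralizes tag construction in its SET_LOCKTAG_* macros rather than repeating field assignments at each call site.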
@@ -50,7 +50,7 @@
 #include "utils/memutils.h"


 int TaskTrackerDelay = 200; /* process sleep interval in millisecs */
 int MaxRunningTasksPerNode = 16; /* max number of running tasks */
 int MaxTrackedTasksPerNode = 1024; /* max number of tracked tasks */
 WorkerTasksSharedStateData *WorkerTasksSharedState; /* shared memory state */

@@ -76,10 +76,10 @@ static void TrackerCleanupJobSchemas(void);
 static void TrackerCleanupConnections(HTAB *WorkerTasksHash);
 static void TrackerRegisterShutDown(HTAB *WorkerTasksHash);
 static void TrackerDelayLoop(void);
-static List *SchedulableTaskList(HTAB *WorkerTasksHash);
+static List * SchedulableTaskList(HTAB *WorkerTasksHash);
 static WorkerTask * SchedulableTaskPriorityQueue(HTAB *WorkerTasksHash);
 static uint32 CountTasksMatchingCriteria(HTAB *WorkerTasksHash,
-bool (*CriteriaFunction) (WorkerTask *));
+bool (*CriteriaFunction)(WorkerTask *));
 static bool RunningTask(WorkerTask *workerTask);
 static bool SchedulableTask(WorkerTask *workerTask);
 static int CompareTasksByTime(const void *first, const void *second);

@@ -494,6 +494,7 @@ TrackerDelayLoop(void)
 }
 }

+
 /* ------------------------------------------------------------
  * Signal handling and shared hash initialization functions follow
  * ------------------------------------------------------------

@@ -503,7 +504,7 @@ TrackerDelayLoop(void)
 static void
 TrackerSigHupHandler(SIGNAL_ARGS)
 {
 int save_errno = errno;

 got_SIGHUP = true;
 if (MyProc != NULL)

@@ -519,7 +520,7 @@ TrackerSigHupHandler(SIGNAL_ARGS)
 static void
 TrackerShutdownHandler(SIGNAL_ARGS)
 {
 int save_errno = errno;

 got_SIGTERM = true;
 if (MyProc != NULL)

@@ -579,10 +580,10 @@ TaskTrackerShmemInit(void)
 LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);

 /* allocate struct containing task tracker related shared state */
-WorkerTasksSharedState = (WorkerTasksSharedStateData *)
-ShmemInitStruct("Worker Task Control",
+WorkerTasksSharedState =
+(WorkerTasksSharedStateData *) ShmemInitStruct("Worker Task Control",
 sizeof(WorkerTasksSharedStateData),
 &alreadyInitialized);

 if (!alreadyInitialized)
 {

@@ -607,6 +608,7 @@ TaskTrackerShmemInit(void)
 }
 }

+
 /* ------------------------------------------------------------
  * Task scheduling and management functions follow
  * ------------------------------------------------------------

@@ -653,7 +655,7 @@ SchedulableTaskList(HTAB *WorkerTasksHash)
 for (queueIndex = 0; queueIndex < tasksToScheduleCount; queueIndex++)
 {
 WorkerTask *schedulableTask = (WorkerTask *) palloc0(sizeof(WorkerTask));
 schedulableTask->jobId = schedulableTaskQueue[queueIndex].jobId;
 schedulableTask->taskId = schedulableTaskQueue[queueIndex].taskId;

 schedulableTaskList = lappend(schedulableTaskList, schedulableTask);

@@ -719,7 +721,7 @@ SchedulableTaskPriorityQueue(HTAB *WorkerTasksHash)
 /* Counts the number of tasks that match the given criteria function. */
 static uint32
 CountTasksMatchingCriteria(HTAB *WorkerTasksHash,
-bool (*CriteriaFunction) (WorkerTask *))
+bool (*CriteriaFunction)(WorkerTask *))
 {
 HASH_SEQ_STATUS status;
 WorkerTask *currentTask = NULL;

@@ -730,7 +732,7 @@ CountTasksMatchingCriteria(HTAB *WorkerTasksHash,
 currentTask = (WorkerTask *) hash_seq_search(&status);
 while (currentTask != NULL)
 {
-bool matchesCriteria = (*CriteriaFunction) (currentTask);
+bool matchesCriteria = (*CriteriaFunction)(currentTask);
 if (matchesCriteria)
 {
 taskCount++;

@@ -775,7 +777,7 @@ SchedulableTask(WorkerTask *workerTask)
 static int
 CompareTasksByTime(const void *first, const void *second)
 {
 WorkerTask *firstTask = (WorkerTask *) first;
 WorkerTask *secondTask = (WorkerTask *) second;

 /* tasks that are assigned earlier have higher priority */

@@ -893,7 +895,7 @@ ManageWorkerTask(WorkerTask *workerTask, HTAB *WorkerTasksHash)
 {
 case TASK_ASSIGNED:
 {
 break; /* nothing to do until the task gets scheduled */
 }

 case TASK_SCHEDULED:
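CompareTasksByTime above is a standard qsort-style comparator, presumably used to order the schedulable-task queue so that earlier-assigned tasks are scheduled first. A self-contained sketch of that comparator pattern; the time_t field and the struct are illustrative stand-ins for the real WorkerTask bookkeeping:

/* qsort comparator in the style of CompareTasksByTime. */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

typedef struct WorkerTaskSketch
{
	unsigned int taskId;
	time_t assignedAt; /* illustrative; not the real field */
} WorkerTaskSketch;

static int
CompareTasksByTimeSketch(const void *first, const void *second)
{
	const WorkerTaskSketch *firstTask = (const WorkerTaskSketch *) first;
	const WorkerTaskSketch *secondTask = (const WorkerTaskSketch *) second;

	/* tasks that are assigned earlier have higher priority */
	if (firstTask->assignedAt < secondTask->assignedAt)
	{
		return -1;
	}
	else if (firstTask->assignedAt > secondTask->assignedAt)
	{
		return 1;
	}
	return 0;
}

int
main(void)
{
	WorkerTaskSketch queue[] = { { 3, 300 }, { 1, 100 }, { 2, 200 } };
	int i;

	qsort(queue, 3, sizeof(WorkerTaskSketch), CompareTasksByTimeSketch);
	for (i = 0; i < 3; i++)
	{
		printf("task %u\n", queue[i].taskId); /* 1, 2, 3 */
	}
	return 0;
}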
@@ -57,7 +57,7 @@ task_tracker_assign_task(PG_FUNCTION_ARGS)
 {
 uint64 jobId = PG_GETARG_INT64(0);
 uint32 taskId = PG_GETARG_UINT32(1);
 text *taskCallStringText = PG_GETARG_TEXT_P(2);

 StringInfo jobSchemaName = JobSchemaName(jobId);
 bool schemaExists = false;

@@ -331,7 +331,7 @@ UpdateTask(WorkerTask *workerTask, char *taskCallString)
 if (taskStatus == TASK_SUCCEEDED || taskStatus == TASK_CANCEL_REQUESTED ||
 taskStatus == TASK_CANCELED)
 {
-; /* nothing to do */
+/* nothing to do */
 }
 else if (taskStatus == TASK_PERMANENTLY_FAILED)
 {
@@ -53,11 +53,14 @@ static void ReceiveResourceCleanup(int32 connectionId, const char *filename,
 static void DeleteFile(const char *filename);
 static void FetchTableCommon(text *tableName, uint64 remoteTableSize,
 ArrayType *nodeNameObject, ArrayType *nodePortObject,
-bool (*FetchTableFunction) (const char *, uint32, StringInfo));
+bool (*FetchTableFunction)(const char *, uint32,
+StringInfo));
 static uint64 LocalTableSize(Oid relationId);
 static uint64 ExtractShardId(StringInfo tableName);
-static bool FetchRegularTable(const char *nodeName, uint32 nodePort, StringInfo tableName);
-static bool FetchForeignTable(const char *nodeName, uint32 nodePort, StringInfo tableName);
+static bool FetchRegularTable(const char *nodeName, uint32 nodePort,
+StringInfo tableName);
+static bool FetchForeignTable(const char *nodeName, uint32 nodePort,
+StringInfo tableName);
 static List * TableDDLCommandList(const char *nodeName, uint32 nodePort,
 StringInfo tableName);
 static StringInfo ForeignFilePath(const char *nodeName, uint32 nodePort,

@@ -85,7 +88,7 @@ worker_fetch_partition_file(PG_FUNCTION_ARGS)
 uint64 jobId = PG_GETARG_INT64(0);
 uint32 partitionTaskId = PG_GETARG_UINT32(1);
 uint32 partitionFileId = PG_GETARG_UINT32(2);
 uint32 upstreamTaskId = PG_GETARG_UINT32(3);
 text *nodeNameText = PG_GETARG_TEXT_P(4);
 uint32 nodePort = PG_GETARG_UINT32(5);
 char *nodeName = NULL;

@@ -226,7 +229,7 @@ ReceiveRegularFile(const char *nodeName, uint32 nodePort,
 char filename[MAXPGPATH];
 int closed = -1;
 const int fileFlags = (O_APPEND | O_CREAT | O_RDWR | O_TRUNC | PG_BINARY);
 const int fileMode = (S_IRUSR | S_IWUSR);

 QueryStatus queryStatus = CLIENT_INVALID_QUERY;
 int32 connectionId = INVALID_CONNECTION_ID;

@@ -309,7 +312,7 @@ ReceiveRegularFile(const char *nodeName, uint32 nodePort,
 }
 else if (copyStatus == CLIENT_COPY_MORE)
 {
-; /* remote node will continue to send more data */
+/* remote node will continue to send more data */
 }
 else
 {

@@ -468,7 +471,7 @@ worker_fetch_foreign_file(PG_FUNCTION_ARGS)
 static void
 FetchTableCommon(text *tableNameText, uint64 remoteTableSize,
 ArrayType *nodeNameObject, ArrayType *nodePortObject,
-bool (*FetchTableFunction) (const char *, uint32, StringInfo))
+bool (*FetchTableFunction)(const char *, uint32, StringInfo))
 {
 StringInfo tableName = NULL;
 char *tableNameCString = NULL;

@@ -531,7 +534,7 @@ FetchTableCommon(text *tableNameText, uint64 remoteTableSize,
 if (remoteTableSize > localTableSize)
 {
 /* table is not up to date, drop the table */
-ObjectAddress tableObject = {InvalidOid, InvalidOid, 0};
+ObjectAddress tableObject = { InvalidOid, InvalidOid, 0 };

 tableObject.classId = RelationRelationId;
 tableObject.objectId = relationId;

@@ -554,7 +557,7 @@ FetchTableCommon(text *tableNameText, uint64 remoteTableSize,
 char *nodeName = TextDatumGetCString(nodeNameDatum);
 uint32 nodePort = DatumGetUInt32(nodePortDatum);

-tableFetched = (*FetchTableFunction) (nodeName, nodePort, tableName);
+tableFetched = (*FetchTableFunction)(nodeName, nodePort, tableName);

 nodeIndex++;
 }

@@ -1010,7 +1013,7 @@ worker_append_table_to_shard(PG_FUNCTION_ARGS)
 * the transaction for this function commits, this lock will automatically
 * be released. This ensures appends to a shard happen in a serial manner.
 */
 shardId = ExtractShardId(shardNameString);
 LockShardResource(shardId, AccessExclusiveLock);

 localFilePath = makeStringInfo();

@@ -1049,7 +1052,7 @@ worker_append_table_to_shard(PG_FUNCTION_ARGS)
 static bool
 check_log_statement(List *statementList)
 {
 ListCell *statementCell;

 if (log_statement == LOGSTMT_NONE)
 {
@@ -40,22 +40,22 @@ worker_foreign_file_path(PG_FUNCTION_ARGS)
 ForeignTable *foreignTable = GetForeignTable(relationId);

 ListCell *optionCell = NULL;
 foreach(optionCell, foreignTable->options)
 {
 DefElem *option = (DefElem *) lfirst(optionCell);
 char *optionName = option->defname;

 int compareResult = strncmp(optionName, FOREIGN_FILENAME_OPTION, MAXPGPATH);
 if (compareResult == 0)
 {
 char *optionValue = defGetString(option);
 foreignFilePath = cstring_to_text(optionValue);
 break;
 }
 }

 /* check that we found the filename option */
 if (foreignFilePath == NULL)
 {
 char *relationName = get_rel_name(relationId);
 ereport(ERROR, (errmsg("could not find filename for foreign table: \"%s\"",
@@ -133,7 +133,7 @@ worker_merge_files_and_run_query(PG_FUNCTION_ARGS)

 const char *createMergeTableQuery = text_to_cstring(createMergeTableQueryText);
 const char *createIntermediateTableQuery =
 text_to_cstring(createIntermediateTableQueryText);

 StringInfo taskDirectoryName = TaskDirectoryName(jobId, taskId);
 StringInfo jobSchemaName = JobSchemaName(jobId);

@@ -170,14 +170,14 @@ worker_merge_files_and_run_query(PG_FUNCTION_ARGS)
 if (setSearchPathResult < 0)
 {
 ereport(ERROR, (errmsg("execution was not successful \"%s\"",
 setSearchPathString->data)));
 }

 createMergeTableResult = SPI_exec(createMergeTableQuery, 0);
 if (createMergeTableResult < 0)
 {
 ereport(ERROR, (errmsg("execution was not successful \"%s\"",
 createMergeTableQuery)));
 }

 appendStringInfo(mergeTableName, "%s%s", intermediateTableName->data,

@@ -188,7 +188,7 @@ worker_merge_files_and_run_query(PG_FUNCTION_ARGS)
 if (createIntermediateTableResult < 0)
 {
 ereport(ERROR, (errmsg("execution was not successful \"%s\"",
 createIntermediateTableQuery)));
 }

 finished = SPI_finish();

@@ -256,8 +256,8 @@ JobSchemaName(uint64 jobId)
 */
 #ifdef HAVE_INTTYPES_H
 StringInfo jobSchemaName = makeStringInfo();
-appendStringInfo(jobSchemaName, "%s%0*"PRIu64,
-JOB_SCHEMA_PREFIX, MIN_JOB_DIRNAME_WIDTH, jobId);
+appendStringInfo(jobSchemaName, "%s%0*" PRIu64, JOB_SCHEMA_PREFIX,
+MIN_JOB_DIRNAME_WIDTH, jobId);
 #else
 StringInfo jobSchemaName = makeStringInfo();
 appendStringInfo(jobSchemaName, "%s%0*llu",
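The JobSchemaName hunk is a portability detail worth spelling out: %0* takes the zero-pad width as an int argument, PRIu64 supplies the right conversion for a 64-bit job ID, and the added space before PRIu64 is likely there because C++11-mode compilers warn that a macro glued to a string literal parses as a user-defined literal suffix. A runnable sketch; the prefix and width values are assumptions for illustration, since MIN_JOB_DIRNAME_WIDTH's definition is not part of this diff:

/* Sketch of the "%s%0*" PRIu64 formatting used by JobSchemaName. */
#include <stdio.h>
#include <inttypes.h>

#define JOB_SCHEMA_PREFIX "pg_merge_job_" /* assumed value */
#define MIN_JOB_DIRNAME_WIDTH 10          /* assumed value */

int
main(void)
{
	uint64_t jobId = 1250;
	char jobSchemaName[64];

	/* the * consumes MIN_JOB_DIRNAME_WIDTH as the zero-pad width */
	snprintf(jobSchemaName, sizeof(jobSchemaName), "%s%0*" PRIu64,
			 JOB_SCHEMA_PREFIX, MIN_JOB_DIRNAME_WIDTH, jobId);

	printf("%s\n", jobSchemaName); /* pg_merge_job_0000001250 */
	return 0;
}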
@@ -59,7 +59,7 @@ static void FileOutputStreamWrite(FileOutputStream file, StringInfo dataToWrite)
 static void FileOutputStreamFlush(FileOutputStream file);
 static void FilterAndPartitionTable(const char *filterQuery,
 									const char *columnName, Oid columnType,
-									uint32 (*PartitionIdFunction) (Datum, const void *),
+									uint32 (*PartitionIdFunction)(Datum, const void *),
 									const void *partitionIdContext,
 									FileOutputStream *partitionFileArray,
 									uint32 fileCount);

@@ -105,7 +105,7 @@ worker_range_partition_table(PG_FUNCTION_ARGS)
 	uint32 taskId = PG_GETARG_UINT32(1);
 	text *filterQueryText = PG_GETARG_TEXT_P(2);
 	text *partitionColumnText = PG_GETARG_TEXT_P(3);
 	Oid partitionColumnType = PG_GETARG_OID(4);
 	ArrayType *splitPointObject = PG_GETARG_ARRAYTYPE_P(5);

 	const char *filterQuery = text_to_cstring(filterQueryText);

@@ -181,7 +181,7 @@ worker_hash_partition_table(PG_FUNCTION_ARGS)
 	uint32 taskId = PG_GETARG_UINT32(1);
 	text *filterQueryText = PG_GETARG_TEXT_P(2);
 	text *partitionColumnText = PG_GETARG_TEXT_P(3);
 	Oid partitionColumnType = PG_GETARG_OID(4);
 	uint32 partitionCount = PG_GETARG_UINT32(5);

 	const char *filterQuery = text_to_cstring(filterQueryText);

@@ -463,7 +463,7 @@ JobDirectoryName(uint64 jobId)
 	 */
 #ifdef HAVE_INTTYPES_H
 	StringInfo jobDirectoryName = makeStringInfo();
-	appendStringInfo(jobDirectoryName, "base/%s/%s%0*"PRIu64,
+	appendStringInfo(jobDirectoryName, "base/%s/%s%0*" PRIu64,
 					 PG_JOB_CACHE_DIR, JOB_DIRECTORY_PREFIX,
 					 MIN_JOB_DIRNAME_WIDTH, jobId);
 #else

@@ -726,7 +726,7 @@ FileOutputStreamFlush(FileOutputStream file)
 static void
 FilterAndPartitionTable(const char *filterQuery,
 						const char *partitionColumnName, Oid partitionColumnType,
-						uint32 (*PartitionIdFunction) (Datum, const void *),
+						uint32 (*PartitionIdFunction)(Datum, const void *),
 						const void *partitionIdContext,
 						FileOutputStream *partitionFileArray,
 						uint32 fileCount)
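For readers unfamiliar with the declarator above: PartitionIdFunction is a function-pointer parameter, and the commit only tightens the space before its argument list. A minimal sketch of how such a parameter is declared, passed, and invoked, with hypothetical names standing in for the Datum and context types of the real code:

#include <stdint.h>
#include <stdio.h>

/* hypothetical stand-in for PostgreSQL's Datum */
typedef uintptr_t Datum;

static uint32_t
ModuloPartitionId(Datum key, const void *context)
{
	uint32_t partitionCount = *(const uint32_t *) context;
	return (uint32_t) key % partitionCount;
}

static void
PartitionRows(const Datum *keys, int keyCount,
			  uint32_t (*PartitionIdFunction)(Datum, const void *),
			  const void *partitionIdContext)
{
	for (int i = 0; i < keyCount; i++)
	{
		/* (*fn)(args) and fn(args) are equivalent calls through a pointer */
		uint32_t partitionId = (*PartitionIdFunction)(keys[i], partitionIdContext);
		printf("key %lu -> partition %u\n", (unsigned long) keys[i],
			   (unsigned) partitionId);
	}
}

int
main(void)
{
	Datum keys[] = { 3, 7, 12 };
	uint32_t partitionCount = 4;

	PartitionRows(keys, 3, ModuloPartitionId, &partitionCount);
	return 0;
}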
@@ -794,7 +794,7 @@ FilterAndPartitionTable(const char *filterQuery,
 		FileOutputStream partitionFile = { 0, 0, 0 };
 		StringInfo rowText = NULL;
 		Datum partitionKey = 0;
 		bool partitionKeyNull = false;
 		uint32 partitionId = 0;

 		partitionKey = SPI_getbinval(row, rowDescriptor,

@@ -808,7 +808,7 @@ FilterAndPartitionTable(const char *filterQuery,
 		 */
 		if (!partitionKeyNull)
 		{
-			partitionId = (*PartitionIdFunction) (partitionKey, partitionIdContext);
+			partitionId = (*PartitionIdFunction)(partitionKey, partitionIdContext);
 		}
 		else
 		{

@@ -926,7 +926,7 @@ InitRowOutputState(void)

 	/* initialize defaults for printing null values */
 	char *nullPrint = pstrdup("\\N");
 	int nullPrintLen = strlen(nullPrint);
 	char *nullPrintClient = pg_server_to_any(nullPrint, nullPrintLen, fileEncoding);

 	/* set default text output characters */

@@ -946,7 +946,7 @@ InitRowOutputState(void)
 	}

 	/* set up transcoding information and default text output characters */
-	if ( (fileEncoding != databaseEncoding) || (databaseEncodingMaxLength > 1) )
+	if ((fileEncoding != databaseEncoding) || (databaseEncodingMaxLength > 1))
 	{
 		rowOutputState->need_transcoding = true;
 	}

@@ -1057,7 +1057,7 @@ OutputRow(HeapTuple row, TupleDesc rowDescriptor,
 		CopySendString(rowOutputState, rowOutputState->null_print_client);
 	}

-	lastColumn = ((columnIndex+1) == columnCount);
+	lastColumn = ((columnIndex + 1) == columnCount);
 	if (!lastColumn)
 	{
 		CopySendChar(rowOutputState, rowOutputState->delim[0]);
@@ -1094,9 +1094,9 @@ OutputBinaryHeaders(FileOutputStream *partitionFileArray, uint32 fileCount)
 	{
 		/* Generate header for a binary copy */
 		const int32 zero = 0;
-		FileOutputStream partitionFile = {0, 0, 0};
+		FileOutputStream partitionFile = { 0, 0, 0 };
 		PartialCopyStateData headerOutputStateData;
-		PartialCopyState headerOutputState = (PartialCopyState) &headerOutputStateData;
+		PartialCopyState headerOutputState = (PartialCopyState) & headerOutputStateData;

 		memset(headerOutputState, 0, sizeof(PartialCopyStateData));
 		headerOutputState->fe_msgbuf = makeStringInfo();

@@ -1128,9 +1128,9 @@ OutputBinaryFooters(FileOutputStream *partitionFileArray, uint32 fileCount)
 	{
 		/* Generate footer for a binary copy */
 		int16 negative = -1;
-		FileOutputStream partitionFile = {0, 0, 0};
+		FileOutputStream partitionFile = { 0, 0, 0 };
 		PartialCopyStateData footerOutputStateData;
-		PartialCopyState footerOutputState = (PartialCopyState) &footerOutputStateData;
+		PartialCopyState footerOutputState = (PartialCopyState) & footerOutputStateData;

 		memset(footerOutputState, 0, sizeof(PartialCopyStateData));
 		footerOutputState->fe_msgbuf = makeStringInfo();
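One detail worth flagging in the two hunks above: the reformatted lines read "(PartialCopyState) & headerOutputStateData", with a space after the ampersand. Presumably the formatter parses the unary address-of operator following a cast as if it were binary AND; the compiled meaning is unchanged, but the spacing is misleading. A minimal sketch of the pattern, with hypothetical types:

#include <string.h>

typedef struct StateData { int value; } StateData;
typedef StateData *State;

void
Example(void)
{
	StateData stateData;

	/* take the address of a stack variable, then cast it to the pointer
	 * typedef; "(State) &stateData" and "(State) & stateData" are the same
	 * expression to the compiler */
	State state = (State) &stateData;

	memset(state, 0, sizeof(StateData));
}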

@@ -2,7 +2,7 @@
 *
 * citus_ruleutils.h
 * CitusDB ruleutils wrapper functions and exported PostgreSQL ruleutils
 * functions.
 *
 * Copyright (c) 2012-2015, Citus Data, Inc.
 *-------------------------------------------------------------------------

@@ -16,16 +16,17 @@

 /* Function declarations for version independent CitusDB ruleutils wrapper functions */
-extern char *pg_get_extensiondef_string(Oid tableRelationId);
-extern char *pg_get_serverdef_string(Oid tableRelationId);
-extern char *pg_get_tableschemadef_string(Oid tableRelationId);
-extern char *pg_get_tablecolumnoptionsdef_string(Oid tableRelationId);
-extern char *pg_get_indexclusterdef_string(Oid indexRelationId);
+extern char * pg_get_extensiondef_string(Oid tableRelationId);
+extern char * pg_get_serverdef_string(Oid tableRelationId);
+extern char * pg_get_tableschemadef_string(Oid tableRelationId);
+extern char * pg_get_tablecolumnoptionsdef_string(Oid tableRelationId);
+extern char * pg_get_indexclusterdef_string(Oid indexRelationId);

 /* Function declarations for version dependent PostgreSQL ruleutils functions */
 extern void pg_get_query_def(Query *query, StringInfo buffer);
-extern void deparse_shard_query(Query *query, Oid distrelid, int64 shardid, StringInfo buffer);
-extern char *generate_relation_name(Oid relid, List *namespaces);
+extern void deparse_shard_query(Query *query, Oid distrelid, int64 shardid, StringInfo
+								buffer);
+extern char * generate_relation_name(Oid relid, List *namespaces);


 #endif /* CITUS_RULEUTILS_H */

@@ -30,15 +30,14 @@ typedef struct ShardInterval
 	CitusNodeTag type;
 	Oid relationId;
 	char storageType;
 	Oid valueTypeId;     /* min/max value datum's typeId */
 	int valueTypeLen;    /* min/max value datum's typelen */
 	bool valueByVal;     /* min/max value datum's byval */
 	bool minValueExists;
 	bool maxValueExists;
 	Datum minValue;      /* a shard's typed min value datum */
 	Datum maxValue;      /* a shard's typed max value datum */
 	uint64 shardId;
-
 } ShardInterval;


@@ -46,13 +45,12 @@ typedef struct ShardInterval
 typedef struct ShardPlacement
 {
 	CitusNodeTag type;
 	Oid tupleOid;        /* unique oid that implies this row's insertion order */
 	uint64 shardId;
 	uint64 shardLength;
 	RelayFileState shardState;
 	char *nodeName;
 	uint32 nodePort;
-
 } ShardPlacement;

@@ -49,10 +49,10 @@
 #define SHARDID_SEQUENCE_NAME "pg_dist_shardid_seq"

 /* Remote call definitions to help with data staging and deletion */
-#define WORKER_APPLY_SHARD_DDL_COMMAND "SELECT worker_apply_shard_ddl_command \
-									   ("UINT64_FORMAT", %s)"
-#define WORKER_APPEND_TABLE_TO_SHARD "SELECT worker_append_table_to_shard \
-									 (%s, %s, %s, %u)"
+#define WORKER_APPLY_SHARD_DDL_COMMAND \
+	"SELECT worker_apply_shard_ddl_command (" UINT64_FORMAT ", %s)"
+#define WORKER_APPEND_TABLE_TO_SHARD \
+	"SELECT worker_append_table_to_shard (%s, %s, %s, %u)"
 #define SHARD_MIN_VALUE_QUERY "SELECT min(%s) FROM %s"
 #define SHARD_MAX_VALUE_QUERY "SELECT max(%s) FROM %s"
 #define SHARD_TABLE_SIZE_QUERY "SELECT pg_table_size('%s')"
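A note on why the old form of these macros was worth changing: a backslash continuation inside a string literal keeps the literal open across the line break, so the next line's leading whitespace becomes part of the SQL string. The new form ends the literal first and splices adjacent literals around UINT64_FORMAT instead. A minimal sketch of the difference, with a hypothetical query macro:

#include <stdio.h>

/* continuation inside the literal: the indentation leaks into the string */
#define QUERY_WITH_LEAK "SELECT \
						1"

/* continuation outside the literal: two literals concatenate cleanly */
#define QUERY_CLEAN \
	"SELECT 1"

int
main(void)
{
	/* the first line prints an embedded run of whitespace between SELECT and 1 */
	printf("[%s]\n", QUERY_WITH_LEAK);
	printf("[%s]\n", QUERY_CLEAN);
	return 0;
}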
@@ -67,7 +67,6 @@ typedef enum
 	SHARD_PLACEMENT_INVALID_FIRST = 0,
 	SHARD_PLACEMENT_LOCAL_NODE_FIRST = 1,
 	SHARD_PLACEMENT_ROUND_ROBIN = 2
-
 } ShardPlacementPolicyType;


@@ -83,8 +82,8 @@ extern Oid ResolveRelationId(text *relationName);
 extern List * GetTableDDLEvents(Oid relationId);
 extern void CheckDistributedTable(Oid relationId);
 extern void CreateShardPlacements(int64 shardId, List *ddlEventList,
 								  List *workerNodeList, int workerStartIndex,
 								  int replicationFactor);

 /* Function declarations for generating metadata for shard creation */
 extern Datum master_get_table_metadata(PG_FUNCTION_ARGS);

@@ -24,6 +24,7 @@
 #define INVALID_TASK_ID 0

 #if (PG_VERSION_NUM >= 90500)
+
 /* reserved alias name for UPSERTs */
 #define UPSERT_ALIAS "citus_table_alias"
 #endif

@@ -15,21 +15,20 @@
 #define MULTI_CLIENT_EXECUTOR_H


 #define INVALID_CONNECTION_ID -1      /* identifies an invalid connection */
 #define CLIENT_CONNECT_TIMEOUT 5      /* connection timeout in seconds */
 #define MAX_CONNECTION_COUNT 2048     /* simultaneous client connection count */
 #define STRING_BUFFER_SIZE 1024       /* buffer size for character arrays */
 #define CONN_INFO_TEMPLATE "host=%s port=%u dbname=%s connect_timeout=%u"


 /* Enumeration to track one client connection's status */
 typedef enum
 {
 	CLIENT_INVALID_CONNECT = 0,
 	CLIENT_CONNECTION_BAD = 1,
 	CLIENT_CONNECTION_BUSY = 2,
 	CLIENT_CONNECTION_READY = 3
-
 } ConnectStatus;

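The CONN_INFO_TEMPLATE above is a printf-style format that client code fills in before opening a libpq connection. A minimal sketch of that usage, assuming a fixed-size buffer like the STRING_BUFFER_SIZE defined here (the surrounding names in the sketch are hypothetical):

#include <stdio.h>

#define CONN_INFO_TEMPLATE "host=%s port=%u dbname=%s connect_timeout=%u"
#define STRING_BUFFER_SIZE 1024
#define CLIENT_CONNECT_TIMEOUT 5

int
main(void)
{
	char connInfoString[STRING_BUFFER_SIZE];

	/* render the template; the result could be handed to PQconnectdb() */
	snprintf(connInfoString, sizeof(connInfoString), CONN_INFO_TEMPLATE,
			 "localhost", 5432u, "postgres", (unsigned) CLIENT_CONNECT_TIMEOUT);

	printf("%s\n", connInfoString);
	return 0;
}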
@@ -38,9 +37,8 @@ typedef enum
 {
 	CLIENT_INVALID_RESULT_STATUS = 0,
 	CLIENT_RESULT_UNAVAILABLE = 1,
 	CLIENT_RESULT_BUSY = 2,
 	CLIENT_RESULT_READY = 3
-
 } ResultStatus;


@@ -48,10 +46,9 @@ typedef enum
 typedef enum
 {
 	CLIENT_INVALID_QUERY = 0,
 	CLIENT_QUERY_FAILED = 1,
 	CLIENT_QUERY_DONE = 2,
 	CLIENT_QUERY_COPY = 3
-
 } QueryStatus;


@@ -59,21 +56,19 @@ typedef enum
 typedef enum
 {
 	CLIENT_INVALID_COPY = 0,
 	CLIENT_COPY_MORE = 1,
 	CLIENT_COPY_FAILED = 2,
 	CLIENT_COPY_DONE = 3
-
 } CopyStatus;


 /* Enumeration to track the status of a query in a batch on the client */
 typedef enum
 {
 	CLIENT_INVALID_BATCH_QUERY = 0,
 	CLIENT_BATCH_QUERY_FAILED = 1,
 	CLIENT_BATCH_QUERY_CONTINUE = 2,
 	CLIENT_BATCH_QUERY_DONE = 3
-
 } BatchQueryStatus;

@@ -14,12 +14,12 @@
 #include "nodes/parsenodes.h"

 /* signal currently executed statement is a master select statement or router execution */
 #define EXEC_FLAG_CITUS_MASTER_SELECT 0x100
 #define EXEC_FLAG_CITUS_ROUTER_EXECUTOR 0x200

 extern void multi_ExecutorStart(QueryDesc *queryDesc, int eflags);
 extern void multi_ExecutorRun(QueryDesc *queryDesc,
 							  ScanDirection direction, long count);
 extern void multi_ExecutorFinish(QueryDesc *queryDesc);
 extern void multi_ExecutorEnd(QueryDesc *queryDesc);


@@ -29,7 +29,7 @@ typedef enum JoinRuleType
 {
 	JOIN_RULE_INVALID_FIRST = 0,
 	BROADCAST_JOIN = 1,
 	LOCAL_PARTITION_JOIN = 2,
 	SINGLE_PARTITION_JOIN = 3,
 	DUAL_PARTITION_JOIN = 4,
 	CARTESIAN_PRODUCT = 5,

@@ -40,7 +40,6 @@ typedef enum JoinRuleType
 	 * RuleNameArray.
 	 */
 	JOIN_RULE_LAST
-
 } JoinRuleType;


@@ -53,7 +52,6 @@ typedef struct TableEntry
 {
 	Oid relationId;
 	uint32 rangeTableId;
-
 } TableEntry;


@@ -65,14 +63,13 @@ typedef struct TableEntry
 */
 typedef struct JoinOrderNode
 {
 	TableEntry *tableEntry;       /* this node's relation and range table id */
 	JoinRuleType joinRuleType;    /* not relevant for the first table */
 	JoinType joinType;            /* not relevant for the first table */
 	Var *partitionColumn;         /* not relevant for the first table */
 	char partitionMethod;
 	List *joinClauseList;         /* not relevant for the first table */
 	List *shardIntervalList;
-
 } JoinOrderNode;


@@ -55,7 +55,6 @@ typedef enum
 	AGGREGATE_SUM = 4,
 	AGGREGATE_COUNT = 5,
 	AGGREGATE_ARRAY_AGG = 6
-
 } AggregateType;


@@ -69,7 +68,6 @@ typedef enum
 	PUSH_DOWN_VALID = 1,
 	PUSH_DOWN_NOT_VALID = 2,
 	PUSH_DOWN_SPECIAL_CONDITIONS = 3
-
 } PushDownStatus;


@@ -82,7 +80,6 @@ typedef enum
 	PULL_UP_INVALID_FIRST = 0,
 	PULL_UP_VALID = 1,
 	PULL_UP_NOT_VALID = 2
-
 } PullUpStatus;


@@ -97,8 +94,10 @@ typedef enum
 * Please note that the order of elements in this array is tied to the order of
 * values in the preceding AggregateType enum. This order needs to be preserved.
 */
-static const char * const AggregateNames[] = { "invalid", "avg", "min", "max",
-											   "sum", "count", "array_agg" };
+static const char *const AggregateNames[] = {
+	"invalid", "avg", "min", "max", "sum",
+	"count", "array_agg"
+};

 /* Config variable managed via guc.c */
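The comment above that array is load-bearing: AggregateNames is indexed directly by AggregateType values, so the string order must track the enum order. A minimal sketch of that lookup pattern, with hypothetical names:

#include <stdio.h>

typedef enum
{
	AGGREGATE_INVALID_FIRST = 0,
	AGGREGATE_AVERAGE = 1,
	AGGREGATE_MIN = 2,
	AGGREGATE_MAX = 3
} AggregateType;

/* order is tied to AggregateType; adding an enum value means adding a name */
static const char *const AggregateNames[] = {
	"invalid", "avg", "min", "max"
};

int
main(void)
{
	AggregateType aggregateType = AGGREGATE_MAX;

	/* the enum value doubles as the array index */
	printf("aggregate name: %s\n", AggregateNames[aggregateType]);
	return 0;
}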

@@ -40,8 +40,8 @@ typedef struct MultiNode
 	CitusNodeTag type;

 	struct MultiNode *parentNode;
-	/* child node(s) are defined in unary and binary nodes */

+	/* child node(s) are defined in unary and binary nodes */
 } MultiNode;


@@ -51,7 +51,6 @@ typedef struct MultiUnaryNode
 	MultiNode node;

 	struct MultiNode *childNode;
-
 } MultiUnaryNode;


@@ -62,7 +61,6 @@ typedef struct MultiBinaryNode

 	struct MultiNode *leftChildNode;
 	struct MultiNode *rightChildNode;
-
 } MultiBinaryNode;


@@ -73,7 +71,6 @@ typedef struct MultiBinaryNode
 typedef struct MultiTreeRoot
 {
 	MultiUnaryNode unaryNode;
-
 } MultiTreeRoot;


@@ -91,7 +88,6 @@ typedef struct MultiTable
 	Alias *alias;
 	Alias *referenceNames;
 	Query *subquery;    /* this field is only valid for non-relation subquery types */
-
 } MultiTable;


@@ -100,7 +96,6 @@ typedef struct MultiProject
 {
 	MultiUnaryNode unaryNode;
 	List *columnList;
-
 } MultiProject;


@@ -112,7 +107,6 @@ typedef struct MultiProject
 typedef struct MultiCollect
 {
 	MultiUnaryNode unaryNode;
-
 } MultiCollect;


@@ -125,7 +119,6 @@ typedef struct MultiSelect
 {
 	MultiUnaryNode unaryNode;
 	List *selectClauseList;
-
 } MultiSelect;


@@ -140,7 +133,6 @@ typedef struct MultiJoin
 	List *joinClauseList;
 	JoinRuleType joinRuleType;
 	JoinType joinType;
-
 } MultiJoin;


@@ -150,7 +142,6 @@ typedef struct MultiPartition
 	MultiUnaryNode unaryNode;
 	Var *partitionColumn;
 	uint32 splitPointTableId;
-
 } MultiPartition;


@@ -158,7 +149,6 @@ typedef struct MultiPartition
 typedef struct MultiCartesianProduct
 {
 	MultiBinaryNode binaryNode;
-
 } MultiCartesianProduct;


@@ -183,7 +173,6 @@ typedef struct MultiExtendedOp
 	List *sortClauseList;
 	Node *limitCount;
 	Node *limitOffset;
-
 } MultiExtendedOp;


@@ -40,17 +40,18 @@
 								  (" UINT64_FORMAT ", %d, %s, '%s', %d, %d)"
 #define MERGE_FILES_INTO_TABLE_COMMAND "SELECT worker_merge_files_into_table \
 									   (" UINT64_FORMAT ", %d, '%s', '%s')"
-#define MERGE_FILES_AND_RUN_QUERY_COMMAND "SELECT worker_merge_files_and_run_query(" UINT64_FORMAT ", %d, '%s', '%s')"
+#define MERGE_FILES_AND_RUN_QUERY_COMMAND \
+	"SELECT worker_merge_files_and_run_query(" UINT64_FORMAT ", %d, '%s', '%s')"


 typedef enum CitusRTEKind
 {
 	CITUS_RTE_RELATION = RTE_RELATION,    /* ordinary relation reference */
 	CITUS_RTE_SUBQUERY = RTE_SUBQUERY,    /* subquery in FROM */
 	CITUS_RTE_JOIN = RTE_JOIN,            /* join */
 	CITUS_RTE_FUNCTION = RTE_FUNCTION,    /* function in FROM */
 	CITUS_RTE_VALUES = RTE_VALUES,        /* VALUES (<exprlist>), (<exprlist>), ... */
 	CITUS_RTE_CTE = RTE_CTE,              /* common table expr (WITH list element) */
 	CITUS_RTE_SHARD,
 	CITUS_RTE_REMOTE_QUERY
 } CitusRTEKind;
@@ -61,8 +62,7 @@ typedef enum
 {
 	PARTITION_INVALID_FIRST = 0,
 	RANGE_PARTITION_TYPE = 1,
 	HASH_PARTITION_TYPE = 2
-
 } PartitionType;


@@ -77,7 +77,6 @@ typedef enum
 	MAP_OUTPUT_FETCH_TASK = 5,
 	MERGE_FETCH_TASK = 6,
 	MODIFY_TASK = 7
-
 } TaskType;


@@ -88,7 +87,6 @@ typedef enum
 	TASK_ASSIGNMENT_GREEDY = 1,
 	TASK_ASSIGNMENT_ROUND_ROBIN = 2,
 	TASK_ASSIGNMENT_FIRST_REPLICA = 3
-
 } TaskAssignmentPolicyType;


@@ -99,7 +97,6 @@ typedef enum
 	JOIN_MAP_MERGE_JOB = 1,
 	SUBQUERY_MAP_MERGE_JOB = 2,
 	TOP_LEVEL_WORKER_JOB = 3
-
 } BoundaryNodeJobType;


@@ -133,7 +130,6 @@ typedef struct MapMergeJob
 	ShardInterval **sortedShardIntervalArray;    /* only applies to range partitioning */
 	List *mapTaskList;
 	List *mergeTaskList;
-
 } MapMergeJob;


@@ -153,18 +149,17 @@ typedef struct Task
 	uint64 jobId;
 	uint32 taskId;
 	char *queryString;
 	uint64 anchorShardId;           /* only applies to compute tasks */
 	List *taskPlacementList;        /* only applies to compute tasks */
 	List *dependedTaskList;         /* only applies to compute tasks */

 	uint32 partitionId;
 	uint32 upstreamTaskId;          /* only applies to data fetch tasks */
 	ShardInterval *shardInterval;   /* only applies to merge tasks */
 	bool assignmentConstrained;     /* only applies to merge tasks */
 	uint64 shardId;                 /* only applies to shard fetch tasks */
 	TaskExecution *taskExecution;   /* used by task tracker executor */
 	bool upsertQuery;               /* only applies to modify tasks */
-
 } Task;


@@ -177,7 +172,6 @@ typedef struct RangeTableFragment
 	CitusRTEKind fragmentType;
 	void *fragmentReference;
 	uint32 rangeTableId;
-
 } RangeTableFragment;


@@ -190,7 +184,6 @@ typedef struct JoinSequenceNode
 {
 	uint32 rangeTableId;
 	int32 joiningRangeTableId;
-
 } JoinSequenceNode;


@@ -203,7 +196,6 @@ typedef struct MultiPlan
 	Job *workerJob;
 	Query *masterQuery;
 	char *masterTableName;
-
 } MultiPlan;

@@ -13,8 +13,8 @@
 #include "nodes/plannodes.h"
 #include "nodes/relation.h"

-extern PlannedStmt *multi_planner(Query *parse, int cursorOptions,
+extern PlannedStmt * multi_planner(Query *parse, int cursorOptions,
 								  ParamListInfo boundParams);

 extern bool HasCitusToplevelNode(PlannedStmt *planStatement);
 struct MultiPlan;

@@ -20,9 +20,9 @@

 #define MAX_TASK_EXECUTION_FAILURES 3      /* allowed failure count for one task */
 #define MAX_TRACKER_FAILURE_COUNT 3        /* allowed failure count for one tracker */
 #define REMOTE_NODE_CONNECT_TIMEOUT 4000   /* async connect timeout in ms */
 #define RESERVED_FD_COUNT 64               /* file descriptors unavailable to executor */

 /* copy out query results */
 #define COPY_QUERY_TO_STDOUT_TEXT "COPY (%s) TO STDOUT"

@@ -32,9 +32,9 @@

 /* Task tracker executor related defines */
 #define TASK_ASSIGNMENT_QUERY "SELECT task_tracker_assign_task \
-							  ("UINT64_FORMAT", %u, %s)"
+							  ("UINT64_FORMAT ", %u, %s)"
-#define TASK_STATUS_QUERY "SELECT task_tracker_task_status("UINT64_FORMAT", %u)"
+#define TASK_STATUS_QUERY "SELECT task_tracker_task_status("UINT64_FORMAT ", %u)"
-#define JOB_CLEANUP_QUERY "SELECT task_tracker_cleanup_job("UINT64_FORMAT")"
+#define JOB_CLEANUP_QUERY "SELECT task_tracker_cleanup_job("UINT64_FORMAT ")"
 #define JOB_CLEANUP_TASK_ID INT_MAX

@@ -43,9 +43,9 @@ typedef enum
 {
 	EXEC_TASK_INVALID_FIRST = 0,
 	EXEC_TASK_CONNECT_START = 1,
 	EXEC_TASK_CONNECT_POLL = 2,
 	EXEC_TASK_FAILED = 3,
 	EXEC_FETCH_TASK_LOOP = 4,
 	EXEC_FETCH_TASK_START = 5,
 	EXEC_FETCH_TASK_RUNNING = 6,
 	EXEC_COMPUTE_TASK_START = 7,

@@ -60,7 +60,6 @@ typedef enum
 	EXEC_TASK_TRACKER_FAILED = 14,
 	EXEC_SOURCE_TASK_TRACKER_RETRY = 15,
 	EXEC_SOURCE_TASK_TRACKER_FAILED = 16
-
 } TaskExecStatus;


@@ -74,7 +73,6 @@ typedef enum
 	EXEC_TRANSMIT_TRACKER_RETRY = 4,
 	EXEC_TRANSMIT_TRACKER_FAILED = 5,
 	EXEC_TRANSMIT_DONE = 6
-
 } TransmitExecStatus;


@@ -86,7 +84,6 @@ typedef enum
 	TRACKER_CONNECT_POLL = 2,
 	TRACKER_CONNECTED = 3,
 	TRACKER_CONNECTION_FAILED = 4
-
 } TrackerStatus;


@@ -97,7 +94,6 @@ typedef enum
 	MULTI_EXECUTOR_REAL_TIME = 1,
 	MULTI_EXECUTOR_TASK_TRACKER = 2,
 	MULTI_EXECUTOR_ROUTER = 3
-
 } MultiExecutorType;


@@ -107,7 +103,6 @@ typedef enum
 	CONNECT_ACTION_NONE = 0,
 	CONNECT_ACTION_OPENED = 1,
 	CONNECT_ACTION_CLOSED = 2
-
 } ConnectAction;


@@ -132,7 +127,6 @@ struct TaskExecution
 	uint32 querySourceNodeIndex;    /* only applies to map fetch tasks */
 	int32 dataFetchTaskIndex;
 	uint32 failureCount;
-
 };


@@ -147,7 +141,6 @@ typedef struct TrackerTaskState
 	uint32 taskId;
 	TaskStatus status;
 	StringInfo taskAssignmentQuery;
-
 } TrackerTaskState;


@@ -158,7 +151,7 @@ typedef struct TrackerTaskState
 */
 typedef struct TaskTracker
 {
 	uint32 workerPort;                 /* node's port; part of hash table key */
 	char workerName[WORKER_LENGTH];    /* node's name; part of hash table key */
 	TrackerStatus trackerStatus;
 	int32 connectionId;

@@ -171,7 +164,6 @@ typedef struct TaskTracker
 	int32 currentTaskIndex;
 	bool connectionBusy;
 	TrackerTaskState *connectionBusyOnTask;
-
 } TaskTracker;


@@ -184,7 +176,6 @@ typedef struct WorkerNodeState
 	uint32 workerPort;
 	char workerName[WORKER_LENGTH];
 	uint32 openConnectionCount;
-
 } WorkerNodeState;


@@ -21,9 +21,9 @@
 */
 typedef struct FormData_pg_dist_partition
 {
 	Oid logicalrelid;    /* logical relation id; references pg_class oid */
 	char partmethod;     /* partition method; see codes below */
 	text partkey;        /* partition key expression */
 } FormData_pg_dist_partition;

 /* ----------------

@@ -37,16 +37,16 @@ typedef FormData_pg_dist_partition *Form_pg_dist_partition;
 * compiler constants for pg_dist_partitions
 * ----------------
 */
 #define Natts_pg_dist_partition 3
 #define Anum_pg_dist_partition_logicalrelid 1
 #define Anum_pg_dist_partition_partmethod 2
 #define Anum_pg_dist_partition_partkey 3

 /* valid values for partmethod include append, hash, and range */
 #define DISTRIBUTE_BY_APPEND 'a'
 #define DISTRIBUTE_BY_HASH 'h'
 #define DISTRIBUTE_BY_RANGE 'r'
 #define REDISTRIBUTE_BY_HASH 'x'


 #endif /* PG_DIST_PARTITION_H */
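The single-character partmethod codes above are how pg_dist_partition encodes a table's distribution method, so code that reads the catalog typically branches on that char. A minimal sketch of such a dispatch, with a hypothetical helper:

#include <stdio.h>

#define DISTRIBUTE_BY_APPEND 'a'
#define DISTRIBUTE_BY_HASH 'h'
#define DISTRIBUTE_BY_RANGE 'r'

static const char *
PartitionMethodName(char partitionMethod)
{
	switch (partitionMethod)
	{
		case DISTRIBUTE_BY_APPEND:
			return "append";

		case DISTRIBUTE_BY_HASH:
			return "hash";

		case DISTRIBUTE_BY_RANGE:
			return "range";

		default:
			return "unrecognized";
	}
}

int
main(void)
{
	printf("partmethod 'h' means: %s\n", PartitionMethodName(DISTRIBUTE_BY_HASH));
	return 0;
}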

@@ -22,13 +22,13 @@
 */
 typedef struct FormData_pg_dist_shard
 {
 	Oid logicalrelid;      /* logical relation id; references pg_class oid */
 	int64 shardid;         /* global shardId representing remote partition */
 	char shardstorage;     /* shard storage type; see codes below */
 #ifdef CATALOG_VARLEN      /* variable-length fields start here */
 	text shardalias;       /* user specified table name for shard, if any */
 	text shardminvalue;    /* partition key's minimum value in shard */
 	text shardmaxvalue;    /* partition key's maximum value in shard */
 #endif
 } FormData_pg_dist_shard;

@@ -43,22 +43,22 @@ typedef FormData_pg_dist_shard *Form_pg_dist_shard;
 * compiler constants for pg_dist_shards
 * ----------------
 */
 #define Natts_pg_dist_shard 6
 #define Anum_pg_dist_shard_logicalrelid 1
 #define Anum_pg_dist_shard_shardid 2
 #define Anum_pg_dist_shard_shardstorage 3
 #define Anum_pg_dist_shard_shardalias 4
 #define Anum_pg_dist_shard_shardminvalue 5
 #define Anum_pg_dist_shard_shardmaxvalue 6

 /*
 * Valid values for shard storage types include relay file, foreign table,
 * (standard) table and columnar table. Relay file types are currently unused.
 */
 #define SHARD_STORAGE_RELAY 'r'
 #define SHARD_STORAGE_FOREIGN 'f'
 #define SHARD_STORAGE_TABLE 't'
 #define SHARD_STORAGE_COLUMNAR 'c'


 #endif /* PG_DIST_SHARD_H */

@@ -23,12 +23,12 @@
 */
 typedef struct FormData_pg_dist_shard_placement
 {
 	int64 shardid;        /* global shardId on remote node */
 	int32 shardstate;     /* shard state on remote node; see RelayFileState */
 	int64 shardlength;    /* shard length on remote node; stored as bigint */
 #ifdef CATALOG_VARLEN     /* variable-length fields start here */
 	text nodename;        /* remote node's host name */
 	int32 nodeport;       /* remote node's port number */
 #endif
 } FormData_pg_dist_shard_placement;

@@ -43,12 +43,12 @@ typedef FormData_pg_dist_shard_placement *Form_pg_dist_shard_placement;
 * compiler constants for pg_dist_shard_placement
 * ----------------
 */
 #define Natts_pg_dist_shard_placement 5
 #define Anum_pg_dist_shard_placement_shardid 1
 #define Anum_pg_dist_shard_placement_shardstate 2
 #define Anum_pg_dist_shard_placement_shardlength 3
 #define Anum_pg_dist_shard_placement_nodename 4
 #define Anum_pg_dist_shard_placement_nodeport 5


 #endif /* PG_DIST_SHARD_PLACEMENT_H */

@@ -35,7 +35,6 @@ typedef enum
 	FILE_CACHED = 2,
 	FILE_INACTIVE = 3,
 	FILE_TO_DELETE = 4
-
 } RelayFileState;


@@ -29,6 +29,7 @@ typedef enum AdvisoryLocktagClass
 	/* values defined in postgres' lockfuncs.c */
 	ADV_LOCKTAG_CLASS_INT64 = 1,
 	ADV_LOCKTAG_CLASS_INT32 = 2,
+
 	/* CitusDB lock types */
 	ADV_LOCKTAG_CLASS_CITUS_SHARD_METADATA = 4,
 	ADV_LOCKTAG_CLASS_CITUS_SHARD = 5,

@@ -19,10 +19,10 @@
 #include "utils/hsearch.h"


 #define HIGH_PRIORITY_TASK_TIME 1           /* assignment time for high priority tasks */
 #define RESERVED_JOB_ID 1                   /* reserved for cleanup and shutdown tasks */
 #define SHUTDOWN_MARKER_TASK_ID UINT_MAX    /* used to identify task tracker shutdown */
 #define MAX_TASK_FAILURE_COUNT 2            /* allowed failure count for one task */
 #define LOCAL_HOST_NAME "localhost"         /* connect to local backends using this name */
 #define TASK_CALL_STRING_SIZE 12288         /* max length of task call string */
 #define TEMPLATE0_NAME "template0"          /* skip job schema cleanup for template0 */

@@ -37,13 +37,13 @@
 typedef enum
 {
 	TASK_STATUS_INVALID_FIRST = 0,
 	TASK_ASSIGNED = 1,            /* master node and task tracker */
 	TASK_SCHEDULED = 2,
 	TASK_RUNNING = 3,
 	TASK_FAILED = 4,
 	TASK_PERMANENTLY_FAILED = 5,
 	TASK_SUCCEEDED = 6,
 	TASK_CANCEL_REQUESTED = 7,    /* master node only */
 	TASK_CANCELED = 8,
 	TASK_TO_REMOVE = 9,

@@ -63,7 +63,6 @@ typedef enum
 	 * TASK_STATUS_LAST, should never have their numbers changed.
 	 */
 	TASK_STATUS_LAST
-
 } TaskStatus;


@@ -76,16 +75,15 @@ typedef enum
 */
 typedef struct WorkerTask
 {
 	uint64 jobId;         /* job id (upper 32-bits reserved); part of hash table key */
 	uint32 taskId;        /* task id; part of hash table key */
 	uint32 assignedAt;    /* task assignment time in epoch seconds */

 	char taskCallString[TASK_CALL_STRING_SIZE];    /* query or function call string */
 	TaskStatus taskStatus;             /* task's current execution status */
 	char databaseName[NAMEDATALEN];    /* name to use for local backend connection */
 	int32 connectionId;                /* connection id to local backend */
 	uint32 failureCount;               /* number of task failures */
-
 } WorkerTask;


@@ -97,6 +95,7 @@ typedef struct WorkerTasksSharedStateData
 {
 	/* Hash table shared by the task tracker and task tracker protocol functions */
 	HTAB *taskHash;
+
 	/* Lock protecting workerNodesHash */
 	LWLock *taskHashLock;
 } WorkerTasksSharedStateData;
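The pairing above, a shared HTAB plus an LWLock guarding it, is the standard PostgreSQL pattern for shared-memory hash tables: every lookup or insert happens between LWLockAcquire and LWLockRelease. A minimal sketch of a lookup under that lock, assuming a backend context with storage/lwlock.h and utils/hsearch.h available; the key struct and helper below are hypothetical:

typedef struct WorkerTaskKey
{
	uint64 jobId;     /* hypothetical; mirrors the WorkerTask hash key fields */
	uint32 taskId;
} WorkerTaskKey;

static WorkerTask *
LookupWorkerTask(WorkerTasksSharedStateData *sharedState,
				 uint64 jobId, uint32 taskId)
{
	WorkerTaskKey taskKey = { jobId, taskId };
	WorkerTask *workerTask = NULL;
	bool taskFound = false;

	/* a shared lock suffices for reads; writers would take LW_EXCLUSIVE */
	LWLockAcquire(sharedState->taskHashLock, LW_SHARED);
	workerTask = (WorkerTask *) hash_search(sharedState->taskHash, &taskKey,
											HASH_FIND, &taskFound);
	LWLockRelease(sharedState->taskHashLock);

	return taskFound ? workerTask : NULL;
}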

@@ -43,12 +43,11 @@
 */
 typedef struct WorkerNode
 {
 	uint32 workerPort;                 /* node's port; part of hash table key */
 	char workerName[WORKER_LENGTH];    /* node's name; part of hash table key */
 	char workerRack[WORKER_LENGTH];    /* node's network location */

 	bool inWorkerFile;                 /* is node in current membership file? */
-
 } WorkerNode;


@@ -64,8 +64,7 @@ typedef struct RangePartitionContext
 {
 	FmgrInfo *comparisonFunction;
 	Datum *splitPointArray;
 	int32 splitPointCount;
-
 } RangePartitionContext;


@@ -77,7 +76,6 @@ typedef struct HashPartitionContext
 {
 	FmgrInfo *hashFunction;
 	uint32 partitionCount;
-
 } HashPartitionContext;


@@ -88,16 +86,16 @@ typedef struct HashPartitionContext
 */
 typedef struct PartialCopyStateData
 {
 	StringInfo fe_msgbuf;         /* used for all dests during COPY TO, only for
 	                               * dest == COPY_NEW_FE in COPY FROM */
 	int file_encoding;            /* file or remote side's character encoding */
 	bool need_transcoding;        /* file encoding diff from server? */
 	bool binary;                  /* binary format? */
 	char *null_print;             /* NULL marker string (server encoding!) */
 	char *null_print_client;      /* same converted to file encoding */
 	char *delim;                  /* column delimiter (must be 1 byte) */

 	MemoryContext rowcontext;     /* per-row evaluation context */
 } PartialCopyStateData;

 typedef struct PartialCopyStateData *PartialCopyState;

@@ -114,7 +112,6 @@ typedef struct FileOutputStream
 	File fileDescriptor;
 	StringInfo fileBuffer;
 	StringInfo filePath;
-
 } FileOutputStream;