citus/src/backend/distributed/executor/repartition_join_execution.c

101 lines
3.1 KiB
C

/*-------------------------------------------------------------------------
*
* repartition_join_execution.c
*
* This file contains repartition specific logic.
* ExecuteDependentTasks takes a list of top level tasks. Its logic is as follows:
* - It generates all the tasks by descending in the tasks tree. Note that each task
* has a dependentTaskList.
* - It generates FetchTask queryStrings with the MapTask queries. It uses the first replica to
* fetch data when the replication factor is > 1. Note that if a task fails in any replica, the adaptive
* executor gives an error, so if we reach a fetchTask we know for sure that the MapTask it depends on
* has been executed in all replicas.
* - It creates schemas in each worker in a single transaction to store intermediate results.
* - It iterates all tasks and finds the ones whose dependencies are already executed, and executes them with
* adaptive executor logic.
*
*
* Repartition queries do not begin a transaction even if we are in
* a transaction block. As we don't begin a transaction, they won't see the
* DDLs that happened earlier in the transaction because we don't have that
* transaction id with repartition queries. Therefore we error in this case.
*
* Copyright (c) Citus Data, Inc.
*/
#include "postgres.h"
#include "miscadmin.h"
#include "access/hash.h"
#include "utils/builtins.h"
#include "distributed/adaptive_executor.h"
#include "distributed/directed_acyclic_graph_execution.h"
#include "distributed/hash_helpers.h"
#include "distributed/listutils.h"
#include "distributed/local_executor.h"
#include "distributed/metadata_cache.h"
#include "distributed/multi_physical_planner.h"
#include "distributed/multi_server_executor.h"
#include "distributed/repartition_join_execution.h"
#include "distributed/task_execution_utils.h"
#include "distributed/transaction_management.h"
#include "distributed/transmit.h"
#include "distributed/worker_manager.h"
#include "distributed/worker_transaction.h"
/* forward declarations of the file-local helpers defined below */
static List * ExtractJobsInJobTree(Job *job);
static void TraverseJobTree(Job *curJob, List **jobs);
/*
 * ExecuteDependentTasks runs the tasks of the task tree that hangs below the
 * given top level tasks, in dependency order. The top level tasks themselves
 * are not executed here. Tasks that belong to different jobs may be executed
 * at the same time.
 *
 * The return value is the list of job ids collected by ExtractJobsInJobTree
 * for the job tree rooted at topLevelJob.
 */
List *
ExecuteDependentTasks(List *topLevelTasks, Job *topLevelJob)
{
	List *jobTreeTaskList = CreateTaskListForJobTree(topLevelTasks);
	List *jobTreeJobIds = ExtractJobsInJobTree(topLevelJob);

	ExecuteTasksInDependencyOrder(jobTreeTaskList, topLevelTasks, jobTreeJobIds);

	return jobTreeJobIds;
}
/*
 * ExtractJobsInJobTree collects the job id of the given job and of every job
 * reachable through its dependentJobList, and returns them as a list of
 * pointers to uint64 values.
 */
static List *
ExtractJobsInJobTree(Job *job)
{
	List *collectedJobIds = NIL;

	TraverseJobTree(job, &collectedJobIds);

	return collectedJobIds;
}
/*
 * TraverseJobTree walks the job tree rooted at curJob depth first. The id of
 * curJob is appended to *jobIds before the jobs in its dependentJobList are
 * visited, so the resulting list is in pre-order. Every id is copied into its
 * own palloc'd uint64 and the list stores the pointer to that copy.
 */
static void
TraverseJobTree(Job *curJob, List **jobIds)
{
	Job *dependentJob = NULL;

	/* record the current job first, then descend into its dependencies */
	uint64 *curJobId = palloc(sizeof(*curJobId));
	*curJobId = curJob->jobId;
	*jobIds = lappend(*jobIds, curJobId);

	foreach_declared_ptr(dependentJob, curJob->dependentJobList)
	{
		TraverseJobTree(dependentJob, jobIds);
	}
}