mirror of https://github.com/citusdata/citus.git

ActivePrimaryNodeList: add lockMode parameter

parent 482f3b1474
commit 492d1b2cba
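The hunks below change ActivePrimaryNodeList(void) to ActivePrimaryNodeList(LOCKMODE lockMode) and update every caller. As a rough sketch of the resulting call pattern (not part of the commit: ProcessAllPrimaries and its body are hypothetical; only ActivePrimaryNodeList, WorkerNode, ShareLock and NoLock come from the diff), a caller that must not race with node additions passes ShareLock, while call sites that already hold a lock on pg_dist_node, or can tolerate concurrent additions, pass NoLock:

#include "postgres.h"

#include "distributed/worker_manager.h"   /* ActivePrimaryNodeList(), WorkerNode */
#include "nodes/pg_list.h"                /* List, ListCell, foreach */
#include "storage/lockdefs.h"             /* ShareLock, NoLock */

/* hypothetical caller, for illustration only */
static void
ProcessAllPrimaries(void)
{
	/* ShareLock on pg_dist_node keeps the node list stable while we use it */
	List *workerNodeList = ActivePrimaryNodeList(ShareLock);
	ListCell *workerNodeCell = NULL;

	foreach(workerNodeCell, workerNodeList)
	{
		WorkerNode *workerNode = (WorkerNode *) lfirst(workerNodeCell);

		/* ... contact or inspect workerNode here ... */
		(void) workerNode;
	}
}

Most read-only call sites in the diff pass NoLock, which matches the pre-commit behaviour of taking no lock on pg_dist_node at all.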
@@ -64,6 +64,7 @@
 #include "parser/parse_node.h"
 #include "parser/parse_relation.h"
 #include "parser/parser.h"
+#include "storage/lmgr.h"
 #include "tcop/pquery.h"
 #include "tcop/tcopprot.h"
 #include "utils/builtins.h"
@@ -308,7 +309,7 @@ create_reference_table(PG_FUNCTION_ARGS)
  */
 EnsureRelationKindSupported(relationId);

-workerNodeList = ActivePrimaryNodeList();
+workerNodeList = ActivePrimaryNodeList(ShareLock);
 workerCount = list_length(workerNodeList);

 /* if there are no workers, error out */
@@ -333,7 +334,7 @@ create_reference_table(PG_FUNCTION_ARGS)
 /*
  * CreateDistributedTable creates distributed table in the given configuration.
  * This functions contains all necessary logic to create distributed tables. It
- * perform necessary checks to ensure distributing the table is safe. If it is
+ * performs necessary checks to ensure distributing the table is safe. If it is
  * safe to distribute the table, this function creates distributed table metadata,
  * creates shards and copies local data to shards. This function also handles
  * partitioned tables by distributing its partitions as well.
@@ -89,7 +89,7 @@ EnsureDependenciesExistsOnAllNodes(const ObjectAddress *target)
  * either get it now, or get it in master_add_node after this transaction finishes and
  * the pg_dist_object record becomes visible.
  */
-LockRelationOid(DistNodeRelationId(), RowShareLock);
+workerNodeList = ActivePrimaryNodeList(RowShareLock);

 /*
  * right after we acquired the lock we mark our objects as distributed, these changes
@@ -110,7 +110,6 @@ EnsureDependenciesExistsOnAllNodes(const ObjectAddress *target)
 /*
  * collect and connect to all applicable nodes
  */
-workerNodeList = ActivePrimaryNodeList();
 if (list_length(workerNodeList) <= 0)
 {
 /* no nodes to execute on */
@@ -229,7 +229,7 @@ static void
 AcquireDistributedLockOnRelations(List *relationIdList, LOCKMODE lockMode)
 {
 ListCell *relationIdCell = NULL;
-List *workerNodeList = ActivePrimaryNodeList();
+List *workerNodeList = ActivePrimaryNodeList(NoLock);
 const char *lockModeText = LockModeToLockModeText(lockMode);

 /*
@@ -1066,7 +1066,7 @@ DDLTaskList(Oid relationId, const char *commandString)
 List *
 NodeDDLTaskList(TargetWorkerSet targets, List *commands)
 {
-List *workerNodes = TargetWorkerSetNodeList(targets);
+List *workerNodes = TargetWorkerSetNodeList(targets, NoLock);
 char *concatenatedCommands = StringJoin(commands, ';');
 DDLJob *ddlJob = NULL;
 ListCell *workerNodeCell = NULL;
@@ -979,7 +979,7 @@ CloseShardPlacementAssociation(struct MultiConnection *connection)

 /*
  * Note that we don't reset ConnectionPlacementHashEntry's
- * primaryConnection here, that'd more complicated than it seems
+ * primaryConnection here, that'd be more complicated than it seems
  * worth. That means we'll error out spuriously if a DML/DDL
  * executing connection is closed earlier in a transaction.
  */
@@ -127,7 +127,7 @@ broadcast_intermediate_result(PG_FUNCTION_ARGS)
  */
 BeginOrContinueCoordinatedTransaction();

-nodeList = ActivePrimaryNodeList();
+nodeList = ActivePrimaryNodeList(NoLock);
 estate = CreateExecutorState();
 resultDest = (RemoteFileDestReceiver *) CreateRemoteFileDestReceiver(resultIdString,
 estate, nodeList,
@@ -209,7 +209,7 @@ MultiTaskTrackerExecute(Job *job)
  * assigning and checking the status of tasks. The second (temporary) hash
  * helps us in fetching results data from worker nodes to the master node.
  */
-workerNodeList = ActivePrimaryNodeList();
+workerNodeList = ActivePrimaryNodeList(NoLock);
 taskTrackerCount = (uint32) list_length(workerNodeList);

 /* connect as the current user for running queries */
@@ -30,7 +30,7 @@


 /* local functions forward declarations */
-static List * OpenConnectionsToAllNodes(void);
+static List * OpenConnectionsToAllNodes(LOCKMODE lockMode);
 static void BlockDistributedTransactions(void);
 static void CreateRemoteRestorePoints(char *restoreName, List *connectionList);

@@ -83,8 +83,11 @@ citus_create_restore_point(PG_FUNCTION_ARGS)
 MAXFNAMELEN - 1)));
 }

-/* establish connections to all nodes before taking any locks */
-connectionList = OpenConnectionsToAllNodes();
+/*
+ * establish connections to all nodes before taking any locks
+ * ShareLock prevents new nodes being added, rendering connectionList incomplete
+ */
+connectionList = OpenConnectionsToAllNodes(ShareLock);

 /*
  * Send a BEGIN to bust through pgbouncer. We won't actually commit since
@@ -111,14 +114,14 @@ citus_create_restore_point(PG_FUNCTION_ARGS)
  * of connections.
  */
 static List *
-OpenConnectionsToAllNodes(void)
+OpenConnectionsToAllNodes(LOCKMODE lockMode)
 {
 List *connectionList = NIL;
 List *workerNodeList = NIL;
 ListCell *workerNodeCell = NULL;
 int connectionFlags = FORCE_NEW_CONNECTION;

-workerNodeList = ActivePrimaryNodeList();
+workerNodeList = ActivePrimaryNodeList(lockMode);

 foreach(workerNodeCell, workerNodeList)
 {
@@ -177,7 +177,7 @@ CreateShardsWithRoundRobinPolicy(Oid distributedTableId, int32 shardCount,
 LockRelationOid(DistNodeRelationId(), RowShareLock);

 /* load and sort the worker node list for deterministic placement */
-workerNodeList = ActivePrimaryNodeList();
+workerNodeList = ActivePrimaryNodeList(NoLock);
 workerNodeList = SortList(workerNodeList, CompareWorkerNodes);

 /*
@@ -405,8 +405,11 @@ CreateReferenceTableShard(Oid distributedTableId)
 tableName)));
 }

-/* load and sort the worker node list for deterministic placement */
-workerNodeList = ActivePrimaryNodeList();
+/*
+ * load and sort the worker node list for deterministic placements
+ * create_reference_table has already acquired ActivePrimaryNodeList lock
+ */
+workerNodeList = ActivePrimaryNodeList(NoLock);
 workerNodeList = SortList(workerNodeList, CompareWorkerNodes);

 /* get the next shard id */
@@ -94,6 +94,7 @@ master_create_empty_shard(PG_FUNCTION_ARGS)

 uint32 candidateNodeIndex = 0;
 List *candidateNodeList = NIL;
+List *workerNodeList = NIL;
 text *nullMinValue = NULL;
 text *nullMaxValue = NULL;
 char partitionMethod = 0;
@@ -167,14 +168,15 @@ master_create_empty_shard(PG_FUNCTION_ARGS)
 shardId = GetNextShardId();

 /* if enough live groups, add an extra candidate node as backup */
-{
-uint32 primaryNodeCount = ActivePrimaryNodeCount();
+workerNodeList = ActivePrimaryNodeList(NoLock);

+if (list_length(workerNodeList) > ShardReplicationFactor)
+{
+attemptableNodeCount = ShardReplicationFactor + 1;
+}
+else
+{
 attemptableNodeCount = ShardReplicationFactor;
-if (primaryNodeCount > ShardReplicationFactor)
-{
-attemptableNodeCount = ShardReplicationFactor + 1;
-}
 }

 /* first retrieve a list of random nodes for shard placements */
@@ -188,7 +190,6 @@ master_create_empty_shard(PG_FUNCTION_ARGS)
 }
 else if (ShardPlacementPolicy == SHARD_PLACEMENT_ROUND_ROBIN)
 {
-List *workerNodeList = ActivePrimaryNodeList();
 candidateNode = WorkerGetRoundRobinCandidateNode(workerNodeList, shardId,
 candidateNodeIndex);
 }
@@ -24,6 +24,7 @@
 #include "postmaster/postmaster.h"
 #include "storage/fd.h"
 #include "storage/ipc.h"
+#include "storage/lmgr.h"
 #include "storage/shmem.h"
 #include "utils/guc.h"
 #include "utils/hsearch.h"
@@ -297,7 +298,7 @@ WorkerGetNodeWithName(const char *hostname)
 uint32
 ActivePrimaryNodeCount(void)
 {
-List *workerNodeList = ActivePrimaryNodeList();
+List *workerNodeList = ActivePrimaryNodeList(NoLock);
 uint32 liveWorkerCount = list_length(workerNodeList);

 return liveWorkerCount;
@@ -319,17 +320,25 @@ ActiveReadableNodeCount(void)

 /*
  * ActivePrimaryNodeList returns a list of all the active primary nodes in workerNodeHash
+ * lockMode specifies which lock to use on pg_dist_node, this is necessary when
+ * the caller wouldn't want nodes to be added concurrent to their use of this list
  */
 List *
-ActivePrimaryNodeList(void)
+ActivePrimaryNodeList(LOCKMODE lockMode)
 {
 List *workerNodeList = NIL;
 WorkerNode *workerNode = NULL;
-HTAB *workerNodeHash = GetWorkerNodeHash();
+HTAB *workerNodeHash = NULL;
 HASH_SEQ_STATUS status;

 EnsureModificationsCanRun();

+if (lockMode != NoLock)
+{
+LockRelationOid(DistNodeRelationId(), lockMode);
+}
+
+workerNodeHash = GetWorkerNodeHash();
 hash_seq_init(&status, workerNodeHash);

 while ((workerNode = hash_seq_search(&status)) != NULL)
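The definition above is the core of the change: when a lock mode other than NoLock is given, pg_dist_node is locked before GetWorkerNodeHash() builds the list, and per the comments in this diff ShareLock is used where new nodes must not be added while the list is in use. A hedged sketch contrasting the two caller conventions that recur in this diff follows; the wrapper function is hypothetical, ActivePrimaryNodeList, LockRelationOid, DistNodeRelationId and the lock modes are names taken from the diff, and the header locating DistNodeRelationId is an assumption:

#include "postgres.h"

#include "distributed/metadata_cache.h"   /* DistNodeRelationId(); assumed header */
#include "distributed/worker_manager.h"   /* ActivePrimaryNodeList() */
#include "nodes/pg_list.h"                /* List, NIL */
#include "storage/lmgr.h"                 /* LockRelationOid() */
#include "storage/lockdefs.h"             /* ShareLock, RowShareLock, NoLock */

/* hypothetical helper contrasting the two conventions, for illustration only */
static void
FetchPrimariesBothWays(void)
{
	List *implicitlyLockedList = NIL;
	List *explicitlyLockedList = NIL;

	/* 1. Let ActivePrimaryNodeList take the pg_dist_node lock itself. */
	implicitlyLockedList = ActivePrimaryNodeList(ShareLock);

	/*
	 * 2. Lock pg_dist_node explicitly first and pass NoLock, so the lock is
	 *    not requested a second time (the CreateShardsWithRoundRobinPolicy
	 *    hunk follows this pattern).
	 */
	LockRelationOid(DistNodeRelationId(), RowShareLock);
	explicitlyLockedList = ActivePrimaryNodeList(NoLock);

	(void) implicitlyLockedList;
	(void) explicitlyLockedList;
}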
@@ -102,13 +102,13 @@ List *
 OrderObjectAddressListInDependencyOrder(List *objectAddressList)
 {
 ObjectAddressCollector collector = { 0 };
-ListCell *ojectAddressCell = NULL;
+ListCell *objectAddressCell = NULL;

 InitObjectAddressCollector(&collector);

-foreach(ojectAddressCell, objectAddressList)
+foreach(objectAddressCell, objectAddressList)
 {
-ObjectAddress *objectAddress = (ObjectAddress *) lfirst(ojectAddressCell);
+ObjectAddress *objectAddress = (ObjectAddress *) lfirst(objectAddressCell);

 if (IsObjectAddressCollected(objectAddress, &collector))
 {
@@ -1097,7 +1097,7 @@ SchemaOwnerName(Oid objectId)
 static bool
 HasMetadataWorkers(void)
 {
-List *workerNodeList = ActivePrimaryNodeList();
+List *workerNodeList = ActivePrimaryNodeList(NoLock);
 ListCell *workerNodeCell = NULL;

 foreach(workerNodeCell, workerNodeList)
@@ -209,14 +209,14 @@ get_current_transaction_id(PG_FUNCTION_ARGS)
  * the active backends from each node of the cluster. If you call that function from
  * the coordinator, it will returns back active transaction from the coordinator as
  * well. Yet, if you call it from the worker, result won't include the transactions
- * on the coordinator node, since worker nodes do not aware of the coordinator.
+ * on the coordinator node, since worker nodes are not aware of the coordinator.
  */
 Datum
 get_global_active_transactions(PG_FUNCTION_ARGS)
 {
 TupleDesc tupleDescriptor = NULL;
 Tuplestorestate *tupleStore = NULL;
-List *workerNodeList = ActivePrimaryNodeList();
+List *workerNodeList = ActivePrimaryNodeList(NoLock);
 ListCell *workerNodeCell = NULL;
 List *connectionList = NIL;
 ListCell *connectionCell = NULL;
@@ -315,7 +315,7 @@ CitusStatActivity(const char *statQuery)
 {
 List *citusStatsList = NIL;

-List *workerNodeList = ActivePrimaryNodeList();
+List *workerNodeList = ActivePrimaryNodeList(NoLock);
 ListCell *workerNodeCell = NULL;
 char *nodeUser = NULL;
 List *connectionList = NIL;
@@ -453,7 +453,7 @@ GetLocalNodeCitusDistStat(const char *statQuery)
 localGroupId = GetLocalGroupId();

 /* get the current worker's node stats */
-workerNodeList = ActivePrimaryNodeList();
+workerNodeList = ActivePrimaryNodeList(NoLock);
 foreach(workerNodeCell, workerNodeList)
 {
 WorkerNode *workerNode = (WorkerNode *) lfirst(workerNodeCell);
@@ -122,7 +122,7 @@ RecoverTwoPhaseCommits(void)
 ListCell *workerNodeCell = NULL;
 int recoveredTransactionCount = 0;

-workerList = ActivePrimaryNodeList();
+workerList = ActivePrimaryNodeList(NoLock);

 foreach(workerNodeCell, workerList)
 {
@@ -75,7 +75,7 @@ SendCommandToWorkerAsUser(char *nodeName, int32 nodePort, const char *nodeUser,
 void
 SendCommandToFirstWorker(char *command)
 {
-List *workerNodeList = ActivePrimaryNodeList();
+List *workerNodeList = ActivePrimaryNodeList(NoLock);
 WorkerNode *firstWorkerNode = NULL;

 workerNodeList = SortList(workerNodeList, CompareWorkerNodes);
@@ -111,9 +111,9 @@ SendCommandToWorkers(TargetWorkerSet targetWorkerSet, const char *command)
  * TargetWorkerSet.
  */
 List *
-TargetWorkerSetNodeList(TargetWorkerSet targetWorkerSet)
+TargetWorkerSetNodeList(TargetWorkerSet targetWorkerSet, LOCKMODE lockMode)
 {
-List *workerNodeList = ActivePrimaryNodeList();
+List *workerNodeList = ActivePrimaryNodeList(lockMode);
 ListCell *workerNodeCell = NULL;
 List *result = NIL;

@@ -148,7 +148,7 @@ TargetWorkerSetNodeList(TargetWorkerSet targetWorkerSet)
 void
 SendBareCommandListToWorkers(TargetWorkerSet targetWorkerSet, List *commandList)
 {
-List *workerNodeList = TargetWorkerSetNodeList(targetWorkerSet);
+List *workerNodeList = TargetWorkerSetNodeList(targetWorkerSet, ShareLock);
 ListCell *workerNodeCell = NULL;
 char *nodeUser = CitusExtensionOwnerName();
 ListCell *commandCell = NULL;
@@ -187,7 +187,7 @@ int
 SendBareOptionalCommandListToWorkersAsUser(TargetWorkerSet targetWorkerSet,
 List *commandList, const char *user)
 {
-List *workerNodeList = TargetWorkerSetNodeList(targetWorkerSet);
+List *workerNodeList = TargetWorkerSetNodeList(targetWorkerSet, ShareLock);
 ListCell *workerNodeCell = NULL;
 ListCell *commandCell = NULL;
 int maxError = RESPONSE_OKAY;
@@ -239,7 +239,7 @@ SendCommandToWorkersParams(TargetWorkerSet targetWorkerSet, const char *command,
 {
 List *connectionList = NIL;
 ListCell *connectionCell = NULL;
-List *workerNodeList = TargetWorkerSetNodeList(targetWorkerSet);
+List *workerNodeList = TargetWorkerSetNodeList(targetWorkerSet, ShareLock);
 ListCell *workerNodeCell = NULL;

 BeginOrContinueCoordinatedTransaction();
@@ -967,7 +967,7 @@ CountPrimariesWithMetadata(void)
 * AddNodeMetadata checks the given node information and adds the specified node to the
 * pg_dist_node table of the master and workers with metadata.
 * If the node already exists, the function returns the id of the node.
-* If not, the following prodecure is followed while adding a node: If the groupId is not
+* If not, the following procedure is followed while adding a node: If the groupId is not
 * explicitly given by the user, the function picks the group that the new node should
 * be in with respect to GroupSize. Then, the new node is inserted into the local
 * pg_dist_node as well as the nodes with hasmetadata=true.
@@ -253,13 +253,11 @@ ReplicateShardToAllWorkers(ShardInterval *shardInterval)
 ListCell *workerNodeCell = NULL;

-/* prevent concurrent pg_dist_node changes */
-LockRelationOid(DistNodeRelationId(), RowShareLock);

-workerNodeList = ActivePrimaryNodeList();
+workerNodeList = ActivePrimaryNodeList(ShareLock);

 /*
- * We will iterate over all worker nodes and if healthy placement is not exist at
- * given node we will copy the shard to that node. Then we will also modify
+ * We will iterate over all worker nodes and if a healthy placement does not exist
+ * at given node we will copy the shard to that node. Then we will also modify
  * the metadata to reflect newly copied shard.
  */
 workerNodeList = SortList(workerNodeList, CompareWorkerNodes);
@@ -391,7 +389,7 @@ uint32
 CreateReferenceTableColocationId()
 {
 uint32 colocationId = INVALID_COLOCATION_ID;
-List *workerNodeList = ActivePrimaryNodeList();
+List *workerNodeList = ActivePrimaryNodeList(ShareLock);
 int shardCount = 1;
 int replicationFactor = list_length(workerNodeList);
 Oid distributionColumnType = InvalidOid;
@@ -209,7 +209,7 @@ LockShardListResourcesOnFirstWorker(LOCKMODE lockmode, List *shardIntervalList)
 static bool
 IsFirstWorkerNode()
 {
-List *workerNodeList = ActivePrimaryNodeList();
+List *workerNodeList = ActivePrimaryNodeList(NoLock);
 WorkerNode *firstWorkerNode = NULL;

 workerNodeList = SortList(workerNodeList, CompareWorkerNodes);
@@ -63,7 +63,7 @@ extern WorkerNode * WorkerGetRoundRobinCandidateNode(List *workerNodeList,
 uint32 placementIndex);
 extern WorkerNode * WorkerGetLocalFirstCandidateNode(List *currentNodeList);
 extern uint32 ActivePrimaryNodeCount(void);
-extern List * ActivePrimaryNodeList(void);
+extern List * ActivePrimaryNodeList(LOCKMODE lockMode);
 extern uint32 ActiveReadableNodeCount(void);
 extern List * ActiveReadableNodeList(void);
 extern WorkerNode * GetWorkerNodeByNodeId(int nodeId);
@@ -13,6 +13,7 @@
 #define WORKER_TRANSACTION_H

 #include "distributed/worker_manager.h"
+#include "storage/lockdefs.h"


 /*
@@ -29,7 +30,7 @@ typedef enum TargetWorkerSet

 /* Functions declarations for worker transactions */
 extern List * GetWorkerTransactions(void);
-extern List * TargetWorkerSetNodeList(TargetWorkerSet targetWorkerSet);
+extern List * TargetWorkerSetNodeList(TargetWorkerSet targetWorkerSet, LOCKMODE lockMode);
 extern void SendCommandToWorker(char *nodeName, int32 nodePort, const char *command);
 extern void SendCommandToWorkerAsUser(char *nodeName, int32 nodePort,
 const char *nodeUser, const char *command);