ActivePrimaryNodeList: add lockMode parameter

pull/2927/head
Philip Dubé 2019-09-03 19:27:16 +00:00
parent 482f3b1474
commit 492d1b2cba
22 changed files with 67 additions and 52 deletions

View File

@@ -64,6 +64,7 @@
#include "parser/parse_node.h"
#include "parser/parse_relation.h"
#include "parser/parser.h"
#include "storage/lmgr.h"
#include "tcop/pquery.h"
#include "tcop/tcopprot.h"
#include "utils/builtins.h"
@@ -308,7 +309,7 @@ create_reference_table(PG_FUNCTION_ARGS)
*/
EnsureRelationKindSupported(relationId);
workerNodeList = ActivePrimaryNodeList();
workerNodeList = ActivePrimaryNodeList(ShareLock);
workerCount = list_length(workerNodeList);
/* if there are no workers, error out */
@@ -333,7 +334,7 @@ create_reference_table(PG_FUNCTION_ARGS)
/*
* CreateDistributedTable creates a distributed table in the given configuration.
* This function contains all necessary logic to create distributed tables. It
* perform necessary checks to ensure distributing the table is safe. If it is
* performs necessary checks to ensure distributing the table is safe. If it is
* safe to distribute the table, this function creates distributed table metadata,
* creates shards and copies local data to shards. This function also handles
* partitioned tables by distributing its partitions as well.

View File

@@ -89,7 +89,7 @@ EnsureDependenciesExistsOnAllNodes(const ObjectAddress *target)
* either get it now, or get it in master_add_node after this transaction finishes and
* the pg_dist_object record becomes visible.
*/
LockRelationOid(DistNodeRelationId(), RowShareLock);
workerNodeList = ActivePrimaryNodeList(RowShareLock);
/*
* right after we acquired the lock we mark our objects as distributed, these changes
@@ -110,7 +110,6 @@ EnsureDependenciesExistsOnAllNodes(const ObjectAddress *target)
/*
* collect and connect to all applicable nodes
*/
workerNodeList = ActivePrimaryNodeList();
if (list_length(workerNodeList) <= 0)
{
/* no nodes to execute on */
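The two hunks above fold the explicit LockRelationOid call into ActivePrimaryNodeList itself. A minimal sketch of the ordering the comment describes (illustrative only, not part of the commit; MarkObjectDistributed stands in for the pg_dist_object write mentioned above):

/* Sketch: the lock-then-mark-then-propagate pattern this file relies on. */
static void
LockThenMarkThenPropagate(const ObjectAddress *target)
{
	/* 1. lock pg_dist_node so cluster membership is stable from here on */
	List *workerNodeList = ActivePrimaryNodeList(RowShareLock);

	/* 2. record the object in pg_dist_object while holding the lock */
	MarkObjectDistributed(target);

	/* 3. create the object on every node in workerNodeList: a concurrent
	 * master_add_node either blocks on our lock and then sees the record,
	 * or committed earlier and its node is already in workerNodeList */
	(void) workerNodeList;
}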

View File

@@ -229,7 +229,7 @@ static void
AcquireDistributedLockOnRelations(List *relationIdList, LOCKMODE lockMode)
{
ListCell *relationIdCell = NULL;
List *workerNodeList = ActivePrimaryNodeList();
List *workerNodeList = ActivePrimaryNodeList(NoLock);
const char *lockModeText = LockModeToLockModeText(lockMode);
/*

View File

@@ -1066,7 +1066,7 @@ DDLTaskList(Oid relationId, const char *commandString)
List *
NodeDDLTaskList(TargetWorkerSet targets, List *commands)
{
List *workerNodes = TargetWorkerSetNodeList(targets);
List *workerNodes = TargetWorkerSetNodeList(targets, NoLock);
char *concatenatedCommands = StringJoin(commands, ';');
DDLJob *ddlJob = NULL;
ListCell *workerNodeCell = NULL;

View File

@@ -979,7 +979,7 @@ CloseShardPlacementAssociation(struct MultiConnection *connection)
/*
* Note that we don't reset ConnectionPlacementHashEntry's
* primaryConnection here, that'd more complicated than it seems
* primaryConnection here, that'd be more complicated than it seems
* worth. That means we'll error out spuriously if a DML/DDL
* executing connection is closed earlier in a transaction.
*/

View File

@@ -127,7 +127,7 @@ broadcast_intermediate_result(PG_FUNCTION_ARGS)
*/
BeginOrContinueCoordinatedTransaction();
nodeList = ActivePrimaryNodeList();
nodeList = ActivePrimaryNodeList(NoLock);
estate = CreateExecutorState();
resultDest = (RemoteFileDestReceiver *) CreateRemoteFileDestReceiver(resultIdString,
estate, nodeList,

View File

@@ -209,7 +209,7 @@ MultiTaskTrackerExecute(Job *job)
* assigning and checking the status of tasks. The second (temporary) hash
* helps us in fetching results data from worker nodes to the master node.
*/
workerNodeList = ActivePrimaryNodeList();
workerNodeList = ActivePrimaryNodeList(NoLock);
taskTrackerCount = (uint32) list_length(workerNodeList);
/* connect as the current user for running queries */

View File

@@ -30,7 +30,7 @@
/* local functions forward declarations */
static List * OpenConnectionsToAllNodes(void);
static List * OpenConnectionsToAllNodes(LOCKMODE lockMode);
static void BlockDistributedTransactions(void);
static void CreateRemoteRestorePoints(char *restoreName, List *connectionList);
@@ -83,8 +83,11 @@ citus_create_restore_point(PG_FUNCTION_ARGS)
MAXFNAMELEN - 1)));
}
/* establish connections to all nodes before taking any locks */
connectionList = OpenConnectionsToAllNodes();
/*
* establish connections to all nodes before taking any locks
* ShareLock prevents new nodes from being added, which would render connectionList incomplete
*/
connectionList = OpenConnectionsToAllNodes(ShareLock);
/*
* Send a BEGIN to bust through pgbouncer. We won't actually commit since
@@ -111,14 +114,14 @@
* of connections.
*/
static List *
OpenConnectionsToAllNodes(void)
OpenConnectionsToAllNodes(LOCKMODE lockMode)
{
List *connectionList = NIL;
List *workerNodeList = NIL;
ListCell *workerNodeCell = NULL;
int connectionFlags = FORCE_NEW_CONNECTION;
workerNodeList = ActivePrimaryNodeList();
workerNodeList = ActivePrimaryNodeList(lockMode);
foreach(workerNodeCell, workerNodeList)
{

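The ShareLock choice matters because the restore point must exist on every node. A sketch of the interleaving it rules out (hypothetical two-session timeline, not part of the commit):

/*
 * session 1: citus_create_restore_point('p')
 *            connectionList = OpenConnectionsToAllNodes(...)   -- sees {A, B}
 * session 2: SELECT master_add_node('C', 5432);                -- commits
 * session 1: CreateRemoteRestorePoints('p', connectionList)    -- node C gets
 *                                                                 no restore point
 *
 * With ActivePrimaryNodeList(ShareLock), session 2 blocks on pg_dist_node
 * until session 1 commits, so connectionList cannot miss a node.
 */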
View File

@@ -177,7 +177,7 @@ CreateShardsWithRoundRobinPolicy(Oid distributedTableId, int32 shardCount,
LockRelationOid(DistNodeRelationId(), RowShareLock);
/* load and sort the worker node list for deterministic placement */
workerNodeList = ActivePrimaryNodeList();
workerNodeList = ActivePrimaryNodeList(NoLock);
workerNodeList = SortList(workerNodeList, CompareWorkerNodes);
/*
@@ -405,8 +405,11 @@ CreateReferenceTableShard(Oid distributedTableId)
tableName)));
}
/* load and sort the worker node list for deterministic placement */
workerNodeList = ActivePrimaryNodeList();
/*
* load and sort the worker node list for deterministic placement
* create_reference_table has already acquired the lock via ActivePrimaryNodeList
*/
workerNodeList = ActivePrimaryNodeList(NoLock);
workerNodeList = SortList(workerNodeList, CompareWorkerNodes);
/* get the next shard id */

View File

@@ -94,6 +94,7 @@ master_create_empty_shard(PG_FUNCTION_ARGS)
uint32 candidateNodeIndex = 0;
List *candidateNodeList = NIL;
List *workerNodeList = NIL;
text *nullMinValue = NULL;
text *nullMaxValue = NULL;
char partitionMethod = 0;
@@ -167,14 +168,15 @@ master_create_empty_shard(PG_FUNCTION_ARGS)
shardId = GetNextShardId();
/* if enough live groups, add an extra candidate node as backup */
{
uint32 primaryNodeCount = ActivePrimaryNodeCount();
attemptableNodeCount = ShardReplicationFactor;
if (primaryNodeCount > ShardReplicationFactor)
{
attemptableNodeCount = ShardReplicationFactor + 1;
}
}
workerNodeList = ActivePrimaryNodeList(NoLock);
if (list_length(workerNodeList) > ShardReplicationFactor)
{
attemptableNodeCount = ShardReplicationFactor + 1;
}
else
{
attemptableNodeCount = ShardReplicationFactor;
}
/* first retrieve a list of random nodes for shard placements */
@@ -188,7 +190,6 @@ master_create_empty_shard(PG_FUNCTION_ARGS)
}
else if (ShardPlacementPolicy == SHARD_PLACEMENT_ROUND_ROBIN)
{
List *workerNodeList = ActivePrimaryNodeList();
candidateNode = WorkerGetRoundRobinCandidateNode(workerNodeList, shardId,
candidateNodeIndex);
}

View File

@@ -24,6 +24,7 @@
#include "postmaster/postmaster.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/shmem.h"
#include "utils/guc.h"
#include "utils/hsearch.h"
@@ -297,7 +298,7 @@ WorkerGetNodeWithName(const char *hostname)
uint32
ActivePrimaryNodeCount(void)
{
List *workerNodeList = ActivePrimaryNodeList();
List *workerNodeList = ActivePrimaryNodeList(NoLock);
uint32 liveWorkerCount = list_length(workerNodeList);
return liveWorkerCount;
@@ -319,17 +320,25 @@ ActiveReadableNodeCount(void)
/*
* ActivePrimaryNodeList returns a list of all the active primary nodes in workerNodeHash.
* lockMode specifies which lock to use on pg_dist_node; this is necessary when
* the caller doesn't want nodes to be added concurrently with their use of this list.
*/
List *
ActivePrimaryNodeList(void)
ActivePrimaryNodeList(LOCKMODE lockMode)
{
List *workerNodeList = NIL;
WorkerNode *workerNode = NULL;
HTAB *workerNodeHash = GetWorkerNodeHash();
HTAB *workerNodeHash = NULL;
HASH_SEQ_STATUS status;
EnsureModificationsCanRun();
if (lockMode != NoLock)
{
LockRelationOid(DistNodeRelationId(), lockMode);
}
workerNodeHash = GetWorkerNodeHash();
hash_seq_init(&status, workerNodeHash);
while ((workerNode = hash_seq_search(&status)) != NULL)
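The new parameter gives callers three patterns, all visible in this commit. A sketch of each (illustrative; the function and variable names below are invented, and it assumes master_add_node takes a lock on pg_dist_node that conflicts with RowShareLock and ShareLock, as the comments in this commit suggest):

#include "distributed/worker_manager.h" /* ActivePrimaryNodeList */
#include "storage/lockdefs.h"           /* NoLock, RowShareLock, ShareLock */

/* Hypothetical caller showing the three lock-mode patterns. */
static void
ExampleLockModeChoices(void)
{
	/* the caller already locked pg_dist_node, or a stale node list is acceptable */
	List *nodeSnapshot = ActivePrimaryNodeList(NoLock);

	/* keep nodes from being added while the list is in use, allow other readers */
	List *stableNodeList = ActivePrimaryNodeList(RowShareLock);

	/* strongest variant in this commit: used where a missed node would leave
	 * metadata incomplete, e.g. reference tables and restore points */
	List *frozenNodeList = ActivePrimaryNodeList(ShareLock);

	(void) nodeSnapshot;
	(void) stableNodeList;
	(void) frozenNodeList;
}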

View File

@@ -102,13 +102,13 @@ List *
OrderObjectAddressListInDependencyOrder(List *objectAddressList)
{
ObjectAddressCollector collector = { 0 };
ListCell *ojectAddressCell = NULL;
ListCell *objectAddressCell = NULL;
InitObjectAddressCollector(&collector);
foreach(ojectAddressCell, objectAddressList)
foreach(objectAddressCell, objectAddressList)
{
ObjectAddress *objectAddress = (ObjectAddress *) lfirst(ojectAddressCell);
ObjectAddress *objectAddress = (ObjectAddress *) lfirst(objectAddressCell);
if (IsObjectAddressCollected(objectAddress, &collector))
{

View File

@@ -1097,7 +1097,7 @@ SchemaOwnerName(Oid objectId)
static bool
HasMetadataWorkers(void)
{
List *workerNodeList = ActivePrimaryNodeList();
List *workerNodeList = ActivePrimaryNodeList(NoLock);
ListCell *workerNodeCell = NULL;
foreach(workerNodeCell, workerNodeList)

View File

@@ -209,14 +209,14 @@ get_current_transaction_id(PG_FUNCTION_ARGS)
* the active backends from each node of the cluster. If you call that function from
the coordinator, it will return active transactions from the coordinator as
well. Yet, if you call it from the worker, the result won't include the transactions
* on the coordinator node, since worker nodes do not aware of the coordinator.
* on the coordinator node, since worker nodes are not aware of the coordinator.
*/
Datum
get_global_active_transactions(PG_FUNCTION_ARGS)
{
TupleDesc tupleDescriptor = NULL;
Tuplestorestate *tupleStore = NULL;
List *workerNodeList = ActivePrimaryNodeList();
List *workerNodeList = ActivePrimaryNodeList(NoLock);
ListCell *workerNodeCell = NULL;
List *connectionList = NIL;
ListCell *connectionCell = NULL;

View File

@@ -315,7 +315,7 @@ CitusStatActivity(const char *statQuery)
{
List *citusStatsList = NIL;
List *workerNodeList = ActivePrimaryNodeList();
List *workerNodeList = ActivePrimaryNodeList(NoLock);
ListCell *workerNodeCell = NULL;
char *nodeUser = NULL;
List *connectionList = NIL;
@@ -453,7 +453,7 @@ GetLocalNodeCitusDistStat(const char *statQuery)
localGroupId = GetLocalGroupId();
/* get the current worker's node stats */
workerNodeList = ActivePrimaryNodeList();
workerNodeList = ActivePrimaryNodeList(NoLock);
foreach(workerNodeCell, workerNodeList)
{
WorkerNode *workerNode = (WorkerNode *) lfirst(workerNodeCell);

View File

@@ -122,7 +122,7 @@ RecoverTwoPhaseCommits(void)
ListCell *workerNodeCell = NULL;
int recoveredTransactionCount = 0;
workerList = ActivePrimaryNodeList();
workerList = ActivePrimaryNodeList(NoLock);
foreach(workerNodeCell, workerList)
{

View File

@@ -75,7 +75,7 @@ SendCommandToWorkerAsUser(char *nodeName, int32 nodePort, const char *nodeUser,
void
SendCommandToFirstWorker(char *command)
{
List *workerNodeList = ActivePrimaryNodeList();
List *workerNodeList = ActivePrimaryNodeList(NoLock);
WorkerNode *firstWorkerNode = NULL;
workerNodeList = SortList(workerNodeList, CompareWorkerNodes);
@@ -111,9 +111,9 @@ SendCommandToWorkers(TargetWorkerSet targetWorkerSet, const char *command)
* TargetWorkerSet.
*/
List *
TargetWorkerSetNodeList(TargetWorkerSet targetWorkerSet)
TargetWorkerSetNodeList(TargetWorkerSet targetWorkerSet, LOCKMODE lockMode)
{
List *workerNodeList = ActivePrimaryNodeList();
List *workerNodeList = ActivePrimaryNodeList(lockMode);
ListCell *workerNodeCell = NULL;
List *result = NIL;
@@ -148,7 +148,7 @@ TargetWorkerSetNodeList(TargetWorkerSet targetWorkerSet)
void
SendBareCommandListToWorkers(TargetWorkerSet targetWorkerSet, List *commandList)
{
List *workerNodeList = TargetWorkerSetNodeList(targetWorkerSet);
List *workerNodeList = TargetWorkerSetNodeList(targetWorkerSet, ShareLock);
ListCell *workerNodeCell = NULL;
char *nodeUser = CitusExtensionOwnerName();
ListCell *commandCell = NULL;
@@ -187,7 +187,7 @@ int
SendBareOptionalCommandListToWorkersAsUser(TargetWorkerSet targetWorkerSet,
List *commandList, const char *user)
{
List *workerNodeList = TargetWorkerSetNodeList(targetWorkerSet);
List *workerNodeList = TargetWorkerSetNodeList(targetWorkerSet, ShareLock);
ListCell *workerNodeCell = NULL;
ListCell *commandCell = NULL;
int maxError = RESPONSE_OKAY;
@@ -239,7 +239,7 @@ SendCommandToWorkersParams(TargetWorkerSet targetWorkerSet, const char *command,
{
List *connectionList = NIL;
ListCell *connectionCell = NULL;
List *workerNodeList = TargetWorkerSetNodeList(targetWorkerSet);
List *workerNodeList = TargetWorkerSetNodeList(targetWorkerSet, ShareLock);
ListCell *workerNodeCell = NULL;
BeginOrContinueCoordinatedTransaction();
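Note the split among the updated callers in this file: the command-propagation paths take ShareLock, while NodeDDLTaskList earlier in the commit passes NoLock. A sketch of the two intents (illustrative; WORKERS_WITH_METADATA and ALL_WORKERS are assumed members of the TargetWorkerSet enum from the header hunk below):

/* Hypothetical caller contrasting the two lock choices. */
static void
ExampleTargetWorkerSetChoices(void)
{
	/* commands that must reach every worker: hold the node set fixed until commit */
	List *metadataWorkers = TargetWorkerSetNodeList(WORKERS_WITH_METADATA, ShareLock);

	/* task-list construction, where placements are validated again at execution */
	List *allWorkers = TargetWorkerSetNodeList(ALL_WORKERS, NoLock);

	(void) metadataWorkers;
	(void) allWorkers;
}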

View File

@@ -967,7 +967,7 @@ CountPrimariesWithMetadata(void)
* AddNodeMetadata checks the given node information and adds the specified node to the
* pg_dist_node table of the master and workers with metadata.
* If the node already exists, the function returns the id of the node.
* If not, the following prodecure is followed while adding a node: If the groupId is not
* If not, the following procedure is followed while adding a node: If the groupId is not
* explicitly given by the user, the function picks the group that the new node should
* be in with respect to GroupSize. Then, the new node is inserted into the local
* pg_dist_node as well as the nodes with hasmetadata=true.

View File

@@ -253,13 +253,11 @@ ReplicateShardToAllWorkers(ShardInterval *shardInterval)
ListCell *workerNodeCell = NULL;
/* prevent concurrent pg_dist_node changes */
LockRelationOid(DistNodeRelationId(), RowShareLock);
workerNodeList = ActivePrimaryNodeList();
workerNodeList = ActivePrimaryNodeList(ShareLock);
/*
* We will iterate over all worker nodes and if healthy placement is not exist at
* given node we will copy the shard to that node. Then we will also modify
* We will iterate over all worker nodes and if a healthy placement does not exist
* at the given node we will copy the shard to that node. Then we will also modify
* the metadata to reflect the newly copied shard.
*/
workerNodeList = SortList(workerNodeList, CompareWorkerNodes);
@@ -391,7 +389,7 @@ uint32
CreateReferenceTableColocationId()
{
uint32 colocationId = INVALID_COLOCATION_ID;
List *workerNodeList = ActivePrimaryNodeList();
List *workerNodeList = ActivePrimaryNodeList(ShareLock);
int shardCount = 1;
int replicationFactor = list_length(workerNodeList);
Oid distributionColumnType = InvalidOid;
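ShareLock is load-bearing here: for reference tables the node count is the replication factor. Restating the two lines above with the reasoning spelled out as comments (a sketch, not part of the commit):

/* reference tables place one replica on every node; a node added between
 * these two statements would be recorded with no replica on it, so the
 * membership must stay fixed until the colocation entry is committed */
List *workerNodeList = ActivePrimaryNodeList(ShareLock);
int replicationFactor = list_length(workerNodeList);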

View File

@@ -209,7 +209,7 @@ LockShardListResourcesOnFirstWorker(LOCKMODE lockmode, List *shardIntervalList)
static bool
IsFirstWorkerNode()
{
List *workerNodeList = ActivePrimaryNodeList();
List *workerNodeList = ActivePrimaryNodeList(NoLock);
WorkerNode *firstWorkerNode = NULL;
workerNodeList = SortList(workerNodeList, CompareWorkerNodes);

View File

@@ -63,7 +63,7 @@ extern WorkerNode * WorkerGetRoundRobinCandidateNode(List *workerNodeList,
uint32 placementIndex);
extern WorkerNode * WorkerGetLocalFirstCandidateNode(List *currentNodeList);
extern uint32 ActivePrimaryNodeCount(void);
extern List * ActivePrimaryNodeList(void);
extern List * ActivePrimaryNodeList(LOCKMODE lockMode);
extern uint32 ActiveReadableNodeCount(void);
extern List * ActiveReadableNodeList(void);
extern WorkerNode * GetWorkerNodeByNodeId(int nodeId);

View File

@@ -13,6 +13,7 @@
#define WORKER_TRANSACTION_H
#include "distributed/worker_manager.h"
#include "storage/lockdefs.h"
/*
@@ -29,7 +30,7 @@ typedef enum TargetWorkerSet
/* Functions declarations for worker transactions */
extern List * GetWorkerTransactions(void);
extern List * TargetWorkerSetNodeList(TargetWorkerSet targetWorkerSet);
extern List * TargetWorkerSetNodeList(TargetWorkerSet targetWorkerSet, LOCKMODE lockMode);
extern void SendCommandToWorker(char *nodeName, int32 nodePort, const char *command);
extern void SendCommandToWorkerAsUser(char *nodeName, int32 nodePort,
const char *nodeUser, const char *command);