diff --git a/src/backend/distributed/commands/multi_copy.c b/src/backend/distributed/commands/multi_copy.c index 0284ea64d..23847ac01 100644 --- a/src/backend/distributed/commands/multi_copy.c +++ b/src/backend/distributed/commands/multi_copy.c @@ -2663,7 +2663,6 @@ CreateLocalColocatedIntermediateFile(CitusCopyDestReceiver *copyDest, CreateIntermediateResultsDirectory(); const int fileFlags = (O_CREAT | O_RDWR | O_TRUNC); - const int fileMode = (S_IRUSR | S_IWUSR); StringInfo filePath = makeStringInfo(); appendStringInfo(filePath, "%s_%ld", copyDest->colocatedIntermediateResultIdPrefix, @@ -2671,7 +2670,7 @@ CreateLocalColocatedIntermediateFile(CitusCopyDestReceiver *copyDest, const char *fileName = QueryResultFileName(filePath->data); shardState->fileDest = - FileCompatFromFileStart(FileOpenForTransmit(fileName, fileFlags, fileMode)); + FileCompatFromFileStart(FileOpenForTransmit(fileName, fileFlags)); CopyOutState localFileCopyOutState = shardState->copyOutState; bool isBinaryCopy = localFileCopyOutState->binary; diff --git a/src/backend/distributed/executor/intermediate_results.c b/src/backend/distributed/executor/intermediate_results.c index 0e18d4416..daf707b24 100644 --- a/src/backend/distributed/executor/intermediate_results.c +++ b/src/backend/distributed/executor/intermediate_results.c @@ -295,7 +295,6 @@ PrepareIntermediateResultBroadcast(RemoteFileDestReceiver *resultDest) if (resultDest->writeLocalFile) { const int fileFlags = (O_APPEND | O_CREAT | O_RDWR | O_TRUNC | PG_BINARY); - const int fileMode = (S_IRUSR | S_IWUSR); /* make sure the directory exists */ CreateIntermediateResultsDirectory(); @@ -303,8 +302,7 @@ PrepareIntermediateResultBroadcast(RemoteFileDestReceiver *resultDest) const char *fileName = QueryResultFileName(resultId); resultDest->fileCompat = FileCompatFromFileStart(FileOpenForTransmit(fileName, - fileFlags, - fileMode)); + fileFlags)); } WorkerNode *workerNode = NULL; @@ -606,7 +604,7 @@ CreateIntermediateResultsDirectory(void) { char *resultDirectory = IntermediateResultsDirectory(); - int makeOK = mkdir(resultDirectory, S_IRWXU); + int makeOK = MakePGDirectory(resultDirectory); if (makeOK != 0) { if (errno == EEXIST) @@ -976,7 +974,6 @@ FetchRemoteIntermediateResult(MultiConnection *connection, char *resultId) StringInfo copyCommand = makeStringInfo(); const int fileFlags = (O_APPEND | O_CREAT | O_RDWR | O_TRUNC | PG_BINARY); - const int fileMode = (S_IRUSR | S_IWUSR); PGconn *pgConn = connection->pgConn; int socket = PQsocket(pgConn); @@ -998,7 +995,7 @@ FetchRemoteIntermediateResult(MultiConnection *connection, char *resultId) PQclear(result); - File fileDesc = FileOpenForTransmit(localPath, fileFlags, fileMode); + File fileDesc = FileOpenForTransmit(localPath, fileFlags); FileCompat fileCompat = FileCompatFromFileStart(fileDesc); while (true) diff --git a/src/backend/distributed/executor/transmit.c b/src/backend/distributed/executor/transmit.c index a10ae4fbf..224d8e589 100644 --- a/src/backend/distributed/executor/transmit.c +++ b/src/backend/distributed/executor/transmit.c @@ -17,6 +17,7 @@ #include "pgstat.h" #include "commands/defrem.h" +#include "common/file_perm.h" #include "libpq/libpq.h" #include "libpq/pqformat.h" #include "storage/fd.h" @@ -48,8 +49,7 @@ RedirectCopyDataToRegularFile(const char *filename) { StringInfo copyData = makeStringInfo(); const int fileFlags = (O_APPEND | O_CREAT | O_RDWR | O_TRUNC | PG_BINARY); - const int fileMode = (S_IRUSR | S_IWUSR); - File fileDesc = FileOpenForTransmit(filename, fileFlags, fileMode); + File fileDesc = FileOpenForTransmit(filename, fileFlags); FileCompat fileCompat = FileCompatFromFileStart(fileDesc); SendCopyInStart(); @@ -92,7 +92,7 @@ SendRegularFile(const char *filename) const int fileMode = 0; /* we currently do not check if the caller has permissions for this file */ - File fileDesc = FileOpenForTransmit(filename, fileFlags, fileMode); + File fileDesc = FileOpenForTransmitPerm(filename, fileFlags, fileMode); FileCompat fileCompat = FileCompatFromFileStart(fileDesc); /* @@ -136,12 +136,23 @@ FreeStringInfo(StringInfo stringInfo) /* - * FileOpenForTransmit opens file with the given filename and flags. On success, - * the function returns the internal file handle for the opened file. On failure - * the function errors out. + * Open a file with FileOpenForTransmitPerm() and pass default file mode for + * the fileMode parameter. */ File -FileOpenForTransmit(const char *filename, int fileFlags, int fileMode) +FileOpenForTransmit(const char *filename, int fileFlags) +{ + return FileOpenForTransmitPerm(filename, fileFlags, pg_file_create_mode); +} + + +/* + * FileOpenForTransmitPerm opens file with the given filename and flags. On + * success, the function returns the internal file handle for the opened file. + * On failure the function errors out. + */ +File +FileOpenForTransmitPerm(const char *filename, int fileFlags, int fileMode) { struct stat fileStat; diff --git a/src/backend/distributed/operations/shard_transfer.c b/src/backend/distributed/operations/shard_transfer.c index d46159d2b..6796346c5 100644 --- a/src/backend/distributed/operations/shard_transfer.c +++ b/src/backend/distributed/operations/shard_transfer.c @@ -294,6 +294,17 @@ citus_move_shard_placement(PG_FUNCTION_ARGS) CheckCitusVersion(ERROR); EnsureCoordinator(); + List *referenceTableIdList = NIL; + + if (HasNodesWithMissingReferenceTables(&referenceTableIdList)) + { + ereport(ERROR, (errmsg("there are missing reference tables on some nodes"), + errhint("Copy reference tables first with " + "replicate_reference_tables() or use " + "citus_rebalance_start() that will do it automatically." + ))); + } + int64 shardId = PG_GETARG_INT64(0); char *sourceNodeName = text_to_cstring(PG_GETARG_TEXT_P(1)); int32 sourceNodePort = PG_GETARG_INT32(2); diff --git a/src/backend/distributed/shared_library_init.c b/src/backend/distributed/shared_library_init.c index ad5a14a25..45e212e8b 100644 --- a/src/backend/distributed/shared_library_init.c +++ b/src/backend/distributed/shared_library_init.c @@ -895,22 +895,13 @@ DecrementExternalClientBackendCounterAtExit(int code, Datum arg) static void CreateRequiredDirectories(void) { - const char *subdirs[] = { - "pg_foreign_file", - "pg_foreign_file/cached", - ("base/" PG_JOB_CACHE_DIR) - }; + const char *subdir = ("base/" PG_JOB_CACHE_DIR); - for (int dirNo = 0; dirNo < lengthof(subdirs); dirNo++) + if (MakePGDirectory(subdir) != 0 && errno != EEXIST) { - int ret = mkdir(subdirs[dirNo], S_IRWXU); - - if (ret != 0 && errno != EEXIST) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not create directory \"%s\": %m", - subdirs[dirNo]))); - } + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not create directory \"%s\": %m", + subdir))); } } diff --git a/src/backend/distributed/utils/directory.c b/src/backend/distributed/utils/directory.c index bad585809..6701bf8fb 100644 --- a/src/backend/distributed/utils/directory.c +++ b/src/backend/distributed/utils/directory.c @@ -29,7 +29,7 @@ static bool FileIsLink(const char *filename, struct stat filestat); void CitusCreateDirectory(StringInfo directoryName) { - int makeOK = mkdir(directoryName->data, S_IRWXU); + int makeOK = MakePGDirectory(directoryName->data); if (makeOK != 0) { ereport(ERROR, (errcode_for_file_access(), diff --git a/src/backend/distributed/worker/worker_sql_task_protocol.c b/src/backend/distributed/worker/worker_sql_task_protocol.c index 2cf48fc6f..708fee15d 100644 --- a/src/backend/distributed/worker/worker_sql_task_protocol.c +++ b/src/backend/distributed/worker/worker_sql_task_protocol.c @@ -126,7 +126,6 @@ TaskFileDestReceiverStartup(DestReceiver *dest, int operation, const char *nullPrintCharacter = "\\N"; const int fileFlags = (O_APPEND | O_CREAT | O_RDWR | O_TRUNC | PG_BINARY); - const int fileMode = (S_IRUSR | S_IWUSR); /* use the memory context that was in place when the DestReceiver was created */ MemoryContext oldContext = MemoryContextSwitchTo(taskFileDest->memoryContext); @@ -148,8 +147,7 @@ TaskFileDestReceiverStartup(DestReceiver *dest, int operation, taskFileDest->fileCompat = FileCompatFromFileStart(FileOpenForTransmit( taskFileDest->filePath, - fileFlags, - fileMode)); + fileFlags)); if (copyOutState->binary) { diff --git a/src/include/distributed/transmit.h b/src/include/distributed/transmit.h index b86fd9150..9c2ab87ab 100644 --- a/src/include/distributed/transmit.h +++ b/src/include/distributed/transmit.h @@ -21,7 +21,8 @@ /* Function declarations for transmitting files between two nodes */ extern void RedirectCopyDataToRegularFile(const char *filename); extern void SendRegularFile(const char *filename); -extern File FileOpenForTransmit(const char *filename, int fileFlags, int fileMode); +extern File FileOpenForTransmit(const char *filename, int fileFlags); +extern File FileOpenForTransmitPerm(const char *filename, int fileFlags, int fileMode); #endif /* TRANSMIT_H */ diff --git a/src/test/regress/expected/shard_rebalancer.out b/src/test/regress/expected/shard_rebalancer.out index a7cd6b38c..988fa68be 100644 --- a/src/test/regress/expected/shard_rebalancer.out +++ b/src/test/regress/expected/shard_rebalancer.out @@ -2395,6 +2395,74 @@ SELECT count(*) FROM pg_dist_partition; 0 (1 row) +-- verify a system with a new node won't copy distributed table shards without reference tables +SELECT 1 from master_remove_node('localhost', :worker_2_port); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT public.wait_until_metadata_sync(30000); + wait_until_metadata_sync +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE r1 (a int PRIMARY KEY, b int); +SELECT create_reference_table('r1'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE d1 (a int PRIMARY KEY, b int); +SELECT create_distributed_table('d1', 'a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +ALTER SEQUENCE pg_dist_groupid_seq RESTART WITH 15; +SELECT 1 from master_add_node('localhost', :worker_2_port); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +-- count the number of placements for the reference table to verify it is not available on +-- all nodes +SELECT count(*) +FROM pg_dist_shard +JOIN pg_dist_shard_placement USING (shardid) +WHERE logicalrelid = 'r1'::regclass; + count +--------------------------------------------------------------------- + 1 +(1 row) + +-- #7426 We can't move shards to the fresh node before we copy reference tables there. +-- rebalance_table_shards() will do the copy, but the low-level +-- citus_move_shard_placement() should raise an error +SELECT citus_move_shard_placement(pg_dist_shard.shardid, nodename, nodeport, 'localhost', :worker_2_port) + FROM pg_dist_shard JOIN pg_dist_shard_placement USING (shardid) + WHERE logicalrelid = 'd1'::regclass AND nodename = 'localhost' AND nodeport = :worker_1_port LIMIT 1; +ERROR: there are missing reference tables on some nodes +SELECT replicate_reference_tables(); + replicate_reference_tables +--------------------------------------------------------------------- + +(1 row) + +-- After replication, the move should succeed. +SELECT citus_move_shard_placement(pg_dist_shard.shardid, nodename, nodeport, 'localhost', :worker_2_port) + FROM pg_dist_shard JOIN pg_dist_shard_placement USING (shardid) + WHERE logicalrelid = 'd1'::regclass AND nodename = 'localhost' AND nodeport = :worker_1_port LIMIT 1; + citus_move_shard_placement +--------------------------------------------------------------------- + +(1 row) + +DROP TABLE d1, r1; -- verify a system having only reference tables will copy the reference tables when -- executing the rebalancer SELECT 1 from master_remove_node('localhost', :worker_2_port); diff --git a/src/test/regress/sql/shard_rebalancer.sql b/src/test/regress/sql/shard_rebalancer.sql index 5d8e89b36..9037f8f75 100644 --- a/src/test/regress/sql/shard_rebalancer.sql +++ b/src/test/regress/sql/shard_rebalancer.sql @@ -1340,6 +1340,43 @@ DROP TABLE t1, r1, r2; -- test suites should clean up their distributed tables. SELECT count(*) FROM pg_dist_partition; +-- verify a system with a new node won't copy distributed table shards without reference tables + +SELECT 1 from master_remove_node('localhost', :worker_2_port); +SELECT public.wait_until_metadata_sync(30000); + +CREATE TABLE r1 (a int PRIMARY KEY, b int); +SELECT create_reference_table('r1'); + +CREATE TABLE d1 (a int PRIMARY KEY, b int); +SELECT create_distributed_table('d1', 'a'); + +ALTER SEQUENCE pg_dist_groupid_seq RESTART WITH 15; +SELECT 1 from master_add_node('localhost', :worker_2_port); + +-- count the number of placements for the reference table to verify it is not available on +-- all nodes +SELECT count(*) +FROM pg_dist_shard +JOIN pg_dist_shard_placement USING (shardid) +WHERE logicalrelid = 'r1'::regclass; + +-- #7426 We can't move shards to the fresh node before we copy reference tables there. +-- rebalance_table_shards() will do the copy, but the low-level +-- citus_move_shard_placement() should raise an error +SELECT citus_move_shard_placement(pg_dist_shard.shardid, nodename, nodeport, 'localhost', :worker_2_port) + FROM pg_dist_shard JOIN pg_dist_shard_placement USING (shardid) + WHERE logicalrelid = 'd1'::regclass AND nodename = 'localhost' AND nodeport = :worker_1_port LIMIT 1; + +SELECT replicate_reference_tables(); + +-- After replication, the move should succeed. +SELECT citus_move_shard_placement(pg_dist_shard.shardid, nodename, nodeport, 'localhost', :worker_2_port) + FROM pg_dist_shard JOIN pg_dist_shard_placement USING (shardid) + WHERE logicalrelid = 'd1'::regclass AND nodename = 'localhost' AND nodeport = :worker_1_port LIMIT 1; + +DROP TABLE d1, r1; + -- verify a system having only reference tables will copy the reference tables when -- executing the rebalancer