Remove old re-partitioning functions

pull/5878/head
Marco Slot 2022-04-04 16:54:32 +02:00
parent b511e28e80
commit 9476f377b5
78 changed files with 645 additions and 4578 deletions

@@ -568,12 +568,6 @@ workflows:
image_tag: '<< pipeline.parameters.pg13_version >>'
make: check-isolation
requires: [build-13]
- test-citus:
name: 'test-13_check-worker'
pg_major: 13
image_tag: '<< pipeline.parameters.pg13_version >>'
make: check-worker
requires: [build-13]
- test-citus:
name: 'test-13_check-operations'
pg_major: 13
@@ -642,12 +636,6 @@ workflows:
image_tag: '<< pipeline.parameters.pg14_version >>'
make: check-isolation
requires: [build-14]
- test-citus:
name: 'test-14_check-worker'
pg_major: 14
image_tag: '<< pipeline.parameters.pg14_version >>'
make: check-worker
requires: [build-14]
- test-citus:
name: 'test-14_check-operations'
pg_major: 14

configure (vendored)
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for Citus 11.0devel.
# Generated by GNU Autoconf 2.69 for Citus 11.1devel.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -579,8 +579,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='Citus'
PACKAGE_TARNAME='citus'
PACKAGE_VERSION='11.0devel'
PACKAGE_STRING='Citus 11.0devel'
PACKAGE_VERSION='11.1devel'
PACKAGE_STRING='Citus 11.1devel'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1260,7 +1260,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures Citus 11.0devel to adapt to many kinds of systems.
\`configure' configures Citus 11.1devel to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1322,7 +1322,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of Citus 11.0devel:";;
short | recursive ) echo "Configuration of Citus 11.1devel:";;
esac
cat <<\_ACEOF
@@ -1425,7 +1425,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
Citus configure 11.0devel
Citus configure 11.1devel
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -1908,7 +1908,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by Citus $as_me 11.0devel, which was
It was created by Citus $as_me 11.1devel, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -5360,7 +5360,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by Citus $as_me 11.0devel, which was
This file was extended by Citus $as_me 11.1devel, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -5422,7 +5422,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
Citus config.status 11.0devel
Citus config.status 11.1devel
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"

@@ -5,7 +5,7 @@
# everyone needing autoconf installed, the resulting files are checked
# into the SCM.
AC_INIT([Citus], [11.0devel])
AC_INIT([Citus], [11.1devel])
AC_COPYRIGHT([Copyright (c) Citus Data, Inc.])
# we'll need sed and awk for some of the version commands

@@ -1,6 +1,6 @@
# Citus extension
comment = 'Citus distributed database'
default_version = '11.0-1'
default_version = '11.1-1'
module_pathname = '$libdir/citus'
relocatable = false
schema = pg_catalog

@@ -64,7 +64,6 @@
#include "distributed/multi_physical_planner.h"
#include "distributed/reference_table_utils.h"
#include "distributed/resource_lock.h"
#include "distributed/transmit.h"
#include "distributed/version_compat.h"
#include "distributed/worker_shard_visibility.h"
#include "distributed/worker_transaction.h"
@@ -427,42 +426,6 @@ ProcessUtilityInternal(PlannedStmt *pstmt,
}
}
/*
* TRANSMIT used to be separate command, but to avoid patching the grammar
* it's now overlaid onto COPY, but with FORMAT = 'transmit' instead of the
* normal FORMAT options.
*/
if (IsTransmitStmt(parsetree))
{
CopyStmt *copyStatement = (CopyStmt *) parsetree;
char *userName = TransmitStatementUser(copyStatement);
bool missingOK = false;
StringInfo transmitPath = makeStringInfo();
VerifyTransmitStmt(copyStatement);
/* ->relation->relname is the target file in our overloaded COPY */
appendStringInfoString(transmitPath, copyStatement->relation->relname);
if (userName != NULL)
{
Oid userId = get_role_oid(userName, missingOK);
appendStringInfo(transmitPath, ".%d", userId);
}
if (copyStatement->is_from)
{
RedirectCopyDataToRegularFile(transmitPath->data);
}
else
{
SendRegularFile(transmitPath->data);
}
/* Don't execute the faux copy statement */
return;
}
if (IsA(parsetree, CopyStmt))
{
MemoryContext planContext = GetMemoryChunkContext(parsetree);

@@ -32,6 +32,8 @@
#include "distributed/transmit.h"
#include "distributed/transaction_identifier.h"
#include "distributed/tuplestore.h"
#include "distributed/utils/array_type.h"
#include "distributed/utils/directory.h"
#include "distributed/version_compat.h"
#include "distributed/worker_protocol.h"
#include "nodes/makefuncs.h"

@@ -27,6 +27,8 @@
#include "distributed/pg_dist_shard.h"
#include "distributed/remote_commands.h"
#include "distributed/tuplestore.h"
#include "distributed/utils/array_type.h"
#include "distributed/utils/function.h"
#include "distributed/version_compat.h"
#include "distributed/worker_protocol.h"
#include "nodes/makefuncs.h"

@@ -19,6 +19,7 @@
#include "distributed/listutils.h"
#include "distributed/relay_utility.h"
#include "distributed/transmit.h"
#include "distributed/utils/directory.h"
#include "distributed/worker_protocol.h"
#include "distributed/version_compat.h"
#include "libpq/libpq.h"
@@ -32,6 +33,7 @@ static void SendCopyOutStart(void);
static void SendCopyDone(void);
static void SendCopyData(StringInfo fileBuffer);
static bool ReceiveCopyData(StringInfo copyData);
static void FreeStringInfo(StringInfo stringInfo);
/*
@@ -121,7 +123,7 @@ SendRegularFile(const char *filename)
/* Helper function that deallocates string info object. */
void
static void
FreeStringInfo(StringInfo stringInfo)
{
resetStringInfo(stringInfo);
@@ -310,113 +312,3 @@ ReceiveCopyData(StringInfo copyData)
return copyDone;
}
/* Is the passed in statement a transmit statement? */
bool
IsTransmitStmt(Node *parsetree)
{
if (IsA(parsetree, CopyStmt))
{
CopyStmt *copyStatement = (CopyStmt *) parsetree;
/* Extract options from the statement node tree */
DefElem *defel = NULL;
foreach_ptr(defel, copyStatement->options)
{
if (strncmp(defel->defname, "format", NAMEDATALEN) == 0 &&
strncmp(defGetString(defel), "transmit", NAMEDATALEN) == 0)
{
return true;
}
}
}
return false;
}
/*
* TransmitStatementUser extracts the user attribute from a
* COPY ... (format 'transmit', user '...') statement.
*/
char *
TransmitStatementUser(CopyStmt *copyStatement)
{
AssertArg(IsTransmitStmt((Node *) copyStatement));
DefElem *lastUserDefElem = NULL;
DefElem *defel = NULL;
foreach_ptr(defel, copyStatement->options)
{
if (strncmp(defel->defname, "user", NAMEDATALEN) == 0)
{
lastUserDefElem = defel;
}
}
if (lastUserDefElem == NULL)
{
return NULL;
}
return defGetString(lastUserDefElem);
}
/*
* VerifyTransmitStmt checks that the passed in command is a valid transmit
* statement. Raise ERROR if not.
*
* Note that only 'toplevel' options in the CopyStmt struct are checked, and
* that verification of the target file's existence is not done here.
*/
void
VerifyTransmitStmt(CopyStmt *copyStatement)
{
EnsureSuperUser();
/* do some minimal option verification */
if (copyStatement->relation == NULL ||
copyStatement->relation->relname == NULL)
{
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("FORMAT 'transmit' requires a target file")));
}
char *fileName = copyStatement->relation->relname;
if (is_absolute_path(fileName))
{
ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
(errmsg("absolute path not allowed"))));
}
else if (!path_is_relative_and_below_cwd(fileName))
{
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
(errmsg("path must be in or below the current directory"))));
}
else if (!CacheDirectoryElement(fileName))
{
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
(errmsg("path must be in the " PG_JOB_CACHE_DIR " directory"))));
}
if (copyStatement->filename != NULL)
{
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("FORMAT 'transmit' only accepts STDIN/STDOUT"
" as input/output")));
}
if (copyStatement->query != NULL ||
copyStatement->attlist != NULL ||
copyStatement->is_program)
{
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("FORMAT 'transmit' does not accept query, attribute list"
" or PROGRAM parameters ")));
}
}
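
For context, the removed TRANSMIT path was driven entirely by an overlaid COPY command: callers such as FetchRegularFileAsSuperUser (further down in this diff) rendered it from the TRANSMIT_WITH_USER_COMMAND template that is likewise deleted from worker_protocol.h. A minimal sketch of that rendering, with a hypothetical helper name:

#include "postgres.h"
#include "lib/stringinfo.h"
#include "utils/builtins.h"     /* quote_literal_cstr */

#define TRANSMIT_WITH_USER_COMMAND \
	"COPY \"%s\" TO STDOUT WITH (format 'transmit', user %s)"

/*
 * BuildTransmitCommand (hypothetical) renders the COPY-overlaid TRANSMIT
 * command that IsTransmitStmt and VerifyTransmitStmt above used to accept.
 */
static StringInfo
BuildTransmitCommand(const char *remoteFilename, const char *userName)
{
	StringInfo transmitCommand = makeStringInfo();
	appendStringInfo(transmitCommand, TRANSMIT_WITH_USER_COMMAND,
					 remoteFilename, quote_literal_cstr(userName));
	return transmitCommand;
}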

@@ -52,6 +52,7 @@
#include "distributed/pg_dist_placement.h"
#include "distributed/shared_library_init.h"
#include "distributed/shardinterval_utils.h"
#include "distributed/utils/function.h"
#include "distributed/version_compat.h"
#include "distributed/worker_manager.h"
#include "distributed/worker_protocol.h"

@@ -62,6 +62,8 @@
#include "distributed/relation_access_tracking.h"
#include "distributed/remote_commands.h"
#include "distributed/resource_lock.h"
#include "distributed/utils/array_type.h"
#include "distributed/utils/function.h"
#include "distributed/worker_manager.h"
#include "distributed/worker_protocol.h"
#include "distributed/worker_transaction.h"

@@ -20,6 +20,8 @@
#include "distributed/multi_client_executor.h"
#include "distributed/multi_server_executor.h"
#include "distributed/remote_commands.h"
#include "distributed/utils/array_type.h"
#include "distributed/utils/function.h"
#include "distributed/version_compat.h"
#include "distributed/worker_protocol.h"
#include "funcapi.h"

@@ -46,6 +46,7 @@
#include "distributed/shard_rebalancer.h"
#include "distributed/shard_cleaner.h"
#include "distributed/tuplestore.h"
#include "distributed/utils/array_type.h"
#include "distributed/worker_protocol.h"
#include "funcapi.h"
#include "miscadmin.h"

@@ -77,6 +77,7 @@
#include "distributed/transaction_management.h"
#include "distributed/transaction_recovery.h"
#include "distributed/utils/directory.h"
#include "distributed/worker_log_messages.h"
#include "distributed/worker_manager.h"
#include "distributed/worker_protocol.h"
@@ -432,12 +433,12 @@ _PG_init(void)
/*
* DoInitialCleanup does cleanup at start time.
* Currently it:
* - Removes repartition directories ( in case there are any leftovers)
* - Removes intermediate result directories ( in case there are any leftovers)
*/
static void
DoInitialCleanup(void)
{
RepartitionCleanupJobDirectories();
CleanupJobCacheDirectory();
}
@@ -669,22 +670,6 @@ RegisterCitusConfigVariables(void)
GUC_NO_SHOW_ALL,
NULL, NULL, NULL);
DefineCustomBoolVariable(
"citus.binary_worker_copy_format",
gettext_noop("Use the binary worker copy format."),
gettext_noop("When enabled, data is copied from workers to workers "
"in PostgreSQL's binary serialization format when "
"joining large tables."),
&BinaryWorkerCopyFormat,
#if PG_VERSION_NUM >= PG_VERSION_14
true,
#else
false,
#endif
PGC_SIGHUP,
GUC_NO_SHOW_ALL,
NULL, NULL, NULL);
DefineCustomBoolVariable(
"citus.check_available_space_before_move",
gettext_noop("When enabled will check free disk space before a shard move"),
@@ -1577,20 +1562,6 @@ RegisterCitusConfigVariables(void)
GUC_NO_SHOW_ALL,
NULL, NULL, NULL);
DefineCustomIntVariable(
"citus.partition_buffer_size",
gettext_noop("Sets the buffer size to use for partition operations."),
gettext_noop("Worker nodes allow for table data to be repartitioned "
"into multiple text files, much like Hadoop's Map "
"command. This configuration value sets the buffer size "
"to use per partition operation. After the buffer fills "
"up, we flush the repartitioned data into text files."),
&PartitionBufferSize,
8192, 0, (INT_MAX / 1024), /* result stored in int variable */
PGC_USERSET,
GUC_UNIT_KB | GUC_STANDARD,
NULL, NULL, NULL);
DefineCustomBoolVariable(
"citus.prevent_incomplete_connection_establishment",
gettext_noop("When enabled, the executor waits until all the connections "

@@ -0,0 +1 @@
-- bump version to 11.0-2

@@ -0,0 +1,8 @@
DROP FUNCTION pg_catalog.worker_create_schema(bigint,text);
DROP FUNCTION pg_catalog.worker_cleanup_job_schema_cache();
DROP FUNCTION pg_catalog.worker_fetch_foreign_file(text, text, bigint, text[], integer[]);
DROP FUNCTION pg_catalog.worker_fetch_partition_file(bigint, integer, integer, integer, text, integer);
DROP FUNCTION pg_catalog.worker_hash_partition_table(bigint, integer, text, text, oid, anyarray);
DROP FUNCTION pg_catalog.worker_merge_files_into_table(bigint, integer, text[], text[]);
DROP FUNCTION pg_catalog.worker_range_partition_table(bigint, integer, text, text, oid, anyarray);
DROP FUNCTION pg_catalog.worker_repartition_cleanup(bigint);

@@ -0,0 +1 @@
-- bump down version to 11.0-1

@@ -0,0 +1,47 @@
CREATE FUNCTION pg_catalog.worker_create_schema(jobid bigint, username text)
RETURNS void
LANGUAGE c
STRICT
AS 'MODULE_PATHNAME', $function$worker_create_schema$function$;
CREATE FUNCTION pg_catalog.worker_cleanup_job_schema_cache()
RETURNS void
LANGUAGE c
STRICT
AS 'MODULE_PATHNAME', $function$worker_cleanup_job_schema_cache$function$;
CREATE FUNCTION pg_catalog.worker_fetch_foreign_file(text, text, bigint, text[], integer[])
RETURNS void
LANGUAGE c
STRICT
AS 'MODULE_PATHNAME', $function$worker_fetch_foreign_file$function$;
CREATE FUNCTION pg_catalog.worker_fetch_partition_file(bigint, integer, integer, integer, text, integer)
RETURNS void
LANGUAGE c
STRICT
AS 'MODULE_PATHNAME', $function$worker_fetch_partition_file$function$;
CREATE FUNCTION pg_catalog.worker_hash_partition_table(bigint, integer, text, text, oid, anyarray)
RETURNS void
LANGUAGE c
STRICT
AS 'MODULE_PATHNAME', $function$worker_hash_partition_table$function$;
CREATE FUNCTION pg_catalog.worker_merge_files_into_table(bigint, integer, text[], text[])
RETURNS void
LANGUAGE c
STRICT
AS 'MODULE_PATHNAME', $function$worker_merge_files_into_table$function$;
CREATE FUNCTION pg_catalog.worker_range_partition_table(bigint, integer, text, text, oid, anyarray)
RETURNS void
LANGUAGE c
STRICT
AS 'MODULE_PATHNAME', $function$worker_range_partition_table$function$;
CREATE FUNCTION pg_catalog.worker_repartition_cleanup(bigint)
RETURNS void
LANGUAGE c
STRICT
AS 'MODULE_PATHNAME', $function$worker_repartition_cleanup$function$;

@@ -17,6 +17,7 @@
#include "distributed/colocation_utils.h"
#include "distributed/listutils.h"
#include "distributed/metadata_cache.h"
#include "distributed/utils/array_type.h"
/* declarations for dynamic loading */

@@ -27,6 +27,7 @@
#include "distributed/remote_commands.h"
#include "distributed/tuplestore.h"
#include "distributed/listutils.h"
#include "distributed/utils/array_type.h"
#include "distributed/version_compat.h"
#include "tcop/tcopprot.h"

@@ -29,6 +29,7 @@
#include "distributed/pg_dist_shard.h"
#include "distributed/query_utils.h"
#include "distributed/resource_lock.h"
#include "distributed/utils/array_type.h"
#include "lib/stringinfo.h"
#include "nodes/pg_list.h"
#include "nodes/primnodes.h"

@@ -1,30 +0,0 @@
#include "postgres.h"
#include "distributed/worker_protocol.h"
#include "distributed/transmit.h"
#include "distributed/metadata_utility.h"
#include "fmgr.h"
#include "lib/stringinfo.h"
PG_FUNCTION_INFO_V1(citus_rm_job_directory);
/*
* citus_rm_job_directory removes the job directory for the given job id.
* Used before beginning multi_query_directory_cleanup.
*/
Datum
citus_rm_job_directory(PG_FUNCTION_ARGS)
{
uint64 jobId = PG_GETARG_INT64(0);
StringInfo jobCacheDirectory = makeStringInfo();
EnsureSuperUser();
appendStringInfo(jobCacheDirectory, "base/%s/%s%0*" INT64_MODIFIER "u",
PG_JOB_CACHE_DIR, JOB_DIRECTORY_PREFIX,
MIN_JOB_DIRNAME_WIDTH, jobId);
CitusRemoveDirectory(jobCacheDirectory->data);
FreeStringInfo(jobCacheDirectory);
PG_RETURN_VOID();
}
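
The removed function composed the job directory path with fixed-width, zero-padded job ids. A minimal sketch of the same formatting under a hypothetical name, producing e.g. "base/pgsql_job_cache/job_0042" for job id 42 (the prefix and width defines are copied from the worker_protocol.h lines removed later in this diff):

#include "postgres.h"
#include "lib/stringinfo.h"
#include "distributed/utils/directory.h"    /* PG_JOB_CACHE_DIR */

#define JOB_DIRECTORY_PREFIX "job_"
#define MIN_JOB_DIRNAME_WIDTH 4

/*
 * JobCacheDirectoryPath (hypothetical) renders the same path shape that
 * the removed citus_rm_job_directory built above.
 */
static StringInfo
JobCacheDirectoryPath(uint64 jobId)
{
	StringInfo path = makeStringInfo();
	appendStringInfo(path, "base/%s/%s%0*" INT64_MODIFIER "u",
					 PG_JOB_CACHE_DIR, JOB_DIRECTORY_PREFIX,
					 MIN_JOB_DIRNAME_WIDTH, jobId);
	return path;
}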

@@ -20,6 +20,7 @@
#include "distributed/maintenanced.h"
#include "distributed/metadata_sync.h"
#include "distributed/remote_commands.h"
#include "distributed/utils/array_type.h"
#include "distributed/worker_manager.h"
#include "postmaster/postmaster.h"
#include "miscadmin.h"

@@ -28,6 +28,7 @@
#include "distributed/multi_physical_planner.h"
#include "distributed/resource_lock.h"
#include "distributed/shard_pruning.h"
#include "distributed/utils/array_type.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "nodes/nodes.h"

@@ -0,0 +1,98 @@
/*-------------------------------------------------------------------------
*
* array_type.c
*
* Utility functions for dealing with array types.
*
* Copyright (c) Citus Data, Inc.
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "miscadmin.h"
#include "distributed/utils/array_type.h"
#include "utils/array.h"
#include "utils/lsyscache.h"
/*
* DeconstructArrayObject takes in a single dimensional array, and deserializes
* this array's members into an array of datum objects. The function then
* returns this datum array.
*/
Datum *
DeconstructArrayObject(ArrayType *arrayObject)
{
Datum *datumArray = NULL;
bool *datumArrayNulls = NULL;
int datumArrayLength = 0;
bool typeByVal = false;
char typeAlign = 0;
int16 typeLength = 0;
bool arrayHasNull = ARR_HASNULL(arrayObject);
if (arrayHasNull)
{
ereport(ERROR, (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
errmsg("worker array object cannot contain null values")));
}
Oid typeId = ARR_ELEMTYPE(arrayObject);
get_typlenbyvalalign(typeId, &typeLength, &typeByVal, &typeAlign);
deconstruct_array(arrayObject, typeId, typeLength, typeByVal, typeAlign,
&datumArray, &datumArrayNulls, &datumArrayLength);
return datumArray;
}
/*
* ArrayObjectCount takes in a single dimensional array, and returns the number
* of elements in this array.
*/
int32
ArrayObjectCount(ArrayType *arrayObject)
{
int32 dimensionCount = ARR_NDIM(arrayObject);
int32 *dimensionLengthArray = ARR_DIMS(arrayObject);
if (dimensionCount == 0)
{
return 0;
}
/* we currently allow split point arrays to have only one subarray */
Assert(dimensionCount == 1);
int32 arrayLength = ArrayGetNItems(dimensionCount, dimensionLengthArray);
if (arrayLength <= 0)
{
ereport(ERROR, (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
errmsg("worker array object cannot be empty")));
}
return arrayLength;
}
/*
* DatumArrayToArrayType converts the provided Datum array (of the specified
* length and type) into an ArrayType suitable for returning from a UDF.
*/
ArrayType *
DatumArrayToArrayType(Datum *datumArray, int datumCount, Oid datumTypeId)
{
int16 typeLength = 0;
bool typeByValue = false;
char typeAlignment = 0;
get_typlenbyvalalign(datumTypeId, &typeLength, &typeByValue, &typeAlignment);
ArrayType *arrayObject = construct_array(datumArray, datumCount, datumTypeId,
typeLength, typeByValue, typeAlignment);
return arrayObject;
}
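
A minimal sketch of how a caller combines the three helpers above to round-trip a text[] argument; the UDF name is hypothetical:

#include "postgres.h"
#include "catalog/pg_type.h"                /* TEXTOID */
#include "distributed/utils/array_type.h"
#include "fmgr.h"

PG_FUNCTION_INFO_V1(echo_text_array);

/*
 * echo_text_array (hypothetical) deconstructs a text[] argument into a
 * Datum array and rebuilds it, exercising all three helpers above. Note
 * that DeconstructArrayObject errors out on NULL elements.
 */
Datum
echo_text_array(PG_FUNCTION_ARGS)
{
	ArrayType *inputArray = PG_GETARG_ARRAYTYPE_P(0);

	Datum *datumArray = DeconstructArrayObject(inputArray);
	int32 datumCount = ArrayObjectCount(inputArray);

	ArrayType *outputArray = DatumArrayToArrayType(datumArray, datumCount,
												   TEXTOID);
	PG_RETURN_ARRAYTYPE_P(outputArray);
}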

@@ -31,6 +31,7 @@
#include "distributed/resource_lock.h"
#include "distributed/shardinterval_utils.h"
#include "distributed/version_compat.h"
#include "distributed/utils/array_type.h"
#include "distributed/worker_protocol.h"
#include "distributed/worker_transaction.h"
#include "storage/lmgr.h"

@@ -0,0 +1,203 @@
/*-------------------------------------------------------------------------
*
* directory.c
*
* Utility functions for dealing with directories.
*
* Copyright (c) Citus Data, Inc.
*
*-------------------------------------------------------------------------
*/
#include <sys/stat.h>
#include <unistd.h>
#include "postgres.h"
#include "funcapi.h"
#include "miscadmin.h"
#include "distributed/utils/directory.h"
/* Local functions forward declarations */
static bool FileIsLink(const char *filename, struct stat filestat);
/*
* CacheDirectoryElement takes in a filename, and checks if this name lives in
* the directory path that is used for job, task, table etc. files.
*/
bool
CacheDirectoryElement(const char *filename)
{
bool directoryElement = false;
StringInfo directoryPath = makeStringInfo();
appendStringInfo(directoryPath, "base/%s/", PG_JOB_CACHE_DIR);
char *directoryPathFound = strstr(filename, directoryPath->data);
/*
* If directoryPath occurs at the beginning of the filename, then the
* pointers should now be equal.
*/
if (directoryPathFound == filename)
{
directoryElement = true;
}
pfree(directoryPath);
return directoryElement;
}
/*
* CitusCreateDirectory creates a new directory with the given directory name.
*/
void
CitusCreateDirectory(StringInfo directoryName)
{
int makeOK = mkdir(directoryName->data, S_IRWXU);
if (makeOK != 0)
{
ereport(ERROR, (errcode_for_file_access(),
errmsg("could not create directory \"%s\": %m",
directoryName->data)));
}
}
/*
* FileIsLink checks whether a file is a symbolic link.
*/
static bool
FileIsLink(const char *filename, struct stat filestat)
{
return S_ISLNK(filestat.st_mode);
}
/*
* CitusRemoveDirectory first checks if the given directory exists. If it does, the
* function recursively deletes the contents of the given directory, and then
* deletes the directory itself. This function is modeled on the Boost file
* system library's remove_all() method.
*/
void
CitusRemoveDirectory(const char *filename)
{
/* files may be added during execution, loop when that occurs */
while (true)
{
struct stat fileStat;
int removed = 0;
int statOK = stat(filename, &fileStat);
if (statOK < 0)
{
if (errno == ENOENT)
{
return; /* if file does not exist, return */
}
else
{
ereport(ERROR, (errcode_for_file_access(),
errmsg("could not stat file \"%s\": %m", filename)));
}
}
/*
* If this is a directory, iterate over all its contents and for each
* content, recurse into this function. Also, make sure that we do not
* recurse into symbolic links.
*/
if (S_ISDIR(fileStat.st_mode) && !FileIsLink(filename, fileStat))
{
const char *directoryName = filename;
DIR *directory = AllocateDir(directoryName);
if (directory == NULL)
{
ereport(ERROR, (errcode_for_file_access(),
errmsg("could not open directory \"%s\": %m",
directoryName)));
}
StringInfo fullFilename = makeStringInfo();
struct dirent *directoryEntry = ReadDir(directory, directoryName);
for (; directoryEntry != NULL; directoryEntry = ReadDir(directory,
directoryName))
{
const char *baseFilename = directoryEntry->d_name;
/* if system file, skip it */
if (strncmp(baseFilename, ".", MAXPGPATH) == 0 ||
strncmp(baseFilename, "..", MAXPGPATH) == 0)
{
continue;
}
resetStringInfo(fullFilename);
appendStringInfo(fullFilename, "%s/%s", directoryName, baseFilename);
CitusRemoveDirectory(fullFilename->data);
}
pfree(fullFilename->data);
pfree(fullFilename);
FreeDir(directory);
}
/* we now have an empty directory or a regular file, remove it */
if (S_ISDIR(fileStat.st_mode))
{
/*
* We ignore the TOCTUO race condition static analysis warning
* here, since we don't actually read the files or directories. We
* simply want to remove them.
*/
removed = rmdir(filename); /* lgtm[cpp/toctou-race-condition] */
if (errno == ENOTEMPTY || errno == EEXIST)
{
continue;
}
}
else
{
/*
* We ignore the TOCTUO race condition static analysis warning
* here, since we don't actually read the files or directories. We
* simply want to remove them.
*/
removed = unlink(filename); /* lgtm[cpp/toctou-race-condition] */
}
if (removed != 0 && errno != ENOENT)
{
ereport(ERROR, (errcode_for_file_access(),
errmsg("could not remove file \"%s\": %m", filename)));
}
return;
}
}
/*
* CleanupJobCacheDirectory cleans up all files in the job cache directory
* as part of this process's start-up logic.
*/
void
CleanupJobCacheDirectory(void)
{
/* use the default tablespace in {datadir}/base */
StringInfo jobCacheDirectory = makeStringInfo();
appendStringInfo(jobCacheDirectory, "base/%s", PG_JOB_CACHE_DIR);
CitusRemoveDirectory(jobCacheDirectory->data);
CitusCreateDirectory(jobCacheDirectory);
pfree(jobCacheDirectory->data);
pfree(jobCacheDirectory);
}
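
A minimal sketch of pairing these helpers in the same remove-then-recreate pattern as CleanupJobCacheDirectory above; the function name and the scratch path are hypothetical:

#include "postgres.h"
#include "lib/stringinfo.h"
#include "distributed/utils/directory.h"

/*
 * RecreateScratchDirectory (hypothetical) wipes and recreates a scratch
 * directory under the job cache directory.
 */
static void
RecreateScratchDirectory(void)
{
	StringInfo scratchPath = makeStringInfo();
	appendStringInfo(scratchPath, "base/%s/scratch", PG_JOB_CACHE_DIR);

	/* CitusRemoveDirectory returns without error when the path is missing */
	CitusRemoveDirectory(scratchPath->data);
	CitusCreateDirectory(scratchPath);

	pfree(scratchPath->data);
	pfree(scratchPath);
}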

@@ -0,0 +1,49 @@
/*-------------------------------------------------------------------------
*
* function.c
*
* Utility functions for dealing with functions.
*
* Copyright (c) Citus Data, Inc.
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "fmgr.h"
#include "miscadmin.h"
#include "commands/defrem.h"
#include "distributed/utils/function.h"
#include "utils/lsyscache.h"
/*
* GetFunctionInfo first resolves the operator for the given data type, access
* method, and support procedure. The function then uses the resolved operator's
* identifier to fill in a function manager object, and returns this object.
*/
FmgrInfo *
GetFunctionInfo(Oid typeId, Oid accessMethodId, int16 procedureId)
{
FmgrInfo *functionInfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo));
/* get default operator class from pg_opclass for datum type */
Oid operatorClassId = GetDefaultOpClass(typeId, accessMethodId);
Oid operatorFamilyId = get_opclass_family(operatorClassId);
Oid operatorClassInputType = get_opclass_input_type(operatorClassId);
Oid operatorId = get_opfamily_proc(operatorFamilyId, operatorClassInputType,
operatorClassInputType, procedureId);
if (operatorId == InvalidOid)
{
ereport(ERROR, (errmsg("could not find function for data typeId %u", typeId)));
}
/* fill in the FmgrInfo struct using the operatorId */
fmgr_info(operatorId, functionInfo);
return functionInfo;
}
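
A minimal sketch of resolving and invoking a comparison function through GetFunctionInfo, assuming the standard btree ordering support procedure (BTORDER_PROC) for int4; the wrapper name is hypothetical:

#include "postgres.h"
#include "access/nbtree.h"      /* BTORDER_PROC */
#include "catalog/pg_am.h"      /* BTREE_AM_OID */
#include "catalog/pg_type.h"    /* INT4OID */
#include "distributed/utils/function.h"
#include "fmgr.h"

/*
 * CompareInt4Datums (hypothetical) returns <0, 0, or >0 by calling the
 * default btree comparison support function for int4.
 */
static int
CompareInt4Datums(Datum leftDatum, Datum rightDatum)
{
	FmgrInfo *compareFunction = GetFunctionInfo(INT4OID, BTREE_AM_OID,
												BTORDER_PROC);
	Datum comparisonResult = FunctionCall2(compareFunction, leftDatum,
										   rightDatum);
	return DatumGetInt32(comparisonResult);
}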

@@ -90,25 +90,6 @@ PointerArrayFromList(List *pointerList)
}
/*
* DatumArrayToArrayType converts the provided Datum array (of the specified
* length and type) into an ArrayType suitable for returning from a UDF.
*/
ArrayType *
DatumArrayToArrayType(Datum *datumArray, int datumCount, Oid datumTypeId)
{
int16 typeLength = 0;
bool typeByValue = false;
char typeAlignment = 0;
get_typlenbyvalalign(datumTypeId, &typeLength, &typeByValue, &typeAlignment);
ArrayType *arrayObject = construct_array(datumArray, datumCount, datumTypeId,
typeLength, typeByValue, typeAlignment);
return arrayObject;
}
/*
* ListToHashSet creates a hash table in which the keys are copied from
* from itemList and the values are the same as the keys. This can

@@ -36,6 +36,7 @@
#include "distributed/resource_lock.h"
#include "distributed/shardinterval_utils.h"
#include "distributed/worker_protocol.h"
#include "distributed/utils/array_type.h"
#include "distributed/version_compat.h"
#include "storage/lmgr.h"
#include "utils/builtins.h"

@@ -360,7 +360,7 @@ FindShardIntervalIndex(Datum searchedValue, CitusTableCacheEntry *cacheEntry)
* array. If it can not find any shard interval with the given value, it returns
* INVALID_SHARD_INDEX.
*
* TODO: Data re-partitioning logic (e.g., worker_hash_partition_table())
* TODO: Data re-partitioning logic (worker_partition_query_result())
* on the worker nodes relies on this function in order to be consistent
* with shard pruning. Since the worker nodes don't have the metadata, a
* synthetically generated ShardInterval ** is passed to this

@@ -62,14 +62,12 @@
/* Local functions forward declarations */
static void FetchRegularFileAsSuperUser(const char *nodeName, uint32 nodePort,
StringInfo remoteFilename,
StringInfo localFilename);
static bool ReceiveRegularFile(const char *nodeName, uint32 nodePort,
const char *nodeUser, StringInfo transmitCommand,
StringInfo filePath);
static void ReceiveResourceCleanup(int32 connectionId, const char *filename,
int32 fileDescriptor);
static CopyStmt * CopyStatement(RangeVar *relation, char *sourceFilename);
static void CitusDeleteFile(const char *filename);
static bool check_log_statement(List *stmt_list);
static void AlterSequenceMinMax(Oid sequenceId, char *schemaName, char *sequenceName,
@@ -77,141 +75,12 @@ static void AlterSequenceMinMax(Oid sequenceId, char *schemaName, char *sequence
/* exports for SQL callable functions */
PG_FUNCTION_INFO_V1(worker_fetch_partition_file);
PG_FUNCTION_INFO_V1(worker_apply_shard_ddl_command);
PG_FUNCTION_INFO_V1(worker_apply_inter_shard_ddl_command);
PG_FUNCTION_INFO_V1(worker_apply_sequence_command);
PG_FUNCTION_INFO_V1(worker_append_table_to_shard);
PG_FUNCTION_INFO_V1(worker_nextval);
/*
* Following UDFs are stub functions, you can check their comments for more
* detail.
*/
PG_FUNCTION_INFO_V1(worker_fetch_regular_table);
PG_FUNCTION_INFO_V1(worker_fetch_foreign_file);
PG_FUNCTION_INFO_V1(master_expire_table_cache);
/*
* worker_fetch_partition_file fetches a partition file from the remote node.
* The function assumes an upstream compute task depends on this partition file,
* and therefore directly fetches the file into the upstream task's directory.
*/
Datum
worker_fetch_partition_file(PG_FUNCTION_ARGS)
{
CheckCitusVersion(ERROR);
uint64 jobId = PG_GETARG_INT64(0);
uint32 partitionTaskId = PG_GETARG_UINT32(1);
uint32 partitionFileId = PG_GETARG_UINT32(2);
uint32 upstreamTaskId = PG_GETARG_UINT32(3);
text *nodeNameText = PG_GETARG_TEXT_P(4);
uint32 nodePort = PG_GETARG_UINT32(5);
/* remote filename is <jobId>/<partitionTaskId>/<partitionFileId> */
StringInfo remoteDirectoryName = TaskDirectoryName(jobId, partitionTaskId);
StringInfo remoteFilename = PartitionFilename(remoteDirectoryName, partitionFileId);
/* local filename is <jobId>/<upstreamTaskId>/<partitionTaskId> */
StringInfo taskDirectoryName = TaskDirectoryName(jobId, upstreamTaskId);
StringInfo taskFilename = UserTaskFilename(taskDirectoryName, partitionTaskId);
/*
* If we are the first function to fetch a file for the upstream task, the
* task directory does not exist. We then lock and create the directory.
*/
bool taskDirectoryExists = DirectoryExists(taskDirectoryName);
if (!taskDirectoryExists)
{
InitTaskDirectory(jobId, upstreamTaskId);
}
char *nodeName = text_to_cstring(nodeNameText);
/* we've made sure the file names are sanitized, safe to fetch as superuser */
FetchRegularFileAsSuperUser(nodeName, nodePort, remoteFilename, taskFilename);
PG_RETURN_VOID();
}
/* Constructs a standardized task file path for given directory and task id. */
StringInfo
TaskFilename(StringInfo directoryName, uint32 taskId)
{
StringInfo taskFilename = makeStringInfo();
appendStringInfo(taskFilename, "%s/%s%0*u",
directoryName->data,
TASK_FILE_PREFIX, MIN_TASK_FILENAME_WIDTH, taskId);
return taskFilename;
}
/*
* UserTaskFilename returns a full file path for a task file including the
* current user ID as a suffix.
*/
StringInfo
UserTaskFilename(StringInfo directoryName, uint32 taskId)
{
StringInfo taskFilename = TaskFilename(directoryName, taskId);
appendStringInfo(taskFilename, ".%u", GetUserId());
return taskFilename;
}
/*
* FetchRegularFileAsSuperUser copies a file from a remote node in an idempotent
* manner. It connects to the remote node as superuser to give file access.
* Callers must make sure that the file names are sanitized.
*/
static void
FetchRegularFileAsSuperUser(const char *nodeName, uint32 nodePort,
StringInfo remoteFilename, StringInfo localFilename)
{
char *userName = CurrentUserName();
uint32 randomId = (uint32) random();
/*
* We create an attempt file to signal that the file is still in transit. We
* further append a random id to the filename to handle the unexpected case
* of another process concurrently fetching the same file.
*/
StringInfo attemptFilename = makeStringInfo();
appendStringInfo(attemptFilename, "%s_%0*u%s", localFilename->data,
MIN_TASK_FILENAME_WIDTH, randomId, ATTEMPT_FILE_SUFFIX);
StringInfo transmitCommand = makeStringInfo();
appendStringInfo(transmitCommand, TRANSMIT_WITH_USER_COMMAND, remoteFilename->data,
quote_literal_cstr(userName));
/* connect as superuser to give file access */
char *nodeUser = CitusExtensionOwnerName();
bool received = ReceiveRegularFile(nodeName, nodePort, nodeUser, transmitCommand,
attemptFilename);
if (!received)
{
ereport(ERROR, (errmsg("could not receive file \"%s\" from %s:%u",
remoteFilename->data, nodeName, nodePort)));
}
/* atomically rename the attempt file */
int renamed = rename(attemptFilename->data, localFilename->data);
if (renamed != 0)
{
ereport(ERROR, (errcode_for_file_access(),
errmsg("could not rename file \"%s\" to \"%s\": %m",
attemptFilename->data, localFilename->data)));
}
}
/*
* ReceiveRegularFile creates a local file at the given file path, and connects
@@ -712,6 +581,26 @@ worker_append_table_to_shard(PG_FUNCTION_ARGS)
}
/*
* CopyStatement creates and initializes a copy statement to read the given
* file's contents into the given table, using copy's standard text format.
*/
static CopyStmt *
CopyStatement(RangeVar *relation, char *sourceFilename)
{
CopyStmt *copyStatement = makeNode(CopyStmt);
copyStatement->relation = relation;
copyStatement->query = NULL;
copyStatement->attlist = NIL;
copyStatement->options = NIL;
copyStatement->is_from = true;
copyStatement->is_program = false;
copyStatement->filename = sourceFilename;
return copyStatement;
}
/*
* worker_nextval calculates nextval() in worker nodes
* for int and smallint column default types
@@ -863,39 +752,3 @@ SetDefElemArg(AlterSeqStmt *statement, const char *name, Node *arg)
statement->options = lappend(statement->options, defElem);
}
/*
* worker_fetch_regular_table UDF is a stub UDF to install Citus flawlessly.
* Otherwise we need to delete them from our sql files, which is confusing
*/
Datum
worker_fetch_regular_table(PG_FUNCTION_ARGS)
{
ereport(DEBUG2, (errmsg("this function is deprecated and no longer is used")));
PG_RETURN_VOID();
}
/*
* worker_fetch_foreign_file UDF is a stub UDF to install Citus flawlessly.
* Otherwise we need to delete them from our sql files, which is confusing
*/
Datum
worker_fetch_foreign_file(PG_FUNCTION_ARGS)
{
ereport(DEBUG2, (errmsg("this function is deprecated and no longer is used")));
PG_RETURN_VOID();
}
/*
* master_expire_table_cache UDF is a stub UDF to install Citus flawlessly.
* Otherwise we need to delete them from our sql files, which is confusing
*/
Datum
master_expire_table_cache(PG_FUNCTION_ARGS)
{
ereport(DEBUG2, (errmsg("this function is deprecated and no longer is used")));
PG_RETURN_VOID();
}

@@ -1,91 +0,0 @@
/*-------------------------------------------------------------------------
*
* worker_file_access_protocol.c
*
* Routines for accessing file related information on this worker node.
*
* Copyright (c) Citus Data, Inc.
*
* $Id$
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "funcapi.h"
#include "commands/defrem.h"
#include "distributed/listutils.h"
#include "distributed/coordinator_protocol.h"
#include "distributed/worker_protocol.h"
#include "foreign/foreign.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
/* exports for SQL callable functions */
PG_FUNCTION_INFO_V1(worker_foreign_file_path);
PG_FUNCTION_INFO_V1(worker_find_block_local_path);
/*
* worker_foreign_file_path resolves the foreign table for the given table name,
* and extracts and returns the file path associated with that foreign table.
*/
Datum
worker_foreign_file_path(PG_FUNCTION_ARGS)
{
CheckCitusVersion(ERROR);
text *foreignTableName = PG_GETARG_TEXT_P(0);
text *foreignFilePath = NULL;
Oid relationId = ResolveRelationId(foreignTableName, false);
ForeignTable *foreignTable = GetForeignTable(relationId);
DefElem *option = NULL;
foreach_ptr(option, foreignTable->options)
{
char *optionName = option->defname;
int compareResult = strncmp(optionName, FOREIGN_FILENAME_OPTION, MAXPGPATH);
if (compareResult == 0)
{
char *optionValue = defGetString(option);
foreignFilePath = cstring_to_text(optionValue);
break;
}
}
/* check that we found the filename option */
if (foreignFilePath == NULL)
{
char *relationName = get_rel_name(relationId);
ereport(ERROR, (errmsg("could not find filename for foreign table: \"%s\"",
relationName)));
}
PG_RETURN_TEXT_P(foreignFilePath);
}
/*
* Protocol declaration for a function whose future implementation will find the
* given HDFS block's local file path.
*/
Datum
worker_find_block_local_path(PG_FUNCTION_ARGS)
{
CheckCitusVersion(ERROR);
int64 blockId = PG_GETARG_INT64(0);
ArrayType *dataDirectoryObject = PG_GETARG_ARRAYTYPE_P(1);
/* keep the compiler silent */
(void) blockId;
(void) dataDirectoryObject;
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("called function is currently unsupported")));
PG_RETURN_TEXT_P(NULL);
}

@@ -1,612 +0,0 @@
/*-------------------------------------------------------------------------
*
* worker_merge_protocol.c
*
* Routines for merging partitioned files into a single file or table. Merging
* files is one of the three distributed execution primitives that we apply on
* worker nodes.
*
* Copyright (c) Citus Data, Inc.
*
* $Id$
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "distributed/pg_version_constants.h"
#include "funcapi.h"
#include "miscadmin.h"
#include "access/genam.h"
#include "access/table.h"
#include "access/htup_details.h"
#include "access/xact.h"
#include "catalog/dependency.h"
#include "catalog/pg_namespace.h"
#include "commands/copy.h"
#include "commands/tablecmds.h"
#include "common/string.h"
#include "distributed/listutils.h"
#include "distributed/metadata_cache.h"
#include "distributed/worker_protocol.h"
#include "distributed/version_compat.h"
#include "executor/spi.h"
#include "nodes/makefuncs.h"
#include "parser/parse_relation.h"
#include "parser/parse_type.h"
#include "storage/lmgr.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/snapmgr.h"
#include "utils/syscache.h"
#include "commands/schemacmds.h"
#include "distributed/resource_lock.h"
/* Local functions forward declarations */
static List * ArrayObjectToCStringList(ArrayType *arrayObject);
static void CreateTaskTable(StringInfo schemaName, StringInfo relationName,
List *columnNameList, List *columnTypeList);
static void CopyTaskFilesFromDirectory(StringInfo schemaName, StringInfo relationName,
StringInfo sourceDirectoryName, Oid userId);
static void CreateJobSchema(StringInfo schemaName, char *schemaOwner);
/* exports for SQL callable functions */
PG_FUNCTION_INFO_V1(worker_merge_files_into_table);
PG_FUNCTION_INFO_V1(worker_merge_files_and_run_query);
PG_FUNCTION_INFO_V1(worker_cleanup_job_schema_cache);
PG_FUNCTION_INFO_V1(worker_create_schema);
PG_FUNCTION_INFO_V1(worker_repartition_cleanup);
/*
* worker_create_schema creates a schema with the given job id in local.
*/
Datum
worker_create_schema(PG_FUNCTION_ARGS)
{
CheckCitusVersion(ERROR);
uint64 jobId = PG_GETARG_INT64(0);
text *ownerText = PG_GETARG_TEXT_P(1);
char *ownerString = TextDatumGetCString(ownerText);
StringInfo jobSchemaName = JobSchemaName(jobId);
bool schemaExists = JobSchemaExists(jobSchemaName);
if (!schemaExists)
{
CreateJobSchema(jobSchemaName, ownerString);
}
PG_RETURN_VOID();
}
/*
* CreateJobSchema creates a job schema with the given schema name. Note that
* this function ensures that our pg_ prefixed schema names can be created.
* Further note that the created schema does not become visible to other
* processes until the transaction commits.
*
* If schemaOwner is NULL, then current user is used.
*/
static void
CreateJobSchema(StringInfo schemaName, char *schemaOwner)
{
const char *queryString = NULL;
Oid savedUserId = InvalidOid;
int savedSecurityContext = 0;
RoleSpec currentUserRole = { 0 };
/* allow schema names that start with pg_ */
bool oldAllowSystemTableMods = allowSystemTableMods;
allowSystemTableMods = true;
/* ensure we're allowed to create this schema */
GetUserIdAndSecContext(&savedUserId, &savedSecurityContext);
SetUserIdAndSecContext(CitusExtensionOwner(), SECURITY_LOCAL_USERID_CHANGE);
if (schemaOwner == NULL)
{
schemaOwner = GetUserNameFromId(savedUserId, false);
}
/* build a CREATE SCHEMA statement */
currentUserRole.type = T_RoleSpec;
currentUserRole.roletype = ROLESPEC_CSTRING;
currentUserRole.rolename = schemaOwner;
currentUserRole.location = -1;
CreateSchemaStmt *createSchemaStmt = makeNode(CreateSchemaStmt);
createSchemaStmt->schemaname = schemaName->data;
createSchemaStmt->schemaElts = NIL;
/* actually create schema with the current user as owner */
createSchemaStmt->authrole = &currentUserRole;
CreateSchemaCommand(createSchemaStmt, queryString, -1, -1);
CommandCounterIncrement();
/* and reset environment */
SetUserIdAndSecContext(savedUserId, savedSecurityContext);
allowSystemTableMods = oldAllowSystemTableMods;
}
/*
* worker_repartition_cleanup removes the job directory and schema with the given job id.
*/
Datum
worker_repartition_cleanup(PG_FUNCTION_ARGS)
{
CheckCitusVersion(ERROR);
uint64 jobId = PG_GETARG_INT64(0);
StringInfo jobDirectoryName = JobDirectoryName(jobId);
StringInfo jobSchemaName = JobSchemaName(jobId);
Oid schemaId = get_namespace_oid(jobSchemaName->data, false);
EnsureSchemaOwner(schemaId);
CitusRemoveDirectory(jobDirectoryName->data);
RemoveJobSchema(jobSchemaName);
PG_RETURN_VOID();
}
/*
* worker_merge_files_into_table creates a task table within the job's schema,
* which should have already been created by repartition join execution, and
* copies files in its task directory into this table. If the schema doesn't
* exist, the function defaults to the 'public' schema. Note that, unlike
* partitioning functions, this function is not always idempotent. On success,
* the function creates the table and loads data, and subsequent calls to the
* function error out because the table already exist. On failure, the task
* table creation commands are rolled back, and the function can be called
* again.
*/
Datum
worker_merge_files_into_table(PG_FUNCTION_ARGS)
{
CheckCitusVersion(ERROR);
uint64 jobId = PG_GETARG_INT64(0);
uint32 taskId = PG_GETARG_UINT32(1);
ArrayType *columnNameObject = PG_GETARG_ARRAYTYPE_P(2);
ArrayType *columnTypeObject = PG_GETARG_ARRAYTYPE_P(3);
StringInfo jobSchemaName = JobSchemaName(jobId);
StringInfo taskTableName = TaskTableName(taskId);
StringInfo taskDirectoryName = TaskDirectoryName(jobId, taskId);
Oid userId = GetUserId();
/* we should have the same number of column names and types */
int32 columnNameCount = ArrayObjectCount(columnNameObject);
int32 columnTypeCount = ArrayObjectCount(columnTypeObject);
if (columnNameCount != columnTypeCount)
{
ereport(ERROR, (errmsg("column name array size: %d and type array size: %d"
" do not match", columnNameCount, columnTypeCount)));
}
/*
* If the schema for the job isn't already created by the repartition join
* execution, we fall to using the default 'public' schema.
*/
bool schemaExists = JobSchemaExists(jobSchemaName);
if (!schemaExists)
{
/*
* For testing purposes, we allow merging into a table in the public schema,
* but only when running as superuser.
*/
if (!superuser())
{
ereport(ERROR, (errmsg("job schema does not exist"),
errdetail("must be superuser to use public schema")));
}
resetStringInfo(jobSchemaName);
appendStringInfoString(jobSchemaName, "public");
}
else
{
Oid schemaId = get_namespace_oid(jobSchemaName->data, false);
EnsureSchemaOwner(schemaId);
}
/* create the task table and copy files into the table */
List *columnNameList = ArrayObjectToCStringList(columnNameObject);
List *columnTypeList = ArrayObjectToCStringList(columnTypeObject);
CreateTaskTable(jobSchemaName, taskTableName, columnNameList, columnTypeList);
CopyTaskFilesFromDirectory(jobSchemaName, taskTableName, taskDirectoryName,
userId);
PG_RETURN_VOID();
}
/* This UDF is deprecated.*/
Datum
worker_merge_files_and_run_query(PG_FUNCTION_ARGS)
{
ereport(ERROR, (errmsg("This UDF is deprecated.")));
PG_RETURN_VOID();
}
/*
* worker_cleanup_job_schema_cache walks over all schemas in the database, and
* removes schemas whose names start with the job schema prefix. Note that this
* function does not perform any locking; we expect it to be called at process
* start-up time before any merge tasks are run. Further note that this function
* runs within the scope of a particular database (template1, postgres) and can
* only delete schemas within that database.
*/
Datum
worker_cleanup_job_schema_cache(PG_FUNCTION_ARGS)
{
CheckCitusVersion(ERROR);
ScanKey scanKey = NULL;
int scanKeyCount = 0;
Relation pgNamespace = table_open(NamespaceRelationId, AccessExclusiveLock);
TableScanDesc scanDescriptor = table_beginscan_catalog(pgNamespace, scanKeyCount,
scanKey);
HeapTuple heapTuple = heap_getnext(scanDescriptor, ForwardScanDirection);
while (HeapTupleIsValid(heapTuple))
{
Form_pg_namespace schemaForm = (Form_pg_namespace) GETSTRUCT(heapTuple);
char *schemaName = NameStr(schemaForm->nspname);
char *jobSchemaFound = strstr(schemaName, JOB_SCHEMA_PREFIX);
if (jobSchemaFound != NULL)
{
StringInfo jobSchemaName = makeStringInfo();
appendStringInfoString(jobSchemaName, schemaName);
RemoveJobSchema(jobSchemaName);
}
heapTuple = heap_getnext(scanDescriptor, ForwardScanDirection);
}
heap_endscan(scanDescriptor);
table_close(pgNamespace, AccessExclusiveLock);
PG_RETURN_VOID();
}
/* Constructs a standardized job schema name for the given job id. */
StringInfo
JobSchemaName(uint64 jobId)
{
StringInfo jobSchemaName = makeStringInfo();
appendStringInfo(jobSchemaName, "%s%0*" INT64_MODIFIER "u", JOB_SCHEMA_PREFIX,
MIN_JOB_DIRNAME_WIDTH, jobId);
return jobSchemaName;
}
/* Constructs a standardized task table name for the given task id. */
StringInfo
TaskTableName(uint32 taskId)
{
StringInfo taskTableName = makeStringInfo();
appendStringInfo(taskTableName, "%s%0*u",
TASK_TABLE_PREFIX, MIN_TASK_FILENAME_WIDTH, taskId);
return taskTableName;
}
/* Creates a list of cstrings from a single dimensional array object. */
static List *
ArrayObjectToCStringList(ArrayType *arrayObject)
{
List *cstringList = NIL;
Datum *datumArray = DeconstructArrayObject(arrayObject);
int32 arraySize = ArrayObjectCount(arrayObject);
for (int32 arrayIndex = 0; arrayIndex < arraySize; arrayIndex++)
{
Datum datum = datumArray[arrayIndex];
char *cstring = TextDatumGetCString(datum);
cstringList = lappend(cstringList, cstring);
}
Assert(cstringList != NIL);
return cstringList;
}
/* Checks if a schema with the given schema name exists. */
bool
JobSchemaExists(StringInfo schemaName)
{
Datum schemaNameDatum = CStringGetDatum(schemaName->data);
bool schemaExists = SearchSysCacheExists(NAMESPACENAME, schemaNameDatum, 0, 0, 0);
return schemaExists;
}
/* Removes the schema and all tables within the schema, if the schema exists. */
void
RemoveJobSchema(StringInfo schemaName)
{
Datum schemaNameDatum = CStringGetDatum(schemaName->data);
Oid schemaId = GetSysCacheOid1Compat(NAMESPACENAME, Anum_pg_namespace_oid,
schemaNameDatum);
if (OidIsValid(schemaId))
{
ObjectAddress schemaObject = { 0, 0, 0 };
bool permissionsOK = pg_namespace_ownercheck(schemaId, GetUserId());
if (!permissionsOK)
{
aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_SCHEMA, schemaName->data);
}
schemaObject.classId = NamespaceRelationId;
schemaObject.objectId = schemaId;
schemaObject.objectSubId = 0;
/*
* We first delete all tables in this schema. Rather than relying on the
* schema command, we call the dependency mechanism directly so that we
* can suppress notice messages that are typically displayed during
* cascading deletes.
*/
performDeletion(&schemaObject, DROP_CASCADE,
PERFORM_DELETION_INTERNAL |
PERFORM_DELETION_QUIETLY |
PERFORM_DELETION_SKIP_ORIGINAL |
PERFORM_DELETION_SKIP_EXTENSIONS);
CommandCounterIncrement();
/* drop the empty schema */
performDeletion(&schemaObject, DROP_RESTRICT, 0);
CommandCounterIncrement();
}
else
{
ereport(DEBUG2, (errmsg("schema \"%s\" does not exist, skipping",
schemaName->data)));
}
}
/* Creates a simple table that only defines columns, in the given schema. */
static void
CreateTaskTable(StringInfo schemaName, StringInfo relationName,
List *columnNameList, List *columnTypeList)
{
Oid relationId PG_USED_FOR_ASSERTS_ONLY = InvalidOid;
Assert(schemaName != NULL);
Assert(relationName != NULL);
RangeVar *relation = makeRangeVar(schemaName->data, relationName->data, -1);
/* this table will only exist for the duration of the query, avoid writing to WAL */
relation->relpersistence = RELPERSISTENCE_UNLOGGED;
List *columnDefinitionList = ColumnDefinitionList(columnNameList, columnTypeList);
CreateStmt *createStatement = CreateStatement(relation, columnDefinitionList);
ObjectAddress relationObject = DefineRelation(createStatement, RELKIND_RELATION,
InvalidOid, NULL,
NULL);
relationId = relationObject.objectId;
Assert(relationId != InvalidOid);
CommandCounterIncrement();
}
/*
* ColumnDefinitionList creates and returns a list of column definition objects
* from two lists of column names and types. As an example, this function takes
* in two single elements lists: "l_quantity" and "decimal(15, 2)". The function
* then returns a list with one column definition, where the column's name is
* l_quantity, its type is numeric, and the type modifier represents (15, 2).
*/
List *
ColumnDefinitionList(List *columnNameList, List *columnTypeList)
{
List *columnDefinitionList = NIL;
const char *columnName = NULL;
const char *columnType = NULL;
forboth_ptr(columnName, columnNameList, columnType, columnTypeList)
{
/*
* We should have a SQL compatible column type declaration; we first
* convert this type to PostgreSQL's type identifiers and modifiers.
*/
Oid columnTypeId = InvalidOid;
int32 columnTypeMod = -1;
bool missingOK = false;
parseTypeString(columnType, &columnTypeId, &columnTypeMod, missingOK);
TypeName *typeName = makeTypeNameFromOid(columnTypeId, columnTypeMod);
/* we then create the column definition */
ColumnDef *columnDefinition = makeNode(ColumnDef);
columnDefinition->colname = (char *) columnName;
columnDefinition->typeName = typeName;
columnDefinition->is_local = true;
columnDefinition->is_not_null = false;
columnDefinition->raw_default = NULL;
columnDefinition->cooked_default = NULL;
columnDefinition->constraints = NIL;
columnDefinitionList = lappend(columnDefinitionList, columnDefinition);
}
return columnDefinitionList;
}
/*
* CreateStatement creates and initializes a simple table create statement that
* only has column definitions.
*/
CreateStmt *
CreateStatement(RangeVar *relation, List *columnDefinitionList)
{
CreateStmt *createStatement = makeNode(CreateStmt);
createStatement->relation = relation;
createStatement->tableElts = columnDefinitionList;
createStatement->inhRelations = NIL;
createStatement->constraints = NIL;
createStatement->options = NIL;
createStatement->oncommit = ONCOMMIT_NOOP;
createStatement->tablespacename = NULL;
createStatement->if_not_exists = false;
return createStatement;
}
/*
* CopyTaskFilesFromDirectory finds all files in the given directory, except for
* those having an attempt suffix. The function then copies these files into the
* database table identified by the given schema and table name.
*
* The function makes sure all files were generated by the current user by checking
* whether the filename ends with the username, since this is added to local file
* names by functions such as worker_fetch_partition_file. Files that were generated
* by other users calling worker_fetch_partition_file directly are skipped.
*/
static void
CopyTaskFilesFromDirectory(StringInfo schemaName, StringInfo relationName,
StringInfo sourceDirectoryName, Oid userId)
{
const char *directoryName = sourceDirectoryName->data;
uint64 copiedRowTotal = 0;
StringInfo expectedFileSuffix = makeStringInfo();
DIR *directory = AllocateDir(directoryName);
if (directory == NULL)
{
ereport(ERROR, (errcode_for_file_access(),
errmsg("could not open directory \"%s\": %m", directoryName)));
}
appendStringInfo(expectedFileSuffix, ".%u", userId);
struct dirent *directoryEntry = ReadDir(directory, directoryName);
for (; directoryEntry != NULL; directoryEntry = ReadDir(directory, directoryName))
{
const char *baseFilename = directoryEntry->d_name;
const char *queryString = NULL;
uint64 copiedRowCount = 0;
/* if system file or lingering task file, skip it */
if (strncmp(baseFilename, ".", MAXPGPATH) == 0 ||
strncmp(baseFilename, "..", MAXPGPATH) == 0 ||
strstr(baseFilename, ATTEMPT_FILE_SUFFIX) != NULL)
{
continue;
}
if (!pg_str_endswith(baseFilename, expectedFileSuffix->data))
{
/*
* Someone is trying to tamper with our results. We don't throw an error
* here because we don't want to allow users to prevent each other from
* running queries.
*/
ereport(WARNING, (errmsg("Task file \"%s\" does not have expected suffix "
"\"%s\"", baseFilename, expectedFileSuffix->data)));
continue;
}
StringInfo fullFilename = makeStringInfo();
appendStringInfo(fullFilename, "%s/%s", directoryName, baseFilename);
/* build relation object and copy statement */
RangeVar *rangeVar = makeRangeVar(schemaName->data, relationName->data, -1);
CopyStmt *copyStatement = CopyStatement(rangeVar, fullFilename->data);
if (BinaryWorkerCopyFormat)
{
DefElem *copyOption = makeDefElem("format", (Node *) makeString("binary"),
-1);
copyStatement->options = list_make1(copyOption);
}
{
ParseState *parseState = make_parsestate(NULL);
parseState->p_sourcetext = queryString;
Relation relation = table_openrv(rangeVar, RowExclusiveLock);
(void) addRangeTableEntryForRelation(parseState, relation, RowExclusiveLock,
NULL, false, false);
CopyFromState copyState = BeginCopyFrom_compat(parseState,
relation,
NULL,
copyStatement->filename,
copyStatement->is_program,
NULL,
copyStatement->attlist,
copyStatement->options);
copiedRowCount = CopyFrom(copyState);
EndCopyFrom(copyState);
free_parsestate(parseState);
table_close(relation, NoLock);
}
copiedRowTotal += copiedRowCount;
CommandCounterIncrement();
}
ereport(DEBUG2, (errmsg("copied " UINT64_FORMAT " rows into table: \"%s.%s\"",
copiedRowTotal, schemaName->data, relationName->data)));
FreeDir(directory);
}
/*
* CopyStatement creates and initializes a copy statement to read the given
* file's contents into the given table, using copy's standard text format.
*/
CopyStmt *
CopyStatement(RangeVar *relation, char *sourceFilename)
{
CopyStmt *copyStatement = makeNode(CopyStmt);
copyStatement->relation = relation;
copyStatement->query = NULL;
copyStatement->attlist = NIL;
copyStatement->options = NIL;
copyStatement->is_from = true;
copyStatement->is_program = false;
copyStatement->filename = sourceFilename;
return copyStatement;
}
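
The merge primitives deleted above built task tables from parallel lists of column names and types. A minimal sketch of that flow, using the ColumnDefinitionList and CreateStatement helpers from the deleted file; the wrapper name and its columns are hypothetical:

#include "postgres.h"
#include "access/xact.h"            /* CommandCounterIncrement */
#include "catalog/pg_class.h"       /* RELKIND_RELATION */
#include "commands/tablecmds.h"     /* DefineRelation */
#include "distributed/worker_protocol.h"
#include "nodes/makefuncs.h"
#include "nodes/pg_list.h"

/*
 * CreateTwoColumnTable (hypothetical) mirrors the removed CreateTaskTable:
 * build column definitions, wrap them in a CreateStmt, define the relation.
 */
static void
CreateTwoColumnTable(char *schemaName, char *tableName)
{
	List *columnNameList = list_make2(pstrdup("key"), pstrdup("value"));
	List *columnTypeList = list_make2(pstrdup("bigint"), pstrdup("text"));

	RangeVar *relation = makeRangeVar(schemaName, tableName, -1);
	List *columnDefinitionList = ColumnDefinitionList(columnNameList,
													  columnTypeList);
	CreateStmt *createStatement = CreateStatement(relation, columnDefinitionList);

	DefineRelation(createStatement, RELKIND_RELATION, InvalidOid, NULL, NULL);
	CommandCounterIncrement();
}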

File diff suppressed because it is too large.

@@ -31,6 +31,9 @@
/* A string containing the version number, platform, and C compiler */
#undef CITUS_VERSION_STR
/* Define to 1 to build with lz4 support. (--with-lz4) */
#undef HAVE_CITUS_LIBLZ4
/* Define to 1 if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H
@@ -38,7 +41,7 @@
#undef HAVE_LIBCURL
/* Define to 1 if you have the `lz4' library (-llz4). */
#undef HAVE_CITUS_LIBLZ4
#undef HAVE_LIBLZ4
/* Define to 1 if you have the `zstd' library (-lzstd). */
#undef HAVE_LIBZSTD

@@ -170,8 +170,6 @@ typedef struct ListCellAndListWrapper
extern List * SortList(List *pointerList,
int (*ComparisonFunction)(const void *, const void *));
extern void ** PointerArrayFromList(List *pointerList);
extern ArrayType * DatumArrayToArrayType(Datum *datumArray, int datumCount,
Oid datumTypeId);
extern HTAB * ListToHashSet(List *pointerList, Size keySize, bool isStringList);
extern char * StringJoin(List *stringList, char delimiter);
extern List * ListTake(List *pointerList, int size);

@@ -23,13 +23,5 @@ extern void RedirectCopyDataToRegularFile(const char *filename);
extern void SendRegularFile(const char *filename);
extern File FileOpenForTransmit(const char *filename, int fileFlags, int fileMode);
/* Function declaration local to commands and worker modules */
extern void FreeStringInfo(StringInfo stringInfo);
/* Local functions forward declarations for Transmit statement */
extern bool IsTransmitStmt(Node *parsetree);
extern char * TransmitStatementUser(CopyStmt *copyStatement);
extern void VerifyTransmitStmt(CopyStmt *copyStatement);
#endif /* TRANSMIT_H */

@@ -0,0 +1,25 @@
/*-------------------------------------------------------------------------
*
* array_type.h
* Utility functions for dealing with array types.
*
* Copyright (c) Citus Data, Inc.
*
*-------------------------------------------------------------------------
*/
#ifndef CITUS_ARRAY_TYPE_H
#define CITUS_ARRAY_TYPE_H
#include "postgres.h"
#include "utils/array.h"
extern Datum * DeconstructArrayObject(ArrayType *arrayObject);
extern int32 ArrayObjectCount(ArrayType *arrayObject);
extern ArrayType * DatumArrayToArrayType(Datum *datumArray, int datumCount,
Oid datumTypeId);
#endif /* CITUS_ARRAY_TYPE_H */

@@ -0,0 +1,27 @@
/*-------------------------------------------------------------------------
*
* directory.h
* Utility functions for dealing with directories.
*
* Copyright (c) Citus Data, Inc.
*
*-------------------------------------------------------------------------
*/
#ifndef CITUS_DIRECTORY_H
#define CITUS_DIRECTORY_H
#include "postgres.h"
#include "lib/stringinfo.h"
#define PG_JOB_CACHE_DIR "pgsql_job_cache"
extern bool CacheDirectoryElement(const char *filename);
extern void CleanupJobCacheDirectory(void);
extern void CitusCreateDirectory(StringInfo directoryName);
extern void CitusRemoveDirectory(const char *filename);
#endif /* CITUS_DIRECTORY_H */

View File

@ -0,0 +1,21 @@
/*-------------------------------------------------------------------------
*
* function.h
* Utility functions for dealing with functions.
*
* Copyright (c) Citus Data, Inc.
*
*-------------------------------------------------------------------------
*/
#ifndef CITUS_FUNCTION_H
#define CITUS_FUNCTION_H
#include "postgres.h"
#include "fmgr.h"
extern FmgrInfo * GetFunctionInfo(Oid typeId, Oid accessMethodId, int16 procedureId);
#endif /* CITUS_FUNCTION_H */

View File

@ -28,25 +28,8 @@
/* Number of rows to prefetch when reading data with a cursor */
#define ROW_PREFETCH_COUNT 50
/* Directory, file, table name, and UDF related defines for distributed tasks */
#define PG_JOB_CACHE_DIR "pgsql_job_cache"
#define MASTER_JOB_DIRECTORY_PREFIX "master_job_"
#define JOB_DIRECTORY_PREFIX "job_"
#define JOB_SCHEMA_PREFIX "pg_merge_job_"
#define TASK_FILE_PREFIX "task_"
#define TASK_TABLE_PREFIX "task_"
#define PARTITION_FILE_PREFIX "p_"
#define ATTEMPT_FILE_SUFFIX ".attempt"
#define MERGE_TABLE_SUFFIX "_merge"
#define MIN_JOB_DIRNAME_WIDTH 4
#define MIN_TASK_FILENAME_WIDTH 6
#define MIN_PARTITION_FILENAME_WIDTH 5
#define FOREIGN_FILENAME_OPTION "filename"
/* Defines used for fetching files and tables */
/* the table name in the overloaded COPY statement names the file to be transferred */
#define TRANSMIT_WITH_USER_COMMAND \
"COPY \"%s\" TO STDOUT WITH (format 'transmit', user %s)"
#define COPY_OUT_COMMAND "COPY %s TO STDOUT"
#define COPY_SELECT_ALL_OUT_COMMAND "COPY (SELECT * FROM %s) TO STDOUT"
#define COPY_IN_COMMAND "COPY %s FROM '%s'"
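As a hedged illustration of how these format strings were used (the concrete path and user name are hypothetical), TRANSMIT_WITH_USER_COMMAND expanded into a statement like:

COPY "base/pgsql_job_cache/job_0042/task_000001/p_00001" TO STDOUT WITH (format 'transmit', user full_access)

which a fetching worker would pair with COPY_IN_COMMAND to load the received file into a local task table.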
@ -58,78 +41,12 @@
#define CREATE_TABLE_AS_COMMAND "CREATE TABLE %s (%s) AS (%s)"
/*
* RangePartitionContext keeps range re-partitioning related data. The Btree
* comparison function is set according to the partitioned column's data type.
*/
typedef struct RangePartitionContext
{
FmgrInfo *comparisonFunction;
Datum *splitPointArray;
int32 splitPointCount;
} RangePartitionContext;
/*
* HashPartitionContext keeps hash re-partitioning related data. The hashing
* function is set according to the partitioned column's data type.
*/
typedef struct HashPartitionContext
{
FmgrInfo *hashFunction;
FmgrInfo *comparisonFunction;
ShardInterval **syntheticShardIntervalArray;
uint32 partitionCount;
bool hasUniformHashDistribution;
} HashPartitionContext;
/*
* FileOutputStream helps buffer write operations to a file; these writes are
* then regularly flushed to the underlying file. This structure differs from
* standard file output streams in that it keeps a larger buffer, and only
* supports appending data to virtual file descriptors.
*/
typedef struct FileOutputStream
{
FileCompat fileCompat;
StringInfo fileBuffer;
StringInfo filePath;
} FileOutputStream;
/* Config variables managed via guc.c */
extern int PartitionBufferSize;
extern bool BinaryWorkerCopyFormat;
/* Function declarations local to the worker module */
extern StringInfo JobSchemaName(uint64 jobId);
extern StringInfo TaskTableName(uint32 taskId);
extern bool JobSchemaExists(StringInfo schemaName);
extern StringInfo JobDirectoryName(uint64 jobId);
extern StringInfo TaskDirectoryName(uint64 jobId, uint32 taskId);
extern StringInfo PartitionFilename(StringInfo directoryName, uint32 partitionId);
extern bool CacheDirectoryElement(const char *filename);
extern bool DirectoryExists(StringInfo directoryName);
extern void CitusCreateDirectory(StringInfo directoryName);
extern void CitusRemoveDirectory(const char *filename);
extern StringInfo InitTaskDirectory(uint64 jobId, uint32 taskId);
extern void RemoveJobSchema(StringInfo schemaName);
extern Datum * DeconstructArrayObject(ArrayType *arrayObject);
extern int32 ArrayObjectCount(ArrayType *arrayObject);
extern FmgrInfo * GetFunctionInfo(Oid typeId, Oid accessMethodId, int16 procedureId);
extern uint64 ExtractShardIdFromTableName(const char *tableName, bool missingOk);
extern void RepartitionCleanupJobDirectories(void);
extern void SetDefElemArg(AlterSeqStmt *statement, const char *name, Node *arg);
/* Function declarations shared with the master planner */
extern StringInfo TaskFilename(StringInfo directoryName, uint32 taskId);
extern StringInfo UserTaskFilename(StringInfo directoryName, uint32 taskId);
extern List * ColumnDefinitionList(List *columnNameList, List *columnTypeList);
extern CreateStmt * CreateStatement(RangeVar *relation, List *columnDefinitionList);
extern CopyStmt * CopyStatement(RangeVar *relation, char *sourceFilename);
extern DestReceiver * CreateFileDestReceiver(char *filePath,
MemoryContext tupleContext,
bool binaryCopyFormat);
@ -142,21 +59,10 @@ extern Node * ParseTreeNode(const char *ddlCommand);
extern Node * ParseTreeRawStmt(const char *ddlCommand);
/* Function declarations for applying distributed execution primitives */
extern Datum worker_fetch_partition_file(PG_FUNCTION_ARGS);
extern Datum worker_apply_shard_ddl_command(PG_FUNCTION_ARGS);
extern Datum worker_range_partition_table(PG_FUNCTION_ARGS);
extern Datum worker_hash_partition_table(PG_FUNCTION_ARGS);
extern Datum worker_merge_files_into_table(PG_FUNCTION_ARGS);
extern Datum worker_create_schema(PG_FUNCTION_ARGS);
extern Datum worker_merge_files_and_run_query(PG_FUNCTION_ARGS);
extern Datum worker_cleanup_job_schema_cache(PG_FUNCTION_ARGS);
/* Function declarations for fetching regular and foreign tables */
extern Datum worker_fetch_foreign_file(PG_FUNCTION_ARGS);
extern Datum worker_fetch_regular_table(PG_FUNCTION_ARGS);
extern Datum worker_append_table_to_shard(PG_FUNCTION_ARGS);
extern Datum worker_foreign_file_path(PG_FUNCTION_ARGS);
extern Datum worker_find_block_local_path(PG_FUNCTION_ARGS);
/* Function declaration for calculating hashed value */
extern Datum worker_hash(PG_FUNCTION_ARGS);

View File

@ -43,7 +43,7 @@ output_files := $(patsubst $(citus_abs_srcdir)/output/%.source,expected/%.out, $
# intermediate, for muscle memory backward compatibility.
check: check-full
# check-full triggers all tests that ought to be run routinely
check-full: check-multi check-multi-mx check-multi-1 check-worker check-operations check-follower-cluster check-isolation check-failure
check-full: check-multi check-multi-mx check-multi-1 check-operations check-follower-cluster check-isolation check-failure
ISOLATION_DEPDIR=.deps/isolation
@ -98,12 +98,6 @@ ifndef CITUS_VALGRIND_LOG_FILE
CITUS_VALGRIND_LOG_FILE := citus_valgrind_test_log.txt
endif
# using pg_regress_multi_check unnecessarily starts up multiple nodes, which isn't needed
# for check-worker. But that's harmless, aside from a few wasted cycles.
check-worker: all
$(pg_regress_multi_check) --load-extension=citus \
-- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/worker_schedule $(EXTRA_TESTS)
# check-base only sets up a testing environment so you can specify all your tests using EXTRA_TESTS
check-base: all
$(pg_regress_multi_check) --load-extension=citus \

View File

@ -38,7 +38,7 @@ CITUS_ARBITRARY_TEST_DIR = "./tmp_citus_test"
MASTER = "master"
# This should be updated when citus version changes
MASTER_VERSION = "11.0"
MASTER_VERSION = "11.1"
HOME = expanduser("~")
@ -264,7 +264,6 @@ class CitusUnusualExecutorConfig(CitusDefaultClusterConfig):
"citus.max_cached_connection_lifetime": "10ms",
# https://github.com/citusdata/citus/issues/5345
# "citus.force_max_query_parallelization": "on",
"citus.binary_worker_copy_format": False,
"citus.enable_binary_protocol": False,
"citus.local_table_join_policy": "prefer-distributed",
}

View File

@ -16,4 +16,3 @@
/multi_outer_join.out
/multi_outer_join_reference.out
/tablespace.out
/worker_copy.out

View File

@ -1031,12 +1031,27 @@ SELECT * FROM multi_extension.print_extension_changes();
| view citus_stat_activity
(41 rows)
-- Snapshot of state at 11.1-1
ALTER EXTENSION citus UPDATE TO '11.1-1';
SELECT * FROM multi_extension.print_extension_changes();
previous_object | current_object
---------------------------------------------------------------------
function worker_cleanup_job_schema_cache() void |
function worker_create_schema(bigint,text) void |
function worker_fetch_foreign_file(text,text,bigint,text[],integer[]) void |
function worker_fetch_partition_file(bigint,integer,integer,integer,text,integer) void |
function worker_hash_partition_table(bigint,integer,text,text,oid,anyarray) void |
function worker_merge_files_into_table(bigint,integer,text[],text[]) void |
function worker_range_partition_table(bigint,integer,text,text,oid,anyarray) void |
function worker_repartition_cleanup(bigint) void |
(8 rows)
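A quick, hedged way to double-check the drop from psql (not part of the regression output above) is to query the catalog for the removed UDFs; after the update this should return no rows:

SELECT proname FROM pg_proc
WHERE proname IN ('worker_range_partition_table', 'worker_hash_partition_table',
                  'worker_merge_files_into_table');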
DROP TABLE multi_extension.prev_objects, multi_extension.extension_diff;
-- show running version
SHOW citus.version;
citus.version
---------------------------------------------------------------------
11.0devel
11.1devel
(1 row)
-- ensure no unexpected objects were created outside pg_catalog
@ -1060,7 +1075,7 @@ RESET columnar.enable_version_checks;
DROP EXTENSION citus;
CREATE EXTENSION citus VERSION '8.0-1';
ERROR: specified version incompatible with loaded Citus library
DETAIL: Loaded library requires 11.0, but 8.0-1 was specified.
DETAIL: Loaded library requires 11.1, but 8.0-1 was specified.
HINT: If a newer library is present, restart the database and try the command again.
-- Test non-distributed queries work even in version mismatch
SET citus.enable_version_checks TO 'false';
@ -1105,7 +1120,7 @@ ORDER BY 1;
-- We should not distribute the table in a version mismatch
SELECT create_distributed_table('version_mismatch_table', 'column1');
ERROR: loaded Citus library version differs from installed extension version
DETAIL: Loaded library requires 11.0, but the installed extension version is 8.1-1.
DETAIL: Loaded library requires 11.1, but the installed extension version is 8.1-1.
HINT: Run ALTER EXTENSION citus UPDATE and try again.
-- This function will cause fail in next ALTER EXTENSION
CREATE OR REPLACE FUNCTION pg_catalog.relation_is_a_known_shard(regclass)

View File

@ -102,12 +102,6 @@ SET citus.shard_replication_factor TO 1;
-- create prepare tests
PREPARE prepare_insert AS INSERT INTO test VALUES ($1);
PREPARE prepare_select AS SELECT count(*) FROM test;
-- not allowed to read absolute paths, even as superuser
COPY "/etc/passwd" TO STDOUT WITH (format transmit);
ERROR: absolute path not allowed
-- not allowed to read paths outside pgsql_job_cache, even as superuser
COPY "postgresql.conf" TO STDOUT WITH (format transmit);
ERROR: path must be in the pgsql_job_cache directory
-- check full permission
SET ROLE full_access;
EXECUTE prepare_insert(1);
@ -151,14 +145,6 @@ SELECT count(*) FROM test a JOIN test b ON (a.val = b.val) WHERE a.id = 1 AND b.
0
(1 row)
-- should not be able to transmit directly
COPY "postgresql.conf" TO STDOUT WITH (format transmit);
ERROR: operation is not allowed
HINT: Run the command with a superuser.
-- should not be able to transmit directly
COPY "postgresql.conf" TO STDOUT WITH (format transmit);
ERROR: operation is not allowed
HINT: Run the command with a superuser.
-- check read permission
SET ROLE read_access;
-- should be allowed to run commands, as the current user
@ -230,10 +216,6 @@ SELECT count(*) FROM test a JOIN test b ON (a.val = b.val) WHERE a.id = 1 AND b.
0
(1 row)
-- should not be able to transmit directly
COPY "postgresql.conf" TO STDOUT WITH (format transmit);
ERROR: operation is not allowed
HINT: Run the command with a superuser.
-- should not be allowed to take aggressive locks on table
BEGIN;
SELECT lock_relation_if_exists('test', 'ACCESS SHARE');
@ -322,10 +304,6 @@ ERROR: permission denied for table test
SET citus.enable_repartition_joins TO true;
SELECT count(*) FROM test a JOIN test b ON (a.val = b.val) WHERE a.id = 1 AND b.id = 2;
ERROR: permission denied for table test
-- should not be able to transmit directly
COPY "postgresql.conf" TO STDOUT WITH (format transmit);
ERROR: operation is not allowed
HINT: Run the command with a superuser.
-- should be able to use intermediate results as any user
BEGIN;
SELECT create_intermediate_result('topten', 'SELECT s FROM generate_series(1,10) s');
@ -602,70 +580,7 @@ SELECT create_distributed_table('full_access_user_schema.t2', 'id');
(1 row)
RESET ROLE;
-- super user should be the only one being able to call worker_cleanup_job_schema_cache
SELECT worker_cleanup_job_schema_cache();
worker_cleanup_job_schema_cache
---------------------------------------------------------------------
(1 row)
SET ROLE full_access;
SELECT worker_cleanup_job_schema_cache();
ERROR: permission denied for function worker_cleanup_job_schema_cache
SET ROLE usage_access;
SELECT worker_cleanup_job_schema_cache();
ERROR: permission denied for function worker_cleanup_job_schema_cache
SET ROLE read_access;
SELECT worker_cleanup_job_schema_cache();
ERROR: permission denied for function worker_cleanup_job_schema_cache
SET ROLE no_access;
SELECT worker_cleanup_job_schema_cache();
ERROR: permission denied for function worker_cleanup_job_schema_cache
RESET ROLE;
-- to test access to files created during repartition we will create some on worker 1
\c - - - :worker_1_port
SET citus.enable_metadata_sync TO OFF;
CREATE OR REPLACE FUNCTION citus_rm_job_directory(bigint)
RETURNS void
AS 'citus'
LANGUAGE C STRICT;
RESET citus.enable_metadata_sync;
SET ROLE full_access;
SELECT worker_hash_partition_table(42,1,'SELECT a FROM generate_series(1,100) AS a', 'a', 23, ARRAY[-2147483648, -1073741824, 0, 1073741824]::int4[]);
worker_hash_partition_table
---------------------------------------------------------------------
(1 row)
RESET ROLE;
-- all attempts for transfer are initiated from other workers
\c - - - :worker_2_port
SET citus.enable_metadata_sync TO OFF;
CREATE OR REPLACE FUNCTION citus_rm_job_directory(bigint)
RETURNS void
AS 'citus'
LANGUAGE C STRICT;
RESET citus.enable_metadata_sync;
-- super user should not be able to copy files created by a user
SELECT worker_fetch_partition_file(42, 1, 1, 1, 'localhost', :worker_1_port);
WARNING: could not open file "base/pgsql_job_cache/job_0042/task_000001/p_00001.xxxx": No such file or directory
CONTEXT: while executing command on localhost:xxxxx
ERROR: could not receive file "base/pgsql_job_cache/job_0042/task_000001/p_00001" from localhost:xxxxx
-- different user should not be able to fetch partition file
SET ROLE usage_access;
SELECT worker_fetch_partition_file(42, 1, 1, 1, 'localhost', :worker_1_port);
WARNING: could not open file "base/pgsql_job_cache/job_0042/task_000001/p_00001.xxxx": No such file or directory
CONTEXT: while executing command on localhost:xxxxx
ERROR: could not receive file "base/pgsql_job_cache/job_0042/task_000001/p_00001" from localhost:xxxxx
-- only the user who created the files should be able to fetch them
SET ROLE full_access;
SELECT worker_fetch_partition_file(42, 1, 1, 1, 'localhost', :worker_1_port);
worker_fetch_partition_file
---------------------------------------------------------------------
(1 row)
RESET ROLE;
-- non-superuser should be able to use worker_append_table_to_shard on their own shard
SET ROLE full_access;
CREATE TABLE full_access_user_schema.source_table (id int);
@ -699,68 +614,6 @@ SELECT worker_append_table_to_shard('full_access_user_schema.shard_0', 'full_acc
ERROR: permission denied for table shard_0
RESET ROLE;
DROP TABLE full_access_user_schema.source_table, full_access_user_schema.shard_0;
-- now we will test that only the user who owns the fetched file is able to merge it into
-- a table
-- test that no other user can merge the downloaded file before the task is tracked
SET ROLE usage_access;
SELECT worker_merge_files_into_table(42, 1, ARRAY['a'], ARRAY['integer']);
ERROR: job schema does not exist
DETAIL: must be superuser to use public schema
RESET ROLE;
-- test that no other user can merge the downloaded file after the task is tracked
SET ROLE usage_access;
SELECT worker_merge_files_into_table(42, 1, ARRAY['a'], ARRAY['integer']);
ERROR: job schema does not exist
DETAIL: must be superuser to use public schema
RESET ROLE;
-- test that the super user is unable to read the contents of the intermediate file,
-- although it does create the table
SELECT worker_merge_files_into_table(42, 1, ARRAY['a'], ARRAY['integer']);
WARNING: Task file "task_000001.xxxx" does not have expected suffix ".10"
worker_merge_files_into_table
---------------------------------------------------------------------
(1 row)
SELECT count(*) FROM pg_merge_job_0042.task_000001;
ERROR: relation "pg_merge_job_0042.task_000001" does not exist
DROP TABLE pg_merge_job_0042.task_000001; -- drop table so we can reuse the same files for more tests
ERROR: schema "pg_merge_job_0042" does not exist
SET ROLE full_access;
SELECT worker_merge_files_into_table(42, 1, ARRAY['a'], ARRAY['integer']);
ERROR: job schema does not exist
DETAIL: must be superuser to use public schema
SELECT count(*) FROM pg_merge_job_0042.task_000001;
ERROR: relation "pg_merge_job_0042.task_000001" does not exist
DROP TABLE pg_merge_job_0042.task_000001; -- drop table so we can reuse the same files for more tests
ERROR: schema "pg_merge_job_0042" does not exist
RESET ROLE;
SELECT count(*) FROM pg_merge_job_0042.task_000001_merge;
ERROR: relation "pg_merge_job_0042.task_000001_merge" does not exist
SELECT count(*) FROM pg_merge_job_0042.task_000001;
ERROR: relation "pg_merge_job_0042.task_000001" does not exist
DROP TABLE pg_merge_job_0042.task_000001, pg_merge_job_0042.task_000001_merge; -- drop table so we can reuse the same files for more tests
ERROR: schema "pg_merge_job_0042" does not exist
SELECT count(*) FROM pg_merge_job_0042.task_000001_merge;
ERROR: relation "pg_merge_job_0042.task_000001_merge" does not exist
SELECT count(*) FROM pg_merge_job_0042.task_000001;
ERROR: relation "pg_merge_job_0042.task_000001" does not exist
DROP TABLE pg_merge_job_0042.task_000001, pg_merge_job_0042.task_000001_merge; -- drop table so we can reuse the same files for more tests
ERROR: schema "pg_merge_job_0042" does not exist
RESET ROLE;
SELECT citus_rm_job_directory(42::bigint);
citus_rm_job_directory
---------------------------------------------------------------------
(1 row)
\c - - - :worker_1_port
SELECT citus_rm_job_directory(42::bigint);
citus_rm_job_directory
---------------------------------------------------------------------
(1 row)
\c - - - :master_port
DROP SCHEMA full_access_user_schema CASCADE;
NOTICE: drop cascades to 2 other objects

View File

@ -7,21 +7,6 @@
-- result files.
SET citus.next_shard_id TO 810000;
SET citus.enable_unique_job_ids TO off;
CREATE FUNCTION citus_rm_job_directory(bigint)
RETURNS void
AS 'citus'
LANGUAGE C STRICT;
with silence as (
SELECT citus_rm_job_directory(split_part(f, '_', 2)::bigint)
from pg_ls_dir('base/pgsql_job_cache') f
)
select count(*) * 0 zero
from silence;
zero
---------------------------------------------------------------------
0
(1 row)
BEGIN;
-- pg_ls_dir() displays jobids. We explicitly set the jobId sequence
-- here so that the regression output becomes independent of the

View File

@ -215,23 +215,17 @@ ORDER BY 1;
function worker_apply_shard_ddl_command(bigint,text)
function worker_apply_shard_ddl_command(bigint,text,text)
function worker_change_sequence_dependency(regclass,regclass,regclass)
function worker_cleanup_job_schema_cache()
function worker_create_or_alter_role(text,text,text)
function worker_create_or_replace_object(text)
function worker_create_or_replace_object(text[])
function worker_create_schema(bigint,text)
function worker_create_truncate_trigger(regclass)
function worker_drop_distributed_table(text)
function worker_drop_sequence_dependency(text)
function worker_drop_shell_table(text)
function worker_fetch_foreign_file(text,text,bigint,text[],integer[])
function worker_fetch_partition_file(bigint,integer,integer,integer,text,integer)
function worker_fix_partition_shard_index_names(regclass,text,text)
function worker_fix_pre_citus10_partitioned_table_constraint_names(regclass,bigint,text)
function worker_hash("any")
function worker_hash_partition_table(bigint,integer,text,text,oid,anyarray)
function worker_last_saved_explain_analyze()
function worker_merge_files_into_table(bigint,integer,text[],text[])
function worker_nextval(regclass)
function worker_partial_agg(oid,anyelement)
function worker_partial_agg_ffunc(internal)
@ -240,9 +234,7 @@ ORDER BY 1;
function worker_partitioned_relation_size(regclass)
function worker_partitioned_relation_total_size(regclass)
function worker_partitioned_table_size(regclass)
function worker_range_partition_table(bigint,integer,text,text,oid,anyarray)
function worker_record_sequence_dependency(regclass,regclass,name)
function worker_repartition_cleanup(bigint)
function worker_save_query_explain_analyze(text,jsonb)
schema citus
schema citus_internal
@ -283,5 +275,5 @@ ORDER BY 1;
view citus_stat_statements
view pg_dist_shard_placement
view time_partitions
(267 rows)
(259 rows)

View File

@ -1,121 +0,0 @@
--
-- WORKER_BINARY_DATA_PARTITION
--
\set JobId 201010
\set TaskId 101105
\set Partition_Column textcolumn
\set Partition_Column_Text '\'textcolumn\''
\set Partition_Column_Type 25
\set Select_Query_Text '\'SELECT * FROM binary_data_table\''
\set Select_All 'SELECT *'
\set Table_Name binary_data_table
\set Table_Part_00 binary_data_table_part_00
\set Table_Part_01 binary_data_table_part_01
\set Table_Part_02 binary_data_table_part_02
SELECT usesysid AS userid FROM pg_user WHERE usename = current_user \gset
\set File_Basedir base/pgsql_job_cache
\set Table_File_00 :File_Basedir/job_:JobId/task_:TaskId/p_00000.:userid
\set Table_File_01 :File_Basedir/job_:JobId/task_:TaskId/p_00001.:userid
\set Table_File_02 :File_Basedir/job_:JobId/task_:TaskId/p_00002.:userid
-- Create table with special characters
CREATE TABLE :Table_Name(textcolumn text, binarycolumn bytea);
COPY :Table_Name FROM stdin;
SELECT length(binarycolumn) FROM :Table_Name;
length
---------------------------------------------------------------------
2
4
3
2
4
14
28
16
9
11
11
24
17
12
(14 rows)
-- Run select query, and apply range partitioning on query results
SELECT worker_range_partition_table(:JobId, :TaskId, :Select_Query_Text,
:Partition_Column_Text, :Partition_Column_Type,
ARRAY['aaa', 'some']::_text);
worker_range_partition_table
---------------------------------------------------------------------
(1 row)
-- Copy range partitioned files into tables
CREATE TABLE :Table_Part_00 ( LIKE :Table_Name );
CREATE TABLE :Table_Part_01 ( LIKE :Table_Name );
CREATE TABLE :Table_Part_02 ( LIKE :Table_Name );
COPY :Table_Part_00 FROM :'Table_File_00';
COPY :Table_Part_01 FROM :'Table_File_01';
COPY :Table_Part_02 FROM :'Table_File_02';
-- The union of the three partitions should have as many rows as original table
SELECT COUNT(*) AS total_row_count FROM (
SELECT * FROM :Table_Part_00 UNION ALL
SELECT * FROM :Table_Part_01 UNION ALL
SELECT * FROM :Table_Part_02 ) AS all_rows;
total_row_count
---------------------------------------------------------------------
14
(1 row)
-- We first compute the difference of partition tables against the base table.
-- Then, we compute the difference of the base table against partitioned tables.
SELECT COUNT(*) AS diff_lhs_00 FROM (
:Select_All FROM :Table_Part_00 EXCEPT ALL
:Select_All FROM :Table_Name WHERE :Partition_Column IS NULL OR
:Partition_Column < 'aaa' ) diff;
diff_lhs_00
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_lhs_01 FROM (
:Select_All FROM :Table_Part_01 EXCEPT ALL
:Select_All FROM :Table_Name WHERE :Partition_Column >= 'aaa' AND
:Partition_Column < 'some' ) diff;
diff_lhs_01
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_lhs_02 FROM (
:Select_All FROM :Table_Part_02 EXCEPT ALL
:Select_All FROM :Table_Name WHERE :Partition_Column >= 'some' ) diff;
diff_lhs_02
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_rhs_00 FROM (
:Select_All FROM :Table_Name WHERE :Partition_Column IS NULL OR
:Partition_Column < 'aaa' EXCEPT ALL
:Select_All FROM :Table_Part_00 ) diff;
diff_rhs_00
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_rhs_01 FROM (
:Select_All FROM :Table_Name WHERE :Partition_Column >= 'aaa' AND
:Partition_Column < 'some' EXCEPT ALL
:Select_All FROM :Table_Part_01 ) diff;
diff_rhs_01
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_rhs_02 FROM (
:Select_All FROM :Table_Name WHERE :Partition_Column >= 'some' EXCEPT ALL
:Select_All FROM :Table_Part_02 ) diff;
diff_rhs_02
---------------------------------------------------------------------
0
(1 row)

View File

@ -1,70 +0,0 @@
--
-- WORKER_CREATE_TABLE
--
-- Create new table definitions for lineitem and supplier tables to test worker
-- node execution logic. For now, the tests include range and hash partitioning
-- of existing tables.
SET citus.next_shard_id TO 1110000;
CREATE TABLE lineitem (
l_orderkey bigint not null,
l_partkey integer not null,
l_suppkey integer not null,
l_linenumber integer not null,
l_quantity decimal(15, 2) not null,
l_extendedprice decimal(15, 2) not null,
l_discount decimal(15, 2) not null,
l_tax decimal(15, 2) not null,
l_returnflag char(1) not null,
l_linestatus char(1) not null,
l_shipdate date not null,
l_commitdate date not null,
l_receiptdate date not null,
l_shipinstruct char(25) not null,
l_shipmode char(10) not null,
l_comment varchar(44) not null,
PRIMARY KEY(l_orderkey, l_linenumber) );
CREATE TABLE lineitem_complex (
l_partkey integer not null,
l_discount decimal(15, 2) not null,
l_shipdate date not null,
l_comment varchar(44) not null );
-- Range partitioned lineitem data are inserted into these four tables
CREATE TABLE lineitem_range_part_00 ( LIKE lineitem );
CREATE TABLE lineitem_range_part_01 ( LIKE lineitem );
CREATE TABLE lineitem_range_part_02 ( LIKE lineitem );
CREATE TABLE lineitem_range_part_03 ( LIKE lineitem );
-- Complex range partitioned lineitem data are inserted into these four tables
CREATE TABLE lineitem_range_complex_part_00 ( LIKE lineitem_complex );
CREATE TABLE lineitem_range_complex_part_01 ( LIKE lineitem_complex );
CREATE TABLE lineitem_range_complex_part_02 ( LIKE lineitem_complex );
CREATE TABLE lineitem_range_complex_part_03 ( LIKE lineitem_complex );
-- Hash partitioned lineitem data are inserted into these four tables
CREATE TABLE lineitem_hash_part_00 ( LIKE lineitem );
CREATE TABLE lineitem_hash_part_01 ( LIKE lineitem );
CREATE TABLE lineitem_hash_part_02 ( LIKE lineitem );
CREATE TABLE lineitem_hash_part_03 ( LIKE lineitem );
-- Complex hash partitioned lineitem data are inserted into these four tables
CREATE TABLE lineitem_hash_complex_part_00 ( LIKE lineitem_complex );
CREATE TABLE lineitem_hash_complex_part_01 ( LIKE lineitem_complex );
CREATE TABLE lineitem_hash_complex_part_02 ( LIKE lineitem_complex );
CREATE TABLE lineitem_hash_complex_part_03 ( LIKE lineitem_complex );
-- Now create a supplier table to test repartitioning the data on the nation key
-- column, where the column's values can be null or zero.
CREATE TABLE SUPPLIER
(
s_suppkey integer not null,
s_name char(25) not null,
s_address varchar(40) not null,
s_nationkey integer,
s_phone char(15) not null,
s_acctbal decimal(15,2) not null,
s_comment varchar(101) not null
);
-- Range partitioned supplier data are inserted into three tables
CREATE TABLE supplier_range_part_00 ( LIKE supplier );
CREATE TABLE supplier_range_part_01 ( LIKE supplier );
CREATE TABLE supplier_range_part_02 ( LIKE supplier );
-- Hash partitioned supplier data are inserted into three tables
CREATE TABLE supplier_hash_part_00 ( LIKE supplier );
CREATE TABLE supplier_hash_part_01 ( LIKE supplier );
CREATE TABLE supplier_hash_part_02 ( LIKE supplier );

View File

@ -1,20 +0,0 @@
-- The files we use in the following text use the text based worker copy
-- format. So we disable the binary worker copy format here.
-- This is a no-op for PG_VERSION_NUM < 14, because the default is off there.
ALTER SYSTEM SET citus.binary_worker_copy_format TO off;
SELECT pg_reload_conf();
pg_reload_conf
---------------------------------------------------------------------
t
(1 row)
SELECT success FROM run_command_on_workers('ALTER SYSTEM SET citus.binary_worker_copy_format TO off');
success
---------------------------------------------------------------------
(0 rows)
SELECT success FROM run_command_on_workers('SELECT pg_reload_conf()');
success
---------------------------------------------------------------------
(0 rows)

View File

@ -1,127 +0,0 @@
--
-- WORKER_HASH_PARTITION
--
\set JobId 201010
\set TaskId 101103
\set Partition_Column l_orderkey
\set Partition_Column_Text '\'l_orderkey\''
\set Partition_Column_Type '\'int8\''
\set Partition_Count 4
\set hashTokenIncrement 1073741824
\set Select_Query_Text '\'SELECT * FROM lineitem\''
\set Select_All 'SELECT *'
-- The hash function is mapped to behave exactly as the Citus planner does
-- (a worked example follows the \set block below)
\set Hash_Mod_Function '( hashint8(l_orderkey)::int8 - (-2147483648))::int8 / :hashTokenIncrement::int8'
\set Table_Part_00 lineitem_hash_part_00
\set Table_Part_01 lineitem_hash_part_01
\set Table_Part_02 lineitem_hash_part_02
\set Table_Part_03 lineitem_hash_part_03
SELECT usesysid AS userid FROM pg_user WHERE usename = current_user \gset
\set File_Basedir base/pgsql_job_cache
\set Table_File_00 :File_Basedir/job_:JobId/task_:TaskId/p_00000.:userid
\set Table_File_01 :File_Basedir/job_:JobId/task_:TaskId/p_00001.:userid
\set Table_File_02 :File_Basedir/job_:JobId/task_:TaskId/p_00002.:userid
\set Table_File_03 :File_Basedir/job_:JobId/task_:TaskId/p_00003.:userid
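As the worked example promised above (the input hash value 42 is assumed purely for illustration): a row whose hashint8(l_orderkey) is 42 falls into bucket (42 - (-2147483648)) / 1073741824 = 2, so its tuple is written to the third partition file:

SELECT (42::int8 - (-2147483648))::int8 / 1073741824 AS bucket;
 bucket
---------------------------------------------------------------------
      2
(1 row)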
-- Run select query, and apply hash partitioning on query results
SELECT worker_hash_partition_table(:JobId, :TaskId, :Select_Query_Text,
:Partition_Column_Text, :Partition_Column_Type::regtype,
ARRAY[-2147483648, -1073741824, 0, 1073741824]::int4[]);
worker_hash_partition_table
---------------------------------------------------------------------
(1 row)
COPY :Table_Part_00 FROM :'Table_File_00';
COPY :Table_Part_01 FROM :'Table_File_01';
COPY :Table_Part_02 FROM :'Table_File_02';
COPY :Table_Part_03 FROM :'Table_File_03';
SELECT COUNT(*) FROM :Table_Part_00;
count
---------------------------------------------------------------------
2885
(1 row)
SELECT COUNT(*) FROM :Table_Part_01;
count
---------------------------------------------------------------------
3009
(1 row)
SELECT COUNT(*) FROM :Table_Part_02;
count
---------------------------------------------------------------------
3104
(1 row)
SELECT COUNT(*) FROM :Table_Part_03;
count
---------------------------------------------------------------------
3002
(1 row)
-- We first compute the difference of partition tables against the base table.
-- Then, we compute the difference of the base table against partitioned tables.
SELECT COUNT(*) AS diff_lhs_00 FROM (
:Select_All FROM :Table_Part_00 EXCEPT ALL
:Select_All FROM lineitem WHERE (:Hash_Mod_Function = 0) ) diff;
diff_lhs_00
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_lhs_01 FROM (
:Select_All FROM :Table_Part_01 EXCEPT ALL
:Select_All FROM lineitem WHERE (:Hash_Mod_Function = 1) ) diff;
diff_lhs_01
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_lhs_02 FROM (
:Select_All FROM :Table_Part_02 EXCEPT ALL
:Select_All FROM lineitem WHERE (:Hash_Mod_Function = 2) ) diff;
diff_lhs_02
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_lhs_03 FROM (
:Select_All FROM :Table_Part_03 EXCEPT ALL
:Select_All FROM lineitem WHERE (:Hash_Mod_Function = 3) ) diff;
diff_lhs_03
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_rhs_00 FROM (
:Select_All FROM lineitem WHERE (:Hash_Mod_Function = 0) EXCEPT ALL
:Select_All FROM :Table_Part_00 ) diff;
diff_rhs_00
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_rhs_01 FROM (
:Select_All FROM lineitem WHERE (:Hash_Mod_Function = 1) EXCEPT ALL
:Select_All FROM :Table_Part_01 ) diff;
diff_rhs_01
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_rhs_02 FROM (
:Select_All FROM lineitem WHERE (:Hash_Mod_Function = 2) EXCEPT ALL
:Select_All FROM :Table_Part_02 ) diff;
diff_rhs_02
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_rhs_03 FROM (
:Select_All FROM lineitem WHERE (:Hash_Mod_Function = 3) EXCEPT ALL
:Select_All FROM :Table_Part_03 ) diff;
diff_rhs_03
---------------------------------------------------------------------
0
(1 row)

View File

@ -1,128 +0,0 @@
--
-- WORKER_HASH_PARTITION_COMPLEX
--
\set JobId 201010
\set TaskId 101104
\set Partition_Column l_partkey
\set Partition_Column_Text '\'l_partkey\''
\set Partition_Column_Type 23
\set Partition_Count 4
\set hashTokenIncrement 1073741824
\set Select_Columns 'SELECT l_partkey, l_discount, l_shipdate, l_comment'
\set Select_Filters 'l_shipdate >= date \'1992-01-15\' AND l_discount between 0.02 AND 0.08'
\set Hash_Mod_Function '( hashint4(l_partkey)::int8 - (-2147483648))::int8 / :hashTokenIncrement::int8'
\set Table_Part_00 lineitem_hash_complex_part_00
\set Table_Part_01 lineitem_hash_complex_part_01
\set Table_Part_02 lineitem_hash_complex_part_02
\set Table_Part_03 lineitem_hash_complex_part_03
SELECT usesysid AS userid FROM pg_user WHERE usename = current_user \gset
\set File_Basedir base/pgsql_job_cache
\set Table_File_00 :File_Basedir/job_:JobId/task_:TaskId/p_00000.:userid
\set Table_File_01 :File_Basedir/job_:JobId/task_:TaskId/p_00001.:userid
\set Table_File_02 :File_Basedir/job_:JobId/task_:TaskId/p_00002.:userid
\set Table_File_03 :File_Basedir/job_:JobId/task_:TaskId/p_00003.:userid
-- Run hardcoded complex select query, and apply hash partitioning on query
-- results
SELECT worker_hash_partition_table(:JobId, :TaskId,
'SELECT l_partkey, l_discount, l_shipdate, l_comment'
' FROM lineitem '
' WHERE l_shipdate >= date ''1992-01-15'''
' AND l_discount between 0.02 AND 0.08',
:Partition_Column_Text, :Partition_Column_Type,
ARRAY[-2147483648, -1073741824, 0, 1073741824]::int4[]);
worker_hash_partition_table
---------------------------------------------------------------------
(1 row)
-- Copy partitioned data files into tables for testing purposes
COPY :Table_Part_00 FROM :'Table_File_00';
COPY :Table_Part_01 FROM :'Table_File_01';
COPY :Table_Part_02 FROM :'Table_File_02';
COPY :Table_Part_03 FROM :'Table_File_03';
SELECT COUNT(*) FROM :Table_Part_00;
count
---------------------------------------------------------------------
1883
(1 row)
SELECT COUNT(*) FROM :Table_Part_03;
count
---------------------------------------------------------------------
1913
(1 row)
-- We first compute the difference of partition tables against the base table.
-- Then, we compute the difference of the base table against partitioned tables.
SELECT COUNT(*) AS diff_lhs_00 FROM (
:Select_Columns FROM :Table_Part_00 EXCEPT ALL
:Select_Columns FROM lineitem WHERE :Select_Filters AND
(:Hash_Mod_Function = 0) ) diff;
diff_lhs_00
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_lhs_01 FROM (
:Select_Columns FROM :Table_Part_01 EXCEPT ALL
:Select_Columns FROM lineitem WHERE :Select_Filters AND
(:Hash_Mod_Function = 1) ) diff;
diff_lhs_01
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_lhs_02 FROM (
:Select_Columns FROM :Table_Part_02 EXCEPT ALL
:Select_Columns FROM lineitem WHERE :Select_Filters AND
(:Hash_Mod_Function = 2) ) diff;
diff_lhs_02
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_lhs_03 FROM (
:Select_Columns FROM :Table_Part_03 EXCEPT ALL
:Select_Columns FROM lineitem WHERE :Select_Filters AND
(:Hash_Mod_Function = 3) ) diff;
diff_lhs_03
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_rhs_00 FROM (
:Select_Columns FROM lineitem WHERE :Select_Filters AND
(:Hash_Mod_Function = 0) EXCEPT ALL
:Select_Columns FROM :Table_Part_00 ) diff;
diff_rhs_00
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_rhs_01 FROM (
:Select_Columns FROM lineitem WHERE :Select_Filters AND
(:Hash_Mod_Function = 1) EXCEPT ALL
:Select_Columns FROM :Table_Part_01 ) diff;
diff_rhs_01
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_rhs_02 FROM (
:Select_Columns FROM lineitem WHERE :Select_Filters AND
(:Hash_Mod_Function = 2) EXCEPT ALL
:Select_Columns FROM :Table_Part_02 ) diff;
diff_rhs_02
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_rhs_03 FROM (
:Select_Columns FROM lineitem WHERE :Select_Filters AND
(:Hash_Mod_Function = 3) EXCEPT ALL
:Select_Columns FROM :Table_Part_03 ) diff;
diff_rhs_03
---------------------------------------------------------------------
0
(1 row)

View File

@ -1,50 +0,0 @@
--
-- WORKER_MERGE_HASH_FILES
--
\set JobId 201010
\set TaskId 101103
\set Task_Table_Name public.task_101103
\set Select_All 'SELECT *'
-- TaskId determines our dependency on hash partitioned files. We take these
-- files and merge them into a task table. We also pass the column names and
-- column types that are used to create the task table.
SELECT worker_merge_files_into_table(:JobId, :TaskId,
ARRAY['orderkey', 'partkey', 'suppkey', 'linenumber', 'quantity', 'extendedprice',
'discount', 'tax', 'returnflag', 'linestatus', 'shipdate', 'commitdate',
'receiptdate', 'shipinstruct', 'shipmode', 'comment']::_text,
ARRAY['bigint', 'integer', 'integer', 'integer', 'decimal(15, 2)', 'decimal(15, 2)',
'decimal(15, 2)', 'decimal(15, 2)', 'char(1)', 'char(1)', 'date', 'date',
'date', 'char(25)', 'char(10)', 'varchar(44)']::_text);
worker_merge_files_into_table
---------------------------------------------------------------------
(1 row)
-- We first count elements from the merged table and the original table we hash
-- partitioned. We then compute the difference of these two tables.
SELECT COUNT(*) FROM :Task_Table_Name;
count
---------------------------------------------------------------------
12000
(1 row)
SELECT COUNT(*) FROM lineitem;
count
---------------------------------------------------------------------
12000
(1 row)
SELECT COUNT(*) AS diff_lhs FROM ( :Select_All FROM :Task_Table_Name EXCEPT ALL
:Select_All FROM lineitem ) diff;
diff_lhs
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_rhs FROM ( :Select_All FROM lineitem EXCEPT ALL
:Select_All FROM :Task_Table_Name ) diff;
diff_rhs
---------------------------------------------------------------------
0
(1 row)

View File

@ -1,50 +0,0 @@
--
-- WORKER_MERGE_RANGE_FILES
--
\set JobId 201010
\set TaskId 101101
\set Task_Table_Name public.task_101101
\set Select_All 'SELECT *'
-- TaskId determines our dependency on range partitioned files. We take these
-- files and merge them into a task table. We also pass the column names and
-- column types that are used to create the task table.
SELECT worker_merge_files_into_table(:JobId, :TaskId,
ARRAY['orderkey', 'partkey', 'suppkey', 'linenumber', 'quantity', 'extendedprice',
'discount', 'tax', 'returnflag', 'linestatus', 'shipdate', 'commitdate',
'receiptdate', 'shipinstruct', 'shipmode', 'comment']::_text,
ARRAY['bigint', 'integer', 'integer', 'integer', 'decimal(15, 2)', 'decimal(15, 2)',
'decimal(15, 2)', 'decimal(15, 2)', 'char(1)', 'char(1)', 'date', 'date',
'date', 'char(25)', 'char(10)', 'varchar(44)']::_text);
worker_merge_files_into_table
---------------------------------------------------------------------
(1 row)
-- We first count elements from the merged table and the original table we range
-- partitioned. We then compute the difference of these two tables.
SELECT COUNT(*) FROM :Task_Table_Name;
count
---------------------------------------------------------------------
12000
(1 row)
SELECT COUNT(*) FROM lineitem;
count
---------------------------------------------------------------------
12000
(1 row)
SELECT COUNT(*) AS diff_lhs FROM ( :Select_All FROM :Task_Table_Name EXCEPT ALL
:Select_All FROM lineitem ) diff;
diff_lhs
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_rhs FROM ( :Select_All FROM lineitem EXCEPT ALL
:Select_All FROM :Task_Table_Name ) diff;
diff_rhs
---------------------------------------------------------------------
0
(1 row)

View File

@ -1,190 +0,0 @@
--
-- WORKER_NULL_DATA_PARTITION
--
\set JobId 201010
\set Range_TaskId 101106
\set Partition_Column s_nationkey
\set Partition_Column_Text '\'s_nationkey\''
\set Partition_Column_Type 23
\set Select_Query_Text '\'SELECT * FROM supplier\''
\set Select_All 'SELECT *'
\set Range_Table_Part_00 supplier_range_part_00
\set Range_Table_Part_01 supplier_range_part_01
\set Range_Table_Part_02 supplier_range_part_02
SELECT usesysid AS userid FROM pg_user WHERE usename = current_user \gset
\set File_Basedir base/pgsql_job_cache
\set Range_Table_File_00 :File_Basedir/job_:JobId/task_:Range_TaskId/p_00000.:userid
\set Range_Table_File_01 :File_Basedir/job_:JobId/task_:Range_TaskId/p_00001.:userid
\set Range_Table_File_02 :File_Basedir/job_:JobId/task_:Range_TaskId/p_00002.:userid
-- Run select query, and apply range partitioning on query results. Note that
-- one of the split point values is 0. We are checking here that the partition
-- function doesn't treat 0 as null, and that range repartitioning correctly
-- puts null nation key values into the 0th repartition bucket.
SELECT worker_range_partition_table(:JobId, :Range_TaskId, :Select_Query_Text,
:Partition_Column_Text, :Partition_Column_Type,
ARRAY[0, 10]::_int4);
worker_range_partition_table
---------------------------------------------------------------------
(1 row)
-- Copy partitioned data files into tables for testing purposes
COPY :Range_Table_Part_00 FROM :'Range_Table_File_00';
COPY :Range_Table_Part_01 FROM :'Range_Table_File_01';
COPY :Range_Table_Part_02 FROM :'Range_Table_File_02';
SELECT COUNT(*) FROM :Range_Table_Part_00;
count
---------------------------------------------------------------------
6
(1 row)
SELECT COUNT(*) FROM :Range_Table_Part_02;
count
---------------------------------------------------------------------
588
(1 row)
-- We first compute the difference of partition tables against the base table.
-- Then, we compute the difference of the base table against partitioned tables.
SELECT COUNT(*) AS diff_lhs_00 FROM (
:Select_All FROM :Range_Table_Part_00 EXCEPT ALL
(:Select_All FROM supplier WHERE :Partition_Column < 0 OR
:Partition_Column IS NULL) ) diff;
diff_lhs_00
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_lhs_01 FROM (
:Select_All FROM :Range_Table_Part_01 EXCEPT ALL
:Select_All FROM supplier WHERE :Partition_Column >= 0 AND
:Partition_Column < 10 ) diff;
diff_lhs_01
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_rhs_02 FROM (
:Select_All FROM supplier WHERE :Partition_Column >= 10 EXCEPT ALL
:Select_All FROM :Range_Table_Part_02 ) diff;
diff_rhs_02
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_rhs_00 FROM (
(:Select_All FROM supplier WHERE :Partition_Column < 0 OR
:Partition_Column IS NULL) EXCEPT ALL
:Select_All FROM :Range_Table_Part_00 ) diff;
diff_rhs_00
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_rhs_01 FROM (
:Select_All FROM supplier WHERE :Partition_Column >= 0 AND
:Partition_Column < 10 EXCEPT ALL
:Select_All FROM :Range_Table_Part_01 ) diff;
diff_rhs_01
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_rhs_02 FROM (
:Select_All FROM supplier WHERE :Partition_Column >= 10 EXCEPT ALL
:Select_All FROM :Range_Table_Part_02 ) diff;
diff_rhs_02
---------------------------------------------------------------------
0
(1 row)
-- Next, run select query and apply hash partitioning on query results. We are
-- checking here that hash repartitioning correctly puts null nation key values
-- into the 0th repartition bucket.
\set Hash_TaskId 101107
\set Partition_Count 4
\set Hash_Mod_Function '( hashint4(s_nationkey)::int8 - (-2147483648))::int8 / :hashTokenIncrement::int8'
\set hashTokenIncrement 1073741824
\set Hash_Table_Part_00 supplier_hash_part_00
\set Hash_Table_Part_01 supplier_hash_part_01
\set Hash_Table_Part_02 supplier_hash_part_02
\set File_Basedir base/pgsql_job_cache
\set Hash_Table_File_00 :File_Basedir/job_:JobId/task_:Hash_TaskId/p_00000.:userid
\set Hash_Table_File_01 :File_Basedir/job_:JobId/task_:Hash_TaskId/p_00001.:userid
\set Hash_Table_File_02 :File_Basedir/job_:JobId/task_:Hash_TaskId/p_00002.:userid
-- Run select query, and apply hash partitioning on query results
SELECT worker_hash_partition_table(:JobId, :Hash_TaskId, :Select_Query_Text,
:Partition_Column_Text, :Partition_Column_Type,
ARRAY[-2147483648, -1073741824, 0, 1073741824]::int4[]);
worker_hash_partition_table
---------------------------------------------------------------------
(1 row)
COPY :Hash_Table_Part_00 FROM :'Hash_Table_File_00';
COPY :Hash_Table_Part_01 FROM :'Hash_Table_File_01';
COPY :Hash_Table_Part_02 FROM :'Hash_Table_File_02';
SELECT COUNT(*) FROM :Hash_Table_Part_00;
count
---------------------------------------------------------------------
282
(1 row)
SELECT COUNT(*) FROM :Hash_Table_Part_02;
count
---------------------------------------------------------------------
102
(1 row)
-- We first compute the difference of partition tables against the base table.
-- Then, we compute the difference of the base table against partitioned tables.
SELECT COUNT(*) AS diff_lhs_00 FROM (
:Select_All FROM :Hash_Table_Part_00 EXCEPT ALL
(:Select_All FROM supplier WHERE (:Hash_Mod_Function = 0) OR
:Partition_Column IS NULL) ) diff;
diff_lhs_00
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_lhs_01 FROM (
:Select_All FROM :Hash_Table_Part_01 EXCEPT ALL
:Select_All FROM supplier WHERE (:Hash_Mod_Function = 1) ) diff;
diff_lhs_01
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_lhs_02 FROM (
:Select_All FROM :Hash_Table_Part_02 EXCEPT ALL
:Select_All FROM supplier WHERE (:Hash_Mod_Function = 2) ) diff;
diff_lhs_02
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_rhs_00 FROM (
(:Select_All FROM supplier WHERE (:Hash_Mod_Function = 0) OR
:Partition_Column IS NULL) EXCEPT ALL
:Select_All FROM :Hash_Table_Part_00 ) diff;
diff_rhs_00
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_rhs_01 FROM (
:Select_All FROM supplier WHERE (:Hash_Mod_Function = 1) EXCEPT ALL
:Select_All FROM :Hash_Table_Part_01 ) diff;
diff_rhs_01
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_rhs_02 FROM (
:Select_All FROM supplier WHERE (:Hash_Mod_Function = 2) EXCEPT ALL
:Select_All FROM :Hash_Table_Part_02 ) diff;
diff_rhs_02
---------------------------------------------------------------------
0
(1 row)

View File

@ -1,115 +0,0 @@
--
-- WORKER_RANGE_PARTITION
--
\set JobId 201010
\set TaskId 101101
\set Partition_Column l_orderkey
\set Partition_Column_Text '\'l_orderkey\''
\set Partition_Column_Type 20
\set Select_Query_Text '\'SELECT * FROM lineitem\''
\set Select_All 'SELECT *'
\set Table_Part_00 lineitem_range_part_00
\set Table_Part_01 lineitem_range_part_01
\set Table_Part_02 lineitem_range_part_02
\set Table_Part_03 lineitem_range_part_03
SELECT usesysid AS userid FROM pg_user WHERE usename = current_user \gset
\set File_Basedir base/pgsql_job_cache
\set Table_File_00 :File_Basedir/job_:JobId/task_:TaskId/p_00000.:userid
\set Table_File_01 :File_Basedir/job_:JobId/task_:TaskId/p_00001.:userid
\set Table_File_02 :File_Basedir/job_:JobId/task_:TaskId/p_00002.:userid
\set Table_File_03 :File_Basedir/job_:JobId/task_:TaskId/p_00003.:userid
-- Run select query, and apply range partitioning on query results
SELECT worker_range_partition_table(:JobId, :TaskId, :Select_Query_Text,
:Partition_Column_Text, :Partition_Column_Type,
ARRAY[1, 3000, 12000]::_int8);
worker_range_partition_table
---------------------------------------------------------------------
(1 row)
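The three split points above carve the key space into four ranges; as an illustrative summary of the predicates the EXCEPT ALL checks below use:
-- p_00000: l_orderkey < 1
-- p_00001: l_orderkey >= 1 AND l_orderkey < 3000
-- p_00002: l_orderkey >= 3000 AND l_orderkey < 12000
-- p_00003: l_orderkey >= 12000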
COPY :Table_Part_00 FROM :'Table_File_00';
COPY :Table_Part_01 FROM :'Table_File_01';
COPY :Table_Part_02 FROM :'Table_File_02';
COPY :Table_Part_03 FROM :'Table_File_03';
SELECT COUNT(*) FROM :Table_Part_00;
count
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) FROM :Table_Part_03;
count
---------------------------------------------------------------------
3047
(1 row)
-- We first compute the difference of partition tables against the base table.
-- Then, we compute the difference of the base table against partitioned tables.
SELECT COUNT(*) AS diff_lhs_00 FROM (
:Select_All FROM :Table_Part_00 EXCEPT ALL
:Select_All FROM lineitem WHERE :Partition_Column < 1 ) diff;
diff_lhs_00
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_lhs_01 FROM (
:Select_All FROM :Table_Part_01 EXCEPT ALL
:Select_All FROM lineitem WHERE :Partition_Column >= 1 AND
:Partition_Column < 3000 ) diff;
diff_lhs_01
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_lhs_02 FROM (
:Select_All FROM :Table_Part_02 EXCEPT ALL
:Select_All FROM lineitem WHERE :Partition_Column >= 3000 AND
:Partition_Column < 12000 ) diff;
diff_lhs_02
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_lhs_03 FROM (
:Select_All FROM :Table_Part_03 EXCEPT ALL
:Select_All FROM lineitem WHERE :Partition_Column >= 12000 ) diff;
diff_lhs_03
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_rhs_00 FROM (
:Select_All FROM lineitem WHERE :Partition_Column < 1 EXCEPT ALL
:Select_All FROM :Table_Part_00 ) diff;
diff_rhs_00
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_rhs_01 FROM (
:Select_All FROM lineitem WHERE :Partition_Column >= 1 AND
:Partition_Column < 3000 EXCEPT ALL
:Select_All FROM :Table_Part_01 ) diff;
diff_rhs_01
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_rhs_02 FROM (
:Select_All FROM lineitem WHERE :Partition_Column >= 3000 AND
:Partition_Column < 12000 EXCEPT ALL
:Select_All FROM :Table_Part_02 ) diff;
diff_rhs_02
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_rhs_03 FROM (
:Select_All FROM lineitem WHERE :Partition_Column >= 12000 EXCEPT ALL
:Select_All FROM :Table_Part_03 ) diff;
diff_rhs_03
---------------------------------------------------------------------
0
(1 row)

View File

@ -1,129 +0,0 @@
--
-- WORKER_RANGE_PARTITION_COMPLEX
--
\set JobId 201010
\set TaskId 101102
\set Partition_Column l_partkey
\set Partition_Column_Text '\'l_partkey\''
\set Partition_Column_Type 23
\set Select_Columns 'SELECT l_partkey, l_discount, l_shipdate, l_comment'
\set Select_Filters 'l_shipdate >= date \'1992-01-15\' AND l_discount between 0.02 AND 0.08'
\set Table_Part_00 lineitem_range_complex_part_00
\set Table_Part_01 lineitem_range_complex_part_01
\set Table_Part_02 lineitem_range_complex_part_02
\set Table_Part_03 lineitem_range_complex_part_03
SELECT usesysid AS userid FROM pg_user WHERE usename = current_user \gset
\set File_Basedir base/pgsql_job_cache
\set Table_File_00 :File_Basedir/job_:JobId/task_:TaskId/p_00000.:userid
\set Table_File_01 :File_Basedir/job_:JobId/task_:TaskId/p_00001.:userid
\set Table_File_02 :File_Basedir/job_:JobId/task_:TaskId/p_00002.:userid
\set Table_File_03 :File_Basedir/job_:JobId/task_:TaskId/p_00003.:userid
-- Run hardcoded complex select query, and apply range partitioning on query
-- results
SELECT worker_range_partition_table(:JobId, :TaskId,
'SELECT l_partkey, l_discount, l_shipdate, l_comment'
' FROM lineitem '
' WHERE l_shipdate >= date ''1992-01-15'''
' AND l_discount between 0.02 AND 0.08',
:Partition_Column_Text, :Partition_Column_Type,
ARRAY[101, 12000, 18000]::_int4);
worker_range_partition_table
---------------------------------------------------------------------
(1 row)
-- Copy partitioned data files into tables for testing purposes
COPY :Table_Part_00 FROM :'Table_File_00';
COPY :Table_Part_01 FROM :'Table_File_01';
COPY :Table_Part_02 FROM :'Table_File_02';
COPY :Table_Part_03 FROM :'Table_File_03';
SELECT COUNT(*) FROM :Table_Part_00;
count
---------------------------------------------------------------------
3
(1 row)
SELECT COUNT(*) FROM :Table_Part_03;
count
---------------------------------------------------------------------
7022
(1 row)
-- We first compute the difference of partition tables against the base table.
-- Then, we compute the difference of the base table against partitioned tables.
SELECT COUNT(*) AS diff_lhs_00 FROM (
:Select_Columns FROM :Table_Part_00 EXCEPT ALL
:Select_Columns FROM lineitem WHERE :Select_Filters AND
:Partition_Column < 101 ) diff;
diff_lhs_00
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_lhs_01 FROM (
:Select_Columns FROM :Table_Part_01 EXCEPT ALL
:Select_Columns FROM lineitem WHERE :Select_Filters AND
:Partition_Column >= 101 AND
:Partition_Column < 12000 ) diff;
diff_lhs_01
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_lhs_02 FROM (
:Select_Columns FROM :Table_Part_02 EXCEPT ALL
:Select_Columns FROM lineitem WHERE :Select_Filters AND
:Partition_Column >= 12000 AND
:Partition_Column < 18000 ) diff;
diff_lhs_02
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_lhs_03 FROM (
:Select_Columns FROM :Table_Part_03 EXCEPT ALL
:Select_Columns FROM lineitem WHERE :Select_Filters AND
:Partition_Column >= 18000 ) diff;
diff_lhs_03
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_rhs_00 FROM (
:Select_Columns FROM lineitem WHERE :Select_Filters AND
:Partition_Column < 101 EXCEPT ALL
:Select_Columns FROM :Table_Part_00 ) diff;
diff_rhs_00
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_rhs_01 FROM (
:Select_Columns FROM lineitem WHERE :Select_Filters AND
:Partition_Column >= 101 AND
:Partition_Column < 12000 EXCEPT ALL
:Select_Columns FROM :Table_Part_01 ) diff;
diff_rhs_01
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_rhs_02 FROM (
:Select_Columns FROM lineitem WHERE :Select_Filters AND
:Partition_Column >= 12000 AND
:Partition_Column < 18000 EXCEPT ALL
:Select_Columns FROM :Table_Part_02 ) diff;
diff_rhs_02
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) AS diff_rhs_03 FROM (
:Select_Columns FROM lineitem WHERE :Select_Filters AND
:Partition_Column >= 18000 EXCEPT ALL
:Select_Columns FROM :Table_Part_03 ) diff;
diff_rhs_03
---------------------------------------------------------------------
0
(1 row)

View File

@ -23,9 +23,3 @@ copy customer_append FROM '@abs_srcdir@/data/customer.3.data' with (delimiter '|
SELECT master_create_empty_shard('part_append') AS shardid \gset
copy part_append FROM '@abs_srcdir@/data/part.more.data' with (delimiter '|', append_to_shard :shardid);
-- Exchange partition files in binary format in remaining tests
ALTER SYSTEM SET citus.binary_worker_copy_format TO on;
SELECT pg_reload_conf();
SELECT success FROM run_command_on_workers('ALTER SYSTEM SET citus.binary_worker_copy_format TO on');
SELECT success FROM run_command_on_workers('SELECT pg_reload_conf()');

View File

@ -1,12 +0,0 @@
--
-- WORKER_COPY
--
SET citus.next_shard_id TO 260000;
COPY lineitem FROM '@abs_srcdir@/data/lineitem.1.data' WITH DELIMITER '|';
COPY lineitem FROM '@abs_srcdir@/data/lineitem.2.data' WITH DELIMITER '|';
COPY supplier FROM '@abs_srcdir@/data/supplier.data' WITH DELIMITER '|';

View File

@ -14,25 +14,3 @@ copy customer_append FROM '@abs_srcdir@/data/customer.2.data' with (delimiter '|
copy customer_append FROM '@abs_srcdir@/data/customer.3.data' with (delimiter '|', append_to_shard :shardid2);
SELECT master_create_empty_shard('part_append') AS shardid \gset
copy part_append FROM '@abs_srcdir@/data/part.more.data' with (delimiter '|', append_to_shard :shardid);
-- Exchange partition files in binary format in remaining tests
ALTER SYSTEM SET citus.binary_worker_copy_format TO on;
SELECT pg_reload_conf();
pg_reload_conf
---------------------------------------------------------------------
t
(1 row)
SELECT success FROM run_command_on_workers('ALTER SYSTEM SET citus.binary_worker_copy_format TO on');
success
---------------------------------------------------------------------
t
t
(2 rows)
SELECT success FROM run_command_on_workers('SELECT pg_reload_conf()');
success
---------------------------------------------------------------------
t
t
(2 rows)

View File

@ -1,7 +0,0 @@
--
-- WORKER_COPY
--
SET citus.next_shard_id TO 260000;
COPY lineitem FROM '@abs_srcdir@/data/lineitem.1.data' WITH DELIMITER '|';
COPY lineitem FROM '@abs_srcdir@/data/lineitem.2.data' WITH DELIMITER '|';
COPY supplier FROM '@abs_srcdir@/data/supplier.data' WITH DELIMITER '|';

View File

@ -16,4 +16,3 @@
/multi_outer_join.sql
/multi_outer_join_reference.sql
/tablespace.sql
/worker_copy.sql

View File

@ -459,6 +459,10 @@ SELECT * FROM multi_extension.print_extension_changes();
ALTER EXTENSION citus UPDATE TO '11.0-1';
SELECT * FROM multi_extension.print_extension_changes();
-- Snapshot of state at 11.1-1
ALTER EXTENSION citus UPDATE TO '11.1-1';
SELECT * FROM multi_extension.print_extension_changes();
DROP TABLE multi_extension.prev_objects, multi_extension.extension_diff;
-- show running version

View File

@ -77,12 +77,6 @@ SET citus.shard_replication_factor TO 1;
PREPARE prepare_insert AS INSERT INTO test VALUES ($1);
PREPARE prepare_select AS SELECT count(*) FROM test;
-- not allowed to read absolute paths, even as superuser
COPY "/etc/passwd" TO STDOUT WITH (format transmit);
-- not allowed to read paths outside pgsql_job_cache, even as superuser
COPY "postgresql.conf" TO STDOUT WITH (format transmit);
-- check full permission
SET ROLE full_access;
@ -102,13 +96,6 @@ SELECT count(*) FROM test a JOIN test b ON (a.val = b.val) WHERE a.id = 1 AND b.
SET citus.enable_repartition_joins TO true;
SELECT count(*) FROM test a JOIN test b ON (a.val = b.val) WHERE a.id = 1 AND b.id = 2;
-- should not be able to transmit directly
COPY "postgresql.conf" TO STDOUT WITH (format transmit);
-- should not be able to transmit directly
COPY "postgresql.conf" TO STDOUT WITH (format transmit);
-- check read permission
SET ROLE read_access;
@ -133,9 +120,6 @@ SELECT count(*) FROM test a JOIN test b ON (a.val = b.val) WHERE a.id = 1 AND b.
SET citus.enable_repartition_joins TO true;
SELECT count(*) FROM test a JOIN test b ON (a.val = b.val) WHERE a.id = 1 AND b.id = 2;
-- should not be able to transmit directly
COPY "postgresql.conf" TO STDOUT WITH (format transmit);
-- should not be allowed to take aggressive locks on table
BEGIN;
SELECT lock_relation_if_exists('test', 'ACCESS SHARE');
@ -199,10 +183,6 @@ SELECT count(*) FROM test a JOIN test b ON (a.val = b.val) WHERE a.id = 1 AND b.
SET citus.enable_repartition_joins TO true;
SELECT count(*) FROM test a JOIN test b ON (a.val = b.val) WHERE a.id = 1 AND b.id = 2;
-- should not be able to transmit directly
COPY "postgresql.conf" TO STDOUT WITH (format transmit);
-- should be able to use intermediate results as any user
BEGIN;
SELECT create_intermediate_result('topten', 'SELECT s FROM generate_series(1,10) s');
@ -355,50 +335,7 @@ CREATE TABLE full_access_user_schema.t2(id int);
SELECT create_distributed_table('full_access_user_schema.t2', 'id');
RESET ROLE;
-- the super user should be the only one able to call worker_cleanup_job_schema_cache
SELECT worker_cleanup_job_schema_cache();
SET ROLE full_access;
SELECT worker_cleanup_job_schema_cache();
SET ROLE usage_access;
SELECT worker_cleanup_job_schema_cache();
SET ROLE read_access;
SELECT worker_cleanup_job_schema_cache();
SET ROLE no_access;
SELECT worker_cleanup_job_schema_cache();
RESET ROLE;
-- to test access to files created during repartitioning, we will create some on worker 1
\c - - - :worker_1_port
SET citus.enable_metadata_sync TO OFF;
CREATE OR REPLACE FUNCTION citus_rm_job_directory(bigint)
RETURNS void
AS 'citus'
LANGUAGE C STRICT;
RESET citus.enable_metadata_sync;
SET ROLE full_access;
SELECT worker_hash_partition_table(42,1,'SELECT a FROM generate_series(1,100) AS a', 'a', 23, ARRAY[-2147483648, -1073741824, 0, 1073741824]::int4[]);
RESET ROLE;
-- all transfer attempts are initiated from other workers
\c - - - :worker_2_port
SET citus.enable_metadata_sync TO OFF;
CREATE OR REPLACE FUNCTION citus_rm_job_directory(bigint)
RETURNS void
AS 'citus'
LANGUAGE C STRICT;
RESET citus.enable_metadata_sync;
-- super user should not be able to copy files created by a user
SELECT worker_fetch_partition_file(42, 1, 1, 1, 'localhost', :worker_1_port);
-- different user should not be able to fetch partition file
SET ROLE usage_access;
SELECT worker_fetch_partition_file(42, 1, 1, 1, 'localhost', :worker_1_port);
-- only the user who created the files should be able to fetch
SET ROLE full_access;
SELECT worker_fetch_partition_file(42, 1, 1, 1, 'localhost', :worker_1_port);
RESET ROLE;
-- non-superuser should be able to use worker_append_table_to_shard on their own shard
SET ROLE full_access;
CREATE TABLE full_access_user_schema.source_table (id int);
@ -423,44 +360,6 @@ RESET ROLE;
DROP TABLE full_access_user_schema.source_table, full_access_user_schema.shard_0;
-- now we will test that only the user who owns the fetched file is able to merge it into
-- a table
-- test that no other user can merge the downloaded file before the task is tracked
SET ROLE usage_access;
SELECT worker_merge_files_into_table(42, 1, ARRAY['a'], ARRAY['integer']);
RESET ROLE;
-- test that no other user can merge the downloaded file after the task is tracked
SET ROLE usage_access;
SELECT worker_merge_files_into_table(42, 1, ARRAY['a'], ARRAY['integer']);
RESET ROLE;
-- test that the super user is unable to read the contents of the intermediate file,
-- although it does create the table
SELECT worker_merge_files_into_table(42, 1, ARRAY['a'], ARRAY['integer']);
SELECT count(*) FROM pg_merge_job_0042.task_000001;
DROP TABLE pg_merge_job_0042.task_000001; -- drop table so we can reuse the same files for more tests
SET ROLE full_access;
SELECT worker_merge_files_into_table(42, 1, ARRAY['a'], ARRAY['integer']);
SELECT count(*) FROM pg_merge_job_0042.task_000001;
DROP TABLE pg_merge_job_0042.task_000001; -- drop table so we can reuse the same files for more tests
RESET ROLE;
SELECT count(*) FROM pg_merge_job_0042.task_000001_merge;
SELECT count(*) FROM pg_merge_job_0042.task_000001;
DROP TABLE pg_merge_job_0042.task_000001, pg_merge_job_0042.task_000001_merge; -- drop table so we can reuse the same files for more tests
SELECT count(*) FROM pg_merge_job_0042.task_000001_merge;
SELECT count(*) FROM pg_merge_job_0042.task_000001;
DROP TABLE pg_merge_job_0042.task_000001, pg_merge_job_0042.task_000001_merge; -- drop table so we can reuse the same files for more tests
RESET ROLE;
SELECT citus_rm_job_directory(42::bigint);
\c - - - :worker_1_port
SELECT citus_rm_job_directory(42::bigint);
\c - - - :master_port
DROP SCHEMA full_access_user_schema CASCADE;

View File

@ -11,18 +11,6 @@
SET citus.next_shard_id TO 810000;
SET citus.enable_unique_job_ids TO off;
CREATE FUNCTION citus_rm_job_directory(bigint)
RETURNS void
AS 'citus'
LANGUAGE C STRICT;
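-- remove any job directories left over from earlier runs; multiplying the
-- count by zero keeps the output stable regardless of how many directories
-- existed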
with silence as (
SELECT citus_rm_job_directory(split_part(f, '_', 2)::bigint)
from pg_ls_dir('base/pgsql_job_cache') f
)
select count(*) * 0 zero
from silence;
BEGIN;
-- pg_ls_dir() displays jobids. We explicitly set the jobId sequence

View File

@ -1,98 +0,0 @@
--
-- WORKER_BINARY_DATA_PARTITION
--
\set JobId 201010
\set TaskId 101105
\set Partition_Column textcolumn
\set Partition_Column_Text '\'textcolumn\''
\set Partition_Column_Type 25
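-- 25 is the OID of the built-in text type (pg_type.oid)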
\set Select_Query_Text '\'SELECT * FROM binary_data_table\''
\set Select_All 'SELECT *'
\set Table_Name binary_data_table
\set Table_Part_00 binary_data_table_part_00
\set Table_Part_01 binary_data_table_part_01
\set Table_Part_02 binary_data_table_part_02
SELECT usesysid AS userid FROM pg_user WHERE usename = current_user \gset
\set File_Basedir base/pgsql_job_cache
\set Table_File_00 :File_Basedir/job_:JobId/task_:TaskId/p_00000.:userid
\set Table_File_01 :File_Basedir/job_:JobId/task_:TaskId/p_00001.:userid
\set Table_File_02 :File_Basedir/job_:JobId/task_:TaskId/p_00002.:userid
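-- partition files are written as
-- base/pgsql_job_cache/job_<jobid>/task_<taskid>/p_<partition number>.<user OID>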
-- Create table with special characters
CREATE TABLE :Table_Name(textcolumn text, binarycolumn bytea);
COPY :Table_Name FROM stdin;
aaa \013\120
binary data first \012\120\20\21
binary data second \21\120\130
binary data hex \x1E\x0D
binary data with tabs \012\t\120\v
some\t tabs\t with \t spaces text with tabs
some\\ special\n characters \b text with special characters
some ' and " and '' characters text with quotes
\N null text
\N null text 2
\N null text 3
\\N actual backslash N value
\NN null string and N
empty string
\.
SELECT length(binarycolumn) FROM :Table_Name;
-- Run select query, and apply range partitioning on query results
SELECT worker_range_partition_table(:JobId, :TaskId, :Select_Query_Text,
:Partition_Column_Text, :Partition_Column_Type,
ARRAY['aaa', 'some']::_text);
-- Copy range partitioned files into tables
CREATE TABLE :Table_Part_00 ( LIKE :Table_Name );
CREATE TABLE :Table_Part_01 ( LIKE :Table_Name );
CREATE TABLE :Table_Part_02 ( LIKE :Table_Name );
COPY :Table_Part_00 FROM :'Table_File_00';
COPY :Table_Part_01 FROM :'Table_File_01';
COPY :Table_Part_02 FROM :'Table_File_02';
-- The union of the three partitions should have as many rows as the original table
SELECT COUNT(*) AS total_row_count FROM (
SELECT * FROM :Table_Part_00 UNION ALL
SELECT * FROM :Table_Part_01 UNION ALL
SELECT * FROM :Table_Part_02 ) AS all_rows;
-- We first compute the difference of partition tables against the base table.
-- Then, we compute the difference of the base table against partitioned tables.
SELECT COUNT(*) AS diff_lhs_00 FROM (
:Select_All FROM :Table_Part_00 EXCEPT ALL
:Select_All FROM :Table_Name WHERE :Partition_Column IS NULL OR
:Partition_Column < 'aaa' ) diff;
SELECT COUNT(*) AS diff_lhs_01 FROM (
:Select_All FROM :Table_Part_01 EXCEPT ALL
:Select_All FROM :Table_Name WHERE :Partition_Column >= 'aaa' AND
:Partition_Column < 'some' ) diff;
SELECT COUNT(*) AS diff_lhs_02 FROM (
:Select_All FROM :Table_Part_02 EXCEPT ALL
:Select_All FROM :Table_Name WHERE :Partition_Column >= 'some' ) diff;
SELECT COUNT(*) AS diff_rhs_00 FROM (
:Select_All FROM :Table_Name WHERE :Partition_Column IS NULL OR
:Partition_Column < 'aaa' EXCEPT ALL
:Select_All FROM :Table_Part_00 ) diff;
SELECT COUNT(*) AS diff_rhs_01 FROM (
:Select_All FROM :Table_Name WHERE :Partition_Column >= 'aaa' AND
:Partition_Column < 'some' EXCEPT ALL
:Select_All FROM :Table_Part_01 ) diff;
SELECT COUNT(*) AS diff_rhs_02 FROM (
:Select_All FROM :Table_Name WHERE :Partition_Column >= 'some' EXCEPT ALL
:Select_All FROM :Table_Part_02 ) diff;

View File

@ -1,91 +0,0 @@
--
-- WORKER_CREATE_TABLE
--
-- Create new table definitions for lineitem and supplier tables to test worker
-- node execution logic. For now, the tests include range and hash partitioning
-- of existing tables.
SET citus.next_shard_id TO 1110000;
CREATE TABLE lineitem (
l_orderkey bigint not null,
l_partkey integer not null,
l_suppkey integer not null,
l_linenumber integer not null,
l_quantity decimal(15, 2) not null,
l_extendedprice decimal(15, 2) not null,
l_discount decimal(15, 2) not null,
l_tax decimal(15, 2) not null,
l_returnflag char(1) not null,
l_linestatus char(1) not null,
l_shipdate date not null,
l_commitdate date not null,
l_receiptdate date not null,
l_shipinstruct char(25) not null,
l_shipmode char(10) not null,
l_comment varchar(44) not null,
PRIMARY KEY(l_orderkey, l_linenumber) );
CREATE TABLE lineitem_complex (
l_partkey integer not null,
l_discount decimal(15, 2) not null,
l_shipdate date not null,
l_comment varchar(44) not null );
-- Range partitioned lineitem data are inserted into these four tables
CREATE TABLE lineitem_range_part_00 ( LIKE lineitem );
CREATE TABLE lineitem_range_part_01 ( LIKE lineitem );
CREATE TABLE lineitem_range_part_02 ( LIKE lineitem );
CREATE TABLE lineitem_range_part_03 ( LIKE lineitem );
-- Complex range partitioned lineitem data are inserted into these four tables
CREATE TABLE lineitem_range_complex_part_00 ( LIKE lineitem_complex );
CREATE TABLE lineitem_range_complex_part_01 ( LIKE lineitem_complex );
CREATE TABLE lineitem_range_complex_part_02 ( LIKE lineitem_complex );
CREATE TABLE lineitem_range_complex_part_03 ( LIKE lineitem_complex );
-- Hash partitioned lineitem data are inserted into these four tables
CREATE TABLE lineitem_hash_part_00 ( LIKE lineitem );
CREATE TABLE lineitem_hash_part_01 ( LIKE lineitem );
CREATE TABLE lineitem_hash_part_02 ( LIKE lineitem );
CREATE TABLE lineitem_hash_part_03 ( LIKE lineitem );
-- Complex hash partitioned lineitem data are inserted into these four tables
CREATE TABLE lineitem_hash_complex_part_00 ( LIKE lineitem_complex );
CREATE TABLE lineitem_hash_complex_part_01 ( LIKE lineitem_complex );
CREATE TABLE lineitem_hash_complex_part_02 ( LIKE lineitem_complex );
CREATE TABLE lineitem_hash_complex_part_03 ( LIKE lineitem_complex );
-- Now create a supplier table to test repartitioning the data on the nation key
-- column, where the column's values can be null or zero.
CREATE TABLE SUPPLIER
(
s_suppkey integer not null,
s_name char(25) not null,
s_address varchar(40) not null,
s_nationkey integer,
s_phone char(15) not null,
s_acctbal decimal(15,2) not null,
s_comment varchar(101) not null
);
-- Range partitioned supplier data are inserted into three tables
CREATE TABLE supplier_range_part_00 ( LIKE supplier );
CREATE TABLE supplier_range_part_01 ( LIKE supplier );
CREATE TABLE supplier_range_part_02 ( LIKE supplier );
-- Hash partitioned supplier data are inserted into three tables
CREATE TABLE supplier_hash_part_00 ( LIKE supplier );
CREATE TABLE supplier_hash_part_01 ( LIKE supplier );
CREATE TABLE supplier_hash_part_02 ( LIKE supplier );

View File

@ -1,8 +0,0 @@
-- The files we use in the following tests use the text-based worker copy
-- format. So we disable the binary worker copy format here.
-- This is a no-op for PG_VERSION_NUM < 14, because the default is off there.
ALTER SYSTEM SET citus.binary_worker_copy_format TO off;
SELECT pg_reload_conf();
SELECT success FROM run_command_on_workers('ALTER SYSTEM SET citus.binary_worker_copy_format TO off');
SELECT success FROM run_command_on_workers('SELECT pg_reload_conf()');

View File

@ -1,76 +0,0 @@
--
-- WORKER_HASH_PARTITION
--
\set JobId 201010
\set TaskId 101103
\set Partition_Column l_orderkey
\set Partition_Column_Text '\'l_orderkey\''
\set Partition_Column_Type '\'int8\''
\set Partition_Count 4
\set hashTokenIncrement 1073741824
\set Select_Query_Text '\'SELECT * FROM lineitem\''
\set Select_All 'SELECT *'
-- The hash function is mapped to behave exactly as the Citus planner does; a
-- worked sketch follows the variable setup below
\set Hash_Mod_Function '( hashint8(l_orderkey)::int8 - (-2147483648))::int8 / :hashTokenIncrement::int8'
\set Table_Part_00 lineitem_hash_part_00
\set Table_Part_01 lineitem_hash_part_01
\set Table_Part_02 lineitem_hash_part_02
\set Table_Part_03 lineitem_hash_part_03
SELECT usesysid AS userid FROM pg_user WHERE usename = current_user \gset
\set File_Basedir base/pgsql_job_cache
\set Table_File_00 :File_Basedir/job_:JobId/task_:TaskId/p_00000.:userid
\set Table_File_01 :File_Basedir/job_:JobId/task_:TaskId/p_00001.:userid
\set Table_File_02 :File_Basedir/job_:JobId/task_:TaskId/p_00002.:userid
\set Table_File_03 :File_Basedir/job_:JobId/task_:TaskId/p_00003.:userid
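-- A minimal sketch (not part of the original test) of the bucket computation
-- encoded in :Hash_Mod_Function: with four partitions, the int4 hash space is
-- split into ranges of 1073741824 (2^32 / 4) starting at -2147483648, so
-- shifting a hash value by the minimum and dividing by the increment yields a
-- bucket index between 0 and 3.
SELECT orderkey,
       (hashint8(orderkey)::int8 - (-2147483648))::int8 / 1073741824 AS bucket
FROM (VALUES (1::int8), (2::int8), (3::int8)) AS sample(orderkey);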
-- Run select query, and apply hash partitioning on query results
SELECT worker_hash_partition_table(:JobId, :TaskId, :Select_Query_Text,
:Partition_Column_Text, :Partition_Column_Type::regtype,
ARRAY[-2147483648, -1073741824, 0, 1073741824]::int4[]);
COPY :Table_Part_00 FROM :'Table_File_00';
COPY :Table_Part_01 FROM :'Table_File_01';
COPY :Table_Part_02 FROM :'Table_File_02';
COPY :Table_Part_03 FROM :'Table_File_03';
SELECT COUNT(*) FROM :Table_Part_00;
SELECT COUNT(*) FROM :Table_Part_01;
SELECT COUNT(*) FROM :Table_Part_02;
SELECT COUNT(*) FROM :Table_Part_03;
-- We first compute the difference of partition tables against the base table.
-- Then, we compute the difference of the base table against partitioned tables.
SELECT COUNT(*) AS diff_lhs_00 FROM (
:Select_All FROM :Table_Part_00 EXCEPT ALL
:Select_All FROM lineitem WHERE (:Hash_Mod_Function = 0) ) diff;
SELECT COUNT(*) AS diff_lhs_01 FROM (
:Select_All FROM :Table_Part_01 EXCEPT ALL
:Select_All FROM lineitem WHERE (:Hash_Mod_Function = 1) ) diff;
SELECT COUNT(*) AS diff_lhs_02 FROM (
:Select_All FROM :Table_Part_02 EXCEPT ALL
:Select_All FROM lineitem WHERE (:Hash_Mod_Function = 2) ) diff;
SELECT COUNT(*) AS diff_lhs_03 FROM (
:Select_All FROM :Table_Part_03 EXCEPT ALL
:Select_All FROM lineitem WHERE (:Hash_Mod_Function = 3) ) diff;
SELECT COUNT(*) AS diff_rhs_00 FROM (
:Select_All FROM lineitem WHERE (:Hash_Mod_Function = 0) EXCEPT ALL
:Select_All FROM :Table_Part_00 ) diff;
SELECT COUNT(*) AS diff_rhs_01 FROM (
:Select_All FROM lineitem WHERE (:Hash_Mod_Function = 1) EXCEPT ALL
:Select_All FROM :Table_Part_01 ) diff;
SELECT COUNT(*) AS diff_rhs_02 FROM (
:Select_All FROM lineitem WHERE (:Hash_Mod_Function = 2) EXCEPT ALL
:Select_All FROM :Table_Part_02 ) diff;
SELECT COUNT(*) AS diff_rhs_03 FROM (
:Select_All FROM lineitem WHERE (:Hash_Mod_Function = 3) EXCEPT ALL
:Select_All FROM :Table_Part_03 ) diff;

View File

@ -1,89 +0,0 @@
--
-- WORKER_HASH_PARTITION_COMPLEX
--
\set JobId 201010
\set TaskId 101104
\set Partition_Column l_partkey
\set Partition_Column_Text '\'l_partkey\''
\set Partition_Column_Type 23
\set Partition_Count 4
\set hashTokenIncrement 1073741824
\set Select_Columns 'SELECT l_partkey, l_discount, l_shipdate, l_comment'
\set Select_Filters 'l_shipdate >= date \'1992-01-15\' AND l_discount between 0.02 AND 0.08'
\set Hash_Mod_Function '( hashint4(l_partkey)::int8 - (-2147483648))::int8 / :hashTokenIncrement::int8'
\set Table_Part_00 lineitem_hash_complex_part_00
\set Table_Part_01 lineitem_hash_complex_part_01
\set Table_Part_02 lineitem_hash_complex_part_02
\set Table_Part_03 lineitem_hash_complex_part_03
SELECT usesysid AS userid FROM pg_user WHERE usename = current_user \gset
\set File_Basedir base/pgsql_job_cache
\set Table_File_00 :File_Basedir/job_:JobId/task_:TaskId/p_00000.:userid
\set Table_File_01 :File_Basedir/job_:JobId/task_:TaskId/p_00001.:userid
\set Table_File_02 :File_Basedir/job_:JobId/task_:TaskId/p_00002.:userid
\set Table_File_03 :File_Basedir/job_:JobId/task_:TaskId/p_00003.:userid
-- Run hardcoded complex select query, and apply hash partitioning on query
-- results
SELECT worker_hash_partition_table(:JobId, :TaskId,
'SELECT l_partkey, l_discount, l_shipdate, l_comment'
' FROM lineitem '
' WHERE l_shipdate >= date ''1992-01-15'''
' AND l_discount between 0.02 AND 0.08',
:Partition_Column_Text, :Partition_Column_Type,
ARRAY[-2147483648, -1073741824, 0, 1073741824]::int4[]);
-- Copy partitioned data files into tables for testing purposes
COPY :Table_Part_00 FROM :'Table_File_00';
COPY :Table_Part_01 FROM :'Table_File_01';
COPY :Table_Part_02 FROM :'Table_File_02';
COPY :Table_Part_03 FROM :'Table_File_03';
SELECT COUNT(*) FROM :Table_Part_00;
SELECT COUNT(*) FROM :Table_Part_03;
-- We first compute the difference of partition tables against the base table.
-- Then, we compute the difference of the base table against partitioned tables.
SELECT COUNT(*) AS diff_lhs_00 FROM (
:Select_Columns FROM :Table_Part_00 EXCEPT ALL
:Select_Columns FROM lineitem WHERE :Select_Filters AND
(:Hash_Mod_Function = 0) ) diff;
SELECT COUNT(*) AS diff_lhs_01 FROM (
:Select_Columns FROM :Table_Part_01 EXCEPT ALL
:Select_Columns FROM lineitem WHERE :Select_Filters AND
(:Hash_Mod_Function = 1) ) diff;
SELECT COUNT(*) AS diff_lhs_02 FROM (
:Select_Columns FROM :Table_Part_02 EXCEPT ALL
:Select_Columns FROM lineitem WHERE :Select_Filters AND
(:Hash_Mod_Function = 2) ) diff;
SELECT COUNT(*) AS diff_lhs_03 FROM (
:Select_Columns FROM :Table_Part_03 EXCEPT ALL
:Select_Columns FROM lineitem WHERE :Select_Filters AND
(:Hash_Mod_Function = 3) ) diff;
SELECT COUNT(*) AS diff_rhs_00 FROM (
:Select_Columns FROM lineitem WHERE :Select_Filters AND
(:Hash_Mod_Function = 0) EXCEPT ALL
:Select_Columns FROM :Table_Part_00 ) diff;
SELECT COUNT(*) AS diff_rhs_01 FROM (
:Select_Columns FROM lineitem WHERE :Select_Filters AND
(:Hash_Mod_Function = 1) EXCEPT ALL
:Select_Columns FROM :Table_Part_01 ) diff;
SELECT COUNT(*) AS diff_rhs_02 FROM (
:Select_Columns FROM lineitem WHERE :Select_Filters AND
(:Hash_Mod_Function = 2) EXCEPT ALL
:Select_Columns FROM :Table_Part_02 ) diff;
SELECT COUNT(*) AS diff_rhs_03 FROM (
:Select_Columns FROM lineitem WHERE :Select_Filters AND
(:Hash_Mod_Function = 3) EXCEPT ALL
:Select_Columns FROM :Table_Part_03 ) diff;

View File

@ -1,35 +0,0 @@
--
-- WORKER_MERGE_HASH_FILES
--
\set JobId 201010
\set TaskId 101103
\set Task_Table_Name public.task_101103
\set Select_All 'SELECT *'
-- TaskId determines our dependency on hash-partitioned files. We take these
-- files and merge them into a task table. We also pass the column names and
-- column types that are used to create the task table.
SELECT worker_merge_files_into_table(:JobId, :TaskId,
ARRAY['orderkey', 'partkey', 'suppkey', 'linenumber', 'quantity', 'extendedprice',
'discount', 'tax', 'returnflag', 'linestatus', 'shipdate', 'commitdate',
'receiptdate', 'shipinstruct', 'shipmode', 'comment']::_text,
ARRAY['bigint', 'integer', 'integer', 'integer', 'decimal(15, 2)', 'decimal(15, 2)',
'decimal(15, 2)', 'decimal(15, 2)', 'char(1)', 'char(1)', 'date', 'date',
'date', 'char(25)', 'char(10)', 'varchar(44)']::_text);
-- We first count elements from the merged table and the original table we hash
-- partitioned. We then compute the difference of these two tables.
SELECT COUNT(*) FROM :Task_Table_Name;
SELECT COUNT(*) FROM lineitem;
SELECT COUNT(*) AS diff_lhs FROM ( :Select_All FROM :Task_Table_Name EXCEPT ALL
:Select_All FROM lineitem ) diff;
SELECT COUNT(*) AS diff_rhs FROM ( :Select_All FROM lineitem EXCEPT ALL
:Select_All FROM :Task_Table_Name ) diff;

View File

@ -1,35 +0,0 @@
--
-- WORKER_MERGE_RANGE_FILES
--
\set JobId 201010
\set TaskId 101101
\set Task_Table_Name public.task_101101
\set Select_All 'SELECT *'
-- TaskId determines our dependency on range-partitioned files. We take these
-- files and merge them into a task table. We also pass the column names and
-- column types that are used to create the task table.
SELECT worker_merge_files_into_table(:JobId, :TaskId,
ARRAY['orderkey', 'partkey', 'suppkey', 'linenumber', 'quantity', 'extendedprice',
'discount', 'tax', 'returnflag', 'linestatus', 'shipdate', 'commitdate',
'receiptdate', 'shipinstruct', 'shipmode', 'comment']::_text,
ARRAY['bigint', 'integer', 'integer', 'integer', 'decimal(15, 2)', 'decimal(15, 2)',
'decimal(15, 2)', 'decimal(15, 2)', 'char(1)', 'char(1)', 'date', 'date',
'date', 'char(25)', 'char(10)', 'varchar(44)']::_text);
-- We first count elements from the merged table and the original table we range
-- partitioned. We then compute the difference of these two tables.
SELECT COUNT(*) FROM :Task_Table_Name;
SELECT COUNT(*) FROM lineitem;
SELECT COUNT(*) AS diff_lhs FROM ( :Select_All FROM :Task_Table_Name EXCEPT ALL
:Select_All FROM lineitem ) diff;
SELECT COUNT(*) AS diff_rhs FROM ( :Select_All FROM lineitem EXCEPT ALL
:Select_All FROM :Task_Table_Name ) diff;

View File

@ -1,127 +0,0 @@
--
-- WORKER_NULL_DATA_PARTITION
--
\set JobId 201010
\set Range_TaskId 101106
\set Partition_Column s_nationkey
\set Partition_Column_Text '\'s_nationkey\''
\set Partition_Column_Type 23
\set Select_Query_Text '\'SELECT * FROM supplier\''
\set Select_All 'SELECT *'
\set Range_Table_Part_00 supplier_range_part_00
\set Range_Table_Part_01 supplier_range_part_01
\set Range_Table_Part_02 supplier_range_part_02
SELECT usesysid AS userid FROM pg_user WHERE usename = current_user \gset
\set File_Basedir base/pgsql_job_cache
\set Range_Table_File_00 :File_Basedir/job_:JobId/task_:Range_TaskId/p_00000.:userid
\set Range_Table_File_01 :File_Basedir/job_:JobId/task_:Range_TaskId/p_00001.:userid
\set Range_Table_File_02 :File_Basedir/job_:JobId/task_:Range_TaskId/p_00002.:userid
-- Run select query, and apply range partitioning on query results. Note that
-- one of the split point values is 0. We are checking here that the partition
-- function doesn't treat 0 as null, and that range repartitioning correctly
-- puts null nation key values into the 0th repartition bucket.
SELECT worker_range_partition_table(:JobId, :Range_TaskId, :Select_Query_Text,
:Partition_Column_Text, :Partition_Column_Type,
ARRAY[0, 10]::_int4);
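-- An aside (not part of the original test): a plain range predicate can never
-- place NULL keys, because comparisons involving NULL yield NULL rather than
-- true or false; hence the explicit IS NULL branches in the diff checks below.
SELECT NULL::int < 0 AS lt_zero, NULL::int >= 0 AS ge_zero;  -- both return NULL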
-- Copy partitioned data files into tables for testing purposes
COPY :Range_Table_Part_00 FROM :'Range_Table_File_00';
COPY :Range_Table_Part_01 FROM :'Range_Table_File_01';
COPY :Range_Table_Part_02 FROM :'Range_Table_File_02';
SELECT COUNT(*) FROM :Range_Table_Part_00;
SELECT COUNT(*) FROM :Range_Table_Part_02;
-- We first compute the difference of partition tables against the base table.
-- Then, we compute the difference of the base table against partitioned tables.
SELECT COUNT(*) AS diff_lhs_00 FROM (
:Select_All FROM :Range_Table_Part_00 EXCEPT ALL
(:Select_All FROM supplier WHERE :Partition_Column < 0 OR
:Partition_Column IS NULL) ) diff;
SELECT COUNT(*) AS diff_lhs_01 FROM (
:Select_All FROM :Range_Table_Part_01 EXCEPT ALL
:Select_All FROM supplier WHERE :Partition_Column >= 0 AND
:Partition_Column < 10 ) diff;
SELECT COUNT(*) AS diff_rhs_02 FROM (
:Select_All FROM supplier WHERE :Partition_Column >= 10 EXCEPT ALL
:Select_All FROM :Range_Table_Part_02 ) diff;
SELECT COUNT(*) AS diff_rhs_00 FROM (
(:Select_All FROM supplier WHERE :Partition_Column < 0 OR
:Partition_Column IS NULL) EXCEPT ALL
:Select_All FROM :Range_Table_Part_00 ) diff;
SELECT COUNT(*) AS diff_rhs_01 FROM (
:Select_All FROM supplier WHERE :Partition_Column >= 0 AND
:Partition_Column < 10 EXCEPT ALL
:Select_All FROM :Range_Table_Part_01 ) diff;
SELECT COUNT(*) AS diff_rhs_02 FROM (
:Select_All FROM supplier WHERE :Partition_Column >= 10 EXCEPT ALL
:Select_All FROM :Range_Table_Part_02 ) diff;
-- Next, run select query and apply hash partitioning on query results. We are
-- checking here that hash repartitioning correctly puts null nation key values
-- into the 0th repartition bucket.
\set Hash_TaskId 101107
\set Partition_Count 4
\set Hash_Mod_Function '( hashint4(s_nationkey)::int8 - (-2147483648))::int8 / :hashTokenIncrement::int8'
\set hashTokenIncrement 1073741824
\set Hash_Table_Part_00 supplier_hash_part_00
\set Hash_Table_Part_01 supplier_hash_part_01
\set Hash_Table_Part_02 supplier_hash_part_02
\set File_Basedir base/pgsql_job_cache
\set Hash_Table_File_00 :File_Basedir/job_:JobId/task_:Hash_TaskId/p_00000.:userid
\set Hash_Table_File_01 :File_Basedir/job_:JobId/task_:Hash_TaskId/p_00001.:userid
\set Hash_Table_File_02 :File_Basedir/job_:JobId/task_:Hash_TaskId/p_00002.:userid
-- Run select query, and apply hash partitioning on query results
SELECT worker_hash_partition_table(:JobId, :Hash_TaskId, :Select_Query_Text,
:Partition_Column_Text, :Partition_Column_Type,
ARRAY[-2147483648, -1073741824, 0, 1073741824]::int4[]);
COPY :Hash_Table_Part_00 FROM :'Hash_Table_File_00';
COPY :Hash_Table_Part_01 FROM :'Hash_Table_File_01';
COPY :Hash_Table_Part_02 FROM :'Hash_Table_File_02';
SELECT COUNT(*) FROM :Hash_Table_Part_00;
SELECT COUNT(*) FROM :Hash_Table_Part_02;
-- We first compute the difference of partition tables against the base table.
-- Then, we compute the difference of the base table against partitioned tables.
SELECT COUNT(*) AS diff_lhs_00 FROM (
:Select_All FROM :Hash_Table_Part_00 EXCEPT ALL
(:Select_All FROM supplier WHERE (:Hash_Mod_Function = 0) OR
:Partition_Column IS NULL) ) diff;
SELECT COUNT(*) AS diff_lhs_01 FROM (
:Select_All FROM :Hash_Table_Part_01 EXCEPT ALL
:Select_All FROM supplier WHERE (:Hash_Mod_Function = 1) ) diff;
SELECT COUNT(*) AS diff_lhs_02 FROM (
:Select_All FROM :Hash_Table_Part_02 EXCEPT ALL
:Select_All FROM supplier WHERE (:Hash_Mod_Function = 2) ) diff;
SELECT COUNT(*) AS diff_rhs_00 FROM (
(:Select_All FROM supplier WHERE (:Hash_Mod_Function = 0) OR
:Partition_Column IS NULL) EXCEPT ALL
:Select_All FROM :Hash_Table_Part_00 ) diff;
SELECT COUNT(*) AS diff_rhs_01 FROM (
:Select_All FROM supplier WHERE (:Hash_Mod_Function = 1) EXCEPT ALL
:Select_All FROM :Hash_Table_Part_01 ) diff;
SELECT COUNT(*) AS diff_rhs_02 FROM (
:Select_All FROM supplier WHERE (:Hash_Mod_Function = 2) EXCEPT ALL
:Select_All FROM :Hash_Table_Part_02 ) diff;

View File

@ -1,74 +0,0 @@
--
-- WORKER_RANGE_PARTITION
--
\set JobId 201010
\set TaskId 101101
\set Partition_Column l_orderkey
\set Partition_Column_Text '\'l_orderkey\''
\set Partition_Column_Type 20
\set Select_Query_Text '\'SELECT * FROM lineitem\''
\set Select_All 'SELECT *'
\set Table_Part_00 lineitem_range_part_00
\set Table_Part_01 lineitem_range_part_01
\set Table_Part_02 lineitem_range_part_02
\set Table_Part_03 lineitem_range_part_03
SELECT usesysid AS userid FROM pg_user WHERE usename = current_user \gset
\set File_Basedir base/pgsql_job_cache
\set Table_File_00 :File_Basedir/job_:JobId/task_:TaskId/p_00000.:userid
\set Table_File_01 :File_Basedir/job_:JobId/task_:TaskId/p_00001.:userid
\set Table_File_02 :File_Basedir/job_:JobId/task_:TaskId/p_00002.:userid
\set Table_File_03 :File_Basedir/job_:JobId/task_:TaskId/p_00003.:userid
-- Run select query, and apply range partitioning on query results
SELECT worker_range_partition_table(:JobId, :TaskId, :Select_Query_Text,
:Partition_Column_Text, :Partition_Column_Type,
ARRAY[1, 3000, 12000]::_int8);
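-- An aside (not part of the original test): the three split points define four
-- ranges, and the resulting partition index matches width_bucket() over the
-- same bounds, i.e. 0 for keys below the first split point and 3 for keys at
-- or above the last one.
SELECT key, width_bucket(key, ARRAY[1, 3000, 12000]) AS partition_index
FROM (VALUES (0), (1), (2999), (3000), (12000)) AS sample(key);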
COPY :Table_Part_00 FROM :'Table_File_00';
COPY :Table_Part_01 FROM :'Table_File_01';
COPY :Table_Part_02 FROM :'Table_File_02';
COPY :Table_Part_03 FROM :'Table_File_03';
SELECT COUNT(*) FROM :Table_Part_00;
SELECT COUNT(*) FROM :Table_Part_03;
-- We first compute the difference of partition tables against the base table.
-- Then, we compute the difference of the base table against partitioned tables.
SELECT COUNT(*) AS diff_lhs_00 FROM (
:Select_All FROM :Table_Part_00 EXCEPT ALL
:Select_All FROM lineitem WHERE :Partition_Column < 1 ) diff;
SELECT COUNT(*) AS diff_lhs_01 FROM (
:Select_All FROM :Table_Part_01 EXCEPT ALL
:Select_All FROM lineitem WHERE :Partition_Column >= 1 AND
:Partition_Column < 3000 ) diff;
SELECT COUNT(*) AS diff_lhs_02 FROM (
:Select_All FROM :Table_Part_02 EXCEPT ALL
:Select_All FROM lineitem WHERE :Partition_Column >= 3000 AND
:Partition_Column < 12000 ) diff;
SELECT COUNT(*) AS diff_lhs_03 FROM (
:Select_All FROM :Table_Part_03 EXCEPT ALL
:Select_All FROM lineitem WHERE :Partition_Column >= 12000 ) diff;
SELECT COUNT(*) AS diff_rhs_00 FROM (
:Select_All FROM lineitem WHERE :Partition_Column < 1 EXCEPT ALL
:Select_All FROM :Table_Part_00 ) diff;
SELECT COUNT(*) AS diff_rhs_01 FROM (
:Select_All FROM lineitem WHERE :Partition_Column >= 1 AND
:Partition_Column < 3000 EXCEPT ALL
:Select_All FROM :Table_Part_01 ) diff;
SELECT COUNT(*) AS diff_rhs_02 FROM (
:Select_All FROM lineitem WHERE :Partition_Column >= 3000 AND
:Partition_Column < 12000 EXCEPT ALL
:Select_All FROM :Table_Part_02 ) diff;
SELECT COUNT(*) AS diff_rhs_03 FROM (
:Select_All FROM lineitem WHERE :Partition_Column >= 12000 EXCEPT ALL
:Select_All FROM :Table_Part_03 ) diff;

View File

@ -1,90 +0,0 @@
--
-- WORKER_RANGE_PARTITION_COMPLEX
--
\set JobId 201010
\set TaskId 101102
\set Partition_Column l_partkey
\set Partition_Column_Text '\'l_partkey\''
\set Partition_Column_Type 23
\set Select_Columns 'SELECT l_partkey, l_discount, l_shipdate, l_comment'
\set Select_Filters 'l_shipdate >= date \'1992-01-15\' AND l_discount between 0.02 AND 0.08'
\set Table_Part_00 lineitem_range_complex_part_00
\set Table_Part_01 lineitem_range_complex_part_01
\set Table_Part_02 lineitem_range_complex_part_02
\set Table_Part_03 lineitem_range_complex_part_03
SELECT usesysid AS userid FROM pg_user WHERE usename = current_user \gset
\set File_Basedir base/pgsql_job_cache
\set Table_File_00 :File_Basedir/job_:JobId/task_:TaskId/p_00000.:userid
\set Table_File_01 :File_Basedir/job_:JobId/task_:TaskId/p_00001.:userid
\set Table_File_02 :File_Basedir/job_:JobId/task_:TaskId/p_00002.:userid
\set Table_File_03 :File_Basedir/job_:JobId/task_:TaskId/p_00003.:userid
-- Run hardcoded complex select query, and apply range partitioning on query
-- results
SELECT worker_range_partition_table(:JobId, :TaskId,
'SELECT l_partkey, l_discount, l_shipdate, l_comment'
' FROM lineitem '
' WHERE l_shipdate >= date ''1992-01-15'''
' AND l_discount between 0.02 AND 0.08',
:Partition_Column_Text, :Partition_Column_Type,
ARRAY[101, 12000, 18000]::_int4);
-- Copy partitioned data files into tables for testing purposes
COPY :Table_Part_00 FROM :'Table_File_00';
COPY :Table_Part_01 FROM :'Table_File_01';
COPY :Table_Part_02 FROM :'Table_File_02';
COPY :Table_Part_03 FROM :'Table_File_03';
SELECT COUNT(*) FROM :Table_Part_00;
SELECT COUNT(*) FROM :Table_Part_03;
-- We first compute the difference of partition tables against the base table.
-- Then, we compute the difference of the base table against partitioned tables.
SELECT COUNT(*) AS diff_lhs_00 FROM (
:Select_Columns FROM :Table_Part_00 EXCEPT ALL
:Select_Columns FROM lineitem WHERE :Select_Filters AND
:Partition_Column < 101 ) diff;
SELECT COUNT(*) AS diff_lhs_01 FROM (
:Select_Columns FROM :Table_Part_01 EXCEPT ALL
:Select_Columns FROM lineitem WHERE :Select_Filters AND
:Partition_Column >= 101 AND
:Partition_Column < 12000 ) diff;
SELECT COUNT(*) AS diff_lhs_02 FROM (
:Select_Columns FROM :Table_Part_02 EXCEPT ALL
:Select_Columns FROM lineitem WHERE :Select_Filters AND
:Partition_Column >= 12000 AND
:Partition_Column < 18000 ) diff;
SELECT COUNT(*) AS diff_lhs_03 FROM (
:Select_Columns FROM :Table_Part_03 EXCEPT ALL
:Select_Columns FROM lineitem WHERE :Select_Filters AND
:Partition_Column >= 18000 ) diff;
SELECT COUNT(*) AS diff_rhs_00 FROM (
:Select_Columns FROM lineitem WHERE :Select_Filters AND
:Partition_Column < 101 EXCEPT ALL
:Select_Columns FROM :Table_Part_00 ) diff;
SELECT COUNT(*) AS diff_rhs_01 FROM (
:Select_Columns FROM lineitem WHERE :Select_Filters AND
:Partition_Column >= 101 AND
:Partition_Column < 12000 EXCEPT ALL
:Select_Columns FROM :Table_Part_01 ) diff;
SELECT COUNT(*) AS diff_rhs_02 FROM (
:Select_Columns FROM lineitem WHERE :Select_Filters AND
:Partition_Column >= 12000 AND
:Partition_Column < 18000 EXCEPT ALL
:Select_Columns FROM :Table_Part_02 ) diff;
SELECT COUNT(*) AS diff_rhs_03 FROM (
:Select_Columns FROM lineitem WHERE :Select_Filters AND
:Partition_Column >= 18000 EXCEPT ALL
:Select_Columns FROM :Table_Part_03 ) diff;

View File

@ -1,28 +0,0 @@
# ----------
# $Id$
#
# Regression tests that exercise worker node related distributed execution
# logic.
# ----------
# ----------
# All worker tests use the following table and its data
# ----------
test: worker_create_table
test: worker_copy
# ----------
# Range and hash re-partitioning related regression tests
# ----------
test: worker_disable_binary_worker_copy_format
test: worker_range_partition worker_range_partition_complex
test: worker_hash_partition worker_hash_partition_complex
test: worker_merge_range_files worker_merge_hash_files
test: worker_binary_data_partition worker_null_data_partition
# ---------
# test that no tests leaked intermediate results. This should always be last
# ---------
# some files will leak because we removed the task-tracker file removal;
# this shouldn't be a problem in this schedule
# test: ensure_no_intermediate_data_leak