diff --git a/.circleci/config.yml b/.circleci/config.yml index c6f2ee445..55eaa51c4 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -64,6 +64,12 @@ jobs: - run: name: 'Check if changed' command: git diff --exit-code + - run: + name: 'Check for gitignore entries for source files' + command: ci/fix_gitignore.sh + - run: + name: 'Check if changed' + command: git diff --exit-code - run: name: 'Check for lengths of changelog entries' command: ci/disallow_long_changelog_entries.sh @@ -445,6 +451,12 @@ workflows: image_tag: '12.6' make: check-multi requires: [build-12] + - test-citus: + name: 'test-12_check-multi-1' + pg_major: 12 + image_tag: '12.6' + make: check-multi-1 + requires: [build-12] - test-citus: name: 'test-12_check-mx' pg_major: 12 @@ -513,6 +525,12 @@ workflows: image_tag: '13.2' make: check-multi requires: [build-13] + - test-citus: + name: 'test-13_check-multi-1' + pg_major: 13 + image_tag: '13.2' + make: check-multi-1 + requires: [build-13] - test-citus: name: 'test-13_check-mx' pg_major: 13 diff --git a/CHANGELOG.md b/CHANGELOG.md index 62c5e7b39..6b60f7d7c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,87 @@ +### citus v10.1.0 (June 15, 2021) ### + +* Drops support for PostgreSQL 11 + +* Adds `shard_count` parameter to `create_distributed_table` function + +* Adds support for `ALTER DATABASE OWNER` + +* Adds support for temporary columnar tables + +* Adds support for using sequences as column default values when syncing + metadata + +* `alter_columnar_table_set` enforces columnar table option constraints + +* Continues to remove shards after failure in `DropMarkedShards` + +* Deprecates the `citus.replication_model` GUC + +* Enables `citus.defer_drop_after_shard_move` by default + +* Ensures free disk space before moving a shard + +* Fetches shard size on the fly for the rebalance monitor + +* Ignores old placements when disabling or removing a node + +* Implements `improvement_threshold` at shard rebalancer moves + +* Improves orphaned shard cleanup logic + +* Improves performance of `citus_shards` + +* Introduces `citus.local_hostname` GUC for connections to the current node + +* Makes sure connection is closed after each shard move + +* Makes sure that target node in shard moves is eligible for shard move + +* Optimizes partitioned disk size calculation for shard rebalancer + +* Prevents connection errors by properly terminating connections + +* Prevents inheriting a distributed table + +* Prevents users from dropping & truncating known shards + +* Pushes down `VALUES` clause as long as not in outer part of a `JOIN` + +* Reduces memory usage for multi-row inserts + +* Reduces memory usage while rebalancing shards + +* Removes length limits around partition names + +* Executor avoids opening extra connections + +* Fixes a bug that can cause a crash when DEBUG4 logging is enabled + +* Fixes data race in `get_rebalance_progress` + +* Fixes error message for local table joins + +* Fixes `FROM ONLY` queries on partitioned tables + +* Fixes issues caused by omitting public schema in queries + +* Fixes nested `SELECT` query with `UNION` bug + +* Fixes null relationName bug at parallel execution + +* Fixes possible segfaults when using Citus in the middle of an upgrade + +* Fixes problems with concurrent calls of `DropMarkedShards` + +* Fixes shared dependencies that are not resident in a database + +* Fixes stale hostnames bug in prepared statements after `master_update_node` + +* Fixes using 2PC when it might be necessary + +* Preserves access method of 
materialized views when undistributing + or altering distributed tables + ### citus v8.3.3 (March 23, 2021) ### * Fixes a bug that leads to various issues when a connection is lost diff --git a/README.md b/README.md index 49a8a3a15..c8a305ea0 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![Slack Status](http://slack.citusdata.com/badge.svg)](https://slack.citusdata.com) [![Latest Docs](https://img.shields.io/badge/docs-latest-brightgreen.svg)](https://docs.citusdata.com/) -[![Code Coverage](https://codecov.io/gh/citusdata/citus/branch/master/graph/badge.svg)](https://codecov.io/gh/citusdata/citus/branch/master/graph/badge.svg) +[![Code Coverage](https://codecov.io/gh/citusdata/citus/branch/master/graph/badge.svg)](https://app.codecov.io/gh/citusdata/citus) ## What is Citus? diff --git a/ci/README.md b/ci/README.md index 0d1be65f2..6e221b351 100644 --- a/ci/README.md +++ b/ci/README.md @@ -156,9 +156,9 @@ git merge "community/$PR_BRANCH" familiar with the change. 5. You should rerun the `check-merge-to-enterprise` check on `community/$PR_BRANCH`. You can use re-run from failed option in circle CI. -6. You can now merge the PR on community. Be sure to NOT use "squash and merge", +6. You can now merge the PR on enterprise. Be sure to NOT use "squash and merge", but instead use the regular "merge commit" mode. -7. You can now merge the PR on enterprise. Be sure to NOT use "squash and merge", +7. You can now merge the PR on community. Be sure to NOT use "squash and merge", but instead use the regular "merge commit" mode. The subsequent PRs on community will be able to pass the @@ -346,3 +346,15 @@ foo = 2 #endif ``` This was deemed to be error prone and not worth the effort. + +## `fix_gitignore.sh` + +This script checks and fixes issues with `.gitignore` rules: + +1. Makes sure git ignores the `.sql` files and expected output files that are generated + from `.source` template files. If you created or deleted a `.source` file in a commit, + git ignore rules should be updated to reflect this change. + +2. Makes sure we do not commit any generated files that should be ignored. If there is an + ignored file in the git tree, the user is expected to review the files that are removed + from the git tree and commit them. diff --git a/ci/check_enterprise_merge.sh b/ci/check_enterprise_merge.sh index 097e1aed5..040a5c224 100755 --- a/ci/check_enterprise_merge.sh +++ b/ci/check_enterprise_merge.sh @@ -65,6 +65,14 @@ fi # undo partial merge git merge --abort +# If we have a conflict on enterprise merge on the master branch, we have a problem. +# Provide an error message to indicate that enterprise merge is needed. +if [[ $PR_BRANCH = master ]]; then + echo "ERROR: Master branch has merge conflicts with enterprise-master." + echo "Try re-running this job if you merged community PR before enterprise PR. Otherwise conflicts need to be resolved as a separate PR on enterprise." + exit 1 +fi + if ! git fetch enterprise "$PR_BRANCH" ; then echo "ERROR: enterprise/$PR_BRANCH was not found and community PR branch could not be merged into enterprise-master" exit 1 diff --git a/ci/fix_gitignore.sh b/ci/fix_gitignore.sh new file mode 100755 index 000000000..a2258c472 --- /dev/null +++ b/ci/fix_gitignore.sh @@ -0,0 +1,34 @@ +#! 
/bin/bash +# shellcheck disable=SC2012 + +set -euo pipefail +# shellcheck disable=SC1091 +source ci/ci_helpers.sh + +# We list all the .source files in alphabetical order, and do a substitution +# before writing the resulting file names that are created by those templates in +# relevant .gitignore files +# +# 1. Capture the file name without the .source extension +# 2. Add the desired extension at the end +# 3. Add a / character at the beginning of each line to conform to .gitignore file format +# +# e.g. multi_copy.source -> /multi_copy.sql +ls -1 src/test/regress/input | sed -E "s#(.*)\.source#/\1.sql#" > src/test/regress/sql/.gitignore + +# e.g. multi_copy.source -> /multi_copy.out +ls -1 src/test/regress/output | sed -E "s#(.*)\.source#/\1.out#" > src/test/regress/expected/.gitignore + +# Remove all the ignored files from git tree, and error out +# find all ignored files in git tree, and use quotation marks to prevent word splitting on filenames with spaces in them +ignored_lines_in_git_tree=$(git ls-files --ignored --exclude-standard | sed 's/.*/"&"/') + +if [[ -n $ignored_lines_in_git_tree ]] +then + echo "Ignored files should not be in git tree!" + echo "${ignored_lines_in_git_tree}" + + echo "Removing these files from git tree, please review and commit" + echo "$ignored_lines_in_git_tree" | xargs git rm -r --cached + exit 1 +fi diff --git a/ci/fix_style.sh b/ci/fix_style.sh index 8846eda38..c15eb3def 100755 --- a/ci/fix_style.sh +++ b/ci/fix_style.sh @@ -14,3 +14,4 @@ ci/remove_useless_declarations.sh ci/disallow_c_comments_in_migrations.sh ci/disallow_long_changelog_entries.sh ci/normalize_expected.sh +ci/fix_gitignore.sh diff --git a/configure b/configure index 3294c736f..a5aa66ee4 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for Citus 10.1devel. +# Generated by GNU Autoconf 2.69 for Citus 10.2devel. # # # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. @@ -579,8 +579,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='Citus' PACKAGE_TARNAME='citus' -PACKAGE_VERSION='10.1devel' -PACKAGE_STRING='Citus 10.1devel' +PACKAGE_VERSION='10.2devel' +PACKAGE_STRING='Citus 10.2devel' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1260,7 +1260,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures Citus 10.1devel to adapt to many kinds of systems. +\`configure' configures Citus 10.2devel to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1322,7 +1322,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of Citus 10.1devel:";; + short | recursive ) echo "Configuration of Citus 10.2devel:";; esac cat <<\_ACEOF @@ -1425,7 +1425,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -Citus configure 10.1devel +Citus configure 10.2devel generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -1908,7 +1908,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by Citus $as_me 10.1devel, which was +It was created by Citus $as_me 10.2devel, which was generated by GNU Autoconf 2.69. 
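As a quick illustration of the `.source` → `.gitignore` mapping that the `ci/fix_gitignore.sh` script added above performs, here is a minimal sketch of the same `sed` substitution run against a throwaway directory; the directory layout and file names below are hypothetical and chosen only to show the transformation:

```bash
#!/bin/bash
# Sketch of the .source -> .gitignore mapping used by ci/fix_gitignore.sh.
# The temporary directory and file names are made up, for illustration only.
set -euo pipefail

tmpdir=$(mktemp -d)
mkdir -p "$tmpdir/input" "$tmpdir/sql"
touch "$tmpdir/input/multi_copy.source" "$tmpdir/input/worker_copy.source"

# Same substitution as the real script: drop the .source suffix, append .sql,
# and prefix each name with / so the entry is anchored to that directory.
ls -1 "$tmpdir/input" | sed -E "s#(.*)\.source#/\1.sql#" > "$tmpdir/sql/.gitignore"

cat "$tmpdir/sql/.gitignore"
# Expected output:
# /multi_copy.sql
# /worker_copy.sql
```

In the real script the same pattern is applied twice, once for `src/test/regress/input` (producing `/name.sql` entries) and once for `src/test/regress/output` (producing `/name.out` entries), so the generated files never show up as untracked changes.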
Invocation command line was $ $0 $@ @@ -5356,7 +5356,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by Citus $as_me 10.1devel, which was +This file was extended by Citus $as_me 10.2devel, which was generated by GNU Autoconf 2.69. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -5418,7 +5418,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -Citus config.status 10.1devel +Citus config.status 10.2devel configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff --git a/configure.in b/configure.in index 651daeef5..f92b5214c 100644 --- a/configure.in +++ b/configure.in @@ -5,7 +5,7 @@ # everyone needing autoconf installed, the resulting files are checked # into the SCM. -AC_INIT([Citus], [10.1devel]) +AC_INIT([Citus], [10.2devel]) AC_COPYRIGHT([Copyright (c) Citus Data, Inc.]) # we'll need sed and awk for some of the version commands diff --git a/src/backend/columnar/columnar_tableam.c b/src/backend/columnar/columnar_tableam.c index 6d0a9c7fd..0e2099fb5 100644 --- a/src/backend/columnar/columnar_tableam.c +++ b/src/backend/columnar/columnar_tableam.c @@ -2076,6 +2076,15 @@ alter_columnar_table_set(PG_FUNCTION_ARGS) if (!PG_ARGISNULL(1)) { options.chunkRowCount = PG_GETARG_INT32(1); + if (options.chunkRowCount < CHUNK_ROW_COUNT_MINIMUM || + options.chunkRowCount > CHUNK_ROW_COUNT_MAXIMUM) + { + ereport(ERROR, (errmsg("chunk group row count limit out of range"), + errhint("chunk group row count limit must be between " + UINT64_FORMAT " and " UINT64_FORMAT, + (uint64) CHUNK_ROW_COUNT_MINIMUM, + (uint64) CHUNK_ROW_COUNT_MAXIMUM))); + } ereport(DEBUG1, (errmsg("updating chunk row count to %d", options.chunkRowCount))); } @@ -2084,6 +2093,15 @@ alter_columnar_table_set(PG_FUNCTION_ARGS) if (!PG_ARGISNULL(2)) { options.stripeRowCount = PG_GETARG_INT32(2); + if (options.stripeRowCount < STRIPE_ROW_COUNT_MINIMUM || + options.stripeRowCount > STRIPE_ROW_COUNT_MAXIMUM) + { + ereport(ERROR, (errmsg("stripe row count limit out of range"), + errhint("stripe row count limit must be between " + UINT64_FORMAT " and " UINT64_FORMAT, + (uint64) STRIPE_ROW_COUNT_MINIMUM, + (uint64) STRIPE_ROW_COUNT_MAXIMUM))); + } ereport(DEBUG1, (errmsg( "updating stripe row count to " UINT64_FORMAT, options.stripeRowCount))); diff --git a/src/backend/distributed/citus.control b/src/backend/distributed/citus.control index a004b145c..624694d19 100644 --- a/src/backend/distributed/citus.control +++ b/src/backend/distributed/citus.control @@ -1,6 +1,6 @@ # Citus extension comment = 'Citus distributed database' -default_version = '10.1-1' +default_version = '10.2-1' module_pathname = '$libdir/citus' relocatable = false schema = pg_catalog diff --git a/src/backend/distributed/commands/alter_table.c b/src/backend/distributed/commands/alter_table.c index ddc62021a..da2e5490c 100644 --- a/src/backend/distributed/commands/alter_table.c +++ b/src/backend/distributed/commands/alter_table.c @@ -204,7 +204,6 @@ static char * GetAccessMethodForMatViewIfExists(Oid viewOid); static bool WillRecreateForeignKeyToReferenceTable(Oid relationId, CascadeToColocatedOption cascadeOption); static void WarningsForDroppingForeignKeysWithDistributedTables(Oid relationId); -static void ExecuteQueryViaSPI(char *query, int SPIOK); 
PG_FUNCTION_INFO_V1(undistribute_table); PG_FUNCTION_INFO_V1(alter_distributed_table); @@ -219,11 +218,11 @@ PG_FUNCTION_INFO_V1(worker_change_sequence_dependency); Datum undistribute_table(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + Oid relationId = PG_GETARG_OID(0); bool cascadeViaForeignKeys = PG_GETARG_BOOL(1); - CheckCitusVersion(ERROR); - TableConversionParameters params = { .relationId = relationId, .cascadeViaForeignKeys = cascadeViaForeignKeys @@ -243,6 +242,8 @@ undistribute_table(PG_FUNCTION_ARGS) Datum alter_distributed_table(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + Oid relationId = PG_GETARG_OID(0); char *distributionColumn = NULL; @@ -280,9 +281,6 @@ alter_distributed_table(PG_FUNCTION_ARGS) } } - CheckCitusVersion(ERROR); - - TableConversionParameters params = { .relationId = relationId, .distributionColumn = distributionColumn, @@ -305,13 +303,13 @@ alter_distributed_table(PG_FUNCTION_ARGS) Datum alter_table_set_access_method(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + Oid relationId = PG_GETARG_OID(0); text *accessMethodText = PG_GETARG_TEXT_P(1); char *accessMethod = text_to_cstring(accessMethodText); - CheckCitusVersion(ERROR); - TableConversionParameters params = { .relationId = relationId, .accessMethod = accessMethod @@ -558,8 +556,11 @@ ConvertTable(TableConversionState *con) includeIndexes = false; } + + bool includeReplicaIdentity = true; List *postLoadCommands = GetPostLoadTableCreationCommands(con->relationId, - includeIndexes); + includeIndexes, + includeReplicaIdentity); List *justBeforeDropCommands = NIL; List *attachPartitionCommands = NIL; diff --git a/src/backend/distributed/commands/citus_add_local_table_to_metadata.c b/src/backend/distributed/commands/citus_add_local_table_to_metadata.c index d89c44630..3a8e54c68 100644 --- a/src/backend/distributed/commands/citus_add_local_table_to_metadata.c +++ b/src/backend/distributed/commands/citus_add_local_table_to_metadata.c @@ -179,7 +179,7 @@ remove_local_tables_from_metadata(PG_FUNCTION_ARGS) * properties: * - it will have only one shard, * - its distribution method will be DISTRIBUTE_BY_NONE, - * - its replication model will be ReplicationModel, + * - its replication model will be REPLICATION_MODEL_STREAMING, * - its replication factor will be set to 1. * Similar to reference tables, it has only 1 placement. In addition to that, that * single placement is only allowed to be on the coordinator. 
@@ -996,9 +996,7 @@ InsertMetadataForCitusLocalTable(Oid citusLocalTableId, uint64 shardId) Assert(shardId != INVALID_SHARD_ID); char distributionMethod = DISTRIBUTE_BY_NONE; - char replicationModel = ReplicationModel; - - Assert(replicationModel != REPLICATION_MODEL_2PC); + char replicationModel = REPLICATION_MODEL_STREAMING; uint32 colocationId = INVALID_COLOCATION_ID; Var *distributionColumn = NULL; diff --git a/src/backend/distributed/commands/create_distributed_table.c b/src/backend/distributed/commands/create_distributed_table.c index dcdd292db..06bd5096d 100644 --- a/src/backend/distributed/commands/create_distributed_table.c +++ b/src/backend/distributed/commands/create_distributed_table.c @@ -86,13 +86,9 @@ */ #define LOG_PER_TUPLE_AMOUNT 1000000 - -/* Replication model to use when creating distributed tables */ -int ReplicationModel = REPLICATION_MODEL_COORDINATOR; - - /* local function forward declarations */ -static char DecideReplicationModel(char distributionMethod, bool viaDeprecatedAPI); +static char DecideReplicationModel(char distributionMethod, char *colocateWithTableName, + bool viaDeprecatedAPI); static void CreateHashDistributedTableShards(Oid relationId, int shardCount, Oid colocatedTableId, bool localTableEmpty); static uint32 ColocationIdForNewTable(Oid relationId, Var *distributionColumn, @@ -119,7 +115,6 @@ static List * GetFKeyCreationCommandsRelationInvolvedWithTableType(Oid relationI int tableTypeFlag); static Oid DropFKeysAndUndistributeTable(Oid relationId); static void DropFKeysRelationInvolvedWithTableType(Oid relationId, int tableTypeFlag); -static bool LocalTableEmpty(Oid tableId); static void CopyLocalDataIntoShards(Oid relationId); static List * TupleDescColumnNameList(TupleDesc tupleDescriptor); static bool DistributionColumnUsesGeneratedStoredColumn(TupleDesc relationDesc, @@ -146,12 +141,11 @@ PG_FUNCTION_INFO_V1(create_reference_table); Datum master_create_distributed_table(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); Oid relationId = PG_GETARG_OID(0); text *distributionColumnText = PG_GETARG_TEXT_P(1); Oid distributionMethodOid = PG_GETARG_OID(2); - CheckCitusVersion(ERROR); - EnsureCitusTableCanBeCreated(relationId); char *colocateWithTableName = NULL; @@ -193,6 +187,8 @@ master_create_distributed_table(PG_FUNCTION_ARGS) Datum create_distributed_table(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + if (PG_ARGISNULL(0) || PG_ARGISNULL(1) || PG_ARGISNULL(2) || PG_ARGISNULL(3)) { PG_RETURN_VOID(); @@ -225,8 +221,6 @@ create_distributed_table(PG_FUNCTION_ARGS) shardCountIsStrict = true; } - CheckCitusVersion(ERROR); - EnsureCitusTableCanBeCreated(relationId); /* enable create_distributed_table on an empty node */ @@ -275,6 +269,7 @@ create_distributed_table(PG_FUNCTION_ARGS) Datum create_reference_table(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); Oid relationId = PG_GETARG_OID(0); char *colocateWithTableName = NULL; @@ -282,8 +277,6 @@ create_reference_table(PG_FUNCTION_ARGS) bool viaDeprecatedAPI = false; - CheckCitusVersion(ERROR); - EnsureCitusTableCanBeCreated(relationId); /* enable create_reference_table on an empty node */ @@ -442,6 +435,7 @@ CreateDistributedTable(Oid relationId, Var *distributionColumn, char distributio EnsureDependenciesExistOnAllNodes(&tableAddress); char replicationModel = DecideReplicationModel(distributionMethod, + colocateWithTableName, viaDeprecatedAPI); /* @@ -464,7 +458,7 @@ CreateDistributedTable(Oid relationId, Var *distributionColumn, char distributio EnsureReferenceTablesExistOnAllNodes(); /* we need to 
calculate these variables before creating distributed metadata */ - bool localTableEmpty = LocalTableEmpty(relationId); + bool localTableEmpty = TableEmpty(relationId); Oid colocatedTableId = ColocatedTableId(colocationId); /* create an entry for distributed table in pg_dist_partition */ @@ -631,44 +625,38 @@ DropFKeysRelationInvolvedWithTableType(Oid relationId, int tableTypeFlag) /* * DecideReplicationModel function decides which replication model should be - * used depending on given distribution configuration and global ReplicationModel - * variable. If ReplicationModel conflicts with distribution configuration, this - * function errors out. + * used depending on given distribution configuration. */ static char -DecideReplicationModel(char distributionMethod, bool viaDeprecatedAPI) +DecideReplicationModel(char distributionMethod, char *colocateWithTableName, bool + viaDeprecatedAPI) { if (viaDeprecatedAPI) { - if (ReplicationModel != REPLICATION_MODEL_COORDINATOR) - { - ereport(NOTICE, (errmsg("using statement-based replication"), - errdetail("The current replication_model setting is " - "'streaming', which is not supported by " - "master_create_distributed_table."), - errhint("Use create_distributed_table to use the streaming " - "replication model."))); - } - return REPLICATION_MODEL_COORDINATOR; } else if (distributionMethod == DISTRIBUTE_BY_NONE) { return REPLICATION_MODEL_2PC; } - else if (distributionMethod == DISTRIBUTE_BY_HASH) + else if (pg_strncasecmp(colocateWithTableName, "default", NAMEDATALEN) != 0 && + !IsColocateWithNone(colocateWithTableName)) { - return ReplicationModel; + text *colocateWithTableNameText = cstring_to_text(colocateWithTableName); + Oid colocatedRelationId = ResolveRelationId(colocateWithTableNameText, false); + CitusTableCacheEntry *targetTableEntry = GetCitusTableCacheEntry( + colocatedRelationId); + char replicationModel = targetTableEntry->replicationModel; + + return replicationModel; + } + else if (distributionMethod == DISTRIBUTE_BY_HASH && + !DistributedTableReplicationIsEnabled()) + { + return REPLICATION_MODEL_STREAMING; } else { - if (ReplicationModel != REPLICATION_MODEL_COORDINATOR) - { - ereport(NOTICE, (errmsg("using statement-based replication"), - errdetail("Streaming replication is supported only for " - "hash-distributed tables."))); - } - return REPLICATION_MODEL_COORDINATOR; } @@ -863,7 +851,6 @@ EnsureRelationCanBeDistributed(Oid relationId, Var *distributionColumn, EnsureTableNotDistributed(relationId); EnsureLocalTableEmptyIfNecessary(relationId, distributionMethod, viaDeprecatedAPI); - EnsureReplicationSettings(InvalidOid, replicationModel); EnsureRelationHasNoTriggers(relationId); /* we assume callers took necessary locks */ @@ -1125,7 +1112,7 @@ static void EnsureLocalTableEmpty(Oid relationId) { char *relationName = get_rel_name(relationId); - bool localTableEmpty = LocalTableEmpty(relationId); + bool localTableEmpty = TableEmpty(relationId); if (!localTableEmpty) { @@ -1156,36 +1143,6 @@ EnsureTableNotDistributed(Oid relationId) } -/* - * EnsureReplicationSettings checks whether the current replication factor - * setting is compatible with the replication model. This function errors - * out if caller tries to use streaming replication with more than one - * replication factor. 
- */ -void -EnsureReplicationSettings(Oid relationId, char replicationModel) -{ - char *msgSuffix = "the streaming replication model"; - char *extraHint = " or setting \"citus.replication_model\" to \"statement\""; - - if (relationId != InvalidOid) - { - msgSuffix = "tables which use the streaming replication model"; - extraHint = ""; - } - - if (replicationModel == REPLICATION_MODEL_STREAMING && - DistributedTableReplicationIsEnabled()) - { - ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("replication factors above one are incompatible with %s", - msgSuffix), - errhint("Try again after reducing \"citus.shard_replication_" - "factor\" to one%s.", extraHint))); - } -} - - /* * EnsureRelationHasNoTriggers errors out if the given table has triggers on * it. See also GetExplicitTriggerIdList function's comment for the triggers this @@ -1291,50 +1248,40 @@ SupportFunctionForColumn(Var *partitionColumn, Oid accessMethodId, /* - * LocalTableEmpty function checks whether given local table contains any row and - * returns false if there is any data. This function is only for local tables and - * should not be called for distributed tables. + * TableEmpty function checks whether given table contains any row and + * returns false if there is any data. */ -static bool -LocalTableEmpty(Oid tableId) +bool +TableEmpty(Oid tableId) { Oid schemaId = get_rel_namespace(tableId); char *schemaName = get_namespace_name(schemaId); char *tableName = get_rel_name(tableId); char *tableQualifiedName = quote_qualified_identifier(schemaName, tableName); - StringInfo selectExistQueryString = makeStringInfo(); + StringInfo selectTrueQueryString = makeStringInfo(); - bool columnNull = false; bool readOnly = true; - int rowId = 0; - int attributeId = 1; - - AssertArg(!IsCitusTable(tableId)); - int spiConnectionResult = SPI_connect(); if (spiConnectionResult != SPI_OK_CONNECT) { ereport(ERROR, (errmsg("could not connect to SPI manager"))); } - appendStringInfo(selectExistQueryString, SELECT_EXIST_QUERY, tableQualifiedName); + appendStringInfo(selectTrueQueryString, SELECT_TRUE_QUERY, tableQualifiedName); - int spiQueryResult = SPI_execute(selectExistQueryString->data, readOnly, 0); + int spiQueryResult = SPI_execute(selectTrueQueryString->data, readOnly, 0); if (spiQueryResult != SPI_OK_SELECT) { ereport(ERROR, (errmsg("execution was not successful \"%s\"", - selectExistQueryString->data))); + selectTrueQueryString->data))); } - /* we expect that SELECT EXISTS query will return single value in a single row */ - Assert(SPI_processed == 1); + /* we expect that SELECT TRUE query will return single value in a single row OR empty set */ + Assert(SPI_processed == 1 || SPI_processed == 0); - HeapTuple tuple = SPI_tuptable->vals[rowId]; - Datum hasDataDatum = SPI_getbinval(tuple, SPI_tuptable->tupdesc, attributeId, - &columnNull); - bool localTableEmpty = !DatumGetBool(hasDataDatum); + bool localTableEmpty = !SPI_processed; SPI_finish(); diff --git a/src/backend/distributed/commands/database.c b/src/backend/distributed/commands/database.c new file mode 100644 index 000000000..b4fb15110 --- /dev/null +++ b/src/backend/distributed/commands/database.c @@ -0,0 +1,215 @@ +/*------------------------------------------------------------------------- + * + * database.c + * Commands to interact with the database object in a distributed + * environment. + * + * Copyright (c) Citus Data, Inc. 
+ * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/htup_details.h" +#include "access/xact.h" +#include "catalog/objectaddress.h" +#include "catalog/pg_database.h" +#include "commands/dbcommands.h" +#include "miscadmin.h" +#include "nodes/parsenodes.h" +#include "utils/syscache.h" + +#include "distributed/commands.h" +#include "distributed/commands/utility_hook.h" +#include "distributed/deparser.h" +#include "distributed/metadata_sync.h" +#include "distributed/metadata_utility.h" +#include "distributed/multi_executor.h" +#include "distributed/relation_access_tracking.h" +#include "distributed/worker_transaction.h" + +static void EnsureSequentialModeForDatabaseDDL(void); +static AlterOwnerStmt * RecreateAlterDatabaseOwnerStmt(Oid databaseOid); +static Oid get_database_owner(Oid db_oid); + +/* controlled via GUC */ +bool EnableAlterDatabaseOwner = false; + + +/* + * PreprocessAlterDatabaseOwnerStmt is called during the utility hook before the alter + * command is applied locally on the coordinator. This will verify if the command needs to + * be propagated to the workers and if so prepares a list of ddl commands to execute. + */ +List * +PreprocessAlterDatabaseOwnerStmt(Node *node, const char *queryString, + ProcessUtilityContext processUtilityContext) +{ + AlterOwnerStmt *stmt = castNode(AlterOwnerStmt, node); + Assert(stmt->objectType == OBJECT_DATABASE); + + ObjectAddress typeAddress = GetObjectAddressFromParseTree((Node *) stmt, false); + if (!ShouldPropagateObject(&typeAddress)) + { + return NIL; + } + + if (!EnableAlterDatabaseOwner) + { + /* don't propagate if GUC is turned off */ + return NIL; + } + + EnsureCoordinator(); + + QualifyTreeNode((Node *) stmt); + const char *sql = DeparseTreeNode((Node *) stmt); + + EnsureSequentialModeForDatabaseDDL(); + List *commands = list_make3(DISABLE_DDL_PROPAGATION, + (void *) sql, + ENABLE_DDL_PROPAGATION); + + return NodeDDLTaskList(NON_COORDINATOR_NODES, commands); +} + + +/* + * PostprocessAlterDatabaseOwnerStmt is called during the utility hook after the alter + * database command has been applied locally. + * + * Its main purpose is to propagate the newly formed dependencies onto the nodes before + * applying the change of owner of the database. This ensures, for systems that have role + * management, that the roles will be created before applying the alter owner command. + */ +List * +PostprocessAlterDatabaseOwnerStmt(Node *node, const char *queryString) +{ + AlterOwnerStmt *stmt = castNode(AlterOwnerStmt, node); + Assert(stmt->objectType == OBJECT_DATABASE); + + ObjectAddress typeAddress = GetObjectAddressFromParseTree((Node *) stmt, false); + if (!ShouldPropagateObject(&typeAddress)) + { + return NIL; + } + + if (!EnableAlterDatabaseOwner) + { + /* don't propagate if GUC is turned off */ + return NIL; + } + + EnsureDependenciesExistOnAllNodes(&typeAddress); + return NIL; +} + + +/* + * AlterDatabaseOwnerObjectAddress returns the ObjectAddress of the database that is the + * object of the AlterOwnerStmt. Errors if missing_ok is false. 
+ */ +ObjectAddress +AlterDatabaseOwnerObjectAddress(Node *node, bool missing_ok) +{ + AlterOwnerStmt *stmt = castNode(AlterOwnerStmt, node); + Assert(stmt->objectType == OBJECT_DATABASE); + + Oid databaseOid = get_database_oid(strVal((Value *) stmt->object), missing_ok); + ObjectAddress address = { 0 }; + ObjectAddressSet(address, DatabaseRelationId, databaseOid); + + return address; +} + + +/* + * DatabaseOwnerDDLCommands returns a list of sql statements to idempotently apply a + * change of the database owner on the workers so that the database is owned by the same + * user on all nodes in the cluster. + */ +List * +DatabaseOwnerDDLCommands(const ObjectAddress *address) +{ + Node *stmt = (Node *) RecreateAlterDatabaseOwnerStmt(address->objectId); + return list_make1(DeparseTreeNode(stmt)); +} + + +/* + * RecreateAlterDatabaseOwnerStmt creates an AlterOwnerStmt that represents the operation + * of changing the owner of the database to its current owner. + */ +static AlterOwnerStmt * +RecreateAlterDatabaseOwnerStmt(Oid databaseOid) +{ + AlterOwnerStmt *stmt = makeNode(AlterOwnerStmt); + + stmt->objectType = OBJECT_DATABASE; + stmt->object = (Node *) makeString(get_database_name(databaseOid)); + + Oid ownerOid = get_database_owner(databaseOid); + stmt->newowner = makeNode(RoleSpec); + stmt->newowner->roletype = ROLESPEC_CSTRING; + stmt->newowner->rolename = GetUserNameFromId(ownerOid, false); + + return stmt; +} + + +/* + * get_database_owner returns the Oid of the role owning the database + */ +static Oid +get_database_owner(Oid db_oid) +{ + HeapTuple tuple = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(db_oid)); + if (!HeapTupleIsValid(tuple)) + { + ereport(ERROR, (errcode(ERRCODE_UNDEFINED_DATABASE), + errmsg("database with OID %u does not exist", db_oid))); + } + + Oid dba = ((Form_pg_database) GETSTRUCT(tuple))->datdba; + + ReleaseSysCache(tuple); + + return dba; +} + + +/* + * EnsureSequentialModeForDatabaseDDL makes sure that the current transaction is already + * in sequential mode, or can still safely be put in sequential mode, it errors if that is + * not possible. The error contains information for the user to retry the transaction with + * sequential mode set from the beginning. + */ +static void +EnsureSequentialModeForDatabaseDDL(void) +{ + if (!IsTransactionBlock()) + { + /* we do not need to switch to sequential mode if we are not in a transaction */ + return; + } + + if (ParallelQueryExecutedInTransaction()) + { + ereport(ERROR, (errmsg("cannot create or modify database because there was a " + "parallel operation on a distributed table in the " + "transaction"), + errdetail("When creating or altering a database, Citus needs to " + "perform all operations over a single connection per " + "node to ensure consistency."), + errhint("Try re-running the transaction with " + "\"SET LOCAL citus.multi_shard_modify_mode TO " + "\'sequential\';\""))); + } + + ereport(DEBUG1, (errmsg("switching to sequential query execution mode"), + errdetail("Database is created or altered. 
To make sure subsequent " + "commands see the type correctly we need to make sure to " + "use only one connection for all future commands"))); + SetLocalMultiShardModifyModeToSequential(); +} diff --git a/src/backend/distributed/commands/dependencies.c b/src/backend/distributed/commands/dependencies.c index c885d4429..60cb56492 100644 --- a/src/backend/distributed/commands/dependencies.c +++ b/src/backend/distributed/commands/dependencies.c @@ -14,6 +14,7 @@ #include "catalog/objectaddress.h" #include "commands/extension.h" #include "distributed/commands.h" +#include "distributed/commands/utility_hook.h" #include "distributed/connection_management.h" #include "distributed/listutils.h" #include "distributed/metadata/dependency.h" @@ -191,6 +192,20 @@ GetDependencyCreateDDLCommands(const ObjectAddress *dependency) return CreateCollationDDLsIdempotent(dependency->objectId); } + case OCLASS_DATABASE: + { + List *databaseDDLCommands = NIL; + + /* only propagate the ownership of the database when the feature is on */ + if (EnableAlterDatabaseOwner) + { + List *ownerDDLCommands = DatabaseOwnerDDLCommands(dependency); + databaseDDLCommands = list_concat(databaseDDLCommands, ownerDDLCommands); + } + + return databaseDDLCommands; + } + case OCLASS_PROC: { return CreateFunctionDDLCommandsIdempotent(dependency); diff --git a/src/backend/distributed/commands/distribute_object_ops.c b/src/backend/distributed/commands/distribute_object_ops.c index 5f4b94961..e5b8d7e98 100644 --- a/src/backend/distributed/commands/distribute_object_ops.c +++ b/src/backend/distributed/commands/distribute_object_ops.c @@ -240,6 +240,13 @@ static DistributeObjectOps Collation_Rename = { .postprocess = NULL, .address = RenameCollationStmtObjectAddress, }; +static DistributeObjectOps Database_AlterOwner = { + .deparse = DeparseAlterDatabaseOwnerStmt, + .qualify = NULL, + .preprocess = PreprocessAlterDatabaseOwnerStmt, + .postprocess = PostprocessAlterDatabaseOwnerStmt, + .address = AlterDatabaseOwnerObjectAddress, +}; static DistributeObjectOps Extension_AlterObjectSchema = { .deparse = DeparseAlterExtensionSchemaStmt, .qualify = NULL, @@ -359,6 +366,34 @@ static DistributeObjectOps Routine_AlterObjectDepends = { .postprocess = NULL, .address = AlterFunctionDependsStmtObjectAddress, }; +static DistributeObjectOps Sequence_Alter = { + .deparse = NULL, + .qualify = NULL, + .preprocess = PreprocessAlterSequenceStmt, + .postprocess = NULL, + .address = AlterSequenceObjectAddress, +}; +static DistributeObjectOps Sequence_AlterObjectSchema = { + .deparse = NULL, + .qualify = NULL, + .preprocess = PreprocessAlterSequenceSchemaStmt, + .postprocess = NULL, + .address = AlterSequenceSchemaStmtObjectAddress, +}; +static DistributeObjectOps Sequence_Drop = { + .deparse = DeparseDropSequenceStmt, + .qualify = NULL, + .preprocess = PreprocessDropSequenceStmt, + .postprocess = NULL, + .address = NULL, +}; +static DistributeObjectOps Sequence_Rename = { + .deparse = DeparseRenameSequenceStmt, + .qualify = QualifyRenameSequenceStmt, + .preprocess = PreprocessRenameSequenceStmt, + .postprocess = NULL, + .address = RenameSequenceStmtObjectAddress, +}; static DistributeObjectOps Trigger_AlterObjectDepends = { .deparse = NULL, .qualify = NULL, @@ -453,7 +488,7 @@ static DistributeObjectOps Statistics_Rename = { .address = NULL, }; static DistributeObjectOps Table_AlterTable = { - .deparse = NULL, + .deparse = DeparseAlterTableStmt, .qualify = NULL, .preprocess = PreprocessAlterTableStmt, .postprocess = NULL, @@ -621,6 +656,11 @@ 
GetDistributeObjectOps(Node *node) return &Routine_AlterObjectSchema; } + case OBJECT_SEQUENCE: + { + return &Sequence_AlterObjectSchema; + } + case OBJECT_STATISTIC_EXT: { return &Statistics_AlterObjectSchema; @@ -658,6 +698,11 @@ GetDistributeObjectOps(Node *node) return &Collation_AlterOwner; } + case OBJECT_DATABASE: + { + return &Database_AlterOwner; + } + case OBJECT_FUNCTION: { return &Function_AlterOwner; @@ -705,6 +750,11 @@ GetDistributeObjectOps(Node *node) return &Any_AlterRoleSet; } + case T_AlterSeqStmt: + { + return &Sequence_Alter; + } + #if PG_VERSION_NUM >= PG_VERSION_13 case T_AlterStatsStmt: { @@ -861,6 +911,11 @@ GetDistributeObjectOps(Node *node) return &Schema_Drop; } + case OBJECT_SEQUENCE: + { + return &Sequence_Drop; + } + case OBJECT_STATISTIC_EXT: { return &Statistics_Drop; @@ -955,6 +1010,11 @@ GetDistributeObjectOps(Node *node) return &Schema_Rename; } + case OBJECT_SEQUENCE: + { + return &Sequence_Rename; + } + case OBJECT_STATISTIC_EXT: { return &Statistics_Rename; diff --git a/src/backend/distributed/commands/drop_distributed_table.c b/src/backend/distributed/commands/drop_distributed_table.c index 536e27206..79adf02a9 100644 --- a/src/backend/distributed/commands/drop_distributed_table.c +++ b/src/backend/distributed/commands/drop_distributed_table.c @@ -59,6 +59,8 @@ master_drop_distributed_table_metadata(PG_FUNCTION_ARGS) Datum master_remove_partition_metadata(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + Oid relationId = PG_GETARG_OID(0); text *schemaNameText = PG_GETARG_TEXT_P(1); text *tableNameText = PG_GETARG_TEXT_P(2); @@ -66,8 +68,6 @@ master_remove_partition_metadata(PG_FUNCTION_ARGS) char *schemaName = text_to_cstring(schemaNameText); char *tableName = text_to_cstring(tableNameText); - CheckCitusVersion(ERROR); - /* * The SQL_DROP trigger calls this function even for tables that are * not distributed. In that case, silently ignore. This is not very @@ -97,6 +97,8 @@ master_remove_partition_metadata(PG_FUNCTION_ARGS) Datum master_remove_distributed_table_metadata_from_workers(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + Oid relationId = PG_GETARG_OID(0); text *schemaNameText = PG_GETARG_TEXT_P(1); text *tableNameText = PG_GETARG_TEXT_P(2); @@ -104,8 +106,6 @@ master_remove_distributed_table_metadata_from_workers(PG_FUNCTION_ARGS) char *schemaName = text_to_cstring(schemaNameText); char *tableName = text_to_cstring(tableNameText); - CheckCitusVersion(ERROR); - CheckTableSchemaNameForDrop(relationId, &schemaName, &tableName); MasterRemoveDistributedTableMetadataFromWorkers(relationId, schemaName, tableName); diff --git a/src/backend/distributed/commands/foreign_constraint.c b/src/backend/distributed/commands/foreign_constraint.c index 8fdc932ee..fe146c268 100644 --- a/src/backend/distributed/commands/foreign_constraint.c +++ b/src/backend/distributed/commands/foreign_constraint.c @@ -734,8 +734,8 @@ HasForeignKeyWithLocalTable(Oid relationId) /* - * GetForeignKeysWithLocalTables returns a list foreign keys for foreign key - * relationaships that relation has with local tables. + * GetForeignKeysWithLocalTables returns a list of foreign keys for foreign key + * relationships that relation has with local tables. */ static List * GetForeignKeysWithLocalTables(Oid relationId) @@ -753,6 +753,21 @@ GetForeignKeysWithLocalTables(Oid relationId) } +/* + * GetForeignKeysFromLocalTables returns a list of foreign keys where the referencing + * relation is a local table. 
+ */ +List * +GetForeignKeysFromLocalTables(Oid relationId) +{ + int referencedFKeysFlag = INCLUDE_REFERENCED_CONSTRAINTS | + INCLUDE_LOCAL_TABLES; + List *referencingFKeyList = GetForeignKeyOids(relationId, referencedFKeysFlag); + + return referencingFKeyList; +} + + /* * HasForeignKeyToCitusLocalTable returns true if any of the foreign key constraints * on the relation with relationId references to a citus local table. @@ -1102,6 +1117,30 @@ GetReferencedTableId(Oid foreignKeyId) } +/* + * GetReferencingTableId returns OID of the referencing relation for the foreign + * key with foreignKeyId. If there is no such foreign key, then this function + * returns InvalidOid. + */ +Oid +GetReferencingTableId(Oid foreignKeyId) +{ + HeapTuple heapTuple = SearchSysCache1(CONSTROID, ObjectIdGetDatum(foreignKeyId)); + if (!HeapTupleIsValid(heapTuple)) + { + /* no such foreign key */ + return InvalidOid; + } + + Form_pg_constraint constraintForm = (Form_pg_constraint) GETSTRUCT(heapTuple); + Oid referencingTableId = constraintForm->conrelid; + + ReleaseSysCache(heapTuple); + + return referencingTableId; +} + + /* * IsTableTypeIncluded returns true if type of the table with relationId (distributed, * reference, Citus local or Postgres local) is included in the flags, false if not diff --git a/src/backend/distributed/commands/function.c b/src/backend/distributed/commands/function.c index 028882bcd..54f87f9a9 100644 --- a/src/backend/distributed/commands/function.c +++ b/src/backend/distributed/commands/function.c @@ -461,15 +461,6 @@ GetFunctionColocationId(Oid functionOid, char *colocateWithTableName, EnsureFunctionCanBeColocatedWithTable(functionOid, distributionArgumentOid, colocatedTableId); } - else if (ReplicationModel == REPLICATION_MODEL_COORDINATOR) - { - /* streaming replication model is required for metadata syncing */ - ereport(ERROR, (errmsg("cannot create a function with a distribution " - "argument when citus.replication_model is " - "'statement'"), - errhint("Set citus.replication_model to 'streaming' " - "before creating distributed tables"))); - } } else { @@ -537,7 +528,7 @@ EnsureFunctionCanBeColocatedWithTable(Oid functionOid, Oid distributionColumnTyp "with distributed tables that are created using " "streaming replication model."), errhint("When distributing tables make sure that " - "citus.replication_model = 'streaming'"))); + "citus.shard_replication_factor = 1"))); } /* @@ -1438,7 +1429,7 @@ PreprocessAlterFunctionSchemaStmt(Node *node, const char *queryString, /* - * PreprocessAlterTypeOwnerStmt is called for change of owner ship of functions before the owner + * PreprocessAlterFunctionOwnerStmt is called for change of owner ship of functions before the owner * ship is changed on the local instance. * * If the function for which the owner is changed is distributed we execute the change on diff --git a/src/backend/distributed/commands/index.c b/src/backend/distributed/commands/index.c index ee553de1e..54fa057c5 100644 --- a/src/backend/distributed/commands/index.c +++ b/src/backend/distributed/commands/index.c @@ -302,7 +302,8 @@ CreateIndexStmtGetSchemaId(IndexStmt *createIndexStatement) * It returns a list that is filled by the pgIndexProcessor. 
*/ List * -ExecuteFunctionOnEachTableIndex(Oid relationId, PGIndexProcessor pgIndexProcessor) +ExecuteFunctionOnEachTableIndex(Oid relationId, PGIndexProcessor pgIndexProcessor, + int indexFlags) { List *result = NIL; ScanKeyData scanKey[1]; @@ -324,7 +325,7 @@ ExecuteFunctionOnEachTableIndex(Oid relationId, PGIndexProcessor pgIndexProcesso while (HeapTupleIsValid(heapTuple)) { Form_pg_index indexForm = (Form_pg_index) GETSTRUCT(heapTuple); - pgIndexProcessor(indexForm, &result); + pgIndexProcessor(indexForm, &result, indexFlags); heapTuple = systable_getnext(scanDescriptor); } diff --git a/src/backend/distributed/commands/multi_copy.c b/src/backend/distributed/commands/multi_copy.c index 4141bf5fd..7d5370389 100644 --- a/src/backend/distributed/commands/multi_copy.c +++ b/src/backend/distributed/commands/multi_copy.c @@ -2228,7 +2228,7 @@ CitusCopyDestReceiverStartup(DestReceiver *dest, int operation, if (cacheEntry->replicationModel == REPLICATION_MODEL_2PC || MultiShardCommitProtocol == COMMIT_PROTOCOL_2PC) { - CoordinatedTransactionShouldUse2PC(); + Use2PCForCoordinatedTransaction(); } /* define how tuples will be serialised */ diff --git a/src/backend/distributed/commands/sequence.c b/src/backend/distributed/commands/sequence.c index 7a509aa19..d42148b54 100644 --- a/src/backend/distributed/commands/sequence.c +++ b/src/backend/distributed/commands/sequence.c @@ -14,14 +14,20 @@ #include "catalog/dependency.h" #include "catalog/namespace.h" #include "commands/defrem.h" +#include "commands/extension.h" #include "distributed/commands.h" #include "distributed/commands/sequence.h" +#include "distributed/commands/utility_hook.h" +#include "distributed/deparser.h" #include "distributed/listutils.h" +#include "distributed/metadata/distobject.h" #include "distributed/metadata_cache.h" +#include "distributed/metadata_sync.h" #include "nodes/parsenodes.h" /* Local functions forward declarations for helper functions */ static bool OptionsSpecifyOwnedBy(List *optionList, Oid *ownedByTableId); +static bool ShouldPropagateAlterSequence(const ObjectAddress *address); /* @@ -92,15 +98,6 @@ ErrorIfDistributedAlterSeqOwnedBy(AlterSeqStmt *alterSeqStmt) errmsg("cannot alter OWNED BY option of a sequence " "already owned by a distributed table"))); } - else if (!hasDistributedOwner && IsCitusTable(newOwnedByTableId)) - { - /* and don't let local sequences get a distributed OWNED BY */ - ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot associate an existing sequence with a " - "distributed table"), - errhint("Use a sequence in a distributed table by specifying " - "a serial column type before creating any shards."))); - } } } @@ -209,3 +206,305 @@ ExtractDefaultColumnsAndOwnedSequences(Oid relationId, List **columnNameList, relation_close(relation, NoLock); } + + +/* + * PreprocessDropSequenceStmt gets called during the planning phase of a DROP SEQUENCE statement + * and returns a list of DDLJob's that will drop any distributed sequences from the + * workers. + * + * The DropStmt could have multiple objects to drop, the list of objects will be filtered + * to only keep the distributed sequences for deletion on the workers. Non-distributed + * sequences will still be dropped locally but not on the workers. 
+ */ +List * +PreprocessDropSequenceStmt(Node *node, const char *queryString, + ProcessUtilityContext processUtilityContext) +{ + DropStmt *stmt = castNode(DropStmt, node); + List *deletingSequencesList = stmt->objects; + List *distributedSequencesList = NIL; + List *distributedSequenceAddresses = NIL; + + Assert(stmt->removeType == OBJECT_SEQUENCE); + + if (creating_extension) + { + /* + * extensions should be created separately on the workers, sequences cascading + * from an extension should therefore not be propagated here. + */ + return NIL; + } + + if (!EnableDependencyCreation) + { + /* + * we are configured to disable object propagation, should not propagate anything + */ + return NIL; + } + + /* + * Our statements need to be fully qualified so we can drop them from the right schema + * on the workers + */ + QualifyTreeNode((Node *) stmt); + + /* + * iterate over all sequences to be dropped and filter to keep only distributed + * sequences. + */ + List *objectNameList = NULL; + foreach_ptr(objectNameList, deletingSequencesList) + { + RangeVar *seq = makeRangeVarFromNameList(objectNameList); + + Oid seqOid = RangeVarGetRelid(seq, NoLock, stmt->missing_ok); + + ObjectAddress sequenceAddress = { 0 }; + ObjectAddressSet(sequenceAddress, RelationRelationId, seqOid); + + if (!IsObjectDistributed(&sequenceAddress)) + { + continue; + } + + /* collect information for all distributed sequences */ + ObjectAddress *addressp = palloc(sizeof(ObjectAddress)); + *addressp = sequenceAddress; + distributedSequenceAddresses = lappend(distributedSequenceAddresses, addressp); + distributedSequencesList = lappend(distributedSequencesList, objectNameList); + } + + if (list_length(distributedSequencesList) <= 0) + { + /* no distributed sequences to drop */ + return NIL; + } + + /* + * managing sequences can only be done on the coordinator if ddl propagation is on. when + * it is off we will never get here. MX workers don't have a notion of distributed + * sequences, so we block the call. + */ + EnsureCoordinator(); + + /* remove the entries for the distributed objects on dropping */ + ObjectAddress *address = NULL; + foreach_ptr(address, distributedSequenceAddresses) + { + UnmarkObjectDistributed(address); + } + + /* + * Deparse a copy of the statement that only contains the distributed sequences. This + * ensures we only have distributed sequences in the deparsed drop statement. + */ + DropStmt *stmtCopy = copyObject(stmt); + stmtCopy->objects = distributedSequencesList; + stmtCopy->missing_ok = true; + const char *dropStmtSql = DeparseTreeNode((Node *) stmtCopy); + + List *commands = list_make3(DISABLE_DDL_PROPAGATION, + (void *) dropStmtSql, + ENABLE_DDL_PROPAGATION); + + return NodeDDLTaskList(NON_COORDINATOR_NODES, commands); +} + + +/* + * PreprocessRenameSequenceStmt is called when the user is renaming a sequence. The invocation + * happens before the statement is applied locally. + * + * As the sequence already exists we have access to the ObjectAddress, this is used to + * check if it is distributed. If so the rename is executed on all the workers to keep the + * sequences in sync across the cluster. 
+ */ +List * +PreprocessRenameSequenceStmt(Node *node, const char *queryString, ProcessUtilityContext + processUtilityContext) +{ + RenameStmt *stmt = castNode(RenameStmt, node); + Assert(stmt->renameType == OBJECT_SEQUENCE); + + ObjectAddress address = GetObjectAddressFromParseTree((Node *) stmt, + stmt->missing_ok); + + if (!ShouldPropagateAlterSequence(&address)) + { + return NIL; + } + + EnsureCoordinator(); + QualifyTreeNode((Node *) stmt); + + /* this takes care of cases where not all workers have synced metadata */ + RenameStmt *stmtCopy = copyObject(stmt); + stmtCopy->missing_ok = true; + + const char *sql = DeparseTreeNode((Node *) stmtCopy); + + List *commands = list_make3(DISABLE_DDL_PROPAGATION, (void *) sql, + ENABLE_DDL_PROPAGATION); + + return NodeDDLTaskList(NON_COORDINATOR_NODES, commands); +} + + +/* + * RenameSequenceStmtObjectAddress returns the ObjectAddress of the sequence that is the + * subject of the RenameStmt. + */ +ObjectAddress +RenameSequenceStmtObjectAddress(Node *node, bool missing_ok) +{ + RenameStmt *stmt = castNode(RenameStmt, node); + Assert(stmt->renameType == OBJECT_SEQUENCE); + + RangeVar *sequence = stmt->relation; + Oid seqOid = RangeVarGetRelid(sequence, NoLock, missing_ok); + ObjectAddress sequenceAddress = { 0 }; + ObjectAddressSet(sequenceAddress, RelationRelationId, seqOid); + + return sequenceAddress; +} + + +/* + * ShouldPropagateAlterSequence returns, based on the address of a sequence, if alter + * statements targeting the sequence should be propagated. + */ +static bool +ShouldPropagateAlterSequence(const ObjectAddress *address) +{ + if (creating_extension) + { + /* + * extensions should be created separately on the workers, sequences cascading + * from an extension should therefore not be propagated. + */ + return false; + } + + if (!EnableDependencyCreation) + { + /* + * we are configured to disable object propagation, should not propagate anything + */ + return false; + } + + if (!IsObjectDistributed(address)) + { + /* do not propagate alter sequence for non-distributed sequences */ + return false; + } + + return true; +} + + +/* + * PreprocessAlterSequenceStmt gets called during the planning phase of an ALTER SEQUENCE statement + * of one of the following forms: + * ALTER SEQUENCE [ IF EXISTS ] name + * [ AS data_type ] + * [ INCREMENT [ BY ] increment ] + * [ MINVALUE minvalue | NO MINVALUE ] [ MAXVALUE maxvalue | NO MAXVALUE ] + * [ START [ WITH ] start ] + * [ RESTART [ [ WITH ] restart ] ] + * [ CACHE cache ] [ [ NO ] CYCLE ] + * [ OWNED BY { table_name.column_name | NONE } ] + * + * For distributed sequences, this operation will not be allowed for now. + * The reason is that we change sequence parameters when distributing it, so we don't want to + * touch those parameters for now. + */ +List * +PreprocessAlterSequenceStmt(Node *node, const char *queryString, + ProcessUtilityContext processUtilityContext) +{ + AlterSeqStmt *stmt = castNode(AlterSeqStmt, node); + + ObjectAddress address = GetObjectAddressFromParseTree((Node *) stmt, + stmt->missing_ok); + + /* error out if the sequence is distributed */ + if (IsObjectDistributed(&address)) + { + ereport(ERROR, (errmsg( + "This operation is currently not allowed for a distributed sequence."))); + } + else + { + return NIL; + } +} + + +/* + * AlterSequenceObjectAddress returns the ObjectAddress of the sequence that is the + * subject of the AlterSeqStmt. 
+ */ +ObjectAddress +AlterSequenceObjectAddress(Node *node, bool missing_ok) +{ + AlterSeqStmt *stmt = castNode(AlterSeqStmt, node); + + RangeVar *sequence = stmt->sequence; + Oid seqOid = RangeVarGetRelid(sequence, NoLock, stmt->missing_ok); + ObjectAddress sequenceAddress = { 0 }; + ObjectAddressSet(sequenceAddress, RelationRelationId, seqOid); + + return sequenceAddress; +} + + +/* + * PreprocessAlterSequenceSchemaStmt is executed before the statement is applied to the local + * postgres instance. + * + * For distributed sequences, this operation will not be allowed for now. + */ +List * +PreprocessAlterSequenceSchemaStmt(Node *node, const char *queryString, + ProcessUtilityContext processUtilityContext) +{ + AlterObjectSchemaStmt *stmt = castNode(AlterObjectSchemaStmt, node); + Assert(stmt->objectType == OBJECT_SEQUENCE); + + ObjectAddress address = GetObjectAddressFromParseTree((Node *) stmt, + stmt->missing_ok); + + /* error out if the sequence is distributed */ + if (IsObjectDistributed(&address)) + { + ereport(ERROR, (errmsg( + "This operation is currently not allowed for a distributed sequence."))); + } + else + { + return NIL; + } +} + + +/* + * AlterSequenceSchemaStmtObjectAddress returns the ObjectAddress of the sequence that is + * the subject of the AlterObjectSchemaStmt. + */ +ObjectAddress +AlterSequenceSchemaStmtObjectAddress(Node *node, bool missing_ok) +{ + AlterObjectSchemaStmt *stmt = castNode(AlterObjectSchemaStmt, node); + Assert(stmt->objectType == OBJECT_SEQUENCE); + + RangeVar *sequence = stmt->relation; + Oid seqOid = RangeVarGetRelid(sequence, NoLock, missing_ok); + ObjectAddress sequenceAddress = { 0 }; + ObjectAddressSet(sequenceAddress, RelationRelationId, seqOid); + + return sequenceAddress; +} diff --git a/src/backend/distributed/commands/table.c b/src/backend/distributed/commands/table.c index e7e186deb..f4958b4bc 100644 --- a/src/backend/distributed/commands/table.c +++ b/src/backend/distributed/commands/table.c @@ -17,6 +17,7 @@ #include "catalog/pg_class.h" #include "catalog/pg_constraint.h" #include "catalog/pg_depend.h" +#include "catalog/pg_type.h" #include "commands/tablecmds.h" #include "distributed/citus_ruleutils.h" #include "distributed/colocation_utils.h" @@ -28,14 +29,17 @@ #include "distributed/coordinator_protocol.h" #include "distributed/metadata_sync.h" #include "distributed/metadata/dependency.h" +#include "distributed/metadata/distobject.h" #include "distributed/multi_executor.h" #include "distributed/multi_partitioning_utils.h" #include "distributed/reference_table_utils.h" #include "distributed/relation_access_tracking.h" #include "distributed/resource_lock.h" #include "distributed/version_compat.h" +#include "distributed/worker_shard_visibility.h" #include "lib/stringinfo.h" #include "nodes/parsenodes.h" +#include "parser/parse_expr.h" #include "storage/lmgr.h" #include "utils/builtins.h" #include "utils/lsyscache.h" @@ -122,6 +126,8 @@ PreprocessDropTableStmt(Node *node, const char *queryString, Oid relationId = RangeVarGetRelid(tableRangeVar, AccessShareLock, missingOK); + ErrorIfIllegallyChangingKnownShard(relationId); + /* we're not interested in non-valid, non-distributed relations */ if (relationId == InvalidOid || !IsCitusTable(relationId)) { @@ -165,6 +171,8 @@ PreprocessDropTableStmt(Node *node, const char *queryString, SendCommandToWorkersWithMetadata(detachPartitionCommand); } + + SendCommandToWorkersWithMetadata(ENABLE_DDL_PROPAGATION); } return NIL; @@ -574,24 +582,50 @@ PreprocessAlterTableStmt(Node *node, const char 
*alterTableCommand, ErrorIfUnsupportedAlterTableStmt(alterTableStatement); } + EnsureCoordinator(); + /* these will be set in below loop according to subcommands */ Oid rightRelationId = InvalidOid; bool executeSequentially = false; /* - * We check if there is a ADD/DROP FOREIGN CONSTRAINT command in sub commands - * list. If there is we assign referenced relation id to rightRelationId and - * we also set skip_validation to true to prevent PostgreSQL to verify validity - * of the foreign constraint in master. Validity will be checked in workers - * anyway. + * We check if there is: + * - an ADD/DROP FOREIGN CONSTRAINT command in sub commands + * list. If there is we assign referenced relation id to rightRelationId and + * we also set skip_validation to true to prevent PostgreSQL to verify validity + * of the foreign constraint in master. Validity will be checked in workers + * anyway. + * - an ADD COLUMN .. DEFAULT nextval('..') OR + * an ADD COLUMN .. SERIAL pseudo-type OR + * an ALTER COLUMN .. SET DEFAULT nextval('..'). If there is we set + * deparseAT variable to true which means we will deparse the statement + * before we propagate the command to shards. For shards, all the defaults + * coming from a user-defined sequence will be replaced by + * NOT NULL constraint. */ List *commandList = alterTableStatement->cmds; + /* + * if deparsing is needed, we will use a different version of the original + * alterTableStmt + */ + bool deparseAT = false; + bool propagateCommandToWorkers = true; + AlterTableStmt *newStmt = copyObject(alterTableStatement); + + AlterTableCmd *newCmd = makeNode(AlterTableCmd); + AlterTableCmd *command = NULL; foreach_ptr(command, commandList) { AlterTableType alterTableType = command->subtype; + /* + * if deparsing is needed, we will use a different version of the original + * AlterTableCmd + */ + newCmd = copyObject(command); + if (alterTableType == AT_AddConstraint) { Constraint *constraint = (Constraint *) command->def; @@ -666,6 +700,96 @@ PreprocessAlterTableStmt(Node *node, const char *alterTableCommand, break; } } + + /* + * We check for ADD COLUMN .. DEFAULT expr + * if expr contains nextval('user_defined_seq') + * we should deparse the statement + */ + constraint = NULL; + foreach_ptr(constraint, columnConstraints) + { + if (constraint->contype == CONSTR_DEFAULT) + { + if (constraint->raw_expr != NULL) + { + ParseState *pstate = make_parsestate(NULL); + Node *expr = transformExpr(pstate, constraint->raw_expr, + EXPR_KIND_COLUMN_DEFAULT); + + if (contain_nextval_expression_walker(expr, NULL)) + { + deparseAT = true; + + /* the new column definition will have no constraint */ + ColumnDef *newColDef = copyObject(columnDefinition); + newColDef->constraints = NULL; + + newCmd->def = (Node *) newColDef; + } + } + } + } + + /* + * We check for ADD COLUMN .. SERIAL pseudo-type + * if that's the case, we should deparse the statement + * The structure of this check is copied from transformColumnDefinition. 
+ */ + if (columnDefinition->typeName && list_length( + columnDefinition->typeName->names) == 1 && + !columnDefinition->typeName->pct_type) + { + char *typeName = strVal(linitial(columnDefinition->typeName->names)); + + if (strcmp(typeName, "smallserial") == 0 || + strcmp(typeName, "serial2") == 0 || + strcmp(typeName, "serial") == 0 || + strcmp(typeName, "serial4") == 0 || + strcmp(typeName, "bigserial") == 0 || + strcmp(typeName, "serial8") == 0) + { + deparseAT = true; + + ColumnDef *newColDef = copyObject(columnDefinition); + newColDef->is_not_null = false; + + if (strcmp(typeName, "smallserial") == 0 || + strcmp(typeName, "serial2") == 0) + { + newColDef->typeName->names = NIL; + newColDef->typeName->typeOid = INT2OID; + } + else if (strcmp(typeName, "serial") == 0 || + strcmp(typeName, "serial4") == 0) + { + newColDef->typeName->names = NIL; + newColDef->typeName->typeOid = INT4OID; + } + else if (strcmp(typeName, "bigserial") == 0 || + strcmp(typeName, "serial8") == 0) + { + newColDef->typeName->names = NIL; + newColDef->typeName->typeOid = INT8OID; + } + newCmd->def = (Node *) newColDef; + } + } + } + /* + * We check for ALTER COLUMN .. SET/DROP DEFAULT + * we should not propagate anything to shards + */ + else if (alterTableType == AT_ColumnDefault) + { + ParseState *pstate = make_parsestate(NULL); + Node *expr = transformExpr(pstate, command->def, + EXPR_KIND_COLUMN_DEFAULT); + + if (contain_nextval_expression_walker(expr, NULL)) + { + propagateCommandToWorkers = false; + } } else if (alterTableType == AT_AttachPartition) { @@ -731,12 +855,20 @@ PreprocessAlterTableStmt(Node *node, const char *alterTableCommand, DDLJob *ddlJob = palloc0(sizeof(DDLJob)); ddlJob->targetRelationId = leftRelationId; ddlJob->concurrentIndexCmd = false; + + const char *sqlForTaskList = alterTableCommand; + if (deparseAT) + { + newStmt->cmds = list_make1(newCmd); + sqlForTaskList = DeparseTreeNode((Node *) newStmt); + } + ddlJob->commandString = alterTableCommand; if (OidIsValid(rightRelationId)) { bool referencedIsLocalTable = !IsCitusTable(rightRelationId); - if (referencedIsLocalTable) + if (referencedIsLocalTable || !propagateCommandToWorkers) { ddlJob->taskList = NIL; } @@ -744,13 +876,17 @@ PreprocessAlterTableStmt(Node *node, const char *alterTableCommand, { /* if foreign key related, use specialized task list function ... */ ddlJob->taskList = InterShardDDLTaskList(leftRelationId, rightRelationId, - alterTableCommand); + sqlForTaskList); } } else { /* ... otherwise use standard DDL task list function */ - ddlJob->taskList = DDLTaskList(leftRelationId, alterTableCommand); + ddlJob->taskList = DDLTaskList(leftRelationId, sqlForTaskList); + if (!propagateCommandToWorkers) + { + ddlJob->taskList = NIL; + } } List *ddlJobs = list_make1(ddlJob); @@ -1467,7 +1603,79 @@ PostprocessAlterTableStmt(AlterTableStmt *alterTableStatement) constraint); } } + + /* + * We check for ADD COLUMN .. 
DEFAULT expr + * if expr contains nextval('user_defined_seq') + * we should make sure that the type of the column that uses + * that sequence is supported + */ + constraint = NULL; + foreach_ptr(constraint, columnConstraints) + { + if (constraint->contype == CONSTR_DEFAULT) + { + if (constraint->raw_expr != NULL) + { + ParseState *pstate = make_parsestate(NULL); + Node *expr = transformExpr(pstate, constraint->raw_expr, + EXPR_KIND_COLUMN_DEFAULT); + + /* + * We should make sure that the type of the column that uses + * that sequence is supported + */ + if (contain_nextval_expression_walker(expr, NULL)) + { + AttrNumber attnum = get_attnum(relationId, + columnDefinition->colname); + Oid seqTypId = GetAttributeTypeOid(relationId, attnum); + EnsureSequenceTypeSupported(relationId, attnum, seqTypId); + } + } + } + } } + /* + * We check for ALTER COLUMN .. SET DEFAULT nextval('user_defined_seq') + * we should make sure that the type of the column that uses + * that sequence is supported + */ + else if (alterTableType == AT_ColumnDefault) + { + ParseState *pstate = make_parsestate(NULL); + Node *expr = transformExpr(pstate, command->def, + EXPR_KIND_COLUMN_DEFAULT); + + if (contain_nextval_expression_walker(expr, NULL)) + { + AttrNumber attnum = get_attnum(relationId, command->name); + Oid seqTypId = GetAttributeTypeOid(relationId, attnum); + EnsureSequenceTypeSupported(relationId, attnum, seqTypId); + } + } + } + + /* for the new sequences coming with this ALTER TABLE statement */ + if (ShouldSyncTableMetadata(relationId) && ClusterHasKnownMetadataWorkers()) + { + List *sequenceCommandList = NIL; + + /* commands to create sequences */ + List *sequenceDDLCommands = SequenceDDLCommandsForTable(relationId); + sequenceCommandList = list_concat(sequenceCommandList, sequenceDDLCommands); + + /* prevent recursive propagation */ + SendCommandToWorkersWithMetadata(DISABLE_DDL_PROPAGATION); + + /* send the commands one by one */ + const char *sequenceCommand = NULL; + foreach_ptr(sequenceCommand, sequenceCommandList) + { + SendCommandToWorkersWithMetadata(sequenceCommand); + } + + SendCommandToWorkersWithMetadata(ENABLE_DDL_PROPAGATION); } } @@ -1736,9 +1944,100 @@ ErrorIfUnsupportedAlterTableStmt(AlterTableStmt *alterTableStatement) strcmp(typeName, "bigserial") == 0 || strcmp(typeName, "serial8") == 0) { - ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot execute ADD COLUMN commands " - "involving serial pseudotypes"))); + /* + * We currently don't support adding a serial column for an MX table + * TODO: record the dependency in the workers + */ + if (ShouldSyncTableMetadata(relationId) && + ClusterHasKnownMetadataWorkers()) + { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg( + "cannot execute ADD COLUMN commands involving serial" + " pseudotypes when metadata is synchronized to workers"))); + } + + /* + * we only allow adding a serial column if it is the only subcommand + * and it has no constraints + */ + if (commandList->length > 1 || column->constraints) + { + ereport(ERROR, (errcode( + ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg( + "cannot execute ADD COLUMN commands involving " + "serial pseudotypes with other subcommands/constraints"), + errhint( + "You can issue each subcommand separately"))); + } + + /* + * Currently we don't support backfilling the new column with default values + * if the table is not empty + */ + if (!TableEmpty(relationId)) + { + ereport(ERROR, (errcode( + ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg( + "Cannot add a column involving 
serial pseudotypes " + "because the table is not empty"), + errhint( + "You can first call ALTER TABLE .. ADD COLUMN .. smallint/int/bigint\n" + "Then set the default by ALTER TABLE .. ALTER COLUMN .. SET DEFAULT nextval('..')"))); + } + } + } + + List *columnConstraints = column->constraints; + + Constraint *constraint = NULL; + foreach_ptr(constraint, columnConstraints) + { + if (constraint->contype == CONSTR_DEFAULT) + { + if (constraint->raw_expr != NULL) + { + ParseState *pstate = make_parsestate(NULL); + Node *expr = transformExpr(pstate, constraint->raw_expr, + EXPR_KIND_COLUMN_DEFAULT); + + if (contain_nextval_expression_walker(expr, NULL)) + { + /* + * we only allow adding a column with non_const default + * if its the only subcommand and has no other constraints + */ + if (commandList->length > 1 || + columnConstraints->length > 1) + { + ereport(ERROR, (errcode( + ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg( + "cannot execute ADD COLUMN .. DEFAULT nextval('..')" + " command with other subcommands/constraints"), + errhint( + "You can issue each subcommand separately"))); + } + + /* + * Currently we don't support backfilling the new column with default values + * if the table is not empty + */ + if (!TableEmpty(relationId)) + { + ereport(ERROR, (errcode( + ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg( + "cannot add a column involving DEFAULT nextval('..') " + "because the table is not empty"), + errhint( + "You can first call ALTER TABLE .. ADD COLUMN .. smallint/int/bigint\n" + "Then set the default by ALTER TABLE .. ALTER COLUMN .. SET DEFAULT nextval('..')"))); + } + } + } } } } @@ -1746,9 +2045,67 @@ ErrorIfUnsupportedAlterTableStmt(AlterTableStmt *alterTableStatement) break; } - case AT_DropColumn: case AT_ColumnDefault: + { + if (AlterInvolvesPartitionColumn(alterTableStatement, command)) + { + ereport(ERROR, (errmsg("cannot execute ALTER TABLE command " + "involving partition column"))); + } + + ParseState *pstate = make_parsestate(NULL); + Node *expr = transformExpr(pstate, command->def, + EXPR_KIND_COLUMN_DEFAULT); + + if (contain_nextval_expression_walker(expr, NULL)) + { + /* + * we only allow altering a column's default to non_const expr + * if its the only subcommand + */ + if (commandList->length > 1) + { + ereport(ERROR, (errcode( + ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg( + "cannot execute ALTER COLUMN COLUMN .. SET DEFAULT " + "nextval('..') command with other subcommands"), + errhint( + "You can issue each subcommand separately"))); + } + } + + break; + } + case AT_AlterColumnType: + { + if (AlterInvolvesPartitionColumn(alterTableStatement, command)) + { + ereport(ERROR, (errmsg("cannot execute ALTER TABLE command " + "involving partition column"))); + } + + /* + * We check for ALTER COLUMN TYPE ... + * if the column has default coming from a user-defined sequence + * changing the type of the column should not be allowed for now + */ + AttrNumber attnum = get_attnum(relationId, command->name); + List *attnumList = NIL; + List *dependentSequenceList = NIL; + GetDependentSequencesWithRelation(relationId, &attnumList, + &dependentSequenceList, attnum); + if (dependentSequenceList != NIL) + { + ereport(ERROR, (errmsg("cannot execute ALTER COLUMN TYPE .. 
command " + "because the column involves a default coming " + "from a sequence"))); + } + break; + } + + case AT_DropColumn: case AT_DropNotNull: { if (AlterInvolvesPartitionColumn(alterTableStatement, command)) diff --git a/src/backend/distributed/commands/truncate.c b/src/backend/distributed/commands/truncate.c index 03a2c98f3..30b9f3765 100644 --- a/src/backend/distributed/commands/truncate.c +++ b/src/backend/distributed/commands/truncate.c @@ -32,6 +32,7 @@ #include "distributed/resource_lock.h" #include "distributed/transaction_management.h" #include "distributed/worker_transaction.h" +#include "distributed/worker_shard_visibility.h" #include "storage/lmgr.h" #include "utils/builtins.h" #include "utils/lsyscache.h" @@ -174,10 +175,11 @@ TruncateTaskList(Oid relationId) Datum truncate_local_data_after_distributing_table(PG_FUNCTION_ARGS) { - Oid relationId = PG_GETARG_OID(0); - CheckCitusVersion(ERROR); EnsureCoordinator(); + + Oid relationId = PG_GETARG_OID(0); + EnsureLocalTableCanBeTruncated(relationId); TruncateStmt *truncateStmt = makeNode(TruncateStmt); @@ -215,25 +217,20 @@ EnsureLocalTableCanBeTruncated(Oid relationId) "tables."))); } - /* make sure there are no foreign key references from a local table */ - SetForeignConstraintRelationshipGraphInvalid(); - List *referencingRelationList = ReferencingRelationIdList(relationId); - - Oid referencingRelation = InvalidOid; - foreach_oid(referencingRelation, referencingRelationList) + List *referencingForeignConstaintsFromLocalTables = + GetForeignKeysFromLocalTables(relationId); + if (list_length(referencingForeignConstaintsFromLocalTables) > 0) { - /* we do not truncate a table if there is a local table referencing it */ - if (!IsCitusTable(referencingRelation)) - { - char *referencedRelationName = get_rel_name(relationId); - char *referencingRelationName = get_rel_name(referencingRelation); + Oid foreignKeyId = linitial_oid(referencingForeignConstaintsFromLocalTables); + Oid referencingRelation = GetReferencingTableId(foreignKeyId); + char *referencedRelationName = get_rel_name(relationId); + char *referencingRelationName = get_rel_name(referencingRelation); - ereport(ERROR, (errmsg("cannot truncate a table referenced in a " - "foreign key constraint by a local table"), - errdetail("Table \"%s\" references \"%s\"", - referencingRelationName, - referencedRelationName))); - } + ereport(ERROR, (errmsg("cannot truncate a table referenced in a " + "foreign key constraint by a local table"), + errdetail("Table \"%s\" references \"%s\"", + referencingRelationName, + referencedRelationName))); } } @@ -265,6 +262,9 @@ ErrorIfUnsupportedTruncateStmt(TruncateStmt *truncateStatement) foreach_ptr(rangeVar, relationList) { Oid relationId = RangeVarGetRelid(rangeVar, NoLock, false); + + ErrorIfIllegallyChangingKnownShard(relationId); + char relationKind = get_rel_relkind(relationId); if (IsCitusTable(relationId) && relationKind == RELKIND_FOREIGN_TABLE) diff --git a/src/backend/distributed/connection/connection_configuration.c b/src/backend/distributed/connection/connection_configuration.c index 65d82f27b..43ceaefd7 100644 --- a/src/backend/distributed/connection/connection_configuration.c +++ b/src/backend/distributed/connection/connection_configuration.c @@ -21,6 +21,7 @@ /* stores the string representation of our node connection GUC */ char *NodeConninfo = ""; +char *LocalHostName = "localhost"; /* represents a list of libpq parameter settings */ typedef struct ConnParamsInfo diff --git 
a/src/backend/distributed/connection/locally_reserved_shared_connections.c b/src/backend/distributed/connection/locally_reserved_shared_connections.c index 19bc93ae6..efe14b2ad 100644 --- a/src/backend/distributed/connection/locally_reserved_shared_connections.c +++ b/src/backend/distributed/connection/locally_reserved_shared_connections.c @@ -111,9 +111,9 @@ PG_FUNCTION_INFO_V1(citus_reserved_connection_stats); Datum citus_reserved_connection_stats(PG_FUNCTION_ARGS) { - TupleDesc tupleDescriptor = NULL; - CheckCitusVersion(ERROR); + + TupleDesc tupleDescriptor = NULL; Tuplestorestate *tupleStore = SetupTuplestore(fcinfo, &tupleDescriptor); StoreAllReservedConnections(tupleStore, tupleDescriptor); diff --git a/src/backend/distributed/connection/placement_connection.c b/src/backend/distributed/connection/placement_connection.c index 74c42d95d..b5d1b260c 100644 --- a/src/backend/distributed/connection/placement_connection.c +++ b/src/backend/distributed/connection/placement_connection.c @@ -846,6 +846,18 @@ ConnectionModifiedPlacement(MultiConnection *connection) return false; } + if (dlist_is_empty(&connection->referencedPlacements)) + { + /* + * When referencesPlacements are empty, it means that we come here + * from an API that uses a node connection (e.g., not placement connection), + * which doesn't set placements. + * In that case, the command sent could be either write or read, so we assume + * it is write to be on the safe side. + */ + return true; + } + dlist_foreach(placementIter, &connection->referencedPlacements) { ConnectionReference *connectionReference = diff --git a/src/backend/distributed/connection/shared_connection_stats.c b/src/backend/distributed/connection/shared_connection_stats.c index 649f2e9b5..89fb1cd19 100644 --- a/src/backend/distributed/connection/shared_connection_stats.c +++ b/src/backend/distributed/connection/shared_connection_stats.c @@ -136,9 +136,9 @@ PG_FUNCTION_INFO_V1(citus_remote_connection_stats); Datum citus_remote_connection_stats(PG_FUNCTION_ARGS) { - TupleDesc tupleDescriptor = NULL; - CheckCitusVersion(ERROR); + + TupleDesc tupleDescriptor = NULL; Tuplestorestate *tupleStore = SetupTuplestore(fcinfo, &tupleDescriptor); StoreAllRemoteConnectionStats(tupleStore, tupleDescriptor); diff --git a/src/backend/distributed/deparser/citus_ruleutils.c b/src/backend/distributed/deparser/citus_ruleutils.c index 78af394c7..28d83918c 100644 --- a/src/backend/distributed/deparser/citus_ruleutils.c +++ b/src/backend/distributed/deparser/citus_ruleutils.c @@ -27,10 +27,12 @@ #include "catalog/indexing.h" #include "catalog/namespace.h" #include "catalog/pg_am.h" +#include "catalog/pg_attrdef.h" #include "catalog/pg_attribute.h" #include "catalog/pg_authid.h" #include "catalog/pg_class.h" #include "catalog/pg_collation.h" +#include "catalog/pg_depend.h" #include "catalog/pg_extension.h" #include "catalog/pg_foreign_data_wrapper.h" #include "catalog/pg_index.h" @@ -40,9 +42,10 @@ #include "distributed/citus_ruleutils.h" #include "distributed/listutils.h" #include "distributed/multi_partitioning_utils.h" -#include "distributed/relay_utility.h" -#include "distributed/metadata_utility.h" #include "distributed/metadata_cache.h" +#include "distributed/metadata_sync.h" +#include "distributed/metadata_utility.h" +#include "distributed/relay_utility.h" #include "distributed/version_compat.h" #include "foreign/foreign.h" #include "lib/stringinfo.h" @@ -74,6 +77,8 @@ static void AppendStorageParametersToString(StringInfo stringBuffer, List *optionList); static void 
simple_quote_literal(StringInfo buf, const char *val); static char * flatten_reloptions(Oid relid); +static Oid get_attrdef_oid(Oid relationId, AttrNumber attnum); + /* * pg_get_extensiondef_string finds the foreign data wrapper that corresponds to @@ -365,6 +370,16 @@ pg_get_tableschemadef_string(Oid tableRelationId, bool includeSequenceDefaults, appendStringInfo(&buffer, " DEFAULT %s", defaultString); } } + + /* + * We should make sure that the type of the column that uses + * that sequence is supported + */ + if (contain_nextval_expression_walker(defaultNode, NULL)) + { + EnsureSequenceTypeSupported(tableRelationId, defaultValue->adnum, + attributeForm->atttypid); + } } /* if this column has a not null constraint, append the constraint */ @@ -483,6 +498,138 @@ pg_get_tableschemadef_string(Oid tableRelationId, bool includeSequenceDefaults, } +/* + * EnsureSequenceTypeSupported ensures that the type of the column that uses + * a sequence on its DEFAULT is consistent with previous uses of the sequence (if any) + * It gets the AttrDefault OID from the given relationId and attnum, extracts the sequence + * id from it, and if any other distributed table uses that same sequence, it checks whether + * the types of the columns using the sequence match. If they don't, it errors out. + * Otherwise, the condition is ensured. + */ +void +EnsureSequenceTypeSupported(Oid relationId, AttrNumber attnum, Oid seqTypId) +{ + /* get attrdefoid from the given relationId and attnum */ + Oid attrdefOid = get_attrdef_oid(relationId, attnum); + + /* retrieve the sequence id of the sequence found in nextval('seq') */ + List *sequencesFromAttrDef = GetSequencesFromAttrDef(attrdefOid); + + if (list_length(sequencesFromAttrDef) == 0) + { + /* + * We need this check because sometimes there are cases where the + * dependency between the table and the sequence is not formed + * One example is when the default is defined by + * DEFAULT nextval('seq_name'::text) (not by DEFAULT nextval('seq_name')) + * In these cases, sequencesFromAttrDef with be empty. 
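(Illustrative aside, not part of the patch: the get_attrdef_oid() helper further below notes that a direct pg_attrdef lookup would normally be the more natural way to find the column default's OID. A rough sketch of that alternative, assuming the pg_attrdef index on (adrelid, adnum) is used, follows; the function name is hypothetical.)

/*
 * Hypothetical alternative to the pg_depend-based get_attrdef_oid() below:
 * fetch the pg_attrdef row for (relationId, attnum) directly. Sketch only;
 * the shipped code iterates pg_depend to stay consistent with its callers.
 */
#include "postgres.h"

#include "access/genam.h"
#include "access/htup_details.h"
#include "access/skey.h"
#include "access/stratnum.h"
#include "access/table.h"
#include "catalog/indexing.h"
#include "catalog/pg_attrdef.h"
#include "storage/lmgr.h"
#include "utils/fmgroids.h"

static Oid
sketch_get_attrdef_oid(Oid relationId, AttrNumber attnum)
{
    Oid resultAttrdefOid = InvalidOid;
    ScanKeyData key[2];

    Relation attrDefRel = table_open(AttrDefaultRelationId, AccessShareLock);

    ScanKeyInit(&key[0], Anum_pg_attrdef_adrelid,
                BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(relationId));
    ScanKeyInit(&key[1], Anum_pg_attrdef_adnum,
                BTEqualStrategyNumber, F_INT2EQ, Int16GetDatum(attnum));

    SysScanDesc scan = systable_beginscan(attrDefRel, AttrDefaultIndexId, true,
                                          NULL, 2, key);

    HeapTuple tuple = systable_getnext(scan);
    if (HeapTupleIsValid(tuple))
    {
        resultAttrdefOid = ((Form_pg_attrdef) GETSTRUCT(tuple))->oid;
    }

    systable_endscan(scan);
    table_close(attrDefRel, AccessShareLock);

    return resultAttrdefOid;
}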
+ */ + return; + } + + if (list_length(sequencesFromAttrDef) > 1) + { + /* to simplify and eliminate cases like "DEFAULT nextval('..') - nextval('..')" */ + ereport(ERROR, (errmsg( + "More than one sequence in a column default" + " is not supported for distribution"))); + } + + Oid seqOid = lfirst_oid(list_head(sequencesFromAttrDef)); + + List *citusTableIdList = CitusTableTypeIdList(ANY_CITUS_TABLE_TYPE); + Oid citusTableId = InvalidOid; + foreach_oid(citusTableId, citusTableIdList) + { + List *attnumList = NIL; + List *dependentSequenceList = NIL; + GetDependentSequencesWithRelation(citusTableId, &attnumList, + &dependentSequenceList, 0); + ListCell *attnumCell = NULL; + ListCell *dependentSequenceCell = NULL; + forboth(attnumCell, attnumList, dependentSequenceCell, + dependentSequenceList) + { + AttrNumber currentAttnum = lfirst_int(attnumCell); + Oid currentSeqOid = lfirst_oid(dependentSequenceCell); + + /* + * If another distributed table is using the same sequence + * in one of its column defaults, make sure the types of the + * columns match + */ + if (currentSeqOid == seqOid) + { + Oid currentSeqTypId = GetAttributeTypeOid(citusTableId, + currentAttnum); + if (seqTypId != currentSeqTypId) + { + char *sequenceName = generate_qualified_relation_name( + seqOid); + char *citusTableName = + generate_qualified_relation_name(citusTableId); + ereport(ERROR, (errmsg( + "The sequence %s is already used for a different" + " type in column %d of the table %s", + sequenceName, currentAttnum, + citusTableName))); + } + } + } + } +} + + +/* + * get_attrdef_oid gets the oid of the attrdef that has dependency with + * the given relationId (refobjid) and attnum (refobjsubid). + * If there is no such attrdef it returns InvalidOid. + * NOTE: we are iterating pg_depend here since this function is used together + * with other functions that iterate pg_depend. Normally, a look at pg_attrdef + * would make more sense. + */ +static Oid +get_attrdef_oid(Oid relationId, AttrNumber attnum) +{ + Oid resultAttrdefOid = InvalidOid; + + ScanKeyData key[3]; + + Relation depRel = table_open(DependRelationId, AccessShareLock); + + ScanKeyInit(&key[0], + Anum_pg_depend_refclassid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationRelationId)); + ScanKeyInit(&key[1], + Anum_pg_depend_refobjid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(relationId)); + ScanKeyInit(&key[2], + Anum_pg_depend_refobjsubid, + BTEqualStrategyNumber, F_INT4EQ, + Int32GetDatum(attnum)); + + SysScanDesc scan = systable_beginscan(depRel, DependReferenceIndexId, true, + NULL, attnum ? 3 : 2, key); + + HeapTuple tup; + while (HeapTupleIsValid(tup = systable_getnext(scan))) + { + Form_pg_depend deprec = (Form_pg_depend) GETSTRUCT(tup); + + if (deprec->classid == AttrDefaultRelationId) + { + resultAttrdefOid = deprec->objid; + } + } + + systable_endscan(scan); + table_close(depRel, AccessShareLock); + return resultAttrdefOid; +} + + /* * EnsureRelationKindSupported errors out if the given relation is not supported * as a distributed relation. diff --git a/src/backend/distributed/deparser/deparse_database_stmts.c b/src/backend/distributed/deparser/deparse_database_stmts.c new file mode 100644 index 000000000..0ebc69238 --- /dev/null +++ b/src/backend/distributed/deparser/deparse_database_stmts.c @@ -0,0 +1,49 @@ +/*------------------------------------------------------------------------- + * + * deparse_database_stmts.c + * + * All routines to deparse database statements. + * + * Copyright (c), Citus Data, Inc. 
+ * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "catalog/namespace.h" +#include "lib/stringinfo.h" +#include "nodes/parsenodes.h" +#include "utils/builtins.h" + +#include "distributed/citus_ruleutils.h" +#include "distributed/deparser.h" + +static void AppendAlterDatabaseOwnerStmt(StringInfo buf, AlterOwnerStmt *stmt); + + +char * +DeparseAlterDatabaseOwnerStmt(Node *node) +{ + AlterOwnerStmt *stmt = castNode(AlterOwnerStmt, node); + StringInfoData str = { 0 }; + initStringInfo(&str); + + Assert(stmt->objectType == OBJECT_DATABASE); + + AppendAlterDatabaseOwnerStmt(&str, stmt); + + return str.data; +} + + +static void +AppendAlterDatabaseOwnerStmt(StringInfo buf, AlterOwnerStmt *stmt) +{ + Assert(stmt->objectType == OBJECT_DATABASE); + + appendStringInfo(buf, + "ALTER DATABASE %s OWNER TO %s;", + quote_identifier(strVal((Value *) stmt->object)), + RoleSpecString(stmt->newowner, true)); +} diff --git a/src/backend/distributed/deparser/deparse_sequence_stmts.c b/src/backend/distributed/deparser/deparse_sequence_stmts.c new file mode 100644 index 000000000..ecbcefc20 --- /dev/null +++ b/src/backend/distributed/deparser/deparse_sequence_stmts.c @@ -0,0 +1,158 @@ +/*------------------------------------------------------------------------- + * + * deparse_sequence_stmts.c + * + * All routines to deparse sequence statements. + * This file contains all entry points specific for sequence statement + * deparsing + * + * Copyright (c), Citus Data, Inc. + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "catalog/namespace.h" +#include "distributed/deparser.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" + + +/* forward declaration for deparse functions */ +static void AppendDropSequenceStmt(StringInfo buf, DropStmt *stmt); +static void AppendSequenceNameList(StringInfo buf, List *objects, ObjectType objtype); +static void AppendRenameSequenceStmt(StringInfo buf, RenameStmt *stmt); + +/* + * DeparseDropSequenceStmt builds and returns a string representing the DropStmt + */ +char * +DeparseDropSequenceStmt(Node *node) +{ + DropStmt *stmt = castNode(DropStmt, node); + StringInfoData str = { 0 }; + initStringInfo(&str); + + Assert(stmt->removeType == OBJECT_SEQUENCE); + + AppendDropSequenceStmt(&str, stmt); + + return str.data; +} + + +/* + * AppendDropSequenceStmt appends a string representing the DropStmt to a buffer + */ +static void +AppendDropSequenceStmt(StringInfo buf, DropStmt *stmt) +{ + appendStringInfoString(buf, "DROP SEQUENCE "); + + if (stmt->missing_ok) + { + appendStringInfoString(buf, "IF EXISTS "); + } + + AppendSequenceNameList(buf, stmt->objects, stmt->removeType); + + if (stmt->behavior == DROP_CASCADE) + { + appendStringInfoString(buf, " CASCADE"); + } + + appendStringInfoString(buf, ";"); +} + + +/* + * AppendSequenceNameList appends a string representing the list of sequence names to a buffer + */ +static void +AppendSequenceNameList(StringInfo buf, List *objects, ObjectType objtype) +{ + ListCell *objectCell = NULL; + foreach(objectCell, objects) + { + if (objectCell != list_head(objects)) + { + appendStringInfo(buf, ", "); + } + + RangeVar *seq = makeRangeVarFromNameList((List *) lfirst(objectCell)); + + if (seq->schemaname == NULL) + { + Oid schemaOid = RangeVarGetCreationNamespace(seq); + seq->schemaname = get_namespace_name(schemaOid); + } + + char *qualifiedSequenceName = quote_qualified_identifier(seq->schemaname, + 
seq->relname); + appendStringInfoString(buf, qualifiedSequenceName); + } +} + + +/* + * DeparseRenameSequenceStmt builds and returns a string representing the RenameStmt + */ +char * +DeparseRenameSequenceStmt(Node *node) +{ + RenameStmt *stmt = castNode(RenameStmt, node); + StringInfoData str = { 0 }; + initStringInfo(&str); + + Assert(stmt->renameType == OBJECT_SEQUENCE); + + AppendRenameSequenceStmt(&str, stmt); + + return str.data; +} + + +/* + * AppendRenameSequenceStmt appends a string representing the RenameStmt to a buffer + */ +static void +AppendRenameSequenceStmt(StringInfo buf, RenameStmt *stmt) +{ + RangeVar *seq = stmt->relation; + + char *qualifiedSequenceName = quote_qualified_identifier(seq->schemaname, + seq->relname); + + appendStringInfoString(buf, "ALTER SEQUENCE "); + + if (stmt->missing_ok) + { + appendStringInfoString(buf, "IF EXISTS "); + } + + appendStringInfoString(buf, qualifiedSequenceName); + + appendStringInfo(buf, " RENAME TO %s", quote_identifier(stmt->newname)); +} + + +/* + * QualifyRenameSequenceStmt transforms a + * ALTER SEQUENCE .. RENAME TO .. + * statement in place and makes the sequence name fully qualified. + */ +void +QualifyRenameSequenceStmt(Node *node) +{ + RenameStmt *stmt = castNode(RenameStmt, node); + Assert(stmt->renameType == OBJECT_SEQUENCE); + + RangeVar *seq = stmt->relation; + + if (seq->schemaname == NULL) + { + Oid schemaOid = RangeVarGetCreationNamespace(seq); + seq->schemaname = get_namespace_name(schemaOid); + } +} diff --git a/src/backend/distributed/deparser/deparse_table_stmts.c b/src/backend/distributed/deparser/deparse_table_stmts.c index bbab78f8a..8b63207f4 100644 --- a/src/backend/distributed/deparser/deparse_table_stmts.c +++ b/src/backend/distributed/deparser/deparse_table_stmts.c @@ -14,9 +14,13 @@ #include "distributed/deparser.h" #include "nodes/nodes.h" #include "nodes/parsenodes.h" +#include "parser/parse_type.h" #include "utils/builtins.h" static void AppendAlterTableSchemaStmt(StringInfo buf, AlterObjectSchemaStmt *stmt); +static void AppendAlterTableStmt(StringInfo buf, AlterTableStmt *stmt); +static void AppendAlterTableCmd(StringInfo buf, AlterTableCmd *alterTableCmd); +static void AppendAlterTableCmdAddColumn(StringInfo buf, AlterTableCmd *alterTableCmd); char * DeparseAlterTableSchemaStmt(Node *node) @@ -46,3 +50,129 @@ AppendAlterTableSchemaStmt(StringInfo buf, AlterObjectSchemaStmt *stmt) const char *newSchemaName = quote_identifier(stmt->newschema); appendStringInfo(buf, "%s SET SCHEMA %s;", tableName, newSchemaName); } + + +/* + * DeparseAlterTableStmt builds and returns a string representing the + * AlterTableStmt where the object acted upon is of kind OBJECT_TABLE + */ +char * +DeparseAlterTableStmt(Node *node) +{ + AlterTableStmt *stmt = castNode(AlterTableStmt, node); + StringInfoData str = { 0 }; + initStringInfo(&str); + + Assert(stmt->relkind == OBJECT_TABLE); + + AppendAlterTableStmt(&str, stmt); + return str.data; +} + + +/* + * AppendAlterTableStmt builds and returns an SQL command representing an + * ALTER TABLE statement from given AlterTableStmt object where the object + * acted upon is of kind OBJECT_TABLE + */ +static void +AppendAlterTableStmt(StringInfo buf, AlterTableStmt *stmt) +{ + const char *identifier = quote_qualified_identifier(stmt->relation->schemaname, + stmt->relation->relname); + ListCell *cmdCell = NULL; + + Assert(stmt->relkind == OBJECT_TABLE); + + appendStringInfo(buf, "ALTER TABLE %s", identifier); + foreach(cmdCell, stmt->cmds) + { + if (cmdCell != 
list_head(stmt->cmds)) + { + appendStringInfoString(buf, ", "); + } + + AlterTableCmd *alterTableCmd = castNode(AlterTableCmd, lfirst(cmdCell)); + AppendAlterTableCmd(buf, alterTableCmd); + } + + appendStringInfoString(buf, ";"); +} + + +/* + * AppendAlterTableCmd builds and appends to the given buffer a command + * from given AlterTableCmd object. Currently supported commands are of type + * AT_AddColumn and AT_SetNotNull + */ +static void +AppendAlterTableCmd(StringInfo buf, AlterTableCmd *alterTableCmd) +{ + switch (alterTableCmd->subtype) + { + case AT_AddColumn: + { + AppendAlterTableCmdAddColumn(buf, alterTableCmd); + break; + } + + default: + { + ereport(ERROR, (errmsg("unsupported subtype for alter table command"), + errdetail("sub command type: %d", alterTableCmd->subtype))); + } + } +} + + +/* + * AppendAlterTableCmd builds and appends to the given buffer an AT_AddColumn command + * from given AlterTableCmd object in the form ADD COLUMN ... + */ +static void +AppendAlterTableCmdAddColumn(StringInfo buf, AlterTableCmd *alterTableCmd) +{ + Assert(alterTableCmd->subtype == AT_AddColumn); + + appendStringInfoString(buf, " ADD COLUMN "); + + ColumnDef *columnDefinition = (ColumnDef *) alterTableCmd->def; + + /* + * the way we use the deparser now, constraints are always NULL + * adding this check for ColumnDef consistency + */ + if (columnDefinition->constraints != NULL) + { + ereport(ERROR, (errmsg("Constraints are not supported for AT_AddColumn"))); + } + + if (columnDefinition->colname) + { + appendStringInfo(buf, "%s ", quote_identifier(columnDefinition->colname)); + } + + int32 typmod = 0; + Oid typeOid = InvalidOid; + bits16 formatFlags = FORMAT_TYPE_TYPEMOD_GIVEN | FORMAT_TYPE_FORCE_QUALIFY; + typenameTypeIdAndMod(NULL, columnDefinition->typeName, &typeOid, &typmod); + appendStringInfo(buf, "%s", format_type_extended(typeOid, typmod, + formatFlags)); + if (columnDefinition->is_not_null) + { + appendStringInfoString(buf, " NOT NULL"); + } + + /* + * the way we use the deparser now, collation is never used + * since the data type of columns that use sequences for default + * are only int,smallint and bigint (never text, varchar, char) + * Adding this part only for ColumnDef consistency + */ + Oid collationOid = GetColumnDefCollation(NULL, columnDefinition, typeOid); + if (OidIsValid(collationOid)) + { + const char *identifier = FormatCollateBEQualified(collationOid); + appendStringInfo(buf, " COLLATE %s", identifier); + } +} diff --git a/src/backend/distributed/executor/adaptive_executor.c b/src/backend/distributed/executor/adaptive_executor.c index 270cb7e80..6bfcbe753 100644 --- a/src/backend/distributed/executor/adaptive_executor.c +++ b/src/backend/distributed/executor/adaptive_executor.c @@ -124,6 +124,7 @@ #include "miscadmin.h" #include "pgstat.h" +#include #include #include @@ -142,6 +143,7 @@ #include "distributed/deparse_shard_query.h" #include "distributed/shared_connection_stats.h" #include "distributed/distributed_execution_locks.h" +#include "distributed/intermediate_result_pruning.h" #include "distributed/listutils.h" #include "distributed/local_executor.h" #include "distributed/multi_client_executor.h" @@ -413,6 +415,10 @@ typedef struct WorkerPool * use it anymore. 
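(Illustrative aside, not part of the patch: a hypothetical way to exercise the new ALTER TABLE deparser from backend code, assuming DeparseAlterTableStmt() is declared in distributed/deparser.h alongside the other deparse entry points. The table and column names are made up, and the expected output is approximate since format_type_extended() with FORMAT_TYPE_FORCE_QUALIFY prints the type schema-qualified.)

/*
 * Hypothetical usage sketch for DeparseAlterTableStmt() added above. Builds the
 * parse tree for "ALTER TABLE public.items ADD COLUMN id bigint NOT NULL" by
 * hand and deparses it. Requires a backend context, since the type lookup
 * needs catalog access.
 */
#include "postgres.h"

#include "nodes/makefuncs.h"
#include "nodes/parsenodes.h"
#include "nodes/value.h"

#include "distributed/deparser.h"

static char *
sketch_deparse_add_bigint_column(void)
{
    ColumnDef *columnDefinition = makeNode(ColumnDef);
    columnDefinition->colname = pstrdup("id");
    columnDefinition->typeName = makeTypeNameFromNameList(
        list_make2(makeString("pg_catalog"), makeString("int8")));
    columnDefinition->is_not_null = true;   /* deparsed as NOT NULL */
    columnDefinition->constraints = NIL;    /* must be empty for this code path */

    AlterTableCmd *addColumnCmd = makeNode(AlterTableCmd);
    addColumnCmd->subtype = AT_AddColumn;
    addColumnCmd->def = (Node *) columnDefinition;

    AlterTableStmt *alterTableStmt = makeNode(AlterTableStmt);
    alterTableStmt->relation = makeRangeVar("public", "items", -1);
    alterTableStmt->cmds = list_make1(addColumnCmd);
    alterTableStmt->relkind = OBJECT_TABLE;

    /*
     * Expected to yield roughly:
     * ALTER TABLE public.items ADD COLUMN id pg_catalog.int8 NOT NULL;
     */
    return DeparseAlterTableStmt((Node *) alterTableStmt);
}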
*/ WorkerPoolFailureState failureState; + + /* execution statistics per pool, in microseconds */ + uint64 totalTaskExecutionTime; + int totalExecutedTasks; } WorkerPool; struct TaskPlacementExecution; @@ -472,6 +478,8 @@ bool EnableBinaryProtocol = false; /* GUC, number of ms to wait between opening connections to the same worker */ int ExecutorSlowStartInterval = 10; +bool EnableCostBasedConnectionEstablishment = true; +bool PreventIncompleteConnectionEstablishment = true; /* @@ -634,6 +642,12 @@ static WorkerSession * FindOrCreateWorkerSession(WorkerPool *workerPool, static void ManageWorkerPool(WorkerPool *workerPool); static bool ShouldWaitForSlowStart(WorkerPool *workerPool); static int CalculateNewConnectionCount(WorkerPool *workerPool); +static bool UsingExistingSessionsCheaperThanEstablishingNewConnections(int + readyTaskCount, + WorkerPool * + workerPool); +static double AvgTaskExecutionTimeApproximation(WorkerPool *workerPool); +static double AvgConnectionEstablishmentTime(WorkerPool *workerPool); static void OpenNewConnections(WorkerPool *workerPool, int newConnectionCount, TransactionProperties *transactionProperties); static void CheckConnectionTimeout(WorkerPool *workerPool); @@ -650,6 +664,7 @@ static bool StartPlacementExecutionOnSession(TaskPlacementExecution *placementEx static bool SendNextQuery(TaskPlacementExecution *placementExecution, WorkerSession *session); static void ConnectionStateMachine(WorkerSession *session); +static bool HasUnfinishedTaskForSession(WorkerSession *session); static void HandleMultiConnectionSuccess(WorkerSession *session); static bool HasAnyConnectionFailure(WorkerPool *workerPool); static void Activate2PCIfModifyingTransactionExpandsToNewNode(WorkerSession *session); @@ -675,15 +690,20 @@ static void ExtractParametersForRemoteExecution(ParamListInfo paramListInfo, Oid **parameterTypes, const char ***parameterValues); static int GetEventSetSize(List *sessionList); +static bool HasIncompleteConnectionEstablishment(DistributedExecution *execution); static int RebuildWaitEventSet(DistributedExecution *execution); static void ProcessWaitEvents(DistributedExecution *execution, WaitEvent *events, int eventCount, bool *cancellationReceived); static long MillisecondsBetweenTimestamps(instr_time startTime, instr_time endTime); +static uint64 MicrosecondsBetweenTimestamps(instr_time startTime, instr_time endTime); static HeapTuple BuildTupleFromBytes(AttInMetadata *attinmeta, fmStringInfo *values); static AttInMetadata * TupleDescGetAttBinaryInMetadata(TupleDesc tupdesc); static int WorkerPoolCompare(const void *lhsKey, const void *rhsKey); static void SetAttributeInputMetadata(DistributedExecution *execution, ShardCommandExecution *shardCommandExecution); +static void LookupTaskPlacementHostAndPort(ShardPlacement *taskPlacement, char **nodeName, + int *nodePort); +static bool IsDummyPlacement(ShardPlacement *taskPlacement); /* * AdaptiveExecutorPreExecutorRun gets called right before postgres starts its executor @@ -742,6 +762,12 @@ AdaptiveExecutor(CitusScanState *scanState) /* we should only call this once before the scan finished */ Assert(!scanState->finishedRemoteScan); + MemoryContext localContext = AllocSetContextCreate(CurrentMemoryContext, + "AdaptiveExecutor", + ALLOCSET_DEFAULT_SIZES); + MemoryContext oldContext = MemoryContextSwitchTo(localContext); + + /* Reset Task fields that are only valid for a single execution */ ResetExplainAnalyzeData(taskList); @@ -830,6 +856,8 @@ AdaptiveExecutor(CitusScanState *scanState) 
SortTupleStore(scanState); } + MemoryContextSwitchTo(oldContext); + return resultSlot; } @@ -1227,7 +1255,7 @@ StartDistributedExecution(DistributedExecution *execution) if (xactProperties->requires2PC) { - CoordinatedTransactionShouldUse2PC(); + Use2PCForCoordinatedTransaction(); } /* @@ -1630,8 +1658,10 @@ CleanUpSessions(DistributedExecution *execution) { MultiConnection *connection = session->connection; - ereport(DEBUG4, (errmsg("Total number of commands sent over the session %ld: %ld", - session->sessionId, session->commandsSent))); + ereport(DEBUG4, (errmsg("Total number of commands sent over the session %ld: %ld " + "to node %s:%d", session->sessionId, + session->commandsSent, + connection->hostname, connection->port))); UnclaimConnection(connection); @@ -1751,8 +1781,10 @@ AssignTasksToConnectionsOrWorkerPool(DistributedExecution *execution) foreach_ptr(taskPlacement, task->taskPlacementList) { int connectionFlags = 0; - char *nodeName = taskPlacement->nodeName; - int nodePort = taskPlacement->nodePort; + char *nodeName = NULL; + int nodePort = 0; + LookupTaskPlacementHostAndPort(taskPlacement, &nodeName, &nodePort); + WorkerPool *workerPool = FindOrCreateWorkerPool(execution, nodeName, nodePort); @@ -1900,6 +1932,48 @@ AssignTasksToConnectionsOrWorkerPool(DistributedExecution *execution) } +/* + * LookupTaskPlacementHostAndPort sets the nodename and nodeport for the given task placement + * with a lookup. + */ +static void +LookupTaskPlacementHostAndPort(ShardPlacement *taskPlacement, char **nodeName, + int *nodePort) +{ + if (IsDummyPlacement(taskPlacement)) + { + /* + * If we create a dummy placement for the local node, it is possible + * that the entry doesn't exist in pg_dist_node, hence a lookup will fail. + * In that case we want to use the dummy placements values. + */ + *nodeName = taskPlacement->nodeName; + *nodePort = taskPlacement->nodePort; + } + else + { + /* + * We want to lookup the node information again since it is possible that + * there were changes in pg_dist_node and we will get those invalidations + * in LookupNodeForGroup. + */ + WorkerNode *workerNode = LookupNodeForGroup(taskPlacement->groupId); + *nodeName = workerNode->workerName; + *nodePort = workerNode->workerPort; + } +} + + +/* + * IsDummyPlacement returns true if the given placement is a dummy placement. + */ +static bool +IsDummyPlacement(ShardPlacement *taskPlacement) +{ + return taskPlacement->nodeId == LOCAL_NODE_ID; +} + + /* * WorkerPoolCompare is based on WorkerNodeCompare function. The function * compares two worker nodes by their host name and port number. @@ -2217,7 +2291,26 @@ RunDistributedExecution(DistributedExecution *execution) /* always (re)build the wait event set the first time */ execution->rebuildWaitEventSet = true; - while (execution->unfinishedTaskCount > 0 && !cancellationReceived) + /* + * Iterate until all the tasks are finished. Once all the tasks + * are finished, ensure that that all the connection initializations + * are also finished. Otherwise, those connections are terminated + * abruptly before they are established (or failed). Instead, we let + * the ConnectionStateMachine() to properly handle them. + * + * Note that we could have the connections that are not established + * as a side effect of slow-start algorithm. At the time the algorithm + * decides to establish new connections, the execution might have tasks + * to finish. But, the execution might finish before the new connections + * are established. 
+ * + * Note that the rules explained above could be overridden by any + * cancellation to the query. In that case, we terminate the execution + * irrespective of the current status of the tasks or the connections. + */ + while (!cancellationReceived && + (execution->unfinishedTaskCount > 0 || + HasIncompleteConnectionEstablishment(execution))) { WorkerPool *workerPool = NULL; foreach_ptr(workerPool, execution->workerList) @@ -2299,6 +2392,33 @@ RunDistributedExecution(DistributedExecution *execution) } +/* + * HasIncompleteConnectionEstablishment returns true if any of the connections + * that have been initiated by the executor are in the initialization stage. + */ +static bool +HasIncompleteConnectionEstablishment(DistributedExecution *execution) +{ + if (!PreventIncompleteConnectionEstablishment) + { + return false; + } + + WorkerSession *session = NULL; + foreach_ptr(session, execution->sessionList) + { + MultiConnection *connection = session->connection; + if (connection->connectionState == MULTI_CONNECTION_INITIAL || + connection->connectionState == MULTI_CONNECTION_CONNECTING) + { + return true; + } + } + + return false; +} + + /* + * RebuildWaitEventSet updates the waitEventSet for the distributed execution. * This happens when the connection set for the distributed execution is changed, @@ -2398,6 +2518,9 @@ ManageWorkerPool(WorkerPool *workerPool) return; } + /* increase the open rate every cycle (like TCP slow start) */ + workerPool->maxNewConnectionsPerCycle += 1; + OpenNewConnections(workerPool, newConnectionCount, execution->transactionProperties); /* @@ -2584,16 +2707,176 @@ CalculateNewConnectionCount(WorkerPool *workerPool) * than the target pool size. */ newConnectionCount = Min(newConnectionsForReadyTasks, maxNewConnectionCount); - if (newConnectionCount > 0) + if (EnableCostBasedConnectionEstablishment && newConnectionCount > 0 && + initiatedConnectionCount <= MaxCachedConnectionsPerWorker && + UsingExistingSessionsCheaperThanEstablishingNewConnections( + readyTaskCount, workerPool)) { - /* increase the open rate every cycle (like TCP slow start) */ - workerPool->maxNewConnectionsPerCycle += 1; + /* + * Before giving the decision, we do one more check. If the cost of + * executing the remaining tasks over the existing sessions in the + * pool is cheaper than establishing new connections and executing + * the tasks over the new connections, we prefer the former. + * + * For cached connections we should ignore any optimizations as + * cached connections are almost free to get. In other words, + * as long as there are cached connections that the pool has + * not used yet, aggressively use these already established + * connections. + * + * Note that until MaxCachedConnectionsPerWorker connections have been + * established within the session, we still need to establish + * the connections right now. + * + * Also remember that we are not trying to find the optimal number + * of connections for the remaining tasks here. Our goal is to prevent + * connection establishments that are absolutely unnecessary. In the + * future, we may improve the calculations below to find the optimal + * number of new connections required. + */ + return 0; + } + } + + return newConnectionCount; } + +/* + * UsingExistingSessionsCheaperThanEstablishingNewConnections returns true if + * using the already established connections takes less time compared to opening + * new connections based on the current execution's stats. 
+ * + * The function returns false if the current execution has not established any connections + * or finished any tasks (e.g., no stats to act on). + */ +static bool +UsingExistingSessionsCheaperThanEstablishingNewConnections(int readyTaskCount, + WorkerPool *workerPool) +{ + int activeConnectionCount = workerPool->activeConnectionCount; + if (workerPool->totalExecutedTasks < 1 || activeConnectionCount < 1) + { + /* + * The pool has not finished any connection establishment or + * task yet. So, we refrain from optimizing the execution. + */ + return false; + } + + double avgTaskExecutionTime = AvgTaskExecutionTimeApproximation(workerPool); + double avgConnectionEstablishmentTime = AvgConnectionEstablishmentTime(workerPool); + + /* we assume that we are halfway through the execution */ + double remainingTimeForActiveTaskExecutionsToFinish = avgTaskExecutionTime / 2; + + /* + * We use "newConnectionCount" as if it is the task count as + * we are only interested in this iteration of CalculateNewConnectionCount(). + */ + double totalTimeToExecuteNewTasks = avgTaskExecutionTime * readyTaskCount; + + double estimatedExecutionTimeForNewTasks = + floor(totalTimeToExecuteNewTasks / activeConnectionCount); + + /* + * First finish the already running tasks, and then use the connections + * to execute the new tasks. + */ + double costOfExecutingTheTasksOverExistingConnections = + remainingTimeForActiveTaskExecutionsToFinish + + estimatedExecutionTimeForNewTasks; + + /* + * For every task, the executor is supposed to establish one + * connection and then execute the task over the connection. + */ + double costOfExecutingTheTasksOverNewConnection = + (avgTaskExecutionTime + avgConnectionEstablishmentTime); + + return (costOfExecutingTheTasksOverExistingConnections <= + costOfExecutingTheTasksOverNewConnection); +} + + +/* + * AvgTaskExecutionTimeApproximation returns the approximation of the average task + * execution times on the workerPool. + */ +static double +AvgTaskExecutionTimeApproximation(WorkerPool *workerPool) +{ + uint64 totalTaskExecutionTime = workerPool->totalTaskExecutionTime; + int taskCount = workerPool->totalExecutedTasks; + + instr_time now; + INSTR_TIME_SET_CURRENT(now); + + WorkerSession *session = NULL; + foreach_ptr(session, workerPool->sessionList) + { + /* + * Involve the tasks that are currently running. We do this to + * make sure that the execution responds with new connections + * quickly if the actively running tasks + */ + TaskPlacementExecution *placementExecution = session->currentTask; + if (placementExecution != NULL && + placementExecution->executionState == PLACEMENT_EXECUTION_RUNNING) + { + uint64 durationInMicroSecs = + MicrosecondsBetweenTimestamps(placementExecution->startTime, now); + + /* + * Our approximation is that we assume that the task execution is + * just in the halfway through. + */ + totalTaskExecutionTime += (2 * durationInMicroSecs); + taskCount += 1; + } + } + + return taskCount == 0 ? 0 : ((double) totalTaskExecutionTime / taskCount); +} + + +/* + * AvgConnectionEstablishmentTime calculates the average connection establishment times + * for the input workerPool. + */ +static double +AvgConnectionEstablishmentTime(WorkerPool *workerPool) +{ + double totalTimeMicrosec = 0; + int sessionCount = 0; + + WorkerSession *session = NULL; + foreach_ptr(session, workerPool->sessionList) + { + MultiConnection *connection = session->connection; + + /* + * There could be MaxCachedConnectionsPerWorker connections that are + * already connected. 
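(Illustrative aside, not part of the patch: a toy calculation with made-up numbers showing when the comparison above favors reusing existing sessions. With short tasks and a comparatively expensive handshake, the existing-session cost wins, CalculateNewConnectionCount() returns 0, and no extra connections are opened.)

/*
 * Toy illustration of the cost comparison performed by
 * UsingExistingSessionsCheaperThanEstablishingNewConnections() above.
 * All numbers are hypothetical and in microseconds; plain C, no backend needed.
 */
#include <math.h>
#include <stdio.h>

int
main(void)
{
    double avgTaskExecutionTime = 2000.0;            /* 2 ms per task so far */
    double avgConnectionEstablishmentTime = 30000.0; /* 30 ms per new connection */
    int activeConnectionCount = 4;
    int readyTaskCount = 10;

    /* running tasks are assumed to be halfway through */
    double remainingTimeForActiveTasks = avgTaskExecutionTime / 2;
    double estimatedTimeForNewTasks =
        floor((avgTaskExecutionTime * readyTaskCount) / activeConnectionCount);

    double costOverExistingConnections =
        remainingTimeForActiveTasks + estimatedTimeForNewTasks;
    double costOverNewConnection =
        avgTaskExecutionTime + avgConnectionEstablishmentTime;

    /* prints: existing 6000 usec vs new 32000 usec -> reuse existing sessions */
    printf("existing %.0f usec vs new %.0f usec -> %s\n",
           costOverExistingConnections, costOverNewConnection,
           costOverExistingConnections <= costOverNewConnection ?
           "reuse existing sessions" : "open new connections");

    return 0;
}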
Those connections might skew the average + * connection establishment times for the current execution. The reason + * is that they are established earlier and the connection establishment + * times might be different at the moment those connections are established. + */ + if (connection->connectionState == MULTI_CONNECTION_CONNECTED) + { + long connectionEstablishmentTime = + MicrosecondsBetweenTimestamps(connection->connectionEstablishmentStart, + connection->connectionEstablishmentEnd); + + totalTimeMicrosec += connectionEstablishmentTime; + ++sessionCount; + } + } + + return (sessionCount == 0) ? 0 : (totalTimeMicrosec / sessionCount); +} + + /* * OpenNewConnections opens the given amount of connections for the given workerPool. */ @@ -2898,6 +3181,18 @@ MillisecondsBetweenTimestamps(instr_time startTime, instr_time endTime) } +/* + * MicrosecondsBetweenTimestamps is a helper to get the number of microseconds + * between timestamps. + */ +static uint64 +MicrosecondsBetweenTimestamps(instr_time startTime, instr_time endTime) +{ + INSTR_TIME_SUBTRACT(endTime, startTime); + return INSTR_TIME_GET_MICROSEC(endTime); +} + + /* * ConnectionStateMachine opens a connection and descends into the transaction * state machine when ready. @@ -2995,8 +3290,32 @@ ConnectionStateMachine(WorkerSession *session) case MULTI_CONNECTION_CONNECTED: { - /* connection is ready, run the transaction state machine */ - TransactionStateMachine(session); + if (HasUnfinishedTaskForSession(session)) + { + /* + * Connection is ready, and we have unfinished tasks. + * So, run the transaction state machine. + */ + TransactionStateMachine(session); + } + else + { + /* + * Connection is ready, but we don't have any unfinished + * tasks that this session can execute. + * + * Note that we can be in a situation where the executor + * decides to establish a connection, but not need to + * use it at the time the connection is established. This could + * happen when the earlier connections manages to finish all the + * tasks after this connection + * + * As no tasks are ready to be executed at the moment, we + * mark the socket readable to get any notices if exists. + */ + UpdateConnectionWaitFlags(session, WL_SOCKET_READABLE); + } + break; } @@ -3111,6 +3430,41 @@ ConnectionStateMachine(WorkerSession *session) } +/* + * HasUnfinishedTaskForSession gets a session and returns true if there + * are any tasks that this session can execute. + */ +static bool +HasUnfinishedTaskForSession(WorkerSession *session) +{ + if (session->currentTask != NULL) + { + /* the session is executing a command right now */ + return true; + } + + dlist_head *sessionReadyTaskQueue = &(session->readyTaskQueue); + if (!dlist_is_empty(sessionReadyTaskQueue)) + { + /* session has an assigned task, which is ready for execution */ + return true; + } + + WorkerPool *workerPool = session->workerPool; + dlist_head *poolReadyTaskQueue = &(workerPool->readyTaskQueue); + if (!dlist_is_empty(poolReadyTaskQueue)) + { + /* + * Pool has unassigned tasks that can be executed + * by the input session. + */ + return true; + } + + return false; +} + + /* * HandleMultiConnectionSuccess logs the established connection and updates * connection's state. 
@@ -3124,10 +3478,10 @@ HandleMultiConnectionSuccess(WorkerSession *session) MarkConnectionConnected(connection); ereport(DEBUG4, (errmsg("established connection to %s:%d for " - "session %ld in %ld msecs", + "session %ld in %ld microseconds", connection->hostname, connection->port, session->sessionId, - MillisecondsBetweenTimestamps( + MicrosecondsBetweenTimestamps( connection->connectionEstablishmentStart, connection->connectionEstablishmentEnd)))); @@ -3188,7 +3542,7 @@ Activate2PCIfModifyingTransactionExpandsToNewNode(WorkerSession *session) * just opened, which means we're now going to make modifications * over multiple connections. Activate 2PC! */ - CoordinatedTransactionShouldUse2PC(); + Use2PCForCoordinatedTransaction(); } } @@ -4270,19 +4624,20 @@ PlacementExecutionDone(TaskPlacementExecution *placementExecution, bool succeede Assert(INSTR_TIME_IS_ZERO(placementExecution->endTime)); INSTR_TIME_SET_CURRENT(placementExecution->endTime); + uint64 durationMicrosecs = + MicrosecondsBetweenTimestamps(placementExecution->startTime, + placementExecution->endTime); + workerPool->totalTaskExecutionTime += durationMicrosecs; + workerPool->totalExecutedTasks += 1; if (IsLoggableLevel(DEBUG4)) { - long durationMillisecs = - MillisecondsBetweenTimestamps(placementExecution->startTime, - placementExecution->endTime); - ereport(DEBUG4, (errmsg("task execution (%d) for placement (%ld) on anchor " - "shard (%ld) finished in %ld msecs on worker " + "shard (%ld) finished in %ld microseconds on worker " "node %s:%d", shardCommandExecution->task->taskId, placementExecution->shardPlacement->placementId, shardCommandExecution->task->anchorShardId, - durationMillisecs, workerPool->nodeName, + durationMicrosecs, workerPool->nodeName, workerPool->nodePort))); } } @@ -4457,8 +4812,6 @@ ScheduleNextPlacementExecution(TaskPlacementExecution *placementExecution, bool executionOrder == EXECUTION_ORDER_SEQUENTIAL) { TaskPlacementExecution *nextPlacementExecution = NULL; - int placementExecutionCount PG_USED_FOR_ASSERTS_ONLY = - shardCommandExecution->placementExecutionCount; /* find a placement execution that is not yet marked as failed */ do { @@ -4469,6 +4822,7 @@ ScheduleNextPlacementExecution(TaskPlacementExecution *placementExecution, bool * If all tasks failed then we should already have errored out. * Still, be defensive and throw error instead of crashes. */ + int placementExecutionCount = shardCommandExecution->placementExecutionCount; if (nextPlacementExecutionIndex >= placementExecutionCount) { WorkerPool *workerPool = placementExecution->workerPool; diff --git a/src/backend/distributed/executor/citus_custom_scan.c b/src/backend/distributed/executor/citus_custom_scan.c index 7c873b2d2..e1820a74e 100644 --- a/src/backend/distributed/executor/citus_custom_scan.c +++ b/src/backend/distributed/executor/citus_custom_scan.c @@ -189,6 +189,12 @@ CitusBeginScan(CustomScanState *node, EState *estate, int eflags) { CitusBeginModifyScan(node, estate, eflags); } + + /* + * In case of a prepared statement, we will see this distributed plan again + * on the next execution with a higher usage counter. 
+ */ + distributedPlan->numberOfTimesExecuted++; } @@ -315,6 +321,11 @@ CitusBeginModifyScan(CustomScanState *node, EState *estate, int eflags) PlanState *planState = &(scanState->customScanState.ss.ps); DistributedPlan *originalDistributedPlan = scanState->distributedPlan; + MemoryContext localContext = AllocSetContextCreate(CurrentMemoryContext, + "CitusBeginModifyScan", + ALLOCSET_DEFAULT_SIZES); + MemoryContext oldContext = MemoryContextSwitchTo(localContext); + DistributedPlan *currentPlan = CopyDistributedPlanWithoutCache(originalDistributedPlan); scanState->distributedPlan = currentPlan; @@ -405,6 +416,8 @@ CitusBeginModifyScan(CustomScanState *node, EState *estate, int eflags) */ CacheLocalPlanForShardQuery(task, originalDistributedPlan); } + + MemoryContextSwitchTo(oldContext); } diff --git a/src/backend/distributed/executor/intermediate_results.c b/src/backend/distributed/executor/intermediate_results.c index c45e64821..c1b8f86dc 100644 --- a/src/backend/distributed/executor/intermediate_results.c +++ b/src/backend/distributed/executor/intermediate_results.c @@ -118,6 +118,8 @@ PG_FUNCTION_INFO_V1(fetch_intermediate_results); Datum broadcast_intermediate_result(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + text *resultIdText = PG_GETARG_TEXT_P(0); char *resultIdString = text_to_cstring(resultIdText); text *queryText = PG_GETARG_TEXT_P(1); @@ -125,8 +127,6 @@ broadcast_intermediate_result(PG_FUNCTION_ARGS) bool writeLocalFile = false; ParamListInfo paramListInfo = NULL; - CheckCitusVersion(ERROR); - /* * Make sure that this transaction has a distributed transaction ID. * @@ -159,6 +159,8 @@ broadcast_intermediate_result(PG_FUNCTION_ARGS) Datum create_intermediate_result(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + text *resultIdText = PG_GETARG_TEXT_P(0); char *resultIdString = text_to_cstring(resultIdText); text *queryText = PG_GETARG_TEXT_P(1); @@ -167,8 +169,6 @@ create_intermediate_result(PG_FUNCTION_ARGS) bool writeLocalFile = true; ParamListInfo paramListInfo = NULL; - CheckCitusVersion(ERROR); - /* * Make sure that this transaction has a distributed transaction ID. 
* @@ -771,13 +771,13 @@ IntermediateResultSize(const char *resultId) Datum read_intermediate_result(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + Datum resultId = PG_GETARG_DATUM(0); Datum copyFormatOidDatum = PG_GETARG_DATUM(1); Datum copyFormatLabelDatum = DirectFunctionCall1(enum_out, copyFormatOidDatum); char *copyFormatLabel = DatumGetCString(copyFormatLabelDatum); - CheckCitusVersion(ERROR); - ReadIntermediateResultsIntoFuncOutput(fcinfo, copyFormatLabel, &resultId, 1); PG_RETURN_DATUM(0); @@ -794,14 +794,14 @@ read_intermediate_result(PG_FUNCTION_ARGS) Datum read_intermediate_result_array(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + ArrayType *resultIdObject = PG_GETARG_ARRAYTYPE_P(0); Datum copyFormatOidDatum = PG_GETARG_DATUM(1); Datum copyFormatLabelDatum = DirectFunctionCall1(enum_out, copyFormatOidDatum); char *copyFormatLabel = DatumGetCString(copyFormatLabelDatum); - CheckCitusVersion(ERROR); - int32 resultCount = ArrayGetNItems(ARR_NDIM(resultIdObject), ARR_DIMS( resultIdObject)); Datum *resultIdArray = DeconstructArrayObject(resultIdObject); @@ -874,6 +874,8 @@ ReadIntermediateResultsIntoFuncOutput(FunctionCallInfo fcinfo, char *copyFormat, Datum fetch_intermediate_results(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + ArrayType *resultIdObject = PG_GETARG_ARRAYTYPE_P(0); Datum *resultIdArray = DeconstructArrayObject(resultIdObject); int32 resultCount = ArrayObjectCount(resultIdObject); @@ -885,8 +887,6 @@ fetch_intermediate_results(PG_FUNCTION_ARGS) int resultIndex = 0; int64 totalBytesWritten = 0L; - CheckCitusVersion(ERROR); - if (resultCount == 0) { PG_RETURN_INT64(0); diff --git a/src/backend/distributed/executor/local_executor.c b/src/backend/distributed/executor/local_executor.c index 9de14db2c..a824573ed 100644 --- a/src/backend/distributed/executor/local_executor.c +++ b/src/backend/distributed/executor/local_executor.c @@ -108,8 +108,15 @@ bool EnableLocalExecution = true; bool LogLocalCommands = false; +int LocalExecutorLevel = 0; + static LocalExecutionStatus CurrentLocalExecutionStatus = LOCAL_EXECUTION_OPTIONAL; +static uint64 ExecuteLocalTaskListInternal(List *taskList, + ParamListInfo paramListInfo, + DistributedPlan *distributedPlan, + TupleDestination *defaultTupleDest, + bool isUtilityCommand); static void SplitLocalAndRemotePlacements(List *taskPlacementList, List **localTaskPlacementList, List **remoteTaskPlacementList); @@ -200,10 +207,8 @@ ExecuteLocalTaskListExtended(List *taskList, TupleDestination *defaultTupleDest, bool isUtilityCommand) { - ParamListInfo paramListInfo = copyParamList(orig_paramListInfo); - int numParams = 0; - Oid *parameterTypes = NULL; uint64 totalRowsProcessed = 0; + ParamListInfo paramListInfo = copyParamList(orig_paramListInfo); /* * Even if we are executing local tasks, we still enable @@ -218,6 +223,38 @@ ExecuteLocalTaskListExtended(List *taskList, */ UseCoordinatedTransaction(); + LocalExecutorLevel++; + PG_TRY(); + { + totalRowsProcessed = ExecuteLocalTaskListInternal(taskList, paramListInfo, + distributedPlan, + defaultTupleDest, + isUtilityCommand); + } + PG_CATCH(); + { + LocalExecutorLevel--; + + PG_RE_THROW(); + } + PG_END_TRY(); + LocalExecutorLevel--; + + return totalRowsProcessed; +} + + +static uint64 +ExecuteLocalTaskListInternal(List *taskList, + ParamListInfo paramListInfo, + DistributedPlan *distributedPlan, + TupleDestination *defaultTupleDest, + bool isUtilityCommand) +{ + uint64 totalRowsProcessed = 0; + int numParams = 0; + Oid *parameterTypes = NULL; + if (paramListInfo != NULL) { /* not 
used anywhere, so declare here */ @@ -229,9 +266,19 @@ ExecuteLocalTaskListExtended(List *taskList, numParams = paramListInfo->numParams; } + /* + * Use a new memory context that gets reset after every task to free + * the deparsed query string and query plan. + */ + MemoryContext loopContext = AllocSetContextCreate(CurrentMemoryContext, + "ExecuteLocalTaskListExtended", + ALLOCSET_DEFAULT_SIZES); + Task *task = NULL; foreach_ptr(task, taskList) { + MemoryContext oldContext = MemoryContextSwitchTo(loopContext); + TupleDestination *tupleDest = task->tupleDest ? task->tupleDest : defaultTupleDest; @@ -253,7 +300,7 @@ ExecuteLocalTaskListExtended(List *taskList, * queries are also ReadOnly, our 2PC logic is smart enough to skip sending * PREPARE to those connections. */ - CoordinatedTransactionShouldUse2PC(); + Use2PCForCoordinatedTransaction(); } LogLocalCommand(task); @@ -261,6 +308,9 @@ ExecuteLocalTaskListExtended(List *taskList, if (isUtilityCommand) { ExecuteUtilityCommand(TaskQueryString(task)); + + MemoryContextSwitchTo(oldContext); + MemoryContextReset(loopContext); continue; } @@ -308,6 +358,9 @@ ExecuteLocalTaskListExtended(List *taskList, totalRowsProcessed += LocallyPlanAndExecuteMultipleQueries(queryStringList, tupleDest, task); + + MemoryContextSwitchTo(oldContext); + MemoryContextReset(loopContext); continue; } @@ -343,6 +396,9 @@ ExecuteLocalTaskListExtended(List *taskList, totalRowsProcessed += ExecuteLocalTaskPlan(localPlan, shardQueryString, tupleDest, task, paramListInfo); + + MemoryContextSwitchTo(oldContext); + MemoryContextReset(loopContext); } return totalRowsProcessed; @@ -582,6 +638,12 @@ ExecuteLocalTaskPlan(PlannedStmt *taskPlan, char *queryString, RecordNonDistTableAccessesForTask(task); + MemoryContext localContext = AllocSetContextCreate(CurrentMemoryContext, + "ExecuteLocalTaskPlan", + ALLOCSET_DEFAULT_SIZES); + + MemoryContext oldContext = MemoryContextSwitchTo(localContext); + /* * Some tuple destinations look at task->taskPlacementList to determine * where the result came from using the placement index. 
Since a local @@ -625,6 +687,9 @@ ExecuteLocalTaskPlan(PlannedStmt *taskPlan, char *queryString, FreeQueryDesc(queryDesc); + MemoryContextSwitchTo(oldContext); + MemoryContextDelete(localContext); + return totalRowsProcessed; } diff --git a/src/backend/distributed/executor/partitioned_intermediate_results.c b/src/backend/distributed/executor/partitioned_intermediate_results.c index 3b481a231..c0f6e9d65 100644 --- a/src/backend/distributed/executor/partitioned_intermediate_results.c +++ b/src/backend/distributed/executor/partitioned_intermediate_results.c @@ -107,6 +107,8 @@ PG_FUNCTION_INFO_V1(worker_partition_query_result); Datum worker_partition_query_result(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + ReturnSetInfo *resultInfo = (ReturnSetInfo *) fcinfo->resultinfo; text *resultIdPrefixText = PG_GETARG_TEXT_P(0); @@ -136,8 +138,6 @@ worker_partition_query_result(PG_FUNCTION_ARGS) bool binaryCopy = PG_GETARG_BOOL(6); - CheckCitusVersion(ERROR); - if (!IsMultiStatementTransaction()) { ereport(ERROR, (errmsg("worker_partition_query_result can only be used in a " diff --git a/src/backend/distributed/metadata/dependency.c b/src/backend/distributed/metadata/dependency.c index f84e1152a..6dfa6afce 100644 --- a/src/backend/distributed/metadata/dependency.c +++ b/src/backend/distributed/metadata/dependency.c @@ -17,6 +17,7 @@ #include "access/htup_details.h" #include "access/skey.h" #include "access/sysattr.h" +#include "catalog/catalog.h" #include "catalog/dependency.h" #include "catalog/indexing.h" #include "catalog/pg_class.h" @@ -379,9 +380,19 @@ DependencyDefinitionFromPgShDepend(ObjectAddress target) /* * Scan pg_shdepend for dbid = $1 AND classid = $2 AND objid = $3 using * pg_shdepend_depender_index + * + * where $1 is decided as follows: + * - shared dependencies $1 = InvalidOid + * - other dependencies $1 = MyDatabaseId + * This is consistent with postgres' static classIdGetDbId function */ + Oid dbid = InvalidOid; + if (!IsSharedRelation(target.classId)) + { + dbid = MyDatabaseId; + } ScanKeyInit(&key[0], Anum_pg_shdepend_dbid, BTEqualStrategyNumber, F_OIDEQ, - MyDatabaseId); + ObjectIdGetDatum(dbid)); ScanKeyInit(&key[1], Anum_pg_shdepend_classid, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(target.classId)); ScanKeyInit(&key[2], Anum_pg_shdepend_objid, BTEqualStrategyNumber, F_OIDEQ, @@ -570,6 +581,12 @@ SupportedDependencyByCitus(const ObjectAddress *address) return true; } + case OCLASS_DATABASE: + { + /* only to propagate its owner */ + return true; + } + case OCLASS_ROLE: { /* diff --git a/src/backend/distributed/metadata/metadata_cache.c b/src/backend/distributed/metadata/metadata_cache.c index aa5a61886..2d7dab121 100644 --- a/src/backend/distributed/metadata/metadata_cache.c +++ b/src/backend/distributed/metadata/metadata_cache.c @@ -2641,6 +2641,8 @@ SecondaryNodeRoleId(void) Datum citus_dist_partition_cache_invalidate(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + TriggerData *triggerData = (TriggerData *) fcinfo->context; Oid oldLogicalRelationId = InvalidOid; Oid newLogicalRelationId = InvalidOid; @@ -2651,8 +2653,6 @@ citus_dist_partition_cache_invalidate(PG_FUNCTION_ARGS) errmsg("must be called as trigger"))); } - CheckCitusVersion(ERROR); - if (RelationGetRelid(triggerData->tg_relation) != DistPartitionRelationId()) { ereport(ERROR, (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), @@ -2718,6 +2718,8 @@ master_dist_partition_cache_invalidate(PG_FUNCTION_ARGS) Datum citus_dist_shard_cache_invalidate(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + 
TriggerData *triggerData = (TriggerData *) fcinfo->context; Oid oldLogicalRelationId = InvalidOid; Oid newLogicalRelationId = InvalidOid; @@ -2728,8 +2730,6 @@ citus_dist_shard_cache_invalidate(PG_FUNCTION_ARGS) errmsg("must be called as trigger"))); } - CheckCitusVersion(ERROR); - if (RelationGetRelid(triggerData->tg_relation) != DistShardRelationId()) { ereport(ERROR, (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), @@ -2795,6 +2795,8 @@ master_dist_shard_cache_invalidate(PG_FUNCTION_ARGS) Datum citus_dist_placement_cache_invalidate(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + TriggerData *triggerData = (TriggerData *) fcinfo->context; Oid oldShardId = InvalidOid; Oid newShardId = InvalidOid; @@ -2805,8 +2807,6 @@ citus_dist_placement_cache_invalidate(PG_FUNCTION_ARGS) errmsg("must be called as trigger"))); } - CheckCitusVersion(ERROR); - /* * Before 7.0-2 this trigger is on pg_dist_shard_placement, * ignore trigger in this scenario. @@ -2884,14 +2884,14 @@ master_dist_placement_cache_invalidate(PG_FUNCTION_ARGS) Datum citus_dist_node_cache_invalidate(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + if (!CALLED_AS_TRIGGER(fcinfo)) { ereport(ERROR, (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), errmsg("must be called as trigger"))); } - CheckCitusVersion(ERROR); - CitusInvalidateRelcacheByRelid(DistNodeRelationId()); PG_RETURN_DATUM(PointerGetDatum(NULL)); @@ -2919,14 +2919,14 @@ master_dist_node_cache_invalidate(PG_FUNCTION_ARGS) Datum citus_conninfo_cache_invalidate(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + if (!CALLED_AS_TRIGGER(fcinfo)) { ereport(ERROR, (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), errmsg("must be called as trigger"))); } - CheckCitusVersion(ERROR); - /* no-op in community edition */ PG_RETURN_DATUM(PointerGetDatum(NULL)); @@ -2954,14 +2954,14 @@ master_dist_authinfo_cache_invalidate(PG_FUNCTION_ARGS) Datum citus_dist_local_group_cache_invalidate(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + if (!CALLED_AS_TRIGGER(fcinfo)) { ereport(ERROR, (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), errmsg("must be called as trigger"))); } - CheckCitusVersion(ERROR); - CitusInvalidateRelcacheByRelid(DistLocalGroupIdRelationId()); PG_RETURN_DATUM(PointerGetDatum(NULL)); @@ -2989,14 +2989,14 @@ master_dist_local_group_cache_invalidate(PG_FUNCTION_ARGS) Datum citus_dist_object_cache_invalidate(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + if (!CALLED_AS_TRIGGER(fcinfo)) { ereport(ERROR, (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), errmsg("must be called as trigger"))); } - CheckCitusVersion(ERROR); - CitusInvalidateRelcacheByRelid(DistObjectRelationId()); PG_RETURN_DATUM(PointerGetDatum(NULL)); @@ -3344,8 +3344,7 @@ GetLocalGroupId(void) return LocalGroupId; } - Oid localGroupTableOid = get_relname_relid("pg_dist_local_group", - PG_CATALOG_NAMESPACE); + Oid localGroupTableOid = DistLocalGroupIdRelationId(); if (localGroupTableOid == InvalidOid) { return 0; diff --git a/src/backend/distributed/metadata/metadata_sync.c b/src/backend/distributed/metadata/metadata_sync.c index 686a206b4..80415bbf0 100644 --- a/src/backend/distributed/metadata/metadata_sync.c +++ b/src/backend/distributed/metadata/metadata_sync.c @@ -25,11 +25,13 @@ #include "access/xact.h" #include "catalog/dependency.h" #include "catalog/indexing.h" +#include "catalog/pg_attrdef.h" #include "catalog/pg_depend.h" #include "catalog/pg_foreign_server.h" #include "catalog/pg_namespace.h" #include "catalog/pg_type.h" #include "commands/async.h" +#include 
"commands/sequence.h" #include "distributed/citus_ruleutils.h" #include "distributed/commands.h" #include "distributed/deparser.h" @@ -46,10 +48,13 @@ #include "distributed/pg_dist_node.h" #include "distributed/remote_commands.h" #include "distributed/worker_manager.h" +#include "distributed/worker_protocol.h" #include "distributed/worker_transaction.h" #include "distributed/version_compat.h" +#include "executor/spi.h" #include "foreign/foreign.h" #include "miscadmin.h" +#include "nodes/makefuncs.h" #include "nodes/pg_list.h" #include "pgstat.h" #include "postmaster/bgworker.h" @@ -67,7 +72,6 @@ static List * GetDistributedTableDDLEvents(Oid relationId); static char * LocalGroupIdUpdateCommand(int32 groupId); static void UpdateDistNodeBoolAttr(const char *nodeName, int32 nodePort, int attrNum, bool value); -static List * SequenceDDLCommandsForTable(Oid relationId); static List * SequenceDependencyCommandList(Oid relationId); static char * TruncateTriggerCreateCommand(Oid relationId); static char * SchemaOwnerName(Oid objectId); @@ -103,6 +107,8 @@ static bool got_SIGALRM = false; Datum start_metadata_sync_to_node(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + text *nodeName = PG_GETARG_TEXT_P(0); int32 nodePort = PG_GETARG_INT32(1); @@ -126,10 +132,10 @@ StartMetadataSyncToNode(const char *nodeNameString, int32 nodePort) /* fail if metadata synchronization doesn't succeed */ bool raiseInterrupts = true; + CheckCitusVersion(ERROR); EnsureCoordinator(); EnsureSuperUser(); EnsureModificationsCanRun(); - CheckCitusVersion(ERROR); PreventInTransactionBlock(true, "start_metadata_sync_to_node"); @@ -185,14 +191,14 @@ StartMetadataSyncToNode(const char *nodeNameString, int32 nodePort) Datum stop_metadata_sync_to_node(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + EnsureCoordinator(); + EnsureSuperUser(); + text *nodeName = PG_GETARG_TEXT_P(0); int32 nodePort = PG_GETARG_INT32(1); char *nodeNameString = text_to_cstring(nodeName); - EnsureCoordinator(); - EnsureSuperUser(); - CheckCitusVersion(ERROR); - LockRelationOid(DistNodeRelationId(), ExclusiveLock); WorkerNode *workerNode = FindWorkerNode(nodeNameString, nodePort); @@ -372,11 +378,9 @@ MetadataCreateCommands(void) continue; } - List *workerSequenceDDLCommands = SequenceDDLCommandsForTable(relationId); List *ddlCommandList = GetFullTableCreationCommands(relationId, includeSequenceDefaults); char *tableOwnerResetCommand = TableOwnerResetCommand(relationId); - List *sequenceDependencyCommandList = SequenceDependencyCommandList(relationId); /* * Tables might have dependencies on different objects, since we create shards for @@ -386,6 +390,7 @@ MetadataCreateCommands(void) ObjectAddressSet(tableAddress, RelationRelationId, relationId); EnsureDependenciesExistOnAllNodes(&tableAddress); + List *workerSequenceDDLCommands = SequenceDDLCommandsForTable(relationId); metadataSnapshotCommandList = list_concat(metadataSnapshotCommandList, workerSequenceDDLCommands); @@ -400,6 +405,9 @@ MetadataCreateCommands(void) metadataSnapshotCommandList = lappend(metadataSnapshotCommandList, tableOwnerResetCommand); + + List *sequenceDependencyCommandList = SequenceDependencyCommandList( + relationId); metadataSnapshotCommandList = list_concat(metadataSnapshotCommandList, sequenceDependencyCommandList); } @@ -507,7 +515,8 @@ GetDistributedTableDDLEvents(Oid relationId) } /* command to associate sequences with table */ - List *sequenceDependencyCommandList = SequenceDependencyCommandList(relationId); + List *sequenceDependencyCommandList = 
SequenceDependencyCommandList( + relationId); commandList = list_concat(commandList, sequenceDependencyCommandList); } @@ -1040,21 +1049,58 @@ List * SequenceDDLCommandsForTable(Oid relationId) { List *sequenceDDLList = NIL; - List *ownedSequences = GetSequencesOwnedByRelation(relationId); + + List *attnumList = NIL; + List *dependentSequenceList = NIL; + GetDependentSequencesWithRelation(relationId, &attnumList, &dependentSequenceList, 0); + char *ownerName = TableOwner(relationId); - Oid sequenceOid = InvalidOid; - foreach_oid(sequenceOid, ownedSequences) + ListCell *attnumCell = NULL; + ListCell *dependentSequenceCell = NULL; + forboth(attnumCell, attnumList, dependentSequenceCell, dependentSequenceList) { + AttrNumber attnum = lfirst_int(attnumCell); + Oid sequenceOid = lfirst_oid(dependentSequenceCell); + char *sequenceDef = pg_get_sequencedef_string(sequenceOid); char *escapedSequenceDef = quote_literal_cstr(sequenceDef); StringInfo wrappedSequenceDef = makeStringInfo(); StringInfo sequenceGrantStmt = makeStringInfo(); char *sequenceName = generate_qualified_relation_name(sequenceOid); Form_pg_sequence sequenceData = pg_get_sequencedef(sequenceOid); - Oid sequenceTypeOid = sequenceData->seqtypid; + Oid sequenceTypeOid = GetAttributeTypeOid(relationId, attnum); char *typeName = format_type_be(sequenceTypeOid); + /* get sequence address */ + ObjectAddress sequenceAddress = { 0 }; + ObjectAddressSet(sequenceAddress, RelationRelationId, sequenceOid); + EnsureDependenciesExistOnAllNodes(&sequenceAddress); + + /* + * Alter the sequence's data type in the coordinator if needed. + * A sequence's type is bigint by default and it doesn't change even if + * it's used in an int column. However, when distributing the sequence, + * we don't allow incompatible min/max ranges between the coordinator and + * workers, so we determine the sequence type here based on its current usage + * and propagate that same type to the workers as well. 
+ * TODO: move this command to the part where the sequence is + * used in a distributed table: both in create_distributed_table + * and ALTER TABLE commands that include a sequence default + */ + Oid currentSequenceTypeOid = sequenceData->seqtypid; + if (currentSequenceTypeOid != sequenceTypeOid) + { + AlterSeqStmt *alterSequenceStatement = makeNode(AlterSeqStmt); + char *seqNamespace = get_namespace_name(get_rel_namespace(sequenceOid)); + char *seqName = get_rel_name(sequenceOid); + alterSequenceStatement->sequence = makeRangeVar(seqNamespace, seqName, -1); + Node *asTypeNode = (Node *) makeTypeNameFromOid(sequenceTypeOid, -1); + SetDefElemArg(alterSequenceStatement, "as", asTypeNode); + ParseState *pstate = make_parsestate(NULL); + AlterSequence(pstate, alterSequenceStatement); + } + /* create schema if needed */ appendStringInfo(wrappedSequenceDef, WORKER_APPLY_SEQUENCE_COMMAND, @@ -1067,12 +1113,184 @@ SequenceDDLCommandsForTable(Oid relationId) sequenceDDLList = lappend(sequenceDDLList, wrappedSequenceDef->data); sequenceDDLList = lappend(sequenceDDLList, sequenceGrantStmt->data); + + MarkObjectDistributed(&sequenceAddress); } return sequenceDDLList; } +/* + * GetAttributeTypeOid returns the OID of the type of the attribute of + * provided relationId that has the provided attnum + */ +Oid +GetAttributeTypeOid(Oid relationId, AttrNumber attnum) +{ + Oid resultOid = InvalidOid; + + ScanKeyData key[2]; + + /* Grab an appropriate lock on the pg_attribute relation */ + Relation attrel = table_open(AttributeRelationId, AccessShareLock); + + /* Use the index to scan only system attributes of the target relation */ + ScanKeyInit(&key[0], + Anum_pg_attribute_attrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(relationId)); + ScanKeyInit(&key[1], + Anum_pg_attribute_attnum, + BTLessEqualStrategyNumber, F_INT2LE, + Int16GetDatum(attnum)); + + SysScanDesc scan = systable_beginscan(attrel, AttributeRelidNumIndexId, true, NULL, 2, + key); + + HeapTuple attributeTuple; + while (HeapTupleIsValid(attributeTuple = systable_getnext(scan))) + { + Form_pg_attribute att = (Form_pg_attribute) GETSTRUCT(attributeTuple); + resultOid = att->atttypid; + } + + systable_endscan(scan); + table_close(attrel, AccessShareLock); + + return resultOid; +} + + +/* + * GetDependentSequencesWithRelation appends the attnum and id of sequences that + * have direct (owned sequences) or indirect dependency with the given relationId, + * to the lists passed as NIL initially. + * For both cases, we use the intermediate AttrDefault object from pg_depend. + * If attnum is specified, we only return the sequences related to that + * attribute of the relationId. + */ +void +GetDependentSequencesWithRelation(Oid relationId, List **attnumList, + List **dependentSequenceList, AttrNumber attnum) +{ + Assert(*attnumList == NIL && *dependentSequenceList == NIL); + + List *attrdefResult = NIL; + List *attrdefAttnumResult = NIL; + ScanKeyData key[3]; + HeapTuple tup; + + Relation depRel = table_open(DependRelationId, AccessShareLock); + + ScanKeyInit(&key[0], + Anum_pg_depend_refclassid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationRelationId)); + ScanKeyInit(&key[1], + Anum_pg_depend_refobjid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(relationId)); + if (attnum) + { + ScanKeyInit(&key[2], + Anum_pg_depend_refobjsubid, + BTEqualStrategyNumber, F_INT4EQ, + Int32GetDatum(attnum)); + } + + SysScanDesc scan = systable_beginscan(depRel, DependReferenceIndexId, true, + NULL, attnum ? 
3 : 2, key); + + while (HeapTupleIsValid(tup = systable_getnext(scan))) + { + Form_pg_depend deprec = (Form_pg_depend) GETSTRUCT(tup); + + if (deprec->classid == AttrDefaultRelationId && + deprec->objsubid == 0 && + deprec->refobjsubid != 0 && + deprec->deptype == DEPENDENCY_AUTO) + { + attrdefResult = lappend_oid(attrdefResult, deprec->objid); + attrdefAttnumResult = lappend_int(attrdefAttnumResult, deprec->refobjsubid); + } + } + + systable_endscan(scan); + + table_close(depRel, AccessShareLock); + + ListCell *attrdefOidCell = NULL; + ListCell *attrdefAttnumCell = NULL; + forboth(attrdefOidCell, attrdefResult, attrdefAttnumCell, attrdefAttnumResult) + { + Oid attrdefOid = lfirst_oid(attrdefOidCell); + AttrNumber attrdefAttnum = lfirst_int(attrdefAttnumCell); + + List *sequencesFromAttrDef = GetSequencesFromAttrDef(attrdefOid); + + /* to simplify and eliminate cases like "DEFAULT nextval('..') - nextval('..')" */ + if (list_length(sequencesFromAttrDef) > 1) + { + ereport(ERROR, (errmsg("More than one sequence in a column default" + " is not supported for distribution"))); + } + + if (list_length(sequencesFromAttrDef) == 1) + { + *dependentSequenceList = list_concat(*dependentSequenceList, + sequencesFromAttrDef); + *attnumList = lappend_int(*attnumList, attrdefAttnum); + } + } +} + + +/* + * GetSequencesFromAttrDef returns a list of sequence OIDs that have + * dependency with the given attrdefOid in pg_depend + */ +List * +GetSequencesFromAttrDef(Oid attrdefOid) +{ + List *sequencesResult = NIL; + ScanKeyData key[2]; + HeapTuple tup; + + Relation depRel = table_open(DependRelationId, AccessShareLock); + + ScanKeyInit(&key[0], + Anum_pg_depend_classid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(AttrDefaultRelationId)); + ScanKeyInit(&key[1], + Anum_pg_depend_objid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(attrdefOid)); + + SysScanDesc scan = systable_beginscan(depRel, DependDependerIndexId, true, + NULL, 2, key); + + while (HeapTupleIsValid(tup = systable_getnext(scan))) + { + Form_pg_depend deprec = (Form_pg_depend) GETSTRUCT(tup); + + if (deprec->refclassid == RelationRelationId && + deprec->deptype == DEPENDENCY_NORMAL && + get_rel_relkind(deprec->refobjid) == RELKIND_SEQUENCE) + { + sequencesResult = lappend_oid(sequencesResult, deprec->refobjid); + } + } + + systable_endscan(scan); + + table_close(depRel, AccessShareLock); + + return sequencesResult; +} + + /* * SequenceDependencyCommandList generates commands to record the dependency * of sequences on tables on the worker. 
This dependency does not exist by diff --git a/src/backend/distributed/metadata/metadata_utility.c b/src/backend/distributed/metadata/metadata_utility.c index 44ba74633..9b4a7025a 100644 --- a/src/backend/distributed/metadata/metadata_utility.c +++ b/src/backend/distributed/metadata/metadata_utility.c @@ -270,13 +270,13 @@ citus_shard_sizes(PG_FUNCTION_ARGS) Datum citus_total_relation_size(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + Oid relationId = PG_GETARG_OID(0); bool failOnError = PG_GETARG_BOOL(1); SizeQueryType sizeQueryType = TOTAL_RELATION_SIZE; - CheckCitusVersion(ERROR); - if (CStoreTable(relationId)) { sizeQueryType = CSTORE_TABLE_SIZE; @@ -301,12 +301,12 @@ citus_total_relation_size(PG_FUNCTION_ARGS) Datum citus_table_size(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + Oid relationId = PG_GETARG_OID(0); bool failOnError = true; SizeQueryType sizeQueryType = TABLE_SIZE; - CheckCitusVersion(ERROR); - if (CStoreTable(relationId)) { sizeQueryType = CSTORE_TABLE_SIZE; @@ -331,12 +331,12 @@ citus_table_size(PG_FUNCTION_ARGS) Datum citus_relation_size(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + Oid relationId = PG_GETARG_OID(0); bool failOnError = true; SizeQueryType sizeQueryType = RELATION_SIZE; - CheckCitusVersion(ERROR); - if (CStoreTable(relationId)) { sizeQueryType = CSTORE_TABLE_SIZE; @@ -644,7 +644,19 @@ DistributedTableSizeOnWorker(WorkerNode *workerNode, Oid relationId, StringInfo tableSizeStringInfo = (StringInfo) linitial(sizeList); char *tableSizeString = tableSizeStringInfo->data; - *tableSize = SafeStringToUint64(tableSizeString); + if (strlen(tableSizeString) > 0) + { + *tableSize = SafeStringToUint64(tableSizeString); + } + else + { + /* + * This means the shard is moved or dropped while citus_total_relation_size is + * being executed. For this case we get an empty string as table size. + * We can take that as zero to prevent any unnecessary errors. + */ + *tableSize = 0; + } PQclear(result); ClearResults(connection, failOnError); @@ -1288,6 +1300,26 @@ ShardLength(uint64 shardId) } +/* + * NodeGroupHasLivePlacements returns true if there is any placement + * on the given node group which is not a SHARD_STATE_TO_DELETE placement. 
+ */ +bool +NodeGroupHasLivePlacements(int32 groupId) +{ + List *shardPlacements = AllShardPlacementsOnNodeGroup(groupId); + GroupShardPlacement *placement = NULL; + foreach_ptr(placement, shardPlacements) + { + if (placement->shardState != SHARD_STATE_TO_DELETE) + { + return true; + } + } + return false; +} + + /* * NodeGroupHasShardPlacements returns whether any active shards are placed on the group */ diff --git a/src/backend/distributed/metadata/node_metadata.c b/src/backend/distributed/metadata/node_metadata.c index 3853cbe16..5abc62e43 100644 --- a/src/backend/distributed/metadata/node_metadata.c +++ b/src/backend/distributed/metadata/node_metadata.c @@ -112,7 +112,7 @@ static bool UnsetMetadataSyncedForAll(void); static void ErrorIfCoordinatorMetadataSetFalse(WorkerNode *workerNode, Datum value, char *field); static WorkerNode * SetShouldHaveShards(WorkerNode *workerNode, bool shouldHaveShards); - +static void RemoveOldShardPlacementForNodeGroup(int groupId); /* declarations for dynamic loading */ PG_FUNCTION_INFO_V1(citus_set_coordinator_host); @@ -161,6 +161,8 @@ DefaultNodeMetadata() Datum citus_set_coordinator_host(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + text *nodeName = PG_GETARG_TEXT_P(0); int32 nodePort = PG_GETARG_INT32(1); char *nodeNameString = text_to_cstring(nodeName); @@ -173,8 +175,6 @@ citus_set_coordinator_host(PG_FUNCTION_ARGS) Name nodeClusterName = PG_GETARG_NAME(3); nodeMetadata.nodeCluster = NameStr(*nodeClusterName); - CheckCitusVersion(ERROR); - /* prevent concurrent modification */ LockRelationOid(DistNodeRelationId(), RowShareLock); @@ -219,6 +219,8 @@ citus_set_coordinator_host(PG_FUNCTION_ARGS) Datum citus_add_node(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + text *nodeName = PG_GETARG_TEXT_P(0); int32 nodePort = PG_GETARG_INT32(1); char *nodeNameString = text_to_cstring(nodeName); @@ -227,8 +229,6 @@ citus_add_node(PG_FUNCTION_ARGS) bool nodeAlreadyExists = false; nodeMetadata.groupId = PG_GETARG_INT32(2); - CheckCitusVersion(ERROR); - /* * During tests this function is called before nodeRole and nodeCluster have been * created. 
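As a usage sketch for citus_add_node, whose argument handling is reordered in the hunk above (the worker hostname and port are hypothetical; the two-argument form matches the error hints used later in this patch):

    SELECT citus_add_node('worker-1', 5432);
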
@@ -288,6 +288,8 @@ master_add_node(PG_FUNCTION_ARGS) Datum citus_add_inactive_node(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + text *nodeName = PG_GETARG_TEXT_P(0); int32 nodePort = PG_GETARG_INT32(1); char *nodeNameString = text_to_cstring(nodeName); @@ -299,8 +301,6 @@ citus_add_inactive_node(PG_FUNCTION_ARGS) nodeMetadata.nodeRole = PG_GETARG_OID(3); nodeMetadata.nodeCluster = NameStr(*nodeClusterName); - CheckCitusVersion(ERROR); - if (nodeMetadata.groupId == COORDINATOR_GROUP_ID) { ereport(ERROR, (errmsg("coordinator node cannot be added as inactive node"))); @@ -331,6 +331,8 @@ master_add_inactive_node(PG_FUNCTION_ARGS) Datum citus_add_secondary_node(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + text *nodeName = PG_GETARG_TEXT_P(0); int32 nodePort = PG_GETARG_INT32(1); char *nodeNameString = text_to_cstring(nodeName); @@ -348,8 +350,6 @@ citus_add_secondary_node(PG_FUNCTION_ARGS) nodeMetadata.nodeRole = SecondaryNodeRoleId(); nodeMetadata.isActive = true; - CheckCitusVersion(ERROR); - int nodeId = AddNodeMetadata(nodeNameString, nodePort, &nodeMetadata, &nodeAlreadyExists); TransactionModifiedNodeMetadata = true; @@ -380,11 +380,11 @@ master_add_secondary_node(PG_FUNCTION_ARGS) Datum citus_remove_node(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + text *nodeNameText = PG_GETARG_TEXT_P(0); int32 nodePort = PG_GETARG_INT32(1); - CheckCitusVersion(ERROR); - RemoveNodeFromCluster(text_to_cstring(nodeNameText), nodePort); TransactionModifiedNodeMetadata = true; @@ -631,7 +631,6 @@ static WorkerNode * ModifiableWorkerNode(const char *nodeName, int32 nodePort) { CheckCitusVersion(ERROR); - EnsureCoordinator(); /* take an exclusive lock on pg_dist_node to serialize pg_dist_node changes */ @@ -843,6 +842,8 @@ ActivateNode(char *nodeName, int nodePort) Datum citus_update_node(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + int32 nodeId = PG_GETARG_INT32(0); text *newNodeName = PG_GETARG_TEXT_P(1); @@ -864,8 +865,6 @@ citus_update_node(PG_FUNCTION_ARGS) List *placementList = NIL; BackgroundWorkerHandle *handle = NULL; - CheckCitusVersion(ERROR); - WorkerNode *workerNodeWithSameAddress = FindWorkerNodeAnyCluster(newNodeNameString, newNodePort); if (workerNodeWithSameAddress != NULL) @@ -1077,10 +1076,10 @@ UpdateNodeLocation(int32 nodeId, char *newNodeName, int32 newNodePort) Datum get_shard_id_for_distribution_column(PG_FUNCTION_ARGS) { - ShardInterval *shardInterval = NULL; - CheckCitusVersion(ERROR); + ShardInterval *shardInterval = NULL; + /* * To have optional parameter as NULL, we defined this UDF as not strict, therefore * we need to check all parameters for NULL values. 
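As a usage sketch for get_shard_id_for_distribution_column from the hunk above (the table name and value are hypothetical; the second argument is optional because the UDF is declared non-strict, as the surrounding comment notes):

    SELECT get_shard_id_for_distribution_column('events', 42);
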
@@ -1291,11 +1290,9 @@ RemoveNodeFromCluster(char *nodeName, int32 nodePort) */ DeleteAllReferenceTablePlacementsFromNodeGroup(workerNode->groupId); } - bool onlyConsiderActivePlacements = false; - if (NodeGroupHasShardPlacements(workerNode->groupId, - onlyConsiderActivePlacements)) + if (NodeGroupHasLivePlacements(workerNode->groupId)) { - if (ClusterHasReferenceTable()) + if (ActivePrimaryNodeCount() == 1 && ClusterHasReferenceTable()) { ereport(ERROR, (errmsg( "cannot remove the last worker node because there are reference " @@ -1320,6 +1317,8 @@ RemoveNodeFromCluster(char *nodeName, int32 nodePort) DeleteNodeRow(workerNode->workerName, nodePort); + RemoveOldShardPlacementForNodeGroup(workerNode->groupId); + char *nodeDeleteCommand = NodeDeleteCommand(workerNode->nodeId); /* make sure we don't have any lingering session lifespan connections */ @@ -1329,6 +1328,29 @@ RemoveNodeFromCluster(char *nodeName, int32 nodePort) } +/* + * RemoveOldShardPlacementForNodeGroup removes all old shard placements + * for the given node group from pg_dist_placement. + */ +static void +RemoveOldShardPlacementForNodeGroup(int groupId) +{ + /* + * Prevent concurrent deferred drop + */ + LockPlacementCleanup(); + List *shardPlacementsOnNode = AllShardPlacementsOnNodeGroup(groupId); + GroupShardPlacement *placement = NULL; + foreach_ptr(placement, shardPlacementsOnNode) + { + if (placement->shardState == SHARD_STATE_TO_DELETE) + { + DeleteShardPlacementRow(placement->placementId); + } + } +} + + /* * CanRemoveReferenceTablePlacements returns true if active primary * node count is more than 1, which means that even if we remove a node @@ -1818,7 +1840,7 @@ InsertPlaceholderCoordinatorRecord(void) bool nodeAlreadyExists = false; /* as long as there is a single node, localhost should be ok */ - AddNodeMetadata("localhost", PostPortNumber, &nodeMetadata, &nodeAlreadyExists); + AddNodeMetadata(LocalHostName, PostPortNumber, &nodeMetadata, &nodeAlreadyExists); } diff --git a/src/backend/distributed/operations/citus_create_restore_point.c b/src/backend/distributed/operations/citus_create_restore_point.c index 9d9019a83..42fc5311f 100644 --- a/src/backend/distributed/operations/citus_create_restore_point.c +++ b/src/backend/distributed/operations/citus_create_restore_point.c @@ -49,12 +49,12 @@ PG_FUNCTION_INFO_V1(citus_create_restore_point); Datum citus_create_restore_point(PG_FUNCTION_ARGS) { - text *restoreNameText = PG_GETARG_TEXT_P(0); - CheckCitusVersion(ERROR); EnsureSuperUser(); EnsureCoordinator(); + text *restoreNameText = PG_GETARG_TEXT_P(0); + if (RecoveryInProgress()) { ereport(ERROR, diff --git a/src/backend/distributed/operations/citus_tools.c b/src/backend/distributed/operations/citus_tools.c index 4035cf5e1..9b2eedefb 100644 --- a/src/backend/distributed/operations/citus_tools.c +++ b/src/backend/distributed/operations/citus_tools.c @@ -71,14 +71,14 @@ static Tuplestorestate * CreateTupleStore(TupleDesc tupleDescriptor, Datum master_run_on_worker(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; bool parallelExecution = false; StringInfo *nodeNameArray = NULL; int *nodePortArray = NULL; StringInfo *commandStringArray = NULL; - CheckCitusVersion(ERROR); - /* check to see if caller supports us returning a tuplestore */ if (!rsinfo || !(rsinfo->allowedModes & SFRM_Materialize)) { diff --git a/src/backend/distributed/operations/create_shards.c b/src/backend/distributed/operations/create_shards.c index 74e0022a4..65cba434e 100644 --- 
a/src/backend/distributed/operations/create_shards.c +++ b/src/backend/distributed/operations/create_shards.c @@ -64,6 +64,9 @@ PG_FUNCTION_INFO_V1(master_create_worker_shards); Datum master_create_worker_shards(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + EnsureCoordinator(); + text *tableNameText = PG_GETARG_TEXT_P(0); int32 shardCount = PG_GETARG_INT32(1); int32 replicationFactor = PG_GETARG_INT32(2); @@ -74,9 +77,6 @@ master_create_worker_shards(PG_FUNCTION_ARGS) /* do not add any data */ bool useExclusiveConnections = false; - EnsureCoordinator(); - CheckCitusVersion(ERROR); - /* * distributed tables might have dependencies on different objects, since we create * shards for a distributed table via multiple sessions these objects will be created diff --git a/src/backend/distributed/operations/delete_protocol.c b/src/backend/distributed/operations/delete_protocol.c index 3ee3752b7..7543beaa6 100644 --- a/src/backend/distributed/operations/delete_protocol.c +++ b/src/backend/distributed/operations/delete_protocol.c @@ -109,6 +109,9 @@ PG_FUNCTION_INFO_V1(master_drop_sequences); Datum master_apply_delete_command(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + EnsureCoordinator(); + text *queryText = PG_GETARG_TEXT_P(0); char *queryString = text_to_cstring(queryText); List *deletableShardIntervalList = NIL; @@ -116,9 +119,6 @@ master_apply_delete_command(PG_FUNCTION_ARGS) RawStmt *rawStmt = (RawStmt *) ParseTreeRawStmt(queryString); Node *queryTreeNode = rawStmt->stmt; - EnsureCoordinator(); - CheckCitusVersion(ERROR); - if (!IsA(queryTreeNode, DeleteStmt)) { ereport(ERROR, (errmsg("query \"%s\" is not a delete statement", @@ -208,6 +208,8 @@ master_apply_delete_command(PG_FUNCTION_ARGS) Datum citus_drop_all_shards(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + Oid relationId = PG_GETARG_OID(0); text *schemaNameText = PG_GETARG_TEXT_P(1); text *relationNameText = PG_GETARG_TEXT_P(2); @@ -215,8 +217,6 @@ citus_drop_all_shards(PG_FUNCTION_ARGS) char *schemaName = text_to_cstring(schemaNameText); char *relationName = text_to_cstring(relationNameText); - CheckCitusVersion(ERROR); - /* * The SQL_DROP trigger calls this function even for tables that are * not distributed. In that case, silently ignore and return -1. 
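As a usage sketch for master_apply_delete_command from this file's hunks above (the table and cutoff date are hypothetical; broadly, the UDF parses the given DELETE statement and drops shards whose ranges are entirely covered by its WHERE clause):

    SELECT master_apply_delete_command('DELETE FROM events WHERE created_at < ''2021-01-01''');
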
@@ -325,7 +325,7 @@ DropShards(Oid relationId, char *schemaName, char *relationName, */ if (MultiShardCommitProtocol == COMMIT_PROTOCOL_2PC) { - CoordinatedTransactionShouldUse2PC(); + Use2PCForCoordinatedTransaction(); } List *dropTaskList = DropTaskList(relationId, schemaName, relationName, diff --git a/src/backend/distributed/operations/modify_multiple_shards.c b/src/backend/distributed/operations/modify_multiple_shards.c index caac4cb64..cb740dabb 100644 --- a/src/backend/distributed/operations/modify_multiple_shards.c +++ b/src/backend/distributed/operations/modify_multiple_shards.c @@ -70,13 +70,13 @@ PG_FUNCTION_INFO_V1(master_modify_multiple_shards); Datum master_modify_multiple_shards(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + text *queryText = PG_GETARG_TEXT_P(0); char *queryString = text_to_cstring(queryText); RawStmt *rawStmt = (RawStmt *) ParseTreeRawStmt(queryString); Node *queryTreeNode = rawStmt->stmt; - CheckCitusVersion(ERROR); - if (!IsA(queryTreeNode, DeleteStmt) && !IsA(queryTreeNode, UpdateStmt)) { ereport(ERROR, (errmsg("query \"%s\" is not a delete or update " diff --git a/src/backend/distributed/operations/node_protocol.c b/src/backend/distributed/operations/node_protocol.c index f4def2511..c52f6efcf 100644 --- a/src/backend/distributed/operations/node_protocol.c +++ b/src/backend/distributed/operations/node_protocol.c @@ -74,10 +74,13 @@ int ShardPlacementPolicy = SHARD_PLACEMENT_ROUND_ROBIN; int NextShardId = 0; int NextPlacementId = 0; -static List * GetTableReplicaIdentityCommand(Oid relationId); +static void GatherIndexAndConstraintDefinitionListExcludingReplicaIdentity(Form_pg_index + indexForm, + List ** + indexDDLEventList, + int indexFlags); static Datum WorkerNodeGetDatum(WorkerNode *workerNode, TupleDesc tupleDescriptor); -static void GatherIndexAndConstraintDefinitionList(Form_pg_index indexForm, - List **indexDDLEventList); + /* exports for SQL callable functions */ PG_FUNCTION_INFO_V1(master_get_table_metadata); @@ -100,6 +103,8 @@ PG_FUNCTION_INFO_V1(master_stage_shard_placement_row); Datum master_get_table_metadata(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + text *relationName = PG_GETARG_TEXT_P(0); Oid relationId = ResolveRelationId(relationName, false); @@ -109,8 +114,6 @@ master_get_table_metadata(PG_FUNCTION_ARGS) Datum values[TABLE_METADATA_FIELDS]; bool isNulls[TABLE_METADATA_FIELDS]; - CheckCitusVersion(ERROR); - /* find partition tuple for partitioned relation */ CitusTableCacheEntry *partitionEntry = GetCitusTableCacheEntry(relationId); @@ -198,11 +201,11 @@ CStoreTable(Oid relationId) Datum master_get_table_ddl_events(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + FuncCallContext *functionContext = NULL; ListCell *tableDDLEventCell = NULL; - CheckCitusVersion(ERROR); - /* * On the very first call to this function, we first use the given relation * name to get to the relation. 
We then recreate the list of DDL statements @@ -273,8 +276,8 @@ master_get_table_ddl_events(PG_FUNCTION_ARGS) Datum master_get_new_shardid(PG_FUNCTION_ARGS) { - EnsureCoordinator(); CheckCitusVersion(ERROR); + EnsureCoordinator(); uint64 shardId = GetNextShardId(); Datum shardIdDatum = Int64GetDatum(shardId); @@ -343,8 +346,8 @@ GetNextShardId() Datum master_get_new_placementid(PG_FUNCTION_ARGS) { - EnsureCoordinator(); CheckCitusVersion(ERROR); + EnsureCoordinator(); uint64 placementId = GetNextPlacementId(); Datum placementIdDatum = Int64GetDatum(placementId); @@ -450,11 +453,11 @@ master_stage_shard_placement_row(PG_FUNCTION_ARGS) Datum citus_get_active_worker_nodes(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + FuncCallContext *functionContext = NULL; uint32 workerNodeCount = 0; - CheckCitusVersion(ERROR); - if (SRF_IS_FIRSTCALL()) { /* create a function context for cross-call persistence */ @@ -549,7 +552,7 @@ GetFullTableCreationCommands(Oid relationId, bool includeSequenceDefaults) tableDDLEventList = list_concat(tableDDLEventList, preLoadCreationCommandList); List *postLoadCreationCommandList = - GetPostLoadTableCreationCommands(relationId, true); + GetPostLoadTableCreationCommands(relationId, true, true); tableDDLEventList = list_concat(tableDDLEventList, postLoadCreationCommandList); @@ -562,19 +565,43 @@ GetFullTableCreationCommands(Oid relationId, bool includeSequenceDefaults) * of DDL commands that should be applied after loading the data. */ List * -GetPostLoadTableCreationCommands(Oid relationId, bool includeIndexes) +GetPostLoadTableCreationCommands(Oid relationId, bool includeIndexes, + bool includeReplicaIdentity) { List *tableDDLEventList = NIL; - if (includeIndexes) + /* + * Include all the commands (e.g., create index, set index clustered + * and set index statistics) regarding the indexes. Note that + * running all these commands in parallel might fail as the + * latter two depends on the first one. So, the caller should + * execute the commands sequentially. + */ + int indexFlags = INCLUDE_INDEX_ALL_STATEMENTS; + + if (includeIndexes && includeReplicaIdentity) { List *indexAndConstraintCommandList = - GetTableIndexAndConstraintCommands(relationId); + GetTableIndexAndConstraintCommands(relationId, indexFlags); + tableDDLEventList = list_concat(tableDDLEventList, indexAndConstraintCommandList); + } + else if (includeIndexes && !includeReplicaIdentity) + { + /* + * Do not include the indexes/constraints that backs + * replica identity, if any. + */ + List *indexAndConstraintCommandList = + GetTableIndexAndConstraintCommandsExcludingReplicaIdentity(relationId, + indexFlags); tableDDLEventList = list_concat(tableDDLEventList, indexAndConstraintCommandList); } - List *replicaIdentityEvents = GetTableReplicaIdentityCommand(relationId); - tableDDLEventList = list_concat(tableDDLEventList, replicaIdentityEvents); + if (includeReplicaIdentity) + { + List *replicaIdentityEvents = GetTableReplicaIdentityCommand(relationId); + tableDDLEventList = list_concat(tableDDLEventList, replicaIdentityEvents); + } List *triggerCommands = GetExplicitTriggerCommandList(relationId); tableDDLEventList = list_concat(tableDDLEventList, triggerCommands); @@ -590,7 +617,7 @@ GetPostLoadTableCreationCommands(Oid relationId, bool includeIndexes) * GetTableReplicaIdentityCommand returns the list of DDL commands to * (re)define the replica identity choice for a given table. 
*/ -static List * +List * GetTableReplicaIdentityCommand(Oid relationId) { List *replicaIdentityCreateCommandList = NIL; @@ -694,18 +721,82 @@ GetPreLoadTableCreationCommands(Oid relationId, bool includeSequenceDefaults, * (re)create indexes and constraints for a given table. */ List * -GetTableIndexAndConstraintCommands(Oid relationId) +GetTableIndexAndConstraintCommands(Oid relationId, int indexFlags) { return ExecuteFunctionOnEachTableIndex(relationId, - GatherIndexAndConstraintDefinitionList); + GatherIndexAndConstraintDefinitionList, + indexFlags); +} + + +/* + * GetTableIndexAndConstraintCommandsExcludingReplicaIdentity returns the list of + * DDL commands to (re)create indexes and constraints for a given table, excluding + * the ones that back the replica identity. + */ +List * +GetTableIndexAndConstraintCommandsExcludingReplicaIdentity(Oid relationId, int indexFlags) +{ + return ExecuteFunctionOnEachTableIndex(relationId, + GatherIndexAndConstraintDefinitionListExcludingReplicaIdentity, + indexFlags); +} + + +/* + * GatherIndexAndConstraintDefinitionListExcludingReplicaIdentity is a wrapper around + * GatherIndexAndConstraintDefinitionList() that skips the indexes or + * constraints backing the replica identity. + */ +static void +GatherIndexAndConstraintDefinitionListExcludingReplicaIdentity(Form_pg_index indexForm, + List **indexDDLEventList, + int indexFlags) +{ + Oid relationId = indexForm->indrelid; + Relation relation = table_open(relationId, AccessShareLock); + + Oid replicaIdentityIndex = GetRelationIdentityOrPK(relation); + + if (replicaIdentityIndex == indexForm->indexrelid) + { + /* this index is backing the replica identity, so skip */ + table_close(relation, NoLock); + return; + } + + GatherIndexAndConstraintDefinitionList(indexForm, indexDDLEventList, indexFlags); + + table_close(relation, NoLock); +} + + +/* + * GetRelationIdentityOrPK returns the replica identity index of the relation, + * or its primary key index if no replica identity is defined. + * + * If neither is defined, returns InvalidOid. + * + * Inspired by postgres/src/backend/replication/logical/worker.c + */ +Oid +GetRelationIdentityOrPK(Relation rel) +{ + Oid idxoid = RelationGetReplicaIndex(rel); + + if (!OidIsValid(idxoid)) + { + idxoid = RelationGetPrimaryKeyIndex(rel); + } + + return idxoid; } /* * GatherIndexAndConstraintDefinitionList adds the DDL command for the given index. 
*/ -static void -GatherIndexAndConstraintDefinitionList(Form_pg_index indexForm, List **indexDDLEventList) +void +GatherIndexAndConstraintDefinitionList(Form_pg_index indexForm, List **indexDDLEventList, + int indexFlags) { Oid indexId = indexForm->indexrelid; char *statementDef = NULL; @@ -726,11 +817,15 @@ GatherIndexAndConstraintDefinitionList(Form_pg_index indexForm, List **indexDDLE } /* append found constraint or index definition to the list */ - *indexDDLEventList = lappend(*indexDDLEventList, makeTableDDLCommandString( - statementDef)); + if (indexFlags & INCLUDE_CREATE_INDEX_STATEMENTS) + { + *indexDDLEventList = lappend(*indexDDLEventList, makeTableDDLCommandString( + statementDef)); + } /* if table is clustered on this index, append definition to the list */ - if (indexForm->indisclustered) + if ((indexFlags & INCLUDE_INDEX_CLUSTERED_STATEMENTS) && + indexForm->indisclustered) { char *clusteredDef = pg_get_indexclusterdef_string(indexId); Assert(clusteredDef != NULL); @@ -740,8 +835,12 @@ GatherIndexAndConstraintDefinitionList(Form_pg_index indexForm, List **indexDDLE } /* we need alter index commands for altered targets on expression indexes */ - List *alterIndexStatisticsCommands = GetAlterIndexStatisticsCommands(indexId); - *indexDDLEventList = list_concat(*indexDDLEventList, alterIndexStatisticsCommands); + if (indexFlags & INCLUDE_INDEX_STATISTICS_STATEMENTTS) + { + List *alterIndexStatisticsCommands = GetAlterIndexStatisticsCommands(indexId); + *indexDDLEventList = list_concat(*indexDDLEventList, + alterIndexStatisticsCommands); + } } diff --git a/src/backend/distributed/operations/partitioning.c b/src/backend/distributed/operations/partitioning.c index c7d989d5a..9e2057927 100644 --- a/src/backend/distributed/operations/partitioning.c +++ b/src/backend/distributed/operations/partitioning.c @@ -35,10 +35,10 @@ PG_FUNCTION_INFO_V1(time_partition_range); Datum time_partition_range(PG_FUNCTION_ARGS) { - Oid relationId = PG_GETARG_OID(0); - CheckCitusVersion(ERROR); + Oid relationId = PG_GETARG_OID(0); + /* create tuple descriptor for return value */ TupleDesc metadataDescriptor = NULL; TypeFuncClass resultTypeClass = get_call_result_type(fcinfo, NULL, diff --git a/src/backend/distributed/operations/repair_shards.c b/src/backend/distributed/operations/repair_shards.c index b5b30606e..c3dfa791e 100644 --- a/src/backend/distributed/operations/repair_shards.c +++ b/src/backend/distributed/operations/repair_shards.c @@ -63,7 +63,10 @@ static void ReplicateColocatedShardPlacement(int64 shardId, char *sourceNodeName char shardReplicationMode); static void CopyShardTables(List *shardIntervalList, char *sourceNodeName, int32 sourceNodePort, char *targetNodeName, - int32 targetNodePort); + int32 targetNodePort, bool useLogicalReplication); +static void CopyShardTablesViaBlockWrites(List *shardIntervalList, char *sourceNodeName, + int32 sourceNodePort, + char *targetNodeName, int32 targetNodePort); static List * CopyPartitionShardsCommandList(ShardInterval *shardInterval, const char *sourceNodeName, int32 sourceNodePort); @@ -93,6 +96,7 @@ static void EnsureEnoughDiskSpaceForShardMove(List *colocatedShardList, char *targetNodeName, uint32 targetNodePort); + /* declarations for dynamic loading */ PG_FUNCTION_INFO_V1(citus_copy_shard_placement); PG_FUNCTION_INFO_V1(master_copy_shard_placement); @@ -118,6 +122,9 @@ bool CheckAvailableSpaceBeforeMove = true; Datum citus_copy_shard_placement(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + EnsureCoordinator(); + int64 shardId = 
PG_GETARG_INT64(0); text *sourceNodeNameText = PG_GETARG_TEXT_P(1); int32 sourceNodePort = PG_GETARG_INT32(2); @@ -129,9 +136,6 @@ citus_copy_shard_placement(PG_FUNCTION_ARGS) char *sourceNodeName = text_to_cstring(sourceNodeNameText); char *targetNodeName = text_to_cstring(targetNodeNameText); - EnsureCoordinator(); - CheckCitusVersion(ERROR); - char shardReplicationMode = LookupShardTransferMode(shardReplicationModeOid); if (shardReplicationMode == TRANSFER_MODE_FORCE_LOGICAL) { @@ -279,6 +283,9 @@ CheckSpaceConstraints(MultiConnection *connection, uint64 colocationSizeInBytes) Datum citus_move_shard_placement(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + EnsureCoordinator(); + int64 shardId = PG_GETARG_INT64(0); char *sourceNodeName = text_to_cstring(PG_GETARG_TEXT_P(1)); int32 sourceNodePort = PG_GETARG_INT32(2); @@ -290,12 +297,9 @@ citus_move_shard_placement(PG_FUNCTION_ARGS) ListCell *colocatedTableCell = NULL; ListCell *colocatedShardCell = NULL; - - CheckCitusVersion(ERROR); - EnsureCoordinator(); - Oid relationId = RelationIdForShard(shardId); ErrorIfMoveCitusLocalTable(relationId); + ErrorIfTargetNodeIsNotSafeToMove(targetNodeName, targetNodePort); ShardInterval *shardInterval = LoadShardInterval(shardId); Oid distributedTableId = shardInterval->relationId; @@ -359,8 +363,9 @@ citus_move_shard_placement(PG_FUNCTION_ARGS) * CopyColocatedShardPlacement function copies given shard with its co-located * shards. */ + bool useLogicalReplication = false; CopyShardTables(colocatedShardList, sourceNodeName, sourceNodePort, targetNodeName, - targetNodePort); + targetNodePort, useLogicalReplication); ShardInterval *colocatedShard = NULL; foreach_ptr(colocatedShard, colocatedShardList) @@ -417,6 +422,51 @@ EnsureEnoughDiskSpaceForShardMove(List *colocatedShardList, } +/* + * ErrorIfTargetNodeIsNotSafeToMove throws error if the target node is not + * eligible for moving shards. + */ +void +ErrorIfTargetNodeIsNotSafeToMove(const char *targetNodeName, int targetNodePort) +{ + WorkerNode *workerNode = FindWorkerNode(targetNodeName, targetNodePort); + if (workerNode == NULL) + { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("Moving shards to a non-existing node is not supported"), + errhint( + "Add the target node via SELECT citus_add_node('%s', %d);", + targetNodeName, targetNodePort))); + } + + if (!workerNode->isActive) + { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("Moving shards to a non-active node is not supported"), + errhint( + "Activate the target node via SELECT citus_activate_node('%s', %d);", + targetNodeName, targetNodePort))); + } + + if (!workerNode->shouldHaveShards) + { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("Moving shards to a node that shouldn't have a shard is " + "not supported"), + errhint("Allow shards on the target node via " + "SELECT * FROM citus_set_node_property('%s', %d, 'shouldhaveshards', true);", + targetNodeName, targetNodePort))); + } + + if (!NodeIsPrimary(workerNode)) + { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("Moving shards to a secondary (e.g., replica) node is " + "not supported"))); + } +} + + /* * master_move_shard_placement is a wrapper around citus_move_shard_placement. 
*/ @@ -741,8 +791,9 @@ ReplicateColocatedShardPlacement(int64 shardId, char *sourceNodeName, EnsureReferenceTablesExistOnAllNodesExtended(shardReplicationMode); } + bool useLogicalReplication = false; CopyShardTables(colocatedShardList, sourceNodeName, sourceNodePort, - targetNodeName, targetNodePort); + targetNodeName, targetNodePort, useLogicalReplication); /* * Finally insert the placements to pg_dist_placement and sync it to the @@ -820,32 +871,51 @@ EnsureTableListSuitableForReplication(List *tableIdList) /* - * CopyColocatedShardPlacement copies a shard along with its co-located shards - * from a source node to target node. It does not make any checks about state - * of the shards. It is caller's responsibility to make those checks if they are - * necessary. + * CopyShardTables copies a shard along with its co-located shards from a source + * node to target node. It does not make any checks about state of the shards. + * It is caller's responsibility to make those checks if they are necessary. */ static void CopyShardTables(List *shardIntervalList, char *sourceNodeName, int32 sourceNodePort, - char *targetNodeName, int32 targetNodePort) + char *targetNodeName, int32 targetNodePort, bool useLogicalReplication) { - ShardInterval *shardInterval = NULL; + if (list_length(shardIntervalList) < 1) + { + return; + } + if (useLogicalReplication) + { + /* only supported in Citus enterprise */ + } + else + { + CopyShardTablesViaBlockWrites(shardIntervalList, sourceNodeName, sourceNodePort, + targetNodeName, targetNodePort); + } +} + + +/* + * CopyShardTablesViaBlockWrites copies a shard along with its co-located shards + * from a source node to target node via COPY command. While the command is in + * progress, the modifications on the source node is blocked. + */ +static void +CopyShardTablesViaBlockWrites(List *shardIntervalList, char *sourceNodeName, + int32 sourceNodePort, char *targetNodeName, + int32 targetNodePort) +{ MemoryContext localContext = AllocSetContextCreate(CurrentMemoryContext, - "CopyShardTables", + "CopyShardTablesViaBlockWrites", ALLOCSET_DEFAULT_SIZES); MemoryContext oldContext = MemoryContextSwitchTo(localContext); /* iterate through the colocated shards and copy each */ + ShardInterval *shardInterval = NULL; foreach_ptr(shardInterval, shardIntervalList) { - bool includeDataCopy = true; - - if (PartitionedTable(shardInterval->relationId)) - { - /* partitioned tables contain no data */ - includeDataCopy = false; - } + bool includeDataCopy = !PartitionedTable(shardInterval->relationId); List *ddlCommandList = CopyShardCommandList(shardInterval, sourceNodeName, sourceNodePort, includeDataCopy); @@ -853,11 +923,10 @@ CopyShardTables(List *shardIntervalList, char *sourceNodeName, int32 sourceNodeP SendCommandListToWorkerInSingleTransaction(targetNodeName, targetNodePort, tableOwner, ddlCommandList); + + MemoryContextReset(localContext); } - MemoryContextReset(localContext); - - /* * Once all shards are created, we can recreate relationships between shards. 
* @@ -868,15 +937,14 @@ CopyShardTables(List *shardIntervalList, char *sourceNodeName, int32 sourceNodeP { List *shardForeignConstraintCommandList = NIL; List *referenceTableForeignConstraintList = NIL; - - char *tableOwner = TableOwner(shardInterval->relationId); + List *commandList = NIL; CopyShardForeignConstraintCommandListGrouped(shardInterval, &shardForeignConstraintCommandList, &referenceTableForeignConstraintList); - List *commandList = list_concat(shardForeignConstraintCommandList, - referenceTableForeignConstraintList); + commandList = list_concat(commandList, shardForeignConstraintCommandList); + commandList = list_concat(commandList, referenceTableForeignConstraintList); if (PartitionTable(shardInterval->relationId)) { @@ -886,8 +954,10 @@ CopyShardTables(List *shardIntervalList, char *sourceNodeName, int32 sourceNodeP commandList = lappend(commandList, attachPartitionCommand); } + char *tableOwner = TableOwner(shardInterval->relationId); SendCommandListToWorkerInSingleTransaction(targetNodeName, targetNodePort, tableOwner, commandList); + MemoryContextReset(localContext); } @@ -991,9 +1061,39 @@ EnsureShardCanBeCopied(int64 shardId, const char *sourceNodeName, int32 sourceNo targetNodePort); if (targetPlacement != NULL) { - ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("shard " INT64_FORMAT " already exists in the target node", - shardId))); + if (targetPlacement->shardState == SHARD_STATE_TO_DELETE) + { + /* + * Trigger deletion of orphaned shards and hope that this removes + * the shard. + */ + DropOrphanedShardsInSeparateTransaction(); + shardPlacementList = ShardPlacementList(shardId); + targetPlacement = SearchShardPlacementInList(shardPlacementList, + targetNodeName, + targetNodePort); + + /* + * If it still doesn't remove the shard, then we error. 
+ */ + if (targetPlacement != NULL) + { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg( + "shard " INT64_FORMAT + " still exists on the target node as an orphaned shard", + shardId), + errdetail( + "The existing shard is orphaned, but could not be deleted because there are still active queries on it"))); + } + } + else + { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg( + "shard " INT64_FORMAT " already exists in the target node", + shardId))); + } } } @@ -1079,7 +1179,9 @@ CopyShardCommandList(ShardInterval *shardInterval, const char *sourceNodeName, copyShardDataCommand->data); } - List *indexCommandList = GetPostLoadTableCreationCommands(relationId, true); + bool includeReplicaIdentity = true; + List *indexCommandList = + GetPostLoadTableCreationCommands(relationId, true, includeReplicaIdentity); indexCommandList = WorkerApplyShardDDLCommandList(indexCommandList, shardId); copyShardToNodeCommandsList = list_concat(copyShardToNodeCommandsList, diff --git a/src/backend/distributed/operations/shard_cleaner.c b/src/backend/distributed/operations/shard_cleaner.c index a10fe1373..afa206594 100644 --- a/src/backend/distributed/operations/shard_cleaner.c +++ b/src/backend/distributed/operations/shard_cleaner.c @@ -12,59 +12,109 @@ #include "postgres.h" +#include "access/xact.h" +#include "postmaster/postmaster.h" #include "distributed/coordinator_protocol.h" #include "distributed/metadata_cache.h" #include "distributed/shard_cleaner.h" +#include "distributed/shard_rebalancer.h" +#include "distributed/remote_commands.h" #include "distributed/resource_lock.h" #include "distributed/worker_transaction.h" /* declarations for dynamic loading */ -PG_FUNCTION_INFO_V1(master_defer_delete_shards); +PG_FUNCTION_INFO_V1(citus_cleanup_orphaned_shards); +PG_FUNCTION_INFO_V1(isolation_cleanup_orphaned_shards); static bool TryDropShard(GroupShardPlacement *placement); +static bool TryLockRelationAndPlacementCleanup(Oid relationId, LOCKMODE lockmode); + /* - * master_defer_delete_shards implements a user-facing UDF to deleter orphaned shards that - * are still haning around in the system. These shards are orphaned by previous actions - * that were not directly able to delete the placements eg. shard moving or dropping of a - * distributed table while one of the data nodes was not online. + * citus_cleanup_orphaned_shards implements a user-facing UDF to delete + * orphaned shards that are still hanging around in the system. These shards are + * orphaned by previous actions that were not directly able to delete the + * placements, e.g. shard moving or dropping of a distributed table while one of + * the data nodes was not online. * - * This function iterates through placements where shardstate is SHARD_STATE_TO_DELETE - * (shardstate = 4), drops the corresponding tables from the node and removes the - * placement information from the catalog. + * This function iterates through placements where shardstate is + * SHARD_STATE_TO_DELETE (shardstate = 4), drops the corresponding tables from + * the node and removes the placement information from the catalog. * - * The function takes no arguments and runs cluster wide + * The function takes no arguments and runs cluster wide. It cannot be run in a + * transaction, because holding the locks it takes for a long time is not good. + * While the locks are held, it is impossible for the background daemon to + * clean up orphaned shards. 
*/ Datum -master_defer_delete_shards(PG_FUNCTION_ARGS) +citus_cleanup_orphaned_shards(PG_FUNCTION_ARGS) { CheckCitusVersion(ERROR); EnsureCoordinator(); + PreventInTransactionBlock(true, "citus_cleanup_orphaned_shards"); - bool waitForCleanupLock = true; - int droppedShardCount = DropMarkedShards(waitForCleanupLock); + bool waitForLocks = true; + int droppedShardCount = DropOrphanedShards(waitForLocks); + if (droppedShardCount > 0) + { + ereport(NOTICE, (errmsg("cleaned up %d orphaned shards", droppedShardCount))); + } - PG_RETURN_INT32(droppedShardCount); + PG_RETURN_VOID(); } /* - * TryDropMarkedShards is a wrapper around DropMarkedShards that catches + * isolation_cleanup_orphaned_shards implements a test UDF that's the same as + * citus_cleanup_orphaned_shards. The only difference is that this command can + * be run in transactions, this is to test + */ +Datum +isolation_cleanup_orphaned_shards(PG_FUNCTION_ARGS) +{ + CheckCitusVersion(ERROR); + EnsureCoordinator(); + + bool waitForLocks = true; + int droppedShardCount = DropOrphanedShards(waitForLocks); + if (droppedShardCount > 0) + { + ereport(NOTICE, (errmsg("cleaned up %d orphaned shards", droppedShardCount))); + } + + PG_RETURN_VOID(); +} + + +/* + * DropOrphanedShardsInSeparateTransaction cleans up orphaned shards by + * connecting to localhost. This is done, so that the locks that + * DropOrphanedShards takes are only held for a short time. + */ +void +DropOrphanedShardsInSeparateTransaction(void) +{ + ExecuteCriticalCommandInSeparateTransaction("CALL citus_cleanup_orphaned_shards()"); +} + + +/* + * TryDropOrphanedShards is a wrapper around DropOrphanedShards that catches * any errors to make it safe to use in the maintenance daemon. * * If dropping any of the shards failed this function returns -1, otherwise it * returns the number of dropped shards. */ int -TryDropMarkedShards(bool waitForCleanupLock) +TryDropOrphanedShards(bool waitForLocks) { int droppedShardCount = 0; MemoryContext savedContext = CurrentMemoryContext; PG_TRY(); { - droppedShardCount = DropMarkedShards(waitForCleanupLock); + droppedShardCount = DropOrphanedShards(waitForLocks); } PG_CATCH(); { @@ -83,7 +133,7 @@ TryDropMarkedShards(bool waitForCleanupLock) /* - * DropMarkedShards removes shards that were marked SHARD_STATE_TO_DELETE before. + * DropOrphanedShards removes shards that were marked SHARD_STATE_TO_DELETE before. * * It does so by trying to take an exclusive lock on the shard and its * colocated placements before removing. If the lock cannot be obtained it @@ -91,32 +141,45 @@ TryDropMarkedShards(bool waitForCleanupLock) * will be removed at a later time when there are no locks held anymore on * those placements. * + * If waitForLocks is false, then if we cannot take a lock on pg_dist_placement + * we continue without waiting. + * * Before doing any of this it will take an exclusive PlacementCleanup lock. * This is to ensure that this function is not being run concurrently. * Otherwise really bad race conditions are possible, such as removing all - * placements of a shard. waitForCleanupLock indicates if this function should - * wait for this lock or error out. + * placements of a shard. waitForLocks indicates if this function should + * wait for this lock or not. * */ int -DropMarkedShards(bool waitForCleanupLock) +DropOrphanedShards(bool waitForLocks) { int removedShardCount = 0; ListCell *shardPlacementCell = NULL; + /* + * We should try to take the highest lock that we take + * later in this function for pg_dist_placement. 
We take RowExclusiveLock + * in DeleteShardPlacementRow. + */ + LOCKMODE lockmode = RowExclusiveLock; + if (!IsCoordinator()) { return 0; } - if (waitForCleanupLock) + if (waitForLocks) { LockPlacementCleanup(); } - else if (!TryLockPlacementCleanup()) + else { - ereport(WARNING, (errmsg("could not acquire lock to cleanup placements"))); - return 0; + Oid distPlacementId = DistPlacementRelationId(); + if (!TryLockRelationAndPlacementCleanup(distPlacementId, lockmode)) + { + return 0; + } } int failedShardDropCount = 0; @@ -145,7 +208,7 @@ DropMarkedShards(bool waitForCleanupLock) if (failedShardDropCount > 0) { - ereport(WARNING, (errmsg("Failed to drop %d old shards out of %d", + ereport(WARNING, (errmsg("Failed to drop %d orphaned shards out of %d", failedShardDropCount, list_length(shardPlacementList)))); } @@ -153,10 +216,33 @@ DropMarkedShards(bool waitForCleanupLock) } +/* + * TryLockRelationAndCleanup tries to lock the given relation + * and the placement cleanup. If it cannot, it returns false. + * + */ +static bool +TryLockRelationAndPlacementCleanup(Oid relationId, LOCKMODE lockmode) +{ + if (!ConditionalLockRelationOid(relationId, lockmode)) + { + ereport(DEBUG1, (errmsg( + "could not acquire shard lock to cleanup placements"))); + return false; + } + + if (!TryLockPlacementCleanup()) + { + ereport(DEBUG1, (errmsg("could not acquire lock to cleanup placements"))); + return false; + } + return true; +} + + /* * TryDropShard tries to drop the given shard placement and returns - * true on success. On failure, this method swallows errors and emits them - * as WARNINGs. + * true on success. */ static bool TryDropShard(GroupShardPlacement *placement) diff --git a/src/backend/distributed/operations/shard_rebalancer.c b/src/backend/distributed/operations/shard_rebalancer.c index 8fd3860ac..0049d1f0e 100644 --- a/src/backend/distributed/operations/shard_rebalancer.c +++ b/src/backend/distributed/operations/shard_rebalancer.c @@ -30,8 +30,8 @@ #include "distributed/connection_management.h" #include "distributed/enterprise.h" #include "distributed/hash_helpers.h" -#include "distributed/intermediate_result_pruning.h" #include "distributed/listutils.h" +#include "distributed/lock_graph.h" #include "distributed/coordinator_protocol.h" #include "distributed/metadata_cache.h" #include "distributed/multi_client_executor.h" @@ -43,6 +43,7 @@ #include "distributed/repair_shards.h" #include "distributed/resource_lock.h" #include "distributed/shard_rebalancer.h" +#include "distributed/shard_cleaner.h" #include "distributed/tuplestore.h" #include "distributed/worker_protocol.h" #include "funcapi.h" @@ -70,6 +71,7 @@ typedef struct RebalanceOptions int32 maxShardMoves; ArrayType *excludedShardArray; bool drainOnly; + float4 improvementThreshold; Form_pg_dist_rebalance_strategy rebalanceStrategy; } RebalanceOptions; @@ -80,14 +82,54 @@ typedef struct RebalanceOptions */ typedef struct RebalanceState { + /* + * placementsHash contains the current state of all shard placements, it + * is initialized from pg_dist_placement and is then modified based on the + * found shard moves. + */ HTAB *placementsHash; + + /* + * placementUpdateList contains all of the updates that have been done to + * reach the current state of placementsHash. + */ List *placementUpdateList; RebalancePlanFunctions *functions; + + /* + * fillStateListDesc contains all NodeFillStates ordered from full nodes to + * empty nodes. 
+ */
 List *fillStateListDesc;
+
+ /*
+ * fillStateListAsc contains all NodeFillStates ordered from empty nodes to
+ * full nodes.
+ */
 List *fillStateListAsc;
+
+ /*
+ * disallowedPlacementList contains all placements that currently exist,
+ * but are not allowed according to the shardAllowedOnNode function.
+ */
 List *disallowedPlacementList;
+
+ /*
+ * totalCost is the cost of all the shards in the cluster added together.
+ */
 float4 totalCost;
+
+ /*
+ * totalCapacity is the capacity of all the nodes in the cluster added
+ * together.
+ */
 float4 totalCapacity;
+
+ /*
+ * ignoredMoves is the number of moves that were ignored. This is used to
+ * limit the number of log lines we emit.
+ */
+ int64 ignoredMoves;
 } RebalanceState;
@@ -99,11 +141,52 @@ typedef struct RebalanceContext
 FmgrInfo shardAllowedOnNodeUDF;
 } RebalanceContext;
+/* WorkerHashKey contains hostname and port to be used as a key in a hash */
+typedef struct WorkerHashKey
+{
+ char hostname[MAX_NODE_LENGTH];
+ int port;
+} WorkerHashKey;
+
+/* WorkerShardIds represents a set of shardIds grouped by worker */
+typedef struct WorkerShardIds
+{
+ WorkerHashKey worker;
+
+ /* This is a uint64 hashset representing the shard ids for a specific worker */
+ HTAB *shardIds;
+} WorkerShardIds;
+
+/* ShardStatistics contains statistics about a shard */
+typedef struct ShardStatistics
+{
+ uint64 shardId;
+
+ /* The size of the shard in bytes. */
+ uint64 totalSize;
+} ShardStatistics;
+
+/*
+ * WorkerShardStatistics represents a set of statistics about shards,
+ * grouped by worker.
+ */
+typedef struct WorkerShardStatistics
+{
+ WorkerHashKey worker;
+
+ /*
+ * Statistics for each shard on this worker:
+ * key: shardId
+ * value: ShardStatistics
+ */
+ HTAB *statistics;
+} WorkerShardStatistics;
+
 /* static declarations for main logic */
 static int ShardActivePlacementCount(HTAB *activePlacementsHash, uint64 shardId,
 List *activeWorkerNodeList);
-static bool UpdateShardPlacement(PlacementUpdateEvent *placementUpdateEvent,
+static void UpdateShardPlacement(PlacementUpdateEvent *placementUpdateEvent,
 List *responsiveNodeList, Oid shardReplicationModeOid);
 /* static declarations for main logic's utility functions */
@@ -128,6 +211,7 @@ static RebalanceState * InitRebalanceState(List *workerNodeList, List *shardPlac
 static void MoveShardsAwayFromDisallowedNodes(RebalanceState *state);
 static bool FindAndMoveShardCost(float4 utilizationLowerBound,
 float4 utilizationUpperBound,
+ float4 improvementThreshold,
 RebalanceState *state);
 static NodeFillState * FindAllowedTargetFillState(RebalanceState *state, uint64 shardId);
 static void MoveShardCost(NodeFillState *sourceFillState, NodeFillState *targetFillState,
@@ -151,6 +235,17 @@ static Form_pg_dist_rebalance_strategy GetRebalanceStrategy(Name name);
 static void EnsureShardCostUDF(Oid functionOid);
 static void EnsureNodeCapacityUDF(Oid functionOid);
 static void EnsureShardAllowedOnNodeUDF(Oid functionOid);
+static void ConflictShardPlacementUpdateOnlyWithIsolationTesting(uint64 shardId);
+static HTAB * BuildWorkerShardStatisticsHash(PlacementUpdateEventProgress *steps,
+ int stepCount);
+static HTAB * GetShardStatistics(MultiConnection *connection, HTAB *shardIds);
+static HTAB * GetMovedShardIdsByWorker(PlacementUpdateEventProgress *steps,
+ int stepCount, bool fromSource);
+static uint64 WorkerShardSize(HTAB *workerShardStatistics,
+ char *workerName, int workerPort, uint64 shardId);
+static void AddToWorkerShardIdSet(HTAB *shardsByWorker, char *workerName, int workerPort,
+ uint64 shardId);
+static HTAB * BuildShardSizesHash(ProgressMonitorData *monitor, HTAB *shardStatistics); /* declarations for dynamic loading */ PG_FUNCTION_INFO_V1(rebalance_table_shards); @@ -163,6 +258,17 @@ PG_FUNCTION_INFO_V1(citus_shard_cost_by_disk_size); PG_FUNCTION_INFO_V1(citus_validate_rebalance_strategy_functions); PG_FUNCTION_INFO_V1(pg_dist_rebalance_strategy_enterprise_check); +bool RunningUnderIsolationTest = false; +int MaxRebalancerLoggedIgnoredMoves = 5; + +/* + * This is randomly generated hardcoded number. It's used as the first part of + * the advisory lock identifier that's used during isolation tests. See the + * comments on ConflictShardPlacementUpdateOnlyWithIsolationTesting, for more + * information. + */ +#define SHARD_PLACEMENT_UPDATE_ADVISORY_LOCK_FIRST_KEY 29279 + #ifdef USE_ASSERT_CHECKING @@ -369,6 +475,7 @@ GetRebalanceSteps(RebalanceOptions *options) options->threshold, options->maxShardMoves, options->drainOnly, + options->improvementThreshold, &rebalancePlanFunctions); } @@ -420,8 +527,7 @@ NodeCapacity(WorkerNode *workerNode, void *voidContext) static ShardCost GetShardCost(uint64 shardId, void *voidContext) { - ShardCost shardCost; - memset_struct_0(shardCost); + ShardCost shardCost = { 0 }; shardCost.shardId = shardId; RebalanceContext *context = voidContext; Datum shardCostDatum = FunctionCall1(&context->shardCostUDF, UInt64GetDatum(shardId)); @@ -442,6 +548,7 @@ GetShardCost(uint64 shardId, void *voidContext) Datum citus_shard_cost_by_disk_size(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); uint64 shardId = PG_GETARG_INT64(0); bool missingOk = false; ShardPlacement *shardPlacement = ActiveShardPlacement(shardId, missingOk); @@ -594,6 +701,8 @@ ExecutePlacementUpdates(List *placementUpdateList, Oid shardReplicationModeOid, "unsupported"))); } + DropOrphanedShardsInSeparateTransaction(); + foreach(placementUpdateCell, placementUpdateList) { PlacementUpdateEvent *placementUpdate = lfirst(placementUpdateCell); @@ -654,7 +763,7 @@ SetupRebalanceMonitor(List *placementUpdateList, Oid relationId) event->shardId = colocatedUpdate->shardId; event->sourcePort = colocatedUpdate->sourceNode->workerPort; event->targetPort = colocatedUpdate->targetNode->workerPort; - event->shardSize = ShardLength(colocatedUpdate->shardId); + pg_atomic_init_u64(&event->progress, REBALANCE_PROGRESS_WAITING); eventIndex++; } @@ -679,6 +788,7 @@ SetupRebalanceMonitor(List *placementUpdateList, Oid relationId) Datum rebalance_table_shards(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); List *relationIdList = NIL; if (!PG_ARGISNULL(0)) { @@ -711,6 +821,7 @@ rebalance_table_shards(PG_FUNCTION_ARGS) .excludedShardArray = PG_GETARG_ARRAYTYPE_P(3), .drainOnly = PG_GETARG_BOOL(5), .rebalanceStrategy = strategy, + .improvementThreshold = strategy->improvementThreshold, }; Oid shardTransferModeOid = PG_GETARG_OID(4); RebalanceTableShards(&options, shardTransferModeOid); @@ -782,6 +893,7 @@ GetRebalanceStrategy(Name name) Datum citus_drain_node(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); PG_ENSURE_ARGNOTNULL(0, "nodename"); PG_ENSURE_ARGNOTNULL(1, "nodeport"); PG_ENSURE_ARGNOTNULL(2, "shard_transfer_mode"); @@ -801,17 +913,15 @@ citus_drain_node(PG_FUNCTION_ARGS) }; char *nodeName = text_to_cstring(nodeNameText); - int connectionFlag = FORCE_NEW_CONNECTION; - MultiConnection *connection = GetNodeConnection(connectionFlag, LOCAL_HOST_NAME, - PostPortNumber); /* * This is done in a separate session. This way it's not undone if the * draining fails midway through. 
*/ - ExecuteCriticalRemoteCommand(connection, psprintf( - "SELECT master_set_node_property(%s, %i, 'shouldhaveshards', false)", - quote_literal_cstr(nodeName), nodePort)); + ExecuteCriticalCommandInSeparateTransaction(psprintf( + "SELECT master_set_node_property(%s, %i, 'shouldhaveshards', false)", + quote_literal_cstr(nodeName), + nodePort)); RebalanceTableShards(&options, shardTransferModeOid); @@ -826,6 +936,7 @@ citus_drain_node(PG_FUNCTION_ARGS) Datum replicate_table_shards(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); Oid relationId = PG_GETARG_OID(0); uint32 shardReplicationFactor = PG_GETARG_INT32(1); int32 maxShardCopies = PG_GETARG_INT32(2); @@ -880,6 +991,7 @@ master_drain_node(PG_FUNCTION_ARGS) Datum get_rebalance_table_shards_plan(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); List *relationIdList = NIL; if (!PG_ARGISNULL(0)) { @@ -911,6 +1023,8 @@ get_rebalance_table_shards_plan(PG_FUNCTION_ARGS) .excludedShardArray = PG_GETARG_ARRAYTYPE_P(3), .drainOnly = PG_GETARG_BOOL(4), .rebalanceStrategy = strategy, + .improvementThreshold = PG_GETARG_FLOAT4_OR_DEFAULT( + 6, strategy->improvementThreshold), }; @@ -959,6 +1073,7 @@ get_rebalance_table_shards_plan(PG_FUNCTION_ARGS) Datum get_rebalance_progress(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); List *segmentList = NIL; ListCell *rebalanceMonitorCell = NULL; TupleDesc tupdesc; @@ -968,19 +1083,35 @@ get_rebalance_progress(PG_FUNCTION_ARGS) List *rebalanceMonitorList = ProgressMonitorList(REBALANCE_ACTIVITY_MAGIC_NUMBER, &segmentList); + foreach(rebalanceMonitorCell, rebalanceMonitorList) { ProgressMonitorData *monitor = lfirst(rebalanceMonitorCell); PlacementUpdateEventProgress *placementUpdateEvents = monitor->steps; - + HTAB *shardStatistics = BuildWorkerShardStatisticsHash(monitor->steps, + monitor->stepCount); + HTAB *shardSizes = BuildShardSizesHash(monitor, shardStatistics); for (int eventIndex = 0; eventIndex < monitor->stepCount; eventIndex++) { PlacementUpdateEventProgress *step = placementUpdateEvents + eventIndex; uint64 shardId = step->shardId; ShardInterval *shardInterval = LoadShardInterval(shardId); - Datum values[9]; - bool nulls[9]; + uint64 sourceSize = WorkerShardSize(shardStatistics, step->sourceName, + step->sourcePort, shardId); + uint64 targetSize = WorkerShardSize(shardStatistics, step->targetName, + step->targetPort, shardId); + + uint64 shardSize = 0; + ShardStatistics *shardSizesStat = + hash_search(shardSizes, &shardId, HASH_FIND, NULL); + if (shardSizesStat) + { + shardSize = shardSizesStat->totalSize; + } + + Datum values[11]; + bool nulls[11]; memset(values, 0, sizeof(values)); memset(nulls, 0, sizeof(nulls)); @@ -988,12 +1119,14 @@ get_rebalance_progress(PG_FUNCTION_ARGS) values[0] = monitor->processId; values[1] = ObjectIdGetDatum(shardInterval->relationId); values[2] = UInt64GetDatum(shardId); - values[3] = UInt64GetDatum(step->shardSize); + values[3] = UInt64GetDatum(shardSize); values[4] = PointerGetDatum(cstring_to_text(step->sourceName)); values[5] = UInt32GetDatum(step->sourcePort); values[6] = PointerGetDatum(cstring_to_text(step->targetName)); values[7] = UInt32GetDatum(step->targetPort); - values[8] = UInt64GetDatum(step->progress); + values[8] = UInt64GetDatum(pg_atomic_read_u64(&step->progress)); + values[9] = UInt64GetDatum(sourceSize); + values[10] = UInt64GetDatum(targetSize); tuplestore_putvalues(tupstore, tupdesc, values, nulls); } @@ -1007,6 +1140,349 @@ get_rebalance_progress(PG_FUNCTION_ARGS) } +/* + * BuildShardSizesHash creates a hash that maps a shardid to its full size 
+ * within the cluster. It does this by using the rebalance progress monitor + * state to find the node the shard is currently on. It then looks up the shard + * size in the shardStatistics hashmap for this node. + */ +static HTAB * +BuildShardSizesHash(ProgressMonitorData *monitor, HTAB *shardStatistics) +{ + HASHCTL info = { + .keysize = sizeof(uint64), + .entrysize = sizeof(ShardStatistics), + .hcxt = CurrentMemoryContext + }; + + HTAB *shardSizes = hash_create( + "ShardSizeHash", 32, &info, + HASH_ELEM | HASH_CONTEXT | HASH_BLOBS); + PlacementUpdateEventProgress *placementUpdateEvents = monitor->steps; + for (int eventIndex = 0; eventIndex < monitor->stepCount; eventIndex++) + { + PlacementUpdateEventProgress *step = placementUpdateEvents + eventIndex; + uint64 shardId = step->shardId; + uint64 shardSize = 0; + uint64 backupShardSize = 0; + uint64 progress = pg_atomic_read_u64(&step->progress); + + uint64 sourceSize = WorkerShardSize(shardStatistics, step->sourceName, + step->sourcePort, shardId); + uint64 targetSize = WorkerShardSize(shardStatistics, step->targetName, + step->targetPort, shardId); + + if (progress == REBALANCE_PROGRESS_WAITING || + progress == REBALANCE_PROGRESS_MOVING) + { + /* + * If we are not done with the move, the correct shard size is the + * size on the source. + */ + shardSize = sourceSize; + backupShardSize = targetSize; + } + else if (progress == REBALANCE_PROGRESS_MOVED) + { + /* + * If we are done with the move, the correct shard size is the size + * on the target + */ + shardSize = targetSize; + backupShardSize = sourceSize; + } + + if (shardSize == 0) + { + if (backupShardSize == 0) + { + /* + * We don't have any useful shard size. This can happen when a + * shard is moved multiple times and it is not present on + * either of these nodes. Probably the shard is on a worker + * related to another event. In the weird case that this shard + * is on the nodes and actually is size 0, we will have no + * entry in the hashmap. When fetching from it we always + * default to 0 if no entry is found, so that's fine. + */ + continue; + } + + /* + * Because of the way we fetch shard sizes they are from a slightly + * earlier moment than the progress state we just read from shared + * memory. Usually this is no problem, but there exist some race + * conditions where this matters. For example, for very quick moves + * it is possible that even though a step is now reported as MOVED, + * when we read the shard sizes the move had not even started yet. + * This in turn can mean that the target size is 0 while the source + * size is not. We try to handle such rare edge cases by falling + * back on the other shard size if that one is not 0. + */ + shardSize = backupShardSize; + } + + + ShardStatistics *currentWorkerStatistics = + hash_search(shardSizes, &shardId, HASH_ENTER, NULL); + currentWorkerStatistics->totalSize = shardSize; + } + return shardSizes; +} + + +/* + * WorkerShardSize returns the size of a shard in bytes on a worker, based on + * the workerShardStatisticsHash. 
+ */ +static uint64 +WorkerShardSize(HTAB *workerShardStatisticsHash, char *workerName, int workerPort, + uint64 shardId) +{ + WorkerHashKey workerKey = { 0 }; + strlcpy(workerKey.hostname, workerName, MAX_NODE_LENGTH); + workerKey.port = workerPort; + + WorkerShardStatistics *workerStats = + hash_search(workerShardStatisticsHash, &workerKey, HASH_FIND, NULL); + if (!workerStats) + { + return 0; + } + + ShardStatistics *shardStats = + hash_search(workerStats->statistics, &shardId, HASH_FIND, NULL); + if (!shardStats) + { + return 0; + } + return shardStats->totalSize; +} + + +/* + * BuildWorkerShardStatisticsHash returns a shard id -> shard statistics hash containing + * sizes of shards on the source node and destination node. + */ +static HTAB * +BuildWorkerShardStatisticsHash(PlacementUpdateEventProgress *steps, int stepCount) +{ + HTAB *shardsByWorker = GetMovedShardIdsByWorker(steps, stepCount, true); + + HASHCTL info = { + .keysize = sizeof(WorkerHashKey), + .entrysize = sizeof(WorkerShardStatistics), + .hcxt = CurrentMemoryContext + }; + + HTAB *workerShardStatistics = hash_create("WorkerShardStatistics", 32, &info, + HASH_ELEM | HASH_CONTEXT | HASH_BLOBS); + WorkerShardIds *entry = NULL; + + HASH_SEQ_STATUS status; + hash_seq_init(&status, shardsByWorker); + while ((entry = hash_seq_search(&status)) != NULL) + { + int connectionFlags = 0; + MultiConnection *connection = GetNodeConnection(connectionFlags, + entry->worker.hostname, + entry->worker.port); + + HTAB *statistics = + GetShardStatistics(connection, entry->shardIds); + + WorkerHashKey workerKey = { 0 }; + strlcpy(workerKey.hostname, entry->worker.hostname, MAX_NODE_LENGTH); + workerKey.port = entry->worker.port; + + WorkerShardStatistics *moveStat = + hash_search(workerShardStatistics, &entry->worker, HASH_ENTER, NULL); + moveStat->statistics = statistics; + } + + return workerShardStatistics; +} + + +/* + * GetShardStatistics fetches the statics for the given shard ids over the + * given connection. It returns a hashmap where the keys are the shard ids and + * the values are the statistics. 
+ */ +static HTAB * +GetShardStatistics(MultiConnection *connection, HTAB *shardIds) +{ + StringInfo query = makeStringInfo(); + + appendStringInfoString( + query, + "WITH shard_names (shard_id, schema_name, table_name) AS ((VALUES "); + + bool isFirst = true; + uint64 *shardIdPtr = NULL; + HASH_SEQ_STATUS status; + hash_seq_init(&status, shardIds); + while ((shardIdPtr = hash_seq_search(&status)) != NULL) + { + uint64 shardId = *shardIdPtr; + ShardInterval *shardInterval = LoadShardInterval(shardId); + Oid relationId = shardInterval->relationId; + char *shardName = get_rel_name(relationId); + + AppendShardIdToName(&shardName, shardId); + + Oid schemaId = get_rel_namespace(relationId); + char *schemaName = get_namespace_name(schemaId); + if (!isFirst) + { + appendStringInfo(query, ", "); + } + + appendStringInfo(query, "(" UINT64_FORMAT ",%s,%s)", + shardId, + quote_literal_cstr(schemaName), + quote_literal_cstr(shardName)); + + isFirst = false; + } + + appendStringInfoString(query, "))"); + appendStringInfoString( + query, + " SELECT shard_id, coalesce(pg_total_relation_size(tables.relid),0)" + + /* for each shard in shardIds */ + " FROM shard_names" + + /* check if its name can be found in pg_class, if so return size */ + " LEFT JOIN" + " (SELECT c.oid AS relid, c.relname, n.nspname" + " FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace) tables" + " ON tables.relname = shard_names.table_name AND" + " tables.nspname = shard_names.schema_name "); + + PGresult *result = NULL; + int queryResult = ExecuteOptionalRemoteCommand(connection, query->data, &result); + if (queryResult != RESPONSE_OKAY) + { + ereport(ERROR, (errcode(ERRCODE_CONNECTION_FAILURE), + errmsg("cannot get the size because of a connection error"))); + } + + int rowCount = PQntuples(result); + int colCount = PQnfields(result); + + /* This is not expected to ever happen, but we check just to be sure */ + if (colCount < 2) + { + ereport(ERROR, (errmsg("unexpected number of columns returned by: %s", + query->data))); + } + + HASHCTL info = { + .keysize = sizeof(uint64), + .entrysize = sizeof(ShardStatistics), + .hcxt = CurrentMemoryContext + }; + + HTAB *shardStatistics = hash_create("ShardStatisticsHash", 32, &info, + HASH_ELEM | HASH_CONTEXT | HASH_BLOBS); + + for (int rowIndex = 0; rowIndex < rowCount; rowIndex++) + { + char *shardIdString = PQgetvalue(result, rowIndex, 0); + uint64 shardId = pg_strtouint64(shardIdString, NULL, 10); + char *sizeString = PQgetvalue(result, rowIndex, 1); + uint64 totalSize = pg_strtouint64(sizeString, NULL, 10); + + ShardStatistics *statistics = + hash_search(shardStatistics, &shardId, HASH_ENTER, NULL); + statistics->totalSize = totalSize; + } + + PQclear(result); + + bool raiseErrors = true; + ClearResults(connection, raiseErrors); + + return shardStatistics; +} + + +/* + * GetMovedShardIdsByWorker groups the shard ids in the provided steps by + * worker. It returns a hashmap that contains a set of these shard ids. 
+ */ +static HTAB * +GetMovedShardIdsByWorker(PlacementUpdateEventProgress *steps, int stepCount, + bool fromSource) +{ + HASHCTL info = { + .keysize = sizeof(WorkerHashKey), + .entrysize = sizeof(WorkerShardIds), + .hcxt = CurrentMemoryContext + }; + + HTAB *shardsByWorker = hash_create("GetRebalanceStepsByWorker", 32, &info, + HASH_ELEM | HASH_CONTEXT | HASH_BLOBS); + + for (int stepIndex = 0; stepIndex < stepCount; stepIndex++) + { + PlacementUpdateEventProgress *step = &(steps[stepIndex]); + + AddToWorkerShardIdSet(shardsByWorker, step->sourceName, step->sourcePort, + step->shardId); + + if (pg_atomic_read_u64(&step->progress) == REBALANCE_PROGRESS_WAITING) + { + /* + * shard move has not started so we don't need target stats for + * this shard + */ + continue; + } + + AddToWorkerShardIdSet(shardsByWorker, step->targetName, step->targetPort, + step->shardId); + } + + return shardsByWorker; +} + + +/* + * AddToWorkerShardIdSet adds the shard id to the shard id set for the + * specified worker in the shardsByWorker hashmap. + */ +static void +AddToWorkerShardIdSet(HTAB *shardsByWorker, char *workerName, int workerPort, + uint64 shardId) +{ + WorkerHashKey workerKey = { 0 }; + + strlcpy(workerKey.hostname, workerName, MAX_NODE_LENGTH); + workerKey.port = workerPort; + + bool isFound = false; + WorkerShardIds *workerShardIds = + hash_search(shardsByWorker, &workerKey, HASH_ENTER, &isFound); + if (!isFound) + { + HASHCTL info = { + .keysize = sizeof(uint64), + .entrysize = sizeof(uint64), + .hcxt = CurrentMemoryContext + }; + + workerShardIds->shardIds = hash_create( + "WorkerShardIdsSet", 32, &info, + HASH_ELEM | HASH_CONTEXT | HASH_BLOBS); + } + + hash_search(workerShardIds->shardIds, &shardId, HASH_ENTER, NULL); +} + + /* * NonColocatedDistRelationIdList returns a list of distributed table oids, one * for each existing colocation group. @@ -1104,11 +1580,47 @@ RebalanceTableShards(RebalanceOptions *options, Oid shardReplicationModeOid) } +/* + * ConflictShardPlacementUpdateOnlyWithIsolationTesting is only useful for + * testing and should not be called by any code-path except for + * UpdateShardPlacement(). + * + * To be able to test the rebalance monitor functionality correctly, we need to + * be able to pause the rebalancer at a specific place in time. We cannot do + * this by block the shard move itself someway (e.g. by calling truncate on the + * distributed table). The reason for this is that we do the shard move in a + * newly opened connection. This causes our isolation tester block detection to + * not realise that the rebalance_table_shards call is blocked. + * + * So instead, before opening a connection we lock an advisory lock that's + * based on the shard id (shard id mod 1000). By locking this advisory lock in + * a different session we can block the rebalancer in a way that the isolation + * tester block detection is able to detect. + */ +static void +ConflictShardPlacementUpdateOnlyWithIsolationTesting(uint64 shardId) +{ + LOCKTAG tag; + const bool sessionLock = false; + const bool dontWait = false; + + if (RunningUnderIsolationTest) + { + /* we've picked a random lock */ + SET_LOCKTAG_ADVISORY(tag, MyDatabaseId, + SHARD_PLACEMENT_UPDATE_ADVISORY_LOCK_FIRST_KEY, + shardId % 1000, 2); + + (void) LockAcquire(&tag, ExclusiveLock, sessionLock, dontWait); + } +} + + /* * UpdateShardPlacement copies or moves a shard placement by calling * the corresponding functions in Citus in a subtransaction. 
*/ -static bool +static void UpdateShardPlacement(PlacementUpdateEvent *placementUpdateEvent, List *responsiveNodeList, Oid shardReplicationModeOid) { @@ -1130,13 +1642,9 @@ UpdateShardPlacement(PlacementUpdateEvent *placementUpdateEvent, targetNode->workerPort); if (!targetResponsive) { - ereport(WARNING, (errmsg("%s:%d is not responsive", targetNode->workerName, - targetNode->workerPort))); - UpdateColocatedShardPlacementProgress(shardId, - sourceNode->workerName, - sourceNode->workerPort, - REBALANCE_PROGRESS_ERROR); - return false; + ereport(ERROR, (errmsg("target node %s:%d is not responsive", + targetNode->workerName, + targetNode->workerPort))); } /* if source node is not responsive, don't continue */ @@ -1145,13 +1653,9 @@ UpdateShardPlacement(PlacementUpdateEvent *placementUpdateEvent, sourceNode->workerPort); if (!sourceResponsive) { - ereport(WARNING, (errmsg("%s:%d is not responsive", sourceNode->workerName, - sourceNode->workerPort))); - UpdateColocatedShardPlacementProgress(shardId, - sourceNode->workerName, - sourceNode->workerPort, - REBALANCE_PROGRESS_ERROR); - return false; + ereport(ERROR, (errmsg("source node %s:%d is not responsive", + sourceNode->workerName, + sourceNode->workerPort))); } if (updateType == PLACEMENT_UPDATE_MOVE) @@ -1188,23 +1692,34 @@ UpdateShardPlacement(PlacementUpdateEvent *placementUpdateEvent, sourceNode->workerPort, REBALANCE_PROGRESS_MOVING); - int connectionFlag = FORCE_NEW_CONNECTION; - MultiConnection *connection = GetNodeConnection(connectionFlag, LOCAL_HOST_NAME, - PostPortNumber); + ConflictShardPlacementUpdateOnlyWithIsolationTesting(shardId); /* * In case of failure, we throw an error such that rebalance_table_shards * fails early. */ - ExecuteCriticalRemoteCommand(connection, placementUpdateCommand->data); + ExecuteCriticalCommandInSeparateTransaction(placementUpdateCommand->data); UpdateColocatedShardPlacementProgress(shardId, sourceNode->workerName, sourceNode->workerPort, REBALANCE_PROGRESS_MOVED); - CloseConnection(connection); +} - return true; + +/* + * ExecuteCriticalCommandInSeparateTransaction runs a command in a separate + * transaction that is commited right away. This is useful for things that you + * don't want to rollback when the current transaction is rolled back. 
+ */ +void +ExecuteCriticalCommandInSeparateTransaction(char *command) +{ + int connectionFlag = FORCE_NEW_CONNECTION; + MultiConnection *connection = GetNodeConnection(connectionFlag, LocalHostName, + PostPortNumber); + ExecuteCriticalRemoteCommand(connection, command); + CloseConnection(connection); } @@ -1228,6 +1743,7 @@ RebalancePlacementUpdates(List *workerNodeList, List *shardPlacementListList, double threshold, int32 maxShardMoves, bool drainOnly, + float4 improvementThreshold, RebalancePlanFunctions *functions) { List *rebalanceStates = NIL; @@ -1264,9 +1780,11 @@ RebalancePlacementUpdates(List *workerNodeList, List *shardPlacementListList, while (list_length(state->placementUpdateList) < maxShardMoves && moreMovesAvailable) { - moreMovesAvailable = FindAndMoveShardCost(utilizationLowerBound, - utilizationUpperBound, - state); + moreMovesAvailable = FindAndMoveShardCost( + utilizationLowerBound, + utilizationUpperBound, + improvementThreshold, + state); } placementUpdateList = state->placementUpdateList; @@ -1286,6 +1804,36 @@ RebalancePlacementUpdates(List *workerNodeList, List *shardPlacementListList, hash_destroy(state->placementsHash); } + + if (state->ignoredMoves > 0) + { + if (MaxRebalancerLoggedIgnoredMoves == -1 || + state->ignoredMoves <= MaxRebalancerLoggedIgnoredMoves) + { + ereport(NOTICE, ( + errmsg( + "Ignored %ld moves, all of which are shown in notices above", + state->ignoredMoves + ), + errhint( + "If you do want these moves to happen, try changing improvement_threshold to a lower value than what it is now (%g).", + improvementThreshold) + )); + } + else + { + ereport(NOTICE, ( + errmsg( + "Ignored %ld moves, %d of which are shown in notices above", + state->ignoredMoves, + MaxRebalancerLoggedIgnoredMoves + ), + errhint( + "If you do want these moves to happen, try changing improvement_threshold to a lower value than what it is now (%g).", + improvementThreshold) + )); + } + } return placementUpdateList; } @@ -1314,8 +1862,8 @@ InitRebalanceState(List *workerNodeList, List *shardPlacementList, fillState->capacity = functions->nodeCapacity(workerNode, functions->context); /* - * Set the utilization here although the totalCost is not set yet. This is - * important to set the utilization to INFINITY when the capacity is 0. + * Set the utilization here although the totalCost is not set yet. This + * is needed to set the utilization to INFINITY when the capacity is 0. */ fillState->utilization = CalculateUtilization(fillState->totalCost, fillState->capacity); @@ -1648,13 +2196,39 @@ MoveShardCost(NodeFillState *sourceFillState, * current state and returns a list with a new move appended that improves the * balance of shards. The algorithm is greedy and will use the first new move * that improves the balance. It finds nodes by trying to move a shard from the - * fullest node to the emptiest node. If no moves are possible it will try the - * second emptiest node until it tried all of them. Then it wil try the second - * fullest node. If it was able to find a move it will return true and false if - * it couldn't. + * most utilized node (highest utilization) to the emptiest node (lowest + * utilization). If no moves are possible it will try the second emptiest node + * until it tried all of them. Then it wil try the second fullest node. If it + * was able to find a move it will return true and false if it couldn't. + * + * This algorithm won't necessarily result in the best possible balance. 
Getting
+ * the best balance is an NP-hard problem, so it's not feasible to go for the best
+ * balance. This algorithm was chosen for the following reasons:
+ * 1. Literature research showed that similar problems would get within 2X of
+ * the optimal balance with a greedy algorithm.
+ * 2. Every move will always improve the balance. So if the user stops a
+ * rebalance midway through, they will never be in a worse situation than
+ * before.
+ * 3. It's pretty easy to reason about.
+ * 4. It's simple to implement.
+ *
+ * utilizationLowerBound and utilizationUpperBound are used to indicate what
+ * the target utilization range of all nodes is. If they are within this range,
+ * then balance is good enough. If all nodes are in this range then the cluster
+ * is considered balanced and no more moves are done. This is mostly useful for
+ * the by_disk_size rebalance strategy. Without it, the rebalancer could
+ * become flappy in certain cases.
+ *
+ * improvementThreshold is a threshold that can be used to ignore moves when
+ * they only improve the balance a little relative to the cost of the shard.
+ * Again this is mostly useful for the by_disk_size rebalance strategy.
+ * Without this threshold the rebalancer would move a shard of 1TB when this
+ * move only improves the cluster by 10GB.
 */
 static bool
-FindAndMoveShardCost(float4 utilizationLowerBound, float4 utilizationUpperBound,
+FindAndMoveShardCost(float4 utilizationLowerBound,
+ float4 utilizationUpperBound,
+ float4 improvementThreshold,
 RebalanceState *state)
 {
 NodeFillState *sourceFillState = NULL;
@@ -1727,11 +2301,24 @@ FindAndMoveShardCost(float4 utilizationLowerBound, float4 utilizationUpperBound,
 }
 /*
- * Ensure that the cost distrubition is actually better
- * after the move, i.e. the new highest utilization of
- * source and target is lower than the previous highest, or
- * the highest utilization is the same, but the lowest
- * increased.
+ * If the target is still less utilized than the source, then
+ * this is clearly a good move. The same holds if they are
+ * equally utilized.
+ */
+ if (newTargetUtilization <= newSourceUtilization)
+ {
+ MoveShardCost(sourceFillState, targetFillState,
+ shardCost, state);
+ return true;
+ }
+
+ /*
+ * The target is now more utilized than the source. So we need
+ * to determine if the move is a net positive for the overall
+ * cost distribution. This means that the new highest
+ * utilization of source and target is lower than the previous
+ * highest, or the highest utilization is the same, but the
+ * lowest increased.
 */
 if (newTargetUtilization > sourceFillState->utilization)
 {
@@ -1752,6 +2339,58 @@ FindAndMoveShardCost(float4 utilizationLowerBound, float4 utilizationUpperBound,
 */
 continue;
 }
+
+ /*
+ * fmaxf and fminf here are only needed for cases when nodes
+ * have different capacities. If they are the same, then both
+ * arguments are equal.
+ */
+ float4 utilizationImprovement = fmaxf(
+ sourceFillState->utilization - newTargetUtilization,
+ newSourceUtilization - targetFillState->utilization
+ );
+ float4 utilizationAddedByShard = fminf(
+ newTargetUtilization - targetFillState->utilization,
+ sourceFillState->utilization - newSourceUtilization
+ );
+
+ /*
+ * If the shard causes a lot of utilization, but the
+ * improvement gained by moving it is small, then we
+ * ignore the move. Probably there are other shards that are
+ * better candidates, and in any case it's probably not worth
+ * the effort to move this shard.
+ * + * One of the main cases this tries to avoid is the rebalancer + * moving a very large shard with the "by_disk_size" strategy + * when that only gives a small benefit in data distribution. + */ + float4 normalizedUtilizationImprovement = utilizationImprovement / + utilizationAddedByShard; + if (normalizedUtilizationImprovement < improvementThreshold) + { + state->ignoredMoves++; + if (MaxRebalancerLoggedIgnoredMoves == -1 || + state->ignoredMoves <= MaxRebalancerLoggedIgnoredMoves) + { + ereport(NOTICE, ( + errmsg( + "Ignoring move of shard %ld from %s:%d to %s:%d, because the move only brings a small improvement relative to the shard its size", + shardCost->shardId, + sourceFillState->node->workerName, + sourceFillState->node->workerPort, + targetFillState->node->workerName, + targetFillState->node->workerPort + ), + errdetail( + "The balance improvement of %g is lower than the improvement_threshold of %g", + normalizedUtilizationImprovement, + improvementThreshold + ) + )); + } + continue; + } MoveShardCost(sourceFillState, targetFillState, shardCost, state); return true; @@ -2158,7 +2797,7 @@ UpdateColocatedShardPlacementProgress(uint64 shardId, char *sourceName, int sour strcmp(step->sourceName, sourceName) == 0 && step->sourcePort == sourcePort) { - step->progress = progress; + pg_atomic_write_u64(&step->progress, progress); } } } @@ -2166,13 +2805,12 @@ UpdateColocatedShardPlacementProgress(uint64 shardId, char *sourceName, int sour /* - * citus_rebalance_strategy_enterprise_check is trigger function, intended for - * use in prohibiting writes to pg_dist_rebalance_strategy in Citus Community. + * pg_dist_rebalance_strategy_enterprise_check is a now removed function, but + * to avoid issues during upgrades a C stub is kept. */ Datum pg_dist_rebalance_strategy_enterprise_check(PG_FUNCTION_ARGS) { - /* This is Enterprise, so this check is a no-op */ PG_RETURN_VOID(); } @@ -2192,6 +2830,7 @@ pg_dist_rebalance_strategy_enterprise_check(PG_FUNCTION_ARGS) Datum citus_validate_rebalance_strategy_functions(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); EnsureShardCostUDF(PG_GETARG_OID(0)); EnsureNodeCapacityUDF(PG_GETARG_OID(1)); EnsureShardAllowedOnNodeUDF(PG_GETARG_OID(2)); diff --git a/src/backend/distributed/operations/split_shards.c b/src/backend/distributed/operations/split_shards.c index 80d3f930f..df95faded 100644 --- a/src/backend/distributed/operations/split_shards.c +++ b/src/backend/distributed/operations/split_shards.c @@ -61,10 +61,10 @@ isolate_tenant_to_new_shard(PG_FUNCTION_ARGS) Datum worker_hash(PG_FUNCTION_ARGS) { - Datum valueDatum = PG_GETARG_DATUM(0); - CheckCitusVersion(ERROR); + Datum valueDatum = PG_GETARG_DATUM(0); + /* figure out hash function from the data type */ Oid valueDataType = get_fn_expr_argtype(fcinfo->flinfo, 0); TypeCacheEntry *typeEntry = lookup_type_cache(valueDataType, diff --git a/src/backend/distributed/operations/stage_protocol.c b/src/backend/distributed/operations/stage_protocol.c index 0813da540..225317132 100644 --- a/src/backend/distributed/operations/stage_protocol.c +++ b/src/backend/distributed/operations/stage_protocol.c @@ -94,6 +94,8 @@ PG_FUNCTION_INFO_V1(citus_update_table_statistics); Datum master_create_empty_shard(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + text *relationNameText = PG_GETARG_TEXT_P(0); char *relationName = text_to_cstring(relationNameText); uint32 attemptableNodeCount = 0; @@ -108,8 +110,6 @@ master_create_empty_shard(PG_FUNCTION_ARGS) Oid relationId = ResolveRelationId(relationNameText, false); char 
relationKind = get_rel_relkind(relationId); - CheckCitusVersion(ERROR); - EnsureTablePermissions(relationId, ACL_INSERT); CheckDistributedTable(relationId); @@ -171,10 +171,6 @@ master_create_empty_shard(PG_FUNCTION_ARGS) "on local tables"))); } - char replicationModel = TableReplicationModel(relationId); - - EnsureReplicationSettings(relationId, replicationModel); - /* generate new and unique shardId from sequence */ uint64 shardId = GetNextShardId(); @@ -243,6 +239,8 @@ master_create_empty_shard(PG_FUNCTION_ARGS) Datum master_append_table_to_shard(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + uint64 shardId = PG_GETARG_INT64(0); text *sourceTableNameText = PG_GETARG_TEXT_P(1); text *sourceNodeNameText = PG_GETARG_TEXT_P(2); @@ -253,8 +251,6 @@ master_append_table_to_shard(PG_FUNCTION_ARGS) float4 shardFillLevel = 0.0; - CheckCitusVersion(ERROR); - ShardInterval *shardInterval = LoadShardInterval(shardId); Oid relationId = shardInterval->relationId; @@ -363,10 +359,10 @@ master_append_table_to_shard(PG_FUNCTION_ARGS) Datum citus_update_shard_statistics(PG_FUNCTION_ARGS) { - int64 shardId = PG_GETARG_INT64(0); - CheckCitusVersion(ERROR); + int64 shardId = PG_GETARG_INT64(0); + uint64 shardSize = UpdateShardStatistics(shardId); PG_RETURN_INT64(shardSize); @@ -380,10 +376,10 @@ citus_update_shard_statistics(PG_FUNCTION_ARGS) Datum citus_update_table_statistics(PG_FUNCTION_ARGS) { - Oid distributedTableId = PG_GETARG_OID(0); - CheckCitusVersion(ERROR); + Oid distributedTableId = PG_GETARG_OID(0); + UpdateTableStatistics(distributedTableId); PG_RETURN_VOID(); diff --git a/src/backend/distributed/planner/deparse_shard_query.c b/src/backend/distributed/planner/deparse_shard_query.c index 73666935e..01998b029 100644 --- a/src/backend/distributed/planner/deparse_shard_query.c +++ b/src/backend/distributed/planner/deparse_shard_query.c @@ -59,6 +59,7 @@ RebuildQueryStrings(Job *workerJob) Query *originalQuery = workerJob->jobQuery; List *taskList = workerJob->taskList; Task *task = NULL; + bool isSingleTask = list_length(taskList) == 1; if (originalQuery->commandType == CMD_INSERT) { @@ -74,7 +75,7 @@ RebuildQueryStrings(Job *workerJob) * task, we scribble on the original query to avoid the copying * overhead. */ - if (list_length(taskList) > 1) + if (!isSingleTask) { query = copyObject(originalQuery); } @@ -119,6 +120,19 @@ RebuildQueryStrings(Job *workerJob) * deparse_shard_query when the string is needed */ task->anchorDistributedTableId = modifiedRelationRTE->relid; + + /* + * For multi-row inserts, we modify the VALUES before storing the + * query in the task. 
+ */ + RangeTblEntry *valuesRTE = ExtractDistributedInsertValuesRTE(query); + if (valuesRTE != NULL) + { + Assert(valuesRTE->rtekind == RTE_VALUES); + Assert(task->rowValuesLists != NULL); + + valuesRTE->values_lists = task->rowValuesLists; + } } bool isQueryObjectOrText = GetTaskQueryType(task) == TASK_QUERY_TEXT || @@ -180,39 +194,7 @@ AddInsertAliasIfNeeded(Query *query) static void UpdateTaskQueryString(Query *query, Task *task) { - List *oldValuesLists = NIL; - RangeTblEntry *valuesRTE = NULL; - - if (query->commandType == CMD_INSERT) - { - /* extract the VALUES from the INSERT */ - valuesRTE = ExtractDistributedInsertValuesRTE(query); - - if (valuesRTE != NULL) - { - Assert(valuesRTE->rtekind == RTE_VALUES); - Assert(task->rowValuesLists != NULL); - - oldValuesLists = valuesRTE->values_lists; - valuesRTE->values_lists = task->rowValuesLists; - } - - if (ShouldLazyDeparseQuery(task)) - { - /* - * not all insert queries are copied before calling this - * function, so we do it here - */ - query = copyObject(query); - } - } - SetTaskQueryIfShouldLazyDeparse(task, query); - - if (valuesRTE != NULL) - { - valuesRTE->values_lists = oldValuesLists; - } } diff --git a/src/backend/distributed/planner/local_distributed_join_planner.c b/src/backend/distributed/planner/local_distributed_join_planner.c index bb7c1d4de..9f93b447c 100644 --- a/src/backend/distributed/planner/local_distributed_join_planner.c +++ b/src/backend/distributed/planner/local_distributed_join_planner.c @@ -94,6 +94,7 @@ #include "distributed/multi_physical_planner.h" #include "distributed/multi_server_executor.h" #include "distributed/multi_router_planner.h" +#include "distributed/coordinator_protocol.h" #include "distributed/query_colocation_checker.h" #include "distributed/query_pushdown_planning.h" #include "distributed/recursive_planning.h" @@ -179,7 +180,8 @@ static ConversionCandidates * CreateConversionCandidates(PlannerRestrictionConte plannerRestrictionContext, List *rangeTableList, int resultRTEIdentity); -static void AppendUniqueIndexColumnsToList(Form_pg_index indexForm, List **uniqueIndexes); +static void AppendUniqueIndexColumnsToList(Form_pg_index indexForm, List **uniqueIndexes, + int flags); static ConversionChoice GetConversionChoice(ConversionCandidates * conversionCandidates, PlannerRestrictionContext * @@ -403,7 +405,8 @@ HasConstantFilterOnUniqueColumn(RangeTblEntry *rangeTableEntry, FetchEqualityAttrNumsForRTE((Node *) restrictClauseList); List *uniqueIndexColumnsList = ExecuteFunctionOnEachTableIndex(rangeTableEntry->relid, - AppendUniqueIndexColumnsToList); + AppendUniqueIndexColumnsToList, + INCLUDE_INDEX_ALL_STATEMENTS); IndexColumns *indexColumns = NULL; foreach_ptr(indexColumns, uniqueIndexColumnsList) { @@ -442,7 +445,8 @@ FirstIsSuperSetOfSecond(List *firstIntList, List *secondIntList) * unique index. 
*/ static void -AppendUniqueIndexColumnsToList(Form_pg_index indexForm, List **uniqueIndexGroups) +AppendUniqueIndexColumnsToList(Form_pg_index indexForm, List **uniqueIndexGroups, + int flags) { if (indexForm->indisunique || indexForm->indisprimary) { diff --git a/src/backend/distributed/planner/local_plan_cache.c b/src/backend/distributed/planner/local_plan_cache.c index 77a80a892..3ae83d235 100644 --- a/src/backend/distributed/planner/local_plan_cache.c +++ b/src/backend/distributed/planner/local_plan_cache.c @@ -139,6 +139,14 @@ GetCachedLocalPlan(Task *task, DistributedPlan *distributedPlan) bool IsLocalPlanCachingSupported(Job *currentJob, DistributedPlan *originalDistributedPlan) { + if (originalDistributedPlan->numberOfTimesExecuted < 1) + { + /* + * Only cache if a plan is being reused (via a prepared statement). + */ + return false; + } + if (!currentJob->deferredPruning) { /* diff --git a/src/backend/distributed/planner/multi_logical_optimizer.c b/src/backend/distributed/planner/multi_logical_optimizer.c index 15c1a61a0..615c0ddbe 100644 --- a/src/backend/distributed/planner/multi_logical_optimizer.c +++ b/src/backend/distributed/planner/multi_logical_optimizer.c @@ -318,10 +318,6 @@ static Node * WorkerLimitCount(Node *limitCount, Node *limitOffset, OrderByLimit static List * WorkerSortClauseList(Node *limitCount, List *groupClauseList, List *sortClauseList, OrderByLimitReference orderByLimitReference); -static List * GenerateNewTargetEntriesForSortClauses(List *originalTargetList, - List *sortClauseList, - AttrNumber *targetProjectionNumber, - Index *nextSortGroupRefIndex); static bool CanPushDownLimitApproximate(List *sortClauseList, List *targetList); static bool HasOrderByAggregate(List *sortClauseList, List *targetList); static bool HasOrderByNonCommutativeAggregate(List *sortClauseList, List *targetList); @@ -2701,38 +2697,6 @@ ProcessWindowFunctionsForWorkerQuery(List *windowClauseList, return; } - WindowClause *windowClause = NULL; - foreach_ptr(windowClause, windowClauseList) - { - List *partitionClauseTargetList = - GenerateNewTargetEntriesForSortClauses(originalTargetEntryList, - windowClause->partitionClause, - &(queryTargetList-> - targetProjectionNumber), - queryWindowClause-> - nextSortGroupRefIndex); - List *orderClauseTargetList = - GenerateNewTargetEntriesForSortClauses(originalTargetEntryList, - windowClause->orderClause, - &(queryTargetList-> - targetProjectionNumber), - queryWindowClause-> - nextSortGroupRefIndex); - - /* - * Note that even Citus does push down the window clauses as-is, we may still need to - * add the generated entries to the target list. The reason is that the same aggregates - * might be referred from another target entry that is a bare aggregate (e.g., no window - * functions), which would have been mutated. For instance, when an average aggregate - * is mutated on the target list, the window function would refer to a sum aggregate, - * which is obviously wrong. 
- */ - queryTargetList->targetEntryList = list_concat(queryTargetList->targetEntryList, - partitionClauseTargetList); - queryTargetList->targetEntryList = list_concat(queryTargetList->targetEntryList, - orderClauseTargetList); - } - queryWindowClause->workerWindowClauseList = windowClauseList; queryWindowClause->hasWindowFunctions = true; } @@ -2798,19 +2762,6 @@ ProcessLimitOrderByForWorkerQuery(OrderByLimitReference orderByLimitReference, groupClauseList, sortClauseList, orderByLimitReference); - - /* - * TODO: Do we really need to add the target entries if we're not pushing - * down ORDER BY? - */ - List *newTargetEntryListForSortClauses = - GenerateNewTargetEntriesForSortClauses(originalTargetList, - queryOrderByLimit->workerSortClauseList, - &(queryTargetList->targetProjectionNumber), - queryOrderByLimit->nextSortGroupRefIndex); - - queryTargetList->targetEntryList = - list_concat(queryTargetList->targetEntryList, newTargetEntryListForSortClauses); } @@ -4795,87 +4746,6 @@ WorkerSortClauseList(Node *limitCount, List *groupClauseList, List *sortClauseLi } -/* - * GenerateNewTargetEntriesForSortClauses goes over provided sort clause lists and - * creates new target entries if needed to make sure sort clauses has correct - * references. The function returns list of new target entries, caller is - * responsible to add those target entries to the end of worker target list. - * - * The function is required because we change the target entry if it contains an - * expression having an aggregate operation, or just the AVG aggregate. - * Afterwards any order by clause referring to original target entry starts - * to point to a wrong expression. - * - * Note the function modifies SortGroupClause items in sortClauseList, - * targetProjectionNumber, and nextSortGroupRefIndex. - */ -static List * -GenerateNewTargetEntriesForSortClauses(List *originalTargetList, - List *sortClauseList, - AttrNumber *targetProjectionNumber, - Index *nextSortGroupRefIndex) -{ - List *createdTargetList = NIL; - - SortGroupClause *sgClause = NULL; - foreach_ptr(sgClause, sortClauseList) - { - TargetEntry *targetEntry = get_sortgroupclause_tle(sgClause, originalTargetList); - Expr *targetExpr = targetEntry->expr; - bool containsAggregate = contain_aggs_of_level((Node *) targetExpr, 0); - bool createNewTargetEntry = false; - - /* we are only interested in target entries containing aggregates */ - if (!containsAggregate) - { - continue; - } - - /* - * If the target expression is not an Aggref, it is either an expression - * on a single aggregate, or expression containing multiple aggregates. - * Worker query mutates these target entries to have a naked target entry - * per aggregate function. We want to use original target entries if this - * the case. - * If the original target expression is an avg aggref, we also want to use - * original target entry. 
- */ - if (!IsA(targetExpr, Aggref)) - { - createNewTargetEntry = true; - } - else - { - Aggref *aggNode = (Aggref *) targetExpr; - AggregateType aggregateType = GetAggregateType(aggNode); - if (aggregateType == AGGREGATE_AVERAGE) - { - createNewTargetEntry = true; - } - } - - if (createNewTargetEntry) - { - bool resJunk = true; - AttrNumber nextResNo = (*targetProjectionNumber); - Expr *newExpr = copyObject(targetExpr); - TargetEntry *newTargetEntry = makeTargetEntry(newExpr, nextResNo, - targetEntry->resname, resJunk); - newTargetEntry->ressortgroupref = *nextSortGroupRefIndex; - - createdTargetList = lappend(createdTargetList, newTargetEntry); - - sgClause->tleSortGroupRef = *nextSortGroupRefIndex; - - (*nextSortGroupRefIndex)++; - (*targetProjectionNumber)++; - } - } - - return createdTargetList; -} - - /* * CanPushDownLimitApproximate checks if we can push down the limit clause to * the worker nodes, and get approximate and meaningful results. We can do this diff --git a/src/backend/distributed/planner/multi_router_planner.c b/src/backend/distributed/planner/multi_router_planner.c index b21553301..d3dc054b6 100644 --- a/src/backend/distributed/planner/multi_router_planner.c +++ b/src/backend/distributed/planner/multi_router_planner.c @@ -2425,7 +2425,7 @@ CreateLocalDummyPlacement() { ShardPlacement *dummyPlacement = CitusMakeNode(ShardPlacement); dummyPlacement->nodeId = LOCAL_NODE_ID; - dummyPlacement->nodeName = LOCAL_HOST_NAME; + dummyPlacement->nodeName = LocalHostName; dummyPlacement->nodePort = PostPortNumber; dummyPlacement->groupId = GetLocalGroupId(); return dummyPlacement; diff --git a/src/backend/distributed/relay/relay_event_utility.c b/src/backend/distributed/relay/relay_event_utility.c index 1120422b7..e79ab6a14 100644 --- a/src/backend/distributed/relay/relay_event_utility.c +++ b/src/backend/distributed/relay/relay_event_utility.c @@ -901,13 +901,13 @@ AppendShardIdToName(char **name, uint64 shardId) Datum shard_name(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + Oid relationId = PG_GETARG_OID(0); int64 shardId = PG_GETARG_INT64(1); char *qualifiedName = NULL; - CheckCitusVersion(ERROR); - if (shardId <= 0) { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), diff --git a/src/backend/distributed/shared_library_init.c b/src/backend/distributed/shared_library_init.c index 512aa1add..e734b1594 100644 --- a/src/backend/distributed/shared_library_init.c +++ b/src/backend/distributed/shared_library_init.c @@ -68,6 +68,7 @@ #include "distributed/time_constants.h" #include "distributed/query_stats.h" #include "distributed/remote_commands.h" +#include "distributed/shard_rebalancer.h" #include "distributed/shared_library_init.h" #include "distributed/statistics_collection.h" #include "distributed/subplan_execution.h" @@ -98,6 +99,9 @@ PG_MODULE_MAGIC; #define DUMMY_REAL_TIME_EXECUTOR_ENUM_VALUE 9999999 static char *CitusVersion = CITUS_VERSION; +/* deprecated GUC value that should not be used anywhere outside this file */ +static int ReplicationModel = REPLICATION_MODEL_STREAMING; + void _PG_init(void); void _PG_fini(void); @@ -114,6 +118,7 @@ static void RegisterCitusConfigVariables(void); static bool ErrorIfNotASuitableDeadlockFactor(double *newval, void **extra, GucSource source); static bool WarnIfDeprecatedExecutorUsed(int *newval, void **extra, GucSource source); +static bool WarnIfReplicationModelIsSet(int *newval, void **extra, GucSource source); static bool NoticeIfSubqueryPushdownEnabled(bool *newval, void **extra, GucSource source); static bool 
NodeConninfoGucCheckHook(char **newval, void **extra, GucSource source); static void NodeConninfoGucAssignHook(const char *newval, void *extra); @@ -575,6 +580,17 @@ RegisterCitusConfigVariables(void) GUC_STANDARD, NULL, NULL, NULL); + DefineCustomBoolVariable( + "citus.check_available_space_before_move", + gettext_noop("When enabled will check free disk space before a shard move"), + gettext_noop( + "Free disk space will be checked when this setting is enabled before each shard move."), + &CheckAvailableSpaceBeforeMove, + true, + PGC_USERSET, + GUC_NO_SHOW_ALL, + NULL, NULL, NULL); + DefineCustomStringVariable( "citus.cluster_name", gettext_noop("Which cluster this node is a part of"), @@ -629,7 +645,9 @@ RegisterCitusConfigVariables(void) DefineCustomBoolVariable( "citus.defer_drop_after_shard_move", - gettext_noop("When enabled a shard move will mark old shards for deletion"), + gettext_noop("When enabled a shard move will mark the original shards " + "for deletion after a successful move, instead of deleting " + "them right away."), gettext_noop("The deletion of a shard can sometimes run into a conflict with a " "long running transactions on a the shard during the drop phase of " "the shard move. This causes some moves to be rolled back after " @@ -639,7 +657,7 @@ RegisterCitusConfigVariables(void) "citus.defer_shard_delete_interval to make sure defered deletions " "will be executed"), &DeferShardDeleteOnMove, - false, + true, PGC_USERSET, 0, NULL, NULL, NULL); @@ -654,11 +672,37 @@ RegisterCitusConfigVariables(void) "the background worker moves on. When set to -1 this background " "process is skipped."), &DeferShardDeleteInterval, - -1, -1, 7 * 24 * 3600 * 1000, + 15000, -1, 7 * 24 * 3600 * 1000, PGC_SIGHUP, GUC_UNIT_MS, NULL, NULL, NULL); + DefineCustomRealVariable( + "citus.desired_percent_disk_available_after_move", + gettext_noop( + "Sets how many percentage of free disk space should be after a shard move"), + gettext_noop( + "This setting controls how much free space should be available after a shard move." + "If the free disk space will be lower than this parameter, then shard move will result in" + "an error."), + &DesiredPercentFreeAfterMove, + 10.0, 0.0, 100.0, + PGC_SIGHUP, + GUC_STANDARD, + NULL, NULL, NULL); + + DefineCustomBoolVariable( + "citus.enable_manual_changes_to_shards", + gettext_noop("Enables dropping and truncating known shards."), + gettext_noop("Set to false by default. If set to true, enables " + "dropping and truncating shards on the coordinator " + "(or the workers with metadata)"), + &EnableManualChangesToShards, + false, + PGC_USERSET, + GUC_NO_SHOW_ALL, + NULL, NULL, NULL); + DefineCustomRealVariable( "citus.distributed_deadlock_detection_factor", gettext_noop("Sets the time to wait before checking for distributed " @@ -693,6 +737,17 @@ RegisterCitusConfigVariables(void) GUC_NO_SHOW_ALL, NULL, NULL, NULL); + DefineCustomBoolVariable( + "citus.enable_alter_database_owner", + gettext_noop("Enables propagating ALTER DATABASE ... OWNER TO ... 
statements to " + "workers"), + NULL, + &EnableAlterDatabaseOwner, + false, + PGC_USERSET, + GUC_NO_SHOW_ALL, + NULL, NULL, NULL); + DefineCustomBoolVariable( "citus.enable_binary_protocol", gettext_noop( @@ -940,30 +995,17 @@ RegisterCitusConfigVariables(void) NULL, NULL, NULL); DefineCustomBoolVariable( - "citus.check_available_space_before_move", - gettext_noop("When enabled will check free disk space before a shard move"), - gettext_noop( - "Free disk space will be checked when this setting is enabled before each shard move."), - &CheckAvailableSpaceBeforeMove, + "citus.enable_cost_based_connection_establishment", + gettext_noop("When enabled the connection establishment times " + "and task execution times into account for deciding " + "whether or not to establish new connections."), + NULL, + &EnableCostBasedConnectionEstablishment, true, PGC_USERSET, GUC_NO_SHOW_ALL, NULL, NULL, NULL); - DefineCustomRealVariable( - "citus.desired_percent_disk_available_after_move", - gettext_noop( - "Sets how many percentage of free disk space should be after a shard move"), - gettext_noop( - "This setting controls how much free space should be available after a shard move." - "If the free disk space will be lower than this parameter, then shard move will result in" - "an error."), - &DesiredPercentFreeAfterMove, - 10.0, 0.0, 100.0, - PGC_SIGHUP, - GUC_STANDARD, - NULL, NULL, NULL); - DefineCustomBoolVariable( "citus.explain_distributed_queries", gettext_noop("Enables Explain for distributed queries."), @@ -1051,6 +1093,19 @@ RegisterCitusConfigVariables(void) GUC_UNIT_BYTE | GUC_NO_SHOW_ALL, NULL, NULL, NULL); + DefineCustomStringVariable( + "citus.local_hostname", + gettext_noop("Sets the hostname when connecting back to itself."), + gettext_noop("For some operations nodes, mostly the coordinator, connect back to " + "itself. When configuring SSL certificates it sometimes is required " + "to use a specific hostname to match the CN of the certificate when " + "verify-full is used."), + &LocalHostName, + "localhost", + PGC_SUSET, + GUC_STANDARD, + NULL, NULL, NULL); + DefineCustomIntVariable( "citus.local_shared_pool_size", gettext_noop( @@ -1190,6 +1245,16 @@ RegisterCitusConfigVariables(void) GUC_UNIT_KB | GUC_STANDARD, NULL, NULL, NULL); + DefineCustomIntVariable( + "citus.max_rebalancer_logged_ignored_moves", + gettext_noop("Sets the maximum number of ignored moves the rebalance logs"), + NULL, + &MaxRebalancerLoggedIgnoredMoves, + 5, -1, INT_MAX, + PGC_USERSET, + GUC_NO_SHOW_ALL, + NULL, NULL, NULL); + DefineCustomIntVariable( "citus.max_shared_pool_size", gettext_noop("Sets the maximum number of connections allowed per worker node " @@ -1364,6 +1429,21 @@ RegisterCitusConfigVariables(void) GUC_UNIT_KB | GUC_STANDARD, NULL, NULL, NULL); + DefineCustomBoolVariable( + "citus.prevent_incomplete_connection_establishment", + gettext_noop("When enabled, the executor waits until all the connections " + "are successfully established."), + gettext_noop("Under some load, the executor may decide to establish some " + "extra connections to further parallelize the execution. However," + "before the connection establishment is done, the execution might " + "have already finished. 
When this GUC is set to true, the execution " + "waits for such connections to be established."), + &PreventIncompleteConnectionEstablishment, + true, + PGC_USERSET, + GUC_NO_SHOW_ALL, + NULL, NULL, NULL); + DefineCustomEnumVariable( "citus.propagate_set_commands", gettext_noop("Sets which SET commands are propagated to workers."), @@ -1438,16 +1518,28 @@ RegisterCitusConfigVariables(void) DefineCustomEnumVariable( "citus.replication_model", - gettext_noop("Sets the replication model to be used for distributed tables."), - gettext_noop("Depending upon the execution environment, statement- or streaming-" - "based replication modes may be employed. Though most Citus deploy-" - "ments will simply use statement replication, hosted and MX-style" - "deployments should set this parameter to 'streaming'."), + gettext_noop("Deprecated. Please use citus.shard_replication_factor instead"), + gettext_noop( + "Shard replication model is determined by the shard replication factor." + "'statement' replication is used only when the replication factor is one."), &ReplicationModel, - REPLICATION_MODEL_COORDINATOR, + REPLICATION_MODEL_STREAMING, replication_model_options, PGC_SUSET, - GUC_SUPERUSER_ONLY, + GUC_NO_SHOW_ALL, + WarnIfReplicationModelIsSet, NULL, NULL); + + DefineCustomBoolVariable( + "citus.running_under_isolation_test", + gettext_noop( + "Only useful for testing purposes, when set to true, Citus does some " + "tricks to implement useful isolation tests with rebalancing. Should " + "never be set to true on production systems "), + gettext_noop("for details of the tricks implemented, refer to the source code"), + &RunningUnderIsolationTest, + false, + PGC_SUSET, + GUC_SUPERUSER_ONLY | GUC_NO_SHOW_ALL, NULL, NULL, NULL); DefineCustomBoolVariable( @@ -1741,6 +1833,32 @@ NoticeIfSubqueryPushdownEnabled(bool *newval, void **extra, GucSource source) } +/* + * WarnIfReplicationModelIsSet prints a warning when a user sets + * citus.replication_model. + */ +static bool +WarnIfReplicationModelIsSet(int *newval, void **extra, GucSource source) +{ + /* print a warning only when user sets the guc */ + if (source == PGC_S_SESSION) + { + ereport(NOTICE, (errcode(ERRCODE_WARNING_DEPRECATED_FEATURE), + errmsg( + "Setting citus.replication_model has no effect. Please use " + "citus.shard_replication_factor instead."), + errdetail( + "Citus determines the replication model based on the " + "replication factor and the replication models of the colocated " + "shards. If a colocated table is present, the replication model " + "is inherited. Otherwise 'streaming' replication is preferred if " + "supported by the replication factor."))); + } + + return true; +} + + /* * NodeConninfoGucCheckHook ensures conninfo settings are in the expected form * and that the keywords of all non-null settings are on a allowlist devised to diff --git a/src/backend/distributed/sql/citus--10.0-3--10.1-1.sql b/src/backend/distributed/sql/citus--10.0-3--10.1-1.sql index c85c81e3f..ec5a122fa 100644 --- a/src/backend/distributed/sql/citus--10.0-3--10.1-1.sql +++ b/src/backend/distributed/sql/citus--10.0-3--10.1-1.sql @@ -1,9 +1,51 @@ -- citus--10.0-3--10.1-1 +-- add the current database to the distributed objects if not already in there. +-- this is to reliably propagate some of the alter database commands that might be +-- supported. 
+INSERT INTO citus.pg_dist_object SELECT + 'pg_catalog.pg_database'::regclass::oid AS oid, + (SELECT oid FROM pg_database WHERE datname = current_database()) as objid, + 0 as objsubid +ON CONFLICT DO NOTHING; + #include "../../columnar/sql/columnar--10.0-3--10.1-1.sql" #include "udfs/create_distributed_table/10.1-1.sql"; #include "udfs/worker_partitioned_relation_total_size/10.1-1.sql" #include "udfs/worker_partitioned_relation_size/10.1-1.sql" #include "udfs/worker_partitioned_table_size/10.1-1.sql" +#include "udfs/citus_prepare_pg_upgrade/10.1-1.sql" #include "udfs/citus_finish_pg_upgrade/10.1-1.sql" #include "udfs/citus_local_disk_space_stats/10.1-1.sql" +#include "udfs/get_rebalance_table_shards_plan/10.1-1.sql" +#include "udfs/citus_add_rebalance_strategy/10.1-1.sql" + +ALTER TABLE pg_catalog.pg_dist_rebalance_strategy ADD COLUMN improvement_threshold float4 NOT NULL default 0; +UPDATE pg_catalog.pg_dist_rebalance_strategy SET improvement_threshold = 0.5 WHERE name = 'by_disk_size'; + +#include "udfs/get_rebalance_progress/10.1-1.sql" + +-- use streaming replication when replication factor = 1 +WITH replicated_shards AS ( + SELECT shardid + FROM pg_dist_placement + WHERE shardstate = 1 OR shardstate = 3 + GROUP BY shardid + HAVING count(*) <> 1 ), +replicated_relations AS ( + SELECT DISTINCT logicalrelid + FROM pg_dist_shard + JOIN replicated_shards + USING (shardid) +) +UPDATE pg_dist_partition +SET repmodel = 's' +WHERE repmodel = 'c' + AND partmethod = 'h' + AND logicalrelid NOT IN (SELECT * FROM replicated_relations); +#include "udfs/citus_shards/10.1-1.sql" + +DROP TRIGGER pg_dist_rebalance_strategy_enterprise_check_trigger ON pg_catalog.pg_dist_rebalance_strategy; +DROP FUNCTION citus_internal.pg_dist_rebalance_strategy_enterprise_check(); + +#include "udfs/citus_cleanup_orphaned_shards/10.1-1.sql" diff --git a/src/backend/distributed/sql/citus--10.1-1--10.2-1.sql b/src/backend/distributed/sql/citus--10.1-1--10.2-1.sql new file mode 100644 index 000000000..d11c3ac18 --- /dev/null +++ b/src/backend/distributed/sql/citus--10.1-1--10.2-1.sql @@ -0,0 +1,4 @@ +-- citus--10.1-1--10.2-1 + +-- bump version to 10.2-1 + diff --git a/src/backend/distributed/sql/citus--8.3-1--9.0-1.sql b/src/backend/distributed/sql/citus--8.3-1--9.0-1.sql index f873ac4b2..dccc66d16 100644 --- a/src/backend/distributed/sql/citus--8.3-1--9.0-1.sql +++ b/src/backend/distributed/sql/citus--8.3-1--9.0-1.sql @@ -83,24 +83,7 @@ DROP EXTENSION IF EXISTS shard_rebalancer; #include "udfs/get_rebalance_table_shards_plan/9.0-1.sql" #include "udfs/replicate_table_shards/9.0-1.sql" #include "udfs/rebalance_table_shards/9.0-1.sql" - --- get_rebalance_progress returns the list of shard placement move operations along with --- their progressions for ongoing rebalance operations. 
--- -CREATE OR REPLACE FUNCTION get_rebalance_progress() - RETURNS TABLE(sessionid integer, - table_name regclass, - shardid bigint, - shard_size bigint, - sourcename text, - sourceport int, - targetname text, - targetport int, - progress bigint) - AS 'MODULE_PATHNAME' - LANGUAGE C STRICT; -COMMENT ON FUNCTION get_rebalance_progress() - IS 'provides progress information about the ongoing rebalance operations'; +#include "udfs/get_rebalance_progress/9.0-1.sql" DROP FUNCTION master_add_node(text, integer, integer, noderole, name); CREATE FUNCTION master_add_node(nodename text, diff --git a/src/backend/distributed/sql/downgrades/citus--10.1-1--10.0-3.sql b/src/backend/distributed/sql/downgrades/citus--10.1-1--10.0-3.sql index 765cf9cbe..5946473f9 100644 --- a/src/backend/distributed/sql/downgrades/citus--10.1-1--10.0-3.sql +++ b/src/backend/distributed/sql/downgrades/citus--10.1-1--10.0-3.sql @@ -1,5 +1,10 @@ -- citus--10.1-1--10.0-3 +-- remove databases as distributed objects to prevent unknown object types being managed +-- on older versions. +DELETE FROM citus.pg_dist_object + WHERE classid = 'pg_catalog.pg_database'::regclass::oid; + #include "../../../columnar/sql/downgrades/columnar--10.1-1--10.0-3.sql" DROP FUNCTION pg_catalog.create_distributed_table(regclass, text, citus.distribution_type, text, int); @@ -21,4 +26,62 @@ DROP FUNCTION pg_catalog.worker_partitioned_relation_size(regclass); DROP FUNCTION pg_catalog.worker_partitioned_table_size(regclass); DROP FUNCTION pg_catalog.citus_local_disk_space_stats(); +#include "../udfs/citus_prepare_pg_upgrade/9.5-1.sql" #include "../udfs/citus_finish_pg_upgrade/10.0-1.sql" +#include "../udfs/get_rebalance_table_shards_plan/9.2-1.sql" + +-- the migration for citus_add_rebalance_strategy from 9.2-1 was the first one, +-- so it doesn't have a DROP. This is why we DROP manually here. +DROP FUNCTION pg_catalog.citus_add_rebalance_strategy; +#include "../udfs/citus_add_rebalance_strategy/9.2-1.sql" + +ALTER TABLE pg_catalog.pg_dist_rebalance_strategy DROP COLUMN improvement_threshold; + +-- the migration for get_rebalance_progress from 9.0-1 was the first one, +-- so it doesn't have a DROP. This is why we DROP manually here. +DROP FUNCTION pg_catalog.get_rebalance_progress; +#include "../udfs/get_rebalance_progress/9.0-1.sql" + +CREATE OR REPLACE VIEW pg_catalog.citus_shards AS +WITH shard_sizes AS (SELECT * FROM pg_catalog.citus_shard_sizes()) +SELECT + pg_dist_shard.logicalrelid AS table_name, + pg_dist_shard.shardid, + shard_name(pg_dist_shard.logicalrelid, pg_dist_shard.shardid) as shard_name, + CASE WHEN partkey IS NOT NULL THEN 'distributed' WHEN repmodel = 't' THEN 'reference' ELSE 'local' END AS citus_table_type, + colocationid AS colocation_id, + pg_dist_node.nodename, + pg_dist_node.nodeport, + (SELECT size FROM shard_sizes WHERE + shard_name(pg_dist_shard.logicalrelid, pg_dist_shard.shardid) = table_name + OR + 'public.' 
|| shard_name(pg_dist_shard.logicalrelid, pg_dist_shard.shardid) = table_name + LIMIT 1) as shard_size +FROM + pg_dist_shard +JOIN + pg_dist_placement +ON + pg_dist_shard.shardid = pg_dist_placement.shardid +JOIN + pg_dist_node +ON + pg_dist_placement.groupid = pg_dist_node.groupid +JOIN + pg_dist_partition +ON + pg_dist_partition.logicalrelid = pg_dist_shard.logicalrelid +ORDER BY + pg_dist_shard.logicalrelid::text, shardid +; + +#include "../udfs/citus_finish_pg_upgrade/10.0-1.sql" +CREATE FUNCTION citus_internal.pg_dist_rebalance_strategy_enterprise_check() + RETURNS TRIGGER + LANGUAGE C + AS 'MODULE_PATHNAME'; +CREATE TRIGGER pg_dist_rebalance_strategy_enterprise_check_trigger + BEFORE INSERT OR UPDATE OR DELETE OR TRUNCATE ON pg_dist_rebalance_strategy + FOR EACH STATEMENT EXECUTE FUNCTION citus_internal.pg_dist_rebalance_strategy_enterprise_check(); + +DROP PROCEDURE pg_catalog.citus_cleanup_orphaned_shards(); diff --git a/src/backend/distributed/sql/downgrades/citus--10.2-1--10.1-1.sql b/src/backend/distributed/sql/downgrades/citus--10.2-1--10.1-1.sql new file mode 100644 index 000000000..9b9bb7471 --- /dev/null +++ b/src/backend/distributed/sql/downgrades/citus--10.2-1--10.1-1.sql @@ -0,0 +1,2 @@ +-- citus--10.2-1--10.1-1 +-- this is an empty downgrade path since citus--10.1-1--10.2-1.sql is empty for now diff --git a/src/backend/distributed/sql/udfs/citus_add_rebalance_strategy/10.1-1.sql b/src/backend/distributed/sql/udfs/citus_add_rebalance_strategy/10.1-1.sql new file mode 100644 index 000000000..4c5f8ba79 --- /dev/null +++ b/src/backend/distributed/sql/udfs/citus_add_rebalance_strategy/10.1-1.sql @@ -0,0 +1,30 @@ +DROP FUNCTION pg_catalog.citus_add_rebalance_strategy; +CREATE OR REPLACE FUNCTION pg_catalog.citus_add_rebalance_strategy( + name name, + shard_cost_function regproc, + node_capacity_function regproc, + shard_allowed_on_node_function regproc, + default_threshold float4, + minimum_threshold float4 DEFAULT 0, + improvement_threshold float4 DEFAULT 0 +) + RETURNS VOID AS $$ + INSERT INTO + pg_catalog.pg_dist_rebalance_strategy( + name, + shard_cost_function, + node_capacity_function, + shard_allowed_on_node_function, + default_threshold, + minimum_threshold + ) VALUES ( + name, + shard_cost_function, + node_capacity_function, + shard_allowed_on_node_function, + default_threshold, + minimum_threshold + ); + $$ LANGUAGE sql; +COMMENT ON FUNCTION pg_catalog.citus_add_rebalance_strategy(name,regproc,regproc,regproc,float4, float4, float4) + IS 'adds a new rebalance strategy which can be used when rebalancing shards or draining nodes'; diff --git a/src/backend/distributed/sql/udfs/citus_add_rebalance_strategy/latest.sql b/src/backend/distributed/sql/udfs/citus_add_rebalance_strategy/latest.sql index aeffc9c00..4c5f8ba79 100644 --- a/src/backend/distributed/sql/udfs/citus_add_rebalance_strategy/latest.sql +++ b/src/backend/distributed/sql/udfs/citus_add_rebalance_strategy/latest.sql @@ -1,10 +1,12 @@ +DROP FUNCTION pg_catalog.citus_add_rebalance_strategy; CREATE OR REPLACE FUNCTION pg_catalog.citus_add_rebalance_strategy( name name, shard_cost_function regproc, node_capacity_function regproc, shard_allowed_on_node_function regproc, default_threshold float4, - minimum_threshold float4 DEFAULT 0 + minimum_threshold float4 DEFAULT 0, + improvement_threshold float4 DEFAULT 0 ) RETURNS VOID AS $$ INSERT INTO @@ -24,5 +26,5 @@ CREATE OR REPLACE FUNCTION pg_catalog.citus_add_rebalance_strategy( minimum_threshold ); $$ LANGUAGE sql; -COMMENT ON FUNCTION 
pg_catalog.citus_add_rebalance_strategy(name,regproc,regproc,regproc,float4, float4) +COMMENT ON FUNCTION pg_catalog.citus_add_rebalance_strategy(name,regproc,regproc,regproc,float4, float4, float4) IS 'adds a new rebalance strategy which can be used when rebalancing shards or draining nodes'; diff --git a/src/backend/distributed/sql/udfs/citus_cleanup_orphaned_shards/10.1-1.sql b/src/backend/distributed/sql/udfs/citus_cleanup_orphaned_shards/10.1-1.sql new file mode 100644 index 000000000..ed5723602 --- /dev/null +++ b/src/backend/distributed/sql/udfs/citus_cleanup_orphaned_shards/10.1-1.sql @@ -0,0 +1,5 @@ +CREATE OR REPLACE PROCEDURE pg_catalog.citus_cleanup_orphaned_shards() + LANGUAGE C + AS 'citus', $$citus_cleanup_orphaned_shards$$; +COMMENT ON PROCEDURE pg_catalog.citus_cleanup_orphaned_shards() + IS 'cleanup orphaned shards'; diff --git a/src/backend/distributed/sql/udfs/citus_cleanup_orphaned_shards/latest.sql b/src/backend/distributed/sql/udfs/citus_cleanup_orphaned_shards/latest.sql new file mode 100644 index 000000000..ed5723602 --- /dev/null +++ b/src/backend/distributed/sql/udfs/citus_cleanup_orphaned_shards/latest.sql @@ -0,0 +1,5 @@ +CREATE OR REPLACE PROCEDURE pg_catalog.citus_cleanup_orphaned_shards() + LANGUAGE C + AS 'citus', $$citus_cleanup_orphaned_shards$$; +COMMENT ON PROCEDURE pg_catalog.citus_cleanup_orphaned_shards() + IS 'cleanup orphaned shards'; diff --git a/src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/10.1-1.sql b/src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/10.1-1.sql index d936c958b..fdc05f6df 100644 --- a/src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/10.1-1.sql +++ b/src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/10.1-1.sql @@ -23,7 +23,6 @@ BEGIN INSERT INTO pg_catalog.pg_dist_authinfo SELECT * FROM public.pg_dist_authinfo; INSERT INTO pg_catalog.pg_dist_poolinfo SELECT * FROM public.pg_dist_poolinfo; - ALTER TABLE pg_catalog.pg_dist_rebalance_strategy DISABLE TRIGGER pg_dist_rebalance_strategy_enterprise_check_trigger; INSERT INTO pg_catalog.pg_dist_rebalance_strategy SELECT name, default_strategy, @@ -31,9 +30,9 @@ BEGIN node_capacity_function::regprocedure::regproc, shard_allowed_on_node_function::regprocedure::regproc, default_threshold, - minimum_threshold + minimum_threshold, + improvement_threshold FROM public.pg_dist_rebalance_strategy; - ALTER TABLE pg_catalog.pg_dist_rebalance_strategy ENABLE TRIGGER pg_dist_rebalance_strategy_enterprise_check_trigger; -- -- drop backup tables diff --git a/src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/latest.sql b/src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/latest.sql index d936c958b..fdc05f6df 100644 --- a/src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/latest.sql +++ b/src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/latest.sql @@ -23,7 +23,6 @@ BEGIN INSERT INTO pg_catalog.pg_dist_authinfo SELECT * FROM public.pg_dist_authinfo; INSERT INTO pg_catalog.pg_dist_poolinfo SELECT * FROM public.pg_dist_poolinfo; - ALTER TABLE pg_catalog.pg_dist_rebalance_strategy DISABLE TRIGGER pg_dist_rebalance_strategy_enterprise_check_trigger; INSERT INTO pg_catalog.pg_dist_rebalance_strategy SELECT name, default_strategy, @@ -31,9 +30,9 @@ BEGIN node_capacity_function::regprocedure::regproc, shard_allowed_on_node_function::regprocedure::regproc, default_threshold, - minimum_threshold + minimum_threshold, + improvement_threshold FROM public.pg_dist_rebalance_strategy; - ALTER TABLE pg_catalog.pg_dist_rebalance_strategy ENABLE TRIGGER 
pg_dist_rebalance_strategy_enterprise_check_trigger; -- -- drop backup tables diff --git a/src/backend/distributed/sql/udfs/citus_prepare_pg_upgrade/10.1-1.sql b/src/backend/distributed/sql/udfs/citus_prepare_pg_upgrade/10.1-1.sql new file mode 100644 index 000000000..8b4ce1479 --- /dev/null +++ b/src/backend/distributed/sql/udfs/citus_prepare_pg_upgrade/10.1-1.sql @@ -0,0 +1,54 @@ +CREATE OR REPLACE FUNCTION pg_catalog.citus_prepare_pg_upgrade() + RETURNS void + LANGUAGE plpgsql + SET search_path = pg_catalog + AS $cppu$ +BEGIN + -- + -- Drop existing backup tables + -- + DROP TABLE IF EXISTS public.pg_dist_partition; + DROP TABLE IF EXISTS public.pg_dist_shard; + DROP TABLE IF EXISTS public.pg_dist_placement; + DROP TABLE IF EXISTS public.pg_dist_node_metadata; + DROP TABLE IF EXISTS public.pg_dist_node; + DROP TABLE IF EXISTS public.pg_dist_local_group; + DROP TABLE IF EXISTS public.pg_dist_transaction; + DROP TABLE IF EXISTS public.pg_dist_colocation; + DROP TABLE IF EXISTS public.pg_dist_authinfo; + DROP TABLE IF EXISTS public.pg_dist_poolinfo; + DROP TABLE IF EXISTS public.pg_dist_rebalance_strategy; + + -- + -- backup citus catalog tables + -- + CREATE TABLE public.pg_dist_partition AS SELECT * FROM pg_catalog.pg_dist_partition; + CREATE TABLE public.pg_dist_shard AS SELECT * FROM pg_catalog.pg_dist_shard; + CREATE TABLE public.pg_dist_placement AS SELECT * FROM pg_catalog.pg_dist_placement; + CREATE TABLE public.pg_dist_node_metadata AS SELECT * FROM pg_catalog.pg_dist_node_metadata; + CREATE TABLE public.pg_dist_node AS SELECT * FROM pg_catalog.pg_dist_node; + CREATE TABLE public.pg_dist_local_group AS SELECT * FROM pg_catalog.pg_dist_local_group; + CREATE TABLE public.pg_dist_transaction AS SELECT * FROM pg_catalog.pg_dist_transaction; + CREATE TABLE public.pg_dist_colocation AS SELECT * FROM pg_catalog.pg_dist_colocation; + -- enterprise catalog tables + CREATE TABLE public.pg_dist_authinfo AS SELECT * FROM pg_catalog.pg_dist_authinfo; + CREATE TABLE public.pg_dist_poolinfo AS SELECT * FROM pg_catalog.pg_dist_poolinfo; + CREATE TABLE public.pg_dist_rebalance_strategy AS SELECT + name, + default_strategy, + shard_cost_function::regprocedure::text, + node_capacity_function::regprocedure::text, + shard_allowed_on_node_function::regprocedure::text, + default_threshold, + minimum_threshold, + improvement_threshold + FROM pg_catalog.pg_dist_rebalance_strategy; + + -- store upgrade stable identifiers on pg_dist_object catalog + UPDATE citus.pg_dist_object + SET (type, object_names, object_args) = (SELECT * FROM pg_identify_object_as_address(classid, objid, objsubid)); +END; +$cppu$; + +COMMENT ON FUNCTION pg_catalog.citus_prepare_pg_upgrade() + IS 'perform tasks to copy citus settings to a location that could later be restored after pg_upgrade is done'; diff --git a/src/backend/distributed/sql/udfs/citus_prepare_pg_upgrade/latest.sql b/src/backend/distributed/sql/udfs/citus_prepare_pg_upgrade/latest.sql index fc5dab6b1..8b4ce1479 100644 --- a/src/backend/distributed/sql/udfs/citus_prepare_pg_upgrade/latest.sql +++ b/src/backend/distributed/sql/udfs/citus_prepare_pg_upgrade/latest.sql @@ -40,7 +40,8 @@ BEGIN node_capacity_function::regprocedure::text, shard_allowed_on_node_function::regprocedure::text, default_threshold, - minimum_threshold + minimum_threshold, + improvement_threshold FROM pg_catalog.pg_dist_rebalance_strategy; -- store upgrade stable identifiers on pg_dist_object catalog diff --git a/src/backend/distributed/sql/udfs/citus_shards/10.1-1.sql 
b/src/backend/distributed/sql/udfs/citus_shards/10.1-1.sql new file mode 100644 index 000000000..ff98638e7 --- /dev/null +++ b/src/backend/distributed/sql/udfs/citus_shards/10.1-1.sql @@ -0,0 +1,35 @@ +CREATE OR REPLACE VIEW pg_catalog.citus_shards AS +SELECT + pg_dist_shard.logicalrelid AS table_name, + pg_dist_shard.shardid, + shard_name(pg_dist_shard.logicalrelid, pg_dist_shard.shardid) as shard_name, + CASE WHEN partkey IS NOT NULL THEN 'distributed' WHEN repmodel = 't' THEN 'reference' ELSE 'local' END AS citus_table_type, + colocationid AS colocation_id, + pg_dist_node.nodename, + pg_dist_node.nodeport, + size as shard_size +FROM + pg_dist_shard +JOIN + pg_dist_placement +ON + pg_dist_shard.shardid = pg_dist_placement.shardid +JOIN + pg_dist_node +ON + pg_dist_placement.groupid = pg_dist_node.groupid +JOIN + pg_dist_partition +ON + pg_dist_partition.logicalrelid = pg_dist_shard.logicalrelid +LEFT JOIN + (SELECT (regexp_matches(table_name,'_(\d+)$'))[1]::int as shard_id, max(size) as size from citus_shard_sizes() GROUP BY shard_id) as shard_sizes +ON + pg_dist_shard.shardid = shard_sizes.shard_id +WHERE + pg_dist_placement.shardstate = 1 +ORDER BY + pg_dist_shard.logicalrelid::text, shardid +; + +GRANT SELECT ON pg_catalog.citus_shards TO public; diff --git a/src/backend/distributed/sql/udfs/citus_shards/latest.sql b/src/backend/distributed/sql/udfs/citus_shards/latest.sql index 9f09199ba..ff98638e7 100644 --- a/src/backend/distributed/sql/udfs/citus_shards/latest.sql +++ b/src/backend/distributed/sql/udfs/citus_shards/latest.sql @@ -1,5 +1,4 @@ -CREATE OR REPLACE VIEW citus.citus_shards AS -WITH shard_sizes AS (SELECT * FROM pg_catalog.citus_shard_sizes()) +CREATE OR REPLACE VIEW pg_catalog.citus_shards AS SELECT pg_dist_shard.logicalrelid AS table_name, pg_dist_shard.shardid, @@ -8,11 +7,7 @@ SELECT colocationid AS colocation_id, pg_dist_node.nodename, pg_dist_node.nodeport, - (SELECT size FROM shard_sizes WHERE - shard_name(pg_dist_shard.logicalrelid, pg_dist_shard.shardid) = table_name - OR - 'public.' 
|| shard_name(pg_dist_shard.logicalrelid, pg_dist_shard.shardid) = table_name - LIMIT 1) as shard_size + size as shard_size FROM pg_dist_shard JOIN @@ -27,9 +22,14 @@ JOIN pg_dist_partition ON pg_dist_partition.logicalrelid = pg_dist_shard.logicalrelid +LEFT JOIN + (SELECT (regexp_matches(table_name,'_(\d+)$'))[1]::int as shard_id, max(size) as size from citus_shard_sizes() GROUP BY shard_id) as shard_sizes +ON + pg_dist_shard.shardid = shard_sizes.shard_id +WHERE + pg_dist_placement.shardstate = 1 ORDER BY pg_dist_shard.logicalrelid::text, shardid ; -ALTER VIEW citus.citus_shards SET SCHEMA pg_catalog; GRANT SELECT ON pg_catalog.citus_shards TO public; diff --git a/src/backend/distributed/sql/udfs/get_rebalance_progress/10.1-1.sql b/src/backend/distributed/sql/udfs/get_rebalance_progress/10.1-1.sql new file mode 100644 index 000000000..7df399ac1 --- /dev/null +++ b/src/backend/distributed/sql/udfs/get_rebalance_progress/10.1-1.sql @@ -0,0 +1,18 @@ +DROP FUNCTION pg_catalog.get_rebalance_progress(); + +CREATE OR REPLACE FUNCTION pg_catalog.get_rebalance_progress() + RETURNS TABLE(sessionid integer, + table_name regclass, + shardid bigint, + shard_size bigint, + sourcename text, + sourceport int, + targetname text, + targetport int, + progress bigint, + source_shard_size bigint, + target_shard_size bigint) + AS 'MODULE_PATHNAME' + LANGUAGE C STRICT; +COMMENT ON FUNCTION pg_catalog.get_rebalance_progress() + IS 'provides progress information about the ongoing rebalance operations'; diff --git a/src/backend/distributed/sql/udfs/get_rebalance_progress/9.0-1.sql b/src/backend/distributed/sql/udfs/get_rebalance_progress/9.0-1.sql new file mode 100644 index 000000000..00adeec1f --- /dev/null +++ b/src/backend/distributed/sql/udfs/get_rebalance_progress/9.0-1.sql @@ -0,0 +1,17 @@ +-- get_rebalance_progress returns the list of shard placement move operations along with +-- their progressions for ongoing rebalance operations. 
+-- +CREATE OR REPLACE FUNCTION pg_catalog.get_rebalance_progress() + RETURNS TABLE(sessionid integer, + table_name regclass, + shardid bigint, + shard_size bigint, + sourcename text, + sourceport int, + targetname text, + targetport int, + progress bigint) + AS 'MODULE_PATHNAME' + LANGUAGE C STRICT; +COMMENT ON FUNCTION pg_catalog.get_rebalance_progress() + IS 'provides progress information about the ongoing rebalance operations'; diff --git a/src/backend/distributed/sql/udfs/get_rebalance_progress/latest.sql b/src/backend/distributed/sql/udfs/get_rebalance_progress/latest.sql new file mode 100644 index 000000000..7df399ac1 --- /dev/null +++ b/src/backend/distributed/sql/udfs/get_rebalance_progress/latest.sql @@ -0,0 +1,18 @@ +DROP FUNCTION pg_catalog.get_rebalance_progress(); + +CREATE OR REPLACE FUNCTION pg_catalog.get_rebalance_progress() + RETURNS TABLE(sessionid integer, + table_name regclass, + shardid bigint, + shard_size bigint, + sourcename text, + sourceport int, + targetname text, + targetport int, + progress bigint, + source_shard_size bigint, + target_shard_size bigint) + AS 'MODULE_PATHNAME' + LANGUAGE C STRICT; +COMMENT ON FUNCTION pg_catalog.get_rebalance_progress() + IS 'provides progress information about the ongoing rebalance operations'; diff --git a/src/backend/distributed/sql/udfs/get_rebalance_table_shards_plan/10.1-1.sql b/src/backend/distributed/sql/udfs/get_rebalance_table_shards_plan/10.1-1.sql new file mode 100644 index 000000000..8a875ce90 --- /dev/null +++ b/src/backend/distributed/sql/udfs/get_rebalance_table_shards_plan/10.1-1.sql @@ -0,0 +1,27 @@ +-- get_rebalance_table_shards_plan shows the actual events that will be performed +-- if a rebalance operation is performed with the same arguments, which allows users +-- to understand the impact of the change on the overall availability of the application and +-- network traffic. 
+-- +DROP FUNCTION pg_catalog.get_rebalance_table_shards_plan; +CREATE OR REPLACE FUNCTION pg_catalog.get_rebalance_table_shards_plan( + relation regclass default NULL, + threshold float4 default NULL, + max_shard_moves int default 1000000, + excluded_shard_list bigint[] default '{}', + drain_only boolean default false, + rebalance_strategy name default NULL, + improvement_threshold float4 DEFAULT NULL + ) + RETURNS TABLE (table_name regclass, + shardid bigint, + shard_size bigint, + sourcename text, + sourceport int, + targetname text, + targetport int) + AS 'MODULE_PATHNAME' + LANGUAGE C VOLATILE; +COMMENT ON FUNCTION pg_catalog.get_rebalance_table_shards_plan(regclass, float4, int, bigint[], boolean, name, float4) + IS 'returns the list of shard placement moves to be done on a rebalance operation'; + diff --git a/src/backend/distributed/sql/udfs/get_rebalance_table_shards_plan/latest.sql b/src/backend/distributed/sql/udfs/get_rebalance_table_shards_plan/latest.sql index 7970a61dd..8a875ce90 100644 --- a/src/backend/distributed/sql/udfs/get_rebalance_table_shards_plan/latest.sql +++ b/src/backend/distributed/sql/udfs/get_rebalance_table_shards_plan/latest.sql @@ -10,7 +10,8 @@ CREATE OR REPLACE FUNCTION pg_catalog.get_rebalance_table_shards_plan( max_shard_moves int default 1000000, excluded_shard_list bigint[] default '{}', drain_only boolean default false, - rebalance_strategy name default NULL + rebalance_strategy name default NULL, + improvement_threshold float4 DEFAULT NULL ) RETURNS TABLE (table_name regclass, shardid bigint, @@ -21,6 +22,6 @@ CREATE OR REPLACE FUNCTION pg_catalog.get_rebalance_table_shards_plan( targetport int) AS 'MODULE_PATHNAME' LANGUAGE C VOLATILE; -COMMENT ON FUNCTION pg_catalog.get_rebalance_table_shards_plan(regclass, float4, int, bigint[], boolean, name) +COMMENT ON FUNCTION pg_catalog.get_rebalance_table_shards_plan(regclass, float4, int, bigint[], boolean, name, float4) IS 'returns the list of shard placement moves to be done on a rebalance operation'; diff --git a/src/backend/distributed/test/distributed_deadlock_detection.c b/src/backend/distributed/test/distributed_deadlock_detection.c index 6becf863c..84739cb49 100644 --- a/src/backend/distributed/test/distributed_deadlock_detection.c +++ b/src/backend/distributed/test/distributed_deadlock_detection.c @@ -39,6 +39,8 @@ PG_FUNCTION_INFO_V1(get_adjacency_list_wait_graph); Datum get_adjacency_list_wait_graph(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + TupleDesc tupleDescriptor = NULL; HASH_SEQ_STATUS status; @@ -47,8 +49,6 @@ get_adjacency_list_wait_graph(PG_FUNCTION_ARGS) Datum values[2]; bool isNulls[2]; - CheckCitusVersion(ERROR); - Tuplestorestate *tupleStore = SetupTuplestore(fcinfo, &tupleDescriptor); WaitGraph *waitGraph = BuildGlobalWaitGraph(); HTAB *adjacencyList = BuildAdjacencyListsForWaitGraph(waitGraph); diff --git a/src/backend/distributed/test/foreign_key_relationship_query.c b/src/backend/distributed/test/foreign_key_relationship_query.c index a03856f54..bae2e77e0 100644 --- a/src/backend/distributed/test/foreign_key_relationship_query.c +++ b/src/backend/distributed/test/foreign_key_relationship_query.c @@ -77,11 +77,11 @@ drop_constraint_cascade_via_perform_deletion(PG_FUNCTION_ARGS) Datum get_referencing_relation_id_list(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + FuncCallContext *functionContext = NULL; ListCell *foreignRelationCell = NULL; - CheckCitusVersion(ERROR); - /* for the first we call this UDF, we need to populate the result to return set */ if 
(SRF_IS_FIRSTCALL()) { @@ -136,11 +136,11 @@ get_referencing_relation_id_list(PG_FUNCTION_ARGS) Datum get_referenced_relation_id_list(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + FuncCallContext *functionContext = NULL; ListCell *foreignRelationCell = NULL; - CheckCitusVersion(ERROR); - /* for the first we call this UDF, we need to populate the result to return set */ if (SRF_IS_FIRSTCALL()) { diff --git a/src/backend/distributed/test/foreign_key_to_reference_table_rebalance.c b/src/backend/distributed/test/foreign_key_to_reference_table_rebalance.c index 4c5a21379..941c3ad6a 100644 --- a/src/backend/distributed/test/foreign_key_to_reference_table_rebalance.c +++ b/src/backend/distributed/test/foreign_key_to_reference_table_rebalance.c @@ -30,11 +30,11 @@ PG_FUNCTION_INFO_V1(get_foreign_key_to_reference_table_commands); Datum get_foreign_key_to_reference_table_commands(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + FuncCallContext *functionContext = NULL; ListCell *commandsCell = NULL; - CheckCitusVersion(ERROR); - /* for the first we call this UDF, we need to populate the result to return set */ if (SRF_IS_FIRSTCALL()) { diff --git a/src/backend/distributed/test/intermediate_results.c b/src/backend/distributed/test/intermediate_results.c index 22624eb8f..b4f14bca6 100644 --- a/src/backend/distributed/test/intermediate_results.c +++ b/src/backend/distributed/test/intermediate_results.c @@ -34,6 +34,8 @@ PG_FUNCTION_INFO_V1(store_intermediate_result_on_node); Datum store_intermediate_result_on_node(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + text *nodeNameText = PG_GETARG_TEXT_P(0); char *nodeNameString = text_to_cstring(nodeNameText); int nodePort = PG_GETARG_INT32(1); @@ -44,8 +46,6 @@ store_intermediate_result_on_node(PG_FUNCTION_ARGS) bool writeLocalFile = false; ParamListInfo paramListInfo = NULL; - CheckCitusVersion(ERROR); - WorkerNode *workerNode = FindWorkerNodeOrError(nodeNameString, nodePort); /* diff --git a/src/backend/distributed/test/metadata_sync.c b/src/backend/distributed/test/metadata_sync.c index 47e12ce7a..0c8622a83 100644 --- a/src/backend/distributed/test/metadata_sync.c +++ b/src/backend/distributed/test/metadata_sync.c @@ -15,6 +15,7 @@ #include "catalog/pg_type.h" #include "distributed/connection_management.h" +#include "distributed/intermediate_result_pruning.h" #include "distributed/listutils.h" #include "distributed/maintenanced.h" #include "distributed/metadata_sync.h" @@ -104,7 +105,7 @@ wait_until_metadata_sync(PG_FUNCTION_ARGS) } MultiConnection *connection = GetNodeConnection(FORCE_NEW_CONNECTION, - "localhost", PostPortNumber); + LOCAL_HOST_NAME, PostPortNumber); ExecuteCriticalRemoteCommand(connection, "LISTEN " METADATA_SYNC_CHANNEL); int waitFlags = WL_SOCKET_READABLE | WL_TIMEOUT | WL_POSTMASTER_DEATH; diff --git a/src/backend/distributed/test/run_from_same_connection.c b/src/backend/distributed/test/run_from_same_connection.c index 336815f75..e0b7d806c 100644 --- a/src/backend/distributed/test/run_from_same_connection.c +++ b/src/backend/distributed/test/run_from_same_connection.c @@ -17,10 +17,10 @@ #include "access/xact.h" #include "distributed/connection_management.h" +#include "distributed/coordinator_protocol.h" #include "distributed/function_utils.h" #include "distributed/intermediate_result_pruning.h" #include "distributed/lock_graph.h" -#include "distributed/coordinator_protocol.h" #include "distributed/metadata_cache.h" #include "distributed/remote_commands.h" #include "distributed/run_from_same_connection.h" @@ -83,13 +83,13 @@ 
AllowNonIdleTransactionOnXactHandling(void) Datum start_session_level_connection_to_node(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + text *nodeName = PG_GETARG_TEXT_P(0); uint32 nodePort = PG_GETARG_UINT32(1); char *nodeNameString = text_to_cstring(nodeName); int connectionFlags = 0; - CheckCitusVersion(ERROR); - if (singleConnection != NULL && (strcmp(singleConnection->hostname, nodeNameString) != 0 || singleConnection->port != nodePort)) diff --git a/src/backend/distributed/test/shard_rebalancer.c b/src/backend/distributed/test/shard_rebalancer.c index 67785c30e..4cccd851d 100644 --- a/src/backend/distributed/test/shard_rebalancer.c +++ b/src/backend/distributed/test/shard_rebalancer.c @@ -21,6 +21,7 @@ #include "distributed/connection_management.h" #include "distributed/listutils.h" #include "distributed/multi_physical_planner.h" +#include "distributed/shard_cleaner.h" #include "distributed/shard_rebalancer.h" #include "funcapi.h" #include "miscadmin.h" @@ -50,6 +51,7 @@ static ShardCost GetShardCost(uint64 shardId, void *context); PG_FUNCTION_INFO_V1(shard_placement_rebalance_array); PG_FUNCTION_INFO_V1(shard_placement_replication_array); PG_FUNCTION_INFO_V1(worker_node_responsive); +PG_FUNCTION_INFO_V1(run_try_drop_marked_shards); typedef struct ShardPlacementTestInfo { @@ -71,6 +73,17 @@ typedef struct RebalancePlanContext List *shardPlacementTestInfoList; } RebalancePlacementContext; +/* + * run_try_drop_marked_shards is a wrapper to run TryDropOrphanedShards. + */ +Datum +run_try_drop_marked_shards(PG_FUNCTION_ARGS) +{ + bool waitForLocks = false; + TryDropOrphanedShards(waitForLocks); + PG_RETURN_VOID(); +} + /* * shard_placement_rebalance_array returns a list of operations which can make a @@ -89,6 +102,7 @@ shard_placement_rebalance_array(PG_FUNCTION_ARGS) float threshold = PG_GETARG_FLOAT4(2); int32 maxShardMoves = PG_GETARG_INT32(3); bool drainOnly = PG_GETARG_BOOL(4); + float utilizationImproventThreshold = PG_GETARG_FLOAT4(5); List *workerNodeList = NIL; List *shardPlacementListList = NIL; @@ -143,6 +157,7 @@ shard_placement_rebalance_array(PG_FUNCTION_ARGS) threshold, maxShardMoves, drainOnly, + utilizationImproventThreshold, &rebalancePlanFunctions); ArrayType *placementUpdateJsonArray = PlacementUpdateListToJsonArray( placementUpdateList); diff --git a/src/backend/distributed/transaction/backend_data.c b/src/backend/distributed/transaction/backend_data.c index 8026e50c1..45c2ff2a5 100644 --- a/src/backend/distributed/transaction/backend_data.c +++ b/src/backend/distributed/transaction/backend_data.c @@ -106,6 +106,8 @@ PG_FUNCTION_INFO_V1(get_all_active_transactions); Datum assign_distributed_transaction_id(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + Oid userId = GetUserId(); /* prepare data before acquiring spinlock to protect against errors */ @@ -113,8 +115,6 @@ assign_distributed_transaction_id(PG_FUNCTION_ARGS) uint64 transactionNumber = PG_GETARG_INT64(1); TimestampTz timestamp = PG_GETARG_TIMESTAMPTZ(2); - CheckCitusVersion(ERROR); - /* MyBackendData should always be avaliable, just out of paranoia */ if (!MyBackendData) { @@ -166,14 +166,14 @@ assign_distributed_transaction_id(PG_FUNCTION_ARGS) Datum get_current_transaction_id(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + TupleDesc tupleDescriptor = NULL; Datum values[5]; bool isNulls[5]; - CheckCitusVersion(ERROR); - /* build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupleDescriptor) != TYPEFUNC_COMPOSITE) { @@ -225,12 +225,13 @@ 
get_current_transaction_id(PG_FUNCTION_ARGS) Datum get_global_active_transactions(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + TupleDesc tupleDescriptor = NULL; List *workerNodeList = ActivePrimaryNonCoordinatorNodeList(NoLock); List *connectionList = NIL; StringInfo queryToSend = makeStringInfo(); - CheckCitusVersion(ERROR); Tuplestorestate *tupleStore = SetupTuplestore(fcinfo, &tupleDescriptor); appendStringInfo(queryToSend, GET_ACTIVE_TRANSACTION_QUERY); @@ -336,9 +337,9 @@ get_global_active_transactions(PG_FUNCTION_ARGS) Datum get_all_active_transactions(PG_FUNCTION_ARGS) { - TupleDesc tupleDescriptor = NULL; - CheckCitusVersion(ERROR); + + TupleDesc tupleDescriptor = NULL; Tuplestorestate *tupleStore = SetupTuplestore(fcinfo, &tupleDescriptor); StoreAllActiveTransactions(tupleStore, tupleDescriptor); diff --git a/src/backend/distributed/transaction/relation_access_tracking.c b/src/backend/distributed/transaction/relation_access_tracking.c index 2e3941486..e534cb1af 100644 --- a/src/backend/distributed/transaction/relation_access_tracking.c +++ b/src/backend/distributed/transaction/relation_access_tracking.c @@ -793,7 +793,8 @@ CheckConflictingRelationAccesses(Oid relationId, ShardPlacementAccessType access "foreign keys. Any parallel modification to " "those hash distributed tables in the same " "transaction can only be executed in sequential query " - "execution mode", relationName))); + "execution mode", + relationName != NULL ? relationName : ""))); /* * Switching to sequential mode is admittedly confusing and, could be useless diff --git a/src/backend/distributed/transaction/transaction_management.c b/src/backend/distributed/transaction/transaction_management.c index 93d6351b1..88b820c0e 100644 --- a/src/backend/distributed/transaction/transaction_management.c +++ b/src/backend/distributed/transaction/transaction_management.c @@ -101,7 +101,7 @@ MemoryContext CommitContext = NULL; * do 2PC on the remote connections that did a modification. * * As a variable name ShouldCoordinatedTransactionUse2PC could - * be improved. We use CoordinatedTransactionShouldUse2PC() as the + * be improved. We use Use2PCForCoordinatedTransaction() as the * public API function, hence couldn't come up with a better name * for the underlying variable at the moment. */ @@ -190,14 +190,14 @@ InCoordinatedTransaction(void) /* - * CoordinatedTransactionShouldUse2PC() signals that the current coordinated + * Use2PCForCoordinatedTransaction() signals that the current coordinated * transaction should use 2PC to commit. * * Note that even if 2PC is enabled, it is only used for connections that make * modification (DML or DDL). 
*/ void -CoordinatedTransactionShouldUse2PC(void) +Use2PCForCoordinatedTransaction(void) { Assert(InCoordinatedTransaction()); diff --git a/src/backend/distributed/transaction/worker_transaction.c b/src/backend/distributed/transaction/worker_transaction.c index b8422bfcc..891c01e76 100644 --- a/src/backend/distributed/transaction/worker_transaction.c +++ b/src/backend/distributed/transaction/worker_transaction.c @@ -96,7 +96,7 @@ SendCommandToWorkerAsUser(const char *nodeName, int32 nodePort, const char *node uint32 connectionFlags = 0; UseCoordinatedTransaction(); - CoordinatedTransactionShouldUse2PC(); + Use2PCForCoordinatedTransaction(); MultiConnection *transactionConnection = GetNodeUserDatabaseConnection( connectionFlags, nodeName, @@ -404,7 +404,7 @@ SendCommandToWorkersParamsInternal(TargetWorkerSet targetWorkerSet, const char * List *workerNodeList = TargetWorkerSetNodeList(targetWorkerSet, ShareLock); UseCoordinatedTransaction(); - CoordinatedTransactionShouldUse2PC(); + Use2PCForCoordinatedTransaction(); /* open connections in parallel */ WorkerNode *workerNode = NULL; diff --git a/src/backend/distributed/utils/citus_copyfuncs.c b/src/backend/distributed/utils/citus_copyfuncs.c index 8e725a7d0..fba73445f 100644 --- a/src/backend/distributed/utils/citus_copyfuncs.c +++ b/src/backend/distributed/utils/citus_copyfuncs.c @@ -135,6 +135,7 @@ CopyNodeDistributedPlan(COPYFUNC_ARGS) COPY_NODE_FIELD(subPlanList); COPY_NODE_FIELD(usedSubPlanNodeList); COPY_SCALAR_FIELD(fastPathRouterPlan); + COPY_SCALAR_FIELD(numberOfTimesExecuted); COPY_NODE_FIELD(planningError); } diff --git a/src/backend/distributed/utils/citus_outfuncs.c b/src/backend/distributed/utils/citus_outfuncs.c index f8bd9bbc7..a3743c281 100644 --- a/src/backend/distributed/utils/citus_outfuncs.c +++ b/src/backend/distributed/utils/citus_outfuncs.c @@ -198,6 +198,7 @@ OutDistributedPlan(OUTFUNC_ARGS) WRITE_NODE_FIELD(subPlanList); WRITE_NODE_FIELD(usedSubPlanNodeList); WRITE_BOOL_FIELD(fastPathRouterPlan); + WRITE_UINT_FIELD(numberOfTimesExecuted); WRITE_NODE_FIELD(planningError); } diff --git a/src/backend/distributed/utils/colocation_utils.c b/src/backend/distributed/utils/colocation_utils.c index b674350ca..91c9c2882 100644 --- a/src/backend/distributed/utils/colocation_utils.c +++ b/src/backend/distributed/utils/colocation_utils.c @@ -70,6 +70,9 @@ PG_FUNCTION_INFO_V1(update_distributed_table_colocation); Datum mark_tables_colocated(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + EnsureCoordinator(); + Oid sourceRelationId = PG_GETARG_OID(0); ArrayType *relationIdArrayObject = PG_GETARG_ARRAYTYPE_P(1); @@ -80,8 +83,6 @@ mark_tables_colocated(PG_FUNCTION_ARGS) "operation"))); } - CheckCitusVersion(ERROR); - EnsureCoordinator(); EnsureTableOwner(sourceRelationId); Datum *relationIdDatumArray = DeconstructArrayObject(relationIdArrayObject); @@ -108,11 +109,12 @@ mark_tables_colocated(PG_FUNCTION_ARGS) Datum update_distributed_table_colocation(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + EnsureCoordinator(); + Oid targetRelationId = PG_GETARG_OID(0); text *colocateWithTableNameText = PG_GETARG_TEXT_P(1); - CheckCitusVersion(ERROR); - EnsureCoordinator(); EnsureTableOwner(targetRelationId); char *colocateWithTableName = text_to_cstring(colocateWithTableNameText); diff --git a/src/backend/distributed/utils/distribution_column.c b/src/backend/distributed/utils/distribution_column.c index 2008eddb0..947740096 100644 --- a/src/backend/distributed/utils/distribution_column.c +++ 
b/src/backend/distributed/utils/distribution_column.c @@ -49,12 +49,12 @@ PG_FUNCTION_INFO_V1(column_to_column_name); Datum column_name_to_column(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + Oid relationId = PG_GETARG_OID(0); text *columnText = PG_GETARG_TEXT_P(1); char *columnName = text_to_cstring(columnText); - CheckCitusVersion(ERROR); - Relation relation = relation_open(relationId, AccessShareLock); Var *column = BuildDistributionKeyFromColumnName(relation, columnName); @@ -100,13 +100,13 @@ column_name_to_column_id(PG_FUNCTION_ARGS) Datum column_to_column_name(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + Oid relationId = PG_GETARG_OID(0); text *columnNodeText = PG_GETARG_TEXT_P(1); char *columnNodeString = text_to_cstring(columnNodeText); - CheckCitusVersion(ERROR); - char *columnName = ColumnToColumnName(relationId, columnNodeString); text *columnText = cstring_to_text(columnName); diff --git a/src/backend/distributed/utils/foreign_key_relationship.c b/src/backend/distributed/utils/foreign_key_relationship.c index 770a7175d..8996feb15 100644 --- a/src/backend/distributed/utils/foreign_key_relationship.c +++ b/src/backend/distributed/utils/foreign_key_relationship.c @@ -32,6 +32,7 @@ #if PG_VERSION_NUM >= PG_VERSION_13 #include "common/hashfn.h" #endif +#include "utils/inval.h" #include "utils/memutils.h" @@ -82,6 +83,7 @@ static ForeignConstraintRelationshipNode * GetRelationshipNodeForRelationId(Oid relationId, bool *isFound); static void CreateForeignConstraintRelationshipGraph(void); +static bool IsForeignConstraintRelationshipGraphValid(void); static List * GetNeighbourList(ForeignConstraintRelationshipNode *relationshipNode, bool isReferencing); static List * GetRelationIdsFromRelationshipNodeList(List *fKeyRelationshipNodeList); @@ -348,9 +350,15 @@ CreateForeignConstraintRelationshipGraph() /* * IsForeignConstraintGraphValid check whether there is a valid graph. */ -bool +static bool IsForeignConstraintRelationshipGraphValid() { + /* + * We might have some concurrent metadata changes. In order to get the changes, + * we first need to accept the cache invalidation messages. 
+ */ + AcceptInvalidationMessages(); + if (fConstraintRelationshipGraph != NULL && fConstraintRelationshipGraph->isValid) { return true; diff --git a/src/backend/distributed/utils/maintenanced.c b/src/backend/distributed/utils/maintenanced.c index f595d06bf..2295830d6 100644 --- a/src/backend/distributed/utils/maintenanced.c +++ b/src/backend/distributed/utils/maintenanced.c @@ -93,7 +93,7 @@ typedef struct MaintenanceDaemonDBData /* config variable for distributed deadlock detection timeout */ double DistributedDeadlockDetectionTimeoutFactor = 2.0; int Recover2PCInterval = 60000; -int DeferShardDeleteInterval = 60000; +int DeferShardDeleteInterval = 15000; /* config variables for metadata sync timeout */ int MetadataSyncInterval = 60000; @@ -644,8 +644,8 @@ CitusMaintenanceDaemonMain(Datum main_arg) */ lastShardCleanTime = GetCurrentTimestamp(); - bool waitForCleanupLock = false; - numberOfDroppedShards = TryDropMarkedShards(waitForCleanupLock); + bool waitForLocks = false; + numberOfDroppedShards = TryDropOrphanedShards(waitForLocks); } CommitTransactionCommand(); diff --git a/src/backend/distributed/utils/reference_table_utils.c b/src/backend/distributed/utils/reference_table_utils.c index 28162a4b0..47e9a99df 100644 --- a/src/backend/distributed/utils/reference_table_utils.c +++ b/src/backend/distributed/utils/reference_table_utils.c @@ -193,7 +193,7 @@ EnsureReferenceTablesExistOnAllNodesExtended(char transferMode) int connectionFlags = OUTSIDE_TRANSACTION; MultiConnection *connection = GetNodeUserDatabaseConnection( - connectionFlags, "localhost", PostPortNumber, + connectionFlags, LocalHostName, PostPortNumber, userName, NULL); if (PQstatus(connection->pgConn) == CONNECTION_OK) diff --git a/src/backend/distributed/utils/resource_lock.c b/src/backend/distributed/utils/resource_lock.c index 602157339..3861c2ac1 100644 --- a/src/backend/distributed/utils/resource_lock.c +++ b/src/backend/distributed/utils/resource_lock.c @@ -97,11 +97,11 @@ PG_FUNCTION_INFO_V1(lock_relation_if_exists); Datum lock_shard_metadata(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + LOCKMODE lockMode = IntToLockMode(PG_GETARG_INT32(0)); ArrayType *shardIdArrayObject = PG_GETARG_ARRAYTYPE_P(1); - CheckCitusVersion(ERROR); - if (ARR_NDIM(shardIdArrayObject) == 0) { ereport(ERROR, (errmsg("no locks specified"))); @@ -134,11 +134,11 @@ lock_shard_metadata(PG_FUNCTION_ARGS) Datum lock_shard_resources(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + LOCKMODE lockMode = IntToLockMode(PG_GETARG_INT32(0)); ArrayType *shardIdArrayObject = PG_GETARG_ARRAYTYPE_P(1); - CheckCitusVersion(ERROR); - if (ARR_NDIM(shardIdArrayObject) == 0) { ereport(ERROR, (errmsg("no locks specified"))); diff --git a/src/backend/distributed/utils/role.c b/src/backend/distributed/utils/role.c index 9a714b1d1..667ea5fc5 100644 --- a/src/backend/distributed/utils/role.c +++ b/src/backend/distributed/utils/role.c @@ -73,6 +73,11 @@ alter_role_if_exists(PG_FUNCTION_ARGS) Datum worker_create_or_alter_role(PG_FUNCTION_ARGS) { + if (PG_ARGISNULL(0)) + { + ereport(ERROR, (errmsg("role name cannot be NULL"))); + } + text *rolenameText = PG_GETARG_TEXT_P(0); const char *rolename = text_to_cstring(rolenameText); diff --git a/src/backend/distributed/worker/worker_data_fetch_protocol.c b/src/backend/distributed/worker/worker_data_fetch_protocol.c index 823a5035f..6a7581bef 100644 --- a/src/backend/distributed/worker/worker_data_fetch_protocol.c +++ b/src/backend/distributed/worker/worker_data_fetch_protocol.c @@ -67,7 +67,6 @@ static void 
CitusDeleteFile(const char *filename); static bool check_log_statement(List *stmt_list); static void AlterSequenceMinMax(Oid sequenceId, char *schemaName, char *sequenceName, Oid sequenceTypeId); -static void SetDefElemArg(AlterSeqStmt *statement, const char *name, Node *arg); /* exports for SQL callable functions */ @@ -94,6 +93,8 @@ PG_FUNCTION_INFO_V1(master_expire_table_cache); Datum worker_fetch_partition_file(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + uint64 jobId = PG_GETARG_INT64(0); uint32 partitionTaskId = PG_GETARG_UINT32(1); uint32 partitionFileId = PG_GETARG_UINT32(2); @@ -115,8 +116,6 @@ worker_fetch_partition_file(PG_FUNCTION_ARGS) */ bool taskDirectoryExists = DirectoryExists(taskDirectoryName); - CheckCitusVersion(ERROR); - if (!taskDirectoryExists) { InitTaskDirectory(jobId, upstreamTaskId); @@ -383,6 +382,8 @@ CitusDeleteFile(const char *filename) Datum worker_apply_shard_ddl_command(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + uint64 shardId = PG_GETARG_INT64(0); text *schemaNameText = PG_GETARG_TEXT_P(1); text *ddlCommandText = PG_GETARG_TEXT_P(2); @@ -391,8 +392,6 @@ worker_apply_shard_ddl_command(PG_FUNCTION_ARGS) const char *ddlCommand = text_to_cstring(ddlCommandText); Node *ddlCommandNode = ParseTreeNode(ddlCommand); - CheckCitusVersion(ERROR); - /* extend names in ddl command and apply extended command */ RelayEventExtendNames(ddlCommandNode, schemaName, shardId); ProcessUtilityParseTree(ddlCommandNode, ddlCommand, PROCESS_UTILITY_QUERY, NULL, @@ -410,6 +409,8 @@ worker_apply_shard_ddl_command(PG_FUNCTION_ARGS) Datum worker_apply_inter_shard_ddl_command(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + uint64 leftShardId = PG_GETARG_INT64(0); text *leftShardSchemaNameText = PG_GETARG_TEXT_P(1); uint64 rightShardId = PG_GETARG_INT64(2); @@ -421,8 +422,6 @@ worker_apply_inter_shard_ddl_command(PG_FUNCTION_ARGS) const char *ddlCommand = text_to_cstring(ddlCommandText); Node *ddlCommandNode = ParseTreeNode(ddlCommand); - CheckCitusVersion(ERROR); - /* extend names in ddl command and apply extended command */ RelayEventExtendNamesForInterShardCommands(ddlCommandNode, leftShardId, leftShardSchemaName, rightShardId, @@ -443,6 +442,8 @@ worker_apply_inter_shard_ddl_command(PG_FUNCTION_ARGS) Datum worker_apply_sequence_command(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + text *commandText = PG_GETARG_TEXT_P(0); Oid sequenceTypeId = PG_GETARG_OID(1); const char *commandString = text_to_cstring(commandText); @@ -450,8 +451,6 @@ worker_apply_sequence_command(PG_FUNCTION_ARGS) NodeTag nodeType = nodeTag(commandNode); - CheckCitusVersion(ERROR); - if (nodeType != T_CreateSeqStmt) { ereport(ERROR, @@ -579,6 +578,8 @@ ParseTreeRawStmt(const char *ddlCommand) Datum worker_append_table_to_shard(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + text *shardQualifiedNameText = PG_GETARG_TEXT_P(0); text *sourceQualifiedNameText = PG_GETARG_TEXT_P(1); text *sourceNodeNameText = PG_GETARG_TEXT_P(2); @@ -596,8 +597,6 @@ worker_append_table_to_shard(PG_FUNCTION_ARGS) Oid savedUserId = InvalidOid; int savedSecurityContext = 0; - CheckCitusVersion(ERROR); - /* We extract schema names and table names from qualified names */ DeconstructQualifiedName(shardQualifiedNameList, &shardSchemaName, &shardTableName); @@ -738,12 +737,10 @@ AlterSequenceMinMax(Oid sequenceId, char *schemaName, char *sequenceName, if (sequenceTypeId == INT4OID) { valueBitLength = 28; - sequenceMaxValue = INT_MAX; } else if (sequenceTypeId == INT2OID) { valueBitLength = 12; - sequenceMaxValue = SHRT_MAX; 
} /* calculate min/max values that the sequence can generate in this worker */ @@ -793,7 +790,7 @@ AlterSequenceMinMax(Oid sequenceId, char *schemaName, char *sequenceName, * If a DefElem with the given defname does not exist it is created and * added to the AlterSeqStmt. */ -static void +void SetDefElemArg(AlterSeqStmt *statement, const char *name, Node *arg) { DefElem *defElem = NULL; diff --git a/src/backend/distributed/worker/worker_drop_protocol.c b/src/backend/distributed/worker/worker_drop_protocol.c index 682a2d95c..165eb13d1 100644 --- a/src/backend/distributed/worker/worker_drop_protocol.c +++ b/src/backend/distributed/worker/worker_drop_protocol.c @@ -49,15 +49,15 @@ PG_FUNCTION_INFO_V1(worker_drop_distributed_table); Datum worker_drop_distributed_table(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + EnsureSuperUser(); + text *relationName = PG_GETARG_TEXT_P(0); Oid relationId = ResolveRelationId(relationName, true); ObjectAddress distributedTableObject = { InvalidOid, InvalidOid, 0 }; char relationKind = '\0'; - CheckCitusVersion(ERROR); - EnsureSuperUser(); - if (!OidIsValid(relationId)) { ereport(NOTICE, (errmsg("relation %s does not exist, skipping", diff --git a/src/backend/distributed/worker/worker_file_access_protocol.c b/src/backend/distributed/worker/worker_file_access_protocol.c index b2a473dea..5a5535560 100644 --- a/src/backend/distributed/worker/worker_file_access_protocol.c +++ b/src/backend/distributed/worker/worker_file_access_protocol.c @@ -35,13 +35,13 @@ PG_FUNCTION_INFO_V1(worker_find_block_local_path); Datum worker_foreign_file_path(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + text *foreignTableName = PG_GETARG_TEXT_P(0); text *foreignFilePath = NULL; Oid relationId = ResolveRelationId(foreignTableName, false); ForeignTable *foreignTable = GetForeignTable(relationId); - CheckCitusVersion(ERROR); - DefElem *option = NULL; foreach_ptr(option, foreignTable->options) { @@ -75,6 +75,8 @@ worker_foreign_file_path(PG_FUNCTION_ARGS) Datum worker_find_block_local_path(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + int64 blockId = PG_GETARG_INT64(0); ArrayType *dataDirectoryObject = PG_GETARG_ARRAYTYPE_P(1); @@ -82,8 +84,6 @@ worker_find_block_local_path(PG_FUNCTION_ARGS) (void) blockId; (void) dataDirectoryObject; - CheckCitusVersion(ERROR); - ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("called function is currently unsupported"))); diff --git a/src/backend/distributed/worker/worker_merge_protocol.c b/src/backend/distributed/worker/worker_merge_protocol.c index 8b1f07870..a539a9c90 100644 --- a/src/backend/distributed/worker/worker_merge_protocol.c +++ b/src/backend/distributed/worker/worker_merge_protocol.c @@ -68,14 +68,14 @@ PG_FUNCTION_INFO_V1(worker_repartition_cleanup); Datum worker_create_schema(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + uint64 jobId = PG_GETARG_INT64(0); text *ownerText = PG_GETARG_TEXT_P(1); char *ownerString = TextDatumGetCString(ownerText); StringInfo jobSchemaName = JobSchemaName(jobId); - CheckCitusVersion(ERROR); - bool schemaExists = JobSchemaExists(jobSchemaName); if (!schemaExists) { @@ -144,12 +144,12 @@ CreateJobSchema(StringInfo schemaName, char *schemaOwner) Datum worker_repartition_cleanup(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + uint64 jobId = PG_GETARG_INT64(0); StringInfo jobDirectoryName = JobDirectoryName(jobId); StringInfo jobSchemaName = JobSchemaName(jobId); - CheckCitusVersion(ERROR); - Oid schemaId = get_namespace_oid(jobSchemaName->data, false); EnsureSchemaOwner(schemaId); 
@@ -173,6 +173,8 @@ worker_repartition_cleanup(PG_FUNCTION_ARGS) Datum worker_merge_files_into_table(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + uint64 jobId = PG_GETARG_INT64(0); uint32 taskId = PG_GETARG_UINT32(1); ArrayType *columnNameObject = PG_GETARG_ARRAYTYPE_P(2); @@ -189,8 +191,6 @@ worker_merge_files_into_table(PG_FUNCTION_ARGS) int32 columnNameCount = ArrayObjectCount(columnNameObject); int32 columnTypeCount = ArrayObjectCount(columnTypeObject); - CheckCitusVersion(ERROR); - if (columnNameCount != columnTypeCount) { ereport(ERROR, (errmsg("column name array size: %d and type array size: %d" @@ -264,11 +264,11 @@ worker_merge_files_and_run_query(PG_FUNCTION_ARGS) Datum worker_cleanup_job_schema_cache(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + ScanKey scanKey = NULL; int scanKeyCount = 0; - CheckCitusVersion(ERROR); - Relation pgNamespace = table_open(NamespaceRelationId, AccessExclusiveLock); TableScanDesc scanDescriptor = table_beginscan_catalog(pgNamespace, scanKeyCount, scanKey); diff --git a/src/backend/distributed/worker/worker_partition_protocol.c b/src/backend/distributed/worker/worker_partition_protocol.c index 0da3a5bb0..7dc6c75ee 100644 --- a/src/backend/distributed/worker/worker_partition_protocol.c +++ b/src/backend/distributed/worker/worker_partition_protocol.c @@ -109,6 +109,8 @@ PG_FUNCTION_INFO_V1(worker_hash_partition_table); Datum worker_range_partition_table(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + uint64 jobId = PG_GETARG_INT64(0); uint32 taskId = PG_GETARG_UINT32(1); text *filterQueryText = PG_GETARG_TEXT_P(2); @@ -130,8 +132,6 @@ worker_range_partition_table(PG_FUNCTION_ARGS) /* first check that array element's and partition column's types match */ Oid splitPointType = ARR_ELEMTYPE(splitPointObject); - CheckCitusVersion(ERROR); - if (splitPointType != partitionColumnType) { ereport(ERROR, (errmsg("partition column type %u and split point type %u " @@ -188,6 +188,8 @@ worker_range_partition_table(PG_FUNCTION_ARGS) Datum worker_hash_partition_table(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + uint64 jobId = PG_GETARG_INT64(0); uint32 taskId = PG_GETARG_UINT32(1); text *filterQueryText = PG_GETARG_TEXT_P(2); @@ -209,8 +211,6 @@ worker_hash_partition_table(PG_FUNCTION_ARGS) Datum *hashRangeArray = DeconstructArrayObject(hashRangeObject); int32 partitionCount = ArrayObjectCount(hashRangeObject); - CheckCitusVersion(ERROR); - HashPartitionContext *partitionContext = palloc0(sizeof(HashPartitionContext)); partitionContext->syntheticShardIntervalArray = SyntheticShardIntervalArrayForShardMinValues(hashRangeArray, partitionCount); diff --git a/src/backend/distributed/worker/worker_shard_visibility.c b/src/backend/distributed/worker/worker_shard_visibility.c index e0c0d0747..05b8e0bad 100644 --- a/src/backend/distributed/worker/worker_shard_visibility.c +++ b/src/backend/distributed/worker/worker_shard_visibility.c @@ -14,6 +14,7 @@ #include "catalog/pg_class.h" #include "distributed/metadata_cache.h" #include "distributed/coordinator_protocol.h" +#include "distributed/local_executor.h" #include "distributed/worker_protocol.h" #include "distributed/worker_shard_visibility.h" #include "nodes/nodeFuncs.h" @@ -23,6 +24,7 @@ /* Config variable managed via guc.c */ bool OverrideTableVisibility = true; +bool EnableManualChangesToShards = false; static bool ReplaceTableVisibleFunctionWalker(Node *inputNode); @@ -38,12 +40,22 @@ PG_FUNCTION_INFO_V1(relation_is_a_known_shard); Datum relation_is_a_known_shard(PG_FUNCTION_ARGS) { - Oid relationId = 
PG_GETARG_OID(0); - bool onlySearchPath = true; - CheckCitusVersion(ERROR); - PG_RETURN_BOOL(RelationIsAKnownShard(relationId, onlySearchPath)); + Oid relationId = PG_GETARG_OID(0); + + if (!RelationIsVisible(relationId)) + { + /* + * Relation is not on the search path. + * + * TODO: it might be nicer to add a separate check in the + * citus_shards_on_worker views where this UDF is used. + */ + PG_RETURN_BOOL(false); + } + + PG_RETURN_BOOL(RelationIsAKnownShard(relationId)); } @@ -55,11 +67,10 @@ relation_is_a_known_shard(PG_FUNCTION_ARGS) Datum citus_table_is_visible(PG_FUNCTION_ARGS) { + CheckCitusVersion(ERROR); + Oid relationId = PG_GETARG_OID(0); char relKind = '\0'; - bool onlySearchPath = true; - - CheckCitusVersion(ERROR); /* * We don't want to deal with not valid/existing relations @@ -70,7 +81,13 @@ citus_table_is_visible(PG_FUNCTION_ARGS) PG_RETURN_NULL(); } - if (RelationIsAKnownShard(relationId, onlySearchPath)) + if (!RelationIsVisible(relationId)) + { + /* relation is not on the search path */ + PG_RETURN_BOOL(false); + } + + if (RelationIsAKnownShard(relationId)) { /* * If the input relation is an index we simply replace the @@ -104,28 +121,53 @@ citus_table_is_visible(PG_FUNCTION_ARGS) void ErrorIfRelationIsAKnownShard(Oid relationId) { - /* search the relation in all schemas */ - bool onlySearchPath = false; - if (!RelationIsAKnownShard(relationId, onlySearchPath)) + if (!RelationIsAKnownShard(relationId)) { return; } const char *relationName = get_rel_name(relationId); + ereport(ERROR, (errmsg("relation \"%s\" is a shard relation ", relationName))); } +/* + * ErrorIfIllegallyChangingKnownShard errors out if the relation with relationId is + * a known shard and manual changes on known shards are disabled. This is + * valid for only non-citus (external) connections. + */ +void +ErrorIfIllegallyChangingKnownShard(Oid relationId) +{ + if (LocalExecutorLevel > 0 || IsCitusInitiatedRemoteBackend() || + EnableManualChangesToShards) + { + return; + } + + if (RelationIsAKnownShard(relationId)) + { + const char *relationName = get_rel_name(relationId); + ereport(ERROR, (errmsg("cannot modify \"%s\" because it is a shard of " + "a distributed table", + relationName), + errhint("Use the distributed table or set " + "citus.enable_manual_changes_to_shards to on " + "to modify shards directly"))); + } +} + + /* * RelationIsAKnownShard gets a relationId, check whether it's a shard of - * any distributed table. If onlySearchPath is true, then it searches - * the current search path. + * any distributed table. * * We can only do that in MX since both the metadata and tables are only * present there. 
*/ bool -RelationIsAKnownShard(Oid shardRelationId, bool onlySearchPath) +RelationIsAKnownShard(Oid shardRelationId) { bool missingOk = true; char relKind = '\0'; @@ -159,12 +201,6 @@ RelationIsAKnownShard(Oid shardRelationId, bool onlySearchPath) } relation_close(relation, NoLock); - /* we're not interested in the relations that are not in the search path */ - if (!RelationIsVisible(shardRelationId) && onlySearchPath) - { - return false; - } - /* * If the input relation is an index we simply replace the * relationId with the corresponding relation to hide indexes diff --git a/src/backend/distributed/worker/worker_truncate_trigger_protocol.c b/src/backend/distributed/worker/worker_truncate_trigger_protocol.c index c4149dca2..9c7c5af78 100644 --- a/src/backend/distributed/worker/worker_truncate_trigger_protocol.c +++ b/src/backend/distributed/worker/worker_truncate_trigger_protocol.c @@ -35,10 +35,10 @@ PG_FUNCTION_INFO_V1(worker_create_truncate_trigger); Datum worker_create_truncate_trigger(PG_FUNCTION_ARGS) { - Oid relationId = PG_GETARG_OID(0); - - EnsureSuperUser(); CheckCitusVersion(ERROR); + EnsureSuperUser(); + + Oid relationId = PG_GETARG_OID(0); /* Create the truncate trigger */ CreateTruncateTrigger(relationId); diff --git a/src/include/distributed/adaptive_executor.h b/src/include/distributed/adaptive_executor.h index 3affd1877..0a3768177 100644 --- a/src/include/distributed/adaptive_executor.h +++ b/src/include/distributed/adaptive_executor.h @@ -8,8 +8,11 @@ extern bool ForceMaxQueryParallelization; extern int MaxAdaptiveExecutorPoolSize; extern bool EnableBinaryProtocol; + /* GUC, number of ms to wait between opening connections to the same worker */ extern int ExecutorSlowStartInterval; +extern bool EnableCostBasedConnectionEstablishment; +extern bool PreventIncompleteConnectionEstablishment; extern bool ShouldRunTasksSequentially(List *taskList); extern uint64 ExecuteUtilityTaskList(List *utilityTaskList, bool localExecutionSupported); diff --git a/src/include/distributed/citus_ruleutils.h b/src/include/distributed/citus_ruleutils.h index 27cfb0a4b..04529e287 100644 --- a/src/include/distributed/citus_ruleutils.h +++ b/src/include/distributed/citus_ruleutils.h @@ -42,6 +42,7 @@ extern char * pg_get_indexclusterdef_string(Oid indexRelationId); extern bool contain_nextval_expression_walker(Node *node, void *context); extern char * pg_get_replica_identity_command(Oid tableRelationId); extern const char * RoleSpecString(RoleSpec *spec, bool withQuoteIdentifier); +extern void EnsureSequenceTypeSupported(Oid relationId, AttrNumber attnum, Oid seqTypId); /* Function declarations for version dependent PostgreSQL ruleutils functions */ extern void pg_get_query_def(Query *query, StringInfo buffer); diff --git a/src/include/distributed/commands.h b/src/include/distributed/commands.h index a5070f2ad..1965e7725 100644 --- a/src/include/distributed/commands.h +++ b/src/include/distributed/commands.h @@ -120,7 +120,7 @@ extern List * PreprocessClusterStmt(Node *node, const char *clusterCommand, ProcessUtilityContext processUtilityContext); /* index.c */ -typedef void (*PGIndexProcessor)(Form_pg_index, List **); +typedef void (*PGIndexProcessor)(Form_pg_index, List **, int); /* call.c */ @@ -149,6 +149,13 @@ extern char * GenerateBackupNameForCollationCollision(const ObjectAddress *addre extern ObjectAddress DefineCollationStmtObjectAddress(Node *stmt, bool missing_ok); extern List * PostprocessDefineCollationStmt(Node *stmt, const char *queryString); +/* database.c - forward declarations 
*/ +extern List * PreprocessAlterDatabaseOwnerStmt(Node *node, const char *queryString, + ProcessUtilityContext processUtilityContext); +extern List * PostprocessAlterDatabaseOwnerStmt(Node *node, const char *queryString); +extern ObjectAddress AlterDatabaseOwnerObjectAddress(Node *node, bool missing_ok); +extern List * DatabaseOwnerDDLCommands(const ObjectAddress *address); + /* extension.c - forward declarations */ extern bool IsDropCitusExtensionStmt(Node *parsetree); extern bool IsCreateAlterExtensionUpdateCitusStmt(Node *parsetree); @@ -200,6 +207,7 @@ extern bool AnyForeignKeyDependsOnIndex(Oid indexId); extern bool HasForeignKeyWithLocalTable(Oid relationId); extern bool HasForeignKeyToCitusLocalTable(Oid relationId); extern bool HasForeignKeyToReferenceTable(Oid relationOid); +extern List * GetForeignKeysFromLocalTables(Oid relationId); extern bool TableReferenced(Oid relationOid); extern bool TableReferencing(Oid relationOid); extern bool ConstraintIsAUniquenessConstraint(char *inputConstaintName, Oid relationId); @@ -210,6 +218,7 @@ extern bool ConstraintWithIdIsOfType(Oid constraintId, char targetConstraintType extern bool TableHasExternalForeignKeys(Oid relationId); extern List * GetForeignKeyOids(Oid relationId, int flags); extern Oid GetReferencedTableId(Oid foreignKeyId); +extern Oid GetReferencingTableId(Oid foreignKeyId); extern bool RelationInvolvedInAnyNonInheritedForeignKeys(Oid relationId); @@ -278,7 +287,7 @@ extern List * PostprocessIndexStmt(Node *node, extern void ErrorIfUnsupportedAlterIndexStmt(AlterTableStmt *alterTableStatement); extern void MarkIndexValid(IndexStmt *indexStmt); extern List * ExecuteFunctionOnEachTableIndex(Oid relationId, PGIndexProcessor - pgIndexProcessor); + pgIndexProcessor, int flags); /* objectaddress.c - forward declarations */ extern ObjectAddress CreateExtensionStmtObjectAddress(Node *stmt, bool missing_ok); @@ -336,6 +345,18 @@ extern List * PreprocessAlterSchemaRenameStmt(Node *node, const char *queryStrin extern ObjectAddress AlterSchemaRenameStmtObjectAddress(Node *node, bool missing_ok); /* sequence.c - forward declarations */ +extern List * PreprocessAlterSequenceStmt(Node *stmt, const char *queryString, + ProcessUtilityContext processUtilityContext); +extern List * PreprocessAlterSequenceSchemaStmt(Node *node, const char *queryString, + ProcessUtilityContext + processUtilityContext); +extern List * PreprocessDropSequenceStmt(Node *stmt, const char *queryString, + ProcessUtilityContext processUtilityContext); +extern List * PreprocessRenameSequenceStmt(Node *stmt, const char *queryString, + ProcessUtilityContext processUtilityContext); +extern ObjectAddress AlterSequenceObjectAddress(Node *stmt, bool missing_ok); +extern ObjectAddress AlterSequenceSchemaStmtObjectAddress(Node *stmt, bool missing_ok); +extern ObjectAddress RenameSequenceStmtObjectAddress(Node *stmt, bool missing_ok); extern void ErrorIfUnsupportedSeqStmt(CreateSeqStmt *createSeqStmt); extern void ErrorIfDistributedAlterSeqOwnedBy(AlterSeqStmt *alterSeqStmt); diff --git a/src/include/distributed/commands/utility_hook.h b/src/include/distributed/commands/utility_hook.h index ee61ed690..24717986e 100644 --- a/src/include/distributed/commands/utility_hook.h +++ b/src/include/distributed/commands/utility_hook.h @@ -35,6 +35,7 @@ extern bool EnableDependencyCreation; extern bool EnableCreateTypePropagation; extern bool EnableAlterRolePropagation; extern bool EnableAlterRoleSetPropagation; +extern bool EnableAlterDatabaseOwner; extern int UtilityHookLevel; diff --git 
a/src/include/distributed/connection_management.h b/src/include/distributed/connection_management.h index 5ce5b4835..5dffdef35 100644 --- a/src/include/distributed/connection_management.h +++ b/src/include/distributed/connection_management.h @@ -206,6 +206,7 @@ extern int MaxCachedConnectionLifetime; /* parameters used for outbound connections */ extern char *NodeConninfo; +extern char *LocalHostName; /* the hash tables are externally accessiable */ extern HTAB *ConnectionHash; diff --git a/src/include/distributed/coordinator_protocol.h b/src/include/distributed/coordinator_protocol.h index 4c30b2b04..ab5490fca 100644 --- a/src/include/distributed/coordinator_protocol.h +++ b/src/include/distributed/coordinator_protocol.h @@ -94,6 +94,21 @@ typedef enum TableDDLCommandType } TableDDLCommandType; +/* + * IndexDefinitionDeparseFlags helps to control which parts of the + * index creation commands are deparsed. + */ +typedef enum IndexDefinitionDeparseFlags +{ + INCLUDE_CREATE_INDEX_STATEMENTS = 1 << 0, + INCLUDE_INDEX_CLUSTERED_STATEMENTS = 1 << 1, + INCLUDE_INDEX_STATISTICS_STATEMENTTS = 1 << 2, + INCLUDE_INDEX_ALL_STATEMENTS = INCLUDE_CREATE_INDEX_STATEMENTS | + INCLUDE_INDEX_CLUSTERED_STATEMENTS | + INCLUDE_INDEX_STATISTICS_STATEMENTTS +} IndexDefinitionDeparseFlags; + + struct TableDDLCommand; typedef struct TableDDLCommand TableDDLCommand; typedef char *(*TableDDLFunction)(void *context); @@ -177,12 +192,20 @@ extern uint64 GetNextShardId(void); extern uint64 GetNextPlacementId(void); extern Oid ResolveRelationId(text *relationName, bool missingOk); extern List * GetFullTableCreationCommands(Oid relationId, bool includeSequenceDefaults); -extern List * GetPostLoadTableCreationCommands(Oid relationId, bool includeIndexes); +extern List * GetPostLoadTableCreationCommands(Oid relationId, bool includeIndexes, + bool includeReplicaIdentity); extern List * GetPreLoadTableCreationCommands(Oid relationId, bool includeSequenceDefaults, char *accessMethod); -extern List * GetTableIndexAndConstraintCommands(Oid relationId); +extern List * GetTableIndexAndConstraintCommands(Oid relationId, int indexFlags); +extern List * GetTableIndexAndConstraintCommandsExcludingReplicaIdentity(Oid relationId, + int indexFlags); +extern Oid GetRelationIdentityOrPK(Relation rel); +extern void GatherIndexAndConstraintDefinitionList(Form_pg_index indexForm, + List **indexDDLEventList, + int indexFlags); extern bool IndexImpliedByAConstraint(Form_pg_index indexForm); +extern List * GetTableReplicaIdentityCommand(Oid relationId); extern char ShardStorageType(Oid relationId); extern bool DistributedTableReplicationIsEnabled(void); extern void CheckDistributedTable(Oid relationId); @@ -255,6 +278,8 @@ extern ShardPlacement * SearchShardPlacementInList(List *shardPlacementList, extern ShardPlacement * SearchShardPlacementInListOrError(List *shardPlacementList, const char *nodeName, uint32 nodePort); +extern void ErrorIfTargetNodeIsNotSafeToMove(const char *targetNodeName, int + targetNodePort); extern void ErrorIfMoveCitusLocalTable(Oid relationId); extern char LookupShardTransferMode(Oid shardReplicationModeOid); extern void BlockWritesToShardList(List *shardList); diff --git a/src/include/distributed/deparser.h b/src/include/distributed/deparser.h index 7e264544c..b5dcf1905 100644 --- a/src/include/distributed/deparser.h +++ b/src/include/distributed/deparser.h @@ -48,6 +48,7 @@ extern void QualifyAlterCollationOwnerStmt(Node *stmt); /* forward declarations for deparse_table_stmts.c */ extern char * 
DeparseAlterTableSchemaStmt(Node *stmt); +extern char * DeparseAlterTableStmt(Node *node); extern void QualifyAlterTableSchemaStmt(Node *stmt); @@ -127,4 +128,12 @@ extern char * DeparseDropExtensionStmt(Node *stmt); extern char * DeparseAlterExtensionSchemaStmt(Node *stmt); extern char * DeparseAlterExtensionStmt(Node *stmt); +/* forward declarations for deparse_database_stmts.c */ +extern char * DeparseAlterDatabaseOwnerStmt(Node *node); + +/* forward declarations for deparse_sequence_stmts.c */ +extern char * DeparseDropSequenceStmt(Node *stmt); +extern char * DeparseRenameSequenceStmt(Node *stmt); +extern void QualifyRenameSequenceStmt(Node *stmt); + #endif /* CITUS_DEPARSER_H */ diff --git a/src/include/distributed/foreign_key_relationship.h b/src/include/distributed/foreign_key_relationship.h index 3aa040d76..491142d13 100644 --- a/src/include/distributed/foreign_key_relationship.h +++ b/src/include/distributed/foreign_key_relationship.h @@ -20,7 +20,6 @@ extern bool ConnectedToReferenceTableViaFKey(Oid relationId); extern List * ReferencedRelationIdList(Oid relationId); extern List * ReferencingRelationIdList(Oid relationId); extern void SetForeignConstraintRelationshipGraphInvalid(void); -extern bool IsForeignConstraintRelationshipGraphValid(void); extern void ClearForeignConstraintRelationshipGraphContext(void); extern HTAB * CreateOidVisitedHashSet(void); extern bool OidVisited(HTAB *oidVisitedMap, Oid oid); diff --git a/src/include/distributed/intermediate_result_pruning.h b/src/include/distributed/intermediate_result_pruning.h index d207397c2..5880cd23c 100644 --- a/src/include/distributed/intermediate_result_pruning.h +++ b/src/include/distributed/intermediate_result_pruning.h @@ -17,7 +17,13 @@ * UINT32_MAX is reserved in pg_dist_node, so we can use it safely. */ #define LOCAL_NODE_ID UINT32_MAX -#define LOCAL_HOST_NAME "localhost" /* connect to local backends using this name */ + +/* + * If you want to connect to the current node use `LocalHostName`, which is a GUC, instead + * of the hardcoded loopback hostname. Only if you really need the loopback hostname use + * this define. 
+ */ +#define LOCAL_HOST_NAME "localhost" extern bool LogIntermediateResults; diff --git a/src/include/distributed/local_executor.h b/src/include/distributed/local_executor.h index b4c002d9b..7a02be0f6 100644 --- a/src/include/distributed/local_executor.h +++ b/src/include/distributed/local_executor.h @@ -19,6 +19,8 @@ extern bool EnableLocalExecution; extern bool LogLocalCommands; +extern int LocalExecutorLevel; + typedef enum LocalExecutionStatus { LOCAL_EXECUTION_REQUIRED, diff --git a/src/include/distributed/metadata_sync.h b/src/include/distributed/metadata_sync.h index 7ccc38495..c4822651b 100644 --- a/src/include/distributed/metadata_sync.h +++ b/src/include/distributed/metadata_sync.h @@ -55,6 +55,13 @@ extern void SyncMetadataToNodesMain(Datum main_arg); extern void SignalMetadataSyncDaemon(Oid database, int sig); extern bool ShouldInitiateMetadataSync(bool *lockFailure); +extern List * SequenceDDLCommandsForTable(Oid relationId); +extern List * GetSequencesFromAttrDef(Oid attrdefOid); +extern void GetDependentSequencesWithRelation(Oid relationId, List **attnumList, + List **dependentSequenceList, AttrNumber + attnum); +extern Oid GetAttributeTypeOid(Oid relationId, AttrNumber attnum); + #define DELETE_ALL_NODES "TRUNCATE pg_dist_node CASCADE" #define REMOVE_ALL_CLUSTERED_TABLES_COMMAND \ "SELECT worker_drop_distributed_table(logicalrelid::regclass::text) FROM pg_dist_partition" diff --git a/src/include/distributed/metadata_utility.h b/src/include/distributed/metadata_utility.h index 360556e8f..31a42d476 100644 --- a/src/include/distributed/metadata_utility.h +++ b/src/include/distributed/metadata_utility.h @@ -30,7 +30,7 @@ /* total number of hash tokens (2^32) */ #define HASH_TOKEN_COUNT INT64CONST(4294967296) -#define SELECT_EXIST_QUERY "SELECT EXISTS (SELECT 1 FROM %s)" +#define SELECT_TRUE_QUERY "SELECT TRUE FROM %s LIMIT 1" #define PG_TABLE_SIZE_FUNCTION "pg_table_size(%s)" #define PG_RELATION_SIZE_FUNCTION "pg_relation_size(%s)" #define PG_TOTAL_RELATION_SIZE_FUNCTION "pg_total_relation_size(%s)" @@ -196,9 +196,6 @@ typedef enum SizeQueryType } SizeQueryType; -/* Config variable managed via guc.c */ -extern int ReplicationModel; - /* Size functions */ extern Datum citus_table_size(PG_FUNCTION_ARGS); extern Datum citus_total_relation_size(PG_FUNCTION_ARGS); @@ -212,6 +209,7 @@ extern int ShardIntervalCount(Oid relationId); extern List * LoadShardList(Oid relationId); extern ShardInterval * CopyShardInterval(ShardInterval *srcInterval); extern uint64 ShardLength(uint64 shardId); +extern bool NodeGroupHasLivePlacements(int32 groupId); extern bool NodeGroupHasShardPlacements(int32 groupId, bool onlyConsiderActivePlacements); extern List * ActiveShardPlacementListOnGroup(uint64 shardId, int32 groupId); @@ -267,9 +265,9 @@ extern void EnsureFunctionOwner(Oid functionId); extern void EnsureSuperUser(void); extern void ErrorIfTableIsACatalogTable(Relation relation); extern void EnsureTableNotDistributed(Oid relationId); -extern void EnsureReplicationSettings(Oid relationId, char replicationModel); extern void EnsureRelationExists(Oid relationId); extern bool RegularTable(Oid relationId); +extern bool TableEmpty(Oid tableId); extern bool RelationUsesIdentityColumns(TupleDesc relationDesc); extern char * ConstructQualifiedShardName(ShardInterval *shardInterval); extern uint64 GetFirstShardId(Oid relationId); @@ -291,4 +289,5 @@ extern List * SendShardStatisticsQueriesInParallel(List *citusTableIds, bool extern bool GetNodeDiskSpaceStatsForConnection(MultiConnection *connection, 
uint64 *availableBytes, uint64 *totalBytes); +extern void ExecuteQueryViaSPI(char *query, int SPIOK); #endif /* METADATA_UTILITY_H */ diff --git a/src/include/distributed/multi_physical_planner.h b/src/include/distributed/multi_physical_planner.h index 1fc75f8d2..740adfdd0 100644 --- a/src/include/distributed/multi_physical_planner.h +++ b/src/include/distributed/multi_physical_planner.h @@ -448,6 +448,9 @@ typedef struct DistributedPlan */ bool fastPathRouterPlan; + /* number of times this plan has been used (as a prepared statement) */ + uint32 numberOfTimesExecuted; + /* * NULL if this a valid plan, an error description otherwise. This will * e.g. be set if SQL features are present that a planner doesn't support, diff --git a/src/include/distributed/pg_dist_rebalance_strategy.h b/src/include/distributed/pg_dist_rebalance_strategy.h index 148c772cc..0c346501d 100644 --- a/src/include/distributed/pg_dist_rebalance_strategy.h +++ b/src/include/distributed/pg_dist_rebalance_strategy.h @@ -25,10 +25,11 @@ typedef struct FormData_pg_dist_rebalance_strategy NameData name; /* user readable name of the strategy */ bool default_strategy; /* if this strategy is the default strategy */ Oid shardCostFunction; /* function to calculate the shard cost */ - Oid nodeCapacityFunction; /* function to get the capacity of a node */ - Oid shardAllowedOnNodeFunction; /* function to check if shard is allowed on node */ - float4 defaultThreshold; /* default threshold that is used */ - float4 minimumThreshold; /* minimum threshold that is allowed */ + Oid nodeCapacityFunction; /* function to get the capacity of a node */ + Oid shardAllowedOnNodeFunction; /* function to check if shard is allowed on node */ + float4 defaultThreshold; /* default threshold that is used */ + float4 minimumThreshold; /* minimum threshold that is allowed */ + float4 improvementThreshold; /* the shard size threshold that is used */ } FormData_pg_dist_rebalance_strategy; /* ---------------- diff --git a/src/include/distributed/shard_cleaner.h b/src/include/distributed/shard_cleaner.h index caad5a615..8a98254f9 100644 --- a/src/include/distributed/shard_cleaner.h +++ b/src/include/distributed/shard_cleaner.h @@ -17,7 +17,8 @@ extern bool DeferShardDeleteOnMove; extern double DesiredPercentFreeAfterMove; extern bool CheckAvailableSpaceBeforeMove; -extern int TryDropMarkedShards(bool waitForCleanupLock); -extern int DropMarkedShards(bool waitForCleanupLock); +extern int TryDropOrphanedShards(bool waitForLocks); +extern int DropOrphanedShards(bool waitForLocks); +extern void DropOrphanedShardsInSeparateTransaction(void); #endif /*CITUS_SHARD_CLEANER_H */ diff --git a/src/include/distributed/shard_rebalancer.h b/src/include/distributed/shard_rebalancer.h index 7e0716cb5..de0684d68 100644 --- a/src/include/distributed/shard_rebalancer.h +++ b/src/include/distributed/shard_rebalancer.h @@ -105,22 +105,54 @@ typedef struct PlacementUpdateEventProgress int sourcePort; char targetName[255]; int targetPort; - uint64 shardSize; - uint64 progress; + pg_atomic_uint64 progress; } PlacementUpdateEventProgress; typedef struct NodeFillState { WorkerNode *node; + + /* + * capacity is how big this node is, relative to the other nodes in the + * cluster. This has no unit, it can represent whatever the user wants. + * Some examples: + * 1. GBs of RAM + * 2. number of CPUs + * 3. GBs of disk + * 4. relative improvement of new CPU generation in newly added nodes + */ float4 capacity; + + /* + * totalCost is the costs of ShardCosts on the node added together. 
This + * doesn't have a unit. See the ShardCost->cost comment for some examples. + */ float4 totalCost; + + /* + * utilization is how "full" the node is. This is always totalCost divided + * by capacity. Since neither of those have a unit, this also doesn't have + * one. + */ float4 utilization; + + /* + * shardCostListDesc contains all ShardCosts that are on the current node, + * ordered from high cost to low cost. + */ List *shardCostListDesc; } NodeFillState; typedef struct ShardCost { uint64 shardId; + + /* + * cost is the cost of the shard. This doesn't have a unit. + * Some examples of what this could represent: + * 1. GBs of data + * 2. number of queries per day + */ float4 cost; } ShardCost; @@ -138,6 +170,9 @@ typedef struct RebalancePlanFunctions void *context; } RebalancePlanFunctions; +extern int MaxRebalancerLoggedIgnoredMoves; +extern bool RunningUnderIsolationTest; + /* External function declarations */ extern Datum shard_placement_rebalance_array(PG_FUNCTION_ARGS); extern Datum shard_placement_replication_array(PG_FUNCTION_ARGS); @@ -151,9 +186,11 @@ extern List * RebalancePlacementUpdates(List *workerNodeList, List *shardPlaceme double threshold, int32 maxShardMoves, bool drainOnly, + float4 utilizationImproventThreshold, RebalancePlanFunctions *rebalancePlanFunctions); extern List * ReplicationPlacementUpdates(List *workerNodeList, List *shardPlacementList, int shardReplicationFactor); +extern void ExecuteCriticalCommandInSeparateTransaction(char *command); #endif /* SHARD_REBALANCER_H */ diff --git a/src/include/distributed/transaction_management.h b/src/include/distributed/transaction_management.h index ecee5eeee..fdb6be1b7 100644 --- a/src/include/distributed/transaction_management.h +++ b/src/include/distributed/transaction_management.h @@ -111,7 +111,7 @@ extern bool TransactionModifiedNodeMetadata; */ extern void UseCoordinatedTransaction(void); extern bool InCoordinatedTransaction(void); -extern void CoordinatedTransactionShouldUse2PC(void); +extern void Use2PCForCoordinatedTransaction(void); extern bool GetCoordinatedTransactionShouldUse2PC(void); extern bool IsMultiStatementTransaction(void); extern void EnsureDistributedTransactionId(void); diff --git a/src/include/distributed/worker_protocol.h b/src/include/distributed/worker_protocol.h index 77d2a25cc..e9d88f411 100644 --- a/src/include/distributed/worker_protocol.h +++ b/src/include/distributed/worker_protocol.h @@ -123,6 +123,7 @@ extern int32 ArrayObjectCount(ArrayType *arrayObject); extern FmgrInfo * GetFunctionInfo(Oid typeId, Oid accessMethodId, int16 procedureId); extern uint64 ExtractShardIdFromTableName(const char *tableName, bool missingOk); extern void RepartitionCleanupJobDirectories(void); +extern void SetDefElemArg(AlterSeqStmt *statement, const char *name, Node *arg); /* Function declarations shared with the master planner */ diff --git a/src/include/distributed/worker_shard_visibility.h b/src/include/distributed/worker_shard_visibility.h index 46f807fd8..c5c58d712 100644 --- a/src/include/distributed/worker_shard_visibility.h +++ b/src/include/distributed/worker_shard_visibility.h @@ -14,11 +14,13 @@ #include "nodes/nodes.h" extern bool OverrideTableVisibility; +extern bool EnableManualChangesToShards; extern void ReplaceTableVisibleFunction(Node *inputNode); extern void ErrorIfRelationIsAKnownShard(Oid relationId); -extern bool RelationIsAKnownShard(Oid shardRelationId, bool onlySearchPath); +extern void ErrorIfIllegallyChangingKnownShard(Oid relationId); +extern bool RelationIsAKnownShard(Oid 
shardRelationId); #endif /* WORKER_SHARD_VISIBILITY_H */ diff --git a/src/test/regress/Makefile b/src/test/regress/Makefile index 12010e858..977301ac1 100644 --- a/src/test/regress/Makefile +++ b/src/test/regress/Makefile @@ -132,6 +132,10 @@ check-multi: all $(pg_regress_multi_check) --load-extension=citus \ -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/multi_schedule $(EXTRA_TESTS) +check-multi-1: all + $(pg_regress_multi_check) --load-extension=citus \ + -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/multi_1_schedule $(EXTRA_TESTS) + check-multi-hyperscale: all $(pg_regress_multi_check) --conninfo="$(conninfo)" --load-extension=citus \ -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/multi_schedule_hyperscale $(EXTRA_TESTS) diff --git a/src/test/regress/before_pg_upgrade_schedule b/src/test/regress/before_pg_upgrade_schedule index d340ce62c..22102b314 100644 --- a/src/test/regress/before_pg_upgrade_schedule +++ b/src/test/regress/before_pg_upgrade_schedule @@ -3,4 +3,6 @@ test: multi_test_helpers multi_test_helpers_superuser test: multi_test_catalog_views test: upgrade_basic_before test: upgrade_columnar_before -test: upgrade_type_before upgrade_ref2ref_before upgrade_distributed_function_before upgrade_rebalance_strategy_before +test: upgrade_ref2ref_before +test: upgrade_type_before +test: upgrade_distributed_function_before upgrade_rebalance_strategy_before diff --git a/src/test/regress/bin/create_test.py b/src/test/regress/bin/create_test.py new file mode 100755 index 000000000..572d64510 --- /dev/null +++ b/src/test/regress/bin/create_test.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python3 + +import sys +import random +import os + +if len(sys.argv) != 2: + print( + "ERROR: Expected the name of the new test as an argument, such as:\n" + "src/test/regress/bin/create_test.py my_awesome_test" + ) + sys.exit(1) + +test_name = sys.argv[1] + +regress_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) +filename = os.path.join(regress_dir, "sql", f"{test_name}.sql") + +if os.path.isfile(filename): + print(f"ERROR: test file '{filename}' already exists") + sys.exit(1) + +shard_id = random.randint(1, 999999) * 100 + +contents = f"""CREATE SCHEMA {test_name}; +SET search_path TO {test_name}; +SET citus.shard_count TO 4; +SET citus.shard_replication_factor TO 1; +SET citus.next_shard_id TO {shard_id}; + +-- add tests here + +SET client_min_messages TO WARNING; +DROP SCHEMA {test_name} CASCADE; +""" + + +with open(filename, "w") as f: + f.write(contents) + +print(f"Created {filename}") +print(f"Don't forget to add '{test_name}' in multi_schedule somewhere") diff --git a/src/test/regress/bin/normalize.sed b/src/test/regress/bin/normalize.sed index 0b4200b64..384fb04c2 100644 --- a/src/test/regress/bin/normalize.sed +++ b/src/test/regress/bin/normalize.sed @@ -221,3 +221,6 @@ s/^(DEBUG: the name of the shard \(abcde_01234567890123456789012345678901234567 # normalize long index name errors for multi_index_statements s/^(ERROR: The index name \(test_index_creation1_p2020_09_26)_([0-9])+_(tenant_id_timeperiod_idx)/\1_xxxxxx_\3/g s/^(DEBUG: the index name on the shards of the partition is too long, switching to sequential and local execution mode to prevent self deadlocks: test_index_creation1_p2020_09_26)_([0-9])+_(tenant_id_timeperiod_idx)/\1_xxxxxx_\3/g + +# normalize errors for not being able to connect to a non-existing host +s/could not translate host name "foobar" to address: .*$/could not translate host name "foobar" to address: /g diff --git 
a/src/test/regress/expected/.gitignore b/src/test/regress/expected/.gitignore index c63944e0d..f500ab38e 100644 --- a/src/test/regress/expected/.gitignore +++ b/src/test/regress/expected/.gitignore @@ -1,36 +1,24 @@ -/multi_agg_distinct.out -/multi_agg_type_conversion.out -/multi_alter_table_statements.out -/multi_alter_table_statements_0.out -/multi_append_table_to_shard.out -/multi_behavioral_analytics_create_table.out -/multi_behavioral_analytics_create_table_superuser.out -/multi_copy.out -/multi_create_schema.out -/multi_large_shardid.out -/multi_master_delete_protocol.out -/multi_outer_join.out -/multi_outer_join_reference.out -/multi_load_data.out -/multi_load_data_superuser.out -/multi_load_large_records.out -/multi_load_more_data.out -/worker_copy.out -/multi_complex_count_distinct.out -/multi_mx_copy_data.out -/multi_behavioral_analytics_create_table.out -/multi_insert_select_behavioral_analytics_create_table.out -/hyperscale_tutorial.out -/am_chunk_filtering.out -/am_copyto.out -/am_data_types.out -/am_load.out -/fdw_block_filtering.out -/fdw_copyto.out -/fdw_create.out -/fdw_data_types.out -/fdw_load.out /columnar_chunk_filtering.out /columnar_copyto.out /columnar_data_types.out /columnar_load.out +/hyperscale_tutorial.out +/multi_agg_distinct.out +/multi_agg_type_conversion.out +/multi_alter_table_statements.out +/multi_append_table_to_shard.out +/multi_behavioral_analytics_create_table.out +/multi_behavioral_analytics_create_table_superuser.out +/multi_complex_count_distinct.out +/multi_copy.out +/multi_create_schema.out +/multi_large_shardid.out +/multi_load_data.out +/multi_load_data_superuser.out +/multi_load_large_records.out +/multi_load_more_data.out +/multi_master_delete_protocol.out +/multi_mx_copy_data.out +/multi_outer_join.out +/multi_outer_join_reference.out +/worker_copy.out diff --git a/src/test/regress/expected/alter_database_owner.out b/src/test/regress/expected/alter_database_owner.out new file mode 100644 index 000000000..2e5e54aca --- /dev/null +++ b/src/test/regress/expected/alter_database_owner.out @@ -0,0 +1,301 @@ +CREATE SCHEMA alter_database_owner; +SET search_path TO alter_database_owner, public; +CREATE USER database_owner_1; +NOTICE: not propagating CREATE ROLE/USER commands to worker nodes +HINT: Connect to worker nodes directly to manually create all necessary users and roles. +CREATE USER database_owner_2; +NOTICE: not propagating CREATE ROLE/USER commands to worker nodes +HINT: Connect to worker nodes directly to manually create all necessary users and roles. +SELECT run_command_on_workers('CREATE USER database_owner_1'); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,57637,t,"CREATE ROLE") + (localhost,57638,t,"CREATE ROLE") +(2 rows) + +SELECT run_command_on_workers('CREATE USER database_owner_2'); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,57637,t,"CREATE ROLE") + (localhost,57638,t,"CREATE ROLE") +(2 rows) + +-- make sure the propagation of ALTER DATABASE ... OWNER TO ... 
is on +SET citus.enable_alter_database_owner TO on; +-- list the owners of the current database on all nodes +SELECT run_command_on_workers($$ + SELECT u.rolname + FROM pg_database d + JOIN pg_roles u + ON (d.datdba = u.oid) + WHERE d.datname = current_database(); +$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,57637,t,postgres) + (localhost,57638,t,postgres) +(2 rows) + +-- remove a node to verify addition later +SELECT master_remove_node('localhost', :worker_2_port); + master_remove_node +--------------------------------------------------------------------- + +(1 row) + +-- verify we can change the owner of a database +ALTER DATABASE regression OWNER TO database_owner_1; +-- list the owner of the current database on the coordinator +SELECT u.rolname + FROM pg_database d + JOIN pg_roles u + ON (d.datdba = u.oid) + WHERE d.datname = current_database(); + rolname +--------------------------------------------------------------------- + database_owner_1 +(1 row) + +-- list the owners of the current database on all nodes +SELECT run_command_on_workers($$ + SELECT u.rolname + FROM pg_database d + JOIN pg_roles u + ON (d.datdba = u.oid) + WHERE d.datname = current_database(); +$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,57637,t,database_owner_1) +(1 row) + +-- turn off propagation to verify it does _not_ propagate to new nodes when turned off +SET citus.enable_alter_database_owner TO off; +-- add back second node to verify the owner of the database was set accordingly +SELECT 1 FROM master_add_node('localhost', :worker_2_port); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +-- list the owners of the current database on all nodes, should reflect on newly added node +SELECT run_command_on_workers($$ + SELECT u.rolname + FROM pg_database d + JOIN pg_roles u + ON (d.datdba = u.oid) + WHERE d.datname = current_database(); +$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,57637,t,database_owner_1) + (localhost,57638,t,postgres) +(2 rows) + +-- turn on propagation to verify it does propagate to new nodes when enabled +SET citus.enable_alter_database_owner TO on; +SELECT master_remove_node('localhost', :worker_2_port); -- remove so we can re add with propagation on + master_remove_node +--------------------------------------------------------------------- + +(1 row) + +-- add back second node to verify the owner of the database was set accordingly +SELECT 1 FROM master_add_node('localhost', :worker_2_port); + ?column? 
+--------------------------------------------------------------------- + 1 +(1 row) + +-- list the owners of the current database on all nodes, should reflect on newly added node +SELECT run_command_on_workers($$ + SELECT u.rolname + FROM pg_database d + JOIN pg_roles u + ON (d.datdba = u.oid) + WHERE d.datname = current_database(); +$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,57637,t,database_owner_1) + (localhost,57638,t,database_owner_1) +(2 rows) + +-- test changing the owner in a transaction and rollback to cancel +BEGIN; +ALTER DATABASE regression OWNER TO database_owner_2; +ROLLBACK; +-- list the owners of the current database on all nodes +SELECT u.rolname + FROM pg_database d + JOIN pg_roles u + ON (d.datdba = u.oid) + WHERE d.datname = current_database(); + rolname +--------------------------------------------------------------------- + database_owner_1 +(1 row) + +SELECT run_command_on_workers($$ + SELECT u.rolname + FROM pg_database d + JOIN pg_roles u + ON (d.datdba = u.oid) + WHERE d.datname = current_database(); +$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,57637,t,database_owner_1) + (localhost,57638,t,database_owner_1) +(2 rows) + +CREATE TABLE t (a int PRIMARY KEY); +SELECT create_distributed_table('t', 'a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- test changing the owner in a xact that already had parallel execution +BEGIN; +SELECT count(*) FROM t; -- parallel execution; + count +--------------------------------------------------------------------- + 0 +(1 row) + +ALTER DATABASE regression OWNER TO database_owner_2; -- should ERROR +ERROR: cannot create or modify database because there was a parallel operation on a distributed table in the transaction +DETAIL: When creating or altering a database, Citus needs to perform all operations over a single connection per node to ensure consistency. 
+HINT: Try re-running the transaction with "SET LOCAL citus.multi_shard_modify_mode TO 'sequential';" +ROLLBACK; +-- list the owners of the current database on all nodes +SELECT u.rolname + FROM pg_database d + JOIN pg_roles u + ON (d.datdba = u.oid) + WHERE d.datname = current_database(); + rolname +--------------------------------------------------------------------- + database_owner_1 +(1 row) + +SELECT run_command_on_workers($$ + SELECT u.rolname + FROM pg_database d + JOIN pg_roles u + ON (d.datdba = u.oid) + WHERE d.datname = current_database(); +$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,57637,t,database_owner_1) + (localhost,57638,t,database_owner_1) +(2 rows) + +BEGIN; +SET LOCAL citus.multi_shard_modify_mode TO 'sequential'; +SELECT count(*) FROM t; -- parallel execution; + count +--------------------------------------------------------------------- + 0 +(1 row) + +ALTER DATABASE regression OWNER TO database_owner_2; +COMMIT; +-- list the owners of the current database on all nodes +SELECT u.rolname + FROM pg_database d + JOIN pg_roles u + ON (d.datdba = u.oid) + WHERE d.datname = current_database(); + rolname +--------------------------------------------------------------------- + database_owner_2 +(1 row) + +SELECT run_command_on_workers($$ + SELECT u.rolname + FROM pg_database d + JOIN pg_roles u + ON (d.datdba = u.oid) + WHERE d.datname = current_database(); +$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,57637,t,database_owner_2) + (localhost,57638,t,database_owner_2) +(2 rows) + +-- turn propagation off and verify it does not propagate interactively when turned off +SET citus.enable_alter_database_owner TO off; +ALTER DATABASE regression OWNER TO database_owner_1; +-- list the owners of the current database on all nodes +SELECT u.rolname + FROM pg_database d + JOIN pg_roles u + ON (d.datdba = u.oid) + WHERE d.datname = current_database(); + rolname +--------------------------------------------------------------------- + database_owner_1 +(1 row) + +SELECT run_command_on_workers($$ + SELECT u.rolname + FROM pg_database d + JOIN pg_roles u + ON (d.datdba = u.oid) + WHERE d.datname = current_database(); +$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,57637,t,database_owner_2) + (localhost,57638,t,database_owner_2) +(2 rows) + +-- reset state of cluster +SET citus.enable_alter_database_owner TO on; +ALTER DATABASE regression OWNER TO current_user; +-- list the owners of the current database on all nodes +SELECT u.rolname + FROM pg_database d + JOIN pg_roles u + ON (d.datdba = u.oid) + WHERE d.datname = current_database(); + rolname +--------------------------------------------------------------------- + postgres +(1 row) + +SELECT run_command_on_workers($$ + SELECT u.rolname + FROM pg_database d + JOIN pg_roles u + ON (d.datdba = u.oid) + WHERE d.datname = current_database(); +$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,57637,t,postgres) + (localhost,57638,t,postgres) +(2 rows) + +DROP USER database_owner_1; +DROP USER database_owner_2; +SELECT run_command_on_workers('DROP USER database_owner_1'); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,57637,t,"DROP ROLE") + (localhost,57638,t,"DROP ROLE") +(2 rows) + +SELECT run_command_on_workers('DROP 
USER database_owner_2'); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,57637,t,"DROP ROLE") + (localhost,57638,t,"DROP ROLE") +(2 rows) + +SET client_min_messages TO warning; +DROP SCHEMA alter_database_owner CASCADE; diff --git a/src/test/regress/expected/alter_distributed_table.out b/src/test/regress/expected/alter_distributed_table.out index edc640f58..de87dade4 100644 --- a/src/test/regress/expected/alter_distributed_table.out +++ b/src/test/regress/expected/alter_distributed_table.out @@ -498,7 +498,7 @@ SELECT table_name::text, shard_count, access_method FROM public.citus_tables WHE \endif -- test with metadata sync -SET citus.replication_model TO 'streaming'; +SET citus.shard_replication_factor TO 1; SELECT start_metadata_sync_to_node('localhost', :worker_1_port); start_metadata_sync_to_node --------------------------------------------------------------------- @@ -530,7 +530,6 @@ SELECT table_name, shard_count FROM public.citus_tables WHERE table_name::text = metadata_sync_table | 8 (1 row) -SET citus.replication_model TO DEFAULT; SELECT stop_metadata_sync_to_node('localhost', :worker_1_port); stop_metadata_sync_to_node --------------------------------------------------------------------- diff --git a/src/test/regress/expected/alter_table_set_access_method.out b/src/test/regress/expected/alter_table_set_access_method.out index 3d16ca4e5..f117199d0 100644 --- a/src/test/regress/expected/alter_table_set_access_method.out +++ b/src/test/regress/expected/alter_table_set_access_method.out @@ -393,7 +393,7 @@ SELECT c.relname, a.amname FROM pg_class c, pg_am a where c.relname SIMILAR TO ' SELECT alter_table_set_access_method('table_type_dist', 'fake_am'); NOTICE: creating a new table for alter_table_set_access_method.table_type_dist WARNING: fake_scan_getnextslot -CONTEXT: SQL statement "SELECT EXISTS (SELECT 1 FROM alter_table_set_access_method.table_type_dist_1533505599)" +CONTEXT: SQL statement "SELECT TRUE FROM alter_table_set_access_method.table_type_dist_1533505599 LIMIT 1" WARNING: fake_scan_getnextslot NOTICE: moving the data of alter_table_set_access_method.table_type_dist NOTICE: dropping the old alter_table_set_access_method.table_type_dist @@ -406,7 +406,7 @@ NOTICE: renaming the new table to alter_table_set_access_method.table_type_dist SELECT alter_table_set_access_method('table_type_ref', 'fake_am'); NOTICE: creating a new table for alter_table_set_access_method.table_type_ref WARNING: fake_scan_getnextslot -CONTEXT: SQL statement "SELECT EXISTS (SELECT 1 FROM alter_table_set_access_method.table_type_ref_1037855087)" +CONTEXT: SQL statement "SELECT TRUE FROM alter_table_set_access_method.table_type_ref_1037855087 LIMIT 1" WARNING: fake_scan_getnextslot NOTICE: moving the data of alter_table_set_access_method.table_type_ref NOTICE: dropping the old alter_table_set_access_method.table_type_ref @@ -695,7 +695,7 @@ SELECT alter_table_set_access_method('abcde_012345678901234567890123456789012345 DEBUG: the name of the shard (abcde_01234567890123456789012345678901234567890_f7ff6612_xxxxxx) for relation (abcde_012345678901234567890123456789012345678901234567890123456) is too long, switching to sequential and local execution mode to prevent self deadlocks NOTICE: creating a new table for alter_table_set_access_method.abcde_012345678901234567890123456789012345678901234567890123456 DEBUG: pathlist hook for columnar table am -CONTEXT: SQL statement "SELECT EXISTS (SELECT 1 FROM 
alter_table_set_access_method.abcde_0123456789012345678901234567890123456_f7ff6612_4160710162)" +CONTEXT: SQL statement "SELECT TRUE FROM alter_table_set_access_method.abcde_0123456789012345678901234567890123456_f7ff6612_4160710162 LIMIT 1" NOTICE: moving the data of alter_table_set_access_method.abcde_012345678901234567890123456789012345678901234567890123456 NOTICE: dropping the old alter_table_set_access_method.abcde_012345678901234567890123456789012345678901234567890123456 CONTEXT: SQL statement "DROP TABLE alter_table_set_access_method.abcde_012345678901234567890123456789012345678901234567890123456 CASCADE" diff --git a/src/test/regress/expected/auto_undist_citus_local.out b/src/test/regress/expected/auto_undist_citus_local.out index cf8a13ad2..260264f9c 100644 --- a/src/test/regress/expected/auto_undist_citus_local.out +++ b/src/test/regress/expected/auto_undist_citus_local.out @@ -30,7 +30,7 @@ ALTER TABLE citus_local_table ADD CONSTRAINT fkey_local_to_ref FOREIGN KEY(l1) R SELECT logicalrelid, partmethod, repmodel FROM pg_dist_partition WHERE logicalrelid IN ('citus_local_table'::regclass, 'reference_table'::regclass) ORDER BY logicalrelid; logicalrelid | partmethod | repmodel --------------------------------------------------------------------- - citus_local_table | n | c + citus_local_table | n | s reference_table | n | t (2 rows) @@ -50,7 +50,7 @@ BEGIN; SELECT logicalrelid, partmethod, repmodel FROM pg_dist_partition WHERE logicalrelid IN ('citus_local_table'::regclass, 'reference_table'::regclass) ORDER BY logicalrelid; logicalrelid | partmethod | repmodel --------------------------------------------------------------------- - citus_local_table | n | c + citus_local_table | n | s reference_table | n | t (2 rows) @@ -76,7 +76,7 @@ SELECT logicalrelid, partmethod, repmodel FROM pg_dist_partition WHERE logicalre logicalrelid | partmethod | repmodel --------------------------------------------------------------------- reference_table | n | t - citus_local_table | n | c + citus_local_table | n | s (2 rows) ALTER TABLE reference_table DROP COLUMN r1 CASCADE; @@ -100,7 +100,7 @@ SELECT logicalrelid, partmethod, repmodel FROM pg_dist_partition WHERE logicalre logicalrelid | partmethod | repmodel --------------------------------------------------------------------- reference_table | n | t - citus_local_table | n | c + citus_local_table | n | s (2 rows) ALTER TABLE citus_local_table DROP COLUMN l1 CASCADE; @@ -124,7 +124,7 @@ SELECT logicalrelid, partmethod, repmodel FROM pg_dist_partition WHERE logicalre logicalrelid | partmethod | repmodel --------------------------------------------------------------------- reference_table | n | t - citus_local_table | n | c + citus_local_table | n | s (2 rows) ALTER TABLE reference_table DROP CONSTRAINT reference_table_pkey CASCADE; @@ -149,7 +149,7 @@ SELECT logicalrelid, partmethod, repmodel FROM pg_dist_partition WHERE logicalre logicalrelid | partmethod | repmodel --------------------------------------------------------------------- reference_table | n | t - citus_local_table | n | c + citus_local_table | n | s (2 rows) DROP INDEX ref_unique CASCADE; @@ -173,7 +173,7 @@ SELECT logicalrelid, partmethod, repmodel FROM pg_dist_partition WHERE logicalre logicalrelid | partmethod | repmodel --------------------------------------------------------------------- reference_table | n | t - citus_local_table | n | c + citus_local_table | n | s (2 rows) ALTER TABLE reference_table DROP CONSTRAINT reference_table_r1_key CASCADE; @@ -197,7 +197,7 @@ SELECT 
logicalrelid, partmethod, repmodel FROM pg_dist_partition WHERE logicalre logicalrelid | partmethod | repmodel --------------------------------------------------------------------- reference_table | n | t - citus_local_table | n | c + citus_local_table | n | s (2 rows) DROP TABLE reference_table CASCADE; @@ -220,7 +220,7 @@ SELECT logicalrelid, partmethod, repmodel FROM pg_dist_partition WHERE logicalre logicalrelid | partmethod | repmodel --------------------------------------------------------------------- reference_table | n | t - citus_local_table | n | c + citus_local_table | n | s (2 rows) ALTER TABLE reference_table DROP CONSTRAINT reference_table_r1_key CASCADE; @@ -245,7 +245,7 @@ SELECT logicalrelid, partmethod, repmodel FROM pg_dist_partition WHERE logicalre logicalrelid | partmethod | repmodel --------------------------------------------------------------------- ref_table_drop_schema.reference_table | n | t - citus_local_table | n | c + citus_local_table | n | s (2 rows) DROP SCHEMA ref_table_drop_schema CASCADE; @@ -276,7 +276,7 @@ SELECT logicalrelid, partmethod, repmodel FROM pg_dist_partition WHERE logicalre --------------------------------------------------------------------- reference_table_1 | n | t reference_table_2 | n | t - citus_local_table | n | c + citus_local_table | n | s (3 rows) ALTER TABLE reference_table_1 DROP COLUMN r1 CASCADE; @@ -285,7 +285,7 @@ SELECT logicalrelid, partmethod, repmodel FROM pg_dist_partition WHERE logicalre --------------------------------------------------------------------- reference_table_1 | n | t reference_table_2 | n | t - citus_local_table | n | c + citus_local_table | n | s (3 rows) -- local table has multiple foreign keys to two tables @@ -311,7 +311,7 @@ SELECT logicalrelid, partmethod, repmodel FROM pg_dist_partition WHERE logicalre --------------------------------------------------------------------- reference_table_1 | n | t reference_table_2 | n | t - citus_local_table | n | c + citus_local_table | n | s (3 rows) DROP TABLE reference_table_1 CASCADE; @@ -319,7 +319,7 @@ SELECT logicalrelid, partmethod, repmodel FROM pg_dist_partition WHERE logicalre logicalrelid | partmethod | repmodel --------------------------------------------------------------------- reference_table_2 | n | t - citus_local_table | n | c + citus_local_table | n | s (2 rows) CREATE TABLE distributed_table (d1 int); @@ -359,7 +359,7 @@ SELECT logicalrelid, partmethod, repmodel FROM pg_dist_partition WHERE logicalre --------------------------------------------------------------------- reference_table_1 | n | t reference_table_2 | n | t - citus_local_table | n | c + citus_local_table | n | s (3 rows) DROP TABLE reference_table_1, reference_table_2 CASCADE; @@ -391,7 +391,7 @@ SELECT logicalrelid, partmethod, repmodel FROM pg_dist_partition WHERE logicalre --------------------------------------------------------------------- reference_table_1 | n | t reference_table_2 | n | t - citus_local_table | n | c + citus_local_table | n | s (3 rows) BEGIN; @@ -413,7 +413,7 @@ BEGIN; --------------------------------------------------------------------- reference_table_1 | n | t reference_table_2 | n | t - citus_local_table | n | c + citus_local_table | n | s (3 rows) -- this should undistribute citus_local_table again @@ -442,9 +442,9 @@ SELECT logicalrelid, partmethod, repmodel FROM pg_dist_partition WHERE logicalre logicalrelid | partmethod | repmodel --------------------------------------------------------------------- reference_table_1 | n | t - citus_local_table_1 
| n | c - citus_local_table_2 | n | c - citus_local_table_3 | n | c + citus_local_table_1 | n | s + citus_local_table_2 | n | s + citus_local_table_3 | n | s (4 rows) ALTER TABLE reference_table_1 DROP COLUMN r1 CASCADE; @@ -470,9 +470,9 @@ SELECT logicalrelid, partmethod, repmodel FROM pg_dist_partition WHERE logicalre logicalrelid | partmethod | repmodel --------------------------------------------------------------------- reference_table_1 | n | t - citus_local_table_1 | n | c - citus_local_table_2 | n | c - citus_local_table_3 | n | c + citus_local_table_1 | n | s + citus_local_table_2 | n | s + citus_local_table_3 | n | s (4 rows) -- test DROP OWNED BY @@ -525,7 +525,7 @@ SELECT logicalrelid, partmethod, repmodel FROM pg_dist_partition WHERE logicalre logicalrelid | partmethod | repmodel --------------------------------------------------------------------- reference_table_1 | n | t - citus_local_table_1 | n | c + citus_local_table_1 | n | s (2 rows) CREATE OR REPLACE FUNCTION drop_constraint_via_func() @@ -577,7 +577,7 @@ SELECT logicalrelid, partmethod, repmodel FROM pg_dist_partition WHERE logicalre logicalrelid | partmethod | repmodel --------------------------------------------------------------------- reference_table_1 | n | t - citus_local_table_1 | n | c + citus_local_table_1 | n | s (2 rows) create or replace procedure drop_constraint_via_proc_top_level() @@ -610,7 +610,7 @@ SELECT logicalrelid, partmethod, repmodel FROM pg_dist_partition WHERE logicalre logicalrelid | partmethod | repmodel --------------------------------------------------------------------- reference_table_1 | n | t - citus_local_table_1 | n | c + citus_local_table_1 | n | s (2 rows) create or replace procedure drop_constraint_via_proc_exception() diff --git a/src/test/regress/expected/ch_bench_having_mx.out b/src/test/regress/expected/ch_bench_having_mx.out index 1509376e6..17bab45d0 100644 --- a/src/test/regress/expected/ch_bench_having_mx.out +++ b/src/test/regress/expected/ch_bench_having_mx.out @@ -1,5 +1,4 @@ ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1640000; -SET citus.replication_model TO streaming; SET citus.shard_replication_factor to 1; SET citus.shard_count to 4; CREATE SCHEMA ch_bench_having; @@ -292,7 +291,6 @@ having (select max(s_order_cnt) > 2 as having_query from stock where s_i_id = order by s_i_id; ERROR: Subqueries in HAVING cannot refer to outer query \c - - - :master_port -SET citus.replication_model TO streaming; SET citus.shard_replication_factor to 1; SET citus.shard_count to 4; SET search_path = ch_bench_having, public; diff --git a/src/test/regress/expected/citus_local_tables.out b/src/test/regress/expected/citus_local_tables.out index ed95b69c0..517c1271b 100644 --- a/src/test/regress/expected/citus_local_tables.out +++ b/src/test/regress/expected/citus_local_tables.out @@ -472,7 +472,7 @@ BEGIN; SELECT logicalrelid::regclass::text FROM pg_dist_partition, pg_tables WHERE tablename=logicalrelid::regclass::text AND schemaname='citus_local_tables_test_schema' AND - partmethod = 'n' AND repmodel = 'c' + partmethod = 'n' AND repmodel = 's' ORDER BY 1; logicalrelid --------------------------------------------------------------------- @@ -505,7 +505,7 @@ BEGIN; SELECT logicalrelid::regclass::text FROM pg_dist_partition, pg_tables WHERE tablename=logicalrelid::regclass::text AND schemaname='citus_local_tables_test_schema' AND - partmethod = 'n' AND repmodel = 'c' + partmethod = 'n' AND repmodel = 's' ORDER BY 1; logicalrelid 
--------------------------------------------------------------------- diff --git a/src/test/regress/expected/citus_local_tables_queries_mx.out b/src/test/regress/expected/citus_local_tables_queries_mx.out index 322cd791e..684644476 100644 --- a/src/test/regress/expected/citus_local_tables_queries_mx.out +++ b/src/test/regress/expected/citus_local_tables_queries_mx.out @@ -21,7 +21,7 @@ SELECT start_metadata_sync_to_node('localhost', :worker_1_port); (1 row) -SET citus.replication_model TO streaming; +SET citus.shard_replication_factor TO 1; CREATE TABLE dummy_reference_table(a int unique, b int); SELECT create_reference_table('dummy_reference_table'); create_reference_table @@ -874,7 +874,7 @@ SELECT reference_table.* FROM reference_table, distributed_table; TRUNCATE reference_table, citus_local_table, distributed_table; \c - - - :master_port SET search_path TO citus_local_table_queries_mx; -SET citus.replication_model TO streaming; +SET citus.shard_replication_factor TO 1; ALTER TABLE reference_table ADD CONSTRAINT pkey_ref PRIMARY KEY (a); ALTER TABLE citus_local_table ADD CONSTRAINT pkey_c PRIMARY KEY (a); -- define a foreign key chain distributed table -> reference table -> citus local table @@ -918,7 +918,7 @@ NOTICE: truncate cascades to table "distributed_table_xxxxx" ROLLBACK; \c - - - :master_port SET search_path TO citus_local_table_queries_mx; -SET citus.replication_model TO streaming; +SET citus.shard_replication_factor TO 1; ALTER TABLE distributed_table DROP CONSTRAINT fkey_dist_to_ref; \c - - - :worker_1_port SET search_path TO citus_local_table_queries_mx; @@ -933,7 +933,7 @@ BEGIN; ROLLBACK; \c - - - :master_port SET search_path TO citus_local_table_queries_mx; -SET citus.replication_model TO streaming; +SET citus.shard_replication_factor TO 1; -- remove uniqueness constraint and dependent foreign key constraint for next tests ALTER TABLE reference_table DROP CONSTRAINT fkey_ref_to_local; ALTER TABLE citus_local_table DROP CONSTRAINT pkey_c; diff --git a/src/test/regress/expected/columnar_citus_integration.out b/src/test/regress/expected/columnar_citus_integration.out index fd0512193..f55db78ee 100644 --- a/src/test/regress/expected/columnar_citus_integration.out +++ b/src/test/regress/expected/columnar_citus_integration.out @@ -129,7 +129,7 @@ $cmd$); (4 rows) -- change setting -SELECT alter_columnar_table_set('table_option', chunk_group_row_limit => 100); +SELECT alter_columnar_table_set('table_option', chunk_group_row_limit => 2000); alter_columnar_table_set --------------------------------------------------------------------- @@ -139,12 +139,12 @@ SELECT alter_columnar_table_set('table_option', chunk_group_row_limit => 100); SELECT run_command_on_placements('table_option',$cmd$ SELECT chunk_group_row_limit FROM columnar.options WHERE regclass = '%s'::regclass; $cmd$); - run_command_on_placements + run_command_on_placements --------------------------------------------------------------------- - (localhost,57637,20090000,t,100) - (localhost,57638,20090001,t,100) - (localhost,57637,20090002,t,100) - (localhost,57638,20090003,t,100) + (localhost,57637,20090000,t,2000) + (localhost,57638,20090001,t,2000) + (localhost,57637,20090002,t,2000) + (localhost,57638,20090003,t,2000) (4 rows) -- reset setting @@ -180,7 +180,7 @@ $cmd$); (4 rows) -- change setting -SELECT alter_columnar_table_set('table_option', stripe_row_limit => 100); +SELECT alter_columnar_table_set('table_option', stripe_row_limit => 2000); alter_columnar_table_set 
--------------------------------------------------------------------- @@ -190,12 +190,12 @@ SELECT alter_columnar_table_set('table_option', stripe_row_limit => 100); SELECT run_command_on_placements('table_option',$cmd$ SELECT stripe_row_limit FROM columnar.options WHERE regclass = '%s'::regclass; $cmd$); - run_command_on_placements + run_command_on_placements --------------------------------------------------------------------- - (localhost,57637,20090000,t,100) - (localhost,57638,20090001,t,100) - (localhost,57637,20090002,t,100) - (localhost,57638,20090003,t,100) + (localhost,57637,20090000,t,2000) + (localhost,57638,20090001,t,2000) + (localhost,57637,20090002,t,2000) + (localhost,57638,20090003,t,2000) (4 rows) -- reset setting @@ -220,8 +220,8 @@ $cmd$); -- verify settings are propagated when creating a table CREATE TABLE table_option_2 (a int, b text) USING columnar; SELECT alter_columnar_table_set('table_option_2', - chunk_group_row_limit => 100, - stripe_row_limit => 1000, + chunk_group_row_limit => 2000, + stripe_row_limit => 20000, compression => 'pglz', compression_level => 15); alter_columnar_table_set @@ -239,12 +239,12 @@ SELECT create_distributed_table('table_option_2', 'a'); SELECT run_command_on_placements('table_option_2',$cmd$ SELECT ROW(chunk_group_row_limit, stripe_row_limit, compression, compression_level) FROM columnar.options WHERE regclass = '%s'::regclass; $cmd$); - run_command_on_placements + run_command_on_placements --------------------------------------------------------------------- - (localhost,57637,20090004,t,"(100,1000,pglz,15)") - (localhost,57638,20090005,t,"(100,1000,pglz,15)") - (localhost,57637,20090006,t,"(100,1000,pglz,15)") - (localhost,57638,20090007,t,"(100,1000,pglz,15)") + (localhost,57637,20090004,t,"(2000,20000,pglz,15)") + (localhost,57638,20090005,t,"(2000,20000,pglz,15)") + (localhost,57637,20090006,t,"(2000,20000,pglz,15)") + (localhost,57638,20090007,t,"(2000,20000,pglz,15)") (4 rows) -- verify undistribute works @@ -424,7 +424,7 @@ $cmd$); (8 rows) -- change setting -SELECT alter_columnar_table_set('table_option', chunk_group_row_limit => 100); +SELECT alter_columnar_table_set('table_option', chunk_group_row_limit => 2000); alter_columnar_table_set --------------------------------------------------------------------- @@ -434,16 +434,16 @@ SELECT alter_columnar_table_set('table_option', chunk_group_row_limit => 100); SELECT run_command_on_placements('table_option',$cmd$ SELECT chunk_group_row_limit FROM columnar.options WHERE regclass = '%s'::regclass; $cmd$); - run_command_on_placements + run_command_on_placements --------------------------------------------------------------------- - (localhost,57637,20090008,t,100) - (localhost,57638,20090008,t,100) - (localhost,57637,20090009,t,100) - (localhost,57638,20090009,t,100) - (localhost,57637,20090010,t,100) - (localhost,57638,20090010,t,100) - (localhost,57637,20090011,t,100) - (localhost,57638,20090011,t,100) + (localhost,57637,20090008,t,2000) + (localhost,57638,20090008,t,2000) + (localhost,57637,20090009,t,2000) + (localhost,57638,20090009,t,2000) + (localhost,57637,20090010,t,2000) + (localhost,57638,20090010,t,2000) + (localhost,57637,20090011,t,2000) + (localhost,57638,20090011,t,2000) (8 rows) -- reset setting @@ -487,7 +487,7 @@ $cmd$); (8 rows) -- change setting -SELECT alter_columnar_table_set('table_option', stripe_row_limit => 100); +SELECT alter_columnar_table_set('table_option', stripe_row_limit => 2000); alter_columnar_table_set 
--------------------------------------------------------------------- @@ -497,16 +497,16 @@ SELECT alter_columnar_table_set('table_option', stripe_row_limit => 100); SELECT run_command_on_placements('table_option',$cmd$ SELECT stripe_row_limit FROM columnar.options WHERE regclass = '%s'::regclass; $cmd$); - run_command_on_placements + run_command_on_placements --------------------------------------------------------------------- - (localhost,57637,20090008,t,100) - (localhost,57638,20090008,t,100) - (localhost,57637,20090009,t,100) - (localhost,57638,20090009,t,100) - (localhost,57637,20090010,t,100) - (localhost,57638,20090010,t,100) - (localhost,57637,20090011,t,100) - (localhost,57638,20090011,t,100) + (localhost,57637,20090008,t,2000) + (localhost,57638,20090008,t,2000) + (localhost,57637,20090009,t,2000) + (localhost,57638,20090009,t,2000) + (localhost,57637,20090010,t,2000) + (localhost,57638,20090010,t,2000) + (localhost,57637,20090011,t,2000) + (localhost,57638,20090011,t,2000) (8 rows) -- reset setting @@ -535,8 +535,8 @@ $cmd$); -- verify settings are propagated when creating a table CREATE TABLE table_option_2 (a int, b text) USING columnar; SELECT alter_columnar_table_set('table_option_2', - chunk_group_row_limit => 100, - stripe_row_limit => 1000, + chunk_group_row_limit => 2000, + stripe_row_limit => 20000, compression => 'pglz', compression_level => 19); alter_columnar_table_set @@ -554,16 +554,16 @@ SELECT create_distributed_table('table_option_2', 'a'); SELECT run_command_on_placements('table_option_2',$cmd$ SELECT ROW(chunk_group_row_limit, stripe_row_limit, compression, compression_level) FROM columnar.options WHERE regclass = '%s'::regclass; $cmd$); - run_command_on_placements + run_command_on_placements --------------------------------------------------------------------- - (localhost,57637,20090012,t,"(100,1000,pglz,19)") - (localhost,57638,20090012,t,"(100,1000,pglz,19)") - (localhost,57637,20090013,t,"(100,1000,pglz,19)") - (localhost,57638,20090013,t,"(100,1000,pglz,19)") - (localhost,57637,20090014,t,"(100,1000,pglz,19)") - (localhost,57638,20090014,t,"(100,1000,pglz,19)") - (localhost,57637,20090015,t,"(100,1000,pglz,19)") - (localhost,57638,20090015,t,"(100,1000,pglz,19)") + (localhost,57637,20090012,t,"(2000,20000,pglz,19)") + (localhost,57638,20090012,t,"(2000,20000,pglz,19)") + (localhost,57637,20090013,t,"(2000,20000,pglz,19)") + (localhost,57638,20090013,t,"(2000,20000,pglz,19)") + (localhost,57637,20090014,t,"(2000,20000,pglz,19)") + (localhost,57638,20090014,t,"(2000,20000,pglz,19)") + (localhost,57637,20090015,t,"(2000,20000,pglz,19)") + (localhost,57638,20090015,t,"(2000,20000,pglz,19)") (8 rows) -- verify undistribute works @@ -699,7 +699,7 @@ $cmd$); (2 rows) -- change setting -SELECT alter_columnar_table_set('table_option_reference', chunk_group_row_limit => 100); +SELECT alter_columnar_table_set('table_option_reference', chunk_group_row_limit => 2000); alter_columnar_table_set --------------------------------------------------------------------- @@ -709,10 +709,10 @@ SELECT alter_columnar_table_set('table_option_reference', chunk_group_row_limit SELECT run_command_on_placements('table_option_reference',$cmd$ SELECT chunk_group_row_limit FROM columnar.options WHERE regclass = '%s'::regclass; $cmd$); - run_command_on_placements + run_command_on_placements --------------------------------------------------------------------- - (localhost,57637,20090016,t,100) - (localhost,57638,20090016,t,100) + (localhost,57637,20090016,t,2000) + 
(localhost,57638,20090016,t,2000) (2 rows) -- reset setting @@ -744,7 +744,7 @@ $cmd$); (2 rows) -- change setting -SELECT alter_columnar_table_set('table_option_reference', stripe_row_limit => 100); +SELECT alter_columnar_table_set('table_option_reference', stripe_row_limit => 2000); alter_columnar_table_set --------------------------------------------------------------------- @@ -754,10 +754,10 @@ SELECT alter_columnar_table_set('table_option_reference', stripe_row_limit => 10 SELECT run_command_on_placements('table_option_reference',$cmd$ SELECT stripe_row_limit FROM columnar.options WHERE regclass = '%s'::regclass; $cmd$); - run_command_on_placements + run_command_on_placements --------------------------------------------------------------------- - (localhost,57637,20090016,t,100) - (localhost,57638,20090016,t,100) + (localhost,57637,20090016,t,2000) + (localhost,57638,20090016,t,2000) (2 rows) -- reset setting @@ -780,8 +780,8 @@ $cmd$); -- verify settings are propagated when creating a table CREATE TABLE table_option_reference_2 (a int, b text) USING columnar; SELECT alter_columnar_table_set('table_option_reference_2', - chunk_group_row_limit => 100, - stripe_row_limit => 1000, + chunk_group_row_limit => 2000, + stripe_row_limit => 20000, compression => 'pglz', compression_level => 9); alter_columnar_table_set @@ -799,10 +799,10 @@ SELECT create_reference_table('table_option_reference_2'); SELECT run_command_on_placements('table_option_reference_2',$cmd$ SELECT ROW(chunk_group_row_limit, stripe_row_limit, compression, compression_level) FROM columnar.options WHERE regclass = '%s'::regclass; $cmd$); - run_command_on_placements + run_command_on_placements --------------------------------------------------------------------- - (localhost,57637,20090017,t,"(100,1000,pglz,9)") - (localhost,57638,20090017,t,"(100,1000,pglz,9)") + (localhost,57637,20090017,t,"(2000,20000,pglz,9)") + (localhost,57638,20090017,t,"(2000,20000,pglz,9)") (2 rows) -- verify undistribute works @@ -938,7 +938,7 @@ $cmd$); (1 row) -- change setting -SELECT alter_columnar_table_set('table_option_citus_local', chunk_group_row_limit => 100); +SELECT alter_columnar_table_set('table_option_citus_local', chunk_group_row_limit => 2000); alter_columnar_table_set --------------------------------------------------------------------- @@ -948,9 +948,9 @@ SELECT alter_columnar_table_set('table_option_citus_local', chunk_group_row_limi SELECT run_command_on_placements('table_option_citus_local',$cmd$ SELECT chunk_group_row_limit FROM columnar.options WHERE regclass = '%s'::regclass; $cmd$); - run_command_on_placements + run_command_on_placements --------------------------------------------------------------------- - (localhost,57636,20090018,t,100) + (localhost,57636,20090018,t,2000) (1 row) -- reset setting @@ -980,7 +980,7 @@ $cmd$); (1 row) -- change setting -SELECT alter_columnar_table_set('table_option_citus_local', stripe_row_limit => 100); +SELECT alter_columnar_table_set('table_option_citus_local', stripe_row_limit => 2000); alter_columnar_table_set --------------------------------------------------------------------- @@ -990,9 +990,9 @@ SELECT alter_columnar_table_set('table_option_citus_local', stripe_row_limit => SELECT run_command_on_placements('table_option_citus_local',$cmd$ SELECT stripe_row_limit FROM columnar.options WHERE regclass = '%s'::regclass; $cmd$); - run_command_on_placements + run_command_on_placements --------------------------------------------------------------------- - 
(localhost,57636,20090018,t,100) + (localhost,57636,20090018,t,2000) (1 row) -- reset setting @@ -1014,8 +1014,8 @@ $cmd$); -- verify settings are propagated when creating a table CREATE TABLE table_option_citus_local_2 (a int, b text) USING columnar; SELECT alter_columnar_table_set('table_option_citus_local_2', - chunk_group_row_limit => 100, - stripe_row_limit => 1000, + chunk_group_row_limit => 2000, + stripe_row_limit => 20000, compression => 'pglz', compression_level => 9); alter_columnar_table_set @@ -1033,9 +1033,9 @@ SELECT citus_add_local_table_to_metadata('table_option_citus_local_2'); SELECT run_command_on_placements('table_option_citus_local_2',$cmd$ SELECT ROW(chunk_group_row_limit, stripe_row_limit, compression, compression_level) FROM columnar.options WHERE regclass = '%s'::regclass; $cmd$); - run_command_on_placements + run_command_on_placements --------------------------------------------------------------------- - (localhost,57636,20090019,t,"(100,1000,pglz,9)") + (localhost,57636,20090019,t,"(2000,20000,pglz,9)") (1 row) -- verify undistribute works diff --git a/src/test/regress/expected/columnar_empty.out b/src/test/regress/expected/columnar_empty.out index 67bde4e0a..728f60f12 100644 --- a/src/test/regress/expected/columnar_empty.out +++ b/src/test/regress/expected/columnar_empty.out @@ -11,13 +11,13 @@ SELECT alter_columnar_table_set('t_compressed', compression => 'pglz'); (1 row) -SELECT alter_columnar_table_set('t_compressed', stripe_row_limit => 100); +SELECT alter_columnar_table_set('t_compressed', stripe_row_limit => 2000); alter_columnar_table_set --------------------------------------------------------------------- (1 row) -SELECT alter_columnar_table_set('t_compressed', chunk_group_row_limit => 100); +SELECT alter_columnar_table_set('t_compressed', chunk_group_row_limit => 1000); alter_columnar_table_set --------------------------------------------------------------------- @@ -26,7 +26,7 @@ SELECT alter_columnar_table_set('t_compressed', chunk_group_row_limit => 100); SELECT * FROM columnar.options WHERE regclass = 't_compressed'::regclass; regclass | chunk_group_row_limit | stripe_row_limit | compression_level | compression --------------------------------------------------------------------- - t_compressed | 100 | 100 | 3 | pglz + t_compressed | 1000 | 2000 | 3 | pglz (1 row) -- select diff --git a/src/test/regress/expected/columnar_insert.out b/src/test/regress/expected/columnar_insert.out index 04d7734f6..ff950e1e0 100644 --- a/src/test/regress/expected/columnar_insert.out +++ b/src/test/regress/expected/columnar_insert.out @@ -169,7 +169,7 @@ DROP TABLE test_toast_columnar; -- We support writing into zero column tables, but not reading from them. -- We test that metadata makes sense so we can fix the read path in future. 
CREATE TABLE zero_col() USING columnar; -SELECT alter_columnar_table_set('zero_col', chunk_group_row_limit => 10); +SELECT alter_columnar_table_set('zero_col', chunk_group_row_limit => 1000); alter_columnar_table_set --------------------------------------------------------------------- @@ -206,7 +206,7 @@ ORDER BY 1,2,3,4; zero_col | 2 | 1 | 1 zero_col | 3 | 1 | 1 zero_col | 4 | 1 | 1 - zero_col | 5 | 7 | 64 + zero_col | 5 | 1 | 64 (5 rows) SELECT relname, stripe_num, value_count FROM columnar.chunk a, pg_class b @@ -225,13 +225,7 @@ ORDER BY 1,2,3,4; zero_col | 2 | 0 | 1 zero_col | 3 | 0 | 1 zero_col | 4 | 0 | 1 - zero_col | 5 | 0 | 10 - zero_col | 5 | 1 | 10 - zero_col | 5 | 2 | 10 - zero_col | 5 | 3 | 10 - zero_col | 5 | 4 | 10 - zero_col | 5 | 5 | 10 - zero_col | 5 | 6 | 4 -(11 rows) + zero_col | 5 | 0 | 64 +(5 rows) DROP TABLE zero_col; diff --git a/src/test/regress/expected/columnar_tableoptions.out b/src/test/regress/expected/columnar_tableoptions.out index bbf2b7803..7bf650e02 100644 --- a/src/test/regress/expected/columnar_tableoptions.out +++ b/src/test/regress/expected/columnar_tableoptions.out @@ -8,7 +8,7 @@ SELECT * FROM columnar.options WHERE regclass = 'table_options'::regclass; regclass | chunk_group_row_limit | stripe_row_limit | compression_level | compression --------------------------------------------------------------------- - table_options | 10000 | 150000 | 3 | none + table_options | 10000 | 150000 | 3 | none (1 row) -- test changing the compression @@ -23,7 +23,7 @@ SELECT * FROM columnar.options WHERE regclass = 'table_options'::regclass; regclass | chunk_group_row_limit | stripe_row_limit | compression_level | compression --------------------------------------------------------------------- - table_options | 10000 | 150000 | 3 | pglz + table_options | 10000 | 150000 | 3 | pglz (1 row) -- test changing the compression level @@ -38,11 +38,11 @@ SELECT * FROM columnar.options WHERE regclass = 'table_options'::regclass; regclass | chunk_group_row_limit | stripe_row_limit | compression_level | compression --------------------------------------------------------------------- - table_options | 10000 | 150000 | 5 | pglz + table_options | 10000 | 150000 | 5 | pglz (1 row) -- test changing the chunk_group_row_limit -SELECT alter_columnar_table_set('table_options', chunk_group_row_limit => 10); +SELECT alter_columnar_table_set('table_options', chunk_group_row_limit => 2000); alter_columnar_table_set --------------------------------------------------------------------- @@ -53,11 +53,11 @@ SELECT * FROM columnar.options WHERE regclass = 'table_options'::regclass; regclass | chunk_group_row_limit | stripe_row_limit | compression_level | compression --------------------------------------------------------------------- - table_options | 10 | 150000 | 5 | pglz + table_options | 2000 | 150000 | 5 | pglz (1 row) -- test changing the chunk_group_row_limit -SELECT alter_columnar_table_set('table_options', stripe_row_limit => 100); +SELECT alter_columnar_table_set('table_options', stripe_row_limit => 4000); alter_columnar_table_set --------------------------------------------------------------------- @@ -68,7 +68,7 @@ SELECT * FROM columnar.options WHERE regclass = 'table_options'::regclass; regclass | chunk_group_row_limit | stripe_row_limit | compression_level | compression --------------------------------------------------------------------- - table_options | 10 | 100 | 5 | pglz + table_options | 2000 | 4000 | 5 | pglz (1 row) -- VACUUM FULL creates a new table, make sure it 
copies settings from the table you are vacuuming @@ -78,11 +78,11 @@ SELECT * FROM columnar.options WHERE regclass = 'table_options'::regclass; regclass | chunk_group_row_limit | stripe_row_limit | compression_level | compression --------------------------------------------------------------------- - table_options | 10 | 100 | 5 | pglz + table_options | 2000 | 4000 | 5 | pglz (1 row) -- set all settings at the same time -SELECT alter_columnar_table_set('table_options', stripe_row_limit => 1000, chunk_group_row_limit => 100, compression => 'none', compression_level => 7); +SELECT alter_columnar_table_set('table_options', stripe_row_limit => 8000, chunk_group_row_limit => 4000, compression => 'none', compression_level => 7); alter_columnar_table_set --------------------------------------------------------------------- @@ -93,7 +93,7 @@ SELECT * FROM columnar.options WHERE regclass = 'table_options'::regclass; regclass | chunk_group_row_limit | stripe_row_limit | compression_level | compression --------------------------------------------------------------------- - table_options | 100 | 1000 | 7 | none + table_options | 4000 | 8000 | 7 | none (1 row) -- make sure table options are not changed when VACUUM a table @@ -103,7 +103,7 @@ SELECT * FROM columnar.options WHERE regclass = 'table_options'::regclass; regclass | chunk_group_row_limit | stripe_row_limit | compression_level | compression --------------------------------------------------------------------- - table_options | 100 | 1000 | 7 | none + table_options | 4000 | 8000 | 7 | none (1 row) -- make sure table options are not changed when VACUUM FULL a table @@ -113,7 +113,7 @@ SELECT * FROM columnar.options WHERE regclass = 'table_options'::regclass; regclass | chunk_group_row_limit | stripe_row_limit | compression_level | compression --------------------------------------------------------------------- - table_options | 100 | 1000 | 7 | none + table_options | 4000 | 8000 | 7 | none (1 row) -- make sure table options are not changed when truncating a table @@ -123,7 +123,7 @@ SELECT * FROM columnar.options WHERE regclass = 'table_options'::regclass; regclass | chunk_group_row_limit | stripe_row_limit | compression_level | compression --------------------------------------------------------------------- - table_options | 100 | 1000 | 7 | none + table_options | 4000 | 8000 | 7 | none (1 row) ALTER TABLE table_options ALTER COLUMN a TYPE bigint; @@ -132,7 +132,7 @@ SELECT * FROM columnar.options WHERE regclass = 'table_options'::regclass; regclass | chunk_group_row_limit | stripe_row_limit | compression_level | compression --------------------------------------------------------------------- - table_options | 100 | 1000 | 7 | none + table_options | 4000 | 8000 | 7 | none (1 row) -- reset settings one by one to the version of the GUC's @@ -146,7 +146,7 @@ SELECT * FROM columnar.options WHERE regclass = 'table_options'::regclass; regclass | chunk_group_row_limit | stripe_row_limit | compression_level | compression --------------------------------------------------------------------- - table_options | 100 | 1000 | 7 | none + table_options | 4000 | 8000 | 7 | none (1 row) SELECT alter_columnar_table_reset('table_options', chunk_group_row_limit => true); @@ -160,7 +160,7 @@ SELECT * FROM columnar.options WHERE regclass = 'table_options'::regclass; regclass | chunk_group_row_limit | stripe_row_limit | compression_level | compression --------------------------------------------------------------------- - table_options | 1000 | 1000 | 7 | none + 
table_options | 1000 | 8000 | 7 | none (1 row) SELECT alter_columnar_table_reset('table_options', stripe_row_limit => true); @@ -174,7 +174,7 @@ SELECT * FROM columnar.options WHERE regclass = 'table_options'::regclass; regclass | chunk_group_row_limit | stripe_row_limit | compression_level | compression --------------------------------------------------------------------- - table_options | 1000 | 10000 | 7 | none + table_options | 1000 | 10000 | 7 | none (1 row) SELECT alter_columnar_table_reset('table_options', compression => true); @@ -188,7 +188,7 @@ SELECT * FROM columnar.options WHERE regclass = 'table_options'::regclass; regclass | chunk_group_row_limit | stripe_row_limit | compression_level | compression --------------------------------------------------------------------- - table_options | 1000 | 10000 | 7 | pglz + table_options | 1000 | 10000 | 7 | pglz (1 row) SELECT alter_columnar_table_reset('table_options', compression_level => true); @@ -202,7 +202,7 @@ SELECT * FROM columnar.options WHERE regclass = 'table_options'::regclass; regclass | chunk_group_row_limit | stripe_row_limit | compression_level | compression --------------------------------------------------------------------- - table_options | 1000 | 10000 | 11 | pglz + table_options | 1000 | 10000 | 11 | pglz (1 row) -- verify resetting all settings at once work @@ -215,7 +215,7 @@ SELECT * FROM columnar.options WHERE regclass = 'table_options'::regclass; regclass | chunk_group_row_limit | stripe_row_limit | compression_level | compression --------------------------------------------------------------------- - table_options | 1000 | 10000 | 11 | pglz + table_options | 1000 | 10000 | 11 | pglz (1 row) SELECT alter_columnar_table_reset( @@ -234,7 +234,7 @@ SELECT * FROM columnar.options WHERE regclass = 'table_options'::regclass; regclass | chunk_group_row_limit | stripe_row_limit | compression_level | compression --------------------------------------------------------------------- - table_options | 10000 | 100000 | 13 | none + table_options | 10000 | 100000 | 13 | none (1 row) -- verify edge cases @@ -254,6 +254,23 @@ HINT: compression level must be between 1 and 19 SELECT alter_columnar_table_set('table_options', compression_level => 20); ERROR: compression level out of range HINT: compression level must be between 1 and 19 +-- verify cannot set out of range stripe_row_limit & chunk_group_row_limit options +SELECT alter_columnar_table_set('table_options', stripe_row_limit => 999); +ERROR: stripe row count limit out of range +HINT: stripe row count limit must be between 1000 and 10000000 +SELECT alter_columnar_table_set('table_options', stripe_row_limit => 10000001); +ERROR: stripe row count limit out of range +HINT: stripe row count limit must be between 1000 and 10000000 +SELECT alter_columnar_table_set('table_options', chunk_group_row_limit => 999); +ERROR: chunk group row count limit out of range +HINT: chunk group row count limit must be between 1000 and 100000 +SELECT alter_columnar_table_set('table_options', chunk_group_row_limit => 100001); +ERROR: chunk group row count limit out of range +HINT: chunk group row count limit must be between 1000 and 100000 +SELECT alter_columnar_table_set('table_options', chunk_group_row_limit => 0); +ERROR: chunk group row count limit out of range +HINT: chunk group row count limit must be between 1000 and 100000 +INSERT INTO table_options VALUES (1); -- verify options are removed when table is dropped DROP TABLE table_options; -- we expect no entries in cstore.options for
anything not found int pg_class diff --git a/src/test/regress/expected/coordinator_evaluation_modify.out b/src/test/regress/expected/coordinator_evaluation_modify.out index 588466fa1..0a73b5ffb 100644 --- a/src/test/regress/expected/coordinator_evaluation_modify.out +++ b/src/test/regress/expected/coordinator_evaluation_modify.out @@ -33,7 +33,6 @@ BEGIN RETURN 1; END; $$ language plpgsql STABLE; CREATE TYPE user_data AS (name text, age int); -SET citus.replication_model TO streaming; SET citus.shard_replication_factor TO 1; CREATE TABLE user_info_data (user_id int, u_data user_data, user_index int); SELECT create_distributed_table('user_info_data', 'user_id'); diff --git a/src/test/regress/expected/coordinator_evaluation_select.out b/src/test/regress/expected/coordinator_evaluation_select.out index c837090d6..c48621e42 100644 --- a/src/test/regress/expected/coordinator_evaluation_select.out +++ b/src/test/regress/expected/coordinator_evaluation_select.out @@ -27,7 +27,6 @@ SELECT create_distributed_function('get_local_node_id_volatile()'); (1 row) CREATE TYPE user_data AS (name text, age int); -SET citus.replication_model TO streaming; SET citus.shard_replication_factor TO 1; CREATE TABLE user_info_data (user_id int, u_data user_data, user_index int); SELECT create_distributed_table('user_info_data', 'user_id'); diff --git a/src/test/regress/expected/create_ref_dist_from_citus_local.out b/src/test/regress/expected/create_ref_dist_from_citus_local.out index e24fe3e20..a4eceb8e1 100644 --- a/src/test/regress/expected/create_ref_dist_from_citus_local.out +++ b/src/test/regress/expected/create_ref_dist_from_citus_local.out @@ -90,11 +90,11 @@ BEGIN; tablename | partmethod | repmodel --------------------------------------------------------------------- citus_local_table_1 | n | t - citus_local_table_2 | n | c - citus_local_table_3 | n | c - citus_local_table_4 | n | c - distributed_table_1 | h | c - partitioned_dist_table_1 | h | c + citus_local_table_2 | n | s + citus_local_table_3 | n | s + citus_local_table_4 | n | s + distributed_table_1 | h | s + partitioned_dist_table_1 | h | s reference_table_1 | n | t reference_table_2 | n | t (8 rows) @@ -120,12 +120,12 @@ BEGIN; ORDER BY tablename; tablename | partmethod | repmodel --------------------------------------------------------------------- - citus_local_table_1 | n | c + citus_local_table_1 | n | s citus_local_table_2 | n | t - citus_local_table_3 | n | c - citus_local_table_4 | n | c - distributed_table_1 | h | c - partitioned_dist_table_1 | h | c + citus_local_table_3 | n | s + citus_local_table_4 | n | s + distributed_table_1 | h | s + partitioned_dist_table_1 | h | s reference_table_1 | n | t reference_table_2 | n | t (8 rows) @@ -204,13 +204,13 @@ BEGIN; ORDER BY tablename; tablename | partmethod | repmodel --------------------------------------------------------------------- - citus_local_table_1 | n | c - citus_local_table_2 | n | c - citus_local_table_3 | n | c - citus_local_table_4 | n | c - citus_local_table_5 | h | c - distributed_table_1 | h | c - partitioned_dist_table_1 | h | c + citus_local_table_1 | n | s + citus_local_table_2 | n | s + citus_local_table_3 | n | s + citus_local_table_4 | n | s + citus_local_table_5 | h | s + distributed_table_1 | h | s + partitioned_dist_table_1 | h | s reference_table_1 | n | t reference_table_2 | n | t (9 rows) @@ -237,13 +237,13 @@ BEGIN; ORDER BY tablename; tablename | partmethod | repmodel --------------------------------------------------------------------- - citus_local_table_1 | n | 
c - citus_local_table_2 | n | c - citus_local_table_3 | n | c - citus_local_table_4 | n | c - citus_local_table_5 | h | c - distributed_table_1 | h | c - partitioned_dist_table_1 | h | c + citus_local_table_1 | n | s + citus_local_table_2 | n | s + citus_local_table_3 | n | s + citus_local_table_4 | n | s + citus_local_table_5 | h | s + distributed_table_1 | h | s + partitioned_dist_table_1 | h | s reference_table_1 | n | t reference_table_2 | n | t (9 rows) @@ -272,13 +272,13 @@ BEGIN; ORDER BY tablename; tablename | partmethod | repmodel --------------------------------------------------------------------- - citus_local_table_1 | n | c - citus_local_table_2 | n | c - citus_local_table_3 | n | c - citus_local_table_4 | n | c - citus_local_table_5 | h | c - distributed_table_1 | h | c - partitioned_dist_table_1 | h | c + citus_local_table_1 | n | s + citus_local_table_2 | n | s + citus_local_table_3 | n | s + citus_local_table_4 | n | s + citus_local_table_5 | h | s + distributed_table_1 | h | s + partitioned_dist_table_1 | h | s reference_table_1 | n | t reference_table_2 | n | t (9 rows) diff --git a/src/test/regress/expected/distributed_functions.out b/src/test/regress/expected/distributed_functions.out index 7cfbcbbdb..7fb8166a3 100644 --- a/src/test/regress/expected/distributed_functions.out +++ b/src/test/regress/expected/distributed_functions.out @@ -169,8 +169,7 @@ CREATE AGGREGATE agg_names(x dup_result, yz dup_result) ( SET citus.enable_ddl_propagation TO on; -- use an unusual type to force a new colocation group CREATE TABLE statement_table(id int2); -SET citus.replication_model TO 'statement'; -SET citus.shard_replication_factor TO 1; +SET citus.shard_replication_factor TO 2; SELECT create_distributed_table('statement_table','id'); create_distributed_table --------------------------------------------------------------------- @@ -179,7 +178,6 @@ SELECT create_distributed_table('statement_table','id'); -- create a table uses streaming-based replication (can be synced) CREATE TABLE streaming_table(id macaddr); -SET citus.replication_model TO 'streaming'; SET citus.shard_replication_factor TO 1; SELECT create_distributed_table('streaming_table','id'); create_distributed_table @@ -228,19 +226,17 @@ select bool_or(hasmetadata) from pg_dist_node WHERE isactive AND noderole = 'pr -- try to co-locate with a table that uses statement-based replication SELECT create_distributed_function('increment(int2)', '$1'); -ERROR: cannot colocate function "increment" and table "statement_table" -DETAIL: Citus currently only supports colocating function with distributed tables that are created using streaming replication model. -HINT: When distributing tables make sure that citus.replication_model = 'streaming' +ERROR: cannot distribute the function "increment" since there is no table to colocate with +HINT: Provide a distributed table via "colocate_with" option to create_distributed_function() SELECT create_distributed_function('increment(int2)', '$1', colocate_with := 'statement_table'); ERROR: cannot colocate function "increment" and table "statement_table" DETAIL: Citus currently only supports colocating function with distributed tables that are created using streaming replication model. 
-HINT: When distributing tables make sure that citus.replication_model = 'streaming' +HINT: When distributing tables make sure that citus.shard_replication_factor = 1 BEGIN; -SET LOCAL citus.replication_model TO 'statement'; DROP TABLE statement_table; SELECT create_distributed_function('increment(int2)', '$1'); -ERROR: cannot create a function with a distribution argument when citus.replication_model is 'statement' -HINT: Set citus.replication_model to 'streaming' before creating distributed tables +ERROR: cannot distribute the function "increment" since there is no table to colocate with +HINT: Provide a distributed table via "colocate_with" option to create_distributed_function() END; -- try to co-locate with a table that uses streaming replication SELECT create_distributed_function('dup(macaddr)', '$1', colocate_with := 'streaming_table'); @@ -649,7 +645,6 @@ SELECT create_distributed_function('eq_with_param_names(macaddr, macaddr)','$1') -- a function cannot be colocated with a table that is not "streaming" replicated SET citus.shard_replication_factor TO 2; CREATE TABLE replicated_table_func_test (a macaddr); -SET citus.replication_model TO "statement"; SELECT create_distributed_table('replicated_table_func_test', 'a'); create_distributed_table --------------------------------------------------------------------- @@ -659,7 +654,7 @@ SELECT create_distributed_table('replicated_table_func_test', 'a'); SELECT create_distributed_function('eq_with_param_names(macaddr, macaddr)', '$1', colocate_with:='replicated_table_func_test'); ERROR: cannot colocate function "eq_with_param_names" and table "replicated_table_func_test" DETAIL: Citus currently only supports colocating function with distributed tables that are created using streaming replication model. -HINT: When distributing tables make sure that citus.replication_model = 'streaming' +HINT: When distributing tables make sure that citus.shard_replication_factor = 1 SELECT public.wait_until_metadata_sync(30000); wait_until_metadata_sync --------------------------------------------------------------------- @@ -670,7 +665,6 @@ SELECT public.wait_until_metadata_sync(30000); -- as long as there is a coercion path SET citus.shard_replication_factor TO 1; CREATE TABLE replicated_table_func_test_2 (a macaddr8); -SET citus.replication_model TO "streaming"; SELECT create_distributed_table('replicated_table_func_test_2', 'a'); create_distributed_table --------------------------------------------------------------------- @@ -694,7 +688,6 @@ ERROR: relation replicated_table_func_test_3 is not distributed -- finally, colocate the function with a distributed table SET citus.shard_replication_factor TO 1; CREATE TABLE replicated_table_func_test_4 (a macaddr); -SET citus.replication_model TO "streaming"; SELECT create_distributed_table('replicated_table_func_test_4', 'a'); create_distributed_table --------------------------------------------------------------------- diff --git a/src/test/regress/expected/distributed_procedure.out b/src/test/regress/expected/distributed_procedure.out index c13dd07a7..879ad5e57 100644 --- a/src/test/regress/expected/distributed_procedure.out +++ b/src/test/regress/expected/distributed_procedure.out @@ -35,7 +35,6 @@ CREATE OR REPLACE FUNCTION wait_until_metadata_sync(timeout INTEGER DEFAULT 1500 AS 'citus'; -- procedures are distributed by text arguments, when run in isolation it is not guaranteed a table actually exists. 
CREATE TABLE colocation_table(id text); -SET citus.replication_model TO 'streaming'; SET citus.shard_replication_factor TO 1; SELECT create_distributed_table('colocation_table','id'); create_distributed_table diff --git a/src/test/regress/expected/failure_create_table.out b/src/test/regress/expected/failure_create_table.out index eff391e68..b949b0688 100644 --- a/src/test/regress/expected/failure_create_table.out +++ b/src/test/regress/expected/failure_create_table.out @@ -418,7 +418,7 @@ COMMIT; SELECT recover_prepared_transactions(); recover_prepared_transactions --------------------------------------------------------------------- - 1 + 4 (1 row) SELECT citus.mitmproxy('conn.allow()'); diff --git a/src/test/regress/expected/failure_ddl.out b/src/test/regress/expected/failure_ddl.out index 80bd88a96..73b05c3af 100644 --- a/src/test/regress/expected/failure_ddl.out +++ b/src/test/regress/expected/failure_ddl.out @@ -128,41 +128,13 @@ SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where r {key,value} (1 row) --- kill as soon as the coordinator sends COMMIT -SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").kill()'); - mitmproxy ---------------------------------------------------------------------- - -(1 row) - -ALTER TABLE test_table ADD COLUMN new_column INT; +-- manually drop & re-create the table for the next tests SELECT citus.mitmproxy('conn.allow()'); mitmproxy --------------------------------------------------------------------- (1 row) --- since we've killed the connection just after --- the coordinator sends the COMMIT, the command should be applied --- to the distributed table and the shards on the other worker --- however, there is no way to recover the failure on the shards --- that live in the failed worker, since we're running 1PC -SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; - array_agg ---------------------------------------------------------------------- - {key,new_column,value} -(1 row) - -SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; - run_command_on_placements ---------------------------------------------------------------------- - (localhost,9060,100800,t,"{key,value}") - (localhost,9060,100802,t,"{key,value}") - (localhost,57637,100801,t,"{key,new_column,value}") - (localhost,57637,100803,t,"{key,new_column,value}") -(4 rows) - --- manually drop & re-create the table for the next tests DROP TABLE test_table; SET citus.next_shard_id TO 100800; SET citus.multi_shard_commit_protocol TO '1pc'; @@ -229,8 +201,6 @@ CONTEXT: while executing command on localhost:xxxxx WARNING: failed to commit transaction on localhost:xxxxx WARNING: connection not open CONTEXT: while executing command on localhost:xxxxx -WARNING: could not commit transaction for shard xxxxx on any active node -WARNING: could not commit transaction for shard xxxxx on any active node SELECT citus.mitmproxy('conn.allow()'); mitmproxy --------------------------------------------------------------------- diff --git a/src/test/regress/expected/failure_failover_to_local_execution.out b/src/test/regress/expected/failure_failover_to_local_execution.out index 5427ca9ae..0b5aebccd 100644 --- a/src/test/regress/expected/failure_failover_to_local_execution.out +++ b/src/test/regress/expected/failure_failover_to_local_execution.out @@ -20,7 +20,6 @@ SELECT start_metadata_sync_to_node('localhost', 
:worker_2_proxy_port); (1 row) -SET citus.replication_model TO 'streaming'; SET citus.shard_replication_factor TO 1; CREATE TABLE failover_to_local (key int PRIMARY KEY, value varchar(10)); SELECT create_distributed_table('failover_to_local', 'key'); diff --git a/src/test/regress/expected/failure_mx_metadata_sync.out b/src/test/regress/expected/failure_mx_metadata_sync.out index a4aeb7704..67f82ed67 100644 --- a/src/test/regress/expected/failure_mx_metadata_sync.out +++ b/src/test/regress/expected/failure_mx_metadata_sync.out @@ -6,7 +6,6 @@ SET SEARCH_PATH = mx_metadata_sync; SET citus.shard_count TO 2; SET citus.next_shard_id TO 16000000; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; SELECT pg_backend_pid() as pid \gset SELECT citus.mitmproxy('conn.allow()'); mitmproxy diff --git a/src/test/regress/expected/failure_replicated_partitions.out b/src/test/regress/expected/failure_replicated_partitions.out index bfa8189af..b9fcc2723 100644 --- a/src/test/regress/expected/failure_replicated_partitions.out +++ b/src/test/regress/expected/failure_replicated_partitions.out @@ -6,7 +6,6 @@ SELECT citus.mitmproxy('conn.allow()'); (1 row) SET citus.shard_replication_factor TO 2; -SET "citus.replication_model" to "statement"; SET citus.shard_count TO 4; CREATE TABLE partitioned_table ( dist_key bigint, diff --git a/src/test/regress/expected/fkeys_between_local_ref.out b/src/test/regress/expected/fkeys_between_local_ref.out index 690432feb..7a620b84e 100644 --- a/src/test/regress/expected/fkeys_between_local_ref.out +++ b/src/test/regress/expected/fkeys_between_local_ref.out @@ -9,7 +9,7 @@ CREATE VIEW citus_local_tables_in_schema AS SELECT logicalrelid FROM pg_dist_partition, pg_tables WHERE tablename=logicalrelid::regclass::text AND schemaname='fkeys_between_local_ref' AND - partmethod = 'n' AND repmodel = 'c'; + partmethod = 'n' AND repmodel = 's'; -- remove coordinator if it is added to pg_dist_node and test -- behavior when coordinator is not added to metadata SELECT COUNT(master_remove_node(nodename, nodeport)) < 2 @@ -195,10 +195,10 @@ BEGIN; ORDER BY tablename; tablename | partmethod | repmodel --------------------------------------------------------------------- - local_table_1 | n | c - local_table_2 | n | c - local_table_3 | n | c - local_table_4 | n | c + local_table_1 | n | s + local_table_2 | n | s + local_table_3 | n | s + local_table_4 | n | s reference_table_1 | n | t reference_table_2 | n | t (6 rows) @@ -279,12 +279,12 @@ BEGIN; ORDER BY tablename; tablename | partmethod | repmodel --------------------------------------------------------------------- - local_table_1 | n | c - local_table_2 | n | c - local_table_3 | n | c - local_table_4 | n | c - local_table_5 | n | c - local_table_6 | n | c + local_table_1 | n | s + local_table_2 | n | s + local_table_3 | n | s + local_table_4 | n | s + local_table_5 | n | s + local_table_6 | n | s reference_table_1 | n | t (7 rows) @@ -318,12 +318,12 @@ BEGIN; ORDER BY tablename; tablename | partmethod | repmodel --------------------------------------------------------------------- - distributed_table | h | c - local_table_1 | n | c - local_table_2 | n | c - local_table_3 | n | c - local_table_4 | n | c - local_table_5 | n | c + distributed_table | h | s + local_table_1 | n | s + local_table_2 | n | s + local_table_3 | n | s + local_table_4 | n | s + local_table_5 | n | s reference_table_1 | n | t (7 rows) @@ -348,13 +348,13 @@ BEGIN; ORDER BY tablename; tablename | partmethod | repmodel 
--------------------------------------------------------------------- - another_schema_fkeys_between_local_ref.local_table_6 | n | c - distributed_table | h | c - local_table_1 | n | c - local_table_2 | n | c - local_table_3 | n | c - local_table_4 | n | c - local_table_5 | n | c + another_schema_fkeys_between_local_ref.local_table_6 | n | s + distributed_table | h | s + local_table_1 | n | s + local_table_2 | n | s + local_table_3 | n | s + local_table_4 | n | s + local_table_5 | n | s reference_table_1 | n | t (8 rows) @@ -366,10 +366,10 @@ BEGIN; ORDER BY tablename; tablename | partmethod | repmodel --------------------------------------------------------------------- - distributed_table | h | c - local_table_1 | n | c - local_table_2 | n | c - local_table_4 | n | c + distributed_table | h | s + local_table_1 | n | s + local_table_2 | n | s + local_table_4 | n | s reference_table_1 | n | t (5 rows) @@ -395,13 +395,13 @@ BEGIN; ORDER BY tablename; tablename | partmethod | repmodel --------------------------------------------------------------------- - distributed_table | h | c - local_table_1 | n | c - local_table_2 | n | c - local_table_3 | n | c - local_table_4 | n | c - local_table_5 | n | c - local_table_6 | n | c + distributed_table | h | s + local_table_1 | n | s + local_table_2 | n | s + local_table_3 | n | s + local_table_4 | n | s + local_table_5 | n | s + local_table_6 | n | s reference_table_1 | n | t (8 rows) @@ -423,12 +423,12 @@ BEGIN; ORDER BY tablename; tablename | partmethod | repmodel --------------------------------------------------------------------- - distributed_table | h | c - local_table_1 | n | c + distributed_table | h | s + local_table_1 | n | s local_table_2 | n | t - local_table_3 | n | c - local_table_4 | n | c - local_table_5 | n | c + local_table_3 | n | s + local_table_4 | n | s + local_table_5 | n | s local_table_6 | n | t reference_table_1 | n | t (8 rows) @@ -452,7 +452,7 @@ BEGIN; ORDER BY tablename; tablename | partmethod | repmodel --------------------------------------------------------------------- - distributed_table | h | c + distributed_table | h | s local_table_2 | n | t local_table_6 | n | t reference_table_1 | n | t @@ -476,7 +476,7 @@ BEGIN; ORDER BY tablename; tablename | partmethod | repmodel --------------------------------------------------------------------- - distributed_table | h | c + distributed_table | h | s reference_table_1 | n | t (2 rows) @@ -501,11 +501,11 @@ BEGIN; ORDER BY tablename; tablename | partmethod | repmodel --------------------------------------------------------------------- - distributed_table | h | c + distributed_table | h | s local_table_1 | n | t - local_table_2 | h | c - local_table_3 | n | c - local_table_4 | n | c + local_table_2 | h | s + local_table_3 | n | s + local_table_4 | n | s reference_table_1 | n | t (6 rows) @@ -528,9 +528,9 @@ BEGIN; ORDER BY tablename; tablename | partmethod | repmodel --------------------------------------------------------------------- - distributed_table | h | c - local_table_1 | n | c - local_table_2 | n | c + distributed_table | h | s + local_table_1 | n | s + local_table_2 | n | s local_table_3 | n | t local_table_4 | n | t reference_table_1 | n | t @@ -562,12 +562,12 @@ BEGIN; ORDER BY tablename; tablename | partmethod | repmodel --------------------------------------------------------------------- - distributed_table | h | c + distributed_table | h | s local_table_1 | n | t - local_table_2 | h | c - local_table_3 | n | c - local_table_4 | n | c - local_table_5 | h | 
c + local_table_2 | h | s + local_table_3 | n | s + local_table_4 | n | s + local_table_5 | h | s reference_table_1 | n | t (7 rows) @@ -611,12 +611,12 @@ WHERE logicalrelid::text IN (SELECT tablename FROM pg_tables WHERE schemaname='f ORDER BY tablename; tablename | partmethod | repmodel --------------------------------------------------------------------- - distributed_table | h | c + distributed_table | h | s local_table_1 | n | t - local_table_2 | h | c - local_table_3 | n | c - local_table_4 | n | c - local_table_6 | h | c + local_table_2 | h | s + local_table_3 | n | s + local_table_4 | n | s + local_table_6 | h | s reference_table_1 | n | t (7 rows) @@ -662,7 +662,7 @@ BEGIN; ORDER BY tablename; tablename | partmethod | repmodel --------------------------------------------------------------------- - pg_local_1 | n | c + pg_local_1 | n | s ref_1 | n | t ref_2 | n | t (3 rows) @@ -691,7 +691,7 @@ BEGIN; ORDER BY tablename; tablename | partmethod | repmodel --------------------------------------------------------------------- - pg_local_1 | n | c + pg_local_1 | n | s ref_1 | n | t ref_2 | n | t (3 rows) @@ -720,7 +720,7 @@ BEGIN; ORDER BY tablename; tablename | partmethod | repmodel --------------------------------------------------------------------- - pg_local_1 | n | c + pg_local_1 | n | s ref_1 | n | t ref_2 | n | t (3 rows) @@ -755,7 +755,7 @@ BEGIN; ORDER BY tablename; tablename | partmethod | repmodel --------------------------------------------------------------------- - pg_local_3 | n | c + pg_local_3 | n | s ref_1 | n | t (2 rows) @@ -767,10 +767,10 @@ BEGIN; ORDER BY tablename; tablename | partmethod | repmodel --------------------------------------------------------------------- - pg_local_1 | n | c - pg_local_2 | n | c - pg_local_3 | n | c - pg_local_4 | n | c + pg_local_1 | n | s + pg_local_2 | n | s + pg_local_3 | n | s + pg_local_4 | n | s ref_1 | n | t (5 rows) diff --git a/src/test/regress/expected/foreign_key_to_reference_shard_rebalance.out b/src/test/regress/expected/foreign_key_to_reference_shard_rebalance.out index 7bffe0b6f..bedfcc6f7 100644 --- a/src/test/regress/expected/foreign_key_to_reference_shard_rebalance.out +++ b/src/test/regress/expected/foreign_key_to_reference_shard_rebalance.out @@ -61,6 +61,8 @@ SELECT count(*) FROM referencing_table2; 101 (1 row) +CALL citus_cleanup_orphaned_shards(); +NOTICE: cleaned up 2 orphaned shards SELECT * FROM table_fkeys_in_workers WHERE relid LIKE 'fkey_to_reference_shard_rebalance.%' AND refd_relid LIKE 'fkey_to_reference_shard_rebalance.%' ORDER BY 1,2,3; name | relid | refd_relid --------------------------------------------------------------------- @@ -102,6 +104,8 @@ SELECT count(*) FROM referencing_table2; 101 (1 row) +CALL citus_cleanup_orphaned_shards(); +NOTICE: cleaned up 2 orphaned shards SELECT * FROM table_fkeys_in_workers WHERE relid LIKE 'fkey_to_reference_shard_rebalance.%' AND refd_relid LIKE 'fkey_to_reference_shard_rebalance.%' ORDER BY 1,2,3; name | relid | refd_relid --------------------------------------------------------------------- diff --git a/src/test/regress/expected/insert_select_repartition.out b/src/test/regress/expected/insert_select_repartition.out index 8845e7001..163985ace 100644 --- a/src/test/regress/expected/insert_select_repartition.out +++ b/src/test/regress/expected/insert_select_repartition.out @@ -3,7 +3,6 @@ CREATE SCHEMA insert_select_repartition; SET search_path TO 'insert_select_repartition'; SET citus.next_shard_id TO 4213581; SET citus.shard_replication_factor TO 1; -SET 
citus.replication_model TO 'streaming'; -- 4 shards, hash distributed. -- Negate distribution column value. SET citus.shard_count TO 4; @@ -51,8 +50,6 @@ CREATE TYPE composite_key_type AS (f1 int, f2 text); -- source CREATE TABLE source_table(f1 int, key composite_key_type, value int, mapped_key composite_key_type); SELECT create_distributed_table('source_table', 'key', 'range'); -NOTICE: using statement-based replication -DETAIL: Streaming replication is supported only for hash-distributed tables. create_distributed_table --------------------------------------------------------------------- @@ -69,8 +66,6 @@ INSERT INTO source_table VALUES (6, (32, 'g'), 50, (8, 'g')); -- target CREATE TABLE target_table(f1 int DEFAULT 0, value int, key composite_key_type PRIMARY KEY); SELECT create_distributed_table('target_table', 'key', 'range'); -NOTICE: using statement-based replication -DETAIL: Streaming replication is supported only for hash-distributed tables. create_distributed_table --------------------------------------------------------------------- @@ -824,7 +819,6 @@ UPDATE source_table SET b = NULL where b IN (9, 4); SET citus.shard_replication_factor TO 2; CREATE TABLE target_table(a int, b int not null); SELECT create_distributed_table('target_table', 'a', 'range'); -NOTICE: using statement-based replication create_distributed_table --------------------------------------------------------------------- @@ -939,7 +933,6 @@ DROP TABLE source_table, target_table; -- Range partitioned target's ranges doesn't cover the whole range -- SET citus.shard_replication_factor TO 2; -SET citus.replication_model TO 'statement'; SET citus.shard_count TO 4; CREATE TABLE source_table(a int, b int); SELECT create_distributed_table('source_table', 'a'); @@ -1261,17 +1254,43 @@ NOTICE: copying the data has completed (1 row) -explain insert into table_with_sequences select y, x from table_with_sequences; - QUERY PLAN +explain (costs off) insert into table_with_sequences select y, x from table_with_sequences; + QUERY PLAN --------------------------------------------------------------------- - Custom Scan (Citus INSERT ... SELECT) (cost=0.00..0.00 rows=0 width=0) + Custom Scan (Citus INSERT ... SELECT) INSERT/SELECT method: pull to coordinator - -> Custom Scan (Citus Adaptive) (cost=0.00..250.00 rows=100000 width=16) + -> Custom Scan (Citus Adaptive) Task Count: 4 Tasks Shown: One of 4 -> Task Node: host=localhost port=xxxxx dbname=regression - -> Seq Scan on table_with_sequences_4213648 table_with_sequences (cost=0.00..28.50 rows=1850 width=8) + -> Seq Scan on table_with_sequences_4213648 table_with_sequences +(8 rows) + +-- verify that we don't report repartitioned insert/select for tables +-- with user-defined sequences. +CREATE SEQUENCE user_defined_sequence; +create table table_with_user_sequences (x int, y int, z bigint default nextval('user_defined_sequence')); +insert into table_with_user_sequences values (1,1); +select create_distributed_table('table_with_user_sequences','x'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +explain (costs off) insert into table_with_user_sequences select y, x from table_with_user_sequences; + QUERY PLAN +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... 
SELECT) + INSERT/SELECT method: pull to coordinator + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on table_with_user_sequences_4213652 table_with_user_sequences (8 rows) -- clean-up diff --git a/src/test/regress/expected/intermediate_result_pruning.out b/src/test/regress/expected/intermediate_result_pruning.out index 7de87b86c..2670c48a6 100644 --- a/src/test/regress/expected/intermediate_result_pruning.out +++ b/src/test/regress/expected/intermediate_result_pruning.out @@ -832,7 +832,6 @@ DEBUG: Subplan XXX_2 will be written to local file DEBUG: Subplan XXX_3 will be sent to localhost:xxxxx DEBUG: Subplan XXX_3 will be sent to localhost:xxxxx -- append partitioned/heap-type -SET citus.replication_model TO statement; -- do not print out 'building index pg_toast_xxxxx_index' messages SET client_min_messages TO DEFAULT; CREATE TABLE range_partitioned(range_column text, data int); diff --git a/src/test/regress/expected/isolation_blocking_move_multi_shard_commands.out b/src/test/regress/expected/isolation_blocking_move_multi_shard_commands.out index 4e09e34ad..25c680d2f 100644 --- a/src/test/regress/expected/isolation_blocking_move_multi_shard_commands.out +++ b/src/test/regress/expected/isolation_blocking_move_multi_shard_commands.out @@ -11,9 +11,9 @@ step s2-insert: INSERT INTO logical_replicate_placement VALUES (15, 15), (172, 172); step s1-move-placement: - SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; + SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; -step s2-end: +step s2-end: COMMIT; step s1-move-placement: <... completed> @@ -31,7 +31,7 @@ x y 15 15 172 172 step s1-get-shard-distribution: - select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardstate != 4 and shardid in (SELECT * FROM selected_shard) order by nodeport; nodeport @@ -49,9 +49,9 @@ step s2-upsert: INSERT INTO logical_replicate_placement VALUES (15, 15), (172, 172) ON CONFLICT (x) DO UPDATE SET y = logical_replicate_placement.y + 1; step s1-move-placement: - SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; + SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; -step s2-end: +step s2-end: COMMIT; step s1-move-placement: <... 
completed> @@ -69,7 +69,7 @@ x y 15 16 172 173 step s1-get-shard-distribution: - select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardstate != 4 and shardid in (SELECT * FROM selected_shard) order by nodeport; nodeport @@ -89,9 +89,9 @@ step s2-update: UPDATE logical_replicate_placement SET y = y + 1; step s1-move-placement: - SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; + SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; -step s2-end: +step s2-end: COMMIT; step s1-move-placement: <... completed> @@ -109,7 +109,7 @@ x y 15 16 172 173 step s1-get-shard-distribution: - select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardstate != 4 and shardid in (SELECT * FROM selected_shard) order by nodeport; nodeport @@ -129,9 +129,9 @@ step s2-delete: DELETE FROM logical_replicate_placement; step s1-move-placement: - SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; + SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; -step s2-end: +step s2-end: COMMIT; step s1-move-placement: <... completed> @@ -147,7 +147,7 @@ step s1-select: x y step s1-get-shard-distribution: - select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardstate != 4 and shardid in (SELECT * FROM selected_shard) order by nodeport; nodeport @@ -171,20 +171,19 @@ x y 15 15 172 172 step s1-move-placement: - SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; - -step s2-end: - COMMIT; + SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; -step s1-move-placement: <... 
completed> master_move_shard_placement +step s2-end: + COMMIT; + step s1-end: COMMIT; step s1-get-shard-distribution: - select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardstate != 4 and shardid in (SELECT * FROM selected_shard) order by nodeport; nodeport @@ -201,9 +200,9 @@ step s2-copy: COPY logical_replicate_placement FROM PROGRAM 'echo "1,1\n2,2\n3,3\n4,4\n5,5\n15,30"' WITH CSV; step s1-move-placement: - SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; + SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; -step s2-end: +step s2-end: COMMIT; step s1-move-placement: <... completed> @@ -225,7 +224,7 @@ x y 5 5 15 30 step s1-get-shard-distribution: - select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardstate != 4 and shardid in (SELECT * FROM selected_shard) order by nodeport; nodeport @@ -245,9 +244,9 @@ step s2-truncate: TRUNCATE logical_replicate_placement; step s1-move-placement: - SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; + SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; -step s2-end: +step s2-end: COMMIT; step s1-move-placement: <... completed> @@ -263,7 +262,7 @@ step s1-select: x y step s1-get-shard-distribution: - select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardstate != 4 and shardid in (SELECT * FROM selected_shard) order by nodeport; nodeport @@ -280,9 +279,9 @@ step s2-alter-table: ALTER TABLE logical_replicate_placement ADD COLUMN z INT; step s1-move-placement: - SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; + SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; -step s2-end: +step s2-end: COMMIT; step s1-move-placement: <... 
completed> @@ -298,7 +297,7 @@ step s1-select: x y z step s1-get-shard-distribution: - select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardstate != 4 and shardid in (SELECT * FROM selected_shard) order by nodeport; nodeport diff --git a/src/test/regress/expected/isolation_blocking_move_multi_shard_commands_on_mx.out b/src/test/regress/expected/isolation_blocking_move_multi_shard_commands_on_mx.out index 68cb2c1fb..ad62d5481 100644 --- a/src/test/regress/expected/isolation_blocking_move_multi_shard_commands_on_mx.out +++ b/src/test/regress/expected/isolation_blocking_move_multi_shard_commands_on_mx.out @@ -2,7 +2,7 @@ Parsed test spec with 2 sessions starting permutation: s1-begin s2-start-session-level-connection s2-begin-on-worker s2-insert s1-move-placement s2-commit-worker s1-commit s1-select s1-get-shard-distribution s2-stop-connection step s1-begin: - BEGIN; + BEGIN; step s2-start-session-level-connection: SELECT start_session_level_connection_to_node('localhost', 57638); @@ -23,10 +23,10 @@ run_commands_on_session_level_connection_to_node step s1-move-placement: - SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; + SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; -step s2-commit-worker: - SELECT run_commands_on_session_level_connection_to_node('COMMIT'); +step s2-commit-worker: + SELECT run_commands_on_session_level_connection_to_node('COMMIT'); run_commands_on_session_level_connection_to_node @@ -36,7 +36,7 @@ master_move_shard_placement step s1-commit: - COMMIT; + COMMIT; step s1-select: SELECT * FROM logical_replicate_placement order by y; @@ -46,7 +46,7 @@ x y 15 15 172 172 step s1-get-shard-distribution: - select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardstate != 4 and shardid in (SELECT * FROM selected_shard) order by nodeport; nodeport @@ -66,7 +66,7 @@ step s1-insert: INSERT INTO logical_replicate_placement VALUES (15, 15), (172, 172); step s1-begin: - BEGIN; + BEGIN; step s2-start-session-level-connection: SELECT start_session_level_connection_to_node('localhost', 57638); @@ -87,10 +87,10 @@ run_commands_on_session_level_connection_to_node step s1-move-placement: - SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; + SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; -step s2-commit-worker: - SELECT run_commands_on_session_level_connection_to_node('COMMIT'); +step s2-commit-worker: + SELECT run_commands_on_session_level_connection_to_node('COMMIT'); run_commands_on_session_level_connection_to_node @@ -100,7 +100,7 @@ master_move_shard_placement step s1-commit: - COMMIT; + COMMIT; step 
s1-select: SELECT * FROM logical_replicate_placement order by y; @@ -110,7 +110,7 @@ x y 15 16 172 173 step s1-get-shard-distribution: - select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardstate != 4 and shardid in (SELECT * FROM selected_shard) order by nodeport; nodeport @@ -130,7 +130,7 @@ step s1-insert: INSERT INTO logical_replicate_placement VALUES (15, 15), (172, 172); step s1-begin: - BEGIN; + BEGIN; step s2-start-session-level-connection: SELECT start_session_level_connection_to_node('localhost', 57638); @@ -151,10 +151,10 @@ run_commands_on_session_level_connection_to_node step s1-move-placement: - SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; + SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; -step s2-commit-worker: - SELECT run_commands_on_session_level_connection_to_node('COMMIT'); +step s2-commit-worker: + SELECT run_commands_on_session_level_connection_to_node('COMMIT'); run_commands_on_session_level_connection_to_node @@ -164,7 +164,7 @@ master_move_shard_placement step s1-commit: - COMMIT; + COMMIT; step s1-select: SELECT * FROM logical_replicate_placement order by y; @@ -172,7 +172,7 @@ step s1-select: x y step s1-get-shard-distribution: - select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardstate != 4 and shardid in (SELECT * FROM selected_shard) order by nodeport; nodeport @@ -192,7 +192,7 @@ step s1-insert: INSERT INTO logical_replicate_placement VALUES (15, 15), (172, 172); step s1-begin: - BEGIN; + BEGIN; step s2-start-session-level-connection: SELECT start_session_level_connection_to_node('localhost', 57638); @@ -213,23 +213,22 @@ run_commands_on_session_level_connection_to_node step s1-move-placement: - SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; - + SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; + +master_move_shard_placement + + step s2-commit-worker: - SELECT run_commands_on_session_level_connection_to_node('COMMIT'); + SELECT run_commands_on_session_level_connection_to_node('COMMIT'); run_commands_on_session_level_connection_to_node -step s1-move-placement: <... 
completed> -master_move_shard_placement - - step s1-commit: - COMMIT; + COMMIT; step s1-get-shard-distribution: - select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardstate != 4 and shardid in (SELECT * FROM selected_shard) order by nodeport; nodeport diff --git a/src/test/regress/expected/isolation_blocking_move_single_shard_commands.out b/src/test/regress/expected/isolation_blocking_move_single_shard_commands.out index 800e41aca..15b801533 100644 --- a/src/test/regress/expected/isolation_blocking_move_single_shard_commands.out +++ b/src/test/regress/expected/isolation_blocking_move_single_shard_commands.out @@ -11,9 +11,9 @@ step s2-insert: INSERT INTO logical_replicate_placement VALUES (15, 15); step s1-move-placement: - SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes'); + SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes'); -step s2-end: +step s2-end: COMMIT; step s1-move-placement: <... completed> @@ -30,7 +30,7 @@ x y 15 15 step s1-get-shard-distribution: - select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardstate != 4 and shardid in (SELECT * FROM selected_shard) order by nodeport; nodeport @@ -48,9 +48,9 @@ step s2-upsert: INSERT INTO logical_replicate_placement VALUES (15, 15) ON CONFLICT (x) DO UPDATE SET y = logical_replicate_placement.y + 1; step s1-move-placement: - SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes'); + SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes'); -step s2-end: +step s2-end: COMMIT; step s1-move-placement: <... completed> @@ -67,7 +67,7 @@ x y 15 16 step s1-get-shard-distribution: - select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardstate != 4 and shardid in (SELECT * FROM selected_shard) order by nodeport; nodeport @@ -87,9 +87,9 @@ step s2-update: UPDATE logical_replicate_placement SET y = y + 1 WHERE x = 15; step s1-move-placement: - SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes'); + SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes'); -step s2-end: +step s2-end: COMMIT; step s1-move-placement: <... 
completed> @@ -106,7 +106,7 @@ x y 15 16 step s1-get-shard-distribution: - select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardstate != 4 and shardid in (SELECT * FROM selected_shard) order by nodeport; nodeport @@ -126,9 +126,9 @@ step s2-delete: DELETE FROM logical_replicate_placement WHERE x = 15; step s1-move-placement: - SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes'); + SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes'); -step s2-end: +step s2-end: COMMIT; step s1-move-placement: <... completed> @@ -144,7 +144,7 @@ step s1-select: x y step s1-get-shard-distribution: - select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardstate != 4 and shardid in (SELECT * FROM selected_shard) order by nodeport; nodeport @@ -167,20 +167,19 @@ x y 15 15 step s1-move-placement: - SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes'); - -step s2-end: - COMMIT; + SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes'); -step s1-move-placement: <... completed> master_move_shard_placement +step s2-end: + COMMIT; + step s1-end: COMMIT; step s1-get-shard-distribution: - select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardstate != 4 and shardid in (SELECT * FROM selected_shard) order by nodeport; nodeport @@ -203,20 +202,19 @@ x y 15 15 step s1-move-placement: - SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes'); - -step s2-end: - COMMIT; + SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes'); -step s1-move-placement: <... 
completed> master_move_shard_placement +step s2-end: + COMMIT; + step s1-end: COMMIT; step s1-get-shard-distribution: - select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardstate != 4 and shardid in (SELECT * FROM selected_shard) order by nodeport; nodeport diff --git a/src/test/regress/expected/isolation_blocking_move_single_shard_commands_on_mx.out b/src/test/regress/expected/isolation_blocking_move_single_shard_commands_on_mx.out index 209275253..c89b918f5 100644 --- a/src/test/regress/expected/isolation_blocking_move_single_shard_commands_on_mx.out +++ b/src/test/regress/expected/isolation_blocking_move_single_shard_commands_on_mx.out @@ -25,7 +25,7 @@ run_commands_on_session_level_connection_to_node step s1-move-placement: SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes'); -step s2-commit-worker: +step s2-commit-worker: SELECT run_commands_on_session_level_connection_to_node('COMMIT'); run_commands_on_session_level_connection_to_node @@ -36,7 +36,7 @@ master_move_shard_placement step s1-commit: - COMMIT; + COMMIT; step s1-select: SELECT * FROM logical_replicate_placement order by y; @@ -45,7 +45,7 @@ x y 15 15 step s1-get-shard-distribution: - select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardstate != 4 and shardid in (SELECT * FROM selected_shard) order by nodeport; nodeport @@ -88,7 +88,7 @@ run_commands_on_session_level_connection_to_node step s1-move-placement: SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes'); -step s2-commit-worker: +step s2-commit-worker: SELECT run_commands_on_session_level_connection_to_node('COMMIT'); run_commands_on_session_level_connection_to_node @@ -99,7 +99,7 @@ master_move_shard_placement step s1-commit: - COMMIT; + COMMIT; step s1-select: SELECT * FROM logical_replicate_placement order by y; @@ -108,7 +108,7 @@ x y 15 16 step s1-get-shard-distribution: - select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardstate != 4 and shardid in (SELECT * FROM selected_shard) order by nodeport; nodeport @@ -151,7 +151,7 @@ run_commands_on_session_level_connection_to_node step s1-move-placement: SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes'); -step s2-commit-worker: +step s2-commit-worker: SELECT run_commands_on_session_level_connection_to_node('COMMIT'); run_commands_on_session_level_connection_to_node @@ -162,7 +162,7 @@ master_move_shard_placement step s1-commit: - COMMIT; + COMMIT; step s1-select: SELECT * FROM logical_replicate_placement order by y; @@ -170,7 +170,7 @@ step s1-select: x y step 
s1-get-shard-distribution: - select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardstate != 4 and shardid in (SELECT * FROM selected_shard) order by nodeport; nodeport @@ -212,22 +212,21 @@ run_commands_on_session_level_connection_to_node step s1-move-placement: SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes'); - + +master_move_shard_placement + + step s2-commit-worker: SELECT run_commands_on_session_level_connection_to_node('COMMIT'); run_commands_on_session_level_connection_to_node -step s1-move-placement: <... completed> -master_move_shard_placement - - step s1-commit: - COMMIT; + COMMIT; step s1-get-shard-distribution: - select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardstate != 4 and shardid in (SELECT * FROM selected_shard) order by nodeport; nodeport @@ -269,22 +268,21 @@ run_commands_on_session_level_connection_to_node step s1-move-placement: SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes'); - + +master_move_shard_placement + + step s2-commit-worker: SELECT run_commands_on_session_level_connection_to_node('COMMIT'); run_commands_on_session_level_connection_to_node -step s1-move-placement: <... completed> -master_move_shard_placement - - step s1-commit: - COMMIT; + COMMIT; step s1-get-shard-distribution: - select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardstate != 4 and shardid in (SELECT * FROM selected_shard) order by nodeport; nodeport diff --git a/src/test/regress/expected/isolation_copy_placement_vs_modification.out b/src/test/regress/expected/isolation_copy_placement_vs_modification.out index 803311cbc..36e7bdf40 100644 --- a/src/test/regress/expected/isolation_copy_placement_vs_modification.out +++ b/src/test/regress/expected/isolation_copy_placement_vs_modification.out @@ -1,9 +1,6 @@ Parsed test spec with 2 sessions starting permutation: s1-load-cache s1-insert s1-begin s1-select s2-set-placement-inactive s2-begin s2-repair-placement s1-update s2-commit s1-commit s2-print-content -create_distributed_table - - step s1-load-cache: TRUNCATE test_repair_placement_vs_modification; @@ -35,7 +32,7 @@ master_copy_shard_placement step s1-update: UPDATE test_repair_placement_vs_modification SET y = 5 WHERE x = 5; -step s2-commit: +step s2-commit: COMMIT; step s1-update: <... 
completed> @@ -58,9 +55,6 @@ nodeport success result 57638 t 5 starting permutation: s1-load-cache s1-insert s1-begin s1-select s2-set-placement-inactive s2-begin s2-repair-placement s1-delete s2-commit s1-commit s2-print-content -create_distributed_table - - step s1-load-cache: TRUNCATE test_repair_placement_vs_modification; @@ -92,7 +86,7 @@ master_copy_shard_placement step s1-delete: DELETE FROM test_repair_placement_vs_modification WHERE x = 5; -step s2-commit: +step s2-commit: COMMIT; step s1-delete: <... completed> @@ -115,9 +109,6 @@ nodeport success result 57638 t starting permutation: s1-load-cache s1-begin s1-select s2-set-placement-inactive s2-begin s2-repair-placement s1-insert s2-commit s1-commit s2-print-content -create_distributed_table - - step s1-load-cache: TRUNCATE test_repair_placement_vs_modification; @@ -146,7 +137,7 @@ master_copy_shard_placement step s1-insert: INSERT INTO test_repair_placement_vs_modification VALUES (5, 10); -step s2-commit: +step s2-commit: COMMIT; step s1-insert: <... completed> @@ -169,9 +160,6 @@ nodeport success result 57638 t 10 starting permutation: s1-load-cache s1-begin s1-select s2-set-placement-inactive s2-begin s2-repair-placement s1-copy s2-commit s1-commit s2-print-content -create_distributed_table - - step s1-load-cache: TRUNCATE test_repair_placement_vs_modification; @@ -200,7 +188,7 @@ master_copy_shard_placement step s1-copy: COPY test_repair_placement_vs_modification FROM PROGRAM 'echo 1,1 && echo 2,2 && echo 3,3 && echo 4,4 && echo 5,5' WITH CSV; -step s2-commit: +step s2-commit: COMMIT; step s1-copy: <... completed> @@ -223,9 +211,6 @@ nodeport success result 57638 t 5 starting permutation: s1-load-cache s1-begin s1-select s2-set-placement-inactive s2-begin s2-repair-placement s1-ddl s2-commit s1-commit s2-print-index-count -create_distributed_table - - step s1-load-cache: TRUNCATE test_repair_placement_vs_modification; @@ -254,7 +239,7 @@ master_copy_shard_placement step s1-ddl: CREATE INDEX test_repair_placement_vs_modification_index ON test_repair_placement_vs_modification(x); -step s2-commit: +step s2-commit: COMMIT; step s1-ddl: <... completed> @@ -277,9 +262,6 @@ nodeport success result 57638 t 1 starting permutation: s1-insert s1-begin s1-select s2-set-placement-inactive s2-begin s2-repair-placement s1-update s2-commit s1-commit s2-print-content -create_distributed_table - - step s1-insert: INSERT INTO test_repair_placement_vs_modification VALUES (5, 10); @@ -308,7 +290,7 @@ master_copy_shard_placement step s1-update: UPDATE test_repair_placement_vs_modification SET y = 5 WHERE x = 5; -step s2-commit: +step s2-commit: COMMIT; step s1-update: <... completed> @@ -331,9 +313,6 @@ nodeport success result 57638 t 5 starting permutation: s1-insert s1-begin s1-select s2-set-placement-inactive s2-begin s2-repair-placement s1-delete s2-commit s1-commit s2-print-content -create_distributed_table - - step s1-insert: INSERT INTO test_repair_placement_vs_modification VALUES (5, 10); @@ -362,7 +341,7 @@ master_copy_shard_placement step s1-delete: DELETE FROM test_repair_placement_vs_modification WHERE x = 5; -step s2-commit: +step s2-commit: COMMIT; step s1-delete: <... 
completed> @@ -385,9 +364,6 @@ nodeport success result 57638 t starting permutation: s1-begin s1-select s2-set-placement-inactive s2-begin s2-repair-placement s1-insert s2-commit s1-commit s2-print-content -create_distributed_table - - step s1-begin: BEGIN; SET LOCAL citus.select_opens_transaction_block TO off; @@ -413,7 +389,7 @@ master_copy_shard_placement step s1-insert: INSERT INTO test_repair_placement_vs_modification VALUES (5, 10); -step s2-commit: +step s2-commit: COMMIT; step s1-insert: <... completed> @@ -436,9 +412,6 @@ nodeport success result 57638 t 10 starting permutation: s1-begin s1-select s2-set-placement-inactive s2-begin s2-repair-placement s1-copy s2-commit s1-commit s2-print-content -create_distributed_table - - step s1-begin: BEGIN; SET LOCAL citus.select_opens_transaction_block TO off; @@ -464,7 +437,7 @@ master_copy_shard_placement step s1-copy: COPY test_repair_placement_vs_modification FROM PROGRAM 'echo 1,1 && echo 2,2 && echo 3,3 && echo 4,4 && echo 5,5' WITH CSV; -step s2-commit: +step s2-commit: COMMIT; step s1-copy: <... completed> @@ -487,9 +460,6 @@ nodeport success result 57638 t 5 starting permutation: s1-begin s1-select s2-set-placement-inactive s2-begin s2-repair-placement s1-ddl s2-commit s1-commit s2-print-index-count -create_distributed_table - - step s1-begin: BEGIN; SET LOCAL citus.select_opens_transaction_block TO off; @@ -515,7 +485,7 @@ master_copy_shard_placement step s1-ddl: CREATE INDEX test_repair_placement_vs_modification_index ON test_repair_placement_vs_modification(x); -step s2-commit: +step s2-commit: COMMIT; step s1-ddl: <... completed> @@ -536,367 +506,3 @@ nodeport success result 57637 t 1 57638 t 1 57638 t 1 - -starting permutation: s1-begin s2-begin s2-copy-placement s1-update-copy-table s2-commit s1-commit -create_distributed_table - - -step s1-begin: - BEGIN; - SET LOCAL citus.select_opens_transaction_block TO off; - -step s2-begin: - BEGIN; - -step s2-copy-placement: - SELECT master_copy_shard_placement((SELECT get_shard_id_for_distribution_column('test_copy_placement_vs_modification', 5)), - 'localhost', 57637, 'localhost', 57638, - do_repair := false, transfer_mode := 'block_writes'); - -master_copy_shard_placement - - -step s1-update-copy-table: - UPDATE test_copy_placement_vs_modification SET y = 5 WHERE x = 5; - -step s2-commit: - COMMIT; - -step s1-update-copy-table: <... completed> -step s1-commit: - COMMIT; - - -starting permutation: s1-begin s2-begin s2-copy-placement s1-delete-copy-table s2-commit s1-commit -create_distributed_table - - -step s1-begin: - BEGIN; - SET LOCAL citus.select_opens_transaction_block TO off; - -step s2-begin: - BEGIN; - -step s2-copy-placement: - SELECT master_copy_shard_placement((SELECT get_shard_id_for_distribution_column('test_copy_placement_vs_modification', 5)), - 'localhost', 57637, 'localhost', 57638, - do_repair := false, transfer_mode := 'block_writes'); - -master_copy_shard_placement - - -step s1-delete-copy-table: - DELETE FROM test_copy_placement_vs_modification WHERE x = 5; - -step s2-commit: - COMMIT; - -step s1-delete-copy-table: <... 
completed> -step s1-commit: - COMMIT; - - -starting permutation: s1-begin s2-begin s2-copy-placement s1-insert-copy-table s2-commit s1-commit -create_distributed_table - - -step s1-begin: - BEGIN; - SET LOCAL citus.select_opens_transaction_block TO off; - -step s2-begin: - BEGIN; - -step s2-copy-placement: - SELECT master_copy_shard_placement((SELECT get_shard_id_for_distribution_column('test_copy_placement_vs_modification', 5)), - 'localhost', 57637, 'localhost', 57638, - do_repair := false, transfer_mode := 'block_writes'); - -master_copy_shard_placement - - -step s1-insert-copy-table: - INSERT INTO test_copy_placement_vs_modification VALUES (5, 10); - -step s2-commit: - COMMIT; - -step s1-insert-copy-table: <... completed> -step s1-commit: - COMMIT; - - -starting permutation: s1-begin s2-begin s2-copy-placement s1-copy-copy-table s2-commit s1-commit -create_distributed_table - - -step s1-begin: - BEGIN; - SET LOCAL citus.select_opens_transaction_block TO off; - -step s2-begin: - BEGIN; - -step s2-copy-placement: - SELECT master_copy_shard_placement((SELECT get_shard_id_for_distribution_column('test_copy_placement_vs_modification', 5)), - 'localhost', 57637, 'localhost', 57638, - do_repair := false, transfer_mode := 'block_writes'); - -master_copy_shard_placement - - -step s1-copy-copy-table: - COPY test_copy_placement_vs_modification FROM PROGRAM 'echo 1,1 && echo 2,2 && echo 3,3 && echo 4,4 && echo 5,5' WITH CSV; - -step s2-commit: - COMMIT; - -step s1-copy-copy-table: <... completed> -step s1-commit: - COMMIT; - - -starting permutation: s1-begin s2-begin s2-copy-placement s1-ddl-copy-table s2-commit s1-commit -create_distributed_table - - -step s1-begin: - BEGIN; - SET LOCAL citus.select_opens_transaction_block TO off; - -step s2-begin: - BEGIN; - -step s2-copy-placement: - SELECT master_copy_shard_placement((SELECT get_shard_id_for_distribution_column('test_copy_placement_vs_modification', 5)), - 'localhost', 57637, 'localhost', 57638, - do_repair := false, transfer_mode := 'block_writes'); - -master_copy_shard_placement - - -step s1-ddl-copy-table: - CREATE INDEX test_copy_placement_vs_modification_index ON test_copy_placement_vs_modification(x); - -step s2-commit: - COMMIT; - -step s1-ddl-copy-table: <... 
completed> -step s1-commit: - COMMIT; - - -starting permutation: s1-begin s2-begin s2-copy-placement s1-select-copy-table s2-commit s1-commit -create_distributed_table - - -step s1-begin: - BEGIN; - SET LOCAL citus.select_opens_transaction_block TO off; - -step s2-begin: - BEGIN; - -step s2-copy-placement: - SELECT master_copy_shard_placement((SELECT get_shard_id_for_distribution_column('test_copy_placement_vs_modification', 5)), - 'localhost', 57637, 'localhost', 57638, - do_repair := false, transfer_mode := 'block_writes'); - -master_copy_shard_placement - - -step s1-select-copy-table: - SELECT count(*) FROM test_copy_placement_vs_modification WHERE x = 5; - -count - -0 -step s2-commit: - COMMIT; - -step s1-commit: - COMMIT; - - -starting permutation: s1-begin s2-begin s1-update-copy-table s2-copy-placement s1-commit s2-commit -create_distributed_table - - -step s1-begin: - BEGIN; - SET LOCAL citus.select_opens_transaction_block TO off; - -step s2-begin: - BEGIN; - -step s1-update-copy-table: - UPDATE test_copy_placement_vs_modification SET y = 5 WHERE x = 5; - -step s2-copy-placement: - SELECT master_copy_shard_placement((SELECT get_shard_id_for_distribution_column('test_copy_placement_vs_modification', 5)), - 'localhost', 57637, 'localhost', 57638, - do_repair := false, transfer_mode := 'block_writes'); - -step s1-commit: - COMMIT; - -step s2-copy-placement: <... completed> -master_copy_shard_placement - - -step s2-commit: - COMMIT; - - -starting permutation: s1-begin s2-begin s1-delete-copy-table s2-copy-placement s1-commit s2-commit -create_distributed_table - - -step s1-begin: - BEGIN; - SET LOCAL citus.select_opens_transaction_block TO off; - -step s2-begin: - BEGIN; - -step s1-delete-copy-table: - DELETE FROM test_copy_placement_vs_modification WHERE x = 5; - -step s2-copy-placement: - SELECT master_copy_shard_placement((SELECT get_shard_id_for_distribution_column('test_copy_placement_vs_modification', 5)), - 'localhost', 57637, 'localhost', 57638, - do_repair := false, transfer_mode := 'block_writes'); - -step s1-commit: - COMMIT; - -step s2-copy-placement: <... completed> -master_copy_shard_placement - - -step s2-commit: - COMMIT; - - -starting permutation: s1-begin s2-begin s1-insert-copy-table s2-copy-placement s1-commit s2-commit -create_distributed_table - - -step s1-begin: - BEGIN; - SET LOCAL citus.select_opens_transaction_block TO off; - -step s2-begin: - BEGIN; - -step s1-insert-copy-table: - INSERT INTO test_copy_placement_vs_modification VALUES (5, 10); - -step s2-copy-placement: - SELECT master_copy_shard_placement((SELECT get_shard_id_for_distribution_column('test_copy_placement_vs_modification', 5)), - 'localhost', 57637, 'localhost', 57638, - do_repair := false, transfer_mode := 'block_writes'); - -step s1-commit: - COMMIT; - -step s2-copy-placement: <... 
completed> -master_copy_shard_placement - - -step s2-commit: - COMMIT; - - -starting permutation: s1-begin s2-begin s1-copy-copy-table s2-copy-placement s1-commit s2-commit -create_distributed_table - - -step s1-begin: - BEGIN; - SET LOCAL citus.select_opens_transaction_block TO off; - -step s2-begin: - BEGIN; - -step s1-copy-copy-table: - COPY test_copy_placement_vs_modification FROM PROGRAM 'echo 1,1 && echo 2,2 && echo 3,3 && echo 4,4 && echo 5,5' WITH CSV; - -step s2-copy-placement: - SELECT master_copy_shard_placement((SELECT get_shard_id_for_distribution_column('test_copy_placement_vs_modification', 5)), - 'localhost', 57637, 'localhost', 57638, - do_repair := false, transfer_mode := 'block_writes'); - -step s1-commit: - COMMIT; - -step s2-copy-placement: <... completed> -master_copy_shard_placement - - -step s2-commit: - COMMIT; - - -starting permutation: s1-begin s2-begin s1-ddl-copy-table s2-copy-placement s1-commit s2-commit -create_distributed_table - - -step s1-begin: - BEGIN; - SET LOCAL citus.select_opens_transaction_block TO off; - -step s2-begin: - BEGIN; - -step s1-ddl-copy-table: - CREATE INDEX test_copy_placement_vs_modification_index ON test_copy_placement_vs_modification(x); - -step s2-copy-placement: - SELECT master_copy_shard_placement((SELECT get_shard_id_for_distribution_column('test_copy_placement_vs_modification', 5)), - 'localhost', 57637, 'localhost', 57638, - do_repair := false, transfer_mode := 'block_writes'); - -step s1-commit: - COMMIT; - -step s2-copy-placement: <... completed> -master_copy_shard_placement - - -step s2-commit: - COMMIT; - - -starting permutation: s1-begin s2-begin s1-select-copy-table s2-copy-placement s1-commit s2-commit -create_distributed_table - - -step s1-begin: - BEGIN; - SET LOCAL citus.select_opens_transaction_block TO off; - -step s2-begin: - BEGIN; - -step s1-select-copy-table: - SELECT count(*) FROM test_copy_placement_vs_modification WHERE x = 5; - -count - -0 -step s2-copy-placement: - SELECT master_copy_shard_placement((SELECT get_shard_id_for_distribution_column('test_copy_placement_vs_modification', 5)), - 'localhost', 57637, 'localhost', 57638, - do_repair := false, transfer_mode := 'block_writes'); - -master_copy_shard_placement - - -step s1-commit: - COMMIT; - -step s2-commit: - COMMIT; - diff --git a/src/test/regress/expected/isolation_create_restore_point.out b/src/test/regress/expected/isolation_create_restore_point.out index c0bb77d6b..b39c1f387 100644 --- a/src/test/regress/expected/isolation_create_restore_point.out +++ b/src/test/regress/expected/isolation_create_restore_point.out @@ -5,21 +5,21 @@ create_reference_table step s1-begin: - BEGIN; - SET citus.multi_shard_commit_protocol TO '2pc'; + BEGIN; + SET citus.multi_shard_commit_protocol TO '2pc'; step s1-create-distributed: - CREATE TABLE test_create_distributed_table (test_id integer NOT NULL, data text); - SELECT create_distributed_table('test_create_distributed_table', 'test_id'); + CREATE TABLE test_create_distributed_table (test_id integer NOT NULL, data text); + SELECT create_distributed_table('test_create_distributed_table', 'test_id'); create_distributed_table step s2-create-restore: - SELECT 1 FROM citus_create_restore_point('citus-test'); + SELECT 1 FROM citus_create_restore_point('citus-test'); -step s1-commit: - COMMIT; +step s1-commit: + COMMIT; step s2-create-restore: <... completed> ?column? 
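The s1-get-shard-distribution queries rewritten throughout the shard-move isolation tests above gain a shardstate != 4 filter. With deferred shard drop, a completed move can leave the old placement behind in pg_dist_placement marked as to-be-deleted (shard state 4), so the tests list only active placements. A minimal sketch of the filtered lookup; the literal shard id is a placeholder, not taken from the tests:

    SELECT nodeport
    FROM pg_dist_placement
    INNER JOIN pg_dist_node ON (pg_dist_placement.groupid = pg_dist_node.groupid)
    WHERE shardstate != 4      -- skip placements kept around only for deferred drop
      AND shardid = 102008     -- placeholder shard id
    ORDER BY nodeport;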
@@ -31,20 +31,20 @@ create_reference_table step s1-begin: - BEGIN; - SET citus.multi_shard_commit_protocol TO '2pc'; + BEGIN; + SET citus.multi_shard_commit_protocol TO '2pc'; step s1-insert: - INSERT INTO restore_table VALUES (1,'hello'); + INSERT INTO restore_table VALUES (1,'hello'); step s2-create-restore: - SELECT 1 FROM citus_create_restore_point('citus-test'); + SELECT 1 FROM citus_create_restore_point('citus-test'); ?column? 1 step s1-commit: - COMMIT; + COMMIT; starting permutation: s1-begin s1-modify-multiple s2-create-restore s1-commit @@ -52,20 +52,20 @@ create_reference_table step s1-begin: - BEGIN; - SET citus.multi_shard_commit_protocol TO '2pc'; + BEGIN; + SET citus.multi_shard_commit_protocol TO '2pc'; step s1-modify-multiple: - UPDATE restore_table SET data = 'world'; + UPDATE restore_table SET data = 'world'; step s2-create-restore: - SELECT 1 FROM citus_create_restore_point('citus-test'); + SELECT 1 FROM citus_create_restore_point('citus-test'); ?column? 1 step s1-commit: - COMMIT; + COMMIT; starting permutation: s1-begin s1-ddl s2-create-restore s1-commit @@ -73,41 +73,42 @@ create_reference_table step s1-begin: - BEGIN; - SET citus.multi_shard_commit_protocol TO '2pc'; + BEGIN; + SET citus.multi_shard_commit_protocol TO '2pc'; step s1-ddl: - ALTER TABLE restore_table ADD COLUMN x int; + ALTER TABLE restore_table ADD COLUMN x int; step s2-create-restore: - SELECT 1 FROM citus_create_restore_point('citus-test'); + SELECT 1 FROM citus_create_restore_point('citus-test'); + +step s1-commit: + COMMIT; +step s2-create-restore: <... completed> ?column? 1 -step s1-commit: - COMMIT; - starting permutation: s1-begin s1-copy s2-create-restore s1-commit create_reference_table step s1-begin: - BEGIN; - SET citus.multi_shard_commit_protocol TO '2pc'; + BEGIN; + SET citus.multi_shard_commit_protocol TO '2pc'; step s1-copy: - COPY restore_table FROM PROGRAM 'echo 1,hello' WITH CSV; + COPY restore_table FROM PROGRAM 'echo 1,hello' WITH CSV; step s2-create-restore: - SELECT 1 FROM citus_create_restore_point('citus-test'); + SELECT 1 FROM citus_create_restore_point('citus-test'); ?column? 1 step s1-commit: - COMMIT; + COMMIT; starting permutation: s1-begin s1-recover s2-create-restore s1-commit @@ -115,20 +116,20 @@ create_reference_table step s1-begin: - BEGIN; - SET citus.multi_shard_commit_protocol TO '2pc'; + BEGIN; + SET citus.multi_shard_commit_protocol TO '2pc'; step s1-recover: - SELECT recover_prepared_transactions(); + SELECT recover_prepared_transactions(); recover_prepared_transactions 0 step s2-create-restore: - SELECT 1 FROM citus_create_restore_point('citus-test'); + SELECT 1 FROM citus_create_restore_point('citus-test'); -step s1-commit: - COMMIT; +step s1-commit: + COMMIT; step s2-create-restore: <... completed> ?column? @@ -140,17 +141,17 @@ create_reference_table step s1-begin: - BEGIN; - SET citus.multi_shard_commit_protocol TO '2pc'; + BEGIN; + SET citus.multi_shard_commit_protocol TO '2pc'; step s1-drop: - DROP TABLE restore_table; + DROP TABLE restore_table; step s2-create-restore: - SELECT 1 FROM citus_create_restore_point('citus-test'); + SELECT 1 FROM citus_create_restore_point('citus-test'); -step s1-commit: - COMMIT; +step s1-commit: + COMMIT; step s2-create-restore: <... completed> ?column? 
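The reordered expected output in the s1-ddl permutation above pins down the behavior under test: citus_create_restore_point waits for concurrent multi-node transactions, so the restore point completes only after session 1 commits its DDL. A minimal two-session sketch of that pattern, using the table and restore point names from the test:

    -- session 1
    BEGIN;
    SET citus.multi_shard_commit_protocol TO '2pc';
    ALTER TABLE restore_table ADD COLUMN x int;

    -- session 2: blocks until session 1 commits
    SELECT 1 FROM citus_create_restore_point('citus-test');

    -- session 1
    COMMIT;  -- session 2 now returns 1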
@@ -162,20 +163,20 @@ create_reference_table step s1-begin: - BEGIN; - SET citus.multi_shard_commit_protocol TO '2pc'; + BEGIN; + SET citus.multi_shard_commit_protocol TO '2pc'; step s1-add-node: - SELECT 1 FROM master_add_inactive_node('localhost', 9999); + SELECT 1 FROM master_add_inactive_node('localhost', 9999); ?column? 1 step s2-create-restore: - SELECT 1 FROM citus_create_restore_point('citus-test'); + SELECT 1 FROM citus_create_restore_point('citus-test'); -step s1-commit: - COMMIT; +step s1-commit: + COMMIT; step s2-create-restore: <... completed> ?column? @@ -187,20 +188,20 @@ create_reference_table step s1-begin: - BEGIN; - SET citus.multi_shard_commit_protocol TO '2pc'; + BEGIN; + SET citus.multi_shard_commit_protocol TO '2pc'; step s1-remove-node: - SELECT master_remove_node('localhost', 9999); + SELECT master_remove_node('localhost', 9999); master_remove_node step s2-create-restore: - SELECT 1 FROM citus_create_restore_point('citus-test'); + SELECT 1 FROM citus_create_restore_point('citus-test'); -step s1-commit: - COMMIT; +step s1-commit: + COMMIT; step s2-create-restore: <... completed> ?column? @@ -212,20 +213,20 @@ create_reference_table step s1-begin: - BEGIN; - SET citus.multi_shard_commit_protocol TO '2pc'; + BEGIN; + SET citus.multi_shard_commit_protocol TO '2pc'; step s1-create-restore: - SELECT 1 FROM citus_create_restore_point('citus-test-2'); + SELECT 1 FROM citus_create_restore_point('citus-test-2'); ?column? 1 step s2-create-restore: - SELECT 1 FROM citus_create_restore_point('citus-test'); + SELECT 1 FROM citus_create_restore_point('citus-test'); -step s1-commit: - COMMIT; +step s1-commit: + COMMIT; step s2-create-restore: <... completed> ?column? @@ -237,19 +238,19 @@ create_reference_table step s2-begin: - BEGIN; + BEGIN; step s2-create-restore: - SELECT 1 FROM citus_create_restore_point('citus-test'); + SELECT 1 FROM citus_create_restore_point('citus-test'); ?column? 1 step s1-modify-multiple: - UPDATE restore_table SET data = 'world'; + UPDATE restore_table SET data = 'world'; -step s2-commit: - COMMIT; +step s2-commit: + COMMIT; step s1-modify-multiple: <... completed> @@ -258,19 +259,19 @@ create_reference_table step s2-begin: - BEGIN; + BEGIN; step s2-create-restore: - SELECT 1 FROM citus_create_restore_point('citus-test'); + SELECT 1 FROM citus_create_restore_point('citus-test'); ?column? 1 step s1-ddl: - ALTER TABLE restore_table ADD COLUMN x int; + ALTER TABLE restore_table ADD COLUMN x int; -step s2-commit: - COMMIT; +step s2-commit: + COMMIT; step s1-ddl: <... completed> @@ -279,23 +280,23 @@ create_reference_table step s2-begin: - BEGIN; + BEGIN; step s2-create-restore: - SELECT 1 FROM citus_create_restore_point('citus-test'); + SELECT 1 FROM citus_create_restore_point('citus-test'); ?column? 1 step s1-multi-statement: - SET citus.multi_shard_commit_protocol TO '2pc'; - BEGIN; - INSERT INTO restore_table VALUES (1,'hello'); - INSERT INTO restore_table VALUES (2,'hello'); - COMMIT; + SET citus.multi_shard_commit_protocol TO '2pc'; + BEGIN; + INSERT INTO restore_table VALUES (1,'hello'); + INSERT INTO restore_table VALUES (2,'hello'); + COMMIT; -step s2-commit: - COMMIT; +step s2-commit: + COMMIT; step s1-multi-statement: <... 
completed> @@ -304,21 +305,21 @@ create_reference_table step s1-begin: - BEGIN; - SET citus.multi_shard_commit_protocol TO '2pc'; + BEGIN; + SET citus.multi_shard_commit_protocol TO '2pc'; step s1-create-reference: - CREATE TABLE test_create_reference_table (test_id integer NOT NULL, data text); - SELECT create_reference_table('test_create_reference_table'); + CREATE TABLE test_create_reference_table (test_id integer NOT NULL, data text); + SELECT create_reference_table('test_create_reference_table'); create_reference_table step s2-create-restore: - SELECT 1 FROM citus_create_restore_point('citus-test'); + SELECT 1 FROM citus_create_restore_point('citus-test'); -step s1-commit: - COMMIT; +step s1-commit: + COMMIT; step s2-create-restore: <... completed> ?column? @@ -330,20 +331,20 @@ create_reference_table step s1-begin: - BEGIN; - SET citus.multi_shard_commit_protocol TO '2pc'; + BEGIN; + SET citus.multi_shard_commit_protocol TO '2pc'; step s1-insert-ref: - INSERT INTO restore_ref_table VALUES (1,'hello'); + INSERT INTO restore_ref_table VALUES (1,'hello'); step s2-create-restore: - SELECT 1 FROM citus_create_restore_point('citus-test'); + SELECT 1 FROM citus_create_restore_point('citus-test'); ?column? 1 step s1-commit: - COMMIT; + COMMIT; starting permutation: s1-begin s1-modify-multiple-ref s2-create-restore s1-commit @@ -351,20 +352,20 @@ create_reference_table step s1-begin: - BEGIN; - SET citus.multi_shard_commit_protocol TO '2pc'; + BEGIN; + SET citus.multi_shard_commit_protocol TO '2pc'; step s1-modify-multiple-ref: - UPDATE restore_ref_table SET data = 'world'; + UPDATE restore_ref_table SET data = 'world'; step s2-create-restore: - SELECT 1 FROM citus_create_restore_point('citus-test'); + SELECT 1 FROM citus_create_restore_point('citus-test'); ?column? 1 step s1-commit: - COMMIT; + COMMIT; starting permutation: s1-begin s1-ddl-ref s2-create-restore s1-commit @@ -372,17 +373,17 @@ create_reference_table step s1-begin: - BEGIN; - SET citus.multi_shard_commit_protocol TO '2pc'; + BEGIN; + SET citus.multi_shard_commit_protocol TO '2pc'; step s1-ddl-ref: - ALTER TABLE restore_ref_table ADD COLUMN x int; + ALTER TABLE restore_ref_table ADD COLUMN x int; step s2-create-restore: - SELECT 1 FROM citus_create_restore_point('citus-test'); + SELECT 1 FROM citus_create_restore_point('citus-test'); -step s1-commit: - COMMIT; +step s1-commit: + COMMIT; step s2-create-restore: <... completed> ?column? @@ -394,20 +395,20 @@ create_reference_table step s1-begin: - BEGIN; - SET citus.multi_shard_commit_protocol TO '2pc'; + BEGIN; + SET citus.multi_shard_commit_protocol TO '2pc'; step s1-copy-ref: - COPY restore_ref_table FROM PROGRAM 'echo 1,hello' WITH CSV; + COPY restore_ref_table FROM PROGRAM 'echo 1,hello' WITH CSV; step s2-create-restore: - SELECT 1 FROM citus_create_restore_point('citus-test'); + SELECT 1 FROM citus_create_restore_point('citus-test'); ?column? 1 step s1-commit: - COMMIT; + COMMIT; starting permutation: s1-begin s1-drop-ref s2-create-restore s1-commit @@ -415,17 +416,17 @@ create_reference_table step s1-begin: - BEGIN; - SET citus.multi_shard_commit_protocol TO '2pc'; + BEGIN; + SET citus.multi_shard_commit_protocol TO '2pc'; step s1-drop-ref: - DROP TABLE restore_ref_table; + DROP TABLE restore_ref_table; step s2-create-restore: - SELECT 1 FROM citus_create_restore_point('citus-test'); + SELECT 1 FROM citus_create_restore_point('citus-test'); -step s1-commit: - COMMIT; +step s1-commit: + COMMIT; step s2-create-restore: <... completed> ?column? 
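The reference-table permutations above exercise the same restore-point coordination against restore_ref_table: a single-row insert or update lets citus_create_restore_point return without waiting, while DDL or DROP on the reference table makes it wait for the commit (as in the s1-ddl-ref and s1-drop-ref permutations). A short sketch of the non-blocking case, assuming restore_ref_table already exists as a reference table as in the test setup:

    -- session 1
    BEGIN;
    SET citus.multi_shard_commit_protocol TO '2pc';
    INSERT INTO restore_ref_table VALUES (1, 'hello');

    -- session 2: returns 1 without waiting for session 1
    SELECT 1 FROM citus_create_restore_point('citus-test');

    -- session 1
    COMMIT;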
@@ -437,19 +438,19 @@ create_reference_table step s2-begin: - BEGIN; + BEGIN; step s2-create-restore: - SELECT 1 FROM citus_create_restore_point('citus-test'); + SELECT 1 FROM citus_create_restore_point('citus-test'); ?column? 1 step s1-modify-multiple-ref: - UPDATE restore_ref_table SET data = 'world'; + UPDATE restore_ref_table SET data = 'world'; -step s2-commit: - COMMIT; +step s2-commit: + COMMIT; step s1-modify-multiple-ref: <... completed> @@ -458,19 +459,19 @@ create_reference_table step s2-begin: - BEGIN; + BEGIN; step s2-create-restore: - SELECT 1 FROM citus_create_restore_point('citus-test'); + SELECT 1 FROM citus_create_restore_point('citus-test'); ?column? 1 step s1-ddl-ref: - ALTER TABLE restore_ref_table ADD COLUMN x int; + ALTER TABLE restore_ref_table ADD COLUMN x int; -step s2-commit: - COMMIT; +step s2-commit: + COMMIT; step s1-ddl-ref: <... completed> @@ -479,22 +480,22 @@ create_reference_table step s2-begin: - BEGIN; + BEGIN; step s2-create-restore: - SELECT 1 FROM citus_create_restore_point('citus-test'); + SELECT 1 FROM citus_create_restore_point('citus-test'); ?column? 1 step s1-multi-statement-ref: - SET citus.multi_shard_commit_protocol TO '2pc'; - BEGIN; - INSERT INTO restore_ref_table VALUES (1,'hello'); - INSERT INTO restore_ref_table VALUES (2,'hello'); - COMMIT; + SET citus.multi_shard_commit_protocol TO '2pc'; + BEGIN; + INSERT INTO restore_ref_table VALUES (1,'hello'); + INSERT INTO restore_ref_table VALUES (2,'hello'); + COMMIT; -step s2-commit: - COMMIT; +step s2-commit: + COMMIT; step s1-multi-statement-ref: <... completed> diff --git a/src/test/regress/expected/isolation_dump_global_wait_edges.out b/src/test/regress/expected/isolation_dump_global_wait_edges.out index b0343b982..4880b879a 100644 --- a/src/test/regress/expected/isolation_dump_global_wait_edges.out +++ b/src/test/regress/expected/isolation_dump_global_wait_edges.out @@ -13,7 +13,7 @@ step s1-update: step s2-update: UPDATE distributed_table SET y = 2 WHERE x = 1; -step detector-dump-wait-edges: +step detector-dump-wait-edges: SELECT waiting_transaction_num, blocking_transaction_num, @@ -28,11 +28,11 @@ step detector-dump-wait-edges: waiting_transaction_numblocking_transaction_numblocking_transaction_waiting -395 394 f +400 399 f transactionnumberwaitingtransactionnumbers -394 -395 394 +399 +400 399 step s1-abort: ABORT; @@ -57,10 +57,10 @@ step s1-update: step s2-update: UPDATE distributed_table SET y = 2 WHERE x = 1; -step s3-update: +step s3-update: UPDATE distributed_table SET y = 3 WHERE x = 1; -step detector-dump-wait-edges: +step detector-dump-wait-edges: SELECT waiting_transaction_num, blocking_transaction_num, @@ -75,14 +75,14 @@ step detector-dump-wait-edges: waiting_transaction_numblocking_transaction_numblocking_transaction_waiting -399 398 f -400 398 f -400 399 t +404 403 f +405 403 f +405 404 t transactionnumberwaitingtransactionnumbers -398 -399 398 -400 398,399 +403 +404 403 +405 403,404 step s1-abort: ABORT; diff --git a/src/test/regress/expected/isolation_ensure_dependency_activate_node.out b/src/test/regress/expected/isolation_ensure_dependency_activate_node.out index 4bd7941eb..2279abd76 100644 --- a/src/test/regress/expected/isolation_ensure_dependency_activate_node.out +++ b/src/test/regress/expected/isolation_ensure_dependency_activate_node.out @@ -12,7 +12,7 @@ step s1-print-distributed-objects: SELECT count(*) FROM pg_namespace where nspname = 'myschema'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_namespace where nspname = 'myschema';$$); -- print if 
the type has been created - SELECT count(*) FROM pg_type where typname = 'tt1'; + SELECT count(*) FROM pg_type where typname = 'tt1'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_type where typname = 'tt1';$$); -- print if the function has been created SELECT count(*) FROM pg_proc WHERE proname='add'; @@ -24,6 +24,7 @@ step s1-print-distributed-objects: 1 pg_identify_object_as_address +(database,{regression},{}) (role,{postgres},{}) (schema,{public},{}) count @@ -54,7 +55,7 @@ step s1-begin: BEGIN; step s1-add-worker: - SELECT 1 FROM master_add_node('localhost', 57638); + SELECT 1 FROM master_add_node('localhost', 57638); ?column? @@ -63,13 +64,12 @@ step s2-public-schema: SET search_path TO public; step s2-create-table: - CREATE TABLE t1 (a int, b int); + CREATE TABLE t1 (a int, b int); -- session needs to have replication factor set to 1, can't do in setup - SET citus.replication_model TO 'streaming'; - SET citus.shard_replication_factor TO 1; - SELECT create_distributed_table('t1', 'a'); + SET citus.shard_replication_factor TO 1; + SELECT create_distributed_table('t1', 'a'); -step s1-commit: +step s1-commit: COMMIT; step s2-create-table: <... completed> @@ -83,7 +83,7 @@ step s2-print-distributed-objects: SELECT count(*) FROM pg_namespace where nspname = 'myschema'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_namespace where nspname = 'myschema';$$); -- print if the type has been created - SELECT count(*) FROM pg_type where typname = 'tt1'; + SELECT count(*) FROM pg_type where typname = 'tt1'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_type where typname = 'tt1';$$); -- print if the function has been created SELECT count(*) FROM pg_proc WHERE proname='add'; @@ -91,6 +91,7 @@ step s2-print-distributed-objects: pg_identify_object_as_address +(database,{regression},{}) (role,{postgres},{}) (schema,{public},{}) count @@ -131,7 +132,7 @@ step s1-print-distributed-objects: SELECT count(*) FROM pg_namespace where nspname = 'myschema'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_namespace where nspname = 'myschema';$$); -- print if the type has been created - SELECT count(*) FROM pg_type where typname = 'tt1'; + SELECT count(*) FROM pg_type where typname = 'tt1'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_type where typname = 'tt1';$$); -- print if the function has been created SELECT count(*) FROM pg_proc WHERE proname='add'; @@ -143,6 +144,7 @@ step s1-print-distributed-objects: 1 pg_identify_object_as_address +(database,{regression},{}) (role,{postgres},{}) (schema,{public},{}) count @@ -173,10 +175,10 @@ step s1-begin: BEGIN; step s2-begin: - BEGIN; + BEGIN; step s1-add-worker: - SELECT 1 FROM master_add_node('localhost', 57638); + SELECT 1 FROM master_add_node('localhost', 57638); ?column? @@ -185,13 +187,12 @@ step s2-public-schema: SET search_path TO public; step s2-create-table: - CREATE TABLE t1 (a int, b int); + CREATE TABLE t1 (a int, b int); -- session needs to have replication factor set to 1, can't do in setup - SET citus.replication_model TO 'streaming'; - SET citus.shard_replication_factor TO 1; - SELECT create_distributed_table('t1', 'a'); + SET citus.shard_replication_factor TO 1; + SELECT create_distributed_table('t1', 'a'); -step s1-commit: +step s1-commit: COMMIT; step s2-create-table: <... 
completed> @@ -199,7 +200,7 @@ create_distributed_table step s2-commit: - COMMIT; + COMMIT; step s2-print-distributed-objects: -- print an overview of all distributed objects @@ -208,7 +209,7 @@ step s2-print-distributed-objects: SELECT count(*) FROM pg_namespace where nspname = 'myschema'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_namespace where nspname = 'myschema';$$); -- print if the type has been created - SELECT count(*) FROM pg_type where typname = 'tt1'; + SELECT count(*) FROM pg_type where typname = 'tt1'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_type where typname = 'tt1';$$); -- print if the function has been created SELECT count(*) FROM pg_proc WHERE proname='add'; @@ -216,6 +217,7 @@ step s2-print-distributed-objects: pg_identify_object_as_address +(database,{regression},{}) (role,{postgres},{}) (schema,{public},{}) count @@ -256,7 +258,7 @@ step s1-print-distributed-objects: SELECT count(*) FROM pg_namespace where nspname = 'myschema'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_namespace where nspname = 'myschema';$$); -- print if the type has been created - SELECT count(*) FROM pg_type where typname = 'tt1'; + SELECT count(*) FROM pg_type where typname = 'tt1'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_type where typname = 'tt1';$$); -- print if the function has been created SELECT count(*) FROM pg_proc WHERE proname='add'; @@ -268,6 +270,7 @@ step s1-print-distributed-objects: 1 pg_identify_object_as_address +(database,{regression},{}) (role,{postgres},{}) (schema,{public},{}) count @@ -298,26 +301,25 @@ step s1-begin: BEGIN; step s2-begin: - BEGIN; + BEGIN; step s2-public-schema: SET search_path TO public; step s2-create-table: - CREATE TABLE t1 (a int, b int); + CREATE TABLE t1 (a int, b int); -- session needs to have replication factor set to 1, can't do in setup - SET citus.replication_model TO 'streaming'; - SET citus.shard_replication_factor TO 1; - SELECT create_distributed_table('t1', 'a'); + SET citus.shard_replication_factor TO 1; + SELECT create_distributed_table('t1', 'a'); create_distributed_table step s1-add-worker: - SELECT 1 FROM master_add_node('localhost', 57638); + SELECT 1 FROM master_add_node('localhost', 57638); -step s2-commit: - COMMIT; +step s2-commit: + COMMIT; step s1-add-worker: <... completed> ?column? 
@@ -333,7 +335,7 @@ step s2-print-distributed-objects: SELECT count(*) FROM pg_namespace where nspname = 'myschema'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_namespace where nspname = 'myschema';$$); -- print if the type has been created - SELECT count(*) FROM pg_type where typname = 'tt1'; + SELECT count(*) FROM pg_type where typname = 'tt1'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_type where typname = 'tt1';$$); -- print if the function has been created SELECT count(*) FROM pg_proc WHERE proname='add'; @@ -341,6 +343,7 @@ step s2-print-distributed-objects: pg_identify_object_as_address +(database,{regression},{}) (role,{postgres},{}) (schema,{public},{}) count @@ -381,7 +384,7 @@ step s1-print-distributed-objects: SELECT count(*) FROM pg_namespace where nspname = 'myschema'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_namespace where nspname = 'myschema';$$); -- print if the type has been created - SELECT count(*) FROM pg_type where typname = 'tt1'; + SELECT count(*) FROM pg_type where typname = 'tt1'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_type where typname = 'tt1';$$); -- print if the function has been created SELECT count(*) FROM pg_proc WHERE proname='add'; @@ -393,6 +396,7 @@ step s1-print-distributed-objects: 1 pg_identify_object_as_address +(database,{regression},{}) (role,{postgres},{}) (schema,{public},{}) count @@ -423,7 +427,7 @@ step s1-begin: BEGIN; step s1-add-worker: - SELECT 1 FROM master_add_node('localhost', 57638); + SELECT 1 FROM master_add_node('localhost', 57638); ?column? @@ -433,13 +437,12 @@ step s2-create-schema: SET search_path TO myschema; step s2-create-table: - CREATE TABLE t1 (a int, b int); + CREATE TABLE t1 (a int, b int); -- session needs to have replication factor set to 1, can't do in setup - SET citus.replication_model TO 'streaming'; - SET citus.shard_replication_factor TO 1; - SELECT create_distributed_table('t1', 'a'); + SET citus.shard_replication_factor TO 1; + SELECT create_distributed_table('t1', 'a'); -step s1-commit: +step s1-commit: COMMIT; step s2-create-table: <... 
completed> @@ -453,7 +456,7 @@ step s2-print-distributed-objects: SELECT count(*) FROM pg_namespace where nspname = 'myschema'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_namespace where nspname = 'myschema';$$); -- print if the type has been created - SELECT count(*) FROM pg_type where typname = 'tt1'; + SELECT count(*) FROM pg_type where typname = 'tt1'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_type where typname = 'tt1';$$); -- print if the function has been created SELECT count(*) FROM pg_proc WHERE proname='add'; @@ -461,6 +464,7 @@ step s2-print-distributed-objects: pg_identify_object_as_address +(database,{regression},{}) (role,{postgres},{}) (schema,{myschema},{}) (schema,{public},{}) @@ -502,7 +506,7 @@ step s1-print-distributed-objects: SELECT count(*) FROM pg_namespace where nspname = 'myschema'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_namespace where nspname = 'myschema';$$); -- print if the type has been created - SELECT count(*) FROM pg_type where typname = 'tt1'; + SELECT count(*) FROM pg_type where typname = 'tt1'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_type where typname = 'tt1';$$); -- print if the function has been created SELECT count(*) FROM pg_proc WHERE proname='add'; @@ -514,6 +518,7 @@ step s1-print-distributed-objects: 1 pg_identify_object_as_address +(database,{regression},{}) (role,{postgres},{}) (schema,{public},{}) count @@ -544,10 +549,10 @@ step s1-begin: BEGIN; step s2-begin: - BEGIN; + BEGIN; step s1-add-worker: - SELECT 1 FROM master_add_node('localhost', 57638); + SELECT 1 FROM master_add_node('localhost', 57638); ?column? @@ -557,13 +562,12 @@ step s2-create-schema: SET search_path TO myschema; step s2-create-table: - CREATE TABLE t1 (a int, b int); + CREATE TABLE t1 (a int, b int); -- session needs to have replication factor set to 1, can't do in setup - SET citus.replication_model TO 'streaming'; - SET citus.shard_replication_factor TO 1; - SELECT create_distributed_table('t1', 'a'); + SET citus.shard_replication_factor TO 1; + SELECT create_distributed_table('t1', 'a'); -step s1-commit: +step s1-commit: COMMIT; step s2-create-table: <... 
completed> @@ -571,7 +575,7 @@ create_distributed_table step s2-commit: - COMMIT; + COMMIT; step s2-print-distributed-objects: -- print an overview of all distributed objects @@ -580,7 +584,7 @@ step s2-print-distributed-objects: SELECT count(*) FROM pg_namespace where nspname = 'myschema'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_namespace where nspname = 'myschema';$$); -- print if the type has been created - SELECT count(*) FROM pg_type where typname = 'tt1'; + SELECT count(*) FROM pg_type where typname = 'tt1'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_type where typname = 'tt1';$$); -- print if the function has been created SELECT count(*) FROM pg_proc WHERE proname='add'; @@ -588,6 +592,7 @@ step s2-print-distributed-objects: pg_identify_object_as_address +(database,{regression},{}) (role,{postgres},{}) (schema,{myschema},{}) (schema,{public},{}) @@ -629,7 +634,7 @@ step s1-print-distributed-objects: SELECT count(*) FROM pg_namespace where nspname = 'myschema'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_namespace where nspname = 'myschema';$$); -- print if the type has been created - SELECT count(*) FROM pg_type where typname = 'tt1'; + SELECT count(*) FROM pg_type where typname = 'tt1'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_type where typname = 'tt1';$$); -- print if the function has been created SELECT count(*) FROM pg_proc WHERE proname='add'; @@ -641,6 +646,7 @@ step s1-print-distributed-objects: 1 pg_identify_object_as_address +(database,{regression},{}) (role,{postgres},{}) (schema,{public},{}) count @@ -671,27 +677,26 @@ step s1-begin: BEGIN; step s2-begin: - BEGIN; + BEGIN; step s2-create-schema: CREATE SCHEMA myschema; SET search_path TO myschema; step s2-create-table: - CREATE TABLE t1 (a int, b int); + CREATE TABLE t1 (a int, b int); -- session needs to have replication factor set to 1, can't do in setup - SET citus.replication_model TO 'streaming'; - SET citus.shard_replication_factor TO 1; - SELECT create_distributed_table('t1', 'a'); + SET citus.shard_replication_factor TO 1; + SELECT create_distributed_table('t1', 'a'); create_distributed_table step s1-add-worker: - SELECT 1 FROM master_add_node('localhost', 57638); + SELECT 1 FROM master_add_node('localhost', 57638); -step s2-commit: - COMMIT; +step s2-commit: + COMMIT; step s1-add-worker: <... completed> ?column? 
@@ -707,7 +712,7 @@ step s2-print-distributed-objects: SELECT count(*) FROM pg_namespace where nspname = 'myschema'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_namespace where nspname = 'myschema';$$); -- print if the type has been created - SELECT count(*) FROM pg_type where typname = 'tt1'; + SELECT count(*) FROM pg_type where typname = 'tt1'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_type where typname = 'tt1';$$); -- print if the function has been created SELECT count(*) FROM pg_proc WHERE proname='add'; @@ -715,6 +720,7 @@ step s2-print-distributed-objects: pg_identify_object_as_address +(database,{regression},{}) (role,{postgres},{}) (schema,{myschema},{}) (schema,{public},{}) @@ -756,7 +762,7 @@ step s1-print-distributed-objects: SELECT count(*) FROM pg_namespace where nspname = 'myschema'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_namespace where nspname = 'myschema';$$); -- print if the type has been created - SELECT count(*) FROM pg_type where typname = 'tt1'; + SELECT count(*) FROM pg_type where typname = 'tt1'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_type where typname = 'tt1';$$); -- print if the function has been created SELECT count(*) FROM pg_proc WHERE proname='add'; @@ -768,6 +774,7 @@ step s1-print-distributed-objects: 1 pg_identify_object_as_address +(database,{regression},{}) (role,{postgres},{}) (schema,{public},{}) count @@ -802,34 +809,33 @@ step s1-begin: BEGIN; step s2-begin: - BEGIN; + BEGIN; step s3-begin: - BEGIN; + BEGIN; step s1-add-worker: - SELECT 1 FROM master_add_node('localhost', 57638); + SELECT 1 FROM master_add_node('localhost', 57638); ?column? 1 step s2-create-table: - CREATE TABLE t1 (a int, b int); + CREATE TABLE t1 (a int, b int); -- session needs to have replication factor set to 1, can't do in setup - SET citus.replication_model TO 'streaming'; - SET citus.shard_replication_factor TO 1; - SELECT create_distributed_table('t1', 'a'); + SET citus.shard_replication_factor TO 1; + SELECT create_distributed_table('t1', 'a'); -step s3-use-schema: +step s3-use-schema: SET search_path TO myschema; step s3-create-table: - CREATE TABLE t2 (a int, b int); + CREATE TABLE t2 (a int, b int); -- session needs to have replication factor set to 1, can't do in setup - SET citus.shard_replication_factor TO 1; - SELECT create_distributed_table('t2', 'a'); + SET citus.shard_replication_factor TO 1; + SELECT create_distributed_table('t2', 'a'); -step s1-commit: +step s1-commit: COMMIT; step s2-create-table: <... completed> @@ -837,14 +843,14 @@ create_distributed_table step s2-commit: - COMMIT; + COMMIT; step s3-create-table: <... 
completed> create_distributed_table step s3-commit: - COMMIT; + COMMIT; step s2-print-distributed-objects: -- print an overview of all distributed objects @@ -853,7 +859,7 @@ step s2-print-distributed-objects: SELECT count(*) FROM pg_namespace where nspname = 'myschema'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_namespace where nspname = 'myschema';$$); -- print if the type has been created - SELECT count(*) FROM pg_type where typname = 'tt1'; + SELECT count(*) FROM pg_type where typname = 'tt1'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_type where typname = 'tt1';$$); -- print if the function has been created SELECT count(*) FROM pg_proc WHERE proname='add'; @@ -861,6 +867,7 @@ step s2-print-distributed-objects: pg_identify_object_as_address +(database,{regression},{}) (role,{postgres},{}) (schema,{myschema},{}) (schema,{public},{}) @@ -902,7 +909,7 @@ step s1-print-distributed-objects: SELECT count(*) FROM pg_namespace where nspname = 'myschema'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_namespace where nspname = 'myschema';$$); -- print if the type has been created - SELECT count(*) FROM pg_type where typname = 'tt1'; + SELECT count(*) FROM pg_type where typname = 'tt1'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_type where typname = 'tt1';$$); -- print if the function has been created SELECT count(*) FROM pg_proc WHERE proname='add'; @@ -914,6 +921,7 @@ step s1-print-distributed-objects: 1 pg_identify_object_as_address +(database,{regression},{}) (role,{postgres},{}) (schema,{public},{}) count @@ -941,7 +949,7 @@ master_remove_node step s1-add-worker: - SELECT 1 FROM master_add_node('localhost', 57638); + SELECT 1 FROM master_add_node('localhost', 57638); ?column? @@ -951,39 +959,38 @@ step s2-create-schema: SET search_path TO myschema; step s2-begin: - BEGIN; + BEGIN; step s3-begin: - BEGIN; + BEGIN; step s3-use-schema: SET search_path TO myschema; step s2-create-table: - CREATE TABLE t1 (a int, b int); + CREATE TABLE t1 (a int, b int); -- session needs to have replication factor set to 1, can't do in setup - SET citus.replication_model TO 'streaming'; - SET citus.shard_replication_factor TO 1; - SELECT create_distributed_table('t1', 'a'); + SET citus.shard_replication_factor TO 1; + SELECT create_distributed_table('t1', 'a'); create_distributed_table step s3-create-table: - CREATE TABLE t2 (a int, b int); + CREATE TABLE t2 (a int, b int); -- session needs to have replication factor set to 1, can't do in setup - SET citus.shard_replication_factor TO 1; - SELECT create_distributed_table('t2', 'a'); + SET citus.shard_replication_factor TO 1; + SELECT create_distributed_table('t2', 'a'); -step s2-commit: - COMMIT; +step s2-commit: + COMMIT; step s3-create-table: <... 
completed> create_distributed_table step s3-commit: - COMMIT; + COMMIT; step s2-print-distributed-objects: -- print an overview of all distributed objects @@ -992,7 +999,7 @@ step s2-print-distributed-objects: SELECT count(*) FROM pg_namespace where nspname = 'myschema'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_namespace where nspname = 'myschema';$$); -- print if the type has been created - SELECT count(*) FROM pg_type where typname = 'tt1'; + SELECT count(*) FROM pg_type where typname = 'tt1'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_type where typname = 'tt1';$$); -- print if the function has been created SELECT count(*) FROM pg_proc WHERE proname='add'; @@ -1000,6 +1007,7 @@ step s2-print-distributed-objects: pg_identify_object_as_address +(database,{regression},{}) (role,{postgres},{}) (schema,{myschema},{}) (schema,{public},{}) @@ -1041,7 +1049,7 @@ step s1-print-distributed-objects: SELECT count(*) FROM pg_namespace where nspname = 'myschema'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_namespace where nspname = 'myschema';$$); -- print if the type has been created - SELECT count(*) FROM pg_type where typname = 'tt1'; + SELECT count(*) FROM pg_type where typname = 'tt1'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_type where typname = 'tt1';$$); -- print if the function has been created SELECT count(*) FROM pg_proc WHERE proname='add'; @@ -1053,6 +1061,7 @@ step s1-print-distributed-objects: 1 pg_identify_object_as_address +(database,{regression},{}) (role,{postgres},{}) (schema,{public},{}) count @@ -1083,13 +1092,13 @@ step s1-begin: BEGIN; step s2-begin: - BEGIN; + BEGIN; step s3-begin: - BEGIN; + BEGIN; step s1-add-worker: - SELECT 1 FROM master_add_node('localhost', 57638); + SELECT 1 FROM master_add_node('localhost', 57638); ?column? @@ -1103,19 +1112,18 @@ step s3-create-schema2: SET search_path TO myschema2; step s2-create-table: - CREATE TABLE t1 (a int, b int); + CREATE TABLE t1 (a int, b int); -- session needs to have replication factor set to 1, can't do in setup - SET citus.replication_model TO 'streaming'; - SET citus.shard_replication_factor TO 1; - SELECT create_distributed_table('t1', 'a'); + SET citus.shard_replication_factor TO 1; + SELECT create_distributed_table('t1', 'a'); -step s3-create-table: - CREATE TABLE t2 (a int, b int); +step s3-create-table: + CREATE TABLE t2 (a int, b int); -- session needs to have replication factor set to 1, can't do in setup - SET citus.shard_replication_factor TO 1; - SELECT create_distributed_table('t2', 'a'); + SET citus.shard_replication_factor TO 1; + SELECT create_distributed_table('t2', 'a'); -step s1-commit: +step s1-commit: COMMIT; step s2-create-table: <... 
completed> @@ -1127,10 +1135,10 @@ create_distributed_table step s3-commit: - COMMIT; + COMMIT; step s2-commit: - COMMIT; + COMMIT; step s2-print-distributed-objects: -- print an overview of all distributed objects @@ -1139,7 +1147,7 @@ step s2-print-distributed-objects: SELECT count(*) FROM pg_namespace where nspname = 'myschema'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_namespace where nspname = 'myschema';$$); -- print if the type has been created - SELECT count(*) FROM pg_type where typname = 'tt1'; + SELECT count(*) FROM pg_type where typname = 'tt1'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_type where typname = 'tt1';$$); -- print if the function has been created SELECT count(*) FROM pg_proc WHERE proname='add'; @@ -1147,6 +1155,7 @@ step s2-print-distributed-objects: pg_identify_object_as_address +(database,{regression},{}) (role,{postgres},{}) (schema,{myschema},{}) (schema,{myschema2},{}) @@ -1189,7 +1198,7 @@ step s1-print-distributed-objects: SELECT count(*) FROM pg_namespace where nspname = 'myschema'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_namespace where nspname = 'myschema';$$); -- print if the type has been created - SELECT count(*) FROM pg_type where typname = 'tt1'; + SELECT count(*) FROM pg_type where typname = 'tt1'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_type where typname = 'tt1';$$); -- print if the function has been created SELECT count(*) FROM pg_proc WHERE proname='add'; @@ -1201,6 +1210,7 @@ step s1-print-distributed-objects: 1 pg_identify_object_as_address +(database,{regression},{}) (role,{postgres},{}) (schema,{public},{}) count @@ -1231,7 +1241,7 @@ step s1-begin: BEGIN; step s1-add-worker: - SELECT 1 FROM master_add_node('localhost', 57638); + SELECT 1 FROM master_add_node('localhost', 57638); ?column? @@ -1240,9 +1250,9 @@ step s2-public-schema: SET search_path TO public; step s2-create-type: - CREATE TYPE tt1 AS (a int, b int); + CREATE TYPE tt1 AS (a int, b int); -step s1-commit: +step s1-commit: COMMIT; step s2-create-type: <... 
completed> @@ -1253,7 +1263,7 @@ step s2-print-distributed-objects: SELECT count(*) FROM pg_namespace where nspname = 'myschema'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_namespace where nspname = 'myschema';$$); -- print if the type has been created - SELECT count(*) FROM pg_type where typname = 'tt1'; + SELECT count(*) FROM pg_type where typname = 'tt1'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_type where typname = 'tt1';$$); -- print if the function has been created SELECT count(*) FROM pg_proc WHERE proname='add'; @@ -1261,6 +1271,7 @@ step s2-print-distributed-objects: pg_identify_object_as_address +(database,{regression},{}) (role,{postgres},{}) (schema,{public},{}) (type,{public.tt1},{}) @@ -1302,7 +1313,7 @@ step s1-print-distributed-objects: SELECT count(*) FROM pg_namespace where nspname = 'myschema'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_namespace where nspname = 'myschema';$$); -- print if the type has been created - SELECT count(*) FROM pg_type where typname = 'tt1'; + SELECT count(*) FROM pg_type where typname = 'tt1'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_type where typname = 'tt1';$$); -- print if the function has been created SELECT count(*) FROM pg_proc WHERE proname='add'; @@ -1314,6 +1325,7 @@ step s1-print-distributed-objects: 1 pg_identify_object_as_address +(database,{regression},{}) (role,{postgres},{}) (schema,{public},{}) count @@ -1347,10 +1359,10 @@ step s2-public-schema: SET search_path TO public; step s2-create-type: - CREATE TYPE tt1 AS (a int, b int); + CREATE TYPE tt1 AS (a int, b int); step s1-add-worker: - SELECT 1 FROM master_add_node('localhost', 57638); + SELECT 1 FROM master_add_node('localhost', 57638); ?column? @@ -1365,7 +1377,7 @@ step s2-print-distributed-objects: SELECT count(*) FROM pg_namespace where nspname = 'myschema'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_namespace where nspname = 'myschema';$$); -- print if the type has been created - SELECT count(*) FROM pg_type where typname = 'tt1'; + SELECT count(*) FROM pg_type where typname = 'tt1'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_type where typname = 'tt1';$$); -- print if the function has been created SELECT count(*) FROM pg_proc WHERE proname='add'; @@ -1373,6 +1385,7 @@ step s2-print-distributed-objects: pg_identify_object_as_address +(database,{regression},{}) (role,{postgres},{}) (schema,{public},{}) (type,{public.tt1},{}) @@ -1414,7 +1427,7 @@ step s1-print-distributed-objects: SELECT count(*) FROM pg_namespace where nspname = 'myschema'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_namespace where nspname = 'myschema';$$); -- print if the type has been created - SELECT count(*) FROM pg_type where typname = 'tt1'; + SELECT count(*) FROM pg_type where typname = 'tt1'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_type where typname = 'tt1';$$); -- print if the function has been created SELECT count(*) FROM pg_proc WHERE proname='add'; @@ -1426,6 +1439,7 @@ step s1-print-distributed-objects: 1 pg_identify_object_as_address +(database,{regression},{}) (role,{postgres},{}) (schema,{public},{}) count @@ -1456,30 +1470,29 @@ step s1-begin: BEGIN; step s2-begin: - BEGIN; + BEGIN; step s2-create-schema: CREATE SCHEMA myschema; SET search_path TO myschema; step s2-create-type: - CREATE TYPE tt1 AS (a int, b int); + CREATE TYPE tt1 AS (a int, b int); step s2-create-table-with-type: - CREATE TABLE t1 (a int, b tt1); + CREATE TABLE t1 (a int, b tt1); -- session needs to have 
replication factor set to 1, can't do in setup - SET citus.replication_model TO 'streaming'; - SET citus.shard_replication_factor TO 1; - SELECT create_distributed_table('t1', 'a'); + SET citus.shard_replication_factor TO 1; + SELECT create_distributed_table('t1', 'a'); create_distributed_table step s1-add-worker: - SELECT 1 FROM master_add_node('localhost', 57638); + SELECT 1 FROM master_add_node('localhost', 57638); -step s2-commit: - COMMIT; +step s2-commit: + COMMIT; step s1-add-worker: <... completed> ?column? @@ -1495,7 +1508,7 @@ step s2-print-distributed-objects: SELECT count(*) FROM pg_namespace where nspname = 'myschema'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_namespace where nspname = 'myschema';$$); -- print if the type has been created - SELECT count(*) FROM pg_type where typname = 'tt1'; + SELECT count(*) FROM pg_type where typname = 'tt1'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_type where typname = 'tt1';$$); -- print if the function has been created SELECT count(*) FROM pg_proc WHERE proname='add'; @@ -1503,6 +1516,7 @@ step s2-print-distributed-objects: pg_identify_object_as_address +(database,{regression},{}) (role,{postgres},{}) (schema,{myschema},{}) (schema,{public},{}) @@ -1545,7 +1559,7 @@ step s1-print-distributed-objects: SELECT count(*) FROM pg_namespace where nspname = 'myschema'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_namespace where nspname = 'myschema';$$); -- print if the type has been created - SELECT count(*) FROM pg_type where typname = 'tt1'; + SELECT count(*) FROM pg_type where typname = 'tt1'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_type where typname = 'tt1';$$); -- print if the function has been created SELECT count(*) FROM pg_proc WHERE proname='add'; @@ -1557,6 +1571,7 @@ step s1-print-distributed-objects: 1 pg_identify_object_as_address +(database,{regression},{}) (role,{postgres},{}) (schema,{public},{}) count @@ -1587,7 +1602,7 @@ step s1-begin: BEGIN; step s1-add-worker: - SELECT 1 FROM master_add_node('localhost', 57638); + SELECT 1 FROM master_add_node('localhost', 57638); ?column? @@ -1599,7 +1614,7 @@ step s2-distribute-function: CREATE OR REPLACE FUNCTION add (INT,INT) RETURNS INT AS $$ SELECT $1 + $2 $$ LANGUAGE SQL; SELECT create_distributed_function('add(INT,INT)', '$1'); -step s1-commit: +step s1-commit: COMMIT; step s2-distribute-function: <... 
completed> @@ -1607,10 +1622,10 @@ create_distributed_function step s2-begin: - BEGIN; + BEGIN; step s2-commit: - COMMIT; + COMMIT; step s3-wait-for-metadata-sync: SELECT public.wait_until_metadata_sync(5000); @@ -1625,7 +1640,7 @@ step s2-print-distributed-objects: SELECT count(*) FROM pg_namespace where nspname = 'myschema'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_namespace where nspname = 'myschema';$$); -- print if the type has been created - SELECT count(*) FROM pg_type where typname = 'tt1'; + SELECT count(*) FROM pg_type where typname = 'tt1'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_type where typname = 'tt1';$$); -- print if the function has been created SELECT count(*) FROM pg_proc WHERE proname='add'; @@ -1633,6 +1648,7 @@ step s2-print-distributed-objects: pg_identify_object_as_address +(database,{regression},{}) (function,"{public,add}","{integer,integer}") (role,{postgres},{}) (schema,{public},{}) @@ -1674,7 +1690,7 @@ step s1-print-distributed-objects: SELECT count(*) FROM pg_namespace where nspname = 'myschema'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_namespace where nspname = 'myschema';$$); -- print if the type has been created - SELECT count(*) FROM pg_type where typname = 'tt1'; + SELECT count(*) FROM pg_type where typname = 'tt1'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_type where typname = 'tt1';$$); -- print if the function has been created SELECT count(*) FROM pg_proc WHERE proname='add'; @@ -1686,6 +1702,7 @@ step s1-print-distributed-objects: 1 pg_identify_object_as_address +(database,{regression},{}) (role,{postgres},{}) (schema,{public},{}) count @@ -1726,10 +1743,10 @@ create_distributed_function step s2-begin: - BEGIN; + BEGIN; step s2-commit: - COMMIT; + COMMIT; step s3-wait-for-metadata-sync: SELECT public.wait_until_metadata_sync(5000); @@ -1738,7 +1755,7 @@ wait_until_metadata_sync step s1-add-worker: - SELECT 1 FROM master_add_node('localhost', 57638); + SELECT 1 FROM master_add_node('localhost', 57638); ?column? 
@@ -1759,7 +1776,7 @@ step s2-print-distributed-objects: SELECT count(*) FROM pg_namespace where nspname = 'myschema'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_namespace where nspname = 'myschema';$$); -- print if the type has been created - SELECT count(*) FROM pg_type where typname = 'tt1'; + SELECT count(*) FROM pg_type where typname = 'tt1'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_type where typname = 'tt1';$$); -- print if the function has been created SELECT count(*) FROM pg_proc WHERE proname='add'; @@ -1767,6 +1784,7 @@ step s2-print-distributed-objects: pg_identify_object_as_address +(database,{regression},{}) (function,"{public,add}","{integer,integer}") (role,{postgres},{}) (schema,{public},{}) @@ -1808,7 +1826,7 @@ step s1-print-distributed-objects: SELECT count(*) FROM pg_namespace where nspname = 'myschema'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_namespace where nspname = 'myschema';$$); -- print if the type has been created - SELECT count(*) FROM pg_type where typname = 'tt1'; + SELECT count(*) FROM pg_type where typname = 'tt1'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_type where typname = 'tt1';$$); -- print if the function has been created SELECT count(*) FROM pg_proc WHERE proname='add'; @@ -1820,6 +1838,7 @@ step s1-print-distributed-objects: 1 pg_identify_object_as_address +(database,{regression},{}) (role,{postgres},{}) (schema,{public},{}) count @@ -1847,7 +1866,7 @@ master_remove_node step s2-begin: - BEGIN; + BEGIN; step s2-create-schema: CREATE SCHEMA myschema; @@ -1861,7 +1880,7 @@ create_distributed_function step s2-commit: - COMMIT; + COMMIT; step s3-wait-for-metadata-sync: SELECT public.wait_until_metadata_sync(5000); @@ -1873,7 +1892,7 @@ step s1-begin: BEGIN; step s1-add-worker: - SELECT 1 FROM master_add_node('localhost', 57638); + SELECT 1 FROM master_add_node('localhost', 57638); ?column? 
@@ -1894,7 +1913,7 @@ step s2-print-distributed-objects: SELECT count(*) FROM pg_namespace where nspname = 'myschema'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_namespace where nspname = 'myschema';$$); -- print if the type has been created - SELECT count(*) FROM pg_type where typname = 'tt1'; + SELECT count(*) FROM pg_type where typname = 'tt1'; SELECT run_command_on_workers($$SELECT count(*) FROM pg_type where typname = 'tt1';$$); -- print if the function has been created SELECT count(*) FROM pg_proc WHERE proname='add'; @@ -1902,6 +1921,7 @@ step s2-print-distributed-objects: pg_identify_object_as_address +(database,{regression},{}) (function,"{myschema,add}","{integer,integer}") (role,{postgres},{}) (schema,{myschema},{}) diff --git a/src/test/regress/expected/isolation_extension_commands.out b/src/test/regress/expected/isolation_extension_commands.out index e2a5d6090..c41e9e16f 100644 --- a/src/test/regress/expected/isolation_extension_commands.out +++ b/src/test/regress/expected/isolation_extension_commands.out @@ -26,7 +26,7 @@ step s1-print: count -3 +4 extname extversion nspname seg 1.1 public @@ -73,7 +73,7 @@ step s1-print: count -3 +4 extname extversion nspname seg 1.2 public @@ -126,7 +126,7 @@ step s1-print: count -2 +3 extname extversion nspname run_command_on_workers @@ -168,7 +168,7 @@ step s1-print: count -4 +5 extname extversion nspname seg 1.3 schema1 @@ -215,7 +215,7 @@ step s1-print: count -3 +4 extname extversion nspname run_command_on_workers @@ -270,7 +270,7 @@ step s1-print: count -6 +7 extname extversion nspname seg 1.3 schema3 @@ -322,7 +322,7 @@ step s1-print: count -6 +7 extname extversion nspname seg 1.3 schema1 @@ -379,7 +379,7 @@ step s1-print: count -5 +6 extname extversion nspname seg 1.1 public @@ -444,7 +444,7 @@ step s1-print: count -6 +7 extname extversion nspname seg 1.2 public @@ -497,7 +497,7 @@ step s1-print: count -5 +6 extname extversion nspname run_command_on_workers @@ -538,7 +538,7 @@ step s1-print: count -5 +6 extname extversion nspname seg 1.3 schema1 @@ -597,7 +597,7 @@ step s1-print: count -6 +7 extname extversion nspname seg 1.3 schema2 @@ -648,7 +648,7 @@ step s1-print: count -5 +6 extname extversion nspname seg 1.1 public @@ -709,7 +709,7 @@ step s1-print: count -5 +6 extname extversion nspname run_command_on_workers diff --git a/src/test/regress/expected/isolation_rebalancer_deferred_drop.out b/src/test/regress/expected/isolation_rebalancer_deferred_drop.out index 779e70252..199678ca0 100644 --- a/src/test/regress/expected/isolation_rebalancer_deferred_drop.out +++ b/src/test/regress/expected/isolation_rebalancer_deferred_drop.out @@ -5,52 +5,44 @@ step s1-begin: BEGIN; step s1-move-placement: - SET citus.defer_drop_after_shard_move TO ON; - SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); + SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); master_move_shard_placement +s1: NOTICE: cleaned up 1 orphaned shards step s1-drop-marked-shards: - SELECT public.master_defer_delete_shards(); + SET client_min_messages to NOTICE; + CALL isolation_cleanup_orphaned_shards(); -master_defer_delete_shards - -1 step s2-drop-marked-shards: - SELECT public.master_defer_delete_shards(); + SET client_min_messages to DEBUG1; + CALL isolation_cleanup_orphaned_shards(); step s1-commit: COMMIT; step s2-drop-marked-shards: <... 
completed> -master_defer_delete_shards - -0 starting permutation: s1-begin s1-move-placement s2-drop-marked-shards s1-drop-marked-shards s1-commit step s1-begin: BEGIN; step s1-move-placement: - SET citus.defer_drop_after_shard_move TO ON; - SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); + SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); master_move_shard_placement step s2-drop-marked-shards: - SELECT public.master_defer_delete_shards(); + SET client_min_messages to DEBUG1; + CALL isolation_cleanup_orphaned_shards(); -master_defer_delete_shards - -0 +s1: NOTICE: cleaned up 1 orphaned shards step s1-drop-marked-shards: - SELECT public.master_defer_delete_shards(); + SET client_min_messages to NOTICE; + CALL isolation_cleanup_orphaned_shards(); -master_defer_delete_shards - -1 step s1-commit: COMMIT; @@ -60,8 +52,7 @@ step s1-begin: BEGIN; step s1-move-placement: - SET citus.defer_drop_after_shard_move TO ON; - SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); + SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); master_move_shard_placement @@ -82,14 +73,13 @@ run_commands_on_session_level_connection_to_node run_commands_on_session_level_connection_to_node -s1: WARNING: canceling statement due to lock timeout -s1: WARNING: Failed to drop 1 old shards out of 1 step s1-drop-marked-shards: - SELECT public.master_defer_delete_shards(); - -master_defer_delete_shards - -0 + SET client_min_messages to NOTICE; + CALL isolation_cleanup_orphaned_shards(); + +s1: WARNING: canceling statement due to lock timeout +step s1-drop-marked-shards: <... 
completed> +s1: WARNING: Failed to drop 1 orphaned shards out of 1 step s1-commit: COMMIT; @@ -99,3 +89,100 @@ step s2-stop-connection: stop_session_level_connection_to_node + +starting permutation: s1-begin s1-move-placement s2-start-session-level-connection s2-lock-table-on-worker s1-commit s1-begin s1-move-placement-back s1-commit s2-stop-connection +step s1-begin: + BEGIN; + +step s1-move-placement: + SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); + +master_move_shard_placement + + +step s2-start-session-level-connection: + SELECT start_session_level_connection_to_node('localhost', 57637); + +start_session_level_connection_to_node + + +step s2-lock-table-on-worker: + SELECT run_commands_on_session_level_connection_to_node('BEGIN;'); + SELECT run_commands_on_session_level_connection_to_node('LOCK TABLE t1_120000'); + +run_commands_on_session_level_connection_to_node + + +run_commands_on_session_level_connection_to_node + + +step s1-commit: + COMMIT; + +step s1-begin: + BEGIN; + +step s1-move-placement-back: + SET client_min_messages to NOTICE; + SHOW log_error_verbosity; + SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57638, 'localhost', 57637); + +log_error_verbosity + +verbose +ERROR: shard xxxxx still exists on the target node as an orphaned shard +step s1-commit: + COMMIT; + +step s2-stop-connection: + SELECT stop_session_level_connection_to_node(); + +stop_session_level_connection_to_node + + + +starting permutation: s1-begin s1-lock-pg-dist-placement s2-drop-old-shards s1-commit +step s1-begin: + BEGIN; + +step s1-lock-pg-dist-placement: + LOCK TABLE pg_dist_placement IN SHARE ROW EXCLUSIVE MODE; + +s2: DEBUG: could not acquire shard lock to cleanup placements +step s2-drop-old-shards: + SELECT run_try_drop_marked_shards(); + +run_try_drop_marked_shards + + +step s1-commit: + COMMIT; + + +starting permutation: s1-begin s2-begin s2-select s1-move-placement-without-deferred s2-commit s1-commit +step s1-begin: + BEGIN; + +step s2-begin: + BEGIN; + +step s2-select: + SELECT COUNT(*) FROM t1; + +count + +0 +step s1-move-placement-without-deferred: + SET citus.defer_drop_after_shard_move TO OFF; + SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); + +step s2-commit: + COMMIT; + +step s1-move-placement-without-deferred: <... 
completed> +master_move_shard_placement + + +step s1-commit: + COMMIT; + diff --git a/src/test/regress/expected/isolation_shard_rebalancer.out b/src/test/regress/expected/isolation_shard_rebalancer.out index 4bb980230..2eab1d8b8 100644 --- a/src/test/regress/expected/isolation_shard_rebalancer.out +++ b/src/test/regress/expected/isolation_shard_rebalancer.out @@ -286,7 +286,7 @@ rebalance_table_shards step s2-rebalance-all: select rebalance_table_shards(); -ERROR: could not acquire the lock required to rebalance public.distributed_transaction_id_table +ERROR: could not acquire the lock required to rebalance public.colocated1 step s1-commit: COMMIT; @@ -308,7 +308,7 @@ replicate_table_shards step s2-rebalance-all: select rebalance_table_shards(); -ERROR: could not acquire the lock required to rebalance public.distributed_transaction_id_table +ERROR: could not acquire the lock required to rebalance public.colocated1 step s1-commit: COMMIT; @@ -374,7 +374,7 @@ rebalance_table_shards step s2-drain: select master_drain_node('localhost', 57638); -ERROR: could not acquire the lock required to move public.distributed_transaction_id_table +ERROR: could not acquire the lock required to move public.colocated1 step s1-commit: COMMIT; @@ -396,7 +396,7 @@ replicate_table_shards step s2-drain: select master_drain_node('localhost', 57638); -ERROR: could not acquire the lock required to move public.distributed_transaction_id_table +ERROR: could not acquire the lock required to move public.colocated1 step s1-commit: COMMIT; diff --git a/src/test/regress/expected/isolation_shard_rebalancer_progress.out b/src/test/regress/expected/isolation_shard_rebalancer_progress.out new file mode 100644 index 000000000..a941d6d2b --- /dev/null +++ b/src/test/regress/expected/isolation_shard_rebalancer_progress.out @@ -0,0 +1,127 @@ +Parsed test spec with 3 sessions + +starting permutation: s2-lock-1 s2-lock-2 s1-rebalance-c1 s3-progress s2-unlock-1 s3-progress s2-unlock-2 s3-progress s1-commit s3-progress +master_set_node_property + + +step s2-lock-1: + SELECT pg_advisory_lock(29279, 1); + +pg_advisory_lock + + +step s2-lock-2: + SELECT pg_advisory_lock(29279, 2); + +pg_advisory_lock + + +step s1-rebalance-c1: + BEGIN; + SELECT * FROM get_rebalance_table_shards_plan('colocated1'); + SELECT rebalance_table_shards('colocated1', shard_transfer_mode:='block_writes'); + +step s3-progress: + set client_min_messages=NOTICE; + SELECT + table_name, + shardid, + shard_size, + sourcename, + sourceport, + source_shard_size, + targetname, + targetport, + target_shard_size, + progress + FROM get_rebalance_progress(); + +table_name shardid shard_size sourcename sourceport source_shard_sizetargetname targetport target_shard_sizeprogress + +colocated1 1500001 49152 localhost 57637 49152 localhost 57638 0 1 +colocated2 1500005 376832 localhost 57637 376832 localhost 57638 0 1 +colocated1 1500002 196608 localhost 57637 196608 localhost 57638 0 0 +colocated2 1500006 8192 localhost 57637 8192 localhost 57638 0 0 +step s2-unlock-1: + SELECT pg_advisory_unlock(29279, 1); + +pg_advisory_unlock + +t +step s3-progress: + set client_min_messages=NOTICE; + SELECT + table_name, + shardid, + shard_size, + sourcename, + sourceport, + source_shard_size, + targetname, + targetport, + target_shard_size, + progress + FROM get_rebalance_progress(); + +table_name shardid shard_size sourcename sourceport source_shard_sizetargetname targetport target_shard_sizeprogress + +colocated1 1500001 49152 localhost 57637 49152 localhost 57638 49152 2 +colocated2 
1500005 376832 localhost 57637 376832 localhost 57638 376832 2 +colocated1 1500002 196608 localhost 57637 196608 localhost 57638 0 1 +colocated2 1500006 8192 localhost 57637 8192 localhost 57638 0 1 +step s2-unlock-2: + SELECT pg_advisory_unlock(29279, 2); + +pg_advisory_unlock + +t +step s1-rebalance-c1: <... completed> +table_name shardid shard_size sourcename sourceport targetname targetport + +colocated1 1500001 0 localhost 57637 localhost 57638 +colocated2 1500005 0 localhost 57637 localhost 57638 +colocated1 1500002 0 localhost 57637 localhost 57638 +colocated2 1500006 0 localhost 57637 localhost 57638 +rebalance_table_shards + + +step s3-progress: + set client_min_messages=NOTICE; + SELECT + table_name, + shardid, + shard_size, + sourcename, + sourceport, + source_shard_size, + targetname, + targetport, + target_shard_size, + progress + FROM get_rebalance_progress(); + +table_name shardid shard_size sourcename sourceport source_shard_sizetargetname targetport target_shard_sizeprogress + +step s1-commit: + COMMIT; + +step s3-progress: + set client_min_messages=NOTICE; + SELECT + table_name, + shardid, + shard_size, + sourcename, + sourceport, + source_shard_size, + targetname, + targetport, + target_shard_size, + progress + FROM get_rebalance_progress(); + +table_name shardid shard_size sourcename sourceport source_shard_sizetargetname targetport target_shard_sizeprogress + +restore_isolation_tester_func + + diff --git a/src/test/regress/expected/isolation_update_node.out b/src/test/regress/expected/isolation_update_node.out index 7b22761dc..23602b509 100644 --- a/src/test/regress/expected/isolation_update_node.out +++ b/src/test/regress/expected/isolation_update_node.out @@ -6,7 +6,7 @@ nodeid nodename nodeport 22 localhost 57637 23 localhost 57638 step s1-begin: - BEGIN; + BEGIN; step s1-update-node-1: SELECT 1 FROM master_update_node( @@ -23,8 +23,8 @@ step s2-update-node-2: 'localhost', 58638); -step s1-commit: - COMMIT; +step s1-commit: + COMMIT; step s2-update-node-2: <... completed> ?column? @@ -48,7 +48,7 @@ nodeid nodename nodeport 24 localhost 57637 25 localhost 57638 step s1-begin: - BEGIN; + BEGIN; step s1-update-node-1: SELECT 1 FROM master_update_node( @@ -60,7 +60,7 @@ step s1-update-node-1: 1 step s2-begin: - BEGIN; + BEGIN; step s2-update-node-1: SELECT 1 FROM master_update_node( @@ -68,15 +68,15 @@ step s2-update-node-1: 'localhost', 58637); -step s1-commit: - COMMIT; +step s1-commit: + COMMIT; step s2-update-node-1: <... completed> ?column? 1 step s2-abort: - ABORT; + ABORT; step s1-show-nodes: SELECT nodeid, nodename, nodeport, isactive @@ -96,7 +96,7 @@ nodeid nodename nodeport 26 localhost 57637 27 localhost 57638 step s1-begin: - BEGIN; + BEGIN; step s1-update-node-1: SELECT 1 FROM master_update_node( @@ -110,8 +110,8 @@ step s1-update-node-1: step s2-start-metadata-sync-node-2: SELECT start_metadata_sync_to_node('localhost', 57638); -step s1-commit: - COMMIT; +step s1-commit: + COMMIT; step s2-start-metadata-sync-node-2: <... 
completed> start_metadata_sync_to_node @@ -133,3 +133,82 @@ master_run_on_worker (localhost,57638,t,"[{""f1"": 26, ""f2"": 26, ""f3"": ""localhost"", ""f4"": 58637}, {""f1"": 27, ""f2"": 27, ""f3"": ""localhost"", ""f4"": 57638}]") nodeid nodename nodeport + +starting permutation: s2-create-table s1-begin s1-update-node-nonexistent s1-prepare-transaction s2-cache-prepared-statement s1-commit-prepared s2-execute-prepared s1-update-node-existent s2-drop-table +nodeid nodename nodeport + +28 localhost 57637 +29 localhost 57638 +step s2-create-table: + CREATE TABLE test (a int); + SELECT create_distributed_table('test','a'); + +create_distributed_table + + +step s1-begin: + BEGIN; + +step s1-update-node-nonexistent: + SELECT 1 FROM master_update_node( + (select nodeid from pg_dist_node where nodeport = 57637), + 'non-existent', + 57637); + +?column? + +1 +step s1-prepare-transaction: + PREPARE transaction 'label'; + +step s2-cache-prepared-statement: + PREPARE foo AS SELECT COUNT(*) FROM test WHERE a = 3; + EXECUTE foo; + EXECUTE foo; + EXECUTE foo; + EXECUTE foo; + EXECUTE foo; + EXECUTE foo; + +count + +0 +count + +0 +count + +0 +count + +0 +count + +0 +count + +0 +step s1-commit-prepared: + COMMIT prepared 'label'; + +s2: WARNING: connection to the remote node non-existent:57637 failed with the following error: could not translate host name "non-existent" to address: Name or service not known +step s2-execute-prepared: + EXECUTE foo; + +count + +0 +step s1-update-node-existent: + SELECT 1 FROM master_update_node( + (select nodeid from pg_dist_node where nodeport = 57637), + 'localhost', + 57637); + +?column? + +1 +step s2-drop-table: + DROP TABLE test; + +nodeid nodename nodeport + diff --git a/src/test/regress/expected/isolation_update_node_lock_writes.out b/src/test/regress/expected/isolation_update_node_lock_writes.out index dcaa5b991..b60044b15 100644 --- a/src/test/regress/expected/isolation_update_node_lock_writes.out +++ b/src/test/regress/expected/isolation_update_node_lock_writes.out @@ -5,7 +5,7 @@ create_distributed_table step s1-begin: - BEGIN; + BEGIN; step s1-update-node-1: SELECT 1 FROM master_update_node( @@ -17,20 +17,20 @@ step s1-update-node-1: 1 step s2-begin: - BEGIN; + BEGIN; step s2-insert: INSERT INTO update_node(id, f1) SELECT id, md5(id::text) FROM generate_series(1, 10) as t(id); -step s1-commit: - COMMIT; +step s1-commit: + COMMIT; step s2-insert: <... completed> -error in steps s1-commit s2-insert: ERROR: relation "public.update_node_102008" does not exist +error in steps s1-commit s2-insert: ERROR: relation "public.update_node_102012" does not exist step s2-abort: - ABORT; + ABORT; nodeid nodename nodeport @@ -40,7 +40,7 @@ create_distributed_table step s2-begin: - BEGIN; + BEGIN; step s2-insert: INSERT INTO update_node(id, f1) @@ -53,8 +53,8 @@ step s1-update-node-1: 'localhost', 57638); -step s2-commit: - COMMIT; +step s2-commit: + COMMIT; step s1-update-node-1: <... completed> ?column? 
diff --git a/src/test/regress/expected/local_shard_copy.out b/src/test/regress/expected/local_shard_copy.out index 9d856653c..733833c68 100644 --- a/src/test/regress/expected/local_shard_copy.out +++ b/src/test/regress/expected/local_shard_copy.out @@ -11,7 +11,6 @@ SELECT 1 FROM master_add_node('localhost', :master_port, groupid := 0); SET citus.shard_count TO 4; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; CREATE TABLE reference_table (key int PRIMARY KEY); DEBUG: CREATE TABLE / PRIMARY KEY will create implicit index "reference_table_pkey" for table "reference_table" SELECT create_reference_table('reference_table'); @@ -52,9 +51,17 @@ SELECT create_distributed_table('collections_list', 'key'); CREATE TABLE collections_list_0 PARTITION OF collections_list (key, collection_id) FOR VALUES IN ( 0 ); +DEBUG: relation "collections_list_key_seq" already exists, skipping +DETAIL: from localhost:xxxxx +DEBUG: relation "collections_list_key_seq" already exists, skipping +DETAIL: from localhost:xxxxx CREATE TABLE collections_list_1 PARTITION OF collections_list (key, collection_id) FOR VALUES IN ( 1 ); +DEBUG: relation "collections_list_key_seq" already exists, skipping +DETAIL: from localhost:xxxxx +DEBUG: relation "collections_list_key_seq" already exists, skipping +DETAIL: from localhost:xxxxx -- connection worker and get ready for the tests \c - - - :worker_1_port SET search_path TO local_shard_copy; diff --git a/src/test/regress/expected/local_shard_execution.out b/src/test/regress/expected/local_shard_execution.out index 805d69569..6b9d73217 100644 --- a/src/test/regress/expected/local_shard_execution.out +++ b/src/test/regress/expected/local_shard_execution.out @@ -2,7 +2,6 @@ CREATE SCHEMA local_shard_execution; SET search_path TO local_shard_execution; SET citus.shard_count TO 4; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; SET citus.next_shard_id TO 1470000; CREATE TABLE reference_table (key int PRIMARY KEY); SELECT create_reference_table('reference_table'); @@ -1799,7 +1798,6 @@ RESET citus.log_local_commands; \c - - - :master_port SET citus.next_shard_id TO 1480000; -- test both local and remote execution with custom type -SET citus.replication_model TO "streaming"; SET citus.shard_replication_factor TO 1; CREATE TYPE invite_resp AS ENUM ('yes', 'no', 'maybe'); CREATE TABLE event_responses ( @@ -2234,6 +2232,40 @@ DEBUG: Creating router plan (2 rows) \c - - - :master_port +-- verify the local_hostname guc is used for local executions that should connect to the +-- local host +ALTER SYSTEM SET citus.local_hostname TO 'foobar'; +SELECT pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +SELECT pg_sleep(0.1); -- wait to make sure the config has changed before running the GUC + pg_sleep +--------------------------------------------------------------------- + +(1 row) + +SET citus.enable_local_execution TO false; -- force a connection to the dummy placements +-- run queries that use dummy placements for local execution +SELECT * FROM event_responses WHERE FALSE; +ERROR: connection to the remote node foobar:57636 failed with the following error: could not translate host name "foobar" to address: +WITH cte_1 AS (SELECT * FROM event_responses LIMIT 1) SELECT count(*) FROM cte_1; +ERROR: connection to the remote node foobar:57636 failed with the following error: could not translate host name "foobar" to address: +ALTER SYSTEM RESET 
citus.local_hostname; +SELECT pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +SELECT pg_sleep(.1); -- wait to make sure the config has changed before running the GUC + pg_sleep +--------------------------------------------------------------------- + +(1 row) + SET client_min_messages TO ERROR; SET search_path TO public; DROP SCHEMA local_shard_execution CASCADE; diff --git a/src/test/regress/expected/locally_execute_intermediate_results.out b/src/test/regress/expected/locally_execute_intermediate_results.out index d6a52bd17..717f9f27e 100644 --- a/src/test/regress/expected/locally_execute_intermediate_results.out +++ b/src/test/regress/expected/locally_execute_intermediate_results.out @@ -5,7 +5,6 @@ SET citus.log_local_commands TO TRUE; SET citus.shard_count TO 4; SET citus.next_shard_id TO 1580000; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; CREATE TABLE table_1 (key int, value text); SELECT create_distributed_table('table_1', 'key'); create_distributed_table diff --git a/src/test/regress/expected/master_copy_shard_placement.out b/src/test/regress/expected/master_copy_shard_placement.out index 65af97264..4b4f23a28 100644 --- a/src/test/regress/expected/master_copy_shard_placement.out +++ b/src/test/regress/expected/master_copy_shard_placement.out @@ -3,7 +3,6 @@ CREATE SCHEMA mcsp; SET search_path TO mcsp; SET citus.next_shard_id TO 8139000; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'statement'; CREATE TABLE ref_table(a int, b text unique); SELECT create_reference_table('ref_table'); create_reference_table @@ -36,6 +35,9 @@ SELECT create_distributed_table('history','key'); (1 row) +-- Mark tables as non-mx tables, in order to be able to test master_copy_shard_placement +UPDATE pg_dist_partition SET repmodel='c' WHERE logicalrelid IN + ('data'::regclass, 'history'::regclass); INSERT INTO data VALUES ('key-1', 'value-1'); INSERT INTO data VALUES ('key-2', 'value-2'); INSERT INTO history VALUES ('key-1', '2020-02-01', 'old'); @@ -107,7 +109,13 @@ SELECT count(*) FROM history; -- test we can not replicate MX tables SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; +-- metadata sync will fail as we have a statement replicated table +SELECT start_metadata_sync_to_node('localhost', :worker_1_port); +ERROR: relation "mcsp.history" does not exist +CONTEXT: while executing command on localhost:xxxxx +-- use streaming replication to enable metadata syncing +UPDATE pg_dist_partition SET repmodel='s' WHERE logicalrelid IN + ('history'::regclass); SELECT start_metadata_sync_to_node('localhost', :worker_1_port); start_metadata_sync_to_node --------------------------------------------------------------------- diff --git a/src/test/regress/expected/mixed_relkind_tests.out b/src/test/regress/expected/mixed_relkind_tests.out index eca897eaf..c201f37fb 100644 --- a/src/test/regress/expected/mixed_relkind_tests.out +++ b/src/test/regress/expected/mixed_relkind_tests.out @@ -64,6 +64,7 @@ CREATE VIEW view_on_part_dist AS SELECT * FROM partitioned_distributed_table; CREATE MATERIALIZED VIEW mat_view_on_part_dist AS SELECT * FROM partitioned_distributed_table; CREATE FOREIGN TABLE foreign_distributed_table (a int, b int) SERVER fake_fdw_server; SELECT create_distributed_table('foreign_distributed_table', 'a'); +NOTICE: foreign-data wrapper "fake_fdw" does not have an extension defined NOTICE: foreign-data wrapper "fake_fdw" does 
not have an extension defined create_distributed_table --------------------------------------------------------------------- diff --git a/src/test/regress/expected/multi_cluster_management.out b/src/test/regress/expected/multi_cluster_management.out index d31b46fe5..963894ddd 100644 --- a/src/test/regress/expected/multi_cluster_management.out +++ b/src/test/regress/expected/multi_cluster_management.out @@ -81,12 +81,18 @@ SELECT master_get_active_worker_nodes(); -- add some shard placements to the cluster SET citus.shard_count TO 16; SET citus.shard_replication_factor TO 1; +-- test warnings on setting the deprecated guc for replication model +BEGIN; +SET citus.replication_model to 'statement'; +NOTICE: Setting citus.replication_model has no effect. Please use citus.shard_replication_factor instead. +DETAIL: Citus determines the replication model based on the replication factor and the replication models of the colocated shards. If a colocated table is present, the replication model is inherited. Otherwise 'streaming' replication is preferred if supported by the replication factor. +ROLLBACK; SELECT * FROM citus_activate_node('localhost', :worker_2_port); WARNING: citus.enable_object_propagation is off, not creating distributed objects on worker DETAIL: distributed objects are only kept in sync when citus.enable_object_propagation is set to on. Newly activated nodes will not get these objects created citus_activate_node --------------------------------------------------------------------- - 3 + 3 (1 row) CREATE TABLE cluster_management_test (col_1 text, col_2 int); @@ -130,6 +136,10 @@ SELECT create_reference_table('test_reference_table'); (1 row) INSERT INTO test_reference_table VALUES (1, '1'); +-- try to remove a node with active placements and reference tables +SELECT citus_remove_node('localhost', :worker_2_port); +ERROR: cannot remove the primary node of a node group which has shard placements +HINT: To proceed, either drop the distributed tables or use undistribute_table() function to convert them to local tables -- try to disable a node with active placements see that node is removed -- observe that a notification is displayed SELECT master_disable_node('localhost', :worker_2_port); @@ -260,6 +270,7 @@ ABORT; \c - postgres - :master_port SET citus.next_shard_id TO 1220016; SET citus.enable_object_propagation TO off; -- prevent object propagation on add node during setup +SET citus.shard_replication_factor TO 1; SELECT master_get_active_worker_nodes(); master_get_active_worker_nodes --------------------------------------------------------------------- @@ -322,7 +333,16 @@ SELECT shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement WHER (8 rows) CREATE TABLE cluster_management_test_colocated (col_1 text, col_2 int); -SELECT create_distributed_table('cluster_management_test_colocated', 'col_1', 'hash', colocate_with=>'cluster_management_test'); +-- Check that we warn the user about colocated shards that will not get created for shards that do not have active placements +SELECT create_distributed_table('cluster_management_test_colocated', 'col_1', 'hash', colocate_with => 'cluster_management_test'); +WARNING: could not find any shard placements for shardId 1220017 +WARNING: could not find any shard placements for shardId 1220019 +WARNING: could not find any shard placements for shardId 1220021 +WARNING: could not find any shard placements for shardId 1220023 +WARNING: could not find any shard placements for shardId 1220025 +WARNING: could not find any shard placements 
for shardId 1220027 +WARNING: could not find any shard placements for shardId 1220029 +WARNING: could not find any shard placements for shardId 1220031 create_distributed_table --------------------------------------------------------------------- @@ -358,17 +378,30 @@ SELECT logicalrelid, shardid, shardstate, nodename, nodeport FROM pg_dist_shard_ cluster_management_test | 1220015 | 4 | localhost | 57638 (24 rows) --- try to remove a node with only to be deleted placements and see that removal still fails +SELECT * INTO removed_placements FROM pg_dist_placement WHERE shardstate = 4; +-- try to remove a node with only to be deleted placements and see that removal succeeds SELECT master_remove_node('localhost', :worker_2_port); -ERROR: cannot remove the primary node of a node group which has shard placements -HINT: To proceed, either drop the distributed tables or use undistribute_table() function to convert them to local tables + master_remove_node +--------------------------------------------------------------------- + +(1 row) + SELECT master_get_active_worker_nodes(); master_get_active_worker_nodes --------------------------------------------------------------------- - (localhost,57638) (localhost,57637) -(2 rows) +(1 row) +SELECT master_add_node('localhost', :worker_2_port, groupId := :worker_2_group); +WARNING: citus.enable_object_propagation is off, not creating distributed objects on worker +DETAIL: distributed objects are only kept in sync when citus.enable_object_propagation is set to on. Newly activated nodes will not get these objects created + master_add_node +--------------------------------------------------------------------- + 7 +(1 row) + +-- put removed placements back for testing purposes(in practice we wouldn't have only old placements for a shard) +INSERT INTO pg_dist_placement SELECT * FROM removed_placements; -- clean-up SELECT 1 FROM master_add_node('localhost', :worker_2_port); ?column? @@ -503,14 +536,14 @@ WARNING: citus.enable_object_propagation is off, not creating distributed objec DETAIL: distributed objects are only kept in sync when citus.enable_object_propagation is set to on. 
Newly activated nodes will not get these objects created master_add_node | master_add_node --------------------------------------------------------------------- - 11 | 12 + 12 | 13 (1 row) SELECT * FROM pg_dist_node ORDER BY nodeid; nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards --------------------------------------------------------------------- - 11 | 9 | localhost | 57637 | default | f | t | primary | default | f | t - 12 | 10 | localhost | 57638 | default | f | t | primary | default | f | t + 12 | 9 | localhost | 57637 | default | f | t | primary | default | f | t + 13 | 10 | localhost | 57638 | default | f | t | primary | default | f | t (2 rows) -- check that mixed add/remove node commands work fine inside transaction @@ -708,13 +741,13 @@ SELECT 1 FROM master_add_inactive_node('localhost', 9996, groupid => :worker_2_g SELECT master_add_inactive_node('localhost', 9999, groupid => :worker_2_group, nodecluster => 'olap', noderole => 'secondary'); master_add_inactive_node --------------------------------------------------------------------- - 22 + 23 (1 row) SELECT master_activate_node('localhost', 9999); master_activate_node --------------------------------------------------------------------- - 22 + 23 (1 row) SELECT master_disable_node('localhost', 9999); @@ -742,17 +775,17 @@ CONTEXT: PL/pgSQL function citus_internal.pg_dist_node_trigger_func() line 18 a INSERT INTO pg_dist_node (nodename, nodeport, groupid, noderole, nodecluster) VALUES ('localhost', 5000, 1000, 'primary', 'olap'); ERROR: new row for relation "pg_dist_node" violates check constraint "primaries_are_only_allowed_in_the_default_cluster" -DETAIL: Failing row contains (24, 1000, localhost, 5000, default, f, t, primary, olap, f, t). +DETAIL: Failing row contains (25, 1000, localhost, 5000, default, f, t, primary, olap, f, t). UPDATE pg_dist_node SET nodecluster = 'olap' WHERE nodeport = :worker_1_port; ERROR: new row for relation "pg_dist_node" violates check constraint "primaries_are_only_allowed_in_the_default_cluster" -DETAIL: Failing row contains (16, 14, localhost, 57637, default, f, t, primary, olap, f, t). +DETAIL: Failing row contains (17, 14, localhost, 57637, default, f, t, primary, olap, f, t). -- check that you /can/ add a secondary node to a non-default cluster SELECT groupid AS worker_2_group FROM pg_dist_node WHERE nodeport = :worker_2_port \gset SELECT master_add_node('localhost', 8888, groupid => :worker_1_group, noderole => 'secondary', nodecluster=> 'olap'); master_add_node --------------------------------------------------------------------- - 25 + 26 (1 row) -- check that super-long cluster names are truncated @@ -765,13 +798,13 @@ SELECT master_add_node('localhost', 8887, groupid => :worker_1_group, noderole = ); master_add_node --------------------------------------------------------------------- - 26 + 27 (1 row) SELECT * FROM pg_dist_node WHERE nodeport=8887; nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards --------------------------------------------------------------------- - 26 | 14 | localhost | 8887 | default | f | t | secondary | thisisasixtyfourcharacterstringrepeatedfourtimestomake256chars. | f | t + 27 | 14 | localhost | 8887 | default | f | t | secondary | thisisasixtyfourcharacterstringrepeatedfourtimestomake256chars. 
| f | t (1 row) -- don't remove the secondary and unavailable nodes, check that no commands are sent to @@ -780,13 +813,13 @@ SELECT * FROM pg_dist_node WHERE nodeport=8887; SELECT master_add_secondary_node('localhost', 9995, 'localhost', :worker_1_port); master_add_secondary_node --------------------------------------------------------------------- - 27 + 28 (1 row) SELECT master_add_secondary_node('localhost', 9994, primaryname => 'localhost', primaryport => :worker_2_port); master_add_secondary_node --------------------------------------------------------------------- - 28 + 29 (1 row) SELECT master_add_secondary_node('localhost', 9993, 'localhost', 2000); @@ -794,7 +827,7 @@ ERROR: node at "localhost:xxxxx" does not exist SELECT master_add_secondary_node('localhost', 9992, 'localhost', :worker_1_port, nodecluster => 'second-cluster'); master_add_secondary_node --------------------------------------------------------------------- - 29 + 30 (1 row) SELECT nodeid AS worker_1_node FROM pg_dist_node WHERE nodeport=:worker_1_port \gset @@ -814,7 +847,7 @@ SELECT master_update_node(:worker_1_node, 'somehost', 9000); SELECT * FROM pg_dist_node WHERE nodeid = :worker_1_node; nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards --------------------------------------------------------------------- - 16 | 14 | somehost | 9000 | default | f | t | primary | default | f | t + 17 | 14 | somehost | 9000 | default | f | t | primary | default | f | t (1 row) -- cleanup @@ -827,7 +860,7 @@ SELECT master_update_node(:worker_1_node, 'localhost', :worker_1_port); SELECT * FROM pg_dist_node WHERE nodeid = :worker_1_node; nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards --------------------------------------------------------------------- - 16 | 14 | localhost | 57637 | default | f | t | primary | default | f | t + 17 | 14 | localhost | 57637 | default | f | t | primary | default | f | t (1 row) SET citus.shard_replication_factor TO 1; diff --git a/src/test/regress/expected/multi_colocated_shard_rebalance.out b/src/test/regress/expected/multi_colocated_shard_rebalance.out index 70c4d8f20..939414ef9 100644 --- a/src/test/regress/expected/multi_colocated_shard_rebalance.out +++ b/src/test/regress/expected/multi_colocated_shard_rebalance.out @@ -260,6 +260,7 @@ WHERE p.logicalrelid = s.logicalrelid AND s.shardid = sp.shardid AND colocationid = (SELECT colocationid FROM pg_dist_partition WHERE logicalrelid = 'table1_group1'::regclass) + AND sp.shardstate != 4 ORDER BY s.shardid, sp.nodeport; shardid | logicalrelid | nodeport --------------------------------------------------------------------- @@ -303,6 +304,7 @@ WHERE p.logicalrelid = s.logicalrelid AND s.shardid = sp.shardid AND p.logicalrelid = 'table5_groupX'::regclass + AND sp.shardstate != 4 ORDER BY s.shardid, sp.nodeport; shardid | logicalrelid | nodeport --------------------------------------------------------------------- @@ -331,7 +333,8 @@ FROM WHERE p.logicalrelid = s.logicalrelid AND s.shardid = sp.shardid AND - p.logicalrelid = 'table5_groupX'::regclass + p.logicalrelid = 'table5_groupX'::regclass AND + sp.shardstate != 4 ORDER BY s.shardid, sp.nodeport; shardid | logicalrelid | nodeport --------------------------------------------------------------------- @@ -355,6 +358,7 @@ WHERE p.logicalrelid = s.logicalrelid AND s.shardid = sp.shardid AND p.logicalrelid = 
'table6_append'::regclass + AND sp.shardstate != 4 ORDER BY s.shardid, sp.nodeport; shardid | logicalrelid | nodeport --------------------------------------------------------------------- @@ -376,7 +380,8 @@ FROM WHERE p.logicalrelid = s.logicalrelid AND s.shardid = sp.shardid AND - p.logicalrelid = 'table6_append'::regclass + p.logicalrelid = 'table6_append'::regclass AND + sp.shardstate != 4 ORDER BY s.shardid, sp.nodeport; shardid | logicalrelid | nodeport --------------------------------------------------------------------- @@ -386,8 +391,7 @@ ORDER BY s.shardid, sp.nodeport; -- try to move shard from wrong node SELECT master_move_shard_placement(13000021, 'localhost', :worker_1_port, 'localhost', :worker_2_port); -ERROR: could not find placement matching "localhost:xxxxx" -HINT: Confirm the placement still exists and try again. +ERROR: source placement must be in active state -- test shard move with foreign constraints DROP TABLE IF EXISTS table1_group1, table2_group1; SET citus.shard_count TO 6; @@ -418,6 +422,7 @@ WHERE p.logicalrelid = s.logicalrelid AND s.shardid = sp.shardid AND colocationid = (SELECT colocationid FROM pg_dist_partition WHERE logicalrelid = 'table1_group1'::regclass) + AND sp.shardstate != 4 ORDER BY s.shardid, sp.nodeport; shardid | logicalrelid | nodeport --------------------------------------------------------------------- @@ -449,6 +454,7 @@ WHERE p.logicalrelid = s.logicalrelid AND s.shardid = sp.shardid AND colocationid = (SELECT colocationid FROM pg_dist_partition WHERE logicalrelid = 'table1_group1'::regclass) + AND sp.shardstate != 4 ORDER BY s.shardid, sp.nodeport; shardid | logicalrelid | nodeport --------------------------------------------------------------------- @@ -581,6 +587,7 @@ SELECT count(*) FROM move_partitions.events; SELECT master_move_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port) FROM pg_dist_shard JOIN pg_dist_shard_placement USING (shardid) WHERE logicalrelid = 'move_partitions.events'::regclass AND nodeport = :worker_2_port +AND shardstate != 4 ORDER BY shardid LIMIT 1; master_move_shard_placement --------------------------------------------------------------------- @@ -598,7 +605,7 @@ ALTER TABLE move_partitions.events_1 ADD CONSTRAINT e_1_pk PRIMARY KEY (id); -- should be able to move automatically now SELECT master_move_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port) FROM pg_dist_shard JOIN pg_dist_shard_placement USING (shardid) -WHERE logicalrelid = 'move_partitions.events'::regclass AND nodeport = :worker_2_port +WHERE logicalrelid = 'move_partitions.events'::regclass AND nodeport = :worker_2_port AND shardstate != 4 ORDER BY shardid LIMIT 1; master_move_shard_placement --------------------------------------------------------------------- @@ -614,7 +621,7 @@ SELECT count(*) FROM move_partitions.events; -- should also be able to move with block writes SELECT master_move_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'block_writes') FROM pg_dist_shard JOIN pg_dist_shard_placement USING (shardid) -WHERE logicalrelid = 'move_partitions.events'::regclass AND nodeport = :worker_2_port +WHERE logicalrelid = 'move_partitions.events'::regclass AND nodeport = :worker_2_port AND shardstate != 4 ORDER BY shardid LIMIT 1; master_move_shard_placement --------------------------------------------------------------------- diff --git a/src/test/regress/expected/multi_colocation_utils.out b/src/test/regress/expected/multi_colocation_utils.out 
index dfc26c7a2..f017f740c 100644 --- a/src/test/regress/expected/multi_colocation_utils.out +++ b/src/test/regress/expected/multi_colocation_utils.out @@ -1037,10 +1037,13 @@ SELECT create_distributed_table('table1_groupG', 'id'); UPDATE pg_dist_partition SET repmodel = 's' WHERE logicalrelid = 'table1_groupG'::regclass; CREATE TABLE table2_groupG ( id int ); SELECT create_distributed_table('table2_groupG', 'id', colocate_with => 'table1_groupG'); -ERROR: cannot colocate tables table1_groupg and table2_groupg -DETAIL: Replication models don't match for table1_groupg and table2_groupg. + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +DROP TABLE table2_groupG; CREATE TABLE table2_groupG ( id int ); -ERROR: relation "table2_groupg" already exists SELECT create_distributed_table('table2_groupG', 'id', colocate_with => 'NONE'); create_distributed_table --------------------------------------------------------------------- diff --git a/src/test/regress/expected/multi_create_shards.out b/src/test/regress/expected/multi_create_shards.out index 6604a247f..a1116e2ef 100644 --- a/src/test/regress/expected/multi_create_shards.out +++ b/src/test/regress/expected/multi_create_shards.out @@ -68,7 +68,7 @@ SELECT master_create_distributed_table('table_to_distribute', 'name', 'hash'); SELECT partmethod, partkey FROM pg_dist_partition WHERE logicalrelid = 'table_to_distribute'::regclass; - partmethod | partkey + partmethod | partkey --------------------------------------------------------------------- h | {VAR :varno 1 :varattno 1 :vartype 25 :vartypmod -1 :varcollid 100 :varlevelsup 0 :varnoold 1 :varoattno 1 :location -1} (1 row) @@ -159,6 +159,7 @@ SERVER fake_fdw_server; SET citus.shard_count TO 16; SET citus.shard_replication_factor TO 1; SELECT create_distributed_table('foreign_table_to_distribute', 'id', 'hash'); +NOTICE: foreign-data wrapper "fake_fdw" does not have an extension defined NOTICE: foreign-data wrapper "fake_fdw" does not have an extension defined create_distributed_table --------------------------------------------------------------------- diff --git a/src/test/regress/expected/multi_create_table_superuser.out b/src/test/regress/expected/multi_create_table_superuser.out index ff34e8ef3..fdafd0c41 100644 --- a/src/test/regress/expected/multi_create_table_superuser.out +++ b/src/test/regress/expected/multi_create_table_superuser.out @@ -1,60 +1,9 @@ ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 360005; ALTER SEQUENCE pg_catalog.pg_dist_colocationid_seq RESTART 100000; --- Since we're superuser, we can set the replication model to 'streaming' to --- create a one-off MX table... but if we forget to set the replication factor to one, --- we should see an error reminding us to fix that -SET citus.replication_model TO 'streaming'; -SELECT create_distributed_table('mx_table_test', 'col1'); -ERROR: replication factors above one are incompatible with the streaming replication model -HINT: Try again after reducing "citus.shard_replication_factor" to one or setting "citus.replication_model" to "statement". 
--- ok, so now actually create the one-off MX table SET citus.shard_replication_factor TO 1; -SELECT create_distributed_table('mx_table_test', 'col1'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -SELECT repmodel FROM pg_dist_partition WHERE logicalrelid='mx_table_test'::regclass; - repmodel ---------------------------------------------------------------------- - s -(1 row) - -DROP TABLE mx_table_test; --- Show that master_create_distributed_table ignores citus.replication_model GUC -CREATE TABLE s_table(a int); -SELECT master_create_distributed_table('s_table', 'a', 'hash'); -NOTICE: using statement-based replication -DETAIL: The current replication_model setting is 'streaming', which is not supported by master_create_distributed_table. -HINT: Use create_distributed_table to use the streaming replication model. - master_create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -SELECT repmodel FROM pg_dist_partition WHERE logicalrelid='s_table'::regclass; - repmodel ---------------------------------------------------------------------- - c -(1 row) - --- Show that master_create_worker_shards complains when RF>1 and replication model is streaming -UPDATE pg_dist_partition SET repmodel = 's' WHERE logicalrelid='s_table'::regclass; -SELECT master_create_worker_shards('s_table', 4, 2); -ERROR: using replication factor 2 with the streaming replication model is not supported -DETAIL: The table s_table is marked as streaming replicated and the shard replication factor of streaming replicated tables must be 1. -HINT: Use replication factor 1. -DROP TABLE s_table; -RESET citus.replication_model; --- Show that create_distributed_table with append and range distributions ignore --- citus.replication_model GUC -SET citus.shard_replication_factor TO 2; -SET citus.replication_model TO streaming; +-- test that range and append distributed tables have coordinator replication CREATE TABLE repmodel_test (a int); SELECT create_distributed_table('repmodel_test', 'a', 'append'); -NOTICE: using statement-based replication -DETAIL: Streaming replication is supported only for hash-distributed tables. create_distributed_table --------------------------------------------------------------------- @@ -69,8 +18,6 @@ SELECT repmodel FROM pg_dist_partition WHERE logicalrelid='repmodel_test'::regcl DROP TABLE repmodel_test; CREATE TABLE repmodel_test (a int); SELECT create_distributed_table('repmodel_test', 'a', 'range'); -NOTICE: using statement-based replication -DETAIL: Streaming replication is supported only for hash-distributed tables. create_distributed_table --------------------------------------------------------------------- @@ -83,13 +30,9 @@ SELECT repmodel FROM pg_dist_partition WHERE logicalrelid='repmodel_test'::regcl (1 row) DROP TABLE repmodel_test; --- Show that master_create_distributed_table created statement replicated tables no matter --- what citus.replication_model set to +-- test that deprecated api creates distributed tables with coordinator replication CREATE TABLE repmodel_test (a int); SELECT master_create_distributed_table('repmodel_test', 'a', 'hash'); -NOTICE: using statement-based replication -DETAIL: The current replication_model setting is 'streaming', which is not supported by master_create_distributed_table. -HINT: Use create_distributed_table to use the streaming replication model. 
master_create_distributed_table --------------------------------------------------------------------- @@ -104,9 +47,6 @@ SELECT repmodel FROM pg_dist_partition WHERE logicalrelid='repmodel_test'::regcl DROP TABLE repmodel_test; CREATE TABLE repmodel_test (a int); SELECT master_create_distributed_table('repmodel_test', 'a', 'append'); -NOTICE: using statement-based replication -DETAIL: The current replication_model setting is 'streaming', which is not supported by master_create_distributed_table. -HINT: Use create_distributed_table to use the streaming replication model. master_create_distributed_table --------------------------------------------------------------------- @@ -121,9 +61,6 @@ SELECT repmodel FROM pg_dist_partition WHERE logicalrelid='repmodel_test'::regcl DROP TABLE repmodel_test; CREATE TABLE repmodel_test (a int); SELECT master_create_distributed_table('repmodel_test', 'a', 'range'); -NOTICE: using statement-based replication -DETAIL: The current replication_model setting is 'streaming', which is not supported by master_create_distributed_table. -HINT: Use create_distributed_table to use the streaming replication model. master_create_distributed_table --------------------------------------------------------------------- @@ -136,92 +73,7 @@ SELECT repmodel FROM pg_dist_partition WHERE logicalrelid='repmodel_test'::regcl (1 row) DROP TABLE repmodel_test; --- Check that the replication_model overwrite behavior is the same with RF=1 -SET citus.shard_replication_factor TO 1; -CREATE TABLE repmodel_test (a int); -SELECT create_distributed_table('repmodel_test', 'a', 'append'); -NOTICE: using statement-based replication -DETAIL: Streaming replication is supported only for hash-distributed tables. - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -SELECT repmodel FROM pg_dist_partition WHERE logicalrelid='repmodel_test'::regclass; - repmodel ---------------------------------------------------------------------- - c -(1 row) - -DROP TABLE repmodel_test; -CREATE TABLE repmodel_test (a int); -SELECT create_distributed_table('repmodel_test', 'a', 'range'); -NOTICE: using statement-based replication -DETAIL: Streaming replication is supported only for hash-distributed tables. - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -SELECT repmodel FROM pg_dist_partition WHERE logicalrelid='repmodel_test'::regclass; - repmodel ---------------------------------------------------------------------- - c -(1 row) - -DROP TABLE repmodel_test; -CREATE TABLE repmodel_test (a int); -SELECT master_create_distributed_table('repmodel_test', 'a', 'hash'); -NOTICE: using statement-based replication -DETAIL: The current replication_model setting is 'streaming', which is not supported by master_create_distributed_table. -HINT: Use create_distributed_table to use the streaming replication model. - master_create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -SELECT repmodel FROM pg_dist_partition WHERE logicalrelid='repmodel_test'::regclass; - repmodel ---------------------------------------------------------------------- - c -(1 row) - -DROP TABLE repmodel_test; -CREATE TABLE repmodel_test (a int); -SELECT master_create_distributed_table('repmodel_test', 'a', 'append'); -NOTICE: using statement-based replication -DETAIL: The current replication_model setting is 'streaming', which is not supported by master_create_distributed_table. 
-HINT: Use create_distributed_table to use the streaming replication model. - master_create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -SELECT repmodel FROM pg_dist_partition WHERE logicalrelid='repmodel_test'::regclass; - repmodel ---------------------------------------------------------------------- - c -(1 row) - -DROP TABLE repmodel_test; -CREATE TABLE repmodel_test (a int); -SELECT master_create_distributed_table('repmodel_test', 'a', 'range'); -NOTICE: using statement-based replication -DETAIL: The current replication_model setting is 'streaming', which is not supported by master_create_distributed_table. -HINT: Use create_distributed_table to use the streaming replication model. - master_create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -SELECT repmodel FROM pg_dist_partition WHERE logicalrelid='repmodel_test'::regclass; - repmodel ---------------------------------------------------------------------- - c -(1 row) - -DROP TABLE repmodel_test; -RESET citus.replication_model; +RESET citus.shard_replication_factor; ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 360025; -- There should be no table on the worker node \c - - :public_worker_1_host :worker_1_port @@ -339,7 +191,7 @@ SELECT create_distributed_table('unlogged_table', 'key'); (1 row) SELECT * FROM master_get_table_ddl_events('unlogged_table'); - master_get_table_ddl_events + master_get_table_ddl_events --------------------------------------------------------------------- CREATE UNLOGGED TABLE public.unlogged_table (key text, value text) ALTER TABLE public.unlogged_table OWNER TO postgres diff --git a/src/test/regress/expected/multi_extension.out b/src/test/regress/expected/multi_extension.out index 5fa875416..2914db9fd 100644 --- a/src/test/regress/expected/multi_extension.out +++ b/src/test/regress/expected/multi_extension.out @@ -52,7 +52,9 @@ BEGIN TRUNCATE TABLE extension_diff; CREATE TABLE current_objects AS - SELECT pg_catalog.pg_describe_object(classid, objid, 0) AS description + SELECT pg_catalog.pg_describe_object(classid, objid, 0) + || ' ' || + coalesce(pg_catalog.pg_get_function_result(objid), '') AS description FROM pg_catalog.pg_depend, pg_catalog.pg_extension e WHERE refclassid = 'pg_catalog.pg_extension'::pg_catalog.regclass AND refobjid = e.oid @@ -129,166 +131,166 @@ ALTER EXTENSION citus UPDATE TO '9.2-1'; ALTER EXTENSION citus UPDATE TO '9.2-2'; -- Snapshot of state at 9.2-2 SELECT * FROM print_extension_changes(); - previous_object | current_object + previous_object | current_object --------------------------------------------------------------------- | event trigger citus_cascade_to_partition - | function alter_role_if_exists(text,text) - | function any_value(anyelement) - | function any_value_agg(anyelement,anyelement) - | function array_cat_agg(anyarray) - | function assign_distributed_transaction_id(integer,bigint,timestamp with time zone) - | function authinfo_valid(text) - | function broadcast_intermediate_result(text,text) - | function check_distributed_deadlocks() - | function citus_add_rebalance_strategy(name,regproc,regproc,regproc,real,real) - | function citus_blocking_pids(integer) - | function citus_create_restore_point(text) - | function citus_dist_stat_activity() - | function citus_drop_trigger() - | function citus_executor_name(integer) - | function citus_extradata_container(internal) - | function citus_finish_pg_upgrade() - | function 
citus_internal.find_groupid_for_node(text,integer) - | function citus_internal.pg_dist_node_trigger_func() - | function citus_internal.pg_dist_rebalance_strategy_enterprise_check() - | function citus_internal.pg_dist_rebalance_strategy_trigger_func() - | function citus_internal.pg_dist_shard_placement_trigger_func() - | function citus_internal.refresh_isolation_tester_prepared_statement() - | function citus_internal.replace_isolation_tester_func() - | function citus_internal.restore_isolation_tester_func() - | function citus_isolation_test_session_is_blocked(integer,integer[]) - | function citus_json_concatenate(json,json) - | function citus_json_concatenate_final(json) - | function citus_jsonb_concatenate(jsonb,jsonb) - | function citus_jsonb_concatenate_final(jsonb) - | function citus_node_capacity_1(integer) - | function citus_prepare_pg_upgrade() - | function citus_query_stats() - | function citus_relation_size(regclass) - | function citus_server_id() - | function citus_set_default_rebalance_strategy(text) - | function citus_shard_allowed_on_node_true(bigint,integer) - | function citus_shard_cost_1(bigint) - | function citus_shard_cost_by_disk_size(bigint) - | function citus_stat_statements() - | function citus_stat_statements_reset() - | function citus_table_is_visible(oid) - | function citus_table_size(regclass) - | function citus_text_send_as_jsonb(text) - | function citus_total_relation_size(regclass) - | function citus_truncate_trigger() - | function citus_validate_rebalance_strategy_functions(regproc,regproc,regproc) - | function citus_version() - | function citus_worker_stat_activity() - | function column_name_to_column(regclass,text) - | function column_to_column_name(regclass,text) - | function coord_combine_agg(oid,cstring,anyelement) - | function coord_combine_agg_ffunc(internal,oid,cstring,anyelement) - | function coord_combine_agg_sfunc(internal,oid,cstring,anyelement) - | function create_distributed_function(regprocedure,text,text) - | function create_distributed_table(regclass,text,citus.distribution_type,text) - | function create_intermediate_result(text,text) - | function create_reference_table(regclass) - | function distributed_tables_colocated(regclass,regclass) - | function dump_global_wait_edges() - | function dump_local_wait_edges() - | function fetch_intermediate_results(text[],text,integer) - | function get_all_active_transactions() - | function get_colocated_shard_array(bigint) - | function get_colocated_table_array(regclass) - | function get_current_transaction_id() - | function get_global_active_transactions() - | function get_rebalance_progress() - | function get_rebalance_table_shards_plan(regclass,real,integer,bigint[],boolean,name) - | function get_shard_id_for_distribution_column(regclass,"any") - | function isolate_tenant_to_new_shard(regclass,"any",text) - | function json_cat_agg(json) - | function jsonb_cat_agg(jsonb) - | function lock_relation_if_exists(text,text) - | function lock_shard_metadata(integer,bigint[]) - | function lock_shard_resources(integer,bigint[]) - | function mark_tables_colocated(regclass,regclass[]) - | function master_activate_node(text,integer) - | function master_add_inactive_node(text,integer,integer,noderole,name) - | function master_add_node(text,integer,integer,noderole,name) - | function master_add_secondary_node(text,integer,text,integer,name) - | function master_append_table_to_shard(bigint,text,text,integer) - | function master_apply_delete_command(text) - | function master_conninfo_cache_invalidate() - | function 
master_copy_shard_placement(bigint,text,integer,text,integer,boolean,citus.shard_transfer_mode) - | function master_create_distributed_table(regclass,text,citus.distribution_type) - | function master_create_empty_shard(text) - | function master_create_worker_shards(text,integer,integer) - | function master_disable_node(text,integer) - | function master_dist_local_group_cache_invalidate() - | function master_dist_node_cache_invalidate() - | function master_dist_object_cache_invalidate() - | function master_dist_partition_cache_invalidate() - | function master_dist_placement_cache_invalidate() - | function master_dist_shard_cache_invalidate() - | function master_drain_node(text,integer,citus.shard_transfer_mode,name) - | function master_drop_all_shards(regclass,text,text) - | function master_drop_sequences(text[]) - | function master_get_active_worker_nodes() - | function master_get_new_placementid() - | function master_get_new_shardid() - | function master_get_table_ddl_events(text) - | function master_get_table_metadata(text) - | function master_modify_multiple_shards(text) - | function master_move_shard_placement(bigint,text,integer,text,integer,citus.shard_transfer_mode) - | function master_remove_distributed_table_metadata_from_workers(regclass,text,text) - | function master_remove_node(text,integer) - | function master_remove_partition_metadata(regclass,text,text) - | function master_run_on_worker(text[],integer[],text[],boolean) - | function master_set_node_property(text,integer,text,boolean) - | function master_unmark_object_distributed(oid,oid,integer) - | function master_update_node(integer,text,integer,boolean,integer) - | function master_update_shard_statistics(bigint) - | function master_update_table_statistics(regclass) - | function poolinfo_valid(text) - | function read_intermediate_result(text,citus_copy_format) - | function read_intermediate_results(text[],citus_copy_format) - | function rebalance_table_shards(regclass,real,integer,bigint[],citus.shard_transfer_mode,boolean,name) - | function recover_prepared_transactions() - | function relation_is_a_known_shard(regclass) - | function replicate_table_shards(regclass,integer,integer,bigint[],citus.shard_transfer_mode) - | function role_exists(name) - | function run_command_on_colocated_placements(regclass,regclass,text,boolean) - | function run_command_on_placements(regclass,text,boolean) - | function run_command_on_shards(regclass,text,boolean) - | function run_command_on_workers(text,boolean) - | function shard_name(regclass,bigint) - | function start_metadata_sync_to_node(text,integer) - | function stop_metadata_sync_to_node(text,integer) - | function task_tracker_assign_task(bigint,integer,text) - | function task_tracker_cleanup_job(bigint) - | function task_tracker_conninfo_cache_invalidate() - | function task_tracker_task_status(bigint,integer) - | function upgrade_to_reference_table(regclass) - | function worker_append_table_to_shard(text,text,text,integer) - | function worker_apply_inter_shard_ddl_command(bigint,text,bigint,text,text) - | function worker_apply_sequence_command(text) - | function worker_apply_sequence_command(text,regtype) - | function worker_apply_shard_ddl_command(bigint,text) - | function worker_apply_shard_ddl_command(bigint,text,text) - | function worker_cleanup_job_schema_cache() - | function worker_create_or_replace_object(text) - | function worker_create_schema(bigint,text) - | function worker_create_truncate_trigger(regclass) - | function worker_drop_distributed_table(text) - | function 
worker_execute_sql_task(bigint,integer,text,boolean) - | function worker_fetch_foreign_file(text,text,bigint,text[],integer[]) - | function worker_fetch_partition_file(bigint,integer,integer,integer,text,integer) - | function worker_hash("any") - | function worker_hash_partition_table(bigint,integer,text,text,oid,anyarray) - | function worker_merge_files_and_run_query(bigint,integer,text,text) - | function worker_merge_files_into_table(bigint,integer,text[],text[]) - | function worker_partial_agg(oid,anyelement) - | function worker_partial_agg_ffunc(internal) - | function worker_partial_agg_sfunc(internal,oid,anyelement) - | function worker_partition_query_result(text,text,integer,citus.distribution_type,text[],text[],boolean) - | function worker_range_partition_table(bigint,integer,text,text,oid,anyarray) - | function worker_repartition_cleanup(bigint) + | function alter_role_if_exists(text,text) boolean + | function any_value(anyelement) anyelement + | function any_value_agg(anyelement,anyelement) anyelement + | function array_cat_agg(anyarray) anyarray + | function assign_distributed_transaction_id(integer,bigint,timestamp with time zone) void + | function authinfo_valid(text) boolean + | function broadcast_intermediate_result(text,text) bigint + | function check_distributed_deadlocks() boolean + | function citus_add_rebalance_strategy(name,regproc,regproc,regproc,real,real) void + | function citus_blocking_pids(integer) integer[] + | function citus_create_restore_point(text) pg_lsn + | function citus_dist_stat_activity() SETOF record + | function citus_drop_trigger() event_trigger + | function citus_executor_name(integer) text + | function citus_extradata_container(internal) void + | function citus_finish_pg_upgrade() void + | function citus_internal.find_groupid_for_node(text,integer) integer + | function citus_internal.pg_dist_node_trigger_func() trigger + | function citus_internal.pg_dist_rebalance_strategy_enterprise_check() trigger + | function citus_internal.pg_dist_rebalance_strategy_trigger_func() trigger + | function citus_internal.pg_dist_shard_placement_trigger_func() trigger + | function citus_internal.refresh_isolation_tester_prepared_statement() void + | function citus_internal.replace_isolation_tester_func() void + | function citus_internal.restore_isolation_tester_func() void + | function citus_isolation_test_session_is_blocked(integer,integer[]) boolean + | function citus_json_concatenate(json,json) json + | function citus_json_concatenate_final(json) json + | function citus_jsonb_concatenate(jsonb,jsonb) jsonb + | function citus_jsonb_concatenate_final(jsonb) jsonb + | function citus_node_capacity_1(integer) real + | function citus_prepare_pg_upgrade() void + | function citus_query_stats() SETOF record + | function citus_relation_size(regclass) bigint + | function citus_server_id() uuid + | function citus_set_default_rebalance_strategy(text) void + | function citus_shard_allowed_on_node_true(bigint,integer) boolean + | function citus_shard_cost_1(bigint) real + | function citus_shard_cost_by_disk_size(bigint) real + | function citus_stat_statements() SETOF record + | function citus_stat_statements_reset() void + | function citus_table_is_visible(oid) boolean + | function citus_table_size(regclass) bigint + | function citus_text_send_as_jsonb(text) bytea + | function citus_total_relation_size(regclass) bigint + | function citus_truncate_trigger() trigger + | function citus_validate_rebalance_strategy_functions(regproc,regproc,regproc) void + | function citus_version() 
text + | function citus_worker_stat_activity() SETOF record + | function column_name_to_column(regclass,text) text + | function column_to_column_name(regclass,text) text + | function coord_combine_agg(oid,cstring,anyelement) anyelement + | function coord_combine_agg_ffunc(internal,oid,cstring,anyelement) anyelement + | function coord_combine_agg_sfunc(internal,oid,cstring,anyelement) internal + | function create_distributed_function(regprocedure,text,text) void + | function create_distributed_table(regclass,text,citus.distribution_type,text) void + | function create_intermediate_result(text,text) bigint + | function create_reference_table(regclass) void + | function distributed_tables_colocated(regclass,regclass) boolean + | function dump_global_wait_edges() SETOF record + | function dump_local_wait_edges() SETOF record + | function fetch_intermediate_results(text[],text,integer) bigint + | function get_all_active_transactions() SETOF record + | function get_colocated_shard_array(bigint) bigint[] + | function get_colocated_table_array(regclass) regclass[] + | function get_current_transaction_id() record + | function get_global_active_transactions() SETOF record + | function get_rebalance_progress() TABLE(sessionid integer, table_name regclass, shardid bigint, shard_size bigint, sourcename text, sourceport integer, targetname text, targetport integer, progress bigint) + | function get_rebalance_table_shards_plan(regclass,real,integer,bigint[],boolean,name) TABLE(table_name regclass, shardid bigint, shard_size bigint, sourcename text, sourceport integer, targetname text, targetport integer) + | function get_shard_id_for_distribution_column(regclass,"any") bigint + | function isolate_tenant_to_new_shard(regclass,"any",text) bigint + | function json_cat_agg(json) json + | function jsonb_cat_agg(jsonb) jsonb + | function lock_relation_if_exists(text,text) boolean + | function lock_shard_metadata(integer,bigint[]) void + | function lock_shard_resources(integer,bigint[]) void + | function mark_tables_colocated(regclass,regclass[]) void + | function master_activate_node(text,integer) integer + | function master_add_inactive_node(text,integer,integer,noderole,name) integer + | function master_add_node(text,integer,integer,noderole,name) integer + | function master_add_secondary_node(text,integer,text,integer,name) integer + | function master_append_table_to_shard(bigint,text,text,integer) real + | function master_apply_delete_command(text) integer + | function master_conninfo_cache_invalidate() trigger + | function master_copy_shard_placement(bigint,text,integer,text,integer,boolean,citus.shard_transfer_mode) void + | function master_create_distributed_table(regclass,text,citus.distribution_type) void + | function master_create_empty_shard(text) bigint + | function master_create_worker_shards(text,integer,integer) void + | function master_disable_node(text,integer) void + | function master_dist_local_group_cache_invalidate() trigger + | function master_dist_node_cache_invalidate() trigger + | function master_dist_object_cache_invalidate() trigger + | function master_dist_partition_cache_invalidate() trigger + | function master_dist_placement_cache_invalidate() trigger + | function master_dist_shard_cache_invalidate() trigger + | function master_drain_node(text,integer,citus.shard_transfer_mode,name) void + | function master_drop_all_shards(regclass,text,text) integer + | function master_drop_sequences(text[]) void + | function master_get_active_worker_nodes() SETOF record + | function 
master_get_new_placementid() bigint + | function master_get_new_shardid() bigint + | function master_get_table_ddl_events(text) SETOF text + | function master_get_table_metadata(text) record + | function master_modify_multiple_shards(text) integer + | function master_move_shard_placement(bigint,text,integer,text,integer,citus.shard_transfer_mode) void + | function master_remove_distributed_table_metadata_from_workers(regclass,text,text) void + | function master_remove_node(text,integer) void + | function master_remove_partition_metadata(regclass,text,text) void + | function master_run_on_worker(text[],integer[],text[],boolean) SETOF record + | function master_set_node_property(text,integer,text,boolean) void + | function master_unmark_object_distributed(oid,oid,integer) void + | function master_update_node(integer,text,integer,boolean,integer) void + | function master_update_shard_statistics(bigint) bigint + | function master_update_table_statistics(regclass) void + | function poolinfo_valid(text) boolean + | function read_intermediate_result(text,citus_copy_format) SETOF record + | function read_intermediate_results(text[],citus_copy_format) SETOF record + | function rebalance_table_shards(regclass,real,integer,bigint[],citus.shard_transfer_mode,boolean,name) void + | function recover_prepared_transactions() integer + | function relation_is_a_known_shard(regclass) boolean + | function replicate_table_shards(regclass,integer,integer,bigint[],citus.shard_transfer_mode) void + | function role_exists(name) boolean + | function run_command_on_colocated_placements(regclass,regclass,text,boolean) SETOF record + | function run_command_on_placements(regclass,text,boolean) SETOF record + | function run_command_on_shards(regclass,text,boolean) SETOF record + | function run_command_on_workers(text,boolean) SETOF record + | function shard_name(regclass,bigint) text + | function start_metadata_sync_to_node(text,integer) void + | function stop_metadata_sync_to_node(text,integer) void + | function task_tracker_assign_task(bigint,integer,text) void + | function task_tracker_cleanup_job(bigint) void + | function task_tracker_conninfo_cache_invalidate() trigger + | function task_tracker_task_status(bigint,integer) integer + | function upgrade_to_reference_table(regclass) void + | function worker_append_table_to_shard(text,text,text,integer) void + | function worker_apply_inter_shard_ddl_command(bigint,text,bigint,text,text) void + | function worker_apply_sequence_command(text) void + | function worker_apply_sequence_command(text,regtype) void + | function worker_apply_shard_ddl_command(bigint,text) void + | function worker_apply_shard_ddl_command(bigint,text,text) void + | function worker_cleanup_job_schema_cache() void + | function worker_create_or_replace_object(text) boolean + | function worker_create_schema(bigint,text) void + | function worker_create_truncate_trigger(regclass) void + | function worker_drop_distributed_table(text) void + | function worker_execute_sql_task(bigint,integer,text,boolean) bigint + | function worker_fetch_foreign_file(text,text,bigint,text[],integer[]) void + | function worker_fetch_partition_file(bigint,integer,integer,integer,text,integer) void + | function worker_hash("any") integer + | function worker_hash_partition_table(bigint,integer,text,text,oid,anyarray) void + | function worker_merge_files_and_run_query(bigint,integer,text,text) void + | function worker_merge_files_into_table(bigint,integer,text[],text[]) void + | function worker_partial_agg(oid,anyelement) cstring 
+ | function worker_partial_agg_ffunc(internal) cstring + | function worker_partial_agg_sfunc(internal,oid,anyelement) internal + | function worker_partition_query_result(text,text,integer,citus.distribution_type,text[],text[],boolean) SETOF record + | function worker_range_partition_table(bigint,integer,text,text,oid,anyarray) void + | function worker_repartition_cleanup(bigint) void | schema citus | schema citus_internal | sequence pg_dist_colocationid_seq @@ -357,14 +359,16 @@ SELECT * FROM print_extension_changes(); -- Snapshot of state at 9.3-2 ALTER EXTENSION citus UPDATE TO '9.3-2'; SELECT * FROM print_extension_changes(); - previous_object | current_object + previous_object | current_object --------------------------------------------------------------------- - | function citus_remote_connection_stats() - | function replicate_reference_tables() - | function truncate_local_data_after_distributing_table(regclass) - | function update_distributed_table_colocation(regclass,text) - | function worker_create_or_alter_role(text,text,text) -(5 rows) + function citus_extradata_container(internal) void | + | function citus_extradata_container(internal) SETOF record + | function citus_remote_connection_stats() SETOF record + | function replicate_reference_tables() void + | function truncate_local_data_after_distributing_table(regclass) void + | function update_distributed_table_colocation(regclass,text) void + | function worker_create_or_alter_role(text,text,text) boolean +(7 rows) -- Test downgrade to 9.3-2 from 9.4-1 ALTER EXTENSION citus UPDATE TO '9.4-1'; @@ -378,10 +382,10 @@ SELECT * FROM print_extension_changes(); -- Snapshot of state at 9.4-1 ALTER EXTENSION citus UPDATE TO '9.4-1'; SELECT * FROM print_extension_changes(); - previous_object | current_object + previous_object | current_object --------------------------------------------------------------------- - | function worker_last_saved_explain_analyze() - | function worker_save_query_explain_analyze(text,jsonb) + | function worker_last_saved_explain_analyze() TABLE(explain_analyze_output text, execution_duration double precision) + | function worker_save_query_explain_analyze(text,jsonb) SETOF record (2 rows) -- Test downgrade to 9.4-1 from 9.5-1 @@ -418,18 +422,18 @@ SELECT * FROM print_extension_changes(); -- Snapshot of state at 9.5-1 ALTER EXTENSION citus UPDATE TO '9.5-1'; SELECT * FROM print_extension_changes(); - previous_object | current_object + previous_object | current_object --------------------------------------------------------------------- - function master_drop_sequences(text[]) | - function task_tracker_assign_task(bigint,integer,text) | - function task_tracker_cleanup_job(bigint) | - function task_tracker_conninfo_cache_invalidate() | - function task_tracker_task_status(bigint,integer) | - function worker_execute_sql_task(bigint,integer,text,boolean) | - function worker_merge_files_and_run_query(bigint,integer,text,text) | - | function create_citus_local_table(regclass) - | function undistribute_table(regclass) - | function worker_record_sequence_dependency(regclass,regclass,name) + function master_drop_sequences(text[]) void | + function task_tracker_assign_task(bigint,integer,text) void | + function task_tracker_cleanup_job(bigint) void | + function task_tracker_conninfo_cache_invalidate() trigger | + function task_tracker_task_status(bigint,integer) integer | + function worker_execute_sql_task(bigint,integer,text,boolean) bigint | + function worker_merge_files_and_run_query(bigint,integer,text,text) void | + | 
function create_citus_local_table(regclass) void + | function undistribute_table(regclass) void + | function worker_record_sequence_dependency(regclass,regclass,name) void (10 rows) -- Test downgrade to 9.5-1 from 10.0-1 @@ -444,75 +448,75 @@ SELECT * FROM print_extension_changes(); -- Snapshot of state at 10.0-1 ALTER EXTENSION citus UPDATE TO '10.0-1'; SELECT * FROM print_extension_changes(); - previous_object | current_object + previous_object | current_object --------------------------------------------------------------------- - function citus_total_relation_size(regclass) | - function create_citus_local_table(regclass) | - function mark_tables_colocated(regclass,regclass[]) | - function master_conninfo_cache_invalidate() | - function master_create_distributed_table(regclass,text,citus.distribution_type) | - function master_create_worker_shards(text,integer,integer) | - function master_dist_local_group_cache_invalidate() | - function master_dist_node_cache_invalidate() | - function master_dist_object_cache_invalidate() | - function master_dist_partition_cache_invalidate() | - function master_dist_placement_cache_invalidate() | - function master_dist_shard_cache_invalidate() | - function master_drop_all_shards(regclass,text,text) | - function master_modify_multiple_shards(text) | - function undistribute_table(regclass) | - function upgrade_to_reference_table(regclass) | - | access method columnar - | function alter_columnar_table_reset(regclass,boolean,boolean,boolean,boolean) - | function alter_columnar_table_set(regclass,integer,integer,name,integer) - | function alter_distributed_table(regclass,text,integer,text,boolean) - | function alter_old_partitions_set_access_method(regclass,timestamp with time zone,name) - | function alter_table_set_access_method(regclass,text) - | function citus_activate_node(text,integer) - | function citus_add_inactive_node(text,integer,integer,noderole,name) - | function citus_add_local_table_to_metadata(regclass,boolean) - | function citus_add_node(text,integer,integer,noderole,name) - | function citus_add_secondary_node(text,integer,text,integer,name) - | function citus_conninfo_cache_invalidate() - | function citus_copy_shard_placement(bigint,text,integer,text,integer,boolean,citus.shard_transfer_mode) - | function citus_disable_node(text,integer) - | function citus_dist_local_group_cache_invalidate() - | function citus_dist_node_cache_invalidate() - | function citus_dist_object_cache_invalidate() - | function citus_dist_partition_cache_invalidate() - | function citus_dist_placement_cache_invalidate() - | function citus_dist_shard_cache_invalidate() - | function citus_drain_node(text,integer,citus.shard_transfer_mode,name) - | function citus_drop_all_shards(regclass,text,text) - | function citus_internal.columnar_ensure_objects_exist() - | function citus_move_shard_placement(bigint,text,integer,text,integer,citus.shard_transfer_mode) - | function citus_remove_node(text,integer) - | function citus_set_coordinator_host(text,integer,noderole,name) - | function citus_set_node_property(text,integer,text,boolean) - | function citus_shard_sizes() - | function citus_total_relation_size(regclass,boolean) - | function citus_unmark_object_distributed(oid,oid,integer) - | function citus_update_node(integer,text,integer,boolean,integer) - | function citus_update_shard_statistics(bigint) - | function citus_update_table_statistics(regclass) - | function columnar.columnar_handler(internal) - | function fix_pre_citus10_partitioned_table_constraint_names() - | function 
fix_pre_citus10_partitioned_table_constraint_names(regclass) - | function notify_constraint_dropped() - | function remove_local_tables_from_metadata() - | function time_partition_range(regclass) - | function undistribute_table(regclass,boolean) - | function worker_change_sequence_dependency(regclass,regclass,regclass) - | function worker_fix_pre_citus10_partitioned_table_constraint_names(regclass,bigint,text) - | schema columnar - | sequence columnar.storageid_seq - | table columnar.chunk - | table columnar.chunk_group - | table columnar.options - | table columnar.stripe - | view citus_shards - | view citus_tables - | view time_partitions + function citus_total_relation_size(regclass) bigint | + function create_citus_local_table(regclass) void | + function mark_tables_colocated(regclass,regclass[]) void | + function master_conninfo_cache_invalidate() trigger | + function master_create_distributed_table(regclass,text,citus.distribution_type) void | + function master_create_worker_shards(text,integer,integer) void | + function master_dist_local_group_cache_invalidate() trigger | + function master_dist_node_cache_invalidate() trigger | + function master_dist_object_cache_invalidate() trigger | + function master_dist_partition_cache_invalidate() trigger | + function master_dist_placement_cache_invalidate() trigger | + function master_dist_shard_cache_invalidate() trigger | + function master_drop_all_shards(regclass,text,text) integer | + function master_modify_multiple_shards(text) integer | + function undistribute_table(regclass) void | + function upgrade_to_reference_table(regclass) void | + | access method columnar + | function alter_columnar_table_reset(regclass,boolean,boolean,boolean,boolean) void + | function alter_columnar_table_set(regclass,integer,integer,name,integer) void + | function alter_distributed_table(regclass,text,integer,text,boolean) void + | function alter_old_partitions_set_access_method(regclass,timestamp with time zone,name) + | function alter_table_set_access_method(regclass,text) void + | function citus_activate_node(text,integer) integer + | function citus_add_inactive_node(text,integer,integer,noderole,name) integer + | function citus_add_local_table_to_metadata(regclass,boolean) void + | function citus_add_node(text,integer,integer,noderole,name) integer + | function citus_add_secondary_node(text,integer,text,integer,name) integer + | function citus_conninfo_cache_invalidate() trigger + | function citus_copy_shard_placement(bigint,text,integer,text,integer,boolean,citus.shard_transfer_mode) void + | function citus_disable_node(text,integer) void + | function citus_dist_local_group_cache_invalidate() trigger + | function citus_dist_node_cache_invalidate() trigger + | function citus_dist_object_cache_invalidate() trigger + | function citus_dist_partition_cache_invalidate() trigger + | function citus_dist_placement_cache_invalidate() trigger + | function citus_dist_shard_cache_invalidate() trigger + | function citus_drain_node(text,integer,citus.shard_transfer_mode,name) void + | function citus_drop_all_shards(regclass,text,text) integer + | function citus_internal.columnar_ensure_objects_exist() void + | function citus_move_shard_placement(bigint,text,integer,text,integer,citus.shard_transfer_mode) void + | function citus_remove_node(text,integer) void + | function citus_set_coordinator_host(text,integer,noderole,name) void + | function citus_set_node_property(text,integer,text,boolean) void + | function citus_shard_sizes() SETOF record + | function 
citus_total_relation_size(regclass,boolean) bigint + | function citus_unmark_object_distributed(oid,oid,integer) void + | function citus_update_node(integer,text,integer,boolean,integer) void + | function citus_update_shard_statistics(bigint) bigint + | function citus_update_table_statistics(regclass) void + | function columnar.columnar_handler(internal) table_am_handler + | function fix_pre_citus10_partitioned_table_constraint_names() SETOF regclass + | function fix_pre_citus10_partitioned_table_constraint_names(regclass) void + | function notify_constraint_dropped() void + | function remove_local_tables_from_metadata() void + | function time_partition_range(regclass) record + | function undistribute_table(regclass,boolean) void + | function worker_change_sequence_dependency(regclass,regclass,regclass) void + | function worker_fix_pre_citus10_partitioned_table_constraint_names(regclass,bigint,text) void + | schema columnar + | sequence columnar.storageid_seq + | table columnar.chunk + | table columnar.chunk_group + | table columnar.options + | table columnar.stripe + | view citus_shards + | view citus_tables + | view time_partitions (67 rows) -- Test downgrade to 10.0-1 from 10.0-2 @@ -543,9 +547,9 @@ SELECT * FROM print_extension_changes(); -- Snapshot of state at 10.0-3 ALTER EXTENSION citus UPDATE TO '10.0-3'; SELECT * FROM print_extension_changes(); - previous_object | current_object + previous_object | current_object --------------------------------------------------------------------- - | function citus_get_active_worker_nodes() + | function citus_get_active_worker_nodes() SETOF record (1 row) -- Test downgrade to 10.0-3 from 10.1-1 @@ -560,25 +564,47 @@ SELECT * FROM print_extension_changes(); -- Snapshot of state at 10.1-1 ALTER EXTENSION citus UPDATE TO '10.1-1'; SELECT * FROM print_extension_changes(); - previous_object | current_object + previous_object | current_object --------------------------------------------------------------------- - function citus_internal.columnar_ensure_objects_exist() | - function create_distributed_table(regclass,text,citus.distribution_type,text) | - | function citus_internal.downgrade_columnar_storage(regclass) - | function citus_internal.upgrade_columnar_storage(regclass) - | function citus_local_disk_space_stats() - | function create_distributed_table(regclass,text,citus.distribution_type,text,integer) - | function worker_partitioned_relation_size(regclass) - | function worker_partitioned_relation_total_size(regclass) - | function worker_partitioned_table_size(regclass) -(9 rows) + function citus_add_rebalance_strategy(name,regproc,regproc,regproc,real,real) void | + function citus_internal.columnar_ensure_objects_exist() void | + function citus_internal.pg_dist_rebalance_strategy_enterprise_check() trigger | + function create_distributed_table(regclass,text,citus.distribution_type,text) void | + function get_rebalance_progress() TABLE(sessionid integer, table_name regclass, shardid bigint, shard_size bigint, sourcename text, sourceport integer, targetname text, targetport integer, progress bigint) | + function get_rebalance_table_shards_plan(regclass,real,integer,bigint[],boolean,name) TABLE(table_name regclass, shardid bigint, shard_size bigint, sourcename text, sourceport integer, targetname text, targetport integer) | + | function citus_add_rebalance_strategy(name,regproc,regproc,regproc,real,real,real) void + | function citus_cleanup_orphaned_shards() + | function citus_local_disk_space_stats() record + | function 
create_distributed_table(regclass,text,citus.distribution_type,text,integer) void + | function get_rebalance_progress() TABLE(sessionid integer, table_name regclass, shardid bigint, shard_size bigint, sourcename text, sourceport integer, targetname text, targetport integer, progress bigint, source_shard_size bigint, target_shard_size bigint) + | function get_rebalance_table_shards_plan(regclass,real,integer,bigint[],boolean,name,real) TABLE(table_name regclass, shardid bigint, shard_size bigint, sourcename text, sourceport integer, targetname text, targetport integer) + | function worker_partitioned_relation_size(regclass) bigint + | function worker_partitioned_relation_total_size(regclass) bigint + | function worker_partitioned_table_size(regclass) bigint +(15 rows) + +-- Test downgrade to 10.1-1 from 10.2-1 +ALTER EXTENSION citus UPDATE TO '10.2-1'; +ALTER EXTENSION citus UPDATE TO '10.1-1'; +-- Should be empty result since upgrade+downgrade should be a no-op +SELECT * FROM print_extension_changes(); + previous_object | current_object +--------------------------------------------------------------------- +(0 rows) + +-- Snapshot of state at 10.2-1 +ALTER EXTENSION citus UPDATE TO '10.2-1'; +SELECT * FROM print_extension_changes(); + previous_object | current_object +--------------------------------------------------------------------- +(0 rows) DROP TABLE prev_objects, extension_diff; -- show running version SHOW citus.version; citus.version --------------------------------------------------------------------- - 10.1devel + 10.2devel (1 row) -- ensure no unexpected objects were created outside pg_catalog @@ -601,7 +627,7 @@ RESET citus.enable_version_checks; DROP EXTENSION citus; CREATE EXTENSION citus VERSION '8.0-1'; ERROR: specified version incompatible with loaded Citus library -DETAIL: Loaded library requires 10.1, but 8.0-1 was specified. +DETAIL: Loaded library requires 10.2, but 8.0-1 was specified. HINT: If a newer library is present, restart the database and try the command again. -- Test non-distributed queries work even in version mismatch SET citus.enable_version_checks TO 'false'; @@ -644,7 +670,7 @@ ORDER BY 1; -- We should not distribute table in version mistmatch SELECT create_distributed_table('version_mismatch_table', 'column1'); ERROR: loaded Citus library version differs from installed extension version -DETAIL: Loaded library requires 10.1, but the installed extension version is 8.1-1. +DETAIL: Loaded library requires 10.2, but the installed extension version is 8.1-1. HINT: Run ALTER EXTENSION citus UPDATE and try again. 
-- This function will cause fail in next ALTER EXTENSION CREATE OR REPLACE FUNCTION pg_catalog.relation_is_a_known_shard(regclass) diff --git a/src/test/regress/expected/multi_function_in_join.out b/src/test/regress/expected/multi_function_in_join.out index 1f253aad9..5d1b2a672 100644 --- a/src/test/regress/expected/multi_function_in_join.out +++ b/src/test/regress/expected/multi_function_in_join.out @@ -12,7 +12,6 @@ CREATE SCHEMA functions_in_joins; SET search_path TO 'functions_in_joins'; SET citus.next_shard_id TO 2500000; -SET citus.replication_model to 'streaming'; SET citus.shard_replication_factor to 1; CREATE TABLE table1 (id int, data int); SELECT create_distributed_table('table1','id'); diff --git a/src/test/regress/expected/multi_function_in_join_0.out b/src/test/regress/expected/multi_function_in_join_0.out index 8498c4a13..21279ab8d 100644 --- a/src/test/regress/expected/multi_function_in_join_0.out +++ b/src/test/regress/expected/multi_function_in_join_0.out @@ -12,7 +12,6 @@ CREATE SCHEMA functions_in_joins; SET search_path TO 'functions_in_joins'; SET citus.next_shard_id TO 2500000; -SET citus.replication_model to 'streaming'; SET citus.shard_replication_factor to 1; CREATE TABLE table1 (id int, data int); SELECT create_distributed_table('table1','id'); diff --git a/src/test/regress/expected/multi_insert_select.out b/src/test/regress/expected/multi_insert_select.out index 24b7d2a2d..99c5e8e56 100644 --- a/src/test/regress/expected/multi_insert_select.out +++ b/src/test/regress/expected/multi_insert_select.out @@ -2212,7 +2212,7 @@ FROM table_with_defaults GROUP BY store_id, first_name, last_name; --- Volatile function in default should be disallowed +-- Volatile function in default should be disallowed - SERIAL pseudo-types CREATE TABLE table_with_serial ( store_id int, s bigserial @@ -2224,6 +2224,25 @@ SELECT create_distributed_table('table_with_serial', 'store_id'); (1 row) INSERT INTO table_with_serial (store_id) +SELECT + store_id +FROM + table_with_defaults +GROUP BY + store_id; +-- Volatile function in default should be disallowed - user-defined sequence +CREATE SEQUENCE user_defined_sequence; +CREATE TABLE table_with_user_sequence ( + store_id int, + s bigint default nextval('user_defined_sequence') +); +SELECT create_distributed_table('table_with_user_sequence', 'store_id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO table_with_user_sequence (store_id) SELECT store_id FROM @@ -2719,6 +2738,92 @@ SELECT * FROM dist_table_with_sequence ORDER BY user_id, value_1; (6 rows) DROP TABLE dist_table_with_sequence; +-- Select into distributed table with a user-defined sequence +CREATE SEQUENCE seq1; +CREATE SEQUENCE seq2; +CREATE TABLE dist_table_with_user_sequence (user_id int default nextval('seq1'), value_1 bigint default nextval('seq2')); +SELECT create_distributed_table('dist_table_with_user_sequence', 'user_id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- from local query +INSERT INTO dist_table_with_user_sequence (value_1) +SELECT s FROM generate_series(1,5) s; +SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1; + user_id | value_1 +--------------------------------------------------------------------- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 +(5 rows) + +-- from a distributed query +INSERT INTO dist_table_with_user_sequence (value_1) +SELECT value_1 FROM dist_table_with_user_sequence ORDER BY 
value_1; +SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1; + user_id | value_1 +--------------------------------------------------------------------- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 1 + 7 | 2 + 8 | 3 + 9 | 4 + 10 | 5 +(10 rows) + +TRUNCATE dist_table_with_user_sequence; +INSERT INTO dist_table_with_user_sequence (user_id) +SELECT user_id FROM raw_events_second ORDER BY user_id; +SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1; + user_id | value_1 +--------------------------------------------------------------------- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 +(5 rows) + +WITH top10 AS ( + SELECT user_id FROM raw_events_second WHERE value_1 IS NOT NULL ORDER BY value_1 LIMIT 10 +) +INSERT INTO dist_table_with_user_sequence (value_1) +SELECT * FROM top10; +ERROR: cannot handle complex subqueries when the router executor is disabled +SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1; + user_id | value_1 +--------------------------------------------------------------------- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 +(5 rows) + +-- router queries become logical planner queries when there is a nextval call +INSERT INTO dist_table_with_user_sequence (user_id) +SELECT user_id FROM dist_table_with_user_sequence WHERE user_id = 1; +SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1; + user_id | value_1 +--------------------------------------------------------------------- + 1 | 1 + 1 | 6 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 +(6 rows) + +DROP TABLE dist_table_with_user_sequence; +DROP SEQUENCE seq1, seq2; -- Select from distributed table into reference table CREATE TABLE ref_table (user_id serial, value_1 int); SELECT create_reference_table('ref_table'); @@ -2783,6 +2888,72 @@ SELECT * FROM ref_table ORDER BY user_id, value_1; (20 rows) DROP TABLE ref_table; +-- Select from distributed table into reference table with user-defined sequence +CREATE SEQUENCE seq1; +CREATE TABLE ref_table_with_user_sequence (user_id int default nextval('seq1'), value_1 int); +SELECT create_reference_table('ref_table_with_user_sequence'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO ref_table_with_user_sequence +SELECT user_id, value_1 FROM raw_events_second; +SELECT * FROM ref_table_with_user_sequence ORDER BY user_id, value_1; + user_id | value_1 +--------------------------------------------------------------------- + 1 | + 2 | + 3 | + 4 | + 5 | +(5 rows) + +INSERT INTO ref_table_with_user_sequence (value_1) +SELECT value_1 FROM raw_events_second ORDER BY value_1; +SELECT * FROM ref_table_with_user_sequence ORDER BY user_id, value_1; + user_id | value_1 +--------------------------------------------------------------------- + 1 | + 1 | + 2 | + 2 | + 3 | + 3 | + 4 | + 4 | + 5 | + 5 | +(10 rows) + +INSERT INTO ref_table_with_user_sequence SELECT * FROM ref_table_with_user_sequence; +SELECT * FROM ref_table_with_user_sequence ORDER BY user_id, value_1; + user_id | value_1 +--------------------------------------------------------------------- + 1 | + 1 | + 1 | + 1 | + 2 | + 2 | + 2 | + 2 | + 3 | + 3 | + 3 | + 3 | + 4 | + 4 | + 4 | + 4 | + 5 | + 5 | + 5 | + 5 | +(20 rows) + +DROP TABLE ref_table_with_user_sequence; +DROP SEQUENCE seq1; -- Select from reference table into reference table CREATE TABLE ref1 (d timestamptz); SELECT create_reference_table('ref1'); @@ -3000,7 +3171,7 @@ FROM ( SELECT user_id, value_1 FROM coerce_events ) AS ftop; -ERROR: new row for 
relation "coerce_agg_13300060" violates check constraint "small_number_13300060" +ERROR: new row for relation "coerce_agg_13300067" violates check constraint "small_number_13300067" \set VERBOSITY DEFAULT SELECT * FROM coerce_agg ORDER BY 1 DESC, 2 DESC; user_id | value_1_agg @@ -3076,6 +3247,8 @@ DROP TABLE reference_table; DROP TABLE agg_events; DROP TABLE table_with_defaults; DROP TABLE table_with_serial; +DROP TABLE table_with_user_sequence; +DROP SEQUENCE user_defined_sequence; DROP TABLE text_table; DROP TABLE char_table; DROP TABLE table_with_starts_with_defaults; diff --git a/src/test/regress/expected/multi_metadata_attributes.out b/src/test/regress/expected/multi_metadata_attributes.out index 91d927c18..85ececdac 100644 --- a/src/test/regress/expected/multi_metadata_attributes.out +++ b/src/test/regress/expected/multi_metadata_attributes.out @@ -6,7 +6,7 @@ -- part of the query so new changes to it won't affect this test. SELECT attrelid::regclass, attname, atthasmissing, attmissingval FROM pg_attribute -WHERE atthasmissing AND attrelid NOT IN ('pg_dist_node'::regclass) +WHERE atthasmissing AND attrelid NOT IN ('pg_dist_node'::regclass, 'pg_dist_rebalance_strategy'::regclass) ORDER BY attrelid, attname; attrelid | attname | atthasmissing | attmissingval --------------------------------------------------------------------- diff --git a/src/test/regress/expected/multi_metadata_sync.out b/src/test/regress/expected/multi_metadata_sync.out index 8d1452be9..4a3dec5d8 100644 --- a/src/test/regress/expected/multi_metadata_sync.out +++ b/src/test/regress/expected/multi_metadata_sync.out @@ -50,8 +50,9 @@ CREATE OR REPLACE FUNCTION pg_catalog.master_create_worker_shards(table_name tex RETURNS void AS 'citus', $$master_create_worker_shards$$ LANGUAGE C STRICT; --- Create a test table with constraints and SERIAL -CREATE TABLE mx_test_table (col_1 int UNIQUE, col_2 text NOT NULL, col_3 BIGSERIAL); +-- Create a test table with constraints and SERIAL and default from user defined sequence +CREATE SEQUENCE user_defined_seq; +CREATE TABLE mx_test_table (col_1 int UNIQUE, col_2 text NOT NULL, col_3 BIGSERIAL, col_4 BIGINT DEFAULT nextval('user_defined_seq')); SELECT master_create_distributed_table('mx_test_table', 'col_1', 'hash'); master_create_distributed_table --------------------------------------------------------------------- @@ -72,20 +73,22 @@ SELECT unnest(master_metadata_snapshot()) order by 1; unnest --------------------------------------------------------------------- ALTER SEQUENCE public.mx_test_table_col_3_seq OWNER TO postgres + ALTER SEQUENCE public.user_defined_seq OWNER TO postgres ALTER TABLE public.mx_test_table ADD CONSTRAINT mx_test_table_col_1_key UNIQUE (col_1) ALTER TABLE public.mx_test_table OWNER TO postgres ALTER TABLE public.mx_test_table OWNER TO postgres - CREATE TABLE public.mx_test_table (col_1 integer, col_2 text NOT NULL, col_3 bigint DEFAULT nextval('public.mx_test_table_col_3_seq'::regclass) NOT NULL) + CREATE TABLE public.mx_test_table (col_1 integer, col_2 text NOT NULL, col_3 bigint DEFAULT nextval('public.mx_test_table_col_3_seq'::regclass) NOT NULL, col_4 bigint DEFAULT nextval('public.user_defined_seq'::regclass)) INSERT INTO pg_dist_node (nodeid, groupid, nodename, nodeport, noderack, hasmetadata, metadatasynced, isactive, noderole, nodecluster) VALUES (1, 1, 'localhost', 57637, 'default', FALSE, FALSE, TRUE, 'primary'::noderole, 'default'),(2, 2, 'localhost', 57638, 'default', FALSE, FALSE, TRUE, 'primary'::noderole, 'default') INSERT INTO 
pg_dist_partition (logicalrelid, partmethod, partkey, colocationid, repmodel) VALUES ('public.mx_test_table'::regclass, 'h', column_name_to_column('public.mx_test_table','col_1'), 0, 's') INSERT INTO pg_dist_placement (shardid, shardstate, shardlength, groupid, placementid) VALUES (1310000, 1, 0, 1, 100000),(1310001, 1, 0, 2, 100001),(1310002, 1, 0, 1, 100002),(1310003, 1, 0, 2, 100003),(1310004, 1, 0, 1, 100004),(1310005, 1, 0, 2, 100005),(1310006, 1, 0, 1, 100006),(1310007, 1, 0, 2, 100007) INSERT INTO pg_dist_shard (logicalrelid, shardid, shardstorage, shardminvalue, shardmaxvalue) VALUES ('public.mx_test_table'::regclass, 1310000, 't', '-2147483648', '-1610612737'),('public.mx_test_table'::regclass, 1310001, 't', '-1610612736', '-1073741825'),('public.mx_test_table'::regclass, 1310002, 't', '-1073741824', '-536870913'),('public.mx_test_table'::regclass, 1310003, 't', '-536870912', '-1'),('public.mx_test_table'::regclass, 1310004, 't', '0', '536870911'),('public.mx_test_table'::regclass, 1310005, 't', '536870912', '1073741823'),('public.mx_test_table'::regclass, 1310006, 't', '1073741824', '1610612735'),('public.mx_test_table'::regclass, 1310007, 't', '1610612736', '2147483647') SELECT pg_catalog.worker_record_sequence_dependency('public.mx_test_table_col_3_seq'::regclass,'public.mx_test_table'::regclass,'col_3') SELECT worker_apply_sequence_command ('CREATE SEQUENCE IF NOT EXISTS public.mx_test_table_col_3_seq INCREMENT BY 1 MINVALUE 1 MAXVALUE 9223372036854775807 START WITH 1 NO CYCLE','bigint') + SELECT worker_apply_sequence_command ('CREATE SEQUENCE IF NOT EXISTS public.user_defined_seq INCREMENT BY 1 MINVALUE 1 MAXVALUE 9223372036854775807 START WITH 1 NO CYCLE','bigint') SELECT worker_create_truncate_trigger('public.mx_test_table') SELECT worker_drop_distributed_table(logicalrelid::regclass::text) FROM pg_dist_partition TRUNCATE pg_dist_node CASCADE -(14 rows) +(16 rows) -- Show that CREATE INDEX commands are included in the metadata snapshot CREATE INDEX mx_index ON mx_test_table(col_2); @@ -93,21 +96,23 @@ SELECT unnest(master_metadata_snapshot()) order by 1; unnest --------------------------------------------------------------------- ALTER SEQUENCE public.mx_test_table_col_3_seq OWNER TO postgres + ALTER SEQUENCE public.user_defined_seq OWNER TO postgres ALTER TABLE public.mx_test_table ADD CONSTRAINT mx_test_table_col_1_key UNIQUE (col_1) ALTER TABLE public.mx_test_table OWNER TO postgres ALTER TABLE public.mx_test_table OWNER TO postgres CREATE INDEX mx_index ON public.mx_test_table USING btree (col_2) - CREATE TABLE public.mx_test_table (col_1 integer, col_2 text NOT NULL, col_3 bigint DEFAULT nextval('public.mx_test_table_col_3_seq'::regclass) NOT NULL) + CREATE TABLE public.mx_test_table (col_1 integer, col_2 text NOT NULL, col_3 bigint DEFAULT nextval('public.mx_test_table_col_3_seq'::regclass) NOT NULL, col_4 bigint DEFAULT nextval('public.user_defined_seq'::regclass)) INSERT INTO pg_dist_node (nodeid, groupid, nodename, nodeport, noderack, hasmetadata, metadatasynced, isactive, noderole, nodecluster) VALUES (1, 1, 'localhost', 57637, 'default', FALSE, FALSE, TRUE, 'primary'::noderole, 'default'),(2, 2, 'localhost', 57638, 'default', FALSE, FALSE, TRUE, 'primary'::noderole, 'default') INSERT INTO pg_dist_partition (logicalrelid, partmethod, partkey, colocationid, repmodel) VALUES ('public.mx_test_table'::regclass, 'h', column_name_to_column('public.mx_test_table','col_1'), 0, 's') INSERT INTO pg_dist_placement (shardid, shardstate, shardlength, groupid, placementid) 
VALUES (1310000, 1, 0, 1, 100000),(1310001, 1, 0, 2, 100001),(1310002, 1, 0, 1, 100002),(1310003, 1, 0, 2, 100003),(1310004, 1, 0, 1, 100004),(1310005, 1, 0, 2, 100005),(1310006, 1, 0, 1, 100006),(1310007, 1, 0, 2, 100007) INSERT INTO pg_dist_shard (logicalrelid, shardid, shardstorage, shardminvalue, shardmaxvalue) VALUES ('public.mx_test_table'::regclass, 1310000, 't', '-2147483648', '-1610612737'),('public.mx_test_table'::regclass, 1310001, 't', '-1610612736', '-1073741825'),('public.mx_test_table'::regclass, 1310002, 't', '-1073741824', '-536870913'),('public.mx_test_table'::regclass, 1310003, 't', '-536870912', '-1'),('public.mx_test_table'::regclass, 1310004, 't', '0', '536870911'),('public.mx_test_table'::regclass, 1310005, 't', '536870912', '1073741823'),('public.mx_test_table'::regclass, 1310006, 't', '1073741824', '1610612735'),('public.mx_test_table'::regclass, 1310007, 't', '1610612736', '2147483647') SELECT pg_catalog.worker_record_sequence_dependency('public.mx_test_table_col_3_seq'::regclass,'public.mx_test_table'::regclass,'col_3') SELECT worker_apply_sequence_command ('CREATE SEQUENCE IF NOT EXISTS public.mx_test_table_col_3_seq INCREMENT BY 1 MINVALUE 1 MAXVALUE 9223372036854775807 START WITH 1 NO CYCLE','bigint') + SELECT worker_apply_sequence_command ('CREATE SEQUENCE IF NOT EXISTS public.user_defined_seq INCREMENT BY 1 MINVALUE 1 MAXVALUE 9223372036854775807 START WITH 1 NO CYCLE','bigint') SELECT worker_create_truncate_trigger('public.mx_test_table') SELECT worker_drop_distributed_table(logicalrelid::regclass::text) FROM pg_dist_partition TRUNCATE pg_dist_node CASCADE -(15 rows) +(17 rows) -- Show that schema changes are included in the metadata snapshot CREATE SCHEMA mx_testing_schema; @@ -116,21 +121,23 @@ SELECT unnest(master_metadata_snapshot()) order by 1; unnest --------------------------------------------------------------------- ALTER SEQUENCE mx_testing_schema.mx_test_table_col_3_seq OWNER TO postgres + ALTER SEQUENCE public.user_defined_seq OWNER TO postgres ALTER TABLE mx_testing_schema.mx_test_table ADD CONSTRAINT mx_test_table_col_1_key UNIQUE (col_1) ALTER TABLE mx_testing_schema.mx_test_table OWNER TO postgres ALTER TABLE mx_testing_schema.mx_test_table OWNER TO postgres CREATE INDEX mx_index ON mx_testing_schema.mx_test_table USING btree (col_2) - CREATE TABLE mx_testing_schema.mx_test_table (col_1 integer, col_2 text NOT NULL, col_3 bigint DEFAULT nextval('mx_testing_schema.mx_test_table_col_3_seq'::regclass) NOT NULL) + CREATE TABLE mx_testing_schema.mx_test_table (col_1 integer, col_2 text NOT NULL, col_3 bigint DEFAULT nextval('mx_testing_schema.mx_test_table_col_3_seq'::regclass) NOT NULL, col_4 bigint DEFAULT nextval('public.user_defined_seq'::regclass)) INSERT INTO pg_dist_node (nodeid, groupid, nodename, nodeport, noderack, hasmetadata, metadatasynced, isactive, noderole, nodecluster) VALUES (1, 1, 'localhost', 57637, 'default', FALSE, FALSE, TRUE, 'primary'::noderole, 'default'),(2, 2, 'localhost', 57638, 'default', FALSE, FALSE, TRUE, 'primary'::noderole, 'default') INSERT INTO pg_dist_partition (logicalrelid, partmethod, partkey, colocationid, repmodel) VALUES ('mx_testing_schema.mx_test_table'::regclass, 'h', column_name_to_column('mx_testing_schema.mx_test_table','col_1'), 0, 's') INSERT INTO pg_dist_placement (shardid, shardstate, shardlength, groupid, placementid) VALUES (1310000, 1, 0, 1, 100000),(1310001, 1, 0, 2, 100001),(1310002, 1, 0, 1, 100002),(1310003, 1, 0, 2, 100003),(1310004, 1, 0, 1, 100004),(1310005, 1, 0, 2, 
100005),(1310006, 1, 0, 1, 100006),(1310007, 1, 0, 2, 100007) INSERT INTO pg_dist_shard (logicalrelid, shardid, shardstorage, shardminvalue, shardmaxvalue) VALUES ('mx_testing_schema.mx_test_table'::regclass, 1310000, 't', '-2147483648', '-1610612737'),('mx_testing_schema.mx_test_table'::regclass, 1310001, 't', '-1610612736', '-1073741825'),('mx_testing_schema.mx_test_table'::regclass, 1310002, 't', '-1073741824', '-536870913'),('mx_testing_schema.mx_test_table'::regclass, 1310003, 't', '-536870912', '-1'),('mx_testing_schema.mx_test_table'::regclass, 1310004, 't', '0', '536870911'),('mx_testing_schema.mx_test_table'::regclass, 1310005, 't', '536870912', '1073741823'),('mx_testing_schema.mx_test_table'::regclass, 1310006, 't', '1073741824', '1610612735'),('mx_testing_schema.mx_test_table'::regclass, 1310007, 't', '1610612736', '2147483647') SELECT pg_catalog.worker_record_sequence_dependency('mx_testing_schema.mx_test_table_col_3_seq'::regclass,'mx_testing_schema.mx_test_table'::regclass,'col_3') SELECT worker_apply_sequence_command ('CREATE SEQUENCE IF NOT EXISTS mx_testing_schema.mx_test_table_col_3_seq INCREMENT BY 1 MINVALUE 1 MAXVALUE 9223372036854775807 START WITH 1 NO CYCLE','bigint') + SELECT worker_apply_sequence_command ('CREATE SEQUENCE IF NOT EXISTS public.user_defined_seq INCREMENT BY 1 MINVALUE 1 MAXVALUE 9223372036854775807 START WITH 1 NO CYCLE','bigint') SELECT worker_create_truncate_trigger('mx_testing_schema.mx_test_table') SELECT worker_drop_distributed_table(logicalrelid::regclass::text) FROM pg_dist_partition TRUNCATE pg_dist_node CASCADE -(15 rows) +(17 rows) -- Show that append distributed tables are not included in the metadata snapshot CREATE TABLE non_mx_test_table (col_1 int, col_2 text); @@ -145,21 +152,23 @@ SELECT unnest(master_metadata_snapshot()) order by 1; unnest --------------------------------------------------------------------- ALTER SEQUENCE mx_testing_schema.mx_test_table_col_3_seq OWNER TO postgres + ALTER SEQUENCE public.user_defined_seq OWNER TO postgres ALTER TABLE mx_testing_schema.mx_test_table ADD CONSTRAINT mx_test_table_col_1_key UNIQUE (col_1) ALTER TABLE mx_testing_schema.mx_test_table OWNER TO postgres ALTER TABLE mx_testing_schema.mx_test_table OWNER TO postgres CREATE INDEX mx_index ON mx_testing_schema.mx_test_table USING btree (col_2) - CREATE TABLE mx_testing_schema.mx_test_table (col_1 integer, col_2 text NOT NULL, col_3 bigint DEFAULT nextval('mx_testing_schema.mx_test_table_col_3_seq'::regclass) NOT NULL) + CREATE TABLE mx_testing_schema.mx_test_table (col_1 integer, col_2 text NOT NULL, col_3 bigint DEFAULT nextval('mx_testing_schema.mx_test_table_col_3_seq'::regclass) NOT NULL, col_4 bigint DEFAULT nextval('public.user_defined_seq'::regclass)) INSERT INTO pg_dist_node (nodeid, groupid, nodename, nodeport, noderack, hasmetadata, metadatasynced, isactive, noderole, nodecluster) VALUES (1, 1, 'localhost', 57637, 'default', FALSE, FALSE, TRUE, 'primary'::noderole, 'default'),(2, 2, 'localhost', 57638, 'default', FALSE, FALSE, TRUE, 'primary'::noderole, 'default') INSERT INTO pg_dist_partition (logicalrelid, partmethod, partkey, colocationid, repmodel) VALUES ('mx_testing_schema.mx_test_table'::regclass, 'h', column_name_to_column('mx_testing_schema.mx_test_table','col_1'), 0, 's') INSERT INTO pg_dist_placement (shardid, shardstate, shardlength, groupid, placementid) VALUES (1310000, 1, 0, 1, 100000),(1310001, 1, 0, 2, 100001),(1310002, 1, 0, 1, 100002),(1310003, 1, 0, 2, 100003),(1310004, 1, 0, 1, 100004),(1310005, 1, 0, 2, 
100005),(1310006, 1, 0, 1, 100006),(1310007, 1, 0, 2, 100007) INSERT INTO pg_dist_shard (logicalrelid, shardid, shardstorage, shardminvalue, shardmaxvalue) VALUES ('mx_testing_schema.mx_test_table'::regclass, 1310000, 't', '-2147483648', '-1610612737'),('mx_testing_schema.mx_test_table'::regclass, 1310001, 't', '-1610612736', '-1073741825'),('mx_testing_schema.mx_test_table'::regclass, 1310002, 't', '-1073741824', '-536870913'),('mx_testing_schema.mx_test_table'::regclass, 1310003, 't', '-536870912', '-1'),('mx_testing_schema.mx_test_table'::regclass, 1310004, 't', '0', '536870911'),('mx_testing_schema.mx_test_table'::regclass, 1310005, 't', '536870912', '1073741823'),('mx_testing_schema.mx_test_table'::regclass, 1310006, 't', '1073741824', '1610612735'),('mx_testing_schema.mx_test_table'::regclass, 1310007, 't', '1610612736', '2147483647') SELECT pg_catalog.worker_record_sequence_dependency('mx_testing_schema.mx_test_table_col_3_seq'::regclass,'mx_testing_schema.mx_test_table'::regclass,'col_3') SELECT worker_apply_sequence_command ('CREATE SEQUENCE IF NOT EXISTS mx_testing_schema.mx_test_table_col_3_seq INCREMENT BY 1 MINVALUE 1 MAXVALUE 9223372036854775807 START WITH 1 NO CYCLE','bigint') + SELECT worker_apply_sequence_command ('CREATE SEQUENCE IF NOT EXISTS public.user_defined_seq INCREMENT BY 1 MINVALUE 1 MAXVALUE 9223372036854775807 START WITH 1 NO CYCLE','bigint') SELECT worker_create_truncate_trigger('mx_testing_schema.mx_test_table') SELECT worker_drop_distributed_table(logicalrelid::regclass::text) FROM pg_dist_partition TRUNCATE pg_dist_node CASCADE -(15 rows) +(17 rows) -- Show that range distributed tables are not included in the metadata snapshot UPDATE pg_dist_partition SET partmethod='r' WHERE logicalrelid='non_mx_test_table'::regclass; @@ -167,21 +176,23 @@ SELECT unnest(master_metadata_snapshot()) order by 1; unnest --------------------------------------------------------------------- ALTER SEQUENCE mx_testing_schema.mx_test_table_col_3_seq OWNER TO postgres + ALTER SEQUENCE public.user_defined_seq OWNER TO postgres ALTER TABLE mx_testing_schema.mx_test_table ADD CONSTRAINT mx_test_table_col_1_key UNIQUE (col_1) ALTER TABLE mx_testing_schema.mx_test_table OWNER TO postgres ALTER TABLE mx_testing_schema.mx_test_table OWNER TO postgres CREATE INDEX mx_index ON mx_testing_schema.mx_test_table USING btree (col_2) - CREATE TABLE mx_testing_schema.mx_test_table (col_1 integer, col_2 text NOT NULL, col_3 bigint DEFAULT nextval('mx_testing_schema.mx_test_table_col_3_seq'::regclass) NOT NULL) + CREATE TABLE mx_testing_schema.mx_test_table (col_1 integer, col_2 text NOT NULL, col_3 bigint DEFAULT nextval('mx_testing_schema.mx_test_table_col_3_seq'::regclass) NOT NULL, col_4 bigint DEFAULT nextval('public.user_defined_seq'::regclass)) INSERT INTO pg_dist_node (nodeid, groupid, nodename, nodeport, noderack, hasmetadata, metadatasynced, isactive, noderole, nodecluster) VALUES (1, 1, 'localhost', 57637, 'default', FALSE, FALSE, TRUE, 'primary'::noderole, 'default'),(2, 2, 'localhost', 57638, 'default', FALSE, FALSE, TRUE, 'primary'::noderole, 'default') INSERT INTO pg_dist_partition (logicalrelid, partmethod, partkey, colocationid, repmodel) VALUES ('mx_testing_schema.mx_test_table'::regclass, 'h', column_name_to_column('mx_testing_schema.mx_test_table','col_1'), 0, 's') INSERT INTO pg_dist_placement (shardid, shardstate, shardlength, groupid, placementid) VALUES (1310000, 1, 0, 1, 100000),(1310001, 1, 0, 2, 100001),(1310002, 1, 0, 1, 100002),(1310003, 1, 0, 2, 100003),(1310004, 1, 0, 
1, 100004),(1310005, 1, 0, 2, 100005),(1310006, 1, 0, 1, 100006),(1310007, 1, 0, 2, 100007) INSERT INTO pg_dist_shard (logicalrelid, shardid, shardstorage, shardminvalue, shardmaxvalue) VALUES ('mx_testing_schema.mx_test_table'::regclass, 1310000, 't', '-2147483648', '-1610612737'),('mx_testing_schema.mx_test_table'::regclass, 1310001, 't', '-1610612736', '-1073741825'),('mx_testing_schema.mx_test_table'::regclass, 1310002, 't', '-1073741824', '-536870913'),('mx_testing_schema.mx_test_table'::regclass, 1310003, 't', '-536870912', '-1'),('mx_testing_schema.mx_test_table'::regclass, 1310004, 't', '0', '536870911'),('mx_testing_schema.mx_test_table'::regclass, 1310005, 't', '536870912', '1073741823'),('mx_testing_schema.mx_test_table'::regclass, 1310006, 't', '1073741824', '1610612735'),('mx_testing_schema.mx_test_table'::regclass, 1310007, 't', '1610612736', '2147483647') SELECT pg_catalog.worker_record_sequence_dependency('mx_testing_schema.mx_test_table_col_3_seq'::regclass,'mx_testing_schema.mx_test_table'::regclass,'col_3') SELECT worker_apply_sequence_command ('CREATE SEQUENCE IF NOT EXISTS mx_testing_schema.mx_test_table_col_3_seq INCREMENT BY 1 MINVALUE 1 MAXVALUE 9223372036854775807 START WITH 1 NO CYCLE','bigint') + SELECT worker_apply_sequence_command ('CREATE SEQUENCE IF NOT EXISTS public.user_defined_seq INCREMENT BY 1 MINVALUE 1 MAXVALUE 9223372036854775807 START WITH 1 NO CYCLE','bigint') SELECT worker_create_truncate_trigger('mx_testing_schema.mx_test_table') SELECT worker_drop_distributed_table(logicalrelid::regclass::text) FROM pg_dist_partition TRUNCATE pg_dist_node CASCADE -(15 rows) +(17 rows) -- Test start_metadata_sync_to_node UDF -- Ensure that hasmetadata=false for all nodes @@ -261,7 +272,7 @@ SELECT * FROM pg_dist_node ORDER BY nodeid; (4 rows) SELECT * FROM pg_dist_partition ORDER BY logicalrelid; - logicalrelid | partmethod | partkey | colocationid | repmodel + logicalrelid | partmethod | partkey | colocationid | repmodel --------------------------------------------------------------------- mx_testing_schema.mx_test_table | h | {VAR :varno 1 :varattno 1 :vartype 23 :vartypmod -1 :varcollid 0 :varlevelsup 0 :varnoold 1 :varoattno 1 :location -1} | 0 | s (1 row) @@ -298,7 +309,8 @@ SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='mx_testing_sch col_1 | integer | col_2 | text | not null col_3 | bigint | not null default nextval('mx_testing_schema.mx_test_table_col_3_seq'::regclass) -(3 rows) + col_4 | bigint | default nextval('user_defined_seq'::regclass) +(4 rows) SELECT "Column", "Type", "Definition" FROM index_attrs WHERE relid = 'mx_testing_schema.mx_test_table_col_1_key'::regclass; @@ -332,7 +344,6 @@ SELECT count(*) FROM pg_trigger WHERE tgrelid='mx_testing_schema.mx_test_table': -- Since we're superuser, we can set the replication model to 'streaming' to -- create some MX tables SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; CREATE SCHEMA mx_testing_schema_2; CREATE TABLE mx_testing_schema.fk_test_1 (col1 int, col2 text, col3 int, UNIQUE(col1, col3)); CREATE TABLE mx_testing_schema_2.fk_test_2 (col1 int, col2 int, col3 text, @@ -358,7 +369,7 @@ SELECT start_metadata_sync_to_node('localhost', :worker_1_port); -- Check that foreign key metadata exists on the worker \c - - - :worker_1_port SELECT "Constraint", "Definition" FROM table_fkeys WHERE relid='mx_testing_schema_2.fk_test_2'::regclass; - Constraint | Definition + Constraint | Definition 
--------------------------------------------------------------------- fk_test_2_col1_fkey | FOREIGN KEY (col1, col2) REFERENCES mx_testing_schema.fk_test_1(col1, col3) (1 row) @@ -367,7 +378,6 @@ SELECT "Constraint", "Definition" FROM table_fkeys WHERE relid='mx_testing_schem DROP TABLE mx_testing_schema_2.fk_test_2; DROP TABLE mx_testing_schema.fk_test_1; RESET citus.shard_replication_factor; -RESET citus.replication_model; -- Check that repeated calls to start_metadata_sync_to_node has no side effects \c - - - :master_port SELECT start_metadata_sync_to_node('localhost', :worker_1_port); @@ -399,7 +409,7 @@ SELECT * FROM pg_dist_node ORDER BY nodeid; (4 rows) SELECT * FROM pg_dist_partition ORDER BY logicalrelid; - logicalrelid | partmethod | partkey | colocationid | repmodel + logicalrelid | partmethod | partkey | colocationid | repmodel --------------------------------------------------------------------- mx_testing_schema.mx_test_table | h | {VAR :varno 1 :varattno 1 :vartype 23 :vartypmod -1 :varcollid 0 :varlevelsup 0 :varnoold 1 :varoattno 1 :location -1} | 0 | s (1 row) @@ -436,7 +446,8 @@ SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='mx_testing_sch col_1 | integer | col_2 | text | not null col_3 | bigint | not null default nextval('mx_testing_schema.mx_test_table_col_3_seq'::regclass) -(3 rows) + col_4 | bigint | default nextval('user_defined_seq'::regclass) +(4 rows) SELECT "Column", "Type", "Definition" FROM index_attrs WHERE relid = 'mx_testing_schema.mx_test_table_col_1_key'::regclass; @@ -473,7 +484,6 @@ SELECT hasmetadata FROM pg_dist_node WHERE nodeport=:worker_2_port; -- Check that the distributed table can be queried from the worker \c - - - :master_port SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; SELECT start_metadata_sync_to_node('localhost', :worker_1_port); start_metadata_sync_to_node --------------------------------------------------------------------- @@ -564,7 +574,6 @@ CREATE SCHEMA mx_test_schema_1; CREATE SCHEMA mx_test_schema_2; -- Create MX tables SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; CREATE TABLE mx_test_schema_1.mx_table_1 (col1 int UNIQUE, col2 text); CREATE INDEX mx_index_1 ON mx_test_schema_1.mx_table_1 (col1); CREATE TABLE mx_test_schema_2.mx_table_2 (col1 int, col2 text); @@ -814,7 +823,6 @@ SELECT nextval('pg_catalog.pg_dist_colocationid_seq') AS last_colocation_id \gse ALTER SEQUENCE pg_catalog.pg_dist_colocationid_seq RESTART 10000; SET citus.shard_count TO 7; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; CREATE TABLE mx_colocation_test_1 (a int); SELECT create_distributed_table('mx_colocation_test_1', 'a'); create_distributed_table @@ -907,7 +915,6 @@ DROP TABLE mx_colocation_test_2; \c - - - :master_port SET citus.shard_count TO 7; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; CREATE TABLE mx_temp_drop_test (a int); SELECT create_distributed_table('mx_temp_drop_test', 'a'); create_distributed_table @@ -940,7 +947,6 @@ DROP TABLE mx_temp_drop_test; \c - - - :master_port SET citus.shard_count TO 3; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; SELECT stop_metadata_sync_to_node('localhost', :worker_1_port); stop_metadata_sync_to_node --------------------------------------------------------------------- @@ -981,7 +987,6 @@ INSERT INTO mx_table_with_small_sequence VALUES (0); INSERT INTO mx_table_with_small_sequence VALUES (1), 
(3); \c - - - :master_port SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; -- Create an MX table with (BIGSERIAL) sequences CREATE TABLE mx_table_with_sequence(a int, b BIGSERIAL, c BIGSERIAL); SELECT create_distributed_table('mx_table_with_sequence', 'a'); @@ -1203,7 +1208,6 @@ HINT: Connect to worker nodes directly to manually create all necessary users a -- Create an mx table as a different user CREATE TABLE mx_table (a int, b BIGSERIAL); SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; SELECT create_distributed_table('mx_table', 'a'); create_distributed_table --------------------------------------------------------------------- @@ -1540,7 +1544,6 @@ SELECT pg_reload_conf(); t (1 row) -SET citus.replication_model TO 'streaming'; SET citus.shard_replication_factor TO 1; CREATE TABLE dist_table_1(a int); SELECT create_distributed_table('dist_table_1', 'a'); @@ -1606,6 +1609,145 @@ SELECT pg_reload_conf(); (1 row) UPDATE pg_dist_node SET metadatasynced=true WHERE nodeport=:worker_1_port; +SELECT master_add_node('localhost', :worker_2_port); + master_add_node +--------------------------------------------------------------------- + 7 +(1 row) + +SELECT start_metadata_sync_to_node('localhost', :worker_2_port); + start_metadata_sync_to_node +--------------------------------------------------------------------- + +(1 row) + +CREATE SEQUENCE mx_test_sequence_0; +CREATE SEQUENCE mx_test_sequence_1; +-- test create_distributed_table +CREATE TABLE test_table (id int DEFAULT nextval('mx_test_sequence_0')); +SELECT create_distributed_table('test_table', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- shouldn't work since it's partition column +ALTER TABLE test_table ALTER COLUMN id SET DEFAULT nextval('mx_test_sequence_1'); +ERROR: cannot execute ALTER TABLE command involving partition column +-- test different plausible commands +ALTER TABLE test_table ADD COLUMN id2 int DEFAULT nextval('mx_test_sequence_1'); +ALTER TABLE test_table ALTER COLUMN id2 DROP DEFAULT; +ALTER TABLE test_table ALTER COLUMN id2 SET DEFAULT nextval('mx_test_sequence_1'); +SELECT unnest(master_metadata_snapshot()) order by 1; + unnest +--------------------------------------------------------------------- + ALTER SEQUENCE mx_testing_schema.mx_test_table_col_3_seq OWNER TO postgres + ALTER SEQUENCE public.mx_test_sequence_0 OWNER TO postgres + ALTER SEQUENCE public.mx_test_sequence_1 OWNER TO postgres + ALTER SEQUENCE public.user_defined_seq OWNER TO postgres + ALTER TABLE mx_test_schema_1.mx_table_1 ADD CONSTRAINT mx_fk_constraint_2 FOREIGN KEY (col1) REFERENCES mx_test_schema_2.mx_table_2(col1) NOT VALID + ALTER TABLE mx_test_schema_1.mx_table_1 ADD CONSTRAINT mx_table_1_col1_key UNIQUE (col1) + ALTER TABLE mx_test_schema_1.mx_table_1 OWNER TO postgres + ALTER TABLE mx_test_schema_1.mx_table_1 OWNER TO postgres + ALTER TABLE mx_test_schema_2.mx_table_2 ADD CONSTRAINT mx_fk_constraint FOREIGN KEY (col1) REFERENCES mx_test_schema_1.mx_table_1(col1) + ALTER TABLE mx_test_schema_2.mx_table_2 ADD CONSTRAINT mx_table_2_col1_key UNIQUE (col1) + ALTER TABLE mx_test_schema_2.mx_table_2 OWNER TO postgres + ALTER TABLE mx_test_schema_2.mx_table_2 OWNER TO postgres + ALTER TABLE mx_testing_schema.mx_test_table ADD CONSTRAINT mx_test_table_col_1_key UNIQUE (col_1) + ALTER TABLE mx_testing_schema.mx_test_table OWNER TO postgres + ALTER TABLE mx_testing_schema.mx_test_table OWNER TO postgres + 
ALTER TABLE public.dist_table_1 OWNER TO postgres + ALTER TABLE public.dist_table_1 OWNER TO postgres + ALTER TABLE public.mx_ref OWNER TO postgres + ALTER TABLE public.mx_ref OWNER TO postgres + ALTER TABLE public.test_table OWNER TO postgres + ALTER TABLE public.test_table OWNER TO postgres + CREATE INDEX mx_index ON mx_testing_schema.mx_test_table USING btree (col_2) + CREATE INDEX mx_index_1 ON mx_test_schema_1.mx_table_1 USING btree (col1) + CREATE INDEX mx_index_2 ON mx_test_schema_2.mx_table_2 USING btree (col2) + CREATE TABLE mx_test_schema_1.mx_table_1 (col1 integer, col2 text, col3 integer) + CREATE TABLE mx_test_schema_2.mx_table_2 (col1 integer, col2 text) + CREATE TABLE mx_testing_schema.mx_test_table (col_1 integer, col_2 text NOT NULL, col_3 bigint DEFAULT nextval('mx_testing_schema.mx_test_table_col_3_seq'::regclass) NOT NULL, col_4 bigint DEFAULT nextval('public.user_defined_seq'::regclass)) + CREATE TABLE public.dist_table_1 (a integer) + CREATE TABLE public.mx_ref (col_1 integer, col_2 text) + CREATE TABLE public.test_table (id integer DEFAULT nextval('public.mx_test_sequence_0'::regclass), id2 integer DEFAULT nextval('public.mx_test_sequence_1'::regclass)) + INSERT INTO pg_dist_node (nodeid, groupid, nodename, nodeport, noderack, hasmetadata, metadatasynced, isactive, noderole, nodecluster) VALUES (4, 1, 'localhost', 8888, 'default', FALSE, FALSE, TRUE, 'secondary'::noderole, 'default'),(5, 1, 'localhost', 8889, 'default', FALSE, FALSE, TRUE, 'secondary'::noderole, 'second-cluster'),(1, 1, 'localhost', 57637, 'default', TRUE, TRUE, TRUE, 'primary'::noderole, 'default'),(7, 5, 'localhost', 57638, 'default', TRUE, TRUE, TRUE, 'primary'::noderole, 'default') + INSERT INTO pg_dist_partition (logicalrelid, partmethod, partkey, colocationid, repmodel) VALUES ('mx_test_schema_1.mx_table_1'::regclass, 'h', column_name_to_column('mx_test_schema_1.mx_table_1','col1'), 3, 's') + INSERT INTO pg_dist_partition (logicalrelid, partmethod, partkey, colocationid, repmodel) VALUES ('mx_test_schema_2.mx_table_2'::regclass, 'h', column_name_to_column('mx_test_schema_2.mx_table_2','col1'), 3, 's') + INSERT INTO pg_dist_partition (logicalrelid, partmethod, partkey, colocationid, repmodel) VALUES ('mx_testing_schema.mx_test_table'::regclass, 'h', column_name_to_column('mx_testing_schema.mx_test_table','col_1'), 0, 's') + INSERT INTO pg_dist_partition (logicalrelid, partmethod, partkey, colocationid, repmodel) VALUES ('public.dist_table_1'::regclass, 'h', column_name_to_column('public.dist_table_1','a'), 10004, 's') + INSERT INTO pg_dist_partition (logicalrelid, partmethod, partkey, colocationid, repmodel) VALUES ('public.mx_ref'::regclass, 'n', NULL, 10002, 't') + INSERT INTO pg_dist_partition (logicalrelid, partmethod, partkey, colocationid, repmodel) VALUES ('public.test_table'::regclass, 'h', column_name_to_column('public.test_table','id'), 10004, 's') + INSERT INTO pg_dist_placement (shardid, shardstate, shardlength, groupid, placementid) VALUES (1310000, 1, 0, 1, 100000),(1310001, 1, 0, 5, 100001),(1310002, 1, 0, 1, 100002),(1310003, 1, 0, 5, 100003),(1310004, 1, 0, 1, 100004),(1310005, 1, 0, 5, 100005),(1310006, 1, 0, 1, 100006),(1310007, 1, 0, 5, 100007) + INSERT INTO pg_dist_placement (shardid, shardstate, shardlength, groupid, placementid) VALUES (1310020, 1, 0, 1, 100020),(1310021, 1, 0, 5, 100021),(1310022, 1, 0, 1, 100022),(1310023, 1, 0, 5, 100023),(1310024, 1, 0, 1, 100024) + INSERT INTO pg_dist_placement (shardid, shardstate, shardlength, groupid, placementid) VALUES (1310025, 
1, 0, 1, 100025),(1310026, 1, 0, 5, 100026),(1310027, 1, 0, 1, 100027),(1310028, 1, 0, 5, 100028),(1310029, 1, 0, 1, 100029) + INSERT INTO pg_dist_placement (shardid, shardstate, shardlength, groupid, placementid) VALUES (1310073, 1, 0, 1, 100074),(1310073, 1, 0, 5, 100075) + INSERT INTO pg_dist_placement (shardid, shardstate, shardlength, groupid, placementid) VALUES (1310074, 1, 0, 1, 100076),(1310075, 1, 0, 5, 100077),(1310076, 1, 0, 1, 100078),(1310077, 1, 0, 5, 100079) + INSERT INTO pg_dist_placement (shardid, shardstate, shardlength, groupid, placementid) VALUES (1310083, 1, 0, 1, 100086),(1310084, 1, 0, 5, 100087),(1310085, 1, 0, 1, 100088),(1310086, 1, 0, 5, 100089) + INSERT INTO pg_dist_shard (logicalrelid, shardid, shardstorage, shardminvalue, shardmaxvalue) VALUES ('mx_test_schema_1.mx_table_1'::regclass, 1310020, 't', '-2147483648', '-1288490190'),('mx_test_schema_1.mx_table_1'::regclass, 1310021, 't', '-1288490189', '-429496731'),('mx_test_schema_1.mx_table_1'::regclass, 1310022, 't', '-429496730', '429496728'),('mx_test_schema_1.mx_table_1'::regclass, 1310023, 't', '429496729', '1288490187'),('mx_test_schema_1.mx_table_1'::regclass, 1310024, 't', '1288490188', '2147483647') + INSERT INTO pg_dist_shard (logicalrelid, shardid, shardstorage, shardminvalue, shardmaxvalue) VALUES ('mx_test_schema_2.mx_table_2'::regclass, 1310025, 't', '-2147483648', '-1288490190'),('mx_test_schema_2.mx_table_2'::regclass, 1310026, 't', '-1288490189', '-429496731'),('mx_test_schema_2.mx_table_2'::regclass, 1310027, 't', '-429496730', '429496728'),('mx_test_schema_2.mx_table_2'::regclass, 1310028, 't', '429496729', '1288490187'),('mx_test_schema_2.mx_table_2'::regclass, 1310029, 't', '1288490188', '2147483647') + INSERT INTO pg_dist_shard (logicalrelid, shardid, shardstorage, shardminvalue, shardmaxvalue) VALUES ('mx_testing_schema.mx_test_table'::regclass, 1310000, 't', '-2147483648', '-1610612737'),('mx_testing_schema.mx_test_table'::regclass, 1310001, 't', '-1610612736', '-1073741825'),('mx_testing_schema.mx_test_table'::regclass, 1310002, 't', '-1073741824', '-536870913'),('mx_testing_schema.mx_test_table'::regclass, 1310003, 't', '-536870912', '-1'),('mx_testing_schema.mx_test_table'::regclass, 1310004, 't', '0', '536870911'),('mx_testing_schema.mx_test_table'::regclass, 1310005, 't', '536870912', '1073741823'),('mx_testing_schema.mx_test_table'::regclass, 1310006, 't', '1073741824', '1610612735'),('mx_testing_schema.mx_test_table'::regclass, 1310007, 't', '1610612736', '2147483647') + INSERT INTO pg_dist_shard (logicalrelid, shardid, shardstorage, shardminvalue, shardmaxvalue) VALUES ('public.dist_table_1'::regclass, 1310074, 't', '-2147483648', '-1073741825'),('public.dist_table_1'::regclass, 1310075, 't', '-1073741824', '-1'),('public.dist_table_1'::regclass, 1310076, 't', '0', '1073741823'),('public.dist_table_1'::regclass, 1310077, 't', '1073741824', '2147483647') + INSERT INTO pg_dist_shard (logicalrelid, shardid, shardstorage, shardminvalue, shardmaxvalue) VALUES ('public.mx_ref'::regclass, 1310073, 't', NULL, NULL) + INSERT INTO pg_dist_shard (logicalrelid, shardid, shardstorage, shardminvalue, shardmaxvalue) VALUES ('public.test_table'::regclass, 1310083, 't', '-2147483648', '-1073741825'),('public.test_table'::regclass, 1310084, 't', '-1073741824', '-1'),('public.test_table'::regclass, 1310085, 't', '0', '1073741823'),('public.test_table'::regclass, 1310086, 't', '1073741824', '2147483647') + SELECT 
pg_catalog.worker_record_sequence_dependency('mx_testing_schema.mx_test_table_col_3_seq'::regclass,'mx_testing_schema.mx_test_table'::regclass,'col_3') + SELECT worker_apply_sequence_command ('CREATE SEQUENCE IF NOT EXISTS mx_testing_schema.mx_test_table_col_3_seq INCREMENT BY 1 MINVALUE 1 MAXVALUE 9223372036854775807 START WITH 1 NO CYCLE','bigint') + SELECT worker_apply_sequence_command ('CREATE SEQUENCE IF NOT EXISTS public.mx_test_sequence_0 INCREMENT BY 1 MINVALUE 1 MAXVALUE 2147483647 START WITH 1 NO CYCLE','integer') + SELECT worker_apply_sequence_command ('CREATE SEQUENCE IF NOT EXISTS public.mx_test_sequence_1 INCREMENT BY 1 MINVALUE 1 MAXVALUE 2147483647 START WITH 1 NO CYCLE','integer') + SELECT worker_apply_sequence_command ('CREATE SEQUENCE IF NOT EXISTS public.user_defined_seq INCREMENT BY 1 MINVALUE 1 MAXVALUE 9223372036854775807 START WITH 1 NO CYCLE','bigint') + SELECT worker_create_truncate_trigger('mx_test_schema_1.mx_table_1') + SELECT worker_create_truncate_trigger('mx_test_schema_2.mx_table_2') + SELECT worker_create_truncate_trigger('mx_testing_schema.mx_test_table') + SELECT worker_create_truncate_trigger('public.dist_table_1') + SELECT worker_create_truncate_trigger('public.mx_ref') + SELECT worker_create_truncate_trigger('public.test_table') + SELECT worker_drop_distributed_table(logicalrelid::regclass::text) FROM pg_dist_partition + TRUNCATE pg_dist_node CASCADE +(62 rows) + +-- shouldn't work since test_table is MX +ALTER TABLE test_table ADD COLUMN id3 bigserial; +ERROR: cannot execute ADD COLUMN commands involving serial pseudotypes when metadata is synchronized to workers +-- shouldn't work since the above operations should be the only subcommands +ALTER TABLE test_table ADD COLUMN id4 int DEFAULT nextval('mx_test_sequence_1') CHECK (id4 > 0); +ERROR: cannot execute ADD COLUMN .. DEFAULT nextval('..') command with other subcommands/constraints +HINT: You can issue each subcommand separately +ALTER TABLE test_table ADD COLUMN id4 int, ADD COLUMN id5 int DEFAULT nextval('mx_test_sequence_1'); +ERROR: cannot execute ADD COLUMN .. DEFAULT nextval('..') command with other subcommands/constraints +HINT: You can issue each subcommand separately +ALTER TABLE test_table ALTER COLUMN id1 SET DEFAULT nextval('mx_test_sequence_1'), ALTER COLUMN id2 DROP DEFAULT; +ERROR: cannot execute ALTER COLUMN COLUMN .. 
SET DEFAULT nextval('..') command with other subcommands +HINT: You can issue each subcommand separately +ALTER TABLE test_table ADD COLUMN id4 bigserial CHECK (id4 > 0); +ERROR: cannot execute ADD COLUMN commands involving serial pseudotypes when metadata is synchronized to workers +\c - - - :worker_1_port +\ds + List of relations + Schema | Name | Type | Owner +--------------------------------------------------------------------- + public | mx_test_sequence_0 | sequence | postgres + public | mx_test_sequence_1 | sequence | postgres + public | user_defined_seq | sequence | postgres +(3 rows) + +\c - - - :master_port +CREATE SEQUENCE local_sequence; +-- verify that DROP SEQUENCE will propagate the command to workers for +-- the distributed sequences mx_test_sequence_0 and mx_test_sequence_1 +DROP SEQUENCE mx_test_sequence_0, mx_test_sequence_1, local_sequence CASCADE; +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to default value for column id2 of table test_table +drop cascades to default value for column id of table test_table +\c - - - :worker_1_port +\ds + List of relations + Schema | Name | Type | Owner +--------------------------------------------------------------------- + public | user_defined_seq | sequence | postgres +(1 row) + +\c - - - :master_port +DROP TABLE test_table CASCADE; -- Cleanup SELECT stop_metadata_sync_to_node('localhost', :worker_1_port); stop_metadata_sync_to_node @@ -1627,7 +1769,6 @@ DROP TABLE mx_ref; DROP TABLE dist_table_1, dist_table_2; RESET citus.shard_count; RESET citus.shard_replication_factor; -RESET citus.replication_model; RESET citus.multi_shard_commit_protocol; ALTER SEQUENCE pg_catalog.pg_dist_groupid_seq RESTART :last_group_id; ALTER SEQUENCE pg_catalog.pg_dist_node_nodeid_seq RESTART :last_node_id; diff --git a/src/test/regress/expected/multi_modifying_xacts.out b/src/test/regress/expected/multi_modifying_xacts.out index 24b6e7133..069b730b1 100644 --- a/src/test/regress/expected/multi_modifying_xacts.out +++ b/src/test/regress/expected/multi_modifying_xacts.out @@ -1359,6 +1359,7 @@ SELECT create_reference_table('itemgroups'); (1 row) +DROP TABLE IF EXISTS users ; CREATE TABLE users ( id int PRIMARY KEY, name text, diff --git a/src/test/regress/expected/multi_move_mx.out b/src/test/regress/expected/multi_move_mx.out index 3d67c97de..b0405153b 100644 --- a/src/test/regress/expected/multi_move_mx.out +++ b/src/test/regress/expected/multi_move_mx.out @@ -11,7 +11,6 @@ SELECT start_metadata_sync_to_node('localhost', :worker_2_port); -- Create mx test tables SET citus.shard_count TO 4; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; CREATE TABLE mx_table_1 (a int); SELECT create_distributed_table('mx_table_1', 'a'); create_distributed_table @@ -141,9 +140,10 @@ SELECT FROM pg_dist_shard NATURAL JOIN pg_dist_shard_placement WHERE - logicalrelid = 'mx_table_1'::regclass + (logicalrelid = 'mx_table_1'::regclass OR logicalrelid = 'mx_table_2'::regclass - OR logicalrelid = 'mx_table_3'::regclass + OR logicalrelid = 'mx_table_3'::regclass) + AND shardstate != 4 ORDER BY logicalrelid, shardid; logicalrelid | shardid | nodename | nodeport @@ -231,5 +231,3 @@ DELETE FROM pg_dist_node; DELETE FROM pg_dist_partition; DELETE FROM pg_dist_shard; DELETE FROM pg_dist_shard_placement; -\c - - - :master_port -RESET citus.replication_model; diff --git a/src/test/regress/expected/multi_multiuser.out b/src/test/regress/expected/multi_multiuser.out index 38fd338dd..6501861d4 100644 --- 
a/src/test/regress/expected/multi_multiuser.out +++ b/src/test/regress/expected/multi_multiuser.out @@ -108,7 +108,6 @@ GRANT USAGE ON SCHEMA full_access_user_schema TO full_access; GRANT ALL ON SCHEMA full_access_user_schema TO full_access; GRANT USAGE ON SCHEMA full_access_user_schema TO usage_access; \c - - - :master_port -SET citus.replication_model TO 'streaming'; SET citus.shard_replication_factor TO 1; -- create prepare tests PREPARE prepare_insert AS INSERT INTO test VALUES ($1); @@ -273,7 +272,7 @@ IF substring(current_Setting('server_version'), '\d+')::int >= 12 THEN -- create columnar table CREATE TABLE columnar_table (a int) USING columnar; -- alter a columnar table that is created by that unprivileged user - SELECT alter_columnar_table_set('columnar_table', chunk_group_row_limit => 100); + SELECT alter_columnar_table_set('columnar_table', chunk_group_row_limit => 2000); -- and drop it DROP TABLE columnar_table; $$; diff --git a/src/test/regress/expected/multi_mx_add_coordinator.out b/src/test/regress/expected/multi_mx_add_coordinator.out index a3cd98351..de3f67f9f 100644 --- a/src/test/regress/expected/multi_mx_add_coordinator.out +++ b/src/test/regress/expected/multi_mx_add_coordinator.out @@ -4,7 +4,6 @@ SET citus.shard_replication_factor TO 1; SET citus.shard_count TO 8; SET citus.next_shard_id TO 7000000; SET citus.next_placement_id TO 7000000; -SET citus.replication_model TO streaming; SET client_min_messages TO WARNING; CREATE USER reprefuser WITH LOGIN; SELECT run_command_on_workers('CREATE USER reprefuser WITH LOGIN'); diff --git a/src/test/regress/expected/multi_mx_alter_distributed_table.out b/src/test/regress/expected/multi_mx_alter_distributed_table.out index 3c9056218..5960c1c14 100644 --- a/src/test/regress/expected/multi_mx_alter_distributed_table.out +++ b/src/test/regress/expected/multi_mx_alter_distributed_table.out @@ -2,7 +2,6 @@ CREATE SCHEMA mx_alter_distributed_table; SET search_path TO mx_alter_distributed_table; SET citus.shard_replication_factor TO 1; ALTER SEQUENCE pg_catalog.pg_dist_colocationid_seq RESTART 1410000; -SET citus.replication_model TO 'streaming'; -- test alter_distributed_table UDF CREATE TABLE adt_table (a INT, b INT); CREATE TABLE adt_col (a INT UNIQUE, b INT); diff --git a/src/test/regress/expected/multi_mx_call.out b/src/test/regress/expected/multi_mx_call.out index 0453bb752..35bdf672e 100644 --- a/src/test/regress/expected/multi_mx_call.out +++ b/src/test/regress/expected/multi_mx_call.out @@ -3,7 +3,6 @@ create schema multi_mx_call; set search_path to multi_mx_call, public; -- Create worker-local tables to test procedure calls were routed set citus.shard_replication_factor to 2; -set citus.replication_model to 'statement'; -- This table requires specific settings, create before getting into things create table mx_call_dist_table_replica(id int, val int); select create_distributed_table('mx_call_dist_table_replica', 'id'); @@ -14,7 +13,6 @@ select create_distributed_table('mx_call_dist_table_replica', 'id'); insert into mx_call_dist_table_replica values (9,1),(8,2),(7,3),(6,4),(5,5); set citus.shard_replication_factor to 1; -set citus.replication_model to 'streaming'; -- -- Create tables and procedures we want to use in tests -- diff --git a/src/test/regress/expected/multi_mx_create_table.out b/src/test/regress/expected/multi_mx_create_table.out index 9c351f763..deae386ea 100644 --- a/src/test/regress/expected/multi_mx_create_table.out +++ b/src/test/regress/expected/multi_mx_create_table.out @@ -140,7 +140,6 @@ CREATE OPERATOR 
citus_mx_test_schema.=== ( -- connect back to the master, and do some more tests \c - - - :master_port SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO streaming; SET search_path TO public; CREATE TABLE nation_hash( n_nationkey integer not null, diff --git a/src/test/regress/expected/multi_mx_ddl.out b/src/test/regress/expected/multi_mx_ddl.out index a7b203df3..f69e4b49a 100644 --- a/src/test/regress/expected/multi_mx_ddl.out +++ b/src/test/regress/expected/multi_mx_ddl.out @@ -219,7 +219,6 @@ DROP INDEX ddl_test_index; -- show that sequences owned by mx tables result in unique values SET citus.shard_replication_factor TO 1; SET citus.shard_count TO 4; -SET citus.replication_model TO streaming; CREATE TABLE mx_sequence(key INT, value BIGSERIAL); SELECT create_distributed_table('mx_sequence', 'key'); create_distributed_table @@ -242,7 +241,9 @@ SELECT :worker_1_lastval = :worker_2_lastval; -- the type of sequences can't be changed ALTER TABLE mx_sequence ALTER value TYPE BIGINT; +ERROR: cannot execute ALTER COLUMN TYPE .. command because the column involves a default coming from a sequence ALTER TABLE mx_sequence ALTER value TYPE INT; +ERROR: cannot execute ALTER COLUMN TYPE .. command because the column involves a default coming from a sequence -- test distributed tables owned by extension CREATE TABLE seg_test (x int); INSERT INTO seg_test VALUES (42); @@ -267,7 +268,6 @@ HINT: You can add/drop the member objects on the workers as well. -- sync table metadata, but skip CREATE TABLE SET citus.shard_replication_factor TO 1; SET citus.shard_count TO 4; -SET citus.replication_model TO streaming; SELECT create_distributed_table('seg_test', 'x'); NOTICE: Copying data from local table... NOTICE: copying the data has completed diff --git a/src/test/regress/expected/multi_mx_function_call_delegation.out b/src/test/regress/expected/multi_mx_function_call_delegation.out index 10e5bf231..8f36b094a 100644 --- a/src/test/regress/expected/multi_mx_function_call_delegation.out +++ b/src/test/regress/expected/multi_mx_function_call_delegation.out @@ -2,7 +2,6 @@ CREATE SCHEMA multi_mx_function_call_delegation; SET search_path TO multi_mx_function_call_delegation, public; SET citus.shard_replication_factor TO 2; -SET citus.replication_model TO 'statement'; -- This table requires specific settings, create before getting into things create table mx_call_dist_table_replica(id int, val int); select create_distributed_table('mx_call_dist_table_replica', 'id'); @@ -13,7 +12,6 @@ select create_distributed_table('mx_call_dist_table_replica', 'id'); insert into mx_call_dist_table_replica values (9,1),(8,2),(7,3),(6,4),(5,5); SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; -- -- Create tables and functions we want to use in tests -- @@ -544,7 +542,7 @@ select start_metadata_sync_to_node('localhost', :worker_2_port); \c - - - :master_port SET search_path to multi_mx_function_call_delegation, public; SET client_min_messages TO DEBUG1; -SET citus.replication_model = 'streaming'; +SET citus.shard_replication_factor = 1; -- -- Test non-const parameter values -- diff --git a/src/test/regress/expected/multi_mx_function_table_reference.out b/src/test/regress/expected/multi_mx_function_table_reference.out index 2dce631d3..c0516c772 100644 --- a/src/test/regress/expected/multi_mx_function_table_reference.out +++ b/src/test/regress/expected/multi_mx_function_table_reference.out @@ -9,7 +9,6 @@ CREATE SCHEMA function_table_reference; SET search_path TO 
function_table_reference; SET citus.shard_count TO 4; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO streaming; SELECT start_metadata_sync_to_node('localhost', :worker_1_port); start_metadata_sync_to_node --------------------------------------------------------------------- diff --git a/src/test/regress/expected/multi_mx_hide_shard_names.out b/src/test/regress/expected/multi_mx_hide_shard_names.out index cbc57ef7b..b2965fba5 100644 --- a/src/test/regress/expected/multi_mx_hide_shard_names.out +++ b/src/test/regress/expected/multi_mx_hide_shard_names.out @@ -24,7 +24,6 @@ CREATE SCHEMA mx_hide_shard_names; SET search_path TO 'mx_hide_shard_names'; SET citus.shard_count TO 4; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; SELECT start_metadata_sync_to_node('localhost', :worker_1_port); start_metadata_sync_to_node --------------------------------------------------------------------- @@ -139,7 +138,6 @@ SELECT pg_table_is_visible('test_table_1130000'::regclass); SET search_path TO 'mx_hide_shard_names'; SET citus.shard_count TO 4; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; -- not existing shard ids appended to the distributed table name CREATE TABLE test_table_102008(id int, time date); SELECT create_distributed_table('test_table_102008', 'id'); @@ -179,7 +177,6 @@ CREATE SCHEMA mx_hide_shard_names_2; SET search_path TO 'mx_hide_shard_names_2'; SET citus.shard_count TO 4; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; CREATE TABLE test_table(id int, time date); SELECT create_distributed_table('test_table', 'id'); create_distributed_table @@ -236,7 +233,6 @@ SELECT * FROM citus_shard_indexes_on_worker ORDER BY 2; \c - - - :master_port SET citus.shard_count TO 4; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; CREATE SCHEMA mx_hide_shard_names_3; SET search_path TO 'mx_hide_shard_names_3'; -- Verify that a table name > 56 characters handled properly. 
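The hunks above and below repeat a single mechanical change: the citus.replication_model setting is dropped from the test setup, and wherever a replacement is still needed the tests set citus.shard_replication_factor to 1 instead. A minimal sketch of the new-style setup follows; the table name example_mx is illustrative and does not appear in the patch.

-- Streaming-replicated (MX-style) tables are now requested through the
-- replication factor alone; no replication_model GUC is set.
SET citus.shard_replication_factor TO 1;
CREATE TABLE example_mx (id bigint PRIMARY KEY, payload text);
SELECT create_distributed_table('example_mx', 'id');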
@@ -269,7 +265,6 @@ SELECT * FROM citus_shards_on_worker ORDER BY 2; \c - - - :master_port SET citus.shard_count TO 4; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; CREATE SCHEMA "CiTuS.TeeN"; SET search_path TO "CiTuS.TeeN"; CREATE TABLE "TeeNTabLE.1!?!"(id int, "TeNANt_Id" int); diff --git a/src/test/regress/expected/multi_mx_insert_select_repartition.out b/src/test/regress/expected/multi_mx_insert_select_repartition.out index ce8a8053b..59dca03e3 100644 --- a/src/test/regress/expected/multi_mx_insert_select_repartition.out +++ b/src/test/regress/expected/multi_mx_insert_select_repartition.out @@ -2,7 +2,6 @@ CREATE SCHEMA multi_mx_insert_select_repartition; SET search_path TO multi_mx_insert_select_repartition; SET citus.next_shard_id TO 4213581; -SET citus.replication_model TO 'streaming'; SET citus.shard_replication_factor TO 1; SET citus.shard_count TO 4; CREATE TABLE source_table(a int, b int); diff --git a/src/test/regress/expected/multi_mx_metadata.out b/src/test/regress/expected/multi_mx_metadata.out index dbeb23e1e..fe03d35dd 100644 --- a/src/test/regress/expected/multi_mx_metadata.out +++ b/src/test/regress/expected/multi_mx_metadata.out @@ -29,7 +29,6 @@ CREATE TABLE distributed_mx_table ( ); CREATE INDEX ON distributed_mx_table USING GIN (value); SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO streaming; SET citus.shard_count TO 4; SELECT create_distributed_table('distributed_mx_table', 'key'); create_distributed_table @@ -135,7 +134,6 @@ WHERE logicalrelid = 'distributed_mx_table'::regclass; -- Create a table and then roll back the transaction \c - - - :master_port SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO streaming; BEGIN; CREATE TABLE should_not_exist ( key text primary key, @@ -159,7 +157,6 @@ SELECT count(*) FROM pg_tables WHERE tablename = 'should_not_exist'; -- Ensure that we don't allow prepare on a metadata transaction \c - - - :master_port SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO streaming; BEGIN; CREATE TABLE should_not_exist ( key text primary key, @@ -208,7 +205,6 @@ WHERE logicalrelid = 'citus_mx_schema_for_xacts.objects_for_xacts'::regclass; \c - - - :master_port SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO streaming; -- now show that we can rollback on creating mx table, but shards remain.... 
BEGIN; CREATE SCHEMA IF NOT EXISTS citus_mx_schema_for_xacts; diff --git a/src/test/regress/expected/multi_mx_modifications_to_reference_tables.out b/src/test/regress/expected/multi_mx_modifications_to_reference_tables.out index fb2961934..b8590fc7d 100644 --- a/src/test/regress/expected/multi_mx_modifications_to_reference_tables.out +++ b/src/test/regress/expected/multi_mx_modifications_to_reference_tables.out @@ -6,7 +6,6 @@ CREATE SCHEMA mx_modify_reference_table; SET search_path TO 'mx_modify_reference_table'; SET citus.shard_count TO 4; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; SELECT start_metadata_sync_to_node('localhost', :worker_1_port); start_metadata_sync_to_node --------------------------------------------------------------------- diff --git a/src/test/regress/expected/multi_mx_node_metadata.out b/src/test/regress/expected/multi_mx_node_metadata.out index bf473c310..9ac2c6043 100644 --- a/src/test/regress/expected/multi_mx_node_metadata.out +++ b/src/test/regress/expected/multi_mx_node_metadata.out @@ -5,7 +5,6 @@ SELECT nextval('pg_catalog.pg_dist_groupid_seq') AS last_group_id \gset SELECT nextval('pg_catalog.pg_dist_node_nodeid_seq') AS last_node_id \gset SELECT nextval('pg_catalog.pg_dist_colocationid_seq') AS last_colocation_id \gset SELECT nextval('pg_catalog.pg_dist_shardid_seq') AS last_shard_id \gset -SET citus.replication_model TO streaming; SET citus.shard_count TO 8; SET citus.shard_replication_factor TO 1; SET citus.replicate_reference_tables_on_activate TO off; @@ -65,6 +64,14 @@ SELECT create_distributed_table('dist_table_1', 'a'); (1 row) +CREATE SEQUENCE sequence; +CREATE TABLE reference_table (a int default nextval('sequence')); +SELECT create_reference_table('reference_table'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + -- update the node SELECT 1 FROM master_update_node((SELECT nodeid FROM pg_dist_node), 'localhost', :worker_2_port); @@ -805,7 +812,10 @@ SELECT datname FROM pg_stat_activity WHERE application_name LIKE 'Citus Met%'; (0 rows) -- cleanup +DROP SEQUENCE sequence CASCADE; +NOTICE: drop cascades to default value for column a of table reference_table DROP TABLE ref_table; +DROP TABLE reference_table; TRUNCATE pg_dist_colocation; SELECT count(*) FROM (SELECT master_remove_node(nodename, nodeport) FROM pg_dist_node) t; count @@ -820,4 +830,3 @@ ALTER SEQUENCE pg_catalog.pg_dist_placement_placementid_seq RESTART :last_placem ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART :last_shard_id; RESET citus.shard_count; RESET citus.shard_replication_factor; -RESET citus.replication_model; diff --git a/src/test/regress/expected/multi_mx_partitioning.out b/src/test/regress/expected/multi_mx_partitioning.out index 3b4704abb..346dd29ec 100644 --- a/src/test/regress/expected/multi_mx_partitioning.out +++ b/src/test/regress/expected/multi_mx_partitioning.out @@ -5,7 +5,6 @@ SET citus.next_shard_id TO 1700000; SET citus.shard_count TO 4; SET citus.shard_replication_factor TO 1; -- make sure wen can create partitioning tables in MX -SET citus.replication_model TO 'streaming'; SELECT start_metadata_sync_to_node('localhost', :worker_1_port); start_metadata_sync_to_node --------------------------------------------------------------------- @@ -88,7 +87,6 @@ SELECT inhrelid::regclass FROM pg_inherits WHERE inhparent = 'partitioning_test' (2 rows) \c - - - :master_port -SET citus.replication_model TO 'streaming'; SET citus.shard_replication_factor TO 1; -- 
2-) Creating partition of a distributed table CREATE TABLE partitioning_test_2011 PARTITION OF partitioning_test FOR VALUES FROM ('2011-01-01') TO ('2012-01-01'); @@ -131,7 +129,6 @@ SELECT inhrelid::regclass FROM pg_inherits WHERE inhparent = 'partitioning_test' (3 rows) \c - - - :master_port -SET citus.replication_model TO 'streaming'; SET citus.shard_replication_factor TO 1; -- 3-) Attaching non distributed table to a distributed table CREATE TABLE partitioning_test_2012(id int, time date); @@ -195,7 +192,6 @@ SELECT inhrelid::regclass FROM pg_inherits WHERE inhparent = 'partitioning_test' (4 rows) \c - - - :master_port -SET citus.replication_model TO 'streaming'; SET citus.shard_replication_factor TO 1; -- 4-) Attaching distributed table to distributed table CREATE TABLE partitioning_test_2013(id int, time date); @@ -281,7 +277,6 @@ DROP TABLE partitioning_test; DROP TABLE IF EXISTS partitioning_test_2013; NOTICE: table "partitioning_test_2013" does not exist, skipping -- test schema drop with partitioned tables -SET citus.replication_model TO 'streaming'; SET citus.shard_replication_factor TO 1; CREATE SCHEMA partition_test; SET SEARCH_PATH TO partition_test; diff --git a/src/test/regress/expected/multi_mx_reference_table.out b/src/test/regress/expected/multi_mx_reference_table.out index 6ec100c80..ee1f07e3a 100644 --- a/src/test/regress/expected/multi_mx_reference_table.out +++ b/src/test/regress/expected/multi_mx_reference_table.out @@ -779,7 +779,6 @@ ORDER BY \c - - - :master_port SET citus.shard_count TO 6; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO streaming; CREATE TABLE colocated_table_test (value_1 int, value_2 float, value_3 text, value_4 timestamp); SELECT create_distributed_table('colocated_table_test', 'value_1'); create_distributed_table diff --git a/src/test/regress/expected/multi_mx_repartition_udt_prepare.out b/src/test/regress/expected/multi_mx_repartition_udt_prepare.out index 92835d031..114e55a0f 100644 --- a/src/test/regress/expected/multi_mx_repartition_udt_prepare.out +++ b/src/test/regress/expected/multi_mx_repartition_udt_prepare.out @@ -124,7 +124,6 @@ FUNCTION 1 test_udt_hash(test_udt); \c - - - :master_port -- Distribute and populate the two tables. 
SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO streaming; SET citus.shard_count TO 3; SELECT create_distributed_table('repartition_udt', 'pk'); create_distributed_table diff --git a/src/test/regress/expected/multi_mx_schema_support.out b/src/test/regress/expected/multi_mx_schema_support.out index fca95340c..7cb5ae1fb 100644 --- a/src/test/regress/expected/multi_mx_schema_support.out +++ b/src/test/regress/expected/multi_mx_schema_support.out @@ -381,7 +381,6 @@ CREATE SCHEMA mx_ddl_schema_2; CREATE SCHEMA "CiTuS.TeAeN"; SET citus.shard_count TO 4; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; -- in the first test make sure that we handle DDLs -- when search path is set SET search_path TO mx_ddl_schema_1; diff --git a/src/test/regress/expected/multi_mx_transaction_recovery.out b/src/test/regress/expected/multi_mx_transaction_recovery.out index d108c61ec..f253d5b5d 100644 --- a/src/test/regress/expected/multi_mx_transaction_recovery.out +++ b/src/test/regress/expected/multi_mx_transaction_recovery.out @@ -1,7 +1,6 @@ -- Tests for running transaction recovery from a worker node SET citus.shard_count TO 4; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO streaming; CREATE TABLE test_recovery (x text); SELECT create_distributed_table('test_recovery', 'x'); create_distributed_table diff --git a/src/test/regress/expected/multi_mx_truncate_from_worker.out b/src/test/regress/expected/multi_mx_truncate_from_worker.out index 3f2d4ee4a..ab87821fb 100644 --- a/src/test/regress/expected/multi_mx_truncate_from_worker.out +++ b/src/test/regress/expected/multi_mx_truncate_from_worker.out @@ -4,7 +4,6 @@ SET citus.next_shard_id TO 2380000; SET citus.next_placement_id TO 2380000; SET citus.shard_replication_factor TO 1; SET citus.shard_count TO 6; -SET citus.replication_model TO streaming; CREATE TABLE "refer'ence_table"(id int PRIMARY KEY); SELECT create_reference_table('refer''ence_table'); create_reference_table diff --git a/src/test/regress/expected/multi_partitioning.out b/src/test/regress/expected/multi_partitioning.out index 9677ba68e..dfcc422b7 100644 --- a/src/test/regress/expected/multi_partitioning.out +++ b/src/test/regress/expected/multi_partitioning.out @@ -1815,7 +1815,7 @@ IF EXISTS partitioning_locks, partitioning_locks_for_select; -- make sure we can create a partitioned table with streaming replication -SET citus.replication_model TO 'streaming'; +SET citus.shard_replication_factor TO 1; CREATE TABLE partitioning_test(id int, time date) PARTITION BY RANGE (time); CREATE TABLE partitioning_test_2009 PARTITION OF partitioning_test FOR VALUES FROM ('2009-01-01') TO ('2010-01-01'); SELECT create_distributed_table('partitioning_test', 'id'); @@ -1947,6 +1947,7 @@ ORDER BY "schema-test_2009" | 4 (2 rows) +SET citus.next_shard_id TO 1660300; -- test we don't deadlock when attaching and detaching partitions from partitioned -- tables with foreign keys CREATE TABLE reference_table(id int PRIMARY KEY); @@ -2001,8 +2002,8 @@ INSERT INTO partitioning_test_2010 VALUES (1, '2010-02-01'); -- This should fail because of foreign key constraint violation ALTER TABLE partitioning_test ATTACH PARTITION partitioning_test_2010 FOR VALUES FROM ('2010-01-01') TO ('2011-01-01'); -ERROR: insert or update on table "partitioning_test_2010_1660191" violates foreign key constraint "partitioning_reference_fkey_1660179" -DETAIL: Key (id)=(X) is not present in table "reference_table_1660177". 
+ERROR: insert or update on table "partitioning_test_2010_1660314" violates foreign key constraint "partitioning_reference_fkey_1660302" +DETAIL: Key (id)=(X) is not present in table "reference_table_1660300". CONTEXT: while executing command on localhost:xxxxx -- Truncate, so attaching again won't fail TRUNCATE partitioning_test_2010; @@ -2083,9 +2084,28 @@ ALTER TABLE partitioning_test DETACH PARTITION partitioning_test_2009; ALTER TABLE partitioning_test DETACH PARTITION partitioning_test_2010; ALTER TABLE partitioning_test DETACH PARTITION partitioning_test_2011; ALTER TABLE partitioning_test DETACH PARTITION partitioning_test_2013; -DROP TABLE partitioning_test, partitioning_test_2008, partitioning_test_2009, - partitioning_test_2010, partitioning_test_2011, partitioning_test_2013, - reference_table, reference_table_2; +DROP TABLE partitioning_test_2008, partitioning_test_2009, partitioning_test_2010, + partitioning_test_2011, partitioning_test_2013, reference_table_2; +-- verify this doesn't crash and gives a debug message for dropped table +SET client_min_messages TO DEBUG1; +DROP TABLE partitioning_test, reference_table; +DEBUG: switching to sequential query execution mode +DETAIL: Table "" is modified, which might lead to data inconsistencies or distributed deadlocks via parallel accesses to hash distributed tables due to foreign keys. Any parallel modification to those hash distributed tables in the same transaction can only be executed in sequential query execution mode +CONTEXT: SQL statement "SELECT citus_drop_all_shards(v_obj.objid, v_obj.schema_name, v_obj.object_name)" +PL/pgSQL function citus_drop_trigger() line 16 at PERFORM +DEBUG: drop cascades to 2 other objects +DETAIL: drop cascades to constraint partitioning_reference_fkey_1660302 on table partitioning_schema.partitioning_test_1660302 +drop cascades to constraint partitioning_reference_fkey_1660304 on table partitioning_schema.partitioning_test_1660304 +DETAIL: from localhost:xxxxx +CONTEXT: SQL statement "SELECT citus_drop_all_shards(v_obj.objid, v_obj.schema_name, v_obj.object_name)" +PL/pgSQL function citus_drop_trigger() line 16 at PERFORM +DEBUG: drop cascades to 2 other objects +DETAIL: drop cascades to constraint partitioning_reference_fkey_1660303 on table partitioning_schema.partitioning_test_1660303 +drop cascades to constraint partitioning_reference_fkey_1660305 on table partitioning_schema.partitioning_test_1660305 +DETAIL: from localhost:xxxxx +CONTEXT: SQL statement "SELECT citus_drop_all_shards(v_obj.objid, v_obj.schema_name, v_obj.object_name)" +PL/pgSQL function citus_drop_trigger() line 16 at PERFORM +RESET client_min_messages; RESET SEARCH_PATH; -- not timestamp partitioned CREATE TABLE not_time_partitioned (x int, y int) PARTITION BY RANGE (x); diff --git a/src/test/regress/expected/multi_read_from_secondaries.out b/src/test/regress/expected/multi_read_from_secondaries.out index 6e7dc64bc..5c69458e4 100644 --- a/src/test/regress/expected/multi_read_from_secondaries.out +++ b/src/test/regress/expected/multi_read_from_secondaries.out @@ -24,7 +24,7 @@ INSERT INTO dest_table (a, b) VALUES (2, 1); INSERT INTO source_table (a, b) VALUES (1, 5); INSERT INTO source_table (a, b) VALUES (10, 10); -- simulate actually having secondary nodes -SELECT nodeid, groupid, nodename, nodeport, noderack, isactive, noderole, nodecluster FROM pg_dist_node; +SELECT nodeid, groupid, nodename, nodeport, noderack, isactive, noderole, nodecluster FROM pg_dist_node ORDER BY 1, 2; nodeid | groupid | nodename | nodeport | 
noderack | isactive | noderole | nodecluster --------------------------------------------------------------------- 1 | 1 | localhost | 57637 | default | t | primary | default diff --git a/src/test/regress/expected/multi_replicate_reference_table.out b/src/test/regress/expected/multi_replicate_reference_table.out index 92fd3558f..10806f3be 100644 --- a/src/test/regress/expected/multi_replicate_reference_table.out +++ b/src/test/regress/expected/multi_replicate_reference_table.out @@ -358,7 +358,6 @@ SELECT create_reference_table('replicate_reference_table_reference_one'); SET citus.shard_count TO 1; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; CREATE TABLE replicate_reference_table_reference_two(column1 int); -- status before master_add_node SELECT @@ -950,7 +949,7 @@ SELECT 1 FROM master_add_node('localhost', :worker_2_port); 1 (1 row) -SET citus.replication_model TO streaming; +SET citus.shard_replication_factor TO 1; SELECT start_metadata_sync_to_node('localhost', :worker_1_port); start_metadata_sync_to_node --------------------------------------------------------------------- diff --git a/src/test/regress/expected/multi_row_insert.out b/src/test/regress/expected/multi_row_insert.out index 1d976987a..caa149ce2 100644 --- a/src/test/regress/expected/multi_row_insert.out +++ b/src/test/regress/expected/multi_row_insert.out @@ -18,7 +18,7 @@ SELECT * FROM pg_dist_shard WHERE logicalrelid='source_table_xyz'::regclass::oid source_table_xyz | 4213582 | t | (25,z) | (49,z) (2 rows) -SELECT shardid, nodename, nodeport FROM pg_dist_shard_placement WHERE EXISTS(SELECT shardid FROM pg_dist_shard WHERE shardid=pg_dist_shard_placement.shardid AND logicalrelid='source_table_xyz'::regclass::oid); +SELECT shardid, nodename, nodeport FROM pg_dist_shard_placement WHERE EXISTS(SELECT shardid FROM pg_dist_shard WHERE shardid=pg_dist_shard_placement.shardid AND logicalrelid='source_table_xyz'::regclass::oid) ORDER BY 1, 2, 3; shardid | nodename | nodeport --------------------------------------------------------------------- 4213581 | localhost | 57637 diff --git a/src/test/regress/expected/multi_sequence_default.out b/src/test/regress/expected/multi_sequence_default.out new file mode 100644 index 000000000..378f003b4 --- /dev/null +++ b/src/test/regress/expected/multi_sequence_default.out @@ -0,0 +1,466 @@ +-- +-- MULTI_SEQUENCE_DEFAULT +-- +-- Tests related to column defaults coming from a sequence +-- +SET citus.next_shard_id TO 890000; +SET citus.shard_count TO 4; +SET citus.shard_replication_factor TO 1; +CREATE SCHEMA sequence_default; +SET search_path = sequence_default, public; +-- Cannot add a column involving DEFAULT nextval('..') because the table is not empty +CREATE SEQUENCE seq_0; +CREATE TABLE seq_test_0 (x int, y int); +SELECT create_distributed_table('seq_test_0','x'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO seq_test_0 SELECT 1, s FROM generate_series(1, 50) s; +ALTER TABLE seq_test_0 ADD COLUMN z int DEFAULT nextval('seq_0'); +ERROR: cannot add a column involving DEFAULT nextval('..') because the table is not empty +HINT: You can first call ALTER TABLE .. ADD COLUMN .. smallint/int/bigint +Then set the default by ALTER TABLE .. ALTER COLUMN .. SET DEFAULT nextval('..') +ALTER TABLE seq_test_0 ADD COLUMN z serial; +ERROR: Cannot add a column involving serial pseudotypes because the table is not empty +HINT: You can first call ALTER TABLE .. ADD COLUMN .. 
smallint/int/bigint +Then set the default by ALTER TABLE .. ALTER COLUMN .. SET DEFAULT nextval('..') +-- follow hint +ALTER TABLE seq_test_0 ADD COLUMN z int; +ALTER TABLE seq_test_0 ALTER COLUMN z SET DEFAULT nextval('seq_0'); +SELECT * FROM seq_test_0 ORDER BY 1, 2 LIMIT 5; + x | y | z +--------------------------------------------------------------------- + 1 | 1 | + 1 | 2 | + 1 | 3 | + 1 | 4 | + 1 | 5 | +(5 rows) + +\d seq_test_0 + Table "sequence_default.seq_test_0" + Column | Type | Collation | Nullable | Default +--------------------------------------------------------------------- + x | integer | | | + y | integer | | | + z | integer | | | nextval('seq_0'::regclass) + +-- check that we can add serial pseudo-type columns +-- when metadata is not yet synced to workers +TRUNCATE seq_test_0; +ALTER TABLE seq_test_0 ADD COLUMN w00 smallserial; +ALTER TABLE seq_test_0 ADD COLUMN w01 serial2; +ALTER TABLE seq_test_0 ADD COLUMN w10 serial; +ALTER TABLE seq_test_0 ADD COLUMN w11 serial4; +ALTER TABLE seq_test_0 ADD COLUMN w20 bigserial; +ALTER TABLE seq_test_0 ADD COLUMN w21 serial8; +-- check alter column type precaution +ALTER TABLE seq_test_0 ALTER COLUMN z TYPE bigint; +ERROR: cannot execute ALTER COLUMN TYPE .. command because the column involves a default coming from a sequence +ALTER TABLE seq_test_0 ALTER COLUMN z TYPE smallint; +ERROR: cannot execute ALTER COLUMN TYPE .. command because the column involves a default coming from a sequence +-- MX tests +-- check that there's not problem with group ID cache +CREATE TABLE seq_test_4 (x int, y int); +SELECT create_distributed_table('seq_test_4','x'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE SEQUENCE seq_4; +ALTER TABLE seq_test_4 ADD COLUMN a int DEFAULT nextval('seq_4'); +SELECT start_metadata_sync_to_node('localhost', :worker_1_port); + start_metadata_sync_to_node +--------------------------------------------------------------------- + +(1 row) + +DROP SEQUENCE seq_4 CASCADE; +NOTICE: drop cascades to default value for column a of table seq_test_4 +TRUNCATE seq_test_4; +CREATE SEQUENCE seq_4; +ALTER TABLE seq_test_4 ADD COLUMN b int DEFAULT nextval('seq_4'); +-- on worker it should generate high sequence number +\c - - - :worker_1_port +INSERT INTO sequence_default.seq_test_4 VALUES (1,2) RETURNING *; + x | y | a | b +--------------------------------------------------------------------- + 1 | 2 | | 268435457 +(1 row) + +\c - - - :master_port +SET citus.shard_replication_factor TO 1; +SET search_path = sequence_default, public; +SELECT start_metadata_sync_to_node('localhost', :worker_1_port); + start_metadata_sync_to_node +--------------------------------------------------------------------- + +(1 row) + +-- check sequence type consistency in all nodes +CREATE SEQUENCE seq_1; +-- type is bigint by default +\d seq_1 + Sequence "sequence_default.seq_1" + Type | Start | Minimum | Maximum | Increment | Cycles? | Cache +--------------------------------------------------------------------- + bigint | 1 | 1 | 9223372036854775807 | 1 | no | 1 + +CREATE TABLE seq_test_1 (x int, y int); +SELECT create_distributed_table('seq_test_1','x'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE seq_test_1 ADD COLUMN z int DEFAULT nextval('seq_1'); +-- type is changed to int +\d seq_1 + Sequence "sequence_default.seq_1" + Type | Start | Minimum | Maximum | Increment | Cycles? 
| Cache +--------------------------------------------------------------------- + integer | 1 | 1 | 2147483647 | 1 | no | 1 + +-- check insertion is within int bounds in the worker +\c - - - :worker_1_port +INSERT INTO sequence_default.seq_test_1 values (1, 2) RETURNING *; + x | y | z +--------------------------------------------------------------------- + 1 | 2 | 268435457 +(1 row) + +\c - - - :master_port +SET citus.shard_replication_factor TO 1; +SET search_path = sequence_default, public; +SELECT start_metadata_sync_to_node('localhost', :worker_1_port); + start_metadata_sync_to_node +--------------------------------------------------------------------- + +(1 row) + +-- check that we cannot add serial pseudo-type columns +-- when metadata is synced to workers +ALTER TABLE seq_test_1 ADD COLUMN w bigserial; +ERROR: cannot execute ADD COLUMN commands involving serial pseudotypes when metadata is synchronized to workers +-- check for sequence type clashes +CREATE SEQUENCE seq_2; +CREATE TABLE seq_test_2 (x int, y bigint DEFAULT nextval('seq_2')); +-- should work +SELECT create_distributed_table('seq_test_2','x'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +DROP TABLE seq_test_2; +CREATE TABLE seq_test_2 (x int, y int DEFAULT nextval('seq_2')); +-- should work +SELECT create_distributed_table('seq_test_2','x'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE seq_test_2_0(x int, y smallint DEFAULT nextval('seq_2')); +-- shouldn't work +SELECT create_distributed_table('seq_test_2_0','x'); +ERROR: The sequence sequence_default.seq_2 is already used for a different type in column 2 of the table sequence_default.seq_test_2 +DROP TABLE seq_test_2; +DROP TABLE seq_test_2_0; +-- should work +CREATE TABLE seq_test_2 (x int, y bigint DEFAULT nextval('seq_2')); +SELECT create_distributed_table('seq_test_2','x'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +DROP TABLE seq_test_2; +CREATE TABLE seq_test_2 (x int, y int DEFAULT nextval('seq_2'), z bigint DEFAULT nextval('seq_2')); +-- shouldn't work +SELECT create_distributed_table('seq_test_2','x'); +ERROR: The sequence sequence_default.seq_2 is already used for a different type in column 3 of the table sequence_default.seq_test_2 +-- check rename is propagated properly +ALTER SEQUENCE seq_2 RENAME TO sequence_2; +-- check in the worker +\c - - - :worker_1_port +\d sequence_default.sequence_2 + Sequence "sequence_default.sequence_2" + Type | Start | Minimum | Maximum | Increment | Cycles? 
| Cache +--------------------------------------------------------------------- + bigint | 281474976710657 | 281474976710657 | 562949953421313 | 1 | no | 1 + +\c - - - :master_port +SET citus.shard_replication_factor TO 1; +SET search_path = sequence_default, public; +SELECT start_metadata_sync_to_node('localhost', :worker_1_port); + start_metadata_sync_to_node +--------------------------------------------------------------------- + +(1 row) + +-- check rename with another schema +-- we notice that schema is also propagated as one of the sequence's dependencies +CREATE SCHEMA sequence_default_0; +CREATE SEQUENCE sequence_default_0.seq_3; +CREATE TABLE seq_test_3 (x int, y bigint DEFAULT nextval('sequence_default_0.seq_3')); +SELECT create_distributed_table('seq_test_3', 'x'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +ALTER SEQUENCE sequence_default_0.seq_3 RENAME TO sequence_3; +-- check in the worker +\c - - - :worker_1_port +\d sequence_default_0.sequence_3 + Sequence "sequence_default_0.sequence_3" + Type | Start | Minimum | Maximum | Increment | Cycles? | Cache +--------------------------------------------------------------------- + bigint | 281474976710657 | 281474976710657 | 562949953421313 | 1 | no | 1 + +\c - - - :master_port +SET citus.shard_replication_factor TO 1; +SET search_path = sequence_default, public; +SELECT start_metadata_sync_to_node('localhost', :worker_1_port); + start_metadata_sync_to_node +--------------------------------------------------------------------- + +(1 row) + +DROP SEQUENCE sequence_default_0.sequence_3 CASCADE; +NOTICE: drop cascades to default value for column y of table seq_test_3 +DROP SCHEMA sequence_default_0; +-- DROP SCHEMA problem: expected since we don't propagate DROP SCHEMA +CREATE TABLE seq_test_5 (x int, y int); +SELECT create_distributed_table('seq_test_5','x'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE SCHEMA sequence_default_1; +CREATE SEQUENCE sequence_default_1.seq_5; +ALTER TABLE seq_test_5 ADD COLUMN a int DEFAULT nextval('sequence_default_1.seq_5'); +DROP SCHEMA sequence_default_1 CASCADE; +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to sequence sequence_default_1.seq_5 +drop cascades to default value for column a of table seq_test_5 +-- sequence is gone from coordinator +INSERT INTO seq_test_5 VALUES (1, 2) RETURNING *; + x | y | a +--------------------------------------------------------------------- + 1 | 2 | +(1 row) + +-- but is still present on worker +\c - - - :worker_1_port +INSERT INTO sequence_default.seq_test_5 VALUES (1, 2) RETURNING *; + x | y | a +--------------------------------------------------------------------- + 1 | 2 | 268435457 +(1 row) + +\c - - - :master_port +SET citus.shard_replication_factor TO 1; +SET search_path = sequence_default, public; +SELECT start_metadata_sync_to_node('localhost', :worker_1_port); + start_metadata_sync_to_node +--------------------------------------------------------------------- + +(1 row) + +-- apply workaround +SELECT run_command_on_workers('DROP SCHEMA sequence_default_1 CASCADE'); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,57637,t,"DROP SCHEMA") + (localhost,57638,t,"DROP SCHEMA") +(2 rows) + +-- now the sequence is gone from the worker as well +\c - - - :worker_1_port +INSERT INTO sequence_default.seq_test_5 VALUES (1, 2) RETURNING *; + x | y | a 
+--------------------------------------------------------------------- + 1 | 2 | +(1 row) + +\c - - - :master_port +SET citus.shard_replication_factor TO 1; +SET search_path = sequence_default, public; +SELECT start_metadata_sync_to_node('localhost', :worker_1_port); + start_metadata_sync_to_node +--------------------------------------------------------------------- + +(1 row) + +-- check some more complex cases +CREATE SEQUENCE seq_6; +CREATE TABLE seq_test_6 (x int, t timestamptz DEFAULT now(), s int DEFAULT nextval('seq_6'), m int) PARTITION BY RANGE (t); +SELECT create_distributed_table('seq_test_6','x'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- shouldn't work since x is the partition column +ALTER TABLE seq_test_6 ALTER COLUMN x SET DEFAULT nextval('seq_6'); +ERROR: cannot execute ALTER TABLE command involving partition column +-- should work since both s and m have int type +ALTER TABLE seq_test_6 ALTER COLUMN m SET DEFAULT nextval('seq_6'); +-- It is possible for a partition to have a different DEFAULT than its parent +CREATE SEQUENCE seq_7; +CREATE TABLE seq_test_7 (x text, s bigint DEFAULT nextval('seq_7'), t timestamptz DEFAULT now()) PARTITION BY RANGE (t); +SELECT create_distributed_table('seq_test_7','x'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE SEQUENCE seq_7_par; +CREATE TABLE seq_test_7_par (x text, s bigint DEFAULT nextval('seq_7_par'), t timestamptz DEFAULT now()); +ALTER TABLE seq_test_7 ATTACH PARTITION seq_test_7_par FOR VALUES FROM ('2021-05-31') TO ('2021-06-01'); +-- check that both sequences are in worker +\c - - - :worker_1_port +\d sequence_default.seq_7 + Sequence "sequence_default.seq_7" + Type | Start | Minimum | Maximum | Increment | Cycles? | Cache +--------------------------------------------------------------------- + bigint | 281474976710657 | 281474976710657 | 562949953421313 | 1 | no | 1 + +\d sequence_default.seq_7_par + Sequence "sequence_default.seq_7_par" + Type | Start | Minimum | Maximum | Increment | Cycles? | Cache +--------------------------------------------------------------------- + bigint | 281474976710657 | 281474976710657 | 562949953421313 | 1 | no | 1 + +\c - - - :master_port +SET citus.shard_replication_factor TO 1; +SET search_path = sequence_default, public; +SELECT start_metadata_sync_to_node('localhost', :worker_1_port); + start_metadata_sync_to_node +--------------------------------------------------------------------- + +(1 row) + +-- Check that various ALTER SEQUENCE commands +-- are not allowed for a distributed sequence for now +CREATE SEQUENCE seq_8; +CREATE SCHEMA sequence_default_8; +-- can change schema in a sequence not yet distributed +ALTER SEQUENCE seq_8 SET SCHEMA sequence_default_8; +ALTER SEQUENCE sequence_default_8.seq_8 SET SCHEMA sequence_default; +CREATE TABLE seq_test_8 (x int, y int DEFAULT nextval('seq_8')); +SELECT create_distributed_table('seq_test_8', 'x'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- cannot change sequence specifications +ALTER SEQUENCE seq_8 AS bigint; +ERROR: This operation is currently not allowed for a distributed sequence. +ALTER SEQUENCE seq_8 INCREMENT BY 2; +ERROR: This operation is currently not allowed for a distributed sequence. +ALTER SEQUENCE seq_8 MINVALUE 5 MAXVALUE 5000; +ERROR: This operation is currently not allowed for a distributed sequence. 
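The seq_8 statements above, and the ones that follow, all fail with the same message, which pins down the current rule: once a sequence backs a column default of a distributed table, ALTER SEQUENCE is rejected, so sequence options have to be chosen before the table is distributed. A small sketch of the working order, using hypothetical names (order_id_seq, orders) that are not part of the patch:

-- Pick the sequence options while everything is still local...
CREATE SEQUENCE order_id_seq INCREMENT BY 10;
CREATE TABLE orders (customer_id int, id bigint DEFAULT nextval('order_id_seq'));
SELECT create_distributed_table('orders', 'customer_id');
-- ...because the equivalent change is refused afterwards:
-- ALTER SEQUENCE order_id_seq INCREMENT BY 20;
-- ERROR: This operation is currently not allowed for a distributed sequence.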
+ALTER SEQUENCE seq_8 START WITH 6; +ERROR: This operation is currently not allowed for a distributed sequence. +ALTER SEQUENCE seq_8 RESTART WITH 6; +ERROR: This operation is currently not allowed for a distributed sequence. +ALTER SEQUENCE seq_8 NO CYCLE; +ERROR: This operation is currently not allowed for a distributed sequence. +ALTER SEQUENCE seq_8 OWNED BY seq_test_7; +ERROR: This operation is currently not allowed for a distributed sequence. +-- cannot change schema in a distributed sequence +ALTER SEQUENCE seq_8 SET SCHEMA sequence_default_8; +ERROR: This operation is currently not allowed for a distributed sequence. +DROP SCHEMA sequence_default_8; +-- cannot use more than one sequence in a column default +CREATE SEQUENCE seq_9; +CREATE SEQUENCE seq_10; +CREATE TABLE seq_test_9 (x int, y int DEFAULT nextval('seq_9') - nextval('seq_10')); +SELECT create_distributed_table('seq_test_9', 'x'); +ERROR: More than one sequence in a column default is not supported for distribution +-- Check some cases when default is defined by +-- DEFAULT nextval('seq_name'::text) (not by DEFAULT nextval('seq_name')) +SELECT stop_metadata_sync_to_node('localhost', :worker_1_port); + stop_metadata_sync_to_node +--------------------------------------------------------------------- + +(1 row) + +CREATE SEQUENCE seq_11; +CREATE TABLE seq_test_10 (col0 int, col1 int DEFAULT nextval('seq_11'::text)); +SELECT create_reference_table('seq_test_10'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO seq_test_10 VALUES (0); +CREATE TABLE seq_test_11 (col0 int, col1 bigint DEFAULT nextval('seq_11'::text)); +-- works but doesn't create seq_11 in the workers +SELECT start_metadata_sync_to_node('localhost', :worker_1_port); + start_metadata_sync_to_node +--------------------------------------------------------------------- + +(1 row) + +-- works because there is no dependency created between seq_11 and seq_test_10 +SELECT create_distributed_table('seq_test_11', 'col1'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- insertion from workers fails +\c - - - :worker_1_port +INSERT INTO sequence_default.seq_test_10 VALUES (1); +ERROR: relation "seq_11" does not exist +\c - - - :master_port +-- clean up +DROP TABLE sequence_default.seq_test_7_par; +DROP SCHEMA sequence_default CASCADE; +NOTICE: drop cascades to 23 other objects +DETAIL: drop cascades to sequence sequence_default.seq_0 +drop cascades to table sequence_default.seq_test_0 +drop cascades to table sequence_default.seq_test_4 +drop cascades to sequence sequence_default.seq_4 +drop cascades to sequence sequence_default.seq_1 +drop cascades to table sequence_default.seq_test_1 +drop cascades to sequence sequence_default.sequence_2 +drop cascades to table sequence_default.seq_test_2 +drop cascades to table sequence_default.seq_test_3 +drop cascades to table sequence_default.seq_test_5 +drop cascades to sequence sequence_default.seq_6 +drop cascades to table sequence_default.seq_test_6 +drop cascades to sequence sequence_default.seq_7 +drop cascades to table sequence_default.seq_test_7 +drop cascades to sequence sequence_default.seq_7_par +drop cascades to sequence sequence_default.seq_8 +drop cascades to table sequence_default.seq_test_8 +drop cascades to sequence sequence_default.seq_9 +drop cascades to sequence sequence_default.seq_10 +drop cascades to table sequence_default.seq_test_9 +drop cascades to sequence 
sequence_default.seq_11 +drop cascades to table sequence_default.seq_test_10 +drop cascades to table sequence_default.seq_test_11 +SELECT run_command_on_workers('DROP SCHEMA IF EXISTS sequence_default CASCADE'); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,57637,t,"DROP SCHEMA") + (localhost,57638,t,"DROP SCHEMA") +(2 rows) + +SELECT stop_metadata_sync_to_node('localhost', :worker_1_port); + stop_metadata_sync_to_node +--------------------------------------------------------------------- + +(1 row) + +SET search_path TO public; diff --git a/src/test/regress/expected/multi_table_ddl.out b/src/test/regress/expected/multi_table_ddl.out index 826b7aad1..e645c60bb 100644 --- a/src/test/regress/expected/multi_table_ddl.out +++ b/src/test/regress/expected/multi_table_ddl.out @@ -2,6 +2,8 @@ -- MULTI_TABLE_DDL -- -- Tests around changing the schema and dropping of a distributed table +-- Test DEFAULTS coming from SERIAL pseudo-types, user-defined sequences +-- SET citus.next_shard_id TO 870000; CREATE TABLE testtableddl(somecol int, distributecol text NOT NULL); SELECT create_distributed_table('testtableddl', 'distributecol', 'append'); @@ -98,9 +100,8 @@ SELECT create_distributed_table('testserialtable', 'group_id', 'hash'); (1 row) --- should not be able to add additional serial columns +-- can add additional serial columns ALTER TABLE testserialtable ADD COLUMN other_id serial; -ERROR: cannot execute ADD COLUMN commands involving serial pseudotypes -- and we shouldn't be able to change a distributed sequence's owner ALTER SEQUENCE testserialtable_id_seq OWNED BY NONE; ERROR: cannot alter OWNED BY option of a sequence already owned by a distributed table @@ -108,13 +109,16 @@ ERROR: cannot alter OWNED BY option of a sequence already owned by a distribute CREATE SEQUENCE standalone_sequence OWNED BY testserialtable.group_id; ERROR: cannot create sequences that specify a distributed table in their OWNED BY option HINT: Use a sequence in a distributed table by specifying a serial column type before creating any shards. +-- EDIT: this doesn't error out for now in order to allow adding +-- new serial columns (they always come with owned_by command) +-- should be fixed later in ALTER SEQUENCE preprocessing -- or even change a manual sequence to be owned by a distributed table CREATE SEQUENCE standalone_sequence; ALTER SEQUENCE standalone_sequence OWNED BY testserialtable.group_id; -ERROR: cannot associate an existing sequence with a distributed table -HINT: Use a sequence in a distributed table by specifying a serial column type before creating any shards. -- an edge case, but it's OK to change an owner to the same distributed table +-- EDIT: this doesn't work for now for a distributed sequence ALTER SEQUENCE testserialtable_id_seq OWNED BY testserialtable.id; +ERROR: This operation is currently not allowed for a distributed sequence. 
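This multi_table_ddl.out hunk records a behaviour change rather than a new test: ADD COLUMN with a serial pseudo-type on an already-distributed table no longer errors out (the multi_sequence_default output earlier shows it is still refused once metadata is synced to the workers). A hedged sketch with a hypothetical table name:

CREATE TABLE accounts (id int);
SELECT create_distributed_table('accounts', 'id');
-- Previously rejected with "cannot execute ADD COLUMN commands involving
-- serial pseudotypes"; now accepted while metadata is not synced to workers.
ALTER TABLE accounts ADD COLUMN audit_id bigserial;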
-- drop distributed table \c - - - :master_port DROP TABLE testserialtable; @@ -126,3 +130,38 @@ DROP TABLE testserialtable; --------------------------------------------------------------------- (0 rows) +\c - - - :master_port +-- test DEFAULT coming from SERIAL pseudo-types and user-defined sequences +CREATE SEQUENCE test_sequence_0; +CREATE SEQUENCE test_sequence_1; +CREATE TABLE test_table (id1 int DEFAULT nextval('test_sequence_0')); +SELECT create_distributed_table('test_table', 'id1'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- shouldn't work since it's partition column +ALTER TABLE test_table ALTER COLUMN id1 SET DEFAULT nextval('test_sequence_1'); +ERROR: cannot execute ALTER TABLE command involving partition column +-- test different plausible commands +ALTER TABLE test_table ADD COLUMN id2 int DEFAULT nextval('test_sequence_1'); +ALTER TABLE test_table ALTER COLUMN id2 DROP DEFAULT; +ALTER TABLE test_table ALTER COLUMN id2 SET DEFAULT nextval('test_sequence_1'); +ALTER TABLE test_table ADD COLUMN id3 bigserial; +-- shouldn't work since the above operations should be the only subcommands +ALTER TABLE test_table ADD COLUMN id4 int DEFAULT nextval('test_sequence_1') CHECK (id4 > 0); +ERROR: cannot execute ADD COLUMN .. DEFAULT nextval('..') command with other subcommands/constraints +HINT: You can issue each subcommand separately +ALTER TABLE test_table ADD COLUMN id4 int, ADD COLUMN id5 int DEFAULT nextval('test_sequence_1'); +ERROR: cannot execute ADD COLUMN .. DEFAULT nextval('..') command with other subcommands/constraints +HINT: You can issue each subcommand separately +ALTER TABLE test_table ALTER COLUMN id3 SET DEFAULT nextval('test_sequence_1'), ALTER COLUMN id2 DROP DEFAULT; +ERROR: cannot execute ALTER COLUMN COLUMN .. 
SET DEFAULT nextval('..') command with other subcommands +HINT: You can issue each subcommand separately +ALTER TABLE test_table ADD COLUMN id4 bigserial CHECK (id4 > 0); +ERROR: cannot execute ADD COLUMN commands involving serial pseudotypes with other subcommands/constraints +HINT: You can issue each subcommand separately +DROP TABLE test_table CASCADE; +DROP SEQUENCE test_sequence_0; +DROP SEQUENCE test_sequence_1; diff --git a/src/test/regress/expected/multi_test_helpers_superuser.out b/src/test/regress/expected/multi_test_helpers_superuser.out index cfc3cf02b..01676131c 100644 --- a/src/test/regress/expected/multi_test_helpers_superuser.out +++ b/src/test/regress/expected/multi_test_helpers_superuser.out @@ -1,9 +1,3 @@ -CREATE OR REPLACE FUNCTION master_defer_delete_shards() - RETURNS int - LANGUAGE C STRICT - AS 'citus', $$master_defer_delete_shards$$; -COMMENT ON FUNCTION master_defer_delete_shards() - IS 'remove orphaned shards'; CREATE OR REPLACE FUNCTION wait_until_metadata_sync(timeout INTEGER DEFAULT 15000) RETURNS void LANGUAGE C STRICT diff --git a/src/test/regress/expected/multi_transaction_recovery.out b/src/test/regress/expected/multi_transaction_recovery.out index 575e62068..6b862cddc 100644 --- a/src/test/regress/expected/multi_transaction_recovery.out +++ b/src/test/regress/expected/multi_transaction_recovery.out @@ -347,6 +347,40 @@ SELECT recover_prepared_transactions(); 0 (1 row) +SELECT shardid INTO selected_shard FROM pg_dist_shard WHERE logicalrelid='test_2pcskip'::regclass LIMIT 1; +SELECT COUNT(*) FROM pg_dist_transaction; + count +--------------------------------------------------------------------- + 0 +(1 row) + +BEGIN; +SET LOCAL citus.defer_drop_after_shard_move TO OFF; +SELECT citus_move_shard_placement((SELECT * FROM selected_shard), 'localhost', :worker_1_port, 'localhost', :worker_2_port); + citus_move_shard_placement +--------------------------------------------------------------------- + +(1 row) + +COMMIT; +SELECT COUNT(*) FROM pg_dist_transaction; + count +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT citus_move_shard_placement((SELECT * FROM selected_shard), 'localhost', :worker_2_port, 'localhost', :worker_1_port); + citus_move_shard_placement +--------------------------------------------------------------------- + +(1 row) + -- for the following test, ensure that 6 and 7 go to different shards on different workers SELECT count(DISTINCT nodeport) FROM pg_dist_shard_placement WHERE shardid IN (get_shard_id_for_distribution_column('test_2pcskip', 6),get_shard_id_for_distribution_column('test_2pcskip', 7)); count diff --git a/src/test/regress/expected/multi_truncate.out b/src/test/regress/expected/multi_truncate.out index 7a77dee23..c21a65fee 100644 --- a/src/test/regress/expected/multi_truncate.out +++ b/src/test/regress/expected/multi_truncate.out @@ -8,7 +8,7 @@ SET search_path TO multi_truncate; CREATE VIEW table_sizes AS SELECT c.relname as name, - pg_catalog.pg_size_pretty(pg_catalog.pg_table_size(c.oid)) as size + pg_catalog.pg_table_size(c.oid) > 0 as has_data FROM pg_catalog.pg_class c LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace WHERE c.relkind = 'r' @@ -330,7 +330,7 @@ SELECT * FROM test_local_truncate; SELECT citus_drop_all_shards('test_local_truncate', 'public', 'test_local_truncate'); citus_drop_all_shards 
--------------------------------------------------------------------- - 4 + 4 (1 row) DELETE FROM pg_dist_partition WHERE logicalrelid = 'test_local_truncate'::regclass; @@ -369,7 +369,7 @@ SELECT * FROM test_local_truncate; SELECT citus_drop_all_shards('test_local_truncate', 'public', 'test_local_truncate'); citus_drop_all_shards --------------------------------------------------------------------- - 4 + 4 (1 row) DELETE FROM pg_dist_partition WHERE logicalrelid = 'test_local_truncate'::regclass; @@ -427,19 +427,19 @@ NOTICE: truncate cascades to table "referencing_table" (1 row) SELECT * FROM table_sizes; - name | size + name | has_data --------------------------------------------------------------------- - referenced_table | 0 bytes - referencing_table | 0 bytes + referenced_table | f + referencing_table | f (2 rows) ROLLBACK; -- observe that none of the tables are truncated SELECT * FROM table_sizes; - name | size + name | has_data --------------------------------------------------------------------- - referenced_table | 384 kB - referencing_table | 384 kB + referenced_table | t + referencing_table | t (2 rows) -- test that if we truncate the referencing table, only said table is affected @@ -451,10 +451,10 @@ SELECT truncate_local_data_after_distributing_table('referencing_table'); (1 row) SELECT * FROM table_sizes; - name | size + name | has_data --------------------------------------------------------------------- - referenced_table | 384 kB - referencing_table | 0 bytes + referenced_table | t + referencing_table | f (2 rows) ROLLBACK; @@ -470,10 +470,10 @@ NOTICE: truncate cascades to table "referencing_table" (1 row) SELECT * FROM table_sizes; - name | size + name | has_data --------------------------------------------------------------------- - referenced_table | 0 bytes - referencing_table | 0 bytes + referenced_table | f + referencing_table | f (2 rows) ROLLBACK; @@ -498,25 +498,56 @@ INSERT INTO dist SELECT x,x FROM generate_series(1,10000) x; SELECT truncate_local_data_after_distributing_table('ref'); ERROR: cannot truncate a table referenced in a foreign key constraint by a local table DETAIL: Table "dist" references "ref" --- distribute the table and start testing allowed truncation queries +-- test that we do not allow distributing tables that have foreign keys to reference tables SELECT create_distributed_table('dist','id'); ERROR: cannot distribute "dist" in sequential mode because it is not empty HINT: If you have manually set citus.multi_shard_modify_mode to 'sequential', try with 'parallel' option. If that is not the case, try distributing local tables when they are empty. 
+SHOW citus.multi_shard_modify_mode; + citus.multi_shard_modify_mode +--------------------------------------------------------------------- + parallel +(1 row) + +-- distribute the table after a truncate +TRUNCATE dist; +SELECT create_distributed_table('dist','id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + -- the following should truncate ref and dist BEGIN; SELECT truncate_local_data_after_distributing_table('ref'); -ERROR: cannot truncate a table referenced in a foreign key constraint by a local table -DETAIL: Table "dist" references "ref" +NOTICE: truncate cascades to table "dist" + truncate_local_data_after_distributing_table +--------------------------------------------------------------------- + +(1 row) + SELECT * FROM table_sizes; -ERROR: current transaction is aborted, commands ignored until end of transaction block + name | has_data +--------------------------------------------------------------------- + dist | f + ref | f +(2 rows) + ROLLBACK; -- the following should truncate dist table only BEGIN; SELECT truncate_local_data_after_distributing_table('dist'); -ERROR: supplied parameter is not a distributed relation -DETAIL: This UDF only truncates local records of distributed tables. + truncate_local_data_after_distributing_table +--------------------------------------------------------------------- + +(1 row) + SELECT * FROM table_sizes; -ERROR: current transaction is aborted, commands ignored until end of transaction block + name | has_data +--------------------------------------------------------------------- + dist | f + ref | t +(2 rows) + ROLLBACK; DROP TABLE ref, dist; -- tests for issue 1770 diff --git a/src/test/regress/expected/multi_unsupported_worker_operations.out b/src/test/regress/expected/multi_unsupported_worker_operations.out index ceb34e252..d86be3685 100644 --- a/src/test/regress/expected/multi_unsupported_worker_operations.out +++ b/src/test/regress/expected/multi_unsupported_worker_operations.out @@ -11,7 +11,6 @@ SELECT nextval('pg_catalog.pg_dist_colocationid_seq') AS last_colocation_id \gse ALTER SEQUENCE pg_catalog.pg_dist_colocationid_seq RESTART 150000; -- Prepare the environment SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; SET citus.shard_count TO 5; -- Create test tables CREATE TABLE mx_table (col_1 int, col_2 text, col_3 BIGSERIAL); @@ -387,4 +386,3 @@ SELECT worker_drop_distributed_table(logicalrelid::regclass::text) FROM pg_dist_ \c - - - :master_port ALTER SEQUENCE pg_catalog.pg_dist_colocationid_seq RESTART :last_colocation_id; RESET citus.shard_replication_factor; -RESET citus.replication_model; diff --git a/src/test/regress/expected/multi_utilities.out b/src/test/regress/expected/multi_utilities.out index 1b8804b3b..91b21e7d8 100644 --- a/src/test/regress/expected/multi_utilities.out +++ b/src/test/regress/expected/multi_utilities.out @@ -364,6 +364,11 @@ SELECT worker_hash('(1, 2)'::test_composite_type); SELECT citus_truncate_trigger(); ERROR: must be called as trigger +-- make sure worker_create_or_alter_role does not crash with NULL input +SELECT worker_create_or_alter_role(NULL, NULL, NULL); +ERROR: role name cannot be NULL +SELECT worker_create_or_alter_role(NULL, 'create role dontcrash', NULL); +ERROR: role name cannot be NULL -- confirm that citus_create_restore_point works SELECT 1 FROM citus_create_restore_point('regression-test'); NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT 
assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); diff --git a/src/test/regress/expected/mx_coordinator_shouldhaveshards.out b/src/test/regress/expected/mx_coordinator_shouldhaveshards.out index 5b5a87f05..2582400d6 100644 --- a/src/test/regress/expected/mx_coordinator_shouldhaveshards.out +++ b/src/test/regress/expected/mx_coordinator_shouldhaveshards.out @@ -1,7 +1,6 @@ CREATE SCHEMA mx_coordinator_shouldhaveshards; SET search_path TO mx_coordinator_shouldhaveshards; SET citus.shard_replication_factor to 1; -SET citus.replication_model TO streaming; SET client_min_messages TO WARNING; SELECT 1 FROM master_add_node('localhost', :master_port, groupid => 0); ?column? diff --git a/src/test/regress/expected/mx_foreign_key_to_reference_table.out b/src/test/regress/expected/mx_foreign_key_to_reference_table.out index d99500a13..009ee5c01 100644 --- a/src/test/regress/expected/mx_foreign_key_to_reference_table.out +++ b/src/test/regress/expected/mx_foreign_key_to_reference_table.out @@ -4,7 +4,6 @@ SET citus.shard_replication_factor TO 1; SET citus.shard_count TO 8; SET citus.next_shard_id TO 7000000; SET citus.next_placement_id TO 7000000; -SET citus.replication_model TO streaming; -- Setup the view so that we can check if the foreign keys are created properly CREATE TYPE foreign_details AS (name text, relid text, refd_relid text); CREATE VIEW table_fkeys_in_workers AS diff --git a/src/test/regress/expected/recursive_dml_queries_mx.out b/src/test/regress/expected/recursive_dml_queries_mx.out index 8ab760472..fed88e1f2 100644 --- a/src/test/regress/expected/recursive_dml_queries_mx.out +++ b/src/test/regress/expected/recursive_dml_queries_mx.out @@ -1,7 +1,6 @@ CREATE SCHEMA recursive_dml_queries_mx; SET search_path TO recursive_dml_queries_mx, public; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO streaming; CREATE TABLE recursive_dml_queries_mx.distributed_table (tenant_id text, dept int, info jsonb); SELECT create_distributed_table('distributed_table', 'tenant_id'); create_distributed_table @@ -171,4 +170,3 @@ DETAIL: drop cascades to table distributed_table drop cascades to table second_distributed_table drop cascades to table reference_table RESET citus.shard_replication_factor; -RESET citus.replication_model; diff --git a/src/test/regress/expected/shard_move_deferred_delete.out b/src/test/regress/expected/shard_move_deferred_delete.out index 3329aaa54..ed0d46500 100644 --- a/src/test/regress/expected/shard_move_deferred_delete.out +++ b/src/test/regress/expected/shard_move_deferred_delete.out @@ -50,13 +50,14 @@ $cmd$); (localhost,57638,t,1) (2 rows) +-- Make sure this cannot be run in a transaction +BEGIN; +CALL citus_cleanup_orphaned_shards(); +ERROR: citus_cleanup_orphaned_shards cannot run inside a transaction block +COMMIT; -- execute delayed removal -SELECT public.master_defer_delete_shards(); - master_defer_delete_shards ---------------------------------------------------------------------- - 1 -(1 row) - +CALL citus_cleanup_orphaned_shards(); +NOTICE: cleaned up 1 orphaned shards -- we expect the shard to be on only the second worker SELECT run_command_on_workers($cmd$ SELECT count(*) FROM pg_class WHERE relname = 't1_20000000'; @@ -133,6 +134,14 @@ $cmd$); (localhost,57638,t,1) (2 rows) +-- master_move_shard_placement automatically cleans up orphaned shards if +-- needed. 
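Earlier in this shard_move_deferred_delete.out diff the old SELECT public.master_defer_delete_shards() call is replaced by CALL citus_cleanup_orphaned_shards(), and the new output also shows that the cleanup refuses to run inside a transaction block. A sketch of the move-then-cleanup flow being exercised; the shard id and ports are the ones the regression harness uses, the rest is illustrative.

-- Move a placement; with deferred drop the source copy lingers as an orphan.
SELECT citus_move_shard_placement(20000000, 'localhost', 57637, 'localhost', 57638);
-- Clean up orphaned placements explicitly, outside any transaction block.
CALL citus_cleanup_orphaned_shards();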
+SELECT master_move_shard_placement(20000000, 'localhost', :worker_2_port, 'localhost', :worker_1_port); + master_move_shard_placement +--------------------------------------------------------------------- + +(1 row) + SELECT run_command_on_workers($cmd$ -- override the function for testing purpose create or replace function pg_catalog.citus_local_disk_space_stats(OUT available_disk_size bigint, OUT total_disk_size bigint) @@ -169,7 +178,6 @@ SELECT master_move_shard_placement(20000001, 'localhost', :worker_2_port, 'local (1 row) ROLLBACK; --- we expect shard xxxxx to be on both of the workers SELECT run_command_on_workers($cmd$ SELECT count(*) FROM pg_class WHERE relname = 't1_20000000'; $cmd$); diff --git a/src/test/regress/expected/shard_rebalancer.out b/src/test/regress/expected/shard_rebalancer.out index 843d7c170..33852f037 100644 --- a/src/test/regress/expected/shard_rebalancer.out +++ b/src/test/regress/expected/shard_rebalancer.out @@ -31,12 +31,14 @@ SELECT rebalance_table_shards('dist_table_test'); (1 row) +CALL citus_cleanup_orphaned_shards(); SELECT rebalance_table_shards(); rebalance_table_shards --------------------------------------------------------------------- (1 row) +CALL citus_cleanup_orphaned_shards(); -- test that calling rebalance_table_shards without specifying relation -- wouldn't move shard of the citus local table. CREATE TABLE citus_local_table(a int, b int); @@ -53,6 +55,7 @@ SELECT rebalance_table_shards(); (1 row) +CALL citus_cleanup_orphaned_shards(); -- show that citus local table shard is still on the coordinator SELECT tablename FROM pg_catalog.pg_tables where tablename like 'citus_local_table_%'; tablename @@ -67,12 +70,43 @@ SELECT count(*) FROM citus_local_table; 1 (1 row) +-- verify drain_node uses the localhostname guc by seeing it fail to connect to a non-existing name +ALTER SYSTEM SET citus.local_hostname TO 'foobar'; +SELECT pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +SELECT pg_sleep(.1); -- wait to make sure the config has changed before running the GUC + pg_sleep +--------------------------------------------------------------------- + +(1 row) + +SELECT master_drain_node('localhost', :master_port); +ERROR: connection to the remote node foobar:57636 failed with the following error: could not translate host name "foobar" to address: +CALL citus_cleanup_orphaned_shards(); +ALTER SYSTEM RESET citus.local_hostname; +SELECT pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +SELECT pg_sleep(.1); -- wait to make sure the config has changed before running the GUC + pg_sleep +--------------------------------------------------------------------- + +(1 row) + SELECT master_drain_node('localhost', :master_port); master_drain_node --------------------------------------------------------------------- (1 row) +CALL citus_cleanup_orphaned_shards(); -- show that citus local table shard is still on the coordinator SELECT tablename FROM pg_catalog.pg_tables where tablename like 'citus_local_table_%'; tablename @@ -97,13 +131,45 @@ DROP TABLE citus_local_table; CREATE TABLE dist_table_test_2(a int); SET citus.shard_count TO 4; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO "statement"; SELECT create_distributed_table('dist_table_test_2', 'a'); create_distributed_table --------------------------------------------------------------------- (1 row) +-- Mark tables as coordinator replicated in 
order to be able to test replicate_table_shards +UPDATE pg_dist_partition SET repmodel='c' WHERE logicalrelid IN + ('dist_table_test_2'::regclass); +-- replicate_table_shards should fail when the hostname GUC is set to a non-reachable node +ALTER SYSTEM SET citus.local_hostname TO 'foobar'; +SELECT pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +SELECT pg_sleep(.1); -- wait to make sure the config has changed before running the GUC + pg_sleep +--------------------------------------------------------------------- + +(1 row) + +SET citus.shard_replication_factor TO 2; +SELECT replicate_table_shards('dist_table_test_2', max_shard_copies := 4, shard_transfer_mode:='block_writes'); +ERROR: connection to the remote node foobar:57636 failed with the following error: could not translate host name "foobar" to address: +ALTER SYSTEM RESET citus.local_hostname; +SELECT pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +SELECT pg_sleep(.1); -- wait to make sure the config has changed before running the GUC + pg_sleep +--------------------------------------------------------------------- + +(1 row) + -- replicate reference table should ignore the coordinator SET citus.shard_replication_factor TO 2; SELECT replicate_table_shards('dist_table_test_2', max_shard_copies := 4, shard_transfer_mode:='block_writes'); @@ -119,8 +185,10 @@ NOTICE: Copying shard xxxxx from localhost:xxxxx to localhost:xxxxx ... DROP TABLE dist_table_test, dist_table_test_2, ref_table_test; RESET citus.shard_count; RESET citus.shard_replication_factor; -RESET citus.replication_model; -- Create a user to test multiuser usage of rebalancer functions +-- We explicitely don't create this user on worker nodes yet, so we can +-- test some more error handling. We create them later there. +SET citus.enable_create_role_propagation TO OFF; CREATE USER testrole; NOTICE: not propagating CREATE ROLE/USER commands to worker nodes HINT: Connect to worker nodes directly to manually create all necessary users and roles. 
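The hunks above apply the same failure-injection pattern several times: temporarily point citus.local_hostname at a name that cannot be resolved, confirm that the maintenance command (master_drain_node, rebalance_table_shards or replicate_table_shards) fails while connecting through it, and then restore the GUC. A condensed sketch of that pattern, with the placeholder name 'unreachable.invalid' standing in for the test's 'foobar':

ALTER SYSTEM SET citus.local_hostname TO 'unreachable.invalid';
SELECT pg_reload_conf();
SELECT pg_sleep(.1);  -- give the reload a moment to take effect
-- any command that connects back to the local node now errors out, e.g.:
-- SELECT master_drain_node('localhost', :master_port);
ALTER SYSTEM RESET citus.local_hostname;
SELECT pg_reload_conf();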
@@ -132,7 +200,8 @@ CREATE OR REPLACE FUNCTION shard_placement_rebalance_array( shard_placement_list json[], threshold float4 DEFAULT 0, max_shard_moves int DEFAULT 1000000, - drain_only bool DEFAULT false + drain_only bool DEFAULT false, + improvement_threshold float4 DEFAULT 0.5 ) RETURNS json[] AS 'citus' @@ -340,6 +409,7 @@ SELECT master_create_distributed_table('replication_test_table', 'int_column', ' CREATE VIEW replication_test_table_placements_per_node AS SELECT count(*) FROM pg_dist_shard_placement NATURAL JOIN pg_dist_shard WHERE logicalrelid = 'replication_test_table'::regclass + AND shardstate != 4 GROUP BY nodename, nodeport ORDER BY nodename, nodeport; -- Create four shards with replication factor 2, and delete the placements @@ -462,6 +532,7 @@ SELECT master_create_distributed_table('rebalance_test_table', 'int_column', 'ap CREATE VIEW table_placements_per_node AS SELECT nodeport, logicalrelid::regclass, count(*) FROM pg_dist_shard_placement NATURAL JOIN pg_dist_shard +WHERE shardstate != 4 GROUP BY logicalrelid::regclass, nodename, nodeport ORDER BY logicalrelid::regclass, nodename, nodeport; -- Create six shards with replication factor 1 and move them to the same @@ -482,6 +553,7 @@ AS $$ pg_dist_shard_placement src USING (shardid), (SELECT nodename, nodeport FROM pg_dist_shard_placement ORDER BY nodeport DESC LIMIT 1) dst WHERE src.nodeport < dst.nodeport AND s.logicalrelid = rel::regclass; + CALL citus_cleanup_orphaned_shards(); $$; CALL create_unbalanced_shards('rebalance_test_table'); SET citus.shard_replication_factor TO 2; @@ -502,6 +574,44 @@ SELECT * FROM table_placements_per_node; 57638 | rebalance_test_table | 6 (1 row) +-- check rebalances use the localhost guc by seeing it fail when the GUC is set to a non-existing host +ALTER SYSTEM SET citus.local_hostname TO 'foobar'; +SELECT pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +SELECT pg_sleep(.1); -- wait to make sure the config has changed before running the GUC + pg_sleep +--------------------------------------------------------------------- + +(1 row) + +SELECT rebalance_table_shards('rebalance_test_table', + excluded_shard_list := excluded_shard_list, + threshold := 0, + shard_transfer_mode:='block_writes') +FROM ( + SELECT (array_agg(DISTINCT shardid ORDER BY shardid))[1:4] AS excluded_shard_list + FROM pg_dist_shard + WHERE logicalrelid = 'rebalance_test_table'::regclass + ) T; +ERROR: connection to the remote node foobar:57636 failed with the following error: could not translate host name "foobar" to address: +CALL citus_cleanup_orphaned_shards(); +ALTER SYSTEM RESET citus.local_hostname; +SELECT pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +SELECT pg_sleep(.1); -- wait to make sure the config has changed before running the GUC + pg_sleep +--------------------------------------------------------------------- + +(1 row) + -- Check excluded_shard_list by excluding four shards with smaller ids SELECT rebalance_table_shards('rebalance_test_table', excluded_shard_list := excluded_shard_list, @@ -517,6 +627,7 @@ FROM ( (1 row) +CALL citus_cleanup_orphaned_shards(); SELECT * FROM table_placements_per_node; nodeport | logicalrelid | count --------------------------------------------------------------------- @@ -526,17 +637,49 @@ SELECT * FROM table_placements_per_node; -- Check that max_shard_moves limits number of move operations -- First check that we error if 
not table owner +-- Turn on NOTICE messages +SET ROLE testrole; +-- Make sure that rebalance is stopped if source or target nodes are +-- unresponsive. +SELECT rebalance_table_shards('rebalance_test_table', + shard_transfer_mode:='block_writes'); +ERROR: target node localhost:xxxxx is not responsive +\c - - - :worker_1_port +SET citus.enable_create_role_propagation TO OFF; +CREATE USER testrole; +NOTICE: not propagating CREATE ROLE/USER commands to worker nodes +HINT: Connect to worker nodes directly to manually create all necessary users and roles. +GRANT ALL ON SCHEMA public TO testrole; +\c - - - :master_port +SET client_min_messages TO WARNING; SET ROLE testrole; SELECT rebalance_table_shards('rebalance_test_table', - threshold := 0, max_shard_moves := 1, shard_transfer_mode:='block_writes'); -WARNING: localhost:xxxxx is not responsive - rebalance_table_shards ---------------------------------------------------------------------- - -(1 row) - +ERROR: source node localhost:xxxxx is not responsive +\c - - - :worker_2_port +SET citus.enable_create_role_propagation TO OFF; +CREATE USER testrole; +NOTICE: not propagating CREATE ROLE/USER commands to worker nodes +HINT: Connect to worker nodes directly to manually create all necessary users and roles. +GRANT ALL ON SCHEMA public TO testrole; +\c - - - :master_port +SET client_min_messages TO WARNING; +SET citus.next_shard_id TO 123010; +SET ROLE testrole; +SELECT rebalance_table_shards('rebalance_test_table', + shard_transfer_mode:='block_writes'); +ERROR: must be owner of table rebalance_test_table +CONTEXT: while executing command on localhost:xxxxx RESET ROLE; +-- Confirm no moves took place at all during these errors +SELECT * FROM table_placements_per_node; + nodeport | logicalrelid | count +--------------------------------------------------------------------- + 57637 | rebalance_test_table | 1 + 57638 | rebalance_test_table | 5 +(2 rows) + +CALL citus_cleanup_orphaned_shards(); SELECT rebalance_table_shards('rebalance_test_table', threshold := 0, max_shard_moves := 1, shard_transfer_mode:='block_writes'); @@ -545,6 +688,7 @@ SELECT rebalance_table_shards('rebalance_test_table', (1 row) +CALL citus_cleanup_orphaned_shards(); SELECT * FROM table_placements_per_node; nodeport | logicalrelid | count --------------------------------------------------------------------- @@ -559,6 +703,7 @@ SELECT rebalance_table_shards('rebalance_test_table', threshold := 1, shard_tran (1 row) +CALL citus_cleanup_orphaned_shards(); SELECT * FROM table_placements_per_node; nodeport | logicalrelid | count --------------------------------------------------------------------- @@ -573,6 +718,7 @@ SELECT rebalance_table_shards('rebalance_test_table', threshold := 0); (1 row) +CALL citus_cleanup_orphaned_shards(); SELECT * FROM table_placements_per_node; nodeport | logicalrelid | count --------------------------------------------------------------------- @@ -588,6 +734,7 @@ SELECT rebalance_table_shards('rebalance_test_table', threshold := 0, shard_tran (1 row) +CALL citus_cleanup_orphaned_shards(); SELECT * FROM table_placements_per_node; nodeport | logicalrelid | count --------------------------------------------------------------------- @@ -761,6 +908,7 @@ SELECT COUNT(*) FROM imbalanced_table; -- Try force_logical SELECT rebalance_table_shards('imbalanced_table', threshold:=0, shard_transfer_mode:='force_logical'); ERROR: the force_logical transfer mode is currently unsupported +CALL citus_cleanup_orphaned_shards(); -- Test rebalance operation SELECT 
rebalance_table_shards('imbalanced_table', threshold:=0, shard_transfer_mode:='block_writes'); rebalance_table_shards @@ -768,6 +916,7 @@ SELECT rebalance_table_shards('imbalanced_table', threshold:=0, shard_transfer_m (1 row) +CALL citus_cleanup_orphaned_shards(); -- Confirm rebalance -- Shard counts in each node after rebalance SELECT * FROM public.table_placements_per_node; @@ -796,6 +945,48 @@ SELECT create_distributed_table('colocated_rebalance_test', 'id'); (1 row) +-- make sure that we do not allow shards on target nodes +-- that are not eligable to move shards +-- Try to move shards to a non-existing node +SELECT master_move_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', 10000, 'block_writes') +FROM pg_dist_shard_placement +WHERE nodeport = :worker_2_port; +ERROR: Moving shards to a non-existing node is not supported +HINT: Add the target node via SELECT citus_add_node('localhost', 10000); +CALL citus_cleanup_orphaned_shards(); +-- Try to move shards to a node where shards are not allowed +SELECT * from master_set_node_property('localhost', :worker_1_port, 'shouldhaveshards', false); + master_set_node_property +--------------------------------------------------------------------- + +(1 row) + +SELECT master_move_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'block_writes') +FROM pg_dist_shard_placement +WHERE nodeport = :worker_2_port; +ERROR: Moving shards to a node that shouldn't have a shard is not supported +HINT: Allow shards on the target node via SELECT * FROM citus_set_node_property('localhost', 57637, 'shouldhaveshards', true); +SELECT * from master_set_node_property('localhost', :worker_1_port, 'shouldhaveshards', true); + master_set_node_property +--------------------------------------------------------------------- + +(1 row) + +-- Try to move shards to a non-active node +UPDATE pg_dist_node SET isactive = false WHERE nodeport = :worker_1_port; +SELECT master_move_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'block_writes') +FROM pg_dist_shard_placement +WHERE nodeport = :worker_2_port; +ERROR: Moving shards to a non-active node is not supported +HINT: Activate the target node via SELECT citus_activate_node('localhost', 57637); +UPDATE pg_dist_node SET isactive = true WHERE nodeport = :worker_1_port; +-- Try to move shards to a secondary node +UPDATE pg_dist_node SET noderole = 'secondary' WHERE nodeport = :worker_1_port; +SELECT master_move_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'block_writes') +FROM pg_dist_shard_placement +WHERE nodeport = :worker_2_port; +ERROR: Moving shards to a secondary (e.g., replica) node is not supported +UPDATE pg_dist_node SET noderole = 'primary' WHERE nodeport = :worker_1_port; -- Move all shards to worker1 SELECT master_move_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'block_writes') FROM pg_dist_shard_placement @@ -806,6 +997,7 @@ WHERE nodeport = :worker_2_port; (2 rows) +CALL citus_cleanup_orphaned_shards(); SELECT create_distributed_table('colocated_rebalance_test2', 'id'); create_distributed_table --------------------------------------------------------------------- @@ -833,6 +1025,7 @@ SELECT * FROM rebalance_table_shards('colocated_rebalance_test', threshold := 0, (1 row) +CALL citus_cleanup_orphaned_shards(); -- Confirm that nothing changed SELECT * FROM public.table_placements_per_node; nodeport | logicalrelid | count @@ -863,7 +1056,7 @@ SELECT * FROM 
get_rebalance_table_shards_plan('colocated_rebalance_test', rebala -- Check that we can call this function SELECT * FROM get_rebalance_progress(); - sessionid | table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport | progress + sessionid | table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport | progress | source_shard_size | target_shard_size --------------------------------------------------------------------- (0 rows) @@ -874,13 +1067,64 @@ SELECT * FROM rebalance_table_shards('colocated_rebalance_test', threshold := 0, (1 row) +CALL citus_cleanup_orphaned_shards(); -- Check that we can call this function without a crash SELECT * FROM get_rebalance_progress(); - sessionid | table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport | progress + sessionid | table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport | progress | source_shard_size | target_shard_size --------------------------------------------------------------------- (0 rows) --- Confirm that the nodes are now there +-- Confirm that the shards are now there +SELECT * FROM public.table_placements_per_node; + nodeport | logicalrelid | count +--------------------------------------------------------------------- + 57637 | colocated_rebalance_test | 2 + 57638 | colocated_rebalance_test | 2 + 57637 | colocated_rebalance_test2 | 2 + 57638 | colocated_rebalance_test2 | 2 +(4 rows) + +CALL citus_cleanup_orphaned_shards(); +select * from pg_dist_placement; + placementid | shardid | shardstate | shardlength | groupid +--------------------------------------------------------------------- + 135 | 123023 | 1 | 0 | 14 + 138 | 123024 | 1 | 0 | 14 + 141 | 123027 | 1 | 0 | 14 + 142 | 123028 | 1 | 0 | 14 + 143 | 123021 | 1 | 0 | 16 + 144 | 123025 | 1 | 0 | 16 + 145 | 123022 | 1 | 0 | 16 + 146 | 123026 | 1 | 0 | 16 +(8 rows) + +-- Move all shards to worker1 again +SELECT master_move_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'block_writes') +FROM pg_dist_shard NATURAL JOIN pg_dist_placement NATURAL JOIN pg_dist_node +WHERE nodeport = :worker_2_port AND logicalrelid = 'colocated_rebalance_test'::regclass; + master_move_shard_placement +--------------------------------------------------------------------- + + +(2 rows) + +-- Confirm that the shards are now all on worker1 +SELECT * FROM public.table_placements_per_node; + nodeport | logicalrelid | count +--------------------------------------------------------------------- + 57637 | colocated_rebalance_test | 4 + 57637 | colocated_rebalance_test2 | 4 +(2 rows) + +-- Explicitly don't run citus_cleanup_orphaned_shards, rebalance_table_shards +-- should do that for automatically. 
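-- (For context: an orphaned placement here is the source placement a shard
-- move leaves behind in shardstate 4; the helper views defined earlier filter
-- those out with "shardstate != 4", and CALL citus_cleanup_orphaned_shards()
-- is what eventually drops them.)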
+SELECT * FROM rebalance_table_shards('colocated_rebalance_test', threshold := 0, shard_transfer_mode := 'block_writes'); + rebalance_table_shards +--------------------------------------------------------------------- + +(1 row) + +-- Confirm that the shards are now moved SELECT * FROM public.table_placements_per_node; nodeport | logicalrelid | count --------------------------------------------------------------------- @@ -931,6 +1175,7 @@ SELECT * FROM rebalance_table_shards('colocated_rebalance_test', threshold := 0, (1 row) +CALL citus_cleanup_orphaned_shards(); SELECT * FROM public.table_placements_per_node; nodeport | logicalrelid | count --------------------------------------------------------------------- @@ -953,6 +1198,7 @@ SELECT * FROM rebalance_table_shards('non_colocated_rebalance_test', threshold : (1 row) +CALL citus_cleanup_orphaned_shards(); SELECT * FROM public.table_placements_per_node; nodeport | logicalrelid | count --------------------------------------------------------------------- @@ -974,6 +1220,7 @@ SELECT * FROM rebalance_table_shards('colocated_rebalance_test', threshold := 0, (1 row) +CALL citus_cleanup_orphaned_shards(); SELECT * FROM public.table_placements_per_node; nodeport | logicalrelid | count --------------------------------------------------------------------- @@ -990,6 +1237,7 @@ SELECT * FROM rebalance_table_shards('non_colocated_rebalance_test', threshold : (1 row) +CALL citus_cleanup_orphaned_shards(); SELECT * FROM public.table_placements_per_node; nodeport | logicalrelid | count --------------------------------------------------------------------- @@ -1026,6 +1274,7 @@ SELECT * FROM rebalance_table_shards(threshold := 0, shard_transfer_mode := 'blo (1 row) +CALL citus_cleanup_orphaned_shards(); SELECT * FROM public.table_placements_per_node; nodeport | logicalrelid | count --------------------------------------------------------------------- @@ -1047,6 +1296,7 @@ SELECT * FROM rebalance_table_shards(threshold := 0, shard_transfer_mode := 'blo (1 row) +CALL citus_cleanup_orphaned_shards(); SELECT * FROM public.table_placements_per_node; nodeport | logicalrelid | count --------------------------------------------------------------------- @@ -1083,6 +1333,7 @@ SELECT * FROM rebalance_table_shards(threshold := 0, shard_transfer_mode := 'blo (1 row) +CALL citus_cleanup_orphaned_shards(); SELECT * FROM public.table_placements_per_node; nodeport | logicalrelid | count --------------------------------------------------------------------- @@ -1104,6 +1355,7 @@ SELECT * FROM rebalance_table_shards(threshold := 0, shard_transfer_mode := 'blo (1 row) +CALL citus_cleanup_orphaned_shards(); SELECT * FROM public.table_placements_per_node; nodeport | logicalrelid | count --------------------------------------------------------------------- @@ -1129,6 +1381,7 @@ SELECT * from master_drain_node('localhost', :worker_2_port, shard_transfer_mode (1 row) +CALL citus_cleanup_orphaned_shards(); select shouldhaveshards from pg_dist_node where nodeport = :worker_2_port; shouldhaveshards --------------------------------------------------------------------- @@ -1156,6 +1409,7 @@ SELECT * FROM rebalance_table_shards(threshold := 0, shard_transfer_mode := 'blo (1 row) +CALL citus_cleanup_orphaned_shards(); SELECT * FROM public.table_placements_per_node; nodeport | logicalrelid | count --------------------------------------------------------------------- @@ -1175,11 +1429,10 @@ RESET search_path; DROP SCHEMA test_schema_support CASCADE; \set VERBOSITY default REVOKE ALL ON 
SCHEMA public FROM testrole; -ERROR: role "testrole" does not exist -CONTEXT: while executing command on localhost:xxxxx DROP USER testrole; -- Test costs set citus.shard_count = 4; +SET citus.next_shard_id TO 123040; CREATE TABLE tab (x int); SELECT create_distributed_table('tab','x'); create_distributed_table @@ -1193,6 +1446,7 @@ INSERT INTO tab SELECT 1 from generate_series(1, 30000); INSERT INTO tab SELECT 2 from generate_series(1, 10000); INSERT INTO tab SELECT 3 from generate_series(1, 10000); INSERT INTO tab SELECT 6 from generate_series(1, 10000); +VACUUM FULL tab; ANALYZE tab; \c - - - :worker_1_port SELECT table_schema, table_name, row_estimate, total_bytes @@ -1211,8 +1465,8 @@ WHERE table_schema = 'public' ) a ORDER BY table_name; table_schema | table_name | row_estimate | total_bytes --------------------------------------------------------------------- - public | tab_123033 | 30000 | 1114112 - public | tab_123035 | 10000 | 393216 + public | tab_123040 | 30000 | 1089536 + public | tab_123042 | 10000 | 368640 (2 rows) \c - - - :worker_2_port @@ -1232,8 +1486,8 @@ WHERE table_schema = 'public' ) a ORDER BY table_name; table_schema | table_name | row_estimate | total_bytes --------------------------------------------------------------------- - public | tab_123034 | 10000 | 393216 - public | tab_123036 | 10000 | 393216 + public | tab_123041 | 10000 | 368640 + public | tab_123043 | 10000 | 368640 (2 rows) \c - - - :master_port @@ -1245,7 +1499,7 @@ SELECT * FROM get_rebalance_table_shards_plan('tab'); SELECT * FROM get_rebalance_table_shards_plan('tab', rebalance_strategy := 'by_disk_size'); table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport --------------------------------------------------------------------- - tab | 123035 | 0 | localhost | 57637 | localhost | 57638 + tab | 123042 | 0 | localhost | 57637 | localhost | 57638 (1 row) SELECT * FROM get_rebalance_table_shards_plan('tab', rebalance_strategy := 'by_disk_size', threshold := 0); @@ -1253,7 +1507,7 @@ WARNING: the given threshold is lower than the minimum threshold allowed by the DETAIL: Using threshold of 0.01 table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport --------------------------------------------------------------------- - tab | 123035 | 0 | localhost | 57637 | localhost | 57638 + tab | 123042 | 0 | localhost | 57637 | localhost | 57638 (1 row) SELECT * FROM rebalance_table_shards('tab', shard_transfer_mode:='block_writes'); @@ -1262,6 +1516,7 @@ SELECT * FROM rebalance_table_shards('tab', shard_transfer_mode:='block_writes') (1 row) +CALL citus_cleanup_orphaned_shards(); SELECT * FROM public.table_placements_per_node; nodeport | logicalrelid | count --------------------------------------------------------------------- @@ -1276,6 +1531,8 @@ NOTICE: Moving shard xxxxx from localhost:xxxxx to localhost:xxxxx ... 
(1 row) +CALL citus_cleanup_orphaned_shards(); +NOTICE: cleaned up 1 orphaned shards SELECT * FROM public.table_placements_per_node; nodeport | logicalrelid | count --------------------------------------------------------------------- @@ -1291,6 +1548,7 @@ DETAIL: Using threshold of 0.01 (1 row) +CALL citus_cleanup_orphaned_shards(); SELECT * FROM public.table_placements_per_node; nodeport | logicalrelid | count --------------------------------------------------------------------- @@ -1312,6 +1570,7 @@ INSERT INTO tab2 SELECT 1 from generate_series(1, 0); INSERT INTO tab2 SELECT 2 from generate_series(1, 60000); INSERT INTO tab2 SELECT 3 from generate_series(1, 10000); INSERT INTO tab2 SELECT 6 from generate_series(1, 10000); +VACUUM FULL tab, tab2; ANALYZE tab, tab2; \c - - - :worker_1_port SELECT table_schema, table_name, row_estimate, total_bytes @@ -1331,7 +1590,7 @@ WHERE table_schema = 'public' table_schema | table_name | row_estimate | total_bytes --------------------------------------------------------------------- public | tab2_123050 | 0 | 0 - public | tab_123033 | 30000 | 1114112 + public | tab_123040 | 30000 | 1089536 (2 rows) \c - - - :worker_2_port @@ -1351,25 +1610,43 @@ WHERE table_schema = 'public' ) a ORDER BY table_name; table_schema | table_name | row_estimate | total_bytes --------------------------------------------------------------------- - public | tab2_123051 | 10000 | 393216 - public | tab2_123052 | 10000 | 393216 - public | tab2_123053 | 60000 | 2203648 - public | tab_123034 | 10000 | 393216 - public | tab_123035 | 10000 | 368640 - public | tab_123036 | 10000 | 393216 + public | tab2_123051 | 10000 | 368640 + public | tab2_123052 | 10000 | 368640 + public | tab2_123053 | 60000 | 2179072 + public | tab_123041 | 10000 | 368640 + public | tab_123042 | 10000 | 368640 + public | tab_123043 | 10000 | 368640 (6 rows) \c - - - :master_port SELECT * FROM get_rebalance_table_shards_plan('tab', rebalance_strategy := 'by_disk_size'); +NOTICE: Ignoring move of shard xxxxx from localhost:xxxxx to localhost:xxxxx, because the move only brings a small improvement relative to the shard its size +DETAIL: The balance improvement of 0.151125 is lower than the improvement_threshold of 0.5 +NOTICE: Ignored 1 moves, all of which are shown in notices above +HINT: If you do want these moves to happen, try changing improvement_threshold to a lower value than what it is now (0.5). 
table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport --------------------------------------------------------------------- - tab | 123036 | 0 | localhost | 57638 | localhost | 57637 + tab | 123041 | 0 | localhost | 57638 | localhost | 57637 + tab2 | 123051 | 0 | localhost | 57638 | localhost | 57637 + tab | 123042 | 0 | localhost | 57638 | localhost | 57637 + tab2 | 123052 | 0 | localhost | 57638 | localhost | 57637 +(4 rows) + +-- supports improvement_threshold +SELECT * FROM get_rebalance_table_shards_plan('tab', rebalance_strategy := 'by_disk_size', improvement_threshold := 0); + table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport +--------------------------------------------------------------------- + tab | 123043 | 0 | localhost | 57638 | localhost | 57637 tab2 | 123053 | 0 | localhost | 57638 | localhost | 57637 - tab | 123033 | 0 | localhost | 57637 | localhost | 57638 + tab | 123040 | 0 | localhost | 57637 | localhost | 57638 tab2 | 123050 | 0 | localhost | 57637 | localhost | 57638 (4 rows) SELECT * FROM rebalance_table_shards('tab', rebalance_strategy := 'by_disk_size', shard_transfer_mode:='block_writes'); +NOTICE: Ignoring move of shard xxxxx from localhost:xxxxx to localhost:xxxxx, because the move only brings a small improvement relative to the shard its size +DETAIL: The balance improvement of 0.151125 is lower than the improvement_threshold of 0.5 +NOTICE: Ignored 1 moves, all of which are shown in notices above +HINT: If you do want these moves to happen, try changing improvement_threshold to a lower value than what it is now (0.5). NOTICE: Moving shard xxxxx from localhost:xxxxx to localhost:xxxxx ... NOTICE: Moving shard xxxxx from localhost:xxxxx to localhost:xxxxx ... rebalance_table_shards @@ -1377,13 +1654,15 @@ NOTICE: Moving shard xxxxx from localhost:xxxxx to localhost:xxxxx ... 
(1 row) +CALL citus_cleanup_orphaned_shards(); +NOTICE: cleaned up 4 orphaned shards SELECT * FROM public.table_placements_per_node; nodeport | logicalrelid | count --------------------------------------------------------------------- - 57637 | tab | 1 - 57638 | tab | 3 - 57637 | tab2 | 1 - 57638 | tab2 | 3 + 57637 | tab | 3 + 57638 | tab | 1 + 57637 | tab2 | 3 + 57638 | tab2 | 1 (4 rows) ANALYZE tab, tab2; @@ -1401,50 +1680,51 @@ SELECT table_schema, table_name, row_estimate, total_bytes WHERE relkind = 'r' ) a WHERE table_schema = 'public' +) a ORDER BY table_name; + table_schema | table_name | row_estimate | total_bytes +--------------------------------------------------------------------- + public | tab2_123050 | 0 | 0 + public | tab2_123051 | 10000 | 368640 + public | tab2_123052 | 10000 | 368640 + public | tab_123040 | 30000 | 1089536 + public | tab_123041 | 10000 | 368640 + public | tab_123042 | 10000 | 368640 +(6 rows) + +\c - - - :worker_2_port +SELECT table_schema, table_name, row_estimate, total_bytes + FROM ( + SELECT *, total_bytes-index_bytes-COALESCE(toast_bytes,0) AS table_bytes FROM ( + SELECT c.oid,nspname AS table_schema, relname AS TABLE_NAME + , c.reltuples AS row_estimate + , pg_total_relation_size(c.oid) AS total_bytes + , pg_indexes_size(c.oid) AS index_bytes + , pg_total_relation_size(reltoastrelid) AS toast_bytes + FROM pg_class c + LEFT JOIN pg_namespace n ON n.oid = c.relnamespace + WHERE relkind = 'r' + ) a +WHERE table_schema = 'public' ) a ORDER BY table_name; table_schema | table_name | row_estimate | total_bytes --------------------------------------------------------------------- public | tab2_123053 | 60000 | 2179072 - public | tab_123036 | 10000 | 368640 + public | tab_123043 | 10000 | 368640 (2 rows) -\c - - - :worker_2_port -SELECT table_schema, table_name, row_estimate, total_bytes - FROM ( - SELECT *, total_bytes-index_bytes-COALESCE(toast_bytes,0) AS table_bytes FROM ( - SELECT c.oid,nspname AS table_schema, relname AS TABLE_NAME - , c.reltuples AS row_estimate - , pg_total_relation_size(c.oid) AS total_bytes - , pg_indexes_size(c.oid) AS index_bytes - , pg_total_relation_size(reltoastrelid) AS toast_bytes - FROM pg_class c - LEFT JOIN pg_namespace n ON n.oid = c.relnamespace - WHERE relkind = 'r' - ) a -WHERE table_schema = 'public' -) a ORDER BY table_name; - table_schema | table_name | row_estimate | total_bytes ---------------------------------------------------------------------- - public | tab2_123050 | 0 | 0 - public | tab2_123051 | 10000 | 393216 - public | tab2_123052 | 10000 | 393216 - public | tab_123033 | 30000 | 1089536 - public | tab_123034 | 10000 | 393216 - public | tab_123035 | 10000 | 368640 -(6 rows) - \c - - - :master_port DROP TABLE tab2; -CREATE OR REPLACE FUNCTION capacity_high_worker_1(nodeidarg int) +CREATE OR REPLACE FUNCTION capacity_high_worker_2(nodeidarg int) RETURNS real AS $$ SELECT - (CASE WHEN nodeport = 57637 THEN 1000 ELSE 1 END)::real + (CASE WHEN nodeport = 57638 THEN 1000 ELSE 1 END)::real FROM pg_dist_node where nodeid = nodeidarg $$ LANGUAGE sql; +\set VERBOSITY terse SELECT citus_add_rebalance_strategy( - 'capacity_high_worker_1', + 'capacity_high_worker_2', 'citus_shard_cost_1', - 'capacity_high_worker_1', + 'capacity_high_worker_2', 'citus_shard_allowed_on_node_true', 0 ); @@ -1453,15 +1733,15 @@ SELECT citus_add_rebalance_strategy( (1 row) -SELECT * FROM get_rebalance_table_shards_plan('tab', rebalance_strategy := 'capacity_high_worker_1'); +SELECT * FROM get_rebalance_table_shards_plan('tab', 
rebalance_strategy := 'capacity_high_worker_2'); table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport --------------------------------------------------------------------- - tab | 123033 | 0 | localhost | 57638 | localhost | 57637 - tab | 123034 | 0 | localhost | 57638 | localhost | 57637 - tab | 123035 | 0 | localhost | 57638 | localhost | 57637 + tab | 123040 | 0 | localhost | 57637 | localhost | 57638 + tab | 123041 | 0 | localhost | 57637 | localhost | 57638 + tab | 123042 | 0 | localhost | 57637 | localhost | 57638 (3 rows) -SELECT * FROM rebalance_table_shards('tab', rebalance_strategy := 'capacity_high_worker_1', shard_transfer_mode:='block_writes'); +SELECT * FROM rebalance_table_shards('tab', rebalance_strategy := 'capacity_high_worker_2', shard_transfer_mode:='block_writes'); NOTICE: Moving shard xxxxx from localhost:xxxxx to localhost:xxxxx ... NOTICE: Moving shard xxxxx from localhost:xxxxx to localhost:xxxxx ... NOTICE: Moving shard xxxxx from localhost:xxxxx to localhost:xxxxx ... @@ -1470,13 +1750,15 @@ NOTICE: Moving shard xxxxx from localhost:xxxxx to localhost:xxxxx ... (1 row) +CALL citus_cleanup_orphaned_shards(); +NOTICE: cleaned up 3 orphaned shards SELECT * FROM public.table_placements_per_node; nodeport | logicalrelid | count --------------------------------------------------------------------- - 57637 | tab | 4 + 57638 | tab | 4 (1 row) -SELECT citus_set_default_rebalance_strategy('capacity_high_worker_1'); +SELECT citus_set_default_rebalance_strategy('capacity_high_worker_2'); citus_set_default_rebalance_strategy --------------------------------------------------------------------- @@ -1493,23 +1775,24 @@ SELECT * FROM rebalance_table_shards('tab', shard_transfer_mode:='block_writes') (1 row) +CALL citus_cleanup_orphaned_shards(); SELECT * FROM public.table_placements_per_node; nodeport | logicalrelid | count --------------------------------------------------------------------- - 57637 | tab | 4 + 57638 | tab | 4 (1 row) -CREATE FUNCTION only_worker_2(shardid bigint, nodeidarg int) +CREATE FUNCTION only_worker_1(shardid bigint, nodeidarg int) RETURNS boolean AS $$ SELECT - (CASE WHEN nodeport = 57638 THEN TRUE ELSE FALSE END) + (CASE WHEN nodeport = 57637 THEN TRUE ELSE FALSE END) FROM pg_dist_node where nodeid = nodeidarg $$ LANGUAGE sql; SELECT citus_add_rebalance_strategy( - 'only_worker_2', + 'only_worker_1', 'citus_shard_cost_1', 'citus_node_capacity_1', - 'only_worker_2', + 'only_worker_1', 0 ); citus_add_rebalance_strategy @@ -1517,7 +1800,7 @@ SELECT citus_add_rebalance_strategy( (1 row) -SELECT citus_set_default_rebalance_strategy('only_worker_2'); +SELECT citus_set_default_rebalance_strategy('only_worker_1'); citus_set_default_rebalance_strategy --------------------------------------------------------------------- @@ -1526,10 +1809,10 @@ SELECT citus_set_default_rebalance_strategy('only_worker_2'); SELECT * FROM get_rebalance_table_shards_plan('tab'); table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport --------------------------------------------------------------------- - tab | 123033 | 0 | localhost | 57637 | localhost | 57638 - tab | 123034 | 0 | localhost | 57637 | localhost | 57638 - tab | 123035 | 0 | localhost | 57637 | localhost | 57638 - tab | 123036 | 0 | localhost | 57637 | localhost | 57638 + tab | 123040 | 0 | localhost | 57638 | localhost | 57637 + tab | 123041 | 0 | localhost | 57638 | localhost | 57637 + tab | 123042 | 0 | localhost | 57638 | localhost | 57637 + tab | 
123043 | 0 | localhost | 57638 | localhost | 57637 (4 rows) SELECT * FROM rebalance_table_shards('tab', shard_transfer_mode:='block_writes'); @@ -1542,10 +1825,12 @@ NOTICE: Moving shard xxxxx from localhost:xxxxx to localhost:xxxxx ... (1 row) +CALL citus_cleanup_orphaned_shards(); +NOTICE: cleaned up 4 orphaned shards SELECT * FROM public.table_placements_per_node; nodeport | logicalrelid | count --------------------------------------------------------------------- - 57638 | tab | 4 + 57637 | tab | 4 (1 row) SELECT citus_set_default_rebalance_strategy('by_shard_count'); @@ -1557,8 +1842,8 @@ SELECT citus_set_default_rebalance_strategy('by_shard_count'); SELECT * FROM get_rebalance_table_shards_plan('tab'); table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport --------------------------------------------------------------------- - tab | 123033 | 0 | localhost | 57638 | localhost | 57637 - tab | 123034 | 0 | localhost | 57638 | localhost | 57637 + tab | 123040 | 0 | localhost | 57637 | localhost | 57638 + tab | 123041 | 0 | localhost | 57637 | localhost | 57638 (2 rows) -- Check all the error handling cases @@ -1566,18 +1851,21 @@ SELECT * FROM get_rebalance_table_shards_plan('tab', rebalance_strategy := 'non_ ERROR: could not find rebalance strategy with name non_existing SELECT * FROM rebalance_table_shards('tab', rebalance_strategy := 'non_existing'); ERROR: could not find rebalance strategy with name non_existing +CALL citus_cleanup_orphaned_shards(); SELECT * FROM master_drain_node('localhost', :worker_2_port, rebalance_strategy := 'non_existing'); ERROR: could not find rebalance strategy with name non_existing +CALL citus_cleanup_orphaned_shards(); SELECT citus_set_default_rebalance_strategy('non_existing'); ERROR: strategy with specified name does not exist -CONTEXT: PL/pgSQL function citus_set_default_rebalance_strategy(text) line 5 at RAISE UPDATE pg_dist_rebalance_strategy SET default_strategy=false; SELECT * FROM get_rebalance_table_shards_plan('tab'); ERROR: no rebalance_strategy was provided, but there is also no default strategy set SELECT * FROM rebalance_table_shards('tab'); ERROR: no rebalance_strategy was provided, but there is also no default strategy set +CALL citus_cleanup_orphaned_shards(); SELECT * FROM master_drain_node('localhost', :worker_2_port); ERROR: no rebalance_strategy was provided, but there is also no default strategy set +CALL citus_cleanup_orphaned_shards(); UPDATE pg_dist_rebalance_strategy SET default_strategy=true WHERE name='by_shard_count'; CREATE OR REPLACE FUNCTION shard_cost_no_arguments() RETURNS real AS $$ SELECT 1.0::real $$ LANGUAGE sql; @@ -1607,13 +1895,6 @@ SELECT citus_add_rebalance_strategy( 0 ); ERROR: signature for shard_cost_function is incorrect -DETAIL: number of arguments of shard_cost_no_arguments should be 1, not 0 -CONTEXT: SQL statement "SELECT citus_validate_rebalance_strategy_functions( - NEW.shard_cost_function, - NEW.node_capacity_function, - NEW.shard_allowed_on_node_function)" -PL/pgSQL function citus_internal.pg_dist_rebalance_strategy_trigger_func() line 5 at PERFORM -SQL function "citus_add_rebalance_strategy" statement 1 SELECT citus_add_rebalance_strategy( 'insert_should_fail', 'shard_cost_bad_arg_type', @@ -1622,13 +1903,6 @@ SELECT citus_add_rebalance_strategy( 0 ); ERROR: signature for shard_cost_function is incorrect -DETAIL: argument type of shard_cost_bad_arg_type should be bigint -CONTEXT: SQL statement "SELECT citus_validate_rebalance_strategy_functions( - 
NEW.shard_cost_function, - NEW.node_capacity_function, - NEW.shard_allowed_on_node_function)" -PL/pgSQL function citus_internal.pg_dist_rebalance_strategy_trigger_func() line 5 at PERFORM -SQL function "citus_add_rebalance_strategy" statement 1 SELECT citus_add_rebalance_strategy( 'insert_should_fail', 'shard_cost_bad_return_type', @@ -1637,13 +1911,6 @@ SELECT citus_add_rebalance_strategy( 0 ); ERROR: signature for shard_cost_function is incorrect -DETAIL: return type of shard_cost_bad_return_type should be real -CONTEXT: SQL statement "SELECT citus_validate_rebalance_strategy_functions( - NEW.shard_cost_function, - NEW.node_capacity_function, - NEW.shard_allowed_on_node_function)" -PL/pgSQL function citus_internal.pg_dist_rebalance_strategy_trigger_func() line 5 at PERFORM -SQL function "citus_add_rebalance_strategy" statement 1 SELECT citus_add_rebalance_strategy( 'insert_should_fail', 0, @@ -1652,12 +1919,6 @@ SELECT citus_add_rebalance_strategy( 0 ); ERROR: cache lookup failed for shard_cost_function with oid 0 -CONTEXT: SQL statement "SELECT citus_validate_rebalance_strategy_functions( - NEW.shard_cost_function, - NEW.node_capacity_function, - NEW.shard_allowed_on_node_function)" -PL/pgSQL function citus_internal.pg_dist_rebalance_strategy_trigger_func() line 5 at PERFORM -SQL function "citus_add_rebalance_strategy" statement 1 SELECT citus_add_rebalance_strategy( 'insert_should_fail', 'citus_shard_cost_1', @@ -1666,13 +1927,6 @@ SELECT citus_add_rebalance_strategy( 0 ); ERROR: signature for node_capacity_function is incorrect -DETAIL: number of arguments of node_capacity_no_arguments should be 1, not 0 -CONTEXT: SQL statement "SELECT citus_validate_rebalance_strategy_functions( - NEW.shard_cost_function, - NEW.node_capacity_function, - NEW.shard_allowed_on_node_function)" -PL/pgSQL function citus_internal.pg_dist_rebalance_strategy_trigger_func() line 5 at PERFORM -SQL function "citus_add_rebalance_strategy" statement 1 SELECT citus_add_rebalance_strategy( 'insert_should_fail', 'citus_shard_cost_1', @@ -1681,13 +1935,6 @@ SELECT citus_add_rebalance_strategy( 0 ); ERROR: signature for node_capacity_function is incorrect -DETAIL: argument type of node_capacity_bad_arg_type should be int -CONTEXT: SQL statement "SELECT citus_validate_rebalance_strategy_functions( - NEW.shard_cost_function, - NEW.node_capacity_function, - NEW.shard_allowed_on_node_function)" -PL/pgSQL function citus_internal.pg_dist_rebalance_strategy_trigger_func() line 5 at PERFORM -SQL function "citus_add_rebalance_strategy" statement 1 SELECT citus_add_rebalance_strategy( 'insert_should_fail', 'citus_shard_cost_1', @@ -1696,13 +1943,6 @@ SELECT citus_add_rebalance_strategy( 0 ); ERROR: signature for node_capacity_function is incorrect -DETAIL: return type of node_capacity_bad_return_type should be real -CONTEXT: SQL statement "SELECT citus_validate_rebalance_strategy_functions( - NEW.shard_cost_function, - NEW.node_capacity_function, - NEW.shard_allowed_on_node_function)" -PL/pgSQL function citus_internal.pg_dist_rebalance_strategy_trigger_func() line 5 at PERFORM -SQL function "citus_add_rebalance_strategy" statement 1 SELECT citus_add_rebalance_strategy( 'insert_should_fail', 'citus_shard_cost_1', @@ -1711,12 +1951,6 @@ SELECT citus_add_rebalance_strategy( 0 ); ERROR: cache lookup failed for node_capacity_function with oid 0 -CONTEXT: SQL statement "SELECT citus_validate_rebalance_strategy_functions( - NEW.shard_cost_function, - NEW.node_capacity_function, - NEW.shard_allowed_on_node_function)" -PL/pgSQL function 
citus_internal.pg_dist_rebalance_strategy_trigger_func() line 5 at PERFORM -SQL function "citus_add_rebalance_strategy" statement 1 SELECT citus_add_rebalance_strategy( 'insert_should_fail', 'citus_shard_cost_1', @@ -1725,13 +1959,6 @@ SELECT citus_add_rebalance_strategy( 0 ); ERROR: signature for shard_allowed_on_node_function is incorrect -DETAIL: number of arguments of shard_allowed_on_node_no_arguments should be 2, not 0 -CONTEXT: SQL statement "SELECT citus_validate_rebalance_strategy_functions( - NEW.shard_cost_function, - NEW.node_capacity_function, - NEW.shard_allowed_on_node_function)" -PL/pgSQL function citus_internal.pg_dist_rebalance_strategy_trigger_func() line 5 at PERFORM -SQL function "citus_add_rebalance_strategy" statement 1 SELECT citus_add_rebalance_strategy( 'insert_should_fail', 'citus_shard_cost_1', @@ -1740,13 +1967,6 @@ SELECT citus_add_rebalance_strategy( 0 ); ERROR: signature for shard_allowed_on_node_function is incorrect -DETAIL: type of first argument of shard_allowed_on_node_bad_arg1 should be bigint -CONTEXT: SQL statement "SELECT citus_validate_rebalance_strategy_functions( - NEW.shard_cost_function, - NEW.node_capacity_function, - NEW.shard_allowed_on_node_function)" -PL/pgSQL function citus_internal.pg_dist_rebalance_strategy_trigger_func() line 5 at PERFORM -SQL function "citus_add_rebalance_strategy" statement 1 SELECT citus_add_rebalance_strategy( 'insert_should_fail', 'citus_shard_cost_1', @@ -1755,13 +1975,6 @@ SELECT citus_add_rebalance_strategy( 0 ); ERROR: signature for shard_allowed_on_node_function is incorrect -DETAIL: type of second argument of shard_allowed_on_node_bad_arg2 should be int -CONTEXT: SQL statement "SELECT citus_validate_rebalance_strategy_functions( - NEW.shard_cost_function, - NEW.node_capacity_function, - NEW.shard_allowed_on_node_function)" -PL/pgSQL function citus_internal.pg_dist_rebalance_strategy_trigger_func() line 5 at PERFORM -SQL function "citus_add_rebalance_strategy" statement 1 SELECT citus_add_rebalance_strategy( 'insert_should_fail', 'citus_shard_cost_1', @@ -1770,13 +1983,6 @@ SELECT citus_add_rebalance_strategy( 0 ); ERROR: signature for shard_allowed_on_node_function is incorrect -DETAIL: return type of shard_allowed_on_node_bad_return_type should be boolean -CONTEXT: SQL statement "SELECT citus_validate_rebalance_strategy_functions( - NEW.shard_cost_function, - NEW.node_capacity_function, - NEW.shard_allowed_on_node_function)" -PL/pgSQL function citus_internal.pg_dist_rebalance_strategy_trigger_func() line 5 at PERFORM -SQL function "citus_add_rebalance_strategy" statement 1 SELECT citus_add_rebalance_strategy( 'insert_should_fail', 'citus_shard_cost_1', @@ -1785,12 +1991,6 @@ SELECT citus_add_rebalance_strategy( 0 ); ERROR: cache lookup failed for shard_allowed_on_node_function with oid 0 -CONTEXT: SQL statement "SELECT citus_validate_rebalance_strategy_functions( - NEW.shard_cost_function, - NEW.node_capacity_function, - NEW.shard_allowed_on_node_function)" -PL/pgSQL function citus_internal.pg_dist_rebalance_strategy_trigger_func() line 5 at PERFORM -SQL function "citus_add_rebalance_strategy" statement 1 -- Confirm that manual insert/update has the same checks INSERT INTO pg_catalog.pg_dist_rebalance_strategy( @@ -1807,20 +2007,8 @@ INSERT INTO 0 ); ERROR: signature for shard_cost_function is incorrect -DETAIL: number of arguments of shard_cost_no_arguments should be 1, not 0 -CONTEXT: SQL statement "SELECT citus_validate_rebalance_strategy_functions( - NEW.shard_cost_function, - 
NEW.node_capacity_function, - NEW.shard_allowed_on_node_function)" -PL/pgSQL function citus_internal.pg_dist_rebalance_strategy_trigger_func() line 5 at PERFORM UPDATE pg_dist_rebalance_strategy SET shard_cost_function='shard_cost_no_arguments' WHERE name='by_disk_size'; ERROR: signature for shard_cost_function is incorrect -DETAIL: number of arguments of shard_cost_no_arguments should be 1, not 0 -CONTEXT: SQL statement "SELECT citus_validate_rebalance_strategy_functions( - NEW.shard_cost_function, - NEW.node_capacity_function, - NEW.shard_allowed_on_node_function)" -PL/pgSQL function citus_internal.pg_dist_rebalance_strategy_trigger_func() line 5 at PERFORM -- Confirm that only a single default strategy can exist INSERT INTO pg_catalog.pg_dist_rebalance_strategy( @@ -1839,24 +2027,20 @@ INSERT INTO 0 ); ERROR: there cannot be two default strategies -CONTEXT: PL/pgSQL function citus_internal.pg_dist_rebalance_strategy_trigger_func() line 19 at RAISE UPDATE pg_dist_rebalance_strategy SET default_strategy=true WHERE name='by_disk_size'; ERROR: there cannot be two default strategies -CONTEXT: PL/pgSQL function citus_internal.pg_dist_rebalance_strategy_trigger_func() line 19 at RAISE -- ensure the trigger allows updating the default strategy UPDATE pg_dist_rebalance_strategy SET default_strategy=true WHERE name='by_shard_count'; -- Confirm that default strategy should be higher than minimum strategy SELECT citus_add_rebalance_strategy( 'default_threshold_too_low', 'citus_shard_cost_1', - 'capacity_high_worker_1', + 'capacity_high_worker_2', 'citus_shard_allowed_on_node_true', 0, 0.1 ); ERROR: default_threshold cannot be smaller than minimum_threshold -CONTEXT: PL/pgSQL function citus_internal.pg_dist_rebalance_strategy_trigger_func() line 10 at RAISE -SQL function "citus_add_rebalance_strategy" statement 1 -- Make it a data node again SELECT * from master_set_node_property('localhost', :worker_2_port, 'shouldhaveshards', true); master_set_node_property @@ -1882,7 +2066,6 @@ SET client_min_messages TO WARNING; CREATE TABLE dist_table_test_3(a int); SET citus.shard_count TO 4; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO "statement"; SELECT create_distributed_table('dist_table_test_3', 'a'); create_distributed_table --------------------------------------------------------------------- @@ -1909,6 +2092,11 @@ SELECT count(*) FROM pg_dist_shard NATURAL JOIN pg_dist_shard_placement WHERE lo (1 row) SET citus.shard_replication_factor TO 2; +SELECT replicate_table_shards('dist_table_test_3', max_shard_copies := 4, shard_transfer_mode:='block_writes'); +ERROR: Table 'dist_table_test_3' is streaming replicated. 
Shards of streaming replicated tables cannot be copied +-- Mark table as coordinator replicated in order to be able to test replicate_table_shards +UPDATE pg_dist_partition SET repmodel='c' WHERE logicalrelid IN + ('dist_table_test_3'::regclass); SELECT replicate_table_shards('dist_table_test_3', max_shard_copies := 4, shard_transfer_mode:='block_writes'); replicate_table_shards --------------------------------------------------------------------- @@ -1953,6 +2141,7 @@ SELECT rebalance_table_shards('rebalance_test_table', shard_transfer_mode:='bloc (1 row) +CALL citus_cleanup_orphaned_shards(); SELECT count(*) FROM pg_dist_shard NATURAL JOIN pg_dist_shard_placement WHERE logicalrelid = 'ref_table'::regclass; count --------------------------------------------------------------------- @@ -2014,6 +2203,7 @@ SELECT rebalance_table_shards(); (1 row) +CALL citus_cleanup_orphaned_shards(); DROP TABLE t1, r1, r2; -- verify there are no distributed tables before we perform the following tests. Preceding -- test suites should clean up their distributed tables. @@ -2062,6 +2252,7 @@ SELECT rebalance_table_shards(); (1 row) +CALL citus_cleanup_orphaned_shards(); -- verify the reference table is on all nodes after the rebalance SELECT count(*) FROM pg_dist_shard @@ -2114,6 +2305,11 @@ WHERE logicalrelid = 'r1'::regclass; 1 (1 row) +SELECT replicate_table_shards('t1', shard_replication_factor := 2); +ERROR: Table 't1' is streaming replicated. Shards of streaming replicated tables cannot be copied +-- Mark table as coordinator replicated in order to be able to test replicate_table_shards +UPDATE pg_dist_partition SET repmodel='c' WHERE logicalrelid IN + ('t1'::regclass); SELECT replicate_table_shards('t1', shard_replication_factor := 2); replicate_table_shards --------------------------------------------------------------------- diff --git a/src/test/regress/expected/shard_rebalancer_unit.out b/src/test/regress/expected/shard_rebalancer_unit.out index dcbf29df3..fe88968fe 100644 --- a/src/test/regress/expected/shard_rebalancer_unit.out +++ b/src/test/regress/expected/shard_rebalancer_unit.out @@ -3,7 +3,8 @@ CREATE OR REPLACE FUNCTION shard_placement_rebalance_array( shard_placement_list json[], threshold float4 DEFAULT 0, max_shard_moves int DEFAULT 1000000, - drain_only bool DEFAULT false + drain_only bool DEFAULT false, + improvement_threshold float4 DEFAULT 0.5 ) RETURNS json[] AS 'citus' @@ -500,3 +501,244 @@ NOTICE: Stopped searching before we were out of moves. Please rerun the rebalan {"updatetype":1,"shardid":8,"sourcename":"hostname2","sourceport":5432,"targetname":"hostname3","targetport":5432} (5 rows) +-- Don't move a big shards if it doesn't improve the utilization balance much. +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "a"}', + '{"node_name": "b"}']::json[], + ARRAY['{"shardid":1, "cost":20, "nodename":"a"}', + '{"shardid":2, "cost":20, "nodename":"a"}', + '{"shardid":3, "cost":100, "nodename":"b"}', + '{"shardid":4, "cost":50, "nodename":"b"}' + ]::json[] +)); +NOTICE: Ignoring move of shard xxxxx from b:5432 to a:5432, because the move only brings a small improvement relative to the shard its size +DETAIL: The balance improvement of 0.1 is lower than the improvement_threshold of 0.5 +NOTICE: Ignored 1 moves, all of which are shown in notices above +HINT: If you do want these moves to happen, try changing improvement_threshold to a lower value than what it is now (0.5). 
+ unnest +--------------------------------------------------------------------- + {"updatetype":1,"shardid":4,"sourcename":"b","sourceport":5432,"targetname":"a","targetport":5432} +(1 row) + +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "a"}', + '{"node_name": "b"}']::json[], + ARRAY['{"shardid":1, "cost":40, "nodename":"a"}', + '{"shardid":2, "cost":40, "nodename":"a"}', + '{"shardid":3, "cost":100, "nodename":"b"}', + '{"shardid":4, "cost":100, "nodename":"b"}' + ]::json[] +)); +NOTICE: Ignoring move of shard xxxxx from b:5432 to a:5432, because the move only brings a small improvement relative to the shard its size +DETAIL: The balance improvement of 0.2 is lower than the improvement_threshold of 0.5 +NOTICE: Ignoring move of shard xxxxx from b:5432 to a:5432, because the move only brings a small improvement relative to the shard its size +DETAIL: The balance improvement of 0.2 is lower than the improvement_threshold of 0.5 +NOTICE: Ignored 2 moves, all of which are shown in notices above +HINT: If you do want these moves to happen, try changing improvement_threshold to a lower value than what it is now (0.5). + unnest +--------------------------------------------------------------------- +(0 rows) + +-- improvement_threshold can be used to force a move of big shards +-- if needed. +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "a"}', + '{"node_name": "b"}']::json[], + ARRAY['{"shardid":1, "cost":20, "nodename":"a"}', + '{"shardid":2, "cost":20, "nodename":"a"}', + '{"shardid":3, "cost":100, "nodename":"b"}', + '{"shardid":4, "cost":50, "nodename":"b"}' + ]::json[], + improvement_threshold := 0.1 +)); + unnest +--------------------------------------------------------------------- + {"updatetype":1,"shardid":3,"sourcename":"b","sourceport":5432,"targetname":"a","targetport":5432} + {"updatetype":1,"shardid":1,"sourcename":"a","sourceport":5432,"targetname":"b","targetport":5432} + {"updatetype":1,"shardid":2,"sourcename":"a","sourceport":5432,"targetname":"b","targetport":5432} +(3 rows) + +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "a"}', + '{"node_name": "b"}']::json[], + ARRAY['{"shardid":1, "cost":40, "nodename":"a"}', + '{"shardid":2, "cost":40, "nodename":"a"}', + '{"shardid":3, "cost":100, "nodename":"b"}', + '{"shardid":4, "cost":100, "nodename":"b"}' + ]::json[], + improvement_threshold := 0.2 +)); + unnest +--------------------------------------------------------------------- + {"updatetype":1,"shardid":3,"sourcename":"b","sourceport":5432,"targetname":"a","targetport":5432} + {"updatetype":1,"shardid":1,"sourcename":"a","sourceport":5432,"targetname":"b","targetport":5432} +(2 rows) + +-- limits notices about ignored moves +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "a"}', + '{"node_name": "b"}', + '{"node_name": "c"}', + '{"node_name": "d"}', + '{"node_name": "e"}', + '{"node_name": "f"}', + '{"node_name": "g"}' + ]::json[], + ARRAY['{"shardid":1, "cost":39, "nodename":"a"}', + '{"shardid":2, "cost":39, "nodename":"b"}', + '{"shardid":3, "cost":39, "nodename":"c"}', + '{"shardid":4, "cost":39, "nodename":"d"}', + '{"shardid":5, "cost":39, "nodename":"e"}', + '{"shardid":6, "cost":39, "nodename":"f"}', + '{"shardid":7, "cost":40, "nodename":"g"}', + '{"shardid":8, "cost":39, "nodename":"g"}' + ]::json[], + improvement_threshold := 0.1 +)); +NOTICE: Ignoring move of shard xxxxx from g:5432 to a:5432, because the move only brings a small improvement relative to the shard its 
size +DETAIL: The balance improvement of 0.025641 is lower than the improvement_threshold of 0.1 +NOTICE: Ignoring move of shard xxxxx from g:5432 to b:5432, because the move only brings a small improvement relative to the shard its size +DETAIL: The balance improvement of 0.025641 is lower than the improvement_threshold of 0.1 +NOTICE: Ignoring move of shard xxxxx from g:5432 to c:5432, because the move only brings a small improvement relative to the shard its size +DETAIL: The balance improvement of 0.025641 is lower than the improvement_threshold of 0.1 +NOTICE: Ignoring move of shard xxxxx from g:5432 to d:5432, because the move only brings a small improvement relative to the shard its size +DETAIL: The balance improvement of 0.025641 is lower than the improvement_threshold of 0.1 +NOTICE: Ignoring move of shard xxxxx from g:5432 to e:5432, because the move only brings a small improvement relative to the shard its size +DETAIL: The balance improvement of 0.025641 is lower than the improvement_threshold of 0.1 +NOTICE: Ignored 6 moves, 5 of which are shown in notices above +HINT: If you do want these moves to happen, try changing improvement_threshold to a lower value than what it is now (0.1). + unnest +--------------------------------------------------------------------- +(0 rows) + +-- limits notices based on GUC +set citus.max_rebalancer_logged_ignored_moves = 1; +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "a"}', + '{"node_name": "b"}', + '{"node_name": "c"}', + '{"node_name": "d"}', + '{"node_name": "e"}', + '{"node_name": "f"}', + '{"node_name": "g"}' + ]::json[], + ARRAY['{"shardid":1, "cost":39, "nodename":"a"}', + '{"shardid":2, "cost":39, "nodename":"b"}', + '{"shardid":3, "cost":39, "nodename":"c"}', + '{"shardid":4, "cost":39, "nodename":"d"}', + '{"shardid":5, "cost":39, "nodename":"e"}', + '{"shardid":6, "cost":39, "nodename":"f"}', + '{"shardid":7, "cost":40, "nodename":"g"}', + '{"shardid":8, "cost":39, "nodename":"g"}' + ]::json[], + improvement_threshold := 0.1 +)); +NOTICE: Ignoring move of shard xxxxx from g:5432 to a:5432, because the move only brings a small improvement relative to the shard its size +DETAIL: The balance improvement of 0.025641 is lower than the improvement_threshold of 0.1 +NOTICE: Ignored 6 moves, 1 of which are shown in notices above +HINT: If you do want these moves to happen, try changing improvement_threshold to a lower value than what it is now (0.1). 
+ unnest +--------------------------------------------------------------------- +(0 rows) + +set citus.max_rebalancer_logged_ignored_moves = 10; +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "a"}', + '{"node_name": "b"}', + '{"node_name": "c"}', + '{"node_name": "d"}', + '{"node_name": "e"}', + '{"node_name": "f"}', + '{"node_name": "g"}' + ]::json[], + ARRAY['{"shardid":1, "cost":39, "nodename":"a"}', + '{"shardid":2, "cost":39, "nodename":"b"}', + '{"shardid":3, "cost":39, "nodename":"c"}', + '{"shardid":4, "cost":39, "nodename":"d"}', + '{"shardid":5, "cost":39, "nodename":"e"}', + '{"shardid":6, "cost":39, "nodename":"f"}', + '{"shardid":7, "cost":40, "nodename":"g"}', + '{"shardid":8, "cost":39, "nodename":"g"}' + ]::json[], + improvement_threshold := 0.1 +)); +NOTICE: Ignoring move of shard xxxxx from g:5432 to a:5432, because the move only brings a small improvement relative to the shard its size +DETAIL: The balance improvement of 0.025641 is lower than the improvement_threshold of 0.1 +NOTICE: Ignoring move of shard xxxxx from g:5432 to b:5432, because the move only brings a small improvement relative to the shard its size +DETAIL: The balance improvement of 0.025641 is lower than the improvement_threshold of 0.1 +NOTICE: Ignoring move of shard xxxxx from g:5432 to c:5432, because the move only brings a small improvement relative to the shard its size +DETAIL: The balance improvement of 0.025641 is lower than the improvement_threshold of 0.1 +NOTICE: Ignoring move of shard xxxxx from g:5432 to d:5432, because the move only brings a small improvement relative to the shard its size +DETAIL: The balance improvement of 0.025641 is lower than the improvement_threshold of 0.1 +NOTICE: Ignoring move of shard xxxxx from g:5432 to e:5432, because the move only brings a small improvement relative to the shard its size +DETAIL: The balance improvement of 0.025641 is lower than the improvement_threshold of 0.1 +NOTICE: Ignoring move of shard xxxxx from g:5432 to f:5432, because the move only brings a small improvement relative to the shard its size +DETAIL: The balance improvement of 0.025641 is lower than the improvement_threshold of 0.1 +NOTICE: Ignored 6 moves, all of which are shown in notices above +HINT: If you do want these moves to happen, try changing improvement_threshold to a lower value than what it is now (0.1). 
+ unnest +--------------------------------------------------------------------- +(0 rows) + +set citus.max_rebalancer_logged_ignored_moves = -1; +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "a"}', + '{"node_name": "b"}', + '{"node_name": "c"}', + '{"node_name": "d"}', + '{"node_name": "e"}', + '{"node_name": "f"}', + '{"node_name": "g"}' + ]::json[], + ARRAY['{"shardid":1, "cost":39, "nodename":"a"}', + '{"shardid":2, "cost":39, "nodename":"b"}', + '{"shardid":3, "cost":39, "nodename":"c"}', + '{"shardid":4, "cost":39, "nodename":"d"}', + '{"shardid":5, "cost":39, "nodename":"e"}', + '{"shardid":6, "cost":39, "nodename":"f"}', + '{"shardid":7, "cost":40, "nodename":"g"}', + '{"shardid":8, "cost":39, "nodename":"g"}' + ]::json[], + improvement_threshold := 0.1 +)); +NOTICE: Ignoring move of shard xxxxx from g:5432 to a:5432, because the move only brings a small improvement relative to the shard its size +DETAIL: The balance improvement of 0.025641 is lower than the improvement_threshold of 0.1 +NOTICE: Ignoring move of shard xxxxx from g:5432 to b:5432, because the move only brings a small improvement relative to the shard its size +DETAIL: The balance improvement of 0.025641 is lower than the improvement_threshold of 0.1 +NOTICE: Ignoring move of shard xxxxx from g:5432 to c:5432, because the move only brings a small improvement relative to the shard its size +DETAIL: The balance improvement of 0.025641 is lower than the improvement_threshold of 0.1 +NOTICE: Ignoring move of shard xxxxx from g:5432 to d:5432, because the move only brings a small improvement relative to the shard its size +DETAIL: The balance improvement of 0.025641 is lower than the improvement_threshold of 0.1 +NOTICE: Ignoring move of shard xxxxx from g:5432 to e:5432, because the move only brings a small improvement relative to the shard its size +DETAIL: The balance improvement of 0.025641 is lower than the improvement_threshold of 0.1 +NOTICE: Ignoring move of shard xxxxx from g:5432 to f:5432, because the move only brings a small improvement relative to the shard its size +DETAIL: The balance improvement of 0.025641 is lower than the improvement_threshold of 0.1 +NOTICE: Ignored 6 moves, all of which are shown in notices above +HINT: If you do want these moves to happen, try changing improvement_threshold to a lower value than what it is now (0.1). + unnest +--------------------------------------------------------------------- +(0 rows) + +-- Combining improvement_threshold and capacity works as expected. +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "a"}', + '{"node_name": "b", "capacity": 2}']::json[], + ARRAY['{"shardid":1, "cost":20, "nodename":"a"}', + '{"shardid":2, "cost":10, "nodename":"a"}', + '{"shardid":3, "cost":10, "nodename":"a"}', + '{"shardid":4, "cost":100, "nodename":"b"}', + '{"shardid":5, "cost":50, "nodename":"b"}', + '{"shardid":6, "cost":50, "nodename":"b"}' + ]::json[], + improvement_threshold := 0.6 +)); +NOTICE: Ignoring move of shard xxxxx from a:5432 to b:5432, because the move only brings a small improvement relative to the shard its size +DETAIL: The balance improvement of 0.5 is lower than the improvement_threshold of 0.6 +NOTICE: Ignored 1 moves, all of which are shown in notices above +HINT: If you do want these moves to happen, try changing improvement_threshold to a lower value than what it is now (0.6). 
+ unnest +--------------------------------------------------------------------- + {"updatetype":1,"shardid":5,"sourcename":"b","sourceport":5432,"targetname":"a","targetport":5432} + {"updatetype":1,"shardid":2,"sourcename":"a","sourceport":5432,"targetname":"b","targetport":5432} +(2 rows) + diff --git a/src/test/regress/expected/single_node.out b/src/test/regress/expected/single_node.out index f919e8779..6bf5af799 100644 --- a/src/test/regress/expected/single_node.out +++ b/src/test/regress/expected/single_node.out @@ -3,7 +3,6 @@ SET search_path TO single_node; SET citus.shard_count TO 4; SET citus.shard_replication_factor TO 1; SET citus.next_shard_id TO 90630500; -SET citus.replication_model TO 'streaming'; -- adding the coordinator as inactive is disallowed SELECT 1 FROM master_add_inactive_node('localhost', :master_port, groupid => 0); ERROR: coordinator node cannot be added as inactive node @@ -75,6 +74,54 @@ SELECT master_remove_node(nodename, nodeport) FROM pg_dist_node WHERE groupid = (1 row) +-- verify the coordinator gets auto added with the localhost guc +ALTER SYSTEM SET citus.local_hostname TO '127.0.0.1'; --although not a hostname, should work for connecting locally +SELECT pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +SELECT pg_sleep(.1); -- wait to make sure the config has changed before running the GUC + pg_sleep +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE test(x int, y int); +SELECT create_distributed_table('test','x'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT groupid, nodename, nodeport, isactive, shouldhaveshards, hasmetadata, metadatasynced FROM pg_dist_node; + groupid | nodename | nodeport | isactive | shouldhaveshards | hasmetadata | metadatasynced +--------------------------------------------------------------------- + 0 | 127.0.0.1 | 57636 | t | t | t | t +(1 row) + +DROP TABLE test; +-- remove the coordinator to try again +SELECT master_remove_node(nodename, nodeport) FROM pg_dist_node WHERE groupid = 0; + master_remove_node +--------------------------------------------------------------------- + +(1 row) + +ALTER SYSTEM RESET citus.local_hostname; +SELECT pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +SELECT pg_sleep(.1); -- wait to make sure the config has changed before running the GUC + pg_sleep +--------------------------------------------------------------------- + +(1 row) + CREATE TABLE test(x int, y int); SELECT create_distributed_table('test','x'); create_distributed_table @@ -244,21 +291,21 @@ INSERT INTO upsert_test (part_key, other_col) VALUES (1, 1), (2, 2) RETURNING *; SET citus.log_remote_commands to true; -- observe that there is a conflict and the following query does nothing INSERT INTO upsert_test (part_key, other_col) VALUES (1, 1) ON CONFLICT DO NOTHING RETURNING *; -NOTICE: executing the command locally: INSERT INTO single_node.upsert_test_90630519 AS citus_table_alias (part_key, other_col) VALUES (1, 1) ON CONFLICT DO NOTHING RETURNING part_key, other_col, third_col +NOTICE: executing the command locally: INSERT INTO single_node.upsert_test_90630523 AS citus_table_alias (part_key, other_col) VALUES (1, 1) ON CONFLICT DO NOTHING RETURNING part_key, other_col, third_col part_key | other_col | third_col 
--------------------------------------------------------------------- (0 rows) -- same as the above with different syntax INSERT INTO upsert_test (part_key, other_col) VALUES (1, 1) ON CONFLICT (part_key) DO NOTHING RETURNING *; -NOTICE: executing the command locally: INSERT INTO single_node.upsert_test_90630519 AS citus_table_alias (part_key, other_col) VALUES (1, 1) ON CONFLICT(part_key) DO NOTHING RETURNING part_key, other_col, third_col +NOTICE: executing the command locally: INSERT INTO single_node.upsert_test_90630523 AS citus_table_alias (part_key, other_col) VALUES (1, 1) ON CONFLICT(part_key) DO NOTHING RETURNING part_key, other_col, third_col part_key | other_col | third_col --------------------------------------------------------------------- (0 rows) -- again the same query with another syntax INSERT INTO upsert_test (part_key, other_col) VALUES (1, 1) ON CONFLICT ON CONSTRAINT upsert_test_part_key_key DO NOTHING RETURNING *; -NOTICE: executing the command locally: INSERT INTO single_node.upsert_test_90630519 AS citus_table_alias (part_key, other_col) VALUES (1, 1) ON CONFLICT ON CONSTRAINT upsert_test_part_key_key_90630519 DO NOTHING RETURNING part_key, other_col, third_col +NOTICE: executing the command locally: INSERT INTO single_node.upsert_test_90630523 AS citus_table_alias (part_key, other_col) VALUES (1, 1) ON CONFLICT ON CONSTRAINT upsert_test_part_key_key_90630523 DO NOTHING RETURNING part_key, other_col, third_col part_key | other_col | third_col --------------------------------------------------------------------- (0 rows) @@ -266,7 +313,7 @@ NOTICE: executing the command locally: INSERT INTO single_node.upsert_test_9063 BEGIN; -- force local execution SELECT count(*) FROM upsert_test WHERE part_key = 1; -NOTICE: executing the command locally: SELECT count(*) AS count FROM single_node.upsert_test_90630519 upsert_test WHERE (part_key OPERATOR(pg_catalog.=) 1) +NOTICE: executing the command locally: SELECT count(*) AS count FROM single_node.upsert_test_90630523 upsert_test WHERE (part_key OPERATOR(pg_catalog.=) 1) count --------------------------------------------------------------------- 1 @@ -352,10 +399,10 @@ SET search_path TO single_node; DROP SCHEMA "Quoed.Schema" CASCADE; NOTICE: drop cascades to 5 other objects DETAIL: drop cascades to table "Quoed.Schema".simple_table_name -drop cascades to table "Quoed.Schema".simple_table_name_90630524 -drop cascades to table "Quoed.Schema".simple_table_name_90630525 -drop cascades to table "Quoed.Schema".simple_table_name_90630526 -drop cascades to table "Quoed.Schema".simple_table_name_90630527 +drop cascades to table "Quoed.Schema".simple_table_name_90630528 +drop cascades to table "Quoed.Schema".simple_table_name_90630529 +drop cascades to table "Quoed.Schema".simple_table_name_90630530 +drop cascades to table "Quoed.Schema".simple_table_name_90630531 -- test partitioned index creation with long name CREATE TABLE test_index_creation1 ( @@ -538,7 +585,7 @@ EXPLAIN (COSTS FALSE, ANALYZE TRUE, TIMING FALSE, SUMMARY FALSE) -> Task Tuple data received from node: 4 bytes Node: host=localhost port=xxxxx dbname=regression - -> Seq Scan on test_90630502 test (actual rows=2 loops=1) + -> Seq Scan on test_90630506 test (actual rows=2 loops=1) (8 rows) -- common utility command @@ -1287,7 +1334,7 @@ END;$$; SELECT * FROM pg_dist_node; nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards 
--------------------------------------------------------------------- - 4 | 0 | localhost | 57636 | default | t | t | primary | default | t | t + 5 | 0 | localhost | 57636 | default | t | t | primary | default | t | t (1 row) SELECT create_distributed_function('call_delegation(int)', '$1', 'test'); @@ -1594,56 +1641,56 @@ SELECT pg_sleep(0.1); SET citus.executor_slow_start_interval TO 10; SELECT count(*) from another_schema_table; -NOTICE: executing the command locally: SELECT count(*) AS count FROM single_node.another_schema_table_90630511 another_schema_table WHERE true -NOTICE: executing the command locally: SELECT count(*) AS count FROM single_node.another_schema_table_90630512 another_schema_table WHERE true -NOTICE: executing the command locally: SELECT count(*) AS count FROM single_node.another_schema_table_90630513 another_schema_table WHERE true -NOTICE: executing the command locally: SELECT count(*) AS count FROM single_node.another_schema_table_90630514 another_schema_table WHERE true +NOTICE: executing the command locally: SELECT count(*) AS count FROM single_node.another_schema_table_90630515 another_schema_table WHERE true +NOTICE: executing the command locally: SELECT count(*) AS count FROM single_node.another_schema_table_90630516 another_schema_table WHERE true +NOTICE: executing the command locally: SELECT count(*) AS count FROM single_node.another_schema_table_90630517 another_schema_table WHERE true +NOTICE: executing the command locally: SELECT count(*) AS count FROM single_node.another_schema_table_90630518 another_schema_table WHERE true count --------------------------------------------------------------------- 0 (1 row) UPDATE another_schema_table SET b = b; -NOTICE: executing the command locally: UPDATE single_node.another_schema_table_90630511 another_schema_table SET b = b -NOTICE: executing the command locally: UPDATE single_node.another_schema_table_90630512 another_schema_table SET b = b -NOTICE: executing the command locally: UPDATE single_node.another_schema_table_90630513 another_schema_table SET b = b -NOTICE: executing the command locally: UPDATE single_node.another_schema_table_90630514 another_schema_table SET b = b +NOTICE: executing the command locally: UPDATE single_node.another_schema_table_90630515 another_schema_table SET b = b +NOTICE: executing the command locally: UPDATE single_node.another_schema_table_90630516 another_schema_table SET b = b +NOTICE: executing the command locally: UPDATE single_node.another_schema_table_90630517 another_schema_table SET b = b +NOTICE: executing the command locally: UPDATE single_node.another_schema_table_90630518 another_schema_table SET b = b -- INSERT .. SELECT pushdown and INSERT .. SELECT via repartitioning -- not that we ignore INSERT .. 
SELECT via coordinator as it relies on -- COPY command INSERT INTO another_schema_table SELECT * FROM another_schema_table; -NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630511 AS citus_table_alias (a, b) SELECT a, b FROM single_node.another_schema_table_90630511 another_schema_table WHERE (a IS NOT NULL) -NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630512 AS citus_table_alias (a, b) SELECT a, b FROM single_node.another_schema_table_90630512 another_schema_table WHERE (a IS NOT NULL) -NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630513 AS citus_table_alias (a, b) SELECT a, b FROM single_node.another_schema_table_90630513 another_schema_table WHERE (a IS NOT NULL) -NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630514 AS citus_table_alias (a, b) SELECT a, b FROM single_node.another_schema_table_90630514 another_schema_table WHERE (a IS NOT NULL) +NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630515 AS citus_table_alias (a, b) SELECT a, b FROM single_node.another_schema_table_90630515 another_schema_table WHERE (a IS NOT NULL) +NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630516 AS citus_table_alias (a, b) SELECT a, b FROM single_node.another_schema_table_90630516 another_schema_table WHERE (a IS NOT NULL) +NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630517 AS citus_table_alias (a, b) SELECT a, b FROM single_node.another_schema_table_90630517 another_schema_table WHERE (a IS NOT NULL) +NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630518 AS citus_table_alias (a, b) SELECT a, b FROM single_node.another_schema_table_90630518 another_schema_table WHERE (a IS NOT NULL) INSERT INTO another_schema_table SELECT b::int, a::int FROM another_schema_table; -NOTICE: executing the command locally: SELECT partition_index, 'repartitioned_results_xxxxx_from_90630511_to' || '_' || partition_index::text , rows_written FROM worker_partition_query_result('repartitioned_results_xxxxx_from_90630511_to','SELECT b AS a, a AS b FROM single_node.another_schema_table_90630511 another_schema_table WHERE true',0,'hash','{-2147483648,-1073741824,0,1073741824}'::text[],'{-1073741825,-1,1073741823,2147483647}'::text[],true) WHERE rows_written > 0 -NOTICE: executing the command locally: SELECT partition_index, 'repartitioned_results_xxxxx_from_90630512_to' || '_' || partition_index::text , rows_written FROM worker_partition_query_result('repartitioned_results_xxxxx_from_90630512_to','SELECT b AS a, a AS b FROM single_node.another_schema_table_90630512 another_schema_table WHERE true',0,'hash','{-2147483648,-1073741824,0,1073741824}'::text[],'{-1073741825,-1,1073741823,2147483647}'::text[],true) WHERE rows_written > 0 -NOTICE: executing the command locally: SELECT partition_index, 'repartitioned_results_xxxxx_from_90630513_to' || '_' || partition_index::text , rows_written FROM worker_partition_query_result('repartitioned_results_xxxxx_from_90630513_to','SELECT b AS a, a AS b FROM single_node.another_schema_table_90630513 another_schema_table WHERE true',0,'hash','{-2147483648,-1073741824,0,1073741824}'::text[],'{-1073741825,-1,1073741823,2147483647}'::text[],true) WHERE rows_written > 0 -NOTICE: executing the command locally: SELECT partition_index, 'repartitioned_results_xxxxx_from_90630514_to' || '_' 
|| partition_index::text , rows_written FROM worker_partition_query_result('repartitioned_results_xxxxx_from_90630514_to','SELECT b AS a, a AS b FROM single_node.another_schema_table_90630514 another_schema_table WHERE true',0,'hash','{-2147483648,-1073741824,0,1073741824}'::text[],'{-1073741825,-1,1073741823,2147483647}'::text[],true) WHERE rows_written > 0 +NOTICE: executing the command locally: SELECT partition_index, 'repartitioned_results_xxxxx_from_90630515_to' || '_' || partition_index::text , rows_written FROM worker_partition_query_result('repartitioned_results_xxxxx_from_90630515_to','SELECT b AS a, a AS b FROM single_node.another_schema_table_90630515 another_schema_table WHERE true',0,'hash','{-2147483648,-1073741824,0,1073741824}'::text[],'{-1073741825,-1,1073741823,2147483647}'::text[],true) WHERE rows_written > 0 +NOTICE: executing the command locally: SELECT partition_index, 'repartitioned_results_xxxxx_from_90630516_to' || '_' || partition_index::text , rows_written FROM worker_partition_query_result('repartitioned_results_xxxxx_from_90630516_to','SELECT b AS a, a AS b FROM single_node.another_schema_table_90630516 another_schema_table WHERE true',0,'hash','{-2147483648,-1073741824,0,1073741824}'::text[],'{-1073741825,-1,1073741823,2147483647}'::text[],true) WHERE rows_written > 0 +NOTICE: executing the command locally: SELECT partition_index, 'repartitioned_results_xxxxx_from_90630517_to' || '_' || partition_index::text , rows_written FROM worker_partition_query_result('repartitioned_results_xxxxx_from_90630517_to','SELECT b AS a, a AS b FROM single_node.another_schema_table_90630517 another_schema_table WHERE true',0,'hash','{-2147483648,-1073741824,0,1073741824}'::text[],'{-1073741825,-1,1073741823,2147483647}'::text[],true) WHERE rows_written > 0 +NOTICE: executing the command locally: SELECT partition_index, 'repartitioned_results_xxxxx_from_90630518_to' || '_' || partition_index::text , rows_written FROM worker_partition_query_result('repartitioned_results_xxxxx_from_90630518_to','SELECT b AS a, a AS b FROM single_node.another_schema_table_90630518 another_schema_table WHERE true',0,'hash','{-2147483648,-1073741824,0,1073741824}'::text[],'{-1073741825,-1,1073741823,2147483647}'::text[],true) WHERE rows_written > 0 -- multi-row INSERTs INSERT INTO another_schema_table VALUES (1,1), (2,2), (3,3), (4,4), (5,5),(6,6),(7,7); -NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630511 AS citus_table_alias (a, b) VALUES (1,1), (5,5) -NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630512 AS citus_table_alias (a, b) VALUES (3,3), (4,4), (7,7) -NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630513 AS citus_table_alias (a, b) VALUES (6,6) -NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630514 AS citus_table_alias (a, b) VALUES (2,2) +NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630515 AS citus_table_alias (a, b) VALUES (1,1), (5,5) +NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630516 AS citus_table_alias (a, b) VALUES (3,3), (4,4), (7,7) +NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630517 AS citus_table_alias (a, b) VALUES (6,6) +NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630518 AS citus_table_alias (a, b) VALUES (2,2) -- INSERT..SELECT with re-partitioning when 
using local execution BEGIN; INSERT INTO another_schema_table VALUES (1,100); -NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630511 (a, b) VALUES (1, 100) +NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630515 (a, b) VALUES (1, 100) INSERT INTO another_schema_table VALUES (2,100); -NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630514 (a, b) VALUES (2, 100) +NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630518 (a, b) VALUES (2, 100) INSERT INTO another_schema_table SELECT b::int, a::int FROM another_schema_table; -NOTICE: executing the command locally: SELECT partition_index, 'repartitioned_results_xxxxx_from_90630511_to' || '_' || partition_index::text , rows_written FROM worker_partition_query_result('repartitioned_results_xxxxx_from_90630511_to','SELECT b AS a, a AS b FROM single_node.another_schema_table_90630511 another_schema_table WHERE true',0,'hash','{-2147483648,-1073741824,0,1073741824}'::text[],'{-1073741825,-1,1073741823,2147483647}'::text[],true) WHERE rows_written > 0 -NOTICE: executing the command locally: SELECT partition_index, 'repartitioned_results_xxxxx_from_90630512_to' || '_' || partition_index::text , rows_written FROM worker_partition_query_result('repartitioned_results_xxxxx_from_90630512_to','SELECT b AS a, a AS b FROM single_node.another_schema_table_90630512 another_schema_table WHERE true',0,'hash','{-2147483648,-1073741824,0,1073741824}'::text[],'{-1073741825,-1,1073741823,2147483647}'::text[],true) WHERE rows_written > 0 -NOTICE: executing the command locally: SELECT partition_index, 'repartitioned_results_xxxxx_from_90630513_to' || '_' || partition_index::text , rows_written FROM worker_partition_query_result('repartitioned_results_xxxxx_from_90630513_to','SELECT b AS a, a AS b FROM single_node.another_schema_table_90630513 another_schema_table WHERE true',0,'hash','{-2147483648,-1073741824,0,1073741824}'::text[],'{-1073741825,-1,1073741823,2147483647}'::text[],true) WHERE rows_written > 0 -NOTICE: executing the command locally: SELECT partition_index, 'repartitioned_results_xxxxx_from_90630514_to' || '_' || partition_index::text , rows_written FROM worker_partition_query_result('repartitioned_results_xxxxx_from_90630514_to','SELECT b AS a, a AS b FROM single_node.another_schema_table_90630514 another_schema_table WHERE true',0,'hash','{-2147483648,-1073741824,0,1073741824}'::text[],'{-1073741825,-1,1073741823,2147483647}'::text[],true) WHERE rows_written > 0 -NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630511 AS citus_table_alias (a, b) SELECT a, b FROM read_intermediate_results('{repartitioned_results_xxxxx_from_90630511_to_0}'::text[], 'binary'::citus_copy_format) intermediate_result(a integer, b integer) -NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630512 AS citus_table_alias (a, b) SELECT a, b FROM read_intermediate_results('{repartitioned_results_xxxxx_from_90630512_to_1}'::text[], 'binary'::citus_copy_format) intermediate_result(a integer, b integer) -NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630513 AS citus_table_alias (a, b) SELECT a, b FROM read_intermediate_results('{repartitioned_results_xxxxx_from_90630511_to_2,repartitioned_results_xxxxx_from_90630513_to_2,repartitioned_results_xxxxx_from_90630514_to_2}'::text[], 'binary'::citus_copy_format) intermediate_result(a 
integer, b integer) -NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630514 AS citus_table_alias (a, b) SELECT a, b FROM read_intermediate_results('{repartitioned_results_xxxxx_from_90630514_to_3}'::text[], 'binary'::citus_copy_format) intermediate_result(a integer, b integer) +NOTICE: executing the command locally: SELECT partition_index, 'repartitioned_results_xxxxx_from_90630515_to' || '_' || partition_index::text , rows_written FROM worker_partition_query_result('repartitioned_results_xxxxx_from_90630515_to','SELECT b AS a, a AS b FROM single_node.another_schema_table_90630515 another_schema_table WHERE true',0,'hash','{-2147483648,-1073741824,0,1073741824}'::text[],'{-1073741825,-1,1073741823,2147483647}'::text[],true) WHERE rows_written > 0 +NOTICE: executing the command locally: SELECT partition_index, 'repartitioned_results_xxxxx_from_90630516_to' || '_' || partition_index::text , rows_written FROM worker_partition_query_result('repartitioned_results_xxxxx_from_90630516_to','SELECT b AS a, a AS b FROM single_node.another_schema_table_90630516 another_schema_table WHERE true',0,'hash','{-2147483648,-1073741824,0,1073741824}'::text[],'{-1073741825,-1,1073741823,2147483647}'::text[],true) WHERE rows_written > 0 +NOTICE: executing the command locally: SELECT partition_index, 'repartitioned_results_xxxxx_from_90630517_to' || '_' || partition_index::text , rows_written FROM worker_partition_query_result('repartitioned_results_xxxxx_from_90630517_to','SELECT b AS a, a AS b FROM single_node.another_schema_table_90630517 another_schema_table WHERE true',0,'hash','{-2147483648,-1073741824,0,1073741824}'::text[],'{-1073741825,-1,1073741823,2147483647}'::text[],true) WHERE rows_written > 0 +NOTICE: executing the command locally: SELECT partition_index, 'repartitioned_results_xxxxx_from_90630518_to' || '_' || partition_index::text , rows_written FROM worker_partition_query_result('repartitioned_results_xxxxx_from_90630518_to','SELECT b AS a, a AS b FROM single_node.another_schema_table_90630518 another_schema_table WHERE true',0,'hash','{-2147483648,-1073741824,0,1073741824}'::text[],'{-1073741825,-1,1073741823,2147483647}'::text[],true) WHERE rows_written > 0 +NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630515 AS citus_table_alias (a, b) SELECT a, b FROM read_intermediate_results('{repartitioned_results_xxxxx_from_90630515_to_0}'::text[], 'binary'::citus_copy_format) intermediate_result(a integer, b integer) +NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630516 AS citus_table_alias (a, b) SELECT a, b FROM read_intermediate_results('{repartitioned_results_xxxxx_from_90630516_to_1}'::text[], 'binary'::citus_copy_format) intermediate_result(a integer, b integer) +NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630517 AS citus_table_alias (a, b) SELECT a, b FROM read_intermediate_results('{repartitioned_results_xxxxx_from_90630515_to_2,repartitioned_results_xxxxx_from_90630517_to_2,repartitioned_results_xxxxx_from_90630518_to_2}'::text[], 'binary'::citus_copy_format) intermediate_result(a integer, b integer) +NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630518 AS citus_table_alias (a, b) SELECT a, b FROM read_intermediate_results('{repartitioned_results_xxxxx_from_90630518_to_3}'::text[], 'binary'::citus_copy_format) intermediate_result(a integer, b integer) SELECT * FROM another_schema_table WHERE a = 
100 ORDER BY b; -NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630513 another_schema_table WHERE (a OPERATOR(pg_catalog.=) 100) ORDER BY b +NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630517 another_schema_table WHERE (a OPERATOR(pg_catalog.=) 100) ORDER BY b a | b --------------------------------------------------------------------- 100 | 1 @@ -1654,10 +1701,10 @@ ROLLBACK; -- intermediate results WITH cte_1 AS (SELECT * FROM another_schema_table LIMIT 1000) SELECT count(*) FROM cte_1; -NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630511 another_schema_table WHERE true LIMIT '1000'::bigint -NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630512 another_schema_table WHERE true LIMIT '1000'::bigint -NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630513 another_schema_table WHERE true LIMIT '1000'::bigint -NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630514 another_schema_table WHERE true LIMIT '1000'::bigint +NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630515 another_schema_table WHERE true LIMIT '1000'::bigint +NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630516 another_schema_table WHERE true LIMIT '1000'::bigint +NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630517 another_schema_table WHERE true LIMIT '1000'::bigint +NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630518 another_schema_table WHERE true LIMIT '1000'::bigint NOTICE: executing the command locally: SELECT count(*) AS count FROM (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) cte_1 count --------------------------------------------------------------------- @@ -1685,31 +1732,31 @@ SET citus.log_remote_commands to false; CREATE UNIQUE INDEX another_schema_table_pk ON another_schema_table(a); SET citus.log_local_commands to true; INSERT INTO another_schema_table SELECT * FROM another_schema_table LIMIT 10000 ON CONFLICT(a) DO NOTHING; -NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630511 another_schema_table WHERE true LIMIT '10000'::bigint -NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630512 another_schema_table WHERE true LIMIT '10000'::bigint -NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630513 another_schema_table WHERE true LIMIT '10000'::bigint -NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630514 another_schema_table WHERE true LIMIT '10000'::bigint +NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630515 another_schema_table WHERE true LIMIT '10000'::bigint +NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630516 another_schema_table WHERE true LIMIT '10000'::bigint +NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630517 another_schema_table WHERE true LIMIT '10000'::bigint +NOTICE: executing the command locally: SELECT a, b FROM 
single_node.another_schema_table_90630518 another_schema_table WHERE true LIMIT '10000'::bigint NOTICE: executing the copy locally for colocated file with shard xxxxx NOTICE: executing the copy locally for colocated file with shard xxxxx NOTICE: executing the copy locally for colocated file with shard xxxxx NOTICE: executing the copy locally for colocated file with shard xxxxx -NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630511 AS citus_table_alias (a, b) SELECT a, b FROM read_intermediate_result('insert_select_XXX_90630511'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer) ON CONFLICT(a) DO NOTHING -NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630512 AS citus_table_alias (a, b) SELECT a, b FROM read_intermediate_result('insert_select_XXX_90630512'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer) ON CONFLICT(a) DO NOTHING -NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630513 AS citus_table_alias (a, b) SELECT a, b FROM read_intermediate_result('insert_select_XXX_90630513'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer) ON CONFLICT(a) DO NOTHING -NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630514 AS citus_table_alias (a, b) SELECT a, b FROM read_intermediate_result('insert_select_XXX_90630514'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer) ON CONFLICT(a) DO NOTHING +NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630515 AS citus_table_alias (a, b) SELECT a, b FROM read_intermediate_result('insert_select_XXX_90630515'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer) ON CONFLICT(a) DO NOTHING +NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630516 AS citus_table_alias (a, b) SELECT a, b FROM read_intermediate_result('insert_select_XXX_90630516'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer) ON CONFLICT(a) DO NOTHING +NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630517 AS citus_table_alias (a, b) SELECT a, b FROM read_intermediate_result('insert_select_XXX_90630517'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer) ON CONFLICT(a) DO NOTHING +NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630518 AS citus_table_alias (a, b) SELECT a, b FROM read_intermediate_result('insert_select_XXX_90630518'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer) ON CONFLICT(a) DO NOTHING INSERT INTO another_schema_table SELECT * FROM another_schema_table ORDER BY a LIMIT 10 ON CONFLICT(a) DO UPDATE SET b = EXCLUDED.b + 1 RETURNING *; -NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630511 another_schema_table WHERE true ORDER BY a LIMIT '10'::bigint -NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630512 another_schema_table WHERE true ORDER BY a LIMIT '10'::bigint -NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630513 another_schema_table WHERE true ORDER BY a LIMIT '10'::bigint -NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630514 another_schema_table WHERE true ORDER BY a LIMIT '10'::bigint 
+NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630515 another_schema_table WHERE true ORDER BY a LIMIT '10'::bigint +NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630516 another_schema_table WHERE true ORDER BY a LIMIT '10'::bigint +NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630517 another_schema_table WHERE true ORDER BY a LIMIT '10'::bigint +NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630518 another_schema_table WHERE true ORDER BY a LIMIT '10'::bigint NOTICE: executing the copy locally for colocated file with shard xxxxx NOTICE: executing the copy locally for colocated file with shard xxxxx NOTICE: executing the copy locally for colocated file with shard xxxxx NOTICE: executing the copy locally for colocated file with shard xxxxx -NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630511 AS citus_table_alias (a, b) SELECT a, b FROM read_intermediate_result('insert_select_XXX_90630511'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer) ON CONFLICT(a) DO UPDATE SET b = (excluded.b OPERATOR(pg_catalog.+) 1) RETURNING citus_table_alias.a, citus_table_alias.b -NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630512 AS citus_table_alias (a, b) SELECT a, b FROM read_intermediate_result('insert_select_XXX_90630512'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer) ON CONFLICT(a) DO UPDATE SET b = (excluded.b OPERATOR(pg_catalog.+) 1) RETURNING citus_table_alias.a, citus_table_alias.b -NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630513 AS citus_table_alias (a, b) SELECT a, b FROM read_intermediate_result('insert_select_XXX_90630513'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer) ON CONFLICT(a) DO UPDATE SET b = (excluded.b OPERATOR(pg_catalog.+) 1) RETURNING citus_table_alias.a, citus_table_alias.b -NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630514 AS citus_table_alias (a, b) SELECT a, b FROM read_intermediate_result('insert_select_XXX_90630514'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer) ON CONFLICT(a) DO UPDATE SET b = (excluded.b OPERATOR(pg_catalog.+) 1) RETURNING citus_table_alias.a, citus_table_alias.b +NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630515 AS citus_table_alias (a, b) SELECT a, b FROM read_intermediate_result('insert_select_XXX_90630515'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer) ON CONFLICT(a) DO UPDATE SET b = (excluded.b OPERATOR(pg_catalog.+) 1) RETURNING citus_table_alias.a, citus_table_alias.b +NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630516 AS citus_table_alias (a, b) SELECT a, b FROM read_intermediate_result('insert_select_XXX_90630516'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer) ON CONFLICT(a) DO UPDATE SET b = (excluded.b OPERATOR(pg_catalog.+) 1) RETURNING citus_table_alias.a, citus_table_alias.b +NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630517 AS citus_table_alias (a, b) SELECT a, b FROM read_intermediate_result('insert_select_XXX_90630517'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b 
integer) ON CONFLICT(a) DO UPDATE SET b = (excluded.b OPERATOR(pg_catalog.+) 1) RETURNING citus_table_alias.a, citus_table_alias.b +NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630518 AS citus_table_alias (a, b) SELECT a, b FROM read_intermediate_result('insert_select_XXX_90630518'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer) ON CONFLICT(a) DO UPDATE SET b = (excluded.b OPERATOR(pg_catalog.+) 1) RETURNING citus_table_alias.a, citus_table_alias.b a | b --------------------------------------------------------------------- 1 | @@ -1728,18 +1775,18 @@ NOTICE: executing the command locally: INSERT INTO single_node.another_schema_t WITH cte_1 AS (INSERT INTO non_binary_copy_test SELECT * FROM non_binary_copy_test LIMIT 10000 ON CONFLICT (key) DO UPDATE SET value = (0, 'citus0')::new_type RETURNING value) SELECT count(*) FROM cte_1; -NOTICE: executing the command locally: SELECT key, value FROM single_node.non_binary_copy_test_90630515 non_binary_copy_test WHERE true LIMIT '10000'::bigint -NOTICE: executing the command locally: SELECT key, value FROM single_node.non_binary_copy_test_90630516 non_binary_copy_test WHERE true LIMIT '10000'::bigint -NOTICE: executing the command locally: SELECT key, value FROM single_node.non_binary_copy_test_90630517 non_binary_copy_test WHERE true LIMIT '10000'::bigint -NOTICE: executing the command locally: SELECT key, value FROM single_node.non_binary_copy_test_90630518 non_binary_copy_test WHERE true LIMIT '10000'::bigint +NOTICE: executing the command locally: SELECT key, value FROM single_node.non_binary_copy_test_90630519 non_binary_copy_test WHERE true LIMIT '10000'::bigint +NOTICE: executing the command locally: SELECT key, value FROM single_node.non_binary_copy_test_90630520 non_binary_copy_test WHERE true LIMIT '10000'::bigint +NOTICE: executing the command locally: SELECT key, value FROM single_node.non_binary_copy_test_90630521 non_binary_copy_test WHERE true LIMIT '10000'::bigint +NOTICE: executing the command locally: SELECT key, value FROM single_node.non_binary_copy_test_90630522 non_binary_copy_test WHERE true LIMIT '10000'::bigint NOTICE: executing the copy locally for colocated file with shard xxxxx NOTICE: executing the copy locally for colocated file with shard xxxxx NOTICE: executing the copy locally for colocated file with shard xxxxx NOTICE: executing the copy locally for colocated file with shard xxxxx -NOTICE: executing the command locally: INSERT INTO single_node.non_binary_copy_test_90630515 AS citus_table_alias (key, value) SELECT key, value FROM read_intermediate_result('insert_select_XXX_90630515'::text, 'text'::citus_copy_format) intermediate_result(key integer, value single_node.new_type) ON CONFLICT(key) DO UPDATE SET value = ROW(0, 'citus0'::text)::single_node.new_type RETURNING citus_table_alias.value -NOTICE: executing the command locally: INSERT INTO single_node.non_binary_copy_test_90630516 AS citus_table_alias (key, value) SELECT key, value FROM read_intermediate_result('insert_select_XXX_90630516'::text, 'text'::citus_copy_format) intermediate_result(key integer, value single_node.new_type) ON CONFLICT(key) DO UPDATE SET value = ROW(0, 'citus0'::text)::single_node.new_type RETURNING citus_table_alias.value -NOTICE: executing the command locally: INSERT INTO single_node.non_binary_copy_test_90630517 AS citus_table_alias (key, value) SELECT key, value FROM read_intermediate_result('insert_select_XXX_90630517'::text, 'text'::citus_copy_format) 
intermediate_result(key integer, value single_node.new_type) ON CONFLICT(key) DO UPDATE SET value = ROW(0, 'citus0'::text)::single_node.new_type RETURNING citus_table_alias.value -NOTICE: executing the command locally: INSERT INTO single_node.non_binary_copy_test_90630518 AS citus_table_alias (key, value) SELECT key, value FROM read_intermediate_result('insert_select_XXX_90630518'::text, 'text'::citus_copy_format) intermediate_result(key integer, value single_node.new_type) ON CONFLICT(key) DO UPDATE SET value = ROW(0, 'citus0'::text)::single_node.new_type RETURNING citus_table_alias.value +NOTICE: executing the command locally: INSERT INTO single_node.non_binary_copy_test_90630519 AS citus_table_alias (key, value) SELECT key, value FROM read_intermediate_result('insert_select_XXX_90630519'::text, 'text'::citus_copy_format) intermediate_result(key integer, value single_node.new_type) ON CONFLICT(key) DO UPDATE SET value = ROW(0, 'citus0'::text)::single_node.new_type RETURNING citus_table_alias.value +NOTICE: executing the command locally: INSERT INTO single_node.non_binary_copy_test_90630520 AS citus_table_alias (key, value) SELECT key, value FROM read_intermediate_result('insert_select_XXX_90630520'::text, 'text'::citus_copy_format) intermediate_result(key integer, value single_node.new_type) ON CONFLICT(key) DO UPDATE SET value = ROW(0, 'citus0'::text)::single_node.new_type RETURNING citus_table_alias.value +NOTICE: executing the command locally: INSERT INTO single_node.non_binary_copy_test_90630521 AS citus_table_alias (key, value) SELECT key, value FROM read_intermediate_result('insert_select_XXX_90630521'::text, 'text'::citus_copy_format) intermediate_result(key integer, value single_node.new_type) ON CONFLICT(key) DO UPDATE SET value = ROW(0, 'citus0'::text)::single_node.new_type RETURNING citus_table_alias.value +NOTICE: executing the command locally: INSERT INTO single_node.non_binary_copy_test_90630522 AS citus_table_alias (key, value) SELECT key, value FROM read_intermediate_result('insert_select_XXX_90630522'::text, 'text'::citus_copy_format) intermediate_result(key integer, value single_node.new_type) ON CONFLICT(key) DO UPDATE SET value = ROW(0, 'citus0'::text)::single_node.new_type RETURNING citus_table_alias.value NOTICE: executing the command locally: SELECT count(*) AS count FROM (SELECT intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'text'::citus_copy_format) intermediate_result(value single_node.new_type)) cte_1 count --------------------------------------------------------------------- @@ -1748,25 +1795,25 @@ NOTICE: executing the command locally: SELECT count(*) AS count FROM (SELECT in -- test with NULL columns ALTER TABLE non_binary_copy_test ADD COLUMN z INT; -NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (90630515, 'single_node', 'ALTER TABLE non_binary_copy_test ADD COLUMN z INT;') -NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (90630516, 'single_node', 'ALTER TABLE non_binary_copy_test ADD COLUMN z INT;') -NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (90630517, 'single_node', 'ALTER TABLE non_binary_copy_test ADD COLUMN z INT;') -NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (90630518, 'single_node', 'ALTER TABLE non_binary_copy_test ADD COLUMN z INT;') +NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (90630519, 'single_node', 'ALTER TABLE non_binary_copy_test ADD COLUMN z INT;') +NOTICE: 
executing the command locally: SELECT worker_apply_shard_ddl_command (90630520, 'single_node', 'ALTER TABLE non_binary_copy_test ADD COLUMN z INT;') +NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (90630521, 'single_node', 'ALTER TABLE non_binary_copy_test ADD COLUMN z INT;') +NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (90630522, 'single_node', 'ALTER TABLE non_binary_copy_test ADD COLUMN z INT;') WITH cte_1 AS (INSERT INTO non_binary_copy_test SELECT * FROM non_binary_copy_test LIMIT 10000 ON CONFLICT (key) DO UPDATE SET value = (0, 'citus0')::new_type RETURNING z) SELECT bool_and(z is null) FROM cte_1; -NOTICE: executing the command locally: SELECT key, value, z FROM single_node.non_binary_copy_test_90630515 non_binary_copy_test WHERE true LIMIT '10000'::bigint -NOTICE: executing the command locally: SELECT key, value, z FROM single_node.non_binary_copy_test_90630516 non_binary_copy_test WHERE true LIMIT '10000'::bigint -NOTICE: executing the command locally: SELECT key, value, z FROM single_node.non_binary_copy_test_90630517 non_binary_copy_test WHERE true LIMIT '10000'::bigint -NOTICE: executing the command locally: SELECT key, value, z FROM single_node.non_binary_copy_test_90630518 non_binary_copy_test WHERE true LIMIT '10000'::bigint +NOTICE: executing the command locally: SELECT key, value, z FROM single_node.non_binary_copy_test_90630519 non_binary_copy_test WHERE true LIMIT '10000'::bigint +NOTICE: executing the command locally: SELECT key, value, z FROM single_node.non_binary_copy_test_90630520 non_binary_copy_test WHERE true LIMIT '10000'::bigint +NOTICE: executing the command locally: SELECT key, value, z FROM single_node.non_binary_copy_test_90630521 non_binary_copy_test WHERE true LIMIT '10000'::bigint +NOTICE: executing the command locally: SELECT key, value, z FROM single_node.non_binary_copy_test_90630522 non_binary_copy_test WHERE true LIMIT '10000'::bigint NOTICE: executing the copy locally for colocated file with shard xxxxx NOTICE: executing the copy locally for colocated file with shard xxxxx NOTICE: executing the copy locally for colocated file with shard xxxxx NOTICE: executing the copy locally for colocated file with shard xxxxx -NOTICE: executing the command locally: INSERT INTO single_node.non_binary_copy_test_90630515 AS citus_table_alias (key, value, z) SELECT key, value, z FROM read_intermediate_result('insert_select_XXX_90630515'::text, 'text'::citus_copy_format) intermediate_result(key integer, value single_node.new_type, z integer) ON CONFLICT(key) DO UPDATE SET value = ROW(0, 'citus0'::text)::single_node.new_type RETURNING citus_table_alias.z -NOTICE: executing the command locally: INSERT INTO single_node.non_binary_copy_test_90630516 AS citus_table_alias (key, value, z) SELECT key, value, z FROM read_intermediate_result('insert_select_XXX_90630516'::text, 'text'::citus_copy_format) intermediate_result(key integer, value single_node.new_type, z integer) ON CONFLICT(key) DO UPDATE SET value = ROW(0, 'citus0'::text)::single_node.new_type RETURNING citus_table_alias.z -NOTICE: executing the command locally: INSERT INTO single_node.non_binary_copy_test_90630517 AS citus_table_alias (key, value, z) SELECT key, value, z FROM read_intermediate_result('insert_select_XXX_90630517'::text, 'text'::citus_copy_format) intermediate_result(key integer, value single_node.new_type, z integer) ON CONFLICT(key) DO UPDATE SET value = ROW(0, 'citus0'::text)::single_node.new_type RETURNING citus_table_alias.z 
-NOTICE: executing the command locally: INSERT INTO single_node.non_binary_copy_test_90630518 AS citus_table_alias (key, value, z) SELECT key, value, z FROM read_intermediate_result('insert_select_XXX_90630518'::text, 'text'::citus_copy_format) intermediate_result(key integer, value single_node.new_type, z integer) ON CONFLICT(key) DO UPDATE SET value = ROW(0, 'citus0'::text)::single_node.new_type RETURNING citus_table_alias.z +NOTICE: executing the command locally: INSERT INTO single_node.non_binary_copy_test_90630519 AS citus_table_alias (key, value, z) SELECT key, value, z FROM read_intermediate_result('insert_select_XXX_90630519'::text, 'text'::citus_copy_format) intermediate_result(key integer, value single_node.new_type, z integer) ON CONFLICT(key) DO UPDATE SET value = ROW(0, 'citus0'::text)::single_node.new_type RETURNING citus_table_alias.z +NOTICE: executing the command locally: INSERT INTO single_node.non_binary_copy_test_90630520 AS citus_table_alias (key, value, z) SELECT key, value, z FROM read_intermediate_result('insert_select_XXX_90630520'::text, 'text'::citus_copy_format) intermediate_result(key integer, value single_node.new_type, z integer) ON CONFLICT(key) DO UPDATE SET value = ROW(0, 'citus0'::text)::single_node.new_type RETURNING citus_table_alias.z +NOTICE: executing the command locally: INSERT INTO single_node.non_binary_copy_test_90630521 AS citus_table_alias (key, value, z) SELECT key, value, z FROM read_intermediate_result('insert_select_XXX_90630521'::text, 'text'::citus_copy_format) intermediate_result(key integer, value single_node.new_type, z integer) ON CONFLICT(key) DO UPDATE SET value = ROW(0, 'citus0'::text)::single_node.new_type RETURNING citus_table_alias.z +NOTICE: executing the command locally: INSERT INTO single_node.non_binary_copy_test_90630522 AS citus_table_alias (key, value, z) SELECT key, value, z FROM read_intermediate_result('insert_select_XXX_90630522'::text, 'text'::citus_copy_format) intermediate_result(key integer, value single_node.new_type, z integer) ON CONFLICT(key) DO UPDATE SET value = ROW(0, 'citus0'::text)::single_node.new_type RETURNING citus_table_alias.z NOTICE: executing the command locally: SELECT bool_and((z IS NULL)) AS bool_and FROM (SELECT intermediate_result.z FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(z integer)) cte_1 bool_and --------------------------------------------------------------------- @@ -1777,24 +1824,51 @@ NOTICE: executing the command locally: SELECT bool_and((z IS NULL)) AS bool_and WITH cte_1 AS (INSERT INTO non_binary_copy_test SELECT * FROM non_binary_copy_test LIMIT 10000 ON CONFLICT (key) DO UPDATE SET value = (0, 'citus0')::new_type RETURNING key, z) SELECT count(DISTINCT key::text), count(DISTINCT z::text) FROM cte_1; -NOTICE: executing the command locally: SELECT key, value, z FROM single_node.non_binary_copy_test_90630515 non_binary_copy_test WHERE true LIMIT '10000'::bigint -NOTICE: executing the command locally: SELECT key, value, z FROM single_node.non_binary_copy_test_90630516 non_binary_copy_test WHERE true LIMIT '10000'::bigint -NOTICE: executing the command locally: SELECT key, value, z FROM single_node.non_binary_copy_test_90630517 non_binary_copy_test WHERE true LIMIT '10000'::bigint -NOTICE: executing the command locally: SELECT key, value, z FROM single_node.non_binary_copy_test_90630518 non_binary_copy_test WHERE true LIMIT '10000'::bigint +NOTICE: executing the command locally: SELECT key, value, z FROM 
single_node.non_binary_copy_test_90630519 non_binary_copy_test WHERE true LIMIT '10000'::bigint +NOTICE: executing the command locally: SELECT key, value, z FROM single_node.non_binary_copy_test_90630520 non_binary_copy_test WHERE true LIMIT '10000'::bigint +NOTICE: executing the command locally: SELECT key, value, z FROM single_node.non_binary_copy_test_90630521 non_binary_copy_test WHERE true LIMIT '10000'::bigint +NOTICE: executing the command locally: SELECT key, value, z FROM single_node.non_binary_copy_test_90630522 non_binary_copy_test WHERE true LIMIT '10000'::bigint NOTICE: executing the copy locally for colocated file with shard xxxxx NOTICE: executing the copy locally for colocated file with shard xxxxx NOTICE: executing the copy locally for colocated file with shard xxxxx NOTICE: executing the copy locally for colocated file with shard xxxxx -NOTICE: executing the command locally: INSERT INTO single_node.non_binary_copy_test_90630515 AS citus_table_alias (key, value, z) SELECT key, value, z FROM read_intermediate_result('insert_select_XXX_90630515'::text, 'text'::citus_copy_format) intermediate_result(key integer, value single_node.new_type, z integer) ON CONFLICT(key) DO UPDATE SET value = ROW(0, 'citus0'::text)::single_node.new_type RETURNING citus_table_alias.key, citus_table_alias.z -NOTICE: executing the command locally: INSERT INTO single_node.non_binary_copy_test_90630516 AS citus_table_alias (key, value, z) SELECT key, value, z FROM read_intermediate_result('insert_select_XXX_90630516'::text, 'text'::citus_copy_format) intermediate_result(key integer, value single_node.new_type, z integer) ON CONFLICT(key) DO UPDATE SET value = ROW(0, 'citus0'::text)::single_node.new_type RETURNING citus_table_alias.key, citus_table_alias.z -NOTICE: executing the command locally: INSERT INTO single_node.non_binary_copy_test_90630517 AS citus_table_alias (key, value, z) SELECT key, value, z FROM read_intermediate_result('insert_select_XXX_90630517'::text, 'text'::citus_copy_format) intermediate_result(key integer, value single_node.new_type, z integer) ON CONFLICT(key) DO UPDATE SET value = ROW(0, 'citus0'::text)::single_node.new_type RETURNING citus_table_alias.key, citus_table_alias.z -NOTICE: executing the command locally: INSERT INTO single_node.non_binary_copy_test_90630518 AS citus_table_alias (key, value, z) SELECT key, value, z FROM read_intermediate_result('insert_select_XXX_90630518'::text, 'text'::citus_copy_format) intermediate_result(key integer, value single_node.new_type, z integer) ON CONFLICT(key) DO UPDATE SET value = ROW(0, 'citus0'::text)::single_node.new_type RETURNING citus_table_alias.key, citus_table_alias.z +NOTICE: executing the command locally: INSERT INTO single_node.non_binary_copy_test_90630519 AS citus_table_alias (key, value, z) SELECT key, value, z FROM read_intermediate_result('insert_select_XXX_90630519'::text, 'text'::citus_copy_format) intermediate_result(key integer, value single_node.new_type, z integer) ON CONFLICT(key) DO UPDATE SET value = ROW(0, 'citus0'::text)::single_node.new_type RETURNING citus_table_alias.key, citus_table_alias.z +NOTICE: executing the command locally: INSERT INTO single_node.non_binary_copy_test_90630520 AS citus_table_alias (key, value, z) SELECT key, value, z FROM read_intermediate_result('insert_select_XXX_90630520'::text, 'text'::citus_copy_format) intermediate_result(key integer, value single_node.new_type, z integer) ON CONFLICT(key) DO UPDATE SET value = ROW(0, 'citus0'::text)::single_node.new_type RETURNING 
citus_table_alias.key, citus_table_alias.z +NOTICE: executing the command locally: INSERT INTO single_node.non_binary_copy_test_90630521 AS citus_table_alias (key, value, z) SELECT key, value, z FROM read_intermediate_result('insert_select_XXX_90630521'::text, 'text'::citus_copy_format) intermediate_result(key integer, value single_node.new_type, z integer) ON CONFLICT(key) DO UPDATE SET value = ROW(0, 'citus0'::text)::single_node.new_type RETURNING citus_table_alias.key, citus_table_alias.z +NOTICE: executing the command locally: INSERT INTO single_node.non_binary_copy_test_90630522 AS citus_table_alias (key, value, z) SELECT key, value, z FROM read_intermediate_result('insert_select_XXX_90630522'::text, 'text'::citus_copy_format) intermediate_result(key integer, value single_node.new_type, z integer) ON CONFLICT(key) DO UPDATE SET value = ROW(0, 'citus0'::text)::single_node.new_type RETURNING citus_table_alias.key, citus_table_alias.z NOTICE: executing the command locally: SELECT count(DISTINCT (key)::text) AS count, count(DISTINCT (z)::text) AS count FROM (SELECT intermediate_result.key, intermediate_result.z FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, z integer)) cte_1 count | count --------------------------------------------------------------------- 1001 | 0 (1 row) +-- test disabling drop and truncate for known shards +SET citus.shard_replication_factor TO 1; +CREATE TABLE test_disabling_drop_and_truncate (a int); +SELECT create_distributed_table('test_disabling_drop_and_truncate', 'a'); +NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (102040, 'single_node', 'CREATE TABLE single_node.test_disabling_drop_and_truncate (a integer) ');SELECT worker_apply_shard_ddl_command (102040, 'single_node', 'ALTER TABLE single_node.test_disabling_drop_and_truncate OWNER TO postgres') +NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (102041, 'single_node', 'CREATE TABLE single_node.test_disabling_drop_and_truncate (a integer) ');SELECT worker_apply_shard_ddl_command (102041, 'single_node', 'ALTER TABLE single_node.test_disabling_drop_and_truncate OWNER TO postgres') +NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (102042, 'single_node', 'CREATE TABLE single_node.test_disabling_drop_and_truncate (a integer) ');SELECT worker_apply_shard_ddl_command (102042, 'single_node', 'ALTER TABLE single_node.test_disabling_drop_and_truncate OWNER TO postgres') +NOTICE: executing the command locally: SELECT worker_apply_shard_ddl_command (102043, 'single_node', 'CREATE TABLE single_node.test_disabling_drop_and_truncate (a integer) ');SELECT worker_apply_shard_ddl_command (102043, 'single_node', 'ALTER TABLE single_node.test_disabling_drop_and_truncate OWNER TO postgres') + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SET citus.enable_manual_changes_to_shards TO off; +-- these should error out +DROP TABLE test_disabling_drop_and_truncate_102040; +ERROR: cannot modify "test_disabling_drop_and_truncate_102040" because it is a shard of a distributed table +HINT: Use the distributed table or set citus.enable_manual_changes_to_shards to on to modify shards directly +TRUNCATE TABLE test_disabling_drop_and_truncate_102040; +ERROR: cannot modify "test_disabling_drop_and_truncate_102040" because it is a shard of a distributed table +HINT: Use the distributed table or set citus.enable_manual_changes_to_shards to 
on to modify shards directly +RESET citus.enable_manual_changes_to_shards ; +-- these should work as expected +TRUNCATE TABLE test_disabling_drop_and_truncate_102040; +DROP TABLE test_disabling_drop_and_truncate_102040; +RESET citus.shard_replication_factor; +DROP TABLE test_disabling_drop_and_truncate; -- let's flush the copy often to make sure everything is fine SET citus.local_copy_flush_threshold TO 1; TRUNCATE another_schema_table; @@ -1810,18 +1884,18 @@ NOTICE: executing the copy locally for shard xxxxx WITH cte_1 AS (INSERT INTO another_schema_table SELECT * FROM another_schema_table ORDER BY a LIMIT 10000 ON CONFLICT(a) DO NOTHING RETURNING *) SELECT count(*) FROM cte_1; -NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630511 another_schema_table WHERE true ORDER BY a LIMIT '10000'::bigint -NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630512 another_schema_table WHERE true ORDER BY a LIMIT '10000'::bigint -NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630513 another_schema_table WHERE true ORDER BY a LIMIT '10000'::bigint -NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630514 another_schema_table WHERE true ORDER BY a LIMIT '10000'::bigint +NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630515 another_schema_table WHERE true ORDER BY a LIMIT '10000'::bigint +NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630516 another_schema_table WHERE true ORDER BY a LIMIT '10000'::bigint +NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630517 another_schema_table WHERE true ORDER BY a LIMIT '10000'::bigint +NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630518 another_schema_table WHERE true ORDER BY a LIMIT '10000'::bigint NOTICE: executing the copy locally for colocated file with shard xxxxx NOTICE: executing the copy locally for colocated file with shard xxxxx NOTICE: executing the copy locally for colocated file with shard xxxxx NOTICE: executing the copy locally for colocated file with shard xxxxx -NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630511 AS citus_table_alias (a, b) SELECT a, b FROM read_intermediate_result('insert_select_XXX_90630511'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer) ON CONFLICT(a) DO NOTHING RETURNING citus_table_alias.a, citus_table_alias.b -NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630512 AS citus_table_alias (a, b) SELECT a, b FROM read_intermediate_result('insert_select_XXX_90630512'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer) ON CONFLICT(a) DO NOTHING RETURNING citus_table_alias.a, citus_table_alias.b -NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630513 AS citus_table_alias (a, b) SELECT a, b FROM read_intermediate_result('insert_select_XXX_90630513'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer) ON CONFLICT(a) DO NOTHING RETURNING citus_table_alias.a, citus_table_alias.b -NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630514 AS citus_table_alias (a, b) SELECT a, b FROM read_intermediate_result('insert_select_XXX_90630514'::text, 
'binary'::citus_copy_format) intermediate_result(a integer, b integer) ON CONFLICT(a) DO NOTHING RETURNING citus_table_alias.a, citus_table_alias.b +NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630515 AS citus_table_alias (a, b) SELECT a, b FROM read_intermediate_result('insert_select_XXX_90630515'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer) ON CONFLICT(a) DO NOTHING RETURNING citus_table_alias.a, citus_table_alias.b +NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630516 AS citus_table_alias (a, b) SELECT a, b FROM read_intermediate_result('insert_select_XXX_90630516'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer) ON CONFLICT(a) DO NOTHING RETURNING citus_table_alias.a, citus_table_alias.b +NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630517 AS citus_table_alias (a, b) SELECT a, b FROM read_intermediate_result('insert_select_XXX_90630517'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer) ON CONFLICT(a) DO NOTHING RETURNING citus_table_alias.a, citus_table_alias.b +NOTICE: executing the command locally: INSERT INTO single_node.another_schema_table_90630518 AS citus_table_alias (a, b) SELECT a, b FROM read_intermediate_result('insert_select_XXX_90630518'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer) ON CONFLICT(a) DO NOTHING RETURNING citus_table_alias.a, citus_table_alias.b NOTICE: executing the command locally: SELECT count(*) AS count FROM (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) cte_1 count --------------------------------------------------------------------- @@ -1831,18 +1905,18 @@ NOTICE: executing the command locally: SELECT count(*) AS count FROM (SELECT in WITH cte_1 AS (INSERT INTO non_binary_copy_test SELECT * FROM non_binary_copy_test LIMIT 10000 ON CONFLICT (key) DO UPDATE SET value = (0, 'citus0')::new_type RETURNING z) SELECT bool_and(z is null) FROM cte_1; -NOTICE: executing the command locally: SELECT key, value, z FROM single_node.non_binary_copy_test_90630515 non_binary_copy_test WHERE true LIMIT '10000'::bigint -NOTICE: executing the command locally: SELECT key, value, z FROM single_node.non_binary_copy_test_90630516 non_binary_copy_test WHERE true LIMIT '10000'::bigint -NOTICE: executing the command locally: SELECT key, value, z FROM single_node.non_binary_copy_test_90630517 non_binary_copy_test WHERE true LIMIT '10000'::bigint -NOTICE: executing the command locally: SELECT key, value, z FROM single_node.non_binary_copy_test_90630518 non_binary_copy_test WHERE true LIMIT '10000'::bigint +NOTICE: executing the command locally: SELECT key, value, z FROM single_node.non_binary_copy_test_90630519 non_binary_copy_test WHERE true LIMIT '10000'::bigint +NOTICE: executing the command locally: SELECT key, value, z FROM single_node.non_binary_copy_test_90630520 non_binary_copy_test WHERE true LIMIT '10000'::bigint +NOTICE: executing the command locally: SELECT key, value, z FROM single_node.non_binary_copy_test_90630521 non_binary_copy_test WHERE true LIMIT '10000'::bigint +NOTICE: executing the command locally: SELECT key, value, z FROM single_node.non_binary_copy_test_90630522 non_binary_copy_test WHERE true LIMIT '10000'::bigint NOTICE: executing the copy locally for colocated file with shard xxxxx NOTICE: 
executing the copy locally for colocated file with shard xxxxx NOTICE: executing the copy locally for colocated file with shard xxxxx NOTICE: executing the copy locally for colocated file with shard xxxxx -NOTICE: executing the command locally: INSERT INTO single_node.non_binary_copy_test_90630515 AS citus_table_alias (key, value, z) SELECT key, value, z FROM read_intermediate_result('insert_select_XXX_90630515'::text, 'text'::citus_copy_format) intermediate_result(key integer, value single_node.new_type, z integer) ON CONFLICT(key) DO UPDATE SET value = ROW(0, 'citus0'::text)::single_node.new_type RETURNING citus_table_alias.z -NOTICE: executing the command locally: INSERT INTO single_node.non_binary_copy_test_90630516 AS citus_table_alias (key, value, z) SELECT key, value, z FROM read_intermediate_result('insert_select_XXX_90630516'::text, 'text'::citus_copy_format) intermediate_result(key integer, value single_node.new_type, z integer) ON CONFLICT(key) DO UPDATE SET value = ROW(0, 'citus0'::text)::single_node.new_type RETURNING citus_table_alias.z -NOTICE: executing the command locally: INSERT INTO single_node.non_binary_copy_test_90630517 AS citus_table_alias (key, value, z) SELECT key, value, z FROM read_intermediate_result('insert_select_XXX_90630517'::text, 'text'::citus_copy_format) intermediate_result(key integer, value single_node.new_type, z integer) ON CONFLICT(key) DO UPDATE SET value = ROW(0, 'citus0'::text)::single_node.new_type RETURNING citus_table_alias.z -NOTICE: executing the command locally: INSERT INTO single_node.non_binary_copy_test_90630518 AS citus_table_alias (key, value, z) SELECT key, value, z FROM read_intermediate_result('insert_select_XXX_90630518'::text, 'text'::citus_copy_format) intermediate_result(key integer, value single_node.new_type, z integer) ON CONFLICT(key) DO UPDATE SET value = ROW(0, 'citus0'::text)::single_node.new_type RETURNING citus_table_alias.z +NOTICE: executing the command locally: INSERT INTO single_node.non_binary_copy_test_90630519 AS citus_table_alias (key, value, z) SELECT key, value, z FROM read_intermediate_result('insert_select_XXX_90630519'::text, 'text'::citus_copy_format) intermediate_result(key integer, value single_node.new_type, z integer) ON CONFLICT(key) DO UPDATE SET value = ROW(0, 'citus0'::text)::single_node.new_type RETURNING citus_table_alias.z +NOTICE: executing the command locally: INSERT INTO single_node.non_binary_copy_test_90630520 AS citus_table_alias (key, value, z) SELECT key, value, z FROM read_intermediate_result('insert_select_XXX_90630520'::text, 'text'::citus_copy_format) intermediate_result(key integer, value single_node.new_type, z integer) ON CONFLICT(key) DO UPDATE SET value = ROW(0, 'citus0'::text)::single_node.new_type RETURNING citus_table_alias.z +NOTICE: executing the command locally: INSERT INTO single_node.non_binary_copy_test_90630521 AS citus_table_alias (key, value, z) SELECT key, value, z FROM read_intermediate_result('insert_select_XXX_90630521'::text, 'text'::citus_copy_format) intermediate_result(key integer, value single_node.new_type, z integer) ON CONFLICT(key) DO UPDATE SET value = ROW(0, 'citus0'::text)::single_node.new_type RETURNING citus_table_alias.z +NOTICE: executing the command locally: INSERT INTO single_node.non_binary_copy_test_90630522 AS citus_table_alias (key, value, z) SELECT key, value, z FROM read_intermediate_result('insert_select_XXX_90630522'::text, 'text'::citus_copy_format) intermediate_result(key integer, value single_node.new_type, z integer) ON 
CONFLICT(key) DO UPDATE SET value = ROW(0, 'citus0'::text)::single_node.new_type RETURNING citus_table_alias.z NOTICE: executing the command locally: SELECT bool_and((z IS NULL)) AS bool_and FROM (SELECT intermediate_result.z FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(z integer)) cte_1 bool_and --------------------------------------------------------------------- @@ -1858,17 +1932,17 @@ $$coordinated_transaction_should_use_2PC$$; -- execution doesn't start a 2PC BEGIN; SELECT count(*) FROM another_schema_table; -NOTICE: executing the command locally: SELECT count(*) AS count FROM single_node.another_schema_table_90630511 another_schema_table WHERE true -NOTICE: executing the command locally: SELECT count(*) AS count FROM single_node.another_schema_table_90630512 another_schema_table WHERE true -NOTICE: executing the command locally: SELECT count(*) AS count FROM single_node.another_schema_table_90630513 another_schema_table WHERE true -NOTICE: executing the command locally: SELECT count(*) AS count FROM single_node.another_schema_table_90630514 another_schema_table WHERE true +NOTICE: executing the command locally: SELECT count(*) AS count FROM single_node.another_schema_table_90630515 another_schema_table WHERE true +NOTICE: executing the command locally: SELECT count(*) AS count FROM single_node.another_schema_table_90630516 another_schema_table WHERE true +NOTICE: executing the command locally: SELECT count(*) AS count FROM single_node.another_schema_table_90630517 another_schema_table WHERE true +NOTICE: executing the command locally: SELECT count(*) AS count FROM single_node.another_schema_table_90630518 another_schema_table WHERE true count --------------------------------------------------------------------- 10001 (1 row) SELECT count(*) FROM another_schema_table WHERE a = 1; -NOTICE: executing the command locally: SELECT count(*) AS count FROM single_node.another_schema_table_90630511 another_schema_table WHERE (a OPERATOR(pg_catalog.=) 1) +NOTICE: executing the command locally: SELECT count(*) AS count FROM single_node.another_schema_table_90630515 another_schema_table WHERE (a OPERATOR(pg_catalog.=) 1) count --------------------------------------------------------------------- 1 @@ -1876,10 +1950,10 @@ NOTICE: executing the command locally: SELECT count(*) AS count FROM single_nod WITH cte_1 as (SELECT * FROM another_schema_table LIMIT 10) SELECT count(*) FROM cte_1; -NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630511 another_schema_table WHERE true LIMIT '10'::bigint -NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630512 another_schema_table WHERE true LIMIT '10'::bigint -NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630513 another_schema_table WHERE true LIMIT '10'::bigint -NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630514 another_schema_table WHERE true LIMIT '10'::bigint +NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630515 another_schema_table WHERE true LIMIT '10'::bigint +NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630516 another_schema_table WHERE true LIMIT '10'::bigint +NOTICE: executing the command locally: SELECT a, b FROM single_node.another_schema_table_90630517 another_schema_table WHERE true LIMIT '10'::bigint +NOTICE: executing the command 
locally: SELECT a, b FROM single_node.another_schema_table_90630518 another_schema_table WHERE true LIMIT '10'::bigint NOTICE: executing the command locally: SELECT count(*) AS count FROM (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) cte_1 count --------------------------------------------------------------------- @@ -1888,7 +1962,7 @@ NOTICE: executing the command locally: SELECT count(*) AS count FROM (SELECT in WITH cte_1 as (SELECT * FROM another_schema_table WHERE a = 1 LIMIT 10) SELECT count(*) FROM cte_1; -NOTICE: executing the command locally: SELECT count(*) AS count FROM (SELECT another_schema_table.a, another_schema_table.b FROM single_node.another_schema_table_90630511 another_schema_table WHERE (another_schema_table.a OPERATOR(pg_catalog.=) 1) LIMIT 10) cte_1 +NOTICE: executing the command locally: SELECT count(*) AS count FROM (SELECT another_schema_table.a, another_schema_table.b FROM single_node.another_schema_table_90630515 another_schema_table WHERE (another_schema_table.a OPERATOR(pg_catalog.=) 1) LIMIT 10) cte_1 count --------------------------------------------------------------------- 1 @@ -1905,10 +1979,10 @@ ROLLBACK; WITH cte_1 AS (SELECT count(*) as cnt FROM another_schema_table LIMIT 1000), cte_2 AS (SELECT coordinated_transaction_should_use_2PC() as enabled_2pc) SELECT cnt, enabled_2pc FROM cte_1, cte_2; -NOTICE: executing the command locally: SELECT count(*) AS cnt FROM single_node.another_schema_table_90630511 another_schema_table WHERE true LIMIT '1000'::bigint -NOTICE: executing the command locally: SELECT count(*) AS cnt FROM single_node.another_schema_table_90630512 another_schema_table WHERE true LIMIT '1000'::bigint -NOTICE: executing the command locally: SELECT count(*) AS cnt FROM single_node.another_schema_table_90630513 another_schema_table WHERE true LIMIT '1000'::bigint -NOTICE: executing the command locally: SELECT count(*) AS cnt FROM single_node.another_schema_table_90630514 another_schema_table WHERE true LIMIT '1000'::bigint +NOTICE: executing the command locally: SELECT count(*) AS cnt FROM single_node.another_schema_table_90630515 another_schema_table WHERE true LIMIT '1000'::bigint +NOTICE: executing the command locally: SELECT count(*) AS cnt FROM single_node.another_schema_table_90630516 another_schema_table WHERE true LIMIT '1000'::bigint +NOTICE: executing the command locally: SELECT count(*) AS cnt FROM single_node.another_schema_table_90630517 another_schema_table WHERE true LIMIT '1000'::bigint +NOTICE: executing the command locally: SELECT count(*) AS cnt FROM single_node.another_schema_table_90630518 another_schema_table WHERE true LIMIT '1000'::bigint NOTICE: executing the command locally: SELECT cte_1.cnt, cte_2.enabled_2pc FROM (SELECT intermediate_result.cnt FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(cnt bigint)) cte_1, (SELECT intermediate_result.enabled_2pc FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(enabled_2pc boolean)) cte_2 cnt | enabled_2pc --------------------------------------------------------------------- @@ -1919,10 +1993,10 @@ NOTICE: executing the command locally: SELECT cte_1.cnt, cte_2.enabled_2pc FROM -- execution starts a 2PC BEGIN; UPDATE another_schema_table SET b = b + 1; -NOTICE: executing the command locally: UPDATE single_node.another_schema_table_90630511 another_schema_table SET b 
= (b OPERATOR(pg_catalog.+) 1) -NOTICE: executing the command locally: UPDATE single_node.another_schema_table_90630512 another_schema_table SET b = (b OPERATOR(pg_catalog.+) 1) -NOTICE: executing the command locally: UPDATE single_node.another_schema_table_90630513 another_schema_table SET b = (b OPERATOR(pg_catalog.+) 1) -NOTICE: executing the command locally: UPDATE single_node.another_schema_table_90630514 another_schema_table SET b = (b OPERATOR(pg_catalog.+) 1) +NOTICE: executing the command locally: UPDATE single_node.another_schema_table_90630515 another_schema_table SET b = (b OPERATOR(pg_catalog.+) 1) +NOTICE: executing the command locally: UPDATE single_node.another_schema_table_90630516 another_schema_table SET b = (b OPERATOR(pg_catalog.+) 1) +NOTICE: executing the command locally: UPDATE single_node.another_schema_table_90630517 another_schema_table SET b = (b OPERATOR(pg_catalog.+) 1) +NOTICE: executing the command locally: UPDATE single_node.another_schema_table_90630518 another_schema_table SET b = (b OPERATOR(pg_catalog.+) 1) SELECT coordinated_transaction_should_use_2PC(); coordinated_transaction_should_use_2pc --------------------------------------------------------------------- @@ -1935,10 +2009,10 @@ ROLLBACK; BEGIN; WITH cte_1 AS (UPDATE another_schema_table SET b = b + 1 RETURNING *) SELECT count(*) FROM cte_1; -NOTICE: executing the command locally: UPDATE single_node.another_schema_table_90630511 another_schema_table SET b = (b OPERATOR(pg_catalog.+) 1) RETURNING a, b -NOTICE: executing the command locally: UPDATE single_node.another_schema_table_90630512 another_schema_table SET b = (b OPERATOR(pg_catalog.+) 1) RETURNING a, b -NOTICE: executing the command locally: UPDATE single_node.another_schema_table_90630513 another_schema_table SET b = (b OPERATOR(pg_catalog.+) 1) RETURNING a, b -NOTICE: executing the command locally: UPDATE single_node.another_schema_table_90630514 another_schema_table SET b = (b OPERATOR(pg_catalog.+) 1) RETURNING a, b +NOTICE: executing the command locally: UPDATE single_node.another_schema_table_90630515 another_schema_table SET b = (b OPERATOR(pg_catalog.+) 1) RETURNING a, b +NOTICE: executing the command locally: UPDATE single_node.another_schema_table_90630516 another_schema_table SET b = (b OPERATOR(pg_catalog.+) 1) RETURNING a, b +NOTICE: executing the command locally: UPDATE single_node.another_schema_table_90630517 another_schema_table SET b = (b OPERATOR(pg_catalog.+) 1) RETURNING a, b +NOTICE: executing the command locally: UPDATE single_node.another_schema_table_90630518 another_schema_table SET b = (b OPERATOR(pg_catalog.+) 1) RETURNING a, b NOTICE: executing the command locally: SELECT count(*) AS count FROM (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) cte_1 count --------------------------------------------------------------------- @@ -1955,10 +2029,10 @@ ROLLBACK; -- same without transaction block WITH cte_1 AS (UPDATE another_schema_table SET b = b + 1 RETURNING *) SELECT coordinated_transaction_should_use_2PC(); -NOTICE: executing the command locally: UPDATE single_node.another_schema_table_90630511 another_schema_table SET b = (b OPERATOR(pg_catalog.+) 1) RETURNING a, b -NOTICE: executing the command locally: UPDATE single_node.another_schema_table_90630512 another_schema_table SET b = (b OPERATOR(pg_catalog.+) 1) RETURNING a, b -NOTICE: executing the command locally: UPDATE 
single_node.another_schema_table_90630513 another_schema_table SET b = (b OPERATOR(pg_catalog.+) 1) RETURNING a, b -NOTICE: executing the command locally: UPDATE single_node.another_schema_table_90630514 another_schema_table SET b = (b OPERATOR(pg_catalog.+) 1) RETURNING a, b +NOTICE: executing the command locally: UPDATE single_node.another_schema_table_90630515 another_schema_table SET b = (b OPERATOR(pg_catalog.+) 1) RETURNING a, b +NOTICE: executing the command locally: UPDATE single_node.another_schema_table_90630516 another_schema_table SET b = (b OPERATOR(pg_catalog.+) 1) RETURNING a, b +NOTICE: executing the command locally: UPDATE single_node.another_schema_table_90630517 another_schema_table SET b = (b OPERATOR(pg_catalog.+) 1) RETURNING a, b +NOTICE: executing the command locally: UPDATE single_node.another_schema_table_90630518 another_schema_table SET b = (b OPERATOR(pg_catalog.+) 1) RETURNING a, b NOTICE: executing the command locally: SELECT single_node.coordinated_transaction_should_use_2pc() AS coordinated_transaction_should_use_2pc coordinated_transaction_should_use_2pc --------------------------------------------------------------------- @@ -1969,7 +2043,7 @@ NOTICE: executing the command locally: SELECT single_node.coordinated_transacti -- starts 2PC execution BEGIN; UPDATE another_schema_table SET b = b + 1 WHERE a = 1; -NOTICE: executing the command locally: UPDATE single_node.another_schema_table_90630511 another_schema_table SET b = (b OPERATOR(pg_catalog.+) 1) WHERE (a OPERATOR(pg_catalog.=) 1) +NOTICE: executing the command locally: UPDATE single_node.another_schema_table_90630515 another_schema_table SET b = (b OPERATOR(pg_catalog.+) 1) WHERE (a OPERATOR(pg_catalog.=) 1) SELECT coordinated_transaction_should_use_2PC(); coordinated_transaction_should_use_2pc --------------------------------------------------------------------- @@ -1980,7 +2054,7 @@ ROLLBACK; -- same without transaction block WITH cte_1 AS (UPDATE another_schema_table SET b = b + 1 WHERE a = 1 RETURNING *) SELECT coordinated_transaction_should_use_2PC() FROM cte_1; -NOTICE: executing the command locally: WITH cte_1 AS (UPDATE single_node.another_schema_table_90630511 another_schema_table SET b = (another_schema_table.b OPERATOR(pg_catalog.+) 1) WHERE (another_schema_table.a OPERATOR(pg_catalog.=) 1) RETURNING another_schema_table.a, another_schema_table.b) SELECT single_node.coordinated_transaction_should_use_2pc() AS coordinated_transaction_should_use_2pc FROM cte_1 +NOTICE: executing the command locally: WITH cte_1 AS (UPDATE single_node.another_schema_table_90630515 another_schema_table SET b = (another_schema_table.b OPERATOR(pg_catalog.+) 1) WHERE (another_schema_table.a OPERATOR(pg_catalog.=) 1) RETURNING another_schema_table.a, another_schema_table.b) SELECT single_node.coordinated_transaction_should_use_2pc() AS coordinated_transaction_should_use_2pc FROM cte_1 coordinated_transaction_should_use_2pc --------------------------------------------------------------------- t diff --git a/src/test/regress/expected/single_node_truncate.out b/src/test/regress/expected/single_node_truncate.out new file mode 100644 index 000000000..bf1c99d69 --- /dev/null +++ b/src/test/regress/expected/single_node_truncate.out @@ -0,0 +1,147 @@ +CREATE SCHEMA single_node_truncate; +SET search_path TO single_node_truncate; +SET citus.shard_replication_factor TO 1; +-- helper view that prints out local table names and sizes in the schema +CREATE VIEW table_sizes AS +SELECT + c.relname as name, + 
pg_catalog.pg_table_size(c.oid) > 0 as has_data +FROM pg_catalog.pg_class c + LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace +WHERE c.relkind = 'r' + AND n.nspname = 'single_node_truncate' +ORDER BY 1; +-- test truncating reference tables +CREATE TABLE ref(id int UNIQUE, data int); +INSERT INTO ref SELECT x,x FROM generate_series(1,10000) x; +SELECT create_reference_table('ref'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$single_node_truncate.ref$$) + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE citus_local(id int, ref_id int REFERENCES ref(id)); +INSERT INTO citus_local SELECT x,x FROM generate_series(1,10000) x; +-- verify that shell tables for citus local tables are empty +SELECT * FROM table_sizes; + name | has_data +--------------------------------------------------------------------- + citus_local | f + citus_local_102045 | t + ref | t + ref_102044 | t +(4 rows) + +-- verify that this UDF is noop on Citus local tables +SELECT truncate_local_data_after_distributing_table('citus_local'); + truncate_local_data_after_distributing_table +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM table_sizes; + name | has_data +--------------------------------------------------------------------- + citus_local | f + citus_local_102045 | t + ref | t + ref_102044 | t +(4 rows) + +-- test that we allow cascading truncates to citus local tables +BEGIN; +SELECT truncate_local_data_after_distributing_table('ref'); +NOTICE: truncate cascades to table "citus_local" + truncate_local_data_after_distributing_table +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM table_sizes; + name | has_data +--------------------------------------------------------------------- + citus_local | f + citus_local_102045 | t + ref | f + ref_102044 | t +(4 rows) + +ROLLBACK; +-- test that we allow distributing tables that have foreign keys to reference tables +CREATE TABLE dist(id int, ref_id int REFERENCES ref(id)); +INSERT INTO dist SELECT x,x FROM generate_series(1,10000) x; +SELECT create_distributed_table('dist','id'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. 
+HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$single_node_truncate.dist$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- the following should truncate ref, dist and citus_local +BEGIN; +SELECT truncate_local_data_after_distributing_table('ref'); +NOTICE: truncate cascades to table "citus_local" +NOTICE: truncate cascades to table "dist" + truncate_local_data_after_distributing_table +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM table_sizes; + name | has_data +--------------------------------------------------------------------- + citus_local | f + citus_local_102045 | t + dist | f + dist_102047 | t + dist_102048 | t + dist_102049 | t + dist_102050 | t + ref | f + ref_102044 | t +(9 rows) + +ROLLBACK; +-- the following should truncate dist table only +BEGIN; +SELECT truncate_local_data_after_distributing_table('dist'); + truncate_local_data_after_distributing_table +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM table_sizes; + name | has_data +--------------------------------------------------------------------- + citus_local | f + citus_local_102045 | t + dist | f + dist_102047 | t + dist_102048 | t + dist_102049 | t + dist_102050 | t + ref | t + ref_102044 | t +(9 rows) + +ROLLBACK; +DROP TABLE ref, dist, citus_local; +DROP VIEW table_sizes; +DROP SCHEMA single_node_truncate CASCADE; +-- Remove the coordinator +SELECT 1 FROM master_remove_node('localhost', :master_port); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +-- restart nodeid sequence so that multi_cluster_management still has the same +-- nodeids +ALTER SEQUENCE pg_dist_node_nodeid_seq RESTART 1; diff --git a/src/test/regress/expected/subquery_and_cte.out b/src/test/regress/expected/subquery_and_cte.out index f175b6940..04b90447c 100644 --- a/src/test/regress/expected/subquery_and_cte.out +++ b/src/test/regress/expected/subquery_and_cte.out @@ -3,6 +3,7 @@ -- =================================================================== SET search_path TO subquery_and_ctes; CREATE TABLE users_table_local AS SELECT * FROM users_table; +SET citus.shard_replication_factor TO 1; CREATE TABLE dist_table (id int, value int); SELECT create_distributed_table('dist_table', 'id', colocate_with => 'users_table'); create_distributed_table diff --git a/src/test/regress/expected/tableam.out b/src/test/regress/expected/tableam.out index 799be790a..7a11d7354 100644 --- a/src/test/regress/expected/tableam.out +++ b/src/test/regress/expected/tableam.out @@ -31,7 +31,7 @@ insert into test_hash_dist values (1, 1); WARNING: fake_tuple_insert select create_distributed_table('test_hash_dist','id'); WARNING: fake_scan_getnextslot -CONTEXT: SQL statement "SELECT EXISTS (SELECT 1 FROM test_tableam.test_hash_dist)" +CONTEXT: SQL statement "SELECT TRUE FROM test_tableam.test_hash_dist LIMIT 1" WARNING: fake_scan_getnextslot NOTICE: Copying data from local table... WARNING: fake_scan_getnextslot @@ -85,7 +85,7 @@ insert into test_ref values (1); WARNING: fake_tuple_insert select create_reference_table('test_ref'); WARNING: fake_scan_getnextslot -CONTEXT: SQL statement "SELECT EXISTS (SELECT 1 FROM test_tableam.test_ref)" +CONTEXT: SQL statement "SELECT TRUE FROM test_tableam.test_ref LIMIT 1" WARNING: fake_scan_getnextslot NOTICE: Copying data from local table... 
WARNING: fake_scan_getnextslot @@ -156,9 +156,9 @@ SELECT master_remove_node('localhost', :master_port); CREATE TABLE test_range_dist(id int, val int) using fake_am; SELECT create_distributed_table('test_range_dist', 'id', 'range'); WARNING: fake_scan_getnextslot -CONTEXT: SQL statement "SELECT EXISTS (SELECT 1 FROM test_tableam.test_range_dist)" +CONTEXT: SQL statement "SELECT TRUE FROM test_tableam.test_range_dist LIMIT 1" WARNING: fake_scan_getnextslot -CONTEXT: SQL statement "SELECT EXISTS (SELECT 1 FROM test_tableam.test_range_dist)" +CONTEXT: SQL statement "SELECT TRUE FROM test_tableam.test_range_dist LIMIT 1" create_distributed_table --------------------------------------------------------------------- @@ -280,7 +280,7 @@ NOTICE: copying the data has completed DETAIL: The local data in the table is no longer visible, but is still on disk. HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$public.test_partitioned_p1$$) WARNING: fake_scan_getnextslot -CONTEXT: SQL statement "SELECT EXISTS (SELECT 1 FROM public.test_partitioned_p2)" +CONTEXT: SQL statement "SELECT TRUE FROM public.test_partitioned_p2 LIMIT 1" WARNING: fake_scan_getnextslot NOTICE: Copying data from local table... WARNING: fake_scan_getnextslot diff --git a/src/test/regress/expected/undistribute_table.out b/src/test/regress/expected/undistribute_table.out index 4ba209067..0dc631bab 100644 --- a/src/test/regress/expected/undistribute_table.out +++ b/src/test/regress/expected/undistribute_table.out @@ -135,6 +135,7 @@ CREATE FOREIGN TABLE foreign_table ( full_name text not null default '' ) SERVER fake_fdw_server OPTIONS (encoding 'utf-8', compression 'true'); SELECT create_distributed_table('foreign_table', 'id'); +NOTICE: foreign-data wrapper "fake_fdw" does not have an extension defined NOTICE: foreign-data wrapper "fake_fdw" does not have an extension defined create_distributed_table --------------------------------------------------------------------- diff --git a/src/test/regress/expected/undistribute_table_cascade_mx.out b/src/test/regress/expected/undistribute_table_cascade_mx.out index f52d3b0aa..b7ff62574 100644 --- a/src/test/regress/expected/undistribute_table_cascade_mx.out +++ b/src/test/regress/expected/undistribute_table_cascade_mx.out @@ -140,7 +140,6 @@ CREATE TABLE users ( , country_id int references countries(id) , primary key (org_id, id) ); -SET citus.replication_model to 'streaming'; -- "users" table was implicitly added to citus metadata when defining foreign key, -- so create_distributed_table would first undistribute it. -- Show that it works well when changing sequence dependencies on mx workers. 
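As a rough illustration of the implicit-metadata behavior described in the comment above, the sketch below shows the general shape of such a test. It is only a sketch: the column list is abbreviated from the test and the choice of org_id as the distribution column is an assumption, not taken from the diff.

-- countries becomes a reference table
CREATE TABLE countries (id int PRIMARY KEY);
SELECT create_reference_table('countries');
-- declaring this foreign key implicitly adds "users" to the Citus metadata
-- (as a Citus local table) because it references a reference table
CREATE TABLE users (
    id bigserial,
    org_id int,
    country_id int REFERENCES countries(id),
    PRIMARY KEY (org_id, id)
);
-- create_distributed_table therefore undistributes "users" first and then
-- distributes it on the chosen column
SELECT create_distributed_table('users', 'org_id');
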
diff --git a/src/test/regress/expected/union_pushdown.out b/src/test/regress/expected/union_pushdown.out index 2691ff461..ec56f3949 100644 --- a/src/test/regress/expected/union_pushdown.out +++ b/src/test/regress/expected/union_pushdown.out @@ -859,10 +859,123 @@ $$); t (1 row) +-- #4781 +CREATE TABLE test_a (id int, k int); +CREATE TABLE test_b (id int, k int); +SELECT create_distributed_table('test_a','id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_b','id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE OR REPLACE VIEW v AS SELECT * from test_a where k>1 UNION ALL SELECT * from test_b where k<1; +-- the following can be pushed down since dist_key is used in the aggregation +SELECT public.explain_has_distributed_subplan($$ +EXPLAIN +SELECT COUNT(id) FROM v; +$$); + explain_has_distributed_subplan +--------------------------------------------------------------------- + f +(1 row) + +SELECT public.explain_has_distributed_subplan($$ +EXPLAIN +SELECT AVG(id) FROM v; +$$); + explain_has_distributed_subplan +--------------------------------------------------------------------- + f +(1 row) + +SELECT public.explain_has_distributed_subplan($$ +EXPLAIN +SELECT SUM(id) FROM v; +$$); + explain_has_distributed_subplan +--------------------------------------------------------------------- + f +(1 row) + +SELECT public.explain_has_distributed_subplan($$ +EXPLAIN +SELECT MAX(id) FROM v; +$$); + explain_has_distributed_subplan +--------------------------------------------------------------------- + f +(1 row) + +-- cannot be pushed down because postgres optimizes fields, needs to be fixed with #4781 +SELECT public.explain_has_distributed_subplan($$ +EXPLAIN +SELECT COUNT(k) FROM v; +$$); + explain_has_distributed_subplan +--------------------------------------------------------------------- + t +(1 row) + +SELECT public.explain_has_distributed_subplan($$ +EXPLAIN +SELECT AVG(k) FROM v; +$$); + explain_has_distributed_subplan +--------------------------------------------------------------------- + t +(1 row) + +SELECT public.explain_has_distributed_subplan($$ +EXPLAIN +SELECT SUM(k) FROM v; +$$); + explain_has_distributed_subplan +--------------------------------------------------------------------- + t +(1 row) + +SELECT public.explain_has_distributed_subplan($$ +EXPLAIN +SELECT MAX(k) FROM v; +$$); + explain_has_distributed_subplan +--------------------------------------------------------------------- + t +(1 row) + +-- order by prevents postgres from optimizing fields so can be pushed down +SELECT public.explain_has_distributed_subplan($$ +EXPLAIN +SELECT id, COUNT(*) FROM v GROUP BY id ORDER BY id; +$$); + explain_has_distributed_subplan +--------------------------------------------------------------------- + f +(1 row) + +-- order by is not on dist_key so can't be pushed down, needs to be fixed with #4781 +SELECT public.explain_has_distributed_subplan($$ +EXPLAIN +SELECT k, COUNT(*) FROM v GROUP BY k ORDER BY k; +$$); + explain_has_distributed_subplan +--------------------------------------------------------------------- + t +(1 row) + RESET client_min_messages; DROP SCHEMA union_pushdown CASCADE; -NOTICE: drop cascades to 4 other objects +NOTICE: drop cascades to 7 other objects DETAIL: drop cascades to table users_table_part drop cascades to table events_table_part drop cascades to table events_table_ref drop cascades to 
table events_table_local +drop cascades to table test_a +drop cascades to table test_b +drop cascades to view v diff --git a/src/test/regress/expected/upgrade_distributed_function_before.out b/src/test/regress/expected/upgrade_distributed_function_before.out index c3b614688..02b34e764 100644 --- a/src/test/regress/expected/upgrade_distributed_function_before.out +++ b/src/test/regress/expected/upgrade_distributed_function_before.out @@ -1,6 +1,5 @@ CREATE SCHEMA upgrade_distributed_function_before; SET search_path TO upgrade_distributed_function_before, public; -SET citus.replication_model TO streaming; SET citus.shard_replication_factor TO 1; CREATE TABLE t1 (a int PRIMARY KEY, b int); SELECT create_distributed_table('t1','a'); diff --git a/src/test/regress/expected/upgrade_list_citus_objects.out b/src/test/regress/expected/upgrade_list_citus_objects.out index 06065b049..6be33257a 100644 --- a/src/test/regress/expected/upgrade_list_citus_objects.out +++ b/src/test/regress/expected/upgrade_list_citus_objects.out @@ -35,9 +35,10 @@ ORDER BY 1; function citus_add_inactive_node(text,integer,integer,noderole,name) function citus_add_local_table_to_metadata(regclass,boolean) function citus_add_node(text,integer,integer,noderole,name) - function citus_add_rebalance_strategy(name,regproc,regproc,regproc,real,real) + function citus_add_rebalance_strategy(name,regproc,regproc,regproc,real,real,real) function citus_add_secondary_node(text,integer,text,integer,name) function citus_blocking_pids(integer) + function citus_cleanup_orphaned_shards() function citus_conninfo_cache_invalidate() function citus_copy_shard_placement(bigint,text,integer,text,integer,boolean,citus.shard_transfer_mode) function citus_create_restore_point(text) @@ -59,7 +60,6 @@ ORDER BY 1; function citus_internal.downgrade_columnar_storage(regclass) function citus_internal.find_groupid_for_node(text,integer) function citus_internal.pg_dist_node_trigger_func() - function citus_internal.pg_dist_rebalance_strategy_enterprise_check() function citus_internal.pg_dist_rebalance_strategy_trigger_func() function citus_internal.pg_dist_shard_placement_trigger_func() function citus_internal.refresh_isolation_tester_prepared_statement() @@ -123,7 +123,7 @@ ORDER BY 1; function get_current_transaction_id() function get_global_active_transactions() function get_rebalance_progress() - function get_rebalance_table_shards_plan(regclass,real,integer,bigint[],boolean,name) + function get_rebalance_table_shards_plan(regclass,real,integer,bigint[],boolean,name,real) function get_shard_id_for_distribution_column(regclass,"any") function isolate_tenant_to_new_shard(regclass,"any",text) function json_cat_agg(json) diff --git a/src/test/regress/expected/upgrade_list_citus_objects_0.out b/src/test/regress/expected/upgrade_list_citus_objects_0.out index 9752c6502..045b538f2 100644 --- a/src/test/regress/expected/upgrade_list_citus_objects_0.out +++ b/src/test/regress/expected/upgrade_list_citus_objects_0.out @@ -32,7 +32,7 @@ ORDER BY 1; function citus_add_inactive_node(text,integer,integer,noderole,name) function citus_add_local_table_to_metadata(regclass,boolean) function citus_add_node(text,integer,integer,noderole,name) - function citus_add_rebalance_strategy(name,regproc,regproc,regproc,real,real) + function citus_add_rebalance_strategy(name,regproc,regproc,regproc,real,real,real) function citus_add_secondary_node(text,integer,text,integer,name) function citus_blocking_pids(integer) function citus_conninfo_cache_invalidate() @@ -118,7 +118,7 @@ ORDER 
BY 1; function get_current_transaction_id() function get_global_active_transactions() function get_rebalance_progress() - function get_rebalance_table_shards_plan(regclass,real,integer,bigint[],boolean,name) + function get_rebalance_table_shards_plan(regclass,real,integer,bigint[],boolean,name,real) function get_shard_id_for_distribution_column(regclass,"any") function isolate_tenant_to_new_shard(regclass,"any",text) function json_cat_agg(json) diff --git a/src/test/regress/expected/upgrade_pg_dist_object_test_after.out b/src/test/regress/expected/upgrade_pg_dist_object_test_after.out index 5a5d8cb63..7437177f5 100644 --- a/src/test/regress/expected/upgrade_pg_dist_object_test_after.out +++ b/src/test/regress/expected/upgrade_pg_dist_object_test_after.out @@ -39,11 +39,12 @@ drop cascades to table upgrade_basic.t_append SELECT i.* FROM citus.pg_dist_object, pg_identify_object_as_address(classid, objid, objsubid) i ORDER BY 1, 2, 3; type | object_names | object_args --------------------------------------------------------------------- + database | {postgres} | {} extension | {isn} | {} role | {postgres} | {} schema | {fooschema} | {} schema | {new_schema} | {} schema | {public} | {} type | {fooschema.footype} | {} - (6 rows) + (7 rows) diff --git a/src/test/regress/expected/upgrade_rebalance_strategy_after.out b/src/test/regress/expected/upgrade_rebalance_strategy_after.out index 36dd71b6c..da822fffd 100644 --- a/src/test/regress/expected/upgrade_rebalance_strategy_after.out +++ b/src/test/regress/expected/upgrade_rebalance_strategy_after.out @@ -1,8 +1,8 @@ SELECT * FROM pg_catalog.pg_dist_rebalance_strategy ORDER BY name; - name | default_strategy | shard_cost_function | node_capacity_function | shard_allowed_on_node_function | default_threshold | minimum_threshold + name | default_strategy | shard_cost_function | node_capacity_function | shard_allowed_on_node_function | default_threshold | minimum_threshold | improvement_threshold --------------------------------------------------------------------- - by_disk_size | f | citus_shard_cost_by_disk_size | citus_node_capacity_1 | citus_shard_allowed_on_node_true | 0.1 | 0.01 - by_shard_count | f | citus_shard_cost_1 | citus_node_capacity_1 | citus_shard_allowed_on_node_true | 0 | 0 - custom_strategy | t | upgrade_rebalance_strategy.shard_cost_2 | upgrade_rebalance_strategy.capacity_high_worker_1 | upgrade_rebalance_strategy.only_worker_2 | 0.5 | 0.2 + by_disk_size | f | citus_shard_cost_by_disk_size | citus_node_capacity_1 | citus_shard_allowed_on_node_true | 0.1 | 0.01 | 0.5 + by_shard_count | f | citus_shard_cost_1 | citus_node_capacity_1 | citus_shard_allowed_on_node_true | 0 | 0 | 0 + custom_strategy | t | upgrade_rebalance_strategy.shard_cost_2 | upgrade_rebalance_strategy.capacity_high_worker_1 | upgrade_rebalance_strategy.only_worker_2 | 0.5 | 0.2 | 0 (3 rows) diff --git a/src/test/regress/expected/upgrade_rebalance_strategy_before.out b/src/test/regress/expected/upgrade_rebalance_strategy_before.out index 0a12b1d60..cf1d122b3 100644 --- a/src/test/regress/expected/upgrade_rebalance_strategy_before.out +++ b/src/test/regress/expected/upgrade_rebalance_strategy_before.out @@ -15,14 +15,14 @@ CREATE FUNCTION only_worker_2(shardid bigint, nodeidarg int) (CASE WHEN nodeport = 57638 THEN TRUE ELSE FALSE END) FROM pg_dist_node where nodeid = nodeidarg $$ LANGUAGE sql; -ALTER TABLE pg_catalog.pg_dist_rebalance_strategy DISABLE TRIGGER pg_dist_rebalance_strategy_enterprise_check_trigger; SELECT citus_add_rebalance_strategy( 'custom_strategy', 
'shard_cost_2', 'capacity_high_worker_1', 'only_worker_2', 0.5, - 0.2 + 0.2, + 0.3 ); citus_add_rebalance_strategy --------------------------------------------------------------------- @@ -35,4 +35,3 @@ SELECT citus_set_default_rebalance_strategy('custom_strategy'); (1 row) -ALTER TABLE pg_catalog.pg_dist_rebalance_strategy ENABLE TRIGGER pg_dist_rebalance_strategy_enterprise_check_trigger; diff --git a/src/test/regress/expected/window_functions.out b/src/test/regress/expected/window_functions.out index ff36aa993..0a41bc0cc 100644 --- a/src/test/regress/expected/window_functions.out +++ b/src/test/regress/expected/window_functions.out @@ -95,6 +95,9 @@ ORDER BY -- window function operates on the results of -- a join +-- we also want to verify that this doesn't crash +-- when the logging level is DEBUG4 +SET log_min_messages TO DEBUG4; SELECT us.user_id, SUM(us.value_1) OVER (PARTITION BY us.user_id) @@ -1593,3 +1596,57 @@ from public.users_table as ut limit 1; (1 row) +-- verify that this doesn't crash with DEBUG4 +SET log_min_messages TO DEBUG4; +SELECT + user_id, max(value_1) OVER (PARTITION BY user_id, MIN(value_2)) +FROM ( + SELECT + DISTINCT us.user_id, us.value_2, value_1, random() as r1 + FROM + users_table as us, events_table + WHERE + us.user_id = events_table.user_id AND event_type IN (1,2) + ORDER BY + user_id, value_2 + ) s +GROUP BY + 1, value_1 +ORDER BY + 2 DESC, 1; + user_id | max +--------------------------------------------------------------------- + 1 | 5 + 3 | 5 + 3 | 5 + 4 | 5 + 5 | 5 + 5 | 5 + 6 | 5 + 6 | 5 + 1 | 4 + 2 | 4 + 3 | 4 + 3 | 4 + 3 | 4 + 4 | 4 + 4 | 4 + 5 | 4 + 5 | 4 + 1 | 3 + 2 | 3 + 2 | 3 + 2 | 3 + 6 | 3 + 2 | 2 + 4 | 2 + 4 | 2 + 4 | 2 + 6 | 2 + 1 | 1 + 3 | 1 + 5 | 1 + 6 | 1 + 5 | 0 +(32 rows) + diff --git a/src/test/regress/expected/window_functions_0.out b/src/test/regress/expected/window_functions_0.out index 51d453427..aea319c0b 100644 --- a/src/test/regress/expected/window_functions_0.out +++ b/src/test/regress/expected/window_functions_0.out @@ -95,6 +95,9 @@ ORDER BY -- window function operates on the results of -- a join +-- we also want to verify that this doesn't crash +-- when the logging level is DEBUG4 +SET log_min_messages TO DEBUG4; SELECT us.user_id, SUM(us.value_1) OVER (PARTITION BY us.user_id) @@ -1589,3 +1592,57 @@ from public.users_table as ut limit 1; (1 row) +-- verify that this doesn't crash with DEBUG4 +SET log_min_messages TO DEBUG4; +SELECT + user_id, max(value_1) OVER (PARTITION BY user_id, MIN(value_2)) +FROM ( + SELECT + DISTINCT us.user_id, us.value_2, value_1, random() as r1 + FROM + users_table as us, events_table + WHERE + us.user_id = events_table.user_id AND event_type IN (1,2) + ORDER BY + user_id, value_2 + ) s +GROUP BY + 1, value_1 +ORDER BY + 2 DESC, 1; + user_id | max +--------------------------------------------------------------------- + 1 | 5 + 3 | 5 + 3 | 5 + 4 | 5 + 5 | 5 + 5 | 5 + 6 | 5 + 6 | 5 + 1 | 4 + 2 | 4 + 3 | 4 + 3 | 4 + 3 | 4 + 4 | 4 + 4 | 4 + 5 | 4 + 5 | 4 + 1 | 3 + 2 | 3 + 2 | 3 + 2 | 3 + 6 | 3 + 2 | 2 + 4 | 2 + 4 | 2 + 4 | 2 + 6 | 2 + 1 | 1 + 3 | 1 + 5 | 1 + 6 | 1 + 5 | 0 +(32 rows) + diff --git a/src/test/regress/isolation_schedule b/src/test/regress/isolation_schedule index 127bafd01..208d7e228 100644 --- a/src/test/regress/isolation_schedule +++ b/src/test/regress/isolation_schedule @@ -73,6 +73,7 @@ test: isolation_blocking_move_single_shard_commands_on_mx test: isolation_blocking_move_multi_shard_commands_on_mx test: isolation_shard_rebalancer test: isolation_rebalancer_deferred_drop +test: 
isolation_shard_rebalancer_progress # MX tests test: isolation_reference_on_mx diff --git a/src/test/regress/multi_1_schedule b/src/test/regress/multi_1_schedule new file mode 100644 index 000000000..eabb3c811 --- /dev/null +++ b/src/test/regress/multi_1_schedule @@ -0,0 +1,354 @@ +# ---------- +# $Id$ +# +# Regression tests that exercise distributed planning/execution functionality. +# +# All new regression tests are expected to be run by this schedule. Tests that +# do not set specific task executor type should also be added to +# multi_task_tracker_extra_schedule. +# +# Note that we use variant comparison files to test version dependent regression +# test results. For more information: +# http://www.postgresql.org/docs/current/static/regress-variant.html +# ---------- + +# --- +# Tests around schema changes, these are run first, so there's no preexisting objects. +# --- +test: multi_extension +test: single_node +test: single_node_truncate +test: multi_cluster_management + +# below tests are placed right after multi_cluster_management as we do +# remove/add node operations and we do not want any preexisting objects +test: alter_role_propagation +test: propagate_extension_commands +test: escape_extension_name +test: ref_citus_local_fkeys +test: alter_database_owner + +test: multi_test_helpers multi_test_helpers_superuser +test: multi_test_catalog_views +test: multi_table_ddl +test: multi_sequence_default +test: multi_name_lengths +test: multi_name_resolution +test: multi_metadata_access +test: multi_metadata_attributes + +test: multi_read_from_secondaries + +# ---------- +# The following distributed tests depend on creating a partitioned table and +# uploading data to it. +# ---------- +test: multi_create_table +test: multi_create_table_superuser +test: multi_create_table_constraints multi_master_protocol multi_load_data multi_load_data_superuser multi_behavioral_analytics_create_table +test: multi_behavioral_analytics_basics multi_behavioral_analytics_single_shard_queries multi_insert_select_non_pushable_queries multi_insert_select multi_behavioral_analytics_create_table_superuser +test: multi_shard_update_delete recursive_dml_with_different_planners_executors +test: insert_select_repartition window_functions dml_recursive multi_insert_select_window +test: multi_insert_select_conflict citus_table_triggers +test: multi_row_insert insert_select_into_local_table multi_create_table_new_features alter_index + +# following should not run in parallel because it relies on connection counts to workers +test: insert_select_connection_leak + +# --------- +# at the end of the regression tests regarding recursively planned modifications +# ensure that we don't leak any intermediate results +# This test should not run in parallel with any other tests +# --------- +test: ensure_no_intermediate_data_leak + +# ---------- +# Tests for partitioning support +# ---------- +test: multi_partitioning_utils multi_partitioning partitioning_issue_3970 replicated_partitioned_table + +# ---------- +# Tests for foreign data wrapper support +# ---------- +test: multi_create_fdw + + + +# ---------- +# Tests for statistics propagation +# ---------- +test: propagate_statistics +test: pg13_propagate_statistics + +# ---------- +# Test for updating table statistics +# ---------- +test: citus_update_table_statistics + +# ---------- +# Parallel TPC-H tests to check our distributed execution behavior +# ---------- +test: multi_tpch_query1 multi_tpch_query3 multi_tpch_query6 multi_tpch_query10 +test: multi_tpch_query12 
multi_tpch_query14 multi_tpch_query19 +test: multi_tpch_query7 multi_tpch_query7_nested + +# ---------- +# Parallel tests to check our join order planning logic. Note that we load data +# below; and therefore these tests should come after the execution tests. +# ---------- +test: multi_join_order_tpch_small multi_join_order_additional +test: multi_load_more_data +test: multi_join_order_tpch_repartition + +# ---------- +# Tests for repartition join planning and execution. Be careful when creating +# new shards before these tests, as they expect specific shard identifiers in +# the output. +# ---------- +test: multi_repartition_join_planning multi_repartition_join_pruning multi_repartition_join_task_assignment multi_repartition_join_ref +test: adaptive_executor_repartition + +# --------- +# Tests that modify data should run sequentially +# --------- +test: with_prepare + +# --------- +# Tests for recursive planning. +# --------- +test: with_nested with_where with_basics with_set_operations +test: with_modifying cte_prepared_modify cte_nested_modification +test: ensure_no_intermediate_data_leak +test: with_executors with_join with_partitioning with_transactions with_dml + + +# ---------- +# Tests to check our large record loading and shard deletion behavior +# ---------- +test: multi_load_large_records +test: multi_master_delete_protocol + +# ---------- +# Tests around DDL statements run on distributed tables +# ---------- +test: multi_index_statements +test: multi_alter_table_statements +test: multi_alter_table_add_constraints + +# ---------- +# multi_create_schema tests creation, loading, and querying of a table in a new +# schema (namespace). +# ---------- +test: multi_create_schema + +# ---------- +# Tests to check if we inform the user about potential caveats of creating new +# databases, schemas, roles, and authentication information. +# ---------- +test: multi_utility_warnings data_types + +# ---------- +# Tests to check the sequential and parallel executions of DDL and modification +# commands +# Should not be executed in parallel with other tests +# ---------- +test: sequential_modifications + +# --------- +# multi_append_table_to_shard loads data to create shards in a way that forces +# shard caching. +# --------- +test: multi_append_table_to_shard + +# --------- +# multi_outer_join loads data to create shards to test outer join mappings +# --------- +test: multi_outer_join + +# --- +# Tests covering mostly modification queries and required preliminary +# functionality related to metadata, shard creation, shard pruning and +# "hacky" copy script for hash partitioned tables. +# Note that the order of the following tests are important. multi_complex_count_distinct +# is independent from the rest of the group, it is added to increase parallelism. 
+# --- +test: multi_complex_count_distinct multi_select_distinct +test: multi_modifications +test: multi_distribution_metadata +test: multi_generate_ddl_commands multi_create_shards multi_prune_shard_list multi_repair_shards +test: multi_upsert multi_simple_queries multi_data_types +test: master_copy_shard_placement +# multi_utilities cannot be run in parallel with other tests because it checks +# global locks +test: multi_utilities +test: foreign_key_to_reference_table validate_constraint +test: multi_modifying_xacts +test: multi_repartition_udt multi_repartitioned_subquery_udf multi_subtransactions +test: multi_transaction_recovery + +test: local_dist_join_modifications +test: local_table_join +test: local_dist_join_mixed +test: citus_local_dist_joins + +# --------- +# multi_copy creates hash and range-partitioned tables and performs COPY +# multi_router_planner creates hash partitioned tables. +# --------- +test: multi_copy fast_path_router_modify pg_dump +test: multi_router_planner +# These 2 tests have prepared statements which sometimes get invalidated by concurrent tests, +# changing the debug output. We should not run them in parallel with others +test: null_parameters +test: multi_router_planner_fast_path + +# ---------- +# multi_large_shardid loads more lineitem data using high shard identifiers +# ---------- +test: multi_large_shardid + +# ---------- +# multi_size_queries tests various size commands on distributed tables +# ---------- +test: multi_size_queries + +# ---------- +# multi_drop_extension makes sure we can safely drop and recreate the extension +# ---------- +test: multi_drop_extension + +# ---------- +# multi_metadata_sync tests the propagation of mx-related metadata changes to metadata workers +# multi_unsupported_worker_operations tests that unsupported operations error out on metadata workers +# ---------- +test: multi_metadata_sync +test: multi_unsupported_worker_operations + +# ---------- +# grant_on_schema_propagation tests if the GRANT ... ON SCHEMA queries are propagated correctly +# multi_schema_support makes sure we can work with tables in schemas other than public with no problem +# ---------- +test: grant_on_schema_propagation +test: multi_schema_support + +# ---------- +# multi_function_evaluation tests edge-cases in master-side function pre-evaluation +# ---------- +test: multi_function_evaluation + +# ---------- +# multi_truncate tests truncate functionality for distributed tables +# ---------- +test: multi_truncate + +# ---------- +# multi_colocation_utils tests utility functions written for co-location feature & internal API +# multi_colocated_shard_transfer tests master_copy_shard_placement with colocated tables. 
+# ---------- +test: multi_colocation_utils +test: multi_colocated_shard_transfer + +# ---------- +# multi_citus_tools tests utility functions written for citus tools +# ---------- +test: multi_citus_tools + +# ---------- +# node_conninfo_reload tests that node_conninfo changes take effect +# ---------- +test: node_conninfo_reload + +# ---------- +# multi_foreign_key tests foreign key push down on distributed tables +# ---------- +test: multi_foreign_key +test: multi_foreign_key_relation_graph + +# ---------- +# multi_replicate_reference_table tests replicating reference tables to new nodes after we add new nodes +# multi_remove_node_reference_table tests metadata changes after master_remove_node +# ---------- +test: multi_replicate_reference_table +test: multi_remove_node_reference_table + +# -------- +# Replicating reference tables to coordinator. Add coordinator to pg_dist_node +# and rerun some of the tests. +# -------- +test: add_coordinator +test: multi_reference_table citus_local_tables_queries +test: foreign_key_to_reference_table +test: citus_local_table_triggers +test: replicate_reference_tables_to_coordinator +test: coordinator_shouldhaveshards +test: local_shard_utility_command_execution +test: citus_local_tables +test: multi_row_router_insert mixed_relkind_tests create_ref_dist_from_citus_local +test: undistribute_table_cascade +test: create_citus_local_table_cascade +test: fkeys_between_local_ref +test: auto_undist_citus_local + +test: remove_coordinator + +# ---------- +# multi_transactional_drop_shards tests for dropping shards using connection API +# ---------- +test: multi_transactional_drop_shards + +# ---------- +# multi_multiuser tests simple combinations of permission access and queries +# ---------- +test: multi_multiuser + +# --------- +# multi_cache_invalidation tests for an obscure crash citus used to exhibit when shardids +# changed the table they belonged to during a session +# -------- +test: multi_cache_invalidation + +# --------- +# multi_task_string_size tests task string size checks +# --------- +test: multi_task_string_size + +# --------- +# connection encryption tests +# --------- +test: ssl_by_default + +# --------- +# object distribution tests +# --------- +test: distributed_types distributed_types_conflict disable_object_propagation distributed_types_xact_add_enum_value +test: distributed_functions distributed_functions_conflict +test: distributed_collations distributed_collations_conflict +test: distributed_procedure + +# --------- +# deparsing logic tests +# --------- +test: multi_deparse_function multi_deparse_procedure + +# -------- +# cannot be run in parallel with any other tests as it checks +# statistics across sessions +# -------- +test: shared_connection_stats + +# --------- +# run queries generated by sql smith and sqlancer that caused issues in the past +# -------- +test: sqlsmith_failures sqlancer_failures + +# --------- +# test that no tests leaked intermediate results. This should always be last +# --------- +test: ensure_no_intermediate_data_leak + +# --------- +# ensures that we never leak any connection counts +# in the shared memory +# -------- +test: ensure_no_shared_connection_leak + diff --git a/src/test/regress/multi_schedule b/src/test/regress/multi_schedule index 338b14968..a43098f60 100644 --- a/src/test/regress/multi_schedule +++ b/src/test/regress/multi_schedule @@ -1,40 +1,6 @@ -# ---------- -# $Id$ -# -# Regression tests that exercise distributed planning/execution functionality. 
-# -# All new regression tests are expected to be run by this schedule. Tests that -# do not set specific task executor type should also be added to -# multi_task_tracker_extra_schedule. -# -# Note that we use variant comparison files to test version dependent regression -# test results. For more information: -# http://www.postgresql.org/docs/current/static/regress-variant.html -# ---------- - -# --- -# Tests around schema changes, these are run first, so there's no preexisting objects. -# --- -test: multi_extension -test: single_node test: multi_cluster_management - -# below tests are placed right after multi_cluster_management as we do -# remove/add node operations and we do not want any preexisting objects -test: alter_role_propagation -test: propagate_extension_commands -test: escape_extension_name -test: ref_citus_local_fkeys - -test: multi_test_helpers multi_test_helpers_superuser +test: multi_test_helpers multi_test_helpers_superuser multi_create_fdw test: multi_test_catalog_views -test: multi_table_ddl -test: multi_name_lengths -test: multi_name_resolution -test: multi_metadata_access -test: multi_metadata_attributes - -test: multi_read_from_secondaries # ---------- # The following distributed tests depend on creating a partitioned table and @@ -47,7 +13,7 @@ test: multi_behavioral_analytics_basics multi_behavioral_analytics_single_shard_ test: multi_shard_update_delete recursive_dml_with_different_planners_executors test: insert_select_repartition window_functions dml_recursive multi_insert_select_window test: multi_insert_select_conflict citus_table_triggers -test: multi_row_insert insert_select_into_local_table multi_create_table_new_features alter_index +test: multi_row_insert insert_select_into_local_table multi_create_table_new_features # following should not run in parallel because it relies on connection counts to workers test: insert_select_connection_leak @@ -62,12 +28,8 @@ test: ensure_no_intermediate_data_leak # ---------- # Tests for partitioning support # ---------- -test: multi_partitioning_utils multi_partitioning partitioning_issue_3970 replicated_partitioned_table +test: multi_partitioning_utils multi_partitioning replicated_partitioned_table -# ---------- -# Tests for foreign data wrapper support -# ---------- -test: multi_create_fdw # ---------- # Tests for recursive subquery planning @@ -91,17 +53,6 @@ test: cte_inline recursive_view_local_table values test: pg13 pg12 test: tableam -# ---------- -# Tests for statistics propagation -# ---------- -test: propagate_statistics -test: pg13_propagate_statistics - -# ---------- -# Test for updating table statistics -# ---------- -test: citus_update_table_statistics - # ---------- # Miscellaneous tests to check our query planning behavior # ---------- @@ -133,262 +84,6 @@ test: binary_protocol test: alter_table_set_access_method test: alter_distributed_table -# ---------- -# Parallel TPC-H tests to check our distributed execution behavior -# ---------- -test: multi_tpch_query1 multi_tpch_query3 multi_tpch_query6 multi_tpch_query10 -test: multi_tpch_query12 multi_tpch_query14 multi_tpch_query19 -test: multi_tpch_query7 multi_tpch_query7_nested - -# ---------- -# Parallel tests to check our join order planning logic. Note that we load data -# below; and therefore these tests should come after the execution tests. 
-# ---------- -test: multi_join_order_tpch_small multi_join_order_additional -test: multi_load_more_data -test: multi_join_order_tpch_repartition - -# ---------- -# Tests for repartition join planning and execution. Be careful when creating -# new shards before these tests, as they expect specific shard identifiers in -# the output. -# ---------- -test: multi_repartition_join_planning multi_repartition_join_pruning multi_repartition_join_task_assignment multi_repartition_join_ref -test: adaptive_executor_repartition - -# --------- -# Tests that modify data should run sequentially -# --------- -test: with_prepare - -# --------- -# Tests for recursive planning. -# --------- -test: with_nested with_where with_basics with_set_operations -test: with_modifying cte_prepared_modify cte_nested_modification -test: ensure_no_intermediate_data_leak -test: with_executors with_join with_partitioning with_transactions with_dml - - -# ---------- -# Tests to check our large record loading and shard deletion behavior -# ---------- -test: multi_load_large_records -test: multi_master_delete_protocol - -# ---------- -# Tests around DDL statements run on distributed tables -# ---------- -test: multi_index_statements -test: multi_alter_table_statements -test: multi_alter_table_add_constraints - -# ---------- -# multi_create_schema tests creation, loading, and querying of a table in a new -# schema (namespace). -# ---------- -test: multi_create_schema - -# ---------- -# Tests to check if we inform the user about potential caveats of creating new -# databases, schemas, roles, and authentication information. -# ---------- -test: multi_utility_warnings data_types - -# ---------- -# Tests to check the sequential and parallel executions of DDL and modification -# commands -# Should not be executed in parallel with other tests -# ---------- -test: sequential_modifications - -# --------- -# multi_append_table_to_shard loads data to create shards in a way that forces -# shard caching. -# --------- -test: multi_append_table_to_shard - -# --------- -# multi_outer_join loads data to create shards to test outer join mappings -# --------- -test: multi_outer_join - -# --- -# Tests covering mostly modification queries and required preliminary -# functionality related to metadata, shard creation, shard pruning and -# "hacky" copy script for hash partitioned tables. -# Note that the order of the following tests are important. multi_complex_count_distinct -# is independent from the rest of the group, it is added to increase parallelism. -# --- -test: multi_complex_count_distinct multi_select_distinct -test: multi_modifications -test: multi_distribution_metadata -test: multi_generate_ddl_commands multi_create_shards multi_prune_shard_list multi_repair_shards -test: multi_upsert multi_simple_queries multi_data_types -test: master_copy_shard_placement -# multi_utilities cannot be run in parallel with other tests because it checks -# global locks -test: multi_utilities -test: foreign_key_to_reference_table validate_constraint -test: multi_modifying_xacts -test: multi_repartition_udt multi_repartitioned_subquery_udf multi_subtransactions -test: multi_transaction_recovery - -test: local_dist_join_modifications -test: local_table_join -test: local_dist_join_mixed -test: citus_local_dist_joins - -# --------- -# multi_copy creates hash and range-partitioned tables and performs COPY -# multi_router_planner creates hash partitioned tables. 
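As a sketch of the behavior the multi_copy group relies on, COPY can target a hash-distributed table directly and rows are routed to shards by the distribution column. The table and data below are hypothetical and assume a running Citus cluster; they mirror the COPY ... FROM PROGRAM pattern used elsewhere in this patch but are not taken from the test files.

-- hypothetical example: COPY into a hash-distributed table
CREATE TABLE copy_demo (key int, value text);
SELECT create_distributed_table('copy_demo', 'key');
COPY copy_demo FROM PROGRAM 'echo 1,one && echo 2,two && echo 3,three' WITH CSV;
SELECT count(*) FROM copy_demo;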
-# --------- -test: multi_copy fast_path_router_modify pg_dump -test: multi_router_planner -# These 2 tests have prepared statements which sometimes get invalidated by concurrent tests, -# changing the debug output. We should not run them in parallel with others -test: null_parameters -test: multi_router_planner_fast_path - -# ---------- -# multi_large_shardid loads more lineitem data using high shard identifiers -# ---------- -test: multi_large_shardid - -# ---------- -# multi_size_queries tests various size commands on distributed tables -# ---------- -test: multi_size_queries - -# ---------- -# multi_drop_extension makes sure we can safely drop and recreate the extension -# ---------- -test: multi_drop_extension - -# ---------- -# multi_metadata_sync tests the propagation of mx-related metadata changes to metadata workers -# multi_unsupported_worker_operations tests that unsupported operations error out on metadata workers -# ---------- -test: multi_metadata_sync -test: multi_unsupported_worker_operations - -# ---------- -# grant_on_schema_propagation tests if the GRANT ... ON SCHEMA queries are propagated correctly -# multi_schema_support makes sure we can work with tables in schemas other than public with no problem -# ---------- -test: grant_on_schema_propagation -test: multi_schema_support - -# ---------- -# multi_function_evaluation tests edge-cases in master-side function pre-evaluation -# ---------- -test: multi_function_evaluation - -# ---------- -# multi_truncate tests truncate functionality for distributed tables -# ---------- -test: multi_truncate - -# ---------- -# multi_colocation_utils tests utility functions written for co-location feature & internal API -# multi_colocated_shard_transfer tests master_copy_shard_placement with colocated tables. -# ---------- -test: multi_colocation_utils -test: multi_colocated_shard_transfer - -# ---------- -# multi_citus_tools tests utility functions written for citus tools -# ---------- -test: multi_citus_tools - -# ---------- -# node_conninfo_reload tests that node_conninfo changes take effect -# ---------- -test: node_conninfo_reload - -# ---------- -# multi_foreign_key tests foreign key push down on distributed tables -# ---------- -test: multi_foreign_key -test: multi_foreign_key_relation_graph - -# ---------- -# multi_replicate_reference_table tests replicating reference tables to new nodes after we add new nodes -# multi_remove_node_reference_table tests metadata changes after master_remove_node -# ---------- -test: multi_replicate_reference_table -test: multi_remove_node_reference_table - -# -------- -# Replicating reference tables to coordinator. Add coordinator to pg_dist_node -# and rerun some of the tests. 
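The comment above is about registering the coordinator itself in pg_dist_node (group 0) so that reference tables also get a placement on it. The snippet below is only a sketch of that pattern, assuming the regression suite's :master_port psql variable; the actual add_coordinator test may differ.

-- illustrative sketch: add the coordinator (group 0) to the metadata so
-- reference tables are replicated to it as well
SELECT 1 FROM master_add_node('localhost', :master_port, groupid => 0);
SELECT nodename, nodeport, groupid FROM pg_dist_node ORDER BY nodeid;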
-# -------- -test: add_coordinator -test: multi_reference_table citus_local_tables_queries -test: foreign_key_to_reference_table -test: citus_local_table_triggers -test: replicate_reference_tables_to_coordinator -test: coordinator_shouldhaveshards -test: local_shard_utility_command_execution -test: citus_local_tables -test: multi_row_router_insert mixed_relkind_tests create_ref_dist_from_citus_local -test: undistribute_table_cascade -test: create_citus_local_table_cascade -test: fkeys_between_local_ref -test: auto_undist_citus_local - -test: remove_coordinator - -# ---------- -# multi_transactional_drop_shards tests for dropping shards using connection API -# ---------- -test: multi_transactional_drop_shards - -# ---------- -# multi_multiuser tests simple combinations of permission access and queries -# ---------- -test: multi_multiuser - -# --------- -# multi_cache_invalidation tests for an obscure crash citus used to exhibit when shardids -# changed the table they belonged to during a session -# -------- -test: multi_cache_invalidation - -# --------- -# multi_task_string_size tests task string size checks -# --------- -test: multi_task_string_size - -# --------- -# connection encryption tests -# --------- -test: ssl_by_default - -# --------- -# object distribution tests -# --------- -test: distributed_types distributed_types_conflict disable_object_propagation distributed_types_xact_add_enum_value -test: distributed_functions distributed_functions_conflict -test: distributed_collations distributed_collations_conflict -test: distributed_procedure - -# --------- -# deparsing logic tests -# --------- -test: multi_deparse_function multi_deparse_procedure - -# -------- -# cannot be run in parallel with any other tests as it checks -# statistics across sessions -# -------- -test: shared_connection_stats - -# --------- -# run queries generated by sql smith and sqlancer that caused issues in the past -# -------- -test: sqlsmith_failures sqlancer_failures - # --------- # test that no tests leaked intermediate results. 
This should always be last # --------- @@ -399,4 +94,3 @@ test: ensure_no_intermediate_data_leak # in the shared memory # -------- test: ensure_no_shared_connection_leak - diff --git a/src/test/regress/pg_regress_multi.pl b/src/test/regress/pg_regress_multi.pl index c0d354640..09500007b 100755 --- a/src/test/regress/pg_regress_multi.pl +++ b/src/test/regress/pg_regress_multi.pl @@ -442,11 +442,13 @@ push(@pgOptions, "wal_retrieve_retry_interval=1000"); push(@pgOptions, "citus.shard_count=4"); push(@pgOptions, "citus.max_adaptive_executor_pool_size=4"); push(@pgOptions, "citus.shard_max_size=1500kB"); +push(@pgOptions, "citus.defer_shard_delete_interval=-1"); push(@pgOptions, "citus.repartition_join_bucket_count_per_node=2"); push(@pgOptions, "citus.sort_returning='on'"); push(@pgOptions, "citus.shard_replication_factor=2"); push(@pgOptions, "citus.node_connection_timeout=${connectionTimeout}"); push(@pgOptions, "citus.explain_analyze_sort_method='taskId'"); +push(@pgOptions, "citus.enable_manual_changes_to_shards=on"); # we disable slow start by default to encourage parallelism within tests push(@pgOptions, "citus.executor_slow_start_interval=0ms"); @@ -499,6 +501,8 @@ if($isolationtester) push(@pgOptions, "citus.metadata_sync_interval=1000"); push(@pgOptions, "citus.metadata_sync_retry_interval=100"); push(@pgOptions, "client_min_messages='warning'"); # pg12 introduced notice showing during isolation tests + push(@pgOptions, "citus.running_under_isolation_test=true"); + } # Add externally added options last, so they overwrite the default ones above diff --git a/src/test/regress/spec/columnar_temp_tables.spec b/src/test/regress/spec/columnar_temp_tables.spec index bffce93cb..456f1236e 100644 --- a/src/test/regress/spec/columnar_temp_tables.spec +++ b/src/test/regress/spec/columnar_temp_tables.spec @@ -42,6 +42,6 @@ step "s2-commit" COMMIT; } -# make sure that we allow creating same-named temporary columnar tables in different sessions -# also make sure that they don't block each other +// make sure that we allow creating same-named temporary columnar tables in different sessions +// also make sure that they don't block each other permutation "s1-begin" "s2-begin" "s1-create-temp" "s1-insert" "s2-create-temp" "s2-insert" "s1-commit" "s2-commit" diff --git a/src/test/regress/spec/columnar_write_concurrency.spec b/src/test/regress/spec/columnar_write_concurrency.spec index 06f9c06e0..edfead34a 100644 --- a/src/test/regress/spec/columnar_write_concurrency.spec +++ b/src/test/regress/spec/columnar_write_concurrency.spec @@ -89,13 +89,13 @@ step "s2-commit" COMMIT; } -# writes shouldn't block writes or reads +// writes shouldn't block writes or reads permutation "s1-begin" "s2-begin" "s1-insert" "s2-insert" "s1-select" "s2-select" "s1-commit" "s2-commit" "s1-select" -# copy vs insert +// copy vs insert permutation "s1-begin" "s2-begin" "s1-copy" "s2-insert" "s1-select" "s2-select" "s1-commit" "s2-commit" "s1-select" -# insert vs copy +// insert vs copy permutation "s1-begin" "s2-begin" "s2-insert" "s1-copy" "s1-select" "s2-select" "s1-commit" "s2-commit" "s1-select" # insert vs insert diff --git a/src/test/regress/spec/isolation_blocking_move_multi_shard_commands.spec b/src/test/regress/spec/isolation_blocking_move_multi_shard_commands.spec index ba534046b..45f5fed84 100644 --- a/src/test/regress/spec/isolation_blocking_move_multi_shard_commands.spec +++ b/src/test/regress/spec/isolation_blocking_move_multi_shard_commands.spec @@ -7,7 +7,8 @@ setup SELECT 
citus_internal.refresh_isolation_tester_prepared_statement(); SET citus.shard_count TO 8; - SET citus.shard_replication_factor TO 1; + SET citus.shard_replication_factor TO 1; + CREATE TABLE logical_replicate_placement (x int PRIMARY KEY, y int); SELECT create_distributed_table('logical_replicate_placement', 'x'); @@ -33,7 +34,7 @@ step "s1-begin" step "s1-move-placement" { - SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; + SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; } step "s1-end" @@ -53,7 +54,7 @@ step "s1-insert" step "s1-get-shard-distribution" { - select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardstate != 4 and shardid in (SELECT * FROM selected_shard) order by nodeport; } session "s2" diff --git a/src/test/regress/spec/isolation_blocking_move_multi_shard_commands_on_mx.spec b/src/test/regress/spec/isolation_blocking_move_multi_shard_commands_on_mx.spec index ac26a5f2c..9d535d3df 100644 --- a/src/test/regress/spec/isolation_blocking_move_multi_shard_commands_on_mx.spec +++ b/src/test/regress/spec/isolation_blocking_move_multi_shard_commands_on_mx.spec @@ -31,9 +31,9 @@ setup false) FROM pg_dist_node; - SET citus.replication_model to streaming; SET citus.shard_replication_factor TO 1; + SET citus.shard_count TO 8; SET citus.shard_replication_factor TO 1; CREATE TABLE logical_replicate_placement (x int PRIMARY KEY, y int); @@ -60,7 +60,7 @@ step "s1-begin" step "s1-move-placement" { - SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; + SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; } step "s1-commit" @@ -80,7 +80,7 @@ step "s1-insert" step "s1-get-shard-distribution" { - select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardstate != 4 and shardid in (SELECT * FROM selected_shard) order by nodeport; } session "s2" diff --git a/src/test/regress/spec/isolation_blocking_move_single_shard_commands.spec b/src/test/regress/spec/isolation_blocking_move_single_shard_commands.spec index f1250010f..f125904c0 100644 --- a/src/test/regress/spec/isolation_blocking_move_single_shard_commands.spec +++ b/src/test/regress/spec/isolation_blocking_move_single_shard_commands.spec @@ -7,6 +7,7 @@ setup SET citus.shard_count TO 8; SET citus.shard_replication_factor TO 1; + CREATE TABLE logical_replicate_placement (x int PRIMARY KEY, y int); SELECT create_distributed_table('logical_replicate_placement', 'x'); @@ -31,7 +32,7 @@ step "s1-begin" step "s1-move-placement" { - SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes'); 
+ SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes'); } step "s1-end" @@ -51,7 +52,7 @@ step "s1-insert" step "s1-get-shard-distribution" { - select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardstate != 4 and shardid in (SELECT * FROM selected_shard) order by nodeport; } session "s2" diff --git a/src/test/regress/spec/isolation_blocking_move_single_shard_commands_on_mx.spec b/src/test/regress/spec/isolation_blocking_move_single_shard_commands_on_mx.spec index d0a3f323f..005c59e5b 100644 --- a/src/test/regress/spec/isolation_blocking_move_single_shard_commands_on_mx.spec +++ b/src/test/regress/spec/isolation_blocking_move_single_shard_commands_on_mx.spec @@ -30,9 +30,9 @@ setup false) FROM pg_dist_node; - SET citus.replication_model to streaming; SET citus.shard_replication_factor TO 1; + SET citus.shard_count TO 8; CREATE TABLE logical_replicate_placement (x int PRIMARY KEY, y int); SELECT create_distributed_table('logical_replicate_placement', 'x'); @@ -78,7 +78,7 @@ step "s1-insert" step "s1-get-shard-distribution" { - select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardstate != 4 and shardid in (SELECT * FROM selected_shard) order by nodeport; } session "s2" diff --git a/src/test/regress/spec/isolation_copy_placement_vs_modification.spec b/src/test/regress/spec/isolation_copy_placement_vs_modification.spec index fc3f5eb8b..28c65756e 100644 --- a/src/test/regress/spec/isolation_copy_placement_vs_modification.spec +++ b/src/test/regress/spec/isolation_copy_placement_vs_modification.spec @@ -8,17 +8,12 @@ setup SELECT create_distributed_table('test_repair_placement_vs_modification', 'x'); SELECT get_shard_id_for_distribution_column('test_repair_placement_vs_modification', 5) INTO selected_shard; - - SET citus.shard_replication_factor TO 1; - CREATE TABLE test_copy_placement_vs_modification (x int, y int); - SELECT create_distributed_table('test_copy_placement_vs_modification', 'x'); } teardown { DROP TABLE test_repair_placement_vs_modification; DROP TABLE selected_shard; - DROP TABLE test_copy_placement_vs_modification; } session "s1" @@ -66,36 +61,6 @@ step "s1-copy" COPY test_repair_placement_vs_modification FROM PROGRAM 'echo 1,1 && echo 2,2 && echo 3,3 && echo 4,4 && echo 5,5' WITH CSV; } -step "s1-insert-copy-table" -{ - INSERT INTO test_copy_placement_vs_modification VALUES (5, 10); -} - -step "s1-update-copy-table" -{ - UPDATE test_copy_placement_vs_modification SET y = 5 WHERE x = 5; -} - -step "s1-delete-copy-table" -{ - DELETE FROM test_copy_placement_vs_modification WHERE x = 5; -} - -step "s1-select-copy-table" -{ - SELECT count(*) FROM test_copy_placement_vs_modification WHERE x = 5; -} - -step "s1-ddl-copy-table" -{ - CREATE INDEX test_copy_placement_vs_modification_index ON test_copy_placement_vs_modification(x); -} - -step "s1-copy-copy-table" -{ - COPY test_copy_placement_vs_modification FROM PROGRAM 'echo 1,1 && echo 2,2 && echo 3,3 && echo 4,4 && echo 5,5' WITH CSV; -} 
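For context on the s1-get-shard-distribution hunks above: with deferred shard drop, the source placement of a moved shard is assumed to stay in pg_dist_placement marked to-be-deleted (shardstate 4) until orphaned-shard cleanup runs, which is why the specs now filter it out before listing node ports. The shard id below is made up and the snippet only illustrates that intermediate state; it is not part of the patch.

-- illustrative sketch: right after a deferred move the shard briefly has two
-- placements; the orphaned source placement is assumed to carry shardstate 4
SELECT master_move_shard_placement(102008, 'localhost', 57637,
                                   'localhost', 57638,
                                   shard_transfer_mode := 'block_writes');
SELECT shardid, shardstate, groupid
FROM pg_dist_placement
WHERE shardid = 102008;   -- 102008 is a hypothetical shard id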
- step "s1-commit" { COMMIT; @@ -118,13 +83,6 @@ step "s2-repair-placement" SELECT master_copy_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); } -step "s2-copy-placement" -{ - SELECT master_copy_shard_placement((SELECT get_shard_id_for_distribution_column('test_copy_placement_vs_modification', 5)), - 'localhost', 57637, 'localhost', 57638, - do_repair := false, transfer_mode := 'block_writes'); -} - step "s2-commit" { COMMIT; @@ -168,19 +126,3 @@ permutation "s1-insert" "s1-begin" "s1-select" "s2-set-placement-inactive" "s2-b permutation "s1-begin" "s1-select" "s2-set-placement-inactive" "s2-begin" "s2-repair-placement" "s1-insert" "s2-commit" "s1-commit" "s2-print-content" permutation "s1-begin" "s1-select" "s2-set-placement-inactive" "s2-begin" "s2-repair-placement" "s1-copy" "s2-commit" "s1-commit" "s2-print-content" permutation "s1-begin" "s1-select" "s2-set-placement-inactive" "s2-begin" "s2-repair-placement" "s1-ddl" "s2-commit" "s1-commit" "s2-print-index-count" - -// verify that copy placement (do_repair := false) blocks other operations, except SELECT -permutation "s1-begin" "s2-begin" "s2-copy-placement" "s1-update-copy-table" "s2-commit" "s1-commit" -permutation "s1-begin" "s2-begin" "s2-copy-placement" "s1-delete-copy-table" "s2-commit" "s1-commit" -permutation "s1-begin" "s2-begin" "s2-copy-placement" "s1-insert-copy-table" "s2-commit" "s1-commit" -permutation "s1-begin" "s2-begin" "s2-copy-placement" "s1-copy-copy-table" "s2-commit" "s1-commit" -permutation "s1-begin" "s2-begin" "s2-copy-placement" "s1-ddl-copy-table" "s2-commit" "s1-commit" -permutation "s1-begin" "s2-begin" "s2-copy-placement" "s1-select-copy-table" "s2-commit" "s1-commit" - -// verify that copy placement (do_repair := false) is blocked by other operations, except SELECT -permutation "s1-begin" "s2-begin" "s1-update-copy-table" "s2-copy-placement" "s1-commit" "s2-commit" -permutation "s1-begin" "s2-begin" "s1-delete-copy-table" "s2-copy-placement" "s1-commit" "s2-commit" -permutation "s1-begin" "s2-begin" "s1-insert-copy-table" "s2-copy-placement" "s1-commit" "s2-commit" -permutation "s1-begin" "s2-begin" "s1-copy-copy-table" "s2-copy-placement" "s1-commit" "s2-commit" -permutation "s1-begin" "s2-begin" "s1-ddl-copy-table" "s2-copy-placement" "s1-commit" "s2-commit" -permutation "s1-begin" "s2-begin" "s1-select-copy-table" "s2-copy-placement" "s1-commit" "s2-commit" diff --git a/src/test/regress/spec/isolation_ensure_dependency_activate_node.spec b/src/test/regress/spec/isolation_ensure_dependency_activate_node.spec index 539c71c5c..3f69db1d7 100644 --- a/src/test/regress/spec/isolation_ensure_dependency_activate_node.spec +++ b/src/test/regress/spec/isolation_ensure_dependency_activate_node.spec @@ -91,7 +91,6 @@ step "s2-create-table" { CREATE TABLE t1 (a int, b int); -- session needs to have replication factor set to 1, can't do in setup - SET citus.replication_model TO 'streaming'; SET citus.shard_replication_factor TO 1; SELECT create_distributed_table('t1', 'a'); } @@ -105,7 +104,6 @@ step "s2-create-table-with-type" { CREATE TABLE t1 (a int, b tt1); -- session needs to have replication factor set to 1, can't do in setup - SET citus.replication_model TO 'streaming'; SET citus.shard_replication_factor TO 1; SELECT create_distributed_table('t1', 'a'); } diff --git a/src/test/regress/spec/isolation_mx_common.include.spec b/src/test/regress/spec/isolation_mx_common.include.spec index 4b6127660..497d57fc4 100644 --- 
a/src/test/regress/spec/isolation_mx_common.include.spec +++ b/src/test/regress/spec/isolation_mx_common.include.spec @@ -30,6 +30,5 @@ setup false) FROM pg_dist_node; - SET citus.replication_model to streaming; SET citus.shard_replication_factor TO 1; } diff --git a/src/test/regress/spec/isolation_rebalancer_deferred_drop.spec b/src/test/regress/spec/isolation_rebalancer_deferred_drop.spec index 6e91e97be..bf8a10eb7 100644 --- a/src/test/regress/spec/isolation_rebalancer_deferred_drop.spec +++ b/src/test/regress/spec/isolation_rebalancer_deferred_drop.spec @@ -3,6 +3,11 @@ setup { + CREATE OR REPLACE FUNCTION run_try_drop_marked_shards() + RETURNS VOID + AS 'citus' + LANGUAGE C STRICT VOLATILE; + CREATE OR REPLACE FUNCTION start_session_level_connection_to_node(text, integer) RETURNS void LANGUAGE C STRICT VOLATILE @@ -21,17 +26,15 @@ setup SELECT citus_internal.replace_isolation_tester_func(); SELECT citus_internal.refresh_isolation_tester_prepared_statement(); -CREATE OR REPLACE FUNCTION master_defer_delete_shards() - RETURNS int - LANGUAGE C STRICT - AS 'citus', $$master_defer_delete_shards$$; -COMMENT ON FUNCTION master_defer_delete_shards() - IS 'remove orphaned shards'; +CREATE OR REPLACE PROCEDURE isolation_cleanup_orphaned_shards() + LANGUAGE C + AS 'citus', $$isolation_cleanup_orphaned_shards$$; +COMMENT ON PROCEDURE isolation_cleanup_orphaned_shards() + IS 'cleanup orphaned shards'; SET citus.next_shard_id to 120000; SET citus.shard_count TO 8; SET citus.shard_replication_factor TO 1; - SET citus.defer_drop_after_shard_move TO ON; CREATE TABLE t1 (x int PRIMARY KEY, y int); SELECT create_distributed_table('t1', 'x'); @@ -56,13 +59,30 @@ step "s1-begin" step "s1-move-placement" { - SET citus.defer_drop_after_shard_move TO ON; - SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); + SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); +} + +step "s1-move-placement-back" +{ + SET client_min_messages to NOTICE; + SHOW log_error_verbosity; + SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57638, 'localhost', 57637); +} + +step "s1-move-placement-without-deferred" { + SET citus.defer_drop_after_shard_move TO OFF; + SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); + } step "s1-drop-marked-shards" { - SELECT public.master_defer_delete_shards(); + SET client_min_messages to NOTICE; + CALL isolation_cleanup_orphaned_shards(); +} + +step "s1-lock-pg-dist-placement" { + LOCK TABLE pg_dist_placement IN SHARE ROW EXCLUSIVE MODE; } step "s1-commit" @@ -72,6 +92,14 @@ step "s1-commit" session "s2" +step "s2-begin" { + BEGIN; +} + +step "s2-drop-old-shards" { + SELECT run_try_drop_marked_shards(); +} + step "s2-start-session-level-connection" { SELECT start_session_level_connection_to_node('localhost', 57637); @@ -88,11 +116,26 @@ step "s2-lock-table-on-worker" SELECT run_commands_on_session_level_connection_to_node('LOCK TABLE t1_120000'); } +step "s2-select" { + SELECT COUNT(*) FROM t1; +} + step "s2-drop-marked-shards" { - SELECT public.master_defer_delete_shards(); + SET client_min_messages to DEBUG1; + CALL isolation_cleanup_orphaned_shards(); } +step "s2-commit" { + COMMIT; +} + + permutation "s1-begin" "s1-move-placement" "s1-drop-marked-shards" "s2-drop-marked-shards" "s1-commit" permutation "s1-begin" "s1-move-placement" "s2-drop-marked-shards" "s1-drop-marked-shards" "s1-commit" 
permutation "s1-begin" "s1-move-placement" "s2-start-session-level-connection" "s2-lock-table-on-worker" "s1-drop-marked-shards" "s1-commit" "s2-stop-connection" +// make sure we give a clear error when we try to replace an orphaned shard that is still in use +permutation "s1-begin" "s1-move-placement" "s2-start-session-level-connection" "s2-lock-table-on-worker" "s1-commit" "s1-begin" "s1-move-placement-back" "s1-commit" "s2-stop-connection" +// make sure we error if we cannot get the lock on pg_dist_placement +permutation "s1-begin" "s1-lock-pg-dist-placement" "s2-drop-old-shards" "s1-commit" +permutation "s1-begin" "s2-begin" "s2-select" "s1-move-placement-without-deferred" "s2-commit" "s1-commit" diff --git a/src/test/regress/spec/isolation_shard_rebalancer.spec b/src/test/regress/spec/isolation_shard_rebalancer.spec index ab3e0e6fe..1aca39ca6 100644 --- a/src/test/regress/spec/isolation_shard_rebalancer.spec +++ b/src/test/regress/spec/isolation_shard_rebalancer.spec @@ -3,9 +3,9 @@ setup SELECT 1 FROM master_add_node('localhost', 57637); SELECT 1 FROM master_add_node('localhost', 57638); CREATE TABLE colocated1 (test_id integer NOT NULL, data text); - SELECT create_distributed_table('colocated1', 'test_id', 'hash'); + SELECT create_distributed_table('colocated1', 'test_id', 'hash', 'none'); CREATE TABLE colocated2 (test_id integer NOT NULL, data text); - SELECT create_distributed_table('colocated2', 'test_id', 'hash'); + SELECT create_distributed_table('colocated2', 'test_id', 'hash', 'colocated1'); CREATE TABLE non_colocated (test_id integer NOT NULL, data text); SELECT create_distributed_table('non_colocated', 'test_id', 'hash', 'none'); } diff --git a/src/test/regress/spec/isolation_shard_rebalancer_progress.spec b/src/test/regress/spec/isolation_shard_rebalancer_progress.spec new file mode 100644 index 000000000..d2248292d --- /dev/null +++ b/src/test/regress/spec/isolation_shard_rebalancer_progress.spec @@ -0,0 +1,84 @@ +setup +{ + SELECT citus_internal.replace_isolation_tester_func(); + SELECT citus_internal.refresh_isolation_tester_prepared_statement(); + select setval('pg_dist_shardid_seq', GREATEST(1500000, nextval('pg_dist_shardid_seq'))); + SET citus.shard_count TO 4; + SET citus.shard_replication_factor TO 1; + SELECT 1 FROM master_add_node('localhost', 57637); + SELECT master_set_node_property('localhost', 57638, 'shouldhaveshards', false); + CREATE TABLE colocated1 (test_id integer NOT NULL, data text); + SELECT create_distributed_table('colocated1', 'test_id', 'hash', 'none'); + CREATE TABLE colocated2 (test_id integer NOT NULL, data text); + SELECT create_distributed_table('colocated2', 'test_id', 'hash', 'colocated1'); + -- 1 and 3 are chosen so they go to shard 1 and 2 + INSERT INTO colocated1(test_id) SELECT 1 from generate_series(0, 1000) i; + INSERT INTO colocated2(test_id) SELECT 1 from generate_series(0, 10000) i; + INSERT INTO colocated1(test_id) SELECT 3 from generate_series(0, 5000) i; + select * from pg_dist_placement; + SELECT master_set_node_property('localhost', 57638, 'shouldhaveshards', true); +} + +teardown +{ + DROP TABLE colocated2; + DROP TABLE colocated1; + SELECT citus_internal.restore_isolation_tester_func(); +} + +session "s1" + +step "s1-rebalance-c1" +{ + BEGIN; + SELECT * FROM get_rebalance_table_shards_plan('colocated1'); + SELECT rebalance_table_shards('colocated1', shard_transfer_mode:='block_writes'); +} + +step "s1-commit" +{ + COMMIT; +} + +session "s2" + +step "s2-lock-1" +{ + SELECT pg_advisory_lock(29279, 1); +} + +step "s2-lock-2" 
+{ + SELECT pg_advisory_lock(29279, 2); +} + +step "s2-unlock-1" +{ + SELECT pg_advisory_unlock(29279, 1); +} + +step "s2-unlock-2" +{ + SELECT pg_advisory_unlock(29279, 2); +} + +session "s3" + +step "s3-progress" +{ + set client_min_messages=NOTICE; + SELECT + table_name, + shardid, + shard_size, + sourcename, + sourceport, + source_shard_size, + targetname, + targetport, + target_shard_size, + progress + FROM get_rebalance_progress(); +} + +permutation "s2-lock-1" "s2-lock-2" "s1-rebalance-c1" "s3-progress" "s2-unlock-1" "s3-progress" "s2-unlock-2" "s3-progress" "s1-commit" "s3-progress" diff --git a/src/test/regress/spec/isolation_update_node.spec b/src/test/regress/spec/isolation_update_node.spec index feb21ffaf..09aabacce 100644 --- a/src/test/regress/spec/isolation_update_node.spec +++ b/src/test/regress/spec/isolation_update_node.spec @@ -19,6 +19,14 @@ step "s1-begin" BEGIN; } +step "s1-prepare-transaction" { + PREPARE transaction 'label'; +} + +step "s1-commit-prepared" { + COMMIT prepared 'label'; +} + step "s1-update-node-1" { SELECT 1 FROM master_update_node( @@ -27,6 +35,20 @@ step "s1-update-node-1" 58637); } +step "s1-update-node-nonexistent" { + SELECT 1 FROM master_update_node( + (select nodeid from pg_dist_node where nodeport = 57637), + 'non-existent', + 57637); +} + +step "s1-update-node-existent" { + SELECT 1 FROM master_update_node( + (select nodeid from pg_dist_node where nodeport = 57637), + 'localhost', + 57637); +} + step "s1-commit" { COMMIT; @@ -62,6 +84,25 @@ step "s2-update-node-2" 58638); } +step "s2-create-table" { + CREATE TABLE test (a int); + SELECT create_distributed_table('test','a'); +} + +step "s2-cache-prepared-statement" { + PREPARE foo AS SELECT COUNT(*) FROM test WHERE a = 3; + EXECUTE foo; + EXECUTE foo; + EXECUTE foo; + EXECUTE foo; + EXECUTE foo; + EXECUTE foo; +} + +step "s2-execute-prepared" { + EXECUTE foo; +} + step "s2-verify-metadata" { SELECT nodeid, groupid, nodename, nodeport FROM pg_dist_node ORDER BY nodeid; @@ -76,6 +117,10 @@ step "s2-start-metadata-sync-node-2" SELECT start_metadata_sync_to_node('localhost', 57638); } +step "s2-drop-table" { + DROP TABLE test; +} + step "s2-abort" { ABORT; @@ -91,3 +136,8 @@ permutation "s1-begin" "s1-update-node-1" "s2-begin" "s2-update-node-1" "s1-comm // cannot run start_metadata_sync_to_node in a transaction, so we're not // testing the reverse order here. 
permutation "s1-begin" "s1-update-node-1" "s2-start-metadata-sync-node-2" "s1-commit" "s2-verify-metadata" + +// make sure we have entries in prepared statement cache +// then make sure that after we update pg_dist_node, the changes are visible to +// the prepared statement +permutation "s2-create-table" "s1-begin" "s1-update-node-nonexistent" "s1-prepare-transaction" "s2-cache-prepared-statement" "s1-commit-prepared" "s2-execute-prepared" "s1-update-node-existent" "s2-drop-table" diff --git a/src/test/regress/spec/shared_connection_waits.spec b/src/test/regress/spec/shared_connection_waits.spec index 80a8cb9e3..e7cd7eaca 100644 --- a/src/test/regress/spec/shared_connection_waits.spec +++ b/src/test/regress/spec/shared_connection_waits.spec @@ -22,6 +22,7 @@ teardown SELECT set_max_shared_pool_size(100); DROP FUNCTION wake_up_connection_pool_waiters(); DROP FUNCTION set_max_shared_pool_size(int); + DROP TABLE test; } session "s1" diff --git a/src/test/regress/sql/.gitignore b/src/test/regress/sql/.gitignore index d0085e0d7..093d9b90b 100644 --- a/src/test/regress/sql/.gitignore +++ b/src/test/regress/sql/.gitignore @@ -1,35 +1,24 @@ +/columnar_chunk_filtering.sql +/columnar_copyto.sql +/columnar_data_types.sql +/columnar_load.sql +/hyperscale_tutorial.sql /multi_agg_distinct.sql /multi_agg_type_conversion.sql /multi_alter_table_statements.sql /multi_append_table_to_shard.sql /multi_behavioral_analytics_create_table.sql /multi_behavioral_analytics_create_table_superuser.sql +/multi_complex_count_distinct.sql /multi_copy.sql /multi_create_schema.sql /multi_large_shardid.sql -/multi_master_delete_protocol.sql -/multi_outer_join.sql -/multi_outer_join_reference.sql /multi_load_data.sql /multi_load_data_superuser.sql /multi_load_large_records.sql /multi_load_more_data.sql -/worker_copy.sql -/multi_complex_count_distinct.sql +/multi_master_delete_protocol.sql /multi_mx_copy_data.sql -/multi_behavioral_analytics_create_table.sql -/multi_insert_select_behavioral_analytics_create_table.sql -/hyperscale_tutorial.sql -/am_chunk_filtering.sql -/am_copyto.sql -/am_data_types.sql -/am_load.sql -/fdw_block_filtering.sql -/fdw_copyto.sql -/fdw_create.sql -/fdw_data_types.sql -/fdw_load.sql -/columnar_chunk_filtering.sql -/columnar_copyto.sql -/columnar_data_types.sql -/columnar_load.sql +/multi_outer_join.sql +/multi_outer_join_reference.sql +/worker_copy.sql diff --git a/src/test/regress/sql/alter_database_owner.sql b/src/test/regress/sql/alter_database_owner.sql new file mode 100644 index 000000000..ae8418468 --- /dev/null +++ b/src/test/regress/sql/alter_database_owner.sql @@ -0,0 +1,175 @@ +CREATE SCHEMA alter_database_owner; +SET search_path TO alter_database_owner, public; + +CREATE USER database_owner_1; +CREATE USER database_owner_2; +SELECT run_command_on_workers('CREATE USER database_owner_1'); +SELECT run_command_on_workers('CREATE USER database_owner_2'); + +-- make sure the propagation of ALTER DATABASE ... OWNER TO ... 
is on +SET citus.enable_alter_database_owner TO on; + +-- list the owners of the current database on all nodes +SELECT run_command_on_workers($$ + SELECT u.rolname + FROM pg_database d + JOIN pg_roles u + ON (d.datdba = u.oid) + WHERE d.datname = current_database(); +$$); + +-- remove a node to verify addition later +SELECT master_remove_node('localhost', :worker_2_port); + +-- verify we can change the owner of a database +ALTER DATABASE regression OWNER TO database_owner_1; + +-- list the owner of the current database on the coordinator +SELECT u.rolname + FROM pg_database d + JOIN pg_roles u + ON (d.datdba = u.oid) + WHERE d.datname = current_database(); + +-- list the owners of the current database on all nodes +SELECT run_command_on_workers($$ + SELECT u.rolname + FROM pg_database d + JOIN pg_roles u + ON (d.datdba = u.oid) + WHERE d.datname = current_database(); +$$); + +-- turn off propagation to verify it does _not_ propagate to new nodes when turned off +SET citus.enable_alter_database_owner TO off; + +-- add back second node to verify the owner of the database was set accordingly +SELECT 1 FROM master_add_node('localhost', :worker_2_port); + +-- list the owners of the current database on all nodes, should reflect on newly added node +SELECT run_command_on_workers($$ + SELECT u.rolname + FROM pg_database d + JOIN pg_roles u + ON (d.datdba = u.oid) + WHERE d.datname = current_database(); +$$); + +-- turn on propagation to verify it does propagate to new nodes when enabled +SET citus.enable_alter_database_owner TO on; +SELECT master_remove_node('localhost', :worker_2_port); -- remove so we can re add with propagation on + +-- add back second node to verify the owner of the database was set accordingly +SELECT 1 FROM master_add_node('localhost', :worker_2_port); + +-- list the owners of the current database on all nodes, should reflect on newly added node +SELECT run_command_on_workers($$ + SELECT u.rolname + FROM pg_database d + JOIN pg_roles u + ON (d.datdba = u.oid) + WHERE d.datname = current_database(); +$$); + +-- test changing the owner in a transaction and rollback to cancel +BEGIN; +ALTER DATABASE regression OWNER TO database_owner_2; +ROLLBACK; +-- list the owners of the current database on all nodes +SELECT u.rolname + FROM pg_database d + JOIN pg_roles u + ON (d.datdba = u.oid) + WHERE d.datname = current_database(); +SELECT run_command_on_workers($$ + SELECT u.rolname + FROM pg_database d + JOIN pg_roles u + ON (d.datdba = u.oid) + WHERE d.datname = current_database(); +$$); + + +CREATE TABLE t (a int PRIMARY KEY); +SELECT create_distributed_table('t', 'a'); +-- test changing the owner in a xact that already had parallel execution +BEGIN; +SELECT count(*) FROM t; -- parallel execution; +ALTER DATABASE regression OWNER TO database_owner_2; -- should ERROR +ROLLBACK; + +-- list the owners of the current database on all nodes +SELECT u.rolname + FROM pg_database d + JOIN pg_roles u + ON (d.datdba = u.oid) + WHERE d.datname = current_database(); +SELECT run_command_on_workers($$ + SELECT u.rolname + FROM pg_database d + JOIN pg_roles u + ON (d.datdba = u.oid) + WHERE d.datname = current_database(); +$$); + +BEGIN; +SET LOCAL citus.multi_shard_modify_mode TO 'sequential'; +SELECT count(*) FROM t; -- parallel execution; +ALTER DATABASE regression OWNER TO database_owner_2; +COMMIT; + +-- list the owners of the current database on all nodes +SELECT u.rolname + FROM pg_database d + JOIN pg_roles u + ON (d.datdba = u.oid) + WHERE d.datname = current_database(); +SELECT 
run_command_on_workers($$ + SELECT u.rolname + FROM pg_database d + JOIN pg_roles u + ON (d.datdba = u.oid) + WHERE d.datname = current_database(); +$$); + +-- turn propagation off and verify it does not propagate interactively when turned off +SET citus.enable_alter_database_owner TO off; + +ALTER DATABASE regression OWNER TO database_owner_1; +-- list the owners of the current database on all nodes +SELECT u.rolname + FROM pg_database d + JOIN pg_roles u + ON (d.datdba = u.oid) + WHERE d.datname = current_database(); +SELECT run_command_on_workers($$ + SELECT u.rolname + FROM pg_database d + JOIN pg_roles u + ON (d.datdba = u.oid) + WHERE d.datname = current_database(); +$$); + +-- reset state of cluster +SET citus.enable_alter_database_owner TO on; +ALTER DATABASE regression OWNER TO current_user; +-- list the owners of the current database on all nodes +SELECT u.rolname + FROM pg_database d + JOIN pg_roles u + ON (d.datdba = u.oid) + WHERE d.datname = current_database(); +SELECT run_command_on_workers($$ + SELECT u.rolname + FROM pg_database d + JOIN pg_roles u + ON (d.datdba = u.oid) + WHERE d.datname = current_database(); +$$); + +DROP USER database_owner_1; +DROP USER database_owner_2; +SELECT run_command_on_workers('DROP USER database_owner_1'); +SELECT run_command_on_workers('DROP USER database_owner_2'); +SET client_min_messages TO warning; +DROP SCHEMA alter_database_owner CASCADE; diff --git a/src/test/regress/sql/alter_distributed_table.sql b/src/test/regress/sql/alter_distributed_table.sql index 5357695fd..5df68c4ed 100644 --- a/src/test/regress/sql/alter_distributed_table.sql +++ b/src/test/regress/sql/alter_distributed_table.sql @@ -148,7 +148,7 @@ SELECT table_name::text, shard_count, access_method FROM public.citus_tables WHE -- test with metadata sync -SET citus.replication_model TO 'streaming'; +SET citus.shard_replication_factor TO 1; SELECT start_metadata_sync_to_node('localhost', :worker_1_port); CREATE TABLE metadata_sync_table (a BIGSERIAL); @@ -159,7 +159,6 @@ SELECT alter_distributed_table('metadata_sync_table', shard_count:=8); SELECT table_name, shard_count FROM public.citus_tables WHERE table_name::text = 'metadata_sync_table'; -SET citus.replication_model TO DEFAULT; SELECT stop_metadata_sync_to_node('localhost', :worker_1_port); -- test complex cascade operations diff --git a/src/test/regress/sql/ch_bench_having_mx.sql b/src/test/regress/sql/ch_bench_having_mx.sql index 419107f63..1e1a4cd17 100644 --- a/src/test/regress/sql/ch_bench_having_mx.sql +++ b/src/test/regress/sql/ch_bench_having_mx.sql @@ -1,5 +1,4 @@ ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1640000; -SET citus.replication_model TO streaming; SET citus.shard_replication_factor to 1; SET citus.shard_count to 4; @@ -123,7 +122,6 @@ order by s_i_id; \c - - - :master_port -SET citus.replication_model TO streaming; SET citus.shard_replication_factor to 1; SET citus.shard_count to 4; diff --git a/src/test/regress/sql/citus_local_tables.sql b/src/test/regress/sql/citus_local_tables.sql index f4da2ea5f..a4b235463 100644 --- a/src/test/regress/sql/citus_local_tables.sql +++ b/src/test/regress/sql/citus_local_tables.sql @@ -356,7 +356,7 @@ BEGIN; SELECT logicalrelid::regclass::text FROM pg_dist_partition, pg_tables WHERE tablename=logicalrelid::regclass::text AND schemaname='citus_local_tables_test_schema' AND - partmethod = 'n' AND repmodel = 'c' + partmethod = 'n' AND repmodel = 's' ORDER BY 1; ROLLBACK; @@ -376,7 +376,7 @@ BEGIN; SELECT logicalrelid::regclass::text FROM pg_dist_partition, 
pg_tables WHERE tablename=logicalrelid::regclass::text AND schemaname='citus_local_tables_test_schema' AND - partmethod = 'n' AND repmodel = 'c' + partmethod = 'n' AND repmodel = 's' ORDER BY 1; ROLLBACK; diff --git a/src/test/regress/sql/citus_local_tables_queries_mx.sql b/src/test/regress/sql/citus_local_tables_queries_mx.sql index b1a9e4622..f4a53bffc 100644 --- a/src/test/regress/sql/citus_local_tables_queries_mx.sql +++ b/src/test/regress/sql/citus_local_tables_queries_mx.sql @@ -15,7 +15,7 @@ RESET client_min_messages; -- start metadata sync to worker 1 SELECT start_metadata_sync_to_node('localhost', :worker_1_port); -SET citus.replication_model TO streaming; +SET citus.shard_replication_factor TO 1; CREATE TABLE dummy_reference_table(a int unique, b int); SELECT create_reference_table('dummy_reference_table'); @@ -545,7 +545,7 @@ TRUNCATE reference_table, citus_local_table, distributed_table; \c - - - :master_port SET search_path TO citus_local_table_queries_mx; -SET citus.replication_model TO streaming; +SET citus.shard_replication_factor TO 1; ALTER TABLE reference_table ADD CONSTRAINT pkey_ref PRIMARY KEY (a); ALTER TABLE citus_local_table ADD CONSTRAINT pkey_c PRIMARY KEY (a); @@ -588,7 +588,7 @@ ROLLBACK; \c - - - :master_port SET search_path TO citus_local_table_queries_mx; -SET citus.replication_model TO streaming; +SET citus.shard_replication_factor TO 1; ALTER TABLE distributed_table DROP CONSTRAINT fkey_dist_to_ref; @@ -602,7 +602,7 @@ ROLLBACK; \c - - - :master_port SET search_path TO citus_local_table_queries_mx; -SET citus.replication_model TO streaming; +SET citus.shard_replication_factor TO 1; -- remove uniqueness constraint and dependent foreign key constraint for next tests ALTER TABLE reference_table DROP CONSTRAINT fkey_ref_to_local; diff --git a/src/test/regress/sql/columnar_citus_integration.sql b/src/test/regress/sql/columnar_citus_integration.sql index 2d80175c7..6818eec9e 100644 --- a/src/test/regress/sql/columnar_citus_integration.sql +++ b/src/test/regress/sql/columnar_citus_integration.sql @@ -54,7 +54,7 @@ SELECT run_command_on_placements('table_option',$cmd$ SELECT chunk_group_row_limit FROM columnar.options WHERE regclass = '%s'::regclass; $cmd$); -- change setting -SELECT alter_columnar_table_set('table_option', chunk_group_row_limit => 100); +SELECT alter_columnar_table_set('table_option', chunk_group_row_limit => 2000); -- verify setting SELECT run_command_on_placements('table_option',$cmd$ SELECT chunk_group_row_limit FROM columnar.options WHERE regclass = '%s'::regclass; @@ -72,7 +72,7 @@ SELECT run_command_on_placements('table_option',$cmd$ SELECT stripe_row_limit FROM columnar.options WHERE regclass = '%s'::regclass; $cmd$); -- change setting -SELECT alter_columnar_table_set('table_option', stripe_row_limit => 100); +SELECT alter_columnar_table_set('table_option', stripe_row_limit => 2000); -- verify setting SELECT run_command_on_placements('table_option',$cmd$ SELECT stripe_row_limit FROM columnar.options WHERE regclass = '%s'::regclass; @@ -87,8 +87,8 @@ $cmd$); -- verify settings are propagated when creating a table CREATE TABLE table_option_2 (a int, b text) USING columnar; SELECT alter_columnar_table_set('table_option_2', - chunk_group_row_limit => 100, - stripe_row_limit => 1000, + chunk_group_row_limit => 2000, + stripe_row_limit => 20000, compression => 'pglz', compression_level => 15); SELECT create_distributed_table('table_option_2', 'a'); @@ -154,7 +154,7 @@ SELECT run_command_on_placements('table_option',$cmd$ SELECT 
chunk_group_row_limit FROM columnar.options WHERE regclass = '%s'::regclass; $cmd$); -- change setting -SELECT alter_columnar_table_set('table_option', chunk_group_row_limit => 100); +SELECT alter_columnar_table_set('table_option', chunk_group_row_limit => 2000); -- verify setting SELECT run_command_on_placements('table_option',$cmd$ SELECT chunk_group_row_limit FROM columnar.options WHERE regclass = '%s'::regclass; @@ -172,7 +172,7 @@ SELECT run_command_on_placements('table_option',$cmd$ SELECT stripe_row_limit FROM columnar.options WHERE regclass = '%s'::regclass; $cmd$); -- change setting -SELECT alter_columnar_table_set('table_option', stripe_row_limit => 100); +SELECT alter_columnar_table_set('table_option', stripe_row_limit => 2000); -- verify setting SELECT run_command_on_placements('table_option',$cmd$ SELECT stripe_row_limit FROM columnar.options WHERE regclass = '%s'::regclass; @@ -187,8 +187,8 @@ $cmd$); -- verify settings are propagated when creating a table CREATE TABLE table_option_2 (a int, b text) USING columnar; SELECT alter_columnar_table_set('table_option_2', - chunk_group_row_limit => 100, - stripe_row_limit => 1000, + chunk_group_row_limit => 2000, + stripe_row_limit => 20000, compression => 'pglz', compression_level => 19); SELECT create_distributed_table('table_option_2', 'a'); @@ -251,7 +251,7 @@ SELECT run_command_on_placements('table_option_reference',$cmd$ SELECT chunk_group_row_limit FROM columnar.options WHERE regclass = '%s'::regclass; $cmd$); -- change setting -SELECT alter_columnar_table_set('table_option_reference', chunk_group_row_limit => 100); +SELECT alter_columnar_table_set('table_option_reference', chunk_group_row_limit => 2000); -- verify setting SELECT run_command_on_placements('table_option_reference',$cmd$ SELECT chunk_group_row_limit FROM columnar.options WHERE regclass = '%s'::regclass; @@ -269,7 +269,7 @@ SELECT run_command_on_placements('table_option_reference',$cmd$ SELECT stripe_row_limit FROM columnar.options WHERE regclass = '%s'::regclass; $cmd$); -- change setting -SELECT alter_columnar_table_set('table_option_reference', stripe_row_limit => 100); +SELECT alter_columnar_table_set('table_option_reference', stripe_row_limit => 2000); -- verify setting SELECT run_command_on_placements('table_option_reference',$cmd$ SELECT stripe_row_limit FROM columnar.options WHERE regclass = '%s'::regclass; @@ -284,8 +284,8 @@ $cmd$); -- verify settings are propagated when creating a table CREATE TABLE table_option_reference_2 (a int, b text) USING columnar; SELECT alter_columnar_table_set('table_option_reference_2', - chunk_group_row_limit => 100, - stripe_row_limit => 1000, + chunk_group_row_limit => 2000, + stripe_row_limit => 20000, compression => 'pglz', compression_level => 9); SELECT create_reference_table('table_option_reference_2'); @@ -351,7 +351,7 @@ SELECT run_command_on_placements('table_option_citus_local',$cmd$ SELECT chunk_group_row_limit FROM columnar.options WHERE regclass = '%s'::regclass; $cmd$); -- change setting -SELECT alter_columnar_table_set('table_option_citus_local', chunk_group_row_limit => 100); +SELECT alter_columnar_table_set('table_option_citus_local', chunk_group_row_limit => 2000); -- verify setting SELECT run_command_on_placements('table_option_citus_local',$cmd$ SELECT chunk_group_row_limit FROM columnar.options WHERE regclass = '%s'::regclass; @@ -369,7 +369,7 @@ SELECT run_command_on_placements('table_option_citus_local',$cmd$ SELECT stripe_row_limit FROM columnar.options WHERE regclass = '%s'::regclass; $cmd$); -- 
change setting -SELECT alter_columnar_table_set('table_option_citus_local', stripe_row_limit => 100); +SELECT alter_columnar_table_set('table_option_citus_local', stripe_row_limit => 2000); -- verify setting SELECT run_command_on_placements('table_option_citus_local',$cmd$ SELECT stripe_row_limit FROM columnar.options WHERE regclass = '%s'::regclass; @@ -384,8 +384,8 @@ $cmd$); -- verify settings are propagated when creating a table CREATE TABLE table_option_citus_local_2 (a int, b text) USING columnar; SELECT alter_columnar_table_set('table_option_citus_local_2', - chunk_group_row_limit => 100, - stripe_row_limit => 1000, + chunk_group_row_limit => 2000, + stripe_row_limit => 20000, compression => 'pglz', compression_level => 9); SELECT citus_add_local_table_to_metadata('table_option_citus_local_2'); diff --git a/src/test/regress/sql/columnar_empty.sql b/src/test/regress/sql/columnar_empty.sql index a733a8540..a3699fa58 100644 --- a/src/test/regress/sql/columnar_empty.sql +++ b/src/test/regress/sql/columnar_empty.sql @@ -8,8 +8,8 @@ create table t_compressed(a int) using columnar; -- set options SELECT alter_columnar_table_set('t_compressed', compression => 'pglz'); -SELECT alter_columnar_table_set('t_compressed', stripe_row_limit => 100); -SELECT alter_columnar_table_set('t_compressed', chunk_group_row_limit => 100); +SELECT alter_columnar_table_set('t_compressed', stripe_row_limit => 2000); +SELECT alter_columnar_table_set('t_compressed', chunk_group_row_limit => 1000); SELECT * FROM columnar.options WHERE regclass = 't_compressed'::regclass; diff --git a/src/test/regress/sql/columnar_insert.sql b/src/test/regress/sql/columnar_insert.sql index 4bc850f11..ad4660fe5 100644 --- a/src/test/regress/sql/columnar_insert.sql +++ b/src/test/regress/sql/columnar_insert.sql @@ -116,7 +116,7 @@ DROP TABLE test_toast_columnar; -- We support writing into zero column tables, but not reading from them. -- We test that metadata makes sense so we can fix the read path in future. 
CREATE TABLE zero_col() USING columnar; -SELECT alter_columnar_table_set('zero_col', chunk_group_row_limit => 10); +SELECT alter_columnar_table_set('zero_col', chunk_group_row_limit => 1000); INSERT INTO zero_col DEFAULT VALUES; INSERT INTO zero_col DEFAULT VALUES; diff --git a/src/test/regress/sql/columnar_tableoptions.sql b/src/test/regress/sql/columnar_tableoptions.sql index 1596d2546..0033042fc 100644 --- a/src/test/regress/sql/columnar_tableoptions.sql +++ b/src/test/regress/sql/columnar_tableoptions.sql @@ -24,14 +24,14 @@ SELECT * FROM columnar.options WHERE regclass = 'table_options'::regclass; -- test changing the chunk_group_row_limit -SELECT alter_columnar_table_set('table_options', chunk_group_row_limit => 10); +SELECT alter_columnar_table_set('table_options', chunk_group_row_limit => 2000); -- show table_options settings SELECT * FROM columnar.options WHERE regclass = 'table_options'::regclass; -- test changing the chunk_group_row_limit -SELECT alter_columnar_table_set('table_options', stripe_row_limit => 100); +SELECT alter_columnar_table_set('table_options', stripe_row_limit => 4000); -- show table_options settings SELECT * FROM columnar.options @@ -45,7 +45,7 @@ SELECT * FROM columnar.options WHERE regclass = 'table_options'::regclass; -- set all settings at the same time -SELECT alter_columnar_table_set('table_options', stripe_row_limit => 1000, chunk_group_row_limit => 100, compression => 'none', compression_level => 7); +SELECT alter_columnar_table_set('table_options', stripe_row_limit => 8000, chunk_group_row_limit => 4000, compression => 'none', compression_level => 7); -- show table_options settings SELECT * FROM columnar.options @@ -142,6 +142,14 @@ SELECT alter_columnar_table_set('table_options', compression => 'foobar'); SELECT alter_columnar_table_set('table_options', compression_level => 0); SELECT alter_columnar_table_set('table_options', compression_level => 20); +-- verify cannot set out of range stripe_row_limit & chunk_group_row_limit options +SELECT alter_columnar_table_set('table_options', stripe_row_limit => 999); +SELECT alter_columnar_table_set('table_options', stripe_row_limit => 10000001); +SELECT alter_columnar_table_set('table_options', chunk_group_row_limit => 999); +SELECT alter_columnar_table_set('table_options', chunk_group_row_limit => 100001); +SELECT alter_columnar_table_set('table_options', chunk_group_row_limit => 0); +INSERT INTO table_options VALUES (1); + -- verify options are removed when table is dropped DROP TABLE table_options; -- we expect no entries in çstore.options for anything not found int pg_class diff --git a/src/test/regress/sql/coordinator_evaluation_modify.sql b/src/test/regress/sql/coordinator_evaluation_modify.sql index 0b2f7fd19..649f5fd27 100644 --- a/src/test/regress/sql/coordinator_evaluation_modify.sql +++ b/src/test/regress/sql/coordinator_evaluation_modify.sql @@ -35,7 +35,6 @@ END; $$ language plpgsql STABLE; CREATE TYPE user_data AS (name text, age int); -SET citus.replication_model TO streaming; SET citus.shard_replication_factor TO 1; CREATE TABLE user_info_data (user_id int, u_data user_data, user_index int); diff --git a/src/test/regress/sql/coordinator_evaluation_select.sql b/src/test/regress/sql/coordinator_evaluation_select.sql index 5195a04c9..8b69734d5 100644 --- a/src/test/regress/sql/coordinator_evaluation_select.sql +++ b/src/test/regress/sql/coordinator_evaluation_select.sql @@ -29,7 +29,6 @@ SELECT create_distributed_function('get_local_node_id_volatile()'); CREATE TYPE user_data AS (name text, age 
int); -SET citus.replication_model TO streaming; SET citus.shard_replication_factor TO 1; CREATE TABLE user_info_data (user_id int, u_data user_data, user_index int); diff --git a/src/test/regress/sql/distributed_functions.sql b/src/test/regress/sql/distributed_functions.sql index cc688c4ef..f98be1029 100644 --- a/src/test/regress/sql/distributed_functions.sql +++ b/src/test/regress/sql/distributed_functions.sql @@ -167,13 +167,11 @@ SET citus.enable_ddl_propagation TO on; -- use an unusual type to force a new colocation group CREATE TABLE statement_table(id int2); -SET citus.replication_model TO 'statement'; -SET citus.shard_replication_factor TO 1; +SET citus.shard_replication_factor TO 2; SELECT create_distributed_table('statement_table','id'); -- create a table uses streaming-based replication (can be synced) CREATE TABLE streaming_table(id macaddr); -SET citus.replication_model TO 'streaming'; SET citus.shard_replication_factor TO 1; SELECT create_distributed_table('streaming_table','id'); @@ -198,7 +196,6 @@ select bool_or(hasmetadata) from pg_dist_node WHERE isactive AND noderole = 'pr SELECT create_distributed_function('increment(int2)', '$1'); SELECT create_distributed_function('increment(int2)', '$1', colocate_with := 'statement_table'); BEGIN; -SET LOCAL citus.replication_model TO 'statement'; DROP TABLE statement_table; SELECT create_distributed_function('increment(int2)', '$1'); END; @@ -377,7 +374,6 @@ SELECT create_distributed_function('eq_with_param_names(macaddr, macaddr)','$1') -- a function cannot be colocated with a table that is not "streaming" replicated SET citus.shard_replication_factor TO 2; CREATE TABLE replicated_table_func_test (a macaddr); -SET citus.replication_model TO "statement"; SELECT create_distributed_table('replicated_table_func_test', 'a'); SELECT create_distributed_function('eq_with_param_names(macaddr, macaddr)', '$1', colocate_with:='replicated_table_func_test'); @@ -387,7 +383,6 @@ SELECT public.wait_until_metadata_sync(30000); -- as long as there is a coercion path SET citus.shard_replication_factor TO 1; CREATE TABLE replicated_table_func_test_2 (a macaddr8); -SET citus.replication_model TO "streaming"; SELECT create_distributed_table('replicated_table_func_test_2', 'a'); SELECT create_distributed_function('eq_with_param_names(macaddr, macaddr)', 'val1', colocate_with:='replicated_table_func_test_2'); @@ -401,7 +396,6 @@ SELECT create_distributed_function('eq_with_param_names(macaddr, macaddr)', 'val -- finally, colocate the function with a distributed table SET citus.shard_replication_factor TO 1; CREATE TABLE replicated_table_func_test_4 (a macaddr); -SET citus.replication_model TO "streaming"; SELECT create_distributed_table('replicated_table_func_test_4', 'a'); SELECT create_distributed_function('eq_with_param_names(macaddr, macaddr)', '$1', colocate_with:='replicated_table_func_test_4'); diff --git a/src/test/regress/sql/distributed_procedure.sql b/src/test/regress/sql/distributed_procedure.sql index 6f0cbbb34..c0a42de28 100644 --- a/src/test/regress/sql/distributed_procedure.sql +++ b/src/test/regress/sql/distributed_procedure.sql @@ -29,7 +29,6 @@ CREATE OR REPLACE FUNCTION wait_until_metadata_sync(timeout INTEGER DEFAULT 1500 -- procedures are distributed by text arguments, when run in isolation it is not guaranteed a table actually exists. 
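-- A minimal sketch of the pattern this file relies on after the GUC removal (the
-- procedure name colocated_proc is assumed here, not taken from this patch): streaming
-- replication now follows from shard_replication_factor = 1 alone, so a procedure can
-- still be colocated with the table created below without setting citus.replication_model:
--   SELECT create_distributed_function('colocated_proc(text)', '$1',
--                                      colocate_with := 'colocation_table');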
CREATE TABLE colocation_table(id text); -SET citus.replication_model TO 'streaming'; SET citus.shard_replication_factor TO 1; SELECT create_distributed_table('colocation_table','id'); diff --git a/src/test/regress/sql/failure_ddl.sql b/src/test/regress/sql/failure_ddl.sql index 2ea0eacdb..47448f758 100644 --- a/src/test/regress/sql/failure_ddl.sql +++ b/src/test/regress/sql/failure_ddl.sql @@ -64,20 +64,8 @@ ALTER TABLE test_table ADD COLUMN new_column INT; -- show that we've never commited the changes SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; --- kill as soon as the coordinator sends COMMIT -SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").kill()'); -ALTER TABLE test_table ADD COLUMN new_column INT; -SELECT citus.mitmproxy('conn.allow()'); - --- since we've killed the connection just after --- the coordinator sends the COMMIT, the command should be applied --- to the distributed table and the shards on the other worker --- however, there is no way to recover the failure on the shards --- that live in the failed worker, since we're running 1PC -SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; -SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; - -- manually drop & re-create the table for the next tests +SELECT citus.mitmproxy('conn.allow()'); DROP TABLE test_table; SET citus.next_shard_id TO 100800; SET citus.multi_shard_commit_protocol TO '1pc'; diff --git a/src/test/regress/sql/failure_failover_to_local_execution.sql b/src/test/regress/sql/failure_failover_to_local_execution.sql index c43f2b41d..19722f6ba 100644 --- a/src/test/regress/sql/failure_failover_to_local_execution.sql +++ b/src/test/regress/sql/failure_failover_to_local_execution.sql @@ -8,7 +8,6 @@ SET citus.next_shard_id TO 1980000; SELECT start_metadata_sync_to_node('localhost', :worker_1_port); SELECT start_metadata_sync_to_node('localhost', :worker_2_proxy_port); -SET citus.replication_model TO 'streaming'; SET citus.shard_replication_factor TO 1; CREATE TABLE failover_to_local (key int PRIMARY KEY, value varchar(10)); SELECT create_distributed_table('failover_to_local', 'key'); diff --git a/src/test/regress/sql/failure_mx_metadata_sync.sql b/src/test/regress/sql/failure_mx_metadata_sync.sql index aea247787..f3d11c6c6 100644 --- a/src/test/regress/sql/failure_mx_metadata_sync.sql +++ b/src/test/regress/sql/failure_mx_metadata_sync.sql @@ -6,7 +6,6 @@ SET SEARCH_PATH = mx_metadata_sync; SET citus.shard_count TO 2; SET citus.next_shard_id TO 16000000; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; SELECT pg_backend_pid() as pid \gset SELECT citus.mitmproxy('conn.allow()'); diff --git a/src/test/regress/sql/failure_replicated_partitions.sql b/src/test/regress/sql/failure_replicated_partitions.sql index cb4b0ba6b..1ea79fc83 100644 --- a/src/test/regress/sql/failure_replicated_partitions.sql +++ b/src/test/regress/sql/failure_replicated_partitions.sql @@ -4,7 +4,6 @@ SELECT citus.mitmproxy('conn.allow()'); SET citus.shard_replication_factor TO 2; -SET "citus.replication_model" to "statement"; SET citus.shard_count TO 4; CREATE TABLE partitioned_table ( diff --git a/src/test/regress/sql/fkeys_between_local_ref.sql b/src/test/regress/sql/fkeys_between_local_ref.sql index 0163b026e..640a7f66a 100644 --- 
a/src/test/regress/sql/fkeys_between_local_ref.sql +++ b/src/test/regress/sql/fkeys_between_local_ref.sql @@ -13,7 +13,7 @@ CREATE VIEW citus_local_tables_in_schema AS SELECT logicalrelid FROM pg_dist_partition, pg_tables WHERE tablename=logicalrelid::regclass::text AND schemaname='fkeys_between_local_ref' AND - partmethod = 'n' AND repmodel = 'c'; + partmethod = 'n' AND repmodel = 's'; -- remove coordinator if it is added to pg_dist_node and test diff --git a/src/test/regress/sql/foreign_key_to_reference_shard_rebalance.sql b/src/test/regress/sql/foreign_key_to_reference_shard_rebalance.sql index 390ad7357..546eeb89e 100644 --- a/src/test/regress/sql/foreign_key_to_reference_shard_rebalance.sql +++ b/src/test/regress/sql/foreign_key_to_reference_shard_rebalance.sql @@ -8,6 +8,7 @@ SET search_path to fkey_to_reference_shard_rebalance; SET citus.shard_replication_factor TO 1; SET citus.shard_count to 8; + CREATE TYPE foreign_details AS (name text, relid text, refd_relid text); CREATE VIEW table_fkeys_in_workers AS @@ -44,12 +45,14 @@ SELECT master_move_shard_placement(15000009, 'localhost', :worker_1_port, 'local SELECT count(*) FROM referencing_table2; +CALL citus_cleanup_orphaned_shards(); SELECT * FROM table_fkeys_in_workers WHERE relid LIKE 'fkey_to_reference_shard_rebalance.%' AND refd_relid LIKE 'fkey_to_reference_shard_rebalance.%' ORDER BY 1,2,3; SELECT master_move_shard_placement(15000009, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'block_writes'); SELECT count(*) FROM referencing_table2; +CALL citus_cleanup_orphaned_shards(); SELECT * FROM table_fkeys_in_workers WHERE relid LIKE 'fkey_to_reference_shard_rebalance.%' AND refd_relid LIKE 'fkey_to_reference_shard_rebalance.%' ORDER BY 1,2,3; -- create a function to show the diff --git a/src/test/regress/sql/insert_select_repartition.sql b/src/test/regress/sql/insert_select_repartition.sql index 0d66e2d77..234d3374d 100644 --- a/src/test/regress/sql/insert_select_repartition.sql +++ b/src/test/regress/sql/insert_select_repartition.sql @@ -4,7 +4,6 @@ SET search_path TO 'insert_select_repartition'; SET citus.next_shard_id TO 4213581; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; -- 4 shards, hash distributed. -- Negate distribution column value. @@ -451,7 +450,6 @@ DROP TABLE source_table, target_table; -- SET citus.shard_replication_factor TO 2; -SET citus.replication_model TO 'statement'; SET citus.shard_count TO 4; CREATE TABLE source_table(a int, b int); SELECT create_distributed_table('source_table', 'a'); @@ -625,7 +623,15 @@ DO UPDATE SET create table table_with_sequences (x int, y int, z bigserial); insert into table_with_sequences values (1,1); select create_distributed_table('table_with_sequences','x'); -explain insert into table_with_sequences select y, x from table_with_sequences; +explain (costs off) insert into table_with_sequences select y, x from table_with_sequences; + +-- verify that we don't report repartitioned insert/select for tables +-- with user-defined sequences. 
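-- (Illustrative comparison, using hypothetical names not taken from this patch: a SERIAL
-- column and an explicit user-defined sequence default are meant to be handled the same
-- way here, i.e. the EXPLAIN output should not report a repartitioned INSERT ... SELECT.)
--   CREATE TABLE with_serial   (store_id int, s bigserial);
--   CREATE SEQUENCE user_seq;
--   CREATE TABLE with_user_seq (store_id int, s bigint DEFAULT nextval('user_seq'));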
+CREATE SEQUENCE user_defined_sequence; +create table table_with_user_sequences (x int, y int, z bigint default nextval('user_defined_sequence')); +insert into table_with_user_sequences values (1,1); +select create_distributed_table('table_with_user_sequences','x'); +explain (costs off) insert into table_with_user_sequences select y, x from table_with_user_sequences; -- clean-up SET client_min_messages TO WARNING; diff --git a/src/test/regress/sql/intermediate_result_pruning.sql b/src/test/regress/sql/intermediate_result_pruning.sql index 1fba1a700..58beb8df0 100644 --- a/src/test/regress/sql/intermediate_result_pruning.sql +++ b/src/test/regress/sql/intermediate_result_pruning.sql @@ -497,8 +497,6 @@ INSERT INTO table_1 -- append partitioned/heap-type -SET citus.replication_model TO statement; - -- do not print out 'building index pg_toast_xxxxx_index' messages SET client_min_messages TO DEFAULT; CREATE TABLE range_partitioned(range_column text, data int); diff --git a/src/test/regress/sql/local_shard_copy.sql b/src/test/regress/sql/local_shard_copy.sql index 0f2535c73..688ba8b4b 100644 --- a/src/test/regress/sql/local_shard_copy.sql +++ b/src/test/regress/sql/local_shard_copy.sql @@ -9,8 +9,6 @@ SELECT 1 FROM master_add_node('localhost', :master_port, groupid := 0); SET citus.shard_count TO 4; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; - CREATE TABLE reference_table (key int PRIMARY KEY); SELECT create_reference_table('reference_table'); diff --git a/src/test/regress/sql/local_shard_execution.sql b/src/test/regress/sql/local_shard_execution.sql index 8d72ac145..798aadc4e 100644 --- a/src/test/regress/sql/local_shard_execution.sql +++ b/src/test/regress/sql/local_shard_execution.sql @@ -3,7 +3,6 @@ SET search_path TO local_shard_execution; SET citus.shard_count TO 4; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; SET citus.next_shard_id TO 1470000; CREATE TABLE reference_table (key int PRIMARY KEY); @@ -877,7 +876,6 @@ RESET citus.log_local_commands; \c - - - :master_port SET citus.next_shard_id TO 1480000; -- test both local and remote execution with custom type -SET citus.replication_model TO "streaming"; SET citus.shard_replication_factor TO 1; CREATE TYPE invite_resp AS ENUM ('yes', 'no', 'maybe'); @@ -1078,6 +1076,21 @@ DO UPDATE SET response = EXCLUDED.response RETURNING *; \c - - - :master_port +-- verify the local_hostname guc is used for local executions that should connect to the +-- local host +ALTER SYSTEM SET citus.local_hostname TO 'foobar'; +SELECT pg_reload_conf(); +SELECT pg_sleep(0.1); -- wait to make sure the config has changed before running the GUC +SET citus.enable_local_execution TO false; -- force a connection to the dummy placements + +-- run queries that use dummy placements for local execution +SELECT * FROM event_responses WHERE FALSE; +WITH cte_1 AS (SELECT * FROM event_responses LIMIT 1) SELECT count(*) FROM cte_1; + +ALTER SYSTEM RESET citus.local_hostname; +SELECT pg_reload_conf(); +SELECT pg_sleep(.1); -- wait to make sure the config has changed before running the GUC + SET client_min_messages TO ERROR; SET search_path TO public; DROP SCHEMA local_shard_execution CASCADE; diff --git a/src/test/regress/sql/locally_execute_intermediate_results.sql b/src/test/regress/sql/locally_execute_intermediate_results.sql index 526f142be..881eb0453 100644 --- a/src/test/regress/sql/locally_execute_intermediate_results.sql +++ b/src/test/regress/sql/locally_execute_intermediate_results.sql 
@@ -6,7 +6,6 @@ SET citus.log_local_commands TO TRUE; SET citus.shard_count TO 4; SET citus.next_shard_id TO 1580000; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; CREATE TABLE table_1 (key int, value text); SELECT create_distributed_table('table_1', 'key'); diff --git a/src/test/regress/sql/master_copy_shard_placement.sql b/src/test/regress/sql/master_copy_shard_placement.sql index 37396cdf6..87ba7fd0c 100644 --- a/src/test/regress/sql/master_copy_shard_placement.sql +++ b/src/test/regress/sql/master_copy_shard_placement.sql @@ -3,7 +3,6 @@ CREATE SCHEMA mcsp; SET search_path TO mcsp; SET citus.next_shard_id TO 8139000; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'statement'; CREATE TABLE ref_table(a int, b text unique); SELECT create_reference_table('ref_table'); @@ -25,6 +24,10 @@ CREATE TABLE history_p1 PARTITION OF history FOR VALUES FROM ('2019-01-01') TO ( CREATE TABLE history_p2 PARTITION OF history FOR VALUES FROM ('2020-01-01') TO ('2021-01-01'); SELECT create_distributed_table('history','key'); +-- Mark tables as non-mx tables, in order to be able to test master_copy_shard_placement +UPDATE pg_dist_partition SET repmodel='c' WHERE logicalrelid IN + ('data'::regclass, 'history'::regclass); + INSERT INTO data VALUES ('key-1', 'value-1'); INSERT INTO data VALUES ('key-2', 'value-2'); @@ -88,8 +91,13 @@ SELECT count(*) FROM history; -- test we can not replicate MX tables SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; +-- metadata sync will fail as we have a statement replicated table +SELECT start_metadata_sync_to_node('localhost', :worker_1_port); + +-- use streaming replication to enable metadata syncing +UPDATE pg_dist_partition SET repmodel='s' WHERE logicalrelid IN + ('history'::regclass); SELECT start_metadata_sync_to_node('localhost', :worker_1_port); CREATE TABLE mx_table(a int); diff --git a/src/test/regress/sql/multi_cluster_management.sql b/src/test/regress/sql/multi_cluster_management.sql index 3464ef72c..adb07c1ad 100644 --- a/src/test/regress/sql/multi_cluster_management.sql +++ b/src/test/regress/sql/multi_cluster_management.sql @@ -33,6 +33,11 @@ SELECT master_get_active_worker_nodes(); SET citus.shard_count TO 16; SET citus.shard_replication_factor TO 1; +-- test warnings on setting the deprecated guc for replication model +BEGIN; +SET citus.replication_model to 'statement'; +ROLLBACK; + SELECT * FROM citus_activate_node('localhost', :worker_2_port); CREATE TABLE cluster_management_test (col_1 text, col_2 int); SELECT create_distributed_table('cluster_management_test', 'col_1', 'hash'); @@ -49,6 +54,9 @@ CREATE TABLE test_reference_table (y int primary key, name text); SELECT create_reference_table('test_reference_table'); INSERT INTO test_reference_table VALUES (1, '1'); +-- try to remove a node with active placements and reference tables +SELECT citus_remove_node('localhost', :worker_2_port); + -- try to disable a node with active placements see that node is removed -- observe that a notification is displayed SELECT master_disable_node('localhost', :worker_2_port); @@ -102,6 +110,7 @@ ABORT; \c - postgres - :master_port SET citus.next_shard_id TO 1220016; SET citus.enable_object_propagation TO off; -- prevent object propagation on add node during setup +SET citus.shard_replication_factor TO 1; SELECT master_get_active_worker_nodes(); -- restore the node for next tests @@ -123,15 +132,21 @@ SELECT master_get_active_worker_nodes(); UPDATE pg_dist_placement SET 
shardstate=4 WHERE groupid=:worker_2_group; SELECT shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement WHERE nodeport=:worker_2_port; CREATE TABLE cluster_management_test_colocated (col_1 text, col_2 int); -SELECT create_distributed_table('cluster_management_test_colocated', 'col_1', 'hash', colocate_with=>'cluster_management_test'); +-- Check that we warn the user about colocated shards that will not get created for shards that do not have active placements +SELECT create_distributed_table('cluster_management_test_colocated', 'col_1', 'hash', colocate_with => 'cluster_management_test'); -- Check that colocated shards don't get created for shards that are to be deleted SELECT logicalrelid, shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement NATURAL JOIN pg_dist_shard ORDER BY shardstate, shardid; --- try to remove a node with only to be deleted placements and see that removal still fails +SELECT * INTO removed_placements FROM pg_dist_placement WHERE shardstate = 4; +-- try to remove a node with only to be deleted placements and see that removal succeeds SELECT master_remove_node('localhost', :worker_2_port); SELECT master_get_active_worker_nodes(); +SELECT master_add_node('localhost', :worker_2_port, groupId := :worker_2_group); +-- put removed placements back for testing purposes(in practice we wouldn't have only old placements for a shard) +INSERT INTO pg_dist_placement SELECT * FROM removed_placements; + -- clean-up SELECT 1 FROM master_add_node('localhost', :worker_2_port); UPDATE pg_dist_placement SET shardstate=1 WHERE groupid=:worker_2_group; diff --git a/src/test/regress/sql/multi_colocated_shard_rebalance.sql b/src/test/regress/sql/multi_colocated_shard_rebalance.sql index 46de57776..0c6d5da17 100644 --- a/src/test/regress/sql/multi_colocated_shard_rebalance.sql +++ b/src/test/regress/sql/multi_colocated_shard_rebalance.sql @@ -7,6 +7,7 @@ ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 13000000; SET citus.shard_count TO 6; SET citus.shard_replication_factor TO 1; + -- create distributed tables CREATE TABLE table1_group1 ( id int PRIMARY KEY); SELECT create_distributed_table('table1_group1', 'id', 'hash'); @@ -59,6 +60,7 @@ SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.table1_ SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.table2_group1_13000006'::regclass; \c - - - :master_port + -- copy colocated shards again to see error message SELECT master_copy_shard_placement(13000000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, false, 'force_logical'); @@ -140,6 +142,7 @@ WHERE p.logicalrelid = s.logicalrelid AND s.shardid = sp.shardid AND colocationid = (SELECT colocationid FROM pg_dist_partition WHERE logicalrelid = 'table1_group1'::regclass) + AND sp.shardstate != 4 ORDER BY s.shardid, sp.nodeport; -- also connect worker to verify we successfully moved given shard (and other colocated shards) @@ -149,6 +152,7 @@ SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.table2_ \c - - - :master_port + -- test moving NOT colocated shard -- status before shard move SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport @@ -158,6 +162,7 @@ WHERE p.logicalrelid = s.logicalrelid AND s.shardid = sp.shardid AND p.logicalrelid = 'table5_groupX'::regclass + AND sp.shardstate != 4 ORDER BY s.shardid, sp.nodeport; -- move NOT colocated shard @@ -170,7 +175,8 @@ FROM WHERE p.logicalrelid = s.logicalrelid AND s.shardid = sp.shardid AND - p.logicalrelid = 'table5_groupX'::regclass 
+ p.logicalrelid = 'table5_groupX'::regclass AND + sp.shardstate != 4 ORDER BY s.shardid, sp.nodeport; @@ -183,6 +189,7 @@ WHERE p.logicalrelid = s.logicalrelid AND s.shardid = sp.shardid AND p.logicalrelid = 'table6_append'::regclass + AND sp.shardstate != 4 ORDER BY s.shardid, sp.nodeport; -- move shard in append distributed table @@ -195,7 +202,8 @@ FROM WHERE p.logicalrelid = s.logicalrelid AND s.shardid = sp.shardid AND - p.logicalrelid = 'table6_append'::regclass + p.logicalrelid = 'table6_append'::regclass AND + sp.shardstate != 4 ORDER BY s.shardid, sp.nodeport; @@ -228,6 +236,7 @@ WHERE p.logicalrelid = s.logicalrelid AND s.shardid = sp.shardid AND colocationid = (SELECT colocationid FROM pg_dist_partition WHERE logicalrelid = 'table1_group1'::regclass) + AND sp.shardstate != 4 ORDER BY s.shardid, sp.nodeport; SELECT master_move_shard_placement(13000022, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'block_writes'); @@ -240,6 +249,7 @@ WHERE p.logicalrelid = s.logicalrelid AND s.shardid = sp.shardid AND colocationid = (SELECT colocationid FROM pg_dist_partition WHERE logicalrelid = 'table1_group1'::regclass) + AND sp.shardstate != 4 ORDER BY s.shardid, sp.nodeport; -- also connect worker to verify we successfully moved given shard (and other colocated shards) @@ -254,6 +264,7 @@ SELECT "Constraint", "Definition" FROM table_fkeys \c - - - :master_port + -- test shard copy with foreign constraints -- we expect it to error out because we do not support foreign constraints with replication factor > 1 SELECT master_copy_shard_placement(13000022, 'localhost', :worker_2_port, 'localhost', :worker_1_port, false); @@ -305,6 +316,7 @@ SELECT count(*) FROM move_partitions.events; SELECT master_move_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port) FROM pg_dist_shard JOIN pg_dist_shard_placement USING (shardid) WHERE logicalrelid = 'move_partitions.events'::regclass AND nodeport = :worker_2_port +AND shardstate != 4 ORDER BY shardid LIMIT 1; SELECT count(*) FROM move_partitions.events; @@ -315,7 +327,7 @@ ALTER TABLE move_partitions.events_1 ADD CONSTRAINT e_1_pk PRIMARY KEY (id); -- should be able to move automatically now SELECT master_move_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port) FROM pg_dist_shard JOIN pg_dist_shard_placement USING (shardid) -WHERE logicalrelid = 'move_partitions.events'::regclass AND nodeport = :worker_2_port +WHERE logicalrelid = 'move_partitions.events'::regclass AND nodeport = :worker_2_port AND shardstate != 4 ORDER BY shardid LIMIT 1; SELECT count(*) FROM move_partitions.events; @@ -323,7 +335,7 @@ SELECT count(*) FROM move_partitions.events; -- should also be able to move with block writes SELECT master_move_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'block_writes') FROM pg_dist_shard JOIN pg_dist_shard_placement USING (shardid) -WHERE logicalrelid = 'move_partitions.events'::regclass AND nodeport = :worker_2_port +WHERE logicalrelid = 'move_partitions.events'::regclass AND nodeport = :worker_2_port AND shardstate != 4 ORDER BY shardid LIMIT 1; SELECT count(*) FROM move_partitions.events; diff --git a/src/test/regress/sql/multi_colocation_utils.sql b/src/test/regress/sql/multi_colocation_utils.sql index 4959087f8..b7604804d 100644 --- a/src/test/regress/sql/multi_colocation_utils.sql +++ b/src/test/regress/sql/multi_colocation_utils.sql @@ -429,6 +429,7 @@ UPDATE pg_dist_partition SET repmodel = 's' WHERE logicalrelid = 'table1_groupG' 
CREATE TABLE table2_groupG ( id int ); SELECT create_distributed_table('table2_groupG', 'id', colocate_with => 'table1_groupG'); +DROP TABLE table2_groupG; CREATE TABLE table2_groupG ( id int ); SELECT create_distributed_table('table2_groupG', 'id', colocate_with => 'NONE'); diff --git a/src/test/regress/sql/multi_create_table_superuser.sql b/src/test/regress/sql/multi_create_table_superuser.sql index 57a32878c..d72223223 100644 --- a/src/test/regress/sql/multi_create_table_superuser.sql +++ b/src/test/regress/sql/multi_create_table_superuser.sql @@ -1,37 +1,9 @@ - ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 360005; ALTER SEQUENCE pg_catalog.pg_dist_colocationid_seq RESTART 100000; --- Since we're superuser, we can set the replication model to 'streaming' to --- create a one-off MX table... but if we forget to set the replication factor to one, --- we should see an error reminding us to fix that -SET citus.replication_model TO 'streaming'; -SELECT create_distributed_table('mx_table_test', 'col1'); - --- ok, so now actually create the one-off MX table SET citus.shard_replication_factor TO 1; -SELECT create_distributed_table('mx_table_test', 'col1'); -SELECT repmodel FROM pg_dist_partition WHERE logicalrelid='mx_table_test'::regclass; -DROP TABLE mx_table_test; - --- Show that master_create_distributed_table ignores citus.replication_model GUC -CREATE TABLE s_table(a int); -SELECT master_create_distributed_table('s_table', 'a', 'hash'); -SELECT repmodel FROM pg_dist_partition WHERE logicalrelid='s_table'::regclass; - --- Show that master_create_worker_shards complains when RF>1 and replication model is streaming -UPDATE pg_dist_partition SET repmodel = 's' WHERE logicalrelid='s_table'::regclass; -SELECT master_create_worker_shards('s_table', 4, 2); - -DROP TABLE s_table; - -RESET citus.replication_model; - --- Show that create_distributed_table with append and range distributions ignore --- citus.replication_model GUC -SET citus.shard_replication_factor TO 2; -SET citus.replication_model TO streaming; +-- test that range and append distributed tables have coordinator replication CREATE TABLE repmodel_test (a int); SELECT create_distributed_table('repmodel_test', 'a', 'append'); SELECT repmodel FROM pg_dist_partition WHERE logicalrelid='repmodel_test'::regclass; @@ -42,9 +14,7 @@ SELECT create_distributed_table('repmodel_test', 'a', 'range'); SELECT repmodel FROM pg_dist_partition WHERE logicalrelid='repmodel_test'::regclass; DROP TABLE repmodel_test; --- Show that master_create_distributed_table created statement replicated tables no matter --- what citus.replication_model set to - +-- test that deprecated api creates distributed tables with coordinator replication CREATE TABLE repmodel_test (a int); SELECT master_create_distributed_table('repmodel_test', 'a', 'hash'); SELECT repmodel FROM pg_dist_partition WHERE logicalrelid='repmodel_test'::regclass; @@ -60,35 +30,7 @@ SELECT master_create_distributed_table('repmodel_test', 'a', 'range'); SELECT repmodel FROM pg_dist_partition WHERE logicalrelid='repmodel_test'::regclass; DROP TABLE repmodel_test; --- Check that the replication_model overwrite behavior is the same with RF=1 -SET citus.shard_replication_factor TO 1; - -CREATE TABLE repmodel_test (a int); -SELECT create_distributed_table('repmodel_test', 'a', 'append'); -SELECT repmodel FROM pg_dist_partition WHERE logicalrelid='repmodel_test'::regclass; -DROP TABLE repmodel_test; - -CREATE TABLE repmodel_test (a int); -SELECT create_distributed_table('repmodel_test', 'a', 
'range'); -SELECT repmodel FROM pg_dist_partition WHERE logicalrelid='repmodel_test'::regclass; -DROP TABLE repmodel_test; - -CREATE TABLE repmodel_test (a int); -SELECT master_create_distributed_table('repmodel_test', 'a', 'hash'); -SELECT repmodel FROM pg_dist_partition WHERE logicalrelid='repmodel_test'::regclass; -DROP TABLE repmodel_test; - -CREATE TABLE repmodel_test (a int); -SELECT master_create_distributed_table('repmodel_test', 'a', 'append'); -SELECT repmodel FROM pg_dist_partition WHERE logicalrelid='repmodel_test'::regclass; -DROP TABLE repmodel_test; - -CREATE TABLE repmodel_test (a int); -SELECT master_create_distributed_table('repmodel_test', 'a', 'range'); -SELECT repmodel FROM pg_dist_partition WHERE logicalrelid='repmodel_test'::regclass; -DROP TABLE repmodel_test; - -RESET citus.replication_model; +RESET citus.shard_replication_factor; ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 360025; diff --git a/src/test/regress/sql/multi_extension.sql b/src/test/regress/sql/multi_extension.sql index 20e0e1df9..7bf553e02 100644 --- a/src/test/regress/sql/multi_extension.sql +++ b/src/test/regress/sql/multi_extension.sql @@ -52,7 +52,9 @@ BEGIN TRUNCATE TABLE extension_diff; CREATE TABLE current_objects AS - SELECT pg_catalog.pg_describe_object(classid, objid, 0) AS description + SELECT pg_catalog.pg_describe_object(classid, objid, 0) + || ' ' || + coalesce(pg_catalog.pg_get_function_result(objid), '') AS description FROM pg_catalog.pg_depend, pg_catalog.pg_extension e WHERE refclassid = 'pg_catalog.pg_extension'::pg_catalog.regclass AND refobjid = e.oid @@ -228,6 +230,16 @@ SELECT * FROM print_extension_changes(); ALTER EXTENSION citus UPDATE TO '10.1-1'; SELECT * FROM print_extension_changes(); +-- Test downgrade to 10.1-1 from 10.2-1 +ALTER EXTENSION citus UPDATE TO '10.2-1'; +ALTER EXTENSION citus UPDATE TO '10.1-1'; +-- Should be empty result since upgrade+downgrade should be a no-op +SELECT * FROM print_extension_changes(); + +-- Snapshot of state at 10.2-1 +ALTER EXTENSION citus UPDATE TO '10.2-1'; +SELECT * FROM print_extension_changes(); + DROP TABLE prev_objects, extension_diff; -- show running version diff --git a/src/test/regress/sql/multi_function_in_join.sql b/src/test/regress/sql/multi_function_in_join.sql index 2fb7fdf18..6abdafdad 100644 --- a/src/test/regress/sql/multi_function_in_join.sql +++ b/src/test/regress/sql/multi_function_in_join.sql @@ -13,7 +13,6 @@ CREATE SCHEMA functions_in_joins; SET search_path TO 'functions_in_joins'; SET citus.next_shard_id TO 2500000; -SET citus.replication_model to 'streaming'; SET citus.shard_replication_factor to 1; CREATE TABLE table1 (id int, data int); diff --git a/src/test/regress/sql/multi_insert_select.sql b/src/test/regress/sql/multi_insert_select.sql index 2f7136037..7de9f9763 100644 --- a/src/test/regress/sql/multi_insert_select.sql +++ b/src/test/regress/sql/multi_insert_select.sql @@ -1729,7 +1729,7 @@ FROM GROUP BY store_id, first_name, last_name; --- Volatile function in default should be disallowed +-- Volatile function in default should be disallowed - SERIAL pseudo-types CREATE TABLE table_with_serial ( store_id int, s bigserial @@ -1744,6 +1744,22 @@ FROM GROUP BY store_id; +-- Volatile function in default should be disallowed - user-defined sequence +CREATE SEQUENCE user_defined_sequence; +CREATE TABLE table_with_user_sequence ( + store_id int, + s bigint default nextval('user_defined_sequence') +); +SELECT create_distributed_table('table_with_user_sequence', 'store_id'); + +INSERT INTO 
table_with_user_sequence (store_id) +SELECT + store_id +FROM + table_with_defaults +GROUP BY + store_id; + -- do some more error/error message checks SET citus.shard_count TO 4; SET citus.shard_replication_factor TO 1; @@ -2018,6 +2034,48 @@ SELECT * FROM dist_table_with_sequence ORDER BY user_id, value_1; DROP TABLE dist_table_with_sequence; +-- Select into distributed table with a user-defined sequence +CREATE SEQUENCE seq1; +CREATE SEQUENCE seq2; +CREATE TABLE dist_table_with_user_sequence (user_id int default nextval('seq1'), value_1 bigint default nextval('seq2')); +SELECT create_distributed_table('dist_table_with_user_sequence', 'user_id'); + +-- from local query +INSERT INTO dist_table_with_user_sequence (value_1) +SELECT s FROM generate_series(1,5) s; + +SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1; + +-- from a distributed query +INSERT INTO dist_table_with_user_sequence (value_1) +SELECT value_1 FROM dist_table_with_user_sequence ORDER BY value_1; + +SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1; + +TRUNCATE dist_table_with_user_sequence; + +INSERT INTO dist_table_with_user_sequence (user_id) +SELECT user_id FROM raw_events_second ORDER BY user_id; + +SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1; + +WITH top10 AS ( + SELECT user_id FROM raw_events_second WHERE value_1 IS NOT NULL ORDER BY value_1 LIMIT 10 +) +INSERT INTO dist_table_with_user_sequence (value_1) +SELECT * FROM top10; + +SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1; + +-- router queries become logical planner queries when there is a nextval call +INSERT INTO dist_table_with_user_sequence (user_id) +SELECT user_id FROM dist_table_with_user_sequence WHERE user_id = 1; + +SELECT * FROM dist_table_with_user_sequence ORDER BY user_id, value_1; + +DROP TABLE dist_table_with_user_sequence; +DROP SEQUENCE seq1, seq2; + -- Select from distributed table into reference table CREATE TABLE ref_table (user_id serial, value_1 int); SELECT create_reference_table('ref_table'); @@ -2037,6 +2095,27 @@ SELECT * FROM ref_table ORDER BY user_id, value_1; DROP TABLE ref_table; +-- Select from distributed table into reference table with user-defined sequence +CREATE SEQUENCE seq1; +CREATE TABLE ref_table_with_user_sequence (user_id int default nextval('seq1'), value_1 int); +SELECT create_reference_table('ref_table_with_user_sequence'); + +INSERT INTO ref_table_with_user_sequence +SELECT user_id, value_1 FROM raw_events_second; + +SELECT * FROM ref_table_with_user_sequence ORDER BY user_id, value_1; + +INSERT INTO ref_table_with_user_sequence (value_1) +SELECT value_1 FROM raw_events_second ORDER BY value_1; + +SELECT * FROM ref_table_with_user_sequence ORDER BY user_id, value_1; + +INSERT INTO ref_table_with_user_sequence SELECT * FROM ref_table_with_user_sequence; +SELECT * FROM ref_table_with_user_sequence ORDER BY user_id, value_1; + +DROP TABLE ref_table_with_user_sequence; +DROP SEQUENCE seq1; + -- Select from reference table into reference table CREATE TABLE ref1 (d timestamptz); SELECT create_reference_table('ref1'); @@ -2269,6 +2348,8 @@ DROP TABLE reference_table; DROP TABLE agg_events; DROP TABLE table_with_defaults; DROP TABLE table_with_serial; +DROP TABLE table_with_user_sequence; +DROP SEQUENCE user_defined_sequence; DROP TABLE text_table; DROP TABLE char_table; DROP TABLE table_with_starts_with_defaults; diff --git a/src/test/regress/sql/multi_metadata_attributes.sql b/src/test/regress/sql/multi_metadata_attributes.sql index 
1845a4f53..174c92331 100644 --- a/src/test/regress/sql/multi_metadata_attributes.sql +++ b/src/test/regress/sql/multi_metadata_attributes.sql @@ -7,5 +7,5 @@ -- part of the query so new changes to it won't affect this test. SELECT attrelid::regclass, attname, atthasmissing, attmissingval FROM pg_attribute -WHERE atthasmissing AND attrelid NOT IN ('pg_dist_node'::regclass) +WHERE atthasmissing AND attrelid NOT IN ('pg_dist_node'::regclass, 'pg_dist_rebalance_strategy'::regclass) ORDER BY attrelid, attname; diff --git a/src/test/regress/sql/multi_metadata_sync.sql b/src/test/regress/sql/multi_metadata_sync.sql index 8a9b380af..45e0fe8e4 100644 --- a/src/test/regress/sql/multi_metadata_sync.sql +++ b/src/test/regress/sql/multi_metadata_sync.sql @@ -51,8 +51,9 @@ CREATE OR REPLACE FUNCTION pg_catalog.master_create_worker_shards(table_name tex AS 'citus', $$master_create_worker_shards$$ LANGUAGE C STRICT; --- Create a test table with constraints and SERIAL -CREATE TABLE mx_test_table (col_1 int UNIQUE, col_2 text NOT NULL, col_3 BIGSERIAL); +-- Create a test table with constraints and SERIAL and default from user defined sequence +CREATE SEQUENCE user_defined_seq; +CREATE TABLE mx_test_table (col_1 int UNIQUE, col_2 text NOT NULL, col_3 BIGSERIAL, col_4 BIGINT DEFAULT nextval('user_defined_seq')); SELECT master_create_distributed_table('mx_test_table', 'col_1', 'hash'); SELECT master_create_worker_shards('mx_test_table', 8, 1); @@ -82,6 +83,7 @@ SELECT unnest(master_metadata_snapshot()) order by 1; UPDATE pg_dist_partition SET partmethod='r' WHERE logicalrelid='non_mx_test_table'::regclass; SELECT unnest(master_metadata_snapshot()) order by 1; + -- Test start_metadata_sync_to_node UDF -- Ensure that hasmetadata=false for all nodes @@ -127,7 +129,6 @@ SELECT count(*) FROM pg_trigger WHERE tgrelid='mx_testing_schema.mx_test_table': -- Since we're superuser, we can set the replication model to 'streaming' to -- create some MX tables SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; CREATE SCHEMA mx_testing_schema_2; @@ -149,7 +150,6 @@ DROP TABLE mx_testing_schema_2.fk_test_2; DROP TABLE mx_testing_schema.fk_test_1; RESET citus.shard_replication_factor; -RESET citus.replication_model; -- Check that repeated calls to start_metadata_sync_to_node has no side effects \c - - - :master_port @@ -179,7 +179,6 @@ SELECT hasmetadata FROM pg_dist_node WHERE nodeport=:worker_2_port; -- Check that the distributed table can be queried from the worker \c - - - :master_port SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; SELECT start_metadata_sync_to_node('localhost', :worker_1_port); CREATE TABLE mx_query_test (a int, b text, c int); @@ -221,7 +220,6 @@ CREATE SCHEMA mx_test_schema_2; -- Create MX tables SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; CREATE TABLE mx_test_schema_1.mx_table_1 (col1 int UNIQUE, col2 text); CREATE INDEX mx_index_1 ON mx_test_schema_1.mx_table_1 (col1); @@ -360,7 +358,6 @@ SELECT nextval('pg_catalog.pg_dist_colocationid_seq') AS last_colocation_id \gse ALTER SEQUENCE pg_catalog.pg_dist_colocationid_seq RESTART 10000; SET citus.shard_count TO 7; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; CREATE TABLE mx_colocation_test_1 (a int); SELECT create_distributed_table('mx_colocation_test_1', 'a'); @@ -429,7 +426,6 @@ DROP TABLE mx_colocation_test_2; \c - - - :master_port SET citus.shard_count TO 7; SET citus.shard_replication_factor TO 1; 
-SET citus.replication_model TO 'streaming'; CREATE TABLE mx_temp_drop_test (a int); SELECT create_distributed_table('mx_temp_drop_test', 'a'); @@ -447,7 +443,6 @@ DROP TABLE mx_temp_drop_test; \c - - - :master_port SET citus.shard_count TO 3; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; SELECT stop_metadata_sync_to_node('localhost', :worker_1_port); SELECT stop_metadata_sync_to_node('localhost', :worker_2_port); @@ -468,7 +463,6 @@ INSERT INTO mx_table_with_small_sequence VALUES (1), (3); \c - - - :master_port SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; -- Create an MX table with (BIGSERIAL) sequences CREATE TABLE mx_table_with_sequence(a int, b BIGSERIAL, c BIGSERIAL); @@ -555,7 +549,6 @@ CREATE USER mx_user; -- Create an mx table as a different user CREATE TABLE mx_table (a int, b BIGSERIAL); SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; SELECT create_distributed_table('mx_table', 'a'); \c - postgres - :master_port @@ -738,7 +731,6 @@ ALTER SYSTEM SET citus.metadata_sync_interval TO 300000; ALTER SYSTEM SET citus.metadata_sync_retry_interval TO 300000; SELECT pg_reload_conf(); -SET citus.replication_model TO 'streaming'; SET citus.shard_replication_factor TO 1; CREATE TABLE dist_table_1(a int); @@ -770,6 +762,51 @@ SELECT pg_reload_conf(); UPDATE pg_dist_node SET metadatasynced=true WHERE nodeport=:worker_1_port; +SELECT master_add_node('localhost', :worker_2_port); +SELECT start_metadata_sync_to_node('localhost', :worker_2_port); + +CREATE SEQUENCE mx_test_sequence_0; +CREATE SEQUENCE mx_test_sequence_1; + +-- test create_distributed_table +CREATE TABLE test_table (id int DEFAULT nextval('mx_test_sequence_0')); +SELECT create_distributed_table('test_table', 'id'); + +-- shouldn't work since it's partition column +ALTER TABLE test_table ALTER COLUMN id SET DEFAULT nextval('mx_test_sequence_1'); + +-- test different plausible commands +ALTER TABLE test_table ADD COLUMN id2 int DEFAULT nextval('mx_test_sequence_1'); +ALTER TABLE test_table ALTER COLUMN id2 DROP DEFAULT; +ALTER TABLE test_table ALTER COLUMN id2 SET DEFAULT nextval('mx_test_sequence_1'); + +SELECT unnest(master_metadata_snapshot()) order by 1; + +-- shouldn't work since test_table is MX +ALTER TABLE test_table ADD COLUMN id3 bigserial; + +-- shouldn't work since the above operations should be the only subcommands +ALTER TABLE test_table ADD COLUMN id4 int DEFAULT nextval('mx_test_sequence_1') CHECK (id4 > 0); +ALTER TABLE test_table ADD COLUMN id4 int, ADD COLUMN id5 int DEFAULT nextval('mx_test_sequence_1'); +ALTER TABLE test_table ALTER COLUMN id1 SET DEFAULT nextval('mx_test_sequence_1'), ALTER COLUMN id2 DROP DEFAULT; +ALTER TABLE test_table ADD COLUMN id4 bigserial CHECK (id4 > 0); + +\c - - - :worker_1_port +\ds + +\c - - - :master_port +CREATE SEQUENCE local_sequence; + +-- verify that DROP SEQUENCE will propagate the command to workers for +-- the distributed sequences mx_test_sequence_0 and mx_test_sequence_1 +DROP SEQUENCE mx_test_sequence_0, mx_test_sequence_1, local_sequence CASCADE; + +\c - - - :worker_1_port +\ds + +\c - - - :master_port +DROP TABLE test_table CASCADE; + -- Cleanup SELECT stop_metadata_sync_to_node('localhost', :worker_1_port); SELECT stop_metadata_sync_to_node('localhost', :worker_2_port); @@ -781,7 +818,6 @@ DROP TABLE dist_table_1, dist_table_2; RESET citus.shard_count; RESET citus.shard_replication_factor; -RESET citus.replication_model; RESET 
citus.multi_shard_commit_protocol; ALTER SEQUENCE pg_catalog.pg_dist_groupid_seq RESTART :last_group_id; diff --git a/src/test/regress/sql/multi_modifying_xacts.sql b/src/test/regress/sql/multi_modifying_xacts.sql index 6aa816147..1f27cf0a2 100644 --- a/src/test/regress/sql/multi_modifying_xacts.sql +++ b/src/test/regress/sql/multi_modifying_xacts.sql @@ -1,4 +1,3 @@ - SET citus.next_shard_id TO 1200000; SET citus.next_placement_id TO 1200000; @@ -1022,6 +1021,7 @@ CREATE TABLE itemgroups ( ); SELECT create_reference_table('itemgroups'); +DROP TABLE IF EXISTS users ; CREATE TABLE users ( id int PRIMARY KEY, name text, diff --git a/src/test/regress/sql/multi_move_mx.sql b/src/test/regress/sql/multi_move_mx.sql index c317a08d7..5689e8f80 100644 --- a/src/test/regress/sql/multi_move_mx.sql +++ b/src/test/regress/sql/multi_move_mx.sql @@ -8,7 +8,6 @@ SELECT start_metadata_sync_to_node('localhost', :worker_2_port); -- Create mx test tables SET citus.shard_count TO 4; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; CREATE TABLE mx_table_1 (a int); SELECT create_distributed_table('mx_table_1', 'a'); @@ -87,9 +86,10 @@ SELECT FROM pg_dist_shard NATURAL JOIN pg_dist_shard_placement WHERE - logicalrelid = 'mx_table_1'::regclass + (logicalrelid = 'mx_table_1'::regclass OR logicalrelid = 'mx_table_2'::regclass - OR logicalrelid = 'mx_table_3'::regclass + OR logicalrelid = 'mx_table_3'::regclass) + AND shardstate != 4 ORDER BY logicalrelid, shardid; @@ -140,5 +140,3 @@ DELETE FROM pg_dist_node; DELETE FROM pg_dist_partition; DELETE FROM pg_dist_shard; DELETE FROM pg_dist_shard_placement; -\c - - - :master_port -RESET citus.replication_model; diff --git a/src/test/regress/sql/multi_multiuser.sql b/src/test/regress/sql/multi_multiuser.sql index 0cdca30d3..4ea32e09c 100644 --- a/src/test/regress/sql/multi_multiuser.sql +++ b/src/test/regress/sql/multi_multiuser.sql @@ -83,7 +83,6 @@ GRANT USAGE ON SCHEMA full_access_user_schema TO usage_access; \c - - - :master_port -SET citus.replication_model TO 'streaming'; SET citus.shard_replication_factor TO 1; -- create prepare tests @@ -170,7 +169,7 @@ IF substring(current_Setting('server_version'), '\d+')::int >= 12 THEN -- create columnar table CREATE TABLE columnar_table (a int) USING columnar; -- alter a columnar table that is created by that unprivileged user - SELECT alter_columnar_table_set('columnar_table', chunk_group_row_limit => 100); + SELECT alter_columnar_table_set('columnar_table', chunk_group_row_limit => 2000); -- and drop it DROP TABLE columnar_table; $$; diff --git a/src/test/regress/sql/multi_mx_add_coordinator.sql b/src/test/regress/sql/multi_mx_add_coordinator.sql index a8bab6ce0..0628aa94b 100644 --- a/src/test/regress/sql/multi_mx_add_coordinator.sql +++ b/src/test/regress/sql/multi_mx_add_coordinator.sql @@ -4,7 +4,6 @@ SET citus.shard_replication_factor TO 1; SET citus.shard_count TO 8; SET citus.next_shard_id TO 7000000; SET citus.next_placement_id TO 7000000; -SET citus.replication_model TO streaming; SET client_min_messages TO WARNING; CREATE USER reprefuser WITH LOGIN; diff --git a/src/test/regress/sql/multi_mx_alter_distributed_table.sql b/src/test/regress/sql/multi_mx_alter_distributed_table.sql index 976ffb64e..7134527f5 100644 --- a/src/test/regress/sql/multi_mx_alter_distributed_table.sql +++ b/src/test/regress/sql/multi_mx_alter_distributed_table.sql @@ -2,7 +2,6 @@ CREATE SCHEMA mx_alter_distributed_table; SET search_path TO mx_alter_distributed_table; SET citus.shard_replication_factor TO 
1; ALTER SEQUENCE pg_catalog.pg_dist_colocationid_seq RESTART 1410000; -SET citus.replication_model TO 'streaming'; -- test alter_distributed_table UDF CREATE TABLE adt_table (a INT, b INT); diff --git a/src/test/regress/sql/multi_mx_call.sql b/src/test/regress/sql/multi_mx_call.sql index b39965b99..0cceb1661 100644 --- a/src/test/regress/sql/multi_mx_call.sql +++ b/src/test/regress/sql/multi_mx_call.sql @@ -6,7 +6,6 @@ set search_path to multi_mx_call, public; -- Create worker-local tables to test procedure calls were routed set citus.shard_replication_factor to 2; -set citus.replication_model to 'statement'; -- This table requires specific settings, create before getting into things create table mx_call_dist_table_replica(id int, val int); @@ -14,7 +13,6 @@ select create_distributed_table('mx_call_dist_table_replica', 'id'); insert into mx_call_dist_table_replica values (9,1),(8,2),(7,3),(6,4),(5,5); set citus.shard_replication_factor to 1; -set citus.replication_model to 'streaming'; -- -- Create tables and procedures we want to use in tests diff --git a/src/test/regress/sql/multi_mx_create_table.sql b/src/test/regress/sql/multi_mx_create_table.sql index 55d9399c0..59a644992 100644 --- a/src/test/regress/sql/multi_mx_create_table.sql +++ b/src/test/regress/sql/multi_mx_create_table.sql @@ -153,7 +153,6 @@ CREATE OPERATOR citus_mx_test_schema.=== ( \c - - - :master_port SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO streaming; SET search_path TO public; CREATE TABLE nation_hash( diff --git a/src/test/regress/sql/multi_mx_ddl.sql b/src/test/regress/sql/multi_mx_ddl.sql index 0d3e26e88..0e3c0ff0e 100644 --- a/src/test/regress/sql/multi_mx_ddl.sql +++ b/src/test/regress/sql/multi_mx_ddl.sql @@ -107,7 +107,6 @@ DROP INDEX ddl_test_index; -- show that sequences owned by mx tables result in unique values SET citus.shard_replication_factor TO 1; SET citus.shard_count TO 4; -SET citus.replication_model TO streaming; CREATE TABLE mx_sequence(key INT, value BIGSERIAL); SELECT create_distributed_table('mx_sequence', 'key'); @@ -154,7 +153,6 @@ ALTER EXTENSION seg ADD TABLE seg_test; -- sync table metadata, but skip CREATE TABLE SET citus.shard_replication_factor TO 1; SET citus.shard_count TO 4; -SET citus.replication_model TO streaming; SELECT create_distributed_table('seg_test', 'x'); \c - - - :worker_1_port diff --git a/src/test/regress/sql/multi_mx_function_call_delegation.sql b/src/test/regress/sql/multi_mx_function_call_delegation.sql index 45ed820ba..ee9648942 100644 --- a/src/test/regress/sql/multi_mx_function_call_delegation.sql +++ b/src/test/regress/sql/multi_mx_function_call_delegation.sql @@ -4,7 +4,6 @@ CREATE SCHEMA multi_mx_function_call_delegation; SET search_path TO multi_mx_function_call_delegation, public; SET citus.shard_replication_factor TO 2; -SET citus.replication_model TO 'statement'; -- This table requires specific settings, create before getting into things create table mx_call_dist_table_replica(id int, val int); @@ -12,7 +11,6 @@ select create_distributed_table('mx_call_dist_table_replica', 'id'); insert into mx_call_dist_table_replica values (9,1),(8,2),(7,3),(6,4),(5,5); SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; -- -- Create tables and functions we want to use in tests @@ -237,7 +235,7 @@ select start_metadata_sync_to_node('localhost', :worker_2_port); \c - - - :master_port SET search_path to multi_mx_function_call_delegation, public; SET client_min_messages TO DEBUG1; -SET citus.replication_model 
= 'streaming'; +SET citus.shard_replication_factor = 1; -- -- Test non-const parameter values diff --git a/src/test/regress/sql/multi_mx_function_table_reference.sql b/src/test/regress/sql/multi_mx_function_table_reference.sql index e6efaff15..7b51eab04 100644 --- a/src/test/regress/sql/multi_mx_function_table_reference.sql +++ b/src/test/regress/sql/multi_mx_function_table_reference.sql @@ -9,7 +9,6 @@ CREATE SCHEMA function_table_reference; SET search_path TO function_table_reference; SET citus.shard_count TO 4; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO streaming; SELECT start_metadata_sync_to_node('localhost', :worker_1_port); SELECT start_metadata_sync_to_node('localhost', :worker_2_port); diff --git a/src/test/regress/sql/multi_mx_hide_shard_names.sql b/src/test/regress/sql/multi_mx_hide_shard_names.sql index 7d6f1fd6a..3cc191c78 100644 --- a/src/test/regress/sql/multi_mx_hide_shard_names.sql +++ b/src/test/regress/sql/multi_mx_hide_shard_names.sql @@ -23,8 +23,6 @@ SET search_path TO 'mx_hide_shard_names'; SET citus.shard_count TO 4; SET citus.shard_replication_factor TO 1; - -SET citus.replication_model TO 'streaming'; SELECT start_metadata_sync_to_node('localhost', :worker_1_port); SELECT start_metadata_sync_to_node('localhost', :worker_2_port); @@ -84,8 +82,6 @@ SET search_path TO 'mx_hide_shard_names'; SET citus.shard_count TO 4; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; - -- not existing shard ids appended to the distributed table name CREATE TABLE test_table_102008(id int, time date); SELECT create_distributed_table('test_table_102008', 'id'); @@ -109,8 +105,6 @@ CREATE SCHEMA mx_hide_shard_names_2; SET search_path TO 'mx_hide_shard_names_2'; SET citus.shard_count TO 4; SET citus.shard_replication_factor TO 1; - -SET citus.replication_model TO 'streaming'; CREATE TABLE test_table(id int, time date); SELECT create_distributed_table('test_table', 'id'); CREATE INDEX test_index ON mx_hide_shard_names_2.test_table(id); @@ -132,8 +126,6 @@ SELECT * FROM citus_shard_indexes_on_worker ORDER BY 2; SET citus.shard_count TO 4; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; - CREATE SCHEMA mx_hide_shard_names_3; SET search_path TO 'mx_hide_shard_names_3'; @@ -156,8 +148,6 @@ SELECT * FROM citus_shards_on_worker ORDER BY 2; SET citus.shard_count TO 4; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; - CREATE SCHEMA "CiTuS.TeeN"; SET search_path TO "CiTuS.TeeN"; diff --git a/src/test/regress/sql/multi_mx_insert_select_repartition.sql b/src/test/regress/sql/multi_mx_insert_select_repartition.sql index 29b4c2c01..e086444cf 100644 --- a/src/test/regress/sql/multi_mx_insert_select_repartition.sql +++ b/src/test/regress/sql/multi_mx_insert_select_repartition.sql @@ -4,7 +4,6 @@ CREATE SCHEMA multi_mx_insert_select_repartition; SET search_path TO multi_mx_insert_select_repartition; SET citus.next_shard_id TO 4213581; -SET citus.replication_model TO 'streaming'; SET citus.shard_replication_factor TO 1; SET citus.shard_count TO 4; diff --git a/src/test/regress/sql/multi_mx_metadata.sql b/src/test/regress/sql/multi_mx_metadata.sql index 2a1d49a65..7bd09b900 100644 --- a/src/test/regress/sql/multi_mx_metadata.sql +++ b/src/test/regress/sql/multi_mx_metadata.sql @@ -29,7 +29,6 @@ CREATE TABLE distributed_mx_table ( CREATE INDEX ON distributed_mx_table USING GIN (value); SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO streaming; 
SET citus.shard_count TO 4; @@ -75,7 +74,6 @@ WHERE logicalrelid = 'distributed_mx_table'::regclass; -- Create a table and then roll back the transaction \c - - - :master_port SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO streaming; BEGIN; CREATE TABLE should_not_exist ( @@ -92,7 +90,6 @@ SELECT count(*) FROM pg_tables WHERE tablename = 'should_not_exist'; -- Ensure that we don't allow prepare on a metadata transaction \c - - - :master_port SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO streaming; BEGIN; CREATE TABLE should_not_exist ( @@ -127,7 +124,6 @@ WHERE logicalrelid = 'citus_mx_schema_for_xacts.objects_for_xacts'::regclass; \c - - - :master_port SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO streaming; -- now show that we can rollback on creating mx table, but shards remain.... BEGIN; diff --git a/src/test/regress/sql/multi_mx_modifications_to_reference_tables.sql b/src/test/regress/sql/multi_mx_modifications_to_reference_tables.sql index 358622614..ab3d6d32d 100644 --- a/src/test/regress/sql/multi_mx_modifications_to_reference_tables.sql +++ b/src/test/regress/sql/multi_mx_modifications_to_reference_tables.sql @@ -12,7 +12,6 @@ SET search_path TO 'mx_modify_reference_table'; SET citus.shard_count TO 4; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; SELECT start_metadata_sync_to_node('localhost', :worker_1_port); SELECT start_metadata_sync_to_node('localhost', :worker_2_port); diff --git a/src/test/regress/sql/multi_mx_node_metadata.sql b/src/test/regress/sql/multi_mx_node_metadata.sql index 834baa09e..990f2f6da 100644 --- a/src/test/regress/sql/multi_mx_node_metadata.sql +++ b/src/test/regress/sql/multi_mx_node_metadata.sql @@ -8,7 +8,6 @@ SELECT nextval('pg_catalog.pg_dist_colocationid_seq') AS last_colocation_id \gse SELECT nextval('pg_catalog.pg_dist_shardid_seq') AS last_shard_id \gset -SET citus.replication_model TO streaming; SET citus.shard_count TO 8; SET citus.shard_replication_factor TO 1; SET citus.replicate_reference_tables_on_activate TO off; @@ -62,6 +61,10 @@ SELECT create_reference_table('ref_table'); CREATE TABLE dist_table_1(a int primary key, b int references ref_table(a)); SELECT create_distributed_table('dist_table_1', 'a'); +CREATE SEQUENCE sequence; +CREATE TABLE reference_table (a int default nextval('sequence')); +SELECT create_reference_table('reference_table'); + -- update the node SELECT 1 FROM master_update_node((SELECT nodeid FROM pg_dist_node), 'localhost', :worker_2_port); @@ -355,7 +358,9 @@ DROP DATABASE db_to_drop; SELECT datname FROM pg_stat_activity WHERE application_name LIKE 'Citus Met%'; -- cleanup +DROP SEQUENCE sequence CASCADE; DROP TABLE ref_table; +DROP TABLE reference_table; TRUNCATE pg_dist_colocation; SELECT count(*) FROM (SELECT master_remove_node(nodename, nodeport) FROM pg_dist_node) t; ALTER SEQUENCE pg_catalog.pg_dist_groupid_seq RESTART :last_group_id; @@ -366,4 +371,3 @@ ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART :last_shard_id; RESET citus.shard_count; RESET citus.shard_replication_factor; -RESET citus.replication_model; diff --git a/src/test/regress/sql/multi_mx_partitioning.sql b/src/test/regress/sql/multi_mx_partitioning.sql index f4f5edb82..c24bcafc3 100644 --- a/src/test/regress/sql/multi_mx_partitioning.sql +++ b/src/test/regress/sql/multi_mx_partitioning.sql @@ -8,7 +8,6 @@ SET citus.shard_count TO 4; SET citus.shard_replication_factor TO 1; -- make sure wen can create partitioning tables in 
MX -SET citus.replication_model TO 'streaming'; SELECT start_metadata_sync_to_node('localhost', :worker_1_port); -- 1-) Distributing partitioned table @@ -56,7 +55,6 @@ ORDER BY SELECT inhrelid::regclass FROM pg_inherits WHERE inhparent = 'partitioning_test'::regclass ORDER BY 1; \c - - - :master_port -SET citus.replication_model TO 'streaming'; SET citus.shard_replication_factor TO 1; -- 2-) Creating partition of a distributed table @@ -86,7 +84,6 @@ ORDER BY SELECT inhrelid::regclass FROM pg_inherits WHERE inhparent = 'partitioning_test'::regclass ORDER BY 1; \c - - - :master_port -SET citus.replication_model TO 'streaming'; SET citus.shard_replication_factor TO 1; -- 3-) Attaching non distributed table to a distributed table @@ -125,7 +122,6 @@ SELECT * FROM partitioning_test ORDER BY 1; SELECT inhrelid::regclass FROM pg_inherits WHERE inhparent = 'partitioning_test'::regclass ORDER BY 1; \c - - - :master_port -SET citus.replication_model TO 'streaming'; SET citus.shard_replication_factor TO 1; -- 4-) Attaching distributed table to distributed table @@ -175,7 +171,6 @@ DROP TABLE partitioning_test; DROP TABLE IF EXISTS partitioning_test_2013; -- test schema drop with partitioned tables -SET citus.replication_model TO 'streaming'; SET citus.shard_replication_factor TO 1; CREATE SCHEMA partition_test; SET SEARCH_PATH TO partition_test; diff --git a/src/test/regress/sql/multi_mx_reference_table.sql b/src/test/regress/sql/multi_mx_reference_table.sql index 8bd1de9d3..e40f96c84 100644 --- a/src/test/regress/sql/multi_mx_reference_table.sql +++ b/src/test/regress/sql/multi_mx_reference_table.sql @@ -471,7 +471,6 @@ ORDER BY \c - - - :master_port SET citus.shard_count TO 6; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO streaming; CREATE TABLE colocated_table_test (value_1 int, value_2 float, value_3 text, value_4 timestamp); SELECT create_distributed_table('colocated_table_test', 'value_1'); diff --git a/src/test/regress/sql/multi_mx_repartition_udt_prepare.sql b/src/test/regress/sql/multi_mx_repartition_udt_prepare.sql index ce0240ca4..a2663c7a4 100644 --- a/src/test/regress/sql/multi_mx_repartition_udt_prepare.sql +++ b/src/test/regress/sql/multi_mx_repartition_udt_prepare.sql @@ -159,7 +159,6 @@ FUNCTION 1 test_udt_hash(test_udt); -- Distribute and populate the two tables. 
SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO streaming; SET citus.shard_count TO 3; SELECT create_distributed_table('repartition_udt', 'pk'); diff --git a/src/test/regress/sql/multi_mx_schema_support.sql b/src/test/regress/sql/multi_mx_schema_support.sql index 168aede74..627507e6a 100644 --- a/src/test/regress/sql/multi_mx_schema_support.sql +++ b/src/test/regress/sql/multi_mx_schema_support.sql @@ -229,7 +229,6 @@ CREATE SCHEMA "CiTuS.TeAeN"; SET citus.shard_count TO 4; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; -- in the first test make sure that we handle DDLs -- when search path is set diff --git a/src/test/regress/sql/multi_mx_transaction_recovery.sql b/src/test/regress/sql/multi_mx_transaction_recovery.sql index 4d50c9669..241382e49 100644 --- a/src/test/regress/sql/multi_mx_transaction_recovery.sql +++ b/src/test/regress/sql/multi_mx_transaction_recovery.sql @@ -2,7 +2,6 @@ SET citus.shard_count TO 4; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO streaming; CREATE TABLE test_recovery (x text); SELECT create_distributed_table('test_recovery', 'x'); diff --git a/src/test/regress/sql/multi_mx_truncate_from_worker.sql b/src/test/regress/sql/multi_mx_truncate_from_worker.sql index 00492246b..f76f238fe 100644 --- a/src/test/regress/sql/multi_mx_truncate_from_worker.sql +++ b/src/test/regress/sql/multi_mx_truncate_from_worker.sql @@ -6,7 +6,6 @@ SET citus.next_placement_id TO 2380000; SET citus.shard_replication_factor TO 1; SET citus.shard_count TO 6; -SET citus.replication_model TO streaming; CREATE TABLE "refer'ence_table"(id int PRIMARY KEY); SELECT create_reference_table('refer''ence_table'); diff --git a/src/test/regress/sql/multi_partitioning.sql b/src/test/regress/sql/multi_partitioning.sql index a02ceb942..8c905f944 100644 --- a/src/test/regress/sql/multi_partitioning.sql +++ b/src/test/regress/sql/multi_partitioning.sql @@ -1076,7 +1076,7 @@ IF EXISTS partitioning_locks_for_select; -- make sure we can create a partitioned table with streaming replication -SET citus.replication_model TO 'streaming'; +SET citus.shard_replication_factor TO 1; CREATE TABLE partitioning_test(id int, time date) PARTITION BY RANGE (time); CREATE TABLE partitioning_test_2009 PARTITION OF partitioning_test FOR VALUES FROM ('2009-01-01') TO ('2010-01-01'); SELECT create_distributed_table('partitioning_test', 'id'); @@ -1164,6 +1164,7 @@ GROUP BY ORDER BY 1,2; +SET citus.next_shard_id TO 1660300; -- test we don't deadlock when attaching and detaching partitions from partitioned -- tables with foreign keys @@ -1245,9 +1246,12 @@ ALTER TABLE partitioning_test DETACH PARTITION partitioning_test_2010; ALTER TABLE partitioning_test DETACH PARTITION partitioning_test_2011; ALTER TABLE partitioning_test DETACH PARTITION partitioning_test_2013; -DROP TABLE partitioning_test, partitioning_test_2008, partitioning_test_2009, - partitioning_test_2010, partitioning_test_2011, partitioning_test_2013, - reference_table, reference_table_2; +DROP TABLE partitioning_test_2008, partitioning_test_2009, partitioning_test_2010, + partitioning_test_2011, partitioning_test_2013, reference_table_2; +-- verify this doesn't crash and gives a debug message for dropped table +SET client_min_messages TO DEBUG1; +DROP TABLE partitioning_test, reference_table; +RESET client_min_messages; RESET SEARCH_PATH; diff --git a/src/test/regress/sql/multi_read_from_secondaries.sql b/src/test/regress/sql/multi_read_from_secondaries.sql index 
c9ef21b11..e50b41256 100644 --- a/src/test/regress/sql/multi_read_from_secondaries.sql +++ b/src/test/regress/sql/multi_read_from_secondaries.sql @@ -20,7 +20,7 @@ INSERT INTO source_table (a, b) VALUES (1, 5); INSERT INTO source_table (a, b) VALUES (10, 10); -- simulate actually having secondary nodes -SELECT nodeid, groupid, nodename, nodeport, noderack, isactive, noderole, nodecluster FROM pg_dist_node; +SELECT nodeid, groupid, nodename, nodeport, noderack, isactive, noderole, nodecluster FROM pg_dist_node ORDER BY 1, 2; UPDATE pg_dist_node SET noderole = 'secondary'; \c "dbname=regression options='-c\ citus.use_secondary_nodes=always'" diff --git a/src/test/regress/sql/multi_replicate_reference_table.sql b/src/test/regress/sql/multi_replicate_reference_table.sql index 7161fb3fe..497f38619 100644 --- a/src/test/regress/sql/multi_replicate_reference_table.sql +++ b/src/test/regress/sql/multi_replicate_reference_table.sql @@ -234,7 +234,6 @@ SELECT create_reference_table('replicate_reference_table_reference_one'); SET citus.shard_count TO 1; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; CREATE TABLE replicate_reference_table_reference_two(column1 int); @@ -591,7 +590,7 @@ SET citus.replicate_reference_tables_on_activate TO off; SELECT 1 FROM master_remove_node('localhost', :worker_2_port); SELECT 1 FROM master_add_node('localhost', :worker_2_port); -SET citus.replication_model TO streaming; +SET citus.shard_replication_factor TO 1; SELECT start_metadata_sync_to_node('localhost', :worker_1_port); SELECT master_copy_shard_placement( diff --git a/src/test/regress/sql/multi_row_insert.sql b/src/test/regress/sql/multi_row_insert.sql index bc2eb9598..8722ad26e 100644 --- a/src/test/regress/sql/multi_row_insert.sql +++ b/src/test/regress/sql/multi_row_insert.sql @@ -10,7 +10,7 @@ SELECT create_distributed_table('source_table_xyz', 'key', 'range'); CALL public.create_range_partitioned_shards('source_table_xyz', '{"(0,a)","(25,z)"}','{"(24,a)","(49,z)"}'); SELECT * FROM pg_dist_shard WHERE logicalrelid='source_table_xyz'::regclass::oid ORDER BY shardid; -SELECT shardid, nodename, nodeport FROM pg_dist_shard_placement WHERE EXISTS(SELECT shardid FROM pg_dist_shard WHERE shardid=pg_dist_shard_placement.shardid AND logicalrelid='source_table_xyz'::regclass::oid); +SELECT shardid, nodename, nodeport FROM pg_dist_shard_placement WHERE EXISTS(SELECT shardid FROM pg_dist_shard WHERE shardid=pg_dist_shard_placement.shardid AND logicalrelid='source_table_xyz'::regclass::oid) ORDER BY 1, 2, 3; INSERT INTO source_table_xyz VALUES ((0, 'a'), 1, (0, 'a')), ((1, 'b'), 2, (26, 'b')), diff --git a/src/test/regress/sql/multi_sequence_default.sql b/src/test/regress/sql/multi_sequence_default.sql new file mode 100644 index 000000000..a986e5ec8 --- /dev/null +++ b/src/test/regress/sql/multi_sequence_default.sql @@ -0,0 +1,242 @@ +-- +-- MULTI_SEQUENCE_DEFAULT +-- +-- Tests related to column defaults coming from a sequence +-- + +SET citus.next_shard_id TO 890000; +SET citus.shard_count TO 4; +SET citus.shard_replication_factor TO 1; +CREATE SCHEMA sequence_default; +SET search_path = sequence_default, public; + + +-- Cannot add a column involving DEFAULT nextval('..') because the table is not empty +CREATE SEQUENCE seq_0; +CREATE TABLE seq_test_0 (x int, y int); +SELECT create_distributed_table('seq_test_0','x'); +INSERT INTO seq_test_0 SELECT 1, s FROM generate_series(1, 50) s; +ALTER TABLE seq_test_0 ADD COLUMN z int DEFAULT nextval('seq_0'); +ALTER TABLE seq_test_0 ADD 
COLUMN z serial; +-- follow hint +ALTER TABLE seq_test_0 ADD COLUMN z int; +ALTER TABLE seq_test_0 ALTER COLUMN z SET DEFAULT nextval('seq_0'); +SELECT * FROM seq_test_0 ORDER BY 1, 2 LIMIT 5; +\d seq_test_0 + + +-- check that we can add serial pseudo-type columns +-- when metadata is not yet synced to workers +TRUNCATE seq_test_0; +ALTER TABLE seq_test_0 ADD COLUMN w00 smallserial; +ALTER TABLE seq_test_0 ADD COLUMN w01 serial2; +ALTER TABLE seq_test_0 ADD COLUMN w10 serial; +ALTER TABLE seq_test_0 ADD COLUMN w11 serial4; +ALTER TABLE seq_test_0 ADD COLUMN w20 bigserial; +ALTER TABLE seq_test_0 ADD COLUMN w21 serial8; + +-- check alter column type precaution +ALTER TABLE seq_test_0 ALTER COLUMN z TYPE bigint; +ALTER TABLE seq_test_0 ALTER COLUMN z TYPE smallint; + + +-- MX tests + +-- check that there's not problem with group ID cache +CREATE TABLE seq_test_4 (x int, y int); +SELECT create_distributed_table('seq_test_4','x'); +CREATE SEQUENCE seq_4; +ALTER TABLE seq_test_4 ADD COLUMN a int DEFAULT nextval('seq_4'); +SELECT start_metadata_sync_to_node('localhost', :worker_1_port); +DROP SEQUENCE seq_4 CASCADE; +TRUNCATE seq_test_4; +CREATE SEQUENCE seq_4; +ALTER TABLE seq_test_4 ADD COLUMN b int DEFAULT nextval('seq_4'); +-- on worker it should generate high sequence number +\c - - - :worker_1_port +INSERT INTO sequence_default.seq_test_4 VALUES (1,2) RETURNING *; +\c - - - :master_port +SET citus.shard_replication_factor TO 1; +SET search_path = sequence_default, public; +SELECT start_metadata_sync_to_node('localhost', :worker_1_port); + + +-- check sequence type consistency in all nodes +CREATE SEQUENCE seq_1; +-- type is bigint by default +\d seq_1 +CREATE TABLE seq_test_1 (x int, y int); +SELECT create_distributed_table('seq_test_1','x'); +ALTER TABLE seq_test_1 ADD COLUMN z int DEFAULT nextval('seq_1'); +-- type is changed to int +\d seq_1 +-- check insertion is within int bounds in the worker +\c - - - :worker_1_port +INSERT INTO sequence_default.seq_test_1 values (1, 2) RETURNING *; +\c - - - :master_port +SET citus.shard_replication_factor TO 1; +SET search_path = sequence_default, public; +SELECT start_metadata_sync_to_node('localhost', :worker_1_port); + + +-- check that we cannot add serial pseudo-type columns +-- when metadata is synced to workers +ALTER TABLE seq_test_1 ADD COLUMN w bigserial; + + +-- check for sequence type clashes +CREATE SEQUENCE seq_2; +CREATE TABLE seq_test_2 (x int, y bigint DEFAULT nextval('seq_2')); +-- should work +SELECT create_distributed_table('seq_test_2','x'); +DROP TABLE seq_test_2; +CREATE TABLE seq_test_2 (x int, y int DEFAULT nextval('seq_2')); +-- should work +SELECT create_distributed_table('seq_test_2','x'); +CREATE TABLE seq_test_2_0(x int, y smallint DEFAULT nextval('seq_2')); +-- shouldn't work +SELECT create_distributed_table('seq_test_2_0','x'); +DROP TABLE seq_test_2; +DROP TABLE seq_test_2_0; +-- should work +CREATE TABLE seq_test_2 (x int, y bigint DEFAULT nextval('seq_2')); +SELECT create_distributed_table('seq_test_2','x'); +DROP TABLE seq_test_2; +CREATE TABLE seq_test_2 (x int, y int DEFAULT nextval('seq_2'), z bigint DEFAULT nextval('seq_2')); +-- shouldn't work +SELECT create_distributed_table('seq_test_2','x'); + + +-- check rename is propagated properly +ALTER SEQUENCE seq_2 RENAME TO sequence_2; +-- check in the worker +\c - - - :worker_1_port +\d sequence_default.sequence_2 +\c - - - :master_port +SET citus.shard_replication_factor TO 1; +SET search_path = sequence_default, public; +SELECT 
start_metadata_sync_to_node('localhost', :worker_1_port); +-- check rename with another schema +-- we notice that schema is also propagated as one of the sequence's dependencies +CREATE SCHEMA sequence_default_0; +CREATE SEQUENCE sequence_default_0.seq_3; +CREATE TABLE seq_test_3 (x int, y bigint DEFAULT nextval('sequence_default_0.seq_3')); +SELECT create_distributed_table('seq_test_3', 'x'); +ALTER SEQUENCE sequence_default_0.seq_3 RENAME TO sequence_3; +-- check in the worker +\c - - - :worker_1_port +\d sequence_default_0.sequence_3 +\c - - - :master_port +SET citus.shard_replication_factor TO 1; +SET search_path = sequence_default, public; +SELECT start_metadata_sync_to_node('localhost', :worker_1_port); +DROP SEQUENCE sequence_default_0.sequence_3 CASCADE; +DROP SCHEMA sequence_default_0; + + +-- DROP SCHEMA problem: expected since we don't propagate DROP SCHEMA +CREATE TABLE seq_test_5 (x int, y int); +SELECT create_distributed_table('seq_test_5','x'); +CREATE SCHEMA sequence_default_1; +CREATE SEQUENCE sequence_default_1.seq_5; +ALTER TABLE seq_test_5 ADD COLUMN a int DEFAULT nextval('sequence_default_1.seq_5'); +DROP SCHEMA sequence_default_1 CASCADE; +-- sequence is gone from coordinator +INSERT INTO seq_test_5 VALUES (1, 2) RETURNING *; +-- but is still present on worker +\c - - - :worker_1_port +INSERT INTO sequence_default.seq_test_5 VALUES (1, 2) RETURNING *; +\c - - - :master_port +SET citus.shard_replication_factor TO 1; +SET search_path = sequence_default, public; +SELECT start_metadata_sync_to_node('localhost', :worker_1_port); +-- apply workaround +SELECT run_command_on_workers('DROP SCHEMA sequence_default_1 CASCADE'); +-- now the sequence is gone from the worker as well +\c - - - :worker_1_port +INSERT INTO sequence_default.seq_test_5 VALUES (1, 2) RETURNING *; +\c - - - :master_port +SET citus.shard_replication_factor TO 1; +SET search_path = sequence_default, public; +SELECT start_metadata_sync_to_node('localhost', :worker_1_port); + + +-- check some more complex cases +CREATE SEQUENCE seq_6; +CREATE TABLE seq_test_6 (x int, t timestamptz DEFAULT now(), s int DEFAULT nextval('seq_6'), m int) PARTITION BY RANGE (t); +SELECT create_distributed_table('seq_test_6','x'); +-- shouldn't work since x is the partition column +ALTER TABLE seq_test_6 ALTER COLUMN x SET DEFAULT nextval('seq_6'); +-- should work since both s and m have int type +ALTER TABLE seq_test_6 ALTER COLUMN m SET DEFAULT nextval('seq_6'); + + +-- It is possible for a partition to have a different DEFAULT than its parent +CREATE SEQUENCE seq_7; +CREATE TABLE seq_test_7 (x text, s bigint DEFAULT nextval('seq_7'), t timestamptz DEFAULT now()) PARTITION BY RANGE (t); +SELECT create_distributed_table('seq_test_7','x'); +CREATE SEQUENCE seq_7_par; +CREATE TABLE seq_test_7_par (x text, s bigint DEFAULT nextval('seq_7_par'), t timestamptz DEFAULT now()); +ALTER TABLE seq_test_7 ATTACH PARTITION seq_test_7_par FOR VALUES FROM ('2021-05-31') TO ('2021-06-01'); +-- check that both sequences are in worker +\c - - - :worker_1_port +\d sequence_default.seq_7 +\d sequence_default.seq_7_par +\c - - - :master_port +SET citus.shard_replication_factor TO 1; +SET search_path = sequence_default, public; +SELECT start_metadata_sync_to_node('localhost', :worker_1_port); + + +-- Check that various ALTER SEQUENCE commands +-- are not allowed for a distributed sequence for now +CREATE SEQUENCE seq_8; +CREATE SCHEMA sequence_default_8; +-- can change schema in a sequence not yet distributed +ALTER SEQUENCE seq_8 SET SCHEMA 
sequence_default_8; +ALTER SEQUENCE sequence_default_8.seq_8 SET SCHEMA sequence_default; +CREATE TABLE seq_test_8 (x int, y int DEFAULT nextval('seq_8')); +SELECT create_distributed_table('seq_test_8', 'x'); +-- cannot change sequence specifications +ALTER SEQUENCE seq_8 AS bigint; +ALTER SEQUENCE seq_8 INCREMENT BY 2; +ALTER SEQUENCE seq_8 MINVALUE 5 MAXVALUE 5000; +ALTER SEQUENCE seq_8 START WITH 6; +ALTER SEQUENCE seq_8 RESTART WITH 6; +ALTER SEQUENCE seq_8 NO CYCLE; +ALTER SEQUENCE seq_8 OWNED BY seq_test_7; +-- cannot change schema in a distributed sequence +ALTER SEQUENCE seq_8 SET SCHEMA sequence_default_8; +DROP SCHEMA sequence_default_8; + + +-- cannot use more than one sequence in a column default +CREATE SEQUENCE seq_9; +CREATE SEQUENCE seq_10; +CREATE TABLE seq_test_9 (x int, y int DEFAULT nextval('seq_9') - nextval('seq_10')); +SELECT create_distributed_table('seq_test_9', 'x'); + + +-- Check some cases when default is defined by +-- DEFAULT nextval('seq_name'::text) (not by DEFAULT nextval('seq_name')) +SELECT stop_metadata_sync_to_node('localhost', :worker_1_port); +CREATE SEQUENCE seq_11; +CREATE TABLE seq_test_10 (col0 int, col1 int DEFAULT nextval('seq_11'::text)); +SELECT create_reference_table('seq_test_10'); +INSERT INTO seq_test_10 VALUES (0); +CREATE TABLE seq_test_11 (col0 int, col1 bigint DEFAULT nextval('seq_11'::text)); +-- works but doesn't create seq_11 in the workers +SELECT start_metadata_sync_to_node('localhost', :worker_1_port); +-- works because there is no dependency created between seq_11 and seq_test_10 +SELECT create_distributed_table('seq_test_11', 'col1'); +-- insertion from workers fails +\c - - - :worker_1_port +INSERT INTO sequence_default.seq_test_10 VALUES (1); +\c - - - :master_port + +-- clean up +DROP TABLE sequence_default.seq_test_7_par; +DROP SCHEMA sequence_default CASCADE; +SELECT run_command_on_workers('DROP SCHEMA IF EXISTS sequence_default CASCADE'); +SELECT stop_metadata_sync_to_node('localhost', :worker_1_port); +SET search_path TO public; diff --git a/src/test/regress/sql/multi_table_ddl.sql b/src/test/regress/sql/multi_table_ddl.sql index 5e4fbf548..c9eed832e 100644 --- a/src/test/regress/sql/multi_table_ddl.sql +++ b/src/test/regress/sql/multi_table_ddl.sql @@ -2,6 +2,8 @@ -- MULTI_TABLE_DDL -- -- Tests around changing the schema and dropping of a distributed table +-- Test DEFAULTS coming from SERIAL pseudo-types, user-defined sequences +-- SET citus.next_shard_id TO 870000; @@ -66,7 +68,7 @@ SET citus.shard_count TO 2; SET citus.shard_replication_factor TO 1; SELECT create_distributed_table('testserialtable', 'group_id', 'hash'); --- should not be able to add additional serial columns +-- can add additional serial columns ALTER TABLE testserialtable ADD COLUMN other_id serial; -- and we shouldn't be able to change a distributed sequence's owner @@ -75,11 +77,16 @@ ALTER SEQUENCE testserialtable_id_seq OWNED BY NONE; -- or create a sequence with a distributed owner CREATE SEQUENCE standalone_sequence OWNED BY testserialtable.group_id; +-- EDIT: this doesn't error out for now in order to allow adding +-- new serial columns (they always come with owned_by command) +-- should be fixed later in ALTER SEQUENCE preprocessing + -- or even change a manual sequence to be owned by a distributed table CREATE SEQUENCE standalone_sequence; ALTER SEQUENCE standalone_sequence OWNED BY testserialtable.group_id; -- an edge case, but it's OK to change an owner to the same distributed table +-- EDIT: this doesn't work for now for a distributed 
sequence ALTER SEQUENCE testserialtable_id_seq OWNED BY testserialtable.id; -- drop distributed table @@ -89,3 +96,32 @@ DROP TABLE testserialtable; -- verify owned sequence is dropped \c - - - :worker_1_port \ds + +\c - - - :master_port + +-- test DEFAULT coming from SERIAL pseudo-types and user-defined sequences +CREATE SEQUENCE test_sequence_0; +CREATE SEQUENCE test_sequence_1; + +CREATE TABLE test_table (id1 int DEFAULT nextval('test_sequence_0')); +SELECT create_distributed_table('test_table', 'id1'); + +-- shouldn't work since it's partition column +ALTER TABLE test_table ALTER COLUMN id1 SET DEFAULT nextval('test_sequence_1'); + +-- test different plausible commands +ALTER TABLE test_table ADD COLUMN id2 int DEFAULT nextval('test_sequence_1'); +ALTER TABLE test_table ALTER COLUMN id2 DROP DEFAULT; +ALTER TABLE test_table ALTER COLUMN id2 SET DEFAULT nextval('test_sequence_1'); + +ALTER TABLE test_table ADD COLUMN id3 bigserial; + +-- shouldn't work since the above operations should be the only subcommands +ALTER TABLE test_table ADD COLUMN id4 int DEFAULT nextval('test_sequence_1') CHECK (id4 > 0); +ALTER TABLE test_table ADD COLUMN id4 int, ADD COLUMN id5 int DEFAULT nextval('test_sequence_1'); +ALTER TABLE test_table ALTER COLUMN id3 SET DEFAULT nextval('test_sequence_1'), ALTER COLUMN id2 DROP DEFAULT; +ALTER TABLE test_table ADD COLUMN id4 bigserial CHECK (id4 > 0); + +DROP TABLE test_table CASCADE; +DROP SEQUENCE test_sequence_0; +DROP SEQUENCE test_sequence_1; diff --git a/src/test/regress/sql/multi_test_helpers_superuser.sql b/src/test/regress/sql/multi_test_helpers_superuser.sql index 4026c2f00..0bd360b12 100644 --- a/src/test/regress/sql/multi_test_helpers_superuser.sql +++ b/src/test/regress/sql/multi_test_helpers_superuser.sql @@ -1,10 +1,3 @@ -CREATE OR REPLACE FUNCTION master_defer_delete_shards() - RETURNS int - LANGUAGE C STRICT - AS 'citus', $$master_defer_delete_shards$$; -COMMENT ON FUNCTION master_defer_delete_shards() - IS 'remove orphaned shards'; - CREATE OR REPLACE FUNCTION wait_until_metadata_sync(timeout INTEGER DEFAULT 15000) RETURNS void LANGUAGE C STRICT diff --git a/src/test/regress/sql/multi_transaction_recovery.sql b/src/test/regress/sql/multi_transaction_recovery.sql index 8aeb97ce6..054b85931 100644 --- a/src/test/regress/sql/multi_transaction_recovery.sql +++ b/src/test/regress/sql/multi_transaction_recovery.sql @@ -193,6 +193,18 @@ SELECT create_distributed_table('test_2pcskip', 'a'); INSERT INTO test_2pcskip SELECT i FROM generate_series(0, 5)i; SELECT recover_prepared_transactions(); +SELECT shardid INTO selected_shard FROM pg_dist_shard WHERE logicalrelid='test_2pcskip'::regclass LIMIT 1; +SELECT COUNT(*) FROM pg_dist_transaction; +BEGIN; +SET LOCAL citus.defer_drop_after_shard_move TO OFF; +SELECT citus_move_shard_placement((SELECT * FROM selected_shard), 'localhost', :worker_1_port, 'localhost', :worker_2_port); +COMMIT; +SELECT COUNT(*) FROM pg_dist_transaction; +SELECT recover_prepared_transactions(); + +SELECT citus_move_shard_placement((SELECT * FROM selected_shard), 'localhost', :worker_2_port, 'localhost', :worker_1_port); + + -- for the following test, ensure that 6 and 7 go to different shards on different workers SELECT count(DISTINCT nodeport) FROM pg_dist_shard_placement WHERE shardid IN (get_shard_id_for_distribution_column('test_2pcskip', 6),get_shard_id_for_distribution_column('test_2pcskip', 7)); -- only two of the connections will perform a write (INSERT) diff --git a/src/test/regress/sql/multi_truncate.sql 
b/src/test/regress/sql/multi_truncate.sql index f2153ce1c..25b582633 100644 --- a/src/test/regress/sql/multi_truncate.sql +++ b/src/test/regress/sql/multi_truncate.sql @@ -11,7 +11,7 @@ SET search_path TO multi_truncate; CREATE VIEW table_sizes AS SELECT c.relname as name, - pg_catalog.pg_size_pretty(pg_catalog.pg_table_size(c.oid)) as size + pg_catalog.pg_table_size(c.oid) > 0 as has_data FROM pg_catalog.pg_class c LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace WHERE c.relkind = 'r' @@ -292,7 +292,12 @@ INSERT INTO dist SELECT x,x FROM generate_series(1,10000) x; -- test that we do not cascade truncates to local referencing tables SELECT truncate_local_data_after_distributing_table('ref'); --- distribute the table and start testing allowed truncation queries +-- test that we do not allow distributing tables that have foreign keys to reference tables +SELECT create_distributed_table('dist','id'); +SHOW citus.multi_shard_modify_mode; + +-- distribute the table after a truncate +TRUNCATE dist; SELECT create_distributed_table('dist','id'); -- the following should truncate ref and dist diff --git a/src/test/regress/sql/multi_unsupported_worker_operations.sql b/src/test/regress/sql/multi_unsupported_worker_operations.sql index 5b07f2cac..7671f711c 100644 --- a/src/test/regress/sql/multi_unsupported_worker_operations.sql +++ b/src/test/regress/sql/multi_unsupported_worker_operations.sql @@ -15,7 +15,6 @@ ALTER SEQUENCE pg_catalog.pg_dist_colocationid_seq RESTART 150000; -- Prepare the environment SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO 'streaming'; SET citus.shard_count TO 5; -- Create test tables @@ -238,4 +237,3 @@ SELECT worker_drop_distributed_table(logicalrelid::regclass::text) FROM pg_dist_ ALTER SEQUENCE pg_catalog.pg_dist_colocationid_seq RESTART :last_colocation_id; RESET citus.shard_replication_factor; -RESET citus.replication_model; diff --git a/src/test/regress/sql/multi_utilities.sql b/src/test/regress/sql/multi_utilities.sql index 0bf28ca3e..5500fc0bf 100644 --- a/src/test/regress/sql/multi_utilities.sql +++ b/src/test/regress/sql/multi_utilities.sql @@ -202,5 +202,9 @@ SELECT worker_hash('(1, 2)'::test_composite_type); SELECT citus_truncate_trigger(); +-- make sure worker_create_or_alter_role does not crash with NULL input +SELECT worker_create_or_alter_role(NULL, NULL, NULL); +SELECT worker_create_or_alter_role(NULL, 'create role dontcrash', NULL); + -- confirm that citus_create_restore_point works SELECT 1 FROM citus_create_restore_point('regression-test'); diff --git a/src/test/regress/sql/mx_coordinator_shouldhaveshards.sql b/src/test/regress/sql/mx_coordinator_shouldhaveshards.sql index 377b6acbe..9c0fb6035 100644 --- a/src/test/regress/sql/mx_coordinator_shouldhaveshards.sql +++ b/src/test/regress/sql/mx_coordinator_shouldhaveshards.sql @@ -2,7 +2,6 @@ CREATE SCHEMA mx_coordinator_shouldhaveshards; SET search_path TO mx_coordinator_shouldhaveshards; SET citus.shard_replication_factor to 1; -SET citus.replication_model TO streaming; SET client_min_messages TO WARNING; SELECT 1 FROM master_add_node('localhost', :master_port, groupid => 0); RESET client_min_messages; diff --git a/src/test/regress/sql/mx_foreign_key_to_reference_table.sql b/src/test/regress/sql/mx_foreign_key_to_reference_table.sql index b3d6dd3fe..ea0110da2 100644 --- a/src/test/regress/sql/mx_foreign_key_to_reference_table.sql +++ b/src/test/regress/sql/mx_foreign_key_to_reference_table.sql @@ -4,7 +4,6 @@ SET citus.shard_replication_factor TO 1; SET 
citus.shard_count TO 8; SET citus.next_shard_id TO 7000000; SET citus.next_placement_id TO 7000000; -SET citus.replication_model TO streaming; -- Setup the view so that we can check if the foreign keys are created properly CREATE TYPE foreign_details AS (name text, relid text, refd_relid text); diff --git a/src/test/regress/sql/recursive_dml_queries_mx.sql b/src/test/regress/sql/recursive_dml_queries_mx.sql index dc80b54c1..88b21e0b4 100644 --- a/src/test/regress/sql/recursive_dml_queries_mx.sql +++ b/src/test/regress/sql/recursive_dml_queries_mx.sql @@ -2,7 +2,6 @@ CREATE SCHEMA recursive_dml_queries_mx; SET search_path TO recursive_dml_queries_mx, public; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO streaming; CREATE TABLE recursive_dml_queries_mx.distributed_table (tenant_id text, dept int, info jsonb); SELECT create_distributed_table('distributed_table', 'tenant_id'); @@ -156,4 +155,3 @@ SET search_path TO recursive_dml_queries_mx, public; RESET client_min_messages; DROP SCHEMA recursive_dml_queries_mx CASCADE; RESET citus.shard_replication_factor; -RESET citus.replication_model; diff --git a/src/test/regress/sql/shard_move_deferred_delete.sql b/src/test/regress/sql/shard_move_deferred_delete.sql index 1b5ecb5a5..a052590d5 100644 --- a/src/test/regress/sql/shard_move_deferred_delete.sql +++ b/src/test/regress/sql/shard_move_deferred_delete.sql @@ -31,8 +31,13 @@ SELECT run_command_on_workers($cmd$ SELECT count(*) FROM pg_class WHERE relname = 't1_20000000'; $cmd$); +-- Make sure this cannot be run in a transaction +BEGIN; +CALL citus_cleanup_orphaned_shards(); +COMMIT; + -- execute delayed removal -SELECT public.master_defer_delete_shards(); +CALL citus_cleanup_orphaned_shards(); -- we expect the shard to be on only the second worker SELECT run_command_on_workers($cmd$ @@ -70,6 +75,10 @@ SELECT run_command_on_workers($cmd$ SELECT count(*) FROM pg_class WHERE relname = 't1_20000000'; $cmd$); +-- master_move_shard_placement automatically cleans up orphaned shards if +-- needed. +SELECT master_move_shard_placement(20000000, 'localhost', :worker_2_port, 'localhost', :worker_1_port); + SELECT run_command_on_workers($cmd$ -- override the function for testing purpose @@ -95,8 +104,6 @@ set citus.check_available_space_before_move to false; SELECT master_move_shard_placement(20000001, 'localhost', :worker_2_port, 'localhost', :worker_1_port); ROLLBACK; - --- we expect shard 0 to be on both of the workers SELECT run_command_on_workers($cmd$ SELECT count(*) FROM pg_class WHERE relname = 't1_20000000'; $cmd$); diff --git a/src/test/regress/sql/shard_rebalancer.sql b/src/test/regress/sql/shard_rebalancer.sql index ec8751d3b..0e41bac74 100644 --- a/src/test/regress/sql/shard_rebalancer.sql +++ b/src/test/regress/sql/shard_rebalancer.sql @@ -13,7 +13,10 @@ SELECT 1 FROM master_add_node('localhost', :master_port, groupId=>0); -- should just be noops even if we add the coordinator to the pg_dist_node SELECT rebalance_table_shards('dist_table_test'); +CALL citus_cleanup_orphaned_shards(); SELECT rebalance_table_shards(); +CALL citus_cleanup_orphaned_shards(); + -- test that calling rebalance_table_shards without specifying relation -- wouldn't move shard of the citus local table. 
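-- Editor's sketch (illustrative only, not part of the patch): orphaned placements
-- left behind by deferred shard drops are now removed with the
-- citus_cleanup_orphaned_shards() procedure rather than the dropped
-- master_defer_delete_shards() UDF, and the shard_move_deferred_delete test above
-- expects it to refuse to run inside an explicit transaction block.
BEGIN;
CALL citus_cleanup_orphaned_shards();   -- expected to error inside a transaction
ROLLBACK;
CALL citus_cleanup_orphaned_shards();   -- run on its own, it drops the orphaned shards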
@@ -22,13 +25,27 @@ SELECT citus_add_local_table_to_metadata('citus_local_table'); INSERT INTO citus_local_table VALUES (1, 2); SELECT rebalance_table_shards(); +CALL citus_cleanup_orphaned_shards(); -- show that citus local table shard is still on the coordinator SELECT tablename FROM pg_catalog.pg_tables where tablename like 'citus_local_table_%'; -- also check that we still can access shard relation, not the shell table SELECT count(*) FROM citus_local_table; +-- verify drain_node uses the localhostname guc by seeing it fail to connect to a non-existing name +ALTER SYSTEM SET citus.local_hostname TO 'foobar'; +SELECT pg_reload_conf(); +SELECT pg_sleep(.1); -- wait to make sure the config has changed before running the GUC + SELECT master_drain_node('localhost', :master_port); +CALL citus_cleanup_orphaned_shards(); + +ALTER SYSTEM RESET citus.local_hostname; +SELECT pg_reload_conf(); +SELECT pg_sleep(.1); -- wait to make sure the config has changed before running the GUC + +SELECT master_drain_node('localhost', :master_port); +CALL citus_cleanup_orphaned_shards(); -- show that citus local table shard is still on the coordinator SELECT tablename FROM pg_catalog.pg_tables where tablename like 'citus_local_table_%'; @@ -44,9 +61,24 @@ CREATE TABLE dist_table_test_2(a int); SET citus.shard_count TO 4; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO "statement"; SELECT create_distributed_table('dist_table_test_2', 'a'); +-- Mark tables as coordinator replicated in order to be able to test replicate_table_shards +UPDATE pg_dist_partition SET repmodel='c' WHERE logicalrelid IN + ('dist_table_test_2'::regclass); + +-- replicate_table_shards should fail when the hostname GUC is set to a non-reachable node +ALTER SYSTEM SET citus.local_hostname TO 'foobar'; +SELECT pg_reload_conf(); +SELECT pg_sleep(.1); -- wait to make sure the config has changed before running the GUC + +SET citus.shard_replication_factor TO 2; +SELECT replicate_table_shards('dist_table_test_2', max_shard_copies := 4, shard_transfer_mode:='block_writes'); + +ALTER SYSTEM RESET citus.local_hostname; +SELECT pg_reload_conf(); +SELECT pg_sleep(.1); -- wait to make sure the config has changed before running the GUC + -- replicate reference table should ignore the coordinator SET citus.shard_replication_factor TO 2; SELECT replicate_table_shards('dist_table_test_2', max_shard_copies := 4, shard_transfer_mode:='block_writes'); @@ -54,9 +86,11 @@ SELECT replicate_table_shards('dist_table_test_2', max_shard_copies := 4, shar DROP TABLE dist_table_test, dist_table_test_2, ref_table_test; RESET citus.shard_count; RESET citus.shard_replication_factor; -RESET citus.replication_model; -- Create a user to test multiuser usage of rebalancer functions +-- We explicitely don't create this user on worker nodes yet, so we can +-- test some more error handling. We create them later there. 
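-- Editor's sketch (illustrative only, not part of the patch): the new
-- citus.local_hostname GUC controls how a node connects back to itself, so
-- pointing it at an unresolvable name makes locally initiated operations such as
-- master_drain_node or replicate_table_shards fail, which is what the 'foobar'
-- tests above rely on. The hostname below is just a placeholder.
ALTER SYSTEM SET citus.local_hostname TO 'does-not-resolve';
SELECT pg_reload_conf();
SELECT pg_sleep(.1);   -- give the reload a moment to take effect
SELECT master_drain_node('localhost', :master_port);   -- now fails to connect to itself
ALTER SYSTEM RESET citus.local_hostname;
SELECT pg_reload_conf();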
+SET citus.enable_create_role_propagation TO OFF; CREATE USER testrole; GRANT ALL ON SCHEMA public TO testrole; @@ -65,7 +99,8 @@ CREATE OR REPLACE FUNCTION shard_placement_rebalance_array( shard_placement_list json[], threshold float4 DEFAULT 0, max_shard_moves int DEFAULT 1000000, - drain_only bool DEFAULT false + drain_only bool DEFAULT false, + improvement_threshold float4 DEFAULT 0.5 ) RETURNS json[] AS 'citus' @@ -259,6 +294,7 @@ SELECT master_create_distributed_table('replication_test_table', 'int_column', ' CREATE VIEW replication_test_table_placements_per_node AS SELECT count(*) FROM pg_dist_shard_placement NATURAL JOIN pg_dist_shard WHERE logicalrelid = 'replication_test_table'::regclass + AND shardstate != 4 GROUP BY nodename, nodeport ORDER BY nodename, nodeport; @@ -337,6 +373,7 @@ SELECT master_create_distributed_table('rebalance_test_table', 'int_column', 'ap CREATE VIEW table_placements_per_node AS SELECT nodeport, logicalrelid::regclass, count(*) FROM pg_dist_shard_placement NATURAL JOIN pg_dist_shard +WHERE shardstate != 4 GROUP BY logicalrelid::regclass, nodename, nodeport ORDER BY logicalrelid::regclass, nodename, nodeport; @@ -359,6 +396,7 @@ AS $$ pg_dist_shard_placement src USING (shardid), (SELECT nodename, nodeport FROM pg_dist_shard_placement ORDER BY nodeport DESC LIMIT 1) dst WHERE src.nodeport < dst.nodeport AND s.logicalrelid = rel::regclass; + CALL citus_cleanup_orphaned_shards(); $$; CALL create_unbalanced_shards('rebalance_test_table'); @@ -376,6 +414,26 @@ WHERE logicalrelid = 'rebalance_test_table'::regclass; SELECT * FROM table_placements_per_node; +-- check rebalances use the localhost guc by seeing it fail when the GUC is set to a non-existing host +ALTER SYSTEM SET citus.local_hostname TO 'foobar'; +SELECT pg_reload_conf(); +SELECT pg_sleep(.1); -- wait to make sure the config has changed before running the GUC + +SELECT rebalance_table_shards('rebalance_test_table', + excluded_shard_list := excluded_shard_list, + threshold := 0, + shard_transfer_mode:='block_writes') +FROM ( + SELECT (array_agg(DISTINCT shardid ORDER BY shardid))[1:4] AS excluded_shard_list + FROM pg_dist_shard + WHERE logicalrelid = 'rebalance_test_table'::regclass + ) T; +CALL citus_cleanup_orphaned_shards(); + +ALTER SYSTEM RESET citus.local_hostname; +SELECT pg_reload_conf(); +SELECT pg_sleep(.1); -- wait to make sure the config has changed before running the GUC + -- Check excluded_shard_list by excluding four shards with smaller ids SELECT rebalance_table_shards('rebalance_test_table', @@ -387,33 +445,61 @@ FROM ( FROM pg_dist_shard WHERE logicalrelid = 'rebalance_test_table'::regclass ) T; +CALL citus_cleanup_orphaned_shards(); SELECT * FROM table_placements_per_node; -- Check that max_shard_moves limits number of move operations -- First check that we error if not table owner +-- Turn on NOTICE messages +SET ROLE testrole; +-- Make sure that rebalance is stopped if source or target nodes are +-- unresponsive. 
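-- Editor's sketch (illustrative only, not part of the patch): with deferred drop,
-- placements that were moved away stay in the catalog with shardstate 4
-- ("to be deleted") until cleanup runs, which is why the helper views above gain a
-- shardstate filter. A query of the same shape; names are examples only:
SELECT nodename, nodeport, count(*)
FROM pg_dist_shard_placement NATURAL JOIN pg_dist_shard
WHERE logicalrelid = 'rebalance_test_table'::regclass
  AND shardstate != 4   -- ignore orphaned placements awaiting cleanup
GROUP BY nodename, nodeport
ORDER BY nodename, nodeport;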
+SELECT rebalance_table_shards('rebalance_test_table', + shard_transfer_mode:='block_writes'); +\c - - - :worker_1_port +SET citus.enable_create_role_propagation TO OFF; +CREATE USER testrole; +GRANT ALL ON SCHEMA public TO testrole; +\c - - - :master_port +SET client_min_messages TO WARNING; +SET ROLE testrole; +SELECT rebalance_table_shards('rebalance_test_table', + shard_transfer_mode:='block_writes'); +\c - - - :worker_2_port +SET citus.enable_create_role_propagation TO OFF; +CREATE USER testrole; +GRANT ALL ON SCHEMA public TO testrole; +\c - - - :master_port +SET client_min_messages TO WARNING; +SET citus.next_shard_id TO 123010; SET ROLE testrole; SELECT rebalance_table_shards('rebalance_test_table', - threshold := 0, max_shard_moves := 1, shard_transfer_mode:='block_writes'); RESET ROLE; +-- Confirm no moves took place at all during these errors +SELECT * FROM table_placements_per_node; +CALL citus_cleanup_orphaned_shards(); SELECT rebalance_table_shards('rebalance_test_table', threshold := 0, max_shard_moves := 1, shard_transfer_mode:='block_writes'); +CALL citus_cleanup_orphaned_shards(); SELECT * FROM table_placements_per_node; -- Check that threshold=1 doesn't move any shards SELECT rebalance_table_shards('rebalance_test_table', threshold := 1, shard_transfer_mode:='block_writes'); +CALL citus_cleanup_orphaned_shards(); SELECT * FROM table_placements_per_node; -- Move the remaining shards using threshold=0 SELECT rebalance_table_shards('rebalance_test_table', threshold := 0); +CALL citus_cleanup_orphaned_shards(); SELECT * FROM table_placements_per_node; @@ -421,6 +507,7 @@ SELECT * FROM table_placements_per_node; -- any effects. SELECT rebalance_table_shards('rebalance_test_table', threshold := 0, shard_transfer_mode:='block_writes'); +CALL citus_cleanup_orphaned_shards(); SELECT * FROM table_placements_per_node; @@ -515,9 +602,11 @@ SELECT COUNT(*) FROM imbalanced_table; -- Try force_logical SELECT rebalance_table_shards('imbalanced_table', threshold:=0, shard_transfer_mode:='force_logical'); +CALL citus_cleanup_orphaned_shards(); -- Test rebalance operation SELECT rebalance_table_shards('imbalanced_table', threshold:=0, shard_transfer_mode:='block_writes'); +CALL citus_cleanup_orphaned_shards(); -- Confirm rebalance -- Shard counts in each node after rebalance @@ -536,11 +625,42 @@ CREATE TABLE colocated_rebalance_test(id integer); CREATE TABLE colocated_rebalance_test2(id integer); SELECT create_distributed_table('colocated_rebalance_test', 'id'); + +-- make sure that we do not allow shards on target nodes +-- that are not eligable to move shards + +-- Try to move shards to a non-existing node +SELECT master_move_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', 10000, 'block_writes') +FROM pg_dist_shard_placement +WHERE nodeport = :worker_2_port; +CALL citus_cleanup_orphaned_shards(); + +-- Try to move shards to a node where shards are not allowed +SELECT * from master_set_node_property('localhost', :worker_1_port, 'shouldhaveshards', false); +SELECT master_move_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'block_writes') +FROM pg_dist_shard_placement +WHERE nodeport = :worker_2_port; +SELECT * from master_set_node_property('localhost', :worker_1_port, 'shouldhaveshards', true); + +-- Try to move shards to a non-active node +UPDATE pg_dist_node SET isactive = false WHERE nodeport = :worker_1_port; +SELECT master_move_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'block_writes') 
+FROM pg_dist_shard_placement +WHERE nodeport = :worker_2_port; +UPDATE pg_dist_node SET isactive = true WHERE nodeport = :worker_1_port; + +-- Try to move shards to a secondary node +UPDATE pg_dist_node SET noderole = 'secondary' WHERE nodeport = :worker_1_port; +SELECT master_move_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'block_writes') +FROM pg_dist_shard_placement +WHERE nodeport = :worker_2_port; +UPDATE pg_dist_node SET noderole = 'primary' WHERE nodeport = :worker_1_port; + -- Move all shards to worker1 SELECT master_move_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'block_writes') FROM pg_dist_shard_placement WHERE nodeport = :worker_2_port; - +CALL citus_cleanup_orphaned_shards(); SELECT create_distributed_table('colocated_rebalance_test2', 'id'); @@ -551,6 +671,7 @@ SELECT * FROM public.table_placements_per_node; SELECT * FROM get_rebalance_table_shards_plan('colocated_rebalance_test', threshold := 0, drain_only := true); -- Running with drain_only shouldn't do anything SELECT * FROM rebalance_table_shards('colocated_rebalance_test', threshold := 0, shard_transfer_mode := 'block_writes', drain_only := true); +CALL citus_cleanup_orphaned_shards(); -- Confirm that nothing changed SELECT * FROM public.table_placements_per_node; @@ -563,10 +684,30 @@ SELECT * FROM get_rebalance_table_shards_plan('colocated_rebalance_test', rebala SELECT * FROM get_rebalance_progress(); -- Actually do the rebalance SELECT * FROM rebalance_table_shards('colocated_rebalance_test', threshold := 0, shard_transfer_mode := 'block_writes'); +CALL citus_cleanup_orphaned_shards(); -- Check that we can call this function without a crash SELECT * FROM get_rebalance_progress(); --- Confirm that the nodes are now there +-- Confirm that the shards are now there +SELECT * FROM public.table_placements_per_node; + +CALL citus_cleanup_orphaned_shards(); +select * from pg_dist_placement; + + +-- Move all shards to worker1 again +SELECT master_move_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'block_writes') +FROM pg_dist_shard NATURAL JOIN pg_dist_placement NATURAL JOIN pg_dist_node +WHERE nodeport = :worker_2_port AND logicalrelid = 'colocated_rebalance_test'::regclass; + +-- Confirm that the shards are now all on worker1 +SELECT * FROM public.table_placements_per_node; + +-- Explicitly don't run citus_cleanup_orphaned_shards, rebalance_table_shards -- should do that automatically.
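-- Editor's sketch (illustrative only, not part of the patch): shard moves now
-- validate the target node, so the tests above expect moves towards a node that is
-- missing from pg_dist_node, marked shouldhaveshards=false, inactive, or a
-- secondary to be rejected. Port 10000 stands in for a non-existent node.
SELECT master_move_shard_placement(shardid, 'localhost', :worker_2_port,
                                   'localhost', 10000, 'block_writes')
FROM pg_dist_shard_placement
WHERE nodeport = :worker_2_port;   -- expected to error out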
+SELECT * FROM rebalance_table_shards('colocated_rebalance_test', threshold := 0, shard_transfer_mode := 'block_writes'); + +-- Confirm that the shards are now moved SELECT * FROM public.table_placements_per_node; @@ -580,18 +721,22 @@ SELECT * from master_set_node_property('localhost', :worker_2_port, 'shouldhaves SELECT * FROM get_rebalance_table_shards_plan('colocated_rebalance_test', threshold := 0); SELECT * FROM rebalance_table_shards('colocated_rebalance_test', threshold := 0, shard_transfer_mode := 'block_writes'); +CALL citus_cleanup_orphaned_shards(); SELECT * FROM public.table_placements_per_node; SELECT * FROM get_rebalance_table_shards_plan('non_colocated_rebalance_test', threshold := 0); SELECT * FROM rebalance_table_shards('non_colocated_rebalance_test', threshold := 0, shard_transfer_mode := 'block_writes'); +CALL citus_cleanup_orphaned_shards(); SELECT * FROM public.table_placements_per_node; -- Put shards back SELECT * from master_set_node_property('localhost', :worker_2_port, 'shouldhaveshards', true); SELECT * FROM rebalance_table_shards('colocated_rebalance_test', threshold := 0, shard_transfer_mode := 'block_writes'); +CALL citus_cleanup_orphaned_shards(); SELECT * FROM public.table_placements_per_node; SELECT * FROM rebalance_table_shards('non_colocated_rebalance_test', threshold := 0, shard_transfer_mode := 'block_writes'); +CALL citus_cleanup_orphaned_shards(); SELECT * FROM public.table_placements_per_node; -- testing behaviour when setting shouldhaveshards to false and rebalancing all @@ -599,11 +744,13 @@ SELECT * FROM public.table_placements_per_node; SELECT * from master_set_node_property('localhost', :worker_2_port, 'shouldhaveshards', false); SELECT * FROM get_rebalance_table_shards_plan(threshold := 0, drain_only := true); SELECT * FROM rebalance_table_shards(threshold := 0, shard_transfer_mode := 'block_writes', drain_only := true); +CALL citus_cleanup_orphaned_shards(); SELECT * FROM public.table_placements_per_node; -- Put shards back SELECT * from master_set_node_property('localhost', :worker_2_port, 'shouldhaveshards', true); SELECT * FROM rebalance_table_shards(threshold := 0, shard_transfer_mode := 'block_writes'); +CALL citus_cleanup_orphaned_shards(); SELECT * FROM public.table_placements_per_node; -- testing behaviour when setting shouldhaveshards to false and rebalancing all @@ -611,11 +758,13 @@ SELECT * FROM public.table_placements_per_node; SELECT * from master_set_node_property('localhost', :worker_2_port, 'shouldhaveshards', false); SELECT * FROM get_rebalance_table_shards_plan(threshold := 0); SELECT * FROM rebalance_table_shards(threshold := 0, shard_transfer_mode := 'block_writes'); +CALL citus_cleanup_orphaned_shards(); SELECT * FROM public.table_placements_per_node; -- Put shards back SELECT * from master_set_node_property('localhost', :worker_2_port, 'shouldhaveshards', true); SELECT * FROM rebalance_table_shards(threshold := 0, shard_transfer_mode := 'block_writes'); +CALL citus_cleanup_orphaned_shards(); SELECT * FROM public.table_placements_per_node; -- Make it a data node again @@ -623,12 +772,14 @@ SELECT * from master_set_node_property('localhost', :worker_2_port, 'shouldhaves -- testing behaviour of master_drain_node SELECT * from master_drain_node('localhost', :worker_2_port, shard_transfer_mode := 'block_writes'); +CALL citus_cleanup_orphaned_shards(); select shouldhaveshards from pg_dist_node where nodeport = :worker_2_port; SELECT * FROM public.table_placements_per_node; -- Put shards back SELECT * from 
master_set_node_property('localhost', :worker_2_port, 'shouldhaveshards', true); SELECT * FROM rebalance_table_shards(threshold := 0, shard_transfer_mode := 'block_writes'); +CALL citus_cleanup_orphaned_shards(); SELECT * FROM public.table_placements_per_node; @@ -647,6 +798,7 @@ DROP USER testrole; -- Test costs set citus.shard_count = 4; +SET citus.next_shard_id TO 123040; CREATE TABLE tab (x int); SELECT create_distributed_table('tab','x'); -- The following numbers are chosen such that they are placed on different @@ -655,6 +807,7 @@ INSERT INTO tab SELECT 1 from generate_series(1, 30000); INSERT INTO tab SELECT 2 from generate_series(1, 10000); INSERT INTO tab SELECT 3 from generate_series(1, 10000); INSERT INTO tab SELECT 6 from generate_series(1, 10000); +VACUUM FULL tab; ANALYZE tab; \c - - - :worker_1_port @@ -695,12 +848,15 @@ SELECT * FROM get_rebalance_table_shards_plan('tab', rebalance_strategy := 'by_d SELECT * FROM get_rebalance_table_shards_plan('tab', rebalance_strategy := 'by_disk_size', threshold := 0); SELECT * FROM rebalance_table_shards('tab', shard_transfer_mode:='block_writes'); +CALL citus_cleanup_orphaned_shards(); SELECT * FROM public.table_placements_per_node; SELECT * FROM rebalance_table_shards('tab', rebalance_strategy := 'by_disk_size', shard_transfer_mode:='block_writes'); +CALL citus_cleanup_orphaned_shards(); SELECT * FROM public.table_placements_per_node; SELECT * FROM rebalance_table_shards('tab', rebalance_strategy := 'by_disk_size', shard_transfer_mode:='block_writes', threshold := 0); +CALL citus_cleanup_orphaned_shards(); SELECT * FROM public.table_placements_per_node; -- Check that sizes of colocated tables are added together for rebalances @@ -712,6 +868,7 @@ INSERT INTO tab2 SELECT 1 from generate_series(1, 0); INSERT INTO tab2 SELECT 2 from generate_series(1, 60000); INSERT INTO tab2 SELECT 3 from generate_series(1, 10000); INSERT INTO tab2 SELECT 6 from generate_series(1, 10000); +VACUUM FULL tab, tab2; ANALYZE tab, tab2; \c - - - :worker_1_port @@ -747,7 +904,10 @@ WHERE table_schema = 'public' \c - - - :master_port SELECT * FROM get_rebalance_table_shards_plan('tab', rebalance_strategy := 'by_disk_size'); +-- supports improvement_threshold +SELECT * FROM get_rebalance_table_shards_plan('tab', rebalance_strategy := 'by_disk_size', improvement_threshold := 0); SELECT * FROM rebalance_table_shards('tab', rebalance_strategy := 'by_disk_size', shard_transfer_mode:='block_writes'); +CALL citus_cleanup_orphaned_shards(); SELECT * FROM public.table_placements_per_node; ANALYZE tab, tab2; @@ -785,48 +945,53 @@ WHERE table_schema = 'public' DROP TABLE tab2; -CREATE OR REPLACE FUNCTION capacity_high_worker_1(nodeidarg int) +CREATE OR REPLACE FUNCTION capacity_high_worker_2(nodeidarg int) RETURNS real AS $$ SELECT - (CASE WHEN nodeport = 57637 THEN 1000 ELSE 1 END)::real + (CASE WHEN nodeport = 57638 THEN 1000 ELSE 1 END)::real FROM pg_dist_node where nodeid = nodeidarg $$ LANGUAGE sql; +\set VERBOSITY terse + SELECT citus_add_rebalance_strategy( - 'capacity_high_worker_1', + 'capacity_high_worker_2', 'citus_shard_cost_1', - 'capacity_high_worker_1', + 'capacity_high_worker_2', 'citus_shard_allowed_on_node_true', 0 ); -SELECT * FROM get_rebalance_table_shards_plan('tab', rebalance_strategy := 'capacity_high_worker_1'); -SELECT * FROM rebalance_table_shards('tab', rebalance_strategy := 'capacity_high_worker_1', shard_transfer_mode:='block_writes'); +SELECT * FROM get_rebalance_table_shards_plan('tab', rebalance_strategy := 'capacity_high_worker_2'); 
+SELECT * FROM rebalance_table_shards('tab', rebalance_strategy := 'capacity_high_worker_2', shard_transfer_mode:='block_writes'); +CALL citus_cleanup_orphaned_shards(); SELECT * FROM public.table_placements_per_node; -SELECT citus_set_default_rebalance_strategy('capacity_high_worker_1'); +SELECT citus_set_default_rebalance_strategy('capacity_high_worker_2'); SELECT * FROM get_rebalance_table_shards_plan('tab'); SELECT * FROM rebalance_table_shards('tab', shard_transfer_mode:='block_writes'); +CALL citus_cleanup_orphaned_shards(); SELECT * FROM public.table_placements_per_node; -CREATE FUNCTION only_worker_2(shardid bigint, nodeidarg int) +CREATE FUNCTION only_worker_1(shardid bigint, nodeidarg int) RETURNS boolean AS $$ SELECT - (CASE WHEN nodeport = 57638 THEN TRUE ELSE FALSE END) + (CASE WHEN nodeport = 57637 THEN TRUE ELSE FALSE END) FROM pg_dist_node where nodeid = nodeidarg $$ LANGUAGE sql; SELECT citus_add_rebalance_strategy( - 'only_worker_2', + 'only_worker_1', 'citus_shard_cost_1', 'citus_node_capacity_1', - 'only_worker_2', + 'only_worker_1', 0 ); -SELECT citus_set_default_rebalance_strategy('only_worker_2'); +SELECT citus_set_default_rebalance_strategy('only_worker_1'); SELECT * FROM get_rebalance_table_shards_plan('tab'); SELECT * FROM rebalance_table_shards('tab', shard_transfer_mode:='block_writes'); +CALL citus_cleanup_orphaned_shards(); SELECT * FROM public.table_placements_per_node; SELECT citus_set_default_rebalance_strategy('by_shard_count'); @@ -835,14 +1000,18 @@ SELECT * FROM get_rebalance_table_shards_plan('tab'); -- Check all the error handling cases SELECT * FROM get_rebalance_table_shards_plan('tab', rebalance_strategy := 'non_existing'); SELECT * FROM rebalance_table_shards('tab', rebalance_strategy := 'non_existing'); +CALL citus_cleanup_orphaned_shards(); SELECT * FROM master_drain_node('localhost', :worker_2_port, rebalance_strategy := 'non_existing'); +CALL citus_cleanup_orphaned_shards(); SELECT citus_set_default_rebalance_strategy('non_existing'); UPDATE pg_dist_rebalance_strategy SET default_strategy=false; SELECT * FROM get_rebalance_table_shards_plan('tab'); SELECT * FROM rebalance_table_shards('tab'); +CALL citus_cleanup_orphaned_shards(); SELECT * FROM master_drain_node('localhost', :worker_2_port); +CALL citus_cleanup_orphaned_shards(); UPDATE pg_dist_rebalance_strategy SET default_strategy=true WHERE name='by_shard_count'; CREATE OR REPLACE FUNCTION shard_cost_no_arguments() @@ -1012,7 +1181,7 @@ UPDATE pg_dist_rebalance_strategy SET default_strategy=true WHERE name='by_shard SELECT citus_add_rebalance_strategy( 'default_threshold_too_low', 'citus_shard_cost_1', - 'capacity_high_worker_1', + 'capacity_high_worker_2', 'citus_shard_allowed_on_node_true', 0, 0.1 @@ -1040,7 +1209,6 @@ CREATE TABLE dist_table_test_3(a int); SET citus.shard_count TO 4; SET citus.shard_replication_factor TO 1; -SET citus.replication_model TO "statement"; SELECT create_distributed_table('dist_table_test_3', 'a'); CREATE TABLE ref_table(a int); @@ -1053,6 +1221,12 @@ SELECT count(*) FROM pg_dist_shard NATURAL JOIN pg_dist_shard_placement WHERE lo SET citus.shard_replication_factor TO 2; SELECT replicate_table_shards('dist_table_test_3', max_shard_copies := 4, shard_transfer_mode:='block_writes'); +-- Mark table as coordinator replicated in order to be able to test replicate_table_shards +UPDATE pg_dist_partition SET repmodel='c' WHERE logicalrelid IN + ('dist_table_test_3'::regclass); + +SELECT replicate_table_shards('dist_table_test_3', max_shard_copies := 4, 
shard_transfer_mode:='block_writes'); + SELECT count(*) FROM pg_dist_shard NATURAL JOIN pg_dist_shard_placement WHERE logicalrelid = 'ref_table'::regclass; SELECT 1 FROM master_remove_node('localhost', :master_port); @@ -1067,6 +1241,7 @@ SELECT 1 FROM master_add_node('localhost', :master_port, groupId=>0); SELECT count(*) FROM pg_dist_shard NATURAL JOIN pg_dist_shard_placement WHERE logicalrelid = 'ref_table'::regclass; SELECT rebalance_table_shards('rebalance_test_table', shard_transfer_mode:='block_writes'); +CALL citus_cleanup_orphaned_shards(); SELECT count(*) FROM pg_dist_shard NATURAL JOIN pg_dist_shard_placement WHERE logicalrelid = 'ref_table'::regclass; @@ -1099,6 +1274,7 @@ INSERT INTO r2 VALUES (1,2), (3,4); SELECT 1 from master_add_node('localhost', :worker_2_port); SELECT rebalance_table_shards(); +CALL citus_cleanup_orphaned_shards(); DROP TABLE t1, r1, r2; @@ -1125,6 +1301,7 @@ WHERE logicalrelid = 'r1'::regclass; -- rebalance with _only_ a reference table, this should trigger the copy SELECT rebalance_table_shards(); +CALL citus_cleanup_orphaned_shards(); -- verify the reference table is on all nodes after the rebalance SELECT count(*) @@ -1158,6 +1335,11 @@ WHERE logicalrelid = 'r1'::regclass; SELECT replicate_table_shards('t1', shard_replication_factor := 2); +-- Mark table as coordinator replicated in order to be able to test replicate_table_shards +UPDATE pg_dist_partition SET repmodel='c' WHERE logicalrelid IN + ('t1'::regclass); +SELECT replicate_table_shards('t1', shard_replication_factor := 2); + -- verify the reference table is on all nodes after replicate_table_shards SELECT count(*) FROM pg_dist_shard diff --git a/src/test/regress/sql/shard_rebalancer_unit.sql b/src/test/regress/sql/shard_rebalancer_unit.sql index 8907ad1ec..51293a227 100644 --- a/src/test/regress/sql/shard_rebalancer_unit.sql +++ b/src/test/regress/sql/shard_rebalancer_unit.sql @@ -3,7 +3,8 @@ CREATE OR REPLACE FUNCTION shard_placement_rebalance_array( shard_placement_list json[], threshold float4 DEFAULT 0, max_shard_moves int DEFAULT 1000000, - drain_only bool DEFAULT false + drain_only bool DEFAULT false, + improvement_threshold float4 DEFAULT 0.5 ) RETURNS json[] AS 'citus' @@ -381,3 +382,151 @@ SELECT unnest(shard_placement_rebalance_array( ]::json[], max_shard_moves := 5 )); + + +-- Don't move a big shards if it doesn't improve the utilization balance much. +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "a"}', + '{"node_name": "b"}']::json[], + ARRAY['{"shardid":1, "cost":20, "nodename":"a"}', + '{"shardid":2, "cost":20, "nodename":"a"}', + '{"shardid":3, "cost":100, "nodename":"b"}', + '{"shardid":4, "cost":50, "nodename":"b"}' + ]::json[] +)); +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "a"}', + '{"node_name": "b"}']::json[], + ARRAY['{"shardid":1, "cost":40, "nodename":"a"}', + '{"shardid":2, "cost":40, "nodename":"a"}', + '{"shardid":3, "cost":100, "nodename":"b"}', + '{"shardid":4, "cost":100, "nodename":"b"}' + ]::json[] +)); + +-- improvement_threshold can be used to force a move of big shards +-- if needed. 
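-- Editor's sketch (illustrative only, not part of the patch): improvement_threshold
-- is roughly how much a move must improve the balance, relative to the cost of the
-- shard being moved, before the planner considers it worthwhile. For the placements
-- above (node a: 20+20, node b: 100+50), moving the 100-cost shard barely reduces
-- the imbalance, so it is skipped at the default threshold; lowering the threshold
-- forces it, as in the unit-test calls that follow or in a plan request such as:
SELECT * FROM get_rebalance_table_shards_plan('tab',
    rebalance_strategy := 'by_disk_size',
    improvement_threshold := 0.1);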
+SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "a"}', + '{"node_name": "b"}']::json[], + ARRAY['{"shardid":1, "cost":20, "nodename":"a"}', + '{"shardid":2, "cost":20, "nodename":"a"}', + '{"shardid":3, "cost":100, "nodename":"b"}', + '{"shardid":4, "cost":50, "nodename":"b"}' + ]::json[], + improvement_threshold := 0.1 +)); +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "a"}', + '{"node_name": "b"}']::json[], + ARRAY['{"shardid":1, "cost":40, "nodename":"a"}', + '{"shardid":2, "cost":40, "nodename":"a"}', + '{"shardid":3, "cost":100, "nodename":"b"}', + '{"shardid":4, "cost":100, "nodename":"b"}' + ]::json[], + improvement_threshold := 0.2 +)); + +-- limits notices about ignored moves +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "a"}', + '{"node_name": "b"}', + '{"node_name": "c"}', + '{"node_name": "d"}', + '{"node_name": "e"}', + '{"node_name": "f"}', + '{"node_name": "g"}' + ]::json[], + ARRAY['{"shardid":1, "cost":39, "nodename":"a"}', + '{"shardid":2, "cost":39, "nodename":"b"}', + '{"shardid":3, "cost":39, "nodename":"c"}', + '{"shardid":4, "cost":39, "nodename":"d"}', + '{"shardid":5, "cost":39, "nodename":"e"}', + '{"shardid":6, "cost":39, "nodename":"f"}', + '{"shardid":7, "cost":40, "nodename":"g"}', + '{"shardid":8, "cost":39, "nodename":"g"}' + ]::json[], + improvement_threshold := 0.1 +)); + + + +-- limits notices based on GUC +set citus.max_rebalancer_logged_ignored_moves = 1; +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "a"}', + '{"node_name": "b"}', + '{"node_name": "c"}', + '{"node_name": "d"}', + '{"node_name": "e"}', + '{"node_name": "f"}', + '{"node_name": "g"}' + ]::json[], + ARRAY['{"shardid":1, "cost":39, "nodename":"a"}', + '{"shardid":2, "cost":39, "nodename":"b"}', + '{"shardid":3, "cost":39, "nodename":"c"}', + '{"shardid":4, "cost":39, "nodename":"d"}', + '{"shardid":5, "cost":39, "nodename":"e"}', + '{"shardid":6, "cost":39, "nodename":"f"}', + '{"shardid":7, "cost":40, "nodename":"g"}', + '{"shardid":8, "cost":39, "nodename":"g"}' + ]::json[], + improvement_threshold := 0.1 +)); +set citus.max_rebalancer_logged_ignored_moves = 10; +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "a"}', + '{"node_name": "b"}', + '{"node_name": "c"}', + '{"node_name": "d"}', + '{"node_name": "e"}', + '{"node_name": "f"}', + '{"node_name": "g"}' + ]::json[], + ARRAY['{"shardid":1, "cost":39, "nodename":"a"}', + '{"shardid":2, "cost":39, "nodename":"b"}', + '{"shardid":3, "cost":39, "nodename":"c"}', + '{"shardid":4, "cost":39, "nodename":"d"}', + '{"shardid":5, "cost":39, "nodename":"e"}', + '{"shardid":6, "cost":39, "nodename":"f"}', + '{"shardid":7, "cost":40, "nodename":"g"}', + '{"shardid":8, "cost":39, "nodename":"g"}' + ]::json[], + improvement_threshold := 0.1 +)); +set citus.max_rebalancer_logged_ignored_moves = -1; +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "a"}', + '{"node_name": "b"}', + '{"node_name": "c"}', + '{"node_name": "d"}', + '{"node_name": "e"}', + '{"node_name": "f"}', + '{"node_name": "g"}' + ]::json[], + ARRAY['{"shardid":1, "cost":39, "nodename":"a"}', + '{"shardid":2, "cost":39, "nodename":"b"}', + '{"shardid":3, "cost":39, "nodename":"c"}', + '{"shardid":4, "cost":39, "nodename":"d"}', + '{"shardid":5, "cost":39, "nodename":"e"}', + '{"shardid":6, "cost":39, "nodename":"f"}', + '{"shardid":7, "cost":40, "nodename":"g"}', + '{"shardid":8, "cost":39, "nodename":"g"}' + ]::json[], + improvement_threshold := 
0.1 +)); + + +-- Combining improvement_threshold and capacity works as expected. +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "a"}', + '{"node_name": "b", "capacity": 2}']::json[], + ARRAY['{"shardid":1, "cost":20, "nodename":"a"}', + '{"shardid":2, "cost":10, "nodename":"a"}', + '{"shardid":3, "cost":10, "nodename":"a"}', + '{"shardid":4, "cost":100, "nodename":"b"}', + '{"shardid":5, "cost":50, "nodename":"b"}', + '{"shardid":6, "cost":50, "nodename":"b"}' + ]::json[], + improvement_threshold := 0.6 +)); diff --git a/src/test/regress/sql/single_node.sql b/src/test/regress/sql/single_node.sql index 7cbb20fbd..228039d34 100644 --- a/src/test/regress/sql/single_node.sql +++ b/src/test/regress/sql/single_node.sql @@ -3,7 +3,6 @@ SET search_path TO single_node; SET citus.shard_count TO 4; SET citus.shard_replication_factor TO 1; SET citus.next_shard_id TO 90630500; -SET citus.replication_model TO 'streaming'; -- adding the coordinator as inactive is disallowed SELECT 1 FROM master_add_inactive_node('localhost', :master_port, groupid => 0); @@ -43,6 +42,23 @@ DROP TABLE loc; -- remove the coordinator to try again with create_distributed_table SELECT master_remove_node(nodename, nodeport) FROM pg_dist_node WHERE groupid = 0; +-- verify the coordinator gets auto-added with the localhost GUC +ALTER SYSTEM SET citus.local_hostname TO '127.0.0.1'; -- although not a hostname, should work for connecting locally +SELECT pg_reload_conf(); +SELECT pg_sleep(.1); -- wait to make sure the config has changed before running the GUC + +CREATE TABLE test(x int, y int); +SELECT create_distributed_table('test','x'); + +SELECT groupid, nodename, nodeport, isactive, shouldhaveshards, hasmetadata, metadatasynced FROM pg_dist_node; +DROP TABLE test; +-- remove the coordinator to try again +SELECT master_remove_node(nodename, nodeport) FROM pg_dist_node WHERE groupid = 0; + +ALTER SYSTEM RESET citus.local_hostname; +SELECT pg_reload_conf(); +SELECT pg_sleep(.1); -- wait to make sure the config has changed before running the GUC + CREATE TABLE test(x int, y int); SELECT create_distributed_table('test','x'); @@ -889,6 +905,25 @@ WITH cte_1 AS (INSERT INTO non_binary_copy_test SELECT * FROM non_binary_copy_test LIMIT 10000 ON CONFLICT (key) DO UPDATE SET value = (0, 'citus0')::new_type RETURNING key, z) SELECT count(DISTINCT key::text), count(DISTINCT z::text) FROM cte_1; +-- test disabling drop and truncate for known shards +SET citus.shard_replication_factor TO 1; +CREATE TABLE test_disabling_drop_and_truncate (a int); +SELECT create_distributed_table('test_disabling_drop_and_truncate', 'a'); +SET citus.enable_manual_changes_to_shards TO off; + +-- these should error out +DROP TABLE test_disabling_drop_and_truncate_102040; +TRUNCATE TABLE test_disabling_drop_and_truncate_102040; + +RESET citus.enable_manual_changes_to_shards; + +-- these should work as expected +TRUNCATE TABLE test_disabling_drop_and_truncate_102040; +DROP TABLE test_disabling_drop_and_truncate_102040; + +RESET citus.shard_replication_factor; +DROP TABLE test_disabling_drop_and_truncate; + -- lets flush the copy often to make sure everyhing is fine SET citus.local_copy_flush_threshold TO 1; TRUNCATE another_schema_table; diff --git a/src/test/regress/sql/single_node_truncate.sql b/src/test/regress/sql/single_node_truncate.sql new file mode 100644 index 000000000..5b555ff91 --- /dev/null +++ b/src/test/regress/sql/single_node_truncate.sql @@ -0,0 +1,63 @@ +CREATE SCHEMA single_node_truncate; +SET search_path TO
single_node_truncate; +SET citus.shard_replication_factor TO 1; + +-- helper view that prints out local table names and sizes in the schema +CREATE VIEW table_sizes AS +SELECT + c.relname as name, + pg_catalog.pg_table_size(c.oid) > 0 as has_data +FROM pg_catalog.pg_class c + LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace +WHERE c.relkind = 'r' + AND n.nspname = 'single_node_truncate' +ORDER BY 1; + + +-- test truncating reference tables +CREATE TABLE ref(id int UNIQUE, data int); +INSERT INTO ref SELECT x,x FROM generate_series(1,10000) x; +SELECT create_reference_table('ref'); + +CREATE TABLE citus_local(id int, ref_id int REFERENCES ref(id)); +INSERT INTO citus_local SELECT x,x FROM generate_series(1,10000) x; + +-- verify that shell tables for citus local tables are empty +SELECT * FROM table_sizes; + +-- verify that this UDF is noop on Citus local tables +SELECT truncate_local_data_after_distributing_table('citus_local'); +SELECT * FROM table_sizes; + +-- test that we allow cascading truncates to citus local tables +BEGIN; +SELECT truncate_local_data_after_distributing_table('ref'); +SELECT * FROM table_sizes; +ROLLBACK; + +-- test that we allow distributing tables that have foreign keys to reference tables +CREATE TABLE dist(id int, ref_id int REFERENCES ref(id)); +INSERT INTO dist SELECT x,x FROM generate_series(1,10000) x; +SELECT create_distributed_table('dist','id'); + +-- the following should truncate ref, dist and citus_local +BEGIN; +SELECT truncate_local_data_after_distributing_table('ref'); +SELECT * FROM table_sizes; +ROLLBACK; + +-- the following should truncate dist table only +BEGIN; +SELECT truncate_local_data_after_distributing_table('dist'); +SELECT * FROM table_sizes; +ROLLBACK; + +DROP TABLE ref, dist, citus_local; +DROP VIEW table_sizes; +DROP SCHEMA single_node_truncate CASCADE; + +-- Remove the coordinator +SELECT 1 FROM master_remove_node('localhost', :master_port); +-- restart nodeid sequence so that multi_cluster_management still has the same +-- nodeids +ALTER SEQUENCE pg_dist_node_nodeid_seq RESTART 1; diff --git a/src/test/regress/sql/subquery_and_cte.sql b/src/test/regress/sql/subquery_and_cte.sql index b8f33b96c..47bfe7184 100644 --- a/src/test/regress/sql/subquery_and_cte.sql +++ b/src/test/regress/sql/subquery_and_cte.sql @@ -5,6 +5,7 @@ SET search_path TO subquery_and_ctes; CREATE TABLE users_table_local AS SELECT * FROM users_table; +SET citus.shard_replication_factor TO 1; CREATE TABLE dist_table (id int, value int); SELECT create_distributed_table('dist_table', 'id', colocate_with => 'users_table'); diff --git a/src/test/regress/sql/undistribute_table_cascade_mx.sql b/src/test/regress/sql/undistribute_table_cascade_mx.sql index c8f2f82ca..8cc966163 100644 --- a/src/test/regress/sql/undistribute_table_cascade_mx.sql +++ b/src/test/regress/sql/undistribute_table_cascade_mx.sql @@ -83,8 +83,6 @@ CREATE TABLE users ( , primary key (org_id, id) ); -SET citus.replication_model to 'streaming'; - -- "users" table was implicitly added to citus metadata when defining foreign key, -- so create_distributed_table would first undistribute it. -- Show that it works well when changing sequence dependencies on mx workers. 
diff --git a/src/test/regress/sql/union_pushdown.sql b/src/test/regress/sql/union_pushdown.sql index 11655a701..c40f7c6fd 100644 --- a/src/test/regress/sql/union_pushdown.sql +++ b/src/test/regress/sql/union_pushdown.sql @@ -654,6 +654,68 @@ JOIN users_table_part USING(user_id) LIMIT 1; $$); +-- #4781 +CREATE TABLE test_a (id int, k int); +CREATE TABLE test_b (id int, k int); +SELECT create_distributed_table('test_a','id'); +SELECT create_distributed_table('test_b','id'); + +CREATE OR REPLACE VIEW v AS SELECT * from test_a where k>1 UNION ALL SELECT * from test_b where k<1; +-- the following can be pushed down since dist_key is used in the aggregation +SELECT public.explain_has_distributed_subplan($$ +EXPLAIN +SELECT COUNT(id) FROM v; +$$); + +SELECT public.explain_has_distributed_subplan($$ +EXPLAIN +SELECT AVG(id) FROM v; +$$); + +SELECT public.explain_has_distributed_subplan($$ +EXPLAIN +SELECT SUM(id) FROM v; +$$); + +SELECT public.explain_has_distributed_subplan($$ +EXPLAIN +SELECT MAX(id) FROM v; +$$); + +-- cannot be pushed down because postgres optimizes fields, needs to be fixed with #4781 +SELECT public.explain_has_distributed_subplan($$ +EXPLAIN +SELECT COUNT(k) FROM v; +$$); + +SELECT public.explain_has_distributed_subplan($$ +EXPLAIN +SELECT AVG(k) FROM v; +$$); + +SELECT public.explain_has_distributed_subplan($$ +EXPLAIN +SELECT SUM(k) FROM v; +$$); + +SELECT public.explain_has_distributed_subplan($$ +EXPLAIN +SELECT MAX(k) FROM v; +$$); + +-- order by prevents postgres from optimizing fields so it can be pushed down +SELECT public.explain_has_distributed_subplan($$ +EXPLAIN +SELECT id, COUNT(*) FROM v GROUP BY id ORDER BY id; +$$); + +-- order by is not on dist_key so it can't be pushed down, needs to be fixed with #4781 +SELECT public.explain_has_distributed_subplan($$ +EXPLAIN +SELECT k, COUNT(*) FROM v GROUP BY k ORDER BY k; +$$); + + RESET client_min_messages; DROP SCHEMA union_pushdown CASCADE; diff --git a/src/test/regress/sql/upgrade_distributed_function_before.sql b/src/test/regress/sql/upgrade_distributed_function_before.sql index bf12d0b0c..f3682dc9a 100644 --- a/src/test/regress/sql/upgrade_distributed_function_before.sql +++ b/src/test/regress/sql/upgrade_distributed_function_before.sql @@ -1,6 +1,5 @@ CREATE SCHEMA upgrade_distributed_function_before; SET search_path TO upgrade_distributed_function_before, public; -SET citus.replication_model TO streaming; SET citus.shard_replication_factor TO 1; CREATE TABLE t1 (a int PRIMARY KEY, b int); diff --git a/src/test/regress/sql/upgrade_rebalance_strategy_before.sql b/src/test/regress/sql/upgrade_rebalance_strategy_before.sql index b76aa4ccd..458fb9cf6 100644 --- a/src/test/regress/sql/upgrade_rebalance_strategy_before.sql +++ b/src/test/regress/sql/upgrade_rebalance_strategy_before.sql @@ -18,7 +18,6 @@ CREATE FUNCTION only_worker_2(shardid bigint, nodeidarg int) (CASE WHEN nodeport = 57638 THEN TRUE ELSE FALSE END) FROM pg_dist_node where nodeid = nodeidarg $$ LANGUAGE sql; -ALTER TABLE pg_catalog.pg_dist_rebalance_strategy DISABLE TRIGGER pg_dist_rebalance_strategy_enterprise_check_trigger; SELECT citus_add_rebalance_strategy( 'custom_strategy', @@ -26,7 +25,7 @@ SELECT citus_add_rebalance_strategy( 'capacity_high_worker_1', 'only_worker_2', 0.5, - 0.2 + 0.2, + 0.3 ); SELECT citus_set_default_rebalance_strategy('custom_strategy'); -ALTER TABLE pg_catalog.pg_dist_rebalance_strategy ENABLE TRIGGER pg_dist_rebalance_strategy_enterprise_check_trigger; diff --git a/src/test/regress/sql/window_functions.sql
b/src/test/regress/sql/window_functions.sql index ee515437f..5c94515a9 100644 --- a/src/test/regress/sql/window_functions.sql +++ b/src/test/regress/sql/window_functions.sql @@ -44,6 +44,9 @@ ORDER BY -- window function operates on the results of -- a join +-- we also want to verify that this doesn't crash +-- when the logging level is DEBUG4 +SET log_min_messages TO DEBUG4; SELECT us.user_id, SUM(us.value_1) OVER (PARTITION BY us.user_id) @@ -616,3 +619,21 @@ GROUP BY 1 ORDER BY 1; select null = sum(null::int2) over () from public.users_table as ut limit 1; +-- verify that this doesn't crash with DEBUG4 +SET log_min_messages TO DEBUG4; +SELECT + user_id, max(value_1) OVER (PARTITION BY user_id, MIN(value_2)) +FROM ( + SELECT + DISTINCT us.user_id, us.value_2, value_1, random() as r1 + FROM + users_table as us, events_table + WHERE + us.user_id = events_table.user_id AND event_type IN (1,2) + ORDER BY + user_id, value_2 + ) s +GROUP BY + 1, value_1 +ORDER BY + 2 DESC, 1; diff --git a/src/test/regress/upgrade/config.py b/src/test/regress/upgrade/config.py index 8069ef5cb..56bd1f844 100644 --- a/src/test/regress/upgrade/config.py +++ b/src/test/regress/upgrade/config.py @@ -11,7 +11,7 @@ MIXED_AFTER_CITUS_UPGRADE_SCHEDULE = './mixed_after_citus_upgrade_schedule' MASTER = 'master' # This should be updated when citus version changes -MASTER_VERSION = '10.1' +MASTER_VERSION = '10.2' HOME = expanduser("~")